67 | 235M | } Unexecuted instantiation: enc_group.cc:decltype (Zero((hwy::N_SSE4::Simd<int, 4ul, 0>::Rebind<float>)())) jxl::N_SSE4::(anonymous namespace)::AdjustQuantBias<hwy::N_SSE4::Simd<int, 4ul, 0> >(hwy::N_SSE4::Simd<int, 4ul, 0>, unsigned long, decltype (Zero((hwy::N_SSE4::Simd<int, 4ul, 0>)())), float const*) enc_group.cc:decltype (Zero((hwy::N_AVX2::Simd<int, 8ul, 0>::Rebind<float>)())) jxl::N_AVX2::(anonymous namespace)::AdjustQuantBias<hwy::N_AVX2::Simd<int, 8ul, 0> >(hwy::N_AVX2::Simd<int, 8ul, 0>, unsigned long, decltype (Zero((hwy::N_AVX2::Simd<int, 8ul, 0>)())), float const*) Line | Count | Source | 37 | 23.1M | const float* HWY_RESTRICT biases) { | 38 | 23.1M | const Rebind<float, DI> df; | 39 | | | 40 | 23.1M | const auto quant = ConvertTo(df, quant_i); | 41 | | | 42 | | // Compare |quant|, keep sign bit for negating result. | 43 | 23.1M | const auto kSign = BitCast(df, Set(di, INT32_MIN)); | 44 | 23.1M | const auto sign = And(quant, kSign); // TODO(janwas): = abs ^ orig | 45 | 23.1M | const auto abs_quant = AndNot(kSign, quant); | 46 | | | 47 | | // If |x| is 1, kZeroBias creates a different bias for each channel. | 48 | | // We're implementing the following: | 49 | | // if (quant == 0) return 0; | 50 | | // if (quant == 1) return biases[c]; | 51 | | // if (quant == -1) return -biases[c]; | 52 | | // return quant - biases[3] / quant; | 53 | | | 54 | | // Integer comparison is not helpful because Clang incurs bypass penalties | 55 | | // from unnecessarily mixing integer and float. | 56 | 23.1M | const auto is_01 = Lt(abs_quant, Set(df, 1.125f)); | 57 | 23.1M | const auto not_0 = Gt(abs_quant, Zero(df)); | 58 | | | 59 | | // Bitwise logic is faster than quant * biases[c]. | 60 | 23.1M | const auto one_bias = IfThenElseZero(not_0, Xor(Set(df, biases[c]), sign)); | 61 | | | 62 | | // About 2E-5 worse than ReciprocalNR or division. | 63 | 23.1M | const auto bias = | 64 | 23.1M | NegMulAdd(Set(df, biases[3]), ApproximateReciprocal(quant), quant); | 65 | | | 66 | 23.1M | return IfThenElse(is_01, one_bias, bias); | 67 | 23.1M | } |
Unexecuted instantiation: enc_group.cc:decltype (Zero((hwy::N_AVX3::Simd<int, 16ul, 0>::Rebind<float>)())) jxl::N_AVX3::(anonymous namespace)::AdjustQuantBias<hwy::N_AVX3::Simd<int, 16ul, 0> >(hwy::N_AVX3::Simd<int, 16ul, 0>, unsigned long, decltype (Zero((hwy::N_AVX3::Simd<int, 16ul, 0>)())), float const*) Unexecuted instantiation: enc_group.cc:decltype (Zero((hwy::N_AVX3_ZEN4::Simd<int, 16ul, 0>::Rebind<float>)())) jxl::N_AVX3_ZEN4::(anonymous namespace)::AdjustQuantBias<hwy::N_AVX3_ZEN4::Simd<int, 16ul, 0> >(hwy::N_AVX3_ZEN4::Simd<int, 16ul, 0>, unsigned long, decltype (Zero((hwy::N_AVX3_ZEN4::Simd<int, 16ul, 0>)())), float const*) Unexecuted instantiation: enc_group.cc:decltype (Zero((hwy::N_AVX3_SPR::Simd<int, 16ul, 0>::Rebind<float>)())) jxl::N_AVX3_SPR::(anonymous namespace)::AdjustQuantBias<hwy::N_AVX3_SPR::Simd<int, 16ul, 0> >(hwy::N_AVX3_SPR::Simd<int, 16ul, 0>, unsigned long, decltype (Zero((hwy::N_AVX3_SPR::Simd<int, 16ul, 0>)())), float const*) Unexecuted instantiation: enc_group.cc:decltype (Zero((hwy::N_SSE2::Simd<int, 4ul, 0>::Rebind<float>)())) jxl::N_SSE2::(anonymous namespace)::AdjustQuantBias<hwy::N_SSE2::Simd<int, 4ul, 0> >(hwy::N_SSE2::Simd<int, 4ul, 0>, unsigned long, decltype (Zero((hwy::N_SSE2::Simd<int, 4ul, 0>)())), float const*) Unexecuted instantiation: dec_group.cc:decltype (Zero((hwy::N_SSE4::Simd<int, 4ul, 0>::Rebind<float>)())) jxl::N_SSE4::(anonymous namespace)::AdjustQuantBias<hwy::N_SSE4::Simd<int, 4ul, 0> >(hwy::N_SSE4::Simd<int, 4ul, 0>, unsigned long, decltype (Zero((hwy::N_SSE4::Simd<int, 4ul, 0>)())), float const*) dec_group.cc:decltype (Zero((hwy::N_AVX2::Simd<int, 8ul, 0>::Rebind<float>)())) jxl::N_AVX2::(anonymous namespace)::AdjustQuantBias<hwy::N_AVX2::Simd<int, 8ul, 0> >(hwy::N_AVX2::Simd<int, 8ul, 0>, unsigned long, decltype (Zero((hwy::N_AVX2::Simd<int, 8ul, 0>)())), float const*) Line | Count | Source | 37 | 211M | const float* HWY_RESTRICT biases) { | 38 | 211M | const Rebind<float, DI> df; | 39 | | | 40 | 211M | const auto quant = ConvertTo(df, quant_i); | 41 | | | 42 | | // Compare |quant|, keep sign bit for negating result. | 43 | 211M | const auto kSign = BitCast(df, Set(di, INT32_MIN)); | 44 | 211M | const auto sign = And(quant, kSign); // TODO(janwas): = abs ^ orig | 45 | 211M | const auto abs_quant = AndNot(kSign, quant); | 46 | | | 47 | | // If |x| is 1, kZeroBias creates a different bias for each channel. | 48 | | // We're implementing the following: | 49 | | // if (quant == 0) return 0; | 50 | | // if (quant == 1) return biases[c]; | 51 | | // if (quant == -1) return -biases[c]; | 52 | | // return quant - biases[3] / quant; | 53 | | | 54 | | // Integer comparison is not helpful because Clang incurs bypass penalties | 55 | | // from unnecessarily mixing integer and float. | 56 | 211M | const auto is_01 = Lt(abs_quant, Set(df, 1.125f)); | 57 | 211M | const auto not_0 = Gt(abs_quant, Zero(df)); | 58 | | | 59 | | // Bitwise logic is faster than quant * biases[c]. | 60 | 211M | const auto one_bias = IfThenElseZero(not_0, Xor(Set(df, biases[c]), sign)); | 61 | | | 62 | | // About 2E-5 worse than ReciprocalNR or division. | 63 | 211M | const auto bias = | 64 | 211M | NegMulAdd(Set(df, biases[3]), ApproximateReciprocal(quant), quant); | 65 | | | 66 | 211M | return IfThenElse(is_01, one_bias, bias); | 67 | 211M | } |
Unexecuted instantiation: dec_group.cc:decltype (Zero((hwy::N_AVX3::Simd<int, 16ul, 0>::Rebind<float>)())) jxl::N_AVX3::(anonymous namespace)::AdjustQuantBias<hwy::N_AVX3::Simd<int, 16ul, 0> >(hwy::N_AVX3::Simd<int, 16ul, 0>, unsigned long, decltype (Zero((hwy::N_AVX3::Simd<int, 16ul, 0>)())), float const*) Unexecuted instantiation: dec_group.cc:decltype (Zero((hwy::N_AVX3_ZEN4::Simd<int, 16ul, 0>::Rebind<float>)())) jxl::N_AVX3_ZEN4::(anonymous namespace)::AdjustQuantBias<hwy::N_AVX3_ZEN4::Simd<int, 16ul, 0> >(hwy::N_AVX3_ZEN4::Simd<int, 16ul, 0>, unsigned long, decltype (Zero((hwy::N_AVX3_ZEN4::Simd<int, 16ul, 0>)())), float const*) Unexecuted instantiation: dec_group.cc:decltype (Zero((hwy::N_AVX3_SPR::Simd<int, 16ul, 0>::Rebind<float>)())) jxl::N_AVX3_SPR::(anonymous namespace)::AdjustQuantBias<hwy::N_AVX3_SPR::Simd<int, 16ul, 0> >(hwy::N_AVX3_SPR::Simd<int, 16ul, 0>, unsigned long, decltype (Zero((hwy::N_AVX3_SPR::Simd<int, 16ul, 0>)())), float const*) Unexecuted instantiation: dec_group.cc:decltype (Zero((hwy::N_SSE2::Simd<int, 4ul, 0>::Rebind<float>)())) jxl::N_SSE2::(anonymous namespace)::AdjustQuantBias<hwy::N_SSE2::Simd<int, 4ul, 0> >(hwy::N_SSE2::Simd<int, 4ul, 0>, unsigned long, decltype (Zero((hwy::N_SSE2::Simd<int, 4ul, 0>)())), float const*) |