67 | 63.8M | } dec_group.cc:decltype (Zero((hwy::N_SSE4::Simd<int, 4ul, 0>::Rebind<float>)())) jxl::N_SSE4::(anonymous namespace)::AdjustQuantBias<hwy::N_SSE4::Simd<int, 4ul, 0> >(hwy::N_SSE4::Simd<int, 4ul, 0>, unsigned long, decltype (Zero((hwy::N_SSE4::Simd<int, 4ul, 0>)())), float const*) Line | Count | Source | 37 | 15.1M | const float* HWY_RESTRICT biases) { | 38 | 15.1M | const Rebind<float, DI> df; | 39 | | | 40 | 15.1M | const auto quant = ConvertTo(df, quant_i); | 41 | | | 42 | | // Compare |quant|, keep sign bit for negating result. | 43 | 15.1M | const auto kSign = BitCast(df, Set(di, INT32_MIN)); | 44 | 15.1M | const auto sign = And(quant, kSign); // TODO(janwas): = abs ^ orig | 45 | 15.1M | const auto abs_quant = AndNot(kSign, quant); | 46 | | | 47 | | // If |x| is 1, kZeroBias creates a different bias for each channel. | 48 | | // We're implementing the following: | 49 | | // if (quant == 0) return 0; | 50 | | // if (quant == 1) return biases[c]; | 51 | | // if (quant == -1) return -biases[c]; | 52 | | // return quant - biases[3] / quant; | 53 | | | 54 | | // Integer comparison is not helpful because Clang incurs bypass penalties | 55 | | // from unnecessarily mixing integer and float. | 56 | 15.1M | const auto is_01 = Lt(abs_quant, Set(df, 1.125f)); | 57 | 15.1M | const auto not_0 = Gt(abs_quant, Zero(df)); | 58 | | | 59 | | // Bitwise logic is faster than quant * biases[c]. | 60 | 15.1M | const auto one_bias = IfThenElseZero(not_0, Xor(Set(df, biases[c]), sign)); | 61 | | | 62 | | // About 2E-5 worse than ReciprocalNR or division. | 63 | 15.1M | const auto bias = | 64 | 15.1M | NegMulAdd(Set(df, biases[3]), ApproximateReciprocal(quant), quant); | 65 | | | 66 | 15.1M | return IfThenElse(is_01, one_bias, bias); | 67 | 15.1M | } |
dec_group.cc:decltype (Zero((hwy::N_AVX2::Simd<int, 8ul, 0>::Rebind<float>)())) jxl::N_AVX2::(anonymous namespace)::AdjustQuantBias<hwy::N_AVX2::Simd<int, 8ul, 0> >(hwy::N_AVX2::Simd<int, 8ul, 0>, unsigned long, decltype (Zero((hwy::N_AVX2::Simd<int, 8ul, 0>)())), float const*) Line | Count | Source | 37 | 28.7M | const float* HWY_RESTRICT biases) { | 38 | 28.7M | const Rebind<float, DI> df; | 39 | | | 40 | 28.7M | const auto quant = ConvertTo(df, quant_i); | 41 | | | 42 | | // Compare |quant|, keep sign bit for negating result. | 43 | 28.7M | const auto kSign = BitCast(df, Set(di, INT32_MIN)); | 44 | 28.7M | const auto sign = And(quant, kSign); // TODO(janwas): = abs ^ orig | 45 | 28.7M | const auto abs_quant = AndNot(kSign, quant); | 46 | | | 47 | | // If |x| is 1, kZeroBias creates a different bias for each channel. | 48 | | // We're implementing the following: | 49 | | // if (quant == 0) return 0; | 50 | | // if (quant == 1) return biases[c]; | 51 | | // if (quant == -1) return -biases[c]; | 52 | | // return quant - biases[3] / quant; | 53 | | | 54 | | // Integer comparison is not helpful because Clang incurs bypass penalties | 55 | | // from unnecessarily mixing integer and float. | 56 | 28.7M | const auto is_01 = Lt(abs_quant, Set(df, 1.125f)); | 57 | 28.7M | const auto not_0 = Gt(abs_quant, Zero(df)); | 58 | | | 59 | | // Bitwise logic is faster than quant * biases[c]. | 60 | 28.7M | const auto one_bias = IfThenElseZero(not_0, Xor(Set(df, biases[c]), sign)); | 61 | | | 62 | | // About 2E-5 worse than ReciprocalNR or division. | 63 | 28.7M | const auto bias = | 64 | 28.7M | NegMulAdd(Set(df, biases[3]), ApproximateReciprocal(quant), quant); | 65 | | | 66 | 28.7M | return IfThenElse(is_01, one_bias, bias); | 67 | 28.7M | } |
dec_group.cc:decltype (Zero((hwy::N_SSE2::Simd<int, 4ul, 0>::Rebind<float>)())) jxl::N_SSE2::(anonymous namespace)::AdjustQuantBias<hwy::N_SSE2::Simd<int, 4ul, 0> >(hwy::N_SSE2::Simd<int, 4ul, 0>, unsigned long, decltype (Zero((hwy::N_SSE2::Simd<int, 4ul, 0>)())), float const*) Line | Count | Source | 37 | 19.8M | const float* HWY_RESTRICT biases) { | 38 | 19.8M | const Rebind<float, DI> df; | 39 | | | 40 | 19.8M | const auto quant = ConvertTo(df, quant_i); | 41 | | | 42 | | // Compare |quant|, keep sign bit for negating result. | 43 | 19.8M | const auto kSign = BitCast(df, Set(di, INT32_MIN)); | 44 | 19.8M | const auto sign = And(quant, kSign); // TODO(janwas): = abs ^ orig | 45 | 19.8M | const auto abs_quant = AndNot(kSign, quant); | 46 | | | 47 | | // If |x| is 1, kZeroBias creates a different bias for each channel. | 48 | | // We're implementing the following: | 49 | | // if (quant == 0) return 0; | 50 | | // if (quant == 1) return biases[c]; | 51 | | // if (quant == -1) return -biases[c]; | 52 | | // return quant - biases[3] / quant; | 53 | | | 54 | | // Integer comparison is not helpful because Clang incurs bypass penalties | 55 | | // from unnecessarily mixing integer and float. | 56 | 19.8M | const auto is_01 = Lt(abs_quant, Set(df, 1.125f)); | 57 | 19.8M | const auto not_0 = Gt(abs_quant, Zero(df)); | 58 | | | 59 | | // Bitwise logic is faster than quant * biases[c]. | 60 | 19.8M | const auto one_bias = IfThenElseZero(not_0, Xor(Set(df, biases[c]), sign)); | 61 | | | 62 | | // About 2E-5 worse than ReciprocalNR or division. | 63 | 19.8M | const auto bias = | 64 | 19.8M | NegMulAdd(Set(df, biases[3]), ApproximateReciprocal(quant), quant); | 65 | | | 66 | 19.8M | return IfThenElse(is_01, one_bias, bias); | 67 | 19.8M | } |
Unexecuted instantiation: enc_group.cc:decltype (Zero((hwy::N_SSE4::Simd<int, 4ul, 0>::Rebind<float>)())) jxl::N_SSE4::(anonymous namespace)::AdjustQuantBias<hwy::N_SSE4::Simd<int, 4ul, 0> >(hwy::N_SSE4::Simd<int, 4ul, 0>, unsigned long, decltype (Zero((hwy::N_SSE4::Simd<int, 4ul, 0>)())), float const*) enc_group.cc:decltype (Zero((hwy::N_AVX2::Simd<int, 8ul, 0>::Rebind<float>)())) jxl::N_AVX2::(anonymous namespace)::AdjustQuantBias<hwy::N_AVX2::Simd<int, 8ul, 0> >(hwy::N_AVX2::Simd<int, 8ul, 0>, unsigned long, decltype (Zero((hwy::N_AVX2::Simd<int, 8ul, 0>)())), float const*) Line | Count | Source | 37 | 1.92k | const float* HWY_RESTRICT biases) { | 38 | 1.92k | const Rebind<float, DI> df; | 39 | | | 40 | 1.92k | const auto quant = ConvertTo(df, quant_i); | 41 | | | 42 | | // Compare |quant|, keep sign bit for negating result. | 43 | 1.92k | const auto kSign = BitCast(df, Set(di, INT32_MIN)); | 44 | 1.92k | const auto sign = And(quant, kSign); // TODO(janwas): = abs ^ orig | 45 | 1.92k | const auto abs_quant = AndNot(kSign, quant); | 46 | | | 47 | | // If |x| is 1, kZeroBias creates a different bias for each channel. | 48 | | // We're implementing the following: | 49 | | // if (quant == 0) return 0; | 50 | | // if (quant == 1) return biases[c]; | 51 | | // if (quant == -1) return -biases[c]; | 52 | | // return quant - biases[3] / quant; | 53 | | | 54 | | // Integer comparison is not helpful because Clang incurs bypass penalties | 55 | | // from unnecessarily mixing integer and float. | 56 | 1.92k | const auto is_01 = Lt(abs_quant, Set(df, 1.125f)); | 57 | 1.92k | const auto not_0 = Gt(abs_quant, Zero(df)); | 58 | | | 59 | | // Bitwise logic is faster than quant * biases[c]. | 60 | 1.92k | const auto one_bias = IfThenElseZero(not_0, Xor(Set(df, biases[c]), sign)); | 61 | | | 62 | | // About 2E-5 worse than ReciprocalNR or division. | 63 | 1.92k | const auto bias = | 64 | 1.92k | NegMulAdd(Set(df, biases[3]), ApproximateReciprocal(quant), quant); | 65 | | | 66 | 1.92k | return IfThenElse(is_01, one_bias, bias); | 67 | 1.92k | } |
Unexecuted instantiation: enc_group.cc:decltype (Zero((hwy::N_SSE2::Simd<int, 4ul, 0>::Rebind<float>)())) jxl::N_SSE2::(anonymous namespace)::AdjustQuantBias<hwy::N_SSE2::Simd<int, 4ul, 0> >(hwy::N_SSE2::Simd<int, 4ul, 0>, unsigned long, decltype (Zero((hwy::N_SSE2::Simd<int, 4ul, 0>)())), float const*) |