52 | 1.19G | } Unexecuted instantiation: enc_cluster.cc:jxl::N_SSE4::(anonymous namespace)::FastDivision<float, hwy::N_SSE4::Vec128<float, 4ul> >::operator()(hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>) const enc_cluster.cc:jxl::N_AVX2::(anonymous namespace)::FastDivision<float, hwy::N_AVX2::Vec256<float> >::operator()(hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>) const Line | Count | Source | 46 | 783M | V operator()(const V n, const V d) const { | 47 | | #if JXL_TRUE // Faster on SKX | 48 | 783M | return Div(n, d); | 49 | | #else | 50 | | return n * ReciprocalNR(d); | 51 | | #endif | 52 | 783M | } |
Unexecuted instantiation: enc_cluster.cc:jxl::N_SSE2::(anonymous namespace)::FastDivision<float, hwy::N_SSE2::Vec128<float, 4ul> >::operator()(hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>) const Unexecuted instantiation: enc_cluster.cc:jxl::N_SSE4::(anonymous namespace)::FastDivision<float, hwy::N_SSE4::Vec128<float, 1ul> >::operator()(hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>) const Unexecuted instantiation: enc_cluster.cc:jxl::N_AVX2::(anonymous namespace)::FastDivision<float, hwy::N_AVX2::Vec128<float, 1ul> >::operator()(hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>) const Unexecuted instantiation: enc_cluster.cc:jxl::N_SSE2::(anonymous namespace)::FastDivision<float, hwy::N_SSE2::Vec128<float, 1ul> >::operator()(hwy::N_SSE2::Vec128<float, 1ul>, hwy::N_SSE2::Vec128<float, 1ul>) const enc_lz77.cc:jxl::N_SSE2::(anonymous namespace)::FastDivision<float, hwy::N_SSE2::Vec128<float, 1ul> >::operator()(hwy::N_SSE2::Vec128<float, 1ul>, hwy::N_SSE2::Vec128<float, 1ul>) const Line | Count | Source | 46 | 1.22M | V operator()(const V n, const V d) const { | 47 | | #if JXL_TRUE // Faster on SKX | 48 | 1.22M | return Div(n, d); | 49 | | #else | 50 | | return n * ReciprocalNR(d); | 51 | | #endif | 52 | 1.22M | } |
Unexecuted instantiation: enc_xyb.cc:jxl::N_SSE4::(anonymous namespace)::FastDivision<float, hwy::N_SSE4::Vec128<float, 4ul> >::operator()(hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>) const enc_xyb.cc:jxl::N_AVX2::(anonymous namespace)::FastDivision<float, hwy::N_AVX2::Vec256<float> >::operator()(hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>) const Line | Count | Source | 46 | 110M | V operator()(const V n, const V d) const { | 47 | | #if JXL_TRUE // Faster on SKX | 48 | 110M | return Div(n, d); | 49 | | #else | 50 | | return n * ReciprocalNR(d); | 51 | | #endif | 52 | 110M | } |
Unexecuted instantiation: enc_xyb.cc:jxl::N_SSE2::(anonymous namespace)::FastDivision<float, hwy::N_SSE2::Vec128<float, 4ul> >::operator()(hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>) const Unexecuted instantiation: enc_xyb.cc:jxl::N_SSE4::(anonymous namespace)::FastDivision<float, hwy::N_SSE4::Vec128<float, 1ul> >::operator()(hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>) const Unexecuted instantiation: enc_xyb.cc:jxl::N_AVX2::(anonymous namespace)::FastDivision<float, hwy::N_AVX2::Vec128<float, 1ul> >::operator()(hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>) const Unexecuted instantiation: enc_xyb.cc:jxl::N_SSE2::(anonymous namespace)::FastDivision<float, hwy::N_SSE2::Vec128<float, 1ul> >::operator()(hwy::N_SSE2::Vec128<float, 1ul>, hwy::N_SSE2::Vec128<float, 1ul>) const Unexecuted instantiation: butteraugli.cc:jxl::N_SSE4::(anonymous namespace)::FastDivision<float, hwy::N_SSE4::Vec128<float, 4ul> >::operator()(hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>) const Unexecuted instantiation: butteraugli.cc:jxl::N_AVX2::(anonymous namespace)::FastDivision<float, hwy::N_AVX2::Vec256<float> >::operator()(hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>) const Unexecuted instantiation: butteraugli.cc:jxl::N_SSE2::(anonymous namespace)::FastDivision<float, hwy::N_SSE2::Vec128<float, 4ul> >::operator()(hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>) const Unexecuted instantiation: butteraugli.cc:jxl::N_SSE4::(anonymous namespace)::FastDivision<float, hwy::N_SSE4::Vec128<float, 1ul> >::operator()(hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>) const Unexecuted instantiation: butteraugli.cc:jxl::N_AVX2::(anonymous namespace)::FastDivision<float, hwy::N_AVX2::Vec128<float, 1ul> >::operator()(hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>) const Unexecuted instantiation: butteraugli.cc:jxl::N_SSE2::(anonymous namespace)::FastDivision<float, hwy::N_SSE2::Vec128<float, 1ul> >::operator()(hwy::N_SSE2::Vec128<float, 1ul>, hwy::N_SSE2::Vec128<float, 1ul>) const enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::FastDivision<float, hwy::N_AVX2::Vec256<float> >::operator()(hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>) const Line | Count | Source | 46 | 4.64M | V operator()(const V n, const V d) const { | 47 | | #if JXL_TRUE // Faster on SKX | 48 | 4.64M | return Div(n, d); | 49 | | #else | 50 | | return n * ReciprocalNR(d); | 51 | | #endif | 52 | 4.64M | } |
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::FastDivision<float, hwy::N_SSE4::Vec128<float, 4ul> >::operator()(hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>) const Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::FastDivision<float, hwy::N_SSE2::Vec128<float, 4ul> >::operator()(hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>) const Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::FastDivision<float, hwy::N_SSE4::Vec128<float, 1ul> >::operator()(hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>) const Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::FastDivision<float, hwy::N_AVX2::Vec128<float, 1ul> >::operator()(hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>) const Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::FastDivision<float, hwy::N_SSE2::Vec128<float, 1ul> >::operator()(hwy::N_SSE2::Vec128<float, 1ul>, hwy::N_SSE2::Vec128<float, 1ul>) const Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::FastDivision<float, hwy::N_SSE4::Vec128<float, 1ul> >::operator()(hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>) const enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::FastDivision<float, hwy::N_AVX2::Vec128<float, 1ul> >::operator()(hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>) const Line | Count | Source | 46 | 4.30M | V operator()(const V n, const V d) const { | 47 | | #if JXL_TRUE // Faster on SKX | 48 | 4.30M | return Div(n, d); | 49 | | #else | 50 | | return n * ReciprocalNR(d); | 51 | | #endif | 52 | 4.30M | } |
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::FastDivision<float, hwy::N_SSE2::Vec128<float, 1ul> >::operator()(hwy::N_SSE2::Vec128<float, 1ul>, hwy::N_SSE2::Vec128<float, 1ul>) const Unexecuted instantiation: jxl_cms.cc:jxl::N_SSE4::(anonymous namespace)::FastDivision<float, hwy::N_SSE4::Vec128<float, 4ul> >::operator()(hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>) const Unexecuted instantiation: jxl_cms.cc:jxl::N_AVX2::(anonymous namespace)::FastDivision<float, hwy::N_AVX2::Vec256<float> >::operator()(hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>) const Unexecuted instantiation: jxl_cms.cc:jxl::N_SSE2::(anonymous namespace)::FastDivision<float, hwy::N_SSE2::Vec128<float, 4ul> >::operator()(hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>) const Unexecuted instantiation: jxl_cms.cc:jxl::N_SSE4::(anonymous namespace)::FastDivision<float, hwy::N_SSE4::Vec128<float, 1ul> >::operator()(hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>) const Unexecuted instantiation: jxl_cms.cc:jxl::N_AVX2::(anonymous namespace)::FastDivision<float, hwy::N_AVX2::Vec128<float, 1ul> >::operator()(hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>) const Unexecuted instantiation: jxl_cms.cc:jxl::N_SSE2::(anonymous namespace)::FastDivision<float, hwy::N_SSE2::Vec128<float, 1ul> >::operator()(hwy::N_SSE2::Vec128<float, 1ul>, hwy::N_SSE2::Vec128<float, 1ul>) const Unexecuted instantiation: enc_ma.cc:jxl::N_SSE4::(anonymous namespace)::FastDivision<float, hwy::N_SSE4::Vec128<float, 4ul> >::operator()(hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>) const enc_ma.cc:jxl::N_AVX2::(anonymous namespace)::FastDivision<float, hwy::N_AVX2::Vec256<float> >::operator()(hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>) const Line | Count | Source | 46 | 99.9M | V operator()(const V n, const V d) const { | 47 | | #if JXL_TRUE // Faster on SKX | 48 | 99.9M | return Div(n, d); | 49 | | #else | 50 | | return n * ReciprocalNR(d); | 51 | | #endif | 52 | 99.9M | } |
Unexecuted instantiation: enc_ma.cc:jxl::N_SSE2::(anonymous namespace)::FastDivision<float, hwy::N_SSE2::Vec128<float, 4ul> >::operator()(hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>) const Unexecuted instantiation: enc_ma.cc:jxl::N_SSE4::(anonymous namespace)::FastDivision<float, hwy::N_SSE4::Vec128<float, 1ul> >::operator()(hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>) const Unexecuted instantiation: enc_ma.cc:jxl::N_AVX2::(anonymous namespace)::FastDivision<float, hwy::N_AVX2::Vec128<float, 1ul> >::operator()(hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>) const Unexecuted instantiation: enc_ma.cc:jxl::N_SSE2::(anonymous namespace)::FastDivision<float, hwy::N_SSE2::Vec128<float, 1ul> >::operator()(hwy::N_SSE2::Vec128<float, 1ul>, hwy::N_SSE2::Vec128<float, 1ul>) const Unexecuted instantiation: quant_weights.cc:jxl::N_SSE4::(anonymous namespace)::FastDivision<float, hwy::N_SSE4::Vec128<float, 1ul> >::operator()(hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>) const Unexecuted instantiation: quant_weights.cc:jxl::N_AVX2::(anonymous namespace)::FastDivision<float, hwy::N_AVX2::Vec128<float, 1ul> >::operator()(hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>) const Unexecuted instantiation: quant_weights.cc:jxl::N_SSE2::(anonymous namespace)::FastDivision<float, hwy::N_SSE2::Vec128<float, 1ul> >::operator()(hwy::N_SSE2::Vec128<float, 1ul>, hwy::N_SSE2::Vec128<float, 1ul>) const Unexecuted instantiation: quant_weights.cc:jxl::N_SSE4::(anonymous namespace)::FastDivision<float, hwy::N_SSE4::Vec128<float, 4ul> >::operator()(hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>) const quant_weights.cc:jxl::N_AVX2::(anonymous namespace)::FastDivision<float, hwy::N_AVX2::Vec128<float, 4ul> >::operator()(hwy::N_AVX2::Vec128<float, 4ul>, hwy::N_AVX2::Vec128<float, 4ul>) const Line | Count | Source | 46 | 19.6M | V operator()(const V n, const V d) const { | 47 | | #if JXL_TRUE // Faster on SKX | 48 | 19.6M | return Div(n, d); | 49 | | #else | 50 | | return n * ReciprocalNR(d); | 51 | | #endif | 52 | 19.6M | } |
Unexecuted instantiation: quant_weights.cc:jxl::N_SSE2::(anonymous namespace)::FastDivision<float, hwy::N_SSE2::Vec128<float, 4ul> >::operator()(hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>) const stage_from_linear.cc:jxl::N_AVX2::(anonymous namespace)::FastDivision<float, hwy::N_AVX2::Vec256<float> >::operator()(hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>) const Line | Count | Source | 46 | 175M | V operator()(const V n, const V d) const { | 47 | | #if JXL_TRUE // Faster on SKX | 48 | 175M | return Div(n, d); | 49 | | #else | 50 | | return n * ReciprocalNR(d); | 51 | | #endif | 52 | 175M | } |
Unexecuted instantiation: stage_from_linear.cc:jxl::N_SSE4::(anonymous namespace)::FastDivision<float, hwy::N_SSE4::Vec128<float, 4ul> >::operator()(hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>) const Unexecuted instantiation: stage_from_linear.cc:jxl::N_SSE2::(anonymous namespace)::FastDivision<float, hwy::N_SSE2::Vec128<float, 4ul> >::operator()(hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>) const Unexecuted instantiation: stage_from_linear.cc:jxl::N_SSE4::(anonymous namespace)::FastDivision<float, hwy::N_SSE4::Vec128<float, 1ul> >::operator()(hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>) const Unexecuted instantiation: stage_from_linear.cc:jxl::N_AVX2::(anonymous namespace)::FastDivision<float, hwy::N_AVX2::Vec128<float, 1ul> >::operator()(hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>) const Unexecuted instantiation: stage_from_linear.cc:jxl::N_SSE2::(anonymous namespace)::FastDivision<float, hwy::N_SSE2::Vec128<float, 1ul> >::operator()(hwy::N_SSE2::Vec128<float, 1ul>, hwy::N_SSE2::Vec128<float, 1ul>) const Unexecuted instantiation: stage_to_linear.cc:jxl::N_AVX2::(anonymous namespace)::FastDivision<float, hwy::N_AVX2::Vec256<float> >::operator()(hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>) const Unexecuted instantiation: stage_to_linear.cc:jxl::N_SSE4::(anonymous namespace)::FastDivision<float, hwy::N_SSE4::Vec128<float, 4ul> >::operator()(hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>) const Unexecuted instantiation: stage_to_linear.cc:jxl::N_SSE2::(anonymous namespace)::FastDivision<float, hwy::N_SSE2::Vec128<float, 4ul> >::operator()(hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>) const Unexecuted instantiation: stage_to_linear.cc:jxl::N_SSE4::(anonymous namespace)::FastDivision<float, hwy::N_SSE4::Vec128<float, 1ul> >::operator()(hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>) const Unexecuted instantiation: stage_to_linear.cc:jxl::N_AVX2::(anonymous namespace)::FastDivision<float, hwy::N_AVX2::Vec128<float, 1ul> >::operator()(hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>) const Unexecuted instantiation: stage_to_linear.cc:jxl::N_SSE2::(anonymous namespace)::FastDivision<float, hwy::N_SSE2::Vec128<float, 1ul> >::operator()(hwy::N_SSE2::Vec128<float, 1ul>, hwy::N_SSE2::Vec128<float, 1ul>) const Unexecuted instantiation: stage_tone_mapping.cc:jxl::N_SSE4::(anonymous namespace)::FastDivision<float, hwy::N_SSE4::Vec128<float, 4ul> >::operator()(hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>) const Unexecuted instantiation: stage_tone_mapping.cc:jxl::N_AVX2::(anonymous namespace)::FastDivision<float, hwy::N_AVX2::Vec256<float> >::operator()(hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>) const Unexecuted instantiation: stage_tone_mapping.cc:jxl::N_SSE2::(anonymous namespace)::FastDivision<float, hwy::N_SSE2::Vec128<float, 4ul> >::operator()(hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>) const Unexecuted instantiation: stage_tone_mapping.cc:jxl::N_SSE4::(anonymous namespace)::FastDivision<float, hwy::N_SSE4::Vec128<float, 1ul> >::operator()(hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>) const Unexecuted instantiation: stage_tone_mapping.cc:jxl::N_AVX2::(anonymous namespace)::FastDivision<float, hwy::N_AVX2::Vec128<float, 1ul> >::operator()(hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>) const Unexecuted instantiation: stage_tone_mapping.cc:jxl::N_SSE2::(anonymous namespace)::FastDivision<float, hwy::N_SSE2::Vec128<float, 1ul> >::operator()(hwy::N_SSE2::Vec128<float, 1ul>, hwy::N_SSE2::Vec128<float, 1ul>) const Unexecuted instantiation: splines.cc:jxl::N_SSE4::(anonymous namespace)::FastDivision<float, hwy::N_SSE4::Vec128<float, 1ul> >::operator()(hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>) const Unexecuted instantiation: splines.cc:jxl::N_AVX2::(anonymous namespace)::FastDivision<float, hwy::N_AVX2::Vec128<float, 1ul> >::operator()(hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>) const Unexecuted instantiation: splines.cc:jxl::N_SSE2::(anonymous namespace)::FastDivision<float, hwy::N_SSE2::Vec128<float, 1ul> >::operator()(hwy::N_SSE2::Vec128<float, 1ul>, hwy::N_SSE2::Vec128<float, 1ul>) const |
97 | 1.19G | } Unexecuted instantiation: enc_cluster.cc:hwy::N_SSE4::Vec128<float, 4ul> jxl::N_SSE4::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul>, float>(hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul>, float const (&) [12ul], float const (&) [12ul]) enc_cluster.cc:hwy::N_AVX2::Vec256<float> jxl::N_AVX2::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float>, float>(hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float>, float const (&) [12ul], float const (&) [12ul]) Line | Count | Source | 62 | 783M | const T (&q)[NQ]) { | 63 | 783M | constexpr size_t kDegP = NP / 4 - 1; | 64 | 783M | constexpr size_t kDegQ = NQ / 4 - 1; | 65 | 783M | auto yp = LoadDup128(d, &p[kDegP * 4]); | 66 | 783M | auto yq = LoadDup128(d, &q[kDegQ * 4]); | 67 | | // We use pointer arithmetic to refer to &p[(kDegP - n) * 4] to avoid a | 68 | | // compiler warning that the index is out of bounds since we are already | 69 | | // checking that it is not out of bounds with (kDegP >= n) and the access | 70 | | // will be optimized away. Similarly with q and kDegQ. | 71 | 783M | HWY_FENCE; | 72 | 783M | if (kDegP >= 1) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 1) * 4))); | 73 | 783M | if (kDegQ >= 1) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 1) * 4))); | 74 | 783M | HWY_FENCE; | 75 | 783M | if (kDegP >= 2) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 2) * 4))); | 76 | 783M | if (kDegQ >= 2) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 2) * 4))); | 77 | 783M | HWY_FENCE; | 78 | 783M | if (kDegP >= 3) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 3) * 4))); | 79 | 783M | if (kDegQ >= 3) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 3) * 4))); | 80 | 783M | HWY_FENCE; | 81 | 783M | if (kDegP >= 4) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 4) * 4))); | 82 | 783M | if (kDegQ >= 4) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 4) * 4))); | 83 | 783M | HWY_FENCE; | 84 | 783M | if (kDegP >= 5) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 5) * 4))); | 85 | 783M | if (kDegQ >= 5) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 5) * 4))); | 86 | 783M | HWY_FENCE; | 87 | 783M | if (kDegP >= 6) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 6) * 4))); | 88 | 783M | if (kDegQ >= 6) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 6) * 4))); | 89 | 783M | HWY_FENCE; | 90 | 783M | if (kDegP >= 7) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 7) * 4))); | 91 | 783M | if (kDegQ >= 7) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 7) * 4))); | 92 | | | 93 | 783M | static_assert(kDegP < 8, "Polynomial degree is too high"); | 94 | 783M | static_assert(kDegQ < 8, "Polynomial degree is too high"); | 95 | | | 96 | 783M | return FastDivision<T, V>()(yp, yq); | 97 | 783M | } |
Unexecuted instantiation: enc_cluster.cc:hwy::N_SSE2::Vec128<float, 4ul> jxl::N_SSE2::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_SSE2::Simd<float, 4ul, 0>, hwy::N_SSE2::Vec128<float, 4ul>, float>(hwy::N_SSE2::Simd<float, 4ul, 0>, hwy::N_SSE2::Vec128<float, 4ul>, float const (&) [12ul], float const (&) [12ul]) Unexecuted instantiation: enc_cluster.cc:hwy::N_SSE4::Vec128<float, 1ul> jxl::N_SSE4::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_SSE4::Simd<float, 1ul, 0>, hwy::N_SSE4::Vec128<float, 1ul>, float>(hwy::N_SSE4::Simd<float, 1ul, 0>, hwy::N_SSE4::Vec128<float, 1ul>, float const (&) [12ul], float const (&) [12ul]) Unexecuted instantiation: enc_cluster.cc:hwy::N_AVX2::Vec128<float, 1ul> jxl::N_AVX2::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_AVX2::Simd<float, 1ul, 0>, hwy::N_AVX2::Vec128<float, 1ul>, float>(hwy::N_AVX2::Simd<float, 1ul, 0>, hwy::N_AVX2::Vec128<float, 1ul>, float const (&) [12ul], float const (&) [12ul]) Unexecuted instantiation: enc_cluster.cc:hwy::N_SSE2::Vec128<float, 1ul> jxl::N_SSE2::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_SSE2::Simd<float, 1ul, 0>, hwy::N_SSE2::Vec128<float, 1ul>, float>(hwy::N_SSE2::Simd<float, 1ul, 0>, hwy::N_SSE2::Vec128<float, 1ul>, float const (&) [12ul], float const (&) [12ul]) enc_lz77.cc:hwy::N_SSE2::Vec128<float, 1ul> jxl::N_SSE2::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_SSE2::Simd<float, 1ul, 0>, hwy::N_SSE2::Vec128<float, 1ul>, float>(hwy::N_SSE2::Simd<float, 1ul, 0>, hwy::N_SSE2::Vec128<float, 1ul>, float const (&) [12ul], float const (&) [12ul]) Line | Count | Source | 62 | 1.22M | const T (&q)[NQ]) { | 63 | 1.22M | constexpr size_t kDegP = NP / 4 - 1; | 64 | 1.22M | constexpr size_t kDegQ = NQ / 4 - 1; | 65 | 1.22M | auto yp = LoadDup128(d, &p[kDegP * 4]); | 66 | 1.22M | auto yq = LoadDup128(d, &q[kDegQ * 4]); | 67 | | // We use pointer arithmetic to refer to &p[(kDegP - n) * 4] to avoid a | 68 | | // compiler warning that the index is out of bounds since we are already | 69 | | // checking that it is not out of bounds with (kDegP >= n) and the access | 70 | | // will be optimized away. Similarly with q and kDegQ. | 71 | 1.22M | HWY_FENCE; | 72 | 1.22M | if (kDegP >= 1) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 1) * 4))); | 73 | 1.22M | if (kDegQ >= 1) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 1) * 4))); | 74 | 1.22M | HWY_FENCE; | 75 | 1.22M | if (kDegP >= 2) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 2) * 4))); | 76 | 1.22M | if (kDegQ >= 2) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 2) * 4))); | 77 | 1.22M | HWY_FENCE; | 78 | 1.22M | if (kDegP >= 3) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 3) * 4))); | 79 | 1.22M | if (kDegQ >= 3) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 3) * 4))); | 80 | 1.22M | HWY_FENCE; | 81 | 1.22M | if (kDegP >= 4) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 4) * 4))); | 82 | 1.22M | if (kDegQ >= 4) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 4) * 4))); | 83 | 1.22M | HWY_FENCE; | 84 | 1.22M | if (kDegP >= 5) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 5) * 4))); | 85 | 1.22M | if (kDegQ >= 5) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 5) * 4))); | 86 | 1.22M | HWY_FENCE; | 87 | 1.22M | if (kDegP >= 6) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 6) * 4))); | 88 | 1.22M | if (kDegQ >= 6) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 6) * 4))); | 89 | 1.22M | HWY_FENCE; | 90 | 1.22M | if (kDegP >= 7) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 7) * 4))); | 91 | 1.22M | if (kDegQ >= 7) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 7) * 4))); | 92 | | | 93 | 1.22M | static_assert(kDegP < 8, "Polynomial degree is too high"); | 94 | 1.22M | static_assert(kDegQ < 8, "Polynomial degree is too high"); | 95 | | | 96 | 1.22M | return FastDivision<T, V>()(yp, yq); | 97 | 1.22M | } |
Unexecuted instantiation: enc_xyb.cc:hwy::N_SSE4::Vec128<float, 4ul> jxl::N_SSE4::(anonymous namespace)::EvalRationalPolynomial<20ul, 20ul, hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul>, float>(hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul>, float const (&) [20ul], float const (&) [20ul]) enc_xyb.cc:hwy::N_AVX2::Vec256<float> jxl::N_AVX2::(anonymous namespace)::EvalRationalPolynomial<20ul, 20ul, hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float>, float>(hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float>, float const (&) [20ul], float const (&) [20ul]) Line | Count | Source | 62 | 110M | const T (&q)[NQ]) { | 63 | 110M | constexpr size_t kDegP = NP / 4 - 1; | 64 | 110M | constexpr size_t kDegQ = NQ / 4 - 1; | 65 | 110M | auto yp = LoadDup128(d, &p[kDegP * 4]); | 66 | 110M | auto yq = LoadDup128(d, &q[kDegQ * 4]); | 67 | | // We use pointer arithmetic to refer to &p[(kDegP - n) * 4] to avoid a | 68 | | // compiler warning that the index is out of bounds since we are already | 69 | | // checking that it is not out of bounds with (kDegP >= n) and the access | 70 | | // will be optimized away. Similarly with q and kDegQ. | 71 | 110M | HWY_FENCE; | 72 | 110M | if (kDegP >= 1) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 1) * 4))); | 73 | 110M | if (kDegQ >= 1) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 1) * 4))); | 74 | 110M | HWY_FENCE; | 75 | 110M | if (kDegP >= 2) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 2) * 4))); | 76 | 110M | if (kDegQ >= 2) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 2) * 4))); | 77 | 110M | HWY_FENCE; | 78 | 110M | if (kDegP >= 3) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 3) * 4))); | 79 | 110M | if (kDegQ >= 3) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 3) * 4))); | 80 | 110M | HWY_FENCE; | 81 | 110M | if (kDegP >= 4) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 4) * 4))); | 82 | 110M | if (kDegQ >= 4) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 4) * 4))); | 83 | 110M | HWY_FENCE; | 84 | 110M | if (kDegP >= 5) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 5) * 4))); | 85 | 110M | if (kDegQ >= 5) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 5) * 4))); | 86 | 110M | HWY_FENCE; | 87 | 110M | if (kDegP >= 6) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 6) * 4))); | 88 | 110M | if (kDegQ >= 6) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 6) * 4))); | 89 | 110M | HWY_FENCE; | 90 | 110M | if (kDegP >= 7) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 7) * 4))); | 91 | 110M | if (kDegQ >= 7) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 7) * 4))); | 92 | | | 93 | 110M | static_assert(kDegP < 8, "Polynomial degree is too high"); | 94 | 110M | static_assert(kDegQ < 8, "Polynomial degree is too high"); | 95 | | | 96 | 110M | return FastDivision<T, V>()(yp, yq); | 97 | 110M | } |
Unexecuted instantiation: enc_xyb.cc:hwy::N_SSE2::Vec128<float, 4ul> jxl::N_SSE2::(anonymous namespace)::EvalRationalPolynomial<20ul, 20ul, hwy::N_SSE2::Simd<float, 4ul, 0>, hwy::N_SSE2::Vec128<float, 4ul>, float>(hwy::N_SSE2::Simd<float, 4ul, 0>, hwy::N_SSE2::Vec128<float, 4ul>, float const (&) [20ul], float const (&) [20ul]) Unexecuted instantiation: enc_xyb.cc:hwy::N_SSE4::Vec128<float, 1ul> jxl::N_SSE4::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_SSE4::Simd<float, 1ul, 0>, hwy::N_SSE4::Vec128<float, 1ul>, float>(hwy::N_SSE4::Simd<float, 1ul, 0>, hwy::N_SSE4::Vec128<float, 1ul>, float const (&) [12ul], float const (&) [12ul]) Unexecuted instantiation: enc_xyb.cc:hwy::N_AVX2::Vec128<float, 1ul> jxl::N_AVX2::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_AVX2::Simd<float, 1ul, 0>, hwy::N_AVX2::Vec128<float, 1ul>, float>(hwy::N_AVX2::Simd<float, 1ul, 0>, hwy::N_AVX2::Vec128<float, 1ul>, float const (&) [12ul], float const (&) [12ul]) Unexecuted instantiation: enc_xyb.cc:hwy::N_SSE2::Vec128<float, 1ul> jxl::N_SSE2::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_SSE2::Simd<float, 1ul, 0>, hwy::N_SSE2::Vec128<float, 1ul>, float>(hwy::N_SSE2::Simd<float, 1ul, 0>, hwy::N_SSE2::Vec128<float, 1ul>, float const (&) [12ul], float const (&) [12ul]) Unexecuted instantiation: butteraugli.cc:hwy::N_SSE4::Vec128<float, 4ul> jxl::N_SSE4::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul>, float>(hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul>, float const (&) [12ul], float const (&) [12ul]) Unexecuted instantiation: butteraugli.cc:hwy::N_AVX2::Vec256<float> jxl::N_AVX2::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float>, float>(hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float>, float const (&) [12ul], float const (&) [12ul]) Unexecuted instantiation: butteraugli.cc:hwy::N_SSE2::Vec128<float, 4ul> jxl::N_SSE2::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_SSE2::Simd<float, 4ul, 0>, hwy::N_SSE2::Vec128<float, 4ul>, float>(hwy::N_SSE2::Simd<float, 4ul, 0>, hwy::N_SSE2::Vec128<float, 4ul>, float const (&) [12ul], float const (&) [12ul]) Unexecuted instantiation: butteraugli.cc:hwy::N_SSE4::Vec128<float, 1ul> jxl::N_SSE4::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_SSE4::Simd<float, 1ul, 0>, hwy::N_SSE4::Vec128<float, 1ul>, float>(hwy::N_SSE4::Simd<float, 1ul, 0>, hwy::N_SSE4::Vec128<float, 1ul>, float const (&) [12ul], float const (&) [12ul]) Unexecuted instantiation: butteraugli.cc:hwy::N_AVX2::Vec128<float, 1ul> jxl::N_AVX2::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_AVX2::Simd<float, 1ul, 0>, hwy::N_AVX2::Vec128<float, 1ul>, float>(hwy::N_AVX2::Simd<float, 1ul, 0>, hwy::N_AVX2::Vec128<float, 1ul>, float const (&) [12ul], float const (&) [12ul]) Unexecuted instantiation: butteraugli.cc:hwy::N_SSE2::Vec128<float, 1ul> jxl::N_SSE2::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_SSE2::Simd<float, 1ul, 0>, hwy::N_SSE2::Vec128<float, 1ul>, float>(hwy::N_SSE2::Simd<float, 1ul, 0>, hwy::N_SSE2::Vec128<float, 1ul>, float const (&) [12ul], float const (&) [12ul]) enc_adaptive_quantization.cc:hwy::N_AVX2::Vec256<float> jxl::N_AVX2::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float>, float>(hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float>, float const (&) [12ul], float const (&) [12ul]) Line | Count | Source | 62 | 4.64M | const T (&q)[NQ]) { | 63 | 4.64M | constexpr size_t kDegP = NP / 4 - 1; | 64 | 4.64M | constexpr size_t kDegQ = NQ / 4 - 1; | 65 | 4.64M | auto yp = LoadDup128(d, &p[kDegP * 4]); | 66 | 4.64M | auto yq = LoadDup128(d, &q[kDegQ * 4]); | 67 | | // We use pointer arithmetic to refer to &p[(kDegP - n) * 4] to avoid a | 68 | | // compiler warning that the index is out of bounds since we are already | 69 | | // checking that it is not out of bounds with (kDegP >= n) and the access | 70 | | // will be optimized away. Similarly with q and kDegQ. | 71 | 4.64M | HWY_FENCE; | 72 | 4.64M | if (kDegP >= 1) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 1) * 4))); | 73 | 4.64M | if (kDegQ >= 1) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 1) * 4))); | 74 | 4.64M | HWY_FENCE; | 75 | 4.64M | if (kDegP >= 2) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 2) * 4))); | 76 | 4.64M | if (kDegQ >= 2) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 2) * 4))); | 77 | 4.64M | HWY_FENCE; | 78 | 4.64M | if (kDegP >= 3) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 3) * 4))); | 79 | 4.64M | if (kDegQ >= 3) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 3) * 4))); | 80 | 4.64M | HWY_FENCE; | 81 | 4.64M | if (kDegP >= 4) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 4) * 4))); | 82 | 4.64M | if (kDegQ >= 4) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 4) * 4))); | 83 | 4.64M | HWY_FENCE; | 84 | 4.64M | if (kDegP >= 5) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 5) * 4))); | 85 | 4.64M | if (kDegQ >= 5) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 5) * 4))); | 86 | 4.64M | HWY_FENCE; | 87 | 4.64M | if (kDegP >= 6) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 6) * 4))); | 88 | 4.64M | if (kDegQ >= 6) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 6) * 4))); | 89 | 4.64M | HWY_FENCE; | 90 | 4.64M | if (kDegP >= 7) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 7) * 4))); | 91 | 4.64M | if (kDegQ >= 7) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 7) * 4))); | 92 | | | 93 | 4.64M | static_assert(kDegP < 8, "Polynomial degree is too high"); | 94 | 4.64M | static_assert(kDegQ < 8, "Polynomial degree is too high"); | 95 | | | 96 | 4.64M | return FastDivision<T, V>()(yp, yq); | 97 | 4.64M | } |
Unexecuted instantiation: enc_adaptive_quantization.cc:hwy::N_SSE4::Vec128<float, 4ul> jxl::N_SSE4::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul>, float>(hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul>, float const (&) [12ul], float const (&) [12ul]) Unexecuted instantiation: enc_adaptive_quantization.cc:hwy::N_SSE2::Vec128<float, 4ul> jxl::N_SSE2::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_SSE2::Simd<float, 4ul, 0>, hwy::N_SSE2::Vec128<float, 4ul>, float>(hwy::N_SSE2::Simd<float, 4ul, 0>, hwy::N_SSE2::Vec128<float, 4ul>, float const (&) [12ul], float const (&) [12ul]) Unexecuted instantiation: enc_adaptive_quantization.cc:hwy::N_SSE4::Vec128<float, 1ul> jxl::N_SSE4::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_SSE4::Simd<float, 1ul, 0>, hwy::N_SSE4::Vec128<float, 1ul>, float>(hwy::N_SSE4::Simd<float, 1ul, 0>, hwy::N_SSE4::Vec128<float, 1ul>, float const (&) [12ul], float const (&) [12ul]) Unexecuted instantiation: enc_adaptive_quantization.cc:hwy::N_AVX2::Vec128<float, 1ul> jxl::N_AVX2::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_AVX2::Simd<float, 1ul, 0>, hwy::N_AVX2::Vec128<float, 1ul>, float>(hwy::N_AVX2::Simd<float, 1ul, 0>, hwy::N_AVX2::Vec128<float, 1ul>, float const (&) [12ul], float const (&) [12ul]) Unexecuted instantiation: enc_adaptive_quantization.cc:hwy::N_SSE2::Vec128<float, 1ul> jxl::N_SSE2::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_SSE2::Simd<float, 1ul, 0>, hwy::N_SSE2::Vec128<float, 1ul>, float>(hwy::N_SSE2::Simd<float, 1ul, 0>, hwy::N_SSE2::Vec128<float, 1ul>, float const (&) [12ul], float const (&) [12ul]) Unexecuted instantiation: enc_ac_strategy.cc:hwy::N_SSE4::Vec128<float, 1ul> jxl::N_SSE4::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_SSE4::Simd<float, 1ul, 0>, hwy::N_SSE4::Vec128<float, 1ul>, float>(hwy::N_SSE4::Simd<float, 1ul, 0>, hwy::N_SSE4::Vec128<float, 1ul>, float const (&) [12ul], float const (&) [12ul]) enc_ac_strategy.cc:hwy::N_AVX2::Vec128<float, 1ul> jxl::N_AVX2::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_AVX2::Simd<float, 1ul, 0>, hwy::N_AVX2::Vec128<float, 1ul>, float>(hwy::N_AVX2::Simd<float, 1ul, 0>, hwy::N_AVX2::Vec128<float, 1ul>, float const (&) [12ul], float const (&) [12ul]) Line | Count | Source | 62 | 4.30M | const T (&q)[NQ]) { | 63 | 4.30M | constexpr size_t kDegP = NP / 4 - 1; | 64 | 4.30M | constexpr size_t kDegQ = NQ / 4 - 1; | 65 | 4.30M | auto yp = LoadDup128(d, &p[kDegP * 4]); | 66 | 4.30M | auto yq = LoadDup128(d, &q[kDegQ * 4]); | 67 | | // We use pointer arithmetic to refer to &p[(kDegP - n) * 4] to avoid a | 68 | | // compiler warning that the index is out of bounds since we are already | 69 | | // checking that it is not out of bounds with (kDegP >= n) and the access | 70 | | // will be optimized away. Similarly with q and kDegQ. | 71 | 4.30M | HWY_FENCE; | 72 | 4.30M | if (kDegP >= 1) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 1) * 4))); | 73 | 4.30M | if (kDegQ >= 1) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 1) * 4))); | 74 | 4.30M | HWY_FENCE; | 75 | 4.30M | if (kDegP >= 2) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 2) * 4))); | 76 | 4.30M | if (kDegQ >= 2) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 2) * 4))); | 77 | 4.30M | HWY_FENCE; | 78 | 4.30M | if (kDegP >= 3) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 3) * 4))); | 79 | 4.30M | if (kDegQ >= 3) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 3) * 4))); | 80 | 4.30M | HWY_FENCE; | 81 | 4.30M | if (kDegP >= 4) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 4) * 4))); | 82 | 4.30M | if (kDegQ >= 4) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 4) * 4))); | 83 | 4.30M | HWY_FENCE; | 84 | 4.30M | if (kDegP >= 5) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 5) * 4))); | 85 | 4.30M | if (kDegQ >= 5) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 5) * 4))); | 86 | 4.30M | HWY_FENCE; | 87 | 4.30M | if (kDegP >= 6) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 6) * 4))); | 88 | 4.30M | if (kDegQ >= 6) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 6) * 4))); | 89 | 4.30M | HWY_FENCE; | 90 | 4.30M | if (kDegP >= 7) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 7) * 4))); | 91 | 4.30M | if (kDegQ >= 7) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 7) * 4))); | 92 | | | 93 | 4.30M | static_assert(kDegP < 8, "Polynomial degree is too high"); | 94 | 4.30M | static_assert(kDegQ < 8, "Polynomial degree is too high"); | 95 | | | 96 | 4.30M | return FastDivision<T, V>()(yp, yq); | 97 | 4.30M | } |
Unexecuted instantiation: enc_ac_strategy.cc:hwy::N_SSE2::Vec128<float, 1ul> jxl::N_SSE2::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_SSE2::Simd<float, 1ul, 0>, hwy::N_SSE2::Vec128<float, 1ul>, float>(hwy::N_SSE2::Simd<float, 1ul, 0>, hwy::N_SSE2::Vec128<float, 1ul>, float const (&) [12ul], float const (&) [12ul]) Unexecuted instantiation: jxl_cms.cc:hwy::N_SSE4::Vec128<float, 4ul> jxl::N_SSE4::(anonymous namespace)::EvalRationalPolynomial<20ul, 20ul, hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul>, float>(hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul>, float const (&) [20ul], float const (&) [20ul]) Unexecuted instantiation: jxl_cms.cc:hwy::N_AVX2::Vec256<float> jxl::N_AVX2::(anonymous namespace)::EvalRationalPolynomial<20ul, 20ul, hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float>, float>(hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float>, float const (&) [20ul], float const (&) [20ul]) Unexecuted instantiation: jxl_cms.cc:hwy::N_SSE2::Vec128<float, 4ul> jxl::N_SSE2::(anonymous namespace)::EvalRationalPolynomial<20ul, 20ul, hwy::N_SSE2::Simd<float, 4ul, 0>, hwy::N_SSE2::Vec128<float, 4ul>, float>(hwy::N_SSE2::Simd<float, 4ul, 0>, hwy::N_SSE2::Vec128<float, 4ul>, float const (&) [20ul], float const (&) [20ul]) Unexecuted instantiation: jxl_cms.cc:hwy::N_SSE4::Vec128<float, 1ul> jxl::N_SSE4::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_SSE4::Simd<float, 1ul, 0>, hwy::N_SSE4::Vec128<float, 1ul>, float>(hwy::N_SSE4::Simd<float, 1ul, 0>, hwy::N_SSE4::Vec128<float, 1ul>, float const (&) [12ul], float const (&) [12ul]) Unexecuted instantiation: jxl_cms.cc:hwy::N_AVX2::Vec128<float, 1ul> jxl::N_AVX2::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_AVX2::Simd<float, 1ul, 0>, hwy::N_AVX2::Vec128<float, 1ul>, float>(hwy::N_AVX2::Simd<float, 1ul, 0>, hwy::N_AVX2::Vec128<float, 1ul>, float const (&) [12ul], float const (&) [12ul]) Unexecuted instantiation: jxl_cms.cc:hwy::N_SSE2::Vec128<float, 1ul> jxl::N_SSE2::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_SSE2::Simd<float, 1ul, 0>, hwy::N_SSE2::Vec128<float, 1ul>, float>(hwy::N_SSE2::Simd<float, 1ul, 0>, hwy::N_SSE2::Vec128<float, 1ul>, float const (&) [12ul], float const (&) [12ul]) Unexecuted instantiation: enc_ma.cc:hwy::N_SSE4::Vec128<float, 4ul> jxl::N_SSE4::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul>, float>(hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul>, float const (&) [12ul], float const (&) [12ul]) enc_ma.cc:hwy::N_AVX2::Vec256<float> jxl::N_AVX2::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float>, float>(hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float>, float const (&) [12ul], float const (&) [12ul]) Line | Count | Source | 62 | 99.9M | const T (&q)[NQ]) { | 63 | 99.9M | constexpr size_t kDegP = NP / 4 - 1; | 64 | 99.9M | constexpr size_t kDegQ = NQ / 4 - 1; | 65 | 99.9M | auto yp = LoadDup128(d, &p[kDegP * 4]); | 66 | 99.9M | auto yq = LoadDup128(d, &q[kDegQ * 4]); | 67 | | // We use pointer arithmetic to refer to &p[(kDegP - n) * 4] to avoid a | 68 | | // compiler warning that the index is out of bounds since we are already | 69 | | // checking that it is not out of bounds with (kDegP >= n) and the access | 70 | | // will be optimized away. Similarly with q and kDegQ. | 71 | 99.9M | HWY_FENCE; | 72 | 99.9M | if (kDegP >= 1) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 1) * 4))); | 73 | 99.9M | if (kDegQ >= 1) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 1) * 4))); | 74 | 99.9M | HWY_FENCE; | 75 | 99.9M | if (kDegP >= 2) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 2) * 4))); | 76 | 99.9M | if (kDegQ >= 2) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 2) * 4))); | 77 | 99.9M | HWY_FENCE; | 78 | 99.9M | if (kDegP >= 3) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 3) * 4))); | 79 | 99.9M | if (kDegQ >= 3) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 3) * 4))); | 80 | 99.9M | HWY_FENCE; | 81 | 99.9M | if (kDegP >= 4) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 4) * 4))); | 82 | 99.9M | if (kDegQ >= 4) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 4) * 4))); | 83 | 99.9M | HWY_FENCE; | 84 | 99.9M | if (kDegP >= 5) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 5) * 4))); | 85 | 99.9M | if (kDegQ >= 5) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 5) * 4))); | 86 | 99.9M | HWY_FENCE; | 87 | 99.9M | if (kDegP >= 6) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 6) * 4))); | 88 | 99.9M | if (kDegQ >= 6) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 6) * 4))); | 89 | 99.9M | HWY_FENCE; | 90 | 99.9M | if (kDegP >= 7) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 7) * 4))); | 91 | 99.9M | if (kDegQ >= 7) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 7) * 4))); | 92 | | | 93 | 99.9M | static_assert(kDegP < 8, "Polynomial degree is too high"); | 94 | 99.9M | static_assert(kDegQ < 8, "Polynomial degree is too high"); | 95 | | | 96 | 99.9M | return FastDivision<T, V>()(yp, yq); | 97 | 99.9M | } |
Unexecuted instantiation: enc_ma.cc:hwy::N_SSE2::Vec128<float, 4ul> jxl::N_SSE2::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_SSE2::Simd<float, 4ul, 0>, hwy::N_SSE2::Vec128<float, 4ul>, float>(hwy::N_SSE2::Simd<float, 4ul, 0>, hwy::N_SSE2::Vec128<float, 4ul>, float const (&) [12ul], float const (&) [12ul]) Unexecuted instantiation: enc_ma.cc:hwy::N_SSE4::Vec128<float, 1ul> jxl::N_SSE4::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_SSE4::Simd<float, 1ul, 0>, hwy::N_SSE4::Vec128<float, 1ul>, float>(hwy::N_SSE4::Simd<float, 1ul, 0>, hwy::N_SSE4::Vec128<float, 1ul>, float const (&) [12ul], float const (&) [12ul]) Unexecuted instantiation: enc_ma.cc:hwy::N_AVX2::Vec128<float, 1ul> jxl::N_AVX2::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_AVX2::Simd<float, 1ul, 0>, hwy::N_AVX2::Vec128<float, 1ul>, float>(hwy::N_AVX2::Simd<float, 1ul, 0>, hwy::N_AVX2::Vec128<float, 1ul>, float const (&) [12ul], float const (&) [12ul]) Unexecuted instantiation: enc_ma.cc:hwy::N_SSE2::Vec128<float, 1ul> jxl::N_SSE2::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_SSE2::Simd<float, 1ul, 0>, hwy::N_SSE2::Vec128<float, 1ul>, float>(hwy::N_SSE2::Simd<float, 1ul, 0>, hwy::N_SSE2::Vec128<float, 1ul>, float const (&) [12ul], float const (&) [12ul]) Unexecuted instantiation: quant_weights.cc:hwy::N_SSE4::Vec128<float, 1ul> jxl::N_SSE4::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_SSE4::Simd<float, 1ul, 0>, hwy::N_SSE4::Vec128<float, 1ul>, float>(hwy::N_SSE4::Simd<float, 1ul, 0>, hwy::N_SSE4::Vec128<float, 1ul>, float const (&) [12ul], float const (&) [12ul]) Unexecuted instantiation: quant_weights.cc:hwy::N_AVX2::Vec128<float, 1ul> jxl::N_AVX2::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_AVX2::Simd<float, 1ul, 0>, hwy::N_AVX2::Vec128<float, 1ul>, float>(hwy::N_AVX2::Simd<float, 1ul, 0>, hwy::N_AVX2::Vec128<float, 1ul>, float const (&) [12ul], float const (&) [12ul]) Unexecuted instantiation: quant_weights.cc:hwy::N_SSE2::Vec128<float, 1ul> jxl::N_SSE2::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_SSE2::Simd<float, 1ul, 0>, hwy::N_SSE2::Vec128<float, 1ul>, float>(hwy::N_SSE2::Simd<float, 1ul, 0>, hwy::N_SSE2::Vec128<float, 1ul>, float const (&) [12ul], float const (&) [12ul]) Unexecuted instantiation: quant_weights.cc:hwy::N_SSE4::Vec128<float, 4ul> jxl::N_SSE4::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul>, float>(hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul>, float const (&) [12ul], float const (&) [12ul]) quant_weights.cc:hwy::N_AVX2::Vec128<float, 4ul> jxl::N_AVX2::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_AVX2::Simd<float, 4ul, 0>, hwy::N_AVX2::Vec128<float, 4ul>, float>(hwy::N_AVX2::Simd<float, 4ul, 0>, hwy::N_AVX2::Vec128<float, 4ul>, float const (&) [12ul], float const (&) [12ul]) Line | Count | Source | 62 | 19.6M | const T (&q)[NQ]) { | 63 | 19.6M | constexpr size_t kDegP = NP / 4 - 1; | 64 | 19.6M | constexpr size_t kDegQ = NQ / 4 - 1; | 65 | 19.6M | auto yp = LoadDup128(d, &p[kDegP * 4]); | 66 | 19.6M | auto yq = LoadDup128(d, &q[kDegQ * 4]); | 67 | | // We use pointer arithmetic to refer to &p[(kDegP - n) * 4] to avoid a | 68 | | // compiler warning that the index is out of bounds since we are already | 69 | | // checking that it is not out of bounds with (kDegP >= n) and the access | 70 | | // will be optimized away. Similarly with q and kDegQ. | 71 | 19.6M | HWY_FENCE; | 72 | 19.6M | if (kDegP >= 1) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 1) * 4))); | 73 | 19.6M | if (kDegQ >= 1) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 1) * 4))); | 74 | 19.6M | HWY_FENCE; | 75 | 19.6M | if (kDegP >= 2) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 2) * 4))); | 76 | 19.6M | if (kDegQ >= 2) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 2) * 4))); | 77 | 19.6M | HWY_FENCE; | 78 | 19.6M | if (kDegP >= 3) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 3) * 4))); | 79 | 19.6M | if (kDegQ >= 3) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 3) * 4))); | 80 | 19.6M | HWY_FENCE; | 81 | 19.6M | if (kDegP >= 4) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 4) * 4))); | 82 | 19.6M | if (kDegQ >= 4) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 4) * 4))); | 83 | 19.6M | HWY_FENCE; | 84 | 19.6M | if (kDegP >= 5) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 5) * 4))); | 85 | 19.6M | if (kDegQ >= 5) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 5) * 4))); | 86 | 19.6M | HWY_FENCE; | 87 | 19.6M | if (kDegP >= 6) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 6) * 4))); | 88 | 19.6M | if (kDegQ >= 6) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 6) * 4))); | 89 | 19.6M | HWY_FENCE; | 90 | 19.6M | if (kDegP >= 7) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 7) * 4))); | 91 | 19.6M | if (kDegQ >= 7) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 7) * 4))); | 92 | | | 93 | 19.6M | static_assert(kDegP < 8, "Polynomial degree is too high"); | 94 | 19.6M | static_assert(kDegQ < 8, "Polynomial degree is too high"); | 95 | | | 96 | 19.6M | return FastDivision<T, V>()(yp, yq); | 97 | 19.6M | } |
Unexecuted instantiation: quant_weights.cc:hwy::N_SSE2::Vec128<float, 4ul> jxl::N_SSE2::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_SSE2::Simd<float, 4ul, 0>, hwy::N_SSE2::Vec128<float, 4ul>, float>(hwy::N_SSE2::Simd<float, 4ul, 0>, hwy::N_SSE2::Vec128<float, 4ul>, float const (&) [12ul], float const (&) [12ul]) stage_from_linear.cc:hwy::N_AVX2::Vec256<float> jxl::N_AVX2::(anonymous namespace)::EvalRationalPolynomial<20ul, 20ul, hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float>, float>(hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float>, float const (&) [20ul], float const (&) [20ul]) Line | Count | Source | 62 | 172M | const T (&q)[NQ]) { | 63 | 172M | constexpr size_t kDegP = NP / 4 - 1; | 64 | 172M | constexpr size_t kDegQ = NQ / 4 - 1; | 65 | 172M | auto yp = LoadDup128(d, &p[kDegP * 4]); | 66 | 172M | auto yq = LoadDup128(d, &q[kDegQ * 4]); | 67 | | // We use pointer arithmetic to refer to &p[(kDegP - n) * 4] to avoid a | 68 | | // compiler warning that the index is out of bounds since we are already | 69 | | // checking that it is not out of bounds with (kDegP >= n) and the access | 70 | | // will be optimized away. Similarly with q and kDegQ. | 71 | 172M | HWY_FENCE; | 72 | 172M | if (kDegP >= 1) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 1) * 4))); | 73 | 172M | if (kDegQ >= 1) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 1) * 4))); | 74 | 172M | HWY_FENCE; | 75 | 172M | if (kDegP >= 2) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 2) * 4))); | 76 | 172M | if (kDegQ >= 2) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 2) * 4))); | 77 | 172M | HWY_FENCE; | 78 | 172M | if (kDegP >= 3) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 3) * 4))); | 79 | 172M | if (kDegQ >= 3) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 3) * 4))); | 80 | 172M | HWY_FENCE; | 81 | 172M | if (kDegP >= 4) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 4) * 4))); | 82 | 172M | if (kDegQ >= 4) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 4) * 4))); | 83 | 172M | HWY_FENCE; | 84 | 172M | if (kDegP >= 5) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 5) * 4))); | 85 | 172M | if (kDegQ >= 5) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 5) * 4))); | 86 | 172M | HWY_FENCE; | 87 | 172M | if (kDegP >= 6) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 6) * 4))); | 88 | 172M | if (kDegQ >= 6) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 6) * 4))); | 89 | 172M | HWY_FENCE; | 90 | 172M | if (kDegP >= 7) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 7) * 4))); | 91 | 172M | if (kDegQ >= 7) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 7) * 4))); | 92 | | | 93 | 172M | static_assert(kDegP < 8, "Polynomial degree is too high"); | 94 | 172M | static_assert(kDegQ < 8, "Polynomial degree is too high"); | 95 | | | 96 | 172M | return FastDivision<T, V>()(yp, yq); | 97 | 172M | } |
stage_from_linear.cc:hwy::N_AVX2::Vec256<float> jxl::N_AVX2::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float>, float>(hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float>, float const (&) [12ul], float const (&) [12ul]) Line | Count | Source | 62 | 3.13M | const T (&q)[NQ]) { | 63 | 3.13M | constexpr size_t kDegP = NP / 4 - 1; | 64 | 3.13M | constexpr size_t kDegQ = NQ / 4 - 1; | 65 | 3.13M | auto yp = LoadDup128(d, &p[kDegP * 4]); | 66 | 3.13M | auto yq = LoadDup128(d, &q[kDegQ * 4]); | 67 | | // We use pointer arithmetic to refer to &p[(kDegP - n) * 4] to avoid a | 68 | | // compiler warning that the index is out of bounds since we are already | 69 | | // checking that it is not out of bounds with (kDegP >= n) and the access | 70 | | // will be optimized away. Similarly with q and kDegQ. | 71 | 3.13M | HWY_FENCE; | 72 | 3.13M | if (kDegP >= 1) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 1) * 4))); | 73 | 3.13M | if (kDegQ >= 1) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 1) * 4))); | 74 | 3.13M | HWY_FENCE; | 75 | 3.13M | if (kDegP >= 2) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 2) * 4))); | 76 | 3.13M | if (kDegQ >= 2) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 2) * 4))); | 77 | 3.13M | HWY_FENCE; | 78 | 3.13M | if (kDegP >= 3) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 3) * 4))); | 79 | 3.13M | if (kDegQ >= 3) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 3) * 4))); | 80 | 3.13M | HWY_FENCE; | 81 | 3.13M | if (kDegP >= 4) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 4) * 4))); | 82 | 3.13M | if (kDegQ >= 4) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 4) * 4))); | 83 | 3.13M | HWY_FENCE; | 84 | 3.13M | if (kDegP >= 5) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 5) * 4))); | 85 | 3.13M | if (kDegQ >= 5) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 5) * 4))); | 86 | 3.13M | HWY_FENCE; | 87 | 3.13M | if (kDegP >= 6) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 6) * 4))); | 88 | 3.13M | if (kDegQ >= 6) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 6) * 4))); | 89 | 3.13M | HWY_FENCE; | 90 | 3.13M | if (kDegP >= 7) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 7) * 4))); | 91 | 3.13M | if (kDegQ >= 7) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 7) * 4))); | 92 | | | 93 | 3.13M | static_assert(kDegP < 8, "Polynomial degree is too high"); | 94 | 3.13M | static_assert(kDegQ < 8, "Polynomial degree is too high"); | 95 | | | 96 | 3.13M | return FastDivision<T, V>()(yp, yq); | 97 | 3.13M | } |
Unexecuted instantiation: stage_from_linear.cc:hwy::N_SSE4::Vec128<float, 4ul> jxl::N_SSE4::(anonymous namespace)::EvalRationalPolynomial<20ul, 20ul, hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul>, float>(hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul>, float const (&) [20ul], float const (&) [20ul]) Unexecuted instantiation: stage_from_linear.cc:hwy::N_SSE4::Vec128<float, 4ul> jxl::N_SSE4::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul>, float>(hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul>, float const (&) [12ul], float const (&) [12ul]) Unexecuted instantiation: stage_from_linear.cc:hwy::N_SSE2::Vec128<float, 4ul> jxl::N_SSE2::(anonymous namespace)::EvalRationalPolynomial<20ul, 20ul, hwy::N_SSE2::Simd<float, 4ul, 0>, hwy::N_SSE2::Vec128<float, 4ul>, float>(hwy::N_SSE2::Simd<float, 4ul, 0>, hwy::N_SSE2::Vec128<float, 4ul>, float const (&) [20ul], float const (&) [20ul]) Unexecuted instantiation: stage_from_linear.cc:hwy::N_SSE2::Vec128<float, 4ul> jxl::N_SSE2::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_SSE2::Simd<float, 4ul, 0>, hwy::N_SSE2::Vec128<float, 4ul>, float>(hwy::N_SSE2::Simd<float, 4ul, 0>, hwy::N_SSE2::Vec128<float, 4ul>, float const (&) [12ul], float const (&) [12ul]) Unexecuted instantiation: stage_from_linear.cc:hwy::N_SSE4::Vec128<float, 1ul> jxl::N_SSE4::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_SSE4::Simd<float, 1ul, 0>, hwy::N_SSE4::Vec128<float, 1ul>, float>(hwy::N_SSE4::Simd<float, 1ul, 0>, hwy::N_SSE4::Vec128<float, 1ul>, float const (&) [12ul], float const (&) [12ul]) Unexecuted instantiation: stage_from_linear.cc:hwy::N_AVX2::Vec128<float, 1ul> jxl::N_AVX2::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_AVX2::Simd<float, 1ul, 0>, hwy::N_AVX2::Vec128<float, 1ul>, float>(hwy::N_AVX2::Simd<float, 1ul, 0>, hwy::N_AVX2::Vec128<float, 1ul>, float const (&) [12ul], float const (&) [12ul]) Unexecuted instantiation: stage_from_linear.cc:hwy::N_SSE2::Vec128<float, 1ul> jxl::N_SSE2::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_SSE2::Simd<float, 1ul, 0>, hwy::N_SSE2::Vec128<float, 1ul>, float>(hwy::N_SSE2::Simd<float, 1ul, 0>, hwy::N_SSE2::Vec128<float, 1ul>, float const (&) [12ul], float const (&) [12ul]) Unexecuted instantiation: stage_to_linear.cc:hwy::N_AVX2::Vec256<float> jxl::N_AVX2::(anonymous namespace)::EvalRationalPolynomial<20ul, 20ul, hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float>, float>(hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float>, float const (&) [20ul], float const (&) [20ul]) Unexecuted instantiation: stage_to_linear.cc:hwy::N_AVX2::Vec256<float> jxl::N_AVX2::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float>, float>(hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float>, float const (&) [12ul], float const (&) [12ul]) Unexecuted instantiation: stage_to_linear.cc:hwy::N_SSE4::Vec128<float, 4ul> jxl::N_SSE4::(anonymous namespace)::EvalRationalPolynomial<20ul, 20ul, hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul>, float>(hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul>, float const (&) [20ul], float const (&) [20ul]) Unexecuted instantiation: stage_to_linear.cc:hwy::N_SSE4::Vec128<float, 4ul> jxl::N_SSE4::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul>, float>(hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul>, float const (&) [12ul], float const (&) [12ul]) Unexecuted instantiation: stage_to_linear.cc:hwy::N_SSE2::Vec128<float, 4ul> jxl::N_SSE2::(anonymous namespace)::EvalRationalPolynomial<20ul, 20ul, hwy::N_SSE2::Simd<float, 4ul, 0>, hwy::N_SSE2::Vec128<float, 4ul>, float>(hwy::N_SSE2::Simd<float, 4ul, 0>, hwy::N_SSE2::Vec128<float, 4ul>, float const (&) [20ul], float const (&) [20ul]) Unexecuted instantiation: stage_to_linear.cc:hwy::N_SSE2::Vec128<float, 4ul> jxl::N_SSE2::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_SSE2::Simd<float, 4ul, 0>, hwy::N_SSE2::Vec128<float, 4ul>, float>(hwy::N_SSE2::Simd<float, 4ul, 0>, hwy::N_SSE2::Vec128<float, 4ul>, float const (&) [12ul], float const (&) [12ul]) Unexecuted instantiation: stage_to_linear.cc:hwy::N_SSE4::Vec128<float, 1ul> jxl::N_SSE4::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_SSE4::Simd<float, 1ul, 0>, hwy::N_SSE4::Vec128<float, 1ul>, float>(hwy::N_SSE4::Simd<float, 1ul, 0>, hwy::N_SSE4::Vec128<float, 1ul>, float const (&) [12ul], float const (&) [12ul]) Unexecuted instantiation: stage_to_linear.cc:hwy::N_AVX2::Vec128<float, 1ul> jxl::N_AVX2::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_AVX2::Simd<float, 1ul, 0>, hwy::N_AVX2::Vec128<float, 1ul>, float>(hwy::N_AVX2::Simd<float, 1ul, 0>, hwy::N_AVX2::Vec128<float, 1ul>, float const (&) [12ul], float const (&) [12ul]) Unexecuted instantiation: stage_to_linear.cc:hwy::N_SSE2::Vec128<float, 1ul> jxl::N_SSE2::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_SSE2::Simd<float, 1ul, 0>, hwy::N_SSE2::Vec128<float, 1ul>, float>(hwy::N_SSE2::Simd<float, 1ul, 0>, hwy::N_SSE2::Vec128<float, 1ul>, float const (&) [12ul], float const (&) [12ul]) Unexecuted instantiation: stage_tone_mapping.cc:hwy::N_SSE4::Vec128<float, 4ul> jxl::N_SSE4::(anonymous namespace)::EvalRationalPolynomial<20ul, 20ul, hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul>, float>(hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul>, float const (&) [20ul], float const (&) [20ul]) Unexecuted instantiation: stage_tone_mapping.cc:hwy::N_SSE4::Vec128<float, 4ul> jxl::N_SSE4::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul>, float>(hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul>, float const (&) [12ul], float const (&) [12ul]) Unexecuted instantiation: stage_tone_mapping.cc:hwy::N_AVX2::Vec256<float> jxl::N_AVX2::(anonymous namespace)::EvalRationalPolynomial<20ul, 20ul, hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float>, float>(hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float>, float const (&) [20ul], float const (&) [20ul]) Unexecuted instantiation: stage_tone_mapping.cc:hwy::N_AVX2::Vec256<float> jxl::N_AVX2::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float>, float>(hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float>, float const (&) [12ul], float const (&) [12ul]) Unexecuted instantiation: stage_tone_mapping.cc:hwy::N_SSE2::Vec128<float, 4ul> jxl::N_SSE2::(anonymous namespace)::EvalRationalPolynomial<20ul, 20ul, hwy::N_SSE2::Simd<float, 4ul, 0>, hwy::N_SSE2::Vec128<float, 4ul>, float>(hwy::N_SSE2::Simd<float, 4ul, 0>, hwy::N_SSE2::Vec128<float, 4ul>, float const (&) [20ul], float const (&) [20ul]) Unexecuted instantiation: stage_tone_mapping.cc:hwy::N_SSE2::Vec128<float, 4ul> jxl::N_SSE2::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_SSE2::Simd<float, 4ul, 0>, hwy::N_SSE2::Vec128<float, 4ul>, float>(hwy::N_SSE2::Simd<float, 4ul, 0>, hwy::N_SSE2::Vec128<float, 4ul>, float const (&) [12ul], float const (&) [12ul]) Unexecuted instantiation: stage_tone_mapping.cc:hwy::N_SSE4::Vec128<float, 1ul> jxl::N_SSE4::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_SSE4::Simd<float, 1ul, 0>, hwy::N_SSE4::Vec128<float, 1ul>, float>(hwy::N_SSE4::Simd<float, 1ul, 0>, hwy::N_SSE4::Vec128<float, 1ul>, float const (&) [12ul], float const (&) [12ul]) Unexecuted instantiation: stage_tone_mapping.cc:hwy::N_AVX2::Vec128<float, 1ul> jxl::N_AVX2::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_AVX2::Simd<float, 1ul, 0>, hwy::N_AVX2::Vec128<float, 1ul>, float>(hwy::N_AVX2::Simd<float, 1ul, 0>, hwy::N_AVX2::Vec128<float, 1ul>, float const (&) [12ul], float const (&) [12ul]) Unexecuted instantiation: stage_tone_mapping.cc:hwy::N_SSE2::Vec128<float, 1ul> jxl::N_SSE2::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_SSE2::Simd<float, 1ul, 0>, hwy::N_SSE2::Vec128<float, 1ul>, float>(hwy::N_SSE2::Simd<float, 1ul, 0>, hwy::N_SSE2::Vec128<float, 1ul>, float const (&) [12ul], float const (&) [12ul]) Unexecuted instantiation: splines.cc:hwy::N_SSE4::Vec128<float, 1ul> jxl::N_SSE4::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_SSE4::Simd<float, 1ul, 0>, hwy::N_SSE4::Vec128<float, 1ul>, float>(hwy::N_SSE4::Simd<float, 1ul, 0>, hwy::N_SSE4::Vec128<float, 1ul>, float const (&) [12ul], float const (&) [12ul]) Unexecuted instantiation: splines.cc:hwy::N_AVX2::Vec128<float, 1ul> jxl::N_AVX2::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_AVX2::Simd<float, 1ul, 0>, hwy::N_AVX2::Vec128<float, 1ul>, float>(hwy::N_AVX2::Simd<float, 1ul, 0>, hwy::N_AVX2::Vec128<float, 1ul>, float const (&) [12ul], float const (&) [12ul]) Unexecuted instantiation: splines.cc:hwy::N_SSE2::Vec128<float, 1ul> jxl::N_SSE2::(anonymous namespace)::EvalRationalPolynomial<12ul, 12ul, hwy::N_SSE2::Simd<float, 1ul, 0>, hwy::N_SSE2::Vec128<float, 1ul>, float>(hwy::N_SSE2::Simd<float, 1ul, 0>, hwy::N_SSE2::Vec128<float, 1ul>, float const (&) [12ul], float const (&) [12ul]) |