/src/libjxl/lib/jxl/enc_entropy_coder.cc
Line | Count | Source |
1 | | // Copyright (c) the JPEG XL Project Authors. All rights reserved. |
2 | | // |
3 | | // Use of this source code is governed by a BSD-style |
4 | | // license that can be found in the LICENSE file. |
5 | | |
6 | | #include "lib/jxl/enc_entropy_coder.h" |
7 | | |
8 | | #include <cstddef> |
9 | | #include <cstdint> |
10 | | #include <vector> |
11 | | |
12 | | #include "lib/jxl/base/rect.h" |
13 | | #include "lib/jxl/enc_ans.h" |
14 | | #include "lib/jxl/frame_dimensions.h" |
15 | | #include "lib/jxl/frame_header.h" |
16 | | |
17 | | #undef HWY_TARGET_INCLUDE |
18 | | #define HWY_TARGET_INCLUDE "lib/jxl/enc_entropy_coder.cc" |
19 | | #include <hwy/foreach_target.h> |
20 | | #include <hwy/highway.h> |
21 | | |
22 | | #include "lib/jxl/ac_context.h" |
23 | | #include "lib/jxl/ac_strategy.h" |
24 | | #include "lib/jxl/base/bits.h" |
25 | | #include "lib/jxl/base/compiler_specific.h" |
26 | | #include "lib/jxl/base/status.h" |
27 | | #include "lib/jxl/coeff_order.h" |
28 | | #include "lib/jxl/coeff_order_fwd.h" |
29 | | #include "lib/jxl/entropy_coder.h" |
30 | | #include "lib/jxl/image.h" |
31 | | #include "lib/jxl/pack_signed.h" |
32 | | |
33 | | HWY_BEFORE_NAMESPACE(); |
34 | | namespace jxl { |
35 | | namespace HWY_NAMESPACE { |
36 | | |
37 | | // These templates are not found via ADL. |
38 | | using hwy::HWY_NAMESPACE::Add; |
39 | | using hwy::HWY_NAMESPACE::AndNot; |
40 | | using hwy::HWY_NAMESPACE::Eq; |
41 | | using hwy::HWY_NAMESPACE::GetLane; |
42 | | |
// Returns number of non-zero coefficients (but skip LLF).
// We cannot rely on block[] being all-zero bits, so first truncate to integer.
// Also writes the per-8x8 block nzeros starting at nzeros_pos.
//
// cx/cy are the covered-block dimensions in canonical (transposed) order as
// produced by CoefficientLayout in the caller; covered_blocks == cx * cy and
// log2_covered_blocks is its log2 (covered_blocks is always a power of two
// here — TODO confirm against AcStrategy). nzeros_stride is the row stride of
// the per-8x8 nzeros image that nzeros_pos points into.
int32_t NumNonZeroExceptLLF(const size_t cx, const size_t cy,
                            const AcStrategy acs, const size_t covered_blocks,
                            const size_t log2_covered_blocks,
                            const int32_t* JXL_RESTRICT block,
                            const size_t nzeros_stride,
                            int32_t* JXL_RESTRICT nzeros_pos) {
  // Vector width capped at one 8-pixel block row of int32 coefficients.
  const HWY_CAPPED(int32_t, kBlockDim) di;

  const auto zero = Zero(di);
  // Add FF..FF for every zero coefficient, negate to get #zeros.
  auto neg_sum_zero = zero;

  {
    // Mask sufficient for one row of coefficients. Only the first
    // kMaxCoeffBlocks entries are -1 (all-ones); the rest are
    // zero-initialized.
    HWY_ALIGN const int32_t
        llf_mask_lanes[AcStrategy::kMaxCoeffBlocks * (1 + kBlockDim)] = {
            -1, -1, -1, -1};
    // First cx=1,2,4 elements are FF..FF, others 0.
    // The offset makes exactly cx leading lanes all-ones; subsequent reads
    // may be unaligned, hence LoadU below.
    const int32_t* llf_mask_pos =
        llf_mask_lanes + AcStrategy::kMaxCoeffBlocks - cx;

    // Rows with LLF: mask out the LLF
    for (size_t y = 0; y < cy; y++) {
      for (size_t x = 0; x < cx * kBlockDim; x += Lanes(di)) {
        const auto llf_mask = LoadU(di, llf_mask_pos + x);

        // LLF counts as zero so we don't include it in nzeros.
        const auto coef =
            AndNot(llf_mask, Load(di, &block[y * cx * kBlockDim + x]));

        // Eq yields all-ones (-1) lanes for zero coefficients.
        neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero)));
      }
    }
  }

  // Remaining rows: no mask
  for (size_t y = cy; y < cy * kBlockDim; y++) {
    for (size_t x = 0; x < cx * kBlockDim; x += Lanes(di)) {
      const auto coef = Load(di, &block[y * cx * kBlockDim + x]);
      neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero)));
    }
  }

  // We want area - sum_zero, add because neg_sum_zero is already negated.
  const int32_t nzeros = static_cast<int32_t>(cx * cy * kDCTBlockSize) +
                         GetLane(SumOfLanes(di, neg_sum_zero));

  // Per-8x8 prediction value: total nzeros divided evenly over the covered
  // 8x8 blocks, rounded up.
  const int32_t shifted_nzeros = static_cast<int32_t>(
      (nzeros + covered_blocks - 1) >> log2_covered_blocks);
  // Need non-canonicalized dimensions!
  for (size_t y = 0; y < acs.covered_blocks_y(); y++) {
    for (size_t x = 0; x < acs.covered_blocks_x(); x++) {
      nzeros_pos[x + y * nzeros_stride] = shifted_nzeros;
    }
  }

  return nzeros;
}
104 | | |
// Specialization for 8x8, where only top-left is LLF/DC.
// About 1% overall speedup vs. NumNonZeroExceptLLF.
//
// Writes the block's nonzero count to *nzeros_pos and also returns it.
int32_t NumNonZero8x8ExceptDC(const int32_t* JXL_RESTRICT block,
                              int32_t* JXL_RESTRICT nzeros_pos) {
  // Vector width capped at one 8-pixel block row of int32 coefficients.
  const HWY_CAPPED(int32_t, kBlockDim) di;

  const auto zero = Zero(di);
  // Add FF..FF for every zero coefficient, negate to get #zeros.
  auto neg_sum_zero = zero;

  {
    // First row has DC, so mask
    const size_t y = 0;
    // Only lane 0 is all-ones (covers the DC coefficient); remaining lanes
    // are zero-initialized.
    HWY_ALIGN const int32_t dc_mask_lanes[kBlockDim] = {-1};

    for (size_t x = 0; x < kBlockDim; x += Lanes(di)) {
      const auto dc_mask = Load(di, dc_mask_lanes + x);

      // DC counts as zero so we don't include it in nzeros.
      const auto coef = AndNot(dc_mask, Load(di, &block[y * kBlockDim + x]));

      // Eq yields all-ones (-1) lanes for zero coefficients.
      neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero)));
    }
  }

  // Remaining rows: no mask
  for (size_t y = 1; y < kBlockDim; y++) {
    for (size_t x = 0; x < kBlockDim; x += Lanes(di)) {
      const auto coef = Load(di, &block[y * kBlockDim + x]);
      neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero)));
    }
  }

  // We want 64 - sum_zero, add because neg_sum_zero is already negated.
  const int32_t nzeros = static_cast<int32_t>(kDCTBlockSize) +
                         GetLane(SumOfLanes(di, neg_sum_zero));

  // A single 8x8 block maps to exactly one nzeros entry (no spreading).
  *nzeros_pos = nzeros;

  return nzeros;
}
146 | | |
// The number of nonzeros of each block is predicted from the top and the left
// blocks, with opportune scaling to take into account the number of blocks of
// each strategy. The predicted number of nonzeros divided by two is used as a
// context; if this number is above 63, a specific context is used. If the
// number of nonzeros of a strategy is above 63, it is written directly using a
// fixed number of bits (that depends on the size of the strategy).
//
// Emits one (context, value) Token per nonzero-count and per AC coefficient
// into `output`. `ac_rows` holds the quantized AC coefficients per channel,
// consumed sequentially via offset[]; `tmp_num_nzeroes` is scratch storage
// for the per-8x8 nonzero counts used by the top/left predictor.
Status TokenizeCoefficients(const coeff_order_t* JXL_RESTRICT orders,
                            const Rect& rect,
                            const int32_t* JXL_RESTRICT* JXL_RESTRICT ac_rows,
                            const AcStrategyImage& ac_strategy,
                            const YCbCrChromaSubsampling& cs,
                            Image3I* JXL_RESTRICT tmp_num_nzeroes,
                            std::vector<Token>* JXL_RESTRICT output,
                            const ImageB& qdc, const ImageI& qf,
                            const BlockCtxMap& block_ctx_map) {
  const size_t xsize_blocks = rect.xsize();
  const size_t ysize_blocks = rect.ysize();
  output->clear();
  // TODO(user): update the estimate: usually less coefficients are used.
  output->reserve(3 * xsize_blocks * ysize_blocks * kDCTBlockSize);

  // Running read offset into ac_rows[c]; advances by one transform's worth of
  // coefficients each time a block of channel c is tokenized.
  size_t offset[3] = {};
  const size_t nzeros_stride = tmp_num_nzeroes->PixelsPerRow();
  for (size_t by = 0; by < ysize_blocks; ++by) {
    // Per-channel block row after vertical chroma subsampling.
    size_t sby[3] = {by >> cs.VShift(0), by >> cs.VShift(1),
                     by >> cs.VShift(2)};
    int32_t* JXL_RESTRICT row_nzeros[3] = {
        tmp_num_nzeroes->PlaneRow(0, sby[0]),
        tmp_num_nzeroes->PlaneRow(1, sby[1]),
        tmp_num_nzeroes->PlaneRow(2, sby[2]),
    };
    // Row above, for the nzeros predictor; nullptr on the first row.
    const int32_t* JXL_RESTRICT row_nzeros_top[3] = {
        sby[0] == 0 ? nullptr : tmp_num_nzeroes->ConstPlaneRow(0, sby[0] - 1),
        sby[1] == 0 ? nullptr : tmp_num_nzeroes->ConstPlaneRow(1, sby[1] - 1),
        sby[2] == 0 ? nullptr : tmp_num_nzeroes->ConstPlaneRow(2, sby[2] - 1),
    };
    const uint8_t* JXL_RESTRICT row_qdc =
        qdc.ConstRow(rect.y0() + by) + rect.x0();
    const int32_t* JXL_RESTRICT row_qf = rect.ConstRow(qf, by);
    AcStrategyRow acs_row = ac_strategy.ConstRow(rect, by);
    for (size_t bx = 0; bx < xsize_blocks; ++bx) {
      AcStrategy acs = acs_row[bx];
      // Only the first (top-left) 8x8 cell of a larger transform carries the
      // coefficients; the rest are skipped.
      if (!acs.IsFirstBlock()) continue;
      size_t sbx[3] = {bx >> cs.HShift(0), bx >> cs.HShift(1),
                       bx >> cs.HShift(2)};
      size_t cx = acs.covered_blocks_x();
      size_t cy = acs.covered_blocks_y();
      const size_t covered_blocks = cx * cy;  // = #LLF coefficients
      const size_t log2_covered_blocks =
          Num0BitsBelowLS1Bit_Nonzero(covered_blocks);
      const size_t size = covered_blocks * kDCTBlockSize;

      CoefficientLayout(&cy, &cx);  // swap cx/cy to canonical order

      // Channel order 1,0,2 (Y first) must match the decoder.
      for (int c : {1, 0, 2}) {
        // Skip block positions swallowed by chroma subsampling.
        if (sbx[c] << cs.HShift(c) != bx) continue;
        if (sby[c] << cs.VShift(c) != by) continue;
        const int32_t* JXL_RESTRICT block = ac_rows[c] + offset[c];

        // Count nonzeros (excluding LLF/DC) and record the per-8x8 counts
        // for later prediction.
        int32_t nzeros =
            (covered_blocks == 1)
                ? NumNonZero8x8ExceptDC(block, row_nzeros[c] + sbx[c])
                : NumNonZeroExceptLLF(cx, cy, acs, covered_blocks,
                                      log2_covered_blocks, block, nzeros_stride,
                                      row_nzeros[c] + sbx[c]);

        int ord = kStrategyOrder[acs.RawStrategy()];
        const coeff_order_t* JXL_RESTRICT order =
            &orders[CoeffOrderOffset(ord, c)];

        int32_t predicted_nzeros =
            PredictFromTopAndLeft(row_nzeros_top[c], row_nzeros[c], sbx[c], 32);
        size_t block_ctx =
            block_ctx_map.Context(row_qdc[bx], row_qf[sbx[c]], ord, c);
        const int32_t nzero_ctx =
            block_ctx_map.NonZeroContext(predicted_nzeros, block_ctx);

        // Token 1: the nonzero count itself.
        output->emplace_back(nzero_ctx, nzeros);
        const size_t histo_offset =
            block_ctx_map.ZeroDensityContextsOffset(block_ctx);
        // Skip LLF.
        // `prev` seeds the zero-density context: dense blocks start at 0,
        // sparse ones at 1.
        size_t prev = (nzeros > static_cast<ptrdiff_t>(size / 16) ? 0 : 1);
        // Emit coefficients in coding order until all nonzeros are written;
        // trailing zeros after the last nonzero are implicit.
        for (size_t k = covered_blocks; k < size && nzeros != 0; ++k) {
          int32_t coeff = block[order[k]];
          size_t ctx =
              histo_offset + ZeroDensityContext(nzeros, k, covered_blocks,
                                                log2_covered_blocks, prev);
          uint32_t u_coeff = PackSigned(coeff);
          output->emplace_back(static_cast<uint32_t>(ctx), u_coeff);
          prev = (coeff != 0) ? 1 : 0;
          nzeros -= prev;
        }
        JXL_ENSURE(nzeros == 0);
        offset[c] += size;
      }
    }
  }
  return true;
}
246 | | |
247 | | // NOLINTNEXTLINE(google-readability-namespace-comments) |
248 | | } // namespace HWY_NAMESPACE |
249 | | } // namespace jxl |
250 | | HWY_AFTER_NAMESPACE(); |
251 | | |
252 | | #if HWY_ONCE |
253 | | namespace jxl { |
254 | | HWY_EXPORT(TokenizeCoefficients); |
255 | | Status TokenizeCoefficients(const coeff_order_t* JXL_RESTRICT orders, |
256 | | const Rect& rect, |
257 | | const int32_t* JXL_RESTRICT* JXL_RESTRICT ac_rows, |
258 | | const AcStrategyImage& ac_strategy, |
259 | | const YCbCrChromaSubsampling& cs, |
260 | | Image3I* JXL_RESTRICT tmp_num_nzeroes, |
261 | | std::vector<Token>* JXL_RESTRICT output, |
262 | | const ImageB& qdc, const ImageI& qf, |
263 | 7.81k | const BlockCtxMap& block_ctx_map) { |
264 | 7.81k | return HWY_DYNAMIC_DISPATCH(TokenizeCoefficients)( |
265 | 7.81k | orders, rect, ac_rows, ac_strategy, cs, tmp_num_nzeroes, output, qdc, qf, |
266 | 7.81k | block_ctx_map); |
267 | 7.81k | } |
268 | | |
269 | | } // namespace jxl |
270 | | #endif // HWY_ONCE |