/src/astc-encoder/Source/astcenc_weight_align.cpp
Line | Count | Source |
1 | | // SPDX-License-Identifier: Apache-2.0 |
2 | | // ---------------------------------------------------------------------------- |
3 | | // Copyright 2011-2026 Arm Limited |
4 | | // |
5 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not |
6 | | // use this file except in compliance with the License. You may obtain a copy |
7 | | // of the License at: |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, software |
12 | | // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
13 | | // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the |
14 | | // License for the specific language governing permissions and limitations |
15 | | // under the License. |
16 | | // ---------------------------------------------------------------------------- |
17 | | |
18 | | #if !defined(ASTCENC_DECOMPRESS_ONLY) |
19 | | |
20 | | /** |
21 | | * @brief Functions for angular-sum algorithm for weight alignment. |
22 | | * |
23 | | * This algorithm works as follows: |
24 | | * - we compute a complex number P as (cos s*i, sin s*i) for each weight, |
25 | | * where i is the input value and s is a scaling factor based on the spacing between the weights. |
26 | | * - we then add together complex numbers for all the weights. |
27 | | * - we then compute the length and angle of the resulting sum. |
28 | | * |
29 | | * This should produce the following results: |
30 | | * - perfect alignment results in a vector whose length is equal to the sum of lengths of all inputs |
31 | | * - even distribution results in a vector of length 0. |
32 | | * - all samples identical results in perfect alignment for every scaling. |
33 | | * |
34 | | * For each scaling factor within a given set, we compute an alignment factor from 0 to 1. This |
35 | | * should then result in some scalings standing out as having particularly good alignment factors; |
36 | | * we can use this to produce a set of candidate scale/shift values for various quantization levels; |
37 | | * we should then actually try them and see what happens. |
38 | | */ |
39 | | |
40 | | #include "astcenc_internal.h" |
41 | | #include "astcenc_vecmathlib.h" |
42 | | |
43 | | #include <stdio.h> |
44 | | #include <cassert> |
45 | | #include <cstring> |
46 | | #include <cfloat> |
47 | | |
48 | | static constexpr unsigned int ANGULAR_STEPS { 32 }; |
49 | | |
50 | | static_assert((ANGULAR_STEPS % ASTCENC_SIMD_WIDTH) == 0, |
51 | | "ANGULAR_STEPS must be multiple of ASTCENC_SIMD_WIDTH"); |
52 | | |
53 | | static_assert(ANGULAR_STEPS >= 32, |
54 | | "ANGULAR_STEPS must be at least max(steps_for_quant_level)"); |
55 | | |
56 | | // Store a reduced sin/cos table for 64 possible weight values; this causes |
57 | | // slight quality loss compared to using sin() and cos() directly. Must be 2^N. |
58 | | static constexpr unsigned int SINCOS_STEPS { 64 }; |
59 | | |
60 | | static const uint8_t steps_for_quant_level[12] { |
61 | | 2, 3, 4, 5, 6, 8, 10, 12, 16, 20, 24, 32 |
62 | | }; |
63 | | |
64 | | ASTCENC_ALIGNAS static float sin_table[SINCOS_STEPS][ANGULAR_STEPS]; |
65 | | ASTCENC_ALIGNAS static float cos_table[SINCOS_STEPS][ANGULAR_STEPS]; |
66 | | |
67 | | #if defined(ASTCENC_DIAGNOSTICS) |
68 | | static bool print_once { true }; |
69 | | #endif |
70 | | |
71 | | /* See header for documentation. */ |
72 | | void prepare_angular_tables() |
73 | 3.62k | { |
74 | 119k | for (unsigned int i = 0; i < ANGULAR_STEPS; i++) |
75 | 116k | { |
76 | 116k | float angle_step = static_cast<float>(i + 1); |
77 | | |
78 | 7.54M | for (unsigned int j = 0; j < SINCOS_STEPS; j++) |
79 | 7.43M | { |
80 | 7.43M | sin_table[j][i] = static_cast<float>(sinf((2.0f * astc::PI / (SINCOS_STEPS - 1.0f)) * angle_step * static_cast<float>(j))); |
81 | 7.43M | cos_table[j][i] = static_cast<float>(cosf((2.0f * astc::PI / (SINCOS_STEPS - 1.0f)) * angle_step * static_cast<float>(j))); |
82 | 7.43M | } |
83 | 116k | } |
84 | 3.62k | } |
85 | | |
86 | | /** |
87 | | * @brief Compute the angular alignment factors and offsets. |
88 | | * |
89 | | * @param weight_count The number of (decimated) weights. |
90 | | * @param dec_weight_ideal_value The ideal decimated unquantized weight values. |
91 | | * @param max_angular_steps The maximum number of steps to be tested. |
92 | | * @param[out] offsets The output angular offsets array. |
93 | | */ |
94 | | static void compute_angular_offsets( |
95 | | unsigned int weight_count, |
96 | | const float* dec_weight_ideal_value, |
97 | | unsigned int max_angular_steps, |
98 | | float* offsets |
99 | 148k | ) { |
100 | 148k | promise(weight_count > 0); |
101 | 148k | promise(max_angular_steps > 0); |
102 | | |
103 | 148k | ASTCENC_ALIGNAS int isamplev[BLOCK_MAX_WEIGHTS]; |
104 | | |
105 | | // Precompute isample; arrays are always allocated 64 elements long |
106 | 965k | for (unsigned int i = 0; i < weight_count; i += ASTCENC_SIMD_WIDTH) |
107 | 816k | { |
108 | | // Ideal weight can be outside [0, 1] range, so clamp to fit table |
109 | 816k | vfloat ideal_weight = clampzo(loada(dec_weight_ideal_value + i)); |
110 | | |
111 | | // Convert a weight to a sincos table index |
112 | 816k | vfloat sample = ideal_weight * (SINCOS_STEPS - 1.0f); |
113 | 816k | vint isample = float_to_int_rtn(sample); |
114 | 816k | storea(isample, isamplev + i); |
115 | 816k | } |
116 | | |
117 | | // Arrays are multiple of SIMD width (ANGULAR_STEPS), safe to overshoot max |
118 | 148k | vfloat mult(1.0f / (2.0f * astc::PI)); |
119 | | |
120 | 457k | for (unsigned int i = 0; i < max_angular_steps; i += ASTCENC_SIMD_WIDTH) |
121 | 308k | { |
122 | 308k | vfloat anglesum_x = vfloat::zero(); |
123 | 308k | vfloat anglesum_y = vfloat::zero(); |
124 | | |
125 | 5.90M | for (unsigned int j = 0; j < weight_count; j++) |
126 | 5.60M | { |
127 | 5.60M | int isample = isamplev[j]; |
128 | 5.60M | anglesum_x += loada(cos_table[isample] + i); |
129 | 5.60M | anglesum_y += loada(sin_table[isample] + i); |
130 | 5.60M | } |
131 | | |
132 | 308k | vfloat angle = atan2(anglesum_y, anglesum_x); |
133 | | |
134 | | // Suppress NaNs generated if anglesums are both zero |
135 | 308k | angle = select(vfloat::zero(), angle, angle == angle); |
136 | | |
137 | 308k | vfloat ofs = angle * mult; |
138 | 308k | storea(ofs, offsets + i); |
139 | 308k | } |
140 | 148k | } |
141 | | |
142 | | /** |
143 | | * @brief For a given step size compute the lowest and highest weight. |
144 | | * |
145 | | * Compute the lowest and highest weight that results from quantizing using the given stepsize and |
146 | | * offset, and then compute the resulting error. The cut errors indicate the error that results from |
147 | | * forcing samples that should have had one weight value one step up or down. |
148 | | * |
149 | | * @param weight_count The number of (decimated) weights. |
150 | | * @param dec_weight_ideal_value The ideal decimated unquantized weight values. |
151 | | * @param max_angular_steps The maximum number of steps to be tested. |
152 | | * @param max_quant_steps The maximum quantization level to be tested. |
153 | | * @param offsets The angular offsets array. |
154 | | * @param[out] lowest_weight Per angular step, the lowest weight. |
155 | | * @param[out] weight_span Per angular step, the span between lowest and highest weight. |
156 | | * @param[out] error Per angular step, the error. |
157 | | * @param[out] cut_low_weight_error Per angular step, the low weight cut error. |
158 | | * @param[out] cut_high_weight_error Per angular step, the high weight cut error. |
159 | | */ |
160 | | static void compute_lowest_and_highest_weight( |
161 | | unsigned int weight_count, |
162 | | const float* dec_weight_ideal_value, |
163 | | unsigned int max_angular_steps, |
164 | | unsigned int max_quant_steps, |
165 | | const float* offsets, |
166 | | float* lowest_weight, |
167 | | int* weight_span, |
168 | | float* error, |
169 | | float* cut_low_weight_error, |
170 | | float* cut_high_weight_error |
171 | 148k | ) { |
172 | 148k | promise(weight_count > 0); |
173 | 148k | promise(max_angular_steps > 0); |
174 | | |
175 | 148k | vfloat rcp_stepsize = int_to_float(vint::lane_id()) + vfloat(1.0f); |
176 | | |
177 | | // Compute minimum/maximum weights in the weight array. Our remapping |
178 | | // is monotonic, so the min/max rounded weights relate to the min/max |
179 | | // unrounded weights in a straightforward way. |
180 | 148k | vfloat min_weight(FLT_MAX); |
181 | 148k | vfloat max_weight(-FLT_MAX); |
182 | | |
183 | 148k | vint lane_id = vint::lane_id(); |
184 | 965k | for (unsigned int i = 0; i < weight_count; i += ASTCENC_SIMD_WIDTH) |
185 | 816k | { |
186 | 816k | vmask active = lane_id < vint(weight_count); |
187 | 816k | lane_id += vint(ASTCENC_SIMD_WIDTH); |
188 | | |
189 | 816k | vfloat weights = loada(dec_weight_ideal_value + i); |
190 | 816k | min_weight = min(select(min_weight, weights, active), min_weight); |
191 | 816k | max_weight = max(select(max_weight, weights, active), max_weight); |
192 | 816k | } |
193 | | |
194 | 148k | min_weight = hmin(min_weight); |
195 | 148k | max_weight = hmax(max_weight); |
196 | | |
197 | | // Arrays are ANGULAR_STEPS long, so always safe to run full vectors |
198 | 457k | for (unsigned int sp = 0; sp < max_angular_steps; sp += ASTCENC_SIMD_WIDTH) |
199 | 308k | { |
200 | 308k | vfloat errval = vfloat::zero(); |
201 | 308k | vfloat cut_low_weight_err = vfloat::zero(); |
202 | 308k | vfloat cut_high_weight_err = vfloat::zero(); |
203 | 308k | vfloat offset = loada(offsets + sp); |
204 | | |
205 | | // We know the min and max weight values, so we can figure out |
206 | | // the corresponding indices before we enter the loop. |
207 | 308k | vfloat minidx = round(min_weight * rcp_stepsize - offset); |
208 | 308k | vfloat maxidx = round(max_weight * rcp_stepsize - offset); |
209 | | |
210 | 5.90M | for (unsigned int j = 0; j < weight_count; j++) |
211 | 5.60M | { |
212 | 5.60M | vfloat sval = load1(dec_weight_ideal_value + j) * rcp_stepsize - offset; |
213 | 5.60M | vfloat svalrte = round(sval); |
214 | 5.60M | vfloat diff = sval - svalrte; |
215 | 5.60M | errval += diff * diff; |
216 | | |
217 | | // Accumulate errors for minimum index |
218 | 5.60M | vmask mask = svalrte == minidx; |
219 | 5.60M | vfloat accum = cut_low_weight_err + vfloat(1.0f) - vfloat(2.0f) * diff; |
220 | 5.60M | cut_low_weight_err = select(cut_low_weight_err, accum, mask); |
221 | | |
222 | | // Accumulate errors for maximum index |
223 | 5.60M | mask = svalrte == maxidx; |
224 | 5.60M | accum = cut_high_weight_err + vfloat(1.0f) + vfloat(2.0f) * diff; |
225 | 5.60M | cut_high_weight_err = select(cut_high_weight_err, accum, mask); |
226 | 5.60M | } |
227 | | |
228 | | // Write out min weight and weight span; clamp span to a usable range |
229 | 308k | vint span = float_to_int(maxidx - minidx + vfloat(1)); |
230 | 308k | span = min(span, vint(max_quant_steps + 3)); |
231 | 308k | span = max(span, vint(2)); |
232 | 308k | storea(minidx, lowest_weight + sp); |
233 | 308k | storea(span, weight_span + sp); |
234 | | |
235 | | // The cut_(lowest/highest)_weight_error indicate the error that results from forcing |
236 | | // samples that should have had the weight value one step (up/down). |
237 | 308k | vfloat ssize = 1.0f / rcp_stepsize; |
238 | 308k | vfloat errscale = ssize * ssize; |
239 | 308k | storea(errval * errscale, error + sp); |
240 | 308k | storea(cut_low_weight_err * errscale, cut_low_weight_error + sp); |
241 | 308k | storea(cut_high_weight_err * errscale, cut_high_weight_error + sp); |
242 | | |
243 | 308k | rcp_stepsize = rcp_stepsize + vfloat(ASTCENC_SIMD_WIDTH); |
244 | 308k | } |
245 | 148k | } |
246 | | |
247 | | /** |
248 | | * @brief The main function for the angular algorithm. |
249 | | * |
250 | | * @param weight_count The number of (decimated) weights. |
251 | | * @param dec_weight_ideal_value The ideal decimated unquantized weight values. |
252 | | * @param max_quant_level The maximum quantization level to be tested. |
253 | | * @param[out] low_value Per angular step, the lowest weight value. |
254 | | * @param[out] high_value Per angular step, the highest weight value. |
255 | | */ |
256 | | static void compute_angular_endpoints_for_quant_levels( |
257 | | unsigned int weight_count, |
258 | | const float* dec_weight_ideal_value, |
259 | | unsigned int max_quant_level, |
260 | | float low_value[TUNE_MAX_ANGULAR_QUANT + 1], |
261 | | float high_value[TUNE_MAX_ANGULAR_QUANT + 1] |
262 | 148k | ) { |
263 | 148k | unsigned int max_quant_steps = steps_for_quant_level[max_quant_level]; |
264 | 148k | unsigned int max_angular_steps = steps_for_quant_level[max_quant_level]; |
265 | | |
266 | 148k | ASTCENC_ALIGNAS float angular_offsets[ANGULAR_STEPS]; |
267 | | |
268 | 148k | compute_angular_offsets(weight_count, dec_weight_ideal_value, |
269 | 148k | max_angular_steps, angular_offsets); |
270 | | |
271 | 148k | ASTCENC_ALIGNAS float lowest_weight[ANGULAR_STEPS]; |
272 | 148k | ASTCENC_ALIGNAS int32_t weight_span[ANGULAR_STEPS]; |
273 | 148k | ASTCENC_ALIGNAS float error[ANGULAR_STEPS]; |
274 | 148k | ASTCENC_ALIGNAS float cut_low_weight_error[ANGULAR_STEPS]; |
275 | 148k | ASTCENC_ALIGNAS float cut_high_weight_error[ANGULAR_STEPS]; |
276 | | |
277 | 148k | compute_lowest_and_highest_weight(weight_count, dec_weight_ideal_value, |
278 | 148k | max_angular_steps, max_quant_steps, |
279 | 148k | angular_offsets, lowest_weight, weight_span, error, |
280 | 148k | cut_low_weight_error, cut_high_weight_error); |
281 | | |
282 | | // For each quantization level, find the best error terms. Use packed vectors so data-dependent |
283 | | // branches can become selects. This involves some integer to float casts, but the values are |
284 | | // small enough so they never round the wrong way. |
285 | 148k | vfloat4 best_results[36]; |
286 | | |
287 | | // Initialize the array to some safe defaults |
288 | 148k | promise(max_quant_steps > 0); |
289 | 1.86M | for (unsigned int i = 0; i < (max_quant_steps + 4); i++) |
290 | 1.71M | { |
291 | | // Lane<0> = Best error |
292 | | // Lane<1> = Best scale; -1 indicates no solution found |
293 | | // Lane<2> = Cut low weight |
294 | 1.71M | best_results[i] = vfloat4(ERROR_CALC_DEFAULT, -1.0f, 0.0f, 0.0f); |
295 | 1.71M | } |
296 | | |
297 | 148k | promise(max_angular_steps > 0); |
298 | 1.27M | for (unsigned int i = 0; i < max_angular_steps; i++) |
299 | 1.12M | { |
300 | 1.12M | float i_flt = static_cast<float>(i); |
301 | | |
302 | 1.12M | int idx_span = weight_span[i]; |
303 | | |
304 | 1.12M | float error_cut_low = error[i] + cut_low_weight_error[i]; |
305 | 1.12M | float error_cut_high = error[i] + cut_high_weight_error[i]; |
306 | 1.12M | float error_cut_low_high = error[i] + cut_low_weight_error[i] + cut_high_weight_error[i]; |
307 | | |
308 | | // Check best error against record N |
309 | 1.12M | vfloat4 best_result = best_results[idx_span]; |
310 | 1.12M | vfloat4 new_result = vfloat4(error[i], i_flt, 0.0f, 0.0f); |
311 | 1.12M | vmask4 mask = vfloat4(best_result.lane<0>()) > vfloat4(error[i]); |
312 | 1.12M | best_results[idx_span] = select(best_result, new_result, mask); |
313 | | |
314 | | // Check best error against record N-1 with either cut low or cut high |
315 | 1.12M | best_result = best_results[idx_span - 1]; |
316 | | |
317 | 1.12M | new_result = vfloat4(error_cut_low, i_flt, 1.0f, 0.0f); |
318 | 1.12M | mask = vfloat4(best_result.lane<0>()) > vfloat4(error_cut_low); |
319 | 1.12M | best_result = select(best_result, new_result, mask); |
320 | | |
321 | 1.12M | new_result = vfloat4(error_cut_high, i_flt, 0.0f, 0.0f); |
322 | 1.12M | mask = vfloat4(best_result.lane<0>()) > vfloat4(error_cut_high); |
323 | 1.12M | best_results[idx_span - 1] = select(best_result, new_result, mask); |
324 | | |
325 | | // Check best error against record N-2 with both cut low and high |
326 | 1.12M | best_result = best_results[idx_span - 2]; |
327 | 1.12M | new_result = vfloat4(error_cut_low_high, i_flt, 1.0f, 0.0f); |
328 | 1.12M | mask = vfloat4(best_result.lane<0>()) > vfloat4(error_cut_low_high); |
329 | 1.12M | best_results[idx_span - 2] = select(best_result, new_result, mask); |
330 | 1.12M | } |
331 | | |
332 | 935k | for (unsigned int i = 0; i <= max_quant_level; i++) |
333 | 786k | { |
334 | 786k | unsigned int q = steps_for_quant_level[i]; |
335 | 786k | int bsi = static_cast<int>(best_results[q].lane<1>()); |
336 | | |
337 | | // Did we find anything? |
338 | | #if defined(ASTCENC_DIAGNOSTICS) |
339 | | if ((bsi < 0) && print_once) |
340 | | { |
341 | | print_once = false; |
342 | | printf("INFO: Unable to find full encoding within search error limit.\n\n"); |
343 | | } |
344 | | #endif |
345 | | |
346 | 786k | bsi = astc::max(0, bsi); |
347 | | |
348 | 786k | float lwi = lowest_weight[bsi] + best_results[q].lane<2>(); |
349 | 786k | float hwi = lwi + static_cast<float>(q) - 1.0f; |
350 | | |
351 | 786k | float stepsize = 1.0f / (1.0f + static_cast<float>(bsi)); |
352 | 786k | low_value[i] = (angular_offsets[bsi] + lwi) * stepsize; |
353 | 786k | high_value[i] = (angular_offsets[bsi] + hwi) * stepsize; |
354 | 786k | } |
355 | 148k | } |
356 | | |
357 | | /* See header for documentation. */ |
358 | | void compute_angular_endpoints_1plane( |
359 | | bool only_always, |
360 | | const block_size_descriptor& bsd, |
361 | | const float* dec_weight_ideal_value, |
362 | | unsigned int max_weight_quant, |
363 | | compression_working_buffers& tmpbuf |
364 | 11.1k | ) { |
365 | 11.1k | float (&low_value)[WEIGHTS_MAX_BLOCK_MODES] = tmpbuf.weight_low_value1; |
366 | 11.1k | float (&high_value)[WEIGHTS_MAX_BLOCK_MODES] = tmpbuf.weight_high_value1; |
367 | | |
368 | 11.1k | float (&low_values)[WEIGHTS_MAX_DECIMATION_MODES][TUNE_MAX_ANGULAR_QUANT + 1] = tmpbuf.weight_low_values1; |
369 | 11.1k | float (&high_values)[WEIGHTS_MAX_DECIMATION_MODES][TUNE_MAX_ANGULAR_QUANT + 1] = tmpbuf.weight_high_values1; |
370 | | |
371 | 11.1k | unsigned int max_decimation_modes = only_always ? bsd.decimation_mode_count_always |
372 | 11.1k | : bsd.decimation_mode_count_selected; |
373 | 11.1k | promise(max_decimation_modes > 0); |
374 | 120k | for (unsigned int i = 0; i < max_decimation_modes; i++) |
375 | 109k | { |
376 | 109k | const decimation_mode& dm = bsd.decimation_modes[i]; |
377 | 109k | if (!dm.is_ref_1plane(static_cast<quant_method>(max_weight_quant))) |
378 | 35.9k | { |
379 | 35.9k | continue; |
380 | 35.9k | } |
381 | | |
382 | 73.0k | unsigned int weight_count = bsd.get_decimation_info(i).weight_count; |
383 | | |
384 | 73.0k | unsigned int max_precision = dm.maxprec_1plane; |
385 | 73.0k | if (max_precision > TUNE_MAX_ANGULAR_QUANT) |
386 | 42.0k | { |
387 | 42.0k | max_precision = TUNE_MAX_ANGULAR_QUANT; |
388 | 42.0k | } |
389 | | |
390 | 73.0k | if (max_precision > max_weight_quant) |
391 | 26.3k | { |
392 | 26.3k | max_precision = max_weight_quant; |
393 | 26.3k | } |
394 | | |
395 | 73.0k | compute_angular_endpoints_for_quant_levels( |
396 | 73.0k | weight_count, |
397 | 73.0k | dec_weight_ideal_value + i * BLOCK_MAX_WEIGHTS, |
398 | 73.0k | max_precision, low_values[i], high_values[i]); |
399 | 73.0k | } |
400 | | |
401 | 11.1k | unsigned int max_block_modes = only_always ? bsd.block_mode_count_1plane_always |
402 | 11.1k | : bsd.block_mode_count_1plane_selected; |
403 | 11.1k | promise(max_block_modes > 0); |
404 | 286k | for (unsigned int i = 0; i < max_block_modes; i++) |
405 | 275k | { |
406 | 275k | const block_mode& bm = bsd.block_modes[i]; |
407 | 275k | assert(!bm.is_dual_plane); |
408 | | |
409 | 275k | unsigned int quant_mode = bm.quant_mode; |
410 | 275k | unsigned int decim_mode = bm.decimation_mode; |
411 | | |
412 | 275k | if (quant_mode <= TUNE_MAX_ANGULAR_QUANT) |
413 | 222k | { |
414 | 222k | low_value[i] = low_values[decim_mode][quant_mode]; |
415 | 222k | high_value[i] = high_values[decim_mode][quant_mode]; |
416 | 222k | } |
417 | 53.3k | else |
418 | 53.3k | { |
419 | 53.3k | low_value[i] = 0.0f; |
420 | 53.3k | high_value[i] = 1.0f; |
421 | 53.3k | } |
422 | 275k | } |
423 | 11.1k | } |
424 | | |
425 | | /* See header for documentation. */ |
426 | | void compute_angular_endpoints_2planes( |
427 | | const block_size_descriptor& bsd, |
428 | | const float* dec_weight_ideal_value, |
429 | | unsigned int max_weight_quant, |
430 | | compression_working_buffers& tmpbuf |
431 | 7.06k | ) { |
432 | 7.06k | float (&low_value1)[WEIGHTS_MAX_BLOCK_MODES] = tmpbuf.weight_low_value1; |
433 | 7.06k | float (&high_value1)[WEIGHTS_MAX_BLOCK_MODES] = tmpbuf.weight_high_value1; |
434 | 7.06k | float (&low_value2)[WEIGHTS_MAX_BLOCK_MODES] = tmpbuf.weight_low_value2; |
435 | 7.06k | float (&high_value2)[WEIGHTS_MAX_BLOCK_MODES] = tmpbuf.weight_high_value2; |
436 | | |
437 | 7.06k | float (&low_values1)[WEIGHTS_MAX_DECIMATION_MODES][TUNE_MAX_ANGULAR_QUANT + 1] = tmpbuf.weight_low_values1; |
438 | 7.06k | float (&high_values1)[WEIGHTS_MAX_DECIMATION_MODES][TUNE_MAX_ANGULAR_QUANT + 1] = tmpbuf.weight_high_values1; |
439 | 7.06k | float (&low_values2)[WEIGHTS_MAX_DECIMATION_MODES][TUNE_MAX_ANGULAR_QUANT + 1] = tmpbuf.weight_low_values2; |
440 | 7.06k | float (&high_values2)[WEIGHTS_MAX_DECIMATION_MODES][TUNE_MAX_ANGULAR_QUANT + 1] = tmpbuf.weight_high_values2; |
441 | | |
442 | 7.06k | promise(bsd.decimation_mode_count_selected > 0); |
443 | 72.0k | for (unsigned int i = 0; i < bsd.decimation_mode_count_selected; i++) |
444 | 65.0k | { |
445 | 65.0k | const decimation_mode& dm = bsd.decimation_modes[i]; |
446 | 65.0k | if (!dm.is_ref_2plane(static_cast<quant_method>(max_weight_quant))) |
447 | 27.1k | { |
448 | 27.1k | continue; |
449 | 27.1k | } |
450 | | |
451 | 37.8k | unsigned int weight_count = bsd.get_decimation_info(i).weight_count; |
452 | | |
453 | 37.8k | unsigned int max_precision = dm.maxprec_2planes; |
454 | 37.8k | if (max_precision > TUNE_MAX_ANGULAR_QUANT) |
455 | 15.6k | { |
456 | 15.6k | max_precision = TUNE_MAX_ANGULAR_QUANT; |
457 | 15.6k | } |
458 | | |
459 | 37.8k | if (max_precision > max_weight_quant) |
460 | 10.1k | { |
461 | 10.1k | max_precision = max_weight_quant; |
462 | 10.1k | } |
463 | | |
464 | 37.8k | compute_angular_endpoints_for_quant_levels( |
465 | 37.8k | weight_count, |
466 | 37.8k | dec_weight_ideal_value + i * BLOCK_MAX_WEIGHTS, |
467 | 37.8k | max_precision, low_values1[i], high_values1[i]); |
468 | | |
469 | 37.8k | compute_angular_endpoints_for_quant_levels( |
470 | 37.8k | weight_count, |
471 | 37.8k | dec_weight_ideal_value + i * BLOCK_MAX_WEIGHTS + WEIGHTS_PLANE2_OFFSET, |
472 | 37.8k | max_precision, low_values2[i], high_values2[i]); |
473 | 37.8k | } |
474 | | |
475 | 7.06k | unsigned int start = bsd.block_mode_count_1plane_selected; |
476 | 7.06k | unsigned int end = bsd.block_mode_count_1plane_2plane_selected; |
477 | 77.2k | for (unsigned int i = start; i < end; i++) |
478 | 70.2k | { |
479 | 70.2k | const block_mode& bm = bsd.block_modes[i]; |
480 | 70.2k | unsigned int quant_mode = bm.quant_mode; |
481 | 70.2k | unsigned int decim_mode = bm.decimation_mode; |
482 | | |
483 | 70.2k | if (quant_mode <= TUNE_MAX_ANGULAR_QUANT) |
484 | 58.8k | { |
485 | 58.8k | low_value1[i] = low_values1[decim_mode][quant_mode]; |
486 | 58.8k | high_value1[i] = high_values1[decim_mode][quant_mode]; |
487 | 58.8k | low_value2[i] = low_values2[decim_mode][quant_mode]; |
488 | 58.8k | high_value2[i] = high_values2[decim_mode][quant_mode]; |
489 | 58.8k | } |
490 | 11.3k | else |
491 | 11.3k | { |
492 | 11.3k | low_value1[i] = 0.0f; |
493 | 11.3k | high_value1[i] = 1.0f; |
494 | 11.3k | low_value2[i] = 0.0f; |
495 | 11.3k | high_value2[i] = 1.0f; |
496 | 11.3k | } |
497 | 70.2k | } |
498 | 7.06k | } |
499 | | |
500 | | #endif |