/src/astc-encoder/Source/astcenc_entry.cpp
Line | Count | Source |
1 | | // SPDX-License-Identifier: Apache-2.0 |
2 | | // ---------------------------------------------------------------------------- |
3 | | // Copyright 2011-2026 Arm Limited |
4 | | // |
5 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not |
6 | | // use this file except in compliance with the License. You may obtain a copy |
7 | | // of the License at: |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, software |
12 | | // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
13 | | // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the |
14 | | // License for the specific language governing permissions and limitations |
15 | | // under the License. |
16 | | // ---------------------------------------------------------------------------- |
17 | | |
18 | | /** |
19 | | * @brief Functions for the library entrypoint. |
20 | | */ |
21 | | |
22 | | #include <array> |
23 | | #include <cstring> |
24 | | #include <new> |
25 | | |
26 | | #include "astcenc.h" |
27 | | #include "astcenc_diagnostic_trace.h" |
28 | | #include "astcenc_internal_entry.h" |
29 | | #include "astcenc_mathlib.h" |
30 | | |
31 | | /** |
32 | | * @brief Record of the quality tuning parameter values. |
33 | | * |
34 | | * See the @c astcenc_config structure for detailed parameter documentation. |
35 | | * Each record is keyed by @c quality; astcenc_config_init() copies one record or interpolates between two. |
36 | | * Note that the mse_overshoot entries are scaling factors relative to the base MSE to hit db_limit. |
37 | | * A 20% overshoot is harder to hit for a higher base db_limit, so we may actually use lower ratios |
38 | | * for the more thorough search presets because the underlying db_limit is so much higher. |
39 | | */ |
40 | | struct astcenc_preset_config |
41 | | { |
42 | | float quality; |
43 | | unsigned int tune_partition_count_limit; |
44 | | unsigned int tune_2partition_index_limit; |
45 | | unsigned int tune_3partition_index_limit; |
46 | | unsigned int tune_4partition_index_limit; |
47 | | unsigned int tune_block_mode_limit; |
48 | | unsigned int tune_refinement_limit; |
49 | | unsigned int tune_candidate_limit; |
50 | | unsigned int tune_2partitioning_candidate_limit; |
51 | | unsigned int tune_3partitioning_candidate_limit; |
52 | | unsigned int tune_4partitioning_candidate_limit; |
53 | | float tune_db_limit_a_base; |
54 | | float tune_db_limit_b_base; |
55 | | float tune_mse_overshoot; |
56 | | float tune_2partition_early_out_limit_factor; |
57 | | float tune_3partition_early_out_limit_factor; |
58 | | float tune_2plane_early_out_limit_correlation; |
59 | | float tune_search_mode0_enable; |
60 | | }; |
61 | | |
62 | | /** |
63 | | * @brief The static presets for high bandwidth encodings (x < 25 texels per block); entries run from fastest to exhaustive, with values in @c astcenc_preset_config field order. |
64 | | */ |
65 | | static const std::array<astcenc_preset_config, 6> preset_configs_high {{ |
66 | | { |
67 | | ASTCENC_PRE_FASTEST, |
68 | | 2, 10, 6, 4, 43, 2, 2, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.00f, 1.00f, 0.85f, 0.0f |
69 | | }, { |
70 | | ASTCENC_PRE_FAST, |
71 | | 3, 18, 10, 8, 55, 3, 3, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.00f, 1.00f, 0.90f, 0.0f |
72 | | }, { |
73 | | ASTCENC_PRE_MEDIUM, |
74 | | 4, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 2.5f, 1.10f, 1.05f, 0.95f, 0.0f |
75 | | }, { |
76 | | ASTCENC_PRE_THOROUGH, |
77 | | 4, 82, 60, 30, 94, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 1.35f, 1.15f, 0.97f, 0.0f |
78 | | }, { |
79 | | ASTCENC_PRE_VERYTHOROUGH, |
80 | | 4, 256, 128, 64, 98, 4, 6, 8, 6, 4, 200.0f, 200.0f, 10.0f, 1.60f, 1.40f, 0.98f, 0.0f |
81 | | }, { |
82 | | ASTCENC_PRE_EXHAUSTIVE, |
83 | | 4, 512, 512, 512, 100, 4, 8, 8, 8, 8, 200.0f, 200.0f, 10.0f, 2.00f, 2.00f, 0.99f, 0.0f |
84 | | } |
85 | | }}; |
86 | | |
87 | | /** |
88 | | * @brief The static presets for medium bandwidth encodings (25 <= x < 64 texels per block); entries run from fastest to exhaustive, with values in @c astcenc_preset_config field order. |
89 | | */ |
90 | | static const std::array<astcenc_preset_config, 6> preset_configs_mid {{ |
91 | | { |
92 | | ASTCENC_PRE_FASTEST, |
93 | | 2, 10, 6, 4, 43, 2, 2, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.00f, 1.00f, 0.80f, 1.0f |
94 | | }, { |
95 | | ASTCENC_PRE_FAST, |
96 | | 3, 18, 12, 10, 55, 3, 3, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.00f, 1.00f, 0.85f, 1.0f |
97 | | }, { |
98 | | ASTCENC_PRE_MEDIUM, |
99 | | 3, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 3.0f, 1.10f, 1.05f, 0.90f, 1.0f |
100 | | }, { |
101 | | ASTCENC_PRE_THOROUGH, |
102 | | 4, 82, 60, 30, 94, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 1.40f, 1.20f, 0.95f, 0.0f |
103 | | }, { |
104 | | ASTCENC_PRE_VERYTHOROUGH, |
105 | | 4, 256, 128, 64, 98, 4, 6, 8, 6, 3, 200.0f, 200.0f, 10.0f, 1.60f, 1.40f, 0.98f, 0.0f |
106 | | }, { |
107 | | ASTCENC_PRE_EXHAUSTIVE, |
108 | | 4, 256, 256, 256, 100, 4, 8, 8, 8, 8, 200.0f, 200.0f, 10.0f, 2.00f, 2.00f, 0.99f, 0.0f |
109 | | } |
110 | | }}; |
111 | | |
112 | | /** |
113 | | * @brief The static presets for low bandwidth encodings (64 <= x texels per block); entries run from fastest to exhaustive, with values in @c astcenc_preset_config field order. |
114 | | */ |
115 | | static const std::array<astcenc_preset_config, 6> preset_configs_low {{ |
116 | | { |
117 | | ASTCENC_PRE_FASTEST, |
118 | | 2, 10, 6, 4, 40, 2, 2, 2, 2, 2, 85.0f, 63.0f, 3.5f, 1.00f, 1.00f, 0.80f, 1.0f |
119 | | }, { |
120 | | ASTCENC_PRE_FAST, |
121 | | 2, 18, 12, 10, 55, 3, 3, 2, 2, 2, 85.0f, 63.0f, 3.5f, 1.00f, 1.00f, 0.85f, 1.0f |
122 | | }, { |
123 | | ASTCENC_PRE_MEDIUM, |
124 | | 3, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 3.5f, 1.10f, 1.05f, 0.90f, 1.0f |
125 | | }, { |
126 | | ASTCENC_PRE_THOROUGH, |
127 | | 4, 82, 60, 30, 93, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 1.30f, 1.20f, 0.97f, 1.0f |
128 | | }, { |
129 | | ASTCENC_PRE_VERYTHOROUGH, |
130 | | 4, 256, 128, 64, 98, 4, 6, 8, 5, 2, 200.0f, 200.0f, 10.0f, 1.60f, 1.40f, 0.98f, 1.0f |
131 | | }, { |
132 | | ASTCENC_PRE_EXHAUSTIVE, |
133 | | 4, 256, 256, 256, 100, 4, 8, 8, 8, 8, 200.0f, 200.0f, 10.0f, 2.00f, 2.00f, 0.99f, 1.0f |
134 | | } |
135 | | }}; |
136 | | |
137 | | /** |
138 | | * @brief Get the total number of texels in an image. |
139 | | * |
140 | | * This function validates that the total size would fit in a size_t and returns |
141 | | * 0 if it does not. |
142 | | * |
143 | | * @param texels_x Number of texels in the X axis. |
144 | | * @param texels_y Number of texels in the Y axis. |
145 | | * @param texels_z Number of texels in the Z axis. |
146 | | * |
147 | | * @return The number of texels in the image, or zero if total size would not |
148 | | * fit into a size_t. |
149 | | */ |
150 | | static size_t get_texels_count( |
151 | | size_t texels_x, |
152 | | size_t texels_y, |
153 | | size_t texels_z |
154 | 3.34k | ) { |
155 | 3.34k | bool overflow { false }; |
156 | | |
157 | | // Compute texel count; both multiplies share one overflow flag (assumes mul_safe only ever sets it - TODO confirm) |
158 | 3.34k | size_t texels_count = astc::mul_safe(texels_x, texels_y, overflow); |
159 | 3.34k | texels_count = astc::mul_safe(texels_count, texels_z, overflow); |
160 | | |
161 | 3.34k | if (overflow) |
162 | 0 | { |
163 | 0 | return 0; |
164 | 0 | } |
165 | | |
166 | 3.34k | return texels_count; |
167 | 3.34k | } |
168 | | |
169 | | /** |
170 | | * @brief Get the total number of blocks in an image. |
171 | | * |
172 | | * This function also validates that the total size of the compressed image, |
173 | | * in bytes, would fit in a size_t. |
174 | | * |
175 | | * @param blocks_x Number of blocks in the X axis. |
176 | | * @param blocks_y Number of blocks in the Y axis. |
177 | | * @param blocks_z Number of blocks in the Z axis. |
178 | | * |
179 | | * @return The number of blocks in the image, or zero if total size would not |
180 | | * fit into a size_t. |
181 | | */ |
182 | | static size_t get_blocks_count( |
183 | | size_t blocks_x, |
184 | | size_t blocks_y, |
185 | | size_t blocks_z |
186 | 5.58k | ) { |
187 | 5.58k | bool overflow { false }; |
188 | | |
189 | | // Compute block count; all three multiplies below share the same overflow flag |
190 | 5.58k | size_t blocks_count = astc::mul_safe(blocks_x, blocks_y, overflow); |
191 | 5.58k | blocks_count = astc::mul_safe(blocks_count, blocks_z, overflow); |
192 | | |
193 | | // Also compute byte count (16 bytes per block), but we only use overflow and not the result |
194 | 5.58k | astc::mul_safe(blocks_count, 16, overflow); |
195 | | |
196 | 5.58k | if (overflow) |
197 | 0 | { |
198 | 0 | return 0; |
199 | 0 | } |
200 | | |
201 | 5.58k | return blocks_count; |
202 | 5.58k | } |
203 | | |
204 | | /** |
205 | | * @brief Validate CPU floating point meets assumptions made in the codec. |
206 | | * |
207 | | * The codec is written with the assumption that float bit patterns are valid |
208 | | * IEEE754 values that are stored and reloaded with round-to-nearest rounding. |
209 | | * This is always the case in an IEEE-754 compliant system, however not every |
210 | | * system or compilation mode is actually IEEE-754 compliant. This normally |
211 | | * fails if the code is compiled with fast math enabled, for example. |
212 | | * The test adds 12582912.0f (1.5 * 2^23) to 2.51f and subtracts it again; the result must be exactly 3.0f. |
213 | | * @return Return @c ASTCENC_SUCCESS if validated, @c ASTCENC_ERR_BAD_CPU_FLOAT on failure. |
214 | | */ |
215 | | static astcenc_error validate_cpu_float() |
216 | 7.25k | { |
217 | 7.25k | volatile float xprec_testval = 2.51f; |
218 | 7.25k | float store = xprec_testval + 12582912.0f; |
219 | 7.25k | float q = store - 12582912.0f; |
220 | | |
221 | 7.25k | if (q != 3.0f) |
222 | 0 | { |
223 | 0 | return ASTCENC_ERR_BAD_CPU_FLOAT; |
224 | 0 | } |
225 | | |
226 | 7.25k | return ASTCENC_SUCCESS; |
227 | 7.25k | } |
228 | | |
229 | | /** |
230 | | * @brief Validate that the config profile is one of the four known color profiles. |
231 | | * |
232 | | * @param profile The profile to check. |
233 | | * |
234 | | * @return Return @c ASTCENC_SUCCESS if validated, otherwise @c ASTCENC_ERR_BAD_PROFILE. |
235 | | */ |
236 | | static astcenc_error validate_profile( |
237 | | astcenc_profile profile |
238 | 3.60k | ) { |
239 | | // Values in this enum are from an external user, so not guaranteed to be |
240 | | // bounded to the enum values; switch on the int value so any other value hits the default case |
241 | 3.60k | switch (static_cast<int>(profile)) |
242 | 3.60k | { |
243 | 1.18k | case ASTCENC_PRF_LDR_SRGB: |
244 | 1.85k | case ASTCENC_PRF_LDR: |
245 | 2.72k | case ASTCENC_PRF_HDR_RGB_LDR_A: |
246 | 3.60k | case ASTCENC_PRF_HDR: |
247 | 3.60k | return ASTCENC_SUCCESS; |
248 | 0 | default: |
249 | 0 | return ASTCENC_ERR_BAD_PROFILE; |
250 | 3.60k | } |
251 | 3.60k | } |
252 | | |
253 | | /** |
254 | | * @brief Validate block size. |
255 | | * |
256 | | * @param block_x The block X dimension. |
257 | | * @param block_y The block Y dimension. |
258 | | * @param block_z The block Z dimension; values of 0 or 1 select the 2D legality check. |
259 | | * |
260 | | * @return Return @c ASTCENC_SUCCESS if validated, @c ASTCENC_ERR_BAD_BLOCK_SIZE for an illegal size, or @c ASTCENC_ERR_NOT_IMPLEMENTED if it exceeds @c BLOCK_MAX_TEXELS. |
261 | | */ |
262 | | static astcenc_error validate_block_size( |
263 | | unsigned int block_x, |
264 | | unsigned int block_y, |
265 | | unsigned int block_z |
266 | 7.25k | ) { |
267 | | // Test if this is a legal block size at all |
268 | 7.25k | bool is_legal = (((block_z <= 1) && is_legal_2d_block_size(block_x, block_y)) || |
269 | 0 | ((block_z >= 2) && is_legal_3d_block_size(block_x, block_y, block_z))); |
270 | 7.25k | if (!is_legal) |
271 | 0 | { |
272 | 0 | return ASTCENC_ERR_BAD_BLOCK_SIZE; |
273 | 0 | } |
274 | | |
275 | | // Test if this build has sufficient capacity for this block size |
276 | 7.25k | bool have_capacity = (block_x * block_y * block_z) <= BLOCK_MAX_TEXELS; |
277 | 7.25k | if (!have_capacity) |
278 | 0 | { |
279 | 0 | return ASTCENC_ERR_NOT_IMPLEMENTED; |
280 | 0 | } |
281 | | |
282 | 7.25k | return ASTCENC_SUCCESS; |
283 | 7.25k | } |
284 | | |
285 | | /** |
286 | | * @brief Validate flags. |
287 | | * |
288 | | * @param profile The profile in use; only needed to reject unorm8 decode with the HDR profiles. |
289 | | * @param flags The flags to check. |
290 | | * |
291 | | * @return Return @c ASTCENC_SUCCESS if validated, otherwise @c ASTCENC_ERR_BAD_FLAGS or @c ASTCENC_ERR_BAD_DECODE_MODE. |
292 | | */ |
293 | | static astcenc_error validate_flags( |
294 | | astcenc_profile profile, |
295 | | unsigned int flags |
296 | 7.25k | ) { |
297 | | // Flags field must not contain any unknown flag bits |
298 | 7.25k | unsigned int exMask = ~ASTCENC_ALL_FLAGS; |
299 | 7.25k | if (popcount(flags & exMask) != 0) |
300 | 0 | { |
301 | 0 | return ASTCENC_ERR_BAD_FLAGS; |
302 | 0 | } |
303 | | |
304 | | // Flags field must only contain at most a single map type (normal map or RGBM) |
305 | 7.25k | exMask = ASTCENC_FLG_MAP_NORMAL |
306 | 7.25k | | ASTCENC_FLG_MAP_RGBM; |
307 | 7.25k | if (popcount(flags & exMask) > 1) |
308 | 29 | { |
309 | 29 | return ASTCENC_ERR_BAD_FLAGS; |
310 | 29 | } |
311 | | |
312 | | // Decode_unorm8 must only be used with an LDR profile |
313 | 7.22k | bool is_unorm8 = flags & ASTCENC_FLG_USE_DECODE_UNORM8; |
314 | 7.22k | bool is_hdr = (profile == ASTCENC_PRF_HDR) || (profile == ASTCENC_PRF_HDR_RGB_LDR_A); |
315 | 7.22k | if (is_unorm8 && is_hdr) |
316 | 18 | { |
317 | 18 | return ASTCENC_ERR_BAD_DECODE_MODE; |
318 | 18 | } |
319 | | |
320 | 7.20k | return ASTCENC_SUCCESS; |
321 | 7.22k | } |
322 | | |
323 | | #if !defined(ASTCENC_DECOMPRESS_ONLY) |
324 | | |
325 | | /** |
326 | | * @brief Validate single channel compression swizzle. |
327 | | * |
328 | | * @param swizzle The single-channel swizzle selector to check. |
329 | | * |
330 | | * @return Return @c ASTCENC_SUCCESS if validated, otherwise @c ASTCENC_ERR_BAD_SWIZZLE. |
331 | | */ |
332 | | static astcenc_error validate_compression_swz( |
333 | | astcenc_swz swizzle |
334 | 8.94k | ) { |
335 | | // Not all enum values are handled; SWZ_Z is invalid for compression and falls into the default case |
336 | 8.94k | switch (static_cast<int>(swizzle)) |
337 | 8.94k | { |
338 | 2.23k | case ASTCENC_SWZ_R: |
339 | 4.47k | case ASTCENC_SWZ_G: |
340 | 6.70k | case ASTCENC_SWZ_B: |
341 | 8.94k | case ASTCENC_SWZ_A: |
342 | 8.94k | case ASTCENC_SWZ_0: |
343 | 8.94k | case ASTCENC_SWZ_1: |
344 | 8.94k | return ASTCENC_SUCCESS; |
345 | 0 | default: |
346 | 0 | return ASTCENC_ERR_BAD_SWIZZLE; |
347 | 8.94k | } |
348 | 8.94k | } |
349 | | |
350 | | /** |
351 | | * @brief Validate overall compression swizzle. |
352 | | * |
353 | | * @param swizzle The swizzle to check; its r, g, b, and a selectors are each validated in turn. |
354 | | * |
355 | | * @return Return @c ASTCENC_SUCCESS if validated, otherwise @c ASTCENC_ERR_BAD_SWIZZLE. |
356 | | */ |
357 | | static astcenc_error validate_compression_swizzle( |
358 | | const astcenc_swizzle& swizzle |
359 | 2.23k | ) { |
360 | 2.23k | if (validate_compression_swz(swizzle.r) || |
361 | 2.23k | validate_compression_swz(swizzle.g) || |
362 | 2.23k | validate_compression_swz(swizzle.b) || |
363 | 2.23k | validate_compression_swz(swizzle.a)) |
364 | 0 | { |
365 | 0 | return ASTCENC_ERR_BAD_SWIZZLE; |
366 | 0 | } |
367 | | |
368 | 2.23k | return ASTCENC_SUCCESS; |
369 | 2.23k | } |
370 | | #endif |
371 | | |
372 | | /** |
373 | | * @brief Validate single channel decompression swizzle. |
374 | | * |
375 | | * @param swizzle The single-channel swizzle selector to check; @c ASTCENC_SWZ_Z is accepted here. |
376 | | * |
377 | | * @return Return @c ASTCENC_SUCCESS if validated, otherwise @c ASTCENC_ERR_BAD_SWIZZLE. |
378 | | */ |
379 | | static astcenc_error validate_decompression_swz( |
380 | | astcenc_swz swizzle |
381 | 4.45k | ) { |
382 | | // Values in this enum are from an external user, so not guaranteed to be |
383 | | // bounded to the enum values; switch on the int value so any other value hits the default case |
384 | 4.45k | switch (static_cast<int>(swizzle)) |
385 | 4.45k | { |
386 | 1.11k | case ASTCENC_SWZ_R: |
387 | 2.22k | case ASTCENC_SWZ_G: |
388 | 3.34k | case ASTCENC_SWZ_B: |
389 | 4.45k | case ASTCENC_SWZ_A: |
390 | 4.45k | case ASTCENC_SWZ_0: |
391 | 4.45k | case ASTCENC_SWZ_1: |
392 | 4.45k | case ASTCENC_SWZ_Z: |
393 | 4.45k | return ASTCENC_SUCCESS; |
394 | 0 | default: |
395 | 0 | return ASTCENC_ERR_BAD_SWIZZLE; |
396 | 4.45k | } |
397 | 4.45k | } |
398 | | |
399 | | /** |
400 | | * @brief Validate overall decompression swizzle. |
401 | | * |
402 | | * @param swizzle The swizzle to check; its r, g, b, and a selectors are each validated in turn. |
403 | | * |
404 | | * @return Return @c ASTCENC_SUCCESS if validated, otherwise @c ASTCENC_ERR_BAD_SWIZZLE. |
405 | | */ |
406 | | static astcenc_error validate_decompression_swizzle( |
407 | | const astcenc_swizzle& swizzle |
408 | 1.11k | ) { |
409 | 1.11k | if (validate_decompression_swz(swizzle.r) || |
410 | 1.11k | validate_decompression_swz(swizzle.g) || |
411 | 1.11k | validate_decompression_swz(swizzle.b) || |
412 | 1.11k | validate_decompression_swz(swizzle.a)) |
413 | 0 | { |
414 | 0 | return ASTCENC_ERR_BAD_SWIZZLE; |
415 | 0 | } |
416 | | |
417 | 1.11k | return ASTCENC_SUCCESS; |
418 | 1.11k | } |
419 | | |
420 | | /** |
421 | | * @brief Validate that an incoming configuration is in-spec. |
422 | | * |
423 | | * This function can respond in two ways: |
424 | | * |
425 | | * * Numerical inputs that have valid ranges are clamped to those valid ranges. No error is thrown |
426 | | * for out-of-range inputs in this case. |
427 | | * * Numerical inputs and logic inputs that are logically invalid, and which make no sense |
428 | | * algorithmically, will return an error. |
429 | | * |
430 | | * @param[in,out] config The input compressor configuration; tuning fields may be clamped in place. |
431 | | * |
432 | | * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure. |
433 | | */ |
434 | | static astcenc_error validate_config( |
435 | | astcenc_config &config |
436 | 3.60k | ) { |
437 | 3.60k | astcenc_error status; |
438 | | |
439 | 3.60k | status = validate_profile(config.profile); |
440 | 3.60k | if (status != ASTCENC_SUCCESS) |
441 | 0 | { |
442 | 0 | return status; |
443 | 0 | } |
444 | | |
445 | 3.60k | status = validate_flags(config.profile, config.flags); |
446 | 3.60k | if (status != ASTCENC_SUCCESS) |
447 | 0 | { |
448 | 0 | return status; |
449 | 0 | } |
450 | | |
451 | 3.60k | status = validate_block_size(config.block_x, config.block_y, config.block_z); |
452 | 3.60k | if (status != ASTCENC_SUCCESS) |
453 | 0 | { |
454 | 0 | return status; |
455 | 0 | } |
456 | | |
457 | | #if defined(ASTCENC_DECOMPRESS_ONLY) |
458 | | // Decompress-only builds only support decompress-only contexts |
459 | | if (!(config.flags & ASTCENC_FLG_DECOMPRESS_ONLY)) |
460 | | { |
461 | | return ASTCENC_ERR_BAD_PARAM; |
462 | | } |
463 | | #endif |
464 | | |
465 | 3.60k | config.rgbm_m_scale = astc::max(config.rgbm_m_scale, 1.0f); |
466 | | |
467 | 3.60k | config.tune_partition_count_limit = astc::clamp(config.tune_partition_count_limit, 1u, 4u); |
468 | 3.60k | config.tune_2partition_index_limit = astc::clamp(config.tune_2partition_index_limit, 1u, BLOCK_MAX_PARTITIONINGS); |
469 | 3.60k | config.tune_3partition_index_limit = astc::clamp(config.tune_3partition_index_limit, 1u, BLOCK_MAX_PARTITIONINGS); |
470 | 3.60k | config.tune_4partition_index_limit = astc::clamp(config.tune_4partition_index_limit, 1u, BLOCK_MAX_PARTITIONINGS); |
471 | 3.60k | config.tune_block_mode_limit = astc::clamp(config.tune_block_mode_limit, 1u, 100u); |
472 | 3.60k | config.tune_refinement_limit = astc::max(config.tune_refinement_limit, 1u); |
473 | 3.60k | config.tune_candidate_limit = astc::clamp(config.tune_candidate_limit, 1u, TUNE_MAX_TRIAL_CANDIDATES); |
474 | 3.60k | config.tune_2partitioning_candidate_limit = astc::clamp(config.tune_2partitioning_candidate_limit, 1u, TUNE_MAX_PARTITIONING_CANDIDATES); |
475 | 3.60k | config.tune_3partitioning_candidate_limit = astc::clamp(config.tune_3partitioning_candidate_limit, 1u, TUNE_MAX_PARTITIONING_CANDIDATES); |
476 | 3.60k | config.tune_4partitioning_candidate_limit = astc::clamp(config.tune_4partitioning_candidate_limit, 1u, TUNE_MAX_PARTITIONING_CANDIDATES); |
477 | 3.60k | config.tune_db_limit = astc::max(config.tune_db_limit, 0.0f); |
478 | 3.60k | config.tune_mse_overshoot = astc::max(config.tune_mse_overshoot, 1.0f); |
479 | 3.60k | config.tune_2partition_early_out_limit_factor = astc::max(config.tune_2partition_early_out_limit_factor, 0.0f); |
480 | 3.60k | config.tune_3partition_early_out_limit_factor = astc::max(config.tune_3partition_early_out_limit_factor, 0.0f); |
481 | 3.60k | config.tune_2plane_early_out_limit_correlation = astc::max(config.tune_2plane_early_out_limit_correlation, 0.0f); |
482 | | |
483 | | // Specifying a zero weight color component is not allowed; force to small value (1/1000th of the largest weight) |
484 | 3.60k | float max_weight = astc::max(astc::max(config.cw_r_weight, config.cw_g_weight), |
485 | 3.60k | astc::max(config.cw_b_weight, config.cw_a_weight)); |
486 | 3.60k | if (max_weight > 0.0f) |
487 | 3.60k | { |
488 | 3.60k | max_weight /= 1000.0f; |
489 | 3.60k | config.cw_r_weight = astc::max(config.cw_r_weight, max_weight); |
490 | 3.60k | config.cw_g_weight = astc::max(config.cw_g_weight, max_weight); |
491 | 3.60k | config.cw_b_weight = astc::max(config.cw_b_weight, max_weight); |
492 | 3.60k | config.cw_a_weight = astc::max(config.cw_a_weight, max_weight); |
493 | 3.60k | } |
494 | | // If no color component error weight is greater than zero then return an error |
495 | 0 | else |
496 | 0 | { |
497 | 0 | return ASTCENC_ERR_BAD_PARAM; |
498 | 0 | } |
499 | | |
500 | 3.60k | return ASTCENC_SUCCESS; |
501 | 3.60k | } |
502 | | |
503 | | /* See header for documentation. Fills *configp from the quality preset tables, then applies profile and flag overrides. */ |
504 | | astcenc_error astcenc_config_init( |
505 | | astcenc_profile profile, |
506 | | unsigned int block_x, |
507 | | unsigned int block_y, |
508 | | unsigned int block_z, |
509 | | float quality, |
510 | | unsigned int flags, |
511 | | astcenc_config* configp |
512 | 3.65k | ) { |
513 | 3.65k | astcenc_error status; |
514 | | |
515 | 3.65k | status = validate_cpu_float(); |
516 | 3.65k | if (status != ASTCENC_SUCCESS) |
517 | 0 | { |
518 | 0 | return status; |
519 | 0 | } |
520 | | |
521 | | // Zero init all config fields; although most of them will be overwritten |
522 | 3.65k | astcenc_config& config = *configp; |
523 | 3.65k | std::memset(&config, 0, sizeof(config)); |
524 | | |
525 | | // Process the block size |
526 | | // For 2D blocks Z==0 is accepted, but convert to 1 |
527 | 3.65k | block_z = astc::max(block_z, 1u); |
528 | 3.65k | status = validate_block_size(block_x, block_y, block_z); |
529 | 3.65k | if (status != ASTCENC_SUCCESS) |
530 | 0 | { |
531 | 0 | return status; |
532 | 0 | } |
533 | | |
534 | 3.65k | config.block_x = block_x; |
535 | 3.65k | config.block_y = block_y; |
536 | 3.65k | config.block_z = block_z; |
537 | | |
538 | 3.65k | float texels = static_cast<float>(block_x * block_y * block_z); |
539 | 3.65k | float ltexels = logf(texels) / logf(10.0f); |
540 | | |
541 | | // Process the performance quality level or preset; note that this must be done before we |
542 | | // process any additional settings, such as color profile and flags, which may replace some of |
543 | | // these settings with more use case tuned values. The range test is phrased so a NaN quality fails it too |
544 | 3.65k | if (!(quality >= ASTCENC_PRE_FASTEST && |
545 | 3.65k | quality <= ASTCENC_PRE_EXHAUSTIVE)) |
546 | 0 | { |
547 | 0 | return ASTCENC_ERR_BAD_QUALITY; |
548 | 0 | } |
549 | | |
550 | 3.65k | const std::array<astcenc_preset_config, 6>* preset_configs; |
551 | 3.65k | size_t texels_int = block_x * block_y * block_z; |
552 | 3.65k | if (texels_int < 25) |
553 | 1.97k | { |
554 | 1.97k | preset_configs = &preset_configs_high; |
555 | 1.97k | } |
556 | 1.67k | else if (texels_int < 64) |
557 | 1.01k | { |
558 | 1.01k | preset_configs = &preset_configs_mid; |
559 | 1.01k | } |
560 | 660 | else |
561 | 660 | { |
562 | 660 | preset_configs = &preset_configs_low; |
563 | 660 | } |
564 | | |
565 | | // Determine which preset to use, or which pair to interpolate; end is the first preset with quality >= the request |
566 | 3.65k | size_t start; |
567 | 3.65k | size_t end; |
568 | 10.3k | for (end = 0; end < preset_configs->size(); end++) |
569 | 10.3k | { |
570 | 10.3k | if ((*preset_configs)[end].quality >= quality) |
571 | 3.65k | { |
572 | 3.65k | break; |
573 | 3.65k | } |
574 | 10.3k | } |
575 | | |
576 | 3.65k | start = end == 0 ? 0 : end - 1; |
577 | | |
578 | | // Start and end node are the same - so just transfer the values. |
579 | 3.65k | if (start == end) |
580 | 175 | { |
581 | 175 | config.tune_partition_count_limit = (*preset_configs)[start].tune_partition_count_limit; |
582 | 175 | config.tune_2partition_index_limit = (*preset_configs)[start].tune_2partition_index_limit; |
583 | 175 | config.tune_3partition_index_limit = (*preset_configs)[start].tune_3partition_index_limit; |
584 | 175 | config.tune_4partition_index_limit = (*preset_configs)[start].tune_4partition_index_limit; |
585 | 175 | config.tune_block_mode_limit = (*preset_configs)[start].tune_block_mode_limit; |
586 | 175 | config.tune_refinement_limit = (*preset_configs)[start].tune_refinement_limit; |
587 | 175 | config.tune_candidate_limit = (*preset_configs)[start].tune_candidate_limit; |
588 | 175 | config.tune_2partitioning_candidate_limit = (*preset_configs)[start].tune_2partitioning_candidate_limit; |
589 | 175 | config.tune_3partitioning_candidate_limit = (*preset_configs)[start].tune_3partitioning_candidate_limit; |
590 | 175 | config.tune_4partitioning_candidate_limit = (*preset_configs)[start].tune_4partitioning_candidate_limit; |
591 | 175 | config.tune_db_limit = astc::max((*preset_configs)[start].tune_db_limit_a_base - 35 * ltexels, |
592 | 175 | (*preset_configs)[start].tune_db_limit_b_base - 19 * ltexels); |
593 | | |
594 | 175 | config.tune_mse_overshoot = (*preset_configs)[start].tune_mse_overshoot; |
595 | | |
596 | 175 | config.tune_2partition_early_out_limit_factor = (*preset_configs)[start].tune_2partition_early_out_limit_factor; |
597 | 175 | config.tune_3partition_early_out_limit_factor = (*preset_configs)[start].tune_3partition_early_out_limit_factor; |
598 | 175 | config.tune_2plane_early_out_limit_correlation = (*preset_configs)[start].tune_2plane_early_out_limit_correlation; |
599 | 175 | config.tune_search_mode0_enable = (*preset_configs)[start].tune_search_mode0_enable; |
600 | 175 | } |
601 | | // Start and end node are not the same - so interpolate between them |
602 | 3.47k | else |
603 | 3.47k | { |
604 | 3.47k | auto& node_a = (*preset_configs)[start]; |
605 | 3.47k | auto& node_b = (*preset_configs)[end]; |
606 | | |
607 | 3.47k | float wt_range = node_b.quality - node_a.quality; |
608 | 3.47k | assert(wt_range > 0); |
609 | | |
610 | | // Compute interpolation factors |
611 | 3.47k | float wt_node_a = (node_b.quality - quality) / wt_range; |
612 | 3.47k | float wt_node_b = (quality - node_a.quality) / wt_range; |
613 | | |
614 | 24.3k | #define LERP(param) ((node_a.param * wt_node_a) + (node_b.param * wt_node_b)) |
615 | 34.7k | #define LERPI(param) astc::flt2int_rtn(\ |
616 | 34.7k | (static_cast<float>(node_a.param) * wt_node_a) + \ |
617 | 34.7k | (static_cast<float>(node_b.param) * wt_node_b)) |
618 | 13.9k | #define LERPUI(param) static_cast<unsigned int>(LERPI(param)) |
619 | | |
620 | 3.47k | config.tune_partition_count_limit = LERPI(tune_partition_count_limit); |
621 | 3.47k | config.tune_2partition_index_limit = LERPI(tune_2partition_index_limit); |
622 | 3.47k | config.tune_3partition_index_limit = LERPI(tune_3partition_index_limit); |
623 | 3.47k | config.tune_4partition_index_limit = LERPI(tune_4partition_index_limit); |
624 | 3.47k | config.tune_block_mode_limit = LERPI(tune_block_mode_limit); |
625 | 3.47k | config.tune_refinement_limit = LERPI(tune_refinement_limit); |
626 | 3.47k | config.tune_candidate_limit = LERPUI(tune_candidate_limit); |
627 | 3.47k | config.tune_2partitioning_candidate_limit = LERPUI(tune_2partitioning_candidate_limit); |
628 | 3.47k | config.tune_3partitioning_candidate_limit = LERPUI(tune_3partitioning_candidate_limit); |
629 | 3.47k | config.tune_4partitioning_candidate_limit = LERPUI(tune_4partitioning_candidate_limit); |
630 | 3.47k | config.tune_db_limit = astc::max(LERP(tune_db_limit_a_base) - 35 * ltexels, |
631 | 3.47k | LERP(tune_db_limit_b_base) - 19 * ltexels); |
632 | | |
633 | 3.47k | config.tune_mse_overshoot = LERP(tune_mse_overshoot); |
634 | | |
635 | 3.47k | config.tune_2partition_early_out_limit_factor = LERP(tune_2partition_early_out_limit_factor); |
636 | 3.47k | config.tune_3partition_early_out_limit_factor = LERP(tune_3partition_early_out_limit_factor); |
637 | 3.47k | config.tune_2plane_early_out_limit_correlation = LERP(tune_2plane_early_out_limit_correlation); |
638 | 3.47k | config.tune_search_mode0_enable = LERP(tune_search_mode0_enable); |
639 | 3.47k | #undef LERP |
640 | 3.47k | #undef LERPI |
641 | 3.47k | #undef LERPUI |
642 | 3.47k | } |
643 | | |
644 | | // Set heuristics to the defaults for each color profile |
645 | 3.65k | config.cw_r_weight = 1.0f; |
646 | 3.65k | config.cw_g_weight = 1.0f; |
647 | 3.65k | config.cw_b_weight = 1.0f; |
648 | 3.65k | config.cw_a_weight = 1.0f; |
649 | | |
650 | 3.65k | config.a_scale_radius = 0; |
651 | | |
652 | 3.65k | config.rgbm_m_scale = 0.0f; |
653 | | |
654 | 3.65k | config.profile = profile; |
655 | | |
656 | | // Values in this enum are from an external user, so not guaranteed to be |
657 | | // bounded to the enum values |
658 | 3.65k | switch (static_cast<int>(profile)) |
659 | 3.65k | { |
660 | 676 | case ASTCENC_PRF_LDR: |
661 | 1.87k | case ASTCENC_PRF_LDR_SRGB: |
662 | 1.87k | break; |
663 | 883 | case ASTCENC_PRF_HDR_RGB_LDR_A: |
664 | 1.78k | case ASTCENC_PRF_HDR: |
665 | 1.78k | config.tune_db_limit = 999.0f; |
666 | 1.78k | config.tune_search_mode0_enable = 0.0f; |
667 | 1.78k | break; |
668 | 0 | default: |
669 | 0 | return ASTCENC_ERR_BAD_PROFILE; |
670 | 3.65k | } |
671 | | |
672 | | // Reject unknown flag bits, more than one map type, and unorm8 decode with an HDR profile |
673 | 3.65k | status = validate_flags(profile, flags); |
674 | 3.65k | if (status != ASTCENC_SUCCESS) |
675 | 47 | { |
676 | 47 | return status; |
677 | 47 | } |
678 | | |
679 | 3.60k | if (flags & ASTCENC_FLG_MAP_NORMAL) |
680 | 1.01k | { |
681 | | // Normal map encoding uses L+A blocks, so allow one more partitioning |
682 | | // than normal. We need fewer bits for endpoints, so more likely |
683 | | // to be able to use more partitions than an RGB/RGBA block |
684 | 1.01k | config.tune_partition_count_limit = astc::min(config.tune_partition_count_limit + 1u, 4u); |
685 | | |
686 | 1.01k | config.cw_g_weight = 0.0f; |
687 | 1.01k | config.cw_b_weight = 0.0f; |
688 | 1.01k | config.tune_2partition_early_out_limit_factor *= 1.5f; |
689 | 1.01k | config.tune_3partition_early_out_limit_factor *= 1.5f; |
690 | 1.01k | config.tune_2plane_early_out_limit_correlation = 0.99f; |
691 | | |
692 | | // Normals are prone to blocking artifacts on smooth curves |
693 | | // so force compressor to try harder here ... |
694 | 1.01k | config.tune_db_limit *= 1.03f; |
695 | 1.01k | } |
696 | 2.58k | else if (flags & ASTCENC_FLG_MAP_RGBM) |
697 | 937 | { |
698 | 937 | config.rgbm_m_scale = 5.0f; |
699 | 937 | config.cw_a_weight = 2.0f * config.rgbm_m_scale; |
700 | 937 | } |
701 | 1.65k | else // (This is color data) |
702 | 1.65k | { |
703 | | // This is a very basic perceptual metric for RGB color data, which weights error |
704 | | // significance by the perceptual luminance contribution of each color channel. For |
705 | | // luminance the usual weights to compute luminance from a linear RGB value are as |
706 | | // follows: |
707 | | // |
708 | | // l = r * 0.3 + g * 0.59 + b * 0.11 |
709 | | // |
710 | | // ... but we scale these up to keep a better balance between color and alpha. Note |
711 | | // that if the content is using alpha we'd recommend using the -a option to weight |
712 | | // the color contribution by the alpha transparency. |
713 | 1.65k | if (flags & ASTCENC_FLG_USE_PERCEPTUAL) |
714 | 233 | { |
715 | 233 | config.cw_r_weight = 0.30f * 2.25f; |
716 | 233 | config.cw_g_weight = 0.59f * 2.25f; |
717 | 233 | config.cw_b_weight = 0.11f * 2.25f; |
718 | 233 | } |
719 | 1.65k | } |
720 | 3.60k | config.flags = flags; |
721 | | |
722 | 3.60k | return ASTCENC_SUCCESS; |
723 | 3.65k | } |
724 | | |
725 | | /* See header for documentation. */ |
726 | | astcenc_error astcenc_context_alloc( |
727 | | const astcenc_config* configp, |
728 | | unsigned int thread_count, |
729 | | astcenc_context** context, |
730 | | const astcenc_context* parent_context |
731 | 3.60k | ) { |
732 | 3.60k | astcenc_error status; |
733 | | |
734 | 3.60k | status = validate_cpu_float(); |
735 | 3.60k | if (status != ASTCENC_SUCCESS) |
736 | 0 | { |
737 | 0 | return status; |
738 | 0 | } |
739 | | |
740 | 3.60k | if (thread_count == 0) |
741 | 0 | { |
742 | 0 | return ASTCENC_ERR_BAD_PARAM; |
743 | 0 | } |
744 | | |
745 | | #if defined(ASTCENC_DIAGNOSTICS) |
746 | | // Force single threaded compressor use in diagnostic mode |
747 | | if (thread_count != 1) |
748 | | { |
749 | | return ASTCENC_ERR_BAD_PARAM; |
750 | | } |
751 | | #endif |
752 | | |
753 | | // Exactly one of config or parent_context must be set |
754 | 3.60k | bool has_config = configp != nullptr; |
755 | 3.60k | bool has_parent = parent_context != nullptr; |
756 | 3.60k | if (!(has_config ^ has_parent)) |
757 | 0 | { |
758 | 0 | return ASTCENC_ERR_BAD_PARAM; |
759 | 0 | } |
760 | | |
761 | 3.60k | if (has_parent) |
762 | 0 | { |
763 | 0 | configp = &parent_context->context.config; |
764 | 0 | } |
765 | | |
766 | 3.60k | const astcenc_config& config = *configp; |
767 | 3.60k | astcenc_context* ctxo = new astcenc_context; |
768 | 3.60k | astcenc_contexti* ctx = &ctxo->context; |
769 | 3.60k | ctx->thread_count = thread_count; |
770 | 3.60k | ctx->config = *configp; |
771 | 3.60k | ctx->working_buffers = nullptr; |
772 | | |
773 | | // These are allocated per-compress, as they depend on image size |
774 | 3.60k | ctx->input_alpha_averages = nullptr; |
775 | | |
776 | | // Copy the config first and validate the copy (we may modify it) |
777 | 3.60k | status = validate_config(ctx->config); |
778 | 3.60k | if (status != ASTCENC_SUCCESS) |
779 | 0 | { |
780 | 0 | delete ctxo; |
781 | 0 | return status; |
782 | 0 | } |
783 | | |
784 | 3.60k | if (!parent_context) |
785 | 3.60k | { |
786 | 3.60k | block_size_descriptor* bsd = aligned_malloc<block_size_descriptor>(sizeof(block_size_descriptor), ASTCENC_VECALIGN); |
787 | 3.60k | if (!bsd) |
788 | 0 | { |
789 | 0 | delete ctxo; |
790 | 0 | return ASTCENC_ERR_OUT_OF_MEM; |
791 | 0 | } |
792 | | |
793 | 3.60k | bool can_omit_modes = static_cast<bool>(config.flags & ASTCENC_FLG_SELF_DECOMPRESS_ONLY); |
794 | 3.60k | init_block_size_descriptor(config.block_x, config.block_y, config.block_z, |
795 | 3.60k | can_omit_modes, |
796 | 3.60k | config.tune_partition_count_limit, |
797 | 3.60k | static_cast<float>(config.tune_block_mode_limit) / 100.0f, |
798 | 3.60k | *bsd); |
799 | | |
800 | 3.60k | ctx->owns_bsd = true; |
801 | 3.60k | ctx->bsd = bsd; |
802 | 3.60k | } |
803 | 0 | else |
804 | 0 | { |
805 | 0 | ctx->owns_bsd = false; |
806 | 0 | ctx->bsd = parent_context->context.bsd; |
807 | 0 | } |
808 | | |
809 | 3.60k | #if !defined(ASTCENC_DECOMPRESS_ONLY) |
810 | | // Do setup only needed by compression |
811 | 3.60k | if (!(ctx->config.flags & ASTCENC_FLG_DECOMPRESS_ONLY)) |
812 | 3.29k | { |
813 | | // Turn a dB limit into a per-texel error for faster use later |
814 | 3.29k | if ((ctx->config.profile == ASTCENC_PRF_LDR) || (ctx->config.profile == ASTCENC_PRF_LDR_SRGB)) |
815 | 1.65k | { |
816 | 1.65k | ctx->config.tune_db_limit = astc::pow(0.1f, ctx->config.tune_db_limit * 0.1f) * 65535.0f * 65535.0f; |
817 | 1.65k | } |
818 | 1.63k | else |
819 | 1.63k | { |
820 | 1.63k | ctx->config.tune_db_limit = 0.0f; |
821 | 1.63k | } |
822 | | |
823 | 3.29k | size_t worksize = sizeof(compression_working_buffers) * thread_count; |
824 | 3.29k | ctx->working_buffers = aligned_malloc<compression_working_buffers>(worksize, ASTCENC_VECALIGN); |
825 | 3.29k | static_assert((ASTCENC_VECALIGN == 0) || ((sizeof(compression_working_buffers) % ASTCENC_VECALIGN) == 0), |
826 | 3.29k | "compression_working_buffers size must be multiple of vector alignment"); |
827 | 3.29k | if (!ctx->working_buffers) |
828 | 0 | { |
829 | 0 | if (ctx->owns_bsd) |
830 | 0 | { |
831 | 0 | aligned_free<const block_size_descriptor>(ctx->bsd); |
832 | 0 | } |
833 | 0 | delete ctxo; |
834 | 0 | *context = nullptr; |
835 | 0 | return ASTCENC_ERR_OUT_OF_MEM; |
836 | 0 | } |
837 | 3.29k | } |
838 | 3.60k | #endif |
839 | | |
840 | | #if defined(ASTCENC_DIAGNOSTICS) |
841 | | ctx->trace_log = new TraceLog(ctx->config.trace_file_path); |
842 | | if (!ctx->trace_log->m_file) |
843 | | { |
844 | | return ASTCENC_ERR_DTRACE_FAILURE; |
845 | | } |
846 | | |
847 | | trace_add_data("block_x", config.block_x); |
848 | | trace_add_data("block_y", config.block_y); |
849 | | trace_add_data("block_z", config.block_z); |
850 | | #endif |
851 | | |
852 | 3.60k | *context = ctxo; |
853 | | |
854 | 3.60k | #if !defined(ASTCENC_DECOMPRESS_ONLY) |
855 | 3.60k | prepare_angular_tables(); |
856 | 3.60k | #endif |
857 | | |
858 | 3.60k | return ASTCENC_SUCCESS; |
859 | 3.60k | } |
860 | | |
861 | | /* See header for documentation. */ |
862 | | void astcenc_context_free( |
863 | | astcenc_context* ctxo |
864 | 3.60k | ) { |
865 | 3.60k | if (ctxo) |
866 | 3.60k | { |
867 | 3.60k | astcenc_contexti* ctx = &ctxo->context; |
868 | 3.60k | aligned_free<compression_working_buffers>(ctx->working_buffers); |
869 | 3.60k | if (ctx->owns_bsd) |
870 | 3.60k | { |
871 | 3.60k | aligned_free<const block_size_descriptor>(ctx->bsd); |
872 | 3.60k | } |
873 | | #if defined(ASTCENC_DIAGNOSTICS) |
874 | | delete ctx->trace_log; |
875 | | #endif |
876 | 3.60k | delete ctxo; |
877 | 3.60k | } |
878 | 3.60k | } |
879 | | |
880 | | #if !defined(ASTCENC_DECOMPRESS_ONLY) |
881 | | |
882 | | /** |
883 | | * @brief Compress an image, after any preflight has completed. |
884 | | * |
885 | | * @param[out] ctxo The compressor context. |
886 | | * @param thread_index The thread index. |
887 | | * @param image The input image. |
888 | | * @param swizzle The input swizzle. |
889 | | * @param[out] buffer The output array for the compressed data. |
890 | | */ |
891 | | static void compress_image( |
892 | | astcenc_context& ctxo, |
893 | | unsigned int thread_index, |
894 | | const astcenc_image& image, |
895 | | const astcenc_swizzle& swizzle, |
896 | | uint8_t* buffer |
897 | 2.23k | ) { |
898 | 2.23k | astcenc_contexti& ctx = ctxo.context; |
899 | 2.23k | const block_size_descriptor& bsd = *ctx.bsd; |
900 | 2.23k | astcenc_profile decode_mode = ctx.config.profile; |
901 | | |
902 | 2.23k | image_block blk; |
903 | | |
904 | 2.23k | size_t block_x = bsd.dim_x; |
905 | 2.23k | size_t block_y = bsd.dim_y; |
906 | 2.23k | size_t block_z = bsd.dim_z; |
907 | 2.23k | blk.texel_count = static_cast<uint8_t>(block_x * block_y * block_z); |
908 | | |
909 | 2.23k | size_t dim_x = image.dim_x; |
910 | 2.23k | size_t dim_y = image.dim_y; |
911 | 2.23k | size_t dim_z = image.dim_z; |
912 | | |
913 | 2.23k | size_t blocks_x = astc::get_block_count_safe(dim_x, block_x); |
914 | 2.23k | size_t blocks_y = astc::get_block_count_safe(dim_y, block_y); |
915 | 2.23k | size_t blocks_z = astc::get_block_count_safe(dim_z, block_z); |
916 | | |
917 | 2.23k | size_t block_count = get_blocks_count(blocks_x, blocks_y, blocks_z); |
918 | | // Should never fail here - tested in caller before calling here |
919 | 2.23k | assert(block_count > 0); |
920 | | |
921 | | |
922 | 2.23k | size_t row_blocks = blocks_x; |
923 | 2.23k | size_t plane_blocks = blocks_x * blocks_y; |
924 | | |
925 | 2.23k | blk.decode_unorm8 = ctxo.context.config.flags & ASTCENC_FLG_USE_DECODE_UNORM8; |
926 | | |
927 | | // Populate the block channel weights |
928 | 2.23k | blk.channel_weight = vfloat4(ctx.config.cw_r_weight, |
929 | 2.23k | ctx.config.cw_g_weight, |
930 | 2.23k | ctx.config.cw_b_weight, |
931 | 2.23k | ctx.config.cw_a_weight); |
932 | | |
933 | | // Use preallocated scratch buffer |
934 | 2.23k | auto& temp_buffers = ctx.working_buffers[thread_index]; |
935 | | |
936 | | // Only the first thread actually runs the initializer |
937 | 2.23k | ctxo.manage_compress.init(block_count, ctx.config.progress_callback); |
938 | | |
939 | | // Determine if we can use an optimized load function |
940 | 2.23k | bool needs_swz = (swizzle.r != ASTCENC_SWZ_R) || (swizzle.g != ASTCENC_SWZ_G) || |
941 | 2.23k | (swizzle.b != ASTCENC_SWZ_B) || (swizzle.a != ASTCENC_SWZ_A); |
942 | | |
943 | 2.23k | bool needs_hdr = (decode_mode == ASTCENC_PRF_HDR) || |
944 | 1.69k | (decode_mode == ASTCENC_PRF_HDR_RGB_LDR_A); |
945 | | |
946 | 2.23k | bool use_fast_load = !needs_swz && !needs_hdr && |
947 | 1.05k | block_z == 1 && image.data_type == ASTCENC_TYPE_U8; |
948 | | |
949 | 2.23k | auto load_func = load_image_block; |
950 | 2.23k | if (use_fast_load) |
951 | 1.05k | { |
952 | 1.05k | load_func = load_image_block_fast_ldr; |
953 | 1.05k | } |
954 | | |
955 | | // All threads run this processing loop until there is no work remaining |
956 | 4.47k | while (true) |
957 | 4.47k | { |
958 | 4.47k | size_t count; |
959 | 4.47k | size_t base = ctxo.manage_compress.get_task_assignment(16, count); |
960 | 4.47k | if (!count) |
961 | 2.23k | { |
962 | 2.23k | break; |
963 | 2.23k | } |
964 | | |
965 | 4.47k | for (size_t i = base; i < base + count; i++) |
966 | 2.23k | { |
967 | | // Decode i into x, y, z block indices |
968 | 2.23k | size_t z = i / plane_blocks; |
969 | 2.23k | size_t rem = i - (z * plane_blocks); |
970 | 2.23k | size_t y = rem / row_blocks; |
971 | 2.23k | size_t x = rem - (y * row_blocks); |
972 | | |
973 | | // Test if we can apply some basic alpha-scale RDO |
974 | 2.23k | bool use_full_block = true; |
975 | 2.23k | if (ctx.config.a_scale_radius != 0 && block_z == 1) |
976 | 0 | { |
977 | 0 | size_t start_x = x * block_x; |
978 | 0 | size_t end_x = astc::min(dim_x, start_x + block_x); |
979 | |
|
980 | 0 | size_t start_y = y * block_y; |
981 | 0 | size_t end_y = astc::min(dim_y, start_y + block_y); |
982 | | |
983 | | // SATs accumulate error, so don't test exactly zero. Test for |
984 | | // less than 1 alpha in the expanded block footprint that |
985 | | // includes the alpha radius. |
986 | 0 | size_t x_footprint = block_x + 2 * (ctx.config.a_scale_radius - 1); |
987 | 0 | size_t y_footprint = block_y + 2 * (ctx.config.a_scale_radius - 1); |
988 | |
|
989 | 0 | float footprint = static_cast<float>(x_footprint * y_footprint); |
990 | 0 | float threshold = 0.9f / (255.0f * footprint); |
991 | | |
992 | | // Do we have any alpha values? |
993 | 0 | use_full_block = false; |
994 | 0 | for (size_t ay = start_y; ay < end_y; ay++) |
995 | 0 | { |
996 | 0 | for (size_t ax = start_x; ax < end_x; ax++) |
997 | 0 | { |
998 | 0 | float a_avg = ctx.input_alpha_averages[ay * dim_x + ax]; |
999 | 0 | if (a_avg > threshold) |
1000 | 0 | { |
1001 | 0 | use_full_block = true; |
1002 | 0 | ax = end_x; |
1003 | 0 | ay = end_y; |
1004 | 0 | } |
1005 | 0 | } |
1006 | 0 | } |
1007 | 0 | } |
1008 | | |
1009 | | // Fetch the full block for compression |
1010 | 2.23k | if (use_full_block) |
1011 | 2.23k | { |
1012 | 2.23k | load_func(decode_mode, image, blk, bsd, x * block_x, y * block_y, z * block_z, swizzle); |
1013 | | |
1014 | | // Scale RGB error contribution by the maximum alpha in the block |
1015 | | // This encourages preserving alpha accuracy in regions with high |
1016 | | // transparency, and can buy up to 0.5 dB PSNR. |
1017 | 2.23k | if (ctx.config.flags & ASTCENC_FLG_USE_ALPHA_WEIGHT) |
1018 | 897 | { |
1019 | 897 | float alpha_scale = blk.data_max.lane<3>() * (1.0f / 65535.0f); |
1020 | 897 | blk.channel_weight = vfloat4(ctx.config.cw_r_weight * alpha_scale, |
1021 | 897 | ctx.config.cw_g_weight * alpha_scale, |
1022 | 897 | ctx.config.cw_b_weight * alpha_scale, |
1023 | 897 | ctx.config.cw_a_weight); |
1024 | 897 | } |
1025 | 2.23k | } |
1026 | | // Apply alpha scale RDO - substitute constant color block |
1027 | 0 | else |
1028 | 0 | { |
1029 | 0 | blk.origin_texel = vfloat4::zero(); |
1030 | 0 | blk.data_min = vfloat4::zero(); |
1031 | 0 | blk.data_mean = vfloat4::zero(); |
1032 | 0 | blk.data_max = vfloat4::zero(); |
1033 | 0 | blk.grayscale = true; |
1034 | 0 | } |
1035 | | |
1036 | 2.23k | size_t offset = ((z * blocks_y + y) * blocks_x + x) * 16; |
1037 | 2.23k | uint8_t *bp = buffer + offset; |
1038 | 2.23k | compress_block(ctx, blk, bp, temp_buffers); |
1039 | 2.23k | } |
1040 | | |
1041 | 2.23k | ctxo.manage_compress.complete_task_assignment(count); |
1042 | 2.23k | } |
1043 | 2.23k | } |
1044 | | |
1045 | | /** |
1046 | | * @brief Compute regional averages in an image. |
1047 | | * |
1048 | | * This function can be called by multiple threads, but only after a single |
1049 | | * thread calls the setup function @c init_compute_averages(). |
1050 | | * |
1051 | | * Results are written back into @c img->input_alpha_averages. |
1052 | | * |
1053 | | * @param[out] ctx The context. |
1054 | | * @param ag The average and variance arguments created during setup. |
1055 | | */ |
1056 | | static void compute_averages( |
1057 | | astcenc_context& ctx, |
1058 | | const avg_args &ag |
1059 | 0 | ) { |
1060 | 0 | pixel_region_args arg = ag.arg; |
1061 | 0 | arg.work_memory = new vfloat4[ag.work_memory_size]; |
1062 | |
|
1063 | 0 | size_t size_x = ag.img_size_x; |
1064 | 0 | size_t size_y = ag.img_size_y; |
1065 | 0 | size_t size_z = ag.img_size_z; |
1066 | |
|
1067 | 0 | size_t step_xy = ag.blk_size_xy; |
1068 | 0 | size_t step_z = ag.blk_size_z; |
1069 | |
|
1070 | 0 | size_t tasks_y = (size_y + step_xy - 1) / step_xy; |
1071 | | |
1072 | | // All threads run this processing loop until there is no work remaining |
1073 | 0 | while (true) |
1074 | 0 | { |
1075 | 0 | size_t count; |
1076 | 0 | size_t base = ctx.manage_avg.get_task_assignment(16, count); |
1077 | 0 | if (!count) |
1078 | 0 | { |
1079 | 0 | break; |
1080 | 0 | } |
1081 | | |
1082 | 0 | for (size_t i = base; i < base + count; i++) |
1083 | 0 | { |
1084 | 0 | size_t z_task = i / tasks_y; |
1085 | 0 | size_t y_task = i - (z_task * tasks_y); |
1086 | |
|
1087 | 0 | size_t z = z_task * step_z; |
1088 | 0 | size_t y = y_task * step_xy; |
1089 | |
|
1090 | 0 | arg.size_z = astc::min(step_z, size_z - z); |
1091 | 0 | arg.offset_z = z; |
1092 | |
|
1093 | 0 | arg.size_y = astc::min(step_xy, size_y - y); |
1094 | 0 | arg.offset_y = y; |
1095 | |
|
1096 | 0 | for (size_t x = 0; x < size_x; x += step_xy) |
1097 | 0 | { |
1098 | 0 | arg.size_x = astc::min(step_xy, size_x - x); |
1099 | 0 | arg.offset_x = x; |
1100 | 0 | compute_pixel_region_variance(ctx.context, arg); |
1101 | 0 | } |
1102 | 0 | } |
1103 | |
|
1104 | 0 | ctx.manage_avg.complete_task_assignment(count); |
1105 | 0 | } |
1106 | |
|
1107 | 0 | delete[] arg.work_memory; |
1108 | 0 | } |
1109 | | |
1110 | | #endif |
1111 | | |
1112 | | /* See header for documentation. */ |
1113 | | astcenc_error astcenc_compress_image( |
1114 | | astcenc_context* ctxo, |
1115 | | astcenc_image* imagep, |
1116 | | const astcenc_swizzle* swizzle, |
1117 | | uint8_t* data_out, |
1118 | | size_t data_len, |
1119 | | unsigned int thread_index |
1120 | 2.23k | ) { |
1121 | | #if defined(ASTCENC_DECOMPRESS_ONLY) |
1122 | | (void)ctxo; |
1123 | | (void)imagep; |
1124 | | (void)swizzle; |
1125 | | (void)data_out; |
1126 | | (void)data_len; |
1127 | | (void)thread_index; |
1128 | | return ASTCENC_ERR_BAD_CONTEXT; |
1129 | | #else |
1130 | 2.23k | astcenc_contexti* ctx = &ctxo->context; |
1131 | 2.23k | astcenc_error status; |
1132 | 2.23k | astcenc_image& image = *imagep; |
1133 | | |
1134 | 2.23k | if (ctx->config.flags & ASTCENC_FLG_DECOMPRESS_ONLY) |
1135 | 0 | { |
1136 | 0 | return ASTCENC_ERR_BAD_CONTEXT; |
1137 | 0 | } |
1138 | | |
1139 | 2.23k | status = validate_compression_swizzle(*swizzle); |
1140 | 2.23k | if (status != ASTCENC_SUCCESS) |
1141 | 0 | { |
1142 | 0 | return status; |
1143 | 0 | } |
1144 | | |
1145 | 2.23k | if (thread_index >= ctx->thread_count) |
1146 | 0 | { |
1147 | 0 | return ASTCENC_ERR_BAD_PARAM; |
1148 | 0 | } |
1149 | | |
1150 | 2.23k | size_t dim_x = image.dim_x; |
1151 | 2.23k | size_t dim_y = image.dim_y; |
1152 | 2.23k | size_t dim_z = image.dim_z; |
1153 | | |
1154 | 2.23k | size_t texel_count = get_texels_count(dim_x, dim_y, dim_z); |
1155 | | // Cumulative texel sizes would overflow a size_t |
1156 | 2.23k | if (texel_count == 0) |
1157 | 0 | { |
1158 | 0 | return ASTCENC_ERR_BAD_PARAM; |
1159 | 0 | } |
1160 | | |
1161 | 2.23k | size_t block_x = ctx->config.block_x; |
1162 | 2.23k | size_t block_y = ctx->config.block_y; |
1163 | 2.23k | size_t block_z = ctx->config.block_z; |
1164 | | |
1165 | 2.23k | size_t blocks_x = astc::get_block_count_safe(dim_x, block_x); |
1166 | 2.23k | size_t blocks_y = astc::get_block_count_safe(dim_y, block_y); |
1167 | 2.23k | size_t blocks_z = astc::get_block_count_safe(dim_z, block_z); |
1168 | | |
1169 | 2.23k | size_t block_count = get_blocks_count(blocks_x, blocks_y, blocks_z); |
1170 | | // Cumulative block sizes would overflow a size_t |
1171 | 2.23k | if (block_count == 0) |
1172 | 0 | { |
1173 | 0 | return ASTCENC_ERR_BAD_PARAM; |
1174 | 0 | } |
1175 | | |
1176 | | // Check we have enough output space, size_needed calc cannot overflow as |
1177 | | // get_blocks_count() already validated that a byte count would fit |
1178 | 2.23k | size_t size_needed = block_count * 16; |
1179 | 2.23k | if (data_len < size_needed) |
1180 | 0 | { |
1181 | 0 | return ASTCENC_ERR_OUT_OF_MEM; |
1182 | 0 | } |
1183 | | |
1184 | | // If context thread count is one then implicitly reset |
1185 | 2.23k | if (ctx->thread_count == 1) |
1186 | 2.23k | { |
1187 | 2.23k | astcenc_compress_reset(ctxo); |
1188 | 2.23k | } |
1189 | | |
1190 | 2.23k | if (ctx->config.a_scale_radius != 0) |
1191 | 0 | { |
1192 | | // First thread to enter will do setup, other threads will subsequently |
1193 | | // enter the critical section but simply skip over the initialization |
1194 | 0 | auto init_avg = [ctx, &image, swizzle, texel_count]() { |
1195 | | // Perform memory allocations for the destination buffers |
1196 | 0 | ctx->input_alpha_averages = new float[texel_count]; |
1197 | |
|
1198 | 0 | return init_compute_averages( |
1199 | 0 | image, ctx->config.a_scale_radius, *swizzle, |
1200 | 0 | ctx->avg_preprocess_args); |
1201 | 0 | }; |
1202 | | |
1203 | | // Only the first thread actually runs the initializer |
1204 | 0 | ctxo->manage_avg.init(init_avg); |
1205 | | |
1206 | | // All threads will enter this function and dynamically grab work |
1207 | 0 | compute_averages(*ctxo, ctx->avg_preprocess_args); |
1208 | 0 | } |
1209 | | |
1210 | | // Wait for compute_averages to complete before compressing |
1211 | 2.23k | ctxo->manage_avg.wait(); |
1212 | | |
1213 | 2.23k | compress_image(*ctxo, thread_index, image, *swizzle, data_out); |
1214 | | |
1215 | | // Wait for compress to complete before freeing memory |
1216 | 2.23k | ctxo->manage_compress.wait(); |
1217 | | |
1218 | 2.23k | auto term_compress = [ctx]() { |
1219 | 2.23k | delete[] ctx->input_alpha_averages; |
1220 | 2.23k | ctx->input_alpha_averages = nullptr; |
1221 | 2.23k | }; |
1222 | | |
1223 | | // Only the first thread to arrive actually runs the term |
1224 | 2.23k | ctxo->manage_compress.term(term_compress); |
1225 | | |
1226 | 2.23k | return ASTCENC_SUCCESS; |
1227 | 2.23k | #endif |
1228 | 2.23k | } |
1229 | | |
1230 | | /* See header for documentation. */ |
1231 | | astcenc_error astcenc_compress_reset( |
1232 | | astcenc_context* ctxo |
1233 | 2.23k | ) { |
1234 | | #if defined(ASTCENC_DECOMPRESS_ONLY) |
1235 | | (void)ctxo; |
1236 | | return ASTCENC_ERR_BAD_CONTEXT; |
1237 | | #else |
1238 | 2.23k | astcenc_contexti* ctx = &ctxo->context; |
1239 | 2.23k | if (ctx->config.flags & ASTCENC_FLG_DECOMPRESS_ONLY) |
1240 | 0 | { |
1241 | 0 | return ASTCENC_ERR_BAD_CONTEXT; |
1242 | 0 | } |
1243 | | |
1244 | 2.23k | ctxo->manage_avg.reset(); |
1245 | 2.23k | ctxo->manage_compress.reset(); |
1246 | 2.23k | return ASTCENC_SUCCESS; |
1247 | 2.23k | #endif |
1248 | 2.23k | } |
1249 | | |
1250 | | /* See header for documentation. */ |
1251 | | astcenc_error astcenc_compress_cancel( |
1252 | | astcenc_context* ctxo |
1253 | 0 | ) { |
1254 | | #if defined(ASTCENC_DECOMPRESS_ONLY) |
1255 | | (void)ctxo; |
1256 | | return ASTCENC_ERR_BAD_CONTEXT; |
1257 | | #else |
1258 | 0 | astcenc_contexti* ctx = &ctxo->context; |
1259 | 0 | if (ctx->config.flags & ASTCENC_FLG_DECOMPRESS_ONLY) |
1260 | 0 | { |
1261 | 0 | return ASTCENC_ERR_BAD_CONTEXT; |
1262 | 0 | } |
1263 | | |
1264 | | // Cancel compression before cancelling avg. This avoids the race condition |
1265 | | // where cancelling them in the other order could see a compression worker |
1266 | | // starting to process even though some of the avg data is undefined. |
1267 | 0 | ctxo->manage_compress.cancel(); |
1268 | 0 | ctxo->manage_avg.cancel(); |
1269 | 0 | return ASTCENC_SUCCESS; |
1270 | 0 | #endif |
1271 | 0 | } |
1272 | | |
1273 | | /* See header for documentation. */ |
1274 | | astcenc_error astcenc_decompress_image( |
1275 | | astcenc_context* ctxo, |
1276 | | const uint8_t* data, |
1277 | | size_t data_len, |
1278 | | astcenc_image* image_outp, |
1279 | | const astcenc_swizzle* swizzle, |
1280 | | unsigned int thread_index |
1281 | 1.11k | ) { |
1282 | 1.11k | astcenc_error status; |
1283 | 1.11k | astcenc_image& image_out = *image_outp; |
1284 | 1.11k | astcenc_contexti* ctx = &ctxo->context; |
1285 | | |
1286 | | // Today this doesn't matter (working set on stack) but might in future ... |
1287 | 1.11k | if (thread_index >= ctx->thread_count) |
1288 | 0 | { |
1289 | 0 | return ASTCENC_ERR_BAD_PARAM; |
1290 | 0 | } |
1291 | | |
1292 | 1.11k | status = validate_decompression_swizzle(*swizzle); |
1293 | 1.11k | if (status != ASTCENC_SUCCESS) |
1294 | 0 | { |
1295 | 0 | return status; |
1296 | 0 | } |
1297 | | |
1298 | 1.11k | size_t dim_x = image_out.dim_x; |
1299 | 1.11k | size_t dim_y = image_out.dim_y; |
1300 | 1.11k | size_t dim_z = image_out.dim_z; |
1301 | | |
1302 | 1.11k | size_t texel_count = get_texels_count(dim_x, dim_y, dim_z); |
1303 | | // Cumulative texel sizes would overflow a size_t |
1304 | 1.11k | if (texel_count == 0) |
1305 | 0 | { |
1306 | 0 | return ASTCENC_ERR_BAD_PARAM; |
1307 | 0 | } |
1308 | | |
1309 | 1.11k | size_t block_x = ctx->config.block_x; |
1310 | 1.11k | size_t block_y = ctx->config.block_y; |
1311 | 1.11k | size_t block_z = ctx->config.block_z; |
1312 | | |
1313 | 1.11k | size_t blocks_x = astc::get_block_count_safe(dim_x, block_x); |
1314 | 1.11k | size_t blocks_y = astc::get_block_count_safe(dim_y, block_y); |
1315 | 1.11k | size_t blocks_z = astc::get_block_count_safe(dim_z, block_z); |
1316 | | |
1317 | 1.11k | size_t block_count = get_blocks_count(blocks_x, blocks_y, blocks_z); |
1318 | | // Cumulative block sizes would overflow a size_t |
1319 | 1.11k | if (block_count == 0) |
1320 | 0 | { |
1321 | 0 | return ASTCENC_ERR_BAD_PARAM; |
1322 | 0 | } |
1323 | | |
1324 | | // Check we have enough output space, size_needed calc cannot overflow as |
1325 | | // get_blocks_count() already validated that a byte count would fit |
1326 | 1.11k | size_t size_needed = block_count * 16; |
1327 | 1.11k | if (data_len < size_needed) |
1328 | 0 | { |
1329 | 0 | return ASTCENC_ERR_OUT_OF_MEM; |
1330 | 0 | } |
1331 | | |
1332 | 1.11k | size_t row_blocks = blocks_x; |
1333 | 1.11k | size_t plane_blocks = blocks_x * blocks_y; |
1334 | | |
1335 | 1.11k | image_block blk {}; |
1336 | 1.11k | blk.texel_count = static_cast<uint8_t>(block_x * block_y * block_z); |
1337 | | |
1338 | | // Decode mode inferred from the output data type |
1339 | 1.11k | blk.decode_unorm8 = image_out.data_type == ASTCENC_TYPE_U8; |
1340 | | |
1341 | | // If context thread count is one then implicitly reset |
1342 | 1.11k | if (ctx->thread_count == 1) |
1343 | 1.11k | { |
1344 | 1.11k | astcenc_decompress_reset(ctxo); |
1345 | 1.11k | } |
1346 | | |
1347 | | // Only the first thread actually runs the initializer |
1348 | 1.11k | ctxo->manage_decompress.init(block_count, nullptr); |
1349 | | |
1350 | | // All threads run this processing loop until there is no work remaining |
1351 | 2.22k | while (true) |
1352 | 2.22k | { |
1353 | 2.22k | size_t count; |
1354 | 2.22k | size_t base = ctxo->manage_decompress.get_task_assignment(128, count); |
1355 | 2.22k | if (!count) |
1356 | 1.11k | { |
1357 | 1.11k | break; |
1358 | 1.11k | } |
1359 | | |
1360 | 5.06k | for (size_t i = base; i < base + count; i++) |
1361 | 3.95k | { |
1362 | | // Decode i into x, y, z block indices |
1363 | 3.95k | size_t z = i / plane_blocks; |
1364 | 3.95k | size_t rem = i - (z * plane_blocks); |
1365 | 3.95k | size_t y = rem / row_blocks; |
1366 | 3.95k | size_t x = rem - (y * row_blocks); |
1367 | | |
1368 | 3.95k | size_t offset = (((z * blocks_y + y) * blocks_x) + x) * 16; |
1369 | 3.95k | const uint8_t* bp = data + offset; |
1370 | | |
1371 | 3.95k | symbolic_compressed_block scb; |
1372 | | |
1373 | 3.95k | physical_to_symbolic(*ctx->bsd, bp, scb); |
1374 | | |
1375 | 3.95k | decompress_symbolic_block(ctx->config.profile, *ctx->bsd, |
1376 | 3.95k | x * block_x, |
1377 | 3.95k | y * block_y, |
1378 | 3.95k | z * block_z, |
1379 | 3.95k | scb, blk); |
1380 | | |
1381 | 3.95k | store_image_block(image_out, blk, *ctx->bsd, |
1382 | 3.95k | x * block_x, y * block_y, z * block_z, |
1383 | 3.95k | *swizzle); |
1384 | 3.95k | } |
1385 | | |
1386 | 1.11k | ctxo->manage_decompress.complete_task_assignment(count); |
1387 | 1.11k | } |
1388 | | |
1389 | 1.11k | return ASTCENC_SUCCESS; |
1390 | 1.11k | } |
1391 | | |
1392 | | /* See header for documentation. */ |
1393 | | astcenc_error astcenc_decompress_reset( |
1394 | | astcenc_context* ctxo |
1395 | 1.11k | ) { |
1396 | 1.11k | ctxo->manage_decompress.reset(); |
1397 | 1.11k | return ASTCENC_SUCCESS; |
1398 | 1.11k | } |
1399 | | |
1400 | | /* See header for documentation. */ |
1401 | | astcenc_error astcenc_get_block_info( |
1402 | | astcenc_context* ctxo, |
1403 | | const uint8_t data[16], |
1404 | | astcenc_block_info* info |
1405 | 0 | ) { |
1406 | | #if defined(ASTCENC_DECOMPRESS_ONLY) |
1407 | | (void)ctxo; |
1408 | | (void)data; |
1409 | | (void)info; |
1410 | | return ASTCENC_ERR_BAD_CONTEXT; |
1411 | | #else |
1412 | 0 | astcenc_contexti* ctx = &ctxo->context; |
1413 | | |
1414 | | // Decode the compressed data into a symbolic form |
1415 | 0 | symbolic_compressed_block scb; |
1416 | 0 | physical_to_symbolic(*ctx->bsd, data, scb); |
1417 | | |
1418 | | // Fetch the appropriate partition and decimation tables |
1419 | 0 | const block_size_descriptor& bsd = *ctx->bsd; |
1420 | | |
1421 | | // Start from a clean slate |
1422 | 0 | memset(info, 0, sizeof(*info)); |
1423 | | |
1424 | | // Basic info we can always populate |
1425 | 0 | info->profile = ctx->config.profile; |
1426 | |
|
1427 | 0 | info->block_x = ctx->config.block_x; |
1428 | 0 | info->block_y = ctx->config.block_y; |
1429 | 0 | info->block_z = ctx->config.block_z; |
1430 | 0 | info->texel_count = bsd.texel_count; |
1431 | | |
1432 | | // Check for error blocks first |
1433 | 0 | info->is_error_block = scb.block_type == SYM_BTYPE_ERROR; |
1434 | 0 | if (info->is_error_block) |
1435 | 0 | { |
1436 | 0 | return ASTCENC_SUCCESS; |
1437 | 0 | } |
1438 | | |
1439 | | // Check for constant color blocks second |
1440 | 0 | info->is_constant_block = scb.block_type == SYM_BTYPE_CONST_F16 || |
1441 | 0 | scb.block_type == SYM_BTYPE_CONST_U16; |
1442 | 0 | if (info->is_constant_block) |
1443 | 0 | { |
1444 | 0 | return ASTCENC_SUCCESS; |
1445 | 0 | } |
1446 | | |
1447 | | // Otherwise handle a full block ; known to be valid after conditions above have been checked |
1448 | 0 | unsigned int partition_count = scb.partition_count; |
1449 | 0 | const auto& pi = bsd.get_partition_info(partition_count, scb.partition_index); |
1450 | |
|
1451 | 0 | const block_mode& bm = bsd.get_block_mode(scb.block_mode); |
1452 | 0 | const decimation_info& di = bsd.get_decimation_info(bm.decimation_mode); |
1453 | |
|
1454 | 0 | info->weight_x = di.weight_x; |
1455 | 0 | info->weight_y = di.weight_y; |
1456 | 0 | info->weight_z = di.weight_z; |
1457 | |
|
1458 | 0 | info->is_dual_plane_block = bm.is_dual_plane != 0; |
1459 | |
|
1460 | 0 | info->partition_count = scb.partition_count; |
1461 | 0 | info->partition_index = scb.partition_index; |
1462 | 0 | info->dual_plane_component = scb.plane2_component; |
1463 | |
|
1464 | 0 | info->color_level_count = get_quant_level(scb.get_color_quant_mode()); |
1465 | 0 | info->weight_level_count = get_quant_level(bm.get_weight_quant_mode()); |
1466 | | |
1467 | | // Unpack color endpoints for each active partition |
1468 | 0 | for (size_t i = 0; i < scb.partition_count; i++) |
1469 | 0 | { |
1470 | 0 | bool rgb_hdr; |
1471 | 0 | bool a_hdr; |
1472 | 0 | vint4 endpnt[2]; |
1473 | |
|
1474 | 0 | unpack_color_endpoints(ctx->config.profile, |
1475 | 0 | scb.color_formats[i], |
1476 | 0 | scb.color_values[i], |
1477 | 0 | rgb_hdr, a_hdr, |
1478 | 0 | endpnt[0], endpnt[1]); |
1479 | | |
1480 | | // Store the color endpoint mode info |
1481 | 0 | info->color_endpoint_modes[i] = scb.color_formats[i]; |
1482 | 0 | info->is_hdr_block = info->is_hdr_block || rgb_hdr || a_hdr; |
1483 | | |
1484 | | // Store the unpacked and decoded color endpoint |
1485 | 0 | vmask4 hdr_mask(rgb_hdr, rgb_hdr, rgb_hdr, a_hdr); |
1486 | 0 | for (size_t j = 0; j < 2; j++) |
1487 | 0 | { |
1488 | 0 | vint4 color_lns = lns_to_sf16(endpnt[j]); |
1489 | 0 | vint4 color_unorm = unorm16_to_sf16(endpnt[j]); |
1490 | 0 | vint4 datai = select(color_unorm, color_lns, hdr_mask); |
1491 | 0 | store(float16_to_float(datai), info->color_endpoints[i][j]); |
1492 | 0 | } |
1493 | 0 | } |
1494 | | |
1495 | | // Unpack weights for each texel |
1496 | 0 | int weight_plane1[BLOCK_MAX_TEXELS]; |
1497 | 0 | int weight_plane2[BLOCK_MAX_TEXELS]; |
1498 | |
|
1499 | 0 | unpack_weights(bsd, scb, di, bm.is_dual_plane, weight_plane1, weight_plane2); |
1500 | 0 | for (size_t i = 0; i < bsd.texel_count; i++) |
1501 | 0 | { |
1502 | 0 | info->weight_values_plane1[i] = static_cast<float>(weight_plane1[i]) * (1.0f / WEIGHTS_TEXEL_SUM); |
1503 | 0 | if (info->is_dual_plane_block) |
1504 | 0 | { |
1505 | 0 | info->weight_values_plane2[i] = static_cast<float>(weight_plane2[i]) * (1.0f / WEIGHTS_TEXEL_SUM); |
1506 | 0 | } |
1507 | 0 | } |
1508 | | |
1509 | | // Unpack partition assignments for each texel |
1510 | 0 | for (size_t i = 0; i < bsd.texel_count; i++) |
1511 | 0 | { |
1512 | 0 | info->partition_assignment[i] = pi.partition_of_texel[i]; |
1513 | 0 | } |
1514 | |
|
1515 | 0 | return ASTCENC_SUCCESS; |
1516 | 0 | #endif |
1517 | 0 | } |
1518 | | |
1519 | | /* See header for documentation. */ |
1520 | | const char* astcenc_get_error_string( |
1521 | | astcenc_error status |
1522 | 0 | ) { |
1523 | | // Values in this enum are from an external user, so not guaranteed to be |
1524 | | // bounded to the enum values |
1525 | 0 | switch (static_cast<int>(status)) |
1526 | 0 | { |
1527 | 0 | case ASTCENC_SUCCESS: |
1528 | 0 | return "ASTCENC_SUCCESS"; |
1529 | 0 | case ASTCENC_ERR_OUT_OF_MEM: |
1530 | 0 | return "ASTCENC_ERR_OUT_OF_MEM"; |
1531 | 0 | case ASTCENC_ERR_BAD_CPU_FLOAT: |
1532 | 0 | return "ASTCENC_ERR_BAD_CPU_FLOAT"; |
1533 | 0 | case ASTCENC_ERR_BAD_PARAM: |
1534 | 0 | return "ASTCENC_ERR_BAD_PARAM"; |
1535 | 0 | case ASTCENC_ERR_BAD_BLOCK_SIZE: |
1536 | 0 | return "ASTCENC_ERR_BAD_BLOCK_SIZE"; |
1537 | 0 | case ASTCENC_ERR_BAD_PROFILE: |
1538 | 0 | return "ASTCENC_ERR_BAD_PROFILE"; |
1539 | 0 | case ASTCENC_ERR_BAD_QUALITY: |
1540 | 0 | return "ASTCENC_ERR_BAD_QUALITY"; |
1541 | 0 | case ASTCENC_ERR_BAD_FLAGS: |
1542 | 0 | return "ASTCENC_ERR_BAD_FLAGS"; |
1543 | 0 | case ASTCENC_ERR_BAD_SWIZZLE: |
1544 | 0 | return "ASTCENC_ERR_BAD_SWIZZLE"; |
1545 | 0 | case ASTCENC_ERR_BAD_CONTEXT: |
1546 | 0 | return "ASTCENC_ERR_BAD_CONTEXT"; |
1547 | 0 | case ASTCENC_ERR_NOT_IMPLEMENTED: |
1548 | 0 | return "ASTCENC_ERR_NOT_IMPLEMENTED"; |
1549 | 0 | case ASTCENC_ERR_BAD_DECODE_MODE: |
1550 | 0 | return "ASTCENC_ERR_BAD_DECODE_MODE"; |
1551 | | #if defined(ASTCENC_DIAGNOSTICS) |
1552 | | case ASTCENC_ERR_DTRACE_FAILURE: |
1553 | | return "ASTCENC_ERR_DTRACE_FAILURE"; |
1554 | | #endif |
1555 | 0 | default: |
1556 | 0 | return nullptr; |
1557 | 0 | } |
1558 | 0 | } |