/src/astc-encoder/Source/astcenc_entry.cpp
Line | Count | Source |
1 | | // SPDX-License-Identifier: Apache-2.0 |
2 | | // ---------------------------------------------------------------------------- |
3 | | // Copyright 2011-2026 Arm Limited |
4 | | // |
5 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not |
6 | | // use this file except in compliance with the License. You may obtain a copy |
7 | | // of the License at: |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, software |
12 | | // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
13 | | // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the |
14 | | // License for the specific language governing permissions and limitations |
15 | | // under the License. |
16 | | // ---------------------------------------------------------------------------- |
17 | | |
18 | | /** |
19 | | * @brief Functions for the library entrypoint. |
20 | | */ |
21 | | |
22 | | #include <array> |
23 | | #include <cstring> |
24 | | #include <new> |
25 | | |
26 | | #include "astcenc.h" |
27 | | #include "astcenc_internal_entry.h" |
28 | | #include "astcenc_diagnostic_trace.h" |
29 | | |
30 | | /** |
31 | | * @brief Record of the quality tuning parameter values. |
32 | | * |
33 | | * See the @c astcenc_config structure for detailed parameter documentation. |
34 | | * |
35 | | * Note that the mse_overshoot entries are scaling factors relative to the base MSE to hit db_limit. |
36 | | * A 20% overshoot is harder to hit for a higher base db_limit, so we may actually use lower ratios |
37 | | * for the more thorough search presets because the underlying db_limit is so much higher. |
38 | | */ |
struct astcenc_preset_config
{
    /** @brief The quality level this row describes; requested qualities are matched against it. */
    float quality;

    /** @brief Preset for the @c astcenc_config field of the same name. */
    unsigned int tune_partition_count_limit;

    /** @brief Preset for the @c astcenc_config field of the same name. */
    unsigned int tune_2partition_index_limit;

    /** @brief Preset for the @c astcenc_config field of the same name. */
    unsigned int tune_3partition_index_limit;

    /** @brief Preset for the @c astcenc_config field of the same name. */
    unsigned int tune_4partition_index_limit;

    /** @brief Preset for the @c astcenc_config field of the same name. */
    unsigned int tune_block_mode_limit;

    /** @brief Preset for the @c astcenc_config field of the same name. */
    unsigned int tune_refinement_limit;

    /** @brief Preset for the @c astcenc_config field of the same name. */
    unsigned int tune_candidate_limit;

    /** @brief Preset for the @c astcenc_config field of the same name. */
    unsigned int tune_2partitioning_candidate_limit;

    /** @brief Preset for the @c astcenc_config field of the same name. */
    unsigned int tune_3partitioning_candidate_limit;

    /** @brief Preset for the @c astcenc_config field of the same name. */
    unsigned int tune_4partitioning_candidate_limit;

    /** @brief Base of the first dB limit candidate; 35 * log10(texels per block) is subtracted. */
    float tune_db_limit_a_base;

    /** @brief Base of the second dB limit candidate; 19 * log10(texels per block) is subtracted. */
    float tune_db_limit_b_base;

    /** @brief Preset for the @c astcenc_config field of the same name. */
    float tune_mse_overshoot;

    /** @brief Preset for the @c astcenc_config field of the same name. */
    float tune_2partition_early_out_limit_factor;

    /** @brief Preset for the @c astcenc_config field of the same name. */
    float tune_3partition_early_out_limit_factor;

    /** @brief Preset for the @c astcenc_config field of the same name. */
    float tune_2plane_early_out_limit_correlation;

    /** @brief Preset for the @c astcenc_config field of the same name; stored as a float so it can be interpolated. */
    float tune_search_mode0_enable;
};
60 | | |
61 | | /** |
62 | | * @brief The static presets for high bandwidth encodings (x < 25 texels per block). |
63 | | */ |
64 | | static const std::array<astcenc_preset_config, 6> preset_configs_high {{ |
65 | | { |
66 | | ASTCENC_PRE_FASTEST, |
67 | | 2, 10, 6, 4, 43, 2, 2, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.85f, 0.0f |
68 | | }, { |
69 | | ASTCENC_PRE_FAST, |
70 | | 3, 18, 10, 8, 55, 3, 3, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.90f, 0.0f |
71 | | }, { |
72 | | ASTCENC_PRE_MEDIUM, |
73 | | 4, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 2.5f, 1.1f, 1.05f, 0.95f, 0.0f |
74 | | }, { |
75 | | ASTCENC_PRE_THOROUGH, |
76 | | 4, 82, 60, 30, 94, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 1.35f, 1.15f, 0.97f, 0.0f |
77 | | }, { |
78 | | ASTCENC_PRE_VERYTHOROUGH, |
79 | | 4, 256, 128, 64, 98, 4, 6, 8, 6, 4, 200.0f, 200.0f, 10.0f, 1.6f, 1.4f, 0.98f, 0.0f |
80 | | }, { |
81 | | ASTCENC_PRE_EXHAUSTIVE, |
82 | | 4, 512, 512, 512, 100, 4, 8, 8, 8, 8, 200.0f, 200.0f, 10.0f, 2.0f, 2.0f, 0.99f, 0.0f |
83 | | } |
84 | | }}; |
85 | | |
86 | | /** |
87 | | * @brief The static presets for medium bandwidth encodings (25 <= x < 64 texels per block). |
88 | | */ |
89 | | static const std::array<astcenc_preset_config, 6> preset_configs_mid {{ |
90 | | { |
91 | | ASTCENC_PRE_FASTEST, |
92 | | 2, 10, 6, 4, 43, 2, 2, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.80f, 1.0f |
93 | | }, { |
94 | | ASTCENC_PRE_FAST, |
95 | | 3, 18, 12, 10, 55, 3, 3, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.85f, 1.0f |
96 | | }, { |
97 | | ASTCENC_PRE_MEDIUM, |
98 | | 3, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 3.0f, 1.1f, 1.05f, 0.90f, 1.0f |
99 | | }, { |
100 | | ASTCENC_PRE_THOROUGH, |
101 | | 4, 82, 60, 30, 94, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 1.4f, 1.2f, 0.95f, 0.0f |
102 | | }, { |
103 | | ASTCENC_PRE_VERYTHOROUGH, |
104 | | 4, 256, 128, 64, 98, 4, 6, 8, 6, 3, 200.0f, 200.0f, 10.0f, 1.6f, 1.4f, 0.98f, 0.0f |
105 | | }, { |
106 | | ASTCENC_PRE_EXHAUSTIVE, |
107 | | 4, 256, 256, 256, 100, 4, 8, 8, 8, 8, 200.0f, 200.0f, 10.0f, 2.0f, 2.0f, 0.99f, 0.0f |
108 | | } |
109 | | }}; |
110 | | |
111 | | /** |
112 | | * @brief The static presets for low bandwidth encodings (64 <= x texels per block). |
113 | | */ |
114 | | static const std::array<astcenc_preset_config, 6> preset_configs_low {{ |
115 | | { |
116 | | ASTCENC_PRE_FASTEST, |
117 | | 2, 10, 6, 4, 40, 2, 2, 2, 2, 2, 85.0f, 63.0f, 3.5f, 1.0f, 1.0f, 0.80f, 1.0f |
118 | | }, { |
119 | | ASTCENC_PRE_FAST, |
120 | | 2, 18, 12, 10, 55, 3, 3, 2, 2, 2, 85.0f, 63.0f, 3.5f, 1.0f, 1.0f, 0.85f, 1.0f |
121 | | }, { |
122 | | ASTCENC_PRE_MEDIUM, |
123 | | 3, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 3.5f, 1.1f, 1.05f, 0.90f, 1.0f |
124 | | }, { |
125 | | ASTCENC_PRE_THOROUGH, |
126 | | 4, 82, 60, 30, 93, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 1.3f, 1.2f, 0.97f, 1.0f |
127 | | }, { |
128 | | ASTCENC_PRE_VERYTHOROUGH, |
129 | | 4, 256, 128, 64, 98, 4, 6, 8, 5, 2, 200.0f, 200.0f, 10.0f, 1.6f, 1.4f, 0.98f, 1.0f |
130 | | }, { |
131 | | ASTCENC_PRE_EXHAUSTIVE, |
132 | | 4, 256, 256, 256, 100, 4, 8, 8, 8, 8, 200.0f, 200.0f, 10.0f, 2.0f, 2.0f, 0.99f, 1.0f |
133 | | } |
134 | | }}; |
135 | | |
136 | | /** |
137 | | * @brief Validate CPU floating point meets assumptions made in the codec. |
138 | | * |
139 | | * The codec is written with the assumption that float bit patterns are valid |
140 | | * IEEE754 values that are stored and reloaded with round-to-nearest rounding. |
141 | | * This is always the case in an IEEE-754 compliant system, however not every |
142 | | * system or compilation mode is actually IEEE-754 compliant. This normally |
143 | | * fails if the code is compiled with fast math enabled, for example. |
144 | | * |
145 | | * @return Return @c ASTCENC_SUCCESS if validated, an error on failure. |
146 | | */ |
147 | | static astcenc_error validate_cpu_float() |
148 | 7.21k | { |
149 | 7.21k | volatile float xprec_testval = 2.51f; |
150 | 7.21k | float store = xprec_testval + 12582912.0f; |
151 | 7.21k | float q = store - 12582912.0f; |
152 | | |
153 | 7.21k | if (q != 3.0f) |
154 | 0 | { |
155 | 0 | return ASTCENC_ERR_BAD_CPU_FLOAT; |
156 | 0 | } |
157 | | |
158 | 7.21k | return ASTCENC_SUCCESS; |
159 | 7.21k | } |
160 | | |
161 | | /** |
162 | | * @brief Validate config profile. |
163 | | * |
164 | | * @param profile The profile to check. |
165 | | * |
166 | | * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure. |
167 | | */ |
168 | | static astcenc_error validate_profile( |
169 | | astcenc_profile profile |
170 | 3.58k | ) { |
171 | | // Values in this enum are from an external user, so not guaranteed to be |
172 | | // bounded to the enum values |
173 | 3.58k | switch (static_cast<int>(profile)) |
174 | 3.58k | { |
175 | 1.21k | case ASTCENC_PRF_LDR_SRGB: |
176 | 1.89k | case ASTCENC_PRF_LDR: |
177 | 2.72k | case ASTCENC_PRF_HDR_RGB_LDR_A: |
178 | 3.58k | case ASTCENC_PRF_HDR: |
179 | 3.58k | return ASTCENC_SUCCESS; |
180 | 0 | default: |
181 | 0 | return ASTCENC_ERR_BAD_PROFILE; |
182 | 3.58k | } |
183 | 3.58k | } |
184 | | |
185 | | /** |
186 | | * @brief Validate block size. |
187 | | * |
188 | | * @param block_x The block x dimensions. |
189 | | * @param block_y The block y dimensions. |
190 | | * @param block_z The block z dimensions. |
191 | | * |
192 | | * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure. |
193 | | */ |
194 | | static astcenc_error validate_block_size( |
195 | | unsigned int block_x, |
196 | | unsigned int block_y, |
197 | | unsigned int block_z |
198 | 7.21k | ) { |
199 | | // Test if this is a legal block size at all |
200 | 7.21k | bool is_legal = (((block_z <= 1) && is_legal_2d_block_size(block_x, block_y)) || |
201 | 0 | ((block_z >= 2) && is_legal_3d_block_size(block_x, block_y, block_z))); |
202 | 7.21k | if (!is_legal) |
203 | 0 | { |
204 | 0 | return ASTCENC_ERR_BAD_BLOCK_SIZE; |
205 | 0 | } |
206 | | |
207 | | // Test if this build has sufficient capacity for this block size |
208 | 7.21k | bool have_capacity = (block_x * block_y * block_z) <= BLOCK_MAX_TEXELS; |
209 | 7.21k | if (!have_capacity) |
210 | 0 | { |
211 | 0 | return ASTCENC_ERR_NOT_IMPLEMENTED; |
212 | 0 | } |
213 | | |
214 | 7.21k | return ASTCENC_SUCCESS; |
215 | 7.21k | } |
216 | | |
217 | | /** |
218 | | * @brief Validate flags. |
219 | | * |
220 | | * @param profile The profile to check. |
221 | | * @param flags The flags to check. |
222 | | * |
223 | | * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure. |
224 | | */ |
225 | | static astcenc_error validate_flags( |
226 | | astcenc_profile profile, |
227 | | unsigned int flags |
228 | 7.21k | ) { |
229 | | // Flags field must not contain any unknown flag bits |
230 | 7.21k | unsigned int exMask = ~ASTCENC_ALL_FLAGS; |
231 | 7.21k | if (popcount(flags & exMask) != 0) |
232 | 0 | { |
233 | 0 | return ASTCENC_ERR_BAD_FLAGS; |
234 | 0 | } |
235 | | |
236 | | // Flags field must only contain at most a single map type |
237 | 7.21k | exMask = ASTCENC_FLG_MAP_NORMAL |
238 | 7.21k | | ASTCENC_FLG_MAP_RGBM; |
239 | 7.21k | if (popcount(flags & exMask) > 1) |
240 | 32 | { |
241 | 32 | return ASTCENC_ERR_BAD_FLAGS; |
242 | 32 | } |
243 | | |
244 | | // Decode_unorm8 must only be used with an LDR profile |
245 | 7.18k | bool is_unorm8 = flags & ASTCENC_FLG_USE_DECODE_UNORM8; |
246 | 7.18k | bool is_hdr = (profile == ASTCENC_PRF_HDR) || (profile == ASTCENC_PRF_HDR_RGB_LDR_A); |
247 | 7.18k | if (is_unorm8 && is_hdr) |
248 | 17 | { |
249 | 17 | return ASTCENC_ERR_BAD_DECODE_MODE; |
250 | 17 | } |
251 | | |
252 | 7.17k | return ASTCENC_SUCCESS; |
253 | 7.18k | } |
254 | | |
255 | | #if !defined(ASTCENC_DECOMPRESS_ONLY) |
256 | | |
257 | | /** |
258 | | * @brief Validate single channel compression swizzle. |
259 | | * |
260 | | * @param swizzle The swizzle to check. |
261 | | * |
262 | | * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure. |
263 | | */ |
264 | | static astcenc_error validate_compression_swz( |
265 | | astcenc_swz swizzle |
266 | 8.86k | ) { |
267 | | // Not all enum values are handled; SWZ_Z is invalid for compression |
268 | 8.86k | switch (static_cast<int>(swizzle)) |
269 | 8.86k | { |
270 | 2.21k | case ASTCENC_SWZ_R: |
271 | 4.43k | case ASTCENC_SWZ_G: |
272 | 6.64k | case ASTCENC_SWZ_B: |
273 | 8.86k | case ASTCENC_SWZ_A: |
274 | 8.86k | case ASTCENC_SWZ_0: |
275 | 8.86k | case ASTCENC_SWZ_1: |
276 | 8.86k | return ASTCENC_SUCCESS; |
277 | 0 | default: |
278 | 0 | return ASTCENC_ERR_BAD_SWIZZLE; |
279 | 8.86k | } |
280 | 8.86k | } |
281 | | |
282 | | /** |
283 | | * @brief Validate overall compression swizzle. |
284 | | * |
285 | | * @param swizzle The swizzle to check. |
286 | | * |
287 | | * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure. |
288 | | */ |
289 | | static astcenc_error validate_compression_swizzle( |
290 | | const astcenc_swizzle& swizzle |
291 | 2.21k | ) { |
292 | 2.21k | if (validate_compression_swz(swizzle.r) || |
293 | 2.21k | validate_compression_swz(swizzle.g) || |
294 | 2.21k | validate_compression_swz(swizzle.b) || |
295 | 2.21k | validate_compression_swz(swizzle.a)) |
296 | 0 | { |
297 | 0 | return ASTCENC_ERR_BAD_SWIZZLE; |
298 | 0 | } |
299 | | |
300 | 2.21k | return ASTCENC_SUCCESS; |
301 | 2.21k | } |
302 | | #endif |
303 | | |
304 | | /** |
305 | | * @brief Validate single channel decompression swizzle. |
306 | | * |
307 | | * @param swizzle The swizzle to check. |
308 | | * |
309 | | * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure. |
310 | | */ |
311 | | static astcenc_error validate_decompression_swz( |
312 | | astcenc_swz swizzle |
313 | 4.45k | ) { |
314 | | // Values in this enum are from an external user, so not guaranteed to be |
315 | | // bounded to the enum values |
316 | 4.45k | switch (static_cast<int>(swizzle)) |
317 | 4.45k | { |
318 | 1.11k | case ASTCENC_SWZ_R: |
319 | 2.22k | case ASTCENC_SWZ_G: |
320 | 3.34k | case ASTCENC_SWZ_B: |
321 | 4.45k | case ASTCENC_SWZ_A: |
322 | 4.45k | case ASTCENC_SWZ_0: |
323 | 4.45k | case ASTCENC_SWZ_1: |
324 | 4.45k | case ASTCENC_SWZ_Z: |
325 | 4.45k | return ASTCENC_SUCCESS; |
326 | 0 | default: |
327 | 0 | return ASTCENC_ERR_BAD_SWIZZLE; |
328 | 4.45k | } |
329 | 4.45k | } |
330 | | |
331 | | /** |
332 | | * @brief Validate overall decompression swizzle. |
333 | | * |
334 | | * @param swizzle The swizzle to check. |
335 | | * |
336 | | * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure. |
337 | | */ |
338 | | static astcenc_error validate_decompression_swizzle( |
339 | | const astcenc_swizzle& swizzle |
340 | 1.11k | ) { |
341 | 1.11k | if (validate_decompression_swz(swizzle.r) || |
342 | 1.11k | validate_decompression_swz(swizzle.g) || |
343 | 1.11k | validate_decompression_swz(swizzle.b) || |
344 | 1.11k | validate_decompression_swz(swizzle.a)) |
345 | 0 | { |
346 | 0 | return ASTCENC_ERR_BAD_SWIZZLE; |
347 | 0 | } |
348 | | |
349 | 1.11k | return ASTCENC_SUCCESS; |
350 | 1.11k | } |
351 | | |
352 | | /** |
353 | | * Validate that an incoming configuration is in-spec. |
354 | | * |
355 | | * This function can respond in two ways: |
356 | | * |
357 | | * * Numerical inputs that have valid ranges are clamped to those valid ranges. No error is thrown |
358 | | * for out-of-range inputs in this case. |
359 | | * * Numerical inputs and logic inputs that are logically invalid and which make no sense |
360 | | * algorithmically will return an error. |
361 | | * |
362 | | * @param[in,out] config The input compressor configuration. |
363 | | * |
364 | | * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure. |
365 | | */ |
366 | | static astcenc_error validate_config( |
367 | | astcenc_config &config |
368 | 3.58k | ) { |
369 | 3.58k | astcenc_error status; |
370 | | |
371 | 3.58k | status = validate_profile(config.profile); |
372 | 3.58k | if (status != ASTCENC_SUCCESS) |
373 | 0 | { |
374 | 0 | return status; |
375 | 0 | } |
376 | | |
377 | 3.58k | status = validate_flags(config.profile, config.flags); |
378 | 3.58k | if (status != ASTCENC_SUCCESS) |
379 | 0 | { |
380 | 0 | return status; |
381 | 0 | } |
382 | | |
383 | 3.58k | status = validate_block_size(config.block_x, config.block_y, config.block_z); |
384 | 3.58k | if (status != ASTCENC_SUCCESS) |
385 | 0 | { |
386 | 0 | return status; |
387 | 0 | } |
388 | | |
389 | | #if defined(ASTCENC_DECOMPRESS_ONLY) |
390 | | // Decompress-only builds only support decompress-only contexts |
391 | | if (!(config.flags & ASTCENC_FLG_DECOMPRESS_ONLY)) |
392 | | { |
393 | | return ASTCENC_ERR_BAD_PARAM; |
394 | | } |
395 | | #endif |
396 | | |
397 | 3.58k | config.rgbm_m_scale = astc::max(config.rgbm_m_scale, 1.0f); |
398 | | |
399 | 3.58k | config.tune_partition_count_limit = astc::clamp(config.tune_partition_count_limit, 1u, 4u); |
400 | 3.58k | config.tune_2partition_index_limit = astc::clamp(config.tune_2partition_index_limit, 1u, BLOCK_MAX_PARTITIONINGS); |
401 | 3.58k | config.tune_3partition_index_limit = astc::clamp(config.tune_3partition_index_limit, 1u, BLOCK_MAX_PARTITIONINGS); |
402 | 3.58k | config.tune_4partition_index_limit = astc::clamp(config.tune_4partition_index_limit, 1u, BLOCK_MAX_PARTITIONINGS); |
403 | 3.58k | config.tune_block_mode_limit = astc::clamp(config.tune_block_mode_limit, 1u, 100u); |
404 | 3.58k | config.tune_refinement_limit = astc::max(config.tune_refinement_limit, 1u); |
405 | 3.58k | config.tune_candidate_limit = astc::clamp(config.tune_candidate_limit, 1u, TUNE_MAX_TRIAL_CANDIDATES); |
406 | 3.58k | config.tune_2partitioning_candidate_limit = astc::clamp(config.tune_2partitioning_candidate_limit, 1u, TUNE_MAX_PARTITIONING_CANDIDATES); |
407 | 3.58k | config.tune_3partitioning_candidate_limit = astc::clamp(config.tune_3partitioning_candidate_limit, 1u, TUNE_MAX_PARTITIONING_CANDIDATES); |
408 | 3.58k | config.tune_4partitioning_candidate_limit = astc::clamp(config.tune_4partitioning_candidate_limit, 1u, TUNE_MAX_PARTITIONING_CANDIDATES); |
409 | 3.58k | config.tune_db_limit = astc::max(config.tune_db_limit, 0.0f); |
410 | 3.58k | config.tune_mse_overshoot = astc::max(config.tune_mse_overshoot, 1.0f); |
411 | 3.58k | config.tune_2partition_early_out_limit_factor = astc::max(config.tune_2partition_early_out_limit_factor, 0.0f); |
412 | 3.58k | config.tune_3partition_early_out_limit_factor = astc::max(config.tune_3partition_early_out_limit_factor, 0.0f); |
413 | 3.58k | config.tune_2plane_early_out_limit_correlation = astc::max(config.tune_2plane_early_out_limit_correlation, 0.0f); |
414 | | |
415 | | // Specifying a zero weight color component is not allowed; force to small value |
416 | 3.58k | float max_weight = astc::max(astc::max(config.cw_r_weight, config.cw_g_weight), |
417 | 3.58k | astc::max(config.cw_b_weight, config.cw_a_weight)); |
418 | 3.58k | if (max_weight > 0.0f) |
419 | 3.58k | { |
420 | 3.58k | max_weight /= 1000.0f; |
421 | 3.58k | config.cw_r_weight = astc::max(config.cw_r_weight, max_weight); |
422 | 3.58k | config.cw_g_weight = astc::max(config.cw_g_weight, max_weight); |
423 | 3.58k | config.cw_b_weight = astc::max(config.cw_b_weight, max_weight); |
424 | 3.58k | config.cw_a_weight = astc::max(config.cw_a_weight, max_weight); |
425 | 3.58k | } |
426 | | // If all color components error weights are zero then return an error |
427 | 0 | else |
428 | 0 | { |
429 | 0 | return ASTCENC_ERR_BAD_PARAM; |
430 | 0 | } |
431 | | |
432 | 3.58k | return ASTCENC_SUCCESS; |
433 | 3.58k | } |
434 | | |
435 | | /* See header for documentation. */ |
436 | | astcenc_error astcenc_config_init( |
437 | | astcenc_profile profile, |
438 | | unsigned int block_x, |
439 | | unsigned int block_y, |
440 | | unsigned int block_z, |
441 | | float quality, |
442 | | unsigned int flags, |
443 | | astcenc_config* configp |
444 | 3.63k | ) { |
445 | 3.63k | astcenc_error status; |
446 | | |
447 | 3.63k | status = validate_cpu_float(); |
448 | 3.63k | if (status != ASTCENC_SUCCESS) |
449 | 0 | { |
450 | 0 | return status; |
451 | 0 | } |
452 | | |
453 | | // Zero init all config fields; although most of will be over written |
454 | 3.63k | astcenc_config& config = *configp; |
455 | 3.63k | std::memset(&config, 0, sizeof(config)); |
456 | | |
457 | | // Process the block size |
458 | 3.63k | block_z = astc::max(block_z, 1u); // For 2D blocks Z==0 is accepted, but convert to 1 |
459 | 3.63k | status = validate_block_size(block_x, block_y, block_z); |
460 | 3.63k | if (status != ASTCENC_SUCCESS) |
461 | 0 | { |
462 | 0 | return status; |
463 | 0 | } |
464 | | |
465 | 3.63k | config.block_x = block_x; |
466 | 3.63k | config.block_y = block_y; |
467 | 3.63k | config.block_z = block_z; |
468 | | |
469 | 3.63k | float texels = static_cast<float>(block_x * block_y * block_z); |
470 | 3.63k | float ltexels = logf(texels) / logf(10.0f); |
471 | | |
472 | | // Process the performance quality level or preset; note that this must be done before we |
473 | | // process any additional settings, such as color profile and flags, which may replace some of |
474 | | // these settings with more use case tuned values |
475 | 3.63k | if (quality < ASTCENC_PRE_FASTEST || |
476 | 3.63k | quality > ASTCENC_PRE_EXHAUSTIVE) |
477 | 0 | { |
478 | 0 | return ASTCENC_ERR_BAD_QUALITY; |
479 | 0 | } |
480 | | |
481 | 3.63k | static const std::array<astcenc_preset_config, 6>* preset_configs; |
482 | 3.63k | int texels_int = block_x * block_y * block_z; |
483 | 3.63k | if (texels_int < 25) |
484 | 1.96k | { |
485 | 1.96k | preset_configs = &preset_configs_high; |
486 | 1.96k | } |
487 | 1.67k | else if (texels_int < 64) |
488 | 1.00k | { |
489 | 1.00k | preset_configs = &preset_configs_mid; |
490 | 1.00k | } |
491 | 669 | else |
492 | 669 | { |
493 | 669 | preset_configs = &preset_configs_low; |
494 | 669 | } |
495 | | |
496 | | // Determine which preset to use, or which pair to interpolate |
497 | 3.63k | size_t start; |
498 | 3.63k | size_t end; |
499 | 10.3k | for (end = 0; end < preset_configs->size(); end++) |
500 | 10.3k | { |
501 | 10.3k | if ((*preset_configs)[end].quality >= quality) |
502 | 3.63k | { |
503 | 3.63k | break; |
504 | 3.63k | } |
505 | 10.3k | } |
506 | | |
507 | 3.63k | start = end == 0 ? 0 : end - 1; |
508 | | |
509 | | // Start and end node are the same - so just transfer the values. |
510 | 3.63k | if (start == end) |
511 | 180 | { |
512 | 180 | config.tune_partition_count_limit = (*preset_configs)[start].tune_partition_count_limit; |
513 | 180 | config.tune_2partition_index_limit = (*preset_configs)[start].tune_2partition_index_limit; |
514 | 180 | config.tune_3partition_index_limit = (*preset_configs)[start].tune_3partition_index_limit; |
515 | 180 | config.tune_4partition_index_limit = (*preset_configs)[start].tune_4partition_index_limit; |
516 | 180 | config.tune_block_mode_limit = (*preset_configs)[start].tune_block_mode_limit; |
517 | 180 | config.tune_refinement_limit = (*preset_configs)[start].tune_refinement_limit; |
518 | 180 | config.tune_candidate_limit = (*preset_configs)[start].tune_candidate_limit; |
519 | 180 | config.tune_2partitioning_candidate_limit = (*preset_configs)[start].tune_2partitioning_candidate_limit; |
520 | 180 | config.tune_3partitioning_candidate_limit = (*preset_configs)[start].tune_3partitioning_candidate_limit; |
521 | 180 | config.tune_4partitioning_candidate_limit = (*preset_configs)[start].tune_4partitioning_candidate_limit; |
522 | 180 | config.tune_db_limit = astc::max((*preset_configs)[start].tune_db_limit_a_base - 35 * ltexels, |
523 | 180 | (*preset_configs)[start].tune_db_limit_b_base - 19 * ltexels); |
524 | | |
525 | 180 | config.tune_mse_overshoot = (*preset_configs)[start].tune_mse_overshoot; |
526 | | |
527 | 180 | config.tune_2partition_early_out_limit_factor = (*preset_configs)[start].tune_2partition_early_out_limit_factor; |
528 | 180 | config.tune_3partition_early_out_limit_factor = (*preset_configs)[start].tune_3partition_early_out_limit_factor; |
529 | 180 | config.tune_2plane_early_out_limit_correlation = (*preset_configs)[start].tune_2plane_early_out_limit_correlation; |
530 | 180 | config.tune_search_mode0_enable = (*preset_configs)[start].tune_search_mode0_enable; |
531 | 180 | } |
532 | | // Start and end node are not the same - so interpolate between them |
533 | 3.45k | else |
534 | 3.45k | { |
535 | 3.45k | auto& node_a = (*preset_configs)[start]; |
536 | 3.45k | auto& node_b = (*preset_configs)[end]; |
537 | | |
538 | 3.45k | float wt_range = node_b.quality - node_a.quality; |
539 | 3.45k | assert(wt_range > 0); |
540 | | |
541 | | // Compute interpolation factors |
542 | 3.45k | float wt_node_a = (node_b.quality - quality) / wt_range; |
543 | 3.45k | float wt_node_b = (quality - node_a.quality) / wt_range; |
544 | | |
545 | 24.1k | #define LERP(param) ((node_a.param * wt_node_a) + (node_b.param * wt_node_b)) |
546 | 34.5k | #define LERPI(param) astc::flt2int_rtn(\ |
547 | 34.5k | (static_cast<float>(node_a.param) * wt_node_a) + \ |
548 | 34.5k | (static_cast<float>(node_b.param) * wt_node_b)) |
549 | 13.8k | #define LERPUI(param) static_cast<unsigned int>(LERPI(param)) |
550 | | |
551 | 3.45k | config.tune_partition_count_limit = LERPI(tune_partition_count_limit); |
552 | 3.45k | config.tune_2partition_index_limit = LERPI(tune_2partition_index_limit); |
553 | 3.45k | config.tune_3partition_index_limit = LERPI(tune_3partition_index_limit); |
554 | 3.45k | config.tune_4partition_index_limit = LERPI(tune_4partition_index_limit); |
555 | 3.45k | config.tune_block_mode_limit = LERPI(tune_block_mode_limit); |
556 | 3.45k | config.tune_refinement_limit = LERPI(tune_refinement_limit); |
557 | 3.45k | config.tune_candidate_limit = LERPUI(tune_candidate_limit); |
558 | 3.45k | config.tune_2partitioning_candidate_limit = LERPUI(tune_2partitioning_candidate_limit); |
559 | 3.45k | config.tune_3partitioning_candidate_limit = LERPUI(tune_3partitioning_candidate_limit); |
560 | 3.45k | config.tune_4partitioning_candidate_limit = LERPUI(tune_4partitioning_candidate_limit); |
561 | 3.45k | config.tune_db_limit = astc::max(LERP(tune_db_limit_a_base) - 35 * ltexels, |
562 | 3.45k | LERP(tune_db_limit_b_base) - 19 * ltexels); |
563 | | |
564 | 3.45k | config.tune_mse_overshoot = LERP(tune_mse_overshoot); |
565 | | |
566 | 3.45k | config.tune_2partition_early_out_limit_factor = LERP(tune_2partition_early_out_limit_factor); |
567 | 3.45k | config.tune_3partition_early_out_limit_factor = LERP(tune_3partition_early_out_limit_factor); |
568 | 3.45k | config.tune_2plane_early_out_limit_correlation = LERP(tune_2plane_early_out_limit_correlation); |
569 | 3.45k | config.tune_search_mode0_enable = LERP(tune_search_mode0_enable); |
570 | 3.45k | #undef LERP |
571 | 3.45k | #undef LERPI |
572 | 3.45k | #undef LERPUI |
573 | 3.45k | } |
574 | | |
575 | | // Set heuristics to the defaults for each color profile |
576 | 3.63k | config.cw_r_weight = 1.0f; |
577 | 3.63k | config.cw_g_weight = 1.0f; |
578 | 3.63k | config.cw_b_weight = 1.0f; |
579 | 3.63k | config.cw_a_weight = 1.0f; |
580 | | |
581 | 3.63k | config.a_scale_radius = 0; |
582 | | |
583 | 3.63k | config.rgbm_m_scale = 0.0f; |
584 | | |
585 | 3.63k | config.profile = profile; |
586 | | |
587 | | // Values in this enum are from an external user, so not guaranteed to be |
588 | | // bounded to the enum values |
589 | 3.63k | switch (static_cast<int>(profile)) |
590 | 3.63k | { |
591 | 691 | case ASTCENC_PRF_LDR: |
592 | 1.91k | case ASTCENC_PRF_LDR_SRGB: |
593 | 1.91k | break; |
594 | 839 | case ASTCENC_PRF_HDR_RGB_LDR_A: |
595 | 1.72k | case ASTCENC_PRF_HDR: |
596 | 1.72k | config.tune_db_limit = 999.0f; |
597 | 1.72k | config.tune_search_mode0_enable = 0.0f; |
598 | 1.72k | break; |
599 | 0 | default: |
600 | 0 | return ASTCENC_ERR_BAD_PROFILE; |
601 | 3.63k | } |
602 | | |
603 | | // Flags field must not contain any unknown flag bits |
604 | 3.63k | status = validate_flags(profile, flags); |
605 | 3.63k | if (status != ASTCENC_SUCCESS) |
606 | 49 | { |
607 | 49 | return status; |
608 | 49 | } |
609 | | |
610 | 3.58k | if (flags & ASTCENC_FLG_MAP_NORMAL) |
611 | 998 | { |
612 | | // Normal map encoding uses L+A blocks, so allow one more partitioning |
613 | | // than normal. We need need fewer bits for endpoints, so more likely |
614 | | // to be able to use more partitions than an RGB/RGBA block |
615 | 998 | config.tune_partition_count_limit = astc::min(config.tune_partition_count_limit + 1u, 4u); |
616 | | |
617 | 998 | config.cw_g_weight = 0.0f; |
618 | 998 | config.cw_b_weight = 0.0f; |
619 | 998 | config.tune_2partition_early_out_limit_factor *= 1.5f; |
620 | 998 | config.tune_3partition_early_out_limit_factor *= 1.5f; |
621 | 998 | config.tune_2plane_early_out_limit_correlation = 0.99f; |
622 | | |
623 | | // Normals are prone to blocking artifacts on smooth curves |
624 | | // so force compressor to try harder here ... |
625 | 998 | config.tune_db_limit *= 1.03f; |
626 | 998 | } |
627 | 2.58k | else if (flags & ASTCENC_FLG_MAP_RGBM) |
628 | 911 | { |
629 | 911 | config.rgbm_m_scale = 5.0f; |
630 | 911 | config.cw_a_weight = 2.0f * config.rgbm_m_scale; |
631 | 911 | } |
632 | 1.67k | else // (This is color data) |
633 | 1.67k | { |
634 | | // This is a very basic perceptual metric for RGB color data, which weights error |
635 | | // significance by the perceptual luminance contribution of each color channel. For |
636 | | // luminance the usual weights to compute luminance from a linear RGB value are as |
637 | | // follows: |
638 | | // |
639 | | // l = r * 0.3 + g * 0.59 + b * 0.11 |
640 | | // |
641 | | // ... but we scale these up to keep a better balance between color and alpha. Note |
642 | | // that if the content is using alpha we'd recommend using the -a option to weight |
643 | | // the color contribution by the alpha transparency. |
644 | 1.67k | if (flags & ASTCENC_FLG_USE_PERCEPTUAL) |
645 | 242 | { |
646 | 242 | config.cw_r_weight = 0.30f * 2.25f; |
647 | 242 | config.cw_g_weight = 0.59f * 2.25f; |
648 | 242 | config.cw_b_weight = 0.11f * 2.25f; |
649 | 242 | } |
650 | 1.67k | } |
651 | 3.58k | config.flags = flags; |
652 | | |
653 | 3.58k | return ASTCENC_SUCCESS; |
654 | 3.63k | } |
655 | | |
656 | | /* See header for documentation. */ |
657 | | astcenc_error astcenc_context_alloc( |
658 | | const astcenc_config* configp, |
659 | | unsigned int thread_count, |
660 | | astcenc_context** context, |
661 | | const astcenc_context* parent_context |
662 | 3.58k | ) { |
663 | 3.58k | astcenc_error status; |
664 | | |
665 | 3.58k | status = validate_cpu_float(); |
666 | 3.58k | if (status != ASTCENC_SUCCESS) |
667 | 0 | { |
668 | 0 | return status; |
669 | 0 | } |
670 | | |
671 | 3.58k | if (thread_count == 0) |
672 | 0 | { |
673 | 0 | return ASTCENC_ERR_BAD_PARAM; |
674 | 0 | } |
675 | | |
676 | | #if defined(ASTCENC_DIAGNOSTICS) |
677 | | // Force single threaded compressor use in diagnostic mode |
678 | | if (thread_count != 1) |
679 | | { |
680 | | return ASTCENC_ERR_BAD_PARAM; |
681 | | } |
682 | | #endif |
683 | | |
684 | | // Exactly one of config or parent_context must be set |
685 | 3.58k | bool has_config = configp != nullptr; |
686 | 3.58k | bool has_parent = parent_context != nullptr; |
687 | 3.58k | if (!(has_config ^ has_parent)) |
688 | 0 | { |
689 | 0 | return ASTCENC_ERR_BAD_PARAM; |
690 | 0 | } |
691 | | |
692 | 3.58k | if (has_parent) |
693 | 0 | { |
694 | 0 | configp = &parent_context->context.config; |
695 | 0 | } |
696 | | |
697 | 3.58k | const astcenc_config& config = *configp; |
698 | 3.58k | astcenc_context* ctxo = new astcenc_context; |
699 | 3.58k | astcenc_contexti* ctx = &ctxo->context; |
700 | 3.58k | ctx->thread_count = thread_count; |
701 | 3.58k | ctx->config = *configp; |
702 | 3.58k | ctx->working_buffers = nullptr; |
703 | | |
704 | | // These are allocated per-compress, as they depend on image size |
705 | 3.58k | ctx->input_alpha_averages = nullptr; |
706 | | |
707 | | // Copy the config first and validate the copy (we may modify it) |
708 | 3.58k | status = validate_config(ctx->config); |
709 | 3.58k | if (status != ASTCENC_SUCCESS) |
710 | 0 | { |
711 | 0 | delete ctxo; |
712 | 0 | return status; |
713 | 0 | } |
714 | | |
715 | 3.58k | if (!parent_context) |
716 | 3.58k | { |
717 | 3.58k | block_size_descriptor* bsd = aligned_malloc<block_size_descriptor>(sizeof(block_size_descriptor), ASTCENC_VECALIGN); |
718 | 3.58k | if (!bsd) |
719 | 0 | { |
720 | 0 | delete ctxo; |
721 | 0 | return ASTCENC_ERR_OUT_OF_MEM; |
722 | 0 | } |
723 | | |
724 | 3.58k | bool can_omit_modes = static_cast<bool>(config.flags & ASTCENC_FLG_SELF_DECOMPRESS_ONLY); |
725 | 3.58k | init_block_size_descriptor(config.block_x, config.block_y, config.block_z, |
726 | 3.58k | can_omit_modes, |
727 | 3.58k | config.tune_partition_count_limit, |
728 | 3.58k | static_cast<float>(config.tune_block_mode_limit) / 100.0f, |
729 | 3.58k | *bsd); |
730 | | |
731 | 3.58k | ctx->owns_bsd = true; |
732 | 3.58k | ctx->bsd = bsd; |
733 | 3.58k | } |
734 | 0 | else |
735 | 0 | { |
736 | 0 | ctx->owns_bsd = false; |
737 | 0 | ctx->bsd = parent_context->context.bsd; |
738 | 0 | } |
739 | | |
740 | 3.58k | #if !defined(ASTCENC_DECOMPRESS_ONLY) |
741 | | // Do setup only needed by compression |
742 | 3.58k | if (!(ctx->config.flags & ASTCENC_FLG_DECOMPRESS_ONLY)) |
743 | 3.27k | { |
744 | | // Turn a dB limit into a per-texel error for faster use later |
745 | 3.27k | if ((ctx->config.profile == ASTCENC_PRF_LDR) || (ctx->config.profile == ASTCENC_PRF_LDR_SRGB)) |
746 | 1.70k | { |
747 | 1.70k | ctx->config.tune_db_limit = astc::pow(0.1f, ctx->config.tune_db_limit * 0.1f) * 65535.0f * 65535.0f; |
748 | 1.70k | } |
749 | 1.57k | else |
750 | 1.57k | { |
751 | 1.57k | ctx->config.tune_db_limit = 0.0f; |
752 | 1.57k | } |
753 | | |
754 | 3.27k | size_t worksize = sizeof(compression_working_buffers) * thread_count; |
755 | 3.27k | ctx->working_buffers = aligned_malloc<compression_working_buffers>(worksize, ASTCENC_VECALIGN); |
756 | 3.27k | static_assert((ASTCENC_VECALIGN == 0) || ((sizeof(compression_working_buffers) % ASTCENC_VECALIGN) == 0), |
757 | 3.27k | "compression_working_buffers size must be multiple of vector alignment"); |
758 | 3.27k | if (!ctx->working_buffers) |
759 | 0 | { |
760 | 0 | if (ctx->owns_bsd) |
761 | 0 | { |
762 | 0 | aligned_free<const block_size_descriptor>(ctx->bsd); |
763 | 0 | } |
764 | 0 | delete ctxo; |
765 | 0 | *context = nullptr; |
766 | 0 | return ASTCENC_ERR_OUT_OF_MEM; |
767 | 0 | } |
768 | 3.27k | } |
769 | 3.58k | #endif |
770 | | |
771 | | #if defined(ASTCENC_DIAGNOSTICS) |
772 | | ctx->trace_log = new TraceLog(ctx->config.trace_file_path); |
773 | | if (!ctx->trace_log->m_file) |
774 | | { |
775 | | return ASTCENC_ERR_DTRACE_FAILURE; |
776 | | } |
777 | | |
778 | | trace_add_data("block_x", config.block_x); |
779 | | trace_add_data("block_y", config.block_y); |
780 | | trace_add_data("block_z", config.block_z); |
781 | | #endif |
782 | | |
783 | 3.58k | *context = ctxo; |
784 | | |
785 | 3.58k | #if !defined(ASTCENC_DECOMPRESS_ONLY) |
786 | 3.58k | prepare_angular_tables(); |
787 | 3.58k | #endif |
788 | | |
789 | 3.58k | return ASTCENC_SUCCESS; |
790 | 3.58k | } |
791 | | |
792 | | /* See header dor documentation. */ |
793 | | void astcenc_context_free( |
794 | | astcenc_context* ctxo |
795 | 3.58k | ) { |
796 | 3.58k | if (ctxo) |
797 | 3.58k | { |
798 | 3.58k | astcenc_contexti* ctx = &ctxo->context; |
799 | 3.58k | aligned_free<compression_working_buffers>(ctx->working_buffers); |
800 | 3.58k | if (ctx->owns_bsd) |
801 | 3.58k | { |
802 | 3.58k | aligned_free<const block_size_descriptor>(ctx->bsd); |
803 | 3.58k | } |
804 | | #if defined(ASTCENC_DIAGNOSTICS) |
805 | | delete ctx->trace_log; |
806 | | #endif |
807 | 3.58k | delete ctxo; |
808 | 3.58k | } |
809 | 3.58k | } |
810 | | |
811 | | #if !defined(ASTCENC_DECOMPRESS_ONLY) |
812 | | |
813 | | /** |
814 | | * @brief Compress an image, after any preflight has completed. |
815 | | * |
816 | | * @param[out] ctxo The compressor context. |
817 | | * @param thread_index The thread index. |
818 | | * @param image The intput image. |
819 | | * @param swizzle The input swizzle. |
820 | | * @param[out] buffer The output array for the compressed data. |
821 | | */ |
822 | | static void compress_image( |
823 | | astcenc_context& ctxo, |
824 | | unsigned int thread_index, |
825 | | const astcenc_image& image, |
826 | | const astcenc_swizzle& swizzle, |
827 | | uint8_t* buffer |
828 | 2.21k | ) { |
829 | 2.21k | astcenc_contexti& ctx = ctxo.context; |
830 | 2.21k | const block_size_descriptor& bsd = *ctx.bsd; |
831 | 2.21k | astcenc_profile decode_mode = ctx.config.profile; |
832 | | |
833 | 2.21k | image_block blk; |
834 | | |
835 | 2.21k | int block_x = bsd.xdim; |
836 | 2.21k | int block_y = bsd.ydim; |
837 | 2.21k | int block_z = bsd.zdim; |
838 | 2.21k | blk.texel_count = static_cast<uint8_t>(block_x * block_y * block_z); |
839 | | |
840 | 2.21k | int dim_x = image.dim_x; |
841 | 2.21k | int dim_y = image.dim_y; |
842 | 2.21k | int dim_z = image.dim_z; |
843 | | |
844 | 2.21k | int xblocks = (dim_x + block_x - 1) / block_x; |
845 | 2.21k | int yblocks = (dim_y + block_y - 1) / block_y; |
846 | 2.21k | int zblocks = (dim_z + block_z - 1) / block_z; |
847 | 2.21k | int block_count = zblocks * yblocks * xblocks; |
848 | | |
849 | 2.21k | int row_blocks = xblocks; |
850 | 2.21k | int plane_blocks = xblocks * yblocks; |
851 | | |
852 | 2.21k | blk.decode_unorm8 = ctxo.context.config.flags & ASTCENC_FLG_USE_DECODE_UNORM8; |
853 | | |
854 | | // Populate the block channel weights |
855 | 2.21k | blk.channel_weight = vfloat4(ctx.config.cw_r_weight, |
856 | 2.21k | ctx.config.cw_g_weight, |
857 | 2.21k | ctx.config.cw_b_weight, |
858 | 2.21k | ctx.config.cw_a_weight); |
859 | | |
860 | | // Use preallocated scratch buffer |
861 | 2.21k | auto& temp_buffers = ctx.working_buffers[thread_index]; |
862 | | |
863 | | // Only the first thread actually runs the initializer |
864 | 2.21k | ctxo.manage_compress.init(block_count, ctx.config.progress_callback); |
865 | | |
866 | | // Determine if we can use an optimized load function |
867 | 2.21k | bool needs_swz = (swizzle.r != ASTCENC_SWZ_R) || (swizzle.g != ASTCENC_SWZ_G) || |
868 | 2.21k | (swizzle.b != ASTCENC_SWZ_B) || (swizzle.a != ASTCENC_SWZ_A); |
869 | | |
870 | 2.21k | bool needs_hdr = (decode_mode == ASTCENC_PRF_HDR) || |
871 | 1.70k | (decode_mode == ASTCENC_PRF_HDR_RGB_LDR_A); |
872 | | |
873 | 2.21k | bool use_fast_load = !needs_swz && !needs_hdr && |
874 | 1.09k | block_z == 1 && image.data_type == ASTCENC_TYPE_U8; |
875 | | |
876 | 2.21k | auto load_func = load_image_block; |
877 | 2.21k | if (use_fast_load) |
878 | 1.09k | { |
879 | 1.09k | load_func = load_image_block_fast_ldr; |
880 | 1.09k | } |
881 | | |
882 | | // All threads run this processing loop until there is no work remaining |
883 | 4.43k | while (true) |
884 | 4.43k | { |
885 | 4.43k | unsigned int count; |
886 | 4.43k | unsigned int base = ctxo.manage_compress.get_task_assignment(16, count); |
887 | 4.43k | if (!count) |
888 | 2.21k | { |
889 | 2.21k | break; |
890 | 2.21k | } |
891 | | |
892 | 4.43k | for (unsigned int i = base; i < base + count; i++) |
893 | 2.21k | { |
894 | | // Decode i into x, y, z block indices |
895 | 2.21k | int z = i / plane_blocks; |
896 | 2.21k | unsigned int rem = i - (z * plane_blocks); |
897 | 2.21k | int y = rem / row_blocks; |
898 | 2.21k | int x = rem - (y * row_blocks); |
899 | | |
900 | | // Test if we can apply some basic alpha-scale RDO |
901 | 2.21k | bool use_full_block = true; |
902 | 2.21k | if (ctx.config.a_scale_radius != 0 && block_z == 1) |
903 | 0 | { |
904 | 0 | int start_x = x * block_x; |
905 | 0 | int end_x = astc::min(dim_x, start_x + block_x); |
906 | |
|
907 | 0 | int start_y = y * block_y; |
908 | 0 | int end_y = astc::min(dim_y, start_y + block_y); |
909 | | |
910 | | // SATs accumulate error, so don't test exactly zero. Test for |
911 | | // less than 1 alpha in the expanded block footprint that |
912 | | // includes the alpha radius. |
913 | 0 | int x_footprint = block_x + 2 * (ctx.config.a_scale_radius - 1); |
914 | |
|
915 | 0 | int y_footprint = block_y + 2 * (ctx.config.a_scale_radius - 1); |
916 | |
|
917 | 0 | float footprint = static_cast<float>(x_footprint * y_footprint); |
918 | 0 | float threshold = 0.9f / (255.0f * footprint); |
919 | | |
920 | | // Do we have any alpha values? |
921 | 0 | use_full_block = false; |
922 | 0 | for (int ay = start_y; ay < end_y; ay++) |
923 | 0 | { |
924 | 0 | for (int ax = start_x; ax < end_x; ax++) |
925 | 0 | { |
926 | 0 | float a_avg = ctx.input_alpha_averages[ay * dim_x + ax]; |
927 | 0 | if (a_avg > threshold) |
928 | 0 | { |
929 | 0 | use_full_block = true; |
930 | 0 | ax = end_x; |
931 | 0 | ay = end_y; |
932 | 0 | } |
933 | 0 | } |
934 | 0 | } |
935 | 0 | } |
936 | | |
937 | | // Fetch the full block for compression |
938 | 2.21k | if (use_full_block) |
939 | 2.21k | { |
940 | 2.21k | load_func(decode_mode, image, blk, bsd, x * block_x, y * block_y, z * block_z, swizzle); |
941 | | |
942 | | // Scale RGB error contribution by the maximum alpha in the block |
943 | | // This encourages preserving alpha accuracy in regions with high |
944 | | // transparency, and can buy up to 0.5 dB PSNR. |
945 | 2.21k | if (ctx.config.flags & ASTCENC_FLG_USE_ALPHA_WEIGHT) |
946 | 820 | { |
947 | 820 | float alpha_scale = blk.data_max.lane<3>() * (1.0f / 65535.0f); |
948 | 820 | blk.channel_weight = vfloat4(ctx.config.cw_r_weight * alpha_scale, |
949 | 820 | ctx.config.cw_g_weight * alpha_scale, |
950 | 820 | ctx.config.cw_b_weight * alpha_scale, |
951 | 820 | ctx.config.cw_a_weight); |
952 | 820 | } |
953 | 2.21k | } |
954 | | // Apply alpha scale RDO - substitute constant color block |
955 | 0 | else |
956 | 0 | { |
957 | 0 | blk.origin_texel = vfloat4::zero(); |
958 | 0 | blk.data_min = vfloat4::zero(); |
959 | 0 | blk.data_mean = vfloat4::zero(); |
960 | 0 | blk.data_max = vfloat4::zero(); |
961 | 0 | blk.grayscale = true; |
962 | 0 | } |
963 | | |
964 | 2.21k | int offset = ((z * yblocks + y) * xblocks + x) * 16; |
965 | 2.21k | uint8_t *bp = buffer + offset; |
966 | 2.21k | compress_block(ctx, blk, bp, temp_buffers); |
967 | 2.21k | } |
968 | | |
969 | 2.21k | ctxo.manage_compress.complete_task_assignment(count); |
970 | 2.21k | } |
971 | 2.21k | } |
972 | | |
973 | | /** |
974 | | * @brief Compute regional averages in an image. |
975 | | * |
976 | | * This function can be called by multiple threads, but only after a single |
977 | | * thread calls the setup function @c init_compute_averages(). |
978 | | * |
979 | | * Results are written back into @c img->input_alpha_averages. |
980 | | * |
981 | | * @param[out] ctx The context. |
982 | | * @param ag The average and variance arguments created during setup. |
983 | | */ |
984 | | static void compute_averages( |
985 | | astcenc_context& ctx, |
986 | | const avg_args &ag |
987 | 0 | ) { |
988 | 0 | pixel_region_args arg = ag.arg; |
989 | 0 | arg.work_memory = new vfloat4[ag.work_memory_size]; |
990 | |
|
991 | 0 | int size_x = ag.img_size_x; |
992 | 0 | int size_y = ag.img_size_y; |
993 | 0 | int size_z = ag.img_size_z; |
994 | |
|
995 | 0 | int step_xy = ag.blk_size_xy; |
996 | 0 | int step_z = ag.blk_size_z; |
997 | |
|
998 | 0 | int y_tasks = (size_y + step_xy - 1) / step_xy; |
999 | | |
1000 | | // All threads run this processing loop until there is no work remaining |
1001 | 0 | while (true) |
1002 | 0 | { |
1003 | 0 | unsigned int count; |
1004 | 0 | unsigned int base = ctx.manage_avg.get_task_assignment(16, count); |
1005 | 0 | if (!count) |
1006 | 0 | { |
1007 | 0 | break; |
1008 | 0 | } |
1009 | | |
1010 | 0 | for (unsigned int i = base; i < base + count; i++) |
1011 | 0 | { |
1012 | 0 | int z = (i / (y_tasks)) * step_z; |
1013 | 0 | int y = (i - (z * y_tasks)) * step_xy; |
1014 | |
|
1015 | 0 | arg.size_z = astc::min(step_z, size_z - z); |
1016 | 0 | arg.offset_z = z; |
1017 | |
|
1018 | 0 | arg.size_y = astc::min(step_xy, size_y - y); |
1019 | 0 | arg.offset_y = y; |
1020 | |
|
1021 | 0 | for (int x = 0; x < size_x; x += step_xy) |
1022 | 0 | { |
1023 | 0 | arg.size_x = astc::min(step_xy, size_x - x); |
1024 | 0 | arg.offset_x = x; |
1025 | 0 | compute_pixel_region_variance(ctx.context, arg); |
1026 | 0 | } |
1027 | 0 | } |
1028 | |
|
1029 | 0 | ctx.manage_avg.complete_task_assignment(count); |
1030 | 0 | } |
1031 | |
|
1032 | 0 | delete[] arg.work_memory; |
1033 | 0 | } |
1034 | | |
1035 | | #endif |
1036 | | |
1037 | | /* See header for documentation. */ |
1038 | | astcenc_error astcenc_compress_image( |
1039 | | astcenc_context* ctxo, |
1040 | | astcenc_image* imagep, |
1041 | | const astcenc_swizzle* swizzle, |
1042 | | uint8_t* data_out, |
1043 | | size_t data_len, |
1044 | | unsigned int thread_index |
1045 | 2.21k | ) { |
1046 | | #if defined(ASTCENC_DECOMPRESS_ONLY) |
1047 | | (void)ctxo; |
1048 | | (void)imagep; |
1049 | | (void)swizzle; |
1050 | | (void)data_out; |
1051 | | (void)data_len; |
1052 | | (void)thread_index; |
1053 | | return ASTCENC_ERR_BAD_CONTEXT; |
1054 | | #else |
1055 | 2.21k | astcenc_contexti* ctx = &ctxo->context; |
1056 | 2.21k | astcenc_error status; |
1057 | 2.21k | astcenc_image& image = *imagep; |
1058 | | |
1059 | 2.21k | if (ctx->config.flags & ASTCENC_FLG_DECOMPRESS_ONLY) |
1060 | 0 | { |
1061 | 0 | return ASTCENC_ERR_BAD_CONTEXT; |
1062 | 0 | } |
1063 | | |
1064 | 2.21k | status = validate_compression_swizzle(*swizzle); |
1065 | 2.21k | if (status != ASTCENC_SUCCESS) |
1066 | 0 | { |
1067 | 0 | return status; |
1068 | 0 | } |
1069 | | |
1070 | 2.21k | if (thread_index >= ctx->thread_count) |
1071 | 0 | { |
1072 | 0 | return ASTCENC_ERR_BAD_PARAM; |
1073 | 0 | } |
1074 | | |
1075 | 2.21k | unsigned int block_x = ctx->config.block_x; |
1076 | 2.21k | unsigned int block_y = ctx->config.block_y; |
1077 | 2.21k | unsigned int block_z = ctx->config.block_z; |
1078 | | |
1079 | 2.21k | unsigned int xblocks = (image.dim_x + block_x - 1) / block_x; |
1080 | 2.21k | unsigned int yblocks = (image.dim_y + block_y - 1) / block_y; |
1081 | 2.21k | unsigned int zblocks = (image.dim_z + block_z - 1) / block_z; |
1082 | | |
1083 | | // Check we have enough output space (16 bytes per block) |
1084 | 2.21k | size_t size_needed = xblocks * yblocks * zblocks * 16; |
1085 | 2.21k | if (data_len < size_needed) |
1086 | 0 | { |
1087 | 0 | return ASTCENC_ERR_OUT_OF_MEM; |
1088 | 0 | } |
1089 | | |
1090 | | // If context thread count is one then implicitly reset |
1091 | 2.21k | if (ctx->thread_count == 1) |
1092 | 2.21k | { |
1093 | 2.21k | astcenc_compress_reset(ctxo); |
1094 | 2.21k | } |
1095 | | |
1096 | 2.21k | if (ctx->config.a_scale_radius != 0) |
1097 | 0 | { |
1098 | | // First thread to enter will do setup, other threads will subsequently |
1099 | | // enter the critical section but simply skip over the initialization |
1100 | 0 | auto init_avg = [ctx, &image, swizzle]() { |
1101 | | // Perform memory allocations for the destination buffers |
1102 | 0 | size_t texel_count = image.dim_x * image.dim_y * image.dim_z; |
1103 | 0 | ctx->input_alpha_averages = new float[texel_count]; |
1104 | |
|
1105 | 0 | return init_compute_averages( |
1106 | 0 | image, ctx->config.a_scale_radius, *swizzle, |
1107 | 0 | ctx->avg_preprocess_args); |
1108 | 0 | }; |
1109 | | |
1110 | | // Only the first thread actually runs the initializer |
1111 | 0 | ctxo->manage_avg.init(init_avg); |
1112 | | |
1113 | | // All threads will enter this function and dynamically grab work |
1114 | 0 | compute_averages(*ctxo, ctx->avg_preprocess_args); |
1115 | 0 | } |
1116 | | |
1117 | | // Wait for compute_averages to complete before compressing |
1118 | 2.21k | ctxo->manage_avg.wait(); |
1119 | | |
1120 | 2.21k | compress_image(*ctxo, thread_index, image, *swizzle, data_out); |
1121 | | |
1122 | | // Wait for compress to complete before freeing memory |
1123 | 2.21k | ctxo->manage_compress.wait(); |
1124 | | |
1125 | 2.21k | auto term_compress = [ctx]() { |
1126 | 2.21k | delete[] ctx->input_alpha_averages; |
1127 | 2.21k | ctx->input_alpha_averages = nullptr; |
1128 | 2.21k | }; |
1129 | | |
1130 | | // Only the first thread to arrive actually runs the term |
1131 | 2.21k | ctxo->manage_compress.term(term_compress); |
1132 | | |
1133 | 2.21k | return ASTCENC_SUCCESS; |
1134 | 2.21k | #endif |
1135 | 2.21k | } |
1136 | | |
1137 | | /* See header for documentation. */ |
1138 | | astcenc_error astcenc_compress_reset( |
1139 | | astcenc_context* ctxo |
1140 | 2.21k | ) { |
1141 | | #if defined(ASTCENC_DECOMPRESS_ONLY) |
1142 | | (void)ctxo; |
1143 | | return ASTCENC_ERR_BAD_CONTEXT; |
1144 | | #else |
1145 | 2.21k | astcenc_contexti* ctx = &ctxo->context; |
1146 | 2.21k | if (ctx->config.flags & ASTCENC_FLG_DECOMPRESS_ONLY) |
1147 | 0 | { |
1148 | 0 | return ASTCENC_ERR_BAD_CONTEXT; |
1149 | 0 | } |
1150 | | |
1151 | 2.21k | ctxo->manage_avg.reset(); |
1152 | 2.21k | ctxo->manage_compress.reset(); |
1153 | 2.21k | return ASTCENC_SUCCESS; |
1154 | 2.21k | #endif |
1155 | 2.21k | } |
1156 | | |
1157 | | /* See header for documentation. */ |
1158 | | astcenc_error astcenc_compress_cancel( |
1159 | | astcenc_context* ctxo |
1160 | 0 | ) { |
1161 | | #if defined(ASTCENC_DECOMPRESS_ONLY) |
1162 | | (void)ctxo; |
1163 | | return ASTCENC_ERR_BAD_CONTEXT; |
1164 | | #else |
1165 | 0 | astcenc_contexti* ctx = &ctxo->context; |
1166 | 0 | if (ctx->config.flags & ASTCENC_FLG_DECOMPRESS_ONLY) |
1167 | 0 | { |
1168 | 0 | return ASTCENC_ERR_BAD_CONTEXT; |
1169 | 0 | } |
1170 | | |
1171 | | // Cancel compression before cancelling avg. This avoids the race condition |
1172 | | // where cancelling them in the other order could see a compression worker |
1173 | | // starting to process even though some of the avg data is undefined. |
1174 | 0 | ctxo->manage_compress.cancel(); |
1175 | 0 | ctxo->manage_avg.cancel(); |
1176 | 0 | return ASTCENC_SUCCESS; |
1177 | 0 | #endif |
1178 | 0 | } |
1179 | | |
1180 | | /* See header for documentation. */ |
1181 | | astcenc_error astcenc_decompress_image( |
1182 | | astcenc_context* ctxo, |
1183 | | const uint8_t* data, |
1184 | | size_t data_len, |
1185 | | astcenc_image* image_outp, |
1186 | | const astcenc_swizzle* swizzle, |
1187 | | unsigned int thread_index |
1188 | 1.11k | ) { |
1189 | 1.11k | astcenc_error status; |
1190 | 1.11k | astcenc_image& image_out = *image_outp; |
1191 | 1.11k | astcenc_contexti* ctx = &ctxo->context; |
1192 | | |
1193 | | // Today this doesn't matter (working set on stack) but might in future ... |
1194 | 1.11k | if (thread_index >= ctx->thread_count) |
1195 | 0 | { |
1196 | 0 | return ASTCENC_ERR_BAD_PARAM; |
1197 | 0 | } |
1198 | | |
1199 | 1.11k | status = validate_decompression_swizzle(*swizzle); |
1200 | 1.11k | if (status != ASTCENC_SUCCESS) |
1201 | 0 | { |
1202 | 0 | return status; |
1203 | 0 | } |
1204 | | |
1205 | 1.11k | unsigned int block_x = ctx->config.block_x; |
1206 | 1.11k | unsigned int block_y = ctx->config.block_y; |
1207 | 1.11k | unsigned int block_z = ctx->config.block_z; |
1208 | | |
1209 | 1.11k | unsigned int xblocks = (image_out.dim_x + block_x - 1) / block_x; |
1210 | 1.11k | unsigned int yblocks = (image_out.dim_y + block_y - 1) / block_y; |
1211 | 1.11k | unsigned int zblocks = (image_out.dim_z + block_z - 1) / block_z; |
1212 | 1.11k | unsigned int block_count = zblocks * yblocks * xblocks; |
1213 | | |
1214 | 1.11k | int row_blocks = xblocks; |
1215 | 1.11k | int plane_blocks = xblocks * yblocks; |
1216 | | |
1217 | | // Check we have enough output space (16 bytes per block) |
1218 | 1.11k | size_t size_needed = xblocks * yblocks * zblocks * 16; |
1219 | 1.11k | if (data_len < size_needed) |
1220 | 0 | { |
1221 | 0 | return ASTCENC_ERR_OUT_OF_MEM; |
1222 | 0 | } |
1223 | | |
1224 | 1.11k | image_block blk {}; |
1225 | 1.11k | blk.texel_count = static_cast<uint8_t>(block_x * block_y * block_z); |
1226 | | |
1227 | | // Decode mode inferred from the output data type |
1228 | 1.11k | blk.decode_unorm8 = image_out.data_type == ASTCENC_TYPE_U8; |
1229 | | |
1230 | | // If context thread count is one then implicitly reset |
1231 | 1.11k | if (ctx->thread_count == 1) |
1232 | 1.11k | { |
1233 | 1.11k | astcenc_decompress_reset(ctxo); |
1234 | 1.11k | } |
1235 | | |
1236 | | // Only the first thread actually runs the initializer |
1237 | 1.11k | ctxo->manage_decompress.init(block_count, nullptr); |
1238 | | |
1239 | | // All threads run this processing loop until there is no work remaining |
1240 | 2.22k | while (true) |
1241 | 2.22k | { |
1242 | 2.22k | unsigned int count; |
1243 | 2.22k | unsigned int base = ctxo->manage_decompress.get_task_assignment(128, count); |
1244 | 2.22k | if (!count) |
1245 | 1.11k | { |
1246 | 1.11k | break; |
1247 | 1.11k | } |
1248 | | |
1249 | 5.06k | for (unsigned int i = base; i < base + count; i++) |
1250 | 3.95k | { |
1251 | | // Decode i into x, y, z block indices |
1252 | 3.95k | int z = i / plane_blocks; |
1253 | 3.95k | unsigned int rem = i - (z * plane_blocks); |
1254 | 3.95k | int y = rem / row_blocks; |
1255 | 3.95k | int x = rem - (y * row_blocks); |
1256 | | |
1257 | 3.95k | unsigned int offset = (((z * yblocks + y) * xblocks) + x) * 16; |
1258 | 3.95k | const uint8_t* bp = data + offset; |
1259 | | |
1260 | 3.95k | symbolic_compressed_block scb; |
1261 | | |
1262 | 3.95k | physical_to_symbolic(*ctx->bsd, bp, scb); |
1263 | | |
1264 | 3.95k | decompress_symbolic_block(ctx->config.profile, *ctx->bsd, |
1265 | 3.95k | x * block_x, y * block_y, z * block_z, |
1266 | 3.95k | scb, blk); |
1267 | | |
1268 | 3.95k | store_image_block(image_out, blk, *ctx->bsd, |
1269 | 3.95k | x * block_x, y * block_y, z * block_z, *swizzle); |
1270 | 3.95k | } |
1271 | | |
1272 | 1.11k | ctxo->manage_decompress.complete_task_assignment(count); |
1273 | 1.11k | } |
1274 | | |
1275 | 1.11k | return ASTCENC_SUCCESS; |
1276 | 1.11k | } |
1277 | | |
1278 | | /* See header for documentation. */ |
1279 | | astcenc_error astcenc_decompress_reset( |
1280 | | astcenc_context* ctxo |
1281 | 1.11k | ) { |
1282 | 1.11k | ctxo->manage_decompress.reset(); |
1283 | 1.11k | return ASTCENC_SUCCESS; |
1284 | 1.11k | } |
1285 | | |
1286 | | /* See header for documentation. */ |
1287 | | astcenc_error astcenc_get_block_info( |
1288 | | astcenc_context* ctxo, |
1289 | | const uint8_t data[16], |
1290 | | astcenc_block_info* info |
1291 | 0 | ) { |
1292 | | #if defined(ASTCENC_DECOMPRESS_ONLY) |
1293 | | (void)ctxo; |
1294 | | (void)data; |
1295 | | (void)info; |
1296 | | return ASTCENC_ERR_BAD_CONTEXT; |
1297 | | #else |
1298 | 0 | astcenc_contexti* ctx = &ctxo->context; |
1299 | | |
1300 | | // Decode the compressed data into a symbolic form |
1301 | 0 | symbolic_compressed_block scb; |
1302 | 0 | physical_to_symbolic(*ctx->bsd, data, scb); |
1303 | | |
1304 | | // Fetch the appropriate partition and decimation tables |
1305 | 0 | const block_size_descriptor& bsd = *ctx->bsd; |
1306 | | |
1307 | | // Start from a clean slate |
1308 | 0 | memset(info, 0, sizeof(*info)); |
1309 | | |
1310 | | // Basic info we can always populate |
1311 | 0 | info->profile = ctx->config.profile; |
1312 | |
|
1313 | 0 | info->block_x = ctx->config.block_x; |
1314 | 0 | info->block_y = ctx->config.block_y; |
1315 | 0 | info->block_z = ctx->config.block_z; |
1316 | 0 | info->texel_count = bsd.texel_count; |
1317 | | |
1318 | | // Check for error blocks first |
1319 | 0 | info->is_error_block = scb.block_type == SYM_BTYPE_ERROR; |
1320 | 0 | if (info->is_error_block) |
1321 | 0 | { |
1322 | 0 | return ASTCENC_SUCCESS; |
1323 | 0 | } |
1324 | | |
1325 | | // Check for constant color blocks second |
1326 | 0 | info->is_constant_block = scb.block_type == SYM_BTYPE_CONST_F16 || |
1327 | 0 | scb.block_type == SYM_BTYPE_CONST_U16; |
1328 | 0 | if (info->is_constant_block) |
1329 | 0 | { |
1330 | 0 | return ASTCENC_SUCCESS; |
1331 | 0 | } |
1332 | | |
1333 | | // Otherwise handle a full block ; known to be valid after conditions above have been checked |
1334 | 0 | int partition_count = scb.partition_count; |
1335 | 0 | const auto& pi = bsd.get_partition_info(partition_count, scb.partition_index); |
1336 | |
|
1337 | 0 | const block_mode& bm = bsd.get_block_mode(scb.block_mode); |
1338 | 0 | const decimation_info& di = bsd.get_decimation_info(bm.decimation_mode); |
1339 | |
|
1340 | 0 | info->weight_x = di.weight_x; |
1341 | 0 | info->weight_y = di.weight_y; |
1342 | 0 | info->weight_z = di.weight_z; |
1343 | |
|
1344 | 0 | info->is_dual_plane_block = bm.is_dual_plane != 0; |
1345 | |
|
1346 | 0 | info->partition_count = scb.partition_count; |
1347 | 0 | info->partition_index = scb.partition_index; |
1348 | 0 | info->dual_plane_component = scb.plane2_component; |
1349 | |
|
1350 | 0 | info->color_level_count = get_quant_level(scb.get_color_quant_mode()); |
1351 | 0 | info->weight_level_count = get_quant_level(bm.get_weight_quant_mode()); |
1352 | | |
1353 | | // Unpack color endpoints for each active partition |
1354 | 0 | for (unsigned int i = 0; i < scb.partition_count; i++) |
1355 | 0 | { |
1356 | 0 | bool rgb_hdr; |
1357 | 0 | bool a_hdr; |
1358 | 0 | vint4 endpnt[2]; |
1359 | |
|
1360 | 0 | unpack_color_endpoints(ctx->config.profile, |
1361 | 0 | scb.color_formats[i], |
1362 | 0 | scb.color_values[i], |
1363 | 0 | rgb_hdr, a_hdr, |
1364 | 0 | endpnt[0], endpnt[1]); |
1365 | | |
1366 | | // Store the color endpoint mode info |
1367 | 0 | info->color_endpoint_modes[i] = scb.color_formats[i]; |
1368 | 0 | info->is_hdr_block = info->is_hdr_block || rgb_hdr || a_hdr; |
1369 | | |
1370 | | // Store the unpacked and decoded color endpoint |
1371 | 0 | vmask4 hdr_mask(rgb_hdr, rgb_hdr, rgb_hdr, a_hdr); |
1372 | 0 | for (int j = 0; j < 2; j++) |
1373 | 0 | { |
1374 | 0 | vint4 color_lns = lns_to_sf16(endpnt[j]); |
1375 | 0 | vint4 color_unorm = unorm16_to_sf16(endpnt[j]); |
1376 | 0 | vint4 datai = select(color_unorm, color_lns, hdr_mask); |
1377 | 0 | store(float16_to_float(datai), info->color_endpoints[i][j]); |
1378 | 0 | } |
1379 | 0 | } |
1380 | | |
1381 | | // Unpack weights for each texel |
1382 | 0 | int weight_plane1[BLOCK_MAX_TEXELS]; |
1383 | 0 | int weight_plane2[BLOCK_MAX_TEXELS]; |
1384 | |
|
1385 | 0 | unpack_weights(bsd, scb, di, bm.is_dual_plane, weight_plane1, weight_plane2); |
1386 | 0 | for (unsigned int i = 0; i < bsd.texel_count; i++) |
1387 | 0 | { |
1388 | 0 | info->weight_values_plane1[i] = static_cast<float>(weight_plane1[i]) * (1.0f / WEIGHTS_TEXEL_SUM); |
1389 | 0 | if (info->is_dual_plane_block) |
1390 | 0 | { |
1391 | 0 | info->weight_values_plane2[i] = static_cast<float>(weight_plane2[i]) * (1.0f / WEIGHTS_TEXEL_SUM); |
1392 | 0 | } |
1393 | 0 | } |
1394 | | |
1395 | | // Unpack partition assignments for each texel |
1396 | 0 | for (unsigned int i = 0; i < bsd.texel_count; i++) |
1397 | 0 | { |
1398 | 0 | info->partition_assignment[i] = pi.partition_of_texel[i]; |
1399 | 0 | } |
1400 | |
|
1401 | 0 | return ASTCENC_SUCCESS; |
1402 | 0 | #endif |
1403 | 0 | } |
1404 | | |
1405 | | /* See header for documentation. */ |
1406 | | const char* astcenc_get_error_string( |
1407 | | astcenc_error status |
1408 | 0 | ) { |
1409 | | // Values in this enum are from an external user, so not guaranteed to be |
1410 | | // bounded to the enum values |
1411 | 0 | switch (static_cast<int>(status)) |
1412 | 0 | { |
1413 | 0 | case ASTCENC_SUCCESS: |
1414 | 0 | return "ASTCENC_SUCCESS"; |
1415 | 0 | case ASTCENC_ERR_OUT_OF_MEM: |
1416 | 0 | return "ASTCENC_ERR_OUT_OF_MEM"; |
1417 | 0 | case ASTCENC_ERR_BAD_CPU_FLOAT: |
1418 | 0 | return "ASTCENC_ERR_BAD_CPU_FLOAT"; |
1419 | 0 | case ASTCENC_ERR_BAD_PARAM: |
1420 | 0 | return "ASTCENC_ERR_BAD_PARAM"; |
1421 | 0 | case ASTCENC_ERR_BAD_BLOCK_SIZE: |
1422 | 0 | return "ASTCENC_ERR_BAD_BLOCK_SIZE"; |
1423 | 0 | case ASTCENC_ERR_BAD_PROFILE: |
1424 | 0 | return "ASTCENC_ERR_BAD_PROFILE"; |
1425 | 0 | case ASTCENC_ERR_BAD_QUALITY: |
1426 | 0 | return "ASTCENC_ERR_BAD_QUALITY"; |
1427 | 0 | case ASTCENC_ERR_BAD_FLAGS: |
1428 | 0 | return "ASTCENC_ERR_BAD_FLAGS"; |
1429 | 0 | case ASTCENC_ERR_BAD_SWIZZLE: |
1430 | 0 | return "ASTCENC_ERR_BAD_SWIZZLE"; |
1431 | 0 | case ASTCENC_ERR_BAD_CONTEXT: |
1432 | 0 | return "ASTCENC_ERR_BAD_CONTEXT"; |
1433 | 0 | case ASTCENC_ERR_NOT_IMPLEMENTED: |
1434 | 0 | return "ASTCENC_ERR_NOT_IMPLEMENTED"; |
1435 | 0 | case ASTCENC_ERR_BAD_DECODE_MODE: |
1436 | 0 | return "ASTCENC_ERR_BAD_DECODE_MODE"; |
1437 | | #if defined(ASTCENC_DIAGNOSTICS) |
1438 | | case ASTCENC_ERR_DTRACE_FAILURE: |
1439 | | return "ASTCENC_ERR_DTRACE_FAILURE"; |
1440 | | #endif |
1441 | 0 | default: |
1442 | 0 | return nullptr; |
1443 | 0 | } |
1444 | 0 | } |