Coverage Report

Created: 2026-06-04 06:52

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/astc-encoder/Source/astcenc_entry.cpp
Line
Count
Source
1
// SPDX-License-Identifier: Apache-2.0
2
// ----------------------------------------------------------------------------
3
// Copyright 2011-2026 Arm Limited
4
//
5
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
6
// use this file except in compliance with the License. You may obtain a copy
7
// of the License at:
8
//
9
//     http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing, software
12
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14
// License for the specific language governing permissions and limitations
15
// under the License.
16
// ----------------------------------------------------------------------------
17
18
/**
19
 * @brief Functions for the library entrypoint.
20
 */
21
22
#include <array>
23
#include <cstring>
24
#include <new>
25
26
#include "astcenc.h"
27
#include "astcenc_diagnostic_trace.h"
28
#include "astcenc_internal_entry.h"
29
#include "astcenc_mathlib.h"
30
31
/**
 * @brief One row of quality tuning parameter values for a preset.
 *
 * See the @c astcenc_config structure for detailed parameter documentation.
 *
 * Note that the mse_overshoot entries are scaling factors relative to the base MSE to hit db_limit.
 * A 20% overshoot is harder to hit for a higher base db_limit, so we may actually use lower ratios
 * for the more thorough search presets because the underlying db_limit is so much higher.
 *
 * The member order matters: the preset tables in this file use positional aggregate
 * initialization, so members must stay in this exact sequence.
 */
struct astcenc_preset_config
{
  /** @brief The quality level that this row of settings corresponds to. */
  float quality;

  /** @brief Preset value for the config field of the same name. */
  unsigned int tune_partition_count_limit;

  /** @brief Preset value for the config field of the same name. */
  unsigned int tune_2partition_index_limit;

  /** @brief Preset value for the config field of the same name. */
  unsigned int tune_3partition_index_limit;

  /** @brief Preset value for the config field of the same name. */
  unsigned int tune_4partition_index_limit;

  /** @brief Preset value for the config field of the same name. */
  unsigned int tune_block_mode_limit;

  /** @brief Preset value for the config field of the same name. */
  unsigned int tune_refinement_limit;

  /** @brief Preset value for the config field of the same name. */
  unsigned int tune_candidate_limit;

  /** @brief Preset value for the config field of the same name. */
  unsigned int tune_2partitioning_candidate_limit;

  /** @brief Preset value for the config field of the same name. */
  unsigned int tune_3partitioning_candidate_limit;

  /** @brief Preset value for the config field of the same name. */
  unsigned int tune_4partitioning_candidate_limit;

  /** @brief First dB limit base; reduced by a block-size term to give one tune_db_limit candidate. */
  float tune_db_limit_a_base;

  /** @brief Second dB limit base; reduced by a block-size term to give the other candidate. */
  float tune_db_limit_b_base;

  /** @brief Preset value for the config field of the same name. */
  float tune_mse_overshoot;

  /** @brief Preset value for the config field of the same name. */
  float tune_2partition_early_out_limit_factor;

  /** @brief Preset value for the config field of the same name. */
  float tune_3partition_early_out_limit_factor;

  /** @brief Preset value for the config field of the same name. */
  float tune_2plane_early_out_limit_correlation;

  /** @brief Preset value for the config field of the same name. */
  float tune_search_mode0_enable;
};
61
62
/**
63
 * @brief The static presets for high bandwidth encodings (x < 25 texels per block).
64
 */
65
static const std::array<astcenc_preset_config, 6> preset_configs_high {{
66
  {
67
    ASTCENC_PRE_FASTEST,
68
    2,  10,   6,   4,  43, 2, 2, 2, 2, 2,  85.2f,  63.2f,  3.5f, 1.00f, 1.00f, 0.85f, 0.0f
69
  }, {
70
    ASTCENC_PRE_FAST,
71
    3,  18,  10,   8,  55, 3, 3, 2, 2, 2,  85.2f,  63.2f,  3.5f, 1.00f, 1.00f, 0.90f, 0.0f
72
  }, {
73
    ASTCENC_PRE_MEDIUM,
74
    4,  34,  28,  16,  77, 3, 3, 2, 2, 2,  95.0f,  70.0f,  2.5f, 1.10f, 1.05f, 0.95f, 0.0f
75
  }, {
76
    ASTCENC_PRE_THOROUGH,
77
    4,  82,  60,  30,  94, 4, 4, 3, 2, 2, 105.0f,  77.0f, 10.0f, 1.35f, 1.15f, 0.97f, 0.0f
78
  }, {
79
    ASTCENC_PRE_VERYTHOROUGH,
80
    4, 256, 128,  64,  98, 4, 6, 8, 6, 4, 200.0f, 200.0f, 10.0f, 1.60f, 1.40f, 0.98f, 0.0f
81
  }, {
82
    ASTCENC_PRE_EXHAUSTIVE,
83
    4, 512, 512, 512, 100, 4, 8, 8, 8, 8, 200.0f, 200.0f, 10.0f, 2.00f, 2.00f, 0.99f, 0.0f
84
  }
85
}};
86
87
/**
88
 * @brief The static presets for medium bandwidth encodings (25 <= x < 64 texels per block).
89
 */
90
static const std::array<astcenc_preset_config, 6> preset_configs_mid {{
91
  {
92
    ASTCENC_PRE_FASTEST,
93
    2,  10,   6,   4,  43, 2, 2, 2, 2, 2,  85.2f,  63.2f,  3.5f, 1.00f, 1.00f, 0.80f, 1.0f
94
  }, {
95
    ASTCENC_PRE_FAST,
96
    3,  18,  12,  10,  55, 3, 3, 2, 2, 2,  85.2f,  63.2f,  3.5f, 1.00f, 1.00f, 0.85f, 1.0f
97
  }, {
98
    ASTCENC_PRE_MEDIUM,
99
    3,  34,  28,  16,  77, 3, 3, 2, 2, 2,  95.0f,  70.0f,  3.0f, 1.10f, 1.05f, 0.90f, 1.0f
100
  }, {
101
    ASTCENC_PRE_THOROUGH,
102
    4,  82,  60,  30,  94, 4, 4, 3, 2, 2, 105.0f,  77.0f, 10.0f, 1.40f, 1.20f, 0.95f, 0.0f
103
  }, {
104
    ASTCENC_PRE_VERYTHOROUGH,
105
    4, 256, 128,  64,  98, 4, 6, 8, 6, 3, 200.0f, 200.0f, 10.0f, 1.60f, 1.40f, 0.98f, 0.0f
106
  }, {
107
    ASTCENC_PRE_EXHAUSTIVE,
108
    4, 256, 256, 256, 100, 4, 8, 8, 8, 8, 200.0f, 200.0f, 10.0f, 2.00f, 2.00f, 0.99f, 0.0f
109
  }
110
}};
111
112
/**
113
 * @brief The static presets for low bandwidth encodings (64 <= x texels per block).
114
 */
115
static const std::array<astcenc_preset_config, 6> preset_configs_low {{
116
  {
117
    ASTCENC_PRE_FASTEST,
118
    2,  10,   6,   4,  40, 2, 2, 2, 2, 2,  85.0f,  63.0f,  3.5f, 1.00f, 1.00f, 0.80f, 1.0f
119
  }, {
120
    ASTCENC_PRE_FAST,
121
    2,  18,  12,  10,  55, 3, 3, 2, 2, 2,  85.0f,  63.0f,  3.5f, 1.00f, 1.00f, 0.85f, 1.0f
122
  }, {
123
    ASTCENC_PRE_MEDIUM,
124
    3,  34,  28,  16,  77, 3, 3, 2, 2, 2,  95.0f,  70.0f,  3.5f, 1.10f, 1.05f, 0.90f, 1.0f
125
  }, {
126
    ASTCENC_PRE_THOROUGH,
127
    4,  82,  60,  30,  93, 4, 4, 3, 2, 2, 105.0f,  77.0f, 10.0f, 1.30f, 1.20f, 0.97f, 1.0f
128
  }, {
129
    ASTCENC_PRE_VERYTHOROUGH,
130
    4, 256, 128,  64,  98, 4, 6, 8, 5, 2, 200.0f, 200.0f, 10.0f, 1.60f, 1.40f, 0.98f, 1.0f
131
  }, {
132
    ASTCENC_PRE_EXHAUSTIVE,
133
    4, 256, 256, 256, 100, 4, 8, 8, 8, 8, 200.0f, 200.0f, 10.0f, 2.00f, 2.00f, 0.99f, 1.0f
134
  }
135
}};
136
137
/**
138
 * @brief Get the total number of texels in an image.
139
 *
140
 * This function validates that the total size would fit in a size_t and returns
141
 * 0 if it does not.
142
 *
143
 * @param texels_x   Number of texels in the X axis.
144
 * @param texels_y   Number of texels in the Y axis.
145
 * @param texels_z   Number of texels in the Z axis.
146
 *
147
 * @return The number of texels in the image, or zero if total size would not
148
 *         fit into a size_t.
149
 */
150
static size_t get_texels_count(
151
  size_t texels_x,
152
  size_t texels_y,
153
  size_t texels_z
154
3.34k
) {
155
3.34k
  bool overflow { false };
156
157
  // Compute texel count
158
3.34k
  size_t texels_count = astc::mul_safe(texels_x, texels_y, overflow);
159
3.34k
  texels_count = astc::mul_safe(texels_count, texels_z, overflow);
160
161
3.34k
  if (overflow)
162
0
  {
163
0
    return 0;
164
0
  }
165
166
3.34k
  return texels_count;
167
3.34k
}
168
169
/**
170
 * @brief Get the total number of blocks in an image.
171
 *
172
 * This function also validates that the total size of the compressed image,
173
 * in bytes, would fit in a size_t.
174
 *
175
 * @param blocks_x   Number of blocks in the X axis.
176
 * @param blocks_y   Number of blocks in the Y axis.
177
 * @param blocks_z   Number of blocks in the Z axis.
178
 *
179
 * @return The number of blocks in the image, or zero if total size would not
180
 *         fit into a size_t.
181
 */
182
static size_t get_blocks_count(
183
  size_t blocks_x,
184
  size_t blocks_y,
185
  size_t blocks_z
186
5.58k
) {
187
5.58k
  bool overflow { false };
188
189
  // Compute block count
190
5.58k
  size_t blocks_count = astc::mul_safe(blocks_x, blocks_y, overflow);
191
5.58k
  blocks_count = astc::mul_safe(blocks_count, blocks_z, overflow);
192
193
  // Also compute byte count, but we only use overflow and not the result
194
5.58k
  astc::mul_safe(blocks_count, 16, overflow);
195
196
5.58k
  if (overflow)
197
0
  {
198
0
    return 0;
199
0
  }
200
201
5.58k
  return blocks_count;
202
5.58k
}
203
204
/**
205
 * @brief Validate CPU floating point meets assumptions made in the codec.
206
 *
207
 * The codec is written with the assumption that float bit patterns are valid
208
 * IEEE754 values that are stored and reloaded with round-to-nearest rounding.
209
 * This is always the case in an IEEE-754 compliant system, however not every
210
 * system or compilation mode is actually IEEE-754 compliant. This normally
211
 * fails if the code is compiled with fast math enabled, for example.
212
 *
213
 * @return Return @c ASTCENC_SUCCESS if validated, an error on failure.
214
 */
215
static astcenc_error validate_cpu_float()
216
7.25k
{
217
7.25k
  volatile float xprec_testval = 2.51f;
218
7.25k
  float store = xprec_testval + 12582912.0f;
219
7.25k
  float q = store - 12582912.0f;
220
221
7.25k
  if (q != 3.0f)
222
0
  {
223
0
    return ASTCENC_ERR_BAD_CPU_FLOAT;
224
0
  }
225
226
7.25k
  return ASTCENC_SUCCESS;
227
7.25k
}
228
229
/**
230
 * @brief Validate config profile.
231
 *
232
 * @param profile   The profile to check.
233
 *
234
 * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
235
 */
236
static astcenc_error validate_profile(
237
  astcenc_profile profile
238
3.60k
) {
239
  // Values in this enum are from an external user, so not guaranteed to be
240
  // bounded to the enum values
241
3.60k
  switch (static_cast<int>(profile))
242
3.60k
  {
243
1.18k
  case ASTCENC_PRF_LDR_SRGB:
244
1.85k
  case ASTCENC_PRF_LDR:
245
2.72k
  case ASTCENC_PRF_HDR_RGB_LDR_A:
246
3.60k
  case ASTCENC_PRF_HDR:
247
3.60k
    return ASTCENC_SUCCESS;
248
0
  default:
249
0
    return ASTCENC_ERR_BAD_PROFILE;
250
3.60k
  }
251
3.60k
}
252
253
/**
254
 * @brief Validate block size.
255
 *
256
 * @param block_x   The block x dimensions.
257
 * @param block_y   The block y dimensions.
258
 * @param block_z   The block z dimensions.
259
 *
260
 * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
261
 */
262
static astcenc_error validate_block_size(
263
  unsigned int block_x,
264
  unsigned int block_y,
265
  unsigned int block_z
266
7.25k
) {
267
  // Test if this is a legal block size at all
268
7.25k
  bool is_legal = (((block_z <= 1) && is_legal_2d_block_size(block_x, block_y)) ||
269
0
                   ((block_z >= 2) && is_legal_3d_block_size(block_x, block_y, block_z)));
270
7.25k
  if (!is_legal)
271
0
  {
272
0
    return ASTCENC_ERR_BAD_BLOCK_SIZE;
273
0
  }
274
275
  // Test if this build has sufficient capacity for this block size
276
7.25k
  bool have_capacity = (block_x * block_y * block_z) <= BLOCK_MAX_TEXELS;
277
7.25k
  if (!have_capacity)
278
0
  {
279
0
    return ASTCENC_ERR_NOT_IMPLEMENTED;
280
0
  }
281
282
7.25k
  return ASTCENC_SUCCESS;
283
7.25k
}
284
285
/**
286
 * @brief Validate flags.
287
 *
288
 * @param profile   The profile to check.
289
 * @param flags     The flags to check.
290
 *
291
 * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
292
 */
293
static astcenc_error validate_flags(
294
  astcenc_profile profile,
295
  unsigned int flags
296
7.25k
) {
297
  // Flags field must not contain any unknown flag bits
298
7.25k
  unsigned int exMask = ~ASTCENC_ALL_FLAGS;
299
7.25k
  if (popcount(flags & exMask) != 0)
300
0
  {
301
0
    return ASTCENC_ERR_BAD_FLAGS;
302
0
  }
303
304
  // Flags field must only contain at most a single map type
305
7.25k
  exMask = ASTCENC_FLG_MAP_NORMAL
306
7.25k
         | ASTCENC_FLG_MAP_RGBM;
307
7.25k
  if (popcount(flags & exMask) > 1)
308
29
  {
309
29
    return ASTCENC_ERR_BAD_FLAGS;
310
29
  }
311
312
  // Decode_unorm8 must only be used with an LDR profile
313
7.22k
  bool is_unorm8 = flags & ASTCENC_FLG_USE_DECODE_UNORM8;
314
7.22k
  bool is_hdr = (profile == ASTCENC_PRF_HDR) || (profile == ASTCENC_PRF_HDR_RGB_LDR_A);
315
7.22k
  if (is_unorm8 && is_hdr)
316
18
  {
317
18
    return ASTCENC_ERR_BAD_DECODE_MODE;
318
18
  }
319
320
7.20k
  return ASTCENC_SUCCESS;
321
7.22k
}
322
323
#if !defined(ASTCENC_DECOMPRESS_ONLY)
324
325
/**
326
 * @brief Validate single channel compression swizzle.
327
 *
328
 * @param swizzle   The swizzle to check.
329
 *
330
 * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
331
 */
332
static astcenc_error validate_compression_swz(
333
  astcenc_swz swizzle
334
8.94k
) {
335
  // Not all enum values are handled; SWZ_Z is invalid for compression
336
8.94k
  switch (static_cast<int>(swizzle))
337
8.94k
  {
338
2.23k
  case ASTCENC_SWZ_R:
339
4.47k
  case ASTCENC_SWZ_G:
340
6.70k
  case ASTCENC_SWZ_B:
341
8.94k
  case ASTCENC_SWZ_A:
342
8.94k
  case ASTCENC_SWZ_0:
343
8.94k
  case ASTCENC_SWZ_1:
344
8.94k
    return ASTCENC_SUCCESS;
345
0
  default:
346
0
    return ASTCENC_ERR_BAD_SWIZZLE;
347
8.94k
  }
348
8.94k
}
349
350
/**
351
 * @brief Validate overall compression swizzle.
352
 *
353
 * @param swizzle   The swizzle to check.
354
 *
355
 * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
356
 */
357
static astcenc_error validate_compression_swizzle(
358
  const astcenc_swizzle& swizzle
359
2.23k
) {
360
2.23k
  if (validate_compression_swz(swizzle.r) ||
361
2.23k
      validate_compression_swz(swizzle.g) ||
362
2.23k
      validate_compression_swz(swizzle.b) ||
363
2.23k
      validate_compression_swz(swizzle.a))
364
0
  {
365
0
    return ASTCENC_ERR_BAD_SWIZZLE;
366
0
  }
367
368
2.23k
  return ASTCENC_SUCCESS;
369
2.23k
}
370
#endif
371
372
/**
373
 * @brief Validate single channel decompression swizzle.
374
 *
375
 * @param swizzle   The swizzle to check.
376
 *
377
 * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
378
 */
379
static astcenc_error validate_decompression_swz(
380
  astcenc_swz swizzle
381
4.45k
) {
382
  // Values in this enum are from an external user, so not guaranteed to be
383
  // bounded to the enum values
384
4.45k
  switch (static_cast<int>(swizzle))
385
4.45k
  {
386
1.11k
  case ASTCENC_SWZ_R:
387
2.22k
  case ASTCENC_SWZ_G:
388
3.34k
  case ASTCENC_SWZ_B:
389
4.45k
  case ASTCENC_SWZ_A:
390
4.45k
  case ASTCENC_SWZ_0:
391
4.45k
  case ASTCENC_SWZ_1:
392
4.45k
  case ASTCENC_SWZ_Z:
393
4.45k
    return ASTCENC_SUCCESS;
394
0
  default:
395
0
    return ASTCENC_ERR_BAD_SWIZZLE;
396
4.45k
  }
397
4.45k
}
398
399
/**
400
 * @brief Validate overall decompression swizzle.
401
 *
402
 * @param swizzle   The swizzle to check.
403
 *
404
 * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
405
 */
406
static astcenc_error validate_decompression_swizzle(
407
  const astcenc_swizzle& swizzle
408
1.11k
) {
409
1.11k
  if (validate_decompression_swz(swizzle.r) ||
410
1.11k
      validate_decompression_swz(swizzle.g) ||
411
1.11k
      validate_decompression_swz(swizzle.b) ||
412
1.11k
      validate_decompression_swz(swizzle.a))
413
0
  {
414
0
    return ASTCENC_ERR_BAD_SWIZZLE;
415
0
  }
416
417
1.11k
  return ASTCENC_SUCCESS;
418
1.11k
}
419
420
/**
421
 * Validate that an incoming configuration is in-spec.
422
 *
423
 * This function can respond in two ways:
424
 *
425
 *   * Numerical inputs that have valid ranges are clamped to those valid ranges. No error is thrown
426
 *     for out-of-range inputs in this case.
427
 *   * Numerical inputs and logic inputs are are logically invalid and which make no sense
428
 *     algorithmically will return an error.
429
 *
430
 * @param[in,out] config   The input compressor configuration.
431
 *
432
 * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
433
 */
434
static astcenc_error validate_config(
435
  astcenc_config &config
436
3.60k
) {
437
3.60k
  astcenc_error status;
438
439
3.60k
  status = validate_profile(config.profile);
440
3.60k
  if (status != ASTCENC_SUCCESS)
441
0
  {
442
0
    return status;
443
0
  }
444
445
3.60k
  status = validate_flags(config.profile, config.flags);
446
3.60k
  if (status != ASTCENC_SUCCESS)
447
0
  {
448
0
    return status;
449
0
  }
450
451
3.60k
  status = validate_block_size(config.block_x, config.block_y, config.block_z);
452
3.60k
  if (status != ASTCENC_SUCCESS)
453
0
  {
454
0
    return status;
455
0
  }
456
457
#if defined(ASTCENC_DECOMPRESS_ONLY)
458
  // Decompress-only builds only support decompress-only contexts
459
  if (!(config.flags & ASTCENC_FLG_DECOMPRESS_ONLY))
460
  {
461
    return ASTCENC_ERR_BAD_PARAM;
462
  }
463
#endif
464
465
3.60k
  config.rgbm_m_scale = astc::max(config.rgbm_m_scale, 1.0f);
466
467
3.60k
  config.tune_partition_count_limit = astc::clamp(config.tune_partition_count_limit, 1u, 4u);
468
3.60k
  config.tune_2partition_index_limit = astc::clamp(config.tune_2partition_index_limit, 1u, BLOCK_MAX_PARTITIONINGS);
469
3.60k
  config.tune_3partition_index_limit = astc::clamp(config.tune_3partition_index_limit, 1u, BLOCK_MAX_PARTITIONINGS);
470
3.60k
  config.tune_4partition_index_limit = astc::clamp(config.tune_4partition_index_limit, 1u, BLOCK_MAX_PARTITIONINGS);
471
3.60k
  config.tune_block_mode_limit = astc::clamp(config.tune_block_mode_limit, 1u, 100u);
472
3.60k
  config.tune_refinement_limit = astc::max(config.tune_refinement_limit, 1u);
473
3.60k
  config.tune_candidate_limit = astc::clamp(config.tune_candidate_limit, 1u, TUNE_MAX_TRIAL_CANDIDATES);
474
3.60k
  config.tune_2partitioning_candidate_limit = astc::clamp(config.tune_2partitioning_candidate_limit, 1u, TUNE_MAX_PARTITIONING_CANDIDATES);
475
3.60k
  config.tune_3partitioning_candidate_limit = astc::clamp(config.tune_3partitioning_candidate_limit, 1u, TUNE_MAX_PARTITIONING_CANDIDATES);
476
3.60k
  config.tune_4partitioning_candidate_limit = astc::clamp(config.tune_4partitioning_candidate_limit, 1u, TUNE_MAX_PARTITIONING_CANDIDATES);
477
3.60k
  config.tune_db_limit = astc::max(config.tune_db_limit, 0.0f);
478
3.60k
  config.tune_mse_overshoot = astc::max(config.tune_mse_overshoot, 1.0f);
479
3.60k
  config.tune_2partition_early_out_limit_factor = astc::max(config.tune_2partition_early_out_limit_factor, 0.0f);
480
3.60k
  config.tune_3partition_early_out_limit_factor = astc::max(config.tune_3partition_early_out_limit_factor, 0.0f);
481
3.60k
  config.tune_2plane_early_out_limit_correlation = astc::max(config.tune_2plane_early_out_limit_correlation, 0.0f);
482
483
  // Specifying a zero weight color component is not allowed; force to small value
484
3.60k
  float max_weight = astc::max(astc::max(config.cw_r_weight, config.cw_g_weight),
485
3.60k
                               astc::max(config.cw_b_weight, config.cw_a_weight));
486
3.60k
  if (max_weight > 0.0f)
487
3.60k
  {
488
3.60k
    max_weight /= 1000.0f;
489
3.60k
    config.cw_r_weight = astc::max(config.cw_r_weight, max_weight);
490
3.60k
    config.cw_g_weight = astc::max(config.cw_g_weight, max_weight);
491
3.60k
    config.cw_b_weight = astc::max(config.cw_b_weight, max_weight);
492
3.60k
    config.cw_a_weight = astc::max(config.cw_a_weight, max_weight);
493
3.60k
  }
494
  // If all color components error weights are zero then return an error
495
0
  else
496
0
  {
497
0
    return ASTCENC_ERR_BAD_PARAM;
498
0
  }
499
500
3.60k
  return ASTCENC_SUCCESS;
501
3.60k
}
502
503
/* See header for documentation. */
504
astcenc_error astcenc_config_init(
505
  astcenc_profile profile,
506
  unsigned int block_x,
507
  unsigned int block_y,
508
  unsigned int block_z,
509
  float quality,
510
  unsigned int flags,
511
  astcenc_config* configp
512
3.65k
) {
513
3.65k
  astcenc_error status;
514
515
3.65k
  status = validate_cpu_float();
516
3.65k
  if (status != ASTCENC_SUCCESS)
517
0
  {
518
0
    return status;
519
0
  }
520
521
  // Zero init all config fields; although most of will be over written
522
3.65k
  astcenc_config& config = *configp;
523
3.65k
  std::memset(&config, 0, sizeof(config));
524
525
  // Process the block size
526
  // For 2D blocks Z==0 is accepted, but convert to 1
527
3.65k
  block_z = astc::max(block_z, 1u);
528
3.65k
  status = validate_block_size(block_x, block_y, block_z);
529
3.65k
  if (status != ASTCENC_SUCCESS)
530
0
  {
531
0
    return status;
532
0
  }
533
534
3.65k
  config.block_x = block_x;
535
3.65k
  config.block_y = block_y;
536
3.65k
  config.block_z = block_z;
537
538
3.65k
  float texels = static_cast<float>(block_x * block_y * block_z);
539
3.65k
  float ltexels = logf(texels) / logf(10.0f);
540
541
  // Process the performance quality level or preset; note that this must be done before we
542
  // process any additional settings, such as color profile and flags, which may replace some of
543
  // these settings with more use case tuned values
544
3.65k
  if (quality < ASTCENC_PRE_FASTEST ||
545
3.65k
      quality > ASTCENC_PRE_EXHAUSTIVE)
546
0
  {
547
0
    return ASTCENC_ERR_BAD_QUALITY;
548
0
  }
549
550
3.65k
  static const std::array<astcenc_preset_config, 6>* preset_configs;
551
3.65k
  size_t texels_int = block_x * block_y * block_z;
552
3.65k
  if (texels_int < 25)
553
1.97k
  {
554
1.97k
    preset_configs = &preset_configs_high;
555
1.97k
  }
556
1.67k
  else if (texels_int < 64)
557
1.01k
  {
558
1.01k
    preset_configs = &preset_configs_mid;
559
1.01k
  }
560
660
  else
561
660
  {
562
660
    preset_configs = &preset_configs_low;
563
660
  }
564
565
  // Determine which preset to use, or which pair to interpolate
566
3.65k
  size_t start;
567
3.65k
  size_t end;
568
10.3k
  for (end = 0; end < preset_configs->size(); end++)
569
10.3k
  {
570
10.3k
    if ((*preset_configs)[end].quality >= quality)
571
3.65k
    {
572
3.65k
      break;
573
3.65k
    }
574
10.3k
  }
575
576
3.65k
  start = end == 0 ? 0 : end - 1;
577
578
  // Start and end node are the same - so just transfer the values.
579
3.65k
  if (start == end)
580
175
  {
581
175
    config.tune_partition_count_limit = (*preset_configs)[start].tune_partition_count_limit;
582
175
    config.tune_2partition_index_limit = (*preset_configs)[start].tune_2partition_index_limit;
583
175
    config.tune_3partition_index_limit = (*preset_configs)[start].tune_3partition_index_limit;
584
175
    config.tune_4partition_index_limit = (*preset_configs)[start].tune_4partition_index_limit;
585
175
    config.tune_block_mode_limit = (*preset_configs)[start].tune_block_mode_limit;
586
175
    config.tune_refinement_limit = (*preset_configs)[start].tune_refinement_limit;
587
175
    config.tune_candidate_limit = (*preset_configs)[start].tune_candidate_limit;
588
175
    config.tune_2partitioning_candidate_limit = (*preset_configs)[start].tune_2partitioning_candidate_limit;
589
175
    config.tune_3partitioning_candidate_limit = (*preset_configs)[start].tune_3partitioning_candidate_limit;
590
175
    config.tune_4partitioning_candidate_limit = (*preset_configs)[start].tune_4partitioning_candidate_limit;
591
175
    config.tune_db_limit = astc::max((*preset_configs)[start].tune_db_limit_a_base - 35 * ltexels,
592
175
                                     (*preset_configs)[start].tune_db_limit_b_base - 19 * ltexels);
593
594
175
    config.tune_mse_overshoot = (*preset_configs)[start].tune_mse_overshoot;
595
596
175
    config.tune_2partition_early_out_limit_factor = (*preset_configs)[start].tune_2partition_early_out_limit_factor;
597
175
    config.tune_3partition_early_out_limit_factor = (*preset_configs)[start].tune_3partition_early_out_limit_factor;
598
175
    config.tune_2plane_early_out_limit_correlation = (*preset_configs)[start].tune_2plane_early_out_limit_correlation;
599
175
    config.tune_search_mode0_enable = (*preset_configs)[start].tune_search_mode0_enable;
600
175
  }
601
  // Start and end node are not the same - so interpolate between them
602
3.47k
  else
603
3.47k
  {
604
3.47k
    auto& node_a = (*preset_configs)[start];
605
3.47k
    auto& node_b = (*preset_configs)[end];
606
607
3.47k
    float wt_range = node_b.quality - node_a.quality;
608
3.47k
    assert(wt_range > 0);
609
610
    // Compute interpolation factors
611
3.47k
    float wt_node_a = (node_b.quality - quality) / wt_range;
612
3.47k
    float wt_node_b = (quality - node_a.quality) / wt_range;
613
614
24.3k
    #define LERP(param) ((node_a.param * wt_node_a) + (node_b.param * wt_node_b))
615
34.7k
    #define LERPI(param) astc::flt2int_rtn(\
616
34.7k
                             (static_cast<float>(node_a.param) * wt_node_a) + \
617
34.7k
                             (static_cast<float>(node_b.param) * wt_node_b))
618
13.9k
    #define LERPUI(param) static_cast<unsigned int>(LERPI(param))
619
620
3.47k
    config.tune_partition_count_limit = LERPI(tune_partition_count_limit);
621
3.47k
    config.tune_2partition_index_limit = LERPI(tune_2partition_index_limit);
622
3.47k
    config.tune_3partition_index_limit = LERPI(tune_3partition_index_limit);
623
3.47k
    config.tune_4partition_index_limit = LERPI(tune_4partition_index_limit);
624
3.47k
    config.tune_block_mode_limit = LERPI(tune_block_mode_limit);
625
3.47k
    config.tune_refinement_limit = LERPI(tune_refinement_limit);
626
3.47k
    config.tune_candidate_limit = LERPUI(tune_candidate_limit);
627
3.47k
    config.tune_2partitioning_candidate_limit = LERPUI(tune_2partitioning_candidate_limit);
628
3.47k
    config.tune_3partitioning_candidate_limit = LERPUI(tune_3partitioning_candidate_limit);
629
3.47k
    config.tune_4partitioning_candidate_limit = LERPUI(tune_4partitioning_candidate_limit);
630
3.47k
    config.tune_db_limit = astc::max(LERP(tune_db_limit_a_base) - 35 * ltexels,
631
3.47k
                                     LERP(tune_db_limit_b_base) - 19 * ltexels);
632
633
3.47k
    config.tune_mse_overshoot = LERP(tune_mse_overshoot);
634
635
3.47k
    config.tune_2partition_early_out_limit_factor = LERP(tune_2partition_early_out_limit_factor);
636
3.47k
    config.tune_3partition_early_out_limit_factor = LERP(tune_3partition_early_out_limit_factor);
637
3.47k
    config.tune_2plane_early_out_limit_correlation = LERP(tune_2plane_early_out_limit_correlation);
638
3.47k
    config.tune_search_mode0_enable = LERP(tune_search_mode0_enable);
639
3.47k
    #undef LERP
640
3.47k
    #undef LERPI
641
3.47k
    #undef LERPUI
642
3.47k
  }
643
644
  // Set heuristics to the defaults for each color profile
645
3.65k
  config.cw_r_weight = 1.0f;
646
3.65k
  config.cw_g_weight = 1.0f;
647
3.65k
  config.cw_b_weight = 1.0f;
648
3.65k
  config.cw_a_weight = 1.0f;
649
650
3.65k
  config.a_scale_radius = 0;
651
652
3.65k
  config.rgbm_m_scale = 0.0f;
653
654
3.65k
  config.profile = profile;
655
656
  // Values in this enum are from an external user, so not guaranteed to be
657
  // bounded to the enum values
658
3.65k
  switch (static_cast<int>(profile))
659
3.65k
  {
660
676
  case ASTCENC_PRF_LDR:
661
1.87k
  case ASTCENC_PRF_LDR_SRGB:
662
1.87k
    break;
663
883
  case ASTCENC_PRF_HDR_RGB_LDR_A:
664
1.78k
  case ASTCENC_PRF_HDR:
665
1.78k
    config.tune_db_limit = 999.0f;
666
1.78k
    config.tune_search_mode0_enable = 0.0f;
667
1.78k
    break;
668
0
  default:
669
0
    return ASTCENC_ERR_BAD_PROFILE;
670
3.65k
  }
671
672
  // Flags field must not contain any unknown flag bits
673
3.65k
  status = validate_flags(profile, flags);
674
3.65k
  if (status != ASTCENC_SUCCESS)
675
47
  {
676
47
    return status;
677
47
  }
678
679
3.60k
  if (flags & ASTCENC_FLG_MAP_NORMAL)
680
1.01k
  {
681
    // Normal map encoding uses L+A blocks, so allow one more partitioning
682
    // than normal. We need need fewer bits for endpoints, so more likely
683
    // to be able to use more partitions than an RGB/RGBA block
684
1.01k
    config.tune_partition_count_limit = astc::min(config.tune_partition_count_limit + 1u, 4u);
685
686
1.01k
    config.cw_g_weight = 0.0f;
687
1.01k
    config.cw_b_weight = 0.0f;
688
1.01k
    config.tune_2partition_early_out_limit_factor *= 1.5f;
689
1.01k
    config.tune_3partition_early_out_limit_factor *= 1.5f;
690
1.01k
    config.tune_2plane_early_out_limit_correlation = 0.99f;
691
692
    // Normals are prone to blocking artifacts on smooth curves
693
    // so force compressor to try harder here ...
694
1.01k
    config.tune_db_limit *= 1.03f;
695
1.01k
  }
696
2.58k
  else if (flags & ASTCENC_FLG_MAP_RGBM)
697
937
  {
698
937
    config.rgbm_m_scale = 5.0f;
699
937
    config.cw_a_weight = 2.0f * config.rgbm_m_scale;
700
937
  }
701
1.65k
  else // (This is color data)
702
1.65k
  {
703
    // This is a very basic perceptual metric for RGB color data, which weights error
704
    // significance by the perceptual luminance contribution of each color channel. For
705
    // luminance the usual weights to compute luminance from a linear RGB value are as
706
    // follows:
707
    //
708
    //     l = r * 0.3 + g * 0.59 + b * 0.11
709
    //
710
    // ... but we scale these up to keep a better balance between color and alpha. Note
711
    // that if the content is using alpha we'd recommend using the -a option to weight
712
    // the color contribution by the alpha transparency.
713
1.65k
    if (flags & ASTCENC_FLG_USE_PERCEPTUAL)
714
233
    {
715
233
      config.cw_r_weight = 0.30f * 2.25f;
716
233
      config.cw_g_weight = 0.59f * 2.25f;
717
233
      config.cw_b_weight = 0.11f * 2.25f;
718
233
    }
719
1.65k
  }
720
3.60k
  config.flags = flags;
721
722
3.60k
  return ASTCENC_SUCCESS;
723
3.65k
}
724
725
/* See header for documentation. */
726
astcenc_error astcenc_context_alloc(
727
  const astcenc_config* configp,
728
  unsigned int thread_count,
729
  astcenc_context** context,
730
  const astcenc_context* parent_context
731
3.60k
) {
732
3.60k
  astcenc_error status;
733
734
3.60k
  status = validate_cpu_float();
735
3.60k
  if (status != ASTCENC_SUCCESS)
736
0
  {
737
0
    return status;
738
0
  }
739
740
3.60k
  if (thread_count == 0)
741
0
  {
742
0
    return ASTCENC_ERR_BAD_PARAM;
743
0
  }
744
745
#if defined(ASTCENC_DIAGNOSTICS)
746
  // Force single threaded compressor use in diagnostic mode
747
  if (thread_count != 1)
748
  {
749
    return ASTCENC_ERR_BAD_PARAM;
750
  }
751
#endif
752
753
  // Exactly one of config or parent_context must be set
754
3.60k
  bool has_config = configp != nullptr;
755
3.60k
  bool has_parent = parent_context != nullptr;
756
3.60k
  if (!(has_config ^ has_parent))
757
0
  {
758
0
    return ASTCENC_ERR_BAD_PARAM;
759
0
  }
760
761
3.60k
  if (has_parent)
762
0
  {
763
0
    configp = &parent_context->context.config;
764
0
  }
765
766
3.60k
  const astcenc_config& config = *configp;
767
3.60k
  astcenc_context* ctxo = new astcenc_context;
768
3.60k
  astcenc_contexti* ctx = &ctxo->context;
769
3.60k
  ctx->thread_count = thread_count;
770
3.60k
  ctx->config = *configp;
771
3.60k
  ctx->working_buffers = nullptr;
772
773
  // These are allocated per-compress, as they depend on image size
774
3.60k
  ctx->input_alpha_averages = nullptr;
775
776
  // Copy the config first and validate the copy (we may modify it)
777
3.60k
  status = validate_config(ctx->config);
778
3.60k
  if (status != ASTCENC_SUCCESS)
779
0
  {
780
0
    delete ctxo;
781
0
    return status;
782
0
  }
783
784
3.60k
  if (!parent_context)
785
3.60k
  {
786
3.60k
    block_size_descriptor* bsd = aligned_malloc<block_size_descriptor>(sizeof(block_size_descriptor), ASTCENC_VECALIGN);
787
3.60k
    if (!bsd)
788
0
    {
789
0
      delete ctxo;
790
0
      return ASTCENC_ERR_OUT_OF_MEM;
791
0
    }
792
793
3.60k
    bool can_omit_modes = static_cast<bool>(config.flags & ASTCENC_FLG_SELF_DECOMPRESS_ONLY);
794
3.60k
    init_block_size_descriptor(config.block_x, config.block_y, config.block_z,
795
3.60k
                               can_omit_modes,
796
3.60k
                               config.tune_partition_count_limit,
797
3.60k
                               static_cast<float>(config.tune_block_mode_limit) / 100.0f,
798
3.60k
                               *bsd);
799
800
3.60k
    ctx->owns_bsd = true;
801
3.60k
    ctx->bsd = bsd;
802
3.60k
  }
803
0
  else
804
0
  {
805
0
    ctx->owns_bsd = false;
806
0
    ctx->bsd = parent_context->context.bsd;
807
0
  }
808
809
3.60k
#if !defined(ASTCENC_DECOMPRESS_ONLY)
810
  // Do setup only needed by compression
811
3.60k
  if (!(ctx->config.flags & ASTCENC_FLG_DECOMPRESS_ONLY))
812
3.29k
  {
813
    // Turn a dB limit into a per-texel error for faster use later
814
3.29k
    if ((ctx->config.profile == ASTCENC_PRF_LDR) || (ctx->config.profile == ASTCENC_PRF_LDR_SRGB))
815
1.65k
    {
816
1.65k
      ctx->config.tune_db_limit = astc::pow(0.1f, ctx->config.tune_db_limit * 0.1f) * 65535.0f * 65535.0f;
817
1.65k
    }
818
1.63k
    else
819
1.63k
    {
820
1.63k
      ctx->config.tune_db_limit = 0.0f;
821
1.63k
    }
822
823
3.29k
    size_t worksize = sizeof(compression_working_buffers) * thread_count;
824
3.29k
    ctx->working_buffers = aligned_malloc<compression_working_buffers>(worksize, ASTCENC_VECALIGN);
825
3.29k
    static_assert((ASTCENC_VECALIGN == 0) || ((sizeof(compression_working_buffers) % ASTCENC_VECALIGN) == 0),
826
3.29k
                  "compression_working_buffers size must be multiple of vector alignment");
827
3.29k
    if (!ctx->working_buffers)
828
0
    {
829
0
      if (ctx->owns_bsd)
830
0
      {
831
0
        aligned_free<const block_size_descriptor>(ctx->bsd);
832
0
      }
833
0
      delete ctxo;
834
0
      *context = nullptr;
835
0
      return ASTCENC_ERR_OUT_OF_MEM;
836
0
    }
837
3.29k
  }
838
3.60k
#endif
839
840
#if defined(ASTCENC_DIAGNOSTICS)
841
  ctx->trace_log = new TraceLog(ctx->config.trace_file_path);
842
  if (!ctx->trace_log->m_file)
843
  {
844
    return ASTCENC_ERR_DTRACE_FAILURE;
845
  }
846
847
  trace_add_data("block_x", config.block_x);
848
  trace_add_data("block_y", config.block_y);
849
  trace_add_data("block_z", config.block_z);
850
#endif
851
852
3.60k
  *context = ctxo;
853
854
3.60k
#if !defined(ASTCENC_DECOMPRESS_ONLY)
855
3.60k
  prepare_angular_tables();
856
3.60k
#endif
857
858
3.60k
  return ASTCENC_SUCCESS;
859
3.60k
}
860
861
/* See header for documentation. */
862
void astcenc_context_free(
863
  astcenc_context* ctxo
864
3.60k
) {
865
3.60k
  if (ctxo)
866
3.60k
  {
867
3.60k
    astcenc_contexti* ctx = &ctxo->context;
868
3.60k
    aligned_free<compression_working_buffers>(ctx->working_buffers);
869
3.60k
    if (ctx->owns_bsd)
870
3.60k
    {
871
3.60k
      aligned_free<const block_size_descriptor>(ctx->bsd);
872
3.60k
    }
873
#if defined(ASTCENC_DIAGNOSTICS)
874
    delete ctx->trace_log;
875
#endif
876
3.60k
    delete ctxo;
877
3.60k
  }
878
3.60k
}
879
880
#if !defined(ASTCENC_DECOMPRESS_ONLY)
881
882
/**
883
 * @brief Compress an image, after any preflight has completed.
884
 *
885
 * @param[out] ctxo           The compressor context.
886
 * @param      thread_index   The thread index.
887
 * @param      image          The input image.
888
 * @param      swizzle        The input swizzle.
889
 * @param[out] buffer         The output array for the compressed data.
890
 */
891
static void compress_image(
892
  astcenc_context& ctxo,
893
  unsigned int thread_index,
894
  const astcenc_image& image,
895
  const astcenc_swizzle& swizzle,
896
  uint8_t* buffer
897
2.23k
) {
898
2.23k
  astcenc_contexti& ctx = ctxo.context;
899
2.23k
  const block_size_descriptor& bsd = *ctx.bsd;
900
2.23k
  astcenc_profile decode_mode = ctx.config.profile;
901
902
2.23k
  image_block blk;
903
904
2.23k
  size_t block_x = bsd.dim_x;
905
2.23k
  size_t block_y = bsd.dim_y;
906
2.23k
  size_t block_z = bsd.dim_z;
907
2.23k
  blk.texel_count = static_cast<uint8_t>(block_x * block_y * block_z);
908
909
2.23k
  size_t dim_x = image.dim_x;
910
2.23k
  size_t dim_y = image.dim_y;
911
2.23k
  size_t dim_z = image.dim_z;
912
913
2.23k
  size_t blocks_x = astc::get_block_count_safe(dim_x, block_x);
914
2.23k
  size_t blocks_y = astc::get_block_count_safe(dim_y, block_y);
915
2.23k
  size_t blocks_z = astc::get_block_count_safe(dim_z, block_z);
916
917
2.23k
  size_t block_count = get_blocks_count(blocks_x, blocks_y, blocks_z);
918
  // Should never fail here - tested in caller before calling here
919
2.23k
  assert(block_count > 0);
920
921
922
2.23k
  size_t row_blocks = blocks_x;
923
2.23k
  size_t plane_blocks = blocks_x * blocks_y;
924
925
2.23k
  blk.decode_unorm8 = ctxo.context.config.flags & ASTCENC_FLG_USE_DECODE_UNORM8;
926
927
  // Populate the block channel weights
928
2.23k
  blk.channel_weight = vfloat4(ctx.config.cw_r_weight,
929
2.23k
                               ctx.config.cw_g_weight,
930
2.23k
                               ctx.config.cw_b_weight,
931
2.23k
                               ctx.config.cw_a_weight);
932
933
  // Use preallocated scratch buffer
934
2.23k
  auto& temp_buffers = ctx.working_buffers[thread_index];
935
936
  // Only the first thread actually runs the initializer
937
2.23k
  ctxo.manage_compress.init(block_count, ctx.config.progress_callback);
938
939
  // Determine if we can use an optimized load function
940
2.23k
  bool needs_swz = (swizzle.r != ASTCENC_SWZ_R) || (swizzle.g != ASTCENC_SWZ_G) ||
941
2.23k
                   (swizzle.b != ASTCENC_SWZ_B) || (swizzle.a != ASTCENC_SWZ_A);
942
943
2.23k
  bool needs_hdr = (decode_mode == ASTCENC_PRF_HDR) ||
944
1.69k
                   (decode_mode == ASTCENC_PRF_HDR_RGB_LDR_A);
945
946
2.23k
  bool use_fast_load = !needs_swz && !needs_hdr &&
947
1.05k
                       block_z == 1 && image.data_type == ASTCENC_TYPE_U8;
948
949
2.23k
  auto load_func = load_image_block;
950
2.23k
  if (use_fast_load)
951
1.05k
  {
952
1.05k
    load_func = load_image_block_fast_ldr;
953
1.05k
  }
954
955
  // All threads run this processing loop until there is no work remaining
956
4.47k
  while (true)
957
4.47k
  {
958
4.47k
    size_t count;
959
4.47k
    size_t base = ctxo.manage_compress.get_task_assignment(16, count);
960
4.47k
    if (!count)
961
2.23k
    {
962
2.23k
      break;
963
2.23k
    }
964
965
4.47k
    for (size_t i = base; i < base + count; i++)
966
2.23k
    {
967
      // Decode i into x, y, z block indices
968
2.23k
      size_t z = i / plane_blocks;
969
2.23k
      size_t rem = i - (z * plane_blocks);
970
2.23k
      size_t y = rem / row_blocks;
971
2.23k
      size_t x = rem - (y * row_blocks);
972
973
      // Test if we can apply some basic alpha-scale RDO
974
2.23k
      bool use_full_block = true;
975
2.23k
      if (ctx.config.a_scale_radius != 0 && block_z == 1)
976
0
      {
977
0
        size_t start_x = x * block_x;
978
0
        size_t end_x = astc::min(dim_x, start_x + block_x);
979
980
0
        size_t start_y = y * block_y;
981
0
        size_t end_y = astc::min(dim_y, start_y + block_y);
982
983
        // SATs accumulate error, so don't test exactly zero. Test for
984
        // less than 1 alpha in the expanded block footprint that
985
        // includes the alpha radius.
986
0
        size_t x_footprint = block_x + 2 * (ctx.config.a_scale_radius - 1);
987
0
        size_t y_footprint = block_y + 2 * (ctx.config.a_scale_radius - 1);
988
989
0
        float footprint = static_cast<float>(x_footprint * y_footprint);
990
0
        float threshold = 0.9f / (255.0f * footprint);
991
992
        // Do we have any alpha values?
993
0
        use_full_block = false;
994
0
        for (size_t ay = start_y; ay < end_y; ay++)
995
0
        {
996
0
          for (size_t ax = start_x; ax < end_x; ax++)
997
0
          {
998
0
            float a_avg = ctx.input_alpha_averages[ay * dim_x + ax];
999
0
            if (a_avg > threshold)
1000
0
            {
1001
0
              use_full_block = true;
1002
0
              ax = end_x;
1003
0
              ay = end_y;
1004
0
            }
1005
0
          }
1006
0
        }
1007
0
      }
1008
1009
      // Fetch the full block for compression
1010
2.23k
      if (use_full_block)
1011
2.23k
      {
1012
2.23k
        load_func(decode_mode, image, blk, bsd, x * block_x, y * block_y, z * block_z, swizzle);
1013
1014
        // Scale RGB error contribution by the maximum alpha in the block
1015
        // This encourages preserving alpha accuracy in regions with high
1016
        // transparency, and can buy up to 0.5 dB PSNR.
1017
2.23k
        if (ctx.config.flags & ASTCENC_FLG_USE_ALPHA_WEIGHT)
1018
897
        {
1019
897
          float alpha_scale = blk.data_max.lane<3>() * (1.0f / 65535.0f);
1020
897
          blk.channel_weight = vfloat4(ctx.config.cw_r_weight * alpha_scale,
1021
897
                                       ctx.config.cw_g_weight * alpha_scale,
1022
897
                                       ctx.config.cw_b_weight * alpha_scale,
1023
897
                                       ctx.config.cw_a_weight);
1024
897
        }
1025
2.23k
      }
1026
      // Apply alpha scale RDO - substitute constant color block
1027
0
      else
1028
0
      {
1029
0
        blk.origin_texel = vfloat4::zero();
1030
0
        blk.data_min = vfloat4::zero();
1031
0
        blk.data_mean = vfloat4::zero();
1032
0
        blk.data_max = vfloat4::zero();
1033
0
        blk.grayscale = true;
1034
0
      }
1035
1036
2.23k
      size_t offset = ((z * blocks_y + y) * blocks_x + x) * 16;
1037
2.23k
      uint8_t *bp = buffer + offset;
1038
2.23k
      compress_block(ctx, blk, bp, temp_buffers);
1039
2.23k
    }
1040
1041
2.23k
    ctxo.manage_compress.complete_task_assignment(count);
1042
2.23k
  }
1043
2.23k
}
1044
1045
/**
1046
 * @brief Compute regional averages in an image.
1047
 *
1048
 * This function can be called by multiple threads, but only after a single
1049
 * thread calls the setup function @c init_compute_averages().
1050
 *
1051
 * Results are written back into @c img->input_alpha_averages.
1052
 *
1053
 * @param[out] ctx   The context.
1054
 * @param      ag    The average and variance arguments created during setup.
1055
 */
1056
static void compute_averages(
1057
  astcenc_context& ctx,
1058
  const avg_args &ag
1059
0
) {
1060
0
  pixel_region_args arg = ag.arg;
1061
0
  arg.work_memory = new vfloat4[ag.work_memory_size];
1062
1063
0
  size_t size_x = ag.img_size_x;
1064
0
  size_t size_y = ag.img_size_y;
1065
0
  size_t size_z = ag.img_size_z;
1066
1067
0
  size_t step_xy = ag.blk_size_xy;
1068
0
  size_t step_z = ag.blk_size_z;
1069
1070
0
  size_t tasks_y = (size_y + step_xy - 1) / step_xy;
1071
1072
  // All threads run this processing loop until there is no work remaining
1073
0
  while (true)
1074
0
  {
1075
0
    size_t count;
1076
0
    size_t base = ctx.manage_avg.get_task_assignment(16, count);
1077
0
    if (!count)
1078
0
    {
1079
0
      break;
1080
0
    }
1081
1082
0
    for (size_t i = base; i < base + count; i++)
1083
0
    {
1084
0
      size_t z_task = i / tasks_y;
1085
0
      size_t y_task = i - (z_task * tasks_y);
1086
1087
0
      size_t z = z_task * step_z;
1088
0
      size_t y = y_task * step_xy;
1089
1090
0
      arg.size_z = astc::min(step_z, size_z - z);
1091
0
      arg.offset_z = z;
1092
1093
0
      arg.size_y = astc::min(step_xy, size_y - y);
1094
0
      arg.offset_y = y;
1095
1096
0
      for (size_t x = 0; x < size_x; x += step_xy)
1097
0
      {
1098
0
        arg.size_x = astc::min(step_xy, size_x - x);
1099
0
        arg.offset_x = x;
1100
0
        compute_pixel_region_variance(ctx.context, arg);
1101
0
      }
1102
0
    }
1103
1104
0
    ctx.manage_avg.complete_task_assignment(count);
1105
0
  }
1106
1107
0
  delete[] arg.work_memory;
1108
0
}
1109
1110
#endif
1111
1112
/* See header for documentation. */
1113
astcenc_error astcenc_compress_image(
1114
  astcenc_context* ctxo,
1115
  astcenc_image* imagep,
1116
  const astcenc_swizzle* swizzle,
1117
  uint8_t* data_out,
1118
  size_t data_len,
1119
  unsigned int thread_index
1120
2.23k
) {
1121
#if defined(ASTCENC_DECOMPRESS_ONLY)
1122
  (void)ctxo;
1123
  (void)imagep;
1124
  (void)swizzle;
1125
  (void)data_out;
1126
  (void)data_len;
1127
  (void)thread_index;
1128
  return ASTCENC_ERR_BAD_CONTEXT;
1129
#else
1130
2.23k
  astcenc_contexti* ctx = &ctxo->context;
1131
2.23k
  astcenc_error status;
1132
2.23k
  astcenc_image& image = *imagep;
1133
1134
2.23k
  if (ctx->config.flags & ASTCENC_FLG_DECOMPRESS_ONLY)
1135
0
  {
1136
0
    return ASTCENC_ERR_BAD_CONTEXT;
1137
0
  }
1138
1139
2.23k
  status = validate_compression_swizzle(*swizzle);
1140
2.23k
  if (status != ASTCENC_SUCCESS)
1141
0
  {
1142
0
    return status;
1143
0
  }
1144
1145
2.23k
  if (thread_index >= ctx->thread_count)
1146
0
  {
1147
0
    return ASTCENC_ERR_BAD_PARAM;
1148
0
  }
1149
1150
2.23k
  size_t dim_x = image.dim_x;
1151
2.23k
  size_t dim_y = image.dim_y;
1152
2.23k
  size_t dim_z = image.dim_z;
1153
1154
2.23k
  size_t texel_count = get_texels_count(dim_x, dim_y, dim_z);
1155
  // Cumulative texel sizes would overflow a size_t
1156
2.23k
  if (texel_count == 0)
1157
0
  {
1158
0
    return ASTCENC_ERR_BAD_PARAM;
1159
0
  }
1160
1161
2.23k
  size_t block_x = ctx->config.block_x;
1162
2.23k
  size_t block_y = ctx->config.block_y;
1163
2.23k
  size_t block_z = ctx->config.block_z;
1164
1165
2.23k
  size_t blocks_x = astc::get_block_count_safe(dim_x, block_x);
1166
2.23k
  size_t blocks_y = astc::get_block_count_safe(dim_y, block_y);
1167
2.23k
  size_t blocks_z = astc::get_block_count_safe(dim_z, block_z);
1168
1169
2.23k
  size_t block_count = get_blocks_count(blocks_x, blocks_y, blocks_z);
1170
  // Cumulative block sizes would overflow a size_t
1171
2.23k
  if (block_count == 0)
1172
0
  {
1173
0
    return ASTCENC_ERR_BAD_PARAM;
1174
0
  }
1175
1176
  // Check we have enough output space, size_needed calc cannot overflow as
1177
  // get_blocks_count() already validated that a byte count would fit
1178
2.23k
  size_t size_needed = block_count * 16;
1179
2.23k
  if (data_len < size_needed)
1180
0
  {
1181
0
    return ASTCENC_ERR_OUT_OF_MEM;
1182
0
  }
1183
1184
  // If context thread count is one then implicitly reset
1185
2.23k
  if (ctx->thread_count == 1)
1186
2.23k
  {
1187
2.23k
    astcenc_compress_reset(ctxo);
1188
2.23k
  }
1189
1190
2.23k
  if (ctx->config.a_scale_radius != 0)
1191
0
  {
1192
    // First thread to enter will do setup, other threads will subsequently
1193
    // enter the critical section but simply skip over the initialization
1194
0
    auto init_avg = [ctx, &image, swizzle, texel_count]() {
1195
      // Perform memory allocations for the destination buffers
1196
0
      ctx->input_alpha_averages = new float[texel_count];
1197
1198
0
      return init_compute_averages(
1199
0
        image, ctx->config.a_scale_radius, *swizzle,
1200
0
        ctx->avg_preprocess_args);
1201
0
    };
1202
1203
    // Only the first thread actually runs the initializer
1204
0
    ctxo->manage_avg.init(init_avg);
1205
1206
    // All threads will enter this function and dynamically grab work
1207
0
    compute_averages(*ctxo, ctx->avg_preprocess_args);
1208
0
  }
1209
1210
  // Wait for compute_averages to complete before compressing
1211
2.23k
  ctxo->manage_avg.wait();
1212
1213
2.23k
  compress_image(*ctxo, thread_index, image, *swizzle, data_out);
1214
1215
  // Wait for compress to complete before freeing memory
1216
2.23k
  ctxo->manage_compress.wait();
1217
1218
2.23k
  auto term_compress = [ctx]() {
1219
2.23k
    delete[] ctx->input_alpha_averages;
1220
2.23k
    ctx->input_alpha_averages = nullptr;
1221
2.23k
  };
1222
1223
  // Only the first thread to arrive actually runs the term
1224
2.23k
  ctxo->manage_compress.term(term_compress);
1225
1226
2.23k
  return ASTCENC_SUCCESS;
1227
2.23k
#endif
1228
2.23k
}
1229
1230
/* See header for documentation. */
1231
astcenc_error astcenc_compress_reset(
1232
  astcenc_context* ctxo
1233
2.23k
) {
1234
#if defined(ASTCENC_DECOMPRESS_ONLY)
1235
  (void)ctxo;
1236
  return ASTCENC_ERR_BAD_CONTEXT;
1237
#else
1238
2.23k
  astcenc_contexti* ctx = &ctxo->context;
1239
2.23k
  if (ctx->config.flags & ASTCENC_FLG_DECOMPRESS_ONLY)
1240
0
  {
1241
0
    return ASTCENC_ERR_BAD_CONTEXT;
1242
0
  }
1243
1244
2.23k
  ctxo->manage_avg.reset();
1245
2.23k
  ctxo->manage_compress.reset();
1246
2.23k
  return ASTCENC_SUCCESS;
1247
2.23k
#endif
1248
2.23k
}
1249
1250
/* See header for documentation. */
1251
astcenc_error astcenc_compress_cancel(
1252
  astcenc_context* ctxo
1253
0
) {
1254
#if defined(ASTCENC_DECOMPRESS_ONLY)
1255
  (void)ctxo;
1256
  return ASTCENC_ERR_BAD_CONTEXT;
1257
#else
1258
0
  astcenc_contexti* ctx = &ctxo->context;
1259
0
  if (ctx->config.flags & ASTCENC_FLG_DECOMPRESS_ONLY)
1260
0
  {
1261
0
    return ASTCENC_ERR_BAD_CONTEXT;
1262
0
  }
1263
1264
  // Cancel compression before cancelling avg. This avoids the race condition
1265
  // where cancelling them in the other order could see a compression worker
1266
  // starting to process even though some of the avg data is undefined.
1267
0
  ctxo->manage_compress.cancel();
1268
0
  ctxo->manage_avg.cancel();
1269
0
  return ASTCENC_SUCCESS;
1270
0
#endif
1271
0
}
1272
1273
/* See header for documentation. */
1274
astcenc_error astcenc_decompress_image(
1275
  astcenc_context* ctxo,
1276
  const uint8_t* data,
1277
  size_t data_len,
1278
  astcenc_image* image_outp,
1279
  const astcenc_swizzle* swizzle,
1280
  unsigned int thread_index
1281
1.11k
) {
1282
1.11k
  astcenc_error status;
1283
1.11k
  astcenc_image& image_out = *image_outp;
1284
1.11k
  astcenc_contexti* ctx = &ctxo->context;
1285
1286
  // Today this doesn't matter (working set on stack) but might in future ...
1287
1.11k
  if (thread_index >= ctx->thread_count)
1288
0
  {
1289
0
    return ASTCENC_ERR_BAD_PARAM;
1290
0
  }
1291
1292
1.11k
  status = validate_decompression_swizzle(*swizzle);
1293
1.11k
  if (status != ASTCENC_SUCCESS)
1294
0
  {
1295
0
    return status;
1296
0
  }
1297
1298
1.11k
  size_t dim_x = image_out.dim_x;
1299
1.11k
  size_t dim_y = image_out.dim_y;
1300
1.11k
  size_t dim_z = image_out.dim_z;
1301
1302
1.11k
  size_t texel_count = get_texels_count(dim_x, dim_y, dim_z);
1303
  // Cumulative texel sizes would overflow a size_t
1304
1.11k
  if (texel_count == 0)
1305
0
  {
1306
0
    return ASTCENC_ERR_BAD_PARAM;
1307
0
  }
1308
1309
1.11k
  size_t block_x = ctx->config.block_x;
1310
1.11k
  size_t block_y = ctx->config.block_y;
1311
1.11k
  size_t block_z = ctx->config.block_z;
1312
1313
1.11k
  size_t blocks_x = astc::get_block_count_safe(dim_x, block_x);
1314
1.11k
  size_t blocks_y = astc::get_block_count_safe(dim_y, block_y);
1315
1.11k
  size_t blocks_z = astc::get_block_count_safe(dim_z, block_z);
1316
1317
1.11k
  size_t block_count = get_blocks_count(blocks_x, blocks_y, blocks_z);
1318
  // Cumulative block sizes would overflow a size_t
1319
1.11k
  if (block_count == 0)
1320
0
  {
1321
0
    return ASTCENC_ERR_BAD_PARAM;
1322
0
  }
1323
1324
  // Check we have enough output space, size_needed calc cannot overflow as
1325
  // get_blocks_count() already validated that a byte count would fit
1326
1.11k
  size_t size_needed = block_count * 16;
1327
1.11k
  if (data_len < size_needed)
1328
0
  {
1329
0
    return ASTCENC_ERR_OUT_OF_MEM;
1330
0
  }
1331
1332
1.11k
  size_t row_blocks = blocks_x;
1333
1.11k
  size_t plane_blocks = blocks_x * blocks_y;
1334
1335
1.11k
  image_block blk {};
1336
1.11k
  blk.texel_count = static_cast<uint8_t>(block_x * block_y * block_z);
1337
1338
  // Decode mode inferred from the output data type
1339
1.11k
  blk.decode_unorm8 = image_out.data_type == ASTCENC_TYPE_U8;
1340
1341
  // If context thread count is one then implicitly reset
1342
1.11k
  if (ctx->thread_count == 1)
1343
1.11k
  {
1344
1.11k
    astcenc_decompress_reset(ctxo);
1345
1.11k
  }
1346
1347
  // Only the first thread actually runs the initializer
1348
1.11k
  ctxo->manage_decompress.init(block_count, nullptr);
1349
1350
  // All threads run this processing loop until there is no work remaining
1351
2.22k
  while (true)
1352
2.22k
  {
1353
2.22k
    size_t count;
1354
2.22k
    size_t base = ctxo->manage_decompress.get_task_assignment(128, count);
1355
2.22k
    if (!count)
1356
1.11k
    {
1357
1.11k
      break;
1358
1.11k
    }
1359
1360
5.06k
    for (size_t i = base; i < base + count; i++)
1361
3.95k
    {
1362
      // Decode i into x, y, z block indices
1363
3.95k
      size_t z = i / plane_blocks;
1364
3.95k
      size_t rem = i - (z * plane_blocks);
1365
3.95k
      size_t y = rem / row_blocks;
1366
3.95k
      size_t x = rem - (y * row_blocks);
1367
1368
3.95k
      size_t offset = (((z * blocks_y + y) * blocks_x) + x) * 16;
1369
3.95k
      const uint8_t* bp = data + offset;
1370
1371
3.95k
      symbolic_compressed_block scb;
1372
1373
3.95k
      physical_to_symbolic(*ctx->bsd, bp, scb);
1374
1375
3.95k
      decompress_symbolic_block(ctx->config.profile, *ctx->bsd,
1376
3.95k
                                x * block_x,
1377
3.95k
                                y * block_y,
1378
3.95k
                                z * block_z,
1379
3.95k
                                scb, blk);
1380
1381
3.95k
      store_image_block(image_out, blk, *ctx->bsd,
1382
3.95k
                        x * block_x, y * block_y, z * block_z,
1383
3.95k
                        *swizzle);
1384
3.95k
    }
1385
1386
1.11k
    ctxo->manage_decompress.complete_task_assignment(count);
1387
1.11k
  }
1388
1389
1.11k
  return ASTCENC_SUCCESS;
1390
1.11k
}
1391
1392
/* See header for documentation. */
1393
astcenc_error astcenc_decompress_reset(
1394
  astcenc_context* ctxo
1395
1.11k
) {
1396
1.11k
  ctxo->manage_decompress.reset();
1397
1.11k
  return ASTCENC_SUCCESS;
1398
1.11k
}
1399
1400
/* See header for documentation. */
1401
astcenc_error astcenc_get_block_info(
1402
  astcenc_context* ctxo,
1403
  const uint8_t data[16],
1404
  astcenc_block_info* info
1405
0
) {
1406
#if defined(ASTCENC_DECOMPRESS_ONLY)
1407
  (void)ctxo;
1408
  (void)data;
1409
  (void)info;
1410
  return ASTCENC_ERR_BAD_CONTEXT;
1411
#else
1412
0
  astcenc_contexti* ctx = &ctxo->context;
1413
1414
  // Decode the compressed data into a symbolic form
1415
0
  symbolic_compressed_block scb;
1416
0
  physical_to_symbolic(*ctx->bsd, data, scb);
1417
1418
  // Fetch the appropriate partition and decimation tables
1419
0
  const block_size_descriptor& bsd = *ctx->bsd;
1420
1421
  // Start from a clean slate
1422
0
  memset(info, 0, sizeof(*info));
1423
1424
  // Basic info we can always populate
1425
0
  info->profile = ctx->config.profile;
1426
1427
0
  info->block_x = ctx->config.block_x;
1428
0
  info->block_y = ctx->config.block_y;
1429
0
  info->block_z = ctx->config.block_z;
1430
0
  info->texel_count = bsd.texel_count;
1431
1432
  // Check for error blocks first
1433
0
  info->is_error_block = scb.block_type == SYM_BTYPE_ERROR;
1434
0
  if (info->is_error_block)
1435
0
  {
1436
0
    return ASTCENC_SUCCESS;
1437
0
  }
1438
1439
  // Check for constant color blocks second
1440
0
  info->is_constant_block = scb.block_type == SYM_BTYPE_CONST_F16 ||
1441
0
                            scb.block_type == SYM_BTYPE_CONST_U16;
1442
0
  if (info->is_constant_block)
1443
0
  {
1444
0
    return ASTCENC_SUCCESS;
1445
0
  }
1446
1447
  // Otherwise handle a full block ; known to be valid after conditions above have been checked
1448
0
  unsigned int partition_count = scb.partition_count;
1449
0
  const auto& pi = bsd.get_partition_info(partition_count, scb.partition_index);
1450
1451
0
  const block_mode& bm = bsd.get_block_mode(scb.block_mode);
1452
0
  const decimation_info& di = bsd.get_decimation_info(bm.decimation_mode);
1453
1454
0
  info->weight_x = di.weight_x;
1455
0
  info->weight_y = di.weight_y;
1456
0
  info->weight_z = di.weight_z;
1457
1458
0
  info->is_dual_plane_block = bm.is_dual_plane != 0;
1459
1460
0
  info->partition_count = scb.partition_count;
1461
0
  info->partition_index = scb.partition_index;
1462
0
  info->dual_plane_component = scb.plane2_component;
1463
1464
0
  info->color_level_count = get_quant_level(scb.get_color_quant_mode());
1465
0
  info->weight_level_count = get_quant_level(bm.get_weight_quant_mode());
1466
1467
  // Unpack color endpoints for each active partition
1468
0
  for (size_t i = 0; i < scb.partition_count; i++)
1469
0
  {
1470
0
    bool rgb_hdr;
1471
0
    bool a_hdr;
1472
0
    vint4 endpnt[2];
1473
1474
0
    unpack_color_endpoints(ctx->config.profile,
1475
0
                           scb.color_formats[i],
1476
0
                           scb.color_values[i],
1477
0
                           rgb_hdr, a_hdr,
1478
0
                           endpnt[0], endpnt[1]);
1479
1480
    // Store the color endpoint mode info
1481
0
    info->color_endpoint_modes[i] = scb.color_formats[i];
1482
0
    info->is_hdr_block = info->is_hdr_block || rgb_hdr || a_hdr;
1483
1484
    // Store the unpacked and decoded color endpoint
1485
0
    vmask4 hdr_mask(rgb_hdr, rgb_hdr, rgb_hdr, a_hdr);
1486
0
    for (size_t j = 0; j < 2; j++)
1487
0
    {
1488
0
      vint4 color_lns = lns_to_sf16(endpnt[j]);
1489
0
      vint4 color_unorm = unorm16_to_sf16(endpnt[j]);
1490
0
      vint4 datai = select(color_unorm, color_lns, hdr_mask);
1491
0
      store(float16_to_float(datai), info->color_endpoints[i][j]);
1492
0
    }
1493
0
  }
1494
1495
  // Unpack weights for each texel
1496
0
  int weight_plane1[BLOCK_MAX_TEXELS];
1497
0
  int weight_plane2[BLOCK_MAX_TEXELS];
1498
1499
0
  unpack_weights(bsd, scb, di, bm.is_dual_plane, weight_plane1, weight_plane2);
1500
0
  for (size_t i = 0; i < bsd.texel_count; i++)
1501
0
  {
1502
0
    info->weight_values_plane1[i] = static_cast<float>(weight_plane1[i]) * (1.0f / WEIGHTS_TEXEL_SUM);
1503
0
    if (info->is_dual_plane_block)
1504
0
    {
1505
0
      info->weight_values_plane2[i] = static_cast<float>(weight_plane2[i]) * (1.0f / WEIGHTS_TEXEL_SUM);
1506
0
    }
1507
0
  }
1508
1509
  // Unpack partition assignments for each texel
1510
0
  for (size_t i = 0; i < bsd.texel_count; i++)
1511
0
  {
1512
0
    info->partition_assignment[i] = pi.partition_of_texel[i];
1513
0
  }
1514
1515
0
  return ASTCENC_SUCCESS;
1516
0
#endif
1517
0
}
1518
1519
/* See header for documentation. */
1520
const char* astcenc_get_error_string(
1521
  astcenc_error status
1522
0
) {
1523
  // Values in this enum are from an external user, so not guaranteed to be
1524
  // bounded to the enum values
1525
0
  switch (static_cast<int>(status))
1526
0
  {
1527
0
  case ASTCENC_SUCCESS:
1528
0
    return "ASTCENC_SUCCESS";
1529
0
  case ASTCENC_ERR_OUT_OF_MEM:
1530
0
    return "ASTCENC_ERR_OUT_OF_MEM";
1531
0
  case ASTCENC_ERR_BAD_CPU_FLOAT:
1532
0
    return "ASTCENC_ERR_BAD_CPU_FLOAT";
1533
0
  case ASTCENC_ERR_BAD_PARAM:
1534
0
    return "ASTCENC_ERR_BAD_PARAM";
1535
0
  case ASTCENC_ERR_BAD_BLOCK_SIZE:
1536
0
    return "ASTCENC_ERR_BAD_BLOCK_SIZE";
1537
0
  case ASTCENC_ERR_BAD_PROFILE:
1538
0
    return "ASTCENC_ERR_BAD_PROFILE";
1539
0
  case ASTCENC_ERR_BAD_QUALITY:
1540
0
    return "ASTCENC_ERR_BAD_QUALITY";
1541
0
  case ASTCENC_ERR_BAD_FLAGS:
1542
0
    return "ASTCENC_ERR_BAD_FLAGS";
1543
0
  case ASTCENC_ERR_BAD_SWIZZLE:
1544
0
    return "ASTCENC_ERR_BAD_SWIZZLE";
1545
0
  case ASTCENC_ERR_BAD_CONTEXT:
1546
0
    return "ASTCENC_ERR_BAD_CONTEXT";
1547
0
  case ASTCENC_ERR_NOT_IMPLEMENTED:
1548
0
    return "ASTCENC_ERR_NOT_IMPLEMENTED";
1549
0
  case ASTCENC_ERR_BAD_DECODE_MODE:
1550
0
    return "ASTCENC_ERR_BAD_DECODE_MODE";
1551
#if defined(ASTCENC_DIAGNOSTICS)
1552
  case ASTCENC_ERR_DTRACE_FAILURE:
1553
    return "ASTCENC_ERR_DTRACE_FAILURE";
1554
#endif
1555
0
  default:
1556
0
    return nullptr;
1557
0
  }
1558
0
}