Coverage Report

Created: 2026-05-30 06:06

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/astc-encoder/Source/astcenc_entry.cpp
Line
Count
Source
1
// SPDX-License-Identifier: Apache-2.0
2
// ----------------------------------------------------------------------------
3
// Copyright 2011-2026 Arm Limited
4
//
5
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
6
// use this file except in compliance with the License. You may obtain a copy
7
// of the License at:
8
//
9
//     http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing, software
12
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14
// License for the specific language governing permissions and limitations
15
// under the License.
16
// ----------------------------------------------------------------------------
17
18
/**
19
 * @brief Functions for the library entrypoint.
20
 */
21
22
#include <array>
23
#include <cstring>
24
#include <new>
25
26
#include "astcenc.h"
27
#include "astcenc_internal_entry.h"
28
#include "astcenc_diagnostic_trace.h"
29
30
/**
31
 * @brief Record of the quality tuning parameter values.
32
 *
33
 * See the @c astcenc_config structure for detailed parameter documentation.
34
 *
35
 * Note that the mse_overshoot entries are scaling factors relative to the base MSE to hit db_limit.
36
 * A 20% overshoot is harder to hit for a higher base db_limit, so we may actually use lower ratios
37
 * for the more thorough search presets because the underlying db_limit is so much higher.
38
 */
39
// One row of a preset table. The preset tables initialize this aggregate positionally, so the
// member order below must stay in step with the table columns.
struct astcenc_preset_config
40
{
41
  // Quality level this row describes; astcenc_config_init() searches the table on this value
  float quality;
42
  unsigned int tune_partition_count_limit;
43
  unsigned int tune_2partition_index_limit;
44
  unsigned int tune_3partition_index_limit;
45
  unsigned int tune_4partition_index_limit;
46
  unsigned int tune_block_mode_limit;
47
  unsigned int tune_refinement_limit;
48
  unsigned int tune_candidate_limit;
49
  unsigned int tune_2partitioning_candidate_limit;
50
  unsigned int tune_3partitioning_candidate_limit;
51
  unsigned int tune_4partitioning_candidate_limit;
52
  // The two bases are combined in astcenc_config_init() as
  // max(a_base - 35 * log10(texels), b_base - 19 * log10(texels)) to give the config tune_db_limit
  float tune_db_limit_a_base;
53
  float tune_db_limit_b_base;
54
  float tune_mse_overshoot;
55
  float tune_2partition_early_out_limit_factor;
56
  float tune_3partition_early_out_limit_factor;
57
  float tune_2plane_early_out_limit_correlation;
58
  // Held as a float and interpolated between presets along with the other float members
  float tune_search_mode0_enable;
59
};
60
61
/**
62
 * @brief The static presets for high bandwidth encodings (x < 25 texels per block).
63
 */
64
// Each row is the preset quality followed by the remaining astcenc_preset_config members in
// declaration order: partition count limit; 2/3/4 partition index limits; block mode limit;
// refinement limit; candidate limit; 2/3/4 partitioning candidate limits; dB limit a/b bases;
// MSE overshoot; 2/3 partition early-out factors; 2 plane early-out correlation; mode0 enable.
// Rows are expected to be in ascending quality order; astcenc_config_init() asserts this when it
// interpolates between two adjacent rows.
static const std::array<astcenc_preset_config, 6> preset_configs_high {{
65
  {
66
    ASTCENC_PRE_FASTEST,
67
    2, 10, 6, 4, 43, 2, 2, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.85f, 0.0f
68
  }, {
69
    ASTCENC_PRE_FAST,
70
    3, 18, 10, 8, 55, 3, 3, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.90f, 0.0f
71
  }, {
72
    ASTCENC_PRE_MEDIUM,
73
    4, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 2.5f, 1.1f, 1.05f, 0.95f, 0.0f
74
  }, {
75
    ASTCENC_PRE_THOROUGH,
76
    4, 82, 60, 30, 94, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 1.35f, 1.15f, 0.97f, 0.0f
77
  }, {
78
    ASTCENC_PRE_VERYTHOROUGH,
79
    4, 256, 128, 64, 98, 4, 6, 8, 6, 4, 200.0f, 200.0f, 10.0f, 1.6f, 1.4f, 0.98f, 0.0f
80
  }, {
81
    ASTCENC_PRE_EXHAUSTIVE,
82
    4, 512, 512, 512, 100, 4, 8, 8, 8, 8, 200.0f, 200.0f, 10.0f, 2.0f, 2.0f, 0.99f, 0.0f
83
  }
84
}};
85
86
/**
87
 * @brief The static presets for medium bandwidth encodings (25 <= x < 64 texels per block).
88
 */
89
// Each row is the preset quality followed by the remaining astcenc_preset_config members in
// declaration order: partition count limit; 2/3/4 partition index limits; block mode limit;
// refinement limit; candidate limit; 2/3/4 partitioning candidate limits; dB limit a/b bases;
// MSE overshoot; 2/3 partition early-out factors; 2 plane early-out correlation; mode0 enable.
// Rows are expected to be in ascending quality order; astcenc_config_init() asserts this when it
// interpolates between two adjacent rows.
static const std::array<astcenc_preset_config, 6> preset_configs_mid {{
90
  {
91
    ASTCENC_PRE_FASTEST,
92
    2, 10, 6, 4, 43, 2, 2, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.80f, 1.0f
93
  }, {
94
    ASTCENC_PRE_FAST,
95
    3, 18, 12, 10, 55, 3, 3, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.85f, 1.0f
96
  }, {
97
    ASTCENC_PRE_MEDIUM,
98
    3, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 3.0f, 1.1f, 1.05f, 0.90f, 1.0f
99
  }, {
100
    ASTCENC_PRE_THOROUGH,
101
    4, 82, 60, 30, 94, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 1.4f, 1.2f, 0.95f, 0.0f
102
  }, {
103
    ASTCENC_PRE_VERYTHOROUGH,
104
    4, 256, 128, 64, 98, 4, 6, 8, 6, 3, 200.0f, 200.0f, 10.0f, 1.6f, 1.4f, 0.98f, 0.0f
105
  }, {
106
    ASTCENC_PRE_EXHAUSTIVE,
107
    4, 256, 256, 256, 100, 4, 8, 8, 8, 8, 200.0f, 200.0f, 10.0f, 2.0f, 2.0f, 0.99f, 0.0f
108
  }
109
}};
110
111
/**
112
 * @brief The static presets for low bandwidth encodings (64 <= x texels per block).
113
 */
114
// Each row is the preset quality followed by the remaining astcenc_preset_config members in
// declaration order: partition count limit; 2/3/4 partition index limits; block mode limit;
// refinement limit; candidate limit; 2/3/4 partitioning candidate limits; dB limit a/b bases;
// MSE overshoot; 2/3 partition early-out factors; 2 plane early-out correlation; mode0 enable.
// Rows are expected to be in ascending quality order; astcenc_config_init() asserts this when it
// interpolates between two adjacent rows.
static const std::array<astcenc_preset_config, 6> preset_configs_low {{
115
  {
116
    ASTCENC_PRE_FASTEST,
117
    2, 10, 6, 4, 40, 2, 2, 2, 2, 2, 85.0f, 63.0f, 3.5f, 1.0f, 1.0f, 0.80f, 1.0f
118
  }, {
119
    ASTCENC_PRE_FAST,
120
    2, 18, 12, 10, 55, 3, 3, 2, 2, 2, 85.0f, 63.0f, 3.5f, 1.0f, 1.0f, 0.85f, 1.0f
121
  }, {
122
    ASTCENC_PRE_MEDIUM,
123
    3, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 3.5f, 1.1f, 1.05f, 0.90f, 1.0f
124
  }, {
125
    ASTCENC_PRE_THOROUGH,
126
    4, 82, 60, 30, 93, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 1.3f, 1.2f, 0.97f, 1.0f
127
  }, {
128
    ASTCENC_PRE_VERYTHOROUGH,
129
    4, 256, 128, 64, 98, 4, 6, 8, 5, 2, 200.0f, 200.0f, 10.0f, 1.6f, 1.4f, 0.98f, 1.0f
130
  }, {
131
    ASTCENC_PRE_EXHAUSTIVE,
132
    4, 256, 256, 256, 100, 4, 8, 8, 8, 8, 200.0f, 200.0f, 10.0f, 2.0f, 2.0f, 0.99f, 1.0f
133
  }
134
}};
135
136
/**
137
 * @brief Validate CPU floating point meets assumptions made in the codec.
138
 *
139
 * The codec is written with the assumption that float bit patterns are valid
140
 * IEEE754 values that are stored and reloaded with round-to-nearest rounding.
141
 * This is always the case in an IEEE-754 compliant system, however not every
142
 * system or compilation mode is actually IEEE-754 compliant. This normally
143
 * fails if the code is compiled with fast math enabled, for example.
144
 *
145
 * @return Return @c ASTCENC_SUCCESS if validated, an error on failure.
146
 */
147
static astcenc_error validate_cpu_float()
{
  // Adding and then subtracting 1.5 * 2^23 only yields the nearest integer if the intermediate
  // sum is held as a round-to-nearest 32-bit float. The probe is volatile so that its value is
  // read at run time.
  constexpr float magic = 12582912.0f;
  volatile float probe = 2.51f;

  float rounded = probe + magic;
  rounded = rounded - magic;

  return (rounded == 3.0f) ? ASTCENC_SUCCESS : ASTCENC_ERR_BAD_CPU_FLOAT;
}
160
161
/**
162
 * @brief Validate config profile.
163
 *
164
 * @param profile   The profile to check.
165
 *
166
 * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
167
 */
168
static astcenc_error validate_profile(
  astcenc_profile profile
) {
  // The value comes from an external caller and may lie outside the enum, so test the raw
  // integer against each supported profile
  const int raw = static_cast<int>(profile);

  const bool is_ldr = (raw == ASTCENC_PRF_LDR_SRGB) || (raw == ASTCENC_PRF_LDR);
  const bool is_hdr = (raw == ASTCENC_PRF_HDR_RGB_LDR_A) || (raw == ASTCENC_PRF_HDR);

  return (is_ldr || is_hdr) ? ASTCENC_SUCCESS : ASTCENC_ERR_BAD_PROFILE;
}
184
185
/**
186
 * @brief Validate block size.
187
 *
188
 * @param block_x   The block x dimensions.
189
 * @param block_y   The block y dimensions.
190
 * @param block_z   The block z dimensions.
191
 *
192
 * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
193
 */
194
static astcenc_error validate_block_size(
  unsigned int block_x,
  unsigned int block_y,
  unsigned int block_z
) {
  // A Z dimension of 0 or 1 is checked as a 2D block; anything larger is checked as a 3D block
  const bool legal_shape = (block_z <= 1)
                         ? is_legal_2d_block_size(block_x, block_y)
                         : is_legal_3d_block_size(block_x, block_y, block_z);
  if (!legal_shape)
  {
    return ASTCENC_ERR_BAD_BLOCK_SIZE;
  }

  // A legal block can still be larger than this build was compiled to handle
  const unsigned int texel_count = block_x * block_y * block_z;
  if (texel_count > BLOCK_MAX_TEXELS)
  {
    return ASTCENC_ERR_NOT_IMPLEMENTED;
  }

  return ASTCENC_SUCCESS;
}
216
217
/**
218
 * @brief Validate flags.
219
 *
220
 * @param profile   The profile to check.
221
 * @param flags     The flags to check.
222
 *
223
 * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
224
 */
225
static astcenc_error validate_flags(
  astcenc_profile profile,
  unsigned int flags
) {
  // Any bit outside the known flag set is an error
  const unsigned int unknown_bits = flags & ~ASTCENC_ALL_FLAGS;
  if (popcount(unknown_bits) != 0)
  {
    return ASTCENC_ERR_BAD_FLAGS;
  }

  // The map type flags are mutually exclusive
  const unsigned int map_bits = flags & (ASTCENC_FLG_MAP_NORMAL | ASTCENC_FLG_MAP_RGBM);
  if (popcount(map_bits) > 1)
  {
    return ASTCENC_ERR_BAD_FLAGS;
  }

  // Unorm8 decode is only permitted with the LDR profiles
  const bool wants_unorm8 = (flags & ASTCENC_FLG_USE_DECODE_UNORM8) != 0;
  const bool hdr_profile = (profile == ASTCENC_PRF_HDR) || (profile == ASTCENC_PRF_HDR_RGB_LDR_A);
  if (wants_unorm8 && hdr_profile)
  {
    return ASTCENC_ERR_BAD_DECODE_MODE;
  }

  return ASTCENC_SUCCESS;
}
254
255
#if !defined(ASTCENC_DECOMPRESS_ONLY)
256
257
/**
258
 * @brief Validate single channel compression swizzle.
259
 *
260
 * @param swizzle   The swizzle to check.
261
 *
262
 * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
263
 */
264
static astcenc_error validate_compression_swz(
  astcenc_swz swizzle
) {
  // Compression accepts the four source channels and the two constants; every other value,
  // including ASTCENC_SWZ_Z, is rejected
  const int raw = static_cast<int>(swizzle);

  const bool is_channel = (raw == ASTCENC_SWZ_R) || (raw == ASTCENC_SWZ_G) ||
                          (raw == ASTCENC_SWZ_B) || (raw == ASTCENC_SWZ_A);
  const bool is_constant = (raw == ASTCENC_SWZ_0) || (raw == ASTCENC_SWZ_1);

  return (is_channel || is_constant) ? ASTCENC_SUCCESS : ASTCENC_ERR_BAD_SWIZZLE;
}
281
282
/**
283
 * @brief Validate overall compression swizzle.
284
 *
285
 * @param swizzle   The swizzle to check.
286
 *
287
 * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
288
 */
289
static astcenc_error validate_compression_swizzle(
  const astcenc_swizzle& swizzle
) {
  // Check the selectors in r, g, b, a order and stop at the first one that is rejected
  const astcenc_swz selectors[4] { swizzle.r, swizzle.g, swizzle.b, swizzle.a };
  for (astcenc_swz selector : selectors)
  {
    if (validate_compression_swz(selector))
    {
      return ASTCENC_ERR_BAD_SWIZZLE;
    }
  }

  return ASTCENC_SUCCESS;
}
302
#endif
303
304
/**
305
 * @brief Validate single channel decompression swizzle.
306
 *
307
 * @param swizzle   The swizzle to check.
308
 *
309
 * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
310
 */
311
static astcenc_error validate_decompression_swz(
  astcenc_swz swizzle
) {
  // The value comes from an external caller and may lie outside the enum, so test the raw
  // integer. Decompression accepts the four channels, the two constants and ASTCENC_SWZ_Z.
  const int raw = static_cast<int>(swizzle);

  const bool is_channel = (raw == ASTCENC_SWZ_R) || (raw == ASTCENC_SWZ_G) ||
                          (raw == ASTCENC_SWZ_B) || (raw == ASTCENC_SWZ_A);
  const bool is_constant = (raw == ASTCENC_SWZ_0) || (raw == ASTCENC_SWZ_1);
  const bool is_z = (raw == ASTCENC_SWZ_Z);

  return (is_channel || is_constant || is_z) ? ASTCENC_SUCCESS : ASTCENC_ERR_BAD_SWIZZLE;
}
330
331
/**
332
 * @brief Validate overall decompression swizzle.
333
 *
334
 * @param swizzle   The swizzle to check.
335
 *
336
 * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
337
 */
338
static astcenc_error validate_decompression_swizzle(
  const astcenc_swizzle& swizzle
) {
  // Check the selectors in r, g, b, a order and stop at the first one that is rejected
  const astcenc_swz selectors[4] { swizzle.r, swizzle.g, swizzle.b, swizzle.a };
  for (astcenc_swz selector : selectors)
  {
    if (validate_decompression_swz(selector))
    {
      return ASTCENC_ERR_BAD_SWIZZLE;
    }
  }

  return ASTCENC_SUCCESS;
}
351
352
/**
353
 * @brief Validate that an incoming configuration is in-spec.
354
 *
355
 * This function can respond in two ways:
356
 *
357
 *   * Numerical inputs that have valid ranges are clamped to those valid ranges. No error is thrown
358
 *     for out-of-range inputs in this case.
359
 *   * Numerical inputs and logic inputs that are logically invalid and which make no sense
360
 *     algorithmically will return an error.
361
 *
362
 * @param[in,out] config   The input compressor configuration.
363
 *
364
 * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
365
 */
366
static astcenc_error validate_config(
  astcenc_config &config
) {
  // Run the structural checks in order; the first failure is the one reported
  astcenc_error status = validate_profile(config.profile);

  if (status == ASTCENC_SUCCESS)
  {
    status = validate_flags(config.profile, config.flags);
  }

  if (status == ASTCENC_SUCCESS)
  {
    status = validate_block_size(config.block_x, config.block_y, config.block_z);
  }

  if (status != ASTCENC_SUCCESS)
  {
    return status;
  }

#if defined(ASTCENC_DECOMPRESS_ONLY)
  // Decompress-only builds only support decompress-only contexts
  const bool decompress_only = (config.flags & ASTCENC_FLG_DECOMPRESS_ONLY) != 0;
  if (!decompress_only)
  {
    return ASTCENC_ERR_BAD_PARAM;
  }
#endif

  // Out-of-range tuning values are pulled back into range rather than rejected
  config.rgbm_m_scale = astc::max(config.rgbm_m_scale, 1.0f);

  // Search breadth limits
  config.tune_partition_count_limit = astc::clamp(config.tune_partition_count_limit, 1u, 4u);
  config.tune_2partition_index_limit = astc::clamp(config.tune_2partition_index_limit, 1u, BLOCK_MAX_PARTITIONINGS);
  config.tune_3partition_index_limit = astc::clamp(config.tune_3partition_index_limit, 1u, BLOCK_MAX_PARTITIONINGS);
  config.tune_4partition_index_limit = astc::clamp(config.tune_4partition_index_limit, 1u, BLOCK_MAX_PARTITIONINGS);
  config.tune_block_mode_limit = astc::clamp(config.tune_block_mode_limit, 1u, 100u);
  config.tune_refinement_limit = astc::max(config.tune_refinement_limit, 1u);

  // Candidate list sizes
  config.tune_candidate_limit = astc::clamp(config.tune_candidate_limit, 1u, TUNE_MAX_TRIAL_CANDIDATES);
  config.tune_2partitioning_candidate_limit = astc::clamp(config.tune_2partitioning_candidate_limit, 1u, TUNE_MAX_PARTITIONING_CANDIDATES);
  config.tune_3partitioning_candidate_limit = astc::clamp(config.tune_3partitioning_candidate_limit, 1u, TUNE_MAX_PARTITIONING_CANDIDATES);
  config.tune_4partitioning_candidate_limit = astc::clamp(config.tune_4partitioning_candidate_limit, 1u, TUNE_MAX_PARTITIONING_CANDIDATES);

  // Quality thresholds and early-out factors only have a lower bound
  config.tune_db_limit = astc::max(config.tune_db_limit, 0.0f);
  config.tune_mse_overshoot = astc::max(config.tune_mse_overshoot, 1.0f);
  config.tune_2partition_early_out_limit_factor = astc::max(config.tune_2partition_early_out_limit_factor, 0.0f);
  config.tune_3partition_early_out_limit_factor = astc::max(config.tune_3partition_early_out_limit_factor, 0.0f);
  config.tune_2plane_early_out_limit_correlation = astc::max(config.tune_2plane_early_out_limit_correlation, 0.0f);

  // At least one color component must carry a positive error weight
  const float heaviest = astc::max(astc::max(config.cw_r_weight, config.cw_g_weight),
                                   astc::max(config.cw_b_weight, config.cw_a_weight));
  if (!(heaviest > 0.0f))
  {
    return ASTCENC_ERR_BAD_PARAM;
  }

  // A zero weight on an individual component is not allowed; raise each one to at least a
  // thousandth of the largest weight
  const float weight_floor = heaviest / 1000.0f;
  config.cw_r_weight = astc::max(config.cw_r_weight, weight_floor);
  config.cw_g_weight = astc::max(config.cw_g_weight, weight_floor);
  config.cw_b_weight = astc::max(config.cw_b_weight, weight_floor);
  config.cw_a_weight = astc::max(config.cw_a_weight, weight_floor);

  return ASTCENC_SUCCESS;
}
434
435
/* See header for documentation. */
436
// Populate *configp with the default settings for the given profile, block size, quality level
// and flags. Returns ASTCENC_SUCCESS, or the error for the first check that fails; after a
// failure *configp has been zeroed and may be partly populated, so it must not be used.
astcenc_error astcenc_config_init(
  astcenc_profile profile,
  unsigned int block_x,
  unsigned int block_y,
  unsigned int block_z,
  float quality,
  unsigned int flags,
  astcenc_config* configp
) {
  astcenc_error status;

  status = validate_cpu_float();
  if (status != ASTCENC_SUCCESS)
  {
    return status;
  }

  // The output structure is written unconditionally below, so reject a null pointer
  if (!configp)
  {
    return ASTCENC_ERR_BAD_PARAM;
  }

  // Zero init all config fields; although most of them will be overwritten
  astcenc_config& config = *configp;
  std::memset(&config, 0, sizeof(config));

  // Process the block size
  block_z = astc::max(block_z, 1u); // For 2D blocks Z==0 is accepted, but convert to 1
  status = validate_block_size(block_x, block_y, block_z);
  if (status != ASTCENC_SUCCESS)
  {
    return status;
  }

  config.block_x = block_x;
  config.block_y = block_y;
  config.block_z = block_z;

  const unsigned int texel_count = block_x * block_y * block_z;
  float texels = static_cast<float>(texel_count);
  float ltexels = logf(texels) / logf(10.0f);

  // Process the performance quality level or preset; note that this must be done before we
  // process any additional settings, such as color profile and flags, which may replace some of
  // these settings with more use case tuned values.
  //
  // The test is written as a negated in-range check so that a NaN quality is rejected too; NaN
  // compares false against both bounds and would otherwise run the preset search off the end of
  // the table.
  if (!((quality >= ASTCENC_PRE_FASTEST) && (quality <= ASTCENC_PRE_EXHAUSTIVE)))
  {
    return ASTCENC_ERR_BAD_QUALITY;
  }

  // Select the preset table for this block size. This is deliberately a plain local: a static
  // pointer would be rewritten by every call and race when configs are built on several threads.
  const std::array<astcenc_preset_config, 6>* preset_configs;
  if (texel_count < 25u)
  {
    preset_configs = &preset_configs_high;
  }
  else if (texel_count < 64u)
  {
    preset_configs = &preset_configs_mid;
  }
  else
  {
    preset_configs = &preset_configs_low;
  }

  // Determine which preset to use, or which pair to interpolate. The search stops on the last
  // row even if no row matched, so "end" is always a valid index.
  const size_t last = preset_configs->size() - 1;
  size_t end;
  for (end = 0; end < last; end++)
  {
    if ((*preset_configs)[end].quality >= quality)
    {
      break;
    }
  }

  size_t start = end == 0 ? 0 : end - 1;

  // Start and end node are the same - so just transfer the values.
  if (start == end)
  {
    const auto& node = (*preset_configs)[start];

    config.tune_partition_count_limit = node.tune_partition_count_limit;
    config.tune_2partition_index_limit = node.tune_2partition_index_limit;
    config.tune_3partition_index_limit = node.tune_3partition_index_limit;
    config.tune_4partition_index_limit = node.tune_4partition_index_limit;
    config.tune_block_mode_limit = node.tune_block_mode_limit;
    config.tune_refinement_limit = node.tune_refinement_limit;
    config.tune_candidate_limit = node.tune_candidate_limit;
    config.tune_2partitioning_candidate_limit = node.tune_2partitioning_candidate_limit;
    config.tune_3partitioning_candidate_limit = node.tune_3partitioning_candidate_limit;
    config.tune_4partitioning_candidate_limit = node.tune_4partitioning_candidate_limit;
    config.tune_db_limit = astc::max(node.tune_db_limit_a_base - 35 * ltexels,
                                     node.tune_db_limit_b_base - 19 * ltexels);

    config.tune_mse_overshoot = node.tune_mse_overshoot;

    config.tune_2partition_early_out_limit_factor = node.tune_2partition_early_out_limit_factor;
    config.tune_3partition_early_out_limit_factor = node.tune_3partition_early_out_limit_factor;
    config.tune_2plane_early_out_limit_correlation = node.tune_2plane_early_out_limit_correlation;
    config.tune_search_mode0_enable = node.tune_search_mode0_enable;
  }
  // Start and end node are not the same - so interpolate between them
  else
  {
    auto& node_a = (*preset_configs)[start];
    auto& node_b = (*preset_configs)[end];

    float wt_range = node_b.quality - node_a.quality;
    assert(wt_range > 0);

    // Compute interpolation factors
    float wt_node_a = (node_b.quality - quality) / wt_range;
    float wt_node_b = (quality - node_a.quality) / wt_range;

    #define LERP(param) ((node_a.param * wt_node_a) + (node_b.param * wt_node_b))
    #define LERPI(param) astc::flt2int_rtn(\
                             (static_cast<float>(node_a.param) * wt_node_a) + \
                             (static_cast<float>(node_b.param) * wt_node_b))
    #define LERPUI(param) static_cast<unsigned int>(LERPI(param))

    // Every integer setting is unsigned, so convert explicitly for all of them
    config.tune_partition_count_limit = LERPUI(tune_partition_count_limit);
    config.tune_2partition_index_limit = LERPUI(tune_2partition_index_limit);
    config.tune_3partition_index_limit = LERPUI(tune_3partition_index_limit);
    config.tune_4partition_index_limit = LERPUI(tune_4partition_index_limit);
    config.tune_block_mode_limit = LERPUI(tune_block_mode_limit);
    config.tune_refinement_limit = LERPUI(tune_refinement_limit);
    config.tune_candidate_limit = LERPUI(tune_candidate_limit);
    config.tune_2partitioning_candidate_limit = LERPUI(tune_2partitioning_candidate_limit);
    config.tune_3partitioning_candidate_limit = LERPUI(tune_3partitioning_candidate_limit);
    config.tune_4partitioning_candidate_limit = LERPUI(tune_4partitioning_candidate_limit);
    config.tune_db_limit = astc::max(LERP(tune_db_limit_a_base) - 35 * ltexels,
                                     LERP(tune_db_limit_b_base) - 19 * ltexels);

    config.tune_mse_overshoot = LERP(tune_mse_overshoot);

    config.tune_2partition_early_out_limit_factor = LERP(tune_2partition_early_out_limit_factor);
    config.tune_3partition_early_out_limit_factor = LERP(tune_3partition_early_out_limit_factor);
    config.tune_2plane_early_out_limit_correlation = LERP(tune_2plane_early_out_limit_correlation);
    config.tune_search_mode0_enable = LERP(tune_search_mode0_enable);
    #undef LERP
    #undef LERPI
    #undef LERPUI
  }

  // Set heuristics to the defaults for each color profile
  config.cw_r_weight = 1.0f;
  config.cw_g_weight = 1.0f;
  config.cw_b_weight = 1.0f;
  config.cw_a_weight = 1.0f;

  config.a_scale_radius = 0;

  config.rgbm_m_scale = 0.0f;

  config.profile = profile;

  // Values in this enum are from an external user, so not guaranteed to be
  // bounded to the enum values
  switch (static_cast<int>(profile))
  {
  case ASTCENC_PRF_LDR:
  case ASTCENC_PRF_LDR_SRGB:
    break;
  case ASTCENC_PRF_HDR_RGB_LDR_A:
  case ASTCENC_PRF_HDR:
    config.tune_db_limit = 999.0f;
    config.tune_search_mode0_enable = 0.0f;
    break;
  default:
    return ASTCENC_ERR_BAD_PROFILE;
  }

  // Flags must be a known, mutually consistent set that is valid for this profile
  status = validate_flags(profile, flags);
  if (status != ASTCENC_SUCCESS)
  {
    return status;
  }

  if (flags & ASTCENC_FLG_MAP_NORMAL)
  {
    // Normal map encoding uses L+A blocks, so allow one more partitioning
    // than normal. We need fewer bits for endpoints, so more likely
    // to be able to use more partitions than an RGB/RGBA block
    config.tune_partition_count_limit = astc::min(config.tune_partition_count_limit + 1u, 4u);

    config.cw_g_weight = 0.0f;
    config.cw_b_weight = 0.0f;
    config.tune_2partition_early_out_limit_factor *= 1.5f;
    config.tune_3partition_early_out_limit_factor *= 1.5f;
    config.tune_2plane_early_out_limit_correlation = 0.99f;

    // Normals are prone to blocking artifacts on smooth curves
    // so force compressor to try harder here ...
    config.tune_db_limit *= 1.03f;
  }
  else if (flags & ASTCENC_FLG_MAP_RGBM)
  {
    config.rgbm_m_scale = 5.0f;
    config.cw_a_weight = 2.0f * config.rgbm_m_scale;
  }
  else // (This is color data)
  {
    // This is a very basic perceptual metric for RGB color data, which weights error
    // significance by the perceptual luminance contribution of each color channel. For
    // luminance the usual weights to compute luminance from a linear RGB value are as
    // follows:
    //
    //     l = r * 0.3 + g * 0.59 + b * 0.11
    //
    // ... but we scale these up to keep a better balance between color and alpha. Note
    // that if the content is using alpha we'd recommend using the -a option to weight
    // the color contribution by the alpha transparency.
    if (flags & ASTCENC_FLG_USE_PERCEPTUAL)
    {
      config.cw_r_weight = 0.30f * 2.25f;
      config.cw_g_weight = 0.59f * 2.25f;
      config.cw_b_weight = 0.11f * 2.25f;
    }
  }
  config.flags = flags;

  return ASTCENC_SUCCESS;
}
655
656
/* See header for documentation. */
657
// Allocate a codec context, either from a configuration or as a child that shares the block size
// descriptor of an existing parent context. On success *context owns the new context and must be
// released with astcenc_context_free(); on any failure after the context pointer has been
// checked, *context is set to nullptr and nothing is left allocated.
astcenc_error astcenc_context_alloc(
  const astcenc_config* configp,
  unsigned int thread_count,
  astcenc_context** context,
  const astcenc_context* parent_context
) {
  astcenc_error status;

  // There is nowhere to return the context without an output pointer
  if (!context)
  {
    return ASTCENC_ERR_BAD_PARAM;
  }

  // Make every failure path report a null context, not just the out-of-memory one
  *context = nullptr;

  status = validate_cpu_float();
  if (status != ASTCENC_SUCCESS)
  {
    return status;
  }

  if (thread_count == 0)
  {
    return ASTCENC_ERR_BAD_PARAM;
  }

#if defined(ASTCENC_DIAGNOSTICS)
  // Force single threaded compressor use in diagnostic mode
  if (thread_count != 1)
  {
    return ASTCENC_ERR_BAD_PARAM;
  }
#endif

  // Exactly one of config or parent_context must be set
  bool has_config = configp != nullptr;
  bool has_parent = parent_context != nullptr;
  if (has_config == has_parent)
  {
    return ASTCENC_ERR_BAD_PARAM;
  }

  if (has_parent)
  {
    configp = &parent_context->context.config;
  }

  // Report allocation failure through the return code rather than by throwing
  astcenc_context* ctxo = new (std::nothrow) astcenc_context;
  if (!ctxo)
  {
    return ASTCENC_ERR_OUT_OF_MEM;
  }

  astcenc_contexti* ctx = &ctxo->context;
  ctx->thread_count = thread_count;
  ctx->config = *configp;
  ctx->working_buffers = nullptr;

  // These are allocated per-compress, as they depend on image size
  ctx->input_alpha_averages = nullptr;

  // Copy the config first and validate the copy (we may modify it)
  status = validate_config(ctx->config);
  if (status != ASTCENC_SUCCESS)
  {
    delete ctxo;
    return status;
  }

  if (!has_parent)
  {
    block_size_descriptor* bsd = aligned_malloc<block_size_descriptor>(sizeof(block_size_descriptor), ASTCENC_VECALIGN);
    if (!bsd)
    {
      delete ctxo;
      return ASTCENC_ERR_OUT_OF_MEM;
    }

    // Build the descriptor from the validated copy, so that it sees the clamped limits rather
    // than whatever the caller passed in
    const astcenc_config& config = ctx->config;
    bool can_omit_modes = static_cast<bool>(config.flags & ASTCENC_FLG_SELF_DECOMPRESS_ONLY);
    init_block_size_descriptor(config.block_x, config.block_y, config.block_z,
                               can_omit_modes,
                               config.tune_partition_count_limit,
                               static_cast<float>(config.tune_block_mode_limit) / 100.0f,
                               *bsd);

    ctx->owns_bsd = true;
    ctx->bsd = bsd;
  }
  else
  {
    ctx->owns_bsd = false;
    ctx->bsd = parent_context->context.bsd;
  }

#if !defined(ASTCENC_DECOMPRESS_ONLY)
  // Do setup only needed by compression
  if (!(ctx->config.flags & ASTCENC_FLG_DECOMPRESS_ONLY))
  {
    // Turn a dB limit into a per-texel error for faster use later. A child context copies the
    // config stored in its parent, which went through this conversion when the parent was
    // created, so it must not be converted a second time.
    if (!has_parent)
    {
      if ((ctx->config.profile == ASTCENC_PRF_LDR) || (ctx->config.profile == ASTCENC_PRF_LDR_SRGB))
      {
        ctx->config.tune_db_limit = astc::pow(0.1f, ctx->config.tune_db_limit * 0.1f) * 65535.0f * 65535.0f;
      }
      else
      {
        ctx->config.tune_db_limit = 0.0f;
      }
    }

    size_t worksize = sizeof(compression_working_buffers) * thread_count;
    ctx->working_buffers = aligned_malloc<compression_working_buffers>(worksize, ASTCENC_VECALIGN);
    static_assert((ASTCENC_VECALIGN == 0) || ((sizeof(compression_working_buffers) % ASTCENC_VECALIGN) == 0),
                  "compression_working_buffers size must be multiple of vector alignment");
    if (!ctx->working_buffers)
    {
      if (ctx->owns_bsd)
      {
        aligned_free<const block_size_descriptor>(ctx->bsd);
      }
      delete ctxo;
      return ASTCENC_ERR_OUT_OF_MEM;
    }
  }
#endif

#if defined(ASTCENC_DIAGNOSTICS)
  ctx->trace_log = new TraceLog(ctx->config.trace_file_path);
  if (!ctx->trace_log->m_file)
  {
    // Release everything built so far; the caller never receives this context
    delete ctx->trace_log;
    aligned_free<compression_working_buffers>(ctx->working_buffers);
    if (ctx->owns_bsd)
    {
      aligned_free<const block_size_descriptor>(ctx->bsd);
    }
    delete ctxo;
    return ASTCENC_ERR_DTRACE_FAILURE;
  }

  trace_add_data("block_x", ctx->config.block_x);
  trace_add_data("block_y", ctx->config.block_y);
  trace_add_data("block_z", ctx->config.block_z);
#endif

  *context = ctxo;

#if !defined(ASTCENC_DECOMPRESS_ONLY)
  prepare_angular_tables();
#endif

  return ASTCENC_SUCCESS;
}
791
792
/* See header for documentation. */
793
void astcenc_context_free(
794
  astcenc_context* ctxo
795
3.58k
) {
796
3.58k
  if (ctxo)
797
3.58k
  {
798
3.58k
    astcenc_contexti* ctx = &ctxo->context;
799
3.58k
    aligned_free<compression_working_buffers>(ctx->working_buffers);
800
3.58k
    if (ctx->owns_bsd)
801
3.58k
    {
802
3.58k
      aligned_free<const block_size_descriptor>(ctx->bsd);
803
3.58k
    }
804
#if defined(ASTCENC_DIAGNOSTICS)
805
    delete ctx->trace_log;
806
#endif
807
3.58k
    delete ctxo;
808
3.58k
  }
809
3.58k
}
810
811
#if !defined(ASTCENC_DECOMPRESS_ONLY)
812
813
/**
814
 * @brief Compress an image, after any preflight has completed.
815
 *
816
 * @param[out] ctxo           The compressor context.
817
 * @param      thread_index   The thread index.
818
 * @param      image          The input image.
819
 * @param      swizzle        The input swizzle.
820
 * @param[out] buffer         The output array for the compressed data.
821
 */
822
static void compress_image(
823
  astcenc_context& ctxo,
824
  unsigned int thread_index,
825
  const astcenc_image& image,
826
  const astcenc_swizzle& swizzle,
827
  uint8_t* buffer
828
2.21k
) {
829
2.21k
  astcenc_contexti& ctx = ctxo.context;
830
2.21k
  const block_size_descriptor& bsd = *ctx.bsd;
831
2.21k
  astcenc_profile decode_mode = ctx.config.profile;
832
833
2.21k
  image_block blk;
834
835
2.21k
  int block_x = bsd.xdim;
836
2.21k
  int block_y = bsd.ydim;
837
2.21k
  int block_z = bsd.zdim;
838
2.21k
  blk.texel_count = static_cast<uint8_t>(block_x * block_y * block_z);
839
840
2.21k
  int dim_x = image.dim_x;
841
2.21k
  int dim_y = image.dim_y;
842
2.21k
  int dim_z = image.dim_z;
843
844
2.21k
  int xblocks = (dim_x + block_x - 1) / block_x;
845
2.21k
  int yblocks = (dim_y + block_y - 1) / block_y;
846
2.21k
  int zblocks = (dim_z + block_z - 1) / block_z;
847
2.21k
  int block_count = zblocks * yblocks * xblocks;
848
849
2.21k
  int row_blocks = xblocks;
850
2.21k
  int plane_blocks = xblocks * yblocks;
851
852
2.21k
  blk.decode_unorm8 = ctxo.context.config.flags & ASTCENC_FLG_USE_DECODE_UNORM8;
853
854
  // Populate the block channel weights
855
2.21k
  blk.channel_weight = vfloat4(ctx.config.cw_r_weight,
856
2.21k
                               ctx.config.cw_g_weight,
857
2.21k
                               ctx.config.cw_b_weight,
858
2.21k
                               ctx.config.cw_a_weight);
859
860
  // Use preallocated scratch buffer
861
2.21k
  auto& temp_buffers = ctx.working_buffers[thread_index];
862
863
  // Only the first thread actually runs the initializer
864
2.21k
  ctxo.manage_compress.init(block_count, ctx.config.progress_callback);
865
866
  // Determine if we can use an optimized load function
867
2.21k
  bool needs_swz = (swizzle.r != ASTCENC_SWZ_R) || (swizzle.g != ASTCENC_SWZ_G) ||
868
2.21k
                   (swizzle.b != ASTCENC_SWZ_B) || (swizzle.a != ASTCENC_SWZ_A);
869
870
2.21k
  bool needs_hdr = (decode_mode == ASTCENC_PRF_HDR) ||
871
1.70k
                   (decode_mode == ASTCENC_PRF_HDR_RGB_LDR_A);
872
873
2.21k
  bool use_fast_load = !needs_swz && !needs_hdr &&
874
1.09k
                       block_z == 1 && image.data_type == ASTCENC_TYPE_U8;
875
876
2.21k
  auto load_func = load_image_block;
877
2.21k
  if (use_fast_load)
878
1.09k
  {
879
1.09k
    load_func = load_image_block_fast_ldr;
880
1.09k
  }
881
882
  // All threads run this processing loop until there is no work remaining
883
4.43k
  while (true)
884
4.43k
  {
885
4.43k
    unsigned int count;
886
4.43k
    unsigned int base = ctxo.manage_compress.get_task_assignment(16, count);
887
4.43k
    if (!count)
888
2.21k
    {
889
2.21k
      break;
890
2.21k
    }
891
892
4.43k
    for (unsigned int i = base; i < base + count; i++)
893
2.21k
    {
894
      // Decode i into x, y, z block indices
895
2.21k
      int z = i / plane_blocks;
896
2.21k
      unsigned int rem = i - (z * plane_blocks);
897
2.21k
      int y = rem / row_blocks;
898
2.21k
      int x = rem - (y * row_blocks);
899
900
      // Test if we can apply some basic alpha-scale RDO
901
2.21k
      bool use_full_block = true;
902
2.21k
      if (ctx.config.a_scale_radius != 0 && block_z == 1)
903
0
      {
904
0
        int start_x = x * block_x;
905
0
        int end_x = astc::min(dim_x, start_x + block_x);
906
907
0
        int start_y = y * block_y;
908
0
        int end_y = astc::min(dim_y, start_y + block_y);
909
910
        // SATs accumulate error, so don't test exactly zero. Test for
911
        // less than 1 alpha in the expanded block footprint that
912
        // includes the alpha radius.
913
0
        int x_footprint = block_x + 2 * (ctx.config.a_scale_radius - 1);
914
915
0
        int y_footprint = block_y + 2 * (ctx.config.a_scale_radius - 1);
916
917
0
        float footprint = static_cast<float>(x_footprint * y_footprint);
918
0
        float threshold = 0.9f / (255.0f * footprint);
919
920
        // Do we have any alpha values?
921
0
        use_full_block = false;
922
0
        for (int ay = start_y; ay < end_y; ay++)
923
0
        {
924
0
          for (int ax = start_x; ax < end_x; ax++)
925
0
          {
926
0
            float a_avg = ctx.input_alpha_averages[ay * dim_x + ax];
927
0
            if (a_avg > threshold)
928
0
            {
929
0
              use_full_block = true;
930
0
              ax = end_x;
931
0
              ay = end_y;
932
0
            }
933
0
          }
934
0
        }
935
0
      }
936
937
      // Fetch the full block for compression
938
2.21k
      if (use_full_block)
939
2.21k
      {
940
2.21k
        load_func(decode_mode, image, blk, bsd, x * block_x, y * block_y, z * block_z, swizzle);
941
942
        // Scale RGB error contribution by the maximum alpha in the block
943
        // This encourages preserving alpha accuracy in regions with high
944
        // transparency, and can buy up to 0.5 dB PSNR.
945
2.21k
        if (ctx.config.flags & ASTCENC_FLG_USE_ALPHA_WEIGHT)
946
820
        {
947
820
          float alpha_scale = blk.data_max.lane<3>() * (1.0f / 65535.0f);
948
820
          blk.channel_weight = vfloat4(ctx.config.cw_r_weight * alpha_scale,
949
820
                                       ctx.config.cw_g_weight * alpha_scale,
950
820
                                       ctx.config.cw_b_weight * alpha_scale,
951
820
                                       ctx.config.cw_a_weight);
952
820
        }
953
2.21k
      }
954
      // Apply alpha scale RDO - substitute constant color block
955
0
      else
956
0
      {
957
0
        blk.origin_texel = vfloat4::zero();
958
0
        blk.data_min = vfloat4::zero();
959
0
        blk.data_mean = vfloat4::zero();
960
0
        blk.data_max = vfloat4::zero();
961
0
        blk.grayscale = true;
962
0
      }
963
964
2.21k
      int offset = ((z * yblocks + y) * xblocks + x) * 16;
965
2.21k
      uint8_t *bp = buffer + offset;
966
2.21k
      compress_block(ctx, blk, bp, temp_buffers);
967
2.21k
    }
968
969
2.21k
    ctxo.manage_compress.complete_task_assignment(count);
970
2.21k
  }
971
2.21k
}
972
973
/**
974
 * @brief Compute regional averages in an image.
975
 *
976
 * This function can be called by multiple threads, but only after a single
977
 * thread calls the setup function @c init_compute_averages().
978
 *
979
 * Results are written back into @c img->input_alpha_averages.
980
 *
981
 * @param[out] ctx   The context.
982
 * @param      ag    The average and variance arguments created during setup.
983
 */
984
static void compute_averages(
985
  astcenc_context& ctx,
986
  const avg_args &ag
987
0
) {
988
0
  pixel_region_args arg = ag.arg;
989
0
  arg.work_memory = new vfloat4[ag.work_memory_size];
990
991
0
  int size_x = ag.img_size_x;
992
0
  int size_y = ag.img_size_y;
993
0
  int size_z = ag.img_size_z;
994
995
0
  int step_xy = ag.blk_size_xy;
996
0
  int step_z = ag.blk_size_z;
997
998
0
  int y_tasks = (size_y + step_xy - 1) / step_xy;
999
1000
  // All threads run this processing loop until there is no work remaining
1001
0
  while (true)
1002
0
  {
1003
0
    unsigned int count;
1004
0
    unsigned int base = ctx.manage_avg.get_task_assignment(16, count);
1005
0
    if (!count)
1006
0
    {
1007
0
      break;
1008
0
    }
1009
1010
0
    for (unsigned int i = base; i < base + count; i++)
1011
0
    {
1012
0
      int z = (i / (y_tasks)) * step_z;
1013
0
      int y = (i - (z * y_tasks)) * step_xy;
1014
1015
0
      arg.size_z = astc::min(step_z, size_z - z);
1016
0
      arg.offset_z = z;
1017
1018
0
      arg.size_y = astc::min(step_xy, size_y - y);
1019
0
      arg.offset_y = y;
1020
1021
0
      for (int x = 0; x < size_x; x += step_xy)
1022
0
      {
1023
0
        arg.size_x = astc::min(step_xy, size_x - x);
1024
0
        arg.offset_x = x;
1025
0
        compute_pixel_region_variance(ctx.context, arg);
1026
0
      }
1027
0
    }
1028
1029
0
    ctx.manage_avg.complete_task_assignment(count);
1030
0
  }
1031
1032
0
  delete[] arg.work_memory;
1033
0
}
1034
1035
#endif
1036
1037
/* See header for documentation. */
1038
astcenc_error astcenc_compress_image(
  astcenc_context* ctxo,
  astcenc_image* imagep,
  const astcenc_swizzle* swizzle,
  uint8_t* data_out,
  size_t data_len,
  unsigned int thread_index
) {
#if defined(ASTCENC_DECOMPRESS_ONLY)
  (void)ctxo;
  (void)imagep;
  (void)swizzle;
  (void)data_out;
  (void)data_len;
  (void)thread_index;
  return ASTCENC_ERR_BAD_CONTEXT;
#else
  astcenc_contexti* ctx = &ctxo->context;
  astcenc_error status;
  astcenc_image& image = *imagep;

  // A context created for decompression only cannot compress
  if (ctx->config.flags & ASTCENC_FLG_DECOMPRESS_ONLY)
  {
    return ASTCENC_ERR_BAD_CONTEXT;
  }

  status = validate_compression_swizzle(*swizzle);
  if (status != ASTCENC_SUCCESS)
  {
    return status;
  }

  if (thread_index >= ctx->thread_count)
  {
    return ASTCENC_ERR_BAD_PARAM;
  }

  unsigned int block_x = ctx->config.block_x;
  unsigned int block_y = ctx->config.block_y;
  unsigned int block_z = ctx->config.block_z;

  unsigned int xblocks = (image.dim_x + block_x - 1) / block_x;
  unsigned int yblocks = (image.dim_y + block_y - 1) / block_y;
  unsigned int zblocks = (image.dim_z + block_z - 1) / block_z;

  // Check we have enough output space (16 bytes per block). Widen to size_t
  // before multiplying so the product cannot wrap in 32-bit arithmetic and
  // let an undersized buffer slip past the check.
  size_t size_needed = static_cast<size_t>(xblocks) * yblocks * zblocks * 16;
  if (data_len < size_needed)
  {
    return ASTCENC_ERR_OUT_OF_MEM;
  }

  // If context thread count is one then implicitly reset
  if (ctx->thread_count == 1)
  {
    astcenc_compress_reset(ctxo);
  }

  if (ctx->config.a_scale_radius != 0)
  {
    // First thread to enter will do setup, other threads will subsequently
    // enter the critical section but simply skip over the initialization
    auto init_avg = [ctx, &image, swizzle]() {
      // Perform memory allocations for the destination buffers. The texel
      // count is computed in size_t to avoid 32-bit overflow on large images.
      size_t texel_count = static_cast<size_t>(image.dim_x) * image.dim_y * image.dim_z;
      ctx->input_alpha_averages = new float[texel_count];

      return init_compute_averages(
        image, ctx->config.a_scale_radius, *swizzle,
        ctx->avg_preprocess_args);
    };

    // Only the first thread actually runs the initializer
    ctxo->manage_avg.init(init_avg);

    // All threads will enter this function and dynamically grab work
    compute_averages(*ctxo, ctx->avg_preprocess_args);
  }

  // Wait for compute_averages to complete before compressing
  ctxo->manage_avg.wait();

  compress_image(*ctxo, thread_index, image, *swizzle, data_out);

  // Wait for compress to complete before freeing memory
  ctxo->manage_compress.wait();

  auto term_compress = [ctx]() {
    delete[] ctx->input_alpha_averages;
    ctx->input_alpha_averages = nullptr;
  };

  // Only the first thread to arrive actually runs the term
  ctxo->manage_compress.term(term_compress);

  return ASTCENC_SUCCESS;
#endif
}
1136
1137
/* See header for documentation. */
1138
astcenc_error astcenc_compress_reset(
  astcenc_context* ctxo
) {
#if defined(ASTCENC_DECOMPRESS_ONLY)
  (void)ctxo;
  return ASTCENC_ERR_BAD_CONTEXT;
#else
  // Contexts configured for decompression only have no compression state
  const bool decompress_only =
      (ctxo->context.config.flags & ASTCENC_FLG_DECOMPRESS_ONLY) != 0;
  if (decompress_only)
  {
    return ASTCENC_ERR_BAD_CONTEXT;
  }

  // Reset the averaging manager first, then the compression manager
  ctxo->manage_avg.reset();
  ctxo->manage_compress.reset();

  return ASTCENC_SUCCESS;
#endif
}
1156
1157
/* See header for documentation. */
1158
astcenc_error astcenc_compress_cancel(
  astcenc_context* ctxo
) {
#if defined(ASTCENC_DECOMPRESS_ONLY)
  (void)ctxo;
  return ASTCENC_ERR_BAD_CONTEXT;
#else
  // Contexts configured for decompression only have nothing to cancel
  const bool decompress_only =
      (ctxo->context.config.flags & ASTCENC_FLG_DECOMPRESS_ONLY) != 0;
  if (decompress_only)
  {
    return ASTCENC_ERR_BAD_CONTEXT;
  }

  // The ordering matters: compression is cancelled ahead of averaging.
  // Doing it the other way round opens a race in which a compression
  // worker could begin processing while some of the avg data is still
  // undefined.
  ctxo->manage_compress.cancel();
  ctxo->manage_avg.cancel();

  return ASTCENC_SUCCESS;
#endif
}
1179
1180
/* See header for documentation. */
1181
astcenc_error astcenc_decompress_image(
  astcenc_context* ctxo,
  const uint8_t* data,
  size_t data_len,
  astcenc_image* image_outp,
  const astcenc_swizzle* swizzle,
  unsigned int thread_index
) {
  astcenc_error status;
  astcenc_image& image_out = *image_outp;
  astcenc_contexti* ctx = &ctxo->context;

  // Today this doesn't matter (working set on stack) but might in future ...
  if (thread_index >= ctx->thread_count)
  {
    return ASTCENC_ERR_BAD_PARAM;
  }

  status = validate_decompression_swizzle(*swizzle);
  if (status != ASTCENC_SUCCESS)
  {
    return status;
  }

  unsigned int block_x = ctx->config.block_x;
  unsigned int block_y = ctx->config.block_y;
  unsigned int block_z = ctx->config.block_z;

  unsigned int xblocks = (image_out.dim_x + block_x - 1) / block_x;
  unsigned int yblocks = (image_out.dim_y + block_y - 1) / block_y;
  unsigned int zblocks = (image_out.dim_z + block_z - 1) / block_z;
  unsigned int block_count = zblocks * yblocks * xblocks;

  int row_blocks = xblocks;
  int plane_blocks = xblocks * yblocks;

  // Check we have enough input data (16 bytes per block). Widen to size_t
  // before multiplying so the product cannot wrap in 32-bit arithmetic and
  // let a truncated input buffer slip past the check.
  size_t size_needed = static_cast<size_t>(xblocks) * yblocks * zblocks * 16;
  if (data_len < size_needed)
  {
    return ASTCENC_ERR_OUT_OF_MEM;
  }

  image_block blk {};
  blk.texel_count = static_cast<uint8_t>(block_x * block_y * block_z);

  // Decode mode inferred from the output data type
  blk.decode_unorm8 = image_out.data_type == ASTCENC_TYPE_U8;

  // If context thread count is one then implicitly reset
  if (ctx->thread_count == 1)
  {
    astcenc_decompress_reset(ctxo);
  }

  // Only the first thread actually runs the initializer
  ctxo->manage_decompress.init(block_count, nullptr);

  // All threads run this processing loop until there is no work remaining
  while (true)
  {
    unsigned int count;
    unsigned int base = ctxo->manage_decompress.get_task_assignment(128, count);
    if (!count)
    {
      break;
    }

    for (unsigned int i = base; i < base + count; i++)
    {
      // Decode i into x, y, z block indices
      int z = i / plane_blocks;
      unsigned int rem = i - (z * plane_blocks);
      int y = rem / row_blocks;
      int x = rem - (y * row_blocks);

      // Byte offset of this block in the input stream, computed in size_t
      // so that it cannot overflow 32-bit arithmetic for very large images
      size_t offset = ((static_cast<size_t>(z) * yblocks + y) * xblocks + x) * 16;
      const uint8_t* bp = data + offset;

      symbolic_compressed_block scb;

      physical_to_symbolic(*ctx->bsd, bp, scb);

      decompress_symbolic_block(ctx->config.profile, *ctx->bsd,
                                x * block_x, y * block_y, z * block_z,
                                scb, blk);

      store_image_block(image_out, blk, *ctx->bsd,
                        x * block_x, y * block_y, z * block_z, *swizzle);
    }

    ctxo->manage_decompress.complete_task_assignment(count);
  }

  return ASTCENC_SUCCESS;
}
1277
1278
/* See header for documentation. */
1279
astcenc_error astcenc_decompress_reset(
  astcenc_context* ctxo
) {
  // Reset the decompression task manager held by the context
  auto& decompress_manager = ctxo->manage_decompress;
  decompress_manager.reset();

  return ASTCENC_SUCCESS;
}
1285
1286
/* See header for documentation. */
1287
astcenc_error astcenc_get_block_info(
  astcenc_context* ctxo,
  const uint8_t data[16],
  astcenc_block_info* info
) {
#if defined(ASTCENC_DECOMPRESS_ONLY)
  (void)ctxo;
  (void)data;
  (void)info;
  return ASTCENC_ERR_BAD_CONTEXT;
#else
  astcenc_contexti* ctx = &ctxo->context;

  // The block size descriptor supplies the partition and decimation tables
  const block_size_descriptor& bsd = *ctx->bsd;

  // Turn the physical encoding into its symbolic representation
  symbolic_compressed_block scb;
  physical_to_symbolic(bsd, data, scb);

  // Zero the whole output structure before filling anything in
  memset(info, 0, sizeof(*info));

  // Fields that can be reported for every kind of block
  info->profile = ctx->config.profile;

  info->block_x = ctx->config.block_x;
  info->block_y = ctx->config.block_y;
  info->block_z = ctx->config.block_z;
  info->texel_count = bsd.texel_count;

  // Error blocks carry no further information
  const bool error_block = scb.block_type == SYM_BTYPE_ERROR;
  info->is_error_block = error_block;
  if (error_block)
  {
    return ASTCENC_SUCCESS;
  }

  // Neither do constant color blocks
  const bool constant_block = scb.block_type == SYM_BTYPE_CONST_F16 ||
                              scb.block_type == SYM_BTYPE_CONST_U16;
  info->is_constant_block = constant_block;
  if (constant_block)
  {
    return ASTCENC_SUCCESS;
  }

  // From here on this is a full block; the checks above make it known valid
  const auto& pi = bsd.get_partition_info(scb.partition_count, scb.partition_index);

  const block_mode& bm = bsd.get_block_mode(scb.block_mode);
  const decimation_info& di = bsd.get_decimation_info(bm.decimation_mode);

  const bool dual_plane = bm.is_dual_plane != 0;

  info->weight_x = di.weight_x;
  info->weight_y = di.weight_y;
  info->weight_z = di.weight_z;

  info->is_dual_plane_block = dual_plane;

  info->partition_count = scb.partition_count;
  info->partition_index = scb.partition_index;
  info->dual_plane_component = scb.plane2_component;

  info->color_level_count = get_quant_level(scb.get_color_quant_mode());
  info->weight_level_count = get_quant_level(bm.get_weight_quant_mode());

  // Decode the endpoint pair of every partition in use
  for (unsigned int part = 0; part < scb.partition_count; part++)
  {
    bool rgb_hdr;
    bool a_hdr;
    vint4 endpnt[2];

    unpack_color_endpoints(ctx->config.profile,
                           scb.color_formats[part],
                           scb.color_values[part],
                           rgb_hdr, a_hdr,
                           endpnt[0], endpnt[1]);

    // Record the endpoint mode; the block is HDR if any partition is
    info->color_endpoint_modes[part] = scb.color_formats[part];
    info->is_hdr_block = info->is_hdr_block || rgb_hdr || a_hdr;

    // Per lane, pick the LNS decoding for HDR lanes and the UNORM16 decoding
    // otherwise, then store the result as floats
    vmask4 hdr_mask(rgb_hdr, rgb_hdr, rgb_hdr, a_hdr);
    for (int ep = 0; ep < 2; ep++)
    {
      vint4 as_lns = lns_to_sf16(endpnt[ep]);
      vint4 as_unorm = unorm16_to_sf16(endpnt[ep]);
      vint4 chosen = select(as_unorm, as_lns, hdr_mask);
      store(float16_to_float(chosen), info->color_endpoints[part][ep]);
    }
  }

  // Expand the stored weights to one value per texel
  int weight_plane1[BLOCK_MAX_TEXELS];
  int weight_plane2[BLOCK_MAX_TEXELS];

  unpack_weights(bsd, scb, di, bm.is_dual_plane, weight_plane1, weight_plane2);

  // Per texel: normalized weights (second plane only for dual-plane blocks)
  // and the partition the texel is assigned to
  for (unsigned int t = 0; t < bsd.texel_count; t++)
  {
    info->weight_values_plane1[t] = static_cast<float>(weight_plane1[t]) * (1.0f / WEIGHTS_TEXEL_SUM);
    if (dual_plane)
    {
      info->weight_values_plane2[t] = static_cast<float>(weight_plane2[t]) * (1.0f / WEIGHTS_TEXEL_SUM);
    }

    info->partition_assignment[t] = pi.partition_of_texel[t];
  }

  return ASTCENC_SUCCESS;
#endif
}
1404
1405
/* See header for documentation. */
1406
const char* astcenc_get_error_string(
1407
  astcenc_error status
1408
0
) {
1409
  // Values in this enum are from an external user, so not guaranteed to be
1410
  // bounded to the enum values
1411
0
  switch (static_cast<int>(status))
1412
0
  {
1413
0
  case ASTCENC_SUCCESS:
1414
0
    return "ASTCENC_SUCCESS";
1415
0
  case ASTCENC_ERR_OUT_OF_MEM:
1416
0
    return "ASTCENC_ERR_OUT_OF_MEM";
1417
0
  case ASTCENC_ERR_BAD_CPU_FLOAT:
1418
0
    return "ASTCENC_ERR_BAD_CPU_FLOAT";
1419
0
  case ASTCENC_ERR_BAD_PARAM:
1420
0
    return "ASTCENC_ERR_BAD_PARAM";
1421
0
  case ASTCENC_ERR_BAD_BLOCK_SIZE:
1422
0
    return "ASTCENC_ERR_BAD_BLOCK_SIZE";
1423
0
  case ASTCENC_ERR_BAD_PROFILE:
1424
0
    return "ASTCENC_ERR_BAD_PROFILE";
1425
0
  case ASTCENC_ERR_BAD_QUALITY:
1426
0
    return "ASTCENC_ERR_BAD_QUALITY";
1427
0
  case ASTCENC_ERR_BAD_FLAGS:
1428
0
    return "ASTCENC_ERR_BAD_FLAGS";
1429
0
  case ASTCENC_ERR_BAD_SWIZZLE:
1430
0
    return "ASTCENC_ERR_BAD_SWIZZLE";
1431
0
  case ASTCENC_ERR_BAD_CONTEXT:
1432
0
    return "ASTCENC_ERR_BAD_CONTEXT";
1433
0
  case ASTCENC_ERR_NOT_IMPLEMENTED:
1434
0
    return "ASTCENC_ERR_NOT_IMPLEMENTED";
1435
0
  case ASTCENC_ERR_BAD_DECODE_MODE:
1436
0
    return "ASTCENC_ERR_BAD_DECODE_MODE";
1437
#if defined(ASTCENC_DIAGNOSTICS)
1438
  case ASTCENC_ERR_DTRACE_FAILURE:
1439
    return "ASTCENC_ERR_DTRACE_FAILURE";
1440
#endif
1441
0
  default:
1442
0
    return nullptr;
1443
0
  }
1444
0
}