Coverage Report

Created: 2025-12-14 06:04

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/astc-encoder/Source/astcenc_block_sizes.cpp
Line
Count
Source
1
// SPDX-License-Identifier: Apache-2.0
2
// ----------------------------------------------------------------------------
3
// Copyright 2011-2025 Arm Limited
4
//
5
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
6
// use this file except in compliance with the License. You may obtain a copy
7
// of the License at:
8
//
9
//     http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing, software
12
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14
// License for the specific language governing permissions and limitations
15
// under the License.
16
// ----------------------------------------------------------------------------
17
18
/**
19
 * @brief Functions to generate block size descriptor and decimation tables.
20
 */
21
22
#include "astcenc_internal.h"
23
24
/**
25
 * @brief Decode the properties of an encoded 2D block mode.
26
 *
27
 * @param      block_mode      The encoded block mode.
28
 * @param[out] x_weights       The number of weights in the X dimension.
29
 * @param[out] y_weights       The number of weights in the Y dimension.
30
 * @param[out] is_dual_plane   True if this block mode has two weight planes.
31
 * @param[out] quant_mode      The quantization level for the weights.
32
 * @param[out] weight_bits     The storage bit count for the weights.
33
 *
34
 * @return Returns true if a valid mode, false otherwise.
35
 */
36
static bool decode_block_mode_2d(
37
  unsigned int block_mode,
38
  unsigned int& x_weights,
39
  unsigned int& y_weights,
40
  bool& is_dual_plane,
41
  unsigned int& quant_mode,
42
  unsigned int& weight_bits
43
14.8k
) {
44
14.8k
  unsigned int base_quant_mode = (block_mode >> 4) & 1;
45
14.8k
  unsigned int H = (block_mode >> 9) & 1;
46
14.8k
  unsigned int D = (block_mode >> 10) & 1;
47
14.8k
  unsigned int A = (block_mode >> 5) & 0x3;
48
49
14.8k
  x_weights = 0;
50
14.8k
  y_weights = 0;
51
52
14.8k
  if ((block_mode & 3) != 0)
53
10.9k
  {
54
10.9k
    base_quant_mode |= (block_mode & 3) << 1;
55
10.9k
    unsigned int B = (block_mode >> 7) & 3;
56
10.9k
    switch ((block_mode >> 2) & 3)
57
10.9k
    {
58
2.62k
    case 0:
59
2.62k
      x_weights = B + 4;
60
2.62k
      y_weights = A + 2;
61
2.62k
      break;
62
2.85k
    case 1:
63
2.85k
      x_weights = B + 8;
64
2.85k
      y_weights = A + 2;
65
2.85k
      break;
66
2.85k
    case 2:
67
2.85k
      x_weights = A + 2;
68
2.85k
      y_weights = B + 8;
69
2.85k
      break;
70
2.60k
    case 3:
71
2.60k
      B &= 1;
72
2.60k
      if (block_mode & 0x100)
73
1.23k
      {
74
1.23k
        x_weights = B + 2;
75
1.23k
        y_weights = A + 2;
76
1.23k
      }
77
1.37k
      else
78
1.37k
      {
79
1.37k
        x_weights = A + 2;
80
1.37k
        y_weights = B + 6;
81
1.37k
      }
82
2.60k
      break;
83
10.9k
    }
84
10.9k
  }
85
3.94k
  else
86
3.94k
  {
87
3.94k
    base_quant_mode |= ((block_mode >> 2) & 3) << 1;
88
3.94k
    if (((block_mode >> 2) & 3) == 0)
89
1.02k
    {
90
1.02k
      return false;
91
1.02k
    }
92
93
2.92k
    unsigned int B = (block_mode >> 9) & 3;
94
2.92k
    switch ((block_mode >> 7) & 3)
95
2.92k
    {
96
727
    case 0:
97
727
      x_weights = 12;
98
727
      y_weights = A + 2;
99
727
      break;
100
727
    case 1:
101
727
      x_weights = A + 2;
102
727
      y_weights = 12;
103
727
      break;
104
707
    case 2:
105
707
      x_weights = A + 6;
106
707
      y_weights = B + 6;
107
707
      D = 0;
108
707
      H = 0;
109
707
      break;
110
760
    case 3:
111
760
      switch ((block_mode >> 5) & 3)
112
760
      {
113
188
      case 0:
114
188
        x_weights = 6;
115
188
        y_weights = 10;
116
188
        break;
117
188
      case 1:
118
188
        x_weights = 10;
119
188
        y_weights = 6;
120
188
        break;
121
192
      case 2:
122
384
      case 3:
123
384
        return false;
124
760
      }
125
376
      break;
126
2.92k
    }
127
2.92k
  }
128
129
13.4k
  unsigned int weight_count = x_weights * y_weights * (D + 1);
130
13.4k
  quant_mode = (base_quant_mode - 2) + 6 * H;
131
13.4k
  is_dual_plane = D != 0;
132
133
13.4k
  weight_bits = get_ise_sequence_bitcount(weight_count, static_cast<quant_method>(quant_mode));
134
13.4k
  return (weight_count <= BLOCK_MAX_WEIGHTS &&
135
10.8k
          weight_bits >= BLOCK_MIN_WEIGHT_BITS &&
136
10.0k
          weight_bits <= BLOCK_MAX_WEIGHT_BITS);
137
14.8k
}
138
139
/**
140
 * @brief Decode the properties of an encoded 3D block mode.
141
 *
142
 * @param      block_mode      The encoded block mode.
143
 * @param[out] x_weights       The number of weights in the X dimension.
144
 * @param[out] y_weights       The number of weights in the Y dimension.
145
 * @param[out] z_weights       The number of weights in the Z dimension.
146
 * @param[out] is_dual_plane   True if this block mode has two weight planes.
147
 * @param[out] quant_mode      The quantization level for the weights.
148
 * @param[out] weight_bits     The storage bit count for the weights.
149
 *
150
 * @return Returns true if a valid mode, false otherwise.
151
 */
152
static bool decode_block_mode_3d(
153
  unsigned int block_mode,
154
  unsigned int& x_weights,
155
  unsigned int& y_weights,
156
  unsigned int& z_weights,
157
  bool& is_dual_plane,
158
  unsigned int& quant_mode,
159
  unsigned int& weight_bits
160
3.66k
) {
161
3.66k
  unsigned int base_quant_mode = (block_mode >> 4) & 1;
162
3.66k
  unsigned int H = (block_mode >> 9) & 1;
163
3.66k
  unsigned int D = (block_mode >> 10) & 1;
164
3.66k
  unsigned int A = (block_mode >> 5) & 0x3;
165
166
3.66k
  x_weights = 0;
167
3.66k
  y_weights = 0;
168
3.66k
  z_weights = 0;
169
170
3.66k
  if ((block_mode & 3) != 0)
171
2.74k
  {
172
2.74k
    base_quant_mode |= (block_mode & 3) << 1;
173
2.74k
    unsigned int B = (block_mode >> 7) & 3;
174
2.74k
    unsigned int C = (block_mode >> 2) & 0x3;
175
2.74k
    x_weights = A + 2;
176
2.74k
    y_weights = B + 2;
177
2.74k
    z_weights = C + 2;
178
2.74k
  }
179
913
  else
180
913
  {
181
913
    base_quant_mode |= ((block_mode >> 2) & 3) << 1;
182
913
    if (((block_mode >> 2) & 3) == 0)
183
256
    {
184
256
      return false;
185
256
    }
186
187
657
    int B = (block_mode >> 9) & 3;
188
657
    if (((block_mode >> 7) & 3) != 3)
189
492
    {
190
492
      D = 0;
191
492
      H = 0;
192
492
    }
193
657
    switch ((block_mode >> 7) & 3)
194
657
    {
195
164
    case 0:
196
164
      x_weights = 6;
197
164
      y_weights = B + 2;
198
164
      z_weights = A + 2;
199
164
      break;
200
164
    case 1:
201
164
      x_weights = A + 2;
202
164
      y_weights = 6;
203
164
      z_weights = B + 2;
204
164
      break;
205
164
    case 2:
206
164
      x_weights = A + 2;
207
164
      y_weights = B + 2;
208
164
      z_weights = 6;
209
164
      break;
210
165
    case 3:
211
165
      x_weights = 2;
212
165
      y_weights = 2;
213
165
      z_weights = 2;
214
165
      switch ((block_mode >> 5) & 3)
215
165
      {
216
39
      case 0:
217
39
        x_weights = 6;
218
39
        break;
219
39
      case 1:
220
39
        y_weights = 6;
221
39
        break;
222
39
      case 2:
223
39
        z_weights = 6;
224
39
        break;
225
48
      case 3:
226
48
        return false;
227
165
      }
228
117
      break;
229
657
    }
230
657
  }
231
232
3.35k
  unsigned int weight_count = x_weights * y_weights * z_weights * (D + 1);
233
3.35k
  quant_mode = (base_quant_mode - 2) + 6 * H;
234
3.35k
  is_dual_plane = D != 0;
235
236
3.35k
  weight_bits = get_ise_sequence_bitcount(weight_count, static_cast<quant_method>(quant_mode));
237
3.35k
  return (weight_count <= BLOCK_MAX_WEIGHTS &&
238
1.98k
          weight_bits >= BLOCK_MIN_WEIGHT_BITS &&
239
1.94k
          weight_bits <= BLOCK_MAX_WEIGHT_BITS);
240
3.66k
}
241
242
/**
243
 * @brief Create a 2D decimation entry for a block-size and weight-decimation pair.
244
 *
245
 * @param      x_texels    The number of texels in the X dimension.
246
 * @param      y_texels    The number of texels in the Y dimension.
247
 * @param      x_weights   The number of weights in the X dimension.
248
 * @param      y_weights   The number of weights in the Y dimension.
249
 * @param[out] di          The decimation info structure to populate.
250
 * @param[out] wb          The decimation table init scratch working buffers.
251
 */
252
static void init_decimation_info_2d(
253
  unsigned int x_texels,
254
  unsigned int y_texels,
255
  unsigned int x_weights,
256
  unsigned int y_weights,
257
  decimation_info& di,
258
  dt_init_working_buffers& wb
259
96
) {
260
96
  unsigned int texels_per_block = x_texels * y_texels;
261
96
  unsigned int weights_per_block = x_weights * y_weights;
262
263
96
  uint8_t max_texel_count_of_weight = 0;
264
265
96
  promise(weights_per_block > 0);
266
96
  promise(texels_per_block > 0);
267
96
  promise(x_texels > 0);
268
96
  promise(y_texels > 0);
269
270
2.93k
  for (unsigned int i = 0; i < weights_per_block; i++)
271
2.83k
  {
272
2.83k
    wb.texel_count_of_weight[i] = 0;
273
2.83k
  }
274
275
12.7k
  for (unsigned int i = 0; i < texels_per_block; i++)
276
12.6k
  {
277
12.6k
    wb.weight_count_of_texel[i] = 0;
278
12.6k
  }
279
280
1.17k
  for (unsigned int y = 0; y < y_texels; y++)
281
1.08k
  {
282
13.7k
    for (unsigned int x = 0; x < x_texels; x++)
283
12.6k
    {
284
12.6k
      unsigned int texel = y * x_texels + x;
285
286
12.6k
      unsigned int x_weight = (((1024 + x_texels / 2) / (x_texels - 1)) * x * (x_weights - 1) + 32) >> 6;
287
12.6k
      unsigned int y_weight = (((1024 + y_texels / 2) / (y_texels - 1)) * y * (y_weights - 1) + 32) >> 6;
288
289
12.6k
      unsigned int x_weight_frac = x_weight & 0xF;
290
12.6k
      unsigned int y_weight_frac = y_weight & 0xF;
291
12.6k
      unsigned int x_weight_int = x_weight >> 4;
292
12.6k
      unsigned int y_weight_int = y_weight >> 4;
293
294
12.6k
      unsigned int qweight[4];
295
12.6k
      qweight[0] = x_weight_int + y_weight_int * x_weights;
296
12.6k
      qweight[1] = qweight[0] + 1;
297
12.6k
      qweight[2] = qweight[0] + x_weights;
298
12.6k
      qweight[3] = qweight[2] + 1;
299
300
      // Truncated-precision bilinear interpolation
301
12.6k
      unsigned int prod = x_weight_frac * y_weight_frac;
302
303
12.6k
      unsigned int weight[4];
304
12.6k
      weight[3] = (prod + 8) >> 4;
305
12.6k
      weight[1] = x_weight_frac - weight[3];
306
12.6k
      weight[2] = y_weight_frac - weight[3];
307
12.6k
      weight[0] = 16 - x_weight_frac - y_weight_frac + weight[3];
308
309
63.3k
      for (unsigned int i = 0; i < 4; i++)
310
50.6k
      {
311
50.6k
        if (weight[i] != 0)
312
37.8k
        {
313
37.8k
          wb.grid_weights_of_texel[texel][wb.weight_count_of_texel[texel]] = static_cast<uint8_t>(qweight[i]);
314
37.8k
          wb.weights_of_texel[texel][wb.weight_count_of_texel[texel]] = static_cast<uint8_t>(weight[i]);
315
37.8k
          wb.weight_count_of_texel[texel]++;
316
37.8k
          wb.texels_of_weight[qweight[i]][wb.texel_count_of_weight[qweight[i]]] = static_cast<uint8_t>(texel);
317
37.8k
          wb.texel_weights_of_weight[qweight[i]][wb.texel_count_of_weight[qweight[i]]] = static_cast<uint8_t>(weight[i]);
318
37.8k
          wb.texel_count_of_weight[qweight[i]]++;
319
37.8k
          max_texel_count_of_weight = astc::max(max_texel_count_of_weight, wb.texel_count_of_weight[qweight[i]]);
320
37.8k
        }
321
50.6k
      }
322
12.6k
    }
323
1.08k
  }
324
325
96
  uint8_t max_texel_weight_count = 0;
326
12.7k
  for (unsigned int i = 0; i < texels_per_block; i++)
327
12.6k
  {
328
12.6k
    di.texel_weight_count[i] = wb.weight_count_of_texel[i];
329
12.6k
    max_texel_weight_count = astc::max(max_texel_weight_count, di.texel_weight_count[i]);
330
331
50.5k
    for (unsigned int j = 0; j < wb.weight_count_of_texel[i]; j++)
332
37.8k
    {
333
37.8k
      di.texel_weight_contribs_int_tr[j][i] = wb.weights_of_texel[i][j];
334
37.8k
      di.texel_weight_contribs_float_tr[j][i] = static_cast<float>(wb.weights_of_texel[i][j]) * (1.0f / WEIGHTS_TEXEL_SUM);
335
37.8k
      di.texel_weights_tr[j][i] = wb.grid_weights_of_texel[i][j];
336
37.8k
    }
337
338
    // Init all 4 entries so we can rely on zeros for vectorization
339
25.4k
    for (unsigned int j = wb.weight_count_of_texel[i]; j < 4; j++)
340
12.7k
    {
341
12.7k
      di.texel_weight_contribs_int_tr[j][i] = 0;
342
12.7k
      di.texel_weight_contribs_float_tr[j][i] = 0.0f;
343
12.7k
      di.texel_weights_tr[j][i] = 0;
344
12.7k
    }
345
12.6k
  }
346
347
96
  di.max_texel_weight_count = max_texel_weight_count;
348
349
2.93k
  for (unsigned int i = 0; i < weights_per_block; i++)
350
2.83k
  {
351
2.83k
    unsigned int texel_count_wt = wb.texel_count_of_weight[i];
352
2.83k
    di.weight_texel_count[i] = static_cast<uint8_t>(texel_count_wt);
353
354
40.7k
    for (unsigned int j = 0; j < texel_count_wt; j++)
355
37.8k
    {
356
37.8k
      uint8_t texel = wb.texels_of_weight[i][j];
357
358
      // Create transposed versions of these for better vectorization
359
37.8k
      di.weight_texels_tr[j][i] = texel;
360
37.8k
      di.weights_texel_contribs_tr[j][i] = static_cast<float>(wb.texel_weights_of_weight[i][j]);
361
362
      // Store the per-texel contribution of this weight for each texel it contributes to
363
37.8k
      di.texel_contrib_for_weight[j][i] = 0.0f;
364
81.4k
      for (unsigned int k = 0; k < 4; k++)
365
81.4k
      {
366
81.4k
        uint8_t dttw = di.texel_weights_tr[k][texel];
367
81.4k
        float dttwf = di.texel_weight_contribs_float_tr[k][texel];
368
81.4k
        if (dttw == i && dttwf != 0.0f)
369
37.8k
        {
370
37.8k
          di.texel_contrib_for_weight[j][i] = di.texel_weight_contribs_float_tr[k][texel];
371
37.8k
          break;
372
37.8k
        }
373
81.4k
      }
374
37.8k
    }
375
376
    // Initialize array tail so we can over-fetch with SIMD later to avoid loop tails
377
    // Match last texel in active lane in SIMD group, for better gathers
378
2.83k
    uint8_t last_texel = di.weight_texels_tr[texel_count_wt - 1][i];
379
20.2k
    for (unsigned int j = texel_count_wt; j < max_texel_count_of_weight; j++)
380
17.4k
    {
381
17.4k
      di.weight_texels_tr[j][i] = last_texel;
382
17.4k
      di.weights_texel_contribs_tr[j][i] = 0.0f;
383
17.4k
    }
384
2.83k
  }
385
386
  // Initialize array tail so we can over-fetch with SIMD later to avoid loop tails
387
96
  size_t texels_per_block_simd = round_up_to_simd_multiple_vla(texels_per_block);
388
96
  for (size_t i = texels_per_block; i < texels_per_block_simd; i++)
389
0
  {
390
0
    di.texel_weight_count[i] = 0;
391
392
0
    for (size_t j = 0; j < 4; j++)
393
0
    {
394
0
      di.texel_weight_contribs_float_tr[j][i] = 0;
395
0
      di.texel_weights_tr[j][i] = 0;
396
0
      di.texel_weight_contribs_int_tr[j][i] = 0;
397
0
    }
398
0
  }
399
400
  // Initialize array tail so we can over-fetch with SIMD later to avoid loop tails
401
  // Match last texel in active lane in SIMD group, for better gathers
402
96
  unsigned int last_texel_count_wt = wb.texel_count_of_weight[weights_per_block - 1];
403
96
  uint8_t last_texel = di.weight_texels_tr[last_texel_count_wt - 1][weights_per_block - 1];
404
405
96
  size_t weights_per_block_simd = round_up_to_simd_multiple_vla(weights_per_block);
406
184
  for (size_t i = weights_per_block; i < weights_per_block_simd; i++)
407
88
  {
408
88
    di.weight_texel_count[i] = 0;
409
410
2.79k
    for (size_t j = 0; j < max_texel_count_of_weight; j++)
411
2.70k
    {
412
2.70k
      di.weight_texels_tr[j][i] = last_texel;
413
2.70k
      di.weights_texel_contribs_tr[j][i] = 0.0f;
414
2.70k
    }
415
88
  }
416
417
96
  di.texel_count = static_cast<uint8_t>(texels_per_block);
418
96
  di.weight_count = static_cast<uint8_t>(weights_per_block);
419
96
  di.weight_x = static_cast<uint8_t>(x_weights);
420
96
  di.weight_y = static_cast<uint8_t>(y_weights);
421
96
  di.weight_z = 1;
422
96
}
423
424
/**
425
 * @brief Create a 3D decimation entry for a block-size and weight-decimation pair.
426
 *
427
 * @param      x_texels    The number of texels in the X dimension.
428
 * @param      y_texels    The number of texels in the Y dimension.
429
 * @param      z_texels    The number of texels in the Z dimension.
430
 * @param      x_weights   The number of weights in the X dimension.
431
 * @param      y_weights   The number of weights in the Y dimension.
432
 * @param      z_weights   The number of weights in the Z dimension.
433
 * @param[out] di          The decimation info structure to populate.
434
   @param[out] wb          The decimation table init scratch working buffers.
435
 */
436
static void init_decimation_info_3d(
437
  unsigned int x_texels,
438
  unsigned int y_texels,
439
  unsigned int z_texels,
440
  unsigned int x_weights,
441
  unsigned int y_weights,
442
  unsigned int z_weights,
443
  decimation_info& di,
444
  dt_init_working_buffers& wb
445
78
) {
446
78
  unsigned int texels_per_block = x_texels * y_texels * z_texels;
447
78
  unsigned int weights_per_block = x_weights * y_weights * z_weights;
448
449
78
  uint8_t max_texel_count_of_weight = 0;
450
451
78
  promise(weights_per_block > 0);
452
78
  promise(texels_per_block > 0);
453
454
3.03k
  for (unsigned int i = 0; i < weights_per_block; i++)
455
2.95k
  {
456
2.95k
    wb.texel_count_of_weight[i] = 0;
457
2.95k
  }
458
459
16.9k
  for (unsigned int i = 0; i < texels_per_block; i++)
460
16.8k
  {
461
16.8k
    wb.weight_count_of_texel[i] = 0;
462
16.8k
  }
463
464
546
  for (unsigned int z = 0; z < z_texels; z++)
465
468
  {
466
3.27k
    for (unsigned int y = 0; y < y_texels; y++)
467
2.80k
    {
468
19.6k
      for (unsigned int x = 0; x < x_texels; x++)
469
16.8k
      {
470
16.8k
        int texel = (z * y_texels + y) * x_texels + x;
471
472
16.8k
        int x_weight = (((1024 + x_texels / 2) / (x_texels - 1)) * x * (x_weights - 1) + 32) >> 6;
473
16.8k
        int y_weight = (((1024 + y_texels / 2) / (y_texels - 1)) * y * (y_weights - 1) + 32) >> 6;
474
16.8k
        int z_weight = (((1024 + z_texels / 2) / (z_texels - 1)) * z * (z_weights - 1) + 32) >> 6;
475
476
16.8k
        int x_weight_frac = x_weight & 0xF;
477
16.8k
        int y_weight_frac = y_weight & 0xF;
478
16.8k
        int z_weight_frac = z_weight & 0xF;
479
16.8k
        int x_weight_int = x_weight >> 4;
480
16.8k
        int y_weight_int = y_weight >> 4;
481
16.8k
        int z_weight_int = z_weight >> 4;
482
16.8k
        int qweight[4];
483
16.8k
        int weight[4];
484
16.8k
        qweight[0] = (z_weight_int * y_weights + y_weight_int) * x_weights + x_weight_int;
485
16.8k
        qweight[3] = ((z_weight_int + 1) * y_weights + (y_weight_int + 1)) * x_weights + (x_weight_int + 1);
486
487
        // simplex interpolation
488
16.8k
        int fs = x_weight_frac;
489
16.8k
        int ft = y_weight_frac;
490
16.8k
        int fp = z_weight_frac;
491
492
16.8k
        int cas = ((fs > ft) << 2) + ((ft > fp) << 1) + ((fs > fp));
493
16.8k
        int N = x_weights;
494
16.8k
        int NM = x_weights * y_weights;
495
496
16.8k
        int s1, s2, w0, w1, w2, w3;
497
16.8k
        switch (cas)
498
16.8k
        {
499
1.15k
        case 7:
500
1.15k
          s1 = 1;
501
1.15k
          s2 = N;
502
1.15k
          w0 = 16 - fs;
503
1.15k
          w1 = fs - ft;
504
1.15k
          w2 = ft - fp;
505
1.15k
          w3 = fp;
506
1.15k
          break;
507
2.10k
        case 3:
508
2.10k
          s1 = N;
509
2.10k
          s2 = 1;
510
2.10k
          w0 = 16 - ft;
511
2.10k
          w1 = ft - fs;
512
2.10k
          w2 = fs - fp;
513
2.10k
          w3 = fp;
514
2.10k
          break;
515
3.10k
        case 5:
516
3.10k
          s1 = 1;
517
3.10k
          s2 = NM;
518
3.10k
          w0 = 16 - fs;
519
3.10k
          w1 = fs - fp;
520
3.10k
          w2 = fp - ft;
521
3.10k
          w3 = ft;
522
3.10k
          break;
523
2.10k
        case 4:
524
2.10k
          s1 = NM;
525
2.10k
          s2 = 1;
526
2.10k
          w0 = 16 - fp;
527
2.10k
          w1 = fp - fs;
528
2.10k
          w2 = fs - ft;
529
2.10k
          w3 = ft;
530
2.10k
          break;
531
3.10k
        case 2:
532
3.10k
          s1 = N;
533
3.10k
          s2 = NM;
534
3.10k
          w0 = 16 - ft;
535
3.10k
          w1 = ft - fp;
536
3.10k
          w2 = fp - fs;
537
3.10k
          w3 = fs;
538
3.10k
          break;
539
5.28k
        case 0:
540
5.28k
          s1 = NM;
541
5.28k
          s2 = N;
542
5.28k
          w0 = 16 - fp;
543
5.28k
          w1 = fp - ft;
544
5.28k
          w2 = ft - fs;
545
5.28k
          w3 = fs;
546
5.28k
          break;
547
0
        default:
548
0
          s1 = NM;
549
0
          s2 = N;
550
0
          w0 = 16 - fp;
551
0
          w1 = fp - ft;
552
0
          w2 = ft - fs;
553
0
          w3 = fs;
554
0
          break;
555
16.8k
        }
556
557
16.8k
        qweight[1] = qweight[0] + s1;
558
16.8k
        qweight[2] = qweight[1] + s2;
559
16.8k
        weight[0] = w0;
560
16.8k
        weight[1] = w1;
561
16.8k
        weight[2] = w2;
562
16.8k
        weight[3] = w3;
563
564
84.2k
        for (unsigned int i = 0; i < 4; i++)
565
67.3k
        {
566
67.3k
          if (weight[i] != 0)
567
42.8k
          {
568
42.8k
            wb.grid_weights_of_texel[texel][wb.weight_count_of_texel[texel]] = static_cast<uint8_t>(qweight[i]);
569
42.8k
            wb.weights_of_texel[texel][wb.weight_count_of_texel[texel]] = static_cast<uint8_t>(weight[i]);
570
42.8k
            wb.weight_count_of_texel[texel]++;
571
42.8k
            wb.texels_of_weight[qweight[i]][wb.texel_count_of_weight[qweight[i]]] = static_cast<uint8_t>(texel);
572
42.8k
            wb.texel_weights_of_weight[qweight[i]][wb.texel_count_of_weight[qweight[i]]] = static_cast<uint8_t>(weight[i]);
573
42.8k
            wb.texel_count_of_weight[qweight[i]]++;
574
42.8k
            max_texel_count_of_weight = astc::max(max_texel_count_of_weight, wb.texel_count_of_weight[qweight[i]]);
575
42.8k
          }
576
67.3k
        }
577
16.8k
      }
578
2.80k
    }
579
468
  }
580
581
78
  uint8_t max_texel_weight_count = 0;
582
16.9k
  for (unsigned int i = 0; i < texels_per_block; i++)
583
16.8k
  {
584
16.8k
    di.texel_weight_count[i] = wb.weight_count_of_texel[i];
585
16.8k
    max_texel_weight_count = astc::max(max_texel_weight_count, di.texel_weight_count[i]);
586
587
    // Init all 4 entries so we can rely on zeros for vectorization
588
84.2k
    for (unsigned int j = 0; j < 4; j++)
589
67.3k
    {
590
67.3k
      di.texel_weight_contribs_int_tr[j][i] = 0;
591
67.3k
      di.texel_weight_contribs_float_tr[j][i] = 0.0f;
592
67.3k
      di.texel_weights_tr[j][i] = 0;
593
67.3k
    }
594
595
59.6k
    for (unsigned int j = 0; j < wb.weight_count_of_texel[i]; j++)
596
42.8k
    {
597
42.8k
      di.texel_weight_contribs_int_tr[j][i] = wb.weights_of_texel[i][j];
598
42.8k
      di.texel_weight_contribs_float_tr[j][i] = static_cast<float>(wb.weights_of_texel[i][j]) * (1.0f / WEIGHTS_TEXEL_SUM);
599
42.8k
      di.texel_weights_tr[j][i] = wb.grid_weights_of_texel[i][j];
600
42.8k
    }
601
16.8k
  }
602
603
78
  di.max_texel_weight_count = max_texel_weight_count;
604
605
3.03k
  for (unsigned int i = 0; i < weights_per_block; i++)
606
2.95k
  {
607
2.95k
    unsigned int texel_count_wt = wb.texel_count_of_weight[i];
608
2.95k
    di.weight_texel_count[i] = static_cast<uint8_t>(texel_count_wt);
609
610
45.7k
    for (unsigned int j = 0; j < texel_count_wt; j++)
611
42.8k
    {
612
42.8k
      unsigned int texel = wb.texels_of_weight[i][j];
613
614
      // Create transposed versions of these for better vectorization
615
42.8k
      di.weight_texels_tr[j][i] = static_cast<uint8_t>(texel);
616
42.8k
      di.weights_texel_contribs_tr[j][i] = static_cast<float>(wb.texel_weights_of_weight[i][j]);
617
618
      // Store the per-texel contribution of this weight for each texel it contributes to
619
42.8k
      di.texel_contrib_for_weight[j][i] = 0.0f;
620
80.2k
      for (unsigned int k = 0; k < 4; k++)
621
80.2k
      {
622
80.2k
        uint8_t dttw = di.texel_weights_tr[k][texel];
623
80.2k
        float dttwf = di.texel_weight_contribs_float_tr[k][texel];
624
80.2k
        if (dttw == i && dttwf != 0.0f)
625
42.8k
        {
626
42.8k
          di.texel_contrib_for_weight[j][i] = di.texel_weight_contribs_float_tr[k][texel];
627
42.8k
          break;
628
42.8k
        }
629
80.2k
      }
630
42.8k
    }
631
632
    // Initialize array tail so we can over-fetch with SIMD later to avoid loop tails
633
    // Match last texel in active lane in SIMD group, for better gathers
634
2.95k
    uint8_t last_texel = di.weight_texels_tr[texel_count_wt - 1][i];
635
25.4k
    for (unsigned int j = texel_count_wt; j < max_texel_count_of_weight; j++)
636
22.5k
    {
637
22.5k
      di.weight_texels_tr[j][i] = last_texel;
638
22.5k
      di.weights_texel_contribs_tr[j][i] = 0.0f;
639
22.5k
    }
640
2.95k
  }
641
642
  // Initialize array tail so we can over-fetch with SIMD later to avoid loop tails
643
78
  size_t texels_per_block_simd = round_up_to_simd_multiple_vla(texels_per_block);
644
78
  for (size_t i = texels_per_block; i < texels_per_block_simd; i++)
645
0
  {
646
0
    di.texel_weight_count[i] = 0;
647
648
0
    for (size_t j = 0; j < 4; j++)
649
0
    {
650
0
      di.texel_weight_contribs_float_tr[j][i] = 0;
651
0
      di.texel_weights_tr[j][i] = 0;
652
0
      di.texel_weight_contribs_int_tr[j][i] = 0;
653
0
    }
654
0
  }
655
656
  // Initialize array tail so we can over-fetch with SIMD later to avoid loop tails
657
  // Match last texel in active lane in SIMD group, for better gathers
658
78
  int last_texel_count_wt = wb.texel_count_of_weight[weights_per_block - 1];
659
78
  uint8_t last_texel = di.weight_texels_tr[last_texel_count_wt - 1][weights_per_block - 1];
660
661
78
  size_t weights_per_block_simd = round_up_to_simd_multiple_vla(weights_per_block);
662
118
  for (size_t i = weights_per_block; i < weights_per_block_simd; i++)
663
40
  {
664
40
    di.weight_texel_count[i] = 0;
665
666
1.15k
    for (size_t j = 0; j < max_texel_count_of_weight; j++)
667
1.11k
    {
668
1.11k
      di.weight_texels_tr[j][i] = last_texel;
669
1.11k
      di.weights_texel_contribs_tr[j][i] = 0.0f;
670
1.11k
    }
671
40
  }
672
673
78
  di.texel_count = static_cast<uint8_t>(texels_per_block);
674
78
  di.weight_count = static_cast<uint8_t>(weights_per_block);
675
78
  di.weight_x = static_cast<uint8_t>(x_weights);
676
78
  di.weight_y = static_cast<uint8_t>(y_weights);
677
78
  di.weight_z = static_cast<uint8_t>(z_weights);
678
78
}
679
680
/**
681
 * @brief Assign the texels to use for kmeans clustering.
682
 *
683
 * The max limit is @c BLOCK_MAX_KMEANS_TEXELS; above this a random selection is used.
684
 * The @c bsd.texel_count is an input and must be populated beforehand.
685
 *
686
 * @param[in,out] bsd   The block size descriptor to populate.
687
 */
688
static void assign_kmeans_texels(
689
  block_size_descriptor& bsd
690
3
) {
691
  // Use all texels for kmeans on a small block
692
3
  if (bsd.texel_count <= BLOCK_MAX_KMEANS_TEXELS)
693
1
  {
694
17
    for (uint8_t i = 0; i < bsd.texel_count; i++)
695
16
    {
696
16
      bsd.kmeans_texels[i] = i;
697
16
    }
698
699
1
    return;
700
1
  }
701
702
  // Select a random subset of BLOCK_MAX_KMEANS_TEXELS for kmeans on a large block
703
2
  uint64_t rng_state[2];
704
2
  astc::rand_init(rng_state);
705
706
  // Initialize array used for tracking used indices
707
2
  bool seen[BLOCK_MAX_TEXELS];
708
362
  for (uint8_t i = 0; i < bsd.texel_count; i++)
709
360
  {
710
360
    seen[i] = false;
711
360
  }
712
713
  // Assign 64 random indices, retrying if we see repeats
714
2
  unsigned int arr_elements_set = 0;
715
155
  while (arr_elements_set < BLOCK_MAX_KMEANS_TEXELS)
716
153
  {
717
153
    uint8_t texel = static_cast<uint8_t>(astc::rand(rng_state));
718
153
    texel = texel % bsd.texel_count;
719
153
    if (!seen[texel])
720
128
    {
721
128
      bsd.kmeans_texels[arr_elements_set++] = texel;
722
128
      seen[texel] = true;
723
128
    }
724
153
  }
725
2
}
726
727
/**
728
 * @brief Allocate a single 2D decimation table entry.
729
 *
730
 * @param x_texels    The number of texels in the X dimension.
731
 * @param y_texels    The number of texels in the Y dimension.
732
 * @param x_weights   The number of weights in the X dimension.
733
 * @param y_weights   The number of weights in the Y dimension.
734
 * @param bsd         The block size descriptor we are populating.
735
 * @param wb          The decimation table init scratch working buffers.
736
 * @param index       The packed array index to populate.
737
 */
738
static void construct_dt_entry_2d(
739
  unsigned int x_texels,
740
  unsigned int y_texels,
741
  unsigned int x_weights,
742
  unsigned int y_weights,
743
  block_size_descriptor& bsd,
744
  dt_init_working_buffers& wb,
745
  unsigned int index
746
96
) {
747
96
  unsigned int weight_count = x_weights * y_weights;
748
96
  assert(weight_count <= BLOCK_MAX_WEIGHTS);
749
750
96
  bool try_2planes = (2 * weight_count) <= BLOCK_MAX_WEIGHTS;
751
752
96
  decimation_info& di = bsd.decimation_tables[index];
753
96
  init_decimation_info_2d(x_texels, y_texels, x_weights, y_weights, di, wb);
754
755
96
  int maxprec_1plane = -1;
756
96
  int maxprec_2planes = -1;
757
1.24k
  for (int i = 0; i < 12; i++)
758
1.15k
  {
759
1.15k
    unsigned int bits_1plane = get_ise_sequence_bitcount(weight_count, static_cast<quant_method>(i));
760
1.15k
    if (bits_1plane >= BLOCK_MIN_WEIGHT_BITS && bits_1plane <= BLOCK_MAX_WEIGHT_BITS)
761
590
    {
762
590
      maxprec_1plane = i;
763
590
    }
764
765
1.15k
    if (try_2planes)
766
684
    {
767
684
      unsigned int bits_2planes = get_ise_sequence_bitcount(2 * weight_count, static_cast<quant_method>(i));
768
684
      if (bits_2planes >= BLOCK_MIN_WEIGHT_BITS && bits_2planes <= BLOCK_MAX_WEIGHT_BITS)
769
330
      {
770
330
        maxprec_2planes = i;
771
330
      }
772
684
    }
773
1.15k
  }
774
775
  // At least one of the two should be valid ...
776
96
  assert(maxprec_1plane >= 0 || maxprec_2planes >= 0);
777
96
  bsd.decimation_modes[index].maxprec_1plane = static_cast<int8_t>(maxprec_1plane);
778
96
  bsd.decimation_modes[index].maxprec_2planes = static_cast<int8_t>(maxprec_2planes);
779
96
  bsd.decimation_modes[index].refprec_1plane = 0;
780
96
  bsd.decimation_modes[index].refprec_2planes = 0;
781
96
}
782
783
/**
784
 * @brief Allocate block modes and decimation tables for a single 2D block size.
785
 *
786
 * @param      x_texels         The number of texels in the X dimension.
787
 * @param      y_texels         The number of texels in the Y dimension.
788
 * @param      can_omit_modes   Can we discard modes that astcenc won't use, even if legal?
789
 * @param      mode_cutoff      Percentile cutoff in range [0,1]. Low values more likely to be used.
790
 * @param[out] bsd              The block size descriptor to populate.
791
 */
792
static void construct_block_size_descriptor_2d(
793
  unsigned int x_texels,
794
  unsigned int y_texels,
795
  bool can_omit_modes,
796
  float mode_cutoff,
797
  block_size_descriptor& bsd
798
2
) {
799
  // Store a remap table for storing packed decimation modes.
800
  // Indexing uses [Y * 16 + X] and max size for each axis is 12.
801
2
  static const unsigned int MAX_DMI = 12 * 16 + 12;
802
2
  int decimation_mode_index[MAX_DMI];
803
804
2
  dt_init_working_buffers* wb = new dt_init_working_buffers;
805
806
2
  bsd.xdim = static_cast<uint8_t>(x_texels);
807
2
  bsd.ydim = static_cast<uint8_t>(y_texels);
808
2
  bsd.zdim = 1;
809
2
  bsd.texel_count = static_cast<uint8_t>(x_texels * y_texels);
810
811
410
  for (unsigned int i = 0; i < MAX_DMI; i++)
812
408
  {
813
408
    decimation_mode_index[i] = -1;
814
408
  }
815
816
  // Gather all the decimation grids that can be used with the current block
817
2
#if !defined(ASTCENC_DECOMPRESS_ONLY)
818
2
  const float *percentiles = get_2d_percentile_table(x_texels, y_texels);
819
2
  float always_cutoff = 0.0f;
820
#else
821
  // Unused in decompress-only builds
822
  (void)can_omit_modes;
823
  (void)mode_cutoff;
824
#endif
825
826
  // Construct the list of block formats referencing the decimation tables
827
2
  unsigned int packed_bm_idx = 0;
828
2
  unsigned int packed_dm_idx = 0;
829
830
  // Trackers
831
2
  unsigned int bm_counts[4] { 0 };
832
2
  unsigned int dm_counts[4] { 0 };
833
834
  // Clear the list to a known-bad value
835
4.09k
  for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++)
836
4.09k
  {
837
4.09k
    bsd.block_mode_packed_index[i] = BLOCK_BAD_BLOCK_MODE;
838
4.09k
  }
839
840
  // Iterate four times to build a usefully ordered list:
841
  //   - Pass 0 - keep selected single plane "always" block modes
842
  //   - Pass 1 - keep selected single plane "non-always" block modes
843
  //   - Pass 2 - keep select dual plane block modes
844
  //   - Pass 3 - keep everything else that's legal
845
2
  unsigned int limit = can_omit_modes ? 3 : 4;
846
10
  for (unsigned int j = 0; j < limit; j ++)
847
8
  {
848
16.3k
    for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++)
849
16.3k
    {
850
      // Skip modes we've already included in a previous pass
851
16.3k
      if (bsd.block_mode_packed_index[i] != BLOCK_BAD_BLOCK_MODE)
852
1.51k
      {
853
1.51k
        continue;
854
1.51k
      }
855
856
      // Decode parameters
857
14.8k
      unsigned int x_weights;
858
14.8k
      unsigned int y_weights;
859
14.8k
      bool is_dual_plane;
860
14.8k
      unsigned int quant_mode;
861
14.8k
      unsigned int weight_bits;
862
14.8k
      bool valid = decode_block_mode_2d(i, x_weights, y_weights, is_dual_plane, quant_mode, weight_bits);
863
864
      // Always skip invalid encodings for the current block size
865
14.8k
      if (!valid || (x_weights > x_texels) || (y_weights > y_texels))
866
12.7k
      {
867
12.7k
        continue;
868
12.7k
      }
869
870
      // Selectively skip dual plane encodings
871
2.16k
      if (((j <= 1) && is_dual_plane) || (j == 2 && !is_dual_plane))
872
660
      {
873
660
        continue;
874
660
      }
875
876
      // Always skip encodings we can't physically encode based on
877
      // generic encoding bit availability
878
1.50k
      if (is_dual_plane)
879
330
      {
880
         // This is the only check we need as only support 1 partition
881
330
         if ((109 - weight_bits) <= 0)
882
0
         {
883
0
          continue;
884
0
         }
885
330
      }
886
1.17k
      else
887
1.17k
      {
888
        // This is conservative - fewer bits may be available for > 1 partition
889
1.17k
         if ((111 - weight_bits) <= 0)
890
0
         {
891
0
          continue;
892
0
         }
893
1.17k
      }
894
895
      // Selectively skip encodings based on percentile
896
1.50k
      bool percentile_hit = false;
897
1.50k
  #if !defined(ASTCENC_DECOMPRESS_ONLY)
898
1.50k
      if (j == 0)
899
590
      {
900
590
        percentile_hit = percentiles[i] <= always_cutoff;
901
590
      }
902
918
      else
903
918
      {
904
918
        percentile_hit = percentiles[i] <= mode_cutoff;
905
918
      }
906
1.50k
  #endif
907
908
1.50k
      if (j != 3 && !percentile_hit)
909
588
      {
910
588
        continue;
911
588
      }
912
913
      // Allocate and initialize the decimation table entry if we've not used it yet
914
920
      int decimation_mode = decimation_mode_index[y_weights * 16 + x_weights];
915
920
      if (decimation_mode < 0)
916
96
      {
917
96
        construct_dt_entry_2d(x_texels, y_texels, x_weights, y_weights, bsd, *wb, packed_dm_idx);
918
96
        decimation_mode_index[y_weights * 16 + x_weights] = packed_dm_idx;
919
96
        decimation_mode = packed_dm_idx;
920
921
96
        dm_counts[j]++;
922
96
        packed_dm_idx++;
923
96
      }
924
925
920
      auto& bm = bsd.block_modes[packed_bm_idx];
926
927
920
      bm.decimation_mode = static_cast<uint8_t>(decimation_mode);
928
920
      bm.quant_mode = static_cast<uint8_t>(quant_mode);
929
920
      bm.is_dual_plane = static_cast<uint8_t>(is_dual_plane);
930
920
      bm.weight_bits = static_cast<uint8_t>(weight_bits);
931
920
      bm.mode_index = static_cast<uint16_t>(i);
932
933
920
      auto& dm = bsd.decimation_modes[decimation_mode];
934
935
920
      if (is_dual_plane)
936
330
      {
937
330
        dm.set_ref_2plane(bm.get_weight_quant_mode());
938
330
      }
939
590
      else
940
590
      {
941
590
        dm.set_ref_1plane(bm.get_weight_quant_mode());
942
590
      }
943
944
920
      bsd.block_mode_packed_index[i] = static_cast<uint16_t>(packed_bm_idx);
945
946
920
      packed_bm_idx++;
947
920
      bm_counts[j]++;
948
920
    }
949
8
  }
950
951
2
  bsd.block_mode_count_1plane_always = bm_counts[0];
952
2
  bsd.block_mode_count_1plane_selected = bm_counts[0] + bm_counts[1];
953
2
  bsd.block_mode_count_1plane_2plane_selected = bm_counts[0] + bm_counts[1] + bm_counts[2];
954
2
  bsd.block_mode_count_all = bm_counts[0] + bm_counts[1] + bm_counts[2] + bm_counts[3];
955
956
2
  bsd.decimation_mode_count_always = dm_counts[0];
957
2
  bsd.decimation_mode_count_selected = dm_counts[0] + dm_counts[1] + dm_counts[2];
958
2
  bsd.decimation_mode_count_all = dm_counts[0] + dm_counts[1] + dm_counts[2] + dm_counts[3];
959
960
2
#if !defined(ASTCENC_DECOMPRESS_ONLY)
961
2
  assert(bsd.block_mode_count_1plane_always > 0);
962
2
  assert(bsd.decimation_mode_count_always > 0);
963
964
2
  delete[] percentiles;
965
2
#endif
966
967
  // Ensure the end of the array contains valid data (should never get read)
968
80
  for (unsigned int i = bsd.decimation_mode_count_all; i < WEIGHTS_MAX_DECIMATION_MODES; i++)
969
78
  {
970
78
    bsd.decimation_modes[i].maxprec_1plane = -1;
971
78
    bsd.decimation_modes[i].maxprec_2planes = -1;
972
78
    bsd.decimation_modes[i].refprec_1plane = 0;
973
78
    bsd.decimation_modes[i].refprec_2planes = 0;
974
78
  }
975
976
  // Determine the texels to use for kmeans clustering.
977
2
  assign_kmeans_texels(bsd);
978
979
2
  delete wb;
980
2
}
981
982
/**
983
 * @brief Allocate block modes and decimation tables for a single 3D block size.
984
 *
985
 * TODO: This function doesn't include all of the heuristics that we use for 2D block sizes such as
986
 * the percentile mode cutoffs. If 3D becomes more widely used we should look at this.
987
 *
988
 * @param      x_texels   The number of texels in the X dimension.
989
 * @param      y_texels   The number of texels in the Y dimension.
990
 * @param      z_texels   The number of texels in the Z dimension.
991
 * @param[out] bsd        The block size descriptor to populate.
992
 */
993
static void construct_block_size_descriptor_3d(
994
  unsigned int x_texels,
995
  unsigned int y_texels,
996
  unsigned int z_texels,
997
  block_size_descriptor& bsd
998
1
) {
999
  // Store a remap table for storing packed decimation modes.
1000
  // Indexing uses [Z * 64 + Y *  8 + X] and max size for each axis is 6.
1001
1
  static constexpr unsigned int MAX_DMI = 6 * 64 + 6 * 8 + 6;
1002
1
  int decimation_mode_index[MAX_DMI];
1003
1
  unsigned int decimation_mode_count = 0;
1004
1005
1
  dt_init_working_buffers* wb = new dt_init_working_buffers;
1006
1007
1
  bsd.xdim = static_cast<uint8_t>(x_texels);
1008
1
  bsd.ydim = static_cast<uint8_t>(y_texels);
1009
1
  bsd.zdim = static_cast<uint8_t>(z_texels);
1010
1
  bsd.texel_count = static_cast<uint8_t>(x_texels * y_texels * z_texels);
1011
1012
439
  for (unsigned int i = 0; i < MAX_DMI; i++)
1013
438
  {
1014
438
    decimation_mode_index[i] = -1;
1015
438
  }
1016
1017
  // gather all the infill-modes that can be used with the current block size
1018
6
  for (unsigned int x_weights = 2; x_weights <= x_texels; x_weights++)
1019
5
  {
1020
30
    for (unsigned int y_weights = 2; y_weights <= y_texels; y_weights++)
1021
25
    {
1022
150
      for (unsigned int z_weights = 2; z_weights <= z_texels; z_weights++)
1023
125
      {
1024
125
        unsigned int weight_count = x_weights * y_weights * z_weights;
1025
125
        if (weight_count > BLOCK_MAX_WEIGHTS)
1026
47
        {
1027
47
          continue;
1028
47
        }
1029
1030
78
        decimation_info& di = bsd.decimation_tables[decimation_mode_count];
1031
78
        decimation_mode_index[z_weights * 64 + y_weights * 8 + x_weights] = decimation_mode_count;
1032
78
        init_decimation_info_3d(x_texels, y_texels, z_texels, x_weights, y_weights, z_weights, di, *wb);
1033
1034
78
        int maxprec_1plane = -1;
1035
78
        int maxprec_2planes = -1;
1036
1.01k
        for (unsigned int i = 0; i < 12; i++)
1037
936
        {
1038
936
          unsigned int bits_1plane = get_ise_sequence_bitcount(weight_count, static_cast<quant_method>(i));
1039
936
          if (bits_1plane >= BLOCK_MIN_WEIGHT_BITS && bits_1plane <= BLOCK_MAX_WEIGHT_BITS)
1040
417
          {
1041
417
            maxprec_1plane = i;
1042
417
          }
1043
1044
936
          unsigned int bits_2planes = get_ise_sequence_bitcount(2 * weight_count, static_cast<quant_method>(i));
1045
936
          if (bits_2planes >= BLOCK_MIN_WEIGHT_BITS && bits_2planes <= BLOCK_MAX_WEIGHT_BITS)
1046
154
          {
1047
154
            maxprec_2planes = i;
1048
154
          }
1049
936
        }
1050
1051
78
        if ((2 * weight_count) > BLOCK_MAX_WEIGHTS)
1052
46
        {
1053
46
          maxprec_2planes = -1;
1054
46
        }
1055
1056
78
        bsd.decimation_modes[decimation_mode_count].maxprec_1plane = static_cast<int8_t>(maxprec_1plane);
1057
78
        bsd.decimation_modes[decimation_mode_count].maxprec_2planes = static_cast<int8_t>(maxprec_2planes);
1058
78
        bsd.decimation_modes[decimation_mode_count].refprec_1plane = maxprec_1plane == -1 ? 0 : 0xFFFF;
1059
78
        bsd.decimation_modes[decimation_mode_count].refprec_2planes = maxprec_2planes == -1 ? 0 : 0xFFFF;
1060
78
        decimation_mode_count++;
1061
78
      }
1062
25
    }
1063
5
  }
1064
1065
  // Ensure the end of the array contains valid data (should never get read)
1066
10
  for (unsigned int i = decimation_mode_count; i < WEIGHTS_MAX_DECIMATION_MODES; i++)
1067
9
  {
1068
9
    bsd.decimation_modes[i].maxprec_1plane = -1;
1069
9
    bsd.decimation_modes[i].maxprec_2planes = -1;
1070
9
    bsd.decimation_modes[i].refprec_1plane = 0;
1071
9
    bsd.decimation_modes[i].refprec_2planes = 0;
1072
9
  }
1073
1074
1
  bsd.decimation_mode_count_always = 0; // Skipped for 3D modes
1075
1
  bsd.decimation_mode_count_selected = decimation_mode_count;
1076
1
  bsd.decimation_mode_count_all = decimation_mode_count;
1077
1078
  // Construct the list of block formats referencing the decimation tables
1079
1080
  // Clear the list to a known-bad value
1081
2.04k
  for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++)
1082
2.04k
  {
1083
2.04k
    bsd.block_mode_packed_index[i] = BLOCK_BAD_BLOCK_MODE;
1084
2.04k
  }
1085
1086
1
  unsigned int packed_idx = 0;
1087
1
  unsigned int bm_counts[2] { 0 };
1088
1089
  // Iterate two times to build a usefully ordered list:
1090
  //   - Pass 0 - keep valid single plane block modes
1091
  //   - Pass 1 - keep valid dual plane block modes
1092
3
  for (unsigned int j = 0; j < 2; j++)
1093
2
  {
1094
4.09k
    for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++)
1095
4.09k
    {
1096
      // Skip modes we've already included in a previous pass
1097
4.09k
      if (bsd.block_mode_packed_index[i] != BLOCK_BAD_BLOCK_MODE)
1098
435
      {
1099
435
        continue;
1100
435
      }
1101
1102
3.66k
      unsigned int x_weights;
1103
3.66k
      unsigned int y_weights;
1104
3.66k
      unsigned int z_weights;
1105
3.66k
      bool is_dual_plane;
1106
3.66k
      unsigned int quant_mode;
1107
3.66k
      unsigned int weight_bits;
1108
1109
3.66k
      bool valid = decode_block_mode_3d(i, x_weights, y_weights, z_weights, is_dual_plane, quant_mode, weight_bits);
1110
      // Skip invalid encodings
1111
3.66k
      if (!valid || x_weights > x_texels || y_weights > y_texels || z_weights > z_texels)
1112
2.97k
      {
1113
2.97k
        continue;
1114
2.97k
      }
1115
1116
      // Skip encodings in the wrong iteration
1117
689
      if ((j == 0 && is_dual_plane) || (j == 1 && !is_dual_plane))
1118
127
      {
1119
127
        continue;
1120
127
      }
1121
1122
      // Always skip encodings we can't physically encode based on bit availability
1123
562
      if (is_dual_plane)
1124
127
      {
1125
         // This is the only check we need as only support 1 partition
1126
127
         if ((109 - weight_bits) <= 0)
1127
0
         {
1128
0
          continue;
1129
0
         }
1130
127
      }
1131
435
      else
1132
435
      {
1133
        // This is conservative - fewer bits may be available for > 1 partition
1134
435
         if ((111 - weight_bits) <= 0)
1135
0
         {
1136
0
          continue;
1137
0
         }
1138
435
      }
1139
1140
562
      int decimation_mode = decimation_mode_index[z_weights * 64 + y_weights * 8 + x_weights];
1141
562
      bsd.block_modes[packed_idx].decimation_mode = static_cast<uint8_t>(decimation_mode);
1142
562
      bsd.block_modes[packed_idx].quant_mode = static_cast<uint8_t>(quant_mode);
1143
562
      bsd.block_modes[packed_idx].weight_bits = static_cast<uint8_t>(weight_bits);
1144
562
      bsd.block_modes[packed_idx].is_dual_plane = static_cast<uint8_t>(is_dual_plane);
1145
562
      bsd.block_modes[packed_idx].mode_index = static_cast<uint16_t>(i);
1146
1147
562
      bsd.block_mode_packed_index[i] = static_cast<uint16_t>(packed_idx);
1148
562
      bm_counts[j]++;
1149
562
      packed_idx++;
1150
562
    }
1151
2
  }
1152
1153
1
  bsd.block_mode_count_1plane_always = 0;  // Skipped for 3D modes
1154
1
  bsd.block_mode_count_1plane_selected = bm_counts[0];
1155
1
  bsd.block_mode_count_1plane_2plane_selected = bm_counts[0] + bm_counts[1];
1156
1
  bsd.block_mode_count_all = bm_counts[0] + bm_counts[1];
1157
1158
  // Determine the texels to use for kmeans clustering.
1159
1
  assign_kmeans_texels(bsd);
1160
1161
1
  delete wb;
1162
1
}
1163
1164
/* See header for documentation. */
1165
void init_block_size_descriptor(
1166
  unsigned int x_texels,
1167
  unsigned int y_texels,
1168
  unsigned int z_texels,
1169
  bool can_omit_modes,
1170
  unsigned int partition_count_cutoff,
1171
  float mode_cutoff,
1172
  block_size_descriptor& bsd
1173
3
) {
1174
3
  if (z_texels > 1)
1175
1
  {
1176
1
    construct_block_size_descriptor_3d(x_texels, y_texels, z_texels, bsd);
1177
1
  }
1178
2
  else
1179
2
  {
1180
2
    construct_block_size_descriptor_2d(x_texels, y_texels, can_omit_modes, mode_cutoff, bsd);
1181
2
  }
1182
1183
3
  init_partition_tables(bsd, can_omit_modes, partition_count_cutoff);
1184
3
}