Coverage Report

Created: 2024-09-08 06:14

/src/astc-encoder/Source/astcenc_partition_tables.cpp
Line
Count
Source (jump to first uncovered line)
1
// SPDX-License-Identifier: Apache-2.0
2
// ----------------------------------------------------------------------------
3
// Copyright 2011-2023 Arm Limited
4
//
5
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
6
// use this file except in compliance with the License. You may obtain a copy
7
// of the License at:
8
//
9
//     http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing, software
12
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14
// License for the specific language governing permissions and limitations
15
// under the License.
16
// ----------------------------------------------------------------------------
17
18
/**
19
 * @brief Functions for generating partition tables on demand.
20
 */
21
22
#include "astcenc_internal.h"
23
24
/** @brief The number of 64-bit words needed to represent a canonical partition bit pattern. */
25
8.15M
#define BIT_PATTERN_WORDS (((ASTCENC_BLOCK_MAX_TEXELS * 2) + 63) / 64)
26
27
/**
28
 * @brief Generate a canonical representation of a partition pattern.
29
 *
30
 * The returned value stores two bits per texel, for up to 6x6x6 texels, where the two bits store
31
 * the remapped texel index. Remapping ensures that we only match on the partition pattern,
32
 * independent of the partition order generated by the hash.
33
 *
34
 * @param      texel_count          The number of texels in the block.
35
 * @param      partition_of_texel   The partition assignments, in hash order.
36
 * @param[out] bit_pattern          The output bit pattern representation.
37
 */
38
static void generate_canonical_partitioning(
39
  unsigned int texel_count,
40
  const uint8_t* partition_of_texel,
41
  uint64_t bit_pattern[BIT_PATTERN_WORDS]
42
9.80k
) {
43
  // Clear the pattern
44
78.4k
  for (unsigned int i = 0; i < BIT_PATTERN_WORDS; i++)
45
68.6k
  {
46
68.6k
    bit_pattern[i] = 0;
47
68.6k
  }
48
49
  // Store a mapping to reorder the raw partitions so that the partitions are ordered such
50
  // that the lowest texel index in partition N is smaller than the lowest texel index in
51
  // partition N + 1.
52
9.80k
  int mapped_index[BLOCK_MAX_PARTITIONS];
53
9.80k
  int map_weight_count = 0;
54
55
49.0k
  for (unsigned int i = 0; i < BLOCK_MAX_PARTITIONS; i++)
56
39.2k
  {
57
39.2k
    mapped_index[i] = -1;
58
39.2k
  }
59
60
1.19M
  for (unsigned int i = 0; i < texel_count; i++)
61
1.18M
  {
62
1.18M
    int index = partition_of_texel[i];
63
1.18M
    if (mapped_index[index] < 0)
64
24.4k
    {
65
24.4k
      mapped_index[index] = map_weight_count++;
66
24.4k
    }
67
68
1.18M
    uint64_t xlat_index = mapped_index[index];
69
1.18M
    bit_pattern[i >> 5] |= xlat_index << (2 * (i & 0x1F));
70
1.18M
  }
71
9.80k
}
72
73
/**
74
 * @brief Compare two canonical patterns to see if they are the same.
75
 *
76
 * @param part1   The first canonical bit pattern to check.
77
 * @param part2   The second canonical bit pattern to check.
78
 *
79
 * @return @c true if the patterns are the same, @c false otherwise.
80
 */
81
static bool compare_canonical_partitionings(
82
  const uint64_t part1[BIT_PATTERN_WORDS],
83
  const uint64_t part2[BIT_PATTERN_WORDS]
84
4.03M
) {
85
4.03M
  return (part1[0] == part2[0])
86
4.03M
#if BIT_PATTERN_WORDS > 1
87
4.03M
      && (part1[1] == part2[1])
88
4.03M
#endif
89
4.03M
#if BIT_PATTERN_WORDS > 2
90
4.03M
      && (part1[2] == part2[2])
91
4.03M
#endif
92
4.03M
#if BIT_PATTERN_WORDS > 3
93
4.03M
      && (part1[3] == part2[3])
94
4.03M
#endif
95
4.03M
#if BIT_PATTERN_WORDS > 4
96
4.03M
      && (part1[4] == part2[4])
97
4.03M
#endif
98
4.03M
#if BIT_PATTERN_WORDS > 5
99
4.03M
      && (part1[5] == part2[5])
100
4.03M
#endif
101
4.03M
#if BIT_PATTERN_WORDS > 6
102
4.03M
      && (part1[6] == part2[6])
103
4.03M
#endif
104
4.03M
      ;
105
4.03M
}
106
107
/**
108
 * @brief Hash function used for procedural partition assignment.
109
 *
110
 * @param inp   The hash seed.
111
 *
112
 * @return The hashed value.
113
 */
114
static uint32_t hash52(
115
  uint32_t inp
116
1.55M
) {
117
1.55M
  inp ^= inp >> 15;
118
119
  // (2^4 + 1) * (2^7 + 1) * (2^17 - 1)
120
1.55M
  inp *= 0xEEDE0891;
121
1.55M
  inp ^= inp >> 5;
122
1.55M
  inp += inp << 16;
123
1.55M
  inp ^= inp >> 7;
124
1.55M
  inp ^= inp >> 3;
125
1.55M
  inp ^= inp << 6;
126
1.55M
  inp ^= inp >> 17;
127
1.55M
  return inp;
128
1.55M
}
129
130
/**
131
 * @brief Select texel assignment for a single coordinate.
132
 *
133
 * @param seed              The seed - the partition index from the block.
134
 * @param x                 The texel X coordinate in the block.
135
 * @param y                 The texel Y coordinate in the block.
136
 * @param z                 The texel Z coordinate in the block.
137
 * @param partition_count   The total partition count of this encoding.
138
 * @param small_block       @c true if the block has fewer than 32 texels.
139
 *
140
 * @return The assigned partition index for this texel.
141
 */
142
static uint8_t select_partition(
143
  int seed,
144
  int x,
145
  int y,
146
  int z,
147
  int partition_count,
148
  bool small_block
149
1.55M
) {
150
  // For small blocks bias the coordinates to get better distribution
151
1.55M
  if (small_block)
152
80.9k
  {
153
80.9k
    x <<= 1;
154
80.9k
    y <<= 1;
155
80.9k
    z <<= 1;
156
80.9k
  }
157
158
1.55M
  seed += (partition_count - 1) * 1024;
159
160
1.55M
  uint32_t rnum = hash52(seed);
161
162
1.55M
  uint8_t seed1 = rnum & 0xF;
163
1.55M
  uint8_t seed2 = (rnum >> 4) & 0xF;
164
1.55M
  uint8_t seed3 = (rnum >> 8) & 0xF;
165
1.55M
  uint8_t seed4 = (rnum >> 12) & 0xF;
166
1.55M
  uint8_t seed5 = (rnum >> 16) & 0xF;
167
1.55M
  uint8_t seed6 = (rnum >> 20) & 0xF;
168
1.55M
  uint8_t seed7 = (rnum >> 24) & 0xF;
169
1.55M
  uint8_t seed8 = (rnum >> 28) & 0xF;
170
1.55M
  uint8_t seed9 = (rnum >> 18) & 0xF;
171
1.55M
  uint8_t seed10 = (rnum >> 22) & 0xF;
172
1.55M
  uint8_t seed11 = (rnum >> 26) & 0xF;
173
1.55M
  uint8_t seed12 = ((rnum >> 30) | (rnum << 2)) & 0xF;
174
175
  // Squaring all the seeds in order to bias their distribution towards lower values.
176
1.55M
  seed1 *= seed1;
177
1.55M
  seed2 *= seed2;
178
1.55M
  seed3 *= seed3;
179
1.55M
  seed4 *= seed4;
180
1.55M
  seed5 *= seed5;
181
1.55M
  seed6 *= seed6;
182
1.55M
  seed7 *= seed7;
183
1.55M
  seed8 *= seed8;
184
1.55M
  seed9 *= seed9;
185
1.55M
  seed10 *= seed10;
186
1.55M
  seed11 *= seed11;
187
1.55M
  seed12 *= seed12;
188
189
1.55M
  int sh1, sh2;
190
1.55M
  if (seed & 1)
191
775k
  {
192
775k
    sh1 = (seed & 2 ? 4 : 5);
193
775k
    sh2 = (partition_count == 3 ? 6 : 5);
194
775k
  }
195
776k
  else
196
776k
  {
197
776k
    sh1 = (partition_count == 3 ? 6 : 5);
198
776k
    sh2 = (seed & 2 ? 4 : 5);
199
776k
  }
200
201
1.55M
  int sh3 = (seed & 0x10) ? sh1 : sh2;
202
203
1.55M
  seed1 >>= sh1;
204
1.55M
  seed2 >>= sh2;
205
1.55M
  seed3 >>= sh1;
206
1.55M
  seed4 >>= sh2;
207
1.55M
  seed5 >>= sh1;
208
1.55M
  seed6 >>= sh2;
209
1.55M
  seed7 >>= sh1;
210
1.55M
  seed8 >>= sh2;
211
212
1.55M
  seed9 >>= sh3;
213
1.55M
  seed10 >>= sh3;
214
1.55M
  seed11 >>= sh3;
215
1.55M
  seed12 >>= sh3;
216
217
1.55M
  int a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14);
218
1.55M
  int b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10);
219
1.55M
  int c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6);
220
1.55M
  int d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2);
221
222
  // Apply the saw
223
1.55M
  a &= 0x3F;
224
1.55M
  b &= 0x3F;
225
1.55M
  c &= 0x3F;
226
1.55M
  d &= 0x3F;
227
228
  // Remove some of the components if we are to output < 4 partitions.
229
1.55M
  if (partition_count <= 3)
230
1.00M
  {
231
1.00M
    d = 0;
232
1.00M
  }
233
234
1.55M
  if (partition_count <= 2)
235
454k
  {
236
454k
    c = 0;
237
454k
  }
238
239
1.55M
  if (partition_count <= 1)
240
376
  {
241
376
    b = 0;
242
376
  }
243
244
1.55M
  uint8_t partition;
245
1.55M
  if (a >= b && a >= c && a >= d)
246
599k
  {
247
599k
    partition = 0;
248
599k
  }
249
951k
  else if (b >= c && b >= d)
250
476k
  {
251
476k
    partition = 1;
252
476k
  }
253
475k
  else if (c >= d)
254
339k
  {
255
339k
    partition = 2;
256
339k
  }
257
135k
  else
258
135k
  {
259
135k
    partition = 3;
260
135k
  }
261
262
1.55M
  return partition;
263
1.55M
}
264
265
/**
266
 * @brief Generate a single partition info structure.
267
 *
268
 * @param[out] bsd                     The block size information.
269
 * @param      partition_count         The partition count of this partitioning.
270
 * @param      partition_index         The partition index / seed of this partitioning.
271
 * @param      partition_remap_index   The remapped partition index of this partitioning.
272
 * @param[out] pi                      The partition info structure to populate.
273
 *
274
 * @return True if this is a useful partition index, False if we can skip it.
275
 */
276
static bool generate_one_partition_info_entry(
277
  block_size_descriptor& bsd,
278
  unsigned int partition_count,
279
  unsigned int partition_index,
280
  unsigned int partition_remap_index,
281
  partition_info& pi
282
13.2k
) {
283
13.2k
  int texels_per_block = bsd.texel_count;
284
13.2k
  bool small_block = texels_per_block < 32;
285
286
13.2k
  uint8_t *partition_of_texel = pi.partition_of_texel;
287
288
  // Assign texels to partitions
289
13.2k
  int texel_idx = 0;
290
13.2k
  int counts[BLOCK_MAX_PARTITIONS] { 0 };
291
47.0k
  for (unsigned int z = 0; z < bsd.zdim; z++)
292
33.7k
  {
293
250k
    for (unsigned int y = 0; y <  bsd.ydim; y++)
294
216k
    {
295
1.76M
      for (unsigned int x = 0; x <  bsd.xdim; x++)
296
1.55M
      {
297
1.55M
        uint8_t part = select_partition(partition_index, x, y, z, partition_count, small_block);
298
1.55M
        pi.texels_of_partition[part][counts[part]++] = static_cast<uint8_t>(texel_idx++);
299
1.55M
        *partition_of_texel++ = part;
300
1.55M
      }
301
216k
    }
302
33.7k
  }
303
304
  // Fill loop tail so we can overfetch later
305
53.4k
  for (unsigned int i = 0; i < partition_count; i++)
306
40.2k
  {
307
40.2k
    int ptex_count = counts[i];
308
40.2k
    int ptex_count_simd = round_up_to_simd_multiple_vla(ptex_count);
309
78.4k
    for (int j = ptex_count; j < ptex_count_simd; j++)
310
38.2k
    {
311
38.2k
      pi.texels_of_partition[i][j] = pi.texels_of_partition[i][ptex_count - 1];
312
38.2k
    }
313
40.2k
  }
314
315
  // Populate the actual procedural partition count
316
13.2k
  if (counts[0] == 0)
317
2.40k
  {
318
2.40k
    pi.partition_count = 0;
319
2.40k
  }
320
10.8k
  else if (counts[1] == 0)
321
2.85k
  {
322
2.85k
    pi.partition_count = 1;
323
2.85k
  }
324
7.95k
  else if (counts[2] == 0)
325
4.26k
  {
326
4.26k
    pi.partition_count = 2;
327
4.26k
  }
328
3.69k
  else if (counts[3] == 0)
329
2.14k
  {
330
2.14k
    pi.partition_count = 3;
331
2.14k
  }
332
1.54k
  else
333
1.54k
  {
334
1.54k
    pi.partition_count = 4;
335
1.54k
  }
336
337
  // Populate the partition index
338
13.2k
  pi.partition_index = static_cast<uint16_t>(partition_index);
339
340
  // Populate the coverage bitmaps for 2/3/4 partitions
341
13.2k
  uint64_t* bitmaps { nullptr };
342
13.2k
  if (partition_count == 2)
343
3.99k
  {
344
3.99k
    bitmaps = bsd.coverage_bitmaps_2[partition_remap_index];
345
3.99k
  }
346
9.21k
  else if (partition_count == 3)
347
4.61k
  {
348
4.61k
    bitmaps = bsd.coverage_bitmaps_3[partition_remap_index];
349
4.61k
  }
350
4.60k
  else if (partition_count == 4)
351
4.59k
  {
352
4.59k
    bitmaps = bsd.coverage_bitmaps_4[partition_remap_index];
353
4.59k
  }
354
355
66.0k
  for (unsigned int i = 0; i < BLOCK_MAX_PARTITIONS; i++)
356
52.8k
  {
357
52.8k
    pi.partition_texel_count[i] = static_cast<uint8_t>(counts[i]);
358
52.8k
  }
359
360
  // Valid partitionings have texels in all of the requested partitions
361
13.2k
  bool valid = pi.partition_count == partition_count;
362
363
13.2k
  if (bitmaps)
364
13.2k
  {
365
    // Populate the partition coverage bitmap
366
53.4k
    for (unsigned int i = 0; i < partition_count; i++)
367
40.2k
    {
368
40.2k
      bitmaps[i] = 0ULL;
369
40.2k
    }
370
371
13.2k
    unsigned int texels_to_process = astc::min(bsd.texel_count, BLOCK_MAX_KMEANS_TEXELS);
372
615k
    for (unsigned int i = 0; i < texels_to_process; i++)
373
602k
    {
374
602k
      unsigned int idx = bsd.kmeans_texels[i];
375
602k
      bitmaps[pi.partition_of_texel[idx]] |= 1ULL << i;
376
602k
    }
377
13.2k
  }
378
379
13.2k
  return valid;
380
13.2k
}
381
382
static void build_partition_table_for_one_partition_count(
383
  block_size_descriptor& bsd,
384
  bool can_omit_partitionings,
385
  unsigned int partition_count_cutoff,
386
  unsigned int partition_count,
387
  partition_info* ptab,
388
  uint64_t* canonical_patterns
389
9
) {
390
9
  unsigned int next_index = 0;
391
9
  bsd.partitioning_count_selected[partition_count - 1] = 0;
392
9
  bsd.partitioning_count_all[partition_count - 1] = 0;
393
394
  // Skip tables larger than config max partition count if we can omit modes
395
9
  if (can_omit_partitionings && (partition_count > partition_count_cutoff))
396
0
  {
397
0
    return;
398
0
  }
399
400
  // Iterate through twice
401
  //   - Pass 0: Keep selected partitionings
402
  //   - Pass 1: Keep non-selected partitionings (skip if in omit mode)
403
9
  unsigned int max_iter = can_omit_partitionings ? 1 : 2;
404
405
  // Tracker for things we built in the first iteration
406
9
  uint8_t build[BLOCK_MAX_PARTITIONINGS] { 0 };
407
27
  for (unsigned int x = 0; x < max_iter; x++)
408
18
  {
409
18.4k
    for (unsigned int i = 0; i < BLOCK_MAX_PARTITIONINGS; i++)
410
18.4k
    {
411
      // Don't include things we built in the first pass
412
18.4k
      if ((x == 1) && build[i])
413
5.22k
      {
414
5.22k
        continue;
415
5.22k
      }
416
417
13.2k
      bool keep_useful = generate_one_partition_info_entry(bsd, partition_count, i, next_index, ptab[next_index]);
418
13.2k
      if ((x == 0) && !keep_useful)
419
3.40k
      {
420
3.40k
        continue;
421
3.40k
      }
422
423
9.80k
      generate_canonical_partitioning(bsd.texel_count, ptab[next_index].partition_of_texel, canonical_patterns + next_index * BIT_PATTERN_WORDS);
424
9.80k
      bool keep_canonical = true;
425
4.04M
      for (unsigned int j = 0; j < next_index; j++)
426
4.03M
      {
427
4.03M
        bool match = compare_canonical_partitionings(canonical_patterns + next_index * BIT_PATTERN_WORDS, canonical_patterns +  j * BIT_PATTERN_WORDS);
428
4.03M
        if (match)
429
2.55k
        {
430
2.55k
          keep_canonical = false;
431
2.55k
          break;
432
2.55k
        }
433
4.03M
      }
434
435
9.80k
      if (keep_useful && keep_canonical)
436
5.22k
      {
437
5.22k
        if (x == 0)
438
5.22k
        {
439
5.22k
          bsd.partitioning_packed_index[partition_count - 2][i] = static_cast<uint16_t>(next_index);
440
5.22k
          bsd.partitioning_count_selected[partition_count - 1]++;
441
5.22k
          bsd.partitioning_count_all[partition_count - 1]++;
442
5.22k
          build[i] = 1;
443
5.22k
          next_index++;
444
5.22k
        }
445
5.22k
      }
446
4.58k
      else
447
4.58k
      {
448
4.58k
        if (x == 1)
449
3.99k
        {
450
3.99k
          bsd.partitioning_packed_index[partition_count - 2][i] = static_cast<uint16_t>(next_index);
451
3.99k
          bsd.partitioning_count_all[partition_count - 1]++;
452
3.99k
          next_index++;
453
3.99k
        }
454
4.58k
      }
455
9.80k
    }
456
18
  }
457
9
}
458
459
/* See header for documentation. */
460
void init_partition_tables(
461
  block_size_descriptor& bsd,
462
  bool can_omit_partitionings,
463
  unsigned int partition_count_cutoff
464
3
) {
465
3
  partition_info* par_tab2 = bsd.partitionings;
466
3
  partition_info* par_tab3 = par_tab2 + BLOCK_MAX_PARTITIONINGS;
467
3
  partition_info* par_tab4 = par_tab3 + BLOCK_MAX_PARTITIONINGS;
468
3
  partition_info* par_tab1 = par_tab4 + BLOCK_MAX_PARTITIONINGS;
469
470
3
  generate_one_partition_info_entry(bsd, 1, 0, 0, *par_tab1);
471
3
  bsd.partitioning_count_selected[0] = 1;
472
3
  bsd.partitioning_count_all[0] = 1;
473
474
3
  uint64_t* canonical_patterns = new uint64_t[BLOCK_MAX_PARTITIONINGS * BIT_PATTERN_WORDS];
475
476
3
  build_partition_table_for_one_partition_count(bsd, can_omit_partitionings, partition_count_cutoff, 2, par_tab2, canonical_patterns);
477
3
  build_partition_table_for_one_partition_count(bsd, can_omit_partitionings, partition_count_cutoff, 3, par_tab3, canonical_patterns);
478
3
  build_partition_table_for_one_partition_count(bsd, can_omit_partitionings, partition_count_cutoff, 4, par_tab4, canonical_patterns);
479
480
3
  delete[] canonical_patterns;
481
3
}