/src/astc-encoder/Source/astcenc_partition_tables.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // SPDX-License-Identifier: Apache-2.0 |
2 | | // ---------------------------------------------------------------------------- |
3 | | // Copyright 2011-2023 Arm Limited |
4 | | // |
5 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not |
6 | | // use this file except in compliance with the License. You may obtain a copy |
7 | | // of the License at: |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, software |
12 | | // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
13 | | // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the |
14 | | // License for the specific language governing permissions and limitations |
15 | | // under the License. |
16 | | // ---------------------------------------------------------------------------- |
17 | | |
18 | | /** |
19 | | * @brief Functions for generating partition tables on demand. |
20 | | */ |
21 | | |
22 | | #include "astcenc_internal.h" |
23 | | |
24 | | /** @brief The number of 64-bit words needed to represent a canonical partition bit pattern. */ |
25 | 8.15M | #define BIT_PATTERN_WORDS (((ASTCENC_BLOCK_MAX_TEXELS * 2) + 63) / 64) |
26 | | |
27 | | /** |
28 | | * @brief Generate a canonical representation of a partition pattern. |
29 | | * |
30 | | * The returned value stores two bits per texel, for up to 6x6x6 texels, where the two bits store |
31 | | * the remapped texel index. Remapping ensures that we only match on the partition pattern, |
32 | | * independent of the partition order generated by the hash. |
33 | | * |
34 | | * @param texel_count The number of texels in the block. |
35 | | * @param partition_of_texel The partition assignments, in hash order. |
36 | | * @param[out] bit_pattern The output bit pattern representation. |
37 | | */ |
38 | | static void generate_canonical_partitioning( |
39 | | unsigned int texel_count, |
40 | | const uint8_t* partition_of_texel, |
41 | | uint64_t bit_pattern[BIT_PATTERN_WORDS] |
42 | 9.80k | ) { |
43 | | // Clear the pattern |
44 | 78.4k | for (unsigned int i = 0; i < BIT_PATTERN_WORDS; i++) |
45 | 68.6k | { |
46 | 68.6k | bit_pattern[i] = 0; |
47 | 68.6k | } |
48 | | |
49 | | // Store a mapping to reorder the raw partitions so that the partitions are ordered such |
50 | | // that the lowest texel index in partition N is smaller than the lowest texel index in |
51 | | // partition N + 1. |
52 | 9.80k | int mapped_index[BLOCK_MAX_PARTITIONS]; |
53 | 9.80k | int map_weight_count = 0; |
54 | | |
55 | 49.0k | for (unsigned int i = 0; i < BLOCK_MAX_PARTITIONS; i++) |
56 | 39.2k | { |
57 | 39.2k | mapped_index[i] = -1; |
58 | 39.2k | } |
59 | | |
60 | 1.19M | for (unsigned int i = 0; i < texel_count; i++) |
61 | 1.18M | { |
62 | 1.18M | int index = partition_of_texel[i]; |
63 | 1.18M | if (mapped_index[index] < 0) |
64 | 24.4k | { |
65 | 24.4k | mapped_index[index] = map_weight_count++; |
66 | 24.4k | } |
67 | | |
68 | 1.18M | uint64_t xlat_index = mapped_index[index]; |
69 | 1.18M | bit_pattern[i >> 5] |= xlat_index << (2 * (i & 0x1F)); |
70 | 1.18M | } |
71 | 9.80k | } |
72 | | |
73 | | /** |
74 | | * @brief Compare two canonical patterns to see if they are the same. |
75 | | * |
76 | | * @param part1 The first canonical bit pattern to check. |
77 | | * @param part2 The second canonical bit pattern to check. |
78 | | * |
79 | | * @return @c true if the patterns are the same, @c false otherwise. |
80 | | */ |
81 | | static bool compare_canonical_partitionings( |
82 | | const uint64_t part1[BIT_PATTERN_WORDS], |
83 | | const uint64_t part2[BIT_PATTERN_WORDS] |
84 | 4.03M | ) { |
85 | 4.03M | return (part1[0] == part2[0]) |
86 | 4.03M | #if BIT_PATTERN_WORDS > 1 |
87 | 4.03M | && (part1[1] == part2[1]) |
88 | 4.03M | #endif |
89 | 4.03M | #if BIT_PATTERN_WORDS > 2 |
90 | 4.03M | && (part1[2] == part2[2]) |
91 | 4.03M | #endif |
92 | 4.03M | #if BIT_PATTERN_WORDS > 3 |
93 | 4.03M | && (part1[3] == part2[3]) |
94 | 4.03M | #endif |
95 | 4.03M | #if BIT_PATTERN_WORDS > 4 |
96 | 4.03M | && (part1[4] == part2[4]) |
97 | 4.03M | #endif |
98 | 4.03M | #if BIT_PATTERN_WORDS > 5 |
99 | 4.03M | && (part1[5] == part2[5]) |
100 | 4.03M | #endif |
101 | 4.03M | #if BIT_PATTERN_WORDS > 6 |
102 | 4.03M | && (part1[6] == part2[6]) |
103 | 4.03M | #endif |
104 | 4.03M | ; |
105 | 4.03M | } |
106 | | |
107 | | /** |
108 | | * @brief Hash function used for procedural partition assignment. |
109 | | * |
110 | | * @param inp The hash seed. |
111 | | * |
112 | | * @return The hashed value. |
113 | | */ |
114 | | static uint32_t hash52( |
115 | | uint32_t inp |
116 | 1.55M | ) { |
117 | 1.55M | inp ^= inp >> 15; |
118 | | |
119 | | // (2^4 + 1) * (2^7 + 1) * (2^17 - 1) |
120 | 1.55M | inp *= 0xEEDE0891; |
121 | 1.55M | inp ^= inp >> 5; |
122 | 1.55M | inp += inp << 16; |
123 | 1.55M | inp ^= inp >> 7; |
124 | 1.55M | inp ^= inp >> 3; |
125 | 1.55M | inp ^= inp << 6; |
126 | 1.55M | inp ^= inp >> 17; |
127 | 1.55M | return inp; |
128 | 1.55M | } |
129 | | |
130 | | /** |
131 | | * @brief Select texel assignment for a single coordinate. |
132 | | * |
133 | | * @param seed The seed - the partition index from the block. |
134 | | * @param x The texel X coordinate in the block. |
135 | | * @param y The texel Y coordinate in the block. |
136 | | * @param z The texel Z coordinate in the block. |
137 | | * @param partition_count The total partition count of this encoding. |
138 | | * @param small_block @c true if the block has fewer than 32 texels. |
139 | | * |
140 | | * @return The assigned partition index for this texel. |
141 | | */ |
142 | | static uint8_t select_partition( |
143 | | int seed, |
144 | | int x, |
145 | | int y, |
146 | | int z, |
147 | | int partition_count, |
148 | | bool small_block |
149 | 1.55M | ) { |
150 | | // For small blocks bias the coordinates to get better distribution |
151 | 1.55M | if (small_block) |
152 | 80.9k | { |
153 | 80.9k | x <<= 1; |
154 | 80.9k | y <<= 1; |
155 | 80.9k | z <<= 1; |
156 | 80.9k | } |
157 | | |
158 | 1.55M | seed += (partition_count - 1) * 1024; |
159 | | |
160 | 1.55M | uint32_t rnum = hash52(seed); |
161 | | |
162 | 1.55M | uint8_t seed1 = rnum & 0xF; |
163 | 1.55M | uint8_t seed2 = (rnum >> 4) & 0xF; |
164 | 1.55M | uint8_t seed3 = (rnum >> 8) & 0xF; |
165 | 1.55M | uint8_t seed4 = (rnum >> 12) & 0xF; |
166 | 1.55M | uint8_t seed5 = (rnum >> 16) & 0xF; |
167 | 1.55M | uint8_t seed6 = (rnum >> 20) & 0xF; |
168 | 1.55M | uint8_t seed7 = (rnum >> 24) & 0xF; |
169 | 1.55M | uint8_t seed8 = (rnum >> 28) & 0xF; |
170 | 1.55M | uint8_t seed9 = (rnum >> 18) & 0xF; |
171 | 1.55M | uint8_t seed10 = (rnum >> 22) & 0xF; |
172 | 1.55M | uint8_t seed11 = (rnum >> 26) & 0xF; |
173 | 1.55M | uint8_t seed12 = ((rnum >> 30) | (rnum << 2)) & 0xF; |
174 | | |
175 | | // Squaring all the seeds in order to bias their distribution towards lower values. |
176 | 1.55M | seed1 *= seed1; |
177 | 1.55M | seed2 *= seed2; |
178 | 1.55M | seed3 *= seed3; |
179 | 1.55M | seed4 *= seed4; |
180 | 1.55M | seed5 *= seed5; |
181 | 1.55M | seed6 *= seed6; |
182 | 1.55M | seed7 *= seed7; |
183 | 1.55M | seed8 *= seed8; |
184 | 1.55M | seed9 *= seed9; |
185 | 1.55M | seed10 *= seed10; |
186 | 1.55M | seed11 *= seed11; |
187 | 1.55M | seed12 *= seed12; |
188 | | |
189 | 1.55M | int sh1, sh2; |
190 | 1.55M | if (seed & 1) |
191 | 775k | { |
192 | 775k | sh1 = (seed & 2 ? 4 : 5); |
193 | 775k | sh2 = (partition_count == 3 ? 6 : 5); |
194 | 775k | } |
195 | 776k | else |
196 | 776k | { |
197 | 776k | sh1 = (partition_count == 3 ? 6 : 5); |
198 | 776k | sh2 = (seed & 2 ? 4 : 5); |
199 | 776k | } |
200 | | |
201 | 1.55M | int sh3 = (seed & 0x10) ? sh1 : sh2; |
202 | | |
203 | 1.55M | seed1 >>= sh1; |
204 | 1.55M | seed2 >>= sh2; |
205 | 1.55M | seed3 >>= sh1; |
206 | 1.55M | seed4 >>= sh2; |
207 | 1.55M | seed5 >>= sh1; |
208 | 1.55M | seed6 >>= sh2; |
209 | 1.55M | seed7 >>= sh1; |
210 | 1.55M | seed8 >>= sh2; |
211 | | |
212 | 1.55M | seed9 >>= sh3; |
213 | 1.55M | seed10 >>= sh3; |
214 | 1.55M | seed11 >>= sh3; |
215 | 1.55M | seed12 >>= sh3; |
216 | | |
217 | 1.55M | int a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14); |
218 | 1.55M | int b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10); |
219 | 1.55M | int c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6); |
220 | 1.55M | int d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2); |
221 | | |
222 | | // Apply the saw |
223 | 1.55M | a &= 0x3F; |
224 | 1.55M | b &= 0x3F; |
225 | 1.55M | c &= 0x3F; |
226 | 1.55M | d &= 0x3F; |
227 | | |
228 | | // Remove some of the components if we are to output < 4 partitions. |
229 | 1.55M | if (partition_count <= 3) |
230 | 1.00M | { |
231 | 1.00M | d = 0; |
232 | 1.00M | } |
233 | | |
234 | 1.55M | if (partition_count <= 2) |
235 | 454k | { |
236 | 454k | c = 0; |
237 | 454k | } |
238 | | |
239 | 1.55M | if (partition_count <= 1) |
240 | 376 | { |
241 | 376 | b = 0; |
242 | 376 | } |
243 | | |
244 | 1.55M | uint8_t partition; |
245 | 1.55M | if (a >= b && a >= c && a >= d) |
246 | 599k | { |
247 | 599k | partition = 0; |
248 | 599k | } |
249 | 951k | else if (b >= c && b >= d) |
250 | 476k | { |
251 | 476k | partition = 1; |
252 | 476k | } |
253 | 475k | else if (c >= d) |
254 | 339k | { |
255 | 339k | partition = 2; |
256 | 339k | } |
257 | 135k | else |
258 | 135k | { |
259 | 135k | partition = 3; |
260 | 135k | } |
261 | | |
262 | 1.55M | return partition; |
263 | 1.55M | } |
264 | | |
265 | | /** |
266 | | * @brief Generate a single partition info structure. |
267 | | * |
268 | | * @param[out] bsd The block size information. |
269 | | * @param partition_count The partition count of this partitioning. |
270 | | * @param partition_index The partition index / seed of this partitioning. |
271 | | * @param partition_remap_index The remapped partition index of this partitioning. |
272 | | * @param[out] pi The partition info structure to populate. |
273 | | * |
274 | | * @return True if this is a useful partition index, False if we can skip it. |
275 | | */ |
276 | | static bool generate_one_partition_info_entry( |
277 | | block_size_descriptor& bsd, |
278 | | unsigned int partition_count, |
279 | | unsigned int partition_index, |
280 | | unsigned int partition_remap_index, |
281 | | partition_info& pi |
282 | 13.2k | ) { |
283 | 13.2k | int texels_per_block = bsd.texel_count; |
284 | 13.2k | bool small_block = texels_per_block < 32; |
285 | | |
286 | 13.2k | uint8_t *partition_of_texel = pi.partition_of_texel; |
287 | | |
288 | | // Assign texels to partitions |
289 | 13.2k | int texel_idx = 0; |
290 | 13.2k | int counts[BLOCK_MAX_PARTITIONS] { 0 }; |
291 | 47.0k | for (unsigned int z = 0; z < bsd.zdim; z++) |
292 | 33.7k | { |
293 | 250k | for (unsigned int y = 0; y < bsd.ydim; y++) |
294 | 216k | { |
295 | 1.76M | for (unsigned int x = 0; x < bsd.xdim; x++) |
296 | 1.55M | { |
297 | 1.55M | uint8_t part = select_partition(partition_index, x, y, z, partition_count, small_block); |
298 | 1.55M | pi.texels_of_partition[part][counts[part]++] = static_cast<uint8_t>(texel_idx++); |
299 | 1.55M | *partition_of_texel++ = part; |
300 | 1.55M | } |
301 | 216k | } |
302 | 33.7k | } |
303 | | |
304 | | // Fill loop tail so we can overfetch later |
305 | 53.4k | for (unsigned int i = 0; i < partition_count; i++) |
306 | 40.2k | { |
307 | 40.2k | int ptex_count = counts[i]; |
308 | 40.2k | int ptex_count_simd = round_up_to_simd_multiple_vla(ptex_count); |
309 | 78.4k | for (int j = ptex_count; j < ptex_count_simd; j++) |
310 | 38.2k | { |
311 | 38.2k | pi.texels_of_partition[i][j] = pi.texels_of_partition[i][ptex_count - 1]; |
312 | 38.2k | } |
313 | 40.2k | } |
314 | | |
315 | | // Populate the actual procedural partition count |
316 | 13.2k | if (counts[0] == 0) |
317 | 2.40k | { |
318 | 2.40k | pi.partition_count = 0; |
319 | 2.40k | } |
320 | 10.8k | else if (counts[1] == 0) |
321 | 2.85k | { |
322 | 2.85k | pi.partition_count = 1; |
323 | 2.85k | } |
324 | 7.95k | else if (counts[2] == 0) |
325 | 4.26k | { |
326 | 4.26k | pi.partition_count = 2; |
327 | 4.26k | } |
328 | 3.69k | else if (counts[3] == 0) |
329 | 2.14k | { |
330 | 2.14k | pi.partition_count = 3; |
331 | 2.14k | } |
332 | 1.54k | else |
333 | 1.54k | { |
334 | 1.54k | pi.partition_count = 4; |
335 | 1.54k | } |
336 | | |
337 | | // Populate the partition index |
338 | 13.2k | pi.partition_index = static_cast<uint16_t>(partition_index); |
339 | | |
340 | | // Populate the coverage bitmaps for 2/3/4 partitions |
341 | 13.2k | uint64_t* bitmaps { nullptr }; |
342 | 13.2k | if (partition_count == 2) |
343 | 3.99k | { |
344 | 3.99k | bitmaps = bsd.coverage_bitmaps_2[partition_remap_index]; |
345 | 3.99k | } |
346 | 9.21k | else if (partition_count == 3) |
347 | 4.61k | { |
348 | 4.61k | bitmaps = bsd.coverage_bitmaps_3[partition_remap_index]; |
349 | 4.61k | } |
350 | 4.60k | else if (partition_count == 4) |
351 | 4.59k | { |
352 | 4.59k | bitmaps = bsd.coverage_bitmaps_4[partition_remap_index]; |
353 | 4.59k | } |
354 | | |
355 | 66.0k | for (unsigned int i = 0; i < BLOCK_MAX_PARTITIONS; i++) |
356 | 52.8k | { |
357 | 52.8k | pi.partition_texel_count[i] = static_cast<uint8_t>(counts[i]); |
358 | 52.8k | } |
359 | | |
360 | | // Valid partitionings have texels in all of the requested partitions |
361 | 13.2k | bool valid = pi.partition_count == partition_count; |
362 | | |
363 | 13.2k | if (bitmaps) |
364 | 13.2k | { |
365 | | // Populate the partition coverage bitmap |
366 | 53.4k | for (unsigned int i = 0; i < partition_count; i++) |
367 | 40.2k | { |
368 | 40.2k | bitmaps[i] = 0ULL; |
369 | 40.2k | } |
370 | | |
371 | 13.2k | unsigned int texels_to_process = astc::min(bsd.texel_count, BLOCK_MAX_KMEANS_TEXELS); |
372 | 615k | for (unsigned int i = 0; i < texels_to_process; i++) |
373 | 602k | { |
374 | 602k | unsigned int idx = bsd.kmeans_texels[i]; |
375 | 602k | bitmaps[pi.partition_of_texel[idx]] |= 1ULL << i; |
376 | 602k | } |
377 | 13.2k | } |
378 | | |
379 | 13.2k | return valid; |
380 | 13.2k | } |
381 | | |
382 | | static void build_partition_table_for_one_partition_count( |
383 | | block_size_descriptor& bsd, |
384 | | bool can_omit_partitionings, |
385 | | unsigned int partition_count_cutoff, |
386 | | unsigned int partition_count, |
387 | | partition_info* ptab, |
388 | | uint64_t* canonical_patterns |
389 | 9 | ) { |
390 | 9 | unsigned int next_index = 0; |
391 | 9 | bsd.partitioning_count_selected[partition_count - 1] = 0; |
392 | 9 | bsd.partitioning_count_all[partition_count - 1] = 0; |
393 | | |
394 | | // Skip tables larger than config max partition count if we can omit modes |
395 | 9 | if (can_omit_partitionings && (partition_count > partition_count_cutoff)) |
396 | 0 | { |
397 | 0 | return; |
398 | 0 | } |
399 | | |
400 | | // Iterate through twice |
401 | | // - Pass 0: Keep selected partitionings |
402 | | // - Pass 1: Keep non-selected partitionings (skip if in omit mode) |
403 | 9 | unsigned int max_iter = can_omit_partitionings ? 1 : 2; |
404 | | |
405 | | // Tracker for things we built in the first iteration |
406 | 9 | uint8_t build[BLOCK_MAX_PARTITIONINGS] { 0 }; |
407 | 27 | for (unsigned int x = 0; x < max_iter; x++) |
408 | 18 | { |
409 | 18.4k | for (unsigned int i = 0; i < BLOCK_MAX_PARTITIONINGS; i++) |
410 | 18.4k | { |
411 | | // Don't include things we built in the first pass |
412 | 18.4k | if ((x == 1) && build[i]) |
413 | 5.22k | { |
414 | 5.22k | continue; |
415 | 5.22k | } |
416 | | |
417 | 13.2k | bool keep_useful = generate_one_partition_info_entry(bsd, partition_count, i, next_index, ptab[next_index]); |
418 | 13.2k | if ((x == 0) && !keep_useful) |
419 | 3.40k | { |
420 | 3.40k | continue; |
421 | 3.40k | } |
422 | | |
423 | 9.80k | generate_canonical_partitioning(bsd.texel_count, ptab[next_index].partition_of_texel, canonical_patterns + next_index * BIT_PATTERN_WORDS); |
424 | 9.80k | bool keep_canonical = true; |
425 | 4.04M | for (unsigned int j = 0; j < next_index; j++) |
426 | 4.03M | { |
427 | 4.03M | bool match = compare_canonical_partitionings(canonical_patterns + next_index * BIT_PATTERN_WORDS, canonical_patterns + j * BIT_PATTERN_WORDS); |
428 | 4.03M | if (match) |
429 | 2.55k | { |
430 | 2.55k | keep_canonical = false; |
431 | 2.55k | break; |
432 | 2.55k | } |
433 | 4.03M | } |
434 | | |
435 | 9.80k | if (keep_useful && keep_canonical) |
436 | 5.22k | { |
437 | 5.22k | if (x == 0) |
438 | 5.22k | { |
439 | 5.22k | bsd.partitioning_packed_index[partition_count - 2][i] = static_cast<uint16_t>(next_index); |
440 | 5.22k | bsd.partitioning_count_selected[partition_count - 1]++; |
441 | 5.22k | bsd.partitioning_count_all[partition_count - 1]++; |
442 | 5.22k | build[i] = 1; |
443 | 5.22k | next_index++; |
444 | 5.22k | } |
445 | 5.22k | } |
446 | 4.58k | else |
447 | 4.58k | { |
448 | 4.58k | if (x == 1) |
449 | 3.99k | { |
450 | 3.99k | bsd.partitioning_packed_index[partition_count - 2][i] = static_cast<uint16_t>(next_index); |
451 | 3.99k | bsd.partitioning_count_all[partition_count - 1]++; |
452 | 3.99k | next_index++; |
453 | 3.99k | } |
454 | 4.58k | } |
455 | 9.80k | } |
456 | 18 | } |
457 | 9 | } |
458 | | |
459 | | /* See header for documentation. */ |
460 | | void init_partition_tables( |
461 | | block_size_descriptor& bsd, |
462 | | bool can_omit_partitionings, |
463 | | unsigned int partition_count_cutoff |
464 | 3 | ) { |
465 | 3 | partition_info* par_tab2 = bsd.partitionings; |
466 | 3 | partition_info* par_tab3 = par_tab2 + BLOCK_MAX_PARTITIONINGS; |
467 | 3 | partition_info* par_tab4 = par_tab3 + BLOCK_MAX_PARTITIONINGS; |
468 | 3 | partition_info* par_tab1 = par_tab4 + BLOCK_MAX_PARTITIONINGS; |
469 | | |
470 | 3 | generate_one_partition_info_entry(bsd, 1, 0, 0, *par_tab1); |
471 | 3 | bsd.partitioning_count_selected[0] = 1; |
472 | 3 | bsd.partitioning_count_all[0] = 1; |
473 | | |
474 | 3 | uint64_t* canonical_patterns = new uint64_t[BLOCK_MAX_PARTITIONINGS * BIT_PATTERN_WORDS]; |
475 | | |
476 | 3 | build_partition_table_for_one_partition_count(bsd, can_omit_partitionings, partition_count_cutoff, 2, par_tab2, canonical_patterns); |
477 | 3 | build_partition_table_for_one_partition_count(bsd, can_omit_partitionings, partition_count_cutoff, 3, par_tab3, canonical_patterns); |
478 | 3 | build_partition_table_for_one_partition_count(bsd, can_omit_partitionings, partition_count_cutoff, 4, par_tab4, canonical_patterns); |
479 | | |
480 | 3 | delete[] canonical_patterns; |
481 | 3 | } |