Coverage Report

Created: 2026-02-26 06:43

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libavif/ext/aom/av1/encoder/hash_motion.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2018, Alliance for Open Media. All rights reserved.
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#include <assert.h>
13
#include <stdbool.h>
14
15
#include "config/av1_rtcd.h"
16
17
#include "av1/encoder/block.h"
18
#include "av1/encoder/hash.h"
19
#include "av1/encoder/hash_motion.h"
20
21
0
#define kSrcBits 16
22
// kMaxAddr is the number of hash table buckets in p_hash_table->p_lookup_table.
23
// p_hash_table->p_lookup_table consists of 6 hash tables of 1 << kSrcBits
24
// buckets each. Each of the 6 supported block sizes (4, 8, 16, 32, 64, 128) has
25
// its own hash table, indexed by the return value of
26
// hash_block_size_to_index().
27
0
#define kMaxAddr (6 << kSrcBits)
28
0
#define kMaxCandidatesPerHashBucket 256
29
30
static void get_pixels_in_1D_char_array_by_block_2x2(const uint8_t *y_src,
31
                                                     int stride,
32
0
                                                     uint8_t *p_pixels_in1D) {
33
0
  const uint8_t *p_pel = y_src;
34
0
  int index = 0;
35
0
  for (int i = 0; i < 2; i++) {
36
0
    for (int j = 0; j < 2; j++) {
37
0
      p_pixels_in1D[index++] = p_pel[j];
38
0
    }
39
0
    p_pel += stride;
40
0
  }
41
0
}
42
43
static void get_pixels_in_1D_short_array_by_block_2x2(const uint16_t *y_src,
44
                                                      int stride,
45
0
                                                      uint16_t *p_pixels_in1D) {
46
0
  const uint16_t *p_pel = y_src;
47
0
  int index = 0;
48
0
  for (int i = 0; i < 2; i++) {
49
0
    for (int j = 0; j < 2; j++) {
50
0
      p_pixels_in1D[index++] = p_pel[j];
51
0
    }
52
0
    p_pel += stride;
53
0
  }
54
0
}
55
56
// the hash value (hash_value1) consists of two parts, the first 3 bits relate
57
// to the block size and the remaining 16 bits are the crc values. This
58
// function is used to get the first 3 bits.
59
0
static int hash_block_size_to_index(int block_size) {
60
0
  switch (block_size) {
61
0
    case 4: return 0;
62
0
    case 8: return 1;
63
0
    case 16: return 2;
64
0
    case 32: return 3;
65
0
    case 64: return 4;
66
0
    case 128: return 5;
67
0
    default: return -1;
68
0
  }
69
0
}
70
71
static uint32_t get_identity_hash_value(const uint8_t a, const uint8_t b,
72
0
                                        const uint8_t c, const uint8_t d) {
73
  // The four input values add up to 32 bits, which is the size of the output.
74
  // Just pack those values as is.
75
0
  return ((uint32_t)a << 24) + ((uint32_t)b << 16) + ((uint32_t)c << 8) +
76
0
         ((uint32_t)d);
77
0
}
78
79
static uint32_t get_xor_hash_value_hbd(const uint16_t a, const uint16_t b,
80
0
                                       const uint16_t c, const uint16_t d) {
81
0
  uint32_t result;
82
  // Pack the lower 8 bits of each input value to the 32 bit output, then xor
83
  // with the upper 8 bits of each input value.
84
0
  result = ((uint32_t)(a & 0x00ff) << 24) + ((uint32_t)(b & 0x00ff) << 16) +
85
0
           ((uint32_t)(c & 0x00ff) << 8) + ((uint32_t)(d & 0x00ff));
86
0
  result ^= ((uint32_t)(a & 0xff00) << 16) + ((uint32_t)(b & 0xff00) << 8) +
87
0
            ((uint32_t)(c & 0xff00)) + ((uint32_t)(d & 0xff00) >> 8);
88
0
  return result;
89
0
}
90
91
0
void av1_hash_table_init(IntraBCHashInfo *intrabc_hash_info) {
92
0
  if (!intrabc_hash_info->crc_initialized) {
93
0
    av1_crc32c_calculator_init(&intrabc_hash_info->crc_calculator);
94
0
    intrabc_hash_info->crc_initialized = 1;
95
0
  }
96
0
  intrabc_hash_info->intrabc_hash_table.p_lookup_table = NULL;
97
0
}
98
99
93.5k
static void clear_all(hash_table *p_hash_table) {
100
93.5k
  if (p_hash_table->p_lookup_table == NULL) {
101
93.5k
    return;
102
93.5k
  }
103
0
  for (int i = 0; i < kMaxAddr; i++) {
104
0
    if (p_hash_table->p_lookup_table[i] != NULL) {
105
0
      aom_vector_destroy(p_hash_table->p_lookup_table[i]);
106
0
      aom_free(p_hash_table->p_lookup_table[i]);
107
0
      p_hash_table->p_lookup_table[i] = NULL;
108
0
    }
109
0
  }
110
0
}
111
112
93.5k
void av1_hash_table_destroy(hash_table *p_hash_table) {
113
93.5k
  clear_all(p_hash_table);
114
93.5k
  aom_free(p_hash_table->p_lookup_table);
115
93.5k
  p_hash_table->p_lookup_table = NULL;
116
93.5k
}
117
118
0
bool av1_hash_table_create(hash_table *p_hash_table) {
119
0
  if (p_hash_table->p_lookup_table != NULL) {
120
0
    clear_all(p_hash_table);
121
0
    return true;
122
0
  }
123
0
  p_hash_table->p_lookup_table =
124
0
      (Vector **)aom_calloc(kMaxAddr, sizeof(p_hash_table->p_lookup_table[0]));
125
0
  if (!p_hash_table->p_lookup_table) return false;
126
0
  return true;
127
0
}
128
129
static bool hash_table_add_to_table(hash_table *p_hash_table,
130
                                    uint32_t hash_value,
131
0
                                    const block_hash *curr_block_hash) {
132
0
  if (p_hash_table->p_lookup_table[hash_value] == NULL) {
133
0
    p_hash_table->p_lookup_table[hash_value] =
134
0
        aom_malloc(sizeof(*p_hash_table->p_lookup_table[hash_value]));
135
0
    if (p_hash_table->p_lookup_table[hash_value] == NULL) {
136
0
      return false;
137
0
    }
138
0
    if (aom_vector_setup(p_hash_table->p_lookup_table[hash_value], 10,
139
0
                         sizeof(*curr_block_hash)) == VECTOR_ERROR)
140
0
      return false;
141
0
  }
142
  // Place an upper bound each hash table bucket to up to 256 intrabc
143
  // block candidates, and ignore subsequent ones. Considering more can
144
  // unnecessarily slow down encoding for virtually no efficiency gain.
145
0
  if (aom_vector_byte_size(p_hash_table->p_lookup_table[hash_value]) <
146
0
      kMaxCandidatesPerHashBucket * sizeof(*curr_block_hash)) {
147
0
    if (aom_vector_push_back(p_hash_table->p_lookup_table[hash_value],
148
0
                             (void *)curr_block_hash) == VECTOR_ERROR)
149
0
      return false;
150
0
  }
151
0
  return true;
152
0
}
153
154
int32_t av1_hash_table_count(const hash_table *p_hash_table,
155
0
                             uint32_t hash_value) {
156
0
  if (p_hash_table->p_lookup_table[hash_value] == NULL) {
157
0
    return 0;
158
0
  } else {
159
0
    return (int32_t)(p_hash_table->p_lookup_table[hash_value]->size);
160
0
  }
161
0
}
162
163
Iterator av1_hash_get_first_iterator(hash_table *p_hash_table,
164
0
                                     uint32_t hash_value) {
165
0
  assert(av1_hash_table_count(p_hash_table, hash_value) > 0);
166
0
  return aom_vector_begin(p_hash_table->p_lookup_table[hash_value]);
167
0
}
168
169
void av1_generate_block_2x2_hash_value(const YV12_BUFFER_CONFIG *picture,
170
0
                                       uint32_t *pic_block_hash) {
171
0
  const int width = 2;
172
0
  const int height = 2;
173
0
  const int x_end = picture->y_crop_width - width + 1;
174
0
  const int y_end = picture->y_crop_height - height + 1;
175
176
0
  if (picture->flags & YV12_FLAG_HIGHBITDEPTH) {
177
0
    uint16_t p[4];
178
0
    int pos = 0;
179
0
    for (int y_pos = 0; y_pos < y_end; y_pos++) {
180
0
      for (int x_pos = 0; x_pos < x_end; x_pos++) {
181
0
        get_pixels_in_1D_short_array_by_block_2x2(
182
0
            CONVERT_TO_SHORTPTR(picture->y_buffer) + y_pos * picture->y_stride +
183
0
                x_pos,
184
0
            picture->y_stride, p);
185
        // For HBD, we either have 40 or 48 bits of input data that the xor hash
186
        // reduce to 32 bits. We intentionally don't want to "discard" bits to
187
        // avoid any kind of biasing.
188
0
        pic_block_hash[pos] = get_xor_hash_value_hbd(p[0], p[1], p[2], p[3]);
189
0
        pos++;
190
0
      }
191
0
      pos += width - 1;
192
0
    }
193
0
  } else {
194
0
    uint8_t p[4];
195
0
    int pos = 0;
196
0
    for (int y_pos = 0; y_pos < y_end; y_pos++) {
197
0
      for (int x_pos = 0; x_pos < x_end; x_pos++) {
198
0
        get_pixels_in_1D_char_array_by_block_2x2(
199
0
            picture->y_buffer + y_pos * picture->y_stride + x_pos,
200
0
            picture->y_stride, p);
201
        // This 2x2 hash isn't used directly as a "key" for the hash table, so
202
        // we can afford to just copy the 4 8-bit pixel values as a single
203
        // 32-bit value directly. (i.e. there are no concerns of a lack of
204
        // uniform distribution)
205
0
        pic_block_hash[pos] = get_identity_hash_value(p[0], p[1], p[2], p[3]);
206
0
        pos++;
207
0
      }
208
0
      pos += width - 1;
209
0
    }
210
0
  }
211
0
}
212
213
void av1_generate_block_hash_value(IntraBCHashInfo *intrabc_hash_info,
214
                                   const YV12_BUFFER_CONFIG *picture,
215
                                   int block_size,
216
                                   const uint32_t *src_pic_block_hash,
217
0
                                   uint32_t *dst_pic_block_hash) {
218
0
  CRC32C *calc = &intrabc_hash_info->crc_calculator;
219
220
0
  const int pic_width = picture->y_crop_width;
221
0
  const int x_end = picture->y_crop_width - block_size + 1;
222
0
  const int y_end = picture->y_crop_height - block_size + 1;
223
0
  const int src_size = block_size >> 1;
224
225
0
  uint32_t p[4];
226
0
  const int length = sizeof(p);
227
228
0
  int pos = 0;
229
0
  for (int y_pos = 0; y_pos < y_end; y_pos++) {
230
0
    for (int x_pos = 0; x_pos < x_end; x_pos++) {
231
      // Build up a bigger block from 4 smaller, non-overlapping source block
232
      // hashes, and compute its hash. Note: source blocks at the right and
233
      // bottom borders cannot be part of larger blocks, therefore they won't be
234
      // considered into the block hash value generation process.
235
0
      p[0] = src_pic_block_hash[pos];
236
0
      p[1] = src_pic_block_hash[pos + src_size];
237
0
      p[2] = src_pic_block_hash[pos + src_size * pic_width];
238
0
      p[3] = src_pic_block_hash[pos + src_size * pic_width + src_size];
239
      // TODO: bug aomedia:433531610 - serialize input values in a way that's
240
      // independent of the computer architecture's endianness
241
0
      dst_pic_block_hash[pos] =
242
0
          av1_get_crc32c_value(calc, (uint8_t *)p, length);
243
0
      pos++;
244
0
    }
245
0
    pos += block_size - 1;
246
0
  }
247
0
}
248
249
bool av1_add_to_hash_map_by_row_with_precal_data(hash_table *p_hash_table,
250
                                                 const uint32_t *pic_hash,
251
                                                 int pic_width, int pic_height,
252
0
                                                 int block_size) {
253
0
  const int x_end = pic_width - block_size + 1;
254
0
  const int y_end = pic_height - block_size + 1;
255
256
0
  int add_value = hash_block_size_to_index(block_size);
257
0
  assert(add_value >= 0);
258
0
  add_value <<= kSrcBits;
259
0
  const int crc_mask = (1 << kSrcBits) - 1;
260
0
  int step = block_size;
261
0
  int x_offset = 0;
262
0
  int y_offset = 0;
263
264
  // Explore the entire frame hierarchically to add intrabc candidate blocks to
265
  // the hash table, by starting with coarser steps (the block size), towards
266
  // finer-grained steps until every candidate block has been considered.
267
  // The nested for loop goes through the pic_hash array column by column.
268
269
  // Doing a hierarchical block exploration helps maximize spatial dispersion
270
  // of the first and foremost candidate blocks while minimizing overlap between
271
  // them. This is helpful because we only keep up to 256 entries of the
272
  // same candidate block (located in different places), so we want those
273
  // entries to cover the biggest area of the image to encode to maximize coding
274
  // efficiency.
275
276
  // This is the coordinate exploration order example for an 8x8 region, with
277
  // block_size = 4. The top-left corner (x, y) coordinates of each candidate
278
  // block are shown below. There are 5 * 5 (25) candidate blocks.
279
  //    x  0  1  2  3  4  5  6  7
280
  //  y +------------------------
281
  //  0 |  1 10  5 13  3
282
  //  1 | 16 22 18 24 20
283
  //  2 |  7 11  9 14  8
284
  //  3 | 17 23 19 25 21
285
  //  4 |  2 12  6 15  4--------+
286
  //  5 |              | 4 x 4  |
287
  //  6 |              | block  |
288
  //  7 |              +--------+
289
290
  // Please note that due to the way block exploration works, the smallest step
291
  // used is 2 (i.e. no two adjacent blocks will be explored consecutively).
292
  // Also, the exploration is designed to visit each block candidate only once.
293
0
  while (step > 1) {
294
0
    for (int x_pos = x_offset; x_pos < x_end; x_pos += step) {
295
0
      for (int y_pos = y_offset; y_pos < y_end; y_pos += step) {
296
0
        const int pos = y_pos * pic_width + x_pos;
297
0
        block_hash curr_block_hash;
298
299
0
        curr_block_hash.x = x_pos;
300
0
        curr_block_hash.y = y_pos;
301
302
0
        const uint32_t hash_value1 = (pic_hash[pos] & crc_mask) + add_value;
303
0
        curr_block_hash.hash_value2 = pic_hash[pos];
304
305
0
        if (!hash_table_add_to_table(p_hash_table, hash_value1,
306
0
                                     &curr_block_hash)) {
307
0
          return false;
308
0
        }
309
0
      }
310
0
    }
311
312
    // Adjust offsets and step sizes with this state machine.
313
    // State 0 is needed because no blocks in pic_hash have been explored,
314
    // so exploration requires a way to account for blocks with both zero
315
    // x_offset and zero y_offset.
316
    // State 0 is always meant to be executed first, but the relative order of
317
    // states 1, 2 and 3 can be arbitrary, as long as no two adjacent blocks
318
    // are explored consecutively.
319
0
    if (x_offset == 0 && y_offset == 0) {
320
      // State 0 -> State 1: special case
321
      // This state transition will only execute when step == block_size
322
0
      x_offset = step / 2;
323
0
    } else if (x_offset == step / 2 && y_offset == 0) {
324
      // State 1 -> State 2
325
0
      x_offset = 0;
326
0
      y_offset = step / 2;
327
0
    } else if (x_offset == 0 && y_offset == step / 2) {
328
      // State 2 -> State 3
329
0
      x_offset = step / 2;
330
0
    } else {
331
0
      assert(x_offset == step / 2 && y_offset == step / 2);
332
      // State 3 -> State 1: We've fully explored all the coordinates for the
333
      // current step size, continue by halving the step size
334
0
      step /= 2;
335
0
      x_offset = step / 2;
336
0
      y_offset = 0;
337
0
    }
338
0
  }
339
340
0
  return true;
341
0
}
342
343
int av1_hash_is_horizontal_perfect(const YV12_BUFFER_CONFIG *picture,
344
0
                                   int block_size, int x_start, int y_start) {
345
0
  const int stride = picture->y_stride;
346
0
  const uint8_t *p = picture->y_buffer + y_start * stride + x_start;
347
348
0
  if (picture->flags & YV12_FLAG_HIGHBITDEPTH) {
349
0
    const uint16_t *p16 = CONVERT_TO_SHORTPTR(p);
350
0
    for (int i = 0; i < block_size; i++) {
351
0
      for (int j = 1; j < block_size; j++) {
352
0
        if (p16[j] != p16[0]) {
353
0
          return 0;
354
0
        }
355
0
      }
356
0
      p16 += stride;
357
0
    }
358
0
  } else {
359
0
    for (int i = 0; i < block_size; i++) {
360
0
      for (int j = 1; j < block_size; j++) {
361
0
        if (p[j] != p[0]) {
362
0
          return 0;
363
0
        }
364
0
      }
365
0
      p += stride;
366
0
    }
367
0
  }
368
369
0
  return 1;
370
0
}
371
372
int av1_hash_is_vertical_perfect(const YV12_BUFFER_CONFIG *picture,
373
0
                                 int block_size, int x_start, int y_start) {
374
0
  const int stride = picture->y_stride;
375
0
  const uint8_t *p = picture->y_buffer + y_start * stride + x_start;
376
377
0
  if (picture->flags & YV12_FLAG_HIGHBITDEPTH) {
378
0
    const uint16_t *p16 = CONVERT_TO_SHORTPTR(p);
379
0
    for (int i = 0; i < block_size; i++) {
380
0
      for (int j = 1; j < block_size; j++) {
381
0
        if (p16[j * stride + i] != p16[i]) {
382
0
          return 0;
383
0
        }
384
0
      }
385
0
    }
386
0
  } else {
387
0
    for (int i = 0; i < block_size; i++) {
388
0
      for (int j = 1; j < block_size; j++) {
389
0
        if (p[j * stride + i] != p[i]) {
390
0
          return 0;
391
0
        }
392
0
      }
393
0
    }
394
0
  }
395
0
  return 1;
396
0
}
397
398
void av1_get_block_hash_value(IntraBCHashInfo *intra_bc_hash_info,
399
                              const uint8_t *y_src, int stride, int block_size,
400
                              uint32_t *hash_value1, uint32_t *hash_value2,
401
0
                              int use_highbitdepth) {
402
0
  int add_value = hash_block_size_to_index(block_size);
403
0
  assert(add_value >= 0);
404
0
  add_value <<= kSrcBits;
405
0
  const int crc_mask = (1 << kSrcBits) - 1;
406
407
0
  CRC32C *calc = &intra_bc_hash_info->crc_calculator;
408
0
  uint32_t **buf = intra_bc_hash_info->hash_value_buffer;
409
410
  // 2x2 subblock hash values in current CU
411
0
  int sub_block_in_width = (block_size >> 1);
412
0
  if (use_highbitdepth) {
413
0
    uint16_t pixel_to_hash[4];
414
0
    uint16_t *y16_src = CONVERT_TO_SHORTPTR(y_src);
415
0
    for (int y_pos = 0; y_pos < block_size; y_pos += 2) {
416
0
      for (int x_pos = 0; x_pos < block_size; x_pos += 2) {
417
0
        int pos = (y_pos >> 1) * sub_block_in_width + (x_pos >> 1);
418
0
        get_pixels_in_1D_short_array_by_block_2x2(
419
0
            y16_src + y_pos * stride + x_pos, stride, pixel_to_hash);
420
0
        assert(pos < AOM_BUFFER_SIZE_FOR_BLOCK_HASH);
421
        // For HBD, we either have 40 or 48 bits of input data that the xor hash
422
        // reduce to 32 bits. We intentionally don't want to "discard" bits to
423
        // avoid any kind of biasing.
424
0
        buf[0][pos] =
425
0
            get_xor_hash_value_hbd(pixel_to_hash[0], pixel_to_hash[1],
426
0
                                   pixel_to_hash[2], pixel_to_hash[3]);
427
0
      }
428
0
    }
429
0
  } else {
430
0
    uint8_t pixel_to_hash[4];
431
0
    for (int y_pos = 0; y_pos < block_size; y_pos += 2) {
432
0
      for (int x_pos = 0; x_pos < block_size; x_pos += 2) {
433
0
        int pos = (y_pos >> 1) * sub_block_in_width + (x_pos >> 1);
434
0
        get_pixels_in_1D_char_array_by_block_2x2(y_src + y_pos * stride + x_pos,
435
0
                                                 stride, pixel_to_hash);
436
0
        assert(pos < AOM_BUFFER_SIZE_FOR_BLOCK_HASH);
437
        // This 2x2 hash isn't used directly as a "key" for the hash table, so
438
        // we can afford to just copy the 4 8-bit pixel values as a single
439
        // 32-bit value directly. (i.e. there are no concerns of a lack of
440
        // uniform distribution)
441
0
        buf[0][pos] =
442
0
            get_identity_hash_value(pixel_to_hash[0], pixel_to_hash[1],
443
0
                                    pixel_to_hash[2], pixel_to_hash[3]);
444
0
      }
445
0
    }
446
0
  }
447
448
0
  int src_sub_block_in_width = sub_block_in_width;
449
0
  sub_block_in_width >>= 1;
450
451
0
  int src_idx = 0;
452
0
  int dst_idx = !src_idx;
453
454
  // 4x4 subblock hash values to current block hash values
455
0
  uint32_t to_hash[4];
456
0
  for (int sub_width = 4; sub_width <= block_size;
457
0
       sub_width *= 2, src_idx = !src_idx) {
458
0
    dst_idx = !src_idx;
459
460
0
    int dst_pos = 0;
461
0
    for (int y_pos = 0; y_pos < sub_block_in_width; y_pos++) {
462
0
      for (int x_pos = 0; x_pos < sub_block_in_width; x_pos++) {
463
0
        int srcPos = (y_pos << 1) * src_sub_block_in_width + (x_pos << 1);
464
465
0
        assert(srcPos + 1 < AOM_BUFFER_SIZE_FOR_BLOCK_HASH);
466
0
        assert(srcPos + src_sub_block_in_width + 1 <
467
0
               AOM_BUFFER_SIZE_FOR_BLOCK_HASH);
468
0
        assert(dst_pos < AOM_BUFFER_SIZE_FOR_BLOCK_HASH);
469
470
0
        to_hash[0] = buf[src_idx][srcPos];
471
0
        to_hash[1] = buf[src_idx][srcPos + 1];
472
0
        to_hash[2] = buf[src_idx][srcPos + src_sub_block_in_width];
473
0
        to_hash[3] = buf[src_idx][srcPos + src_sub_block_in_width + 1];
474
475
        // TODO: bug aomedia:433531610 - serialize input values in a way that's
476
        // independent of the computer architecture's endianness
477
0
        buf[dst_idx][dst_pos] =
478
0
            av1_get_crc32c_value(calc, (uint8_t *)to_hash, sizeof(to_hash));
479
0
        dst_pos++;
480
0
      }
481
0
    }
482
483
0
    src_sub_block_in_width = sub_block_in_width;
484
0
    sub_block_in_width >>= 1;
485
0
  }
486
487
0
  *hash_value1 = (buf[dst_idx][0] & crc_mask) + add_value;
488
0
  *hash_value2 = buf[dst_idx][0];
489
0
}