Coverage Report

Created: 2024-09-06 07:53

/src/libvpx/vpx_dsp/x86/avg_pred_avx2.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 *  Copyright (c) 2023 The WebM project authors. All Rights Reserved.
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5
 *  that can be found in the LICENSE file in the root of the source
6
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS.  All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
9
 */
10
11
#include <assert.h>
12
#include <immintrin.h>
13
14
#include "./vpx_dsp_rtcd.h"
15
16
// Writes the per-byte rounded average of `pred` and `ref` into `comp_pred`.
//
// `pred` and `comp_pred` are traversed as packed buffers: every row is `width`
// bytes long and rows follow one another without padding. Rows of `ref` are
// `ref_stride` bytes apart. Widths of 8, 16 and 32 as well as any multiple of
// 64 are processed here with 256-bit operations; every other width is handed
// to vpx_comp_avg_pred_sse2().
void vpx_comp_avg_pred_avx2(uint8_t *comp_pred, const uint8_t *pred, int width,
                            int height, const uint8_t *ref, int ref_stride) {
  // Counts down instead of up; each loop below still runs at least once and
  // stops as soon as no rows remain.
  int rows_left = height;

  // Aligned 256-bit loads and stores are issued on both of these buffers.
  assert(((intptr_t)comp_pred % 32) == 0);
  assert(((intptr_t)pred % 32) == 0);

  switch (width) {
    case 8:
      // Four 8-byte rows are packed into one 256-bit register per iteration.
      assert(height % 4 == 0);
      do {
        const __m256i pred_rows = _mm256_load_si256((const __m256i *)pred);
        const __m128i row0 = _mm_loadl_epi64((const __m128i *)ref);
        const __m128i row2 =
            _mm_loadl_epi64((const __m128i *)(ref + 2 * ref_stride));

        // Place rows 1 and 3 in the upper 64 bits next to rows 0 and 2.
        const __m128i rows_01 = _mm_castps_si128(_mm_loadh_pi(
            _mm_castsi128_ps(row0), (const __m64 *)(ref + ref_stride)));
        const __m128i rows_23 = _mm_castps_si128(_mm_loadh_pi(
            _mm_castsi128_ps(row2), (const __m64 *)(ref + 3 * ref_stride)));

        const __m256i ref_rows =
            _mm256_inserti128_si256(_mm256_castsi128_si256(rows_01), rows_23, 1);

        _mm256_store_si256((__m256i *)comp_pred,
                           _mm256_avg_epu8(pred_rows, ref_rows));

        rows_left -= 4;
        pred += 32;
        comp_pred += 32;
        ref += 4 * ref_stride;
      } while (rows_left > 0);
      break;

    case 16:
      // Two 16-byte rows per register, two registers (four rows) per
      // iteration.
      assert(height % 4 == 0);
      do {
        const __m256i pred_lo = _mm256_load_si256((const __m256i *)pred);
        const __m256i pred_hi = _mm256_load_si256((const __m256i *)(pred + 32));
        const __m256i row0 =
            _mm256_castsi128_si256(_mm_loadu_si128((const __m128i *)ref));
        const __m256i ref_lo = _mm256_inserti128_si256(
            row0, _mm_loadu_si128((const __m128i *)(ref + ref_stride)), 1);
        const __m256i row2 = _mm256_castsi128_si256(
            _mm_loadu_si128((const __m128i *)(ref + 2 * ref_stride)));
        const __m256i ref_hi = _mm256_inserti128_si256(
            row2, _mm_loadu_si128((const __m128i *)(ref + 3 * ref_stride)), 1);

        _mm256_store_si256((__m256i *)comp_pred,
                           _mm256_avg_epu8(pred_lo, ref_lo));
        _mm256_store_si256((__m256i *)(comp_pred + 32),
                           _mm256_avg_epu8(pred_hi, ref_hi));

        rows_left -= 4;
        pred += 64;
        comp_pred += 64;
        ref += 4 * ref_stride;
      } while (rows_left > 0);
      break;

    case 32:
      // One full row per register, two rows per iteration.
      assert(height % 2 == 0);
      do {
        const __m256i pred_row0 = _mm256_load_si256((const __m256i *)pred);
        const __m256i pred_row1 =
            _mm256_load_si256((const __m256i *)(pred + 32));
        const __m256i ref_row0 = _mm256_loadu_si256((const __m256i *)ref);
        const __m256i ref_row1 =
            _mm256_loadu_si256((const __m256i *)(ref + ref_stride));

        _mm256_store_si256((__m256i *)comp_pred,
                           _mm256_avg_epu8(pred_row0, ref_row0));
        _mm256_store_si256((__m256i *)(comp_pred + 32),
                           _mm256_avg_epu8(pred_row1, ref_row1));

        rows_left -= 2;
        pred += 64;
        comp_pred += 64;
        ref += 2 * ref_stride;
      } while (rows_left > 0);
      break;

    default:
      if (width % 64 != 0) {
        // No AVX2 path for this width; defer to the SSE2 implementation.
        vpx_comp_avg_pred_sse2(comp_pred, pred, width, height, ref, ref_stride);
        break;
      }

      // One row per iteration, handled in 64-byte column chunks.
      do {
        int col;
        for (col = 0; col < width; col += 64) {
          const __m256i pred_lo =
              _mm256_load_si256((const __m256i *)(pred + col));
          const __m256i pred_hi =
              _mm256_load_si256((const __m256i *)(pred + col + 32));
          const __m256i ref_lo =
              _mm256_loadu_si256((const __m256i *)(ref + col));
          const __m256i ref_hi =
              _mm256_loadu_si256((const __m256i *)(ref + col + 32));

          _mm256_store_si256((__m256i *)(comp_pred + col),
                             _mm256_avg_epu8(pred_lo, ref_lo));
          _mm256_store_si256((__m256i *)(comp_pred + col + 32),
                             _mm256_avg_epu8(pred_hi, ref_hi));
        }
        rows_left--;
        pred += width;
        comp_pred += width;
        ref += ref_stride;
      } while (rows_left > 0);
      break;
  }
}