/src/libvpx/vpx_dsp/x86/sad_avx512.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2025 The WebM project authors. All Rights Reserved. |
3 | | * |
4 | | * Use of this source code is governed by a BSD-style license |
5 | | * that can be found in the LICENSE file in the root of the source |
6 | | * tree. An additional intellectual property rights grant can be found |
7 | | * in the file PATENTS. All contributing project authors may |
8 | | * be found in the AUTHORS file in the root of the source tree. |
9 | | */ |
10 | | #include <immintrin.h> |
11 | | #include "./vpx_dsp_rtcd.h" |
12 | | #include "vpx_ports/mem.h" |
13 | | |
14 | | static INLINE unsigned int sad64xh_avx512(const uint8_t *src_ptr, |
15 | | int src_stride, |
16 | | const uint8_t *ref_ptr, |
17 | 0 | int ref_stride, int h) { |
18 | 0 | int i, res; |
19 | 0 | __m512i sad_reg, ref_reg; |
20 | 0 | __m512i sum_sad = _mm512_setzero_si512(); |
21 | 0 | for (i = 0; i < h; i++) { |
22 | 0 | ref_reg = _mm512_loadu_si512((const __m512i *)ref_ptr); |
23 | 0 | sad_reg = |
24 | 0 | _mm512_sad_epu8(ref_reg, _mm512_loadu_si512((__m512 const *)src_ptr)); |
25 | 0 | sum_sad = _mm512_add_epi32(sum_sad, sad_reg); |
26 | 0 | ref_ptr += ref_stride; |
27 | 0 | src_ptr += src_stride; |
28 | 0 | } |
29 | 0 | res = _mm512_reduce_add_epi32(sum_sad); |
30 | 0 | return res; |
31 | 0 | } |
32 | | |
/* Generates vpx_sad64x<h>_avx512(): full SAD of a 64x<h> block, forwarding
 * directly to the shared 64-wide row kernel sad64xh_avx512(). */
#define FSAD64_H(h)                                                           \
  unsigned int vpx_sad64x##h##_avx512(const uint8_t *src_ptr, int src_stride, \
                                      const uint8_t *ref_ptr,                 \
                                      int ref_stride) {                       \
    return sad64xh_avx512(src_ptr, src_stride, ref_ptr, ref_stride, h);       \
  }
39 | | |
/* Generates vpx_sad_skip_64x##h##_avx512(): an approximate SAD that samples
 * every other row (both strides doubled, half the rows) and doubles the
 * result to stay on the same scale as the full SAD. */
#define FSADS64_H(h)                                                  \
  unsigned int vpx_sad_skip_64x##h##_avx512(                          \
      const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \
      int ref_stride) {                                               \
    return 2 * sad64xh_avx512(src_ptr, src_stride * 2, ref_ptr,       \
                              ref_stride * 2, h / 2);                 \
  }
47 | | |
/* Instantiate the full and skip SAD entry points for the 64x64 and 64x32
 * block sizes, then retire the generator macros. */
#define FSAD64      \
  FSAD64_H(64)      \
  FSAD64_H(32)      \
  FSADS64_H(64)     \
  FSADS64_H(32)

FSAD64

#undef FSAD64
#undef FSAD64_H
#undef FSADS64_H
59 | | |
/* Generates vpx_sad64x<h>_avg_avx512(): SAD of a 64x<h> source block against
 * the rounded average of the reference block and a second predictor
 * (compound prediction). second_pred is a packed 64-byte-wide buffer, so it
 * advances by a fixed 64 bytes per row rather than by a caller stride. */
#define FSADAVG64_H(h)                                                         \
  unsigned int vpx_sad64x##h##_avg_avx512(                                     \
      const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr,          \
      int ref_stride, const uint8_t *second_pred) {                            \
    int i;                                                                     \
    __m512i sad_reg, ref_reg;                                                  \
    __m512i sum_sad = _mm512_setzero_si512();                                  \
    for (i = 0; i < h; i++) {                                                  \
      ref_reg = _mm512_loadu_si512((const __m512i *)ref_ptr);                  \
      ref_reg = _mm512_avg_epu8(                                               \
          ref_reg, _mm512_loadu_si512((const __m512i *)second_pred));          \
      sad_reg = _mm512_sad_epu8(ref_reg,                                       \
                                _mm512_loadu_si512((const __m512i *)src_ptr)); \
      sum_sad = _mm512_add_epi32(sum_sad, sad_reg);                            \
      ref_ptr += ref_stride;                                                   \
      src_ptr += src_stride;                                                   \
      second_pred += 64;                                                       \
    }                                                                          \
    return (unsigned int)_mm512_reduce_add_epi32(sum_sad);                     \
  }
80 | | |
/* Instantiate the averaging SAD entry points for the 64x64 and 64x32 block
 * sizes, then retire the generator macros. */
#define FSADAVG64   \
  FSADAVG64_H(64)   \
  FSADAVG64_H(32)

FSADAVG64

#undef FSADAVG64
#undef FSADAVG64_H