Coverage Report

Created: 2024-09-06 07:53

/src/libvpx/vpx_dsp/x86/convolve_sse2.h
Line
Count
Source (jump to first uncovered line)
1
/*
2
 *  Copyright (c) 2018 The WebM project authors. All Rights Reserved.
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5
 *  that can be found in the LICENSE file in the root of the source
6
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS.  All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
9
 */
10
11
#ifndef VPX_VPX_DSP_X86_CONVOLVE_SSE2_H_
12
#define VPX_VPX_DSP_X86_CONVOLVE_SSE2_H_
13
14
#include <emmintrin.h>  // SSE2
15
16
#include "./vpx_config.h"
17
18
// Interprets the input register as 16-bit words 7 6 5 4 3 2 1 0, then returns
19
// values at index 2 and 3 to return 3 2 3 2 3 2 3 2 as 16-bit words
20
0
static INLINE __m128i extract_quarter_2_epi16_sse2(const __m128i *const reg) {
21
0
  // Duplicate the two low 32-bit lanes of *reg: the result holds
  // d1 d1 d0 d0 (high to low), where d1 carries 16-bit words 3 and 2.
  __m128i tmp = _mm_unpacklo_epi32(*reg, *reg);
22
0
  // Copy the upper 64 bits (d1 d1) into both halves, so every 32-bit lane
  // now contains words 3 2.
  return _mm_unpackhi_epi64(tmp, tmp);
23
0
}
Unexecuted instantiation: vpx_subpixel_4t_intrin_sse2.c:extract_quarter_2_epi16_sse2
Unexecuted instantiation: vpx_subpixel_8t_intrin_avx2.c:extract_quarter_2_epi16_sse2
Unexecuted instantiation: vpx_subpixel_8t_intrin_ssse3.c:extract_quarter_2_epi16_sse2
24
25
// Interprets the input register as 16-bit words 7 6 5 4 3 2 1 0, then returns
26
// values at index 4 and 5 to return 5 4 5 4 5 4 5 4 as 16-bit words.
27
0
static INLINE __m128i extract_quarter_3_epi16_sse2(const __m128i *const reg) {
28
0
  // Duplicate the two high 32-bit lanes of *reg: the result holds
  // d3 d3 d2 d2 (high to low), where d2 carries 16-bit words 5 and 4.
  __m128i tmp = _mm_unpackhi_epi32(*reg, *reg);
29
0
  // Copy the lower 64 bits (d2 d2) into both halves, so every 32-bit lane
  // now contains words 5 4.
  return _mm_unpacklo_epi64(tmp, tmp);
30
0
}
Unexecuted instantiation: vpx_subpixel_4t_intrin_sse2.c:extract_quarter_3_epi16_sse2
Unexecuted instantiation: vpx_subpixel_8t_intrin_avx2.c:extract_quarter_3_epi16_sse2
Unexecuted instantiation: vpx_subpixel_8t_intrin_ssse3.c:extract_quarter_3_epi16_sse2
31
32
// Interprets the low 8 bytes of src as 8-bit words, zero extends them to form
33
// 16-bit words, then multiplies with ker and adds the adjacent results to form
34
// 32-bit words. Finally adds the results from 1 and 2 together.
35
static INLINE __m128i mm_madd_add_epi8_sse2(const __m128i *const src_1,
36
                                            const __m128i *const src_2,
37
                                            const __m128i *const ker_1,
38
0
                                            const __m128i *const ker_2) {
39
0
  // Interleave the low 8 bytes of each source with zero bytes, which widens
  // them to eight unsigned 16-bit words; the high 8 bytes are not used.
  const __m128i src_1_half = _mm_unpacklo_epi8(*src_1, _mm_setzero_si128());
40
0
  const __m128i src_2_half = _mm_unpacklo_epi8(*src_2, _mm_setzero_si128());
41
0
  // Multiply 16-bit words pairwise with the kernel and sum each adjacent pair
  // of products into one 32-bit lane (four lanes per register).
  const __m128i madd_1 = _mm_madd_epi16(src_1_half, *ker_1);
42
0
  const __m128i madd_2 = _mm_madd_epi16(src_2_half, *ker_2);
43
0
  // Lane-wise 32-bit sum of the two partial results.
  return _mm_add_epi32(madd_1, madd_2);
44
0
}
Unexecuted instantiation: vpx_subpixel_4t_intrin_sse2.c:mm_madd_add_epi8_sse2
Unexecuted instantiation: vpx_subpixel_8t_intrin_avx2.c:mm_madd_add_epi8_sse2
Unexecuted instantiation: vpx_subpixel_8t_intrin_ssse3.c:mm_madd_add_epi8_sse2
45
46
// Interprets src as 16-bit words, then multiplies with ker and adds the
47
// adjacent results to form 32-bit words. Finally adds the result from 1 and 2
48
// together.
49
static INLINE __m128i mm_madd_add_epi16_sse2(const __m128i *const src_1,
50
                                             const __m128i *const src_2,
51
                                             const __m128i *const ker_1,
52
0
                                             const __m128i *const ker_2) {
53
0
  // Multiply signed 16-bit words pairwise with the kernel and sum each
  // adjacent pair of products into one 32-bit lane.
  const __m128i madd_1 = _mm_madd_epi16(*src_1, *ker_1);
54
0
  const __m128i madd_2 = _mm_madd_epi16(*src_2, *ker_2);
55
0
  // Lane-wise 32-bit sum of the two partial results.
  return _mm_add_epi32(madd_1, madd_2);
56
0
}
Unexecuted instantiation: vpx_subpixel_4t_intrin_sse2.c:mm_madd_add_epi16_sse2
Unexecuted instantiation: vpx_subpixel_8t_intrin_avx2.c:mm_madd_add_epi16_sse2
Unexecuted instantiation: vpx_subpixel_8t_intrin_ssse3.c:mm_madd_add_epi16_sse2
57
58
// Multiplies src_0 and src_1 (as signed 16-bit words) with the same kernel,
// adds adjacent products to form 32-bit words, then narrows both results to
// 16-bit words with signed saturation. The four values derived from src_0
// occupy the low half of the return value, those from src_1 the high half.
static INLINE __m128i mm_madd_packs_epi16_sse2(const __m128i *const src_0,
59
                                               const __m128i *const src_1,
60
0
                                               const __m128i *const ker) {
61
0
  // Four 32-bit sums of adjacent products per source register.
  const __m128i madd_1 = _mm_madd_epi16(*src_0, *ker);
62
0
  const __m128i madd_2 = _mm_madd_epi16(*src_1, *ker);
63
0
  // Saturating 32-bit -> 16-bit pack: madd_1 goes to words 0-3, madd_2 to
  // words 4-7.
  return _mm_packs_epi32(madd_1, madd_2);
64
0
}
Unexecuted instantiation: vpx_subpixel_4t_intrin_sse2.c:mm_madd_packs_epi16_sse2
Unexecuted instantiation: vpx_subpixel_8t_intrin_avx2.c:mm_madd_packs_epi16_sse2
Unexecuted instantiation: vpx_subpixel_8t_intrin_ssse3.c:mm_madd_packs_epi16_sse2
65
66
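// Interleaves the 32-bit words of src_1 and src_2, then packs the eight
// interleaved values into 16-bit words with signed saturation.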
67
static INLINE __m128i mm_zip_epi32_sse2(const __m128i *const src_1,
68
0
                                        const __m128i *const src_2) {
69
0
  // Low halves interleaved: a0 b0 a1 b1 (low to high), with a = *src_1 and
  // b = *src_2 viewed as 32-bit words.
  const __m128i tmp_1 = _mm_unpacklo_epi32(*src_1, *src_2);
70
0
  // High halves interleaved: a2 b2 a3 b3.
  const __m128i tmp_2 = _mm_unpackhi_epi32(*src_1, *src_2);
71
0
  // Narrow to 16 bits with signed saturation, giving
  // a0 b0 a1 b1 a2 b2 a3 b3 as 16-bit words.
  return _mm_packs_epi32(tmp_1, tmp_2);
72
0
}
Unexecuted instantiation: vpx_subpixel_4t_intrin_sse2.c:mm_zip_epi32_sse2
Unexecuted instantiation: vpx_subpixel_8t_intrin_avx2.c:mm_zip_epi32_sse2
Unexecuted instantiation: vpx_subpixel_8t_intrin_ssse3.c:mm_zip_epi32_sse2
73
74
// Adds *half_depth to each 32-bit lane of *src, then arithmetically shifts
// every lane right by depth bits.
// NOTE(review): half_depth presumably holds 1 << (depth - 1) in each lane so
// that the shift rounds to nearest - confirm against the callers.
static INLINE __m128i mm_round_epi32_sse2(const __m128i *const src,
75
                                          const __m128i *const half_depth,
76
0
                                          const int depth) {
77
0
  // Plain (wrapping) 32-bit add of the rounding offset.
  const __m128i nearest_src = _mm_add_epi32(*src, *half_depth);
78
0
  // Sign-preserving right shift of each 32-bit lane.
  return _mm_srai_epi32(nearest_src, depth);
79
0
}
Unexecuted instantiation: vpx_subpixel_4t_intrin_sse2.c:mm_round_epi32_sse2
Unexecuted instantiation: vpx_subpixel_8t_intrin_avx2.c:mm_round_epi32_sse2
Unexecuted instantiation: vpx_subpixel_8t_intrin_ssse3.c:mm_round_epi32_sse2
80
81
// Adds *half_depth to each 16-bit lane of *src with signed saturation, then
// arithmetically shifts every lane right by depth bits.
// NOTE(review): half_depth presumably holds 1 << (depth - 1) in each lane so
// that the shift rounds to nearest - confirm against the callers.
static INLINE __m128i mm_round_epi16_sse2(const __m128i *const src,
82
                                          const __m128i *const half_depth,
83
42.6M
                                          const int depth) {
84
42.6M
  // Saturating add: a sum beyond the signed 16-bit range clamps rather than
  // wrapping around.
  const __m128i nearest_src = _mm_adds_epi16(*src, *half_depth);
85
42.6M
  // Sign-preserving right shift of each 16-bit lane.
  return _mm_srai_epi16(nearest_src, depth);
86
42.6M
}
Unexecuted instantiation: vpx_subpixel_4t_intrin_sse2.c:mm_round_epi16_sse2
vpx_subpixel_8t_intrin_avx2.c:mm_round_epi16_sse2
Line
Count
Source
83
42.6M
                                          const int depth) {
84
42.6M
  const __m128i nearest_src = _mm_adds_epi16(*src, *half_depth);
85
42.6M
  return _mm_srai_epi16(nearest_src, depth);
86
42.6M
}
Unexecuted instantiation: vpx_subpixel_8t_intrin_ssse3.c:mm_round_epi16_sse2
87
88
#endif  // VPX_VPX_DSP_X86_CONVOLVE_SSE2_H_