Coverage Report

Created: 2025-06-13 07:07

/src/aom/aom_dsp/x86/mem_sse2.h
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2017, Alliance for Open Media. All rights reserved.
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#ifndef AOM_AOM_DSP_X86_MEM_SSE2_H_
13
#define AOM_AOM_DSP_X86_MEM_SSE2_H_
14
15
#include <emmintrin.h>  // SSE2
16
#include <string.h>
17
18
#include "config/aom_config.h"
19
20
#include "aom/aom_integer.h"
21
22
462k
static inline int16_t loadu_int16(const void *src) {
23
462k
  int16_t v;
24
462k
  memcpy(&v, src, sizeof(v));
25
462k
  return v;
26
462k
}
Unexecuted instantiation: aom_subpixel_8t_intrin_ssse3.c:loadu_int16
Unexecuted instantiation: resize_ssse3.c:loadu_int16
Unexecuted instantiation: convolve_2d_avx2.c:loadu_int16
convolve_avx2.c:loadu_int16
Line
Count
Source
22
462k
static inline int16_t loadu_int16(const void *src) {
23
462k
  int16_t v;
24
462k
  memcpy(&v, src, sizeof(v));
25
462k
  return v;
26
462k
}
Unexecuted instantiation: jnt_convolve_avx2.c:loadu_int16
27
28
3.90M
static inline int32_t loadu_int32(const void *src) {
29
3.90M
  int32_t v;
30
3.90M
  memcpy(&v, src, sizeof(v));
31
3.90M
  return v;
32
3.90M
}
Unexecuted instantiation: aom_subpixel_8t_intrin_ssse3.c:loadu_int32
Unexecuted instantiation: resize_ssse3.c:loadu_int32
convolve_2d_avx2.c:loadu_int32
Line
Count
Source
28
1.00M
static inline int32_t loadu_int32(const void *src) {
29
1.00M
  int32_t v;
30
1.00M
  memcpy(&v, src, sizeof(v));
31
1.00M
  return v;
32
1.00M
}
convolve_avx2.c:loadu_int32
Line
Count
Source
28
2.69M
static inline int32_t loadu_int32(const void *src) {
29
2.69M
  int32_t v;
30
2.69M
  memcpy(&v, src, sizeof(v));
31
2.69M
  return v;
32
2.69M
}
jnt_convolve_avx2.c:loadu_int32
Line
Count
Source
28
210k
static inline int32_t loadu_int32(const void *src) {
29
210k
  int32_t v;
30
210k
  memcpy(&v, src, sizeof(v));
31
210k
  return v;
32
210k
}
33
34
0
static inline int64_t loadu_int64(const void *src) {
35
0
  int64_t v;
36
0
  memcpy(&v, src, sizeof(v));
37
0
  return v;
38
0
}
Unexecuted instantiation: aom_subpixel_8t_intrin_ssse3.c:loadu_int64
Unexecuted instantiation: resize_ssse3.c:loadu_int64
Unexecuted instantiation: convolve_2d_avx2.c:loadu_int64
Unexecuted instantiation: convolve_avx2.c:loadu_int64
Unexecuted instantiation: jnt_convolve_avx2.c:loadu_int64
39
40
336k
static inline void _mm_storeh_epi64(__m128i *const d, const __m128i s) {
41
336k
  _mm_storeh_pi((__m64 *)d, _mm_castsi128_ps(s));
42
336k
}
Unexecuted instantiation: aom_subpixel_8t_intrin_ssse3.c:_mm_storeh_epi64
Unexecuted instantiation: resize_ssse3.c:_mm_storeh_epi64
Unexecuted instantiation: convolve_2d_avx2.c:_mm_storeh_epi64
convolve_avx2.c:_mm_storeh_epi64
Line
Count
Source
40
51.3k
static inline void _mm_storeh_epi64(__m128i *const d, const __m128i s) {
41
51.3k
  _mm_storeh_pi((__m64 *)d, _mm_castsi128_ps(s));
42
51.3k
}
jnt_convolve_avx2.c:_mm_storeh_epi64
Line
Count
Source
40
285k
static inline void _mm_storeh_epi64(__m128i *const d, const __m128i s) {
41
285k
  _mm_storeh_pi((__m64 *)d, _mm_castsi128_ps(s));
42
285k
}
43
44
5.40M
static inline __m128i loadh_epi64(const void *const src, const __m128i s) {
45
5.40M
  return _mm_castps_si128(
46
5.40M
      _mm_loadh_pi(_mm_castsi128_ps(s), (const __m64 *)src));
47
5.40M
}
Unexecuted instantiation: aom_subpixel_8t_intrin_ssse3.c:loadh_epi64
Unexecuted instantiation: resize_ssse3.c:loadh_epi64
convolve_2d_avx2.c:loadh_epi64
Line
Count
Source
44
4.28M
static inline __m128i loadh_epi64(const void *const src, const __m128i s) {
45
4.28M
  return _mm_castps_si128(
46
4.28M
      _mm_loadh_pi(_mm_castsi128_ps(s), (const __m64 *)src));
47
4.28M
}
convolve_avx2.c:loadh_epi64
Line
Count
Source
44
1.11M
static inline __m128i loadh_epi64(const void *const src, const __m128i s) {
45
1.11M
  return _mm_castps_si128(
46
1.11M
      _mm_loadh_pi(_mm_castsi128_ps(s), (const __m64 *)src));
47
1.11M
}
Unexecuted instantiation: jnt_convolve_avx2.c:loadh_epi64
48
49
static inline __m128i load_8bit_4x4_to_1_reg_sse2(const void *const src,
50
0
                                                  const int byte_stride) {
51
0
  return _mm_setr_epi32(loadu_int32((int8_t *)src + 0 * byte_stride),
52
0
                        loadu_int32((int8_t *)src + 1 * byte_stride),
53
0
                        loadu_int32((int8_t *)src + 2 * byte_stride),
54
0
                        loadu_int32((int8_t *)src + 3 * byte_stride));
55
0
}
Unexecuted instantiation: aom_subpixel_8t_intrin_ssse3.c:load_8bit_4x4_to_1_reg_sse2
Unexecuted instantiation: resize_ssse3.c:load_8bit_4x4_to_1_reg_sse2
Unexecuted instantiation: convolve_2d_avx2.c:load_8bit_4x4_to_1_reg_sse2
Unexecuted instantiation: convolve_avx2.c:load_8bit_4x4_to_1_reg_sse2
Unexecuted instantiation: jnt_convolve_avx2.c:load_8bit_4x4_to_1_reg_sse2
56
57
static inline __m128i load_8bit_8x2_to_1_reg_sse2(const void *const src,
58
5.40M
                                                  const int byte_stride) {
59
5.40M
  __m128i dst;
60
5.40M
  dst = _mm_loadl_epi64((__m128i *)((int8_t *)src + 0 * byte_stride));
61
5.40M
  dst = loadh_epi64((int8_t *)src + 1 * byte_stride, dst);
62
5.40M
  return dst;
63
5.40M
}
Unexecuted instantiation: aom_subpixel_8t_intrin_ssse3.c:load_8bit_8x2_to_1_reg_sse2
Unexecuted instantiation: resize_ssse3.c:load_8bit_8x2_to_1_reg_sse2
convolve_2d_avx2.c:load_8bit_8x2_to_1_reg_sse2
Line
Count
Source
58
4.28M
                                                  const int byte_stride) {
59
4.28M
  __m128i dst;
60
4.28M
  dst = _mm_loadl_epi64((__m128i *)((int8_t *)src + 0 * byte_stride));
61
4.28M
  dst = loadh_epi64((int8_t *)src + 1 * byte_stride, dst);
62
4.28M
  return dst;
63
4.28M
}
convolve_avx2.c:load_8bit_8x2_to_1_reg_sse2
Line
Count
Source
58
1.11M
                                                  const int byte_stride) {
59
1.11M
  __m128i dst;
60
1.11M
  dst = _mm_loadl_epi64((__m128i *)((int8_t *)src + 0 * byte_stride));
61
1.11M
  dst = loadh_epi64((int8_t *)src + 1 * byte_stride, dst);
62
1.11M
  return dst;
63
1.11M
}
Unexecuted instantiation: jnt_convolve_avx2.c:load_8bit_8x2_to_1_reg_sse2
64
65
static inline void store_8bit_8x4_from_16x2(const __m128i *const s,
66
                                            uint8_t *const d,
67
0
                                            const ptrdiff_t stride) {
68
0
  _mm_storel_epi64((__m128i *)(d + 0 * stride), s[0]);
69
0
  _mm_storeh_epi64((__m128i *)(d + 1 * stride), s[0]);
70
0
  _mm_storel_epi64((__m128i *)(d + 2 * stride), s[1]);
71
0
  _mm_storeh_epi64((__m128i *)(d + 3 * stride), s[1]);
72
0
}
Unexecuted instantiation: aom_subpixel_8t_intrin_ssse3.c:store_8bit_8x4_from_16x2
Unexecuted instantiation: resize_ssse3.c:store_8bit_8x4_from_16x2
Unexecuted instantiation: convolve_2d_avx2.c:store_8bit_8x4_from_16x2
Unexecuted instantiation: convolve_avx2.c:store_8bit_8x4_from_16x2
Unexecuted instantiation: jnt_convolve_avx2.c:store_8bit_8x4_from_16x2
73
74
static inline void store_8bit_4x4(const __m128i *const s, uint8_t *const d,
75
0
                                  const ptrdiff_t stride) {
76
0
  *(int *)(d + 0 * stride) = _mm_cvtsi128_si32(s[0]);
77
0
  *(int *)(d + 1 * stride) = _mm_cvtsi128_si32(s[1]);
78
0
  *(int *)(d + 2 * stride) = _mm_cvtsi128_si32(s[2]);
79
0
  *(int *)(d + 3 * stride) = _mm_cvtsi128_si32(s[3]);
80
0
}
Unexecuted instantiation: aom_subpixel_8t_intrin_ssse3.c:store_8bit_4x4
Unexecuted instantiation: resize_ssse3.c:store_8bit_4x4
Unexecuted instantiation: convolve_2d_avx2.c:store_8bit_4x4
Unexecuted instantiation: convolve_avx2.c:store_8bit_4x4
Unexecuted instantiation: jnt_convolve_avx2.c:store_8bit_4x4
81
82
static inline void store_8bit_4x4_sse2(const __m128i s, uint8_t *const d,
83
0
                                       const ptrdiff_t stride) {
84
0
  __m128i ss[4];
85
86
0
  ss[0] = s;
87
0
  ss[1] = _mm_srli_si128(s, 4);
88
0
  ss[2] = _mm_srli_si128(s, 8);
89
0
  ss[3] = _mm_srli_si128(s, 12);
90
0
  store_8bit_4x4(ss, d, stride);
91
0
}
Unexecuted instantiation: aom_subpixel_8t_intrin_ssse3.c:store_8bit_4x4_sse2
Unexecuted instantiation: resize_ssse3.c:store_8bit_4x4_sse2
Unexecuted instantiation: convolve_2d_avx2.c:store_8bit_4x4_sse2
Unexecuted instantiation: convolve_avx2.c:store_8bit_4x4_sse2
Unexecuted instantiation: jnt_convolve_avx2.c:store_8bit_4x4_sse2
92
93
static inline void load_8bit_4x4(const uint8_t *const s, const ptrdiff_t stride,
94
0
                                 __m128i *const d) {
95
0
  d[0] = _mm_cvtsi32_si128(*(const int *)(s + 0 * stride));
96
0
  d[1] = _mm_cvtsi32_si128(*(const int *)(s + 1 * stride));
97
0
  d[2] = _mm_cvtsi32_si128(*(const int *)(s + 2 * stride));
98
0
  d[3] = _mm_cvtsi32_si128(*(const int *)(s + 3 * stride));
99
0
}
Unexecuted instantiation: aom_subpixel_8t_intrin_ssse3.c:load_8bit_4x4
Unexecuted instantiation: resize_ssse3.c:load_8bit_4x4
Unexecuted instantiation: convolve_2d_avx2.c:load_8bit_4x4
Unexecuted instantiation: convolve_avx2.c:load_8bit_4x4
Unexecuted instantiation: jnt_convolve_avx2.c:load_8bit_4x4
100
101
static inline void load_8bit_4x8(const uint8_t *const s, const ptrdiff_t stride,
102
0
                                 __m128i *const d) {
103
0
  load_8bit_4x4(s + 0 * stride, stride, &d[0]);
104
0
  load_8bit_4x4(s + 4 * stride, stride, &d[4]);
105
0
}
Unexecuted instantiation: aom_subpixel_8t_intrin_ssse3.c:load_8bit_4x8
Unexecuted instantiation: resize_ssse3.c:load_8bit_4x8
Unexecuted instantiation: convolve_2d_avx2.c:load_8bit_4x8
Unexecuted instantiation: convolve_avx2.c:load_8bit_4x8
Unexecuted instantiation: jnt_convolve_avx2.c:load_8bit_4x8
106
107
static inline void load_8bit_8x4(const uint8_t *const s, const ptrdiff_t stride,
108
0
                                 __m128i *const d) {
109
0
  d[0] = _mm_loadl_epi64((const __m128i *)(s + 0 * stride));
110
0
  d[1] = _mm_loadl_epi64((const __m128i *)(s + 1 * stride));
111
0
  d[2] = _mm_loadl_epi64((const __m128i *)(s + 2 * stride));
112
0
  d[3] = _mm_loadl_epi64((const __m128i *)(s + 3 * stride));
113
0
}
Unexecuted instantiation: aom_subpixel_8t_intrin_ssse3.c:load_8bit_8x4
Unexecuted instantiation: resize_ssse3.c:load_8bit_8x4
Unexecuted instantiation: convolve_2d_avx2.c:load_8bit_8x4
Unexecuted instantiation: convolve_avx2.c:load_8bit_8x4
Unexecuted instantiation: jnt_convolve_avx2.c:load_8bit_8x4
114
115
static inline void loadu_8bit_16x4(const uint8_t *const s,
116
0
                                   const ptrdiff_t stride, __m128i *const d) {
117
0
  d[0] = _mm_loadu_si128((const __m128i *)(s + 0 * stride));
118
0
  d[1] = _mm_loadu_si128((const __m128i *)(s + 1 * stride));
119
0
  d[2] = _mm_loadu_si128((const __m128i *)(s + 2 * stride));
120
0
  d[3] = _mm_loadu_si128((const __m128i *)(s + 3 * stride));
121
0
}
Unexecuted instantiation: aom_subpixel_8t_intrin_ssse3.c:loadu_8bit_16x4
Unexecuted instantiation: resize_ssse3.c:loadu_8bit_16x4
Unexecuted instantiation: convolve_2d_avx2.c:loadu_8bit_16x4
Unexecuted instantiation: convolve_avx2.c:loadu_8bit_16x4
Unexecuted instantiation: jnt_convolve_avx2.c:loadu_8bit_16x4
122
123
static inline void load_8bit_8x8(const uint8_t *const s, const ptrdiff_t stride,
124
0
                                 __m128i *const d) {
125
0
  load_8bit_8x4(s + 0 * stride, stride, &d[0]);
126
0
  load_8bit_8x4(s + 4 * stride, stride, &d[4]);
127
0
}
Unexecuted instantiation: aom_subpixel_8t_intrin_ssse3.c:load_8bit_8x8
Unexecuted instantiation: resize_ssse3.c:load_8bit_8x8
Unexecuted instantiation: convolve_2d_avx2.c:load_8bit_8x8
Unexecuted instantiation: convolve_avx2.c:load_8bit_8x8
Unexecuted instantiation: jnt_convolve_avx2.c:load_8bit_8x8
128
129
static inline void load_8bit_16x8(const uint8_t *const s,
130
0
                                  const ptrdiff_t stride, __m128i *const d) {
131
0
  d[0] = _mm_load_si128((const __m128i *)(s + 0 * stride));
132
0
  d[1] = _mm_load_si128((const __m128i *)(s + 1 * stride));
133
0
  d[2] = _mm_load_si128((const __m128i *)(s + 2 * stride));
134
0
  d[3] = _mm_load_si128((const __m128i *)(s + 3 * stride));
135
0
  d[4] = _mm_load_si128((const __m128i *)(s + 4 * stride));
136
0
  d[5] = _mm_load_si128((const __m128i *)(s + 5 * stride));
137
0
  d[6] = _mm_load_si128((const __m128i *)(s + 6 * stride));
138
0
  d[7] = _mm_load_si128((const __m128i *)(s + 7 * stride));
139
0
}
Unexecuted instantiation: aom_subpixel_8t_intrin_ssse3.c:load_8bit_16x8
Unexecuted instantiation: resize_ssse3.c:load_8bit_16x8
Unexecuted instantiation: convolve_2d_avx2.c:load_8bit_16x8
Unexecuted instantiation: convolve_avx2.c:load_8bit_16x8
Unexecuted instantiation: jnt_convolve_avx2.c:load_8bit_16x8
140
141
static inline void loadu_8bit_16x8(const uint8_t *const s,
142
0
                                   const ptrdiff_t stride, __m128i *const d) {
143
0
  loadu_8bit_16x4(s + 0 * stride, stride, &d[0]);
144
0
  loadu_8bit_16x4(s + 4 * stride, stride, &d[4]);
145
0
}
Unexecuted instantiation: aom_subpixel_8t_intrin_ssse3.c:loadu_8bit_16x8
Unexecuted instantiation: resize_ssse3.c:loadu_8bit_16x8
Unexecuted instantiation: convolve_2d_avx2.c:loadu_8bit_16x8
Unexecuted instantiation: convolve_avx2.c:loadu_8bit_16x8
Unexecuted instantiation: jnt_convolve_avx2.c:loadu_8bit_16x8
146
147
static inline void store_8bit_8x8(const __m128i *const s, uint8_t *const d,
148
0
                                  const ptrdiff_t stride) {
149
0
  _mm_storel_epi64((__m128i *)(d + 0 * stride), s[0]);
150
0
  _mm_storel_epi64((__m128i *)(d + 1 * stride), s[1]);
151
0
  _mm_storel_epi64((__m128i *)(d + 2 * stride), s[2]);
152
0
  _mm_storel_epi64((__m128i *)(d + 3 * stride), s[3]);
153
0
  _mm_storel_epi64((__m128i *)(d + 4 * stride), s[4]);
154
0
  _mm_storel_epi64((__m128i *)(d + 5 * stride), s[5]);
155
0
  _mm_storel_epi64((__m128i *)(d + 6 * stride), s[6]);
156
0
  _mm_storel_epi64((__m128i *)(d + 7 * stride), s[7]);
157
0
}
Unexecuted instantiation: aom_subpixel_8t_intrin_ssse3.c:store_8bit_8x8
Unexecuted instantiation: resize_ssse3.c:store_8bit_8x8
Unexecuted instantiation: convolve_2d_avx2.c:store_8bit_8x8
Unexecuted instantiation: convolve_avx2.c:store_8bit_8x8
Unexecuted instantiation: jnt_convolve_avx2.c:store_8bit_8x8
158
159
static inline void storeu_8bit_16x4(const __m128i *const s, uint8_t *const d,
160
0
                                    const ptrdiff_t stride) {
161
0
  _mm_storeu_si128((__m128i *)(d + 0 * stride), s[0]);
162
0
  _mm_storeu_si128((__m128i *)(d + 1 * stride), s[1]);
163
0
  _mm_storeu_si128((__m128i *)(d + 2 * stride), s[2]);
164
0
  _mm_storeu_si128((__m128i *)(d + 3 * stride), s[3]);
165
0
}
Unexecuted instantiation: aom_subpixel_8t_intrin_ssse3.c:storeu_8bit_16x4
Unexecuted instantiation: resize_ssse3.c:storeu_8bit_16x4
Unexecuted instantiation: convolve_2d_avx2.c:storeu_8bit_16x4
Unexecuted instantiation: convolve_avx2.c:storeu_8bit_16x4
Unexecuted instantiation: jnt_convolve_avx2.c:storeu_8bit_16x4
166
167
#endif  // AOM_AOM_DSP_X86_MEM_SSE2_H_