Coverage Report

Created: 2018-09-25 14:53

/src/mozilla-central/media/libyuv/libyuv/source/compare_gcc.cc
All instrumented lines in this file reported a hit count of 0 (uncovered).
/*
 *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/basic_types.h"

#include "libyuv/compare_row.h"
#include "libyuv/row.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

// This module is for GCC x86 and x64.
#if !defined(LIBYUV_DISABLE_X86) && \
    (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))

#if defined(__x86_64__)
uint32_t HammingDistance_SSE42(const uint8_t* src_a,
                               const uint8_t* src_b,
                               int count) {
  uint64_t diff = 0u;

  asm volatile(
      "xor        %3,%3                          \n"
      "xor        %%r8,%%r8                      \n"
      "xor        %%r9,%%r9                      \n"
      "xor        %%r10,%%r10                    \n"

      // Process 32 bytes per loop.
      LABELALIGN
      "1:                                        \n"
      "mov        (%0),%%rcx                     \n"
      "mov        0x8(%0),%%rdx                  \n"
      "xor        (%1),%%rcx                     \n"
      "xor        0x8(%1),%%rdx                  \n"
      "popcnt     %%rcx,%%rcx                    \n"
      "popcnt     %%rdx,%%rdx                    \n"
      "mov        0x10(%0),%%rsi                 \n"
      "mov        0x18(%0),%%rdi                 \n"
      "xor        0x10(%1),%%rsi                 \n"
      "xor        0x18(%1),%%rdi                 \n"
      "popcnt     %%rsi,%%rsi                    \n"
      "popcnt     %%rdi,%%rdi                    \n"
      "add        $0x20,%0                       \n"
      "add        $0x20,%1                       \n"
      "add        %%rcx,%3                       \n"
      "add        %%rdx,%%r8                     \n"
      "add        %%rsi,%%r9                     \n"
      "add        %%rdi,%%r10                    \n"
      "sub        $0x20,%2                       \n"
      "jg         1b                             \n"

      "add        %%r8, %3                       \n"
      "add        %%r9, %3                       \n"
      "add        %%r10, %3                      \n"
      : "+r"(src_a),  // %0
        "+r"(src_b),  // %1
        "+r"(count),  // %2
        "=r"(diff)    // %3
      :
      : "memory", "cc", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10");

  return static_cast<uint32_t>(diff);
}
#else
uint32_t HammingDistance_SSE42(const uint8_t* src_a,
                               const uint8_t* src_b,
                               int count) {
  uint32_t diff = 0u;

  asm volatile(
      // Process 16 bytes per loop.
      LABELALIGN
      "1:                                        \n"
      "mov        (%0),%%ecx                     \n"
      "mov        0x4(%0),%%edx                  \n"
      "xor        (%1),%%ecx                     \n"
      "xor        0x4(%1),%%edx                  \n"
      "popcnt     %%ecx,%%ecx                    \n"
      "add        %%ecx,%3                       \n"
      "popcnt     %%edx,%%edx                    \n"
      "add        %%edx,%3                       \n"
      "mov        0x8(%0),%%ecx                  \n"
      "mov        0xc(%0),%%edx                  \n"
      "xor        0x8(%1),%%ecx                  \n"
      "xor        0xc(%1),%%edx                  \n"
      "popcnt     %%ecx,%%ecx                    \n"
      "add        %%ecx,%3                       \n"
      "popcnt     %%edx,%%edx                    \n"
      "add        %%edx,%3                       \n"
      "add        $0x10,%0                       \n"
      "add        $0x10,%1                       \n"
      "sub        $0x10,%2                       \n"
      "jg         1b                             \n"
      : "+r"(src_a),  // %0
        "+r"(src_b),  // %1
        "+r"(count),  // %2
        "+r"(diff)    // %3
      :
      : "memory", "cc", "ecx", "edx");

  return diff;
}
#endif
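
// Both variants above amount to the same computation: XOR the inputs to
// expose the differing bits, popcount, and accumulate. A minimal scalar
// sketch of that computation (illustrative only; this helper is not part
// of this file, and unlike the kernels it handles any byte count):
static inline uint32_t HammingDistance_Scalar_Sketch(const uint8_t* src_a,
                                                     const uint8_t* src_b,
                                                     int count) {
  uint32_t diff = 0u;
  for (int i = 0; i < count; ++i) {
    // __builtin_popcount is safe to assume here; this module is GCC-only.
    diff += static_cast<uint32_t>(
        __builtin_popcount(static_cast<unsigned>(src_a[i] ^ src_b[i])));
  }
  return diff;
}
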
static const vec8 kNibbleMask = {15, 15, 15, 15, 15, 15, 15, 15,
                                 15, 15, 15, 15, 15, 15, 15, 15};
static const vec8 kBitCount = {0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4};

uint32_t HammingDistance_SSSE3(const uint8_t* src_a,
                               const uint8_t* src_b,
                               int count) {
  uint32_t diff = 0u;

  asm volatile(
      "movdqa     %4,%%xmm2                      \n"
      "movdqa     %5,%%xmm3                      \n"
      "pxor       %%xmm0,%%xmm0                  \n"
      "pxor       %%xmm1,%%xmm1                  \n"
      "sub        %0,%1                          \n"

      LABELALIGN
      "1:                                        \n"
      "movdqa     (%0),%%xmm4                    \n"
      "movdqa     0x10(%0), %%xmm5               \n"
      "pxor       (%0,%1), %%xmm4                \n"
      "movdqa     %%xmm4,%%xmm6                  \n"
      "pand       %%xmm2,%%xmm6                  \n"
      "psrlw      $0x4,%%xmm4                    \n"
      "movdqa     %%xmm3,%%xmm7                  \n"
      "pshufb     %%xmm6,%%xmm7                  \n"
      "pand       %%xmm2,%%xmm4                  \n"
      "movdqa     %%xmm3,%%xmm6                  \n"
      "pshufb     %%xmm4,%%xmm6                  \n"
      "paddb      %%xmm7,%%xmm6                  \n"
      "pxor       0x10(%0,%1),%%xmm5             \n"
      "add        $0x20,%0                       \n"
      "movdqa     %%xmm5,%%xmm4                  \n"
      "pand       %%xmm2,%%xmm5                  \n"
      "psrlw      $0x4,%%xmm4                    \n"
      "movdqa     %%xmm3,%%xmm7                  \n"
      "pshufb     %%xmm5,%%xmm7                  \n"
      "pand       %%xmm2,%%xmm4                  \n"
      "movdqa     %%xmm3,%%xmm5                  \n"
      "pshufb     %%xmm4,%%xmm5                  \n"
      "paddb      %%xmm7,%%xmm5                  \n"
      "paddb      %%xmm5,%%xmm6                  \n"
      "psadbw     %%xmm1,%%xmm6                  \n"
      "paddd      %%xmm6,%%xmm0                  \n"
      "sub        $0x20,%2                       \n"
      "jg         1b                             \n"

      "pshufd     $0xaa,%%xmm0,%%xmm1            \n"
      "paddd      %%xmm1,%%xmm0                  \n"
      "movd       %%xmm0, %3                     \n"
      : "+r"(src_a),       // %0
        "+r"(src_b),       // %1
        "+r"(count),       // %2
        "=r"(diff)         // %3
      : "m"(kNibbleMask),  // %4
        "m"(kBitCount)     // %5
      : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
        "xmm7");

  return diff;
}
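
// HammingDistance_SSSE3 targets CPUs without the popcnt instruction, so it
// counts bits by table lookup: each byte of the XOR result is split into
// nibbles, pshufb maps each nibble through kBitCount, and psadbw against
// zero sums the per-byte counts into 64-bit lanes. A scalar sketch of the
// per-byte lookup (illustrative only; the helper is not part of this file):
static inline int PopcountByteViaNibbleTable_Sketch(uint8_t v) {
  // Same table as kBitCount: bit count of each 4-bit value 0..15.
  static const uint8_t kBits[16] = {0, 1, 1, 2, 1, 2, 2, 3,
                                    1, 2, 2, 3, 2, 3, 3, 4};
  return kBits[v & 15] + kBits[v >> 4];  // low nibble + high nibble
}
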
#ifdef HAS_HAMMINGDISTANCE_AVX2
uint32_t HammingDistance_AVX2(const uint8_t* src_a,
                              const uint8_t* src_b,
                              int count) {
  uint32_t diff = 0u;

  asm volatile(
      "vbroadcastf128 %4,%%ymm2                  \n"
      "vbroadcastf128 %5,%%ymm3                  \n"
      "vpxor      %%ymm0,%%ymm0,%%ymm0           \n"
      "vpxor      %%ymm1,%%ymm1,%%ymm1           \n"
      "sub        %0,%1                          \n"

      LABELALIGN
      "1:                                        \n"
      "vmovdqa    (%0),%%ymm4                    \n"
      "vmovdqa    0x20(%0), %%ymm5               \n"
      "vpxor      (%0,%1), %%ymm4, %%ymm4        \n"
      "vpand      %%ymm2,%%ymm4,%%ymm6           \n"
      "vpsrlw     $0x4,%%ymm4,%%ymm4             \n"
      "vpshufb    %%ymm6,%%ymm3,%%ymm6           \n"
      "vpand      %%ymm2,%%ymm4,%%ymm4           \n"
      "vpshufb    %%ymm4,%%ymm3,%%ymm4           \n"
      "vpaddb     %%ymm4,%%ymm6,%%ymm6           \n"
      "vpxor      0x20(%0,%1),%%ymm5,%%ymm4      \n"
      "add        $0x40,%0                       \n"
      "vpand      %%ymm2,%%ymm4,%%ymm5           \n"
      "vpsrlw     $0x4,%%ymm4,%%ymm4             \n"
      "vpshufb    %%ymm5,%%ymm3,%%ymm5           \n"
      "vpand      %%ymm2,%%ymm4,%%ymm4           \n"
      "vpshufb    %%ymm4,%%ymm3,%%ymm4           \n"
      "vpaddb     %%ymm5,%%ymm4,%%ymm4           \n"
      "vpaddb     %%ymm6,%%ymm4,%%ymm4           \n"
      "vpsadbw    %%ymm1,%%ymm4,%%ymm4           \n"
      "vpaddd     %%ymm0,%%ymm4,%%ymm0           \n"
      "sub        $0x40,%2                       \n"
      "jg         1b                             \n"

      "vpermq     $0xb1,%%ymm0,%%ymm1            \n"
      "vpaddd     %%ymm1,%%ymm0,%%ymm0           \n"
      "vpermq     $0xaa,%%ymm0,%%ymm1            \n"
      "vpaddd     %%ymm1,%%ymm0,%%ymm0           \n"
      "vmovd      %%xmm0, %3                     \n"
      "vzeroupper                                \n"
      : "+r"(src_a),       // %0
        "+r"(src_b),       // %1
        "+r"(count),       // %2
        "=r"(diff)         // %3
      : "m"(kNibbleMask),  // %4
        "m"(kBitCount)     // %5
      : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");

  return diff;
}
#endif  // HAS_HAMMINGDISTANCE_AVX2

uint32_t SumSquareError_SSE2(const uint8_t* src_a,
                             const uint8_t* src_b,
                             int count) {
  uint32_t sse;
  asm volatile(
      "pxor      %%xmm0,%%xmm0                   \n"
      "pxor      %%xmm5,%%xmm5                   \n"

      LABELALIGN
      "1:                                        \n"
      "movdqu    (%0),%%xmm1                     \n"
      "lea       0x10(%0),%0                     \n"
      "movdqu    (%1),%%xmm2                     \n"
      "lea       0x10(%1),%1                     \n"
      "movdqa    %%xmm1,%%xmm3                   \n"
      "psubusb   %%xmm2,%%xmm1                   \n"
      "psubusb   %%xmm3,%%xmm2                   \n"
      "por       %%xmm2,%%xmm1                   \n"
      "movdqa    %%xmm1,%%xmm2                   \n"
      "punpcklbw %%xmm5,%%xmm1                   \n"
      "punpckhbw %%xmm5,%%xmm2                   \n"
      "pmaddwd   %%xmm1,%%xmm1                   \n"
      "pmaddwd   %%xmm2,%%xmm2                   \n"
      "paddd     %%xmm1,%%xmm0                   \n"
      "paddd     %%xmm2,%%xmm0                   \n"
      "sub       $0x10,%2                        \n"
      "jg        1b                              \n"

      "pshufd    $0xee,%%xmm0,%%xmm1             \n"
      "paddd     %%xmm1,%%xmm0                   \n"
      "pshufd    $0x1,%%xmm0,%%xmm1              \n"
      "paddd     %%xmm1,%%xmm0                   \n"
      "movd      %%xmm0,%3                       \n"

      : "+r"(src_a),  // %0
        "+r"(src_b),  // %1
        "+r"(count),  // %2
        "=g"(sse)     // %3
        ::"memory",
        "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5");
  return sse;
}
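
// SumSquareError_SSE2 computes the sum of squared byte differences:
// psubusb in both directions plus por yields |a - b| per byte without
// widening, punpck*bw widens to 16 bits, and pmaddwd squares and pair-sums
// into 32-bit accumulators. A minimal scalar sketch (illustrative only;
// the helper is not part of this file, and like the kernel it accumulates
// in 32 bits, so very large inputs can wrap):
static inline uint32_t SumSquareError_Scalar_Sketch(const uint8_t* src_a,
                                                    const uint8_t* src_b,
                                                    int count) {
  uint32_t sse = 0u;
  for (int i = 0; i < count; ++i) {
    int d = static_cast<int>(src_a[i]) - static_cast<int>(src_b[i]);
    sse += static_cast<uint32_t>(d * d);
  }
  return sse;
}
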
static const uvec32 kHash16x33 = {0x92d9e201, 0, 0, 0};  // 33 ^ 16
static const uvec32 kHashMul0 = {
    0x0c3525e1,  // 33 ^ 15
    0xa3476dc1,  // 33 ^ 14
    0x3b4039a1,  // 33 ^ 13
    0x4f5f0981,  // 33 ^ 12
};
static const uvec32 kHashMul1 = {
    0x30f35d61,  // 33 ^ 11
    0x855cb541,  // 33 ^ 10
    0x040a9121,  // 33 ^ 9
    0x747c7101,  // 33 ^ 8
};
static const uvec32 kHashMul2 = {
    0xec41d4e1,  // 33 ^ 7
    0x4cfa3cc1,  // 33 ^ 6
    0x025528a1,  // 33 ^ 5
    0x00121881,  // 33 ^ 4
};
static const uvec32 kHashMul3 = {
    0x00008c61,  // 33 ^ 3
    0x00000441,  // 33 ^ 2
    0x00000021,  // 33 ^ 1
    0x00000001,  // 33 ^ 0
};

uint32_t HashDjb2_SSE41(const uint8_t* src, int count, uint32_t seed) {
  uint32_t hash;
  asm volatile(
      "movd      %2,%%xmm0                       \n"
      "pxor      %%xmm7,%%xmm7                   \n"
      "movdqa    %4,%%xmm6                       \n"

      LABELALIGN
      "1:                                        \n"
      "movdqu    (%0),%%xmm1                     \n"
      "lea       0x10(%0),%0                     \n"
      "pmulld    %%xmm6,%%xmm0                   \n"
      "movdqa    %5,%%xmm5                       \n"
      "movdqa    %%xmm1,%%xmm2                   \n"
      "punpcklbw %%xmm7,%%xmm2                   \n"
      "movdqa    %%xmm2,%%xmm3                   \n"
      "punpcklwd %%xmm7,%%xmm3                   \n"
      "pmulld    %%xmm5,%%xmm3                   \n"
      "movdqa    %6,%%xmm5                       \n"
      "movdqa    %%xmm2,%%xmm4                   \n"
      "punpckhwd %%xmm7,%%xmm4                   \n"
      "pmulld    %%xmm5,%%xmm4                   \n"
      "movdqa    %7,%%xmm5                       \n"
      "punpckhbw %%xmm7,%%xmm1                   \n"
      "movdqa    %%xmm1,%%xmm2                   \n"
      "punpcklwd %%xmm7,%%xmm2                   \n"
      "pmulld    %%xmm5,%%xmm2                   \n"
      "movdqa    %8,%%xmm5                       \n"
      "punpckhwd %%xmm7,%%xmm1                   \n"
      "pmulld    %%xmm5,%%xmm1                   \n"
      "paddd     %%xmm4,%%xmm3                   \n"
      "paddd     %%xmm2,%%xmm1                   \n"
      "paddd     %%xmm3,%%xmm1                   \n"
      "pshufd    $0xe,%%xmm1,%%xmm2              \n"
      "paddd     %%xmm2,%%xmm1                   \n"
      "pshufd    $0x1,%%xmm1,%%xmm2              \n"
      "paddd     %%xmm2,%%xmm1                   \n"
      "paddd     %%xmm1,%%xmm0                   \n"
      "sub       $0x10,%1                        \n"
      "jg        1b                              \n"
      "movd      %%xmm0,%3                       \n"
      : "+r"(src),        // %0
        "+r"(count),      // %1
        "+rm"(seed),      // %2
        "=g"(hash)        // %3
      : "m"(kHash16x33),  // %4
        "m"(kHashMul0),   // %5
        "m"(kHashMul1),   // %6
        "m"(kHashMul2),   // %7
        "m"(kHashMul3)    // %8
      : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
        "xmm7");
  return hash;
}
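
// The kernel above vectorizes the djb2 hash, hash = hash * 33 + byte.
// Sixteen bytes can be folded in per iteration because
//   hash' = hash * 33^16 + sum over i in 0..15 of src[i] * 33^(15 - i),
// which is exactly what kHash16x33 (33^16) and kHashMul0..kHashMul3
// (33^15 down to 33^0) supply. A minimal scalar sketch of the same hash
// (illustrative only; unsigned wraparound is intended):
static inline uint32_t HashDjb2_Scalar_Sketch(const uint8_t* src,
                                              int count,
                                              uint32_t seed) {
  uint32_t hash = seed;
  for (int i = 0; i < count; ++i) {
    hash = hash * 33u + src[i];
  }
  return hash;
}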
#endif  // defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER))

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif