Coverage Report

Created: 2026-06-15 06:25

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/x264/common/dct.c
Line
Count
Source
1
/*****************************************************************************
2
 * dct.c: transform and zigzag
3
 *****************************************************************************
4
 * Copyright (C) 2003-2025 x264 project
5
 *
6
 * Authors: Loren Merritt <lorenm@u.washington.edu>
7
 *          Laurent Aimar <fenrir@via.ecp.fr>
8
 *          Henrik Gramner <henrik@gramner.com>
9
 *
10
 * This program is free software; you can redistribute it and/or modify
11
 * it under the terms of the GNU General Public License as published by
12
 * the Free Software Foundation; either version 2 of the License, or
13
 * (at your option) any later version.
14
 *
15
 * This program is distributed in the hope that it will be useful,
16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
 * GNU General Public License for more details.
19
 *
20
 * You should have received a copy of the GNU General Public License
21
 * along with this program; if not, write to the Free Software
22
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
23
 *
24
 * This program is also available under a commercial proprietary license.
25
 * For more information, contact us at licensing@x264.com.
26
 *****************************************************************************/
27
28
#include "common.h"
29
#if HAVE_MMX
30
#   include "x86/dct.h"
31
#endif
32
#if HAVE_ALTIVEC
33
#   include "ppc/dct.h"
34
#endif
35
#if HAVE_ARMV6
36
#   include "arm/dct.h"
37
#endif
38
#if HAVE_AARCH64
39
#   include "aarch64/dct.h"
40
#endif
41
#if HAVE_MSA
42
#   include "mips/dct.h"
43
#endif
44
#if HAVE_LSX
45
#   include "loongarch/dct.h"
46
#endif
47
/* In-place 4x4 add/subtract (Hadamard-style) butterfly transform of the
 * 16 coefficients in d.
 *
 *   d  16 coefficients, read as a row-major 4x4 array and overwritten.
 *
 * Pass 1 transforms each row of d and stores the result transposed in tmp;
 * pass 2 transforms each row of tmp and writes back to d, adding 1 and
 * shifting right by one on every output. */
static void dct4x4dc( dctcoef d[16] )
48
27.2k
{
49
27.2k
    dctcoef tmp[16];
50
51
136k
    /* Pass 1: row i of d -> column i of tmp. */
    for( int i = 0; i < 4; i++ )
52
109k
    {
53
109k
        int s01 = d[i*4+0] + d[i*4+1];
54
109k
        int d01 = d[i*4+0] - d[i*4+1];
55
109k
        int s23 = d[i*4+2] + d[i*4+3];
56
109k
        int d23 = d[i*4+2] - d[i*4+3];
57
58
109k
        tmp[0*4+i] = s01 + s23;
59
109k
        tmp[1*4+i] = s01 - s23;
60
109k
        tmp[2*4+i] = d01 - d23;
61
109k
        tmp[3*4+i] = d01 + d23;
62
109k
    }
63
64
136k
    /* Pass 2: row i of tmp -> row i of d, with the +1 >> 1 rounding step. */
    for( int i = 0; i < 4; i++ )
65
109k
    {
66
109k
        int s01 = tmp[i*4+0] + tmp[i*4+1];
67
109k
        int d01 = tmp[i*4+0] - tmp[i*4+1];
68
109k
        int s23 = tmp[i*4+2] + tmp[i*4+3];
69
109k
        int d23 = tmp[i*4+2] - tmp[i*4+3];
70
71
109k
        d[i*4+0] = ( s01 + s23 + 1 ) >> 1;
72
109k
        d[i*4+1] = ( s01 - s23 + 1 ) >> 1;
73
109k
        d[i*4+2] = ( d01 - d23 + 1 ) >> 1;
74
109k
        d[i*4+3] = ( d01 + d23 + 1 ) >> 1;
75
109k
    }
76
27.2k
}
77
78
/* In-place 4x4 add/subtract butterfly transform of the 16 coefficients in d.
 * Uses the same two-pass butterfly as dct4x4dc but stores the second-pass
 * sums directly, without the +1 >> 1 step.
 *
 *   d  16 coefficients, read as a row-major 4x4 array and overwritten. */
static void idct4x4dc( dctcoef d[16] )
79
212
{
80
212
    dctcoef tmp[16];
81
82
1.06k
    /* Pass 1: row i of d -> column i of tmp. */
    for( int i = 0; i < 4; i++ )
83
848
    {
84
848
        int s01 = d[i*4+0] + d[i*4+1];
85
848
        int d01 = d[i*4+0] - d[i*4+1];
86
848
        int s23 = d[i*4+2] + d[i*4+3];
87
848
        int d23 = d[i*4+2] - d[i*4+3];
88
89
848
        tmp[0*4+i] = s01 + s23;
90
848
        tmp[1*4+i] = s01 - s23;
91
848
        tmp[2*4+i] = d01 - d23;
92
848
        tmp[3*4+i] = d01 + d23;
93
848
    }
94
95
1.06k
    /* Pass 2: row i of tmp -> row i of d, no scaling. */
    for( int i = 0; i < 4; i++ )
96
848
    {
97
848
        int s01 = tmp[i*4+0] + tmp[i*4+1];
98
848
        int d01 = tmp[i*4+0] - tmp[i*4+1];
99
848
        int s23 = tmp[i*4+2] + tmp[i*4+3];
100
848
        int d23 = tmp[i*4+2] - tmp[i*4+3];
101
102
848
        d[i*4+0] = s01 + s23;
103
848
        d[i*4+1] = s01 - s23;
104
848
        d[i*4+2] = d01 - d23;
105
848
        d[i*4+3] = d01 + d23;
106
848
    }
107
212
}
108
109
/* Collects element [0] of each of the eight 16-coefficient blocks in dct4x4,
 * runs three stages of add/subtract butterflies over those eight values and
 * stores the eight results in dct.  Element [0] of every input block is then
 * set to zero.
 *
 *   dct     receives the 8 transformed values.
 *   dct4x4  eight 16-coefficient blocks; only element [0] of each is read,
 *           and that element is cleared on return. */
static void dct2x4dc( dctcoef dct[8], dctcoef dct4x4[8][16] )
110
0
{
111
0
    /* Stage 1: sums and differences of adjacent block pairs (0,1) (2,3) (4,5) (6,7). */
    int a0 = dct4x4[0][0] + dct4x4[1][0];
112
0
    int a1 = dct4x4[2][0] + dct4x4[3][0];
113
0
    int a2 = dct4x4[4][0] + dct4x4[5][0];
114
0
    int a3 = dct4x4[6][0] + dct4x4[7][0];
115
0
    int a4 = dct4x4[0][0] - dct4x4[1][0];
116
0
    int a5 = dct4x4[2][0] - dct4x4[3][0];
117
0
    int a6 = dct4x4[4][0] - dct4x4[5][0];
118
0
    int a7 = dct4x4[6][0] - dct4x4[7][0];
119
0
    /* Stage 2. */
    int b0 = a0 + a1;
120
0
    int b1 = a2 + a3;
121
0
    int b2 = a4 + a5;
122
0
    int b3 = a6 + a7;
123
0
    int b4 = a0 - a1;
124
0
    int b5 = a2 - a3;
125
0
    int b6 = a4 - a5;
126
0
    int b7 = a6 - a7;
127
0
    /* Stage 3: final sums/differences written to the output array. */
    dct[0] = b0 + b1;
128
0
    dct[1] = b2 + b3;
129
0
    dct[2] = b0 - b1;
130
0
    dct[3] = b2 - b3;
131
0
    dct[4] = b4 - b5;
132
0
    dct[5] = b6 - b7;
133
0
    dct[6] = b4 + b5;
134
0
    dct[7] = b6 + b7;
135
0
    /* The consumed element of each source block is cleared. */
    dct4x4[0][0] = 0;
136
0
    dct4x4[1][0] = 0;
137
0
    dct4x4[2][0] = 0;
138
0
    dct4x4[3][0] = 0;
139
0
    dct4x4[4][0] = 0;
140
0
    dct4x4[5][0] = 0;
141
0
    dct4x4[6][0] = 0;
142
0
    dct4x4[7][0] = 0;
143
0
}
144
145
/* Writes the element-wise difference pix1 - pix2 of an i_size x i_size pixel
 * square into diff.
 *
 *   diff    output, stored densely: row y starts at diff[y*i_size].
 *   i_size  width and height of the square.
 *   pix1    first pixel block;  i_pix1 is the step between its rows.
 *   pix2    second pixel block; i_pix2 is the step between its rows. */
static inline void pixel_sub_wxh( dctcoef *diff, int i_size,
146
                                  pixel *pix1, int i_pix1, pixel *pix2, int i_pix2 )
147
658k
{
148
3.29M
    for( int y = 0; y < i_size; y++ )
149
2.63M
    {
150
13.1M
        for( int x = 0; x < i_size; x++ )
151
10.5M
            diff[x + y*i_size] = pix1[x] - pix2[x];
152
2.63M
        /* Move both sources down one row using their own strides. */
        pix1 += i_pix1;
153
2.63M
        pix2 += i_pix2;
154
2.63M
    }
155
658k
}
156
157
/* Computes the 4x4 difference pix1 - pix2 and applies a two-pass 4-point
 * integer transform to it, writing 16 coefficients to dct.
 *
 *   dct   output, 16 coefficients.
 *   pix1  first pixel block, rows FENC_STRIDE apart.
 *   pix2  second pixel block, rows FDEC_STRIDE apart.
 *
 * Each pass forms, from inputs (x0,x1,x2,x3):
 *   x0+x1+x2+x3,  2*x0+x1-x2-2*x3,  x0-x1-x2+x3,  x0-2*x1+2*x2-x3. */
static void sub4x4_dct( dctcoef dct[16], pixel *pix1, pixel *pix2 )
158
658k
{
159
658k
    dctcoef d[16];
160
658k
    dctcoef tmp[16];
161
162
658k
    pixel_sub_wxh( d, 4, pix1, FENC_STRIDE, pix2, FDEC_STRIDE );
163
164
3.29M
    /* Pass 1: row i of the residual -> column i of tmp. */
    for( int i = 0; i < 4; i++ )
165
2.63M
    {
166
2.63M
        int s03 = d[i*4+0] + d[i*4+3];
167
2.63M
        int s12 = d[i*4+1] + d[i*4+2];
168
2.63M
        int d03 = d[i*4+0] - d[i*4+3];
169
2.63M
        int d12 = d[i*4+1] - d[i*4+2];
170
171
2.63M
        tmp[0*4+i] =   s03 +   s12;
172
2.63M
        tmp[1*4+i] = 2*d03 +   d12;
173
2.63M
        tmp[2*4+i] =   s03 -   s12;
174
2.63M
        tmp[3*4+i] =   d03 - 2*d12;
175
2.63M
    }
176
177
3.29M
    /* Pass 2: row i of tmp -> row i of dct. */
    for( int i = 0; i < 4; i++ )
178
2.63M
    {
179
2.63M
        int s03 = tmp[i*4+0] + tmp[i*4+3];
180
2.63M
        int s12 = tmp[i*4+1] + tmp[i*4+2];
181
2.63M
        int d03 = tmp[i*4+0] - tmp[i*4+3];
182
2.63M
        int d12 = tmp[i*4+1] - tmp[i*4+2];
183
184
2.63M
        dct[i*4+0] =   s03 +   s12;
185
2.63M
        dct[i*4+1] = 2*d03 +   d12;
186
2.63M
        dct[i*4+2] =   s03 -   s12;
187
2.63M
        dct[i*4+3] =   d03 - 2*d12;
188
2.63M
    }
189
658k
}
190
191
/* Runs sub4x4_dct on the four 4x4 quadrants of an 8x8 area.
 * dct[0..3] receive, in order: top-left, top-right, bottom-left,
 * bottom-right.  pix1 rows are FENC_STRIDE apart, pix2 rows FDEC_STRIDE. */
static void sub8x8_dct( dctcoef dct[4][16], pixel *pix1, pixel *pix2 )
192
164k
{
193
164k
    sub4x4_dct( dct[0], &pix1[0], &pix2[0] );
194
164k
    sub4x4_dct( dct[1], &pix1[4], &pix2[4] );
195
164k
    sub4x4_dct( dct[2], &pix1[4*FENC_STRIDE+0], &pix2[4*FDEC_STRIDE+0] );
196
164k
    sub4x4_dct( dct[3], &pix1[4*FENC_STRIDE+4], &pix2[4*FDEC_STRIDE+4] );
197
164k
}
198
199
/* Runs sub8x8_dct on the four 8x8 quadrants of a 16x16 area.
 * Each quadrant fills four consecutive 4x4 blocks of dct: [0..3] top-left,
 * [4..7] top-right, [8..11] bottom-left, [12..15] bottom-right. */
static void sub16x16_dct( dctcoef dct[16][16], pixel *pix1, pixel *pix2 )
200
27.2k
{
201
27.2k
    sub8x8_dct( &dct[ 0], &pix1[0], &pix2[0] );
202
27.2k
    sub8x8_dct( &dct[ 4], &pix1[8], &pix2[8] );
203
27.2k
    sub8x8_dct( &dct[ 8], &pix1[8*FENC_STRIDE+0], &pix2[8*FDEC_STRIDE+0] );
204
27.2k
    sub8x8_dct( &dct[12], &pix1[8*FENC_STRIDE+8], &pix2[8*FDEC_STRIDE+8] );
205
27.2k
}
206
207
/* Returns the sum over a 4x4 area of (pix1 - pix2).
 * pix1 rows are FENC_STRIDE apart, pix2 rows FDEC_STRIDE apart. */
static int sub4x4_dct_dc( pixel *pix1, pixel *pix2 )
208
0
{
209
0
    int sum = 0;
210
0
    /* One iteration per row; both pointers step to the next row in the loop header. */
    for( int i=0; i<4; i++, pix1 += FENC_STRIDE, pix2 += FDEC_STRIDE )
211
0
        sum += pix1[0] + pix1[1] + pix1[2] + pix1[3]
212
0
             - pix2[0] - pix2[1] - pix2[2] - pix2[3];
213
0
    return sum;
214
0
}
215
216
/* Computes the four 4x4 difference sums of an 8x8 area (via sub4x4_dct_dc)
 * and then applies a 2x2 add/subtract transform to them in place.
 *
 *   dct   receives the 4 transformed sums.
 *   pix1  first pixel block, rows FENC_STRIDE apart.
 *   pix2  second pixel block, rows FDEC_STRIDE apart. */
static void sub8x8_dct_dc( dctcoef dct[4], pixel *pix1, pixel *pix2 )
217
0
{
218
0
    /* Quadrant sums: top-left, top-right, bottom-left, bottom-right. */
    dct[0] = sub4x4_dct_dc( &pix1[0], &pix2[0] );
219
0
    dct[1] = sub4x4_dct_dc( &pix1[4], &pix2[4] );
220
0
    dct[2] = sub4x4_dct_dc( &pix1[4*FENC_STRIDE+0], &pix2[4*FDEC_STRIDE+0] );
221
0
    dct[3] = sub4x4_dct_dc( &pix1[4*FENC_STRIDE+4], &pix2[4*FDEC_STRIDE+4] );
222
223
    /* 2x2 DC transform */
224
0
    int d0 = dct[0] + dct[1];
225
0
    int d1 = dct[2] + dct[3];
226
0
    int d2 = dct[0] - dct[1];
227
0
    int d3 = dct[2] - dct[3];
228
0
    dct[0] = d0 + d1;
229
0
    dct[1] = d0 - d1;
230
0
    dct[2] = d2 + d3;
231
0
    dct[3] = d2 - d3;
232
0
}
233
234
/* Computes the eight 4x4 difference sums of an 8-wide, 16-tall area (via
 * sub4x4_dct_dc) and applies three stages of add/subtract butterflies to
 * them, writing the eight results to dct.
 *
 *   dct   receives the 8 transformed sums.
 *   pix1  first pixel block, rows FENC_STRIDE apart.
 *   pix2  second pixel block, rows FDEC_STRIDE apart. */
static void sub8x16_dct_dc( dctcoef dct[8], pixel *pix1, pixel *pix2 )
235
0
{
236
0
    /* a0..a7: block sums in raster order, two blocks per 4-row band. */
    int a0 = sub4x4_dct_dc( &pix1[ 0*FENC_STRIDE+0], &pix2[ 0*FDEC_STRIDE+0] );
237
0
    int a1 = sub4x4_dct_dc( &pix1[ 0*FENC_STRIDE+4], &pix2[ 0*FDEC_STRIDE+4] );
238
0
    int a2 = sub4x4_dct_dc( &pix1[ 4*FENC_STRIDE+0], &pix2[ 4*FDEC_STRIDE+0] );
239
0
    int a3 = sub4x4_dct_dc( &pix1[ 4*FENC_STRIDE+4], &pix2[ 4*FDEC_STRIDE+4] );
240
0
    int a4 = sub4x4_dct_dc( &pix1[ 8*FENC_STRIDE+0], &pix2[ 8*FDEC_STRIDE+0] );
241
0
    int a5 = sub4x4_dct_dc( &pix1[ 8*FENC_STRIDE+4], &pix2[ 8*FDEC_STRIDE+4] );
242
0
    int a6 = sub4x4_dct_dc( &pix1[12*FENC_STRIDE+0], &pix2[12*FDEC_STRIDE+0] );
243
0
    int a7 = sub4x4_dct_dc( &pix1[12*FENC_STRIDE+4], &pix2[12*FDEC_STRIDE+4] );
244
245
    /* 2x4 DC transform */
246
0
    int b0 = a0 + a1;
247
0
    int b1 = a2 + a3;
248
0
    int b2 = a4 + a5;
249
0
    int b3 = a6 + a7;
250
0
    int b4 = a0 - a1;
251
0
    int b5 = a2 - a3;
252
0
    int b6 = a4 - a5;
253
0
    int b7 = a6 - a7;
254
0
    /* a0..a7 are reused here as second-stage temporaries. */
    a0 = b0 + b1;
255
0
    a1 = b2 + b3;
256
0
    a2 = b4 + b5;
257
0
    a3 = b6 + b7;
258
0
    a4 = b0 - b1;
259
0
    a5 = b2 - b3;
260
0
    a6 = b4 - b5;
261
0
    a7 = b6 - b7;
262
0
    dct[0] = a0 + a1;
263
0
    dct[1] = a2 + a3;
264
0
    dct[2] = a0 - a1;
265
0
    dct[3] = a2 - a3;
266
0
    dct[4] = a4 - a5;
267
0
    dct[5] = a6 - a7;
268
0
    dct[6] = a4 + a5;
269
0
    dct[7] = a6 + a7;
270
0
}
271
272
/* Applies a two-pass 4-point inverse integer transform to the 16
 * coefficients in dct and adds the result to the 4x4 pixel area at p_dst.
 *
 *   p_dst  destination pixels, rows FDEC_STRIDE apart; updated in place.
 *   dct    16 input coefficients; only read here.
 *
 * The second pass adds 32 and shifts right by 6 before the sum with the
 * existing pixel is passed through x264_clip_pixel. */
static void add4x4_idct( pixel *p_dst, dctcoef dct[16] )
273
114
{
274
114
    dctcoef d[16];
275
114
    dctcoef tmp[16];
276
277
570
    /* Pass 1: column i of dct -> row i of tmp. */
    for( int i = 0; i < 4; i++ )
278
456
    {
279
456
        int s02 =  dct[0*4+i]     +  dct[2*4+i];
280
456
        int d02 =  dct[0*4+i]     -  dct[2*4+i];
281
456
        int s13 =  dct[1*4+i]     + (dct[3*4+i]>>1);
282
456
        int d13 = (dct[1*4+i]>>1) -  dct[3*4+i];
283
284
456
        tmp[i*4+0] = s02 + s13;
285
456
        tmp[i*4+1] = d02 + d13;
286
456
        tmp[i*4+2] = d02 - d13;
287
456
        tmp[i*4+3] = s02 - s13;
288
456
    }
289
290
570
    /* Pass 2: column i of tmp -> column i of d, with the +32 >> 6 scaling. */
    for( int i = 0; i < 4; i++ )
291
456
    {
292
456
        int s02 =  tmp[0*4+i]     +  tmp[2*4+i];
293
456
        int d02 =  tmp[0*4+i]     -  tmp[2*4+i];
294
456
        int s13 =  tmp[1*4+i]     + (tmp[3*4+i]>>1);
295
456
        int d13 = (tmp[1*4+i]>>1) -  tmp[3*4+i];
296
297
456
        d[0*4+i] = ( s02 + s13 + 32 ) >> 6;
298
456
        d[1*4+i] = ( d02 + d13 + 32 ) >> 6;
299
456
        d[2*4+i] = ( d02 - d13 + 32 ) >> 6;
300
456
        d[3*4+i] = ( s02 - s13 + 32 ) >> 6;
301
456
    }
302
303
304
570
    /* Add the reconstructed residual to the destination, clipping each pixel. */
    for( int y = 0; y < 4; y++ )
305
456
    {
306
2.28k
        for( int x = 0; x < 4; x++ )
307
1.82k
            p_dst[x] = x264_clip_pixel( p_dst[x] + d[y*4+x] );
308
456
        p_dst += FDEC_STRIDE;
309
456
    }
310
114
}
311
312
/* Runs add4x4_idct on the four 4x4 quadrants of an 8x8 destination area.
 * dct[0..3] are used, in order, for: top-left, top-right, bottom-left,
 * bottom-right.  Destination rows are FDEC_STRIDE apart. */
static void add8x8_idct( pixel *p_dst, dctcoef dct[4][16] )
313
0
{
314
0
    add4x4_idct( &p_dst[0],               dct[0] );
315
0
    add4x4_idct( &p_dst[4],               dct[1] );
316
0
    add4x4_idct( &p_dst[4*FDEC_STRIDE+0], dct[2] );
317
0
    add4x4_idct( &p_dst[4*FDEC_STRIDE+4], dct[3] );
318
0
}
319
320
/* Runs add8x8_idct on the four 8x8 quadrants of a 16x16 destination area.
 * Each quadrant consumes four consecutive 4x4 blocks of dct: [0..3]
 * top-left, [4..7] top-right, [8..11] bottom-left, [12..15] bottom-right. */
static void add16x16_idct( pixel *p_dst, dctcoef dct[16][16] )
321
0
{
322
0
    add8x8_idct( &p_dst[0],               &dct[0] );
323
0
    add8x8_idct( &p_dst[8],               &dct[4] );
324
0
    add8x8_idct( &p_dst[8*FDEC_STRIDE+0], &dct[8] );
325
0
    add8x8_idct( &p_dst[8*FDEC_STRIDE+8], &dct[12] );
326
0
}
327
328
/****************************************************************************
329
 * 8x8 transform:
330
 ****************************************************************************/
331
332
0
/* One 8-point forward integer transform, expanded as a braced block.
 * The expansion site must define two helper macros first:
 *   SRC(x)  expression reading input sample x (0..7);
 *   DST(x)  lvalue receiving output sample x (0..7).
 * Every SRC() is read into a local before any DST() is written, so SRC and
 * DST may refer to the same storage. */
#define DCT8_1D {\
333
0
    int s07 = SRC(0) + SRC(7);\
334
0
    int s16 = SRC(1) + SRC(6);\
335
0
    int s25 = SRC(2) + SRC(5);\
336
0
    int s34 = SRC(3) + SRC(4);\
337
0
    int a0 = s07 + s34;\
338
0
    int a1 = s16 + s25;\
339
0
    int a2 = s07 - s34;\
340
0
    int a3 = s16 - s25;\
341
0
    int d07 = SRC(0) - SRC(7);\
342
0
    int d16 = SRC(1) - SRC(6);\
343
0
    int d25 = SRC(2) - SRC(5);\
344
0
    int d34 = SRC(3) - SRC(4);\
345
0
    int a4 = d16 + d25 + (d07 + (d07>>1));\
346
0
    int a5 = d07 - d34 - (d25 + (d25>>1));\
347
0
    int a6 = d07 + d34 - (d16 + (d16>>1));\
348
0
    int a7 = d16 - d25 + (d34 + (d34>>1));\
349
0
    DST(0) =  a0 + a1     ;\
350
0
    DST(1) =  a4 + (a7>>2);\
351
0
    DST(2) =  a2 + (a3>>1);\
352
0
    DST(3) =  a5 + (a6>>2);\
353
0
    DST(4) =  a0 - a1     ;\
354
0
    DST(5) =  a6 - (a5>>2);\
355
0
    DST(6) = (a2>>1) - a3 ;\
356
0
    DST(7) = (a4>>2) - a7 ;\
357
0
}
358
359
/* Computes the 8x8 difference pix1 - pix2 and applies the DCT8_1D transform
 * in two passes, writing 64 coefficients to dct.
 *
 *   dct   output, 64 coefficients.
 *   pix1  first pixel block, rows FENC_STRIDE apart.
 *   pix2  second pixel block, rows FDEC_STRIDE apart. */
static void sub8x8_dct8( dctcoef dct[64], pixel *pix1, pixel *pix2 )
360
0
{
361
0
    dctcoef tmp[64];
362
363
0
    pixel_sub_wxh( tmp, 8, pix1, FENC_STRIDE, pix2, FDEC_STRIDE );
364
365
0
/* Pass 1: transform each column i of tmp in place. */
#define SRC(x) tmp[x*8+i]
366
0
#define DST(x) tmp[x*8+i]
367
0
    for( int i = 0; i < 8; i++ )
368
0
        DCT8_1D
369
0
#undef SRC
370
0
#undef DST
371
372
0
/* Pass 2: transform each row i of tmp and store it as column i of dct. */
#define SRC(x) tmp[i*8+x]
373
0
#define DST(x) dct[x*8+i]
374
0
    for( int i = 0; i < 8; i++ )
375
0
        DCT8_1D
376
0
#undef SRC
377
0
#undef DST
378
0
}
379
380
/* Runs sub8x8_dct8 on the four 8x8 quadrants of a 16x16 area.
 * dct[0..3] receive, in order: top-left, top-right, bottom-left,
 * bottom-right. */
static void sub16x16_dct8( dctcoef dct[4][64], pixel *pix1, pixel *pix2 )
381
0
{
382
0
    sub8x8_dct8( dct[0], &pix1[0],               &pix2[0] );
383
0
    sub8x8_dct8( dct[1], &pix1[8],               &pix2[8] );
384
0
    sub8x8_dct8( dct[2], &pix1[8*FENC_STRIDE+0], &pix2[8*FDEC_STRIDE+0] );
385
0
    sub8x8_dct8( dct[3], &pix1[8*FENC_STRIDE+8], &pix2[8*FDEC_STRIDE+8] );
386
0
}
387
388
0
/* One 8-point inverse integer transform, expanded as a braced block.
 * The expansion site must define two helper macros first:
 *   SRC(x)       expression reading input sample x (0..7);
 *   DST(x, rhs)  statement that stores the value rhs as output sample x.
 * Every SRC() is read into a local before the first DST() is issued, so the
 * outputs may overwrite the inputs. */
#define IDCT8_1D {\
389
0
    int a0 =  SRC(0) + SRC(4);\
390
0
    int a2 =  SRC(0) - SRC(4);\
391
0
    int a4 = (SRC(2)>>1) - SRC(6);\
392
0
    int a6 = (SRC(6)>>1) + SRC(2);\
393
0
    int b0 = a0 + a6;\
394
0
    int b2 = a2 + a4;\
395
0
    int b4 = a2 - a4;\
396
0
    int b6 = a0 - a6;\
397
0
    int a1 = -SRC(3) + SRC(5) - SRC(7) - (SRC(7)>>1);\
398
0
    int a3 =  SRC(1) + SRC(7) - SRC(3) - (SRC(3)>>1);\
399
0
    int a5 = -SRC(1) + SRC(7) + SRC(5) + (SRC(5)>>1);\
400
0
    int a7 =  SRC(3) + SRC(5) + SRC(1) + (SRC(1)>>1);\
401
0
    int b1 = (a7>>2) + a1;\
402
0
    int b3 =  a3 + (a5>>2);\
403
0
    int b5 = (a3>>2) - a5;\
404
0
    int b7 =  a7 - (a1>>2);\
405
0
    DST(0, b0 + b7);\
406
0
    DST(1, b2 + b5);\
407
0
    DST(2, b4 + b3);\
408
0
    DST(3, b6 + b1);\
409
0
    DST(4, b6 - b1);\
410
0
    DST(5, b4 - b3);\
411
0
    DST(6, b2 - b5);\
412
0
    DST(7, b0 - b7);\
413
0
}
414
415
/* Applies the IDCT8_1D transform in two passes to the 64 coefficients in
 * dct and adds the result to the 8x8 pixel area at dst.
 *
 *   dst  destination pixels, rows FDEC_STRIDE apart; updated in place.
 *   dct  64 coefficients.  NOTE: this buffer is modified: dct[0] is biased
 *        by 32 and the first pass writes its output back into dct. */
static void add8x8_idct8( pixel *dst, dctcoef dct[64] )
416
0
{
417
0
    dct[0] += 32; // rounding for the >>6 at the end
418
419
0
/* Pass 1: transform each column i of dct in place. */
#define SRC(x)     dct[x*8+i]
420
0
#define DST(x,rhs) dct[x*8+i] = (rhs)
421
0
    for( int i = 0; i < 8; i++ )
422
0
        IDCT8_1D
423
0
#undef SRC
424
0
#undef DST
425
426
0
/* Pass 2: transform each row i of dct; output x lands at column i, row x of
 * dst, shifted right by 6, added to the existing pixel and clipped. */
#define SRC(x)     dct[i*8+x]
427
0
#define DST(x,rhs) dst[i + x*FDEC_STRIDE] = x264_clip_pixel( dst[i + x*FDEC_STRIDE] + ((rhs) >> 6) );
428
0
    for( int i = 0; i < 8; i++ )
429
0
        IDCT8_1D
430
0
#undef SRC
431
0
#undef DST
432
0
}
433
434
/* Runs add8x8_idct8 on the four 8x8 quadrants of a 16x16 destination area.
 * dct[0..3] are used, in order, for: top-left, top-right, bottom-left,
 * bottom-right.  add8x8_idct8 writes into each coefficient block it is given. */
static void add16x16_idct8( pixel *dst, dctcoef dct[4][64] )
435
0
{
436
0
    add8x8_idct8( &dst[0],               dct[0] );
437
0
    add8x8_idct8( &dst[8],               dct[1] );
438
0
    add8x8_idct8( &dst[8*FDEC_STRIDE+0], dct[2] );
439
0
    add8x8_idct8( &dst[8*FDEC_STRIDE+8], dct[3] );
440
0
}
441
442
/* Adds one constant offset to every pixel of a 4x4 area.
 *
 *   p_dst  destination pixels, rows FDEC_STRIDE apart; updated in place.
 *   dc     input value; the offset applied is (dc + 32) >> 6.
 *
 * Each sum is passed through x264_clip_pixel before being stored. */
static inline void add4x4_idct_dc( pixel *p_dst, dctcoef dc )
443
7.44k
{
444
7.44k
    dc = (dc + 32) >> 6;
445
37.2k
    for( int i = 0; i < 4; i++, p_dst += FDEC_STRIDE )
446
29.7k
    {
447
29.7k
        p_dst[0] = x264_clip_pixel( p_dst[0] + dc );
448
29.7k
        p_dst[1] = x264_clip_pixel( p_dst[1] + dc );
449
29.7k
        p_dst[2] = x264_clip_pixel( p_dst[2] + dc );
450
29.7k
        p_dst[3] = x264_clip_pixel( p_dst[3] + dc );
451
29.7k
    }
452
7.44k
}
453
454
/* Runs add4x4_idct_dc on the four 4x4 quadrants of an 8x8 destination area.
 * dct[0..3] supply the value for, in order: top-left, top-right,
 * bottom-left, bottom-right. */
static void add8x8_idct_dc( pixel *p_dst, dctcoef dct[4] )
455
1.01k
{
456
1.01k
    add4x4_idct_dc( &p_dst[0],               dct[0] );
457
1.01k
    add4x4_idct_dc( &p_dst[4],               dct[1] );
458
1.01k
    add4x4_idct_dc( &p_dst[4*FDEC_STRIDE+0], dct[2] );
459
1.01k
    add4x4_idct_dc( &p_dst[4*FDEC_STRIDE+4], dct[3] );
460
1.01k
}
461
462
/* Runs add4x4_idct_dc on all sixteen 4x4 blocks of a 16x16 destination area.
 * The 16 values in dct are consumed in raster order: four per band of four
 * pixel rows, left to right. */
static void add16x16_idct_dc( pixel *p_dst, dctcoef dct[16] )
463
212
{
464
1.06k
    /* Each iteration handles one band: dct advances by 4 values, p_dst by 4 rows. */
    for( int i = 0; i < 4; i++, dct += 4, p_dst += 4*FDEC_STRIDE )
465
848
    {
466
848
        add4x4_idct_dc( &p_dst[ 0], dct[0] );
467
848
        add4x4_idct_dc( &p_dst[ 4], dct[1] );
468
848
        add4x4_idct_dc( &p_dst[ 8], dct[2] );
469
848
        add4x4_idct_dc( &p_dst[12], dct[3] );
470
848
    }
471
212
}
472
473
474
/****************************************************************************
475
 * x264_dct_init:
476
 ****************************************************************************/
477
/* Fills the function table dctf.
 *
 *   cpu   bitmask tested against the X264_CPU_* flags below.
 *   dctf  table to fill; every member assigned here is a function pointer.
 *
 * All entries are first set to the C implementations defined in this file.
 * Then, for each instruction-set section that is compiled in (HAVE_* and
 * HIGH_BIT_DEPTH macros) and whose flag is set in cpu, selected entries are
 * replaced by optimized routines.  Sections are evaluated top to bottom, so
 * a later assignment to the same entry overrides an earlier one. */
void x264_dct_init( uint32_t cpu, x264_dct_function_t *dctf )
478
158
{
479
158
    /* Defaults: C implementations from this file. */
    dctf->sub4x4_dct    = sub4x4_dct;
480
158
    dctf->add4x4_idct   = add4x4_idct;
481
482
158
    dctf->sub8x8_dct    = sub8x8_dct;
483
158
    dctf->sub8x8_dct_dc = sub8x8_dct_dc;
484
158
    dctf->add8x8_idct   = add8x8_idct;
485
158
    dctf->add8x8_idct_dc = add8x8_idct_dc;
486
487
158
    dctf->sub8x16_dct_dc = sub8x16_dct_dc;
488
489
158
    dctf->sub16x16_dct  = sub16x16_dct;
490
158
    dctf->add16x16_idct = add16x16_idct;
491
158
    dctf->add16x16_idct_dc = add16x16_idct_dc;
492
493
158
    dctf->sub8x8_dct8   = sub8x8_dct8;
494
158
    dctf->add8x8_idct8  = add8x8_idct8;
495
496
158
    dctf->sub16x16_dct8  = sub16x16_dct8;
497
158
    dctf->add16x16_idct8 = add16x16_idct8;
498
499
158
    dctf->dct4x4dc  = dct4x4dc;
500
158
    dctf->idct4x4dc = idct4x4dc;
501
502
158
    dctf->dct2x4dc = dct2x4dc;
503
504
/* HIGH_BIT_DEPTH builds: only the x86 overrides below are present. */
#if HIGH_BIT_DEPTH
505
#if HAVE_MMX
506
    if( cpu&X264_CPU_MMX )
507
    {
508
        dctf->sub4x4_dct    = x264_sub4x4_dct_mmx;
509
        dctf->sub8x8_dct    = x264_sub8x8_dct_mmx;
510
        dctf->sub16x16_dct  = x264_sub16x16_dct_mmx;
511
    }
512
    if( cpu&X264_CPU_SSE2 )
513
    {
514
        dctf->add4x4_idct     = x264_add4x4_idct_sse2;
515
        dctf->dct4x4dc        = x264_dct4x4dc_sse2;
516
        dctf->idct4x4dc       = x264_idct4x4dc_sse2;
517
        dctf->dct2x4dc        = x264_dct2x4dc_sse2;
518
        dctf->sub8x8_dct8     = x264_sub8x8_dct8_sse2;
519
        dctf->sub16x16_dct8   = x264_sub16x16_dct8_sse2;
520
        dctf->add8x8_idct     = x264_add8x8_idct_sse2;
521
        dctf->add16x16_idct   = x264_add16x16_idct_sse2;
522
        dctf->add8x8_idct8    = x264_add8x8_idct8_sse2;
523
        dctf->add16x16_idct8    = x264_add16x16_idct8_sse2;
524
        dctf->sub8x8_dct_dc   = x264_sub8x8_dct_dc_sse2;
525
        dctf->add8x8_idct_dc  = x264_add8x8_idct_dc_sse2;
526
        dctf->sub8x16_dct_dc  = x264_sub8x16_dct_dc_sse2;
527
        dctf->add16x16_idct_dc= x264_add16x16_idct_dc_sse2;
528
    }
529
    if( cpu&X264_CPU_SSE4 )
530
    {
531
        dctf->sub8x8_dct8     = x264_sub8x8_dct8_sse4;
532
        dctf->sub16x16_dct8   = x264_sub16x16_dct8_sse4;
533
    }
534
    if( cpu&X264_CPU_AVX )
535
    {
536
        dctf->add4x4_idct     = x264_add4x4_idct_avx;
537
        dctf->dct4x4dc        = x264_dct4x4dc_avx;
538
        dctf->idct4x4dc       = x264_idct4x4dc_avx;
539
        dctf->dct2x4dc        = x264_dct2x4dc_avx;
540
        dctf->sub8x8_dct8     = x264_sub8x8_dct8_avx;
541
        dctf->sub16x16_dct8   = x264_sub16x16_dct8_avx;
542
        dctf->add8x8_idct     = x264_add8x8_idct_avx;
543
        dctf->add16x16_idct   = x264_add16x16_idct_avx;
544
        dctf->add8x8_idct8    = x264_add8x8_idct8_avx;
545
        dctf->add16x16_idct8  = x264_add16x16_idct8_avx;
546
        dctf->add8x8_idct_dc  = x264_add8x8_idct_dc_avx;
547
        dctf->sub8x16_dct_dc  = x264_sub8x16_dct_dc_avx;
548
        dctf->add16x16_idct_dc= x264_add16x16_idct_dc_avx;
549
    }
550
#endif // HAVE_MMX
551
/* Everything from here to the final "#endif // HIGH_BIT_DEPTH" -- the x86,
 * AltiVec, NEON/SVE, MSA and LSX/LASX sections -- is inside this #else branch. */
#else // !HIGH_BIT_DEPTH
552
#if HAVE_MMX
553
    if( cpu&X264_CPU_MMX )
554
    {
555
        dctf->sub4x4_dct    = x264_sub4x4_dct_mmx;
556
        dctf->add4x4_idct   = x264_add4x4_idct_mmx;
557
        dctf->idct4x4dc     = x264_idct4x4dc_mmx;
558
        /* NOTE(review): an _mmx2-suffixed routine is installed under the plain
         * X264_CPU_MMX test rather than the X264_CPU_MMX2 test below --
         * confirm this is intentional. */
        dctf->sub8x8_dct_dc = x264_sub8x8_dct_dc_mmx2;
559
560
#if !ARCH_X86_64
561
        dctf->sub8x8_dct    = x264_sub8x8_dct_mmx;
562
        dctf->sub16x16_dct  = x264_sub16x16_dct_mmx;
563
        dctf->add8x8_idct   = x264_add8x8_idct_mmx;
564
        dctf->add16x16_idct = x264_add16x16_idct_mmx;
565
566
        dctf->sub8x8_dct8   = x264_sub8x8_dct8_mmx;
567
        dctf->sub16x16_dct8 = x264_sub16x16_dct8_mmx;
568
        dctf->add8x8_idct8  = x264_add8x8_idct8_mmx;
569
        dctf->add16x16_idct8= x264_add16x16_idct8_mmx;
570
#endif
571
    }
572
573
    if( cpu&X264_CPU_MMX2 )
574
    {
575
        dctf->dct4x4dc         = x264_dct4x4dc_mmx2;
576
        dctf->dct2x4dc         = x264_dct2x4dc_mmx2;
577
        dctf->add8x8_idct_dc   = x264_add8x8_idct_dc_mmx2;
578
        dctf->add16x16_idct_dc = x264_add16x16_idct_dc_mmx2;
579
    }
580
581
    if( cpu&X264_CPU_SSE2 )
582
    {
583
        dctf->sub8x8_dct8   = x264_sub8x8_dct8_sse2;
584
        dctf->sub16x16_dct8 = x264_sub16x16_dct8_sse2;
585
        dctf->sub8x8_dct_dc = x264_sub8x8_dct_dc_sse2;
586
        dctf->sub8x16_dct_dc= x264_sub8x16_dct_dc_sse2;
587
        dctf->add8x8_idct8  = x264_add8x8_idct8_sse2;
588
        dctf->add16x16_idct8= x264_add16x16_idct8_sse2;
589
590
        /* These SSE2 routines are skipped when the SSE2_IS_SLOW flag is set. */
        if( !(cpu&X264_CPU_SSE2_IS_SLOW) )
591
        {
592
            dctf->sub8x8_dct    = x264_sub8x8_dct_sse2;
593
            dctf->sub16x16_dct  = x264_sub16x16_dct_sse2;
594
            dctf->add8x8_idct   = x264_add8x8_idct_sse2;
595
            dctf->add16x16_idct = x264_add16x16_idct_sse2;
596
            dctf->add16x16_idct_dc = x264_add16x16_idct_dc_sse2;
597
        }
598
    }
599
600
    if( (cpu&X264_CPU_SSSE3) && !(cpu&X264_CPU_SSE2_IS_SLOW) )
601
    {
602
        dctf->sub8x16_dct_dc = x264_sub8x16_dct_dc_ssse3;
603
        if( !(cpu&X264_CPU_SLOW_ATOM) )
604
        {
605
            dctf->sub4x4_dct    = x264_sub4x4_dct_ssse3;
606
            dctf->sub8x8_dct    = x264_sub8x8_dct_ssse3;
607
            dctf->sub16x16_dct  = x264_sub16x16_dct_ssse3;
608
            dctf->sub8x8_dct8   = x264_sub8x8_dct8_ssse3;
609
            dctf->sub16x16_dct8 = x264_sub16x16_dct8_ssse3;
610
            if( !(cpu&X264_CPU_SLOW_PSHUFB) )
611
            {
612
                dctf->add8x8_idct_dc = x264_add8x8_idct_dc_ssse3;
613
                dctf->add16x16_idct_dc = x264_add16x16_idct_dc_ssse3;
614
            }
615
        }
616
    }
617
618
    if( cpu&X264_CPU_SSE4 )
619
        dctf->add4x4_idct   = x264_add4x4_idct_sse4;
620
621
    if( cpu&X264_CPU_AVX )
622
    {
623
        dctf->add4x4_idct      = x264_add4x4_idct_avx;
624
        dctf->add8x8_idct      = x264_add8x8_idct_avx;
625
        dctf->add16x16_idct    = x264_add16x16_idct_avx;
626
        dctf->add8x8_idct8     = x264_add8x8_idct8_avx;
627
        dctf->add16x16_idct8   = x264_add16x16_idct8_avx;
628
        dctf->add16x16_idct_dc = x264_add16x16_idct_dc_avx;
629
        dctf->sub8x8_dct       = x264_sub8x8_dct_avx;
630
        dctf->sub16x16_dct     = x264_sub16x16_dct_avx;
631
        dctf->sub8x8_dct8      = x264_sub8x8_dct8_avx;
632
        dctf->sub16x16_dct8    = x264_sub16x16_dct8_avx;
633
    }
634
635
    if( cpu&X264_CPU_XOP )
636
    {
637
        dctf->sub8x8_dct       = x264_sub8x8_dct_xop;
638
        dctf->sub16x16_dct     = x264_sub16x16_dct_xop;
639
    }
640
641
    if( cpu&X264_CPU_AVX2 )
642
    {
643
        dctf->add8x8_idct      = x264_add8x8_idct_avx2;
644
        dctf->add16x16_idct    = x264_add16x16_idct_avx2;
645
        dctf->sub8x8_dct       = x264_sub8x8_dct_avx2;
646
        dctf->sub16x16_dct     = x264_sub16x16_dct_avx2;
647
        dctf->add16x16_idct_dc = x264_add16x16_idct_dc_avx2;
648
#if ARCH_X86_64
649
        dctf->sub16x16_dct8    = x264_sub16x16_dct8_avx2;
650
#endif
651
    }
652
653
    if( cpu&X264_CPU_AVX512 )
654
    {
655
        dctf->sub4x4_dct       = x264_sub4x4_dct_avx512;
656
        dctf->sub8x8_dct       = x264_sub8x8_dct_avx512;
657
        dctf->sub16x16_dct     = x264_sub16x16_dct_avx512;
658
        dctf->sub8x8_dct_dc    = x264_sub8x8_dct_dc_avx512;
659
        dctf->sub8x16_dct_dc   = x264_sub8x16_dct_dc_avx512;
660
        dctf->add8x8_idct      = x264_add8x8_idct_avx512;
661
    }
662
#endif //HAVE_MMX
663
664
#if HAVE_ALTIVEC
665
    if( cpu&X264_CPU_ALTIVEC )
666
    {
667
        dctf->sub4x4_dct    = x264_sub4x4_dct_altivec;
668
        dctf->sub8x8_dct    = x264_sub8x8_dct_altivec;
669
        dctf->sub16x16_dct  = x264_sub16x16_dct_altivec;
670
671
        dctf->add8x8_idct_dc = x264_add8x8_idct_dc_altivec;
672
        dctf->add16x16_idct_dc = x264_add16x16_idct_dc_altivec;
673
674
        dctf->add4x4_idct   = x264_add4x4_idct_altivec;
675
        dctf->add8x8_idct   = x264_add8x8_idct_altivec;
676
        dctf->add16x16_idct = x264_add16x16_idct_altivec;
677
678
        dctf->sub8x8_dct_dc = x264_sub8x8_dct_dc_altivec;
679
        dctf->sub8x8_dct8   = x264_sub8x8_dct8_altivec;
680
        dctf->sub16x16_dct8 = x264_sub16x16_dct8_altivec;
681
682
        dctf->add8x8_idct8  = x264_add8x8_idct8_altivec;
683
        dctf->add16x16_idct8= x264_add16x16_idct8_altivec;
684
    }
685
#endif
686
687
#if HAVE_ARMV6 || HAVE_AARCH64
688
    if( cpu&X264_CPU_NEON )
689
    {
690
        dctf->sub4x4_dct    = x264_sub4x4_dct_neon;
691
        dctf->sub8x8_dct    = x264_sub8x8_dct_neon;
692
        dctf->sub16x16_dct  = x264_sub16x16_dct_neon;
693
        dctf->add8x8_idct_dc = x264_add8x8_idct_dc_neon;
694
        dctf->add16x16_idct_dc = x264_add16x16_idct_dc_neon;
695
        dctf->sub8x8_dct_dc = x264_sub8x8_dct_dc_neon;
696
        dctf->dct4x4dc      = x264_dct4x4dc_neon;
697
        dctf->idct4x4dc     = x264_idct4x4dc_neon;
698
699
        dctf->add4x4_idct   = x264_add4x4_idct_neon;
700
        dctf->add8x8_idct   = x264_add8x8_idct_neon;
701
        dctf->add16x16_idct = x264_add16x16_idct_neon;
702
703
        dctf->sub8x8_dct8   = x264_sub8x8_dct8_neon;
704
        dctf->sub16x16_dct8 = x264_sub16x16_dct8_neon;
705
706
        dctf->add8x8_idct8  = x264_add8x8_idct8_neon;
707
        dctf->add16x16_idct8= x264_add16x16_idct8_neon;
708
        dctf->sub8x16_dct_dc= x264_sub8x16_dct_dc_neon;
709
    }
710
#if HAVE_SVE
711
    if ( cpu&X264_CPU_SVE )
712
    {
713
        dctf->sub4x4_dct    = x264_sub4x4_dct_sve;
714
    }
715
#endif
716
#if HAVE_SVE2
717
    if ( cpu&X264_CPU_SVE2 )
718
    {
719
        dctf->add4x4_idct   = x264_add4x4_idct_sve2;
720
    }
721
#endif
722
#endif
723
724
#if HAVE_MSA
725
    if( cpu&X264_CPU_MSA )
726
    {
727
        dctf->sub4x4_dct       = x264_sub4x4_dct_msa;
728
        dctf->sub8x8_dct       = x264_sub8x8_dct_msa;
729
        dctf->sub16x16_dct     = x264_sub16x16_dct_msa;
730
        dctf->sub8x8_dct_dc    = x264_sub8x8_dct_dc_msa;
731
        dctf->sub8x16_dct_dc   = x264_sub8x16_dct_dc_msa;
732
        dctf->dct4x4dc         = x264_dct4x4dc_msa;
733
        dctf->idct4x4dc        = x264_idct4x4dc_msa;
734
        dctf->add4x4_idct      = x264_add4x4_idct_msa;
735
        dctf->add8x8_idct      = x264_add8x8_idct_msa;
736
        dctf->add8x8_idct_dc   = x264_add8x8_idct_dc_msa;
737
        dctf->add16x16_idct    = x264_add16x16_idct_msa;
738
        dctf->add16x16_idct_dc = x264_add16x16_idct_dc_msa;
739
        dctf->add8x8_idct8     = x264_add8x8_idct8_msa;
740
        dctf->add16x16_idct8   = x264_add16x16_idct8_msa;
741
    }
742
#endif
743
744
#if HAVE_LSX
745
    if( cpu&X264_CPU_LSX )
746
    {
747
        dctf->sub4x4_dct       = x264_sub4x4_dct_lsx;
748
        dctf->add4x4_idct      = x264_add4x4_idct_lsx;
749
        dctf->dct4x4dc         = x264_dct4x4dc_lsx;
750
        dctf->idct4x4dc        = x264_idct4x4dc_lsx;
751
        dctf->sub8x8_dct8      = x264_sub8x8_dct8_lsx;
752
        dctf->sub8x8_dct       = x264_sub8x8_dct_lsx;
753
        dctf->add8x8_idct      = x264_add8x8_idct_lsx;
754
        dctf->add8x8_idct8     = x264_add8x8_idct8_lsx;
755
        dctf->add8x8_idct_dc   = x264_add8x8_idct_dc_lsx;
756
        dctf->add16x16_idct    = x264_add16x16_idct_lsx;
757
        dctf->sub16x16_dct     = x264_sub16x16_dct_lsx;
758
        dctf->add16x16_idct_dc = x264_add16x16_idct_dc_lsx;
759
        dctf->sub16x16_dct8    = x264_sub16x16_dct8_lsx;
760
    }
761
    if( cpu&X264_CPU_LASX )
762
    {
763
        dctf->sub8x8_dct       = x264_sub8x8_dct_lasx;
764
        dctf->sub16x16_dct     = x264_sub16x16_dct_lasx;
765
        dctf->add8x8_idct      = x264_add8x8_idct_lasx;
766
        dctf->add8x8_idct8     = x264_add8x8_idct8_lasx;
767
        dctf->add16x16_idct    = x264_add16x16_idct_lasx;
768
        dctf->sub16x16_dct8    = x264_sub16x16_dct8_lasx;
769
        dctf->add8x8_idct_dc   = x264_add8x8_idct_dc_lasx;
770
        dctf->add16x16_idct_dc = x264_add16x16_idct_dc_lasx;
771
        dctf->dct4x4dc         = x264_dct4x4dc_lasx;
772
        dctf->idct4x4dc        = x264_idct4x4dc_lasx;
773
    }
774
#endif
775
776
#endif // HIGH_BIT_DEPTH
777
158
}
x264_8_dct_init
Line
Count
Source
478
158
{
479
158
    dctf->sub4x4_dct    = sub4x4_dct;
480
158
    dctf->add4x4_idct   = add4x4_idct;
481
482
158
    dctf->sub8x8_dct    = sub8x8_dct;
483
158
    dctf->sub8x8_dct_dc = sub8x8_dct_dc;
484
158
    dctf->add8x8_idct   = add8x8_idct;
485
158
    dctf->add8x8_idct_dc = add8x8_idct_dc;
486
487
158
    dctf->sub8x16_dct_dc = sub8x16_dct_dc;
488
489
158
    dctf->sub16x16_dct  = sub16x16_dct;
490
158
    dctf->add16x16_idct = add16x16_idct;
491
158
    dctf->add16x16_idct_dc = add16x16_idct_dc;
492
493
158
    dctf->sub8x8_dct8   = sub8x8_dct8;
494
158
    dctf->add8x8_idct8  = add8x8_idct8;
495
496
158
    dctf->sub16x16_dct8  = sub16x16_dct8;
497
158
    dctf->add16x16_idct8 = add16x16_idct8;
498
499
158
    dctf->dct4x4dc  = dct4x4dc;
500
158
    dctf->idct4x4dc = idct4x4dc;
501
502
158
    dctf->dct2x4dc = dct2x4dc;
503
504
#if HIGH_BIT_DEPTH
505
#if HAVE_MMX
506
    if( cpu&X264_CPU_MMX )
507
    {
508
        dctf->sub4x4_dct    = x264_sub4x4_dct_mmx;
509
        dctf->sub8x8_dct    = x264_sub8x8_dct_mmx;
510
        dctf->sub16x16_dct  = x264_sub16x16_dct_mmx;
511
    }
512
    if( cpu&X264_CPU_SSE2 )
513
    {
514
        dctf->add4x4_idct     = x264_add4x4_idct_sse2;
515
        dctf->dct4x4dc        = x264_dct4x4dc_sse2;
516
        dctf->idct4x4dc       = x264_idct4x4dc_sse2;
517
        dctf->dct2x4dc        = x264_dct2x4dc_sse2;
518
        dctf->sub8x8_dct8     = x264_sub8x8_dct8_sse2;
519
        dctf->sub16x16_dct8   = x264_sub16x16_dct8_sse2;
520
        dctf->add8x8_idct     = x264_add8x8_idct_sse2;
521
        dctf->add16x16_idct   = x264_add16x16_idct_sse2;
522
        dctf->add8x8_idct8    = x264_add8x8_idct8_sse2;
523
        dctf->add16x16_idct8    = x264_add16x16_idct8_sse2;
524
        dctf->sub8x8_dct_dc   = x264_sub8x8_dct_dc_sse2;
525
        dctf->add8x8_idct_dc  = x264_add8x8_idct_dc_sse2;
526
        dctf->sub8x16_dct_dc  = x264_sub8x16_dct_dc_sse2;
527
        dctf->add16x16_idct_dc= x264_add16x16_idct_dc_sse2;
528
    }
529
    if( cpu&X264_CPU_SSE4 )
530
    {
531
        dctf->sub8x8_dct8     = x264_sub8x8_dct8_sse4;
532
        dctf->sub16x16_dct8   = x264_sub16x16_dct8_sse4;
533
    }
534
    if( cpu&X264_CPU_AVX )
535
    {
536
        dctf->add4x4_idct     = x264_add4x4_idct_avx;
537
        dctf->dct4x4dc        = x264_dct4x4dc_avx;
538
        dctf->idct4x4dc       = x264_idct4x4dc_avx;
539
        dctf->dct2x4dc        = x264_dct2x4dc_avx;
540
        dctf->sub8x8_dct8     = x264_sub8x8_dct8_avx;
541
        dctf->sub16x16_dct8   = x264_sub16x16_dct8_avx;
542
        dctf->add8x8_idct     = x264_add8x8_idct_avx;
543
        dctf->add16x16_idct   = x264_add16x16_idct_avx;
544
        dctf->add8x8_idct8    = x264_add8x8_idct8_avx;
545
        dctf->add16x16_idct8  = x264_add16x16_idct8_avx;
546
        dctf->add8x8_idct_dc  = x264_add8x8_idct_dc_avx;
547
        dctf->sub8x16_dct_dc  = x264_sub8x16_dct_dc_avx;
548
        dctf->add16x16_idct_dc= x264_add16x16_idct_dc_avx;
549
    }
550
#endif // HAVE_MMX
551
#else // !HIGH_BIT_DEPTH
552
#if HAVE_MMX
553
    if( cpu&X264_CPU_MMX )
554
    {
555
        dctf->sub4x4_dct    = x264_sub4x4_dct_mmx;
556
        dctf->add4x4_idct   = x264_add4x4_idct_mmx;
557
        dctf->idct4x4dc     = x264_idct4x4dc_mmx;
558
        dctf->sub8x8_dct_dc = x264_sub8x8_dct_dc_mmx2;
559
560
#if !ARCH_X86_64
561
        dctf->sub8x8_dct    = x264_sub8x8_dct_mmx;
562
        dctf->sub16x16_dct  = x264_sub16x16_dct_mmx;
563
        dctf->add8x8_idct   = x264_add8x8_idct_mmx;
564
        dctf->add16x16_idct = x264_add16x16_idct_mmx;
565
566
        dctf->sub8x8_dct8   = x264_sub8x8_dct8_mmx;
567
        dctf->sub16x16_dct8 = x264_sub16x16_dct8_mmx;
568
        dctf->add8x8_idct8  = x264_add8x8_idct8_mmx;
569
        dctf->add16x16_idct8= x264_add16x16_idct8_mmx;
570
#endif
571
    }
572
573
    if( cpu&X264_CPU_MMX2 )
574
    {
575
        dctf->dct4x4dc         = x264_dct4x4dc_mmx2;
576
        dctf->dct2x4dc         = x264_dct2x4dc_mmx2;
577
        dctf->add8x8_idct_dc   = x264_add8x8_idct_dc_mmx2;
578
        dctf->add16x16_idct_dc = x264_add16x16_idct_dc_mmx2;
579
    }
580
581
    if( cpu&X264_CPU_SSE2 )
582
    {
583
        dctf->sub8x8_dct8   = x264_sub8x8_dct8_sse2;
584
        dctf->sub16x16_dct8 = x264_sub16x16_dct8_sse2;
585
        dctf->sub8x8_dct_dc = x264_sub8x8_dct_dc_sse2;
586
        dctf->sub8x16_dct_dc= x264_sub8x16_dct_dc_sse2;
587
        dctf->add8x8_idct8  = x264_add8x8_idct8_sse2;
588
        dctf->add16x16_idct8= x264_add16x16_idct8_sse2;
589
590
        if( !(cpu&X264_CPU_SSE2_IS_SLOW) )
591
        {
592
            dctf->sub8x8_dct    = x264_sub8x8_dct_sse2;
593
            dctf->sub16x16_dct  = x264_sub16x16_dct_sse2;
594
            dctf->add8x8_idct   = x264_add8x8_idct_sse2;
595
            dctf->add16x16_idct = x264_add16x16_idct_sse2;
596
            dctf->add16x16_idct_dc = x264_add16x16_idct_dc_sse2;
597
        }
598
    }
599
600
    if( (cpu&X264_CPU_SSSE3) && !(cpu&X264_CPU_SSE2_IS_SLOW) )
601
    {
602
        dctf->sub8x16_dct_dc = x264_sub8x16_dct_dc_ssse3;
603
        if( !(cpu&X264_CPU_SLOW_ATOM) )
604
        {
605
            dctf->sub4x4_dct    = x264_sub4x4_dct_ssse3;
606
            dctf->sub8x8_dct    = x264_sub8x8_dct_ssse3;
607
            dctf->sub16x16_dct  = x264_sub16x16_dct_ssse3;
608
            dctf->sub8x8_dct8   = x264_sub8x8_dct8_ssse3;
609
            dctf->sub16x16_dct8 = x264_sub16x16_dct8_ssse3;
610
            if( !(cpu&X264_CPU_SLOW_PSHUFB) )
611
            {
612
                dctf->add8x8_idct_dc = x264_add8x8_idct_dc_ssse3;
613
                dctf->add16x16_idct_dc = x264_add16x16_idct_dc_ssse3;
614
            }
615
        }
616
    }
617
618
    if( cpu&X264_CPU_SSE4 )
619
        dctf->add4x4_idct   = x264_add4x4_idct_sse4;
620
621
    if( cpu&X264_CPU_AVX )
622
    {
623
        dctf->add4x4_idct      = x264_add4x4_idct_avx;
624
        dctf->add8x8_idct      = x264_add8x8_idct_avx;
625
        dctf->add16x16_idct    = x264_add16x16_idct_avx;
626
        dctf->add8x8_idct8     = x264_add8x8_idct8_avx;
627
        dctf->add16x16_idct8   = x264_add16x16_idct8_avx;
628
        dctf->add16x16_idct_dc = x264_add16x16_idct_dc_avx;
629
        dctf->sub8x8_dct       = x264_sub8x8_dct_avx;
630
        dctf->sub16x16_dct     = x264_sub16x16_dct_avx;
631
        dctf->sub8x8_dct8      = x264_sub8x8_dct8_avx;
632
        dctf->sub16x16_dct8    = x264_sub16x16_dct8_avx;
633
    }
634
635
    if( cpu&X264_CPU_XOP )
636
    {
637
        dctf->sub8x8_dct       = x264_sub8x8_dct_xop;
638
        dctf->sub16x16_dct     = x264_sub16x16_dct_xop;
639
    }
640
641
    if( cpu&X264_CPU_AVX2 )
642
    {
643
        dctf->add8x8_idct      = x264_add8x8_idct_avx2;
644
        dctf->add16x16_idct    = x264_add16x16_idct_avx2;
645
        dctf->sub8x8_dct       = x264_sub8x8_dct_avx2;
646
        dctf->sub16x16_dct     = x264_sub16x16_dct_avx2;
647
        dctf->add16x16_idct_dc = x264_add16x16_idct_dc_avx2;
648
#if ARCH_X86_64
649
        dctf->sub16x16_dct8    = x264_sub16x16_dct8_avx2;
650
#endif
651
    }
652
653
    if( cpu&X264_CPU_AVX512 )
654
    {
655
        dctf->sub4x4_dct       = x264_sub4x4_dct_avx512;
656
        dctf->sub8x8_dct       = x264_sub8x8_dct_avx512;
657
        dctf->sub16x16_dct     = x264_sub16x16_dct_avx512;
658
        dctf->sub8x8_dct_dc    = x264_sub8x8_dct_dc_avx512;
659
        dctf->sub8x16_dct_dc   = x264_sub8x16_dct_dc_avx512;
660
        dctf->add8x8_idct      = x264_add8x8_idct_avx512;
661
    }
662
#endif //HAVE_MMX
663
664
#if HAVE_ALTIVEC
665
    if( cpu&X264_CPU_ALTIVEC )
666
    {
667
        dctf->sub4x4_dct    = x264_sub4x4_dct_altivec;
668
        dctf->sub8x8_dct    = x264_sub8x8_dct_altivec;
669
        dctf->sub16x16_dct  = x264_sub16x16_dct_altivec;
670
671
        dctf->add8x8_idct_dc = x264_add8x8_idct_dc_altivec;
672
        dctf->add16x16_idct_dc = x264_add16x16_idct_dc_altivec;
673
674
        dctf->add4x4_idct   = x264_add4x4_idct_altivec;
675
        dctf->add8x8_idct   = x264_add8x8_idct_altivec;
676
        dctf->add16x16_idct = x264_add16x16_idct_altivec;
677
678
        dctf->sub8x8_dct_dc = x264_sub8x8_dct_dc_altivec;
679
        dctf->sub8x8_dct8   = x264_sub8x8_dct8_altivec;
680
        dctf->sub16x16_dct8 = x264_sub16x16_dct8_altivec;
681
682
        dctf->add8x8_idct8  = x264_add8x8_idct8_altivec;
683
        dctf->add16x16_idct8= x264_add16x16_idct8_altivec;
684
    }
685
#endif
686
687
#if HAVE_ARMV6 || HAVE_AARCH64
688
    if( cpu&X264_CPU_NEON )
689
    {
690
        dctf->sub4x4_dct    = x264_sub4x4_dct_neon;
691
        dctf->sub8x8_dct    = x264_sub8x8_dct_neon;
692
        dctf->sub16x16_dct  = x264_sub16x16_dct_neon;
693
        dctf->add8x8_idct_dc = x264_add8x8_idct_dc_neon;
694
        dctf->add16x16_idct_dc = x264_add16x16_idct_dc_neon;
695
        dctf->sub8x8_dct_dc = x264_sub8x8_dct_dc_neon;
696
        dctf->dct4x4dc      = x264_dct4x4dc_neon;
697
        dctf->idct4x4dc     = x264_idct4x4dc_neon;
698
699
        dctf->add4x4_idct   = x264_add4x4_idct_neon;
700
        dctf->add8x8_idct   = x264_add8x8_idct_neon;
701
        dctf->add16x16_idct = x264_add16x16_idct_neon;
702
703
        dctf->sub8x8_dct8   = x264_sub8x8_dct8_neon;
704
        dctf->sub16x16_dct8 = x264_sub16x16_dct8_neon;
705
706
        dctf->add8x8_idct8  = x264_add8x8_idct8_neon;
707
        dctf->add16x16_idct8= x264_add16x16_idct8_neon;
708
        dctf->sub8x16_dct_dc= x264_sub8x16_dct_dc_neon;
709
    }
710
#if HAVE_SVE
711
    if ( cpu&X264_CPU_SVE )
712
    {
713
        dctf->sub4x4_dct    = x264_sub4x4_dct_sve;
714
    }
715
#endif
716
#if HAVE_SVE2
717
    if ( cpu&X264_CPU_SVE2 )
718
    {
719
        dctf->add4x4_idct   = x264_add4x4_idct_sve2;
720
    }
721
#endif
722
#endif
723
724
#if HAVE_MSA
725
    if( cpu&X264_CPU_MSA )
726
    {
727
        dctf->sub4x4_dct       = x264_sub4x4_dct_msa;
728
        dctf->sub8x8_dct       = x264_sub8x8_dct_msa;
729
        dctf->sub16x16_dct     = x264_sub16x16_dct_msa;
730
        dctf->sub8x8_dct_dc    = x264_sub8x8_dct_dc_msa;
731
        dctf->sub8x16_dct_dc   = x264_sub8x16_dct_dc_msa;
732
        dctf->dct4x4dc         = x264_dct4x4dc_msa;
733
        dctf->idct4x4dc        = x264_idct4x4dc_msa;
734
        dctf->add4x4_idct      = x264_add4x4_idct_msa;
735
        dctf->add8x8_idct      = x264_add8x8_idct_msa;
736
        dctf->add8x8_idct_dc   = x264_add8x8_idct_dc_msa;
737
        dctf->add16x16_idct    = x264_add16x16_idct_msa;
738
        dctf->add16x16_idct_dc = x264_add16x16_idct_dc_msa;
739
        dctf->add8x8_idct8     = x264_add8x8_idct8_msa;
740
        dctf->add16x16_idct8   = x264_add16x16_idct8_msa;
741
    }
742
#endif
743
744
#if HAVE_LSX
745
    if( cpu&X264_CPU_LSX )
746
    {
747
        dctf->sub4x4_dct       = x264_sub4x4_dct_lsx;
748
        dctf->add4x4_idct      = x264_add4x4_idct_lsx;
749
        dctf->dct4x4dc         = x264_dct4x4dc_lsx;
750
        dctf->idct4x4dc        = x264_idct4x4dc_lsx;
751
        dctf->sub8x8_dct8      = x264_sub8x8_dct8_lsx;
752
        dctf->sub8x8_dct       = x264_sub8x8_dct_lsx;
753
        dctf->add8x8_idct      = x264_add8x8_idct_lsx;
754
        dctf->add8x8_idct8     = x264_add8x8_idct8_lsx;
755
        dctf->add8x8_idct_dc   = x264_add8x8_idct_dc_lsx;
756
        dctf->add16x16_idct    = x264_add16x16_idct_lsx;
757
        dctf->sub16x16_dct     = x264_sub16x16_dct_lsx;
758
        dctf->add16x16_idct_dc = x264_add16x16_idct_dc_lsx;
759
        dctf->sub16x16_dct8    = x264_sub16x16_dct8_lsx;
760
    }
761
    if( cpu&X264_CPU_LASX )
762
    {
763
        dctf->sub8x8_dct       = x264_sub8x8_dct_lasx;
764
        dctf->sub16x16_dct     = x264_sub16x16_dct_lasx;
765
        dctf->add8x8_idct      = x264_add8x8_idct_lasx;
766
        dctf->add8x8_idct8     = x264_add8x8_idct8_lasx;
767
        dctf->add16x16_idct    = x264_add16x16_idct_lasx;
768
        dctf->sub16x16_dct8    = x264_sub16x16_dct8_lasx;
769
        dctf->add8x8_idct_dc   = x264_add8x8_idct_dc_lasx;
770
        dctf->add16x16_idct_dc = x264_add16x16_idct_dc_lasx;
771
        dctf->dct4x4dc         = x264_dct4x4dc_lasx;
772
        dctf->idct4x4dc        = x264_idct4x4dc_lasx;
773
    }
774
#endif
775
776
158
#endif // HIGH_BIT_DEPTH
777
158
}
Unexecuted instantiation: x264_10_dct_init
778
779
780
0
/* ZIG(i,y,x): store into level[i] the entry of dct at offset x*8+y.
 * This definition stays active for the two 8x8 scan functions below, until
 * the next #undef ZIG. */
#define ZIG(i,y,x) level[i] = dct[x*8+y];
781
/* Scan table for an 8x8 block, frame variant: 64 ZIG(i,y,x) entries, one for
 * each output index i = 0..63. The table only lists (i,y,x) triples; what is
 * done with a triple depends on the ZIG definition active where the table is
 * expanded (plain copy in zigzag_scan_8x8_frame, source-minus-prediction in
 * zigzag_sub_8x8_frame). Note that the last row also ends with a line
 * continuation, so the line following it still belongs to the macro. */
#define ZIGZAG8_FRAME\
782
0
    ZIG( 0,0,0) ZIG( 1,0,1) ZIG( 2,1,0) ZIG( 3,2,0)\
783
0
    ZIG( 4,1,1) ZIG( 5,0,2) ZIG( 6,0,3) ZIG( 7,1,2)\
784
0
    ZIG( 8,2,1) ZIG( 9,3,0) ZIG(10,4,0) ZIG(11,3,1)\
785
0
    ZIG(12,2,2) ZIG(13,1,3) ZIG(14,0,4) ZIG(15,0,5)\
786
0
    ZIG(16,1,4) ZIG(17,2,3) ZIG(18,3,2) ZIG(19,4,1)\
787
0
    ZIG(20,5,0) ZIG(21,6,0) ZIG(22,5,1) ZIG(23,4,2)\
788
0
    ZIG(24,3,3) ZIG(25,2,4) ZIG(26,1,5) ZIG(27,0,6)\
789
0
    ZIG(28,0,7) ZIG(29,1,6) ZIG(30,2,5) ZIG(31,3,4)\
790
0
    ZIG(32,4,3) ZIG(33,5,2) ZIG(34,6,1) ZIG(35,7,0)\
791
0
    ZIG(36,7,1) ZIG(37,6,2) ZIG(38,5,3) ZIG(39,4,4)\
792
0
    ZIG(40,3,5) ZIG(41,2,6) ZIG(42,1,7) ZIG(43,2,7)\
793
0
    ZIG(44,3,6) ZIG(45,4,5) ZIG(46,5,4) ZIG(47,6,3)\
794
0
    ZIG(48,7,2) ZIG(49,7,3) ZIG(50,6,4) ZIG(51,5,5)\
795
0
    ZIG(52,4,6) ZIG(53,3,7) ZIG(54,4,7) ZIG(55,5,6)\
796
0
    ZIG(56,6,5) ZIG(57,7,4) ZIG(58,7,5) ZIG(59,6,6)\
797
0
    ZIG(60,5,7) ZIG(61,6,7) ZIG(62,7,6) ZIG(63,7,7)\
798
799
/* Scan table for an 8x8 block, field variant: 64 ZIG(i,y,x) entries, one for
 * each output index i = 0..63. As with the frame table, the effect of each
 * entry is decided by the ZIG definition active at the expansion site
 * (zigzag_scan_8x8_field and zigzag_sub_8x8_field both expand it). */
#define ZIGZAG8_FIELD\
800
0
    ZIG( 0,0,0) ZIG( 1,1,0) ZIG( 2,2,0) ZIG( 3,0,1)\
801
0
    ZIG( 4,1,1) ZIG( 5,3,0) ZIG( 6,4,0) ZIG( 7,2,1)\
802
0
    ZIG( 8,0,2) ZIG( 9,3,1) ZIG(10,5,0) ZIG(11,6,0)\
803
0
    ZIG(12,7,0) ZIG(13,4,1) ZIG(14,1,2) ZIG(15,0,3)\
804
0
    ZIG(16,2,2) ZIG(17,5,1) ZIG(18,6,1) ZIG(19,7,1)\
805
0
    ZIG(20,3,2) ZIG(21,1,3) ZIG(22,0,4) ZIG(23,2,3)\
806
0
    ZIG(24,4,2) ZIG(25,5,2) ZIG(26,6,2) ZIG(27,7,2)\
807
0
    ZIG(28,3,3) ZIG(29,1,4) ZIG(30,0,5) ZIG(31,2,4)\
808
0
    ZIG(32,4,3) ZIG(33,5,3) ZIG(34,6,3) ZIG(35,7,3)\
809
0
    ZIG(36,3,4) ZIG(37,1,5) ZIG(38,0,6) ZIG(39,2,5)\
810
0
    ZIG(40,4,4) ZIG(41,5,4) ZIG(42,6,4) ZIG(43,7,4)\
811
0
    ZIG(44,3,5) ZIG(45,1,6) ZIG(46,2,6) ZIG(47,4,5)\
812
0
    ZIG(48,5,5) ZIG(49,6,5) ZIG(50,7,5) ZIG(51,3,6)\
813
0
    ZIG(52,0,7) ZIG(53,1,7) ZIG(54,4,6) ZIG(55,5,6)\
814
0
    ZIG(56,6,6) ZIG(57,7,6) ZIG(58,2,7) ZIG(59,3,7)\
815
0
    ZIG(60,4,7) ZIG(61,5,7) ZIG(62,6,7) ZIG(63,7,7)
816
817
/* Scan table for a 4x4 block, frame variant: 16 entries for output indices
 * 0..15. Entry 0 goes through ZIGDC instead of ZIG so that an expansion site
 * can give the first coefficient separate treatment (the 4x4ac functions
 * redefine ZIGDC to divert it into *dc). */
#define ZIGZAG4_FRAME\
818
381k
    ZIGDC( 0,0,0) ZIG( 1,0,1) ZIG( 2,1,0) ZIG( 3,2,0)\
819
381k
    ZIG( 4,1,1) ZIG( 5,0,2) ZIG( 6,0,3) ZIG( 7,1,2)\
820
381k
    ZIG( 8,2,1) ZIG( 9,3,0) ZIG(10,3,1) ZIG(11,2,2)\
821
381k
    ZIG(12,1,3) ZIG(13,2,3) ZIG(14,3,2) ZIG(15,3,3)
822
823
/* Scan table for a 4x4 block, field variant: 16 entries for output indices
 * 0..15, with entry 0 routed through ZIGDC like in ZIGZAG4_FRAME. */
#define ZIGZAG4_FIELD\
824
0
    ZIGDC( 0,0,0) ZIG( 1,1,0) ZIG( 2,0,1) ZIG( 3,2,0)\
825
0
    ZIG( 4,3,0) ZIG( 5,1,1) ZIG( 6,2,1) ZIG( 7,3,1)\
826
0
    ZIG( 8,0,2) ZIG( 9,1,2) ZIG(10,2,2) ZIG(11,3,2)\
827
0
    ZIG(12,0,3) ZIG(13,1,3) ZIG(14,2,3) ZIG(15,3,3)
828
829
/* Reorder the 64 coefficients of dct into level following the ZIGZAG8_FRAME
 * table. With the ZIG definition active here, each table entry is the plain
 * copy level[i] = dct[x*8+y]. */
static void zigzag_scan_8x8_frame( dctcoef level[64], dctcoef dct[64] )
830
0
{
831
0
    ZIGZAG8_FRAME
832
0
}
833
834
/* Reorder the 64 coefficients of dct into level following the ZIGZAG8_FIELD
 * table. With the ZIG definition active here, each table entry is the plain
 * copy level[i] = dct[x*8+y]. */
static void zigzag_scan_8x8_field( dctcoef level[64], dctcoef dct[64] )
835
0
{
836
0
    ZIGZAG8_FIELD
837
0
}
838
839
/* Switch ZIG to the 4x4 layout: level[i] takes the dct entry at offset x*4+y.
 * ZIGDC is an alias for ZIG here, so the first entry of the 4x4 tables is
 * copied exactly like the other fifteen. */
#undef ZIG
840
248k
#define ZIG(i,y,x) level[i] = dct[x*4+y];
841
23.3k
#define ZIGDC(i,y,x) ZIG(i,y,x)
842
843
/* Reorder the 16 coefficients of dct into level following the ZIGZAG4_FRAME
 * table; every entry, including the ZIGDC one, is the plain copy
 * level[i] = dct[x*4+y]. */
static void zigzag_scan_4x4_frame( dctcoef level[16], dctcoef dct[16] )
844
15.5k
{
845
15.5k
    ZIGZAG4_FRAME
846
15.5k
}
847
848
/* Field-order scan of a 4x4 block, written without expanding ZIGZAG4_FIELD:
 * in that table only outputs 2..5 differ from their input position, so the
 * runs 0..1 and 6..15 are copied in bulk and only four entries are moved
 * individually. */
static void zigzag_scan_4x4_field( dctcoef level[16], dctcoef dct[16] )
849
0
{
850
0
    /* level[0..1] = dct[0..1] */
    memcpy( level, dct, 2 * sizeof(dctcoef) );
851
0
    /* level[2] = dct[4], level[3] = dct[2], level[4] = dct[3], level[5] = dct[5] */
    ZIG(2,0,1) ZIG(3,2,0) ZIG(4,3,0) ZIG(5,1,1)
852
0
    /* level[6..15] = dct[6..15] */
    memcpy( level+6, dct+6, 10 * sizeof(dctcoef) );
853
0
}
854
855
/* Switch ZIG to the residual form used by the zigzag_sub_* functions:
 * x is the column and y the row of a pixel; the source pixel is read from
 * p_src with row stride FENC_STRIDE, the pixel to subtract from p_dst with
 * row stride FDEC_STRIDE. The difference is stored in level[i] and OR-ed
 * into nz, so nz ends up non-zero iff at least one stored difference is.
 * Expects level, p_src, p_dst and nz to exist at the expansion site. */
#undef ZIG
856
5.50M
#define ZIG(i,y,x) {\
857
5.50M
    int oe = x+y*FENC_STRIDE;\
858
5.50M
    int od = x+y*FDEC_STRIDE;\
859
5.50M
    level[i] = p_src[oe] - p_dst[od];\
860
5.50M
    nz |= level[i];\
861
5.50M
}
862
/* Apply CPPIXEL_X4 to rows 0..3, stepping p_dst by FDEC_STRIDE and p_src by
 * FENC_STRIDE per row. CPPIXEL_X4 is defined outside this file section;
 * presumably it copies four pixels from its second argument to its first,
 * making this a 4x4 copy of p_src into p_dst -- TODO confirm. */
#define COPY4x4\
863
366k
    CPPIXEL_X4( p_dst+0*FDEC_STRIDE, p_src+0*FENC_STRIDE );\
864
366k
    CPPIXEL_X4( p_dst+1*FDEC_STRIDE, p_src+1*FENC_STRIDE );\
865
366k
    CPPIXEL_X4( p_dst+2*FDEC_STRIDE, p_src+2*FENC_STRIDE );\
866
366k
    CPPIXEL_X4( p_dst+3*FDEC_STRIDE, p_src+3*FENC_STRIDE );
867
0
/* Eight-pixel variant built from two CPPIXEL_X4 invocations, at offsets 0 and
 * 4, joined with the comma operator so the macro is a single expression. */
#define CPPIXEL_X8(dst,src) ( CPPIXEL_X4(dst,src), CPPIXEL_X4(dst+4,src+4) )
868
/* Apply CPPIXEL_X8 to rows 0..7, stepping p_dst by FDEC_STRIDE and p_src by
 * FENC_STRIDE per row; the 8x8 counterpart of COPY4x4. */
#define COPY8x8\
869
0
    CPPIXEL_X8( p_dst+0*FDEC_STRIDE, p_src+0*FENC_STRIDE );\
870
0
    CPPIXEL_X8( p_dst+1*FDEC_STRIDE, p_src+1*FENC_STRIDE );\
871
0
    CPPIXEL_X8( p_dst+2*FDEC_STRIDE, p_src+2*FENC_STRIDE );\
872
0
    CPPIXEL_X8( p_dst+3*FDEC_STRIDE, p_src+3*FENC_STRIDE );\
873
0
    CPPIXEL_X8( p_dst+4*FDEC_STRIDE, p_src+4*FENC_STRIDE );\
874
0
    CPPIXEL_X8( p_dst+5*FDEC_STRIDE, p_src+5*FENC_STRIDE );\
875
0
    CPPIXEL_X8( p_dst+6*FDEC_STRIDE, p_src+6*FENC_STRIDE );\
876
0
    CPPIXEL_X8( p_dst+7*FDEC_STRIDE, p_src+7*FENC_STRIDE );
877
878
/* Compute the 4x4 difference p_src - p_dst and store it in level in
 * ZIGZAG4_FRAME order, then run COPY4x4 on p_dst/p_src.
 * ZIGDC is still the alias for ZIG at this point, so all 16 differences,
 * including the first, are stored and counted.
 * Returns 1 if any stored difference is non-zero, 0 otherwise. */
static int zigzag_sub_4x4_frame( dctcoef level[16], const pixel *p_src, pixel *p_dst )
879
7.80k
{
880
7.80k
    int nz = 0;
881
7.80k
    ZIGZAG4_FRAME
882
7.80k
    /* The differences must be taken before this step rewrites p_dst. */
    COPY4x4
883
7.80k
    return !!nz;
884
7.80k
}
885
886
/* Compute the 4x4 difference p_src - p_dst and store it in level in
 * ZIGZAG4_FIELD order, then run COPY4x4 on p_dst/p_src.
 * ZIGDC is still the alias for ZIG at this point, so all 16 differences,
 * including the first, are stored and counted.
 * Returns 1 if any stored difference is non-zero, 0 otherwise. */
static int zigzag_sub_4x4_field( dctcoef level[16], const pixel *p_src, pixel *p_dst )
887
0
{
888
0
    int nz = 0;
889
0
    ZIGZAG4_FIELD
890
0
    /* The differences must be taken before this step rewrites p_dst. */
    COPY4x4
891
0
    return !!nz;
892
0
}
893
894
/* Switch ZIGDC to the AC-only form used by the zigzag_sub_4x4ac_* functions:
 * the first difference is written to *dc instead of level, level[0] is set
 * to 0, and nz is left untouched, so that first value does not influence the
 * non-zero result. The index argument i is unused. */
#undef ZIGDC
895
358k
#define ZIGDC(i,y,x) {\
896
358k
    int oe = x+y*FENC_STRIDE;\
897
358k
    int od = x+y*FDEC_STRIDE;\
898
358k
    *dc = p_src[oe] - p_dst[od];\
899
358k
    level[0] = 0;\
900
358k
}
901
902
/* Like zigzag_sub_4x4_frame, but with the first difference split off: it is
 * written to *dc and level[0] is zeroed, while differences 1..15 are stored
 * in level in ZIGZAG4_FRAME order. COPY4x4 is then run on p_dst/p_src.
 * Returns 1 if any of differences 1..15 is non-zero, 0 otherwise; the value
 * written to *dc is not taken into account. */
static int zigzag_sub_4x4ac_frame( dctcoef level[16], const pixel *p_src, pixel *p_dst, dctcoef *dc )
903
358k
{
904
358k
    int nz = 0;
905
358k
    ZIGZAG4_FRAME
906
358k
    COPY4x4
907
358k
    return !!nz;
908
358k
}
909
910
/* Like zigzag_sub_4x4_field, but with the first difference split off: it is
 * written to *dc and level[0] is zeroed, while differences 1..15 are stored
 * in level in ZIGZAG4_FIELD order. COPY4x4 is then run on p_dst/p_src.
 * Returns 1 if any of differences 1..15 is non-zero, 0 otherwise; the value
 * written to *dc is not taken into account. */
static int zigzag_sub_4x4ac_field( dctcoef level[16], const pixel *p_src, pixel *p_dst, dctcoef *dc )
911
0
{
912
0
    int nz = 0;
913
0
    ZIGZAG4_FIELD
914
0
    COPY4x4
915
0
    return !!nz;
916
0
}
917
918
/* Compute the 8x8 difference p_src - p_dst and store it in level in
 * ZIGZAG8_FRAME order, then run COPY8x8 on p_dst/p_src.
 * Returns 1 if any of the 64 differences is non-zero, 0 otherwise. */
static int zigzag_sub_8x8_frame( dctcoef level[64], const pixel *p_src, pixel *p_dst )
919
0
{
920
0
    int nz = 0;
921
0
    ZIGZAG8_FRAME
922
0
    COPY8x8
923
0
    return !!nz;
924
0
}
925
/* Compute the 8x8 difference p_src - p_dst and store it in level in
 * ZIGZAG8_FIELD order, then run COPY8x8 on p_dst/p_src.
 * Returns 1 if any of the 64 differences is non-zero, 0 otherwise. */
static int zigzag_sub_8x8_field( dctcoef level[64], const pixel *p_src, pixel *p_dst )
926
0
{
927
0
    int nz = 0;
928
0
    ZIGZAG8_FIELD
929
0
    COPY8x8
930
0
    return !!nz;
931
0
}
932
933
#undef ZIG
934
#undef COPY4x4
935
936
/* De-interleave 64 coefficients into four runs of 16.
 * Run i (0..3) collects src[i], src[i+4], ..., src[i+60] and is written to
 * dst[i*16 .. i*16+15]. For each run a flag (1 if any coefficient in it is
 * non-zero, else 0) is stored in nnz at index (i&1) + (i>>1)*8, i.e. at
 * nnz[0], nnz[1], nnz[8] and nnz[9]. */
static void zigzag_interleave_8x8_cavlc( dctcoef *dst, dctcoef *src, uint8_t *nnz )
937
0
{
938
0
    for( int i = 0; i < 4; i++ )
939
0
    {
940
0
        int nz = 0;
941
0
        for( int j = 0; j < 16; j++ )
942
0
        {
943
0
            /* OR-accumulate so nz is non-zero iff some element of the run is. */
            nz |= src[i+j*4];
944
0
            dst[i*16+j] = src[i+j*4];
945
0
        }
946
0
        nnz[(i&1) + (i>>1)*8] = !!nz;
947
0
    }
948
0
}
949
950
/* Fill the two zigzag function tables: pf_progressive receives the *_frame
 * routines and pf_interlaced the *_field routines.
 * Every entry is first set to the C implementation from this file. Each
 * following `if( cpu&X264_CPU_... )` block, compiled in only under its
 * HAVE_* / ARCH_* / HIGH_BIT_DEPTH guard, then replaces individual entries
 * with a platform-specific routine. The blocks are evaluated top to bottom,
 * so when several flags are set in cpu the last assignment made to an entry
 * is the one that remains.
 *   cpu            - bitmask tested against the X264_CPU_* flags
 *   pf_progressive - table to fill with frame-order routines
 *   pf_interlaced  - table to fill with field-order routines */
void x264_zigzag_init( uint32_t cpu, x264_zigzag_function_t *pf_progressive, x264_zigzag_function_t *pf_interlaced )
951
158
{
952
158
    /* C defaults for the scan and sub entries. */
    pf_interlaced->scan_8x8   = zigzag_scan_8x8_field;
953
158
    pf_progressive->scan_8x8  = zigzag_scan_8x8_frame;
954
158
    pf_interlaced->scan_4x4   = zigzag_scan_4x4_field;
955
158
    pf_progressive->scan_4x4  = zigzag_scan_4x4_frame;
956
158
    pf_interlaced->sub_8x8    = zigzag_sub_8x8_field;
957
158
    pf_progressive->sub_8x8   = zigzag_sub_8x8_frame;
958
158
    pf_interlaced->sub_4x4    = zigzag_sub_4x4_field;
959
158
    pf_progressive->sub_4x4   = zigzag_sub_4x4_frame;
960
158
    pf_interlaced->sub_4x4ac  = zigzag_sub_4x4ac_field;
961
158
    pf_progressive->sub_4x4ac = zigzag_sub_4x4ac_frame;
962
963
    /* Platform overrides for the scan/sub entries; the high-bit-depth build
     * only overrides scan_4x4 and scan_8x8 here. */
#if HIGH_BIT_DEPTH
964
#if HAVE_MMX
965
    if( cpu&X264_CPU_SSE2 )
966
    {
967
        pf_interlaced->scan_4x4  = x264_zigzag_scan_4x4_field_sse2;
968
        pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_sse2;
969
        pf_progressive->scan_8x8 = x264_zigzag_scan_8x8_frame_sse2;
970
    }
971
    if( cpu&X264_CPU_SSE4 )
972
        pf_interlaced->scan_8x8 = x264_zigzag_scan_8x8_field_sse4;
973
    if( cpu&X264_CPU_AVX )
974
        pf_interlaced->scan_8x8 = x264_zigzag_scan_8x8_field_avx;
975
#if ARCH_X86_64
976
    if( cpu&X264_CPU_AVX )
977
    {
978
        pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_avx;
979
        pf_progressive->scan_8x8 = x264_zigzag_scan_8x8_frame_avx;
980
    }
981
#endif // ARCH_X86_64
982
    if( cpu&X264_CPU_AVX512 )
983
    {
984
        pf_interlaced->scan_4x4  = x264_zigzag_scan_4x4_field_avx512;
985
        pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_avx512;
986
        pf_interlaced->scan_8x8  = x264_zigzag_scan_8x8_field_avx512;
987
        pf_progressive->scan_8x8 = x264_zigzag_scan_8x8_frame_avx512;
988
    }
989
#endif // HAVE_MMX
990
#else
991
#if HAVE_MMX
992
    if( cpu&X264_CPU_MMX )
993
        pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_mmx;
994
    if( cpu&X264_CPU_MMX2 )
995
    {
996
        pf_interlaced->scan_8x8  = x264_zigzag_scan_8x8_field_mmx2;
997
        pf_progressive->scan_8x8 = x264_zigzag_scan_8x8_frame_mmx2;
998
    }
999
    if( cpu&X264_CPU_SSE )
1000
        pf_interlaced->scan_4x4  = x264_zigzag_scan_4x4_field_sse;
1001
    if( cpu&X264_CPU_SSE2_IS_FAST )
1002
        pf_progressive->scan_8x8 = x264_zigzag_scan_8x8_frame_sse2;
1003
    if( cpu&X264_CPU_SSSE3 )
1004
    {
1005
        pf_interlaced->sub_4x4   = x264_zigzag_sub_4x4_field_ssse3;
1006
        pf_progressive->sub_4x4  = x264_zigzag_sub_4x4_frame_ssse3;
1007
        pf_interlaced->sub_4x4ac = x264_zigzag_sub_4x4ac_field_ssse3;
1008
        pf_progressive->sub_4x4ac= x264_zigzag_sub_4x4ac_frame_ssse3;
1009
        pf_progressive->scan_8x8 = x264_zigzag_scan_8x8_frame_ssse3;
1010
        if( !(cpu&X264_CPU_SLOW_SHUFFLE) )
1011
            pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_ssse3;
1012
    }
1013
    if( cpu&X264_CPU_AVX )
1014
    {
1015
        pf_interlaced->sub_4x4   = x264_zigzag_sub_4x4_field_avx;
1016
        pf_progressive->sub_4x4  = x264_zigzag_sub_4x4_frame_avx;
1017
#if ARCH_X86_64
1018
        pf_interlaced->sub_4x4ac = x264_zigzag_sub_4x4ac_field_avx;
1019
        pf_progressive->sub_4x4ac= x264_zigzag_sub_4x4ac_frame_avx;
1020
#endif
1021
        pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_avx;
1022
    }
1023
    if( cpu&X264_CPU_XOP )
1024
    {
1025
        pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_xop;
1026
        pf_progressive->scan_8x8 = x264_zigzag_scan_8x8_frame_xop;
1027
        pf_interlaced->scan_8x8 = x264_zigzag_scan_8x8_field_xop;
1028
    }
1029
    if( cpu&X264_CPU_AVX512 )
1030
    {
1031
        pf_interlaced->scan_4x4  = x264_zigzag_scan_4x4_field_avx512;
1032
        pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_avx512;
1033
        pf_interlaced->scan_8x8  = x264_zigzag_scan_8x8_field_avx512;
1034
        pf_progressive->scan_8x8 = x264_zigzag_scan_8x8_frame_avx512;
1035
    }
1036
#endif // HAVE_MMX
1037
#if HAVE_ALTIVEC
1038
    if( cpu&X264_CPU_ALTIVEC )
1039
    {
1040
        pf_interlaced->scan_4x4  = x264_zigzag_scan_4x4_field_altivec;
1041
        pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_altivec;
1042
        pf_progressive->scan_8x8  = x264_zigzag_scan_8x8_frame_altivec;
1043
    }
1044
#endif
1045
#if HAVE_ARMV6 || HAVE_AARCH64
1046
    if( cpu&X264_CPU_NEON )
1047
    {
1048
        pf_progressive->scan_4x4  = x264_zigzag_scan_4x4_frame_neon;
1049
#if HAVE_AARCH64
1050
        pf_interlaced->scan_4x4   = x264_zigzag_scan_4x4_field_neon;
1051
        pf_interlaced->scan_8x8   = x264_zigzag_scan_8x8_field_neon;
1052
        pf_interlaced->sub_4x4    = x264_zigzag_sub_4x4_field_neon;
1053
        pf_interlaced->sub_4x4ac  = x264_zigzag_sub_4x4ac_field_neon;
1054
        pf_interlaced->sub_8x8    = x264_zigzag_sub_8x8_field_neon;
1055
        pf_progressive->scan_8x8  = x264_zigzag_scan_8x8_frame_neon;
1056
        pf_progressive->sub_4x4   = x264_zigzag_sub_4x4_frame_neon;
1057
        pf_progressive->sub_4x4ac = x264_zigzag_sub_4x4ac_frame_neon;
1058
        pf_progressive->sub_8x8   = x264_zigzag_sub_8x8_frame_neon;
1059
#endif // HAVE_AARCH64
1060
    }
1061
#endif // HAVE_ARMV6 || HAVE_AARCH64
1062
#endif // HIGH_BIT_DEPTH
1063
1064
158
    /* C default for interleave_8x8_cavlc; the same routine serves both tables. */
    pf_interlaced->interleave_8x8_cavlc =
1065
158
    pf_progressive->interleave_8x8_cavlc = zigzag_interleave_8x8_cavlc;
1066
#if HAVE_MMX
1067
#if HIGH_BIT_DEPTH
1068
    if( cpu&X264_CPU_SSE2 )
1069
    {
1070
        pf_interlaced->interleave_8x8_cavlc =
1071
        pf_progressive->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_sse2;
1072
    }
1073
    if( cpu&X264_CPU_AVX )
1074
    {
1075
        pf_interlaced->interleave_8x8_cavlc =
1076
        pf_progressive->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_avx;
1077
    }
1078
    if( cpu&X264_CPU_AVX512 )
1079
    {
1080
        pf_interlaced->interleave_8x8_cavlc =
1081
        pf_progressive->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_avx512;
1082
    }
1083
#else
1084
    if( cpu&X264_CPU_MMX )
1085
    {
1086
        pf_interlaced->interleave_8x8_cavlc =
1087
        pf_progressive->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_mmx;
1088
    }
1089
    if( (cpu&X264_CPU_SSE2) && !(cpu&(X264_CPU_SLOW_SHUFFLE|X264_CPU_SSE2_IS_SLOW)) )
1090
    {
1091
        pf_interlaced->interleave_8x8_cavlc =
1092
        pf_progressive->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_sse2;
1093
    }
1094
1095
    if( cpu&X264_CPU_AVX )
1096
    {
1097
        pf_interlaced->interleave_8x8_cavlc =
1098
        pf_progressive->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_avx;
1099
    }
1100
1101
    if( cpu&X264_CPU_AVX2 )
1102
    {
1103
        pf_interlaced->interleave_8x8_cavlc =
1104
        pf_progressive->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_avx2;
1105
    }
1106
    if( cpu&X264_CPU_AVX512 )
1107
    {
1108
        pf_interlaced->interleave_8x8_cavlc =
1109
        pf_progressive->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_avx512;
1110
    }
1111
#endif // HIGH_BIT_DEPTH
1112
#endif
1113
#if !HIGH_BIT_DEPTH
1114
#if HAVE_AARCH64
1115
    if( cpu&X264_CPU_NEON )
1116
    {
1117
        pf_interlaced->interleave_8x8_cavlc =
1118
        pf_progressive->interleave_8x8_cavlc =  x264_zigzag_interleave_8x8_cavlc_neon;
1119
    }
1120
#if HAVE_SVE
1121
    /* NOTE(review): unlike the other interleave_8x8_cavlc overrides, this one
     * touches only the progressive table; the interlaced entry keeps whatever
     * was assigned above -- confirm this is intended. */
    if( cpu&X264_CPU_SVE )
1122
    {
1123
        pf_progressive->interleave_8x8_cavlc =  x264_zigzag_interleave_8x8_cavlc_sve;
1124
    }
1125
#endif
1126
#endif // HAVE_AARCH64
1127
1128
#if HAVE_ALTIVEC
1129
    if( cpu&X264_CPU_ALTIVEC )
1130
    {
1131
        pf_interlaced->interleave_8x8_cavlc =
1132
        pf_progressive->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_altivec;
1133
    }
1134
#endif // HAVE_ALTIVEC
1135
1136
#if HAVE_MSA
1137
    if( cpu&X264_CPU_MSA )
1138
    {
1139
        pf_progressive->scan_4x4  = x264_zigzag_scan_4x4_frame_msa;
1140
    }
1141
#endif
1142
1143
#if HAVE_LSX
1144
    if( cpu&X264_CPU_LASX )
1145
    {
1146
        pf_progressive->scan_4x4  = x264_zigzag_scan_4x4_frame_lasx;
1147
    }
1148
#endif
1149
#endif // !HIGH_BIT_DEPTH
1150
158
}
x264_8_zigzag_init
Line
Count
Source
951
158
{
952
158
    pf_interlaced->scan_8x8   = zigzag_scan_8x8_field;
953
158
    pf_progressive->scan_8x8  = zigzag_scan_8x8_frame;
954
158
    pf_interlaced->scan_4x4   = zigzag_scan_4x4_field;
955
158
    pf_progressive->scan_4x4  = zigzag_scan_4x4_frame;
956
158
    pf_interlaced->sub_8x8    = zigzag_sub_8x8_field;
957
158
    pf_progressive->sub_8x8   = zigzag_sub_8x8_frame;
958
158
    pf_interlaced->sub_4x4    = zigzag_sub_4x4_field;
959
158
    pf_progressive->sub_4x4   = zigzag_sub_4x4_frame;
960
158
    pf_interlaced->sub_4x4ac  = zigzag_sub_4x4ac_field;
961
158
    pf_progressive->sub_4x4ac = zigzag_sub_4x4ac_frame;
962
963
#if HIGH_BIT_DEPTH
964
#if HAVE_MMX
965
    if( cpu&X264_CPU_SSE2 )
966
    {
967
        pf_interlaced->scan_4x4  = x264_zigzag_scan_4x4_field_sse2;
968
        pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_sse2;
969
        pf_progressive->scan_8x8 = x264_zigzag_scan_8x8_frame_sse2;
970
    }
971
    if( cpu&X264_CPU_SSE4 )
972
        pf_interlaced->scan_8x8 = x264_zigzag_scan_8x8_field_sse4;
973
    if( cpu&X264_CPU_AVX )
974
        pf_interlaced->scan_8x8 = x264_zigzag_scan_8x8_field_avx;
975
#if ARCH_X86_64
976
    if( cpu&X264_CPU_AVX )
977
    {
978
        pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_avx;
979
        pf_progressive->scan_8x8 = x264_zigzag_scan_8x8_frame_avx;
980
    }
981
#endif // ARCH_X86_64
982
    if( cpu&X264_CPU_AVX512 )
983
    {
984
        pf_interlaced->scan_4x4  = x264_zigzag_scan_4x4_field_avx512;
985
        pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_avx512;
986
        pf_interlaced->scan_8x8  = x264_zigzag_scan_8x8_field_avx512;
987
        pf_progressive->scan_8x8 = x264_zigzag_scan_8x8_frame_avx512;
988
    }
989
#endif // HAVE_MMX
990
#else
991
#if HAVE_MMX
992
    if( cpu&X264_CPU_MMX )
993
        pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_mmx;
994
    if( cpu&X264_CPU_MMX2 )
995
    {
996
        pf_interlaced->scan_8x8  = x264_zigzag_scan_8x8_field_mmx2;
997
        pf_progressive->scan_8x8 = x264_zigzag_scan_8x8_frame_mmx2;
998
    }
999
    if( cpu&X264_CPU_SSE )
1000
        pf_interlaced->scan_4x4  = x264_zigzag_scan_4x4_field_sse;
1001
    if( cpu&X264_CPU_SSE2_IS_FAST )
1002
        pf_progressive->scan_8x8 = x264_zigzag_scan_8x8_frame_sse2;
1003
    if( cpu&X264_CPU_SSSE3 )
1004
    {
1005
        pf_interlaced->sub_4x4   = x264_zigzag_sub_4x4_field_ssse3;
1006
        pf_progressive->sub_4x4  = x264_zigzag_sub_4x4_frame_ssse3;
1007
        pf_interlaced->sub_4x4ac = x264_zigzag_sub_4x4ac_field_ssse3;
1008
        pf_progressive->sub_4x4ac= x264_zigzag_sub_4x4ac_frame_ssse3;
1009
        pf_progressive->scan_8x8 = x264_zigzag_scan_8x8_frame_ssse3;
1010
        if( !(cpu&X264_CPU_SLOW_SHUFFLE) )
1011
            pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_ssse3;
1012
    }
1013
    if( cpu&X264_CPU_AVX )
1014
    {
1015
        pf_interlaced->sub_4x4   = x264_zigzag_sub_4x4_field_avx;
1016
        pf_progressive->sub_4x4  = x264_zigzag_sub_4x4_frame_avx;
1017
#if ARCH_X86_64
1018
        pf_interlaced->sub_4x4ac = x264_zigzag_sub_4x4ac_field_avx;
1019
        pf_progressive->sub_4x4ac= x264_zigzag_sub_4x4ac_frame_avx;
1020
#endif
1021
        pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_avx;
1022
    }
1023
    if( cpu&X264_CPU_XOP )
1024
    {
1025
        pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_xop;
1026
        pf_progressive->scan_8x8 = x264_zigzag_scan_8x8_frame_xop;
1027
        pf_interlaced->scan_8x8 = x264_zigzag_scan_8x8_field_xop;
1028
    }
1029
    if( cpu&X264_CPU_AVX512 )
1030
    {
1031
        pf_interlaced->scan_4x4  = x264_zigzag_scan_4x4_field_avx512;
1032
        pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_avx512;
1033
        pf_interlaced->scan_8x8  = x264_zigzag_scan_8x8_field_avx512;
1034
        pf_progressive->scan_8x8 = x264_zigzag_scan_8x8_frame_avx512;
1035
    }
1036
#endif // HAVE_MMX
1037
#if HAVE_ALTIVEC
1038
    if( cpu&X264_CPU_ALTIVEC )
1039
    {
1040
        pf_interlaced->scan_4x4  = x264_zigzag_scan_4x4_field_altivec;
1041
        pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_altivec;
1042
        pf_progressive->scan_8x8  = x264_zigzag_scan_8x8_frame_altivec;
1043
    }
1044
#endif
1045
#if HAVE_ARMV6 || HAVE_AARCH64
1046
    if( cpu&X264_CPU_NEON )
1047
    {
1048
        pf_progressive->scan_4x4  = x264_zigzag_scan_4x4_frame_neon;
1049
#if HAVE_AARCH64
1050
        pf_interlaced->scan_4x4   = x264_zigzag_scan_4x4_field_neon;
1051
        pf_interlaced->scan_8x8   = x264_zigzag_scan_8x8_field_neon;
1052
        pf_interlaced->sub_4x4    = x264_zigzag_sub_4x4_field_neon;
1053
        pf_interlaced->sub_4x4ac  = x264_zigzag_sub_4x4ac_field_neon;
1054
        pf_interlaced->sub_8x8    = x264_zigzag_sub_8x8_field_neon;
1055
        pf_progressive->scan_8x8  = x264_zigzag_scan_8x8_frame_neon;
1056
        pf_progressive->sub_4x4   = x264_zigzag_sub_4x4_frame_neon;
1057
        pf_progressive->sub_4x4ac = x264_zigzag_sub_4x4ac_frame_neon;
1058
        pf_progressive->sub_8x8   = x264_zigzag_sub_8x8_frame_neon;
1059
#endif // HAVE_AARCH64
1060
    }
1061
#endif // HAVE_ARMV6 || HAVE_AARCH64
1062
158
#endif // HIGH_BIT_DEPTH
1063
1064
158
    pf_interlaced->interleave_8x8_cavlc =
1065
158
    pf_progressive->interleave_8x8_cavlc = zigzag_interleave_8x8_cavlc;
1066
#if HAVE_MMX
1067
#if HIGH_BIT_DEPTH
1068
    if( cpu&X264_CPU_SSE2 )
1069
    {
1070
        pf_interlaced->interleave_8x8_cavlc =
1071
        pf_progressive->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_sse2;
1072
    }
1073
    if( cpu&X264_CPU_AVX )
1074
    {
1075
        pf_interlaced->interleave_8x8_cavlc =
1076
        pf_progressive->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_avx;
1077
    }
1078
    if( cpu&X264_CPU_AVX512 )
1079
    {
1080
        pf_interlaced->interleave_8x8_cavlc =
1081
        pf_progressive->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_avx512;
1082
    }
1083
#else
1084
    if( cpu&X264_CPU_MMX )
1085
    {
1086
        pf_interlaced->interleave_8x8_cavlc =
1087
        pf_progressive->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_mmx;
1088
    }
1089
    if( (cpu&X264_CPU_SSE2) && !(cpu&(X264_CPU_SLOW_SHUFFLE|X264_CPU_SSE2_IS_SLOW)) )
1090
    {
1091
        pf_interlaced->interleave_8x8_cavlc =
1092
        pf_progressive->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_sse2;
1093
    }
1094
1095
    if( cpu&X264_CPU_AVX )
1096
    {
1097
        pf_interlaced->interleave_8x8_cavlc =
1098
        pf_progressive->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_avx;
1099
    }
1100
1101
    if( cpu&X264_CPU_AVX2 )
1102
    {
1103
        pf_interlaced->interleave_8x8_cavlc =
1104
        pf_progressive->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_avx2;
1105
    }
1106
    if( cpu&X264_CPU_AVX512 )
1107
    {
1108
        pf_interlaced->interleave_8x8_cavlc =
1109
        pf_progressive->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_avx512;
1110
    }
1111
#endif // HIGH_BIT_DEPTH
1112
#endif
1113
158
#if !HIGH_BIT_DEPTH
1114
#if HAVE_AARCH64
1115
    if( cpu&X264_CPU_NEON )
1116
    {
1117
        pf_interlaced->interleave_8x8_cavlc =
1118
        pf_progressive->interleave_8x8_cavlc =  x264_zigzag_interleave_8x8_cavlc_neon;
1119
    }
1120
#if HAVE_SVE
1121
    if( cpu&X264_CPU_SVE )
1122
    {
1123
        pf_progressive->interleave_8x8_cavlc =  x264_zigzag_interleave_8x8_cavlc_sve;
1124
    }
1125
#endif
1126
#endif // HAVE_AARCH64
1127
1128
#if HAVE_ALTIVEC
1129
    if( cpu&X264_CPU_ALTIVEC )
1130
    {
1131
        pf_interlaced->interleave_8x8_cavlc =
1132
        pf_progressive->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_altivec;
1133
    }
1134
#endif // HAVE_ALTIVEC
1135
1136
#if HAVE_MSA
1137
    if( cpu&X264_CPU_MSA )
1138
    {
1139
        pf_progressive->scan_4x4  = x264_zigzag_scan_4x4_frame_msa;
1140
    }
1141
#endif
1142
1143
#if HAVE_LSX
1144
    if( cpu&X264_CPU_LASX )
1145
    {
1146
        pf_progressive->scan_4x4  = x264_zigzag_scan_4x4_frame_lasx;
1147
    }
1148
#endif
1149
158
#endif // !HIGH_BIT_DEPTH
1150
158
}
Unexecuted instantiation: x264_10_zigzag_init