Coverage Report

Created: 2026-05-16 06:41

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/x264/common/dct.c
Line
Count
Source
1
/*****************************************************************************
2
 * dct.c: transform and zigzag
3
 *****************************************************************************
4
 * Copyright (C) 2003-2025 x264 project
5
 *
6
 * Authors: Loren Merritt <lorenm@u.washington.edu>
7
 *          Laurent Aimar <fenrir@via.ecp.fr>
8
 *          Henrik Gramner <henrik@gramner.com>
9
 *
10
 * This program is free software; you can redistribute it and/or modify
11
 * it under the terms of the GNU General Public License as published by
12
 * the Free Software Foundation; either version 2 of the License, or
13
 * (at your option) any later version.
14
 *
15
 * This program is distributed in the hope that it will be useful,
16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
 * GNU General Public License for more details.
19
 *
20
 * You should have received a copy of the GNU General Public License
21
 * along with this program; if not, write to the Free Software
22
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
23
 *
24
 * This program is also available under a commercial proprietary license.
25
 * For more information, contact us at licensing@x264.com.
26
 *****************************************************************************/
27
28
#include "common.h"
29
#if HAVE_MMX
30
#   include "x86/dct.h"
31
#endif
32
#if HAVE_ALTIVEC
33
#   include "ppc/dct.h"
34
#endif
35
#if HAVE_ARMV6
36
#   include "arm/dct.h"
37
#endif
38
#if HAVE_AARCH64
39
#   include "aarch64/dct.h"
40
#endif
41
#if HAVE_MSA
42
#   include "mips/dct.h"
43
#endif
44
#if HAVE_LSX
45
#   include "loongarch/dct.h"
46
#endif
47
/* In-place add/subtract butterfly transform of a 4x4 block of coefficients.
 * The second pass rounds and halves every output: (v + 1) >> 1. */
static void dct4x4dc( dctcoef d[16] )
{
    dctcoef tmp[16];

    /* Pass 1: butterfly each group of four consecutive inputs and store the
     * four results transposed into tmp. */
    for( int row = 0; row < 4; row++ )
    {
        const dctcoef *src = &d[row*4];
        int sum_lo  = src[0] + src[1];
        int diff_lo = src[0] - src[1];
        int sum_hi  = src[2] + src[3];
        int diff_hi = src[2] - src[3];

        tmp[ 0+row] = sum_lo  + sum_hi;
        tmp[ 4+row] = sum_lo  - sum_hi;
        tmp[ 8+row] = diff_lo - diff_hi;
        tmp[12+row] = diff_lo + diff_hi;
    }

    /* Pass 2: same butterfly on tmp, written back to d with rounding. */
    for( int row = 0; row < 4; row++ )
    {
        const dctcoef *src = &tmp[row*4];
        dctcoef *dst = &d[row*4];
        int sum_lo  = src[0] + src[1];
        int diff_lo = src[0] - src[1];
        int sum_hi  = src[2] + src[3];
        int diff_hi = src[2] - src[3];

        dst[0] = ( sum_lo  + sum_hi  + 1 ) >> 1;
        dst[1] = ( sum_lo  - sum_hi  + 1 ) >> 1;
        dst[2] = ( diff_lo - diff_hi + 1 ) >> 1;
        dst[3] = ( diff_lo + diff_hi + 1 ) >> 1;
    }
}
77
78
/* In-place add/subtract butterfly transform of a 4x4 block of coefficients.
 * Same structure as the forward version but without any rounding or
 * scaling of the outputs. */
static void idct4x4dc( dctcoef d[16] )
{
    dctcoef tmp[16];

    /* Pass 1: butterfly each group of four consecutive inputs and store the
     * four results transposed into tmp. */
    for( int row = 0; row < 4; row++ )
    {
        const dctcoef *src = &d[row*4];
        int sum_lo  = src[0] + src[1];
        int diff_lo = src[0] - src[1];
        int sum_hi  = src[2] + src[3];
        int diff_hi = src[2] - src[3];

        tmp[ 0+row] = sum_lo  + sum_hi;
        tmp[ 4+row] = sum_lo  - sum_hi;
        tmp[ 8+row] = diff_lo - diff_hi;
        tmp[12+row] = diff_lo + diff_hi;
    }

    /* Pass 2: same butterfly on tmp, written straight back to d. */
    for( int row = 0; row < 4; row++ )
    {
        const dctcoef *src = &tmp[row*4];
        dctcoef *dst = &d[row*4];
        int sum_lo  = src[0] + src[1];
        int diff_lo = src[0] - src[1];
        int sum_hi  = src[2] + src[3];
        int diff_hi = src[2] - src[3];

        dst[0] = sum_lo  + sum_hi;
        dst[1] = sum_lo  - sum_hi;
        dst[2] = diff_lo - diff_hi;
        dst[3] = diff_lo + diff_hi;
    }
}
108
109
/* Gathers element [0] of each of the eight 4x4 blocks, runs three
 * add/subtract stages over those eight values, writes the result to dct[0..7]
 * and finally clears element [0] of every input block. */
static void dct2x4dc( dctcoef dct[8], dctcoef dct4x4[8][16] )
{
    int stage1[8];
    int stage2[8];

    /* Stage 1: sum/difference of neighbouring block pairs (0,1) (2,3) ... */
    for( int k = 0; k < 4; k++ )
    {
        int first  = dct4x4[2*k  ][0];
        int second = dct4x4[2*k+1][0];
        stage1[k]   = first + second;
        stage1[k+4] = first - second;
    }

    /* Stage 2: the same pairing applied to the stage 1 values. */
    for( int k = 0; k < 4; k++ )
    {
        stage2[k]   = stage1[2*k] + stage1[2*k+1];
        stage2[k+4] = stage1[2*k] - stage1[2*k+1];
    }

    /* Stage 3: final combination, written in the output order used here. */
    dct[0] = stage2[0] + stage2[1];
    dct[1] = stage2[2] + stage2[3];
    dct[2] = stage2[0] - stage2[1];
    dct[3] = stage2[2] - stage2[3];
    dct[4] = stage2[4] - stage2[5];
    dct[5] = stage2[6] - stage2[7];
    dct[6] = stage2[4] + stage2[5];
    dct[7] = stage2[6] + stage2[7];

    /* The gathered coefficients now live in dct[]; zero them in the blocks. */
    for( int k = 0; k < 8; k++ )
        dct4x4[k][0] = 0;
}
144
145
/* Stores the element-wise difference pix1 - pix2 of an i_size x i_size area
 * into diff, packed row by row (i_size entries per row).
 * i_pix1 / i_pix2 are the distances, in pixels, between successive rows of
 * pix1 / pix2. */
static inline void pixel_sub_wxh( dctcoef *diff, int i_size,
                                  pixel *pix1, int i_pix1, pixel *pix2, int i_pix2 )
{
    dctcoef *out = diff;

    for( int row = 0; row < i_size; row++, pix1 += i_pix1, pix2 += i_pix2, out += i_size )
        for( int col = 0; col < i_size; col++ )
            out[col] = pix1[col] - pix2[col];
}
156
157
/* Computes the 4x4 difference pix1 - pix2 (rows FENC_STRIDE / FDEC_STRIDE
 * apart) and applies a two-pass integer transform to it, writing the 16
 * results to dct. */
static void sub4x4_dct( dctcoef dct[16], pixel *pix1, pixel *pix2 )
{
    dctcoef resid[16];
    dctcoef pass1[16];

    pixel_sub_wxh( resid, 4, pix1, FENC_STRIDE, pix2, FDEC_STRIDE );

    /* Pass 1: transform each run of four residuals, storing transposed. */
    for( int r = 0; r < 4; r++ )
    {
        const dctcoef *in = &resid[4*r];
        int outer_sum = in[0] + in[3];
        int inner_sum = in[1] + in[2];
        int outer_dif = in[0] - in[3];
        int inner_dif = in[1] - in[2];

        pass1[ 0+r] =   outer_sum +   inner_sum;
        pass1[ 4+r] = 2*outer_dif +   inner_dif;
        pass1[ 8+r] =   outer_sum -   inner_sum;
        pass1[12+r] =   outer_dif - 2*inner_dif;
    }

    /* Pass 2: same transform on the intermediate, stored untransposed. */
    for( int r = 0; r < 4; r++ )
    {
        const dctcoef *in = &pass1[4*r];
        dctcoef *out = &dct[4*r];
        int outer_sum = in[0] + in[3];
        int inner_sum = in[1] + in[2];
        int outer_dif = in[0] - in[3];
        int inner_dif = in[1] - in[2];

        out[0] =   outer_sum +   inner_sum;
        out[1] = 2*outer_dif +   inner_dif;
        out[2] =   outer_sum -   inner_sum;
        out[3] =   outer_dif - 2*inner_dif;
    }
}
190
191
/* Runs sub4x4_dct on the four 4x4 quadrants of an 8x8 area, in the order
 * top-left, top-right, bottom-left, bottom-right; quadrant n goes to dct[n]. */
static void sub8x8_dct( dctcoef dct[4][16], pixel *pix1, pixel *pix2 )
{
    for( int blk = 0; blk < 4; blk++ )
    {
        int x = 4 * (blk & 1);
        int y = 4 * (blk >> 1);
        sub4x4_dct( dct[blk], &pix1[y*FENC_STRIDE+x], &pix2[y*FDEC_STRIDE+x] );
    }
}
198
199
/* Runs sub8x8_dct on the four 8x8 quadrants of a 16x16 area, in the order
 * top-left, top-right, bottom-left, bottom-right; quadrant n fills
 * dct[4*n .. 4*n+3]. */
static void sub16x16_dct( dctcoef dct[16][16], pixel *pix1, pixel *pix2 )
{
    for( int quad = 0; quad < 4; quad++ )
    {
        int x = 8 * (quad & 1);
        int y = 8 * (quad >> 1);
        sub8x8_dct( &dct[4*quad], &pix1[y*FENC_STRIDE+x], &pix2[y*FDEC_STRIDE+x] );
    }
}
206
207
/* Returns the sum over a 4x4 area of (pix1 - pix2), with pix1 rows
 * FENC_STRIDE apart and pix2 rows FDEC_STRIDE apart. */
static int sub4x4_dct_dc( pixel *pix1, pixel *pix2 )
{
    int total = 0;

    for( int row = 0; row < 4; row++ )
    {
        const pixel *a = &pix1[row*FENC_STRIDE];
        const pixel *b = &pix2[row*FDEC_STRIDE];
        total += a[0] + a[1] + a[2] + a[3]
               - b[0] - b[1] - b[2] - b[3];
    }
    return total;
}
215
216
/* Computes the pixel-difference sum of each 4x4 quadrant of an 8x8 area
 * (order: top-left, top-right, bottom-left, bottom-right) and then applies
 * a 2x2 add/subtract transform to those four sums in place in dct. */
static void sub8x8_dct_dc( dctcoef dct[4], pixel *pix1, pixel *pix2 )
{
    for( int blk = 0; blk < 4; blk++ )
    {
        int x = 4 * (blk & 1);
        int y = 4 * (blk >> 1);
        dct[blk] = sub4x4_dct_dc( &pix1[y*FENC_STRIDE+x], &pix2[y*FDEC_STRIDE+x] );
    }

    /* 2x2 DC transform */
    int top_sum = dct[0] + dct[1];
    int bot_sum = dct[2] + dct[3];
    int top_dif = dct[0] - dct[1];
    int bot_dif = dct[2] - dct[3];

    dct[0] = top_sum + bot_sum;
    dct[1] = top_sum - bot_sum;
    dct[2] = top_dif + bot_dif;
    dct[3] = top_dif - bot_dif;
}
233
234
/* Computes the pixel-difference sum of each of the eight 4x4 blocks in an
 * 8-wide, 16-high area (scanned left-to-right, top-to-bottom) and applies
 * a 2x4 add/subtract transform to those sums, writing dct[0..7]. */
static void sub8x16_dct_dc( dctcoef dct[8], pixel *pix1, pixel *pix2 )
{
    int sums[8];
    int stage1[8];
    int stage2[8];

    for( int blk = 0; blk < 8; blk++ )
    {
        int x = 4 * (blk & 1);
        int y = 4 * (blk >> 1);
        sums[blk] = sub4x4_dct_dc( &pix1[y*FENC_STRIDE+x], &pix2[y*FDEC_STRIDE+x] );
    }

    /* 2x4 DC transform */
    for( int k = 0; k < 4; k++ )
    {
        stage1[k]   = sums[2*k] + sums[2*k+1];
        stage1[k+4] = sums[2*k] - sums[2*k+1];
    }
    for( int k = 0; k < 4; k++ )
    {
        stage2[k]   = stage1[2*k] + stage1[2*k+1];
        stage2[k+4] = stage1[2*k] - stage1[2*k+1];
    }

    dct[0] = stage2[0] + stage2[1];
    dct[1] = stage2[2] + stage2[3];
    dct[2] = stage2[0] - stage2[1];
    dct[3] = stage2[2] - stage2[3];
    dct[4] = stage2[4] - stage2[5];
    dct[5] = stage2[6] - stage2[7];
    dct[6] = stage2[4] + stage2[5];
    dct[7] = stage2[6] + stage2[7];
}
271
272
/* Applies a two-pass 4x4 inverse integer transform to dct, rounds the result
 * with (v + 32) >> 6, adds it to the 4x4 pixels at p_dst (rows FDEC_STRIDE
 * apart) and clips each sum with x264_clip_pixel. dct itself is not modified. */
static void add4x4_idct( pixel *p_dst, dctcoef dct[16] )
{
    dctcoef resid[16];
    dctcoef pass1[16];

    /* Pass 1: reads dct[c], dct[4+c], dct[8+c], dct[12+c]; stores transposed. */
    for( int c = 0; c < 4; c++ )
    {
        int even_sum =  dct[ 0+c]     +  dct[ 8+c];
        int even_dif =  dct[ 0+c]     -  dct[ 8+c];
        int odd_sum  =  dct[ 4+c]     + (dct[12+c]>>1);
        int odd_dif  = (dct[ 4+c]>>1) -  dct[12+c];

        dctcoef *out = &pass1[4*c];
        out[0] = even_sum + odd_sum;
        out[1] = even_dif + odd_dif;
        out[2] = even_dif - odd_dif;
        out[3] = even_sum - odd_sum;
    }

    /* Pass 2: same butterfly on the intermediate, with final rounding shift. */
    for( int c = 0; c < 4; c++ )
    {
        int even_sum =  pass1[ 0+c]     +  pass1[ 8+c];
        int even_dif =  pass1[ 0+c]     -  pass1[ 8+c];
        int odd_sum  =  pass1[ 4+c]     + (pass1[12+c]>>1);
        int odd_dif  = (pass1[ 4+c]>>1) -  pass1[12+c];

        resid[ 0+c] = ( even_sum + odd_sum + 32 ) >> 6;
        resid[ 4+c] = ( even_dif + odd_dif + 32 ) >> 6;
        resid[ 8+c] = ( even_dif - odd_dif + 32 ) >> 6;
        resid[12+c] = ( even_sum - odd_sum + 32 ) >> 6;
    }

    /* Add the residual onto the destination pixels. */
    for( int y = 0; y < 4; y++ )
    {
        pixel *line = &p_dst[y*FDEC_STRIDE];
        for( int x = 0; x < 4; x++ )
            line[x] = x264_clip_pixel( line[x] + resid[y*4+x] );
    }
}
311
312
/* Runs add4x4_idct on the four 4x4 quadrants of an 8x8 destination area
 * (top-left, top-right, bottom-left, bottom-right), using dct[n] for
 * quadrant n. */
static void add8x8_idct( pixel *p_dst, dctcoef dct[4][16] )
{
    for( int blk = 0; blk < 4; blk++ )
    {
        int x = 4 * (blk & 1);
        int y = 4 * (blk >> 1);
        add4x4_idct( &p_dst[y*FDEC_STRIDE+x], dct[blk] );
    }
}
319
320
/* Runs add8x8_idct on the four 8x8 quadrants of a 16x16 destination area
 * (top-left, top-right, bottom-left, bottom-right), using
 * dct[4*n .. 4*n+3] for quadrant n. */
static void add16x16_idct( pixel *p_dst, dctcoef dct[16][16] )
{
    for( int quad = 0; quad < 4; quad++ )
    {
        int x = 8 * (quad & 1);
        int y = 8 * (quad >> 1);
        add8x8_idct( &p_dst[y*FDEC_STRIDE+x], &dct[4*quad] );
    }
}
327
328
/****************************************************************************
329
 * 8x8 transform:
330
 ****************************************************************************/
331
332
0
/* One 8-point forward transform pass.
 * The expansion site must define SRC(n) (value of input n) and DST(n)
 * (lvalue for output n). All eight inputs are read before any output is
 * written, so SRC and DST may refer to the same storage. */
#define DCT8_1D {\
    const int in0 = SRC(0), in1 = SRC(1), in2 = SRC(2), in3 = SRC(3);\
    const int in4 = SRC(4), in5 = SRC(5), in6 = SRC(6), in7 = SRC(7);\
    const int sum0 = in0 + in7;\
    const int sum1 = in1 + in6;\
    const int sum2 = in2 + in5;\
    const int sum3 = in3 + in4;\
    const int dif0 = in0 - in7;\
    const int dif1 = in1 - in6;\
    const int dif2 = in2 - in5;\
    const int dif3 = in3 - in4;\
    const int ev0 = sum0 + sum3;\
    const int ev1 = sum1 + sum2;\
    const int ev2 = sum0 - sum3;\
    const int ev3 = sum1 - sum2;\
    const int od0 = dif1 + dif2 + (dif0 + (dif0>>1));\
    const int od1 = dif0 - dif3 - (dif2 + (dif2>>1));\
    const int od2 = dif0 + dif3 - (dif1 + (dif1>>1));\
    const int od3 = dif1 - dif2 + (dif3 + (dif3>>1));\
    DST(0) =  ev0 + ev1      ;\
    DST(1) =  od0 + (od3>>2) ;\
    DST(2) =  ev2 + (ev3>>1) ;\
    DST(3) =  od1 + (od2>>2) ;\
    DST(4) =  ev0 - ev1      ;\
    DST(5) =  od2 - (od1>>2) ;\
    DST(6) = (ev2>>1) - ev3  ;\
    DST(7) = (od0>>2) - od3  ;\
}
358
359
/* Computes the 8x8 difference pix1 - pix2 (rows FENC_STRIDE / FDEC_STRIDE
 * apart) and applies DCT8_1D twice: first in place over the difference
 * buffer, then from that buffer into dct with a transposed store. */
static void sub8x8_dct8( dctcoef dct[64], pixel *pix1, pixel *pix2 )
{
    dctcoef resid[64];

    pixel_sub_wxh( resid, 8, pix1, FENC_STRIDE, pix2, FDEC_STRIDE );

    /* Pass 1: elements n*8+k for fixed k, in place. */
#define SRC(n) resid[(n)*8+k]
#define DST(n) resid[(n)*8+k]
    for( int k = 0; k < 8; k++ )
        DCT8_1D
#undef SRC
#undef DST

    /* Pass 2: elements k*8+n for fixed k, written to dct[n*8+k]. */
#define SRC(n) resid[k*8+(n)]
#define DST(n) dct[(n)*8+k]
    for( int k = 0; k < 8; k++ )
        DCT8_1D
#undef SRC
#undef DST
}
379
380
/* Runs sub8x8_dct8 on the four 8x8 quadrants of a 16x16 area, in the order
 * top-left, top-right, bottom-left, bottom-right; quadrant n goes to dct[n]. */
static void sub16x16_dct8( dctcoef dct[4][64], pixel *pix1, pixel *pix2 )
{
    for( int quad = 0; quad < 4; quad++ )
    {
        int x = 8 * (quad & 1);
        int y = 8 * (quad >> 1);
        sub8x8_dct8( dct[quad], &pix1[y*FENC_STRIDE+x], &pix2[y*FDEC_STRIDE+x] );
    }
}
387
388
0
/* One 8-point inverse transform pass.
 * The expansion site must define SRC(n) (value of input n) and
 * DST(n, value) (statement-like store of output n). All eight inputs are
 * read before the first DST, so the pass may run in place. */
#define IDCT8_1D {\
    const int c0 = SRC(0), c1 = SRC(1), c2 = SRC(2), c3 = SRC(3);\
    const int c4 = SRC(4), c5 = SRC(5), c6 = SRC(6), c7 = SRC(7);\
    const int ev0 =  c0 + c4;\
    const int ev1 =  c0 - c4;\
    const int ev2 = (c2>>1) - c6;\
    const int ev3 = (c6>>1) + c2;\
    const int p0 = ev0 + ev3;\
    const int p1 = ev1 + ev2;\
    const int p2 = ev1 - ev2;\
    const int p3 = ev0 - ev3;\
    const int od0 = -c3 + c5 - c7 - (c7>>1);\
    const int od1 =  c1 + c7 - c3 - (c3>>1);\
    const int od2 = -c1 + c7 + c5 + (c5>>1);\
    const int od3 =  c3 + c5 + c1 + (c1>>1);\
    const int q0 = (od3>>2) + od0;\
    const int q1 =  od1 + (od2>>2);\
    const int q2 = (od1>>2) - od2;\
    const int q3 =  od3 - (od0>>2);\
    DST(0, p0 + q3);\
    DST(1, p1 + q2);\
    DST(2, p2 + q1);\
    DST(3, p3 + q0);\
    DST(4, p3 - q0);\
    DST(5, p2 - q1);\
    DST(6, p1 - q2);\
    DST(7, p0 - q3);\
}
414
415
/* Applies IDCT8_1D twice to the 64 coefficients in dct and adds the result,
 * shifted right by 6, to the 8x8 pixels at dst (rows FDEC_STRIDE apart),
 * clipping each sum with x264_clip_pixel.
 * NOTE: dct is used as scratch space and is overwritten. */
static void add8x8_idct8( pixel *dst, dctcoef dct[64] )
{
    /* Bias the first coefficient so that the >>6 in the last pass rounds. */
    dct[0] += 32;

    /* Pass 1: elements n*8+k for fixed k, transformed in place. */
#define SRC(n)     dct[(n)*8+k]
#define DST(n,rhs) dct[(n)*8+k] = (rhs)
    for( int k = 0; k < 8; k++ )
        IDCT8_1D
#undef SRC
#undef DST

    /* Pass 2: elements k*8+n for fixed k; output n lands on pixel column k
     * of destination row n. */
#define SRC(n)     dct[k*8+(n)]
#define DST(n,rhs) dst[k + (n)*FDEC_STRIDE] = x264_clip_pixel( dst[k + (n)*FDEC_STRIDE] + ((rhs) >> 6) )
    for( int k = 0; k < 8; k++ )
        IDCT8_1D
#undef SRC
#undef DST
}
433
434
/* Runs add8x8_idct8 on the four 8x8 quadrants of a 16x16 destination area
 * (top-left, top-right, bottom-left, bottom-right), using dct[n] for
 * quadrant n. */
static void add16x16_idct8( pixel *dst, dctcoef dct[4][64] )
{
    for( int quad = 0; quad < 4; quad++ )
    {
        int x = 8 * (quad & 1);
        int y = 8 * (quad >> 1);
        add8x8_idct8( &dst[y*FDEC_STRIDE+x], dct[quad] );
    }
}
441
442
/* Adds the single rounded value (dc + 32) >> 6 to every pixel of the 4x4
 * area at p_dst (rows FDEC_STRIDE apart), clipping with x264_clip_pixel. */
static inline void add4x4_idct_dc( pixel *p_dst, dctcoef dc )
{
    const dctcoef delta = (dc + 32) >> 6;

    for( int y = 0; y < 4; y++ )
    {
        pixel *line = &p_dst[y*FDEC_STRIDE];
        for( int x = 0; x < 4; x++ )
            line[x] = x264_clip_pixel( line[x] + delta );
    }
}
453
454
/* Runs add4x4_idct_dc on the four 4x4 quadrants of an 8x8 destination area
 * (top-left, top-right, bottom-left, bottom-right), using dct[n] for
 * quadrant n. */
static void add8x8_idct_dc( pixel *p_dst, dctcoef dct[4] )
{
    for( int blk = 0; blk < 4; blk++ )
    {
        int x = 4 * (blk & 1);
        int y = 4 * (blk >> 1);
        add4x4_idct_dc( &p_dst[y*FDEC_STRIDE+x], dct[blk] );
    }
}
461
462
/* Runs add4x4_idct_dc on all sixteen 4x4 blocks of a 16x16 destination
 * area, scanning left-to-right then top-to-bottom; block n uses dct[n]. */
static void add16x16_idct_dc( pixel *p_dst, dctcoef dct[16] )
{
    for( int blk = 0; blk < 16; blk++ )
    {
        int x = 4 * (blk & 3);
        int y = 4 * (blk >> 2);
        add4x4_idct_dc( &p_dst[y*FDEC_STRIDE+x], dct[blk] );
    }
}
472
473
474
/****************************************************************************
475
 * x264_dct_init:
476
 ****************************************************************************/
477
/* Fills the function table dctf.
 * Every entry is first set to the C implementation from this file. Entries
 * are then overridden with platform-specific versions for each feature bit
 * set in cpu, subject to the HAVE_* / HIGH_BIT_DEPTH / ARCH_X86_64 build
 * switches. Later assignments replace earlier ones, so the order of the
 * feature checks below determines which version is finally installed. */
void x264_dct_init( uint32_t cpu, x264_dct_function_t *dctf )
{
    /* C fallbacks. */
    dctf->sub4x4_dct       = sub4x4_dct;
    dctf->add4x4_idct      = add4x4_idct;

    dctf->sub8x8_dct       = sub8x8_dct;
    dctf->sub8x8_dct_dc    = sub8x8_dct_dc;
    dctf->add8x8_idct      = add8x8_idct;
    dctf->add8x8_idct_dc   = add8x8_idct_dc;

    dctf->sub8x16_dct_dc   = sub8x16_dct_dc;

    dctf->sub16x16_dct     = sub16x16_dct;
    dctf->add16x16_idct    = add16x16_idct;
    dctf->add16x16_idct_dc = add16x16_idct_dc;

    dctf->sub8x8_dct8      = sub8x8_dct8;
    dctf->add8x8_idct8     = add8x8_idct8;

    dctf->sub16x16_dct8    = sub16x16_dct8;
    dctf->add16x16_idct8   = add16x16_idct8;

    dctf->dct4x4dc         = dct4x4dc;
    dctf->idct4x4dc        = idct4x4dc;

    dctf->dct2x4dc         = dct2x4dc;

#if HIGH_BIT_DEPTH
#if HAVE_MMX
    if( cpu & X264_CPU_MMX )
    {
        dctf->sub4x4_dct       = x264_sub4x4_dct_mmx;
        dctf->sub8x8_dct       = x264_sub8x8_dct_mmx;
        dctf->sub16x16_dct     = x264_sub16x16_dct_mmx;
    }
    if( cpu & X264_CPU_SSE2 )
    {
        dctf->add4x4_idct      = x264_add4x4_idct_sse2;
        dctf->dct4x4dc         = x264_dct4x4dc_sse2;
        dctf->idct4x4dc        = x264_idct4x4dc_sse2;
        dctf->dct2x4dc         = x264_dct2x4dc_sse2;
        dctf->sub8x8_dct8      = x264_sub8x8_dct8_sse2;
        dctf->sub16x16_dct8    = x264_sub16x16_dct8_sse2;
        dctf->add8x8_idct      = x264_add8x8_idct_sse2;
        dctf->add16x16_idct    = x264_add16x16_idct_sse2;
        dctf->add8x8_idct8     = x264_add8x8_idct8_sse2;
        dctf->add16x16_idct8   = x264_add16x16_idct8_sse2;
        dctf->sub8x8_dct_dc    = x264_sub8x8_dct_dc_sse2;
        dctf->add8x8_idct_dc   = x264_add8x8_idct_dc_sse2;
        dctf->sub8x16_dct_dc   = x264_sub8x16_dct_dc_sse2;
        dctf->add16x16_idct_dc = x264_add16x16_idct_dc_sse2;
    }
    if( cpu & X264_CPU_SSE4 )
    {
        dctf->sub8x8_dct8      = x264_sub8x8_dct8_sse4;
        dctf->sub16x16_dct8    = x264_sub16x16_dct8_sse4;
    }
    if( cpu & X264_CPU_AVX )
    {
        dctf->add4x4_idct      = x264_add4x4_idct_avx;
        dctf->dct4x4dc         = x264_dct4x4dc_avx;
        dctf->idct4x4dc        = x264_idct4x4dc_avx;
        dctf->dct2x4dc         = x264_dct2x4dc_avx;
        dctf->sub8x8_dct8      = x264_sub8x8_dct8_avx;
        dctf->sub16x16_dct8    = x264_sub16x16_dct8_avx;
        dctf->add8x8_idct      = x264_add8x8_idct_avx;
        dctf->add16x16_idct    = x264_add16x16_idct_avx;
        dctf->add8x8_idct8     = x264_add8x8_idct8_avx;
        dctf->add16x16_idct8   = x264_add16x16_idct8_avx;
        dctf->add8x8_idct_dc   = x264_add8x8_idct_dc_avx;
        dctf->sub8x16_dct_dc   = x264_sub8x16_dct_dc_avx;
        dctf->add16x16_idct_dc = x264_add16x16_idct_dc_avx;
    }
#endif // HAVE_MMX
#else // !HIGH_BIT_DEPTH
#if HAVE_MMX
    if( cpu & X264_CPU_MMX )
    {
        dctf->sub4x4_dct       = x264_sub4x4_dct_mmx;
        dctf->add4x4_idct      = x264_add4x4_idct_mmx;
        dctf->idct4x4dc        = x264_idct4x4dc_mmx;
        dctf->sub8x8_dct_dc    = x264_sub8x8_dct_dc_mmx2;

#if !ARCH_X86_64
        dctf->sub8x8_dct       = x264_sub8x8_dct_mmx;
        dctf->sub16x16_dct     = x264_sub16x16_dct_mmx;
        dctf->add8x8_idct      = x264_add8x8_idct_mmx;
        dctf->add16x16_idct    = x264_add16x16_idct_mmx;

        dctf->sub8x8_dct8      = x264_sub8x8_dct8_mmx;
        dctf->sub16x16_dct8    = x264_sub16x16_dct8_mmx;
        dctf->add8x8_idct8     = x264_add8x8_idct8_mmx;
        dctf->add16x16_idct8   = x264_add16x16_idct8_mmx;
#endif
    }

    if( cpu & X264_CPU_MMX2 )
    {
        dctf->dct4x4dc         = x264_dct4x4dc_mmx2;
        dctf->dct2x4dc         = x264_dct2x4dc_mmx2;
        dctf->add8x8_idct_dc   = x264_add8x8_idct_dc_mmx2;
        dctf->add16x16_idct_dc = x264_add16x16_idct_dc_mmx2;
    }

    if( cpu & X264_CPU_SSE2 )
    {
        dctf->sub8x8_dct8      = x264_sub8x8_dct8_sse2;
        dctf->sub16x16_dct8    = x264_sub16x16_dct8_sse2;
        dctf->sub8x8_dct_dc    = x264_sub8x8_dct_dc_sse2;
        dctf->sub8x16_dct_dc   = x264_sub8x16_dct_dc_sse2;
        dctf->add8x8_idct8     = x264_add8x8_idct8_sse2;
        dctf->add16x16_idct8   = x264_add16x16_idct8_sse2;

        /* These SSE2 versions are skipped when SSE2 is flagged as slow. */
        if( !(cpu & X264_CPU_SSE2_IS_SLOW) )
        {
            dctf->sub8x8_dct       = x264_sub8x8_dct_sse2;
            dctf->sub16x16_dct     = x264_sub16x16_dct_sse2;
            dctf->add8x8_idct      = x264_add8x8_idct_sse2;
            dctf->add16x16_idct    = x264_add16x16_idct_sse2;
            dctf->add16x16_idct_dc = x264_add16x16_idct_dc_sse2;
        }
    }

    if( (cpu & X264_CPU_SSSE3) && !(cpu & X264_CPU_SSE2_IS_SLOW) )
    {
        dctf->sub8x16_dct_dc = x264_sub8x16_dct_dc_ssse3;
        if( !(cpu & X264_CPU_SLOW_ATOM) )
        {
            dctf->sub4x4_dct    = x264_sub4x4_dct_ssse3;
            dctf->sub8x8_dct    = x264_sub8x8_dct_ssse3;
            dctf->sub16x16_dct  = x264_sub16x16_dct_ssse3;
            dctf->sub8x8_dct8   = x264_sub8x8_dct8_ssse3;
            dctf->sub16x16_dct8 = x264_sub16x16_dct8_ssse3;
            if( !(cpu & X264_CPU_SLOW_PSHUFB) )
            {
                dctf->add8x8_idct_dc   = x264_add8x8_idct_dc_ssse3;
                dctf->add16x16_idct_dc = x264_add16x16_idct_dc_ssse3;
            }
        }
    }

    if( cpu & X264_CPU_SSE4 )
        dctf->add4x4_idct = x264_add4x4_idct_sse4;

    if( cpu & X264_CPU_AVX )
    {
        dctf->add4x4_idct      = x264_add4x4_idct_avx;
        dctf->add8x8_idct      = x264_add8x8_idct_avx;
        dctf->add16x16_idct    = x264_add16x16_idct_avx;
        dctf->add8x8_idct8     = x264_add8x8_idct8_avx;
        dctf->add16x16_idct8   = x264_add16x16_idct8_avx;
        dctf->add16x16_idct_dc = x264_add16x16_idct_dc_avx;
        dctf->sub8x8_dct       = x264_sub8x8_dct_avx;
        dctf->sub16x16_dct     = x264_sub16x16_dct_avx;
        dctf->sub8x8_dct8      = x264_sub8x8_dct8_avx;
        dctf->sub16x16_dct8    = x264_sub16x16_dct8_avx;
    }

    if( cpu & X264_CPU_XOP )
    {
        dctf->sub8x8_dct       = x264_sub8x8_dct_xop;
        dctf->sub16x16_dct     = x264_sub16x16_dct_xop;
    }

    if( cpu & X264_CPU_AVX2 )
    {
        dctf->add8x8_idct      = x264_add8x8_idct_avx2;
        dctf->add16x16_idct    = x264_add16x16_idct_avx2;
        dctf->sub8x8_dct       = x264_sub8x8_dct_avx2;
        dctf->sub16x16_dct     = x264_sub16x16_dct_avx2;
        dctf->add16x16_idct_dc = x264_add16x16_idct_dc_avx2;
#if ARCH_X86_64
        dctf->sub16x16_dct8    = x264_sub16x16_dct8_avx2;
#endif
    }

    if( cpu & X264_CPU_AVX512 )
    {
        dctf->sub4x4_dct       = x264_sub4x4_dct_avx512;
        dctf->sub8x8_dct       = x264_sub8x8_dct_avx512;
        dctf->sub16x16_dct     = x264_sub16x16_dct_avx512;
        dctf->sub8x8_dct_dc    = x264_sub8x8_dct_dc_avx512;
        dctf->sub8x16_dct_dc   = x264_sub8x16_dct_dc_avx512;
        dctf->add8x8_idct      = x264_add8x8_idct_avx512;
    }
#endif //HAVE_MMX

#if HAVE_ALTIVEC
    if( cpu & X264_CPU_ALTIVEC )
    {
        dctf->sub4x4_dct       = x264_sub4x4_dct_altivec;
        dctf->sub8x8_dct       = x264_sub8x8_dct_altivec;
        dctf->sub16x16_dct     = x264_sub16x16_dct_altivec;

        dctf->add8x8_idct_dc   = x264_add8x8_idct_dc_altivec;
        dctf->add16x16_idct_dc = x264_add16x16_idct_dc_altivec;

        dctf->add4x4_idct      = x264_add4x4_idct_altivec;
        dctf->add8x8_idct      = x264_add8x8_idct_altivec;
        dctf->add16x16_idct    = x264_add16x16_idct_altivec;

        dctf->sub8x8_dct_dc    = x264_sub8x8_dct_dc_altivec;
        dctf->sub8x8_dct8      = x264_sub8x8_dct8_altivec;
        dctf->sub16x16_dct8    = x264_sub16x16_dct8_altivec;

        dctf->add8x8_idct8     = x264_add8x8_idct8_altivec;
        dctf->add16x16_idct8   = x264_add16x16_idct8_altivec;
    }
#endif

#if HAVE_ARMV6 || HAVE_AARCH64
    if( cpu & X264_CPU_NEON )
    {
        dctf->sub4x4_dct       = x264_sub4x4_dct_neon;
        dctf->sub8x8_dct       = x264_sub8x8_dct_neon;
        dctf->sub16x16_dct     = x264_sub16x16_dct_neon;
        dctf->add8x8_idct_dc   = x264_add8x8_idct_dc_neon;
        dctf->add16x16_idct_dc = x264_add16x16_idct_dc_neon;
        dctf->sub8x8_dct_dc    = x264_sub8x8_dct_dc_neon;
        dctf->dct4x4dc         = x264_dct4x4dc_neon;
        dctf->idct4x4dc        = x264_idct4x4dc_neon;

        dctf->add4x4_idct      = x264_add4x4_idct_neon;
        dctf->add8x8_idct      = x264_add8x8_idct_neon;
        dctf->add16x16_idct    = x264_add16x16_idct_neon;

        dctf->sub8x8_dct8      = x264_sub8x8_dct8_neon;
        dctf->sub16x16_dct8    = x264_sub16x16_dct8_neon;

        dctf->add8x8_idct8     = x264_add8x8_idct8_neon;
        dctf->add16x16_idct8   = x264_add16x16_idct8_neon;
        dctf->sub8x16_dct_dc   = x264_sub8x16_dct_dc_neon;
    }
#if HAVE_SVE
    if( cpu & X264_CPU_SVE )
    {
        dctf->sub4x4_dct       = x264_sub4x4_dct_sve;
    }
#endif
#if HAVE_SVE2
    if( cpu & X264_CPU_SVE2 )
    {
        dctf->add4x4_idct      = x264_add4x4_idct_sve2;
    }
#endif
#endif

#if HAVE_MSA
    if( cpu & X264_CPU_MSA )
    {
        dctf->sub4x4_dct       = x264_sub4x4_dct_msa;
        dctf->sub8x8_dct       = x264_sub8x8_dct_msa;
        dctf->sub16x16_dct     = x264_sub16x16_dct_msa;
        dctf->sub8x8_dct_dc    = x264_sub8x8_dct_dc_msa;
        dctf->sub8x16_dct_dc   = x264_sub8x16_dct_dc_msa;
        dctf->dct4x4dc         = x264_dct4x4dc_msa;
        dctf->idct4x4dc        = x264_idct4x4dc_msa;
        dctf->add4x4_idct      = x264_add4x4_idct_msa;
        dctf->add8x8_idct      = x264_add8x8_idct_msa;
        dctf->add8x8_idct_dc   = x264_add8x8_idct_dc_msa;
        dctf->add16x16_idct    = x264_add16x16_idct_msa;
        dctf->add16x16_idct_dc = x264_add16x16_idct_dc_msa;
        dctf->add8x8_idct8     = x264_add8x8_idct8_msa;
        dctf->add16x16_idct8   = x264_add16x16_idct8_msa;
    }
#endif

#if HAVE_LSX
    if( cpu & X264_CPU_LSX )
    {
        dctf->sub4x4_dct       = x264_sub4x4_dct_lsx;
        dctf->add4x4_idct      = x264_add4x4_idct_lsx;
        dctf->dct4x4dc         = x264_dct4x4dc_lsx;
        dctf->idct4x4dc        = x264_idct4x4dc_lsx;
        dctf->sub8x8_dct8      = x264_sub8x8_dct8_lsx;
        dctf->sub8x8_dct       = x264_sub8x8_dct_lsx;
        dctf->add8x8_idct      = x264_add8x8_idct_lsx;
        dctf->add8x8_idct8     = x264_add8x8_idct8_lsx;
        dctf->add8x8_idct_dc   = x264_add8x8_idct_dc_lsx;
        dctf->add16x16_idct    = x264_add16x16_idct_lsx;
        dctf->sub16x16_dct     = x264_sub16x16_dct_lsx;
        dctf->add16x16_idct_dc = x264_add16x16_idct_dc_lsx;
        dctf->sub16x16_dct8    = x264_sub16x16_dct8_lsx;
    }
    if( cpu & X264_CPU_LASX )
    {
        dctf->sub8x8_dct       = x264_sub8x8_dct_lasx;
        dctf->sub16x16_dct     = x264_sub16x16_dct_lasx;
        dctf->add8x8_idct      = x264_add8x8_idct_lasx;
        dctf->add8x8_idct8     = x264_add8x8_idct8_lasx;
        dctf->add16x16_idct    = x264_add16x16_idct_lasx;
        dctf->sub16x16_dct8    = x264_sub16x16_dct8_lasx;
        dctf->add8x8_idct_dc   = x264_add8x8_idct_dc_lasx;
        dctf->add16x16_idct_dc = x264_add16x16_idct_dc_lasx;
        dctf->dct4x4dc         = x264_dct4x4dc_lasx;
        dctf->idct4x4dc        = x264_idct4x4dc_lasx;
    }
#endif

#endif // HIGH_BIT_DEPTH
}
x264_8_dct_init
Line
Count
Source
478
169
{
479
169
    dctf->sub4x4_dct    = sub4x4_dct;
480
169
    dctf->add4x4_idct   = add4x4_idct;
481
482
169
    dctf->sub8x8_dct    = sub8x8_dct;
483
169
    dctf->sub8x8_dct_dc = sub8x8_dct_dc;
484
169
    dctf->add8x8_idct   = add8x8_idct;
485
169
    dctf->add8x8_idct_dc = add8x8_idct_dc;
486
487
169
    dctf->sub8x16_dct_dc = sub8x16_dct_dc;
488
489
169
    dctf->sub16x16_dct  = sub16x16_dct;
490
169
    dctf->add16x16_idct = add16x16_idct;
491
169
    dctf->add16x16_idct_dc = add16x16_idct_dc;
492
493
169
    dctf->sub8x8_dct8   = sub8x8_dct8;
494
169
    dctf->add8x8_idct8  = add8x8_idct8;
495
496
169
    dctf->sub16x16_dct8  = sub16x16_dct8;
497
169
    dctf->add16x16_idct8 = add16x16_idct8;
498
499
169
    dctf->dct4x4dc  = dct4x4dc;
500
169
    dctf->idct4x4dc = idct4x4dc;
501
502
169
    dctf->dct2x4dc = dct2x4dc;
503
504
#if HIGH_BIT_DEPTH
505
#if HAVE_MMX
506
    if( cpu&X264_CPU_MMX )
507
    {
508
        dctf->sub4x4_dct    = x264_sub4x4_dct_mmx;
509
        dctf->sub8x8_dct    = x264_sub8x8_dct_mmx;
510
        dctf->sub16x16_dct  = x264_sub16x16_dct_mmx;
511
    }
512
    if( cpu&X264_CPU_SSE2 )
513
    {
514
        dctf->add4x4_idct     = x264_add4x4_idct_sse2;
515
        dctf->dct4x4dc        = x264_dct4x4dc_sse2;
516
        dctf->idct4x4dc       = x264_idct4x4dc_sse2;
517
        dctf->dct2x4dc        = x264_dct2x4dc_sse2;
518
        dctf->sub8x8_dct8     = x264_sub8x8_dct8_sse2;
519
        dctf->sub16x16_dct8   = x264_sub16x16_dct8_sse2;
520
        dctf->add8x8_idct     = x264_add8x8_idct_sse2;
521
        dctf->add16x16_idct   = x264_add16x16_idct_sse2;
522
        dctf->add8x8_idct8    = x264_add8x8_idct8_sse2;
523
        dctf->add16x16_idct8    = x264_add16x16_idct8_sse2;
524
        dctf->sub8x8_dct_dc   = x264_sub8x8_dct_dc_sse2;
525
        dctf->add8x8_idct_dc  = x264_add8x8_idct_dc_sse2;
526
        dctf->sub8x16_dct_dc  = x264_sub8x16_dct_dc_sse2;
527
        dctf->add16x16_idct_dc= x264_add16x16_idct_dc_sse2;
528
    }
529
    if( cpu&X264_CPU_SSE4 )
530
    {
531
        dctf->sub8x8_dct8     = x264_sub8x8_dct8_sse4;
532
        dctf->sub16x16_dct8   = x264_sub16x16_dct8_sse4;
533
    }
534
    if( cpu&X264_CPU_AVX )
535
    {
536
        dctf->add4x4_idct     = x264_add4x4_idct_avx;
537
        dctf->dct4x4dc        = x264_dct4x4dc_avx;
538
        dctf->idct4x4dc       = x264_idct4x4dc_avx;
539
        dctf->dct2x4dc        = x264_dct2x4dc_avx;
540
        dctf->sub8x8_dct8     = x264_sub8x8_dct8_avx;
541
        dctf->sub16x16_dct8   = x264_sub16x16_dct8_avx;
542
        dctf->add8x8_idct     = x264_add8x8_idct_avx;
543
        dctf->add16x16_idct   = x264_add16x16_idct_avx;
544
        dctf->add8x8_idct8    = x264_add8x8_idct8_avx;
545
        dctf->add16x16_idct8  = x264_add16x16_idct8_avx;
546
        dctf->add8x8_idct_dc  = x264_add8x8_idct_dc_avx;
547
        dctf->sub8x16_dct_dc  = x264_sub8x16_dct_dc_avx;
548
        dctf->add16x16_idct_dc= x264_add16x16_idct_dc_avx;
549
    }
550
#endif // HAVE_MMX
551
#else // !HIGH_BIT_DEPTH
552
#if HAVE_MMX
553
    if( cpu&X264_CPU_MMX )
554
    {
555
        dctf->sub4x4_dct    = x264_sub4x4_dct_mmx;
556
        dctf->add4x4_idct   = x264_add4x4_idct_mmx;
557
        dctf->idct4x4dc     = x264_idct4x4dc_mmx;
558
        dctf->sub8x8_dct_dc = x264_sub8x8_dct_dc_mmx2;
559
560
#if !ARCH_X86_64
561
        dctf->sub8x8_dct    = x264_sub8x8_dct_mmx;
562
        dctf->sub16x16_dct  = x264_sub16x16_dct_mmx;
563
        dctf->add8x8_idct   = x264_add8x8_idct_mmx;
564
        dctf->add16x16_idct = x264_add16x16_idct_mmx;
565
566
        dctf->sub8x8_dct8   = x264_sub8x8_dct8_mmx;
567
        dctf->sub16x16_dct8 = x264_sub16x16_dct8_mmx;
568
        dctf->add8x8_idct8  = x264_add8x8_idct8_mmx;
569
        dctf->add16x16_idct8= x264_add16x16_idct8_mmx;
570
#endif
571
    }
572
573
    if( cpu&X264_CPU_MMX2 )
574
    {
575
        dctf->dct4x4dc         = x264_dct4x4dc_mmx2;
576
        dctf->dct2x4dc         = x264_dct2x4dc_mmx2;
577
        dctf->add8x8_idct_dc   = x264_add8x8_idct_dc_mmx2;
578
        dctf->add16x16_idct_dc = x264_add16x16_idct_dc_mmx2;
579
    }
580
581
    if( cpu&X264_CPU_SSE2 )
582
    {
583
        dctf->sub8x8_dct8   = x264_sub8x8_dct8_sse2;
584
        dctf->sub16x16_dct8 = x264_sub16x16_dct8_sse2;
585
        dctf->sub8x8_dct_dc = x264_sub8x8_dct_dc_sse2;
586
        dctf->sub8x16_dct_dc= x264_sub8x16_dct_dc_sse2;
587
        dctf->add8x8_idct8  = x264_add8x8_idct8_sse2;
588
        dctf->add16x16_idct8= x264_add16x16_idct8_sse2;
589
590
        if( !(cpu&X264_CPU_SSE2_IS_SLOW) )
591
        {
592
            dctf->sub8x8_dct    = x264_sub8x8_dct_sse2;
593
            dctf->sub16x16_dct  = x264_sub16x16_dct_sse2;
594
            dctf->add8x8_idct   = x264_add8x8_idct_sse2;
595
            dctf->add16x16_idct = x264_add16x16_idct_sse2;
596
            dctf->add16x16_idct_dc = x264_add16x16_idct_dc_sse2;
597
        }
598
    }
599
600
    if( (cpu&X264_CPU_SSSE3) && !(cpu&X264_CPU_SSE2_IS_SLOW) )
601
    {
602
        dctf->sub8x16_dct_dc = x264_sub8x16_dct_dc_ssse3;
603
        if( !(cpu&X264_CPU_SLOW_ATOM) )
604
        {
605
            dctf->sub4x4_dct    = x264_sub4x4_dct_ssse3;
606
            dctf->sub8x8_dct    = x264_sub8x8_dct_ssse3;
607
            dctf->sub16x16_dct  = x264_sub16x16_dct_ssse3;
608
            dctf->sub8x8_dct8   = x264_sub8x8_dct8_ssse3;
609
            dctf->sub16x16_dct8 = x264_sub16x16_dct8_ssse3;
610
            if( !(cpu&X264_CPU_SLOW_PSHUFB) )
611
            {
612
                dctf->add8x8_idct_dc = x264_add8x8_idct_dc_ssse3;
613
                dctf->add16x16_idct_dc = x264_add16x16_idct_dc_ssse3;
614
            }
615
        }
616
    }
617
618
    if( cpu&X264_CPU_SSE4 )
619
        dctf->add4x4_idct   = x264_add4x4_idct_sse4;
620
621
    if( cpu&X264_CPU_AVX )
622
    {
623
        dctf->add4x4_idct      = x264_add4x4_idct_avx;
624
        dctf->add8x8_idct      = x264_add8x8_idct_avx;
625
        dctf->add16x16_idct    = x264_add16x16_idct_avx;
626
        dctf->add8x8_idct8     = x264_add8x8_idct8_avx;
627
        dctf->add16x16_idct8   = x264_add16x16_idct8_avx;
628
        dctf->add16x16_idct_dc = x264_add16x16_idct_dc_avx;
629
        dctf->sub8x8_dct       = x264_sub8x8_dct_avx;
630
        dctf->sub16x16_dct     = x264_sub16x16_dct_avx;
631
        dctf->sub8x8_dct8      = x264_sub8x8_dct8_avx;
632
        dctf->sub16x16_dct8    = x264_sub16x16_dct8_avx;
633
    }
634
635
    if( cpu&X264_CPU_XOP )
636
    {
637
        dctf->sub8x8_dct       = x264_sub8x8_dct_xop;
638
        dctf->sub16x16_dct     = x264_sub16x16_dct_xop;
639
    }
640
641
    if( cpu&X264_CPU_AVX2 )
642
    {
643
        dctf->add8x8_idct      = x264_add8x8_idct_avx2;
644
        dctf->add16x16_idct    = x264_add16x16_idct_avx2;
645
        dctf->sub8x8_dct       = x264_sub8x8_dct_avx2;
646
        dctf->sub16x16_dct     = x264_sub16x16_dct_avx2;
647
        dctf->add16x16_idct_dc = x264_add16x16_idct_dc_avx2;
648
#if ARCH_X86_64
649
        dctf->sub16x16_dct8    = x264_sub16x16_dct8_avx2;
650
#endif
651
    }
652
653
    if( cpu&X264_CPU_AVX512 )
654
    {
655
        dctf->sub4x4_dct       = x264_sub4x4_dct_avx512;
656
        dctf->sub8x8_dct       = x264_sub8x8_dct_avx512;
657
        dctf->sub16x16_dct     = x264_sub16x16_dct_avx512;
658
        dctf->sub8x8_dct_dc    = x264_sub8x8_dct_dc_avx512;
659
        dctf->sub8x16_dct_dc   = x264_sub8x16_dct_dc_avx512;
660
        dctf->add8x8_idct      = x264_add8x8_idct_avx512;
661
    }
662
#endif //HAVE_MMX
663
664
#if HAVE_ALTIVEC
665
    if( cpu&X264_CPU_ALTIVEC )
666
    {
667
        dctf->sub4x4_dct    = x264_sub4x4_dct_altivec;
668
        dctf->sub8x8_dct    = x264_sub8x8_dct_altivec;
669
        dctf->sub16x16_dct  = x264_sub16x16_dct_altivec;
670
671
        dctf->add8x8_idct_dc = x264_add8x8_idct_dc_altivec;
672
        dctf->add16x16_idct_dc = x264_add16x16_idct_dc_altivec;
673
674
        dctf->add4x4_idct   = x264_add4x4_idct_altivec;
675
        dctf->add8x8_idct   = x264_add8x8_idct_altivec;
676
        dctf->add16x16_idct = x264_add16x16_idct_altivec;
677
678
        dctf->sub8x8_dct_dc = x264_sub8x8_dct_dc_altivec;
679
        dctf->sub8x8_dct8   = x264_sub8x8_dct8_altivec;
680
        dctf->sub16x16_dct8 = x264_sub16x16_dct8_altivec;
681
682
        dctf->add8x8_idct8  = x264_add8x8_idct8_altivec;
683
        dctf->add16x16_idct8= x264_add16x16_idct8_altivec;
684
    }
685
#endif
686
687
#if HAVE_ARMV6 || HAVE_AARCH64
688
    if( cpu&X264_CPU_NEON )
689
    {
690
        dctf->sub4x4_dct    = x264_sub4x4_dct_neon;
691
        dctf->sub8x8_dct    = x264_sub8x8_dct_neon;
692
        dctf->sub16x16_dct  = x264_sub16x16_dct_neon;
693
        dctf->add8x8_idct_dc = x264_add8x8_idct_dc_neon;
694
        dctf->add16x16_idct_dc = x264_add16x16_idct_dc_neon;
695
        dctf->sub8x8_dct_dc = x264_sub8x8_dct_dc_neon;
696
        dctf->dct4x4dc      = x264_dct4x4dc_neon;
697
        dctf->idct4x4dc     = x264_idct4x4dc_neon;
698
699
        dctf->add4x4_idct   = x264_add4x4_idct_neon;
700
        dctf->add8x8_idct   = x264_add8x8_idct_neon;
701
        dctf->add16x16_idct = x264_add16x16_idct_neon;
702
703
        dctf->sub8x8_dct8   = x264_sub8x8_dct8_neon;
704
        dctf->sub16x16_dct8 = x264_sub16x16_dct8_neon;
705
706
        dctf->add8x8_idct8  = x264_add8x8_idct8_neon;
707
        dctf->add16x16_idct8= x264_add16x16_idct8_neon;
708
        dctf->sub8x16_dct_dc= x264_sub8x16_dct_dc_neon;
709
    }
710
#if HAVE_SVE
711
    if ( cpu&X264_CPU_SVE )
712
    {
713
        dctf->sub4x4_dct    = x264_sub4x4_dct_sve;
714
    }
715
#endif
716
#if HAVE_SVE2
717
    if ( cpu&X264_CPU_SVE2 )
718
    {
719
        dctf->add4x4_idct   = x264_add4x4_idct_sve2;
720
    }
721
#endif
722
#endif
723
724
#if HAVE_MSA
725
    if( cpu&X264_CPU_MSA )
726
    {
727
        dctf->sub4x4_dct       = x264_sub4x4_dct_msa;
728
        dctf->sub8x8_dct       = x264_sub8x8_dct_msa;
729
        dctf->sub16x16_dct     = x264_sub16x16_dct_msa;
730
        dctf->sub8x8_dct_dc    = x264_sub8x8_dct_dc_msa;
731
        dctf->sub8x16_dct_dc   = x264_sub8x16_dct_dc_msa;
732
        dctf->dct4x4dc         = x264_dct4x4dc_msa;
733
        dctf->idct4x4dc        = x264_idct4x4dc_msa;
734
        dctf->add4x4_idct      = x264_add4x4_idct_msa;
735
        dctf->add8x8_idct      = x264_add8x8_idct_msa;
736
        dctf->add8x8_idct_dc   = x264_add8x8_idct_dc_msa;
737
        dctf->add16x16_idct    = x264_add16x16_idct_msa;
738
        dctf->add16x16_idct_dc = x264_add16x16_idct_dc_msa;
739
        dctf->add8x8_idct8     = x264_add8x8_idct8_msa;
740
        dctf->add16x16_idct8   = x264_add16x16_idct8_msa;
741
    }
742
#endif
743
744
#if HAVE_LSX
745
    if( cpu&X264_CPU_LSX )
746
    {
747
        dctf->sub4x4_dct       = x264_sub4x4_dct_lsx;
748
        dctf->add4x4_idct      = x264_add4x4_idct_lsx;
749
        dctf->dct4x4dc         = x264_dct4x4dc_lsx;
750
        dctf->idct4x4dc        = x264_idct4x4dc_lsx;
751
        dctf->sub8x8_dct8      = x264_sub8x8_dct8_lsx;
752
        dctf->sub8x8_dct       = x264_sub8x8_dct_lsx;
753
        dctf->add8x8_idct      = x264_add8x8_idct_lsx;
754
        dctf->add8x8_idct8     = x264_add8x8_idct8_lsx;
755
        dctf->add8x8_idct_dc   = x264_add8x8_idct_dc_lsx;
756
        dctf->add16x16_idct    = x264_add16x16_idct_lsx;
757
        dctf->sub16x16_dct     = x264_sub16x16_dct_lsx;
758
        dctf->add16x16_idct_dc = x264_add16x16_idct_dc_lsx;
759
        dctf->sub16x16_dct8    = x264_sub16x16_dct8_lsx;
760
    }
761
    if( cpu&X264_CPU_LASX )
762
    {
763
        dctf->sub8x8_dct       = x264_sub8x8_dct_lasx;
764
        dctf->sub16x16_dct     = x264_sub16x16_dct_lasx;
765
        dctf->add8x8_idct      = x264_add8x8_idct_lasx;
766
        dctf->add8x8_idct8     = x264_add8x8_idct8_lasx;
767
        dctf->add16x16_idct    = x264_add16x16_idct_lasx;
768
        dctf->sub16x16_dct8    = x264_sub16x16_dct8_lasx;
769
        dctf->add8x8_idct_dc   = x264_add8x8_idct_dc_lasx;
770
        dctf->add16x16_idct_dc = x264_add16x16_idct_dc_lasx;
771
        dctf->dct4x4dc         = x264_dct4x4dc_lasx;
772
        dctf->idct4x4dc        = x264_idct4x4dc_lasx;
773
    }
774
#endif
775
776
169
#endif // HIGH_BIT_DEPTH
777
169
}
Unexecuted instantiation: x264_10_dct_init
778
779
780
0
/* 8x8 scan step: store into level[i] the coefficient found at dct[x*8+y].
 * The arguments are parenthesized so that expression arguments index the
 * intended element.  The trailing ';' is intentional: the ZIGZAG8_* tables
 * chain invocations without separators. */
#define ZIG(i,y,x) level[i] = dct[(x)*8+(y)];
781
/* Frame (progressive) scan order for an 8x8 block: 64 ZIG(i,y,x) steps, one
 * per output index i = 0..63.  What a step does is decided by whichever ZIG
 * definition is active where this table is expanded.
 * The final row deliberately carries no line continuation, so the macro ends
 * here regardless of what follows it in the file. */
#define ZIGZAG8_FRAME\
    ZIG( 0,0,0) ZIG( 1,0,1) ZIG( 2,1,0) ZIG( 3,2,0)\
    ZIG( 4,1,1) ZIG( 5,0,2) ZIG( 6,0,3) ZIG( 7,1,2)\
    ZIG( 8,2,1) ZIG( 9,3,0) ZIG(10,4,0) ZIG(11,3,1)\
    ZIG(12,2,2) ZIG(13,1,3) ZIG(14,0,4) ZIG(15,0,5)\
    ZIG(16,1,4) ZIG(17,2,3) ZIG(18,3,2) ZIG(19,4,1)\
    ZIG(20,5,0) ZIG(21,6,0) ZIG(22,5,1) ZIG(23,4,2)\
    ZIG(24,3,3) ZIG(25,2,4) ZIG(26,1,5) ZIG(27,0,6)\
    ZIG(28,0,7) ZIG(29,1,6) ZIG(30,2,5) ZIG(31,3,4)\
    ZIG(32,4,3) ZIG(33,5,2) ZIG(34,6,1) ZIG(35,7,0)\
    ZIG(36,7,1) ZIG(37,6,2) ZIG(38,5,3) ZIG(39,4,4)\
    ZIG(40,3,5) ZIG(41,2,6) ZIG(42,1,7) ZIG(43,2,7)\
    ZIG(44,3,6) ZIG(45,4,5) ZIG(46,5,4) ZIG(47,6,3)\
    ZIG(48,7,2) ZIG(49,7,3) ZIG(50,6,4) ZIG(51,5,5)\
    ZIG(52,4,6) ZIG(53,3,7) ZIG(54,4,7) ZIG(55,5,6)\
    ZIG(56,6,5) ZIG(57,7,4) ZIG(58,7,5) ZIG(59,6,6)\
    ZIG(60,5,7) ZIG(61,6,7) ZIG(62,7,6) ZIG(63,7,7)
798
799
/* Field (interlaced) scan order for an 8x8 block: 64 ZIG(i,y,x) steps, one
 * per output index i = 0..63.  The table only lists (i,y,x) triples; the
 * action taken for each triple comes from the ZIG definition that is active
 * at the point of expansion. */
#define ZIGZAG8_FIELD\
800
0
    ZIG( 0,0,0) ZIG( 1,1,0) ZIG( 2,2,0) ZIG( 3,0,1)\
801
0
    ZIG( 4,1,1) ZIG( 5,3,0) ZIG( 6,4,0) ZIG( 7,2,1)\
802
0
    ZIG( 8,0,2) ZIG( 9,3,1) ZIG(10,5,0) ZIG(11,6,0)\
803
0
    ZIG(12,7,0) ZIG(13,4,1) ZIG(14,1,2) ZIG(15,0,3)\
804
0
    ZIG(16,2,2) ZIG(17,5,1) ZIG(18,6,1) ZIG(19,7,1)\
805
0
    ZIG(20,3,2) ZIG(21,1,3) ZIG(22,0,4) ZIG(23,2,3)\
806
0
    ZIG(24,4,2) ZIG(25,5,2) ZIG(26,6,2) ZIG(27,7,2)\
807
0
    ZIG(28,3,3) ZIG(29,1,4) ZIG(30,0,5) ZIG(31,2,4)\
808
0
    ZIG(32,4,3) ZIG(33,5,3) ZIG(34,6,3) ZIG(35,7,3)\
809
0
    ZIG(36,3,4) ZIG(37,1,5) ZIG(38,0,6) ZIG(39,2,5)\
810
0
    ZIG(40,4,4) ZIG(41,5,4) ZIG(42,6,4) ZIG(43,7,4)\
811
0
    ZIG(44,3,5) ZIG(45,1,6) ZIG(46,2,6) ZIG(47,4,5)\
812
0
    ZIG(48,5,5) ZIG(49,6,5) ZIG(50,7,5) ZIG(51,3,6)\
813
0
    ZIG(52,0,7) ZIG(53,1,7) ZIG(54,4,6) ZIG(55,5,6)\
814
0
    ZIG(56,6,6) ZIG(57,7,6) ZIG(58,2,7) ZIG(59,3,7)\
815
0
    ZIG(60,4,7) ZIG(61,5,7) ZIG(62,6,7) ZIG(63,7,7)
816
817
/* Frame (progressive) scan order for a 4x4 block: 16 steps, one per output
 * index.  Index 0 goes through ZIGDC rather than ZIG so that users of the
 * table can treat the first coefficient differently from the other 15. */
#define ZIGZAG4_FRAME\
818
362k
    ZIGDC( 0,0,0) ZIG( 1,0,1) ZIG( 2,1,0) ZIG( 3,2,0)\
819
362k
    ZIG( 4,1,1) ZIG( 5,0,2) ZIG( 6,0,3) ZIG( 7,1,2)\
820
362k
    ZIG( 8,2,1) ZIG( 9,3,0) ZIG(10,3,1) ZIG(11,2,2)\
821
362k
    ZIG(12,1,3) ZIG(13,2,3) ZIG(14,3,2) ZIG(15,3,3)
822
823
/* Field (interlaced) scan order for a 4x4 block: 16 steps, one per output
 * index.  Index 0 goes through ZIGDC rather than ZIG so that users of the
 * table can treat the first coefficient differently from the other 15. */
#define ZIGZAG4_FIELD\
824
0
    ZIGDC( 0,0,0) ZIG( 1,1,0) ZIG( 2,0,1) ZIG( 3,2,0)\
825
0
    ZIG( 4,3,0) ZIG( 5,1,1) ZIG( 6,2,1) ZIG( 7,3,1)\
826
0
    ZIG( 8,0,2) ZIG( 9,1,2) ZIG(10,2,2) ZIG(11,3,2)\
827
0
    ZIG(12,0,3) ZIG(13,1,3) ZIG(14,2,3) ZIG(15,3,3)
828
829
/* C reference 8x8 frame scan: writes all 64 entries of level[] by picking
 * entries of dct[] in ZIGZAG8_FRAME order.  dct[] is only read.
 * x264_zigzag_init installs this as the default progressive scan_8x8. */
static void zigzag_scan_8x8_frame( dctcoef level[64], dctcoef dct[64] )
830
0
{
831
0
    ZIGZAG8_FRAME /* expands with the 8x8 scan ZIG: level[i] = dct[x*8+y] */
832
0
}
833
834
/* C reference 8x8 field scan: writes all 64 entries of level[] by picking
 * entries of dct[] in ZIGZAG8_FIELD order.  dct[] is only read.
 * x264_zigzag_init installs this as the default interlaced scan_8x8. */
static void zigzag_scan_8x8_field( dctcoef level[64], dctcoef dct[64] )
835
0
{
836
0
    ZIGZAG8_FIELD /* expands with the 8x8 scan ZIG: level[i] = dct[x*8+y] */
837
0
}
838
839
#undef ZIG
/* 4x4 scan step: store into level[i] the coefficient found at dct[x*4+y].
 * The arguments are parenthesized so that expression arguments index the
 * intended element.  The trailing ';' is intentional: the ZIGZAG4_* tables
 * chain invocations without separators. */
#define ZIG(i,y,x) level[i] = dct[(x)*4+(y)];
/* For a plain scan the first coefficient is moved like any other one. */
#define ZIGDC(i,y,x) ZIG(i,y,x)
842
843
/* C reference 4x4 frame scan: writes all 16 entries of level[] by picking
 * entries of dct[] in ZIGZAG4_FRAME order.  dct[] is only read.
 * x264_zigzag_init installs this as the default progressive scan_4x4. */
static void zigzag_scan_4x4_frame( dctcoef level[16], dctcoef dct[16] )
844
14.8k
{
845
14.8k
    ZIGZAG4_FRAME /* ZIG and ZIGDC are both level[i] = dct[x*4+y] here */
846
14.8k
}
847
848
/* C reference 4x4 field scan.  Only output positions 2..5 differ from the
 * input layout, so the untouched head (2 entries) and tail (10 entries) are
 * block-copied and the four permuted entries are assigned one by one. */
static void zigzag_scan_4x4_field( dctcoef level[16], dctcoef dct[16] )
{
    /* positions 0 and 1 keep their place */
    memcpy( level, dct, sizeof(dctcoef) * 2 );

    /* permuted middle section; index into dct is x*4+y for (y,x) =
     * (0,1), (2,0), (3,0), (1,1) */
    level[2] = dct[1*4+0];
    level[3] = dct[0*4+2];
    level[4] = dct[0*4+3];
    level[5] = dct[1*4+1];

    /* positions 6..15 keep their place */
    memcpy( &level[6], &dct[6], sizeof(dctcoef) * 10 );
}
854
855
#undef ZIG
/* Residual step used by the zigzag_sub_* functions: for position (y,x) read
 * the source sample at x + y*FENC_STRIDE and the sample at x + y*FDEC_STRIDE
 * in p_dst, store their difference in level[i] and OR it into nz.
 * Relies on level, p_src, p_dst and nz being in scope at the expansion site.
 * x and y are parenthesized so that expression arguments yield the intended
 * offsets. */
#define ZIG(i,y,x) {\
    int oe = (x)+(y)*FENC_STRIDE;\
    int od = (x)+(y)*FDEC_STRIDE;\
    level[i] = p_src[oe] - p_dst[od];\
    nz |= level[i];\
}
862
/* Applies CPPIXEL_X4 to rows 0..3, with p_dst advancing by FDEC_STRIDE and
 * p_src by FENC_STRIDE per row.  CPPIXEL_X4 is defined outside this section;
 * presumably it copies four pixels from its second argument to its first --
 * TODO confirm.  Needs p_dst and p_src in scope at the expansion site. */
#define COPY4x4\
863
347k
    CPPIXEL_X4( p_dst+0*FDEC_STRIDE, p_src+0*FENC_STRIDE );\
864
347k
    CPPIXEL_X4( p_dst+1*FDEC_STRIDE, p_src+1*FENC_STRIDE );\
865
347k
    CPPIXEL_X4( p_dst+2*FDEC_STRIDE, p_src+2*FENC_STRIDE );\
866
347k
    CPPIXEL_X4( p_dst+3*FDEC_STRIDE, p_src+3*FENC_STRIDE );
867
0
/* Eight-wide variant built from two CPPIXEL_X4 calls on elements 0..3 and
 * 4..7.  dst and src are parenthesized so the +4 offset is applied to the
 * whole argument expression. */
#define CPPIXEL_X8(dst,src) ( CPPIXEL_X4((dst),(src)), CPPIXEL_X4((dst)+4,(src)+4) )
868
/* Applies CPPIXEL_X8 to rows 0..7, with p_dst advancing by FDEC_STRIDE and
 * p_src by FENC_STRIDE per row.  Needs p_dst and p_src in scope at the
 * expansion site. */
#define COPY8x8\
869
0
    CPPIXEL_X8( p_dst+0*FDEC_STRIDE, p_src+0*FENC_STRIDE );\
870
0
    CPPIXEL_X8( p_dst+1*FDEC_STRIDE, p_src+1*FENC_STRIDE );\
871
0
    CPPIXEL_X8( p_dst+2*FDEC_STRIDE, p_src+2*FENC_STRIDE );\
872
0
    CPPIXEL_X8( p_dst+3*FDEC_STRIDE, p_src+3*FENC_STRIDE );\
873
0
    CPPIXEL_X8( p_dst+4*FDEC_STRIDE, p_src+4*FENC_STRIDE );\
874
0
    CPPIXEL_X8( p_dst+5*FDEC_STRIDE, p_src+5*FENC_STRIDE );\
875
0
    CPPIXEL_X8( p_dst+6*FDEC_STRIDE, p_src+6*FENC_STRIDE );\
876
0
    CPPIXEL_X8( p_dst+7*FDEC_STRIDE, p_src+7*FENC_STRIDE );
877
878
/* C reference for sub_4x4 (progressive): stores the 16 differences
 * p_src - p_dst of a 4x4 block into level[] in ZIGZAG4_FRAME order, then
 * runs COPY4x4 on p_dst/p_src.
 * Returns 1 if any stored difference is nonzero, 0 otherwise. */
static int zigzag_sub_4x4_frame( dctcoef level[16], const pixel *p_src, pixel *p_dst )
879
7.95k
{
880
7.95k
    int nz = 0; /* accumulated by name inside the ZIG expansion */
881
7.95k
    ZIGZAG4_FRAME /* ZIGDC is still an alias of ZIG here, so index 0 counts too */
882
7.95k
    COPY4x4 /* must follow the subtraction: it writes to p_dst */
883
7.95k
    return !!nz;
884
7.95k
}
885
886
/* C reference for sub_4x4 (interlaced): stores the 16 differences
 * p_src - p_dst of a 4x4 block into level[] in ZIGZAG4_FIELD order, then
 * runs COPY4x4 on p_dst/p_src.
 * Returns 1 if any stored difference is nonzero, 0 otherwise. */
static int zigzag_sub_4x4_field( dctcoef level[16], const pixel *p_src, pixel *p_dst )
887
0
{
888
0
    int nz = 0; /* accumulated by name inside the ZIG expansion */
889
0
    ZIGZAG4_FIELD /* ZIGDC is still an alias of ZIG here, so index 0 counts too */
890
0
    COPY4x4 /* must follow the subtraction: it writes to p_dst */
891
0
    return !!nz;
892
0
}
893
894
#undef ZIGDC
/* DC step for the zigzag_sub_4x4ac_* functions: the difference at position
 * (y,x) is written to *dc instead of level[], level[0] is forced to 0, and
 * nz is left untouched.  The index argument i is accepted for symmetry with
 * ZIG but not used.  Relies on dc, level, p_src and p_dst being in scope at
 * the expansion site.  x and y are parenthesized so that expression
 * arguments yield the intended offsets. */
#define ZIGDC(i,y,x) {\
    int oe = (x)+(y)*FENC_STRIDE;\
    int od = (x)+(y)*FDEC_STRIDE;\
    *dc = p_src[oe] - p_dst[od];\
    level[0] = 0;\
}
901
902
/* C reference for sub_4x4ac (progressive): like zigzag_sub_4x4_frame, but
 * the difference at position (0,0) is returned through *dc and level[0] is
 * set to 0; the remaining 15 differences go to level[1..15] in
 * ZIGZAG4_FRAME order.  COPY4x4 is then run on p_dst/p_src.
 * Returns 1 if any of the 15 non-DC differences is nonzero, 0 otherwise
 * (the DC difference does not contribute). */
static int zigzag_sub_4x4ac_frame( dctcoef level[16], const pixel *p_src, pixel *p_dst, dctcoef *dc )
903
339k
{
904
339k
    int nz = 0; /* accumulated by name inside the ZIG expansion */
905
339k
    ZIGZAG4_FRAME /* ZIGDC is the residual DC variant at this point */
906
339k
    COPY4x4 /* must follow the subtraction: it writes to p_dst */
907
339k
    return !!nz;
908
339k
}
909
910
/* C reference for sub_4x4ac (interlaced): like zigzag_sub_4x4_field, but
 * the difference at position (0,0) is returned through *dc and level[0] is
 * set to 0; the remaining 15 differences go to level[1..15] in
 * ZIGZAG4_FIELD order.  COPY4x4 is then run on p_dst/p_src.
 * Returns 1 if any of the 15 non-DC differences is nonzero, 0 otherwise
 * (the DC difference does not contribute). */
static int zigzag_sub_4x4ac_field( dctcoef level[16], const pixel *p_src, pixel *p_dst, dctcoef *dc )
911
0
{
912
0
    int nz = 0; /* accumulated by name inside the ZIG expansion */
913
0
    ZIGZAG4_FIELD /* ZIGDC is the residual DC variant at this point */
914
0
    COPY4x4 /* must follow the subtraction: it writes to p_dst */
915
0
    return !!nz;
916
0
}
917
918
/* C reference for sub_8x8 (progressive): stores the 64 differences
 * p_src - p_dst of an 8x8 block into level[] in ZIGZAG8_FRAME order, then
 * runs COPY8x8 on p_dst/p_src.
 * Returns 1 if any stored difference is nonzero, 0 otherwise. */
static int zigzag_sub_8x8_frame( dctcoef level[64], const pixel *p_src, pixel *p_dst )
919
0
{
920
0
    int nz = 0; /* accumulated by name inside the ZIG expansion */
921
0
    ZIGZAG8_FRAME /* expands with the residual ZIG, not the scan ZIG */
922
0
    COPY8x8 /* must follow the subtraction: it writes to p_dst */
923
0
    return !!nz;
924
0
}
925
/* C reference for sub_8x8 (interlaced): stores the 64 differences
 * p_src - p_dst of an 8x8 block into level[] in ZIGZAG8_FIELD order, then
 * runs COPY8x8 on p_dst/p_src.
 * Returns 1 if any stored difference is nonzero, 0 otherwise. */
static int zigzag_sub_8x8_field( dctcoef level[64], const pixel *p_src, pixel *p_dst )
926
0
{
927
0
    int nz = 0; /* accumulated by name inside the ZIG expansion */
928
0
    ZIGZAG8_FIELD /* expands with the residual ZIG, not the scan ZIG */
929
0
    COPY8x8 /* must follow the subtraction: it writes to p_dst */
930
0
    return !!nz;
931
0
}
932
933
/* The helper macros are not needed past this point.
 * NOTE(review): ZIGDC, CPPIXEL_X8 and COPY8x8 are left defined -- confirm
 * that this is intentional. */
#undef ZIG
934
#undef COPY4x4
935
936
/* C reference for interleave_8x8_cavlc: splits the 64 entries of src into
 * four groups of 16 by taking every fourth entry (group g gets src[g],
 * src[g+4], ..., src[g+60]) and stores group g contiguously at dst[g*16].
 * For each group a 0/1 "has a nonzero entry" flag is written to nnz at
 * offsets 0, 1, 8 and 9 for g = 0, 1, 2, 3 respectively. */
static void zigzag_interleave_8x8_cavlc( dctcoef *dst, dctcoef *src, uint8_t *nnz )
{
    for( int group = 0; group < 4; group++ )
    {
        const dctcoef *in = src + group;
        dctcoef *out = dst + group*16;
        int any = 0;

        for( int k = 0; k < 16; k++ )
        {
            dctcoef coef = in[k*4];
            any |= coef;
            out[k] = coef;
        }

        nnz[(group&1) + (group>>1)*8] = any != 0;
    }
}
949
950
/* Fills the two zigzag function tables.
 *
 *   cpu            - bitmask of X264_CPU_* capability flags
 *   pf_progressive - table that receives the frame-scan implementations
 *   pf_interlaced  - table that receives the field-scan implementations
 *
 * Every entry is first pointed at the C reference implementation from this
 * file and then overridden by optimized versions, depending on the
 * build-time HAVE_* / HIGH_BIT_DEPTH / ARCH_* switches and on the run-time
 * cpu flags.  The checks are not mutually exclusive: a later assignment
 * replaces an earlier one, so the order of the blocks below decides which
 * implementation ends up in the table.
 *
 * interleave_8x8_cavlc is always installed in both tables with the same
 * function, including in the SVE branch. */
void x264_zigzag_init( uint32_t cpu, x264_zigzag_function_t *pf_progressive, x264_zigzag_function_t *pf_interlaced )
{
    /* C reference implementations */
    pf_interlaced->scan_8x8   = zigzag_scan_8x8_field;
    pf_progressive->scan_8x8  = zigzag_scan_8x8_frame;
    pf_interlaced->scan_4x4   = zigzag_scan_4x4_field;
    pf_progressive->scan_4x4  = zigzag_scan_4x4_frame;
    pf_interlaced->sub_8x8    = zigzag_sub_8x8_field;
    pf_progressive->sub_8x8   = zigzag_sub_8x8_frame;
    pf_interlaced->sub_4x4    = zigzag_sub_4x4_field;
    pf_progressive->sub_4x4   = zigzag_sub_4x4_frame;
    pf_interlaced->sub_4x4ac  = zigzag_sub_4x4ac_field;
    pf_progressive->sub_4x4ac = zigzag_sub_4x4ac_frame;

#if HIGH_BIT_DEPTH
#if HAVE_MMX
    if( cpu&X264_CPU_SSE2 )
    {
        pf_interlaced->scan_4x4  = x264_zigzag_scan_4x4_field_sse2;
        pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_sse2;
        pf_progressive->scan_8x8 = x264_zigzag_scan_8x8_frame_sse2;
    }
    if( cpu&X264_CPU_SSE4 )
        pf_interlaced->scan_8x8 = x264_zigzag_scan_8x8_field_sse4;
    if( cpu&X264_CPU_AVX )
        pf_interlaced->scan_8x8 = x264_zigzag_scan_8x8_field_avx;
#if ARCH_X86_64
    if( cpu&X264_CPU_AVX )
    {
        pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_avx;
        pf_progressive->scan_8x8 = x264_zigzag_scan_8x8_frame_avx;
    }
#endif // ARCH_X86_64
    if( cpu&X264_CPU_AVX512 )
    {
        pf_interlaced->scan_4x4  = x264_zigzag_scan_4x4_field_avx512;
        pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_avx512;
        pf_interlaced->scan_8x8  = x264_zigzag_scan_8x8_field_avx512;
        pf_progressive->scan_8x8 = x264_zigzag_scan_8x8_frame_avx512;
    }
#endif // HAVE_MMX
#else
#if HAVE_MMX
    if( cpu&X264_CPU_MMX )
        pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_mmx;
    if( cpu&X264_CPU_MMX2 )
    {
        pf_interlaced->scan_8x8  = x264_zigzag_scan_8x8_field_mmx2;
        pf_progressive->scan_8x8 = x264_zigzag_scan_8x8_frame_mmx2;
    }
    if( cpu&X264_CPU_SSE )
        pf_interlaced->scan_4x4  = x264_zigzag_scan_4x4_field_sse;
    if( cpu&X264_CPU_SSE2_IS_FAST )
        pf_progressive->scan_8x8 = x264_zigzag_scan_8x8_frame_sse2;
    if( cpu&X264_CPU_SSSE3 )
    {
        pf_interlaced->sub_4x4   = x264_zigzag_sub_4x4_field_ssse3;
        pf_progressive->sub_4x4  = x264_zigzag_sub_4x4_frame_ssse3;
        pf_interlaced->sub_4x4ac = x264_zigzag_sub_4x4ac_field_ssse3;
        pf_progressive->sub_4x4ac= x264_zigzag_sub_4x4ac_frame_ssse3;
        pf_progressive->scan_8x8 = x264_zigzag_scan_8x8_frame_ssse3;
        if( !(cpu&X264_CPU_SLOW_SHUFFLE) )
            pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_ssse3;
    }
    if( cpu&X264_CPU_AVX )
    {
        pf_interlaced->sub_4x4   = x264_zigzag_sub_4x4_field_avx;
        pf_progressive->sub_4x4  = x264_zigzag_sub_4x4_frame_avx;
#if ARCH_X86_64
        pf_interlaced->sub_4x4ac = x264_zigzag_sub_4x4ac_field_avx;
        pf_progressive->sub_4x4ac= x264_zigzag_sub_4x4ac_frame_avx;
#endif
        pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_avx;
    }
    if( cpu&X264_CPU_XOP )
    {
        pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_xop;
        pf_progressive->scan_8x8 = x264_zigzag_scan_8x8_frame_xop;
        pf_interlaced->scan_8x8 = x264_zigzag_scan_8x8_field_xop;
    }
    if( cpu&X264_CPU_AVX512 )
    {
        pf_interlaced->scan_4x4  = x264_zigzag_scan_4x4_field_avx512;
        pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_avx512;
        pf_interlaced->scan_8x8  = x264_zigzag_scan_8x8_field_avx512;
        pf_progressive->scan_8x8 = x264_zigzag_scan_8x8_frame_avx512;
    }
#endif // HAVE_MMX
#if HAVE_ALTIVEC
    if( cpu&X264_CPU_ALTIVEC )
    {
        pf_interlaced->scan_4x4  = x264_zigzag_scan_4x4_field_altivec;
        pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_altivec;
        pf_progressive->scan_8x8  = x264_zigzag_scan_8x8_frame_altivec;
    }
#endif
#if HAVE_ARMV6 || HAVE_AARCH64
    if( cpu&X264_CPU_NEON )
    {
        pf_progressive->scan_4x4  = x264_zigzag_scan_4x4_frame_neon;
#if HAVE_AARCH64
        pf_interlaced->scan_4x4   = x264_zigzag_scan_4x4_field_neon;
        pf_interlaced->scan_8x8   = x264_zigzag_scan_8x8_field_neon;
        pf_interlaced->sub_4x4    = x264_zigzag_sub_4x4_field_neon;
        pf_interlaced->sub_4x4ac  = x264_zigzag_sub_4x4ac_field_neon;
        pf_interlaced->sub_8x8    = x264_zigzag_sub_8x8_field_neon;
        pf_progressive->scan_8x8  = x264_zigzag_scan_8x8_frame_neon;
        pf_progressive->sub_4x4   = x264_zigzag_sub_4x4_frame_neon;
        pf_progressive->sub_4x4ac = x264_zigzag_sub_4x4ac_frame_neon;
        pf_progressive->sub_8x8   = x264_zigzag_sub_8x8_frame_neon;
#endif // HAVE_AARCH64
    }
#endif // HAVE_ARMV6 || HAVE_AARCH64
#endif // HIGH_BIT_DEPTH

    /* interleave_8x8_cavlc: one function serves both tables */
    pf_interlaced->interleave_8x8_cavlc =
    pf_progressive->interleave_8x8_cavlc = zigzag_interleave_8x8_cavlc;
#if HAVE_MMX
#if HIGH_BIT_DEPTH
    if( cpu&X264_CPU_SSE2 )
    {
        pf_interlaced->interleave_8x8_cavlc =
        pf_progressive->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_sse2;
    }
    if( cpu&X264_CPU_AVX )
    {
        pf_interlaced->interleave_8x8_cavlc =
        pf_progressive->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_avx;
    }
    if( cpu&X264_CPU_AVX512 )
    {
        pf_interlaced->interleave_8x8_cavlc =
        pf_progressive->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_avx512;
    }
#else
    if( cpu&X264_CPU_MMX )
    {
        pf_interlaced->interleave_8x8_cavlc =
        pf_progressive->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_mmx;
    }
    if( (cpu&X264_CPU_SSE2) && !(cpu&(X264_CPU_SLOW_SHUFFLE|X264_CPU_SSE2_IS_SLOW)) )
    {
        pf_interlaced->interleave_8x8_cavlc =
        pf_progressive->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_sse2;
    }

    if( cpu&X264_CPU_AVX )
    {
        pf_interlaced->interleave_8x8_cavlc =
        pf_progressive->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_avx;
    }

    if( cpu&X264_CPU_AVX2 )
    {
        pf_interlaced->interleave_8x8_cavlc =
        pf_progressive->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_avx2;
    }
    if( cpu&X264_CPU_AVX512 )
    {
        pf_interlaced->interleave_8x8_cavlc =
        pf_progressive->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_avx512;
    }
#endif // HIGH_BIT_DEPTH
#endif
#if !HIGH_BIT_DEPTH
#if HAVE_AARCH64
    if( cpu&X264_CPU_NEON )
    {
        pf_interlaced->interleave_8x8_cavlc =
        pf_progressive->interleave_8x8_cavlc =  x264_zigzag_interleave_8x8_cavlc_neon;
    }
#if HAVE_SVE
    if( cpu&X264_CPU_SVE )
    {
        /* install in both tables, as every other ISA branch does */
        pf_interlaced->interleave_8x8_cavlc =
        pf_progressive->interleave_8x8_cavlc =  x264_zigzag_interleave_8x8_cavlc_sve;
    }
#endif
#endif // HAVE_AARCH64

#if HAVE_ALTIVEC
    if( cpu&X264_CPU_ALTIVEC )
    {
        pf_interlaced->interleave_8x8_cavlc =
        pf_progressive->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_altivec;
    }
#endif // HAVE_ALTIVEC

#if HAVE_MSA
    if( cpu&X264_CPU_MSA )
    {
        pf_progressive->scan_4x4  = x264_zigzag_scan_4x4_frame_msa;
    }
#endif

#if HAVE_LSX
    if( cpu&X264_CPU_LASX )
    {
        pf_progressive->scan_4x4  = x264_zigzag_scan_4x4_frame_lasx;
    }
#endif
#endif // !HIGH_BIT_DEPTH
}
x264_8_zigzag_init
Line
Count
Source
951
169
{
952
169
    pf_interlaced->scan_8x8   = zigzag_scan_8x8_field;
953
169
    pf_progressive->scan_8x8  = zigzag_scan_8x8_frame;
954
169
    pf_interlaced->scan_4x4   = zigzag_scan_4x4_field;
955
169
    pf_progressive->scan_4x4  = zigzag_scan_4x4_frame;
956
169
    pf_interlaced->sub_8x8    = zigzag_sub_8x8_field;
957
169
    pf_progressive->sub_8x8   = zigzag_sub_8x8_frame;
958
169
    pf_interlaced->sub_4x4    = zigzag_sub_4x4_field;
959
169
    pf_progressive->sub_4x4   = zigzag_sub_4x4_frame;
960
169
    pf_interlaced->sub_4x4ac  = zigzag_sub_4x4ac_field;
961
169
    pf_progressive->sub_4x4ac = zigzag_sub_4x4ac_frame;
962
963
#if HIGH_BIT_DEPTH
964
#if HAVE_MMX
965
    if( cpu&X264_CPU_SSE2 )
966
    {
967
        pf_interlaced->scan_4x4  = x264_zigzag_scan_4x4_field_sse2;
968
        pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_sse2;
969
        pf_progressive->scan_8x8 = x264_zigzag_scan_8x8_frame_sse2;
970
    }
971
    if( cpu&X264_CPU_SSE4 )
972
        pf_interlaced->scan_8x8 = x264_zigzag_scan_8x8_field_sse4;
973
    if( cpu&X264_CPU_AVX )
974
        pf_interlaced->scan_8x8 = x264_zigzag_scan_8x8_field_avx;
975
#if ARCH_X86_64
976
    if( cpu&X264_CPU_AVX )
977
    {
978
        pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_avx;
979
        pf_progressive->scan_8x8 = x264_zigzag_scan_8x8_frame_avx;
980
    }
981
#endif // ARCH_X86_64
982
    if( cpu&X264_CPU_AVX512 )
983
    {
984
        pf_interlaced->scan_4x4  = x264_zigzag_scan_4x4_field_avx512;
985
        pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_avx512;
986
        pf_interlaced->scan_8x8  = x264_zigzag_scan_8x8_field_avx512;
987
        pf_progressive->scan_8x8 = x264_zigzag_scan_8x8_frame_avx512;
988
    }
989
#endif // HAVE_MMX
990
#else
991
#if HAVE_MMX
992
    if( cpu&X264_CPU_MMX )
993
        pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_mmx;
994
    if( cpu&X264_CPU_MMX2 )
995
    {
996
        pf_interlaced->scan_8x8  = x264_zigzag_scan_8x8_field_mmx2;
997
        pf_progressive->scan_8x8 = x264_zigzag_scan_8x8_frame_mmx2;
998
    }
999
    if( cpu&X264_CPU_SSE )
1000
        pf_interlaced->scan_4x4  = x264_zigzag_scan_4x4_field_sse;
1001
    if( cpu&X264_CPU_SSE2_IS_FAST )
1002
        pf_progressive->scan_8x8 = x264_zigzag_scan_8x8_frame_sse2;
1003
    if( cpu&X264_CPU_SSSE3 )
1004
    {
1005
        pf_interlaced->sub_4x4   = x264_zigzag_sub_4x4_field_ssse3;
1006
        pf_progressive->sub_4x4  = x264_zigzag_sub_4x4_frame_ssse3;
1007
        pf_interlaced->sub_4x4ac = x264_zigzag_sub_4x4ac_field_ssse3;
1008
        pf_progressive->sub_4x4ac= x264_zigzag_sub_4x4ac_frame_ssse3;
1009
        pf_progressive->scan_8x8 = x264_zigzag_scan_8x8_frame_ssse3;
1010
        if( !(cpu&X264_CPU_SLOW_SHUFFLE) )
1011
            pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_ssse3;
1012
    }
1013
    if( cpu&X264_CPU_AVX )
1014
    {
1015
        pf_interlaced->sub_4x4   = x264_zigzag_sub_4x4_field_avx;
1016
        pf_progressive->sub_4x4  = x264_zigzag_sub_4x4_frame_avx;
1017
#if ARCH_X86_64
1018
        pf_interlaced->sub_4x4ac = x264_zigzag_sub_4x4ac_field_avx;
1019
        pf_progressive->sub_4x4ac= x264_zigzag_sub_4x4ac_frame_avx;
1020
#endif
1021
        pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_avx;
1022
    }
1023
    if( cpu&X264_CPU_XOP )
1024
    {
1025
        pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_xop;
1026
        pf_progressive->scan_8x8 = x264_zigzag_scan_8x8_frame_xop;
1027
        pf_interlaced->scan_8x8 = x264_zigzag_scan_8x8_field_xop;
1028
    }
1029
    if( cpu&X264_CPU_AVX512 )
1030
    {
1031
        pf_interlaced->scan_4x4  = x264_zigzag_scan_4x4_field_avx512;
1032
        pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_avx512;
1033
        pf_interlaced->scan_8x8  = x264_zigzag_scan_8x8_field_avx512;
1034
        pf_progressive->scan_8x8 = x264_zigzag_scan_8x8_frame_avx512;
1035
    }
1036
#endif // HAVE_MMX
1037
#if HAVE_ALTIVEC
1038
    if( cpu&X264_CPU_ALTIVEC )
1039
    {
1040
        pf_interlaced->scan_4x4  = x264_zigzag_scan_4x4_field_altivec;
1041
        pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_altivec;
1042
        pf_progressive->scan_8x8  = x264_zigzag_scan_8x8_frame_altivec;
1043
    }
1044
#endif
1045
#if HAVE_ARMV6 || HAVE_AARCH64
1046
    if( cpu&X264_CPU_NEON )
1047
    {
1048
        pf_progressive->scan_4x4  = x264_zigzag_scan_4x4_frame_neon;
1049
#if HAVE_AARCH64
1050
        pf_interlaced->scan_4x4   = x264_zigzag_scan_4x4_field_neon;
1051
        pf_interlaced->scan_8x8   = x264_zigzag_scan_8x8_field_neon;
1052
        pf_interlaced->sub_4x4    = x264_zigzag_sub_4x4_field_neon;
1053
        pf_interlaced->sub_4x4ac  = x264_zigzag_sub_4x4ac_field_neon;
1054
        pf_interlaced->sub_8x8    = x264_zigzag_sub_8x8_field_neon;
1055
        pf_progressive->scan_8x8  = x264_zigzag_scan_8x8_frame_neon;
1056
        pf_progressive->sub_4x4   = x264_zigzag_sub_4x4_frame_neon;
1057
        pf_progressive->sub_4x4ac = x264_zigzag_sub_4x4ac_frame_neon;
1058
        pf_progressive->sub_8x8   = x264_zigzag_sub_8x8_frame_neon;
1059
#endif // HAVE_AARCH64
1060
    }
1061
#endif // HAVE_ARMV6 || HAVE_AARCH64
1062
169
#endif // HIGH_BIT_DEPTH
1063
1064
169
    pf_interlaced->interleave_8x8_cavlc =
1065
169
    pf_progressive->interleave_8x8_cavlc = zigzag_interleave_8x8_cavlc;
1066
#if HAVE_MMX
1067
#if HIGH_BIT_DEPTH
1068
    if( cpu&X264_CPU_SSE2 )
1069
    {
1070
        pf_interlaced->interleave_8x8_cavlc =
1071
        pf_progressive->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_sse2;
1072
    }
1073
    if( cpu&X264_CPU_AVX )
1074
    {
1075
        pf_interlaced->interleave_8x8_cavlc =
1076
        pf_progressive->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_avx;
1077
    }
1078
    if( cpu&X264_CPU_AVX512 )
1079
    {
1080
        pf_interlaced->interleave_8x8_cavlc =
1081
        pf_progressive->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_avx512;
1082
    }
1083
#else
1084
    if( cpu&X264_CPU_MMX )
1085
    {
1086
        pf_interlaced->interleave_8x8_cavlc =
1087
        pf_progressive->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_mmx;
1088
    }
1089
    if( (cpu&X264_CPU_SSE2) && !(cpu&(X264_CPU_SLOW_SHUFFLE|X264_CPU_SSE2_IS_SLOW)) )
1090
    {
1091
        pf_interlaced->interleave_8x8_cavlc =
1092
        pf_progressive->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_sse2;
1093
    }
1094
1095
    if( cpu&X264_CPU_AVX )
1096
    {
1097
        pf_interlaced->interleave_8x8_cavlc =
1098
        pf_progressive->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_avx;
1099
    }
1100
1101
    if( cpu&X264_CPU_AVX2 )
1102
    {
1103
        pf_interlaced->interleave_8x8_cavlc =
1104
        pf_progressive->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_avx2;
1105
    }
1106
    if( cpu&X264_CPU_AVX512 )
1107
    {
1108
        pf_interlaced->interleave_8x8_cavlc =
1109
        pf_progressive->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_avx512;
1110
    }
1111
#endif // HIGH_BIT_DEPTH
1112
#endif
1113
169
#if !HIGH_BIT_DEPTH
1114
#if HAVE_AARCH64
1115
    if( cpu&X264_CPU_NEON )
1116
    {
1117
        pf_interlaced->interleave_8x8_cavlc =
1118
        pf_progressive->interleave_8x8_cavlc =  x264_zigzag_interleave_8x8_cavlc_neon;
1119
    }
1120
#if HAVE_SVE
1121
    if( cpu&X264_CPU_SVE )
1122
    {
1123
        pf_progressive->interleave_8x8_cavlc =  x264_zigzag_interleave_8x8_cavlc_sve;
1124
    }
1125
#endif
1126
#endif // HAVE_AARCH64
1127
1128
#if HAVE_ALTIVEC
1129
    if( cpu&X264_CPU_ALTIVEC )
1130
    {
1131
        pf_interlaced->interleave_8x8_cavlc =
1132
        pf_progressive->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_altivec;
1133
    }
1134
#endif // HAVE_ALTIVEC
1135
1136
#if HAVE_MSA
1137
    if( cpu&X264_CPU_MSA )
1138
    {
1139
        pf_progressive->scan_4x4  = x264_zigzag_scan_4x4_frame_msa;
1140
    }
1141
#endif
1142
1143
#if HAVE_LSX
1144
    if( cpu&X264_CPU_LASX )
1145
    {
1146
        pf_progressive->scan_4x4  = x264_zigzag_scan_4x4_frame_lasx;
1147
    }
1148
#endif
1149
169
#endif // !HIGH_BIT_DEPTH
1150
169
}
Unexecuted instantiation: x264_10_zigzag_init