Line | Count | Source |
1 | | /***************************************************************************** |
2 | | * quant.c: quantization and level-run |
3 | | ***************************************************************************** |
4 | | * Copyright (C) 2005-2025 x264 project |
5 | | * |
6 | | * Authors: Loren Merritt <lorenm@u.washington.edu> |
7 | | * Fiona Glaser <fiona@x264.com> |
8 | | * Christian Heine <sennindemokrit@gmx.net> |
9 | | * Henrik Gramner <henrik@gramner.com> |
10 | | * |
11 | | * This program is free software; you can redistribute it and/or modify |
12 | | * it under the terms of the GNU General Public License as published by |
13 | | * the Free Software Foundation; either version 2 of the License, or |
14 | | * (at your option) any later version. |
15 | | * |
16 | | * This program is distributed in the hope that it will be useful, |
17 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
18 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
19 | | * GNU General Public License for more details. |
20 | | * |
21 | | * You should have received a copy of the GNU General Public License |
22 | | * along with this program; if not, write to the Free Software |
23 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. |
24 | | * |
25 | | * This program is also available under a commercial proprietary license. |
26 | | * For more information, contact us at licensing@x264.com. |
27 | | *****************************************************************************/ |
28 | | |
29 | | #include "common.h" |
30 | | |
31 | | #if HAVE_MMX |
32 | | #include "x86/quant.h" |
33 | | #endif |
34 | | #if HAVE_ALTIVEC |
35 | | # include "ppc/quant.h" |
36 | | #endif |
37 | | #if HAVE_ARMV6 |
38 | | # include "arm/quant.h" |
39 | | #endif |
40 | | #if HAVE_AARCH64 |
41 | | # include "aarch64/quant.h" |
42 | | #endif |
43 | | #if HAVE_MSA |
44 | | # include "mips/quant.h" |
45 | | #endif |
46 | | #if HAVE_LSX |
47 | | # include "loongarch/quant.h" |
48 | | #endif |
49 | | |
/* Quantize one coefficient in place:
 *     coef = sign(coef) * ( ( |coef| + f ) * mf >> 16 )
 * The magnitude is cast to uint32_t before the add and multiply.
 * The quantized value is also OR-ed into a variable named `nz`, which must be
 * in scope where the macro is expanded, so the caller can tell whether any
 * coefficient stayed non-zero.
 * `coef` must be a modifiable lvalue without side effects: it is evaluated
 * several times.
 * The do/while(0) wrapper makes the expansion a single statement, so the macro
 * is safe as the unbraced body of an if/else. */
#define QUANT_ONE( coef, mf, f ) \
do \
{ \
    if( (coef) > 0 ) \
        (coef) = ((f) + (uint32_t)(coef)) * (mf) >> 16; \
    else \
        (coef) = -(int32_t)(((f) + (uint32_t)(-(coef))) * (mf) >> 16); \
    nz |= (coef); \
} while( 0 )
58 | | |
59 | | static int quant_8x8( dctcoef dct[64], udctcoef mf[64], udctcoef bias[64] ) |
60 | 0 | { |
61 | 0 | int nz = 0; |
62 | 0 | for( int i = 0; i < 64; i++ ) |
63 | 0 | QUANT_ONE( dct[i], mf[i], bias[i] ); |
64 | 0 | return !!nz; |
65 | 0 | } |
66 | | |
67 | | static int quant_4x4( dctcoef dct[16], udctcoef mf[16], udctcoef bias[16] ) |
68 | 0 | { |
69 | 0 | int nz = 0; |
70 | 0 | for( int i = 0; i < 16; i++ ) |
71 | 0 | QUANT_ONE( dct[i], mf[i], bias[i] ); |
72 | 0 | return !!nz; |
73 | 0 | } |
74 | | |
75 | | static int quant_4x4x4( dctcoef dct[4][16], udctcoef mf[16], udctcoef bias[16] ) |
76 | 0 | { |
77 | 0 | int nza = 0; |
78 | 0 | for( int j = 0; j < 4; j++ ) |
79 | 0 | { |
80 | 0 | int nz = 0; |
81 | 0 | for( int i = 0; i < 16; i++ ) |
82 | 0 | QUANT_ONE( dct[j][i], mf[i], bias[i] ); |
83 | 0 | nza |= (!!nz)<<j; |
84 | 0 | } |
85 | 0 | return nza; |
86 | 0 | } |
87 | | |
88 | | static int quant_4x4_dc( dctcoef dct[16], int mf, int bias ) |
89 | 0 | { |
90 | 0 | int nz = 0; |
91 | 0 | for( int i = 0; i < 16; i++ ) |
92 | 0 | QUANT_ONE( dct[i], mf, bias ); |
93 | 0 | return !!nz; |
94 | 0 | } |
95 | | |
96 | | static int quant_2x2_dc( dctcoef dct[4], int mf, int bias ) |
97 | 0 | { |
98 | 0 | int nz = 0; |
99 | 0 | QUANT_ONE( dct[0], mf, bias ); |
100 | 0 | QUANT_ONE( dct[1], mf, bias ); |
101 | 0 | QUANT_ONE( dct[2], mf, bias ); |
102 | 0 | QUANT_ONE( dct[3], mf, bias ); |
103 | 0 | return !!nz; |
104 | 0 | } |
105 | | |
/* Dequantization helpers for coefficient index x. Both expect `dct`,
 * `dequant_mf`, `i_mf` and `i_qbits` to be in scope at the point of use;
 * DEQUANT_SHR additionally expects the rounding term `f`. */

/* i_qbits >= 0: scale up. A multiply by (1 << i_qbits) is used rather than
 * shifting the (possibly negative) product left. */
#define DEQUANT_SHL( x ) \
    dct[x] = (1 << i_qbits) * ( dequant_mf[i_mf][x] * dct[x] )

/* i_qbits < 0: add the rounding term, then scale down by -i_qbits bits. */
#define DEQUANT_SHR( x ) \
    dct[x] = ( f + dequant_mf[i_mf][x] * dct[x] ) >> (-i_qbits)
111 | | |
112 | | static void dequant_4x4( dctcoef dct[16], int dequant_mf[6][16], int i_qp ) |
113 | 0 | { |
114 | 0 | const int i_mf = i_qp%6; |
115 | 0 | const int i_qbits = i_qp/6 - 4; |
116 | |
|
117 | 0 | if( i_qbits >= 0 ) |
118 | 0 | { |
119 | 0 | for( int i = 0; i < 16; i++ ) |
120 | 0 | DEQUANT_SHL( i ); |
121 | 0 | } |
122 | 0 | else |
123 | 0 | { |
124 | 0 | const int f = 1 << (-i_qbits-1); |
125 | 0 | for( int i = 0; i < 16; i++ ) |
126 | 0 | DEQUANT_SHR( i ); |
127 | 0 | } |
128 | 0 | } |
129 | | |
130 | | static void dequant_8x8( dctcoef dct[64], int dequant_mf[6][64], int i_qp ) |
131 | 0 | { |
132 | 0 | const int i_mf = i_qp%6; |
133 | 0 | const int i_qbits = i_qp/6 - 6; |
134 | |
|
135 | 0 | if( i_qbits >= 0 ) |
136 | 0 | { |
137 | 0 | for( int i = 0; i < 64; i++ ) |
138 | 0 | DEQUANT_SHL( i ); |
139 | 0 | } |
140 | 0 | else |
141 | 0 | { |
142 | 0 | const int f = 1 << (-i_qbits-1); |
143 | 0 | for( int i = 0; i < 64; i++ ) |
144 | 0 | DEQUANT_SHR( i ); |
145 | 0 | } |
146 | 0 | } |
147 | | |
148 | | static void dequant_4x4_dc( dctcoef dct[16], int dequant_mf[6][16], int i_qp ) |
149 | 0 | { |
150 | 0 | const int i_qbits = i_qp/6 - 6; |
151 | |
|
152 | 0 | if( i_qbits >= 0 ) |
153 | 0 | { |
154 | 0 | const int i_dmf = dequant_mf[i_qp%6][0] << i_qbits; |
155 | 0 | for( int i = 0; i < 16; i++ ) |
156 | 0 | dct[i] *= i_dmf; |
157 | 0 | } |
158 | 0 | else |
159 | 0 | { |
160 | 0 | const int i_dmf = dequant_mf[i_qp%6][0]; |
161 | 0 | const int f = 1 << (-i_qbits-1); |
162 | 0 | for( int i = 0; i < 16; i++ ) |
163 | 0 | dct[i] = ( dct[i] * i_dmf + f ) >> (-i_qbits); |
164 | 0 | } |
165 | 0 | } |
166 | | |
/* Declares the intermediate terms shared by the 2x4 DC routines below.
 * Expects an 8-entry `dct` to be in scope.
 *   a0..a3: sums of the pairs (0,1) (2,3) (4,5) (6,7); a4..a7: their differences.
 *   b0..b7: sums and differences of neighbouring a-terms.
 * The expansion ends with a ';', so it is used without a trailing semicolon. */
#define IDCT_DEQUANT_2X4_START \
    int a0 = dct[0] + dct[1], a4 = dct[0] - dct[1]; \
    int a1 = dct[2] + dct[3], a5 = dct[2] - dct[3]; \
    int a2 = dct[4] + dct[5], a6 = dct[4] - dct[5]; \
    int a3 = dct[6] + dct[7], a7 = dct[6] - dct[7]; \
    int b0 = a0 + a1, b4 = a0 - a1; \
    int b1 = a2 + a3, b5 = a2 - a3; \
    int b2 = a4 + a5, b6 = a4 - a5; \
    int b3 = a6 + a7, b7 = a6 - a7;
184 | | |
185 | | static void idct_dequant_2x4_dc( dctcoef dct[8], dctcoef dct4x4[8][16], int dequant_mf[6][16], int i_qp ) |
186 | 0 | { |
187 | 0 | IDCT_DEQUANT_2X4_START |
188 | 0 | int dmf = dequant_mf[i_qp%6][0] << i_qp/6; |
189 | 0 | dct4x4[0][0] = ((b0 + b1) * dmf + 32) >> 6; |
190 | 0 | dct4x4[1][0] = ((b2 + b3) * dmf + 32) >> 6; |
191 | 0 | dct4x4[2][0] = ((b0 - b1) * dmf + 32) >> 6; |
192 | 0 | dct4x4[3][0] = ((b2 - b3) * dmf + 32) >> 6; |
193 | 0 | dct4x4[4][0] = ((b4 - b5) * dmf + 32) >> 6; |
194 | 0 | dct4x4[5][0] = ((b6 - b7) * dmf + 32) >> 6; |
195 | 0 | dct4x4[6][0] = ((b4 + b5) * dmf + 32) >> 6; |
196 | 0 | dct4x4[7][0] = ((b6 + b7) * dmf + 32) >> 6; |
197 | 0 | } |
198 | | |
199 | | static void idct_dequant_2x4_dconly( dctcoef dct[8], int dequant_mf[6][16], int i_qp ) |
200 | 0 | { |
201 | 0 | IDCT_DEQUANT_2X4_START |
202 | 0 | int dmf = dequant_mf[i_qp%6][0] << i_qp/6; |
203 | 0 | dct[0] = ((b0 + b1) * dmf + 32) >> 6; |
204 | 0 | dct[1] = ((b2 + b3) * dmf + 32) >> 6; |
205 | 0 | dct[2] = ((b0 - b1) * dmf + 32) >> 6; |
206 | 0 | dct[3] = ((b2 - b3) * dmf + 32) >> 6; |
207 | 0 | dct[4] = ((b4 - b5) * dmf + 32) >> 6; |
208 | 0 | dct[5] = ((b6 - b7) * dmf + 32) >> 6; |
209 | 0 | dct[6] = ((b4 + b5) * dmf + 32) >> 6; |
210 | 0 | dct[7] = ((b6 + b7) * dmf + 32) >> 6; |
211 | 0 | } |
212 | | |
213 | | static ALWAYS_INLINE void optimize_chroma_idct_dequant_2x4( dctcoef out[8], dctcoef dct[8], int dmf ) |
214 | 0 | { |
215 | 0 | IDCT_DEQUANT_2X4_START |
216 | 0 | out[0] = ((b0 + b1) * dmf + 2080) >> 6; /* 2080 = 32 + (32<<6) */ |
217 | 0 | out[1] = ((b2 + b3) * dmf + 2080) >> 6; |
218 | 0 | out[2] = ((b0 - b1) * dmf + 2080) >> 6; |
219 | 0 | out[3] = ((b2 - b3) * dmf + 2080) >> 6; |
220 | 0 | out[4] = ((b4 - b5) * dmf + 2080) >> 6; |
221 | 0 | out[5] = ((b6 - b7) * dmf + 2080) >> 6; |
222 | 0 | out[6] = ((b4 + b5) * dmf + 2080) >> 6; |
223 | 0 | out[7] = ((b6 + b7) * dmf + 2080) >> 6; |
224 | 0 | } |
225 | | #undef IDCT_DEQUANT_2X4_START |
226 | | |
227 | | static ALWAYS_INLINE void optimize_chroma_idct_dequant_2x2( dctcoef out[4], dctcoef dct[4], int dmf ) |
228 | 0 | { |
229 | 0 | int d0 = dct[0] + dct[1]; |
230 | 0 | int d1 = dct[2] + dct[3]; |
231 | 0 | int d2 = dct[0] - dct[1]; |
232 | 0 | int d3 = dct[2] - dct[3]; |
233 | 0 | out[0] = ((d0 + d1) * dmf >> 5) + 32; |
234 | 0 | out[1] = ((d0 - d1) * dmf >> 5) + 32; |
235 | 0 | out[2] = ((d2 + d3) * dmf >> 5) + 32; |
236 | 0 | out[3] = ((d2 - d3) * dmf >> 5) + 32; |
237 | 0 | } |
238 | | |
239 | | static ALWAYS_INLINE int optimize_chroma_round( dctcoef *ref, dctcoef *dct, int dequant_mf, int chroma422 ) |
240 | 0 | { |
241 | 0 | dctcoef out[8]; |
242 | |
|
243 | 0 | if( chroma422 ) |
244 | 0 | optimize_chroma_idct_dequant_2x4( out, dct, dequant_mf ); |
245 | 0 | else |
246 | 0 | optimize_chroma_idct_dequant_2x2( out, dct, dequant_mf ); |
247 | |
|
248 | 0 | int sum = 0; |
249 | 0 | for( int i = 0; i < (chroma422?8:4); i++ ) |
250 | 0 | sum |= ref[i] ^ out[i]; |
251 | 0 | return sum >> 6; |
252 | 0 | } |
253 | | |
254 | | static ALWAYS_INLINE int optimize_chroma_dc_internal( dctcoef *dct, int dequant_mf, int chroma422 ) |
255 | 0 | { |
256 | | /* dequant_mf = h->dequant4_mf[CQM_4IC + b_inter][i_qp%6][0] << i_qp/6, max 32*64 */ |
257 | 0 | dctcoef dct_orig[8]; |
258 | 0 | int coeff, nz; |
259 | |
|
260 | 0 | if( chroma422 ) |
261 | 0 | optimize_chroma_idct_dequant_2x4( dct_orig, dct, dequant_mf ); |
262 | 0 | else |
263 | 0 | optimize_chroma_idct_dequant_2x2( dct_orig, dct, dequant_mf ); |
264 | | |
265 | | /* If the DC coefficients already round to zero, terminate early. */ |
266 | 0 | int sum = 0; |
267 | 0 | for( int i = 0; i < (chroma422?8:4); i++ ) |
268 | 0 | sum |= dct_orig[i]; |
269 | 0 | if( !(sum >> 6) ) |
270 | 0 | return 0; |
271 | | |
272 | | /* Start with the highest frequency coefficient... is this the best option? */ |
273 | 0 | for( nz = 0, coeff = (chroma422?7:3); coeff >= 0; coeff-- ) |
274 | 0 | { |
275 | 0 | int level = dct[coeff]; |
276 | 0 | int sign = level>>31 | 1; /* dct[coeff] < 0 ? -1 : 1 */ |
277 | |
|
278 | 0 | while( level ) |
279 | 0 | { |
280 | 0 | dct[coeff] = level - sign; |
281 | 0 | if( optimize_chroma_round( dct_orig, dct, dequant_mf, chroma422 ) ) |
282 | 0 | { |
283 | 0 | nz = 1; |
284 | 0 | dct[coeff] = level; |
285 | 0 | break; |
286 | 0 | } |
287 | 0 | level -= sign; |
288 | 0 | } |
289 | 0 | } |
290 | |
|
291 | 0 | return nz; |
292 | 0 | } |
293 | | |
294 | | static int optimize_chroma_2x2_dc( dctcoef dct[4], int dequant_mf ) |
295 | 0 | { |
296 | 0 | return optimize_chroma_dc_internal( dct, dequant_mf, 0 ); |
297 | 0 | } |
298 | | |
299 | | static int optimize_chroma_2x4_dc( dctcoef dct[8], int dequant_mf ) |
300 | 0 | { |
301 | 0 | return optimize_chroma_dc_internal( dct, dequant_mf, 1 ); |
302 | 0 | } |
303 | | |
304 | | static void denoise_dct( dctcoef *dct, uint32_t *sum, udctcoef *offset, int size ) |
305 | 0 | { |
306 | 0 | for( int i = 0; i < size; i++ ) |
307 | 0 | { |
308 | 0 | int level = dct[i]; |
309 | 0 | int sign = level>>31; |
310 | 0 | level = (level+sign)^sign; |
311 | 0 | sum[i] += level; |
312 | 0 | level -= offset[i]; |
313 | 0 | dct[i] = level<0 ? 0 : (level^sign)-sign; |
314 | 0 | } |
315 | 0 | } |
316 | | |
/* (ref: JVT-B118)
 * x264_mb_decimate_score: given a block of DCT coefficients, returns a score
 * used to decide whether the whole block can be zeroed out (a low score means
 * the block should be set to zero).
 * Used for inter macroblocks (luma and chroma):
 *  luma: for an 8x8 block: if score < 4 -> zero it
 *        for the complete mb: if score < 6 -> zero it
 *  chroma: for the complete mb: if score < 7 -> zero it
 */
325 | | |
326 | | static ALWAYS_INLINE int decimate_score_internal( dctcoef *dct, int i_max ) |
327 | 0 | { |
328 | 0 | const uint8_t *ds_table = (i_max == 64) ? x264_decimate_table8 : x264_decimate_table4; |
329 | 0 | int i_score = 0; |
330 | 0 | int idx = i_max - 1; |
331 | |
|
332 | 0 | while( idx >= 0 && dct[idx] == 0 ) |
333 | 0 | idx--; |
334 | 0 | while( idx >= 0 ) |
335 | 0 | { |
336 | 0 | int i_run; |
337 | |
|
338 | 0 | if( (unsigned)(dct[idx--] + 1) > 2 ) |
339 | 0 | return 9; |
340 | | |
341 | 0 | i_run = 0; |
342 | 0 | while( idx >= 0 && dct[idx] == 0 ) |
343 | 0 | { |
344 | 0 | idx--; |
345 | 0 | i_run++; |
346 | 0 | } |
347 | 0 | i_score += ds_table[i_run]; |
348 | 0 | } |
349 | | |
350 | 0 | return i_score; |
351 | 0 | } |
352 | | |
353 | | static int decimate_score15( dctcoef *dct ) |
354 | 0 | { |
355 | 0 | return decimate_score_internal( dct+1, 15 ); |
356 | 0 | } |
357 | | static int decimate_score16( dctcoef *dct ) |
358 | 0 | { |
359 | 0 | return decimate_score_internal( dct, 16 ); |
360 | 0 | } |
361 | | static int decimate_score64( dctcoef *dct ) |
362 | 0 | { |
363 | 0 | return decimate_score_internal( dct, 64 ); |
364 | 0 | } |
365 | | |
366 | | #define last(num)\ |
367 | 0 | static int coeff_last##num( dctcoef *l )\ |
368 | 0 | {\ |
369 | 0 | int i_last = num-1;\ |
370 | 0 | while( i_last >= 0 && l[i_last] == 0 )\ |
371 | 0 | i_last--;\ |
372 | 0 | return i_last;\ |
373 | 0 | } Unexecuted instantiation: quant.c:coeff_last4 Unexecuted instantiation: quant.c:coeff_last8 Unexecuted instantiation: quant.c:coeff_last15 Unexecuted instantiation: quant.c:coeff_last16 Unexecuted instantiation: quant.c:coeff_last64 |
374 | | |
375 | | last(4) |
376 | | last(8) |
377 | | last(15) |
378 | | last(16) |
379 | | last(64) |
380 | | |
381 | | #define level_run(num)\ |
382 | 0 | static int coeff_level_run##num( dctcoef *dct, x264_run_level_t *runlevel )\ |
383 | 0 | {\ |
384 | 0 | int i_last = runlevel->last = coeff_last##num(dct);\ |
385 | 0 | int i_total = 0;\ |
386 | 0 | int mask = 0;\ |
387 | 0 | do\ |
388 | 0 | {\ |
389 | 0 | runlevel->level[i_total++] = dct[i_last];\ |
390 | 0 | mask |= 1 << (i_last);\ |
391 | 0 | while( --i_last >= 0 && dct[i_last] == 0 );\ |
392 | 0 | } while( i_last >= 0 );\ |
393 | 0 | runlevel->mask = mask;\ |
394 | 0 | return i_total;\ |
395 | 0 | } Unexecuted instantiation: quant.c:coeff_level_run4 Unexecuted instantiation: quant.c:coeff_level_run8 Unexecuted instantiation: quant.c:coeff_level_run15 Unexecuted instantiation: quant.c:coeff_level_run16 |
396 | | |
397 | | level_run(4) |
398 | | level_run(8) |
399 | | level_run(15) |
400 | | level_run(16) |
401 | | |
/* INIT_TRELLIS( cpu ): when ARCH_X86_64 is set, assign the six
 * pf->trellis_cabac_* function pointers to the x264_trellis_cabac_*_<cpu>
 * symbols (the suffix is token-pasted from the argument). Otherwise the macro
 * expands to nothing.
 * Expects a variable named `pf` to be in scope where it is expanded. */
#if ARCH_X86_64
#define INIT_TRELLIS(cpu)\
    pf->trellis_cabac_4x4 = x264_trellis_cabac_4x4_##cpu;\
    pf->trellis_cabac_8x8 = x264_trellis_cabac_8x8_##cpu;\
    pf->trellis_cabac_4x4_psy = x264_trellis_cabac_4x4_psy_##cpu;\
    pf->trellis_cabac_8x8_psy = x264_trellis_cabac_8x8_psy_##cpu;\
    pf->trellis_cabac_dc = x264_trellis_cabac_dc_##cpu;\
    pf->trellis_cabac_chroma_422_dc = x264_trellis_cabac_chroma_422_dc_##cpu;
#else
#define INIT_TRELLIS(...)
#endif
413 | | |
414 | | void x264_quant_init( x264_t *h, uint32_t cpu, x264_quant_function_t *pf ) |
415 | 0 | { |
416 | 0 | pf->quant_8x8 = quant_8x8; |
417 | 0 | pf->quant_4x4 = quant_4x4; |
418 | 0 | pf->quant_4x4x4 = quant_4x4x4; |
419 | 0 | pf->quant_4x4_dc = quant_4x4_dc; |
420 | 0 | pf->quant_2x2_dc = quant_2x2_dc; |
421 | |
|
422 | 0 | pf->dequant_4x4 = dequant_4x4; |
423 | 0 | pf->dequant_4x4_dc = dequant_4x4_dc; |
424 | 0 | pf->dequant_8x8 = dequant_8x8; |
425 | |
|
426 | 0 | pf->idct_dequant_2x4_dc = idct_dequant_2x4_dc; |
427 | 0 | pf->idct_dequant_2x4_dconly = idct_dequant_2x4_dconly; |
428 | |
|
429 | 0 | pf->optimize_chroma_2x2_dc = optimize_chroma_2x2_dc; |
430 | 0 | pf->optimize_chroma_2x4_dc = optimize_chroma_2x4_dc; |
431 | |
|
432 | 0 | pf->denoise_dct = denoise_dct; |
433 | 0 | pf->decimate_score15 = decimate_score15; |
434 | 0 | pf->decimate_score16 = decimate_score16; |
435 | 0 | pf->decimate_score64 = decimate_score64; |
436 | |
|
437 | 0 | pf->coeff_last4 = coeff_last4; |
438 | 0 | pf->coeff_last8 = coeff_last8; |
439 | 0 | pf->coeff_last[ DCT_LUMA_AC] = coeff_last15; |
440 | 0 | pf->coeff_last[ DCT_LUMA_4x4] = coeff_last16; |
441 | 0 | pf->coeff_last[ DCT_LUMA_8x8] = coeff_last64; |
442 | 0 | pf->coeff_level_run4 = coeff_level_run4; |
443 | 0 | pf->coeff_level_run8 = coeff_level_run8; |
444 | 0 | pf->coeff_level_run[ DCT_LUMA_AC] = coeff_level_run15; |
445 | 0 | pf->coeff_level_run[ DCT_LUMA_4x4] = coeff_level_run16; |
446 | |
|
447 | | #if HIGH_BIT_DEPTH |
448 | | #if HAVE_MMX |
449 | | INIT_TRELLIS( sse2 ); |
450 | | if( cpu&X264_CPU_MMX2 ) |
451 | | { |
452 | | #if ARCH_X86 |
453 | | pf->denoise_dct = x264_denoise_dct_mmx; |
454 | | pf->coeff_last8 = x264_coeff_last8_mmx2; |
455 | | pf->coeff_last[ DCT_LUMA_AC] = x264_coeff_last15_mmx2; |
456 | | pf->coeff_last[ DCT_LUMA_4x4] = x264_coeff_last16_mmx2; |
457 | | pf->coeff_last[ DCT_LUMA_8x8] = x264_coeff_last64_mmx2; |
458 | | pf->coeff_level_run8 = x264_coeff_level_run8_mmx2; |
459 | | pf->coeff_level_run[ DCT_LUMA_AC] = x264_coeff_level_run15_mmx2; |
460 | | pf->coeff_level_run[ DCT_LUMA_4x4] = x264_coeff_level_run16_mmx2; |
461 | | #endif |
462 | | pf->coeff_last4 = x264_coeff_last4_mmx2; |
463 | | pf->coeff_level_run4 = x264_coeff_level_run4_mmx2; |
464 | | } |
465 | | if( cpu&X264_CPU_SSE2 ) |
466 | | { |
467 | | pf->quant_4x4 = x264_quant_4x4_sse2; |
468 | | pf->quant_4x4x4 = x264_quant_4x4x4_sse2; |
469 | | pf->quant_8x8 = x264_quant_8x8_sse2; |
470 | | pf->quant_2x2_dc = x264_quant_2x2_dc_sse2; |
471 | | pf->quant_4x4_dc = x264_quant_4x4_dc_sse2; |
472 | | pf->dequant_4x4 = x264_dequant_4x4_sse2; |
473 | | pf->dequant_8x8 = x264_dequant_8x8_sse2; |
474 | | pf->dequant_4x4_dc = x264_dequant_4x4dc_sse2; |
475 | | pf->idct_dequant_2x4_dc = x264_idct_dequant_2x4_dc_sse2; |
476 | | pf->idct_dequant_2x4_dconly = x264_idct_dequant_2x4_dconly_sse2; |
477 | | pf->denoise_dct = x264_denoise_dct_sse2; |
478 | | pf->decimate_score15 = x264_decimate_score15_sse2; |
479 | | pf->decimate_score16 = x264_decimate_score16_sse2; |
480 | | pf->decimate_score64 = x264_decimate_score64_sse2; |
481 | | pf->coeff_last8 = x264_coeff_last8_sse2; |
482 | | pf->coeff_last[ DCT_LUMA_AC] = x264_coeff_last15_sse2; |
483 | | pf->coeff_last[DCT_LUMA_4x4] = x264_coeff_last16_sse2; |
484 | | pf->coeff_last[DCT_LUMA_8x8] = x264_coeff_last64_sse2; |
485 | | pf->coeff_level_run8 = x264_coeff_level_run8_sse2; |
486 | | pf->coeff_level_run[ DCT_LUMA_AC] = x264_coeff_level_run15_sse2; |
487 | | pf->coeff_level_run[DCT_LUMA_4x4] = x264_coeff_level_run16_sse2; |
488 | | } |
489 | | if( cpu&X264_CPU_LZCNT ) |
490 | | { |
491 | | pf->coeff_last4 = x264_coeff_last4_lzcnt; |
492 | | pf->coeff_last8 = x264_coeff_last8_lzcnt; |
493 | | pf->coeff_last[ DCT_LUMA_AC] = x264_coeff_last15_lzcnt; |
494 | | pf->coeff_last[DCT_LUMA_4x4] = x264_coeff_last16_lzcnt; |
495 | | pf->coeff_last[DCT_LUMA_8x8] = x264_coeff_last64_lzcnt; |
496 | | pf->coeff_level_run4 = x264_coeff_level_run4_lzcnt; |
497 | | pf->coeff_level_run8 = x264_coeff_level_run8_lzcnt; |
498 | | pf->coeff_level_run[ DCT_LUMA_AC] = x264_coeff_level_run15_lzcnt; |
499 | | pf->coeff_level_run[DCT_LUMA_4x4] = x264_coeff_level_run16_lzcnt; |
500 | | } |
501 | | if( cpu&X264_CPU_SSSE3 ) |
502 | | { |
503 | | pf->quant_4x4 = x264_quant_4x4_ssse3; |
504 | | pf->quant_4x4x4 = x264_quant_4x4x4_ssse3; |
505 | | pf->quant_8x8 = x264_quant_8x8_ssse3; |
506 | | pf->quant_2x2_dc = x264_quant_2x2_dc_ssse3; |
507 | | pf->quant_4x4_dc = x264_quant_4x4_dc_ssse3; |
508 | | pf->denoise_dct = x264_denoise_dct_ssse3; |
509 | | pf->decimate_score15 = x264_decimate_score15_ssse3; |
510 | | pf->decimate_score16 = x264_decimate_score16_ssse3; |
511 | | pf->decimate_score64 = x264_decimate_score64_ssse3; |
512 | | INIT_TRELLIS( ssse3 ); |
513 | | } |
514 | | if( cpu&X264_CPU_SSE4 ) |
515 | | { |
516 | | pf->quant_2x2_dc = x264_quant_2x2_dc_sse4; |
517 | | pf->quant_4x4_dc = x264_quant_4x4_dc_sse4; |
518 | | pf->quant_4x4 = x264_quant_4x4_sse4; |
519 | | pf->quant_4x4x4 = x264_quant_4x4x4_sse4; |
520 | | pf->quant_8x8 = x264_quant_8x8_sse4; |
521 | | } |
522 | | if( cpu&X264_CPU_AVX ) |
523 | | { |
524 | | pf->idct_dequant_2x4_dc = x264_idct_dequant_2x4_dc_avx; |
525 | | pf->idct_dequant_2x4_dconly = x264_idct_dequant_2x4_dconly_avx; |
526 | | pf->denoise_dct = x264_denoise_dct_avx; |
527 | | } |
528 | | if( cpu&X264_CPU_XOP ) |
529 | | { |
530 | | pf->dequant_4x4_dc = x264_dequant_4x4dc_xop; |
531 | | if( h->param.i_cqm_preset != X264_CQM_FLAT ) |
532 | | { |
533 | | pf->dequant_4x4 = x264_dequant_4x4_xop; |
534 | | pf->dequant_8x8 = x264_dequant_8x8_xop; |
535 | | } |
536 | | } |
537 | | if( cpu&X264_CPU_AVX2 ) |
538 | | { |
539 | | pf->quant_4x4 = x264_quant_4x4_avx2; |
540 | | pf->quant_4x4_dc = x264_quant_4x4_dc_avx2; |
541 | | pf->quant_8x8 = x264_quant_8x8_avx2; |
542 | | pf->quant_4x4x4 = x264_quant_4x4x4_avx2; |
543 | | pf->dequant_4x4 = x264_dequant_4x4_avx2; |
544 | | pf->dequant_8x8 = x264_dequant_8x8_avx2; |
545 | | pf->dequant_4x4_dc = x264_dequant_4x4dc_avx2; |
546 | | pf->denoise_dct = x264_denoise_dct_avx2; |
547 | | pf->coeff_last[DCT_LUMA_8x8] = x264_coeff_last64_avx2; |
548 | | } |
549 | | if( cpu&X264_CPU_AVX512 ) |
550 | | { |
551 | | pf->dequant_4x4 = x264_dequant_4x4_avx512; |
552 | | pf->dequant_8x8 = x264_dequant_8x8_avx512; |
553 | | pf->decimate_score15 = x264_decimate_score15_avx512; |
554 | | pf->decimate_score16 = x264_decimate_score16_avx512; |
555 | | pf->decimate_score64 = x264_decimate_score64_avx512; |
556 | | pf->coeff_last4 = x264_coeff_last4_avx512; |
557 | | pf->coeff_last8 = x264_coeff_last8_avx512; |
558 | | pf->coeff_last[ DCT_LUMA_AC] = x264_coeff_last15_avx512; |
559 | | pf->coeff_last[DCT_LUMA_4x4] = x264_coeff_last16_avx512; |
560 | | pf->coeff_last[DCT_LUMA_8x8] = x264_coeff_last64_avx512; |
561 | | } |
562 | | #endif // HAVE_MMX |
563 | | #if HAVE_AARCH64 |
564 | | |
565 | | if( cpu&X264_CPU_NEON ) |
566 | | { |
567 | | pf->quant_2x2_dc = x264_quant_2x2_dc_neon; |
568 | | pf->quant_4x4_dc = x264_quant_4x4_dc_neon; |
569 | | pf->quant_4x4 = x264_quant_4x4_neon; |
570 | | pf->quant_4x4x4 = x264_quant_4x4x4_neon; |
571 | | pf->quant_8x8 = x264_quant_8x8_neon; |
572 | | |
573 | | pf->dequant_4x4 = x264_dequant_4x4_neon; |
574 | | pf->dequant_8x8 = x264_dequant_8x8_neon; |
575 | | pf->dequant_4x4_dc = x264_dequant_4x4_dc_neon; |
576 | | |
577 | | pf->decimate_score15 = x264_decimate_score15_neon; |
578 | | pf->decimate_score16 = x264_decimate_score16_neon; |
579 | | pf->decimate_score64 = x264_decimate_score64_neon; |
580 | | |
581 | | pf->coeff_last4 = x264_coeff_last4_neon; |
582 | | pf->coeff_last8 = x264_coeff_last8_neon; |
583 | | pf->coeff_last[ DCT_LUMA_AC] = x264_coeff_last15_neon; |
584 | | pf->coeff_last[DCT_LUMA_4x4] = x264_coeff_last16_neon; |
585 | | pf->coeff_last[DCT_LUMA_8x8] = x264_coeff_last64_neon; |
586 | | pf->coeff_level_run4 = x264_coeff_level_run4_neon; |
587 | | pf->coeff_level_run8 = x264_coeff_level_run8_neon; |
588 | | pf->coeff_level_run[ DCT_LUMA_AC] = x264_coeff_level_run15_neon; |
589 | | pf->coeff_level_run[ DCT_LUMA_4x4] = x264_coeff_level_run16_neon; |
590 | | |
591 | | pf->denoise_dct = x264_denoise_dct_neon; |
592 | | } |
593 | | |
594 | | #endif // HAVE_AARCH64 |
595 | | #else // !HIGH_BIT_DEPTH |
596 | | #if HAVE_MMX |
597 | | INIT_TRELLIS( sse2 ); |
598 | | if( cpu&X264_CPU_MMX ) |
599 | | { |
600 | | #if ARCH_X86 |
601 | | pf->dequant_4x4 = x264_dequant_4x4_mmx; |
602 | | pf->dequant_4x4_dc = x264_dequant_4x4dc_mmx2; |
603 | | pf->dequant_8x8 = x264_dequant_8x8_mmx; |
604 | | if( h->param.i_cqm_preset == X264_CQM_FLAT ) |
605 | | { |
606 | | pf->dequant_4x4 = x264_dequant_4x4_flat16_mmx; |
607 | | pf->dequant_8x8 = x264_dequant_8x8_flat16_mmx; |
608 | | } |
609 | | pf->denoise_dct = x264_denoise_dct_mmx; |
610 | | #endif |
611 | | } |
612 | | |
613 | | if( cpu&X264_CPU_MMX2 ) |
614 | | { |
615 | | pf->quant_2x2_dc = x264_quant_2x2_dc_mmx2; |
616 | | #if ARCH_X86 |
617 | | pf->quant_4x4 = x264_quant_4x4_mmx2; |
618 | | pf->quant_8x8 = x264_quant_8x8_mmx2; |
619 | | pf->quant_4x4_dc = x264_quant_4x4_dc_mmx2; |
620 | | pf->coeff_last[ DCT_LUMA_AC] = x264_coeff_last15_mmx2; |
621 | | pf->coeff_last[ DCT_LUMA_4x4] = x264_coeff_last16_mmx2; |
622 | | pf->coeff_last[ DCT_LUMA_8x8] = x264_coeff_last64_mmx2; |
623 | | pf->coeff_level_run[ DCT_LUMA_AC] = x264_coeff_level_run15_mmx2; |
624 | | pf->coeff_level_run[ DCT_LUMA_4x4] = x264_coeff_level_run16_mmx2; |
625 | | #endif |
626 | | pf->coeff_last4 = x264_coeff_last4_mmx2; |
627 | | pf->coeff_last8 = x264_coeff_last8_mmx2; |
628 | | pf->coeff_level_run4 = x264_coeff_level_run4_mmx2; |
629 | | pf->coeff_level_run8 = x264_coeff_level_run8_mmx2; |
630 | | } |
631 | | |
632 | | if( cpu&X264_CPU_SSE2 ) |
633 | | { |
634 | | pf->quant_4x4_dc = x264_quant_4x4_dc_sse2; |
635 | | pf->quant_4x4 = x264_quant_4x4_sse2; |
636 | | pf->quant_4x4x4 = x264_quant_4x4x4_sse2; |
637 | | pf->quant_8x8 = x264_quant_8x8_sse2; |
638 | | pf->dequant_4x4 = x264_dequant_4x4_sse2; |
639 | | pf->dequant_4x4_dc = x264_dequant_4x4dc_sse2; |
640 | | pf->dequant_8x8 = x264_dequant_8x8_sse2; |
641 | | if( h->param.i_cqm_preset == X264_CQM_FLAT ) |
642 | | { |
643 | | pf->dequant_4x4 = x264_dequant_4x4_flat16_sse2; |
644 | | pf->dequant_8x8 = x264_dequant_8x8_flat16_sse2; |
645 | | } |
646 | | pf->idct_dequant_2x4_dc = x264_idct_dequant_2x4_dc_sse2; |
647 | | pf->idct_dequant_2x4_dconly = x264_idct_dequant_2x4_dconly_sse2; |
648 | | pf->optimize_chroma_2x2_dc = x264_optimize_chroma_2x2_dc_sse2; |
649 | | pf->denoise_dct = x264_denoise_dct_sse2; |
650 | | pf->decimate_score15 = x264_decimate_score15_sse2; |
651 | | pf->decimate_score16 = x264_decimate_score16_sse2; |
652 | | pf->decimate_score64 = x264_decimate_score64_sse2; |
653 | | pf->coeff_last[ DCT_LUMA_AC] = x264_coeff_last15_sse2; |
654 | | pf->coeff_last[DCT_LUMA_4x4] = x264_coeff_last16_sse2; |
655 | | pf->coeff_last[DCT_LUMA_8x8] = x264_coeff_last64_sse2; |
656 | | pf->coeff_level_run[ DCT_LUMA_AC] = x264_coeff_level_run15_sse2; |
657 | | pf->coeff_level_run[DCT_LUMA_4x4] = x264_coeff_level_run16_sse2; |
658 | | } |
659 | | |
660 | | if( cpu&X264_CPU_LZCNT ) |
661 | | { |
662 | | pf->coeff_last4 = x264_coeff_last4_lzcnt; |
663 | | pf->coeff_last8 = x264_coeff_last8_lzcnt; |
664 | | pf->coeff_last[ DCT_LUMA_AC] = x264_coeff_last15_lzcnt; |
665 | | pf->coeff_last[DCT_LUMA_4x4] = x264_coeff_last16_lzcnt; |
666 | | pf->coeff_last[DCT_LUMA_8x8] = x264_coeff_last64_lzcnt; |
667 | | pf->coeff_level_run4 = x264_coeff_level_run4_lzcnt; |
668 | | pf->coeff_level_run8 = x264_coeff_level_run8_lzcnt; |
669 | | pf->coeff_level_run[ DCT_LUMA_AC] = x264_coeff_level_run15_lzcnt; |
670 | | pf->coeff_level_run[DCT_LUMA_4x4] = x264_coeff_level_run16_lzcnt; |
671 | | } |
672 | | |
673 | | if( cpu&X264_CPU_SSSE3 ) |
674 | | { |
675 | | pf->quant_2x2_dc = x264_quant_2x2_dc_ssse3; |
676 | | pf->quant_4x4_dc = x264_quant_4x4_dc_ssse3; |
677 | | pf->quant_4x4 = x264_quant_4x4_ssse3; |
678 | | pf->quant_4x4x4 = x264_quant_4x4x4_ssse3; |
679 | | pf->quant_8x8 = x264_quant_8x8_ssse3; |
680 | | pf->optimize_chroma_2x2_dc = x264_optimize_chroma_2x2_dc_ssse3; |
681 | | pf->denoise_dct = x264_denoise_dct_ssse3; |
682 | | pf->decimate_score15 = x264_decimate_score15_ssse3; |
683 | | pf->decimate_score16 = x264_decimate_score16_ssse3; |
684 | | pf->decimate_score64 = x264_decimate_score64_ssse3; |
685 | | INIT_TRELLIS( ssse3 ); |
686 | | pf->coeff_level_run4 = x264_coeff_level_run4_ssse3; |
687 | | pf->coeff_level_run8 = x264_coeff_level_run8_ssse3; |
688 | | pf->coeff_level_run[ DCT_LUMA_AC] = x264_coeff_level_run15_ssse3; |
689 | | pf->coeff_level_run[DCT_LUMA_4x4] = x264_coeff_level_run16_ssse3; |
690 | | if( cpu&X264_CPU_LZCNT ) |
691 | | { |
692 | | pf->coeff_level_run4 = x264_coeff_level_run4_ssse3_lzcnt; |
693 | | pf->coeff_level_run8 = x264_coeff_level_run8_ssse3_lzcnt; |
694 | | pf->coeff_level_run[ DCT_LUMA_AC] = x264_coeff_level_run15_ssse3_lzcnt; |
695 | | pf->coeff_level_run[DCT_LUMA_4x4] = x264_coeff_level_run16_ssse3_lzcnt; |
696 | | } |
697 | | } |
698 | | |
699 | | if( cpu&X264_CPU_SSE4 ) |
700 | | { |
701 | | pf->quant_4x4_dc = x264_quant_4x4_dc_sse4; |
702 | | pf->quant_4x4 = x264_quant_4x4_sse4; |
703 | | pf->quant_8x8 = x264_quant_8x8_sse4; |
704 | | pf->optimize_chroma_2x2_dc = x264_optimize_chroma_2x2_dc_sse4; |
705 | | } |
706 | | |
707 | | if( cpu&X264_CPU_AVX ) |
708 | | { |
709 | | pf->dequant_4x4_dc = x264_dequant_4x4dc_avx; |
710 | | if( h->param.i_cqm_preset != X264_CQM_FLAT ) |
711 | | { |
712 | | pf->dequant_4x4 = x264_dequant_4x4_avx; |
713 | | pf->dequant_8x8 = x264_dequant_8x8_avx; |
714 | | } |
715 | | pf->idct_dequant_2x4_dc = x264_idct_dequant_2x4_dc_avx; |
716 | | pf->idct_dequant_2x4_dconly = x264_idct_dequant_2x4_dconly_avx; |
717 | | pf->optimize_chroma_2x2_dc = x264_optimize_chroma_2x2_dc_avx; |
718 | | pf->denoise_dct = x264_denoise_dct_avx; |
719 | | } |
720 | | |
721 | | if( cpu&X264_CPU_XOP ) |
722 | | { |
723 | | if( h->param.i_cqm_preset != X264_CQM_FLAT ) |
724 | | { |
725 | | pf->dequant_4x4 = x264_dequant_4x4_xop; |
726 | | pf->dequant_8x8 = x264_dequant_8x8_xop; |
727 | | } |
728 | | } |
729 | | |
730 | | if( cpu&X264_CPU_AVX2 ) |
731 | | { |
732 | | pf->quant_4x4 = x264_quant_4x4_avx2; |
733 | | pf->quant_4x4_dc = x264_quant_4x4_dc_avx2; |
734 | | pf->quant_8x8 = x264_quant_8x8_avx2; |
735 | | pf->quant_4x4x4 = x264_quant_4x4x4_avx2; |
736 | | pf->dequant_4x4 = x264_dequant_4x4_avx2; |
737 | | pf->dequant_8x8 = x264_dequant_8x8_avx2; |
738 | | pf->dequant_4x4_dc = x264_dequant_4x4dc_avx2; |
739 | | if( h->param.i_cqm_preset == X264_CQM_FLAT ) |
740 | | { |
741 | | pf->dequant_4x4 = x264_dequant_4x4_flat16_avx2; |
742 | | pf->dequant_8x8 = x264_dequant_8x8_flat16_avx2; |
743 | | } |
744 | | pf->decimate_score64 = x264_decimate_score64_avx2; |
745 | | pf->denoise_dct = x264_denoise_dct_avx2; |
746 | | pf->coeff_last[DCT_LUMA_8x8] = x264_coeff_last64_avx2; |
747 | | pf->coeff_level_run[ DCT_LUMA_AC] = x264_coeff_level_run15_avx2; |
748 | | pf->coeff_level_run[DCT_LUMA_4x4] = x264_coeff_level_run16_avx2; |
749 | | } |
750 | | if( cpu&X264_CPU_AVX512 ) |
751 | | { |
752 | | if( h->param.i_cqm_preset == X264_CQM_FLAT ) |
753 | | pf->dequant_8x8 = x264_dequant_8x8_flat16_avx512; |
754 | | else |
755 | | { |
756 | | pf->dequant_4x4 = x264_dequant_4x4_avx512; |
757 | | pf->dequant_8x8 = x264_dequant_8x8_avx512; |
758 | | } |
759 | | pf->decimate_score15 = x264_decimate_score15_avx512; |
760 | | pf->decimate_score16 = x264_decimate_score16_avx512; |
761 | | pf->decimate_score64 = x264_decimate_score64_avx512; |
762 | | pf->coeff_last8 = x264_coeff_last8_avx512; |
763 | | pf->coeff_last[ DCT_LUMA_AC] = x264_coeff_last15_avx512; |
764 | | pf->coeff_last[DCT_LUMA_4x4] = x264_coeff_last16_avx512; |
765 | | pf->coeff_last[DCT_LUMA_8x8] = x264_coeff_last64_avx512; |
766 | | } |
767 | | #endif // HAVE_MMX |
768 | | |
769 | | #if HAVE_ALTIVEC |
770 | | if( cpu&X264_CPU_ALTIVEC ) |
771 | | { |
772 | | pf->quant_2x2_dc = x264_quant_2x2_dc_altivec; |
773 | | pf->quant_4x4_dc = x264_quant_4x4_dc_altivec; |
774 | | pf->quant_4x4 = x264_quant_4x4_altivec; |
775 | | pf->quant_4x4x4 = x264_quant_4x4x4_altivec; |
776 | | pf->quant_8x8 = x264_quant_8x8_altivec; |
777 | | |
778 | | pf->dequant_4x4 = x264_dequant_4x4_altivec; |
779 | | pf->dequant_8x8 = x264_dequant_8x8_altivec; |
780 | | } |
781 | | #endif |
782 | | |
783 | | #if HAVE_ARMV6 |
784 | | if( cpu&X264_CPU_ARMV6 ) |
785 | | { |
786 | | pf->coeff_last4 = x264_coeff_last4_arm; |
787 | | pf->coeff_last8 = x264_coeff_last8_arm; |
788 | | } |
789 | | #endif |
790 | | #if HAVE_ARMV6 || HAVE_AARCH64 |
791 | | if( cpu&X264_CPU_NEON ) |
792 | | { |
793 | | pf->quant_2x2_dc = x264_quant_2x2_dc_neon; |
794 | | pf->quant_4x4 = x264_quant_4x4_neon; |
795 | | pf->quant_4x4_dc = x264_quant_4x4_dc_neon; |
796 | | pf->quant_4x4x4 = x264_quant_4x4x4_neon; |
797 | | pf->quant_8x8 = x264_quant_8x8_neon; |
798 | | pf->dequant_4x4 = x264_dequant_4x4_neon; |
799 | | pf->dequant_4x4_dc = x264_dequant_4x4_dc_neon; |
800 | | pf->dequant_8x8 = x264_dequant_8x8_neon; |
801 | | pf->coeff_last[ DCT_LUMA_AC] = x264_coeff_last15_neon; |
802 | | pf->coeff_last[DCT_LUMA_4x4] = x264_coeff_last16_neon; |
803 | | pf->coeff_last[DCT_LUMA_8x8] = x264_coeff_last64_neon; |
804 | | pf->denoise_dct = x264_denoise_dct_neon; |
805 | | pf->decimate_score15 = x264_decimate_score15_neon; |
806 | | pf->decimate_score16 = x264_decimate_score16_neon; |
807 | | pf->decimate_score64 = x264_decimate_score64_neon; |
808 | | } |
809 | | #endif |
810 | | #if HAVE_AARCH64 |
811 | | if( cpu&X264_CPU_ARMV8 ) |
812 | | { |
813 | | pf->coeff_last4 = x264_coeff_last4_aarch64; |
814 | | pf->coeff_last8 = x264_coeff_last8_aarch64; |
815 | | pf->coeff_level_run4 = x264_coeff_level_run4_aarch64; |
816 | | } |
817 | | if( cpu&X264_CPU_NEON ) |
818 | | { |
819 | | pf->coeff_level_run8 = x264_coeff_level_run8_neon; |
820 | | pf->coeff_level_run[ DCT_LUMA_AC] = x264_coeff_level_run15_neon; |
821 | | pf->coeff_level_run[ DCT_LUMA_4x4] = x264_coeff_level_run16_neon; |
822 | | } |
823 | | #endif |
824 | | |
825 | | #if HAVE_MSA |
826 | | if( cpu&X264_CPU_MSA ) |
827 | | { |
828 | | pf->quant_4x4 = x264_quant_4x4_msa; |
829 | | pf->quant_4x4_dc = x264_quant_4x4_dc_msa; |
830 | | pf->quant_4x4x4 = x264_quant_4x4x4_msa; |
831 | | pf->quant_8x8 = x264_quant_8x8_msa; |
832 | | pf->dequant_4x4 = x264_dequant_4x4_msa; |
833 | | pf->dequant_4x4_dc = x264_dequant_4x4_dc_msa; |
834 | | pf->dequant_8x8 = x264_dequant_8x8_msa; |
835 | | pf->coeff_last[DCT_LUMA_4x4] = x264_coeff_last16_msa; |
836 | | pf->coeff_last[DCT_LUMA_8x8] = x264_coeff_last64_msa; |
837 | | } |
838 | | #endif |
839 | | |
840 | | #if HAVE_LSX |
841 | | if( cpu&X264_CPU_LSX ) |
842 | | { |
843 | | pf->quant_4x4 = x264_quant_4x4_lsx; |
844 | | pf->quant_4x4x4 = x264_quant_4x4x4_lsx; |
845 | | pf->quant_8x8 = x264_quant_8x8_lsx; |
846 | | pf->quant_4x4_dc = x264_quant_4x4_dc_lsx; |
847 | | pf->quant_2x2_dc = x264_quant_2x2_dc_lsx; |
848 | | pf->dequant_4x4 = x264_dequant_4x4_lsx; |
849 | | pf->dequant_8x8 = x264_dequant_8x8_lsx; |
850 | | pf->dequant_4x4_dc = x264_dequant_4x4_dc_lsx; |
851 | | pf->decimate_score15 = x264_decimate_score15_lsx; |
852 | | pf->decimate_score16 = x264_decimate_score16_lsx; |
853 | | pf->decimate_score64 = x264_decimate_score64_lsx; |
854 | | pf->coeff_last4 = x264_coeff_last4_lsx; |
855 | | pf->coeff_last8 = x264_coeff_last8_lsx; |
856 | | pf->coeff_last[ DCT_LUMA_AC] = x264_coeff_last15_lsx; |
857 | | pf->coeff_last[DCT_LUMA_4x4] = x264_coeff_last16_lsx; |
858 | | pf->coeff_last[DCT_LUMA_8x8] = x264_coeff_last64_lsx; |
859 | | pf->coeff_level_run8 = x264_coeff_level_run8_lsx; |
860 | | pf->coeff_level_run[ DCT_LUMA_AC] = x264_coeff_level_run15_lsx; |
861 | | pf->coeff_level_run[DCT_LUMA_4x4] = x264_coeff_level_run16_lsx; |
862 | | } |
863 | | if( cpu&X264_CPU_LASX ) |
864 | | { |
865 | | pf->quant_4x4x4 = x264_quant_4x4x4_lasx; |
866 | | pf->dequant_4x4 = x264_dequant_4x4_lasx; |
867 | | pf->dequant_8x8 = x264_dequant_8x8_lasx; |
868 | | pf->dequant_4x4_dc = x264_dequant_4x4_dc_lasx; |
869 | | pf->coeff_last[ DCT_LUMA_AC] = x264_coeff_last15_lasx; |
870 | | pf->coeff_last[DCT_LUMA_4x4] = x264_coeff_last16_lasx; |
871 | | pf->coeff_last[DCT_LUMA_8x8] = x264_coeff_last64_lasx; |
872 | | pf->coeff_level_run[ DCT_LUMA_AC] = x264_coeff_level_run15_lasx; |
873 | | pf->coeff_level_run[DCT_LUMA_4x4] = x264_coeff_level_run16_lasx; |
874 | | } |
875 | | #endif |
876 | | |
877 | | #endif // HIGH_BIT_DEPTH |
878 | 0 | pf->coeff_last[DCT_LUMA_DC] = pf->coeff_last[DCT_CHROMAU_DC] = pf->coeff_last[DCT_CHROMAV_DC] = |
879 | 0 | pf->coeff_last[DCT_CHROMAU_4x4] = pf->coeff_last[DCT_CHROMAV_4x4] = pf->coeff_last[DCT_LUMA_4x4]; |
880 | 0 | pf->coeff_last[DCT_CHROMA_AC] = pf->coeff_last[DCT_CHROMAU_AC] = |
881 | 0 | pf->coeff_last[DCT_CHROMAV_AC] = pf->coeff_last[DCT_LUMA_AC]; |
882 | 0 | pf->coeff_last[DCT_CHROMAU_8x8] = pf->coeff_last[DCT_CHROMAV_8x8] = pf->coeff_last[DCT_LUMA_8x8]; |
883 | |
|
884 | 0 | pf->coeff_level_run[DCT_LUMA_DC] = pf->coeff_level_run[DCT_CHROMAU_DC] = pf->coeff_level_run[DCT_CHROMAV_DC] = |
885 | 0 | pf->coeff_level_run[DCT_CHROMAU_4x4] = pf->coeff_level_run[DCT_CHROMAV_4x4] = pf->coeff_level_run[DCT_LUMA_4x4]; |
886 | 0 | pf->coeff_level_run[DCT_CHROMA_AC] = pf->coeff_level_run[DCT_CHROMAU_AC] = |
887 | 0 | pf->coeff_level_run[DCT_CHROMAV_AC] = pf->coeff_level_run[DCT_LUMA_AC]; |
888 | 0 | } Unexecuted instantiation: x264_8_quant_init Unexecuted instantiation: x264_10_quant_init |