/src/x264/common/predict.c
Line | Count | Source |
1 | | /***************************************************************************** |
2 | | * predict.c: intra prediction |
3 | | ***************************************************************************** |
4 | | * Copyright (C) 2003-2025 x264 project |
5 | | * |
6 | | * Authors: Laurent Aimar <fenrir@via.ecp.fr> |
7 | | * Loren Merritt <lorenm@u.washington.edu> |
8 | | * Fiona Glaser <fiona@x264.com> |
9 | | * Henrik Gramner <henrik@gramner.com> |
10 | | * |
11 | | * This program is free software; you can redistribute it and/or modify |
12 | | * it under the terms of the GNU General Public License as published by |
13 | | * the Free Software Foundation; either version 2 of the License, or |
14 | | * (at your option) any later version. |
15 | | * |
16 | | * This program is distributed in the hope that it will be useful, |
17 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
18 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
19 | | * GNU General Public License for more details. |
20 | | * |
21 | | * You should have received a copy of the GNU General Public License |
22 | | * along with this program; if not, write to the Free Software |
23 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. |
24 | | * |
25 | | * This program is also available under a commercial proprietary license. |
26 | | * For more information, contact us at licensing@x264.com. |
27 | | *****************************************************************************/ |
28 | | |
29 | | /* predict4x4 are inspired from ffmpeg h264 decoder */ |
30 | | |
31 | | |
32 | | #include "common.h" |
33 | | |
34 | | #if HAVE_MMX |
35 | | # include "x86/predict.h" |
36 | | #endif |
37 | | #if HAVE_ALTIVEC |
38 | | # include "ppc/predict.h" |
39 | | #endif |
40 | | #if HAVE_ARMV6 |
41 | | # include "arm/predict.h" |
42 | | #endif |
43 | | #if HAVE_AARCH64 |
44 | | # include "aarch64/predict.h" |
45 | | #endif |
46 | | #if HAVE_MSA |
47 | | # include "mips/predict.h" |
48 | | #endif |
49 | | #if HAVE_LSX |
50 | | # include "loongarch/predict.h" |
51 | | #endif |
52 | | |
53 | | /**************************************************************************** |
54 | | * 16x16 prediction for intra luma block |
55 | | ****************************************************************************/ |
56 | | |
/* Fill a 16x16 block with the pixel4 value v: four 4-pixel stores per row,
 * advancing src by the fixed decode stride after each of the 16 rows. */
#define PREDICT_16x16_DC(v)\
    for( int i = 0; i < 16; i++ )\
    {\
        MPIXEL_X4( src+ 0 ) = v;\
        MPIXEL_X4( src+ 4 ) = v;\
        MPIXEL_X4( src+ 8 ) = v;\
        MPIXEL_X4( src+12 ) = v;\
        src += FDEC_STRIDE;\
    }
66 | | |
67 | | void x264_predict_16x16_dc_c( pixel *src ) |
68 | 0 | { |
69 | 0 | int dc = 0; |
70 | |
|
71 | 0 | for( int i = 0; i < 16; i++ ) |
72 | 0 | { |
73 | 0 | dc += src[-1 + i * FDEC_STRIDE]; |
74 | 0 | dc += src[i - FDEC_STRIDE]; |
75 | 0 | } |
76 | 0 | pixel4 dcsplat = PIXEL_SPLAT_X4( ( dc + 16 ) >> 5 ); |
77 | |
|
78 | 0 | PREDICT_16x16_DC( dcsplat ); |
79 | 0 | } Unexecuted instantiation: x264_8_predict_16x16_dc_c Unexecuted instantiation: x264_10_predict_16x16_dc_c |
80 | | static void predict_16x16_dc_left_c( pixel *src ) |
81 | 0 | { |
82 | 0 | int dc = 0; |
83 | |
|
84 | 0 | for( int i = 0; i < 16; i++ ) |
85 | 0 | dc += src[-1 + i * FDEC_STRIDE]; |
86 | 0 | pixel4 dcsplat = PIXEL_SPLAT_X4( ( dc + 8 ) >> 4 ); |
87 | |
|
88 | 0 | PREDICT_16x16_DC( dcsplat ); |
89 | 0 | } |
90 | | static void predict_16x16_dc_top_c( pixel *src ) |
91 | 0 | { |
92 | 0 | int dc = 0; |
93 | |
|
94 | 0 | for( int i = 0; i < 16; i++ ) |
95 | 0 | dc += src[i - FDEC_STRIDE]; |
96 | 0 | pixel4 dcsplat = PIXEL_SPLAT_X4( ( dc + 8 ) >> 4 ); |
97 | |
|
98 | 0 | PREDICT_16x16_DC( dcsplat ); |
99 | 0 | } |
100 | | static void predict_16x16_dc_128_c( pixel *src ) |
101 | 0 | { |
102 | 0 | PREDICT_16x16_DC( PIXEL_SPLAT_X4( 1 << (BIT_DEPTH-1) ) ); |
103 | 0 | } |
104 | | void x264_predict_16x16_h_c( pixel *src ) |
105 | 0 | { |
106 | 0 | for( int i = 0; i < 16; i++ ) |
107 | 0 | { |
108 | 0 | const pixel4 v = PIXEL_SPLAT_X4( src[-1] ); |
109 | 0 | MPIXEL_X4( src+ 0 ) = v; |
110 | 0 | MPIXEL_X4( src+ 4 ) = v; |
111 | 0 | MPIXEL_X4( src+ 8 ) = v; |
112 | 0 | MPIXEL_X4( src+12 ) = v; |
113 | 0 | src += FDEC_STRIDE; |
114 | 0 | } |
115 | 0 | } Unexecuted instantiation: x264_8_predict_16x16_h_c Unexecuted instantiation: x264_10_predict_16x16_h_c |
116 | | void x264_predict_16x16_v_c( pixel *src ) |
117 | 0 | { |
118 | 0 | pixel4 v0 = MPIXEL_X4( &src[ 0-FDEC_STRIDE] ); |
119 | 0 | pixel4 v1 = MPIXEL_X4( &src[ 4-FDEC_STRIDE] ); |
120 | 0 | pixel4 v2 = MPIXEL_X4( &src[ 8-FDEC_STRIDE] ); |
121 | 0 | pixel4 v3 = MPIXEL_X4( &src[12-FDEC_STRIDE] ); |
122 | |
|
123 | 0 | for( int i = 0; i < 16; i++ ) |
124 | 0 | { |
125 | 0 | MPIXEL_X4( src+ 0 ) = v0; |
126 | 0 | MPIXEL_X4( src+ 4 ) = v1; |
127 | 0 | MPIXEL_X4( src+ 8 ) = v2; |
128 | 0 | MPIXEL_X4( src+12 ) = v3; |
129 | 0 | src += FDEC_STRIDE; |
130 | 0 | } |
131 | 0 | } Unexecuted instantiation: x264_8_predict_16x16_v_c Unexecuted instantiation: x264_10_predict_16x16_v_c |
132 | | void x264_predict_16x16_p_c( pixel *src ) |
133 | 0 | { |
134 | 0 | int H = 0, V = 0; |
135 | | |
136 | | /* calculate H and V */ |
137 | 0 | for( int i = 0; i <= 7; i++ ) |
138 | 0 | { |
139 | 0 | H += ( i + 1 ) * ( src[ 8 + i - FDEC_STRIDE ] - src[6 -i -FDEC_STRIDE] ); |
140 | 0 | V += ( i + 1 ) * ( src[-1 + (8+i)*FDEC_STRIDE] - src[-1 + (6-i)*FDEC_STRIDE] ); |
141 | 0 | } |
142 | |
|
143 | 0 | int a = 16 * ( src[-1 + 15*FDEC_STRIDE] + src[15 - FDEC_STRIDE] ); |
144 | 0 | int b = ( 5 * H + 32 ) >> 6; |
145 | 0 | int c = ( 5 * V + 32 ) >> 6; |
146 | |
|
147 | 0 | int i00 = a - b * 7 - c * 7 + 16; |
148 | |
|
149 | 0 | for( int y = 0; y < 16; y++ ) |
150 | 0 | { |
151 | 0 | int pix = i00; |
152 | 0 | for( int x = 0; x < 16; x++ ) |
153 | 0 | { |
154 | 0 | src[x] = x264_clip_pixel( pix>>5 ); |
155 | 0 | pix += b; |
156 | 0 | } |
157 | 0 | src += FDEC_STRIDE; |
158 | 0 | i00 += c; |
159 | 0 | } |
160 | 0 | } Unexecuted instantiation: x264_8_predict_16x16_p_c Unexecuted instantiation: x264_10_predict_16x16_p_c |
161 | | |
162 | | |
163 | | /**************************************************************************** |
164 | | * 8x8 prediction for intra chroma block (4:2:0) |
165 | | ****************************************************************************/ |
166 | | |
167 | | static void predict_8x8c_dc_128_c( pixel *src ) |
168 | 0 | { |
169 | 0 | for( int y = 0; y < 8; y++ ) |
170 | 0 | { |
171 | 0 | MPIXEL_X4( src+0 ) = PIXEL_SPLAT_X4( 1 << (BIT_DEPTH-1) ); |
172 | 0 | MPIXEL_X4( src+4 ) = PIXEL_SPLAT_X4( 1 << (BIT_DEPTH-1) ); |
173 | 0 | src += FDEC_STRIDE; |
174 | 0 | } |
175 | 0 | } |
176 | | static void predict_8x8c_dc_left_c( pixel *src ) |
177 | 0 | { |
178 | 0 | int dc0 = 0, dc1 = 0; |
179 | |
|
180 | 0 | for( int y = 0; y < 4; y++ ) |
181 | 0 | { |
182 | 0 | dc0 += src[y * FDEC_STRIDE - 1]; |
183 | 0 | dc1 += src[(y+4) * FDEC_STRIDE - 1]; |
184 | 0 | } |
185 | 0 | pixel4 dc0splat = PIXEL_SPLAT_X4( ( dc0 + 2 ) >> 2 ); |
186 | 0 | pixel4 dc1splat = PIXEL_SPLAT_X4( ( dc1 + 2 ) >> 2 ); |
187 | |
|
188 | 0 | for( int y = 0; y < 4; y++ ) |
189 | 0 | { |
190 | 0 | MPIXEL_X4( src+0 ) = dc0splat; |
191 | 0 | MPIXEL_X4( src+4 ) = dc0splat; |
192 | 0 | src += FDEC_STRIDE; |
193 | 0 | } |
194 | 0 | for( int y = 0; y < 4; y++ ) |
195 | 0 | { |
196 | 0 | MPIXEL_X4( src+0 ) = dc1splat; |
197 | 0 | MPIXEL_X4( src+4 ) = dc1splat; |
198 | 0 | src += FDEC_STRIDE; |
199 | 0 | } |
200 | |
|
201 | 0 | } |
202 | | static void predict_8x8c_dc_top_c( pixel *src ) |
203 | 0 | { |
204 | 0 | int dc0 = 0, dc1 = 0; |
205 | |
|
206 | 0 | for( int x = 0; x < 4; x++ ) |
207 | 0 | { |
208 | 0 | dc0 += src[x - FDEC_STRIDE]; |
209 | 0 | dc1 += src[x + 4 - FDEC_STRIDE]; |
210 | 0 | } |
211 | 0 | pixel4 dc0splat = PIXEL_SPLAT_X4( ( dc0 + 2 ) >> 2 ); |
212 | 0 | pixel4 dc1splat = PIXEL_SPLAT_X4( ( dc1 + 2 ) >> 2 ); |
213 | |
|
214 | 0 | for( int y = 0; y < 8; y++ ) |
215 | 0 | { |
216 | 0 | MPIXEL_X4( src+0 ) = dc0splat; |
217 | 0 | MPIXEL_X4( src+4 ) = dc1splat; |
218 | 0 | src += FDEC_STRIDE; |
219 | 0 | } |
220 | 0 | } |
221 | | void x264_predict_8x8c_dc_c( pixel *src ) |
222 | 0 | { |
223 | 0 | int s0 = 0, s1 = 0, s2 = 0, s3 = 0; |
224 | | |
225 | | /* |
226 | | s0 s1 |
227 | | s2 |
228 | | s3 |
229 | | */ |
230 | 0 | for( int i = 0; i < 4; i++ ) |
231 | 0 | { |
232 | 0 | s0 += src[i - FDEC_STRIDE]; |
233 | 0 | s1 += src[i + 4 - FDEC_STRIDE]; |
234 | 0 | s2 += src[-1 + i * FDEC_STRIDE]; |
235 | 0 | s3 += src[-1 + (i+4)*FDEC_STRIDE]; |
236 | 0 | } |
237 | | /* |
238 | | dc0 dc1 |
239 | | dc2 dc3 |
240 | | */ |
241 | 0 | pixel4 dc0 = PIXEL_SPLAT_X4( ( s0 + s2 + 4 ) >> 3 ); |
242 | 0 | pixel4 dc1 = PIXEL_SPLAT_X4( ( s1 + 2 ) >> 2 ); |
243 | 0 | pixel4 dc2 = PIXEL_SPLAT_X4( ( s3 + 2 ) >> 2 ); |
244 | 0 | pixel4 dc3 = PIXEL_SPLAT_X4( ( s1 + s3 + 4 ) >> 3 ); |
245 | |
|
246 | 0 | for( int y = 0; y < 4; y++ ) |
247 | 0 | { |
248 | 0 | MPIXEL_X4( src+0 ) = dc0; |
249 | 0 | MPIXEL_X4( src+4 ) = dc1; |
250 | 0 | src += FDEC_STRIDE; |
251 | 0 | } |
252 | |
|
253 | 0 | for( int y = 0; y < 4; y++ ) |
254 | 0 | { |
255 | 0 | MPIXEL_X4( src+0 ) = dc2; |
256 | 0 | MPIXEL_X4( src+4 ) = dc3; |
257 | 0 | src += FDEC_STRIDE; |
258 | 0 | } |
259 | 0 | } Unexecuted instantiation: x264_8_predict_8x8c_dc_c Unexecuted instantiation: x264_10_predict_8x8c_dc_c |
260 | | void x264_predict_8x8c_h_c( pixel *src ) |
261 | 0 | { |
262 | 0 | for( int i = 0; i < 8; i++ ) |
263 | 0 | { |
264 | 0 | pixel4 v = PIXEL_SPLAT_X4( src[-1] ); |
265 | 0 | MPIXEL_X4( src+0 ) = v; |
266 | 0 | MPIXEL_X4( src+4 ) = v; |
267 | 0 | src += FDEC_STRIDE; |
268 | 0 | } |
269 | 0 | } Unexecuted instantiation: x264_8_predict_8x8c_h_c Unexecuted instantiation: x264_10_predict_8x8c_h_c |
270 | | void x264_predict_8x8c_v_c( pixel *src ) |
271 | 0 | { |
272 | 0 | pixel4 v0 = MPIXEL_X4( src+0-FDEC_STRIDE ); |
273 | 0 | pixel4 v1 = MPIXEL_X4( src+4-FDEC_STRIDE ); |
274 | |
|
275 | 0 | for( int i = 0; i < 8; i++ ) |
276 | 0 | { |
277 | 0 | MPIXEL_X4( src+0 ) = v0; |
278 | 0 | MPIXEL_X4( src+4 ) = v1; |
279 | 0 | src += FDEC_STRIDE; |
280 | 0 | } |
281 | 0 | } Unexecuted instantiation: x264_8_predict_8x8c_v_c Unexecuted instantiation: x264_10_predict_8x8c_v_c |
282 | | void x264_predict_8x8c_p_c( pixel *src ) |
283 | 0 | { |
284 | 0 | int H = 0, V = 0; |
285 | |
|
286 | 0 | for( int i = 0; i < 4; i++ ) |
287 | 0 | { |
288 | 0 | H += ( i + 1 ) * ( src[4+i - FDEC_STRIDE] - src[2 - i -FDEC_STRIDE] ); |
289 | 0 | V += ( i + 1 ) * ( src[-1 +(i+4)*FDEC_STRIDE] - src[-1+(2-i)*FDEC_STRIDE] ); |
290 | 0 | } |
291 | |
|
292 | 0 | int a = 16 * ( src[-1+7*FDEC_STRIDE] + src[7 - FDEC_STRIDE] ); |
293 | 0 | int b = ( 17 * H + 16 ) >> 5; |
294 | 0 | int c = ( 17 * V + 16 ) >> 5; |
295 | 0 | int i00 = a -3*b -3*c + 16; |
296 | |
|
297 | 0 | for( int y = 0; y < 8; y++ ) |
298 | 0 | { |
299 | 0 | int pix = i00; |
300 | 0 | for( int x = 0; x < 8; x++ ) |
301 | 0 | { |
302 | 0 | src[x] = x264_clip_pixel( pix>>5 ); |
303 | 0 | pix += b; |
304 | 0 | } |
305 | 0 | src += FDEC_STRIDE; |
306 | 0 | i00 += c; |
307 | 0 | } |
308 | 0 | } Unexecuted instantiation: x264_8_predict_8x8c_p_c Unexecuted instantiation: x264_10_predict_8x8c_p_c |
309 | | |
310 | | /**************************************************************************** |
311 | | * 8x16 prediction for intra chroma block (4:2:2) |
312 | | ****************************************************************************/ |
313 | | |
314 | | static void predict_8x16c_dc_128_c( pixel *src ) |
315 | 0 | { |
316 | 0 | for( int y = 0; y < 16; y++ ) |
317 | 0 | { |
318 | 0 | MPIXEL_X4( src+0 ) = PIXEL_SPLAT_X4( 1 << (BIT_DEPTH-1) ); |
319 | 0 | MPIXEL_X4( src+4 ) = PIXEL_SPLAT_X4( 1 << (BIT_DEPTH-1) ); |
320 | 0 | src += FDEC_STRIDE; |
321 | 0 | } |
322 | 0 | } |
323 | | static void predict_8x16c_dc_left_c( pixel *src ) |
324 | 0 | { |
325 | 0 | for( int i = 0; i < 4; i++ ) |
326 | 0 | { |
327 | 0 | int dc = 0; |
328 | |
|
329 | 0 | for( int y = 0; y < 4; y++ ) |
330 | 0 | dc += src[y*FDEC_STRIDE - 1]; |
331 | |
|
332 | 0 | pixel4 dcsplat = PIXEL_SPLAT_X4( (dc + 2) >> 2 ); |
333 | |
|
334 | 0 | for( int y = 0; y < 4; y++ ) |
335 | 0 | { |
336 | 0 | MPIXEL_X4( src+0 ) = dcsplat; |
337 | 0 | MPIXEL_X4( src+4 ) = dcsplat; |
338 | 0 | src += FDEC_STRIDE; |
339 | 0 | } |
340 | 0 | } |
341 | 0 | } |
342 | | static void predict_8x16c_dc_top_c( pixel *src ) |
343 | 0 | { |
344 | 0 | int dc0 = 0, dc1 = 0; |
345 | |
|
346 | 0 | for( int x = 0; x < 4; x++ ) |
347 | 0 | { |
348 | 0 | dc0 += src[x - FDEC_STRIDE]; |
349 | 0 | dc1 += src[x + 4 - FDEC_STRIDE]; |
350 | 0 | } |
351 | 0 | pixel4 dc0splat = PIXEL_SPLAT_X4( ( dc0 + 2 ) >> 2 ); |
352 | 0 | pixel4 dc1splat = PIXEL_SPLAT_X4( ( dc1 + 2 ) >> 2 ); |
353 | |
|
354 | 0 | for( int y = 0; y < 16; y++ ) |
355 | 0 | { |
356 | 0 | MPIXEL_X4( src+0 ) = dc0splat; |
357 | 0 | MPIXEL_X4( src+4 ) = dc1splat; |
358 | 0 | src += FDEC_STRIDE; |
359 | 0 | } |
360 | 0 | } |
361 | | void x264_predict_8x16c_dc_c( pixel *src ) |
362 | 0 | { |
363 | 0 | int s0 = 0, s1 = 0, s2 = 0, s3 = 0, s4 = 0, s5 = 0; |
364 | | |
365 | | /* |
366 | | s0 s1 |
367 | | s2 |
368 | | s3 |
369 | | s4 |
370 | | s5 |
371 | | */ |
372 | 0 | for( int i = 0; i < 4; i++ ) |
373 | 0 | { |
374 | 0 | s0 += src[i+0 - FDEC_STRIDE]; |
375 | 0 | s1 += src[i+4 - FDEC_STRIDE]; |
376 | 0 | s2 += src[-1 + (i+0) * FDEC_STRIDE]; |
377 | 0 | s3 += src[-1 + (i+4) * FDEC_STRIDE]; |
378 | 0 | s4 += src[-1 + (i+8) * FDEC_STRIDE]; |
379 | 0 | s5 += src[-1 + (i+12) * FDEC_STRIDE]; |
380 | 0 | } |
381 | | /* |
382 | | dc0 dc1 |
383 | | dc2 dc3 |
384 | | dc4 dc5 |
385 | | dc6 dc7 |
386 | | */ |
387 | 0 | pixel4 dc0 = PIXEL_SPLAT_X4( ( s0 + s2 + 4 ) >> 3 ); |
388 | 0 | pixel4 dc1 = PIXEL_SPLAT_X4( ( s1 + 2 ) >> 2 ); |
389 | 0 | pixel4 dc2 = PIXEL_SPLAT_X4( ( s3 + 2 ) >> 2 ); |
390 | 0 | pixel4 dc3 = PIXEL_SPLAT_X4( ( s1 + s3 + 4 ) >> 3 ); |
391 | 0 | pixel4 dc4 = PIXEL_SPLAT_X4( ( s4 + 2 ) >> 2 ); |
392 | 0 | pixel4 dc5 = PIXEL_SPLAT_X4( ( s1 + s4 + 4 ) >> 3 ); |
393 | 0 | pixel4 dc6 = PIXEL_SPLAT_X4( ( s5 + 2 ) >> 2 ); |
394 | 0 | pixel4 dc7 = PIXEL_SPLAT_X4( ( s1 + s5 + 4 ) >> 3 ); |
395 | |
|
396 | 0 | for( int y = 0; y < 4; y++ ) |
397 | 0 | { |
398 | 0 | MPIXEL_X4( src+0 ) = dc0; |
399 | 0 | MPIXEL_X4( src+4 ) = dc1; |
400 | 0 | src += FDEC_STRIDE; |
401 | 0 | } |
402 | 0 | for( int y = 0; y < 4; y++ ) |
403 | 0 | { |
404 | 0 | MPIXEL_X4( src+0 ) = dc2; |
405 | 0 | MPIXEL_X4( src+4 ) = dc3; |
406 | 0 | src += FDEC_STRIDE; |
407 | 0 | } |
408 | 0 | for( int y = 0; y < 4; y++ ) |
409 | 0 | { |
410 | 0 | MPIXEL_X4( src+0 ) = dc4; |
411 | 0 | MPIXEL_X4( src+4 ) = dc5; |
412 | 0 | src += FDEC_STRIDE; |
413 | 0 | } |
414 | 0 | for( int y = 0; y < 4; y++ ) |
415 | 0 | { |
416 | 0 | MPIXEL_X4( src+0 ) = dc6; |
417 | 0 | MPIXEL_X4( src+4 ) = dc7; |
418 | 0 | src += FDEC_STRIDE; |
419 | 0 | } |
420 | 0 | } Unexecuted instantiation: x264_8_predict_8x16c_dc_c Unexecuted instantiation: x264_10_predict_8x16c_dc_c |
421 | | void x264_predict_8x16c_h_c( pixel *src ) |
422 | 0 | { |
423 | 0 | for( int i = 0; i < 16; i++ ) |
424 | 0 | { |
425 | 0 | pixel4 v = PIXEL_SPLAT_X4( src[-1] ); |
426 | 0 | MPIXEL_X4( src+0 ) = v; |
427 | 0 | MPIXEL_X4( src+4 ) = v; |
428 | 0 | src += FDEC_STRIDE; |
429 | 0 | } |
430 | 0 | } Unexecuted instantiation: x264_8_predict_8x16c_h_c Unexecuted instantiation: x264_10_predict_8x16c_h_c |
431 | | void x264_predict_8x16c_v_c( pixel *src ) |
432 | 0 | { |
433 | 0 | pixel4 v0 = MPIXEL_X4( src+0-FDEC_STRIDE ); |
434 | 0 | pixel4 v1 = MPIXEL_X4( src+4-FDEC_STRIDE ); |
435 | |
|
436 | 0 | for( int i = 0; i < 16; i++ ) |
437 | 0 | { |
438 | 0 | MPIXEL_X4( src+0 ) = v0; |
439 | 0 | MPIXEL_X4( src+4 ) = v1; |
440 | 0 | src += FDEC_STRIDE; |
441 | 0 | } |
442 | 0 | } Unexecuted instantiation: x264_8_predict_8x16c_v_c Unexecuted instantiation: x264_10_predict_8x16c_v_c |
443 | | void x264_predict_8x16c_p_c( pixel *src ) |
444 | 0 | { |
445 | 0 | int H = 0; |
446 | 0 | int V = 0; |
447 | |
|
448 | 0 | for( int i = 0; i < 4; i++ ) |
449 | 0 | H += ( i + 1 ) * ( src[4 + i - FDEC_STRIDE] - src[2 - i - FDEC_STRIDE] ); |
450 | 0 | for( int i = 0; i < 8; i++ ) |
451 | 0 | V += ( i + 1 ) * ( src[-1 + (i+8)*FDEC_STRIDE] - src[-1 + (6-i)*FDEC_STRIDE] ); |
452 | |
|
453 | 0 | int a = 16 * ( src[-1 + 15*FDEC_STRIDE] + src[7 - FDEC_STRIDE] ); |
454 | 0 | int b = ( 17 * H + 16 ) >> 5; |
455 | 0 | int c = ( 5 * V + 32 ) >> 6; |
456 | 0 | int i00 = a -3*b -7*c + 16; |
457 | |
|
458 | 0 | for( int y = 0; y < 16; y++ ) |
459 | 0 | { |
460 | 0 | int pix = i00; |
461 | 0 | for( int x = 0; x < 8; x++ ) |
462 | 0 | { |
463 | 0 | src[x] = x264_clip_pixel( pix>>5 ); |
464 | 0 | pix += b; |
465 | 0 | } |
466 | 0 | src += FDEC_STRIDE; |
467 | 0 | i00 += c; |
468 | 0 | } |
469 | 0 | } Unexecuted instantiation: x264_8_predict_8x16c_p_c Unexecuted instantiation: x264_10_predict_8x16c_p_c |
470 | | |
471 | | /**************************************************************************** |
472 | | * 4x4 prediction for intra luma block |
473 | | ****************************************************************************/ |
474 | | |
/* Address the pixel at offset (x,y) from the block origin in the
 * fixed-stride decode buffer; SRC_X4 accesses the same position as an
 * aligned group of four pixels. */
#define SRC(x,y) src[(x)+(y)*FDEC_STRIDE]
#define SRC_X4(x,y) MPIXEL_X4( &SRC(x,y) )

/* Fill the whole 4x4 block with the pixel4 value v (one store per row). */
#define PREDICT_4x4_DC(v)\
    SRC_X4(0,0) = SRC_X4(0,1) = SRC_X4(0,2) = SRC_X4(0,3) = v;
480 | | |
481 | | static void predict_4x4_dc_128_c( pixel *src ) |
482 | 0 | { |
483 | 0 | PREDICT_4x4_DC( PIXEL_SPLAT_X4( 1 << (BIT_DEPTH-1) ) ); |
484 | 0 | } |
485 | | static void predict_4x4_dc_left_c( pixel *src ) |
486 | 0 | { |
487 | 0 | pixel4 dc = PIXEL_SPLAT_X4( (SRC(-1,0) + SRC(-1,1) + SRC(-1,2) + SRC(-1,3) + 2) >> 2 ); |
488 | 0 | PREDICT_4x4_DC( dc ); |
489 | 0 | } |
490 | | static void predict_4x4_dc_top_c( pixel *src ) |
491 | 0 | { |
492 | 0 | pixel4 dc = PIXEL_SPLAT_X4( (SRC(0,-1) + SRC(1,-1) + SRC(2,-1) + SRC(3,-1) + 2) >> 2 ); |
493 | 0 | PREDICT_4x4_DC( dc ); |
494 | 0 | } |
495 | | void x264_predict_4x4_dc_c( pixel *src ) |
496 | 0 | { |
497 | 0 | pixel4 dc = PIXEL_SPLAT_X4( (SRC(-1,0) + SRC(-1,1) + SRC(-1,2) + SRC(-1,3) + |
498 | 0 | SRC(0,-1) + SRC(1,-1) + SRC(2,-1) + SRC(3,-1) + 4) >> 3 ); |
499 | 0 | PREDICT_4x4_DC( dc ); |
500 | 0 | } Unexecuted instantiation: x264_8_predict_4x4_dc_c Unexecuted instantiation: x264_10_predict_4x4_dc_c |
501 | | void x264_predict_4x4_h_c( pixel *src ) |
502 | 0 | { |
503 | 0 | SRC_X4(0,0) = PIXEL_SPLAT_X4( SRC(-1,0) ); |
504 | 0 | SRC_X4(0,1) = PIXEL_SPLAT_X4( SRC(-1,1) ); |
505 | 0 | SRC_X4(0,2) = PIXEL_SPLAT_X4( SRC(-1,2) ); |
506 | 0 | SRC_X4(0,3) = PIXEL_SPLAT_X4( SRC(-1,3) ); |
507 | 0 | } Unexecuted instantiation: x264_8_predict_4x4_h_c Unexecuted instantiation: x264_10_predict_4x4_h_c |
508 | | void x264_predict_4x4_v_c( pixel *src ) |
509 | 0 | { |
510 | 0 | PREDICT_4x4_DC(SRC_X4(0,-1)); |
511 | 0 | } Unexecuted instantiation: x264_8_predict_4x4_v_c Unexecuted instantiation: x264_10_predict_4x4_v_c |
512 | | |
/* Load the four left neighbours l0..l3 of a 4x4 block into locals. */
#define PREDICT_4x4_LOAD_LEFT\
    int l0 = SRC(-1,0);\
    int l1 = SRC(-1,1);\
    int l2 = SRC(-1,2);\
    UNUSED int l3 = SRC(-1,3);

/* Load the four top neighbours t0..t3 into locals. */
#define PREDICT_4x4_LOAD_TOP\
    int t0 = SRC(0,-1);\
    int t1 = SRC(1,-1);\
    int t2 = SRC(2,-1);\
    UNUSED int t3 = SRC(3,-1);

/* Load the four top-right neighbours t4..t7 into locals. */
#define PREDICT_4x4_LOAD_TOP_RIGHT\
    int t4 = SRC(4,-1);\
    int t5 = SRC(5,-1);\
    int t6 = SRC(6,-1);\
    UNUSED int t7 = SRC(7,-1);

/* Rounded 2-tap and 3-tap (1,2,1) averaging filters. */
#define F1(a,b) (((a)+(b)+1)>>1)
#define F2(a,b,c) (((a)+2*(b)+(c)+2)>>2)
533 | | |
/* 4x4 diagonal-down-left prediction: each anti-diagonal is the 3-tap
 * filtered value of consecutive top/top-right neighbours t0..t7. */
static void predict_4x4_ddl_c( pixel *src )
{
    PREDICT_4x4_LOAD_TOP
    PREDICT_4x4_LOAD_TOP_RIGHT
    SRC(0,0)= F2(t0,t1,t2);
    SRC(1,0)=SRC(0,1)= F2(t1,t2,t3);
    SRC(2,0)=SRC(1,1)=SRC(0,2)= F2(t2,t3,t4);
    SRC(3,0)=SRC(2,1)=SRC(1,2)=SRC(0,3)= F2(t3,t4,t5);
    SRC(3,1)=SRC(2,2)=SRC(1,3)= F2(t4,t5,t6);
    SRC(3,2)=SRC(2,3)= F2(t5,t6,t7);
    /* last corner: t7 is duplicated as there is no t8 */
    SRC(3,3)= F2(t6,t7,t7);
}
/* 4x4 diagonal-down-right prediction: diagonals filtered from the left
 * column (l0..l3), the top-left corner (lt) and the top row (t0..t3). */
static void predict_4x4_ddr_c( pixel *src )
{
    int lt = SRC(-1,-1);
    PREDICT_4x4_LOAD_LEFT
    PREDICT_4x4_LOAD_TOP
    SRC(3,0)= F2(t3,t2,t1);
    SRC(2,0)=SRC(3,1)= F2(t2,t1,t0);
    SRC(1,0)=SRC(2,1)=SRC(3,2)= F2(t1,t0,lt);
    SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)= F2(t0,lt,l0);
    SRC(0,1)=SRC(1,2)=SRC(2,3)= F2(lt,l0,l1);
    SRC(0,2)=SRC(1,3)= F2(l0,l1,l2);
    SRC(0,3)= F2(l1,l2,l3);
}
559 | | |
/* 4x4 vertical-right prediction: alternating 2-tap (F1) and 3-tap (F2)
 * filtered values along half-pel-shifted vertical diagonals. */
static void predict_4x4_vr_c( pixel *src )
{
    int lt = SRC(-1,-1);
    PREDICT_4x4_LOAD_LEFT
    PREDICT_4x4_LOAD_TOP
    SRC(0,3)= F2(l2,l1,l0);
    SRC(0,2)= F2(l1,l0,lt);
    SRC(0,1)=SRC(1,3)= F2(l0,lt,t0);
    SRC(0,0)=SRC(1,2)= F1(lt,t0);
    SRC(1,1)=SRC(2,3)= F2(lt,t0,t1);
    SRC(1,0)=SRC(2,2)= F1(t0,t1);
    SRC(2,1)=SRC(3,3)= F2(t0,t1,t2);
    SRC(2,0)=SRC(3,2)= F1(t1,t2);
    SRC(3,1)= F2(t1,t2,t3);
    SRC(3,0)= F1(t2,t3);
}
576 | | |
/* 4x4 horizontal-down prediction: alternating 2-tap and 3-tap filtered
 * values along half-pel-shifted horizontal diagonals. */
static void predict_4x4_hd_c( pixel *src )
{
    int lt= SRC(-1,-1);
    PREDICT_4x4_LOAD_LEFT
    PREDICT_4x4_LOAD_TOP
    SRC(0,3)= F1(l2,l3);
    SRC(1,3)= F2(l1,l2,l3);
    SRC(0,2)=SRC(2,3)= F1(l1,l2);
    SRC(1,2)=SRC(3,3)= F2(l0,l1,l2);
    SRC(0,1)=SRC(2,2)= F1(l0,l1);
    SRC(1,1)=SRC(3,2)= F2(lt,l0,l1);
    SRC(0,0)=SRC(2,1)= F1(lt,l0);
    SRC(1,0)=SRC(3,1)= F2(t0,lt,l0);
    SRC(2,0)= F2(t1,t0,lt);
    SRC(3,0)= F2(t2,t1,t0);
}
593 | | |
/* 4x4 vertical-left prediction: filtered from the top and top-right
 * neighbours t0..t6, alternating 2-tap and 3-tap rows. */
static void predict_4x4_vl_c( pixel *src )
{
    PREDICT_4x4_LOAD_TOP
    PREDICT_4x4_LOAD_TOP_RIGHT
    SRC(0,0)= F1(t0,t1);
    SRC(0,1)= F2(t0,t1,t2);
    SRC(1,0)=SRC(0,2)= F1(t1,t2);
    SRC(1,1)=SRC(0,3)= F2(t1,t2,t3);
    SRC(2,0)=SRC(1,2)= F1(t2,t3);
    SRC(2,1)=SRC(1,3)= F2(t2,t3,t4);
    SRC(3,0)=SRC(2,2)= F1(t3,t4);
    SRC(3,1)=SRC(2,3)= F2(t3,t4,t5);
    SRC(3,2)= F1(t4,t5);
    SRC(3,3)= F2(t4,t5,t6);
}
609 | | |
/* 4x4 horizontal-up prediction: filtered from the left neighbours l0..l3;
 * positions past the last neighbour are flat-filled with l3. */
static void predict_4x4_hu_c( pixel *src )
{
    PREDICT_4x4_LOAD_LEFT
    SRC(0,0)= F1(l0,l1);
    SRC(1,0)= F2(l0,l1,l2);
    SRC(2,0)=SRC(0,1)= F1(l1,l2);
    SRC(3,0)=SRC(1,1)= F2(l1,l2,l3);
    SRC(2,1)=SRC(0,2)= F1(l2,l3);
    SRC(3,1)=SRC(1,2)= F2(l2,l3,l3);
    SRC(3,2)=SRC(1,3)=SRC(0,3)=
    SRC(2,2)=SRC(2,3)=SRC(3,3)= l3;
}
622 | | |
623 | | /**************************************************************************** |
624 | | * 8x8 prediction for intra luma block |
625 | | ****************************************************************************/ |
626 | | |
/* Write the 3-tap filtered left neighbour y / top neighbour x into the
 * edge[] layout used by the 8x8 predictors (see comment below). */
#define PL(y) \
    edge[14-y] = F2(SRC(-1,y-1), SRC(-1,y), SRC(-1,y+1));
#define PT(x) \
    edge[16+x] = F2(SRC(x-1,-1), SRC(x,-1), SRC(x+1,-1));

/* Build the filtered edge[] array for 8x8 intra prediction.
 * i_neighbor says which neighbour blocks actually exist (MB_TOPLEFT,
 * MB_TOPRIGHT, ...); i_filters says which edges to produce. */
static void predict_8x8_filter_c( pixel *src, pixel edge[36], int i_neighbor, int i_filters )
{
    /* edge[7..14] = l7..l0
     * edge[15] = lt
     * edge[16..31] = t0 .. t15
     * edge[32] = t15 */

    int have_lt = i_neighbor & MB_TOPLEFT;
    if( i_filters & MB_LEFT )
    {
        edge[15] = (SRC(0,-1) + 2*SRC(-1,-1) + SRC(-1,0) + 2) >> 2;
        /* l0: substitute itself for the top-left sample when absent */
        edge[14] = ((have_lt ? SRC(-1,-1) : SRC(-1,0))
                 + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2;
        PL(1) PL(2) PL(3) PL(4) PL(5) PL(6)
        /* l7: bottom end of the column, filtered against itself */
        edge[6] =
        edge[7] = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2;
    }

    if( i_filters & MB_TOP )
    {
        int have_tr = i_neighbor & MB_TOPRIGHT;
        edge[16] = ((have_lt ? SRC(-1,-1) : SRC(0,-1))
                 + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2;
        PT(1) PT(2) PT(3) PT(4) PT(5) PT(6)
        edge[23] = (SRC(6,-1) + 2*SRC(7,-1)
                 + (have_tr ? SRC(8,-1) : SRC(7,-1)) + 2) >> 2;

        if( i_filters & MB_TOPRIGHT )
        {
            if( have_tr )
            {
                PT(8) PT(9) PT(10) PT(11) PT(12) PT(13) PT(14)
                edge[31] =
                edge[32] = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2;
            }
            else
            {
                /* no top-right block: replicate the last top sample t7 */
                MPIXEL_X4( edge+24 ) = PIXEL_SPLAT_X4( SRC(7,-1) );
                MPIXEL_X4( edge+28 ) = PIXEL_SPLAT_X4( SRC(7,-1) );
                edge[32] = SRC(7,-1);
            }
        }
    }
}
676 | | |
#undef PL
#undef PT

/* Redefine PL/PT as loads: after predict_8x8_filter_c, the neighbours live
 * in edge[] (l0..l7 at edge[14..7], lt at edge[15], t0..t15 at edge[16..31]). */
#define PL(y) \
    UNUSED int l##y = edge[14-y];
#define PT(x) \
    UNUSED int t##x = edge[16+x];
#define PREDICT_8x8_LOAD_TOPLEFT \
    int lt = edge[15];
#define PREDICT_8x8_LOAD_LEFT \
    PL(0) PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) PL(7)
#define PREDICT_8x8_LOAD_TOP \
    PT(0) PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) PT(7)
#define PREDICT_8x8_LOAD_TOPRIGHT \
    PT(8) PT(9) PT(10) PT(11) PT(12) PT(13) PT(14) PT(15)

/* Fill the 8x8 block with the pixel4 value v, row by row. */
#define PREDICT_8x8_DC(v) \
    for( int y = 0; y < 8; y++ ) { \
        MPIXEL_X4( src+0 ) = v; \
        MPIXEL_X4( src+4 ) = v; \
        src += FDEC_STRIDE; \
    }
699 | | |
700 | | static void predict_8x8_dc_128_c( pixel *src, pixel edge[36] ) |
701 | 0 | { |
702 | 0 | PREDICT_8x8_DC( PIXEL_SPLAT_X4( 1 << (BIT_DEPTH-1) ) ); |
703 | 0 | } |
704 | | static void predict_8x8_dc_left_c( pixel *src, pixel edge[36] ) |
705 | 0 | { |
706 | 0 | PREDICT_8x8_LOAD_LEFT |
707 | 0 | pixel4 dc = PIXEL_SPLAT_X4( (l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3 ); |
708 | 0 | PREDICT_8x8_DC( dc ); |
709 | 0 | } |
710 | | static void predict_8x8_dc_top_c( pixel *src, pixel edge[36] ) |
711 | 0 | { |
712 | 0 | PREDICT_8x8_LOAD_TOP |
713 | 0 | pixel4 dc = PIXEL_SPLAT_X4( (t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3 ); |
714 | 0 | PREDICT_8x8_DC( dc ); |
715 | 0 | } |
716 | | void x264_predict_8x8_dc_c( pixel *src, pixel edge[36] ) |
717 | 0 | { |
718 | 0 | PREDICT_8x8_LOAD_LEFT |
719 | 0 | PREDICT_8x8_LOAD_TOP |
720 | 0 | pixel4 dc = PIXEL_SPLAT_X4( (l0+l1+l2+l3+l4+l5+l6+l7+t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4 ); |
721 | 0 | PREDICT_8x8_DC( dc ); |
722 | 0 | } Unexecuted instantiation: x264_8_predict_8x8_dc_c Unexecuted instantiation: x264_10_predict_8x8_dc_c |
723 | | void x264_predict_8x8_h_c( pixel *src, pixel edge[36] ) |
724 | 0 | { |
725 | 0 | PREDICT_8x8_LOAD_LEFT |
726 | 0 | #define ROW(y) MPIXEL_X4( src+y*FDEC_STRIDE+0 ) =\ |
727 | 0 | MPIXEL_X4( src+y*FDEC_STRIDE+4 ) = PIXEL_SPLAT_X4( l##y ); |
728 | 0 | ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7); |
729 | 0 | #undef ROW |
730 | 0 | } Unexecuted instantiation: x264_8_predict_8x8_h_c Unexecuted instantiation: x264_10_predict_8x8_h_c |
731 | | void x264_predict_8x8_v_c( pixel *src, pixel edge[36] ) |
732 | 0 | { |
733 | 0 | pixel4 top[2] = { MPIXEL_X4( edge+16 ), |
734 | 0 | MPIXEL_X4( edge+20 ) }; |
735 | 0 | for( int y = 0; y < 8; y++ ) |
736 | 0 | { |
737 | 0 | MPIXEL_X4( src+y*FDEC_STRIDE+0 ) = top[0]; |
738 | 0 | MPIXEL_X4( src+y*FDEC_STRIDE+4 ) = top[1]; |
739 | 0 | } |
740 | 0 | } Unexecuted instantiation: x264_8_predict_8x8_v_c Unexecuted instantiation: x264_10_predict_8x8_v_c |
/* 8x8 diagonal-down-left prediction: each anti-diagonal is the 3-tap
 * filtered value of consecutive top/top-right neighbours t0..t15. */
static void predict_8x8_ddl_c( pixel *src, pixel edge[36] )
{
    PREDICT_8x8_LOAD_TOP
    PREDICT_8x8_LOAD_TOPRIGHT
    SRC(0,0)= F2(t0,t1,t2);
    SRC(0,1)=SRC(1,0)= F2(t1,t2,t3);
    SRC(0,2)=SRC(1,1)=SRC(2,0)= F2(t2,t3,t4);
    SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= F2(t3,t4,t5);
    SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= F2(t4,t5,t6);
    SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= F2(t5,t6,t7);
    SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= F2(t6,t7,t8);
    SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= F2(t7,t8,t9);
    SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= F2(t8,t9,t10);
    SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= F2(t9,t10,t11);
    SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= F2(t10,t11,t12);
    SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= F2(t11,t12,t13);
    SRC(5,7)=SRC(6,6)=SRC(7,5)= F2(t12,t13,t14);
    SRC(6,7)=SRC(7,6)= F2(t13,t14,t15);
    /* last corner: t15 duplicated, there is no t16 */
    SRC(7,7)= F2(t14,t15,t15);
}
/* 8x8 diagonal-down-right prediction: diagonals filtered from the left
 * column (l0..l7), the top-left corner (lt) and the top row (t0..t7). */
static void predict_8x8_ddr_c( pixel *src, pixel edge[36] )
{
    PREDICT_8x8_LOAD_TOP
    PREDICT_8x8_LOAD_LEFT
    PREDICT_8x8_LOAD_TOPLEFT
    SRC(0,7)= F2(l7,l6,l5);
    SRC(0,6)=SRC(1,7)= F2(l6,l5,l4);
    SRC(0,5)=SRC(1,6)=SRC(2,7)= F2(l5,l4,l3);
    SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= F2(l4,l3,l2);
    SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= F2(l3,l2,l1);
    SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= F2(l2,l1,l0);
    SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= F2(l1,l0,lt);
    SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= F2(l0,lt,t0);
    SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= F2(lt,t0,t1);
    SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= F2(t0,t1,t2);
    SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= F2(t1,t2,t3);
    SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= F2(t2,t3,t4);
    SRC(5,0)=SRC(6,1)=SRC(7,2)= F2(t3,t4,t5);
    SRC(6,0)=SRC(7,1)= F2(t4,t5,t6);
    SRC(7,0)= F2(t5,t6,t7);
}
/* 8x8 vertical-right prediction: alternating 2-tap (F1) and 3-tap (F2)
 * filtered values along half-pel-shifted vertical diagonals. */
static void predict_8x8_vr_c( pixel *src, pixel edge[36] )
{
    PREDICT_8x8_LOAD_TOP
    PREDICT_8x8_LOAD_LEFT
    PREDICT_8x8_LOAD_TOPLEFT
    SRC(0,6)= F2(l5,l4,l3);
    SRC(0,7)= F2(l6,l5,l4);
    SRC(0,4)=SRC(1,6)= F2(l3,l2,l1);
    SRC(0,5)=SRC(1,7)= F2(l4,l3,l2);
    SRC(0,2)=SRC(1,4)=SRC(2,6)= F2(l1,l0,lt);
    SRC(0,3)=SRC(1,5)=SRC(2,7)= F2(l2,l1,l0);
    SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= F2(l0,lt,t0);
    SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= F1(lt,t0);
    SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= F2(lt,t0,t1);
    SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= F1(t0,t1);
    SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= F2(t0,t1,t2);
    SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= F1(t1,t2);
    SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= F2(t1,t2,t3);
    SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= F1(t2,t3);
    SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= F2(t2,t3,t4);
    SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= F1(t3,t4);
    SRC(5,1)=SRC(6,3)=SRC(7,5)= F2(t3,t4,t5);
    SRC(5,0)=SRC(6,2)=SRC(7,4)= F1(t4,t5);
    SRC(6,1)=SRC(7,3)= F2(t4,t5,t6);
    SRC(6,0)=SRC(7,2)= F1(t5,t6);
    SRC(7,1)= F2(t5,t6,t7);
    SRC(7,0)= F1(t6,t7);
}
/* 8x8 horizontal-down prediction, written with packed stores: each p*
 * packs two filtered pixels, and pack_pixel_2to4 joins two pairs into a
 * 4-pixel store; rows share pairs with the row above, shifted by two. */
static void predict_8x8_hd_c( pixel *src, pixel edge[36] )
{
    PREDICT_8x8_LOAD_TOP
    PREDICT_8x8_LOAD_LEFT
    PREDICT_8x8_LOAD_TOPLEFT
    int p1 = pack_pixel_1to2(F1(l6,l7), F2(l5,l6,l7));
    int p2 = pack_pixel_1to2(F1(l5,l6), F2(l4,l5,l6));
    int p3 = pack_pixel_1to2(F1(l4,l5), F2(l3,l4,l5));
    int p4 = pack_pixel_1to2(F1(l3,l4), F2(l2,l3,l4));
    int p5 = pack_pixel_1to2(F1(l2,l3), F2(l1,l2,l3));
    int p6 = pack_pixel_1to2(F1(l1,l2), F2(l0,l1,l2));
    int p7 = pack_pixel_1to2(F1(l0,l1), F2(lt,l0,l1));
    int p8 = pack_pixel_1to2(F1(lt,l0), F2(l0,lt,t0));
    int p9 = pack_pixel_1to2(F2(t1,t0,lt), F2(t2,t1,t0));
    int p10 = pack_pixel_1to2(F2(t3,t2,t1), F2(t4,t3,t2));
    int p11 = pack_pixel_1to2(F2(t5,t4,t3), F2(t6,t5,t4));
    SRC_X4(0,7)= pack_pixel_2to4(p1,p2);
    SRC_X4(0,6)= pack_pixel_2to4(p2,p3);
    SRC_X4(4,7)=SRC_X4(0,5)= pack_pixel_2to4(p3,p4);
    SRC_X4(4,6)=SRC_X4(0,4)= pack_pixel_2to4(p4,p5);
    SRC_X4(4,5)=SRC_X4(0,3)= pack_pixel_2to4(p5,p6);
    SRC_X4(4,4)=SRC_X4(0,2)= pack_pixel_2to4(p6,p7);
    SRC_X4(4,3)=SRC_X4(0,1)= pack_pixel_2to4(p7,p8);
    SRC_X4(4,2)=SRC_X4(0,0)= pack_pixel_2to4(p8,p9);
    SRC_X4(4,1)= pack_pixel_2to4(p9,p10);
    SRC_X4(4,0)= pack_pixel_2to4(p10,p11);
}
/* Intra_8x8 Vertical-Left prediction (H.264 mode 7, reference C version).
 * Direction lies between vertical and diagonal-down-left, so only the top
 * and top-right edge samples (t0..t12) are needed — no left neighbors.
 * Even rows use the 2-tap filter F1, odd rows the 3-tap filter F2; each
 * assignment chain fills one down-left diagonal of equal-valued pixels. */
static void predict_8x8_vl_c( pixel *src, pixel edge[36] )
{
    PREDICT_8x8_LOAD_TOP
    PREDICT_8x8_LOAD_TOPRIGHT
    SRC(0,0)= F1(t0,t1);
    SRC(0,1)= F2(t0,t1,t2);
    SRC(0,2)=SRC(1,0)= F1(t1,t2);
    SRC(0,3)=SRC(1,1)= F2(t1,t2,t3);
    SRC(0,4)=SRC(1,2)=SRC(2,0)= F1(t2,t3);
    SRC(0,5)=SRC(1,3)=SRC(2,1)= F2(t2,t3,t4);
    SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= F1(t3,t4);
    SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= F2(t3,t4,t5);
    SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= F1(t4,t5);
    SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= F2(t4,t5,t6);
    SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= F1(t5,t6);
    SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= F2(t5,t6,t7);
    SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= F1(t6,t7);
    SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= F2(t6,t7,t8);
    SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= F1(t7,t8);
    SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= F2(t7,t8,t9);
    SRC(5,6)=SRC(6,4)=SRC(7,2)= F1(t8,t9);
    SRC(5,7)=SRC(6,5)=SRC(7,3)= F2(t8,t9,t10);
    SRC(6,6)=SRC(7,4)= F1(t9,t10);
    SRC(6,7)=SRC(7,5)= F2(t9,t10,t11);
    SRC(7,6)= F1(t10,t11);
    SRC(7,7)= F2(t10,t11,t12);
}
/* Intra_8x8 Horizontal-Up prediction (H.264 mode 8, reference C version).
 * Uses only the left edge (l0..l7).  As in predict_8x8_hd_c, filtered
 * samples are packed into 2- then 4-pixel words so SRC_X4 can store four
 * pixels per write.  Past the last left sample the prediction saturates to
 * l7, matching the spec's clamping of the extrapolated zHU positions. */
static void predict_8x8_hu_c( pixel *src, pixel edge[36] )
{
    PREDICT_8x8_LOAD_LEFT
    int p1 = pack_pixel_1to2(F1(l0,l1), F2(l0,l1,l2));
    int p2 = pack_pixel_1to2(F1(l1,l2), F2(l1,l2,l3));
    int p3 = pack_pixel_1to2(F1(l2,l3), F2(l2,l3,l4));
    int p4 = pack_pixel_1to2(F1(l3,l4), F2(l3,l4,l5));
    int p5 = pack_pixel_1to2(F1(l4,l5), F2(l4,l5,l6));
    int p6 = pack_pixel_1to2(F1(l5,l6), F2(l5,l6,l7));
    int p7 = pack_pixel_1to2(F1(l6,l7), F2(l6,l7,l7));
    int p8 = pack_pixel_1to2(l7,l7);  /* flat tail: both pixels are l7 */
    SRC_X4(0,0)= pack_pixel_2to4(p1,p2);
    SRC_X4(0,1)= pack_pixel_2to4(p2,p3);
    SRC_X4(4,0)=SRC_X4(0,2)= pack_pixel_2to4(p3,p4);
    SRC_X4(4,1)=SRC_X4(0,3)= pack_pixel_2to4(p4,p5);
    SRC_X4(4,2)=SRC_X4(0,4)= pack_pixel_2to4(p5,p6);
    SRC_X4(4,3)=SRC_X4(0,5)= pack_pixel_2to4(p6,p7);
    SRC_X4(4,4)=SRC_X4(0,6)= pack_pixel_2to4(p7,p8);
    /* Bottom-right region is entirely the saturated l7 value. */
    SRC_X4(4,5)=SRC_X4(4,6)= SRC_X4(0,7) = SRC_X4(4,7) = pack_pixel_2to4(p8,p8);
}
885 | | |
886 | | /**************************************************************************** |
887 | | * Exported functions: |
888 | | ****************************************************************************/ |
/* Populate the 16x16 intra-prediction dispatch table.
 * pf[] is indexed by the I_PRED_16x16_* mode enums.  The portable C
 * implementations are installed first as a complete baseline; each
 * platform-specific init is then given the chance to overwrite entries
 * with optimized versions selected by the cpu capability flags.  Order
 * matters: C fallbacks must be set before any SIMD override. */
void x264_predict_16x16_init( uint32_t cpu, x264_predict_t pf[7] )
{
    pf[I_PRED_16x16_V ]     = x264_predict_16x16_v_c;
    pf[I_PRED_16x16_H ]     = x264_predict_16x16_h_c;
    pf[I_PRED_16x16_DC]     = x264_predict_16x16_dc_c;
    pf[I_PRED_16x16_P ]     = x264_predict_16x16_p_c;
    pf[I_PRED_16x16_DC_LEFT]= predict_16x16_dc_left_c;
    pf[I_PRED_16x16_DC_TOP ]= predict_16x16_dc_top_c;
    pf[I_PRED_16x16_DC_128 ]= predict_16x16_dc_128_c;

#if HAVE_MMX
    x264_predict_16x16_init_mmx( cpu, pf );
#endif

#if HAVE_ALTIVEC
    if( cpu&X264_CPU_ALTIVEC )
        x264_predict_16x16_init_altivec( pf );
#endif

#if HAVE_ARMV6
    x264_predict_16x16_init_arm( cpu, pf );
#endif

#if HAVE_AARCH64
    x264_predict_16x16_init_aarch64( cpu, pf );
#endif

/* MIPS MSA versions exist only for 8-bit depth. */
#if !HIGH_BIT_DEPTH
#if HAVE_MSA
    if( cpu&X264_CPU_MSA )
    {
        pf[I_PRED_16x16_V ]     = x264_intra_predict_vert_16x16_msa;
        pf[I_PRED_16x16_H ]     = x264_intra_predict_hor_16x16_msa;
        pf[I_PRED_16x16_DC]     = x264_intra_predict_dc_16x16_msa;
        pf[I_PRED_16x16_P ]     = x264_intra_predict_plane_16x16_msa;
        pf[I_PRED_16x16_DC_LEFT]= x264_intra_predict_dc_left_16x16_msa;
        pf[I_PRED_16x16_DC_TOP ]= x264_intra_predict_dc_top_16x16_msa;
        pf[I_PRED_16x16_DC_128 ]= x264_intra_predict_dc_128_16x16_msa;
    }
#endif
#endif

#if HAVE_LSX
    x264_predict_16x16_init_loongarch( cpu, pf );
#endif
}
935 | | |
/* Populate the 8x8 chroma (4:2:0) intra-prediction dispatch table.
 * pf[] is indexed by the I_PRED_CHROMA_* mode enums; C baselines are
 * installed first, then platform inits may override entries according
 * to the cpu flags. */
void x264_predict_8x8c_init( uint32_t cpu, x264_predict_t pf[7] )
{
    pf[I_PRED_CHROMA_V ]     = x264_predict_8x8c_v_c;
    pf[I_PRED_CHROMA_H ]     = x264_predict_8x8c_h_c;
    pf[I_PRED_CHROMA_DC]     = x264_predict_8x8c_dc_c;
    pf[I_PRED_CHROMA_P ]     = x264_predict_8x8c_p_c;
    pf[I_PRED_CHROMA_DC_LEFT]= predict_8x8c_dc_left_c;
    pf[I_PRED_CHROMA_DC_TOP ]= predict_8x8c_dc_top_c;
    pf[I_PRED_CHROMA_DC_128 ]= predict_8x8c_dc_128_c;

#if HAVE_MMX
    x264_predict_8x8c_init_mmx( cpu, pf );
#endif

#if HAVE_ALTIVEC
    if( cpu&X264_CPU_ALTIVEC )
        x264_predict_8x8c_init_altivec( pf );
#endif

#if HAVE_ARMV6
    x264_predict_8x8c_init_arm( cpu, pf );
#endif

#if HAVE_AARCH64
    x264_predict_8x8c_init_aarch64( cpu, pf );
#endif

/* MSA provides only the plane-mode kernel, and only at 8-bit depth. */
#if !HIGH_BIT_DEPTH
#if HAVE_MSA
    if( cpu&X264_CPU_MSA )
    {
        pf[I_PRED_CHROMA_P ] = x264_intra_predict_plane_8x8_msa;
    }
#endif
#endif

#if HAVE_LSX
    x264_predict_8x8c_init_loongarch( cpu, pf );
#endif
}
976 | | |
/* Populate the 8x16 chroma (4:2:2) intra-prediction dispatch table.
 * Same pattern as x264_predict_8x8c_init: C baselines first, then
 * per-platform overrides.  Fewer SIMD backends exist for 4:2:2, hence
 * the shorter list of platform inits. */
void x264_predict_8x16c_init( uint32_t cpu, x264_predict_t pf[7] )
{
    pf[I_PRED_CHROMA_V ]     = x264_predict_8x16c_v_c;
    pf[I_PRED_CHROMA_H ]     = x264_predict_8x16c_h_c;
    pf[I_PRED_CHROMA_DC]     = x264_predict_8x16c_dc_c;
    pf[I_PRED_CHROMA_P ]     = x264_predict_8x16c_p_c;
    pf[I_PRED_CHROMA_DC_LEFT]= predict_8x16c_dc_left_c;
    pf[I_PRED_CHROMA_DC_TOP ]= predict_8x16c_dc_top_c;
    pf[I_PRED_CHROMA_DC_128 ]= predict_8x16c_dc_128_c;

#if HAVE_MMX
    x264_predict_8x16c_init_mmx( cpu, pf );
#endif

#if HAVE_ARMV6
    x264_predict_8x16c_init_arm( cpu, pf );
#endif

#if HAVE_AARCH64
    x264_predict_8x16c_init_aarch64( cpu, pf );
#endif
}
999 | | |
/* Populate the 8x8 luma intra-prediction dispatch table (12 modes) and
 * the edge-filter hook.  pf[] is indexed by the I_PRED_8x8_* enums;
 * *predict_filter prepares the 36-entry filtered edge[] array consumed
 * by the mode functions above.  C baselines are installed first, then
 * platform inits may override both the table and the filter based on
 * the cpu flags. */
void x264_predict_8x8_init( uint32_t cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_filter )
{
    pf[I_PRED_8x8_V]      = x264_predict_8x8_v_c;
    pf[I_PRED_8x8_H]      = x264_predict_8x8_h_c;
    pf[I_PRED_8x8_DC]     = x264_predict_8x8_dc_c;
    pf[I_PRED_8x8_DDL]    = predict_8x8_ddl_c;
    pf[I_PRED_8x8_DDR]    = predict_8x8_ddr_c;
    pf[I_PRED_8x8_VR]     = predict_8x8_vr_c;
    pf[I_PRED_8x8_HD]     = predict_8x8_hd_c;
    pf[I_PRED_8x8_VL]     = predict_8x8_vl_c;
    pf[I_PRED_8x8_HU]     = predict_8x8_hu_c;
    pf[I_PRED_8x8_DC_LEFT]= predict_8x8_dc_left_c;
    pf[I_PRED_8x8_DC_TOP] = predict_8x8_dc_top_c;
    pf[I_PRED_8x8_DC_128] = predict_8x8_dc_128_c;
    *predict_filter       = predict_8x8_filter_c;

#if HAVE_MMX
    x264_predict_8x8_init_mmx( cpu, pf, predict_filter );
#endif

#if HAVE_ARMV6
    x264_predict_8x8_init_arm( cpu, pf, predict_filter );
#endif

#if HAVE_AARCH64
    x264_predict_8x8_init_aarch64( cpu, pf, predict_filter );
#endif

/* MSA provides only the DDL kernel, and only at 8-bit depth. */
#if !HIGH_BIT_DEPTH
#if HAVE_MSA
    if( cpu&X264_CPU_MSA )
    {
        pf[I_PRED_8x8_DDL] = x264_intra_predict_ddl_8x8_msa;
    }
#endif
#endif

#if HAVE_LSX
    x264_predict_8x8_init_loongarch( cpu, pf, predict_filter );
#endif
}
1041 | | |
/* Populate the 4x4 luma intra-prediction dispatch table (12 modes).
 * pf[] is indexed by the I_PRED_4x4_* enums; C baselines are installed
 * first, then platform inits may override entries based on cpu flags. */
void x264_predict_4x4_init( uint32_t cpu, x264_predict_t pf[12] )
{
    pf[I_PRED_4x4_V]      = x264_predict_4x4_v_c;
    pf[I_PRED_4x4_H]      = x264_predict_4x4_h_c;
    pf[I_PRED_4x4_DC]     = x264_predict_4x4_dc_c;
    pf[I_PRED_4x4_DDL]    = predict_4x4_ddl_c;
    pf[I_PRED_4x4_DDR]    = predict_4x4_ddr_c;
    pf[I_PRED_4x4_VR]     = predict_4x4_vr_c;
    pf[I_PRED_4x4_HD]     = predict_4x4_hd_c;
    pf[I_PRED_4x4_VL]     = predict_4x4_vl_c;
    pf[I_PRED_4x4_HU]     = predict_4x4_hu_c;
    pf[I_PRED_4x4_DC_LEFT]= predict_4x4_dc_left_c;
    pf[I_PRED_4x4_DC_TOP] = predict_4x4_dc_top_c;
    pf[I_PRED_4x4_DC_128] = predict_4x4_dc_128_c;

#if HAVE_MMX
    x264_predict_4x4_init_mmx( cpu, pf );
#endif

#if HAVE_ARMV6
    x264_predict_4x4_init_arm( cpu, pf );
#endif

#if HAVE_AARCH64
    x264_predict_4x4_init_aarch64( cpu, pf );
#endif

#if HAVE_LSX
    x264_predict_4x4_init_loongarch( cpu, pf );
#endif
}
1073 | | |