/work/x264/common/predict.c
Line | Count | Source |
1 | | /***************************************************************************** |
2 | | * predict.c: intra prediction |
3 | | ***************************************************************************** |
4 | | * Copyright (C) 2003-2025 x264 project |
5 | | * |
6 | | * Authors: Laurent Aimar <fenrir@via.ecp.fr> |
7 | | * Loren Merritt <lorenm@u.washington.edu> |
8 | | * Fiona Glaser <fiona@x264.com> |
9 | | * Henrik Gramner <henrik@gramner.com> |
10 | | * |
11 | | * This program is free software; you can redistribute it and/or modify |
12 | | * it under the terms of the GNU General Public License as published by |
13 | | * the Free Software Foundation; either version 2 of the License, or |
14 | | * (at your option) any later version. |
15 | | * |
16 | | * This program is distributed in the hope that it will be useful, |
17 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
18 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
19 | | * GNU General Public License for more details. |
20 | | * |
21 | | * You should have received a copy of the GNU General Public License |
22 | | * along with this program; if not, write to the Free Software |
23 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. |
24 | | * |
25 | | * This program is also available under a commercial proprietary license. |
26 | | * For more information, contact us at licensing@x264.com. |
27 | | *****************************************************************************/ |
28 | | |
29 | | /* predict4x4 are inspired from ffmpeg h264 decoder */ |
30 | | |
31 | | |
32 | | #include "common.h" |
33 | | |
34 | | #if HAVE_MMX |
35 | | # include "x86/predict.h" |
36 | | #endif |
37 | | #if HAVE_ALTIVEC |
38 | | # include "ppc/predict.h" |
39 | | #endif |
40 | | #if HAVE_ARMV6 |
41 | | # include "arm/predict.h" |
42 | | #endif |
43 | | #if HAVE_AARCH64 |
44 | | # include "aarch64/predict.h" |
45 | | #endif |
46 | | #if HAVE_MSA |
47 | | # include "mips/predict.h" |
48 | | #endif |
49 | | #if HAVE_LSX |
50 | | # include "loongarch/predict.h" |
51 | | #endif |
52 | | |
53 | | /**************************************************************************** |
54 | | * 16x16 prediction for intra luma block |
55 | | ****************************************************************************/ |
56 | | |
/* PREDICT_16x16_DC(v): for 16 rows, stores v through MPIXEL_X4 at column
 * offsets 0, 4, 8 and 12 of the caller's local `src`, then advances `src` by
 * FDEC_STRIDE. The macro mutates `src`, so after expansion it points 16 rows
 * below where it started. `v` is expanded four times per row; callers in this
 * file pass either a local variable or a constant expression. */
57 | | #define PREDICT_16x16_DC(v)\ |
58 | 415k | for( int i = 0; i < 16; i++ )\ |
59 | 390k | {\ |
60 | 390k | MPIXEL_X4( src+ 0 ) = v;\ |
61 | 390k | MPIXEL_X4( src+ 4 ) = v;\ |
62 | 390k | MPIXEL_X4( src+ 8 ) = v;\ |
63 | 390k | MPIXEL_X4( src+12 ) = v;\ |
64 | 390k | src += FDEC_STRIDE;\ |
65 | 390k | } |
66 | | |
/* 16x16 DC prediction using both neighbours.
 * Accumulates the 16 pixels in the column left of the block
 * (src[-1 + i*FDEC_STRIDE]) and the 16 pixels in the row above it
 * (src[i - FDEC_STRIDE]), takes the rounded mean of the 32 samples
 * ((dc + 16) >> 5), splats it with PIXEL_SPLAT_X4 and fills the block via
 * PREDICT_16x16_DC.
 * src: top-left pixel of the block; the row above and the column to the left
 *      must be readable. */
67 | | void x264_predict_16x16_dc_c( pixel *src ) |
68 | 17.7k | { |
69 | 17.7k | int dc = 0; |
70 | | |
71 | 302k | for( int i = 0; i < 16; i++ ) |
72 | 284k | { |
73 | 284k | dc += src[-1 + i * FDEC_STRIDE]; |
74 | 284k | dc += src[i - FDEC_STRIDE]; |
75 | 284k | } |
76 | 17.7k | pixel4 dcsplat = PIXEL_SPLAT_X4( ( dc + 16 ) >> 5 ); |
77 | | |
78 | 17.7k | PREDICT_16x16_DC( dcsplat ); |
79 | 17.7k | } x264_8_predict_16x16_dc_c Line | Count | Source | 68 | 17.7k | { | 69 | 17.7k | int dc = 0; | 70 | | | 71 | 302k | for( int i = 0; i < 16; i++ ) | 72 | 284k | { | 73 | 284k | dc += src[-1 + i * FDEC_STRIDE]; | 74 | 284k | dc += src[i - FDEC_STRIDE]; | 75 | 284k | } | 76 | 17.7k | pixel4 dcsplat = PIXEL_SPLAT_X4( ( dc + 16 ) >> 5 ); | 77 | | | 78 | 17.7k | PREDICT_16x16_DC( dcsplat ); | 79 | 17.7k | } |
Unexecuted instantiation: x264_10_predict_16x16_dc_c |
/* 16x16 DC prediction from the left neighbour only.
 * Sums the 16 pixels of the column at src[-1 + i*FDEC_STRIDE], takes the
 * rounded mean ((dc + 8) >> 4) and fills the block with it. The row above the
 * block is not read. */
80 | | static void predict_16x16_dc_left_c( pixel *src ) |
81 | 4.48k | { |
82 | 4.48k | int dc = 0; |
83 | | |
84 | 76.1k | for( int i = 0; i < 16; i++ ) |
85 | 71.7k | dc += src[-1 + i * FDEC_STRIDE]; |
86 | 4.48k | pixel4 dcsplat = PIXEL_SPLAT_X4( ( dc + 8 ) >> 4 ); |
87 | | |
88 | 4.48k | PREDICT_16x16_DC( dcsplat ); |
89 | 4.48k | } |
/* 16x16 DC prediction from the top neighbour only.
 * Sums the 16 pixels of the row at src[i - FDEC_STRIDE], takes the rounded
 * mean ((dc + 8) >> 4) and fills the block with it. The left column is not
 * read. */
90 | | static void predict_16x16_dc_top_c( pixel *src ) |
91 | 1.72k | { |
92 | 1.72k | int dc = 0; |
93 | | |
94 | 29.3k | for( int i = 0; i < 16; i++ ) |
95 | 27.6k | dc += src[i - FDEC_STRIDE]; |
96 | 1.72k | pixel4 dcsplat = PIXEL_SPLAT_X4( ( dc + 8 ) >> 4 ); |
97 | | |
98 | 1.72k | PREDICT_16x16_DC( dcsplat ); |
99 | 1.72k | } |
/* 16x16 DC prediction that reads no neighbours: fills the block with the
 * constant 1 << (BIT_DEPTH-1), i.e. the midpoint of the sample range
 * (128 when BIT_DEPTH is 8, which is where the "128" in the name comes from). */
100 | | static void predict_16x16_dc_128_c( pixel *src ) |
101 | 454 | { |
102 | 454 | PREDICT_16x16_DC( PIXEL_SPLAT_X4( 1 << (BIT_DEPTH-1) ) ); |
103 | 454 | } |
/* 16x16 horizontal prediction: for each of the 16 rows, reads the pixel
 * immediately left of the row (src[-1]), splats it, and stores it at column
 * offsets 0, 4, 8 and 12. `src` is advanced by FDEC_STRIDE per row, so src[-1]
 * always refers to the current row's left neighbour. */
104 | | void x264_predict_16x16_h_c( pixel *src ) |
105 | 15.0k | { |
106 | 255k | for( int i = 0; i < 16; i++ ) |
107 | 240k | { |
108 | 240k | const pixel4 v = PIXEL_SPLAT_X4( src[-1] ); |
109 | 240k | MPIXEL_X4( src+ 0 ) = v; |
110 | 240k | MPIXEL_X4( src+ 4 ) = v; |
111 | 240k | MPIXEL_X4( src+ 8 ) = v; |
112 | 240k | MPIXEL_X4( src+12 ) = v; |
113 | 240k | src += FDEC_STRIDE; |
114 | 240k | } |
115 | 15.0k | } Line | Count | Source | 105 | 15.0k | { | 106 | 255k | for( int i = 0; i < 16; i++ ) | 107 | 240k | { | 108 | 240k | const pixel4 v = PIXEL_SPLAT_X4( src[-1] ); | 109 | 240k | MPIXEL_X4( src+ 0 ) = v; | 110 | 240k | MPIXEL_X4( src+ 4 ) = v; | 111 | 240k | MPIXEL_X4( src+ 8 ) = v; | 112 | 240k | MPIXEL_X4( src+12 ) = v; | 113 | 240k | src += FDEC_STRIDE; | 114 | 240k | } | 115 | 15.0k | } |
Unexecuted instantiation: x264_10_predict_16x16_h_c |
/* 16x16 vertical prediction: loads the row above the block once, as four
 * MPIXEL_X4 reads at column offsets 0, 4, 8, 12 of src - FDEC_STRIDE, and
 * copies those four values into each of the 16 rows of the block. */
116 | | void x264_predict_16x16_v_c( pixel *src ) |
117 | 39.8k | { |
118 | 39.8k | pixel4 v0 = MPIXEL_X4( &src[ 0-FDEC_STRIDE] ); |
119 | 39.8k | pixel4 v1 = MPIXEL_X4( &src[ 4-FDEC_STRIDE] ); |
120 | 39.8k | pixel4 v2 = MPIXEL_X4( &src[ 8-FDEC_STRIDE] ); |
121 | 39.8k | pixel4 v3 = MPIXEL_X4( &src[12-FDEC_STRIDE] ); |
122 | | |
123 | 676k | for( int i = 0; i < 16; i++ ) |
124 | 636k | { |
125 | 636k | MPIXEL_X4( src+ 0 ) = v0; |
126 | 636k | MPIXEL_X4( src+ 4 ) = v1; |
127 | 636k | MPIXEL_X4( src+ 8 ) = v2; |
128 | 636k | MPIXEL_X4( src+12 ) = v3; |
129 | 636k | src += FDEC_STRIDE; |
130 | 636k | } |
131 | 39.8k | } Line | Count | Source | 117 | 39.8k | { | 118 | 39.8k | pixel4 v0 = MPIXEL_X4( &src[ 0-FDEC_STRIDE] ); | 119 | 39.8k | pixel4 v1 = MPIXEL_X4( &src[ 4-FDEC_STRIDE] ); | 120 | 39.8k | pixel4 v2 = MPIXEL_X4( &src[ 8-FDEC_STRIDE] ); | 121 | 39.8k | pixel4 v3 = MPIXEL_X4( &src[12-FDEC_STRIDE] ); | 122 | | | 123 | 676k | for( int i = 0; i < 16; i++ ) | 124 | 636k | { | 125 | 636k | MPIXEL_X4( src+ 0 ) = v0; | 126 | 636k | MPIXEL_X4( src+ 4 ) = v1; | 127 | 636k | MPIXEL_X4( src+ 8 ) = v2; | 128 | 636k | MPIXEL_X4( src+12 ) = v3; | 129 | 636k | src += FDEC_STRIDE; | 130 | 636k | } | 131 | 39.8k | } |
Unexecuted instantiation: x264_10_predict_16x16_v_c |
/* 16x16 gradient ("p", presumably the H.264 plane mode) prediction.
 * H is a weighted sum of differences across the row above the block: for
 * i = 0..7 the pair at columns (8+i) and (6-i) is weighted by (i+1); the last
 * pair reaches column -1, i.e. the top-left corner pixel. V is the same sum
 * taken down the left column (rows 8+i and 6-i).
 * a = 16 * (bottom-left neighbour + top-right neighbour),
 * b = (5*H + 32) >> 6 is the per-column step, c = (5*V + 32) >> 6 the per-row
 * step, and i00 = a - 7*b - 7*c + 16 is the accumulator for pixel (0,0).
 * Every output pixel is x264_clip_pixel( accumulator >> 5 ).
 * Reads the row above (including the corner) and the left column. */
132 | | void x264_predict_16x16_p_c( pixel *src ) |
133 | 17.7k | { |
134 | 17.7k | int H = 0, V = 0; |
135 | | |
136 | | /* calculate H and V */ |
137 | 159k | for( int i = 0; i <= 7; i++ ) |
138 | 142k | { |
139 | 142k | H += ( i + 1 ) * ( src[ 8 + i - FDEC_STRIDE ] - src[6 -i -FDEC_STRIDE] ); |
140 | 142k | V += ( i + 1 ) * ( src[-1 + (8+i)*FDEC_STRIDE] - src[-1 + (6-i)*FDEC_STRIDE] ); |
141 | 142k | } |
142 | | |
143 | 17.7k | int a = 16 * ( src[-1 + 15*FDEC_STRIDE] + src[15 - FDEC_STRIDE] ); |
144 | 17.7k | int b = ( 5 * H + 32 ) >> 6; |
145 | 17.7k | int c = ( 5 * V + 32 ) >> 6; |
146 | | |
147 | 17.7k | int i00 = a - b * 7 - c * 7 + 16; |
148 | | |
149 | 302k | for( int y = 0; y < 16; y++ ) |
150 | 284k | { |
151 | 284k | int pix = i00; |
152 | 4.83M | for( int x = 0; x < 16; x++ ) |
153 | 4.54M | { |
154 | 4.54M | src[x] = x264_clip_pixel( pix>>5 ); |
155 | 4.54M | pix += b; |
156 | 4.54M | } |
157 | 284k | src += FDEC_STRIDE; |
158 | 284k | i00 += c; |
159 | 284k | } |
160 | 17.7k | } Line | Count | Source | 133 | 17.7k | { | 134 | 17.7k | int H = 0, V = 0; | 135 | | | 136 | | /* calculate H and V */ | 137 | 159k | for( int i = 0; i <= 7; i++ ) | 138 | 142k | { | 139 | 142k | H += ( i + 1 ) * ( src[ 8 + i - FDEC_STRIDE ] - src[6 -i -FDEC_STRIDE] ); | 140 | 142k | V += ( i + 1 ) * ( src[-1 + (8+i)*FDEC_STRIDE] - src[-1 + (6-i)*FDEC_STRIDE] ); | 141 | 142k | } | 142 | | | 143 | 17.7k | int a = 16 * ( src[-1 + 15*FDEC_STRIDE] + src[15 - FDEC_STRIDE] ); | 144 | 17.7k | int b = ( 5 * H + 32 ) >> 6; | 145 | 17.7k | int c = ( 5 * V + 32 ) >> 6; | 146 | | | 147 | 17.7k | int i00 = a - b * 7 - c * 7 + 16; | 148 | | | 149 | 302k | for( int y = 0; y < 16; y++ ) | 150 | 284k | { | 151 | 284k | int pix = i00; | 152 | 4.83M | for( int x = 0; x < 16; x++ ) | 153 | 4.54M | { | 154 | 4.54M | src[x] = x264_clip_pixel( pix>>5 ); | 155 | 4.54M | pix += b; | 156 | 4.54M | } | 157 | 284k | src += FDEC_STRIDE; | 158 | 284k | i00 += c; | 159 | 284k | } | 160 | 17.7k | } |
Unexecuted instantiation: x264_10_predict_16x16_p_c |
161 | | |
162 | | |
163 | | /**************************************************************************** |
164 | | * 8x8 prediction for intra chroma block (4:2:0) |
165 | | ****************************************************************************/ |
166 | | |
/* 8x8 chroma DC prediction that reads no neighbours: writes the constant
 * 1 << (BIT_DEPTH-1) to both 4-wide halves (offsets 0 and 4) of each of the
 * 8 rows. */
167 | | static void predict_8x8c_dc_128_c( pixel *src ) |
168 | 1.35k | { |
169 | 12.1k | for( int y = 0; y < 8; y++ ) |
170 | 10.8k | { |
171 | 10.8k | MPIXEL_X4( src+0 ) = PIXEL_SPLAT_X4( 1 << (BIT_DEPTH-1) ); |
172 | 10.8k | MPIXEL_X4( src+4 ) = PIXEL_SPLAT_X4( 1 << (BIT_DEPTH-1) ); |
173 | 10.8k | src += FDEC_STRIDE; |
174 | 10.8k | } |
175 | 1.35k | } |
/* 8x8 chroma DC prediction from the left column only.
 * dc0 is the sum of the left neighbours of rows 0..3, dc1 of rows 4..7; each
 * is turned into a rounded mean ((dc + 2) >> 2). Rows 0..3 are filled across
 * their full width with the first mean, rows 4..7 with the second. */
176 | | static void predict_8x8c_dc_left_c( pixel *src ) |
177 | 13.6k | { |
178 | 13.6k | int dc0 = 0, dc1 = 0; |
179 | | |
180 | 68.0k | for( int y = 0; y < 4; y++ ) |
181 | 54.4k | { |
182 | 54.4k | dc0 += src[y * FDEC_STRIDE - 1]; |
183 | 54.4k | dc1 += src[(y+4) * FDEC_STRIDE - 1]; |
184 | 54.4k | } |
185 | 13.6k | pixel4 dc0splat = PIXEL_SPLAT_X4( ( dc0 + 2 ) >> 2 ); |
186 | 13.6k | pixel4 dc1splat = PIXEL_SPLAT_X4( ( dc1 + 2 ) >> 2 ); |
187 | | |
188 | 68.0k | for( int y = 0; y < 4; y++ ) |
189 | 54.4k | { |
190 | 54.4k | MPIXEL_X4( src+0 ) = dc0splat; |
191 | 54.4k | MPIXEL_X4( src+4 ) = dc0splat; |
192 | 54.4k | src += FDEC_STRIDE; |
193 | 54.4k | } |
194 | 68.0k | for( int y = 0; y < 4; y++ ) |
195 | 54.4k | { |
196 | 54.4k | MPIXEL_X4( src+0 ) = dc1splat; |
197 | 54.4k | MPIXEL_X4( src+4 ) = dc1splat; |
198 | 54.4k | src += FDEC_STRIDE; |
199 | 54.4k | } |
200 | | |
201 | 13.6k | } |
/* 8x8 chroma DC prediction from the top row only.
 * dc0 is the sum of the four pixels above columns 0..3, dc1 of those above
 * columns 4..7; each becomes a rounded mean ((dc + 2) >> 2). In all 8 rows
 * the left half receives the first mean and the right half the second. */
202 | | static void predict_8x8c_dc_top_c( pixel *src ) |
203 | 10.3k | { |
204 | 10.3k | int dc0 = 0, dc1 = 0; |
205 | | |
206 | 51.7k | for( int x = 0; x < 4; x++ ) |
207 | 41.4k | { |
208 | 41.4k | dc0 += src[x - FDEC_STRIDE]; |
209 | 41.4k | dc1 += src[x + 4 - FDEC_STRIDE]; |
210 | 41.4k | } |
211 | 10.3k | pixel4 dc0splat = PIXEL_SPLAT_X4( ( dc0 + 2 ) >> 2 ); |
212 | 10.3k | pixel4 dc1splat = PIXEL_SPLAT_X4( ( dc1 + 2 ) >> 2 ); |
213 | | |
214 | 93.2k | for( int y = 0; y < 8; y++ ) |
215 | 82.8k | { |
216 | 82.8k | MPIXEL_X4( src+0 ) = dc0splat; |
217 | 82.8k | MPIXEL_X4( src+4 ) = dc1splat; |
218 | 82.8k | src += FDEC_STRIDE; |
219 | 82.8k | } |
220 | 10.3k | } |
/* 8x8 chroma DC prediction using both neighbours, computed per 4x4 quadrant.
 * s0/s1 are the sums of the top neighbours above columns 0..3 / 4..7;
 * s2/s3 are the sums of the left neighbours beside rows 0..3 / 4..7.
 * Quadrant values:
 *   top-left     (dc0): mean of s0 and s2 (8 samples)
 *   top-right    (dc1): mean of s1 only   (4 samples)
 *   bottom-left  (dc2): mean of s3 only   (4 samples)
 *   bottom-right (dc3): mean of s1 and s3 (8 samples)
 * All means are rounded to nearest before the shift. */
221 | | void x264_predict_8x8c_dc_c( pixel *src ) |
222 | 121k | { |
223 | 121k | int s0 = 0, s1 = 0, s2 = 0, s3 = 0; |
224 | | |
225 | | /* |
226 | | s0 s1 |
227 | | s2 |
228 | | s3 |
229 | | */ |
230 | 605k | for( int i = 0; i < 4; i++ ) |
231 | 484k | { |
232 | 484k | s0 += src[i - FDEC_STRIDE]; |
233 | 484k | s1 += src[i + 4 - FDEC_STRIDE]; |
234 | 484k | s2 += src[-1 + i * FDEC_STRIDE]; |
235 | 484k | s3 += src[-1 + (i+4)*FDEC_STRIDE]; |
236 | 484k | } |
237 | | /* |
238 | | dc0 dc1 |
239 | | dc2 dc3 |
240 | | */ |
241 | 121k | pixel4 dc0 = PIXEL_SPLAT_X4( ( s0 + s2 + 4 ) >> 3 ); |
242 | 121k | pixel4 dc1 = PIXEL_SPLAT_X4( ( s1 + 2 ) >> 2 ); |
243 | 121k | pixel4 dc2 = PIXEL_SPLAT_X4( ( s3 + 2 ) >> 2 ); |
244 | 121k | pixel4 dc3 = PIXEL_SPLAT_X4( ( s1 + s3 + 4 ) >> 3 ); |
245 | | |
246 | 605k | for( int y = 0; y < 4; y++ ) |
247 | 484k | { |
248 | 484k | MPIXEL_X4( src+0 ) = dc0; |
249 | 484k | MPIXEL_X4( src+4 ) = dc1; |
250 | 484k | src += FDEC_STRIDE; |
251 | 484k | } |
252 | | |
253 | 605k | for( int y = 0; y < 4; y++ ) |
254 | 484k | { |
255 | 484k | MPIXEL_X4( src+0 ) = dc2; |
256 | 484k | MPIXEL_X4( src+4 ) = dc3; |
257 | 484k | src += FDEC_STRIDE; |
258 | 484k | } |
259 | 121k | } Line | Count | Source | 222 | 121k | { | 223 | 121k | int s0 = 0, s1 = 0, s2 = 0, s3 = 0; | 224 | | | 225 | | /* | 226 | | s0 s1 | 227 | | s2 | 228 | | s3 | 229 | | */ | 230 | 605k | for( int i = 0; i < 4; i++ ) | 231 | 484k | { | 232 | 484k | s0 += src[i - FDEC_STRIDE]; | 233 | 484k | s1 += src[i + 4 - FDEC_STRIDE]; | 234 | 484k | s2 += src[-1 + i * FDEC_STRIDE]; | 235 | 484k | s3 += src[-1 + (i+4)*FDEC_STRIDE]; | 236 | 484k | } | 237 | | /* | 238 | | dc0 dc1 | 239 | | dc2 dc3 | 240 | | */ | 241 | 121k | pixel4 dc0 = PIXEL_SPLAT_X4( ( s0 + s2 + 4 ) >> 3 ); | 242 | 121k | pixel4 dc1 = PIXEL_SPLAT_X4( ( s1 + 2 ) >> 2 ); | 243 | 121k | pixel4 dc2 = PIXEL_SPLAT_X4( ( s3 + 2 ) >> 2 ); | 244 | 121k | pixel4 dc3 = PIXEL_SPLAT_X4( ( s1 + s3 + 4 ) >> 3 ); | 245 | | | 246 | 605k | for( int y = 0; y < 4; y++ ) | 247 | 484k | { | 248 | 484k | MPIXEL_X4( src+0 ) = dc0; | 249 | 484k | MPIXEL_X4( src+4 ) = dc1; | 250 | 484k | src += FDEC_STRIDE; | 251 | 484k | } | 252 | | | 253 | 605k | for( int y = 0; y < 4; y++ ) | 254 | 484k | { | 255 | 484k | MPIXEL_X4( src+0 ) = dc2; | 256 | 484k | MPIXEL_X4( src+4 ) = dc3; | 257 | 484k | src += FDEC_STRIDE; | 258 | 484k | } | 259 | 121k | } |
Unexecuted instantiation: x264_10_predict_8x8c_dc_c |
/* 8x8 chroma horizontal prediction: each of the 8 rows is filled, in both
 * 4-wide halves, with a splat of the pixel immediately to its left (src[-1]
 * after advancing `src` to that row). */
260 | | void x264_predict_8x8c_h_c( pixel *src ) |
261 | 41.0k | { |
262 | 369k | for( int i = 0; i < 8; i++ ) |
263 | 328k | { |
264 | 328k | pixel4 v = PIXEL_SPLAT_X4( src[-1] ); |
265 | 328k | MPIXEL_X4( src+0 ) = v; |
266 | 328k | MPIXEL_X4( src+4 ) = v; |
267 | 328k | src += FDEC_STRIDE; |
268 | 328k | } |
269 | 41.0k | } Line | Count | Source | 261 | 41.0k | { | 262 | 369k | for( int i = 0; i < 8; i++ ) | 263 | 328k | { | 264 | 328k | pixel4 v = PIXEL_SPLAT_X4( src[-1] ); | 265 | 328k | MPIXEL_X4( src+0 ) = v; | 266 | 328k | MPIXEL_X4( src+4 ) = v; | 267 | 328k | src += FDEC_STRIDE; | 268 | 328k | } | 269 | 41.0k | } |
Unexecuted instantiation: x264_10_predict_8x8c_h_c |
/* 8x8 chroma vertical prediction: reads the row above the block as two
 * MPIXEL_X4 loads (offsets 0 and 4 of src - FDEC_STRIDE) and copies them into
 * every one of the 8 rows. */
270 | | void x264_predict_8x8c_v_c( pixel *src ) |
271 | 41.1k | { |
272 | 41.1k | pixel4 v0 = MPIXEL_X4( src+0-FDEC_STRIDE ); |
273 | 41.1k | pixel4 v1 = MPIXEL_X4( src+4-FDEC_STRIDE ); |
274 | | |
275 | 370k | for( int i = 0; i < 8; i++ ) |
276 | 329k | { |
277 | 329k | MPIXEL_X4( src+0 ) = v0; |
278 | 329k | MPIXEL_X4( src+4 ) = v1; |
279 | 329k | src += FDEC_STRIDE; |
280 | 329k | } |
281 | 41.1k | } Line | Count | Source | 271 | 41.1k | { | 272 | 41.1k | pixel4 v0 = MPIXEL_X4( src+0-FDEC_STRIDE ); | 273 | 41.1k | pixel4 v1 = MPIXEL_X4( src+4-FDEC_STRIDE ); | 274 | | | 275 | 370k | for( int i = 0; i < 8; i++ ) | 276 | 329k | { | 277 | 329k | MPIXEL_X4( src+0 ) = v0; | 278 | 329k | MPIXEL_X4( src+4 ) = v1; | 279 | 329k | src += FDEC_STRIDE; | 280 | 329k | } | 281 | 41.1k | } |
Unexecuted instantiation: x264_10_predict_8x8c_v_c |
/* 8x8 chroma gradient ("p", presumably the H.264 plane mode) prediction.
 * H and V are weighted difference sums over the top row and left column: for
 * i = 0..3 the pair at positions (4+i) and (2-i) is weighted by (i+1); the
 * last pair reaches position -1, i.e. the top-left corner pixel.
 * a = 16 * (bottom-left neighbour + top-right neighbour),
 * b = (17*H + 16) >> 5 is the per-column step, c = (17*V + 16) >> 5 the
 * per-row step, and i00 = a - 3*b - 3*c + 16 seeds pixel (0,0).
 * Each output pixel is x264_clip_pixel( accumulator >> 5 ). */
282 | | void x264_predict_8x8c_p_c( pixel *src ) |
283 | 50.0k | { |
284 | 50.0k | int H = 0, V = 0; |
285 | | |
286 | 250k | for( int i = 0; i < 4; i++ ) |
287 | 200k | { |
288 | 200k | H += ( i + 1 ) * ( src[4+i - FDEC_STRIDE] - src[2 - i -FDEC_STRIDE] ); |
289 | 200k | V += ( i + 1 ) * ( src[-1 +(i+4)*FDEC_STRIDE] - src[-1+(2-i)*FDEC_STRIDE] ); |
290 | 200k | } |
291 | | |
292 | 50.0k | int a = 16 * ( src[-1+7*FDEC_STRIDE] + src[7 - FDEC_STRIDE] ); |
293 | 50.0k | int b = ( 17 * H + 16 ) >> 5; |
294 | 50.0k | int c = ( 17 * V + 16 ) >> 5; |
295 | 50.0k | int i00 = a -3*b -3*c + 16; |
296 | | |
297 | 450k | for( int y = 0; y < 8; y++ ) |
298 | 400k | { |
299 | 400k | int pix = i00; |
300 | 3.60M | for( int x = 0; x < 8; x++ ) |
301 | 3.20M | { |
302 | 3.20M | src[x] = x264_clip_pixel( pix>>5 ); |
303 | 3.20M | pix += b; |
304 | 3.20M | } |
305 | 400k | src += FDEC_STRIDE; |
306 | 400k | i00 += c; |
307 | 400k | } |
308 | 50.0k | } Line | Count | Source | 283 | 50.0k | { | 284 | 50.0k | int H = 0, V = 0; | 285 | | | 286 | 250k | for( int i = 0; i < 4; i++ ) | 287 | 200k | { | 288 | 200k | H += ( i + 1 ) * ( src[4+i - FDEC_STRIDE] - src[2 - i -FDEC_STRIDE] ); | 289 | 200k | V += ( i + 1 ) * ( src[-1 +(i+4)*FDEC_STRIDE] - src[-1+(2-i)*FDEC_STRIDE] ); | 290 | 200k | } | 291 | | | 292 | 50.0k | int a = 16 * ( src[-1+7*FDEC_STRIDE] + src[7 - FDEC_STRIDE] ); | 293 | 50.0k | int b = ( 17 * H + 16 ) >> 5; | 294 | 50.0k | int c = ( 17 * V + 16 ) >> 5; | 295 | 50.0k | int i00 = a -3*b -3*c + 16; | 296 | | | 297 | 450k | for( int y = 0; y < 8; y++ ) | 298 | 400k | { | 299 | 400k | int pix = i00; | 300 | 3.60M | for( int x = 0; x < 8; x++ ) | 301 | 3.20M | { | 302 | 3.20M | src[x] = x264_clip_pixel( pix>>5 ); | 303 | 3.20M | pix += b; | 304 | 3.20M | } | 305 | 400k | src += FDEC_STRIDE; | 306 | 400k | i00 += c; | 307 | 400k | } | 308 | 50.0k | } |
Unexecuted instantiation: x264_10_predict_8x8c_p_c |
309 | | |
310 | | /**************************************************************************** |
311 | | * 8x16 prediction for intra chroma block (4:2:2) |
312 | | ****************************************************************************/ |
313 | | |
/* 8x16 chroma DC prediction that reads no neighbours: writes the constant
 * 1 << (BIT_DEPTH-1) to both 4-wide halves of each of the 16 rows. */
314 | | static void predict_8x16c_dc_128_c( pixel *src ) |
315 | 0 | { |
316 | 0 | for( int y = 0; y < 16; y++ ) |
317 | 0 | { |
318 | 0 | MPIXEL_X4( src+0 ) = PIXEL_SPLAT_X4( 1 << (BIT_DEPTH-1) ); |
319 | 0 | MPIXEL_X4( src+4 ) = PIXEL_SPLAT_X4( 1 << (BIT_DEPTH-1) ); |
320 | 0 | src += FDEC_STRIDE; |
321 | 0 | } |
322 | 0 | } |
/* 8x16 chroma DC prediction from the left column only, done in four bands of
 * four rows. For each band: sum the four left neighbours of that band, take
 * the rounded mean ((dc + 2) >> 2), and fill the band's full width with it.
 * `src` is advanced inside the fill loop, so the next band's reads
 * (src[y*FDEC_STRIDE - 1]) are relative to that band's first row. */
323 | | static void predict_8x16c_dc_left_c( pixel *src ) |
324 | 0 | { |
325 | 0 | for( int i = 0; i < 4; i++ ) |
326 | 0 | { |
327 | 0 | int dc = 0; |
328 |

329 | 0 | for( int y = 0; y < 4; y++ ) |
330 | 0 | dc += src[y*FDEC_STRIDE - 1]; |
331 |

332 | 0 | pixel4 dcsplat = PIXEL_SPLAT_X4( (dc + 2) >> 2 ); |
333 |

334 | 0 | for( int y = 0; y < 4; y++ ) |
335 | 0 | { |
336 | 0 | MPIXEL_X4( src+0 ) = dcsplat; |
337 | 0 | MPIXEL_X4( src+4 ) = dcsplat; |
338 | 0 | src += FDEC_STRIDE; |
339 | 0 | } |
340 | 0 | } |
341 | 0 | } |
/* 8x16 chroma DC prediction from the top row only.
 * dc0 / dc1 are the sums of the four pixels above columns 0..3 / 4..7; their
 * rounded means ((dc + 2) >> 2) fill the left and right halves of all 16
 * rows respectively. */
342 | | static void predict_8x16c_dc_top_c( pixel *src ) |
343 | 0 | { |
344 | 0 | int dc0 = 0, dc1 = 0; |
345 |

346 | 0 | for( int x = 0; x < 4; x++ ) |
347 | 0 | { |
348 | 0 | dc0 += src[x - FDEC_STRIDE]; |
349 | 0 | dc1 += src[x + 4 - FDEC_STRIDE]; |
350 | 0 | } |
351 | 0 | pixel4 dc0splat = PIXEL_SPLAT_X4( ( dc0 + 2 ) >> 2 ); |
352 | 0 | pixel4 dc1splat = PIXEL_SPLAT_X4( ( dc1 + 2 ) >> 2 ); |
353 |

354 | 0 | for( int y = 0; y < 16; y++ ) |
355 | 0 | { |
356 | 0 | MPIXEL_X4( src+0 ) = dc0splat; |
357 | 0 | MPIXEL_X4( src+4 ) = dc1splat; |
358 | 0 | src += FDEC_STRIDE; |
359 | 0 | } |
360 | 0 | } |
/* 8x16 chroma DC prediction using both neighbours, computed per 4x4 sub-block
 * (two columns by four bands).
 * s0/s1: sums of the top neighbours above columns 0..3 / 4..7.
 * s2..s5: sums of the left neighbours beside rows 0..3, 4..7, 8..11, 12..15.
 * Left-column sub-blocks: the first band averages s0 with s2 (8 samples); the
 * lower three bands use their own left sum only (4 samples).
 * Right-column sub-blocks: the first band uses s1 only (4 samples); the lower
 * three bands average s1 with that band's left sum (8 samples). */
361 | | void x264_predict_8x16c_dc_c( pixel *src ) |
362 | 0 | { |
363 | 0 | int s0 = 0, s1 = 0, s2 = 0, s3 = 0, s4 = 0, s5 = 0; |
364 | | |
365 | | /* |
366 | | s0 s1 |
367 | | s2 |
368 | | s3 |
369 | | s4 |
370 | | s5 |
371 | | */ |
372 | 0 | for( int i = 0; i < 4; i++ ) |
373 | 0 | { |
374 | 0 | s0 += src[i+0 - FDEC_STRIDE]; |
375 | 0 | s1 += src[i+4 - FDEC_STRIDE]; |
376 | 0 | s2 += src[-1 + (i+0) * FDEC_STRIDE]; |
377 | 0 | s3 += src[-1 + (i+4) * FDEC_STRIDE]; |
378 | 0 | s4 += src[-1 + (i+8) * FDEC_STRIDE]; |
379 | 0 | s5 += src[-1 + (i+12) * FDEC_STRIDE]; |
380 | 0 | } |
381 | | /* |
382 | | dc0 dc1 |
383 | | dc2 dc3 |
384 | | dc4 dc5 |
385 | | dc6 dc7 |
386 | | */ |
387 | 0 | pixel4 dc0 = PIXEL_SPLAT_X4( ( s0 + s2 + 4 ) >> 3 ); |
388 | 0 | pixel4 dc1 = PIXEL_SPLAT_X4( ( s1 + 2 ) >> 2 ); |
389 | 0 | pixel4 dc2 = PIXEL_SPLAT_X4( ( s3 + 2 ) >> 2 ); |
390 | 0 | pixel4 dc3 = PIXEL_SPLAT_X4( ( s1 + s3 + 4 ) >> 3 ); |
391 | 0 | pixel4 dc4 = PIXEL_SPLAT_X4( ( s4 + 2 ) >> 2 ); |
392 | 0 | pixel4 dc5 = PIXEL_SPLAT_X4( ( s1 + s4 + 4 ) >> 3 ); |
393 | 0 | pixel4 dc6 = PIXEL_SPLAT_X4( ( s5 + 2 ) >> 2 ); |
394 | 0 | pixel4 dc7 = PIXEL_SPLAT_X4( ( s1 + s5 + 4 ) >> 3 ); |
395 |

396 | 0 | for( int y = 0; y < 4; y++ ) |
397 | 0 | { |
398 | 0 | MPIXEL_X4( src+0 ) = dc0; |
399 | 0 | MPIXEL_X4( src+4 ) = dc1; |
400 | 0 | src += FDEC_STRIDE; |
401 | 0 | } |
402 | 0 | for( int y = 0; y < 4; y++ ) |
403 | 0 | { |
404 | 0 | MPIXEL_X4( src+0 ) = dc2; |
405 | 0 | MPIXEL_X4( src+4 ) = dc3; |
406 | 0 | src += FDEC_STRIDE; |
407 | 0 | } |
408 | 0 | for( int y = 0; y < 4; y++ ) |
409 | 0 | { |
410 | 0 | MPIXEL_X4( src+0 ) = dc4; |
411 | 0 | MPIXEL_X4( src+4 ) = dc5; |
412 | 0 | src += FDEC_STRIDE; |
413 | 0 | } |
414 | 0 | for( int y = 0; y < 4; y++ ) |
415 | 0 | { |
416 | 0 | MPIXEL_X4( src+0 ) = dc6; |
417 | 0 | MPIXEL_X4( src+4 ) = dc7; |
418 | 0 | src += FDEC_STRIDE; |
419 | 0 | } |
420 | 0 | } Unexecuted instantiation: x264_8_predict_8x16c_dc_c Unexecuted instantiation: x264_10_predict_8x16c_dc_c |
/* 8x16 chroma horizontal prediction: each of the 16 rows is filled, in both
 * 4-wide halves, with a splat of the pixel immediately to its left. */
421 | | void x264_predict_8x16c_h_c( pixel *src ) |
422 | 0 | { |
423 | 0 | for( int i = 0; i < 16; i++ ) |
424 | 0 | { |
425 | 0 | pixel4 v = PIXEL_SPLAT_X4( src[-1] ); |
426 | 0 | MPIXEL_X4( src+0 ) = v; |
427 | 0 | MPIXEL_X4( src+4 ) = v; |
428 | 0 | src += FDEC_STRIDE; |
429 | 0 | } |
430 | 0 | } Unexecuted instantiation: x264_8_predict_8x16c_h_c Unexecuted instantiation: x264_10_predict_8x16c_h_c |
/* 8x16 chroma vertical prediction: reads the row above the block as two
 * MPIXEL_X4 loads (offsets 0 and 4 of src - FDEC_STRIDE) and copies them into
 * every one of the 16 rows. */
431 | | void x264_predict_8x16c_v_c( pixel *src ) |
432 | 0 | { |
433 | 0 | pixel4 v0 = MPIXEL_X4( src+0-FDEC_STRIDE ); |
434 | 0 | pixel4 v1 = MPIXEL_X4( src+4-FDEC_STRIDE ); |
435 |

436 | 0 | for( int i = 0; i < 16; i++ ) |
437 | 0 | { |
438 | 0 | MPIXEL_X4( src+0 ) = v0; |
439 | 0 | MPIXEL_X4( src+4 ) = v1; |
440 | 0 | src += FDEC_STRIDE; |
441 | 0 | } |
442 | 0 | } Unexecuted instantiation: x264_8_predict_8x16c_v_c Unexecuted instantiation: x264_10_predict_8x16c_v_c |
/* 8x16 chroma gradient ("p", presumably the H.264 plane mode) prediction.
 * The block is 8 wide and 16 tall, so the two directions use different
 * constants: H is accumulated over 4 pairs of the top row (as in the 8x8
 * chroma version, scaled by (17*H + 16) >> 5), while V is accumulated over
 * 8 pairs of the left column (as in the 16x16 version, scaled by
 * (5*V + 32) >> 6). Both sums reach index -1, the top-left corner pixel.
 * a = 16 * (bottom-left neighbour + top-right neighbour) and
 * i00 = a - 3*b - 7*c + 16 seeds pixel (0,0); each output pixel is
 * x264_clip_pixel( accumulator >> 5 ) with steps b per column and c per row. */
443 | | void x264_predict_8x16c_p_c( pixel *src ) |
444 | 0 | { |
445 | 0 | int H = 0; |
446 | 0 | int V = 0; |
447 |

448 | 0 | for( int i = 0; i < 4; i++ ) |
449 | 0 | H += ( i + 1 ) * ( src[4 + i - FDEC_STRIDE] - src[2 - i - FDEC_STRIDE] ); |
450 | 0 | for( int i = 0; i < 8; i++ ) |
451 | 0 | V += ( i + 1 ) * ( src[-1 + (i+8)*FDEC_STRIDE] - src[-1 + (6-i)*FDEC_STRIDE] ); |
452 |

453 | 0 | int a = 16 * ( src[-1 + 15*FDEC_STRIDE] + src[7 - FDEC_STRIDE] ); |
454 | 0 | int b = ( 17 * H + 16 ) >> 5; |
455 | 0 | int c = ( 5 * V + 32 ) >> 6; |
456 | 0 | int i00 = a -3*b -7*c + 16; |
457 |

458 | 0 | for( int y = 0; y < 16; y++ ) |
459 | 0 | { |
460 | 0 | int pix = i00; |
461 | 0 | for( int x = 0; x < 8; x++ ) |
462 | 0 | { |
463 | 0 | src[x] = x264_clip_pixel( pix>>5 ); |
464 | 0 | pix += b; |
465 | 0 | } |
466 | 0 | src += FDEC_STRIDE; |
467 | 0 | i00 += c; |
468 | 0 | } |
469 | 0 | } Unexecuted instantiation: x264_8_predict_8x16c_p_c Unexecuted instantiation: x264_10_predict_8x16c_p_c |
470 | | |
471 | | /**************************************************************************** |
472 | | * 4x4 prediction for intra luma block |
473 | | ****************************************************************************/ |
474 | | |
/* Addressing helpers shared by the 4x4 and 8x8 luma predictors below.
 * SRC(x,y)    : pixel at column x, row y relative to the local `src`
 *               (negative indices address the neighbours above / to the left).
 * SRC_X4(x,y) : MPIXEL_X4 access starting at SRC(x,y).
 * PREDICT_4x4_DC(v) : stores v into rows 3, 2, 1, 0 of the 4x4 block through
 *               one chained assignment, so `v` is evaluated once. The
 *               expansion already ends in ';'. */
475 | 4.03M | #define SRC(x,y) src[(x)+(y)*FDEC_STRIDE] |
476 | 686k | #define SRC_X4(x,y) MPIXEL_X4( &SRC(x,y) ) |
477 | | |
478 | | #define PREDICT_4x4_DC(v)\ |
479 | 42.2k | SRC_X4(0,0) = SRC_X4(0,1) = SRC_X4(0,2) = SRC_X4(0,3) = v; |
480 | | |
/* 4x4 DC prediction that reads no neighbours: fills the four rows with the
 * constant 1 << (BIT_DEPTH-1). */
481 | | static void predict_4x4_dc_128_c( pixel *src ) |
482 | 497 | { |
483 | 497 | PREDICT_4x4_DC( PIXEL_SPLAT_X4( 1 << (BIT_DEPTH-1) ) ); |
484 | 497 | } |
/* 4x4 DC prediction from the left column only: rounded mean of the four
 * pixels at column -1, rows 0..3, written to all four rows. */
485 | | static void predict_4x4_dc_left_c( pixel *src ) |
486 | 3.19k | { |
487 | 3.19k | pixel4 dc = PIXEL_SPLAT_X4( (SRC(-1,0) + SRC(-1,1) + SRC(-1,2) + SRC(-1,3) + 2) >> 2 ); |
488 | 3.19k | PREDICT_4x4_DC( dc ); |
489 | 3.19k | } |
/* 4x4 DC prediction from the top row only: rounded mean of the four pixels at
 * row -1, columns 0..3, written to all four rows. */
490 | | static void predict_4x4_dc_top_c( pixel *src ) |
491 | 3.21k | { |
492 | 3.21k | pixel4 dc = PIXEL_SPLAT_X4( (SRC(0,-1) + SRC(1,-1) + SRC(2,-1) + SRC(3,-1) + 2) >> 2 ); |
493 | 3.21k | PREDICT_4x4_DC( dc ); |
494 | 3.21k | } |
/* 4x4 DC prediction using both neighbours: rounded mean ((sum + 4) >> 3) of
 * the four left neighbours and the four top neighbours, written to all four
 * rows of the block. */
495 | | void x264_predict_4x4_dc_c( pixel *src ) |
496 | 22.1k | { |
497 | 22.1k | pixel4 dc = PIXEL_SPLAT_X4( (SRC(-1,0) + SRC(-1,1) + SRC(-1,2) + SRC(-1,3) + |
498 | 22.1k | SRC(0,-1) + SRC(1,-1) + SRC(2,-1) + SRC(3,-1) + 4) >> 3 ); |
499 | 22.1k | PREDICT_4x4_DC( dc ); |
500 | 22.1k | } Line | Count | Source | 496 | 22.1k | { | 497 | 22.1k | pixel4 dc = PIXEL_SPLAT_X4( (SRC(-1,0) + SRC(-1,1) + SRC(-1,2) + SRC(-1,3) + | 498 | 22.1k | SRC(0,-1) + SRC(1,-1) + SRC(2,-1) + SRC(3,-1) + 4) >> 3 ); | 499 | 22.1k | PREDICT_4x4_DC( dc ); | 500 | 22.1k | } |
Unexecuted instantiation: x264_10_predict_4x4_dc_c |
/* 4x4 horizontal prediction: each row y (0..3) is filled with a splat of its
 * own left neighbour SRC(-1,y). */
501 | | void x264_predict_4x4_h_c( pixel *src ) |
502 | 13.1k | { |
503 | 13.1k | SRC_X4(0,0) = PIXEL_SPLAT_X4( SRC(-1,0) ); |
504 | 13.1k | SRC_X4(0,1) = PIXEL_SPLAT_X4( SRC(-1,1) ); |
505 | 13.1k | SRC_X4(0,2) = PIXEL_SPLAT_X4( SRC(-1,2) ); |
506 | 13.1k | SRC_X4(0,3) = PIXEL_SPLAT_X4( SRC(-1,3) ); |
507 | 13.1k | } Line | Count | Source | 502 | 13.1k | { | 503 | 13.1k | SRC_X4(0,0) = PIXEL_SPLAT_X4( SRC(-1,0) ); | 504 | 13.1k | SRC_X4(0,1) = PIXEL_SPLAT_X4( SRC(-1,1) ); | 505 | 13.1k | SRC_X4(0,2) = PIXEL_SPLAT_X4( SRC(-1,2) ); | 506 | 13.1k | SRC_X4(0,3) = PIXEL_SPLAT_X4( SRC(-1,3) ); | 507 | 13.1k | } |
Unexecuted instantiation: x264_10_predict_4x4_h_c |
/* 4x4 vertical prediction: copies the row above the block (read once as
 * SRC_X4(0,-1)) into all four rows. Reuses PREDICT_4x4_DC purely as a
 * "store the same value to four rows" helper; no averaging takes place. */
508 | | void x264_predict_4x4_v_c( pixel *src ) |
509 | 13.1k | { |
510 | 13.1k | PREDICT_4x4_DC(SRC_X4(0,-1)); |
511 | 13.1k | } Line | Count | Source | 509 | 13.1k | { | 510 | 13.1k | PREDICT_4x4_DC(SRC_X4(0,-1)); | 511 | 13.1k | } |
Unexecuted instantiation: x264_10_predict_4x4_v_c |
512 | | |
/* Neighbour-loading helpers for the directional 4x4 predictors. Each macro
 * declares int locals in the enclosing scope:
 *   PREDICT_4x4_LOAD_LEFT      : l0..l3 = column -1, rows 0..3
 *   PREDICT_4x4_LOAD_TOP       : t0..t3 = row -1, columns 0..3
 *   PREDICT_4x4_LOAD_TOP_RIGHT : t4..t7 = row -1, columns 4..7
 * The last local of each group carries UNUSED because some predictors do not
 * reference it (presumably UNUSED suppresses the unused-variable warning --
 * its definition is not in this file).
 * F1(a,b)   : rounded average of two samples.
 * F2(a,b,c) : rounded (1,2,1)/4 weighted average of three samples. */
513 | | #define PREDICT_4x4_LOAD_LEFT\ |
514 | 2.07k | int l0 = SRC(-1,0);\ |
515 | 2.07k | int l1 = SRC(-1,1);\ |
516 | 2.07k | int l2 = SRC(-1,2);\ |
517 | 2.07k | UNUSED int l3 = SRC(-1,3); |
518 | | |
519 | | #define PREDICT_4x4_LOAD_TOP\ |
520 | 2.70k | int t0 = SRC(0,-1);\ |
521 | 2.70k | int t1 = SRC(1,-1);\ |
522 | 2.70k | int t2 = SRC(2,-1);\ |
523 | 2.70k | UNUSED int t3 = SRC(3,-1); |
524 | | |
525 | | #define PREDICT_4x4_LOAD_TOP_RIGHT\ |
526 | 1.27k | int t4 = SRC(4,-1);\ |
527 | 1.27k | int t5 = SRC(5,-1);\ |
528 | 1.27k | int t6 = SRC(6,-1);\ |
529 | 1.27k | UNUSED int t7 = SRC(7,-1); |
530 | | |
531 | 503k | #define F1(a,b) (((a)+(b)+1)>>1) |
532 | 1.40M | #define F2(a,b,c) (((a)+2*(b)+(c)+2)>>2) |
533 | | |
/* 4x4 "ddl" (presumably diagonal-down-left) prediction.
 * Uses only the eight pixels above and above-right of the block (t0..t7).
 * Pixels on the same anti-diagonal (x + y constant) share one value, the
 * three-tap F2 filter of consecutive top samples starting at t(x+y).
 * The last diagonal, pixel (3,3), repeats t7 as the third tap because there
 * is no t8. */
534 | | static void predict_4x4_ddl_c( pixel *src ) |
535 | 639 | { |
536 | 639 | PREDICT_4x4_LOAD_TOP |
537 | 639 | PREDICT_4x4_LOAD_TOP_RIGHT |
538 | 639 | SRC(0,0)= F2(t0,t1,t2); |
539 | 639 | SRC(1,0)=SRC(0,1)= F2(t1,t2,t3); |
540 | 639 | SRC(2,0)=SRC(1,1)=SRC(0,2)= F2(t2,t3,t4); |
541 | 639 | SRC(3,0)=SRC(2,1)=SRC(1,2)=SRC(0,3)= F2(t3,t4,t5); |
542 | 639 | SRC(3,1)=SRC(2,2)=SRC(1,3)= F2(t4,t5,t6); |
543 | 639 | SRC(3,2)=SRC(2,3)= F2(t5,t6,t7); |
544 | 639 | SRC(3,3)= F2(t6,t7,t7); |
545 | 639 | } |
/* 4x4 "ddr" (presumably diagonal-down-right) prediction.
 * Uses the left column (l0..l3), the top-left corner (lt) and the top row
 * (t0..t3). Pixels on the same diagonal (x - y constant) share one value:
 * an F2 filter over three consecutive samples of the edge sequence
 * t3, t2, t1, t0, lt, l0, l1, l2, l3. The main diagonal uses (t0, lt, l0). */
546 | | static void predict_4x4_ddr_c( pixel *src ) |
547 | 477 | { |
548 | 477 | int lt = SRC(-1,-1); |
549 | 477 | PREDICT_4x4_LOAD_LEFT |
550 | 477 | PREDICT_4x4_LOAD_TOP |
551 | 477 | SRC(3,0)= F2(t3,t2,t1); |
552 | 477 | SRC(2,0)=SRC(3,1)= F2(t2,t1,t0); |
553 | 477 | SRC(1,0)=SRC(2,1)=SRC(3,2)= F2(t1,t0,lt); |
554 | 477 | SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)= F2(t0,lt,l0); |
555 | 477 | SRC(0,1)=SRC(1,2)=SRC(2,3)= F2(lt,l0,l1); |
556 | 477 | SRC(0,2)=SRC(1,3)= F2(l0,l1,l2); |
557 | 477 | SRC(0,3)= F2(l1,l2,l3); |
558 | 477 | } |
559 | | |
/* 4x4 "vr" (presumably vertical-right) prediction.
 * Uses l0..l2, the top-left corner lt and t0..t3 (l3 is loaded by the macro
 * but not referenced). Rows 0 and 2 take two-tap F1 averages of adjacent
 * top-edge samples, rows 1 and 3 take three-tap F2 values; each value is
 * reused one column to the right, two rows further down. The two lowest
 * pixels of column 0 come from F2 over the left column instead. */
560 | | static void predict_4x4_vr_c( pixel *src ) |
561 | 477 | { |
562 | 477 | int lt = SRC(-1,-1); |
563 | 477 | PREDICT_4x4_LOAD_LEFT |
564 | 477 | PREDICT_4x4_LOAD_TOP |
565 | 477 | SRC(0,3)= F2(l2,l1,l0); |
566 | 477 | SRC(0,2)= F2(l1,l0,lt); |
567 | 477 | SRC(0,1)=SRC(1,3)= F2(l0,lt,t0); |
568 | 477 | SRC(0,0)=SRC(1,2)= F1(lt,t0); |
569 | 477 | SRC(1,1)=SRC(2,3)= F2(lt,t0,t1); |
570 | 477 | SRC(1,0)=SRC(2,2)= F1(t0,t1); |
571 | 477 | SRC(2,1)=SRC(3,3)= F2(t0,t1,t2); |
572 | 477 | SRC(2,0)=SRC(3,2)= F1(t1,t2); |
573 | 477 | SRC(3,1)= F2(t1,t2,t3); |
574 | 477 | SRC(3,0)= F1(t2,t3); |
575 | 477 | } |
576 | | |
/* 4x4 "hd" (presumably horizontal-down) prediction.
 * Uses l0..l3, the top-left corner lt and t0..t2 (t3 is loaded by the macro
 * but not referenced). Columns 0 and 1 hold F1 / F2 values taken along the
 * left edge, and each value is reused two columns to the right, one row
 * further down. The two rightmost pixels of row 0 come from F2 over the top
 * row instead. */
577 | | static void predict_4x4_hd_c( pixel *src ) |
578 | 477 | { |
579 | 477 | int lt= SRC(-1,-1); |
580 | 477 | PREDICT_4x4_LOAD_LEFT |
581 | 477 | PREDICT_4x4_LOAD_TOP |
582 | 477 | SRC(0,3)= F1(l2,l3); |
583 | 477 | SRC(1,3)= F2(l1,l2,l3); |
584 | 477 | SRC(0,2)=SRC(2,3)= F1(l1,l2); |
585 | 477 | SRC(1,2)=SRC(3,3)= F2(l0,l1,l2); |
586 | 477 | SRC(0,1)=SRC(2,2)= F1(l0,l1); |
587 | 477 | SRC(1,1)=SRC(3,2)= F2(lt,l0,l1); |
588 | 477 | SRC(0,0)=SRC(2,1)= F1(lt,l0); |
589 | 477 | SRC(1,0)=SRC(3,1)= F2(t0,lt,l0); |
590 | 477 | SRC(2,0)= F2(t1,t0,lt); |
591 | 477 | SRC(3,0)= F2(t2,t1,t0); |
592 | 477 | } |
593 | | |
/* 4x4 "vl" (presumably vertical-left) prediction.
 * Uses only the top and top-right pixels t0..t6 (t7 is loaded by the macro
 * but not referenced). Even rows take two-tap F1 averages of adjacent top
 * samples, odd rows take three-tap F2 values; each value is reused one column
 * to the left, two rows further down. */
594 | | static void predict_4x4_vl_c( pixel *src ) |
595 | 639 | { |
596 | 639 | PREDICT_4x4_LOAD_TOP |
597 | 639 | PREDICT_4x4_LOAD_TOP_RIGHT |
598 | 639 | SRC(0,0)= F1(t0,t1); |
599 | 639 | SRC(0,1)= F2(t0,t1,t2); |
600 | 639 | SRC(1,0)=SRC(0,2)= F1(t1,t2); |
601 | 639 | SRC(1,1)=SRC(0,3)= F2(t1,t2,t3); |
602 | 639 | SRC(2,0)=SRC(1,2)= F1(t2,t3); |
603 | 639 | SRC(2,1)=SRC(1,3)= F2(t2,t3,t4); |
604 | 639 | SRC(3,0)=SRC(2,2)= F1(t3,t4); |
605 | 639 | SRC(3,1)=SRC(2,3)= F2(t3,t4,t5); |
606 | 639 | SRC(3,2)= F1(t4,t5); |
607 | 639 | SRC(3,3)= F2(t4,t5,t6); |
608 | 639 | } |
609 | | |
/* 4x4 "hu" (presumably horizontal-up) prediction.
 * Uses only the left column l0..l3. Values alternate between two-tap F1 and
 * three-tap F2 filters walking down the left edge, and each value is reused
 * two columns to the left, one row further down. Once the walk runs past l3
 * the last sample is repeated: F2(l2,l3,l3) for the final filtered value, and
 * plain l3 for the six remaining pixels written by the last statement. */
610 | | static void predict_4x4_hu_c( pixel *src ) |
611 | 639 | { |
612 | 639 | PREDICT_4x4_LOAD_LEFT |
613 | 639 | SRC(0,0)= F1(l0,l1); |
614 | 639 | SRC(1,0)= F2(l0,l1,l2); |
615 | 639 | SRC(2,0)=SRC(0,1)= F1(l1,l2); |
616 | 639 | SRC(3,0)=SRC(1,1)= F2(l1,l2,l3); |
617 | 639 | SRC(2,1)=SRC(0,2)= F1(l2,l3); |
618 | 639 | SRC(3,1)=SRC(1,2)= F2(l2,l3,l3); |
619 | 639 | SRC(3,2)=SRC(1,3)=SRC(0,3)= |
620 | 639 | SRC(2,2)=SRC(2,3)=SRC(3,3)= l3; |
621 | 639 | } |
622 | | |
623 | | /**************************************************************************** |
624 | | * 8x8 prediction for intra luma block |
625 | | ****************************************************************************/ |
626 | | |
/* Edge-filter helpers used only by predict_8x8_filter_c (they are #undef'd
 * and redefined with a different meaning right after it).
 * PL(y): edge[14-y] = (1,2,1)-filtered left neighbour at row y.
 * PT(x): edge[16+x] = (1,2,1)-filtered top neighbour at column x.
 * Both read one sample on either side of the target, so they are only used
 * for interior positions; the end positions are handled explicitly by the
 * caller. The arguments are substituted unparenthesised -- callers pass
 * integer literals. */
627 | | #define PL(y) \ |
628 | 87.3k | edge[14-y] = F2(SRC(-1,y-1), SRC(-1,y), SRC(-1,y+1)); |
629 | | #define PT(x) \ |
630 | 189k | edge[16+x] = F2(SRC(x-1,-1), SRC(x,-1), SRC(x+1,-1)); |
631 | | |
/* Builds the low-pass-filtered neighbour array `edge` consumed by the 8x8
 * luma predictors.
 *   src        : top-left pixel of the 8x8 block; neighbours are read
 *                through SRC() at negative coordinates.
 *   edge       : output array; layout is described in the comment inside the
 *                function. edge[6] is additionally written as a copy of
 *                edge[7].
 *   i_neighbor : bit mask; MB_TOPLEFT and MB_TOPRIGHT are tested to decide
 *                whether the corner / top-right pixels may be used as filter
 *                taps (presumably "which neighbours are available").
 *   i_filters  : bit mask selecting which parts of `edge` to compute
 *                (MB_LEFT, MB_TOP, MB_TOPRIGHT).
 * Interior samples use the (1,2,1) F2 filter via PL/PT. At the ends of each
 * run, where one tap would be missing, the adjacent sample is counted again
 * (the "3*" terms, or the have_lt / have_tr fallbacks).
 * NOTE(review): the top-right part is only considered inside the MB_TOP
 * branch, and the edge[15] computation in the MB_LEFT branch reads
 * SRC(-1,-1) and SRC(0,-1) without consulting have_lt -- presumably callers
 * guarantee those are readable; confirm against the call sites. */
632 | | static void predict_8x8_filter_c( pixel *src, pixel edge[36], int i_neighbor, int i_filters ) |
633 | 14.5k | { |
634 | | /* edge[7..14] = l7..l0 |
635 | | * edge[15] = lt |
636 | | * edge[16..31] = t0 .. t15 |
637 | | * edge[32] = t15 */ |
638 | | |
639 | 14.5k | int have_lt = i_neighbor & MB_TOPLEFT; |
640 | 14.5k | if( i_filters & MB_LEFT ) |
641 | 14.5k | { |
642 | 14.5k | edge[15] = (SRC(0,-1) + 2*SRC(-1,-1) + SRC(-1,0) + 2) >> 2; |
643 | 14.5k | edge[14] = ((have_lt ? SRC(-1,-1) : SRC(-1,0)) |
644 | 14.5k | + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; |
645 | 14.5k | PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) |
646 | 14.5k | edge[6] = |
647 | 14.5k | edge[7] = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2; |
648 | 14.5k | } |
649 | | |
650 | 14.5k | if( i_filters & MB_TOP ) |
651 | 14.5k | { |
652 | 14.5k | int have_tr = i_neighbor & MB_TOPRIGHT; |
653 | 14.5k | edge[16] = ((have_lt ? SRC(-1,-1) : SRC(0,-1)) |
654 | 14.5k | + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; |
655 | 14.5k | PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) |
656 | 14.5k | edge[23] = (SRC(6,-1) + 2*SRC(7,-1) |
657 | 14.5k | + (have_tr ? SRC(8,-1) : SRC(7,-1)) + 2) >> 2; |
658 | | |
659 | 14.5k | if( i_filters & MB_TOPRIGHT ) |
660 | 14.5k | { |
661 | 14.5k | if( have_tr ) |
662 | 14.5k | { |
663 | 14.5k | PT(8) PT(9) PT(10) PT(11) PT(12) PT(13) PT(14) |
664 | 14.5k | edge[31] = |
665 | 14.5k | edge[32] = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; |
666 | 14.5k | } |
667 | 0 | else |
668 | 0 | { |
669 | 0 | MPIXEL_X4( edge+24 ) = PIXEL_SPLAT_X4( SRC(7,-1) ); |
670 | 0 | MPIXEL_X4( edge+28 ) = PIXEL_SPLAT_X4( SRC(7,-1) ); |
671 | 0 | edge[32] = SRC(7,-1); |
672 | 0 | } |
673 | 14.5k | } |
674 | 14.5k | } |
675 | 14.5k | } |
676 | | |
/* From here on PL/PT no longer write `edge`; they are redefined to read it.
 * PL(y) declares int l<y> = edge[14-y] and PT(x) declares int t<x> =
 * edge[16+x], both marked UNUSED because not every predictor references
 * every sample.
 *   PREDICT_8x8_LOAD_TOPLEFT  : lt      = edge[15]
 *   PREDICT_8x8_LOAD_LEFT     : l0..l7  = edge[14]..edge[7]
 *   PREDICT_8x8_LOAD_TOP      : t0..t7  = edge[16]..edge[23]
 *   PREDICT_8x8_LOAD_TOPRIGHT : t8..t15 = edge[24]..edge[31]
 * PREDICT_8x8_DC(v) stores v to both 4-wide halves of 8 consecutive rows and
 * advances the caller's `src` by FDEC_STRIDE per row. */
677 | | #undef PL |
678 | | #undef PT |
679 | | |
680 | | #define PL(y) \ |
681 | 465k | UNUSED int l##y = edge[14-y]; |
682 | | #define PT(x) \ |
683 | 814k | UNUSED int t##x = edge[16+x]; |
684 | | #define PREDICT_8x8_LOAD_TOPLEFT \ |
685 | 43.6k | int lt = edge[15]; |
686 | | #define PREDICT_8x8_LOAD_LEFT \ |
687 | 58.2k | PL(0) PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) PL(7) |
688 | | #define PREDICT_8x8_LOAD_TOP \ |
689 | 72.7k | PT(0) PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) PT(7) |
690 | | #define PREDICT_8x8_LOAD_TOPRIGHT \ |
691 | 29.1k | PT(8) PT(9) PT(10) PT(11) PT(12) PT(13) PT(14) PT(15) |
692 | | |
693 | | #define PREDICT_8x8_DC(v) \ |
694 | 0 | for( int y = 0; y < 8; y++ ) { \ |
695 | 0 | MPIXEL_X4( src+0 ) = v; \ |
696 | 0 | MPIXEL_X4( src+4 ) = v; \ |
697 | 0 | src += FDEC_STRIDE; \ |
698 | 0 | } |
699 | | /* DC_128: fills the 8x8 block at src with the constant 1 << (BIT_DEPTH-1); edge is not read. */ |
700 | | static void predict_8x8_dc_128_c( pixel *src, pixel edge[36] ) |
701 | 0 | { |
702 | 0 | PREDICT_8x8_DC( PIXEL_SPLAT_X4( 1 << (BIT_DEPTH-1) ) ); |
703 | 0 | } |
704 | | static void predict_8x8_dc_left_c( pixel *src, pixel edge[36] ) /* DC_LEFT: fills the 8x8 block at src with the rounded mean of the eight left edge values l0..l7 (edge[14]..edge[7]). */ |
705 | 0 | { |
706 | 0 | PREDICT_8x8_LOAD_LEFT |
707 | 0 | pixel4 dc = PIXEL_SPLAT_X4( (l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3 ); /* +4 rounds to nearest before the divide by 8 */ |
708 | 0 | PREDICT_8x8_DC( dc ); |
709 | 0 | } |
710 | | static void predict_8x8_dc_top_c( pixel *src, pixel edge[36] ) /* DC_TOP: fills the 8x8 block at src with the rounded mean of the eight top edge values t0..t7 (edge[16]..edge[23]). */ |
711 | 0 | { |
712 | 0 | PREDICT_8x8_LOAD_TOP |
713 | 0 | pixel4 dc = PIXEL_SPLAT_X4( (t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3 ); /* +4 rounds to nearest before the divide by 8 */ |
714 | 0 | PREDICT_8x8_DC( dc ); |
715 | 0 | } |
716 | | void x264_predict_8x8_dc_c( pixel *src, pixel edge[36] ) /* DC: fills the 8x8 block at src with the rounded mean of the eight left (l0..l7) and eight top (t0..t7) edge values. */ |
717 | 0 | { |
718 | 0 | PREDICT_8x8_LOAD_LEFT |
719 | 0 | PREDICT_8x8_LOAD_TOP |
720 | 0 | pixel4 dc = PIXEL_SPLAT_X4( (l0+l1+l2+l3+l4+l5+l6+l7+t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4 ); /* +8 rounds to nearest before the divide by 16 */ |
721 | 0 | PREDICT_8x8_DC( dc ); |
722 | 0 | } Unexecuted instantiation: x264_8_predict_8x8_dc_c Unexecuted instantiation: x264_10_predict_8x8_dc_c |
723 | | void x264_predict_8x8_h_c( pixel *src, pixel edge[36] ) /* Horizontal: row y of the 8x8 block at src is filled with the left edge value l<y>. */ |
724 | 0 | { |
725 | 0 | PREDICT_8x8_LOAD_LEFT |
726 | 0 | #define ROW(y) /* function-local helper: splat l<y> into both 4-pixel halves of row y */ MPIXEL_X4( src+y*FDEC_STRIDE+0 ) =\ |
727 | 0 | MPIXEL_X4( src+y*FDEC_STRIDE+4 ) = PIXEL_SPLAT_X4( l##y ); |
728 | 0 | ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7); |
729 | 0 | #undef ROW |
730 | 0 | } Unexecuted instantiation: x264_8_predict_8x8_h_c Unexecuted instantiation: x264_10_predict_8x8_h_c |
731 | | void x264_predict_8x8_v_c( pixel *src, pixel edge[36] ) /* Vertical: every row of the 8x8 block at src receives a copy of the top edge values edge[16]..edge[23]. */ |
732 | 0 | { |
733 | 0 | pixel4 top[2] = { MPIXEL_X4( edge+16 ), /* read the top run once as two X4 words (edge+16 and edge+20) */ |
734 | 0 | MPIXEL_X4( edge+20 ) }; |
735 | 0 | for( int y = 0; y < 8; y++ ) |
736 | 0 | { |
737 | 0 | MPIXEL_X4( src+y*FDEC_STRIDE+0 ) = top[0]; |
738 | 0 | MPIXEL_X4( src+y*FDEC_STRIDE+4 ) = top[1]; |
739 | 0 | } |
740 | 0 | } Unexecuted instantiation: x264_8_predict_8x8_v_c Unexecuted instantiation: x264_10_predict_8x8_v_c |
741 | | static void predict_8x8_ddl_c( pixel *src, pixel edge[36] ) /* Diagonal down-left: all cells whose two SRC() indices sum to k share F2(t<k>,t<k+1>,t<k+2>), using only the top and top-right edge values t0..t15. */ |
742 | 14.5k | { /* SRC and F2 are defined outside this listing chunk; F2 is presumably the 3-tap rounding filter - confirm against its definition. */ |
743 | 14.5k | PREDICT_8x8_LOAD_TOP |
744 | 14.5k | PREDICT_8x8_LOAD_TOPRIGHT |
745 | 14.5k | SRC(0,0)= F2(t0,t1,t2); |
746 | 14.5k | SRC(0,1)=SRC(1,0)= F2(t1,t2,t3); |
747 | 14.5k | SRC(0,2)=SRC(1,1)=SRC(2,0)= F2(t2,t3,t4); |
748 | 14.5k | SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= F2(t3,t4,t5); |
749 | 14.5k | SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= F2(t4,t5,t6); |
750 | 14.5k | SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= F2(t5,t6,t7); |
751 | 14.5k | SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= F2(t6,t7,t8); |
752 | 14.5k | SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= F2(t7,t8,t9); |
753 | 14.5k | SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= F2(t8,t9,t10); |
754 | 14.5k | SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= F2(t9,t10,t11); |
755 | 14.5k | SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= F2(t10,t11,t12); |
756 | 14.5k | SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= F2(t11,t12,t13); |
757 | 14.5k | SRC(5,7)=SRC(6,6)=SRC(7,5)= F2(t12,t13,t14); |
758 | 14.5k | SRC(6,7)=SRC(7,6)= F2(t13,t14,t15); |
759 | 14.5k | SRC(7,7)= F2(t14,t15,t15); /* no t16 is loaded, so t15 is repeated as the last tap */ |
760 | 14.5k | } |
761 | | static void predict_8x8_ddr_c( pixel *src, pixel edge[36] ) /* Diagonal down-right: all cells with the same difference between their two SRC() indices share one F2 value, taken from the run l7..l0, lt, t0..t7. */ |
762 | 14.5k | { /* SRC and F2 are defined outside this listing chunk; F2 is presumably the 3-tap rounding filter - confirm against its definition. */ |
763 | 14.5k | PREDICT_8x8_LOAD_TOP |
764 | 14.5k | PREDICT_8x8_LOAD_LEFT |
765 | 14.5k | PREDICT_8x8_LOAD_TOPLEFT |
766 | 14.5k | SRC(0,7)= F2(l7,l6,l5); /* the corner furthest into the left edge; the rows below walk the diagonals towards the top edge */ |
767 | 14.5k | SRC(0,6)=SRC(1,7)= F2(l6,l5,l4); |
768 | 14.5k | SRC(0,5)=SRC(1,6)=SRC(2,7)= F2(l5,l4,l3); |
769 | 14.5k | SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= F2(l4,l3,l2); |
770 | 14.5k | SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= F2(l3,l2,l1); |
771 | 14.5k | SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= F2(l2,l1,l0); |
772 | 14.5k | SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= F2(l1,l0,lt); |
773 | 14.5k | SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= F2(l0,lt,t0); /* main diagonal: centred on the top-left value */ |
774 | 14.5k | SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= F2(lt,t0,t1); |
775 | 14.5k | SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= F2(t0,t1,t2); |
776 | 14.5k | SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= F2(t1,t2,t3); |
777 | 14.5k | SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= F2(t2,t3,t4); |
778 | 14.5k | SRC(5,0)=SRC(6,1)=SRC(7,2)= F2(t3,t4,t5); |
779 | 14.5k | SRC(6,0)=SRC(7,1)= F2(t4,t5,t6); |
780 | 14.5k | SRC(7,0)= F2(t5,t6,t7); |
781 | | |
782 | 14.5k | } |
783 | | static void predict_8x8_vr_c( pixel *src, pixel edge[36] ) /* Vertical-right: fills the 8x8 block at src from the left, top-left and top edge values; each value is reused at cells one step right and two steps down (SRC(x,y) -> SRC(x+1,y+2)). */ |
784 | 14.5k | { /* SRC, F1 and F2 are defined outside this listing chunk; F1/F2 are presumably the 2-tap and 3-tap rounding filters - confirm against their definitions. */ |
785 | 14.5k | PREDICT_8x8_LOAD_TOP |
786 | 14.5k | PREDICT_8x8_LOAD_LEFT |
787 | 14.5k | PREDICT_8x8_LOAD_TOPLEFT |
788 | 14.5k | SRC(0,6)= F2(l5,l4,l3); /* values built only from left edge samples always use F2 */ |
789 | 14.5k | SRC(0,7)= F2(l6,l5,l4); |
790 | 14.5k | SRC(0,4)=SRC(1,6)= F2(l3,l2,l1); |
791 | 14.5k | SRC(0,5)=SRC(1,7)= F2(l4,l3,l2); |
792 | 14.5k | SRC(0,2)=SRC(1,4)=SRC(2,6)= F2(l1,l0,lt); |
793 | 14.5k | SRC(0,3)=SRC(1,5)=SRC(2,7)= F2(l2,l1,l0); |
794 | 14.5k | SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= F2(l0,lt,t0); |
795 | 14.5k | SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= F1(lt,t0); /* from here on F1 values land where the second index is even and F2 values where it is odd */ |
796 | 14.5k | SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= F2(lt,t0,t1); |
797 | 14.5k | SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= F1(t0,t1); |
798 | 14.5k | SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= F2(t0,t1,t2); |
799 | 14.5k | SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= F1(t1,t2); |
800 | 14.5k | SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= F2(t1,t2,t3); |
801 | 14.5k | SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= F1(t2,t3); |
802 | 14.5k | SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= F2(t2,t3,t4); |
803 | 14.5k | SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= F1(t3,t4); |
804 | 14.5k | SRC(5,1)=SRC(6,3)=SRC(7,5)= F2(t3,t4,t5); |
805 | 14.5k | SRC(5,0)=SRC(6,2)=SRC(7,4)= F1(t4,t5); |
806 | 14.5k | SRC(6,1)=SRC(7,3)= F2(t4,t5,t6); |
807 | 14.5k | SRC(6,0)=SRC(7,2)= F1(t5,t6); |
808 | 14.5k | SRC(7,1)= F2(t5,t6,t7); |
809 | 14.5k | SRC(7,0)= F1(t6,t7); |
810 | 14.5k | } |
811 | | static void predict_8x8_hd_c( pixel *src, pixel edge[36] ) /* Horizontal-down: builds eleven packed value pairs p1..p11 from the left, top-left and top edge values, then writes the 8x8 block at src four pixels at a time through SRC_X4. */ |
812 | 14.5k | { /* pack_pixel_1to2, pack_pixel_2to4, SRC_X4, F1 and F2 are defined outside this listing chunk; presumably the pack helpers join two pixels / two pairs into one wider word - confirm against their definitions. */ |
813 | 14.5k | PREDICT_8x8_LOAD_TOP |
814 | 14.5k | PREDICT_8x8_LOAD_LEFT |
815 | 14.5k | PREDICT_8x8_LOAD_TOPLEFT |
816 | 14.5k | int p1 = pack_pixel_1to2(F1(l6,l7), F2(l5,l6,l7)); /* p1..p8: an F1 value paired with an F2 value, walking from l7 up to the top-left corner */ |
817 | 14.5k | int p2 = pack_pixel_1to2(F1(l5,l6), F2(l4,l5,l6)); |
818 | 14.5k | int p3 = pack_pixel_1to2(F1(l4,l5), F2(l3,l4,l5)); |
819 | 14.5k | int p4 = pack_pixel_1to2(F1(l3,l4), F2(l2,l3,l4)); |
820 | 14.5k | int p5 = pack_pixel_1to2(F1(l2,l3), F2(l1,l2,l3)); |
821 | 14.5k | int p6 = pack_pixel_1to2(F1(l1,l2), F2(l0,l1,l2)); |
822 | 14.5k | int p7 = pack_pixel_1to2(F1(l0,l1), F2(lt,l0,l1)); |
823 | 14.5k | int p8 = pack_pixel_1to2(F1(lt,l0), F2(l0,lt,t0)); |
824 | 14.5k | int p9 = pack_pixel_1to2(F2(t1,t0,lt), F2(t2,t1,t0)); /* p9..p11: two F2 values each, taken along the top edge up to t6 */ |
825 | 14.5k | int p10 = pack_pixel_1to2(F2(t3,t2,t1), F2(t4,t3,t2)); |
826 | 14.5k | int p11 = pack_pixel_1to2(F2(t5,t4,t3), F2(t6,t5,t4)); |
827 | 14.5k | SRC_X4(0,7)= pack_pixel_2to4(p1,p2); /* each store takes two consecutive pairs; the left half of row y is reused as the right half of row y+2 */ |
828 | 14.5k | SRC_X4(0,6)= pack_pixel_2to4(p2,p3); |
829 | 14.5k | SRC_X4(4,7)=SRC_X4(0,5)= pack_pixel_2to4(p3,p4); |
830 | 14.5k | SRC_X4(4,6)=SRC_X4(0,4)= pack_pixel_2to4(p4,p5); |
831 | 14.5k | SRC_X4(4,5)=SRC_X4(0,3)= pack_pixel_2to4(p5,p6); |
832 | 14.5k | SRC_X4(4,4)=SRC_X4(0,2)= pack_pixel_2to4(p6,p7); |
833 | 14.5k | SRC_X4(4,3)=SRC_X4(0,1)= pack_pixel_2to4(p7,p8); |
834 | 14.5k | SRC_X4(4,2)=SRC_X4(0,0)= pack_pixel_2to4(p8,p9); |
835 | 14.5k | SRC_X4(4,1)= pack_pixel_2to4(p9,p10); |
836 | 14.5k | SRC_X4(4,0)= pack_pixel_2to4(p10,p11); |
837 | 14.5k | } |
838 | | static void predict_8x8_vl_c( pixel *src, pixel edge[36] ) /* Vertical-left: fills the 8x8 block at src from the top and top-right edge values only; each value is reused at cells one step left and two steps down (SRC(x,y) -> SRC(x-1,y+2)). */ |
839 | 14.5k | { /* SRC, F1 and F2 are defined outside this listing chunk; F1/F2 are presumably the 2-tap and 3-tap rounding filters - confirm against their definitions. */ |
840 | 14.5k | PREDICT_8x8_LOAD_TOP |
841 | 14.5k | PREDICT_8x8_LOAD_TOPRIGHT |
842 | 14.5k | SRC(0,0)= F1(t0,t1); /* F1 values land where the second SRC() index is even, F2 values where it is odd */ |
843 | 14.5k | SRC(0,1)= F2(t0,t1,t2); |
844 | 14.5k | SRC(0,2)=SRC(1,0)= F1(t1,t2); |
845 | 14.5k | SRC(0,3)=SRC(1,1)= F2(t1,t2,t3); |
846 | 14.5k | SRC(0,4)=SRC(1,2)=SRC(2,0)= F1(t2,t3); |
847 | 14.5k | SRC(0,5)=SRC(1,3)=SRC(2,1)= F2(t2,t3,t4); |
848 | 14.5k | SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= F1(t3,t4); |
849 | 14.5k | SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= F2(t3,t4,t5); |
850 | 14.5k | SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= F1(t4,t5); |
851 | 14.5k | SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= F2(t4,t5,t6); |
852 | 14.5k | SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= F1(t5,t6); |
853 | 14.5k | SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= F2(t5,t6,t7); |
854 | 14.5k | SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= F1(t6,t7); |
855 | 14.5k | SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= F2(t6,t7,t8); |
856 | 14.5k | SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= F1(t7,t8); |
857 | 14.5k | SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= F2(t7,t8,t9); |
858 | 14.5k | SRC(5,6)=SRC(6,4)=SRC(7,2)= F1(t8,t9); |
859 | 14.5k | SRC(5,7)=SRC(6,5)=SRC(7,3)= F2(t8,t9,t10); |
860 | 14.5k | SRC(6,6)=SRC(7,4)= F1(t9,t10); |
861 | 14.5k | SRC(6,7)=SRC(7,5)= F2(t9,t10,t11); |
862 | 14.5k | SRC(7,6)= F1(t10,t11); |
863 | 14.5k | SRC(7,7)= F2(t10,t11,t12); /* t12 is the last top-right value this mode reads; t13..t15 are loaded but unused */ |
864 | 14.5k | } |
865 | | static void predict_8x8_hu_c( pixel *src, pixel edge[36] ) /* Horizontal-up: builds eight packed value pairs p1..p8 from the left edge values l0..l7 only, then writes the 8x8 block at src four pixels at a time through SRC_X4. */ |
866 | 14.5k | { /* pack_pixel_1to2, pack_pixel_2to4, SRC_X4, F1 and F2 are defined outside this listing chunk; presumably the pack helpers join two pixels / two pairs into one wider word - confirm against their definitions. */ |
867 | 14.5k | PREDICT_8x8_LOAD_LEFT |
868 | 14.5k | int p1 = pack_pixel_1to2(F1(l0,l1), F2(l0,l1,l2)); /* p1..p7: an F1 value paired with an F2 value, walking down the left edge */ |
869 | 14.5k | int p2 = pack_pixel_1to2(F1(l1,l2), F2(l1,l2,l3)); |
870 | 14.5k | int p3 = pack_pixel_1to2(F1(l2,l3), F2(l2,l3,l4)); |
871 | 14.5k | int p4 = pack_pixel_1to2(F1(l3,l4), F2(l3,l4,l5)); |
872 | 14.5k | int p5 = pack_pixel_1to2(F1(l4,l5), F2(l4,l5,l6)); |
873 | 14.5k | int p6 = pack_pixel_1to2(F1(l5,l6), F2(l5,l6,l7)); |
874 | 14.5k | int p7 = pack_pixel_1to2(F1(l6,l7), F2(l6,l7,l7)); /* nothing is loaded below l7, so l7 is repeated as the last tap */ |
875 | 14.5k | int p8 = pack_pixel_1to2(l7,l7); /* past the end of the left edge everything is plain l7 */ |
876 | 14.5k | SRC_X4(0,0)= pack_pixel_2to4(p1,p2); /* each store takes two consecutive pairs; the right half of row y is reused as the left half of row y+2 */ |
877 | 14.5k | SRC_X4(0,1)= pack_pixel_2to4(p2,p3); |
878 | 14.5k | SRC_X4(4,0)=SRC_X4(0,2)= pack_pixel_2to4(p3,p4); |
879 | 14.5k | SRC_X4(4,1)=SRC_X4(0,3)= pack_pixel_2to4(p4,p5); |
880 | 14.5k | SRC_X4(4,2)=SRC_X4(0,4)= pack_pixel_2to4(p5,p6); |
881 | 14.5k | SRC_X4(4,3)=SRC_X4(0,5)= pack_pixel_2to4(p6,p7); |
882 | 14.5k | SRC_X4(4,4)=SRC_X4(0,6)= pack_pixel_2to4(p7,p8); |
883 | 14.5k | SRC_X4(4,5)=SRC_X4(4,6)= SRC_X4(0,7) = SRC_X4(4,7) = pack_pixel_2to4(p8,p8); /* the remaining four groups are all l7 */ |
884 | 14.5k | } |
885 | | |
886 | | /**************************************************************************** |
887 | | * Exported functions: |
888 | | ****************************************************************************/ |
889 | | void x264_predict_16x16_init( uint32_t cpu, x264_predict_t pf[7] ) /* Fills the seven I_PRED_16x16_* slots of pf with the C implementations, then runs whichever architecture-specific initialisers were compiled in; cpu carries the X264_CPU_* flags tested or forwarded below. */ |
890 | 169 | { |
891 | 169 | pf[I_PRED_16x16_V ] = x264_predict_16x16_v_c; |
892 | 169 | pf[I_PRED_16x16_H ] = x264_predict_16x16_h_c; |
893 | 169 | pf[I_PRED_16x16_DC] = x264_predict_16x16_dc_c; |
894 | 169 | pf[I_PRED_16x16_P ] = x264_predict_16x16_p_c; |
895 | 169 | pf[I_PRED_16x16_DC_LEFT]= predict_16x16_dc_left_c; |
896 | 169 | pf[I_PRED_16x16_DC_TOP ]= predict_16x16_dc_top_c; |
897 | 169 | pf[I_PRED_16x16_DC_128 ]= predict_16x16_dc_128_c; |
898 | | |
899 | | #if HAVE_MMX /* each arch init below receives pf after the C defaults are in place; presumably it replaces entries it can accelerate - the MSA branch further down does so explicitly */ |
900 | | x264_predict_16x16_init_mmx( cpu, pf ); |
901 | | #endif |
902 | | |
903 | | #if HAVE_ALTIVEC |
904 | | if( cpu&X264_CPU_ALTIVEC ) /* the AltiVec init takes no cpu argument, so the flag is tested here */ |
905 | | x264_predict_16x16_init_altivec( pf ); |
906 | | #endif |
907 | | |
908 | | #if HAVE_ARMV6 |
909 | | x264_predict_16x16_init_arm( cpu, pf ); |
910 | | #endif |
911 | | |
912 | | #if HAVE_AARCH64 |
913 | | x264_predict_16x16_init_aarch64( cpu, pf ); |
914 | | #endif |
915 | | |
916 | | #if !HIGH_BIT_DEPTH /* the MSA overrides are compiled only when HIGH_BIT_DEPTH is 0 */ |
917 | | #if HAVE_MSA |
918 | | if( cpu&X264_CPU_MSA ) |
919 | | { |
920 | | pf[I_PRED_16x16_V ] = x264_intra_predict_vert_16x16_msa; |
921 | | pf[I_PRED_16x16_H ] = x264_intra_predict_hor_16x16_msa; |
922 | | pf[I_PRED_16x16_DC] = x264_intra_predict_dc_16x16_msa; |
923 | | pf[I_PRED_16x16_P ] = x264_intra_predict_plane_16x16_msa; |
924 | | pf[I_PRED_16x16_DC_LEFT]= x264_intra_predict_dc_left_16x16_msa; |
925 | | pf[I_PRED_16x16_DC_TOP ]= x264_intra_predict_dc_top_16x16_msa; |
926 | | pf[I_PRED_16x16_DC_128 ]= x264_intra_predict_dc_128_16x16_msa; |
927 | | } |
928 | | #endif |
929 | | #endif |
930 | | |
931 | | #if HAVE_LSX |
932 | | x264_predict_16x16_init_loongarch( cpu, pf ); |
933 | | #endif |
934 | 169 | } x264_8_predict_16x16_init Line | Count | Source | 890 | 169 | { | 891 | 169 | pf[I_PRED_16x16_V ] = x264_predict_16x16_v_c; | 892 | 169 | pf[I_PRED_16x16_H ] = x264_predict_16x16_h_c; | 893 | 169 | pf[I_PRED_16x16_DC] = x264_predict_16x16_dc_c; | 894 | 169 | pf[I_PRED_16x16_P ] = x264_predict_16x16_p_c; | 895 | 169 | pf[I_PRED_16x16_DC_LEFT]= predict_16x16_dc_left_c; | 896 | 169 | pf[I_PRED_16x16_DC_TOP ]= predict_16x16_dc_top_c; | 897 | 169 | pf[I_PRED_16x16_DC_128 ]= predict_16x16_dc_128_c; | 898 | | | 899 | | #if HAVE_MMX | 900 | | x264_predict_16x16_init_mmx( cpu, pf ); | 901 | | #endif | 902 | | | 903 | | #if HAVE_ALTIVEC | 904 | | if( cpu&X264_CPU_ALTIVEC ) | 905 | | x264_predict_16x16_init_altivec( pf ); | 906 | | #endif | 907 | | | 908 | | #if HAVE_ARMV6 | 909 | | x264_predict_16x16_init_arm( cpu, pf ); | 910 | | #endif | 911 | | | 912 | | #if HAVE_AARCH64 | 913 | | x264_predict_16x16_init_aarch64( cpu, pf ); | 914 | | #endif | 915 | | | 916 | 169 | #if !HIGH_BIT_DEPTH | 917 | | #if HAVE_MSA | 918 | | if( cpu&X264_CPU_MSA ) | 919 | | { | 920 | | pf[I_PRED_16x16_V ] = x264_intra_predict_vert_16x16_msa; | 921 | | pf[I_PRED_16x16_H ] = x264_intra_predict_hor_16x16_msa; | 922 | | pf[I_PRED_16x16_DC] = x264_intra_predict_dc_16x16_msa; | 923 | | pf[I_PRED_16x16_P ] = x264_intra_predict_plane_16x16_msa; | 924 | | pf[I_PRED_16x16_DC_LEFT]= x264_intra_predict_dc_left_16x16_msa; | 925 | | pf[I_PRED_16x16_DC_TOP ]= x264_intra_predict_dc_top_16x16_msa; | 926 | | pf[I_PRED_16x16_DC_128 ]= x264_intra_predict_dc_128_16x16_msa; | 927 | | } | 928 | | #endif | 929 | 169 | #endif | 930 | | | 931 | | #if HAVE_LSX | 932 | | x264_predict_16x16_init_loongarch( cpu, pf ); | 933 | | #endif | 934 | 169 | } |
Unexecuted instantiation: x264_10_predict_16x16_init |
935 | | |
936 | | void x264_predict_8x8c_init( uint32_t cpu, x264_predict_t pf[7] ) /* Fills the seven I_PRED_CHROMA_* slots of pf with the C 8x8c implementations, then runs whichever architecture-specific initialisers were compiled in; cpu carries the X264_CPU_* flags tested or forwarded below. */ |
937 | 169 | { |
938 | 169 | pf[I_PRED_CHROMA_V ] = x264_predict_8x8c_v_c; |
939 | 169 | pf[I_PRED_CHROMA_H ] = x264_predict_8x8c_h_c; |
940 | 169 | pf[I_PRED_CHROMA_DC] = x264_predict_8x8c_dc_c; |
941 | 169 | pf[I_PRED_CHROMA_P ] = x264_predict_8x8c_p_c; |
942 | 169 | pf[I_PRED_CHROMA_DC_LEFT]= predict_8x8c_dc_left_c; |
943 | 169 | pf[I_PRED_CHROMA_DC_TOP ]= predict_8x8c_dc_top_c; |
944 | 169 | pf[I_PRED_CHROMA_DC_128 ]= predict_8x8c_dc_128_c; |
945 | | |
946 | | #if HAVE_MMX /* each arch init below receives pf after the C defaults are in place; presumably it replaces entries it can accelerate */ |
947 | | x264_predict_8x8c_init_mmx( cpu, pf ); |
948 | | #endif |
949 | | |
950 | | #if HAVE_ALTIVEC |
951 | | if( cpu&X264_CPU_ALTIVEC ) /* the AltiVec init takes no cpu argument, so the flag is tested here */ |
952 | | x264_predict_8x8c_init_altivec( pf ); |
953 | | #endif |
954 | | |
955 | | #if HAVE_ARMV6 |
956 | | x264_predict_8x8c_init_arm( cpu, pf ); |
957 | | #endif |
958 | | |
959 | | #if HAVE_AARCH64 |
960 | | x264_predict_8x8c_init_aarch64( cpu, pf ); |
961 | | #endif |
962 | | |
963 | | #if !HIGH_BIT_DEPTH /* the MSA override is compiled only when HIGH_BIT_DEPTH is 0 */ |
964 | | #if HAVE_MSA |
965 | | if( cpu&X264_CPU_MSA ) |
966 | | { |
967 | | pf[I_PRED_CHROMA_P ] = x264_intra_predict_plane_8x8_msa; /* only the plane predictor is replaced here */ |
968 | | } |
969 | | #endif |
970 | | #endif |
971 | | |
972 | | #if HAVE_LSX |
973 | | x264_predict_8x8c_init_loongarch( cpu, pf ); |
974 | | #endif |
975 | 169 | } Line | Count | Source | 937 | 169 | { | 938 | 169 | pf[I_PRED_CHROMA_V ] = x264_predict_8x8c_v_c; | 939 | 169 | pf[I_PRED_CHROMA_H ] = x264_predict_8x8c_h_c; | 940 | 169 | pf[I_PRED_CHROMA_DC] = x264_predict_8x8c_dc_c; | 941 | 169 | pf[I_PRED_CHROMA_P ] = x264_predict_8x8c_p_c; | 942 | 169 | pf[I_PRED_CHROMA_DC_LEFT]= predict_8x8c_dc_left_c; | 943 | 169 | pf[I_PRED_CHROMA_DC_TOP ]= predict_8x8c_dc_top_c; | 944 | 169 | pf[I_PRED_CHROMA_DC_128 ]= predict_8x8c_dc_128_c; | 945 | | | 946 | | #if HAVE_MMX | 947 | | x264_predict_8x8c_init_mmx( cpu, pf ); | 948 | | #endif | 949 | | | 950 | | #if HAVE_ALTIVEC | 951 | | if( cpu&X264_CPU_ALTIVEC ) | 952 | | x264_predict_8x8c_init_altivec( pf ); | 953 | | #endif | 954 | | | 955 | | #if HAVE_ARMV6 | 956 | | x264_predict_8x8c_init_arm( cpu, pf ); | 957 | | #endif | 958 | | | 959 | | #if HAVE_AARCH64 | 960 | | x264_predict_8x8c_init_aarch64( cpu, pf ); | 961 | | #endif | 962 | | | 963 | 169 | #if !HIGH_BIT_DEPTH | 964 | | #if HAVE_MSA | 965 | | if( cpu&X264_CPU_MSA ) | 966 | | { | 967 | | pf[I_PRED_CHROMA_P ] = x264_intra_predict_plane_8x8_msa; | 968 | | } | 969 | | #endif | 970 | 169 | #endif | 971 | | | 972 | | #if HAVE_LSX | 973 | | x264_predict_8x8c_init_loongarch( cpu, pf ); | 974 | | #endif | 975 | 169 | } |
Unexecuted instantiation: x264_10_predict_8x8c_init |
976 | | |
977 | | void x264_predict_8x16c_init( uint32_t cpu, x264_predict_t pf[7] ) /* Fills the seven I_PRED_CHROMA_* slots of pf with the C 8x16c implementations, then runs whichever architecture-specific initialisers were compiled in. */ |
978 | 169 | { |
979 | 169 | pf[I_PRED_CHROMA_V ] = x264_predict_8x16c_v_c; |
980 | 169 | pf[I_PRED_CHROMA_H ] = x264_predict_8x16c_h_c; |
981 | 169 | pf[I_PRED_CHROMA_DC] = x264_predict_8x16c_dc_c; |
982 | 169 | pf[I_PRED_CHROMA_P ] = x264_predict_8x16c_p_c; |
983 | 169 | pf[I_PRED_CHROMA_DC_LEFT]= predict_8x16c_dc_left_c; |
984 | 169 | pf[I_PRED_CHROMA_DC_TOP ]= predict_8x16c_dc_top_c; |
985 | 169 | pf[I_PRED_CHROMA_DC_128 ]= predict_8x16c_dc_128_c; |
986 | | |
987 | | #if HAVE_MMX /* only MMX, ARMv6 and AArch64 hooks exist for this size; there is no AltiVec, MSA or LSX branch in this function */ |
988 | | x264_predict_8x16c_init_mmx( cpu, pf ); |
989 | | #endif |
990 | | |
991 | | #if HAVE_ARMV6 |
992 | | x264_predict_8x16c_init_arm( cpu, pf ); |
993 | | #endif |
994 | | |
995 | | #if HAVE_AARCH64 |
996 | | x264_predict_8x16c_init_aarch64( cpu, pf ); |
997 | | #endif |
998 | 169 | } x264_8_predict_8x16c_init Line | Count | Source | 978 | 169 | { | 979 | 169 | pf[I_PRED_CHROMA_V ] = x264_predict_8x16c_v_c; | 980 | 169 | pf[I_PRED_CHROMA_H ] = x264_predict_8x16c_h_c; | 981 | 169 | pf[I_PRED_CHROMA_DC] = x264_predict_8x16c_dc_c; | 982 | 169 | pf[I_PRED_CHROMA_P ] = x264_predict_8x16c_p_c; | 983 | 169 | pf[I_PRED_CHROMA_DC_LEFT]= predict_8x16c_dc_left_c; | 984 | 169 | pf[I_PRED_CHROMA_DC_TOP ]= predict_8x16c_dc_top_c; | 985 | 169 | pf[I_PRED_CHROMA_DC_128 ]= predict_8x16c_dc_128_c; | 986 | | | 987 | | #if HAVE_MMX | 988 | | x264_predict_8x16c_init_mmx( cpu, pf ); | 989 | | #endif | 990 | | | 991 | | #if HAVE_ARMV6 | 992 | | x264_predict_8x16c_init_arm( cpu, pf ); | 993 | | #endif | 994 | | | 995 | | #if HAVE_AARCH64 | 996 | | x264_predict_8x16c_init_aarch64( cpu, pf ); | 997 | | #endif | 998 | 169 | } |
Unexecuted instantiation: x264_10_predict_8x16c_init |
999 | | |
1000 | | void x264_predict_8x8_init( uint32_t cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_filter ) /* Fills the twelve I_PRED_8x8_* slots of pf with the C implementations and stores the C edge filter in *predict_filter, then runs whichever architecture-specific initialisers were compiled in. */ |
1001 | 169 | { |
1002 | 169 | pf[I_PRED_8x8_V] = x264_predict_8x8_v_c; |
1003 | 169 | pf[I_PRED_8x8_H] = x264_predict_8x8_h_c; |
1004 | 169 | pf[I_PRED_8x8_DC] = x264_predict_8x8_dc_c; |
1005 | 169 | pf[I_PRED_8x8_DDL] = predict_8x8_ddl_c; |
1006 | 169 | pf[I_PRED_8x8_DDR] = predict_8x8_ddr_c; |
1007 | 169 | pf[I_PRED_8x8_VR] = predict_8x8_vr_c; |
1008 | 169 | pf[I_PRED_8x8_HD] = predict_8x8_hd_c; |
1009 | 169 | pf[I_PRED_8x8_VL] = predict_8x8_vl_c; |
1010 | 169 | pf[I_PRED_8x8_HU] = predict_8x8_hu_c; |
1011 | 169 | pf[I_PRED_8x8_DC_LEFT]= predict_8x8_dc_left_c; |
1012 | 169 | pf[I_PRED_8x8_DC_TOP] = predict_8x8_dc_top_c; |
1013 | 169 | pf[I_PRED_8x8_DC_128] = predict_8x8_dc_128_c; |
1014 | 169 | *predict_filter = predict_8x8_filter_c; /* output parameter: the edge filter is handed back alongside the predictor table */ |
1015 | | |
1016 | | #if HAVE_MMX /* the arch inits below also receive predict_filter, so presumably they may replace the filter as well as pf entries */ |
1017 | | x264_predict_8x8_init_mmx( cpu, pf, predict_filter ); |
1018 | | #endif |
1019 | | |
1020 | | #if HAVE_ARMV6 |
1021 | | x264_predict_8x8_init_arm( cpu, pf, predict_filter ); |
1022 | | #endif |
1023 | | |
1024 | | #if HAVE_AARCH64 |
1025 | | x264_predict_8x8_init_aarch64( cpu, pf, predict_filter ); |
1026 | | #endif |
1027 | | |
1028 | | #if !HIGH_BIT_DEPTH /* the MSA override is compiled only when HIGH_BIT_DEPTH is 0 */ |
1029 | | #if HAVE_MSA |
1030 | | if( cpu&X264_CPU_MSA ) |
1031 | | { |
1032 | | pf[I_PRED_8x8_DDL] = x264_intra_predict_ddl_8x8_msa; /* only the DDL predictor is replaced here */ |
1033 | | } |
1034 | | #endif |
1035 | | #endif |
1036 | | |
1037 | | #if HAVE_LSX |
1038 | | x264_predict_8x8_init_loongarch( cpu, pf, predict_filter ); |
1039 | | #endif |
1040 | 169 | } Line | Count | Source | 1001 | 169 | { | 1002 | 169 | pf[I_PRED_8x8_V] = x264_predict_8x8_v_c; | 1003 | 169 | pf[I_PRED_8x8_H] = x264_predict_8x8_h_c; | 1004 | 169 | pf[I_PRED_8x8_DC] = x264_predict_8x8_dc_c; | 1005 | 169 | pf[I_PRED_8x8_DDL] = predict_8x8_ddl_c; | 1006 | 169 | pf[I_PRED_8x8_DDR] = predict_8x8_ddr_c; | 1007 | 169 | pf[I_PRED_8x8_VR] = predict_8x8_vr_c; | 1008 | 169 | pf[I_PRED_8x8_HD] = predict_8x8_hd_c; | 1009 | 169 | pf[I_PRED_8x8_VL] = predict_8x8_vl_c; | 1010 | 169 | pf[I_PRED_8x8_HU] = predict_8x8_hu_c; | 1011 | 169 | pf[I_PRED_8x8_DC_LEFT]= predict_8x8_dc_left_c; | 1012 | 169 | pf[I_PRED_8x8_DC_TOP] = predict_8x8_dc_top_c; | 1013 | 169 | pf[I_PRED_8x8_DC_128] = predict_8x8_dc_128_c; | 1014 | 169 | *predict_filter = predict_8x8_filter_c; | 1015 | | | 1016 | | #if HAVE_MMX | 1017 | | x264_predict_8x8_init_mmx( cpu, pf, predict_filter ); | 1018 | | #endif | 1019 | | | 1020 | | #if HAVE_ARMV6 | 1021 | | x264_predict_8x8_init_arm( cpu, pf, predict_filter ); | 1022 | | #endif | 1023 | | | 1024 | | #if HAVE_AARCH64 | 1025 | | x264_predict_8x8_init_aarch64( cpu, pf, predict_filter ); | 1026 | | #endif | 1027 | | | 1028 | 169 | #if !HIGH_BIT_DEPTH | 1029 | | #if HAVE_MSA | 1030 | | if( cpu&X264_CPU_MSA ) | 1031 | | { | 1032 | | pf[I_PRED_8x8_DDL] = x264_intra_predict_ddl_8x8_msa; | 1033 | | } | 1034 | | #endif | 1035 | 169 | #endif | 1036 | | | 1037 | | #if HAVE_LSX | 1038 | | x264_predict_8x8_init_loongarch( cpu, pf, predict_filter ); | 1039 | | #endif | 1040 | 169 | } |
Unexecuted instantiation: x264_10_predict_8x8_init |
1041 | | |
1042 | | void x264_predict_4x4_init( uint32_t cpu, x264_predict_t pf[12] ) /* Fills the twelve I_PRED_4x4_* slots of pf with the C implementations, then runs whichever architecture-specific initialisers were compiled in. */ |
1043 | 169 | { |
1044 | 169 | pf[I_PRED_4x4_V] = x264_predict_4x4_v_c; |
1045 | 169 | pf[I_PRED_4x4_H] = x264_predict_4x4_h_c; |
1046 | 169 | pf[I_PRED_4x4_DC] = x264_predict_4x4_dc_c; |
1047 | 169 | pf[I_PRED_4x4_DDL] = predict_4x4_ddl_c; |
1048 | 169 | pf[I_PRED_4x4_DDR] = predict_4x4_ddr_c; |
1049 | 169 | pf[I_PRED_4x4_VR] = predict_4x4_vr_c; |
1050 | 169 | pf[I_PRED_4x4_HD] = predict_4x4_hd_c; |
1051 | 169 | pf[I_PRED_4x4_VL] = predict_4x4_vl_c; |
1052 | 169 | pf[I_PRED_4x4_HU] = predict_4x4_hu_c; |
1053 | 169 | pf[I_PRED_4x4_DC_LEFT]= predict_4x4_dc_left_c; |
1054 | 169 | pf[I_PRED_4x4_DC_TOP] = predict_4x4_dc_top_c; |
1055 | 169 | pf[I_PRED_4x4_DC_128] = predict_4x4_dc_128_c; |
1056 | | |
1057 | | #if HAVE_MMX /* MMX, ARMv6, AArch64 and LSX hooks exist for this size; there is no AltiVec or MSA branch in this function */ |
1058 | | x264_predict_4x4_init_mmx( cpu, pf ); |
1059 | | #endif |
1060 | | |
1061 | | #if HAVE_ARMV6 |
1062 | | x264_predict_4x4_init_arm( cpu, pf ); |
1063 | | #endif |
1064 | | |
1065 | | #if HAVE_AARCH64 |
1066 | | x264_predict_4x4_init_aarch64( cpu, pf ); |
1067 | | #endif |
1068 | | |
1069 | | #if HAVE_LSX |
1070 | | x264_predict_4x4_init_loongarch( cpu, pf ); |
1071 | | #endif |
1072 | 169 | } Line | Count | Source | 1043 | 169 | { | 1044 | 169 | pf[I_PRED_4x4_V] = x264_predict_4x4_v_c; | 1045 | 169 | pf[I_PRED_4x4_H] = x264_predict_4x4_h_c; | 1046 | 169 | pf[I_PRED_4x4_DC] = x264_predict_4x4_dc_c; | 1047 | 169 | pf[I_PRED_4x4_DDL] = predict_4x4_ddl_c; | 1048 | 169 | pf[I_PRED_4x4_DDR] = predict_4x4_ddr_c; | 1049 | 169 | pf[I_PRED_4x4_VR] = predict_4x4_vr_c; | 1050 | 169 | pf[I_PRED_4x4_HD] = predict_4x4_hd_c; | 1051 | 169 | pf[I_PRED_4x4_VL] = predict_4x4_vl_c; | 1052 | 169 | pf[I_PRED_4x4_HU] = predict_4x4_hu_c; | 1053 | 169 | pf[I_PRED_4x4_DC_LEFT]= predict_4x4_dc_left_c; | 1054 | 169 | pf[I_PRED_4x4_DC_TOP] = predict_4x4_dc_top_c; | 1055 | 169 | pf[I_PRED_4x4_DC_128] = predict_4x4_dc_128_c; | 1056 | | | 1057 | | #if HAVE_MMX | 1058 | | x264_predict_4x4_init_mmx( cpu, pf ); | 1059 | | #endif | 1060 | | | 1061 | | #if HAVE_ARMV6 | 1062 | | x264_predict_4x4_init_arm( cpu, pf ); | 1063 | | #endif | 1064 | | | 1065 | | #if HAVE_AARCH64 | 1066 | | x264_predict_4x4_init_aarch64( cpu, pf ); | 1067 | | #endif | 1068 | | | 1069 | | #if HAVE_LSX | 1070 | | x264_predict_4x4_init_loongarch( cpu, pf ); | 1071 | | #endif | 1072 | 169 | } |
Unexecuted instantiation: x264_10_predict_4x4_init |
1073 | | |