/src/ffmpeg/libavcodec/h264idct_template.c
Line | Count | Source |
1 | | /* |
2 | | * H.264 IDCT |
3 | | * Copyright (c) 2004-2011 Michael Niedermayer <michaelni@gmx.at> |
4 | | * |
5 | | * This file is part of FFmpeg. |
6 | | * |
7 | | * FFmpeg is free software; you can redistribute it and/or |
8 | | * modify it under the terms of the GNU Lesser General Public |
9 | | * License as published by the Free Software Foundation; either |
10 | | * version 2.1 of the License, or (at your option) any later version. |
11 | | * |
12 | | * FFmpeg is distributed in the hope that it will be useful, |
13 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | | * Lesser General Public License for more details. |
16 | | * |
17 | | * You should have received a copy of the GNU Lesser General Public |
18 | | * License along with FFmpeg; if not, write to the Free Software |
19 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
20 | | */ |
21 | | |
22 | | /** |
23 | | * @file |
24 | | * H.264 IDCT. |
25 | | * @author Michael Niedermayer <michaelni@gmx.at> |
26 | | */ |
27 | | |
28 | | #include "bit_depth_template.c" |
29 | | #include "libavutil/common.h" |
30 | | #include "h264_parse.h" |
31 | | #include "h264idct.h" |
32 | | |
33 | | void FUNCC(ff_h264_idct_add)(uint8_t *_dst, int16_t *_block, int stride) |
34 | 68.5M | { |
35 | 68.5M | int i; |
36 | 68.5M | pixel *dst = (pixel*)_dst; |
37 | 68.5M | dctcoef *block = (dctcoef*)_block; |
38 | 68.5M | stride >>= sizeof(pixel)-1; |
39 | | |
40 | 68.5M | block[0] += 1 << 5; |
41 | | |
42 | 342M | for(i=0; i<4; i++){ |
43 | 274M | const SUINT z0= block[i + 4*0] + (unsigned)block[i + 4*2]; |
44 | 274M | const SUINT z1= block[i + 4*0] - (unsigned)block[i + 4*2]; |
45 | 274M | const SUINT z2= (block[i + 4*1]>>1) - (unsigned)block[i + 4*3]; |
46 | 274M | const SUINT z3= block[i + 4*1] + (unsigned)(block[i + 4*3]>>1); |
47 | | |
48 | 274M | block[i + 4*0]= z0 + z3; |
49 | 274M | block[i + 4*1]= z1 + z2; |
50 | 274M | block[i + 4*2]= z1 - z2; |
51 | 274M | block[i + 4*3]= z0 - z3; |
52 | 274M | } |
53 | | |
54 | 342M | for(i=0; i<4; i++){ |
55 | 274M | const SUINT z0= block[0 + 4*i] + (SUINT)block[2 + 4*i]; |
56 | 274M | const SUINT z1= block[0 + 4*i] - (SUINT)block[2 + 4*i]; |
57 | 274M | const SUINT z2= (block[1 + 4*i]>>1) - (SUINT)block[3 + 4*i]; |
58 | 274M | const SUINT z3= block[1 + 4*i] + (SUINT)(block[3 + 4*i]>>1); |
59 | | |
60 | 274M | dst[i + 0*stride]= av_clip_pixel(dst[i + 0*stride] + ((int)(z0 + z3) >> 6)); |
61 | 274M | dst[i + 1*stride]= av_clip_pixel(dst[i + 1*stride] + ((int)(z1 + z2) >> 6)); |
62 | 274M | dst[i + 2*stride]= av_clip_pixel(dst[i + 2*stride] + ((int)(z1 - z2) >> 6)); |
63 | 274M | dst[i + 3*stride]= av_clip_pixel(dst[i + 3*stride] + ((int)(z0 - z3) >> 6)); |
64 | 274M | } |
65 | | |
66 | 68.5M | memset(block, 0, 16 * sizeof(dctcoef)); |
67 | 68.5M | } Line | Count | Source | 34 | 17.5M | { | 35 | 17.5M | int i; | 36 | 17.5M | pixel *dst = (pixel*)_dst; | 37 | 17.5M | dctcoef *block = (dctcoef*)_block; | 38 | 17.5M | stride >>= sizeof(pixel)-1; | 39 | | | 40 | 17.5M | block[0] += 1 << 5; | 41 | | | 42 | 87.5M | for(i=0; i<4; i++){ | 43 | 70.0M | const SUINT z0= block[i + 4*0] + (unsigned)block[i + 4*2]; | 44 | 70.0M | const SUINT z1= block[i + 4*0] - (unsigned)block[i + 4*2]; | 45 | 70.0M | const SUINT z2= (block[i + 4*1]>>1) - (unsigned)block[i + 4*3]; | 46 | 70.0M | const SUINT z3= block[i + 4*1] + (unsigned)(block[i + 4*3]>>1); | 47 | | | 48 | 70.0M | block[i + 4*0]= z0 + z3; | 49 | 70.0M | block[i + 4*1]= z1 + z2; | 50 | 70.0M | block[i + 4*2]= z1 - z2; | 51 | 70.0M | block[i + 4*3]= z0 - z3; | 52 | 70.0M | } | 53 | | | 54 | 87.5M | for(i=0; i<4; i++){ | 55 | 70.0M | const SUINT z0= block[0 + 4*i] + (SUINT)block[2 + 4*i]; | 56 | 70.0M | const SUINT z1= block[0 + 4*i] - (SUINT)block[2 + 4*i]; | 57 | 70.0M | const SUINT z2= (block[1 + 4*i]>>1) - (SUINT)block[3 + 4*i]; | 58 | 70.0M | const SUINT z3= block[1 + 4*i] + (SUINT)(block[3 + 4*i]>>1); | 59 | | | 60 | 70.0M | dst[i + 0*stride]= av_clip_pixel(dst[i + 0*stride] + ((int)(z0 + z3) >> 6)); | 61 | 70.0M | dst[i + 1*stride]= av_clip_pixel(dst[i + 1*stride] + ((int)(z1 + z2) >> 6)); | 62 | 70.0M | dst[i + 2*stride]= av_clip_pixel(dst[i + 2*stride] + ((int)(z1 - z2) >> 6)); | 63 | 70.0M | dst[i + 3*stride]= av_clip_pixel(dst[i + 3*stride] + ((int)(z0 - z3) >> 6)); | 64 | 70.0M | } | 65 | | | 66 | 17.5M | memset(block, 0, 16 * sizeof(dctcoef)); | 67 | 17.5M | } |
Line | Count | Source | 34 | 14.1M | { | 35 | 14.1M | int i; | 36 | 14.1M | pixel *dst = (pixel*)_dst; | 37 | 14.1M | dctcoef *block = (dctcoef*)_block; | 38 | 14.1M | stride >>= sizeof(pixel)-1; | 39 | | | 40 | 14.1M | block[0] += 1 << 5; | 41 | | | 42 | 70.9M | for(i=0; i<4; i++){ | 43 | 56.7M | const SUINT z0= block[i + 4*0] + (unsigned)block[i + 4*2]; | 44 | 56.7M | const SUINT z1= block[i + 4*0] - (unsigned)block[i + 4*2]; | 45 | 56.7M | const SUINT z2= (block[i + 4*1]>>1) - (unsigned)block[i + 4*3]; | 46 | 56.7M | const SUINT z3= block[i + 4*1] + (unsigned)(block[i + 4*3]>>1); | 47 | | | 48 | 56.7M | block[i + 4*0]= z0 + z3; | 49 | 56.7M | block[i + 4*1]= z1 + z2; | 50 | 56.7M | block[i + 4*2]= z1 - z2; | 51 | 56.7M | block[i + 4*3]= z0 - z3; | 52 | 56.7M | } | 53 | | | 54 | 70.9M | for(i=0; i<4; i++){ | 55 | 56.7M | const SUINT z0= block[0 + 4*i] + (SUINT)block[2 + 4*i]; | 56 | 56.7M | const SUINT z1= block[0 + 4*i] - (SUINT)block[2 + 4*i]; | 57 | 56.7M | const SUINT z2= (block[1 + 4*i]>>1) - (SUINT)block[3 + 4*i]; | 58 | 56.7M | const SUINT z3= block[1 + 4*i] + (SUINT)(block[3 + 4*i]>>1); | 59 | | | 60 | 56.7M | dst[i + 0*stride]= av_clip_pixel(dst[i + 0*stride] + ((int)(z0 + z3) >> 6)); | 61 | 56.7M | dst[i + 1*stride]= av_clip_pixel(dst[i + 1*stride] + ((int)(z1 + z2) >> 6)); | 62 | 56.7M | dst[i + 2*stride]= av_clip_pixel(dst[i + 2*stride] + ((int)(z1 - z2) >> 6)); | 63 | 56.7M | dst[i + 3*stride]= av_clip_pixel(dst[i + 3*stride] + ((int)(z0 - z3) >> 6)); | 64 | 56.7M | } | 65 | | | 66 | 14.1M | memset(block, 0, 16 * sizeof(dctcoef)); | 67 | 14.1M | } |
Line | Count | Source | 34 | 14.4M | { | 35 | 14.4M | int i; | 36 | 14.4M | pixel *dst = (pixel*)_dst; | 37 | 14.4M | dctcoef *block = (dctcoef*)_block; | 38 | 14.4M | stride >>= sizeof(pixel)-1; | 39 | | | 40 | 14.4M | block[0] += 1 << 5; | 41 | | | 42 | 72.0M | for(i=0; i<4; i++){ | 43 | 57.6M | const SUINT z0= block[i + 4*0] + (unsigned)block[i + 4*2]; | 44 | 57.6M | const SUINT z1= block[i + 4*0] - (unsigned)block[i + 4*2]; | 45 | 57.6M | const SUINT z2= (block[i + 4*1]>>1) - (unsigned)block[i + 4*3]; | 46 | 57.6M | const SUINT z3= block[i + 4*1] + (unsigned)(block[i + 4*3]>>1); | 47 | | | 48 | 57.6M | block[i + 4*0]= z0 + z3; | 49 | 57.6M | block[i + 4*1]= z1 + z2; | 50 | 57.6M | block[i + 4*2]= z1 - z2; | 51 | 57.6M | block[i + 4*3]= z0 - z3; | 52 | 57.6M | } | 53 | | | 54 | 72.0M | for(i=0; i<4; i++){ | 55 | 57.6M | const SUINT z0= block[0 + 4*i] + (SUINT)block[2 + 4*i]; | 56 | 57.6M | const SUINT z1= block[0 + 4*i] - (SUINT)block[2 + 4*i]; | 57 | 57.6M | const SUINT z2= (block[1 + 4*i]>>1) - (SUINT)block[3 + 4*i]; | 58 | 57.6M | const SUINT z3= block[1 + 4*i] + (SUINT)(block[3 + 4*i]>>1); | 59 | | | 60 | 57.6M | dst[i + 0*stride]= av_clip_pixel(dst[i + 0*stride] + ((int)(z0 + z3) >> 6)); | 61 | 57.6M | dst[i + 1*stride]= av_clip_pixel(dst[i + 1*stride] + ((int)(z1 + z2) >> 6)); | 62 | 57.6M | dst[i + 2*stride]= av_clip_pixel(dst[i + 2*stride] + ((int)(z1 - z2) >> 6)); | 63 | 57.6M | dst[i + 3*stride]= av_clip_pixel(dst[i + 3*stride] + ((int)(z0 - z3) >> 6)); | 64 | 57.6M | } | 65 | | | 66 | 14.4M | memset(block, 0, 16 * sizeof(dctcoef)); | 67 | 14.4M | } |
Line | Count | Source | 34 | 11.5M | { | 35 | 11.5M | int i; | 36 | 11.5M | pixel *dst = (pixel*)_dst; | 37 | 11.5M | dctcoef *block = (dctcoef*)_block; | 38 | 11.5M | stride >>= sizeof(pixel)-1; | 39 | | | 40 | 11.5M | block[0] += 1 << 5; | 41 | | | 42 | 57.9M | for(i=0; i<4; i++){ | 43 | 46.3M | const SUINT z0= block[i + 4*0] + (unsigned)block[i + 4*2]; | 44 | 46.3M | const SUINT z1= block[i + 4*0] - (unsigned)block[i + 4*2]; | 45 | 46.3M | const SUINT z2= (block[i + 4*1]>>1) - (unsigned)block[i + 4*3]; | 46 | 46.3M | const SUINT z3= block[i + 4*1] + (unsigned)(block[i + 4*3]>>1); | 47 | | | 48 | 46.3M | block[i + 4*0]= z0 + z3; | 49 | 46.3M | block[i + 4*1]= z1 + z2; | 50 | 46.3M | block[i + 4*2]= z1 - z2; | 51 | 46.3M | block[i + 4*3]= z0 - z3; | 52 | 46.3M | } | 53 | | | 54 | 57.9M | for(i=0; i<4; i++){ | 55 | 46.3M | const SUINT z0= block[0 + 4*i] + (SUINT)block[2 + 4*i]; | 56 | 46.3M | const SUINT z1= block[0 + 4*i] - (SUINT)block[2 + 4*i]; | 57 | 46.3M | const SUINT z2= (block[1 + 4*i]>>1) - (SUINT)block[3 + 4*i]; | 58 | 46.3M | const SUINT z3= block[1 + 4*i] + (SUINT)(block[3 + 4*i]>>1); | 59 | | | 60 | 46.3M | dst[i + 0*stride]= av_clip_pixel(dst[i + 0*stride] + ((int)(z0 + z3) >> 6)); | 61 | 46.3M | dst[i + 1*stride]= av_clip_pixel(dst[i + 1*stride] + ((int)(z1 + z2) >> 6)); | 62 | 46.3M | dst[i + 2*stride]= av_clip_pixel(dst[i + 2*stride] + ((int)(z1 - z2) >> 6)); | 63 | 46.3M | dst[i + 3*stride]= av_clip_pixel(dst[i + 3*stride] + ((int)(z0 - z3) >> 6)); | 64 | 46.3M | } | 65 | | | 66 | 11.5M | memset(block, 0, 16 * sizeof(dctcoef)); | 67 | 11.5M | } |
Line | Count | Source | 34 | 10.8M | { | 35 | 10.8M | int i; | 36 | 10.8M | pixel *dst = (pixel*)_dst; | 37 | 10.8M | dctcoef *block = (dctcoef*)_block; | 38 | 10.8M | stride >>= sizeof(pixel)-1; | 39 | | | 40 | 10.8M | block[0] += 1 << 5; | 41 | | | 42 | 54.0M | for(i=0; i<4; i++){ | 43 | 43.2M | const SUINT z0= block[i + 4*0] + (unsigned)block[i + 4*2]; | 44 | 43.2M | const SUINT z1= block[i + 4*0] - (unsigned)block[i + 4*2]; | 45 | 43.2M | const SUINT z2= (block[i + 4*1]>>1) - (unsigned)block[i + 4*3]; | 46 | 43.2M | const SUINT z3= block[i + 4*1] + (unsigned)(block[i + 4*3]>>1); | 47 | | | 48 | 43.2M | block[i + 4*0]= z0 + z3; | 49 | 43.2M | block[i + 4*1]= z1 + z2; | 50 | 43.2M | block[i + 4*2]= z1 - z2; | 51 | 43.2M | block[i + 4*3]= z0 - z3; | 52 | 43.2M | } | 53 | | | 54 | 54.0M | for(i=0; i<4; i++){ | 55 | 43.2M | const SUINT z0= block[0 + 4*i] + (SUINT)block[2 + 4*i]; | 56 | 43.2M | const SUINT z1= block[0 + 4*i] - (SUINT)block[2 + 4*i]; | 57 | 43.2M | const SUINT z2= (block[1 + 4*i]>>1) - (SUINT)block[3 + 4*i]; | 58 | 43.2M | const SUINT z3= block[1 + 4*i] + (SUINT)(block[3 + 4*i]>>1); | 59 | | | 60 | 43.2M | dst[i + 0*stride]= av_clip_pixel(dst[i + 0*stride] + ((int)(z0 + z3) >> 6)); | 61 | 43.2M | dst[i + 1*stride]= av_clip_pixel(dst[i + 1*stride] + ((int)(z1 + z2) >> 6)); | 62 | 43.2M | dst[i + 2*stride]= av_clip_pixel(dst[i + 2*stride] + ((int)(z1 - z2) >> 6)); | 63 | 43.2M | dst[i + 3*stride]= av_clip_pixel(dst[i + 3*stride] + ((int)(z0 - z3) >> 6)); | 64 | 43.2M | } | 65 | | | 66 | 10.8M | memset(block, 0, 16 * sizeof(dctcoef)); | 67 | 10.8M | } |
|
68 | | |
69 | 8.19M | void FUNCC(ff_h264_idct8_add)(uint8_t *_dst, int16_t *_block, int stride){ |
70 | 8.19M | int i; |
71 | 8.19M | pixel *dst = (pixel*)_dst; |
72 | 8.19M | dctcoef *block = (dctcoef*)_block; |
73 | 8.19M | stride >>= sizeof(pixel)-1; |
74 | | |
75 | 8.19M | block[0] += 32; |
76 | | |
77 | 73.7M | for( i = 0; i < 8; i++ ) |
78 | 65.5M | { |
79 | 65.5M | const unsigned int a0 = block[i+0*8] + (unsigned)block[i+4*8]; |
80 | 65.5M | const unsigned int a2 = block[i+0*8] - (unsigned)block[i+4*8]; |
81 | 65.5M | const unsigned int a4 = (block[i+2*8]>>1) - (unsigned)block[i+6*8]; |
82 | 65.5M | const unsigned int a6 = (block[i+6*8]>>1) + (unsigned)block[i+2*8]; |
83 | | |
84 | 65.5M | const unsigned int b0 = a0 + a6; |
85 | 65.5M | const unsigned int b2 = a2 + a4; |
86 | 65.5M | const unsigned int b4 = a2 - a4; |
87 | 65.5M | const unsigned int b6 = a0 - a6; |
88 | | |
89 | 65.5M | const int a1 = -block[i+3*8] + (unsigned)block[i+5*8] - block[i+7*8] - (block[i+7*8]>>1); |
90 | 65.5M | const int a3 = block[i+1*8] + (unsigned)block[i+7*8] - block[i+3*8] - (block[i+3*8]>>1); |
91 | 65.5M | const int a5 = -block[i+1*8] + (unsigned)block[i+7*8] + block[i+5*8] + (block[i+5*8]>>1); |
92 | 65.5M | const int a7 = block[i+3*8] + (unsigned)block[i+5*8] + block[i+1*8] + (block[i+1*8]>>1); |
93 | | |
94 | 65.5M | const int b1 = (a7>>2) + (unsigned)a1; |
95 | 65.5M | const int b3 = (unsigned)a3 + (a5>>2); |
96 | 65.5M | const int b5 = (a3>>2) - (unsigned)a5; |
97 | 65.5M | const int b7 = (unsigned)a7 - (a1>>2); |
98 | | |
99 | 65.5M | block[i+0*8] = b0 + b7; |
100 | 65.5M | block[i+7*8] = b0 - b7; |
101 | 65.5M | block[i+1*8] = b2 + b5; |
102 | 65.5M | block[i+6*8] = b2 - b5; |
103 | 65.5M | block[i+2*8] = b4 + b3; |
104 | 65.5M | block[i+5*8] = b4 - b3; |
105 | 65.5M | block[i+3*8] = b6 + b1; |
106 | 65.5M | block[i+4*8] = b6 - b1; |
107 | 65.5M | } |
108 | 73.7M | for( i = 0; i < 8; i++ ) |
109 | 65.5M | { |
110 | 65.5M | const unsigned a0 = block[0+i*8] + (unsigned)block[4+i*8]; |
111 | 65.5M | const unsigned a2 = block[0+i*8] - (unsigned)block[4+i*8]; |
112 | 65.5M | const unsigned a4 = (block[2+i*8]>>1) - (unsigned)block[6+i*8]; |
113 | 65.5M | const unsigned a6 = (block[6+i*8]>>1) + (unsigned)block[2+i*8]; |
114 | | |
115 | 65.5M | const unsigned b0 = a0 + a6; |
116 | 65.5M | const unsigned b2 = a2 + a4; |
117 | 65.5M | const unsigned b4 = a2 - a4; |
118 | 65.5M | const unsigned b6 = a0 - a6; |
119 | | |
120 | 65.5M | const int a1 = -(unsigned)block[3+i*8] + block[5+i*8] - block[7+i*8] - (block[7+i*8]>>1); |
121 | 65.5M | const int a3 = (unsigned)block[1+i*8] + block[7+i*8] - block[3+i*8] - (block[3+i*8]>>1); |
122 | 65.5M | const int a5 = -(unsigned)block[1+i*8] + block[7+i*8] + block[5+i*8] + (block[5+i*8]>>1); |
123 | 65.5M | const int a7 = (unsigned)block[3+i*8] + block[5+i*8] + block[1+i*8] + (block[1+i*8]>>1); |
124 | | |
125 | 65.5M | const unsigned b1 = (a7>>2) + (unsigned)a1; |
126 | 65.5M | const unsigned b3 = (unsigned)a3 + (a5>>2); |
127 | 65.5M | const unsigned b5 = (a3>>2) - (unsigned)a5; |
128 | 65.5M | const unsigned b7 = (unsigned)a7 - (a1>>2); |
129 | | |
130 | 65.5M | dst[i + 0*stride] = av_clip_pixel( dst[i + 0*stride] + ((int)(b0 + b7) >> 6) ); |
131 | 65.5M | dst[i + 1*stride] = av_clip_pixel( dst[i + 1*stride] + ((int)(b2 + b5) >> 6) ); |
132 | 65.5M | dst[i + 2*stride] = av_clip_pixel( dst[i + 2*stride] + ((int)(b4 + b3) >> 6) ); |
133 | 65.5M | dst[i + 3*stride] = av_clip_pixel( dst[i + 3*stride] + ((int)(b6 + b1) >> 6) ); |
134 | 65.5M | dst[i + 4*stride] = av_clip_pixel( dst[i + 4*stride] + ((int)(b6 - b1) >> 6) ); |
135 | 65.5M | dst[i + 5*stride] = av_clip_pixel( dst[i + 5*stride] + ((int)(b4 - b3) >> 6) ); |
136 | 65.5M | dst[i + 6*stride] = av_clip_pixel( dst[i + 6*stride] + ((int)(b2 - b5) >> 6) ); |
137 | 65.5M | dst[i + 7*stride] = av_clip_pixel( dst[i + 7*stride] + ((int)(b0 - b7) >> 6) ); |
138 | 65.5M | } |
139 | | |
140 | 8.19M | memset(block, 0, 64 * sizeof(dctcoef)); |
141 | 8.19M | } Line | Count | Source | 69 | 1.94M | void FUNCC(ff_h264_idct8_add)(uint8_t *_dst, int16_t *_block, int stride){ | 70 | 1.94M | int i; | 71 | 1.94M | pixel *dst = (pixel*)_dst; | 72 | 1.94M | dctcoef *block = (dctcoef*)_block; | 73 | 1.94M | stride >>= sizeof(pixel)-1; | 74 | | | 75 | 1.94M | block[0] += 32; | 76 | | | 77 | 17.5M | for( i = 0; i < 8; i++ ) | 78 | 15.5M | { | 79 | 15.5M | const unsigned int a0 = block[i+0*8] + (unsigned)block[i+4*8]; | 80 | 15.5M | const unsigned int a2 = block[i+0*8] - (unsigned)block[i+4*8]; | 81 | 15.5M | const unsigned int a4 = (block[i+2*8]>>1) - (unsigned)block[i+6*8]; | 82 | 15.5M | const unsigned int a6 = (block[i+6*8]>>1) + (unsigned)block[i+2*8]; | 83 | | | 84 | 15.5M | const unsigned int b0 = a0 + a6; | 85 | 15.5M | const unsigned int b2 = a2 + a4; | 86 | 15.5M | const unsigned int b4 = a2 - a4; | 87 | 15.5M | const unsigned int b6 = a0 - a6; | 88 | | | 89 | 15.5M | const int a1 = -block[i+3*8] + (unsigned)block[i+5*8] - block[i+7*8] - (block[i+7*8]>>1); | 90 | 15.5M | const int a3 = block[i+1*8] + (unsigned)block[i+7*8] - block[i+3*8] - (block[i+3*8]>>1); | 91 | 15.5M | const int a5 = -block[i+1*8] + (unsigned)block[i+7*8] + block[i+5*8] + (block[i+5*8]>>1); | 92 | 15.5M | const int a7 = block[i+3*8] + (unsigned)block[i+5*8] + block[i+1*8] + (block[i+1*8]>>1); | 93 | | | 94 | 15.5M | const int b1 = (a7>>2) + (unsigned)a1; | 95 | 15.5M | const int b3 = (unsigned)a3 + (a5>>2); | 96 | 15.5M | const int b5 = (a3>>2) - (unsigned)a5; | 97 | 15.5M | const int b7 = (unsigned)a7 - (a1>>2); | 98 | | | 99 | 15.5M | block[i+0*8] = b0 + b7; | 100 | 15.5M | block[i+7*8] = b0 - b7; | 101 | 15.5M | block[i+1*8] = b2 + b5; | 102 | 15.5M | block[i+6*8] = b2 - b5; | 103 | 15.5M | block[i+2*8] = b4 + b3; | 104 | 15.5M | block[i+5*8] = b4 - b3; | 105 | 15.5M | block[i+3*8] = b6 + b1; | 106 | 15.5M | block[i+4*8] = b6 - b1; | 107 | 15.5M | } | 108 | 17.5M | for( i = 0; i < 8; i++ ) | 109 | 15.5M | { | 110 | 15.5M | const unsigned a0 = block[0+i*8] + (unsigned)block[4+i*8]; | 111 | 15.5M | const unsigned a2 = block[0+i*8] - (unsigned)block[4+i*8]; | 112 | 15.5M | const unsigned a4 = (block[2+i*8]>>1) - (unsigned)block[6+i*8]; | 113 | 15.5M | const unsigned a6 = (block[6+i*8]>>1) + (unsigned)block[2+i*8]; | 114 | | | 115 | 15.5M | const unsigned b0 = a0 + a6; | 116 | 15.5M | const unsigned b2 = a2 + a4; | 117 | 15.5M | const unsigned b4 = a2 - a4; | 118 | 15.5M | const unsigned b6 = a0 - a6; | 119 | | | 120 | 15.5M | const int a1 = -(unsigned)block[3+i*8] + block[5+i*8] - block[7+i*8] - (block[7+i*8]>>1); | 121 | 15.5M | const int a3 = (unsigned)block[1+i*8] + block[7+i*8] - block[3+i*8] - (block[3+i*8]>>1); | 122 | 15.5M | const int a5 = -(unsigned)block[1+i*8] + block[7+i*8] + block[5+i*8] + (block[5+i*8]>>1); | 123 | 15.5M | const int a7 = (unsigned)block[3+i*8] + block[5+i*8] + block[1+i*8] + (block[1+i*8]>>1); | 124 | | | 125 | 15.5M | const unsigned b1 = (a7>>2) + (unsigned)a1; | 126 | 15.5M | const unsigned b3 = (unsigned)a3 + (a5>>2); | 127 | 15.5M | const unsigned b5 = (a3>>2) - (unsigned)a5; | 128 | 15.5M | const unsigned b7 = (unsigned)a7 - (a1>>2); | 129 | | | 130 | 15.5M | dst[i + 0*stride] = av_clip_pixel( dst[i + 0*stride] + ((int)(b0 + b7) >> 6) ); | 131 | 15.5M | dst[i + 1*stride] = av_clip_pixel( dst[i + 1*stride] + ((int)(b2 + b5) >> 6) ); | 132 | 15.5M | dst[i + 2*stride] = av_clip_pixel( dst[i + 2*stride] + ((int)(b4 + b3) >> 6) ); | 133 | 15.5M | dst[i + 3*stride] = av_clip_pixel( dst[i + 3*stride] + ((int)(b6 + b1) >> 6) ); | 134 | 15.5M | dst[i + 4*stride] = av_clip_pixel( dst[i + 4*stride] + ((int)(b6 - b1) >> 6) ); | 135 | 15.5M | dst[i + 5*stride] = av_clip_pixel( dst[i + 5*stride] + ((int)(b4 - b3) >> 6) ); | 136 | 15.5M | dst[i + 6*stride] = av_clip_pixel( dst[i + 6*stride] + ((int)(b2 - b5) >> 6) ); | 137 | 15.5M | dst[i + 7*stride] = av_clip_pixel( dst[i + 7*stride] + ((int)(b0 - b7) >> 6) ); | 138 | 15.5M | } | 139 | | | 140 | 1.94M | memset(block, 0, 64 * sizeof(dctcoef)); | 141 | 1.94M | } |
Line | Count | Source | 69 | 1.22M | void FUNCC(ff_h264_idct8_add)(uint8_t *_dst, int16_t *_block, int stride){ | 70 | 1.22M | int i; | 71 | 1.22M | pixel *dst = (pixel*)_dst; | 72 | 1.22M | dctcoef *block = (dctcoef*)_block; | 73 | 1.22M | stride >>= sizeof(pixel)-1; | 74 | | | 75 | 1.22M | block[0] += 32; | 76 | | | 77 | 11.0M | for( i = 0; i < 8; i++ ) | 78 | 9.81M | { | 79 | 9.81M | const unsigned int a0 = block[i+0*8] + (unsigned)block[i+4*8]; | 80 | 9.81M | const unsigned int a2 = block[i+0*8] - (unsigned)block[i+4*8]; | 81 | 9.81M | const unsigned int a4 = (block[i+2*8]>>1) - (unsigned)block[i+6*8]; | 82 | 9.81M | const unsigned int a6 = (block[i+6*8]>>1) + (unsigned)block[i+2*8]; | 83 | | | 84 | 9.81M | const unsigned int b0 = a0 + a6; | 85 | 9.81M | const unsigned int b2 = a2 + a4; | 86 | 9.81M | const unsigned int b4 = a2 - a4; | 87 | 9.81M | const unsigned int b6 = a0 - a6; | 88 | | | 89 | 9.81M | const int a1 = -block[i+3*8] + (unsigned)block[i+5*8] - block[i+7*8] - (block[i+7*8]>>1); | 90 | 9.81M | const int a3 = block[i+1*8] + (unsigned)block[i+7*8] - block[i+3*8] - (block[i+3*8]>>1); | 91 | 9.81M | const int a5 = -block[i+1*8] + (unsigned)block[i+7*8] + block[i+5*8] + (block[i+5*8]>>1); | 92 | 9.81M | const int a7 = block[i+3*8] + (unsigned)block[i+5*8] + block[i+1*8] + (block[i+1*8]>>1); | 93 | | | 94 | 9.81M | const int b1 = (a7>>2) + (unsigned)a1; | 95 | 9.81M | const int b3 = (unsigned)a3 + (a5>>2); | 96 | 9.81M | const int b5 = (a3>>2) - (unsigned)a5; | 97 | 9.81M | const int b7 = (unsigned)a7 - (a1>>2); | 98 | | | 99 | 9.81M | block[i+0*8] = b0 + b7; | 100 | 9.81M | block[i+7*8] = b0 - b7; | 101 | 9.81M | block[i+1*8] = b2 + b5; | 102 | 9.81M | block[i+6*8] = b2 - b5; | 103 | 9.81M | block[i+2*8] = b4 + b3; | 104 | 9.81M | block[i+5*8] = b4 - b3; | 105 | 9.81M | block[i+3*8] = b6 + b1; | 106 | 9.81M | block[i+4*8] = b6 - b1; | 107 | 9.81M | } | 108 | 11.0M | for( i = 0; i < 8; i++ ) | 109 | 9.81M | { | 110 | 9.81M | const unsigned a0 = block[0+i*8] + (unsigned)block[4+i*8]; | 111 | 9.81M | const unsigned a2 = block[0+i*8] - (unsigned)block[4+i*8]; | 112 | 9.81M | const unsigned a4 = (block[2+i*8]>>1) - (unsigned)block[6+i*8]; | 113 | 9.81M | const unsigned a6 = (block[6+i*8]>>1) + (unsigned)block[2+i*8]; | 114 | | | 115 | 9.81M | const unsigned b0 = a0 + a6; | 116 | 9.81M | const unsigned b2 = a2 + a4; | 117 | 9.81M | const unsigned b4 = a2 - a4; | 118 | 9.81M | const unsigned b6 = a0 - a6; | 119 | | | 120 | 9.81M | const int a1 = -(unsigned)block[3+i*8] + block[5+i*8] - block[7+i*8] - (block[7+i*8]>>1); | 121 | 9.81M | const int a3 = (unsigned)block[1+i*8] + block[7+i*8] - block[3+i*8] - (block[3+i*8]>>1); | 122 | 9.81M | const int a5 = -(unsigned)block[1+i*8] + block[7+i*8] + block[5+i*8] + (block[5+i*8]>>1); | 123 | 9.81M | const int a7 = (unsigned)block[3+i*8] + block[5+i*8] + block[1+i*8] + (block[1+i*8]>>1); | 124 | | | 125 | 9.81M | const unsigned b1 = (a7>>2) + (unsigned)a1; | 126 | 9.81M | const unsigned b3 = (unsigned)a3 + (a5>>2); | 127 | 9.81M | const unsigned b5 = (a3>>2) - (unsigned)a5; | 128 | 9.81M | const unsigned b7 = (unsigned)a7 - (a1>>2); | 129 | | | 130 | 9.81M | dst[i + 0*stride] = av_clip_pixel( dst[i + 0*stride] + ((int)(b0 + b7) >> 6) ); | 131 | 9.81M | dst[i + 1*stride] = av_clip_pixel( dst[i + 1*stride] + ((int)(b2 + b5) >> 6) ); | 132 | 9.81M | dst[i + 2*stride] = av_clip_pixel( dst[i + 2*stride] + ((int)(b4 + b3) >> 6) ); | 133 | 9.81M | dst[i + 3*stride] = av_clip_pixel( dst[i + 3*stride] + ((int)(b6 + b1) >> 6) ); | 134 | 9.81M | dst[i + 4*stride] = av_clip_pixel( dst[i + 4*stride] + ((int)(b6 - b1) >> 6) ); | 135 | 9.81M | dst[i + 5*stride] = av_clip_pixel( dst[i + 5*stride] + ((int)(b4 - b3) >> 6) ); | 136 | 9.81M | dst[i + 6*stride] = av_clip_pixel( dst[i + 6*stride] + ((int)(b2 - b5) >> 6) ); | 137 | 9.81M | dst[i + 7*stride] = av_clip_pixel( dst[i + 7*stride] + ((int)(b0 - b7) >> 6) ); | 138 | 9.81M | } | 139 | | | 140 | 1.22M | memset(block, 0, 64 * sizeof(dctcoef)); | 141 | 1.22M | } |
Line | Count | Source | 69 | 2.60M | void FUNCC(ff_h264_idct8_add)(uint8_t *_dst, int16_t *_block, int stride){ | 70 | 2.60M | int i; | 71 | 2.60M | pixel *dst = (pixel*)_dst; | 72 | 2.60M | dctcoef *block = (dctcoef*)_block; | 73 | 2.60M | stride >>= sizeof(pixel)-1; | 74 | | | 75 | 2.60M | block[0] += 32; | 76 | | | 77 | 23.4M | for( i = 0; i < 8; i++ ) | 78 | 20.8M | { | 79 | 20.8M | const unsigned int a0 = block[i+0*8] + (unsigned)block[i+4*8]; | 80 | 20.8M | const unsigned int a2 = block[i+0*8] - (unsigned)block[i+4*8]; | 81 | 20.8M | const unsigned int a4 = (block[i+2*8]>>1) - (unsigned)block[i+6*8]; | 82 | 20.8M | const unsigned int a6 = (block[i+6*8]>>1) + (unsigned)block[i+2*8]; | 83 | | | 84 | 20.8M | const unsigned int b0 = a0 + a6; | 85 | 20.8M | const unsigned int b2 = a2 + a4; | 86 | 20.8M | const unsigned int b4 = a2 - a4; | 87 | 20.8M | const unsigned int b6 = a0 - a6; | 88 | | | 89 | 20.8M | const int a1 = -block[i+3*8] + (unsigned)block[i+5*8] - block[i+7*8] - (block[i+7*8]>>1); | 90 | 20.8M | const int a3 = block[i+1*8] + (unsigned)block[i+7*8] - block[i+3*8] - (block[i+3*8]>>1); | 91 | 20.8M | const int a5 = -block[i+1*8] + (unsigned)block[i+7*8] + block[i+5*8] + (block[i+5*8]>>1); | 92 | 20.8M | const int a7 = block[i+3*8] + (unsigned)block[i+5*8] + block[i+1*8] + (block[i+1*8]>>1); | 93 | | | 94 | 20.8M | const int b1 = (a7>>2) + (unsigned)a1; | 95 | 20.8M | const int b3 = (unsigned)a3 + (a5>>2); | 96 | 20.8M | const int b5 = (a3>>2) - (unsigned)a5; | 97 | 20.8M | const int b7 = (unsigned)a7 - (a1>>2); | 98 | | | 99 | 20.8M | block[i+0*8] = b0 + b7; | 100 | 20.8M | block[i+7*8] = b0 - b7; | 101 | 20.8M | block[i+1*8] = b2 + b5; | 102 | 20.8M | block[i+6*8] = b2 - b5; | 103 | 20.8M | block[i+2*8] = b4 + b3; | 104 | 20.8M | block[i+5*8] = b4 - b3; | 105 | 20.8M | block[i+3*8] = b6 + b1; | 106 | 20.8M | block[i+4*8] = b6 - b1; | 107 | 20.8M | } | 108 | 23.4M | for( i = 0; i < 8; i++ ) | 109 | 20.8M | { | 110 | 20.8M | const unsigned a0 = block[0+i*8] + (unsigned)block[4+i*8]; | 111 | 20.8M | const unsigned a2 = block[0+i*8] - (unsigned)block[4+i*8]; | 112 | 20.8M | const unsigned a4 = (block[2+i*8]>>1) - (unsigned)block[6+i*8]; | 113 | 20.8M | const unsigned a6 = (block[6+i*8]>>1) + (unsigned)block[2+i*8]; | 114 | | | 115 | 20.8M | const unsigned b0 = a0 + a6; | 116 | 20.8M | const unsigned b2 = a2 + a4; | 117 | 20.8M | const unsigned b4 = a2 - a4; | 118 | 20.8M | const unsigned b6 = a0 - a6; | 119 | | | 120 | 20.8M | const int a1 = -(unsigned)block[3+i*8] + block[5+i*8] - block[7+i*8] - (block[7+i*8]>>1); | 121 | 20.8M | const int a3 = (unsigned)block[1+i*8] + block[7+i*8] - block[3+i*8] - (block[3+i*8]>>1); | 122 | 20.8M | const int a5 = -(unsigned)block[1+i*8] + block[7+i*8] + block[5+i*8] + (block[5+i*8]>>1); | 123 | 20.8M | const int a7 = (unsigned)block[3+i*8] + block[5+i*8] + block[1+i*8] + (block[1+i*8]>>1); | 124 | | | 125 | 20.8M | const unsigned b1 = (a7>>2) + (unsigned)a1; | 126 | 20.8M | const unsigned b3 = (unsigned)a3 + (a5>>2); | 127 | 20.8M | const unsigned b5 = (a3>>2) - (unsigned)a5; | 128 | 20.8M | const unsigned b7 = (unsigned)a7 - (a1>>2); | 129 | | | 130 | 20.8M | dst[i + 0*stride] = av_clip_pixel( dst[i + 0*stride] + ((int)(b0 + b7) >> 6) ); | 131 | 20.8M | dst[i + 1*stride] = av_clip_pixel( dst[i + 1*stride] + ((int)(b2 + b5) >> 6) ); | 132 | 20.8M | dst[i + 2*stride] = av_clip_pixel( dst[i + 2*stride] + ((int)(b4 + b3) >> 6) ); | 133 | 20.8M | dst[i + 3*stride] = av_clip_pixel( dst[i + 3*stride] + ((int)(b6 + b1) >> 6) ); | 134 | 20.8M | dst[i + 4*stride] = av_clip_pixel( dst[i + 4*stride] + ((int)(b6 - b1) >> 6) ); | 135 | 20.8M | dst[i + 5*stride] = av_clip_pixel( dst[i + 5*stride] + ((int)(b4 - b3) >> 6) ); | 136 | 20.8M | dst[i + 6*stride] = av_clip_pixel( dst[i + 6*stride] + ((int)(b2 - b5) >> 6) ); | 137 | 20.8M | dst[i + 7*stride] = av_clip_pixel( dst[i + 7*stride] + ((int)(b0 - b7) >> 6) ); | 138 | 20.8M | } | 139 | | | 140 | 2.60M | memset(block, 0, 64 * sizeof(dctcoef)); | 141 | 2.60M | } |
Line | Count | Source | 69 | 1.17M | void FUNCC(ff_h264_idct8_add)(uint8_t *_dst, int16_t *_block, int stride){ | 70 | 1.17M | int i; | 71 | 1.17M | pixel *dst = (pixel*)_dst; | 72 | 1.17M | dctcoef *block = (dctcoef*)_block; | 73 | 1.17M | stride >>= sizeof(pixel)-1; | 74 | | | 75 | 1.17M | block[0] += 32; | 76 | | | 77 | 10.5M | for( i = 0; i < 8; i++ ) | 78 | 9.36M | { | 79 | 9.36M | const unsigned int a0 = block[i+0*8] + (unsigned)block[i+4*8]; | 80 | 9.36M | const unsigned int a2 = block[i+0*8] - (unsigned)block[i+4*8]; | 81 | 9.36M | const unsigned int a4 = (block[i+2*8]>>1) - (unsigned)block[i+6*8]; | 82 | 9.36M | const unsigned int a6 = (block[i+6*8]>>1) + (unsigned)block[i+2*8]; | 83 | | | 84 | 9.36M | const unsigned int b0 = a0 + a6; | 85 | 9.36M | const unsigned int b2 = a2 + a4; | 86 | 9.36M | const unsigned int b4 = a2 - a4; | 87 | 9.36M | const unsigned int b6 = a0 - a6; | 88 | | | 89 | 9.36M | const int a1 = -block[i+3*8] + (unsigned)block[i+5*8] - block[i+7*8] - (block[i+7*8]>>1); | 90 | 9.36M | const int a3 = block[i+1*8] + (unsigned)block[i+7*8] - block[i+3*8] - (block[i+3*8]>>1); | 91 | 9.36M | const int a5 = -block[i+1*8] + (unsigned)block[i+7*8] + block[i+5*8] + (block[i+5*8]>>1); | 92 | 9.36M | const int a7 = block[i+3*8] + (unsigned)block[i+5*8] + block[i+1*8] + (block[i+1*8]>>1); | 93 | | | 94 | 9.36M | const int b1 = (a7>>2) + (unsigned)a1; | 95 | 9.36M | const int b3 = (unsigned)a3 + (a5>>2); | 96 | 9.36M | const int b5 = (a3>>2) - (unsigned)a5; | 97 | 9.36M | const int b7 = (unsigned)a7 - (a1>>2); | 98 | | | 99 | 9.36M | block[i+0*8] = b0 + b7; | 100 | 9.36M | block[i+7*8] = b0 - b7; | 101 | 9.36M | block[i+1*8] = b2 + b5; | 102 | 9.36M | block[i+6*8] = b2 - b5; | 103 | 9.36M | block[i+2*8] = b4 + b3; | 104 | 9.36M | block[i+5*8] = b4 - b3; | 105 | 9.36M | block[i+3*8] = b6 + b1; | 106 | 9.36M | block[i+4*8] = b6 - b1; | 107 | 9.36M | } | 108 | 10.5M | for( i = 0; i < 8; i++ ) | 109 | 9.36M | { | 110 | 9.36M | const unsigned a0 = block[0+i*8] + (unsigned)block[4+i*8]; | 111 | 9.36M | const unsigned a2 = block[0+i*8] - (unsigned)block[4+i*8]; | 112 | 9.36M | const unsigned a4 = (block[2+i*8]>>1) - (unsigned)block[6+i*8]; | 113 | 9.36M | const unsigned a6 = (block[6+i*8]>>1) + (unsigned)block[2+i*8]; | 114 | | | 115 | 9.36M | const unsigned b0 = a0 + a6; | 116 | 9.36M | const unsigned b2 = a2 + a4; | 117 | 9.36M | const unsigned b4 = a2 - a4; | 118 | 9.36M | const unsigned b6 = a0 - a6; | 119 | | | 120 | 9.36M | const int a1 = -(unsigned)block[3+i*8] + block[5+i*8] - block[7+i*8] - (block[7+i*8]>>1); | 121 | 9.36M | const int a3 = (unsigned)block[1+i*8] + block[7+i*8] - block[3+i*8] - (block[3+i*8]>>1); | 122 | 9.36M | const int a5 = -(unsigned)block[1+i*8] + block[7+i*8] + block[5+i*8] + (block[5+i*8]>>1); | 123 | 9.36M | const int a7 = (unsigned)block[3+i*8] + block[5+i*8] + block[1+i*8] + (block[1+i*8]>>1); | 124 | | | 125 | 9.36M | const unsigned b1 = (a7>>2) + (unsigned)a1; | 126 | 9.36M | const unsigned b3 = (unsigned)a3 + (a5>>2); | 127 | 9.36M | const unsigned b5 = (a3>>2) - (unsigned)a5; | 128 | 9.36M | const unsigned b7 = (unsigned)a7 - (a1>>2); | 129 | | | 130 | 9.36M | dst[i + 0*stride] = av_clip_pixel( dst[i + 0*stride] + ((int)(b0 + b7) >> 6) ); | 131 | 9.36M | dst[i + 1*stride] = av_clip_pixel( dst[i + 1*stride] + ((int)(b2 + b5) >> 6) ); | 132 | 9.36M | dst[i + 2*stride] = av_clip_pixel( dst[i + 2*stride] + ((int)(b4 + b3) >> 6) ); | 133 | 9.36M | dst[i + 3*stride] = av_clip_pixel( dst[i + 3*stride] + ((int)(b6 + b1) >> 6) ); | 134 | 9.36M | dst[i + 4*stride] = av_clip_pixel( dst[i + 4*stride] + ((int)(b6 - b1) >> 6) ); | 135 | 9.36M | dst[i + 5*stride] = av_clip_pixel( dst[i + 5*stride] + ((int)(b4 - b3) >> 6) ); | 136 | 9.36M | dst[i + 6*stride] = av_clip_pixel( dst[i + 6*stride] + ((int)(b2 - b5) >> 6) ); | 137 | 9.36M | dst[i + 7*stride] = av_clip_pixel( dst[i + 7*stride] + ((int)(b0 - b7) >> 6) ); | 138 | 9.36M | } | 139 | | | 140 | 1.17M | memset(block, 0, 64 * sizeof(dctcoef)); | 141 | 1.17M | } |
Line | Count | Source | 69 | 1.24M | void FUNCC(ff_h264_idct8_add)(uint8_t *_dst, int16_t *_block, int stride){ | 70 | 1.24M | int i; | 71 | 1.24M | pixel *dst = (pixel*)_dst; | 72 | 1.24M | dctcoef *block = (dctcoef*)_block; | 73 | 1.24M | stride >>= sizeof(pixel)-1; | 74 | | | 75 | 1.24M | block[0] += 32; | 76 | | | 77 | 11.1M | for( i = 0; i < 8; i++ ) | 78 | 9.92M | { | 79 | 9.92M | const unsigned int a0 = block[i+0*8] + (unsigned)block[i+4*8]; | 80 | 9.92M | const unsigned int a2 = block[i+0*8] - (unsigned)block[i+4*8]; | 81 | 9.92M | const unsigned int a4 = (block[i+2*8]>>1) - (unsigned)block[i+6*8]; | 82 | 9.92M | const unsigned int a6 = (block[i+6*8]>>1) + (unsigned)block[i+2*8]; | 83 | | | 84 | 9.92M | const unsigned int b0 = a0 + a6; | 85 | 9.92M | const unsigned int b2 = a2 + a4; | 86 | 9.92M | const unsigned int b4 = a2 - a4; | 87 | 9.92M | const unsigned int b6 = a0 - a6; | 88 | | | 89 | 9.92M | const int a1 = -block[i+3*8] + (unsigned)block[i+5*8] - block[i+7*8] - (block[i+7*8]>>1); | 90 | 9.92M | const int a3 = block[i+1*8] + (unsigned)block[i+7*8] - block[i+3*8] - (block[i+3*8]>>1); | 91 | 9.92M | const int a5 = -block[i+1*8] + (unsigned)block[i+7*8] + block[i+5*8] + (block[i+5*8]>>1); | 92 | 9.92M | const int a7 = block[i+3*8] + (unsigned)block[i+5*8] + block[i+1*8] + (block[i+1*8]>>1); | 93 | | | 94 | 9.92M | const int b1 = (a7>>2) + (unsigned)a1; | 95 | 9.92M | const int b3 = (unsigned)a3 + (a5>>2); | 96 | 9.92M | const int b5 = (a3>>2) - (unsigned)a5; | 97 | 9.92M | const int b7 = (unsigned)a7 - (a1>>2); | 98 | | | 99 | 9.92M | block[i+0*8] = b0 + b7; | 100 | 9.92M | block[i+7*8] = b0 - b7; | 101 | 9.92M | block[i+1*8] = b2 + b5; | 102 | 9.92M | block[i+6*8] = b2 - b5; | 103 | 9.92M | block[i+2*8] = b4 + b3; | 104 | 9.92M | block[i+5*8] = b4 - b3; | 105 | 9.92M | block[i+3*8] = b6 + b1; | 106 | 9.92M | block[i+4*8] = b6 - b1; | 107 | 9.92M | } | 108 | 11.1M | for( i = 0; i < 8; i++ ) | 109 | 9.92M | { | 110 | 9.92M | const unsigned a0 = block[0+i*8] + (unsigned)block[4+i*8]; | 111 | 9.92M | const unsigned a2 = block[0+i*8] - (unsigned)block[4+i*8]; | 112 | 9.92M | const unsigned a4 = (block[2+i*8]>>1) - (unsigned)block[6+i*8]; | 113 | 9.92M | const unsigned a6 = (block[6+i*8]>>1) + (unsigned)block[2+i*8]; | 114 | | | 115 | 9.92M | const unsigned b0 = a0 + a6; | 116 | 9.92M | const unsigned b2 = a2 + a4; | 117 | 9.92M | const unsigned b4 = a2 - a4; | 118 | 9.92M | const unsigned b6 = a0 - a6; | 119 | | | 120 | 9.92M | const int a1 = -(unsigned)block[3+i*8] + block[5+i*8] - block[7+i*8] - (block[7+i*8]>>1); | 121 | 9.92M | const int a3 = (unsigned)block[1+i*8] + block[7+i*8] - block[3+i*8] - (block[3+i*8]>>1); | 122 | 9.92M | const int a5 = -(unsigned)block[1+i*8] + block[7+i*8] + block[5+i*8] + (block[5+i*8]>>1); | 123 | 9.92M | const int a7 = (unsigned)block[3+i*8] + block[5+i*8] + block[1+i*8] + (block[1+i*8]>>1); | 124 | | | 125 | 9.92M | const unsigned b1 = (a7>>2) + (unsigned)a1; | 126 | 9.92M | const unsigned b3 = (unsigned)a3 + (a5>>2); | 127 | 9.92M | const unsigned b5 = (a3>>2) - (unsigned)a5; | 128 | 9.92M | const unsigned b7 = (unsigned)a7 - (a1>>2); | 129 | | | 130 | 9.92M | dst[i + 0*stride] = av_clip_pixel( dst[i + 0*stride] + ((int)(b0 + b7) >> 6) ); | 131 | 9.92M | dst[i + 1*stride] = av_clip_pixel( dst[i + 1*stride] + ((int)(b2 + b5) >> 6) ); | 132 | 9.92M | dst[i + 2*stride] = av_clip_pixel( dst[i + 2*stride] + ((int)(b4 + b3) >> 6) ); | 133 | 9.92M | dst[i + 3*stride] = av_clip_pixel( dst[i + 3*stride] + ((int)(b6 + b1) >> 6) ); | 134 | 9.92M | dst[i + 4*stride] = av_clip_pixel( dst[i + 4*stride] + ((int)(b6 - b1) >> 6) ); | 135 | 9.92M | dst[i + 5*stride] = av_clip_pixel( dst[i + 5*stride] + ((int)(b4 - b3) >> 6) ); | 136 | 9.92M | dst[i + 6*stride] = av_clip_pixel( dst[i + 6*stride] + ((int)(b2 - b5) >> 6) ); | 137 | 9.92M | dst[i + 7*stride] = av_clip_pixel( dst[i + 7*stride] + ((int)(b0 - b7) >> 6) ); | 138 | 9.92M | } | 139 | | | 140 | 1.24M | memset(block, 0, 64 * sizeof(dctcoef)); | 141 | 1.24M | } |
|
142 | | |
143 | | // assumes all AC coefs are 0 |
144 | 29.8M | void FUNCC(ff_h264_idct_dc_add)(uint8_t *_dst, int16_t *_block, int stride){ |
145 | 29.8M | int i, j; |
146 | 29.8M | pixel *dst = (pixel*)_dst; |
147 | 29.8M | dctcoef *block = (dctcoef*)_block; |
148 | 29.8M | int dc = (block[0] + 32) >> 6; |
149 | 29.8M | stride /= sizeof(pixel); |
150 | 29.8M | block[0] = 0; |
151 | 149M | for( j = 0; j < 4; j++ ) |
152 | 119M | { |
153 | 596M | for( i = 0; i < 4; i++ ) |
154 | 477M | dst[i] = av_clip_pixel( dst[i] + dc ); |
155 | 119M | dst += stride; |
156 | 119M | } |
157 | 29.8M | } Line | Count | Source | 144 | 17.3M | void FUNCC(ff_h264_idct_dc_add)(uint8_t *_dst, int16_t *_block, int stride){ | 145 | 17.3M | int i, j; | 146 | 17.3M | pixel *dst = (pixel*)_dst; | 147 | 17.3M | dctcoef *block = (dctcoef*)_block; | 148 | 17.3M | int dc = (block[0] + 32) >> 6; | 149 | 17.3M | stride /= sizeof(pixel); | 150 | 17.3M | block[0] = 0; | 151 | 86.8M | for( j = 0; j < 4; j++ ) | 152 | 69.4M | { | 153 | 347M | for( i = 0; i < 4; i++ ) | 154 | 277M | dst[i] = av_clip_pixel( dst[i] + dc ); | 155 | 69.4M | dst += stride; | 156 | 69.4M | } | 157 | 17.3M | } |
Line | Count | Source | 144 | 2.94M | void FUNCC(ff_h264_idct_dc_add)(uint8_t *_dst, int16_t *_block, int stride){ | 145 | 2.94M | int i, j; | 146 | 2.94M | pixel *dst = (pixel*)_dst; | 147 | 2.94M | dctcoef *block = (dctcoef*)_block; | 148 | 2.94M | int dc = (block[0] + 32) >> 6; | 149 | 2.94M | stride /= sizeof(pixel); | 150 | 2.94M | block[0] = 0; | 151 | 14.7M | for( j = 0; j < 4; j++ ) | 152 | 11.7M | { | 153 | 58.8M | for( i = 0; i < 4; i++ ) | 154 | 47.1M | dst[i] = av_clip_pixel( dst[i] + dc ); | 155 | 11.7M | dst += stride; | 156 | 11.7M | } | 157 | 2.94M | } |
Line | Count | Source | 144 | 4.87M | void FUNCC(ff_h264_idct_dc_add)(uint8_t *_dst, int16_t *_block, int stride){ | 145 | 4.87M | int i, j; | 146 | 4.87M | pixel *dst = (pixel*)_dst; | 147 | 4.87M | dctcoef *block = (dctcoef*)_block; | 148 | 4.87M | int dc = (block[0] + 32) >> 6; | 149 | 4.87M | stride /= sizeof(pixel); | 150 | 4.87M | block[0] = 0; | 151 | 24.3M | for( j = 0; j < 4; j++ ) | 152 | 19.4M | { | 153 | 97.4M | for( i = 0; i < 4; i++ ) | 154 | 77.9M | dst[i] = av_clip_pixel( dst[i] + dc ); | 155 | 19.4M | dst += stride; | 156 | 19.4M | } | 157 | 4.87M | } |
Line | Count | Source | 144 | 2.78M | void FUNCC(ff_h264_idct_dc_add)(uint8_t *_dst, int16_t *_block, int stride){ | 145 | 2.78M | int i, j; | 146 | 2.78M | pixel *dst = (pixel*)_dst; | 147 | 2.78M | dctcoef *block = (dctcoef*)_block; | 148 | 2.78M | int dc = (block[0] + 32) >> 6; | 149 | 2.78M | stride /= sizeof(pixel); | 150 | 2.78M | block[0] = 0; | 151 | 13.9M | for( j = 0; j < 4; j++ ) | 152 | 11.1M | { | 153 | 55.7M | for( i = 0; i < 4; i++ ) | 154 | 44.6M | dst[i] = av_clip_pixel( dst[i] + dc ); | 155 | 11.1M | dst += stride; | 156 | 11.1M | } | 157 | 2.78M | } |
Line | Count | Source | 144 | 1.86M | void FUNCC(ff_h264_idct_dc_add)(uint8_t *_dst, int16_t *_block, int stride){ | 145 | 1.86M | int i, j; | 146 | 1.86M | pixel *dst = (pixel*)_dst; | 147 | 1.86M | dctcoef *block = (dctcoef*)_block; | 148 | 1.86M | int dc = (block[0] + 32) >> 6; | 149 | 1.86M | stride /= sizeof(pixel); | 150 | 1.86M | block[0] = 0; | 151 | 9.30M | for( j = 0; j < 4; j++ ) | 152 | 7.44M | { | 153 | 37.2M | for( i = 0; i < 4; i++ ) | 154 | 29.7M | dst[i] = av_clip_pixel( dst[i] + dc ); | 155 | 7.44M | dst += stride; | 156 | 7.44M | } | 157 | 1.86M | } |
|
158 | | |
159 | 808k | void FUNCC(ff_h264_idct8_dc_add)(uint8_t *_dst, int16_t *_block, int stride){ |
160 | 808k | int i, j; |
161 | 808k | pixel *dst = (pixel*)_dst; |
162 | 808k | dctcoef *block = (dctcoef*)_block; |
163 | 808k | int dc = (block[0] + 32) >> 6; |
164 | 808k | block[0] = 0; |
165 | 808k | stride /= sizeof(pixel); |
166 | 7.27M | for( j = 0; j < 8; j++ ) |
167 | 6.47M | { |
168 | 58.2M | for( i = 0; i < 8; i++ ) |
169 | 51.7M | dst[i] = av_clip_pixel( dst[i] + dc ); |
170 | 6.47M | dst += stride; |
171 | 6.47M | } |
172 | 808k | } Line | Count | Source | 159 | 323k | void FUNCC(ff_h264_idct8_dc_add)(uint8_t *_dst, int16_t *_block, int stride){ | 160 | 323k | int i, j; | 161 | 323k | pixel *dst = (pixel*)_dst; | 162 | 323k | dctcoef *block = (dctcoef*)_block; | 163 | 323k | int dc = (block[0] + 32) >> 6; | 164 | 323k | block[0] = 0; | 165 | 323k | stride /= sizeof(pixel); | 166 | 2.91M | for( j = 0; j < 8; j++ ) | 167 | 2.58M | { | 168 | 23.2M | for( i = 0; i < 8; i++ ) | 169 | 20.6M | dst[i] = av_clip_pixel( dst[i] + dc ); | 170 | 2.58M | dst += stride; | 171 | 2.58M | } | 172 | 323k | } |
Line | Count | Source | 159 | 38.1k | void FUNCC(ff_h264_idct8_dc_add)(uint8_t *_dst, int16_t *_block, int stride){ | 160 | 38.1k | int i, j; | 161 | 38.1k | pixel *dst = (pixel*)_dst; | 162 | 38.1k | dctcoef *block = (dctcoef*)_block; | 163 | 38.1k | int dc = (block[0] + 32) >> 6; | 164 | 38.1k | block[0] = 0; | 165 | 38.1k | stride /= sizeof(pixel); | 166 | 342k | for( j = 0; j < 8; j++ ) | 167 | 304k | { | 168 | 2.74M | for( i = 0; i < 8; i++ ) | 169 | 2.43M | dst[i] = av_clip_pixel( dst[i] + dc ); | 170 | 304k | dst += stride; | 171 | 304k | } | 172 | 38.1k | } |
ff_h264_idct8_dc_add_10_c Line | Count | Source | 159 | 53.4k | void FUNCC(ff_h264_idct8_dc_add)(uint8_t *_dst, int16_t *_block, int stride){ | 160 | 53.4k | int i, j; | 161 | 53.4k | pixel *dst = (pixel*)_dst; | 162 | 53.4k | dctcoef *block = (dctcoef*)_block; | 163 | 53.4k | int dc = (block[0] + 32) >> 6; | 164 | 53.4k | block[0] = 0; | 165 | 53.4k | stride /= sizeof(pixel); | 166 | 480k | for( j = 0; j < 8; j++ ) | 167 | 427k | { | 168 | 3.84M | for( i = 0; i < 8; i++ ) | 169 | 3.41M | dst[i] = av_clip_pixel( dst[i] + dc ); | 170 | 427k | dst += stride; | 171 | 427k | } | 172 | 53.4k | } |
ff_h264_idct8_dc_add_12_c Line | Count | Source | 159 | 131k | void FUNCC(ff_h264_idct8_dc_add)(uint8_t *_dst, int16_t *_block, int stride){ | 160 | 131k | int i, j; | 161 | 131k | pixel *dst = (pixel*)_dst; | 162 | 131k | dctcoef *block = (dctcoef*)_block; | 163 | 131k | int dc = (block[0] + 32) >> 6; | 164 | 131k | block[0] = 0; | 165 | 131k | stride /= sizeof(pixel); | 166 | 1.18M | for( j = 0; j < 8; j++ ) | 167 | 1.05M | { | 168 | 9.48M | for( i = 0; i < 8; i++ ) | 169 | 8.42M | dst[i] = av_clip_pixel( dst[i] + dc ); | 170 | 1.05M | dst += stride; | 171 | 1.05M | } | 172 | 131k | } |
ff_h264_idct8_dc_add_14_c Line | Count | Source | 159 | 262k | void FUNCC(ff_h264_idct8_dc_add)(uint8_t *_dst, int16_t *_block, int stride){ | 160 | 262k | int i, j; | 161 | 262k | pixel *dst = (pixel*)_dst; | 162 | 262k | dctcoef *block = (dctcoef*)_block; | 163 | 262k | int dc = (block[0] + 32) >> 6; | 164 | 262k | block[0] = 0; | 165 | 262k | stride /= sizeof(pixel); | 166 | 2.36M | for( j = 0; j < 8; j++ ) | 167 | 2.09M | { | 168 | 18.8M | for( i = 0; i < 8; i++ ) | 169 | 16.7M | dst[i] = av_clip_pixel( dst[i] + dc ); | 170 | 2.09M | dst += stride; | 171 | 2.09M | } | 172 | 262k | } |
|
173 | | |
174 | | void FUNCC(ff_h264_idct_add16)(uint8_t *dst, const int *block_offset, |
175 | | int16_t *block, int stride, |
176 | | const uint8_t nnzc[5 * 8]) |
177 | 40.5M | { |
178 | 40.5M | int i; |
179 | 689M | for(i=0; i<16; i++){ |
180 | 649M | int nnz = nnzc[ scan8[i] ]; |
181 | 649M | if(nnz){ |
182 | 60.4M | if(nnz==1 && ((dctcoef*)block)[i*16]) FUNCC(ff_h264_idct_dc_add)(dst + block_offset[i], block + i*16*sizeof(pixel), stride); |
183 | 58.5M | else FUNCC(ff_h264_idct_add )(dst + block_offset[i], block + i*16*sizeof(pixel), stride); |
184 | 60.4M | } |
185 | 649M | } |
186 | 40.5M | } Line | Count | Source | 177 | 14.0M | { | 178 | 14.0M | int i; | 179 | 238M | for(i=0; i<16; i++){ | 180 | 224M | int nnz = nnzc[ scan8[i] ]; | 181 | 224M | if(nnz){ | 182 | 15.3M | if(nnz==1 && ((dctcoef*)block)[i*16]) FUNCC(ff_h264_idct_dc_add)(dst + block_offset[i], block + i*16*sizeof(pixel), stride); | 183 | 14.5M | else FUNCC(ff_h264_idct_add )(dst + block_offset[i], block + i*16*sizeof(pixel), stride); | 184 | 15.3M | } | 185 | 224M | } | 186 | 14.0M | } |
Line | Count | Source | 177 | 11.1M | { | 178 | 11.1M | int i; | 179 | 190M | for(i=0; i<16; i++){ | 180 | 178M | int nnz = nnzc[ scan8[i] ]; | 181 | 178M | if(nnz){ | 182 | 13.4M | if(nnz==1 && ((dctcoef*)block)[i*16]) FUNCC(ff_h264_idct_dc_add)(dst + block_offset[i], block + i*16*sizeof(pixel), stride); | 183 | 13.2M | else FUNCC(ff_h264_idct_add )(dst + block_offset[i], block + i*16*sizeof(pixel), stride); | 184 | 13.4M | } | 185 | 178M | } | 186 | 11.1M | } |
Line | Count | Source | 177 | 11.7M | { | 178 | 11.7M | int i; | 179 | 198M | for(i=0; i<16; i++){ | 180 | 187M | int nnz = nnzc[ scan8[i] ]; | 181 | 187M | if(nnz){ | 182 | 12.7M | if(nnz==1 && ((dctcoef*)block)[i*16]) FUNCC(ff_h264_idct_dc_add)(dst + block_offset[i], block + i*16*sizeof(pixel), stride); | 183 | 12.3M | else FUNCC(ff_h264_idct_add )(dst + block_offset[i], block + i*16*sizeof(pixel), stride); | 184 | 12.7M | } | 185 | 187M | } | 186 | 11.7M | } |
Line | Count | Source | 177 | 1.75M | { | 178 | 1.75M | int i; | 179 | 29.8M | for(i=0; i<16; i++){ | 180 | 28.1M | int nnz = nnzc[ scan8[i] ]; | 181 | 28.1M | if(nnz){ | 182 | 9.07M | if(nnz==1 && ((dctcoef*)block)[i*16]) FUNCC(ff_h264_idct_dc_add)(dst + block_offset[i], block + i*16*sizeof(pixel), stride); | 183 | 8.76M | else FUNCC(ff_h264_idct_add )(dst + block_offset[i], block + i*16*sizeof(pixel), stride); | 184 | 9.07M | } | 185 | 28.1M | } | 186 | 1.75M | } |
Line | Count | Source | 177 | 1.91M | { | 178 | 1.91M | int i; | 179 | 32.6M | for(i=0; i<16; i++){ | 180 | 30.7M | int nnz = nnzc[ scan8[i] ]; | 181 | 30.7M | if(nnz){ | 182 | 9.83M | if(nnz==1 && ((dctcoef*)block)[i*16]) FUNCC(ff_h264_idct_dc_add)(dst + block_offset[i], block + i*16*sizeof(pixel), stride); | 183 | 9.64M | else FUNCC(ff_h264_idct_add )(dst + block_offset[i], block + i*16*sizeof(pixel), stride); | 184 | 9.83M | } | 185 | 30.7M | } | 186 | 1.91M | } |
|
187 | | |
188 | | void FUNCC(ff_h264_idct_add16intra)(uint8_t *dst, const int *block_offset, |
189 | | int16_t *block, int stride, |
190 | | const uint8_t nnzc[5 * 8]) |
191 | 1.81M | { |
192 | 1.81M | int i; |
193 | 30.8M | for(i=0; i<16; i++){ |
194 | 29.0M | if(nnzc[ scan8[i] ]) FUNCC(ff_h264_idct_add )(dst + block_offset[i], block + i*16*sizeof(pixel), stride); |
195 | 27.9M | else if(((dctcoef*)block)[i*16]) FUNCC(ff_h264_idct_dc_add)(dst + block_offset[i], block + i*16*sizeof(pixel), stride); |
196 | 29.0M | } |
197 | 1.81M | } ff_h264_idct_add16intra_8_c Line | Count | Source | 191 | 1.24M | { | 192 | 1.24M | int i; | 193 | 21.1M | for(i=0; i<16; i++){ | 194 | 19.9M | if(nnzc[ scan8[i] ]) FUNCC(ff_h264_idct_add )(dst + block_offset[i], block + i*16*sizeof(pixel), stride); | 195 | 19.7M | else if(((dctcoef*)block)[i*16]) FUNCC(ff_h264_idct_dc_add)(dst + block_offset[i], block + i*16*sizeof(pixel), stride); | 196 | 19.9M | } | 197 | 1.24M | } |
ff_h264_idct_add16intra_9_c Line | Count | Source | 191 | 174k | { | 192 | 174k | int i; | 193 | 2.97M | for(i=0; i<16; i++){ | 194 | 2.79M | if(nnzc[ scan8[i] ]) FUNCC(ff_h264_idct_add )(dst + block_offset[i], block + i*16*sizeof(pixel), stride); | 195 | 2.58M | else if(((dctcoef*)block)[i*16]) FUNCC(ff_h264_idct_dc_add)(dst + block_offset[i], block + i*16*sizeof(pixel), stride); | 196 | 2.79M | } | 197 | 174k | } |
ff_h264_idct_add16intra_10_c Line | Count | Source | 191 | 249k | { | 192 | 249k | int i; | 193 | 4.24M | for(i=0; i<16; i++){ | 194 | 3.99M | if(nnzc[ scan8[i] ]) FUNCC(ff_h264_idct_add )(dst + block_offset[i], block + i*16*sizeof(pixel), stride); | 195 | 3.66M | else if(((dctcoef*)block)[i*16]) FUNCC(ff_h264_idct_dc_add)(dst + block_offset[i], block + i*16*sizeof(pixel), stride); | 196 | 3.99M | } | 197 | 249k | } |
ff_h264_idct_add16intra_12_c Line | Count | Source | 191 | 70.1k | { | 192 | 70.1k | int i; | 193 | 1.19M | for(i=0; i<16; i++){ | 194 | 1.12M | if(nnzc[ scan8[i] ]) FUNCC(ff_h264_idct_add )(dst + block_offset[i], block + i*16*sizeof(pixel), stride); | 195 | 925k | else if(((dctcoef*)block)[i*16]) FUNCC(ff_h264_idct_dc_add)(dst + block_offset[i], block + i*16*sizeof(pixel), stride); | 196 | 1.12M | } | 197 | 70.1k | } |
ff_h264_idct_add16intra_14_c Line | Count | Source | 191 | 73.4k | { | 192 | 73.4k | int i; | 193 | 1.24M | for(i=0; i<16; i++){ | 194 | 1.17M | if(nnzc[ scan8[i] ]) FUNCC(ff_h264_idct_add )(dst + block_offset[i], block + i*16*sizeof(pixel), stride); | 195 | 1.05M | else if(((dctcoef*)block)[i*16]) FUNCC(ff_h264_idct_dc_add)(dst + block_offset[i], block + i*16*sizeof(pixel), stride); | 196 | 1.17M | } | 197 | 73.4k | } |
|
198 | | |
199 | | void FUNCC(ff_h264_idct8_add4)(uint8_t *dst, const int *block_offset, |
200 | | int16_t *block, int stride, |
201 | | const uint8_t nnzc[5 * 8]) |
202 | 2.18M | { |
203 | 2.18M | int i; |
204 | 10.9M | for(i=0; i<16; i+=4){ |
205 | 8.73M | int nnz = nnzc[ scan8[i] ]; |
206 | 8.73M | if(nnz){ |
207 | 5.26M | if(nnz==1 && ((dctcoef*)block)[i*16]) FUNCC(ff_h264_idct8_dc_add)(dst + block_offset[i], block + i*16*sizeof(pixel), stride); |
208 | 5.08M | else FUNCC(ff_h264_idct8_add )(dst + block_offset[i], block + i*16*sizeof(pixel), stride); |
209 | 5.26M | } |
210 | 8.73M | } |
211 | 2.18M | } Line | Count | Source | 202 | 432k | { | 203 | 432k | int i; | 204 | 2.16M | for(i=0; i<16; i+=4){ | 205 | 1.73M | int nnz = nnzc[ scan8[i] ]; | 206 | 1.73M | if(nnz){ | 207 | 869k | if(nnz==1 && ((dctcoef*)block)[i*16]) FUNCC(ff_h264_idct8_dc_add)(dst + block_offset[i], block + i*16*sizeof(pixel), stride); | 208 | 779k | else FUNCC(ff_h264_idct8_add )(dst + block_offset[i], block + i*16*sizeof(pixel), stride); | 209 | 869k | } | 210 | 1.73M | } | 211 | 432k | } |
Line | Count | Source | 202 | 310k | { | 203 | 310k | int i; | 204 | 1.55M | for(i=0; i<16; i+=4){ | 205 | 1.24M | int nnz = nnzc[ scan8[i] ]; | 206 | 1.24M | if(nnz){ | 207 | 601k | if(nnz==1 && ((dctcoef*)block)[i*16]) FUNCC(ff_h264_idct8_dc_add)(dst + block_offset[i], block + i*16*sizeof(pixel), stride); | 208 | 591k | else FUNCC(ff_h264_idct8_add )(dst + block_offset[i], block + i*16*sizeof(pixel), stride); | 209 | 601k | } | 210 | 1.24M | } | 211 | 310k | } |
Line | Count | Source | 202 | 788k | { | 203 | 788k | int i; | 204 | 3.94M | for(i=0; i<16; i+=4){ | 205 | 3.15M | int nnz = nnzc[ scan8[i] ]; | 206 | 3.15M | if(nnz){ | 207 | 2.42M | if(nnz==1 && ((dctcoef*)block)[i*16]) FUNCC(ff_h264_idct8_dc_add)(dst + block_offset[i], block + i*16*sizeof(pixel), stride); | 208 | 2.39M | else FUNCC(ff_h264_idct8_add )(dst + block_offset[i], block + i*16*sizeof(pixel), stride); | 209 | 2.42M | } | 210 | 3.15M | } | 211 | 788k | } |
Line | Count | Source | 202 | 331k | { | 203 | 331k | int i; | 204 | 1.65M | for(i=0; i<16; i+=4){ | 205 | 1.32M | int nnz = nnzc[ scan8[i] ]; | 206 | 1.32M | if(nnz){ | 207 | 718k | if(nnz==1 && ((dctcoef*)block)[i*16]) FUNCC(ff_h264_idct8_dc_add)(dst + block_offset[i], block + i*16*sizeof(pixel), stride); | 208 | 705k | else FUNCC(ff_h264_idct8_add )(dst + block_offset[i], block + i*16*sizeof(pixel), stride); | 209 | 718k | } | 210 | 1.32M | } | 211 | 331k | } |
Line | Count | Source | 202 | 321k | { | 203 | 321k | int i; | 204 | 1.60M | for(i=0; i<16; i+=4){ | 205 | 1.28M | int nnz = nnzc[ scan8[i] ]; | 206 | 1.28M | if(nnz){ | 207 | 652k | if(nnz==1 && ((dctcoef*)block)[i*16]) FUNCC(ff_h264_idct8_dc_add)(dst + block_offset[i], block + i*16*sizeof(pixel), stride); | 208 | 617k | else FUNCC(ff_h264_idct8_add )(dst + block_offset[i], block + i*16*sizeof(pixel), stride); | 209 | 652k | } | 210 | 1.28M | } | 211 | 321k | } |
|
212 | | |
213 | 9.94M | void FUNCC(ff_h264_idct_add8)(uint8_t **dest, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15*8]){ |
214 | 9.94M | int i, j; |
215 | 29.8M | for(j=1; j<3; j++){ |
216 | 99.4M | for(i=j*16; i<j*16+4; i++){ |
217 | 79.5M | if(nnzc[ scan8[i] ]) |
218 | 2.69M | FUNCC(ff_h264_idct_add )(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride); |
219 | 76.8M | else if(((dctcoef*)block)[i*16]) |
220 | 4.88M | FUNCC(ff_h264_idct_dc_add)(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride); |
221 | 79.5M | } |
222 | 19.8M | } |
223 | 9.94M | } Line | Count | Source | 213 | 4.52M | void FUNCC(ff_h264_idct_add8)(uint8_t **dest, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15*8]){ | 214 | 4.52M | int i, j; | 215 | 13.5M | for(j=1; j<3; j++){ | 216 | 45.2M | for(i=j*16; i<j*16+4; i++){ | 217 | 36.1M | if(nnzc[ scan8[i] ]) | 218 | 788k | FUNCC(ff_h264_idct_add )(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride); | 219 | 35.4M | else if(((dctcoef*)block)[i*16]) | 220 | 2.56M | FUNCC(ff_h264_idct_dc_add)(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride); | 221 | 36.1M | } | 222 | 9.04M | } | 223 | 4.52M | } |
Line | Count | Source | 213 | 872k | void FUNCC(ff_h264_idct_add8)(uint8_t **dest, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15*8]){ | 214 | 872k | int i, j; | 215 | 2.61M | for(j=1; j<3; j++){ | 216 | 8.72M | for(i=j*16; i<j*16+4; i++){ | 217 | 6.98M | if(nnzc[ scan8[i] ]) | 218 | 231k | FUNCC(ff_h264_idct_add )(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride); | 219 | 6.75M | else if(((dctcoef*)block)[i*16]) | 220 | 459k | FUNCC(ff_h264_idct_dc_add)(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride); | 221 | 6.98M | } | 222 | 1.74M | } | 223 | 872k | } |
Line | Count | Source | 213 | 3.97M | void FUNCC(ff_h264_idct_add8)(uint8_t **dest, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15*8]){ | 214 | 3.97M | int i, j; | 215 | 11.9M | for(j=1; j<3; j++){ | 216 | 39.7M | for(i=j*16; i<j*16+4; i++){ | 217 | 31.8M | if(nnzc[ scan8[i] ]) | 218 | 1.36M | FUNCC(ff_h264_idct_add )(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride); | 219 | 30.4M | else if(((dctcoef*)block)[i*16]) | 220 | 938k | FUNCC(ff_h264_idct_dc_add)(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride); | 221 | 31.8M | } | 222 | 7.95M | } | 223 | 3.97M | } |
Line | Count | Source | 213 | 332k | void FUNCC(ff_h264_idct_add8)(uint8_t **dest, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15*8]){ | 214 | 332k | int i, j; | 215 | 998k | for(j=1; j<3; j++){ | 216 | 3.32M | for(i=j*16; i<j*16+4; i++){ | 217 | 2.66M | if(nnzc[ scan8[i] ]) | 218 | 290k | FUNCC(ff_h264_idct_add )(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride); | 219 | 2.37M | else if(((dctcoef*)block)[i*16]) | 220 | 567k | FUNCC(ff_h264_idct_dc_add)(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride); | 221 | 2.66M | } | 222 | 665k | } | 223 | 332k | } |
Line | Count | Source | 213 | 235k | void FUNCC(ff_h264_idct_add8)(uint8_t **dest, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15*8]){ | 214 | 235k | int i, j; | 215 | 706k | for(j=1; j<3; j++){ | 216 | 2.35M | for(i=j*16; i<j*16+4; i++){ | 217 | 1.88M | if(nnzc[ scan8[i] ]) | 218 | 24.4k | FUNCC(ff_h264_idct_add )(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride); | 219 | 1.85M | else if(((dctcoef*)block)[i*16]) | 220 | 358k | FUNCC(ff_h264_idct_dc_add)(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride); | 221 | 1.88M | } | 222 | 470k | } | 223 | 235k | } |
|
224 | | |
225 | 4.75M | void FUNCC(ff_h264_idct_add8_422)(uint8_t **dest, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15*8]){ |
226 | 4.75M | int i, j; |
227 | | |
228 | 14.2M | for(j=1; j<3; j++){ |
229 | 47.5M | for(i=j*16; i<j*16+4; i++){ |
230 | 38.0M | if(nnzc[ scan8[i] ]) |
231 | 1.76M | FUNCC(ff_h264_idct_add )(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride); |
232 | 36.2M | else if(((dctcoef*)block)[i*16]) |
233 | 2.40M | FUNCC(ff_h264_idct_dc_add)(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride); |
234 | 38.0M | } |
235 | 9.51M | } |
236 | | |
237 | 14.2M | for(j=1; j<3; j++){ |
238 | 47.5M | for(i=j*16+4; i<j*16+8; i++){ |
239 | 38.0M | if(nnzc[ scan8[i+4] ]) |
240 | 2.10M | FUNCC(ff_h264_idct_add )(dest[j-1] + block_offset[i+4], block + i*16*sizeof(pixel), stride); |
241 | 35.9M | else if(((dctcoef*)block)[i*16]) |
242 | 2.34M | FUNCC(ff_h264_idct_dc_add)(dest[j-1] + block_offset[i+4], block + i*16*sizeof(pixel), stride); |
243 | 38.0M | } |
244 | 9.51M | } |
245 | 4.75M | } ff_h264_idct_add8_422_8_c Line | Count | Source | 225 | 2.19M | void FUNCC(ff_h264_idct_add8_422)(uint8_t **dest, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15*8]){ | 226 | 2.19M | int i, j; | 227 | | | 228 | 6.58M | for(j=1; j<3; j++){ | 229 | 21.9M | for(i=j*16; i<j*16+4; i++){ | 230 | 17.5M | if(nnzc[ scan8[i] ]) | 231 | 142k | FUNCC(ff_h264_idct_add )(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride); | 232 | 17.4M | else if(((dctcoef*)block)[i*16]) | 233 | 162k | FUNCC(ff_h264_idct_dc_add)(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride); | 234 | 17.5M | } | 235 | 4.39M | } | 236 | | | 237 | 6.58M | for(j=1; j<3; j++){ | 238 | 21.9M | for(i=j*16+4; i<j*16+8; i++){ | 239 | 17.5M | if(nnzc[ scan8[i+4] ]) | 240 | 157k | FUNCC(ff_h264_idct_add )(dest[j-1] + block_offset[i+4], block + i*16*sizeof(pixel), stride); | 241 | 17.4M | else if(((dctcoef*)block)[i*16]) | 242 | 155k | FUNCC(ff_h264_idct_dc_add)(dest[j-1] + block_offset[i+4], block + i*16*sizeof(pixel), stride); | 243 | 17.5M | } | 244 | 4.39M | } | 245 | 2.19M | } |
ff_h264_idct_add8_422_9_c Line | Count | Source | 225 | 697k | void FUNCC(ff_h264_idct_add8_422)(uint8_t **dest, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15*8]){ | 226 | 697k | int i, j; | 227 | | | 228 | 2.09M | for(j=1; j<3; j++){ | 229 | 6.97M | for(i=j*16; i<j*16+4; i++){ | 230 | 5.57M | if(nnzc[ scan8[i] ]) | 231 | 123k | FUNCC(ff_h264_idct_add )(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride); | 232 | 5.45M | else if(((dctcoef*)block)[i*16]) | 233 | 514k | FUNCC(ff_h264_idct_dc_add)(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride); | 234 | 5.57M | } | 235 | 1.39M | } | 236 | | | 237 | 2.09M | for(j=1; j<3; j++){ | 238 | 6.97M | for(i=j*16+4; i<j*16+8; i++){ | 239 | 5.57M | if(nnzc[ scan8[i+4] ]) | 240 | 154k | FUNCC(ff_h264_idct_add )(dest[j-1] + block_offset[i+4], block + i*16*sizeof(pixel), stride); | 241 | 5.42M | else if(((dctcoef*)block)[i*16]) | 242 | 494k | FUNCC(ff_h264_idct_dc_add)(dest[j-1] + block_offset[i+4], block + i*16*sizeof(pixel), stride); | 243 | 5.57M | } | 244 | 1.39M | } | 245 | 697k | } |
ff_h264_idct_add8_422_10_c Line | Count | Source | 225 | 974k | void FUNCC(ff_h264_idct_add8_422)(uint8_t **dest, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15*8]){ | 226 | 974k | int i, j; | 227 | | | 228 | 2.92M | for(j=1; j<3; j++){ | 229 | 9.74M | for(i=j*16; i<j*16+4; i++){ | 230 | 7.79M | if(nnzc[ scan8[i] ]) | 231 | 134k | FUNCC(ff_h264_idct_add )(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride); | 232 | 7.66M | else if(((dctcoef*)block)[i*16]) | 233 | 713k | FUNCC(ff_h264_idct_dc_add)(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride); | 234 | 7.79M | } | 235 | 1.94M | } | 236 | | | 237 | 2.92M | for(j=1; j<3; j++){ | 238 | 9.74M | for(i=j*16+4; i<j*16+8; i++){ | 239 | 7.79M | if(nnzc[ scan8[i+4] ]) | 240 | 123k | FUNCC(ff_h264_idct_add )(dest[j-1] + block_offset[i+4], block + i*16*sizeof(pixel), stride); | 241 | 7.67M | else if(((dctcoef*)block)[i*16]) | 242 | 721k | FUNCC(ff_h264_idct_dc_add)(dest[j-1] + block_offset[i+4], block + i*16*sizeof(pixel), stride); | 243 | 7.79M | } | 244 | 1.94M | } | 245 | 974k | } |
ff_h264_idct_add8_422_12_c Line | Count | Source | 225 | 574k | void FUNCC(ff_h264_idct_add8_422)(uint8_t **dest, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15*8]){ | 226 | 574k | int i, j; | 227 | | | 228 | 1.72M | for(j=1; j<3; j++){ | 229 | 5.74M | for(i=j*16; i<j*16+4; i++){ | 230 | 4.59M | if(nnzc[ scan8[i] ]) | 231 | 987k | FUNCC(ff_h264_idct_add )(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride); | 232 | 3.60M | else if(((dctcoef*)block)[i*16]) | 233 | 634k | FUNCC(ff_h264_idct_dc_add)(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride); | 234 | 4.59M | } | 235 | 1.14M | } | 236 | | | 237 | 1.72M | for(j=1; j<3; j++){ | 238 | 5.74M | for(i=j*16+4; i<j*16+8; i++){ | 239 | 4.59M | if(nnzc[ scan8[i+4] ]) | 240 | 1.23M | FUNCC(ff_h264_idct_add )(dest[j-1] + block_offset[i+4], block + i*16*sizeof(pixel), stride); | 241 | 3.36M | else if(((dctcoef*)block)[i*16]) | 242 | 614k | FUNCC(ff_h264_idct_dc_add)(dest[j-1] + block_offset[i+4], block + i*16*sizeof(pixel), stride); | 243 | 4.59M | } | 244 | 1.14M | } | 245 | 574k | } |
ff_h264_idct_add8_422_14_c Line | Count | Source | 225 | 315k | void FUNCC(ff_h264_idct_add8_422)(uint8_t **dest, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15*8]){ | 226 | 315k | int i, j; | 227 | | | 228 | 947k | for(j=1; j<3; j++){ | 229 | 3.15M | for(i=j*16; i<j*16+4; i++){ | 230 | 2.52M | if(nnzc[ scan8[i] ]) | 231 | 377k | FUNCC(ff_h264_idct_add )(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride); | 232 | 2.14M | else if(((dctcoef*)block)[i*16]) | 233 | 384k | FUNCC(ff_h264_idct_dc_add)(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride); | 234 | 2.52M | } | 235 | 631k | } | 236 | | | 237 | 947k | for(j=1; j<3; j++){ | 238 | 3.15M | for(i=j*16+4; i<j*16+8; i++){ | 239 | 2.52M | if(nnzc[ scan8[i+4] ]) | 240 | 436k | FUNCC(ff_h264_idct_add )(dest[j-1] + block_offset[i+4], block + i*16*sizeof(pixel), stride); | 241 | 2.09M | else if(((dctcoef*)block)[i*16]) | 242 | 362k | FUNCC(ff_h264_idct_dc_add)(dest[j-1] + block_offset[i+4], block + i*16*sizeof(pixel), stride); | 243 | 2.52M | } | 244 | 631k | } | 245 | 315k | } |
|
246 | | |
247 | | #if BIT_DEPTH == 8 || BIT_DEPTH == 9 |
248 | | /** |
249 | | * IDCT transforms the 16 dc values and dequantizes them. |
250 | | * @param qmul quantization parameter |
251 | | */ |
252 | | void FUNCC2(ff_h264_luma_dc_dequant_idct)(int16_t *_output, int16_t *_input, int qmul) |
253 | 1.20M | { |
254 | 22.9M | #define stride 16 |
255 | 1.20M | int i; |
256 | 1.20M | int temp[16]; |
257 | 1.20M | static const uint8_t x_offset[4]={0, 2*stride, 8*stride, 10*stride}; |
258 | 1.20M | dctcoef *input = (dctcoef*)_input; |
259 | 1.20M | dctcoef *output = (dctcoef*)_output; |
260 | | |
261 | 6.04M | for(i=0; i<4; i++){ |
262 | 4.83M | const int z0= input[4*i+0] + input[4*i+1]; |
263 | 4.83M | const int z1= input[4*i+0] - input[4*i+1]; |
264 | 4.83M | const int z2= input[4*i+2] - input[4*i+3]; |
265 | 4.83M | const int z3= input[4*i+2] + input[4*i+3]; |
266 | | |
267 | 4.83M | temp[4*i+0]= z0+z3; |
268 | 4.83M | temp[4*i+1]= z0-z3; |
269 | 4.83M | temp[4*i+2]= z1-z2; |
270 | 4.83M | temp[4*i+3]= z1+z2; |
271 | 4.83M | } |
272 | | |
273 | 6.04M | for(i=0; i<4; i++){ |
274 | 4.83M | const int offset= x_offset[i]; |
275 | 4.83M | const SUINT z0= temp[4*0+i] + temp[4*2+i]; |
276 | 4.83M | const SUINT z1= temp[4*0+i] - temp[4*2+i]; |
277 | 4.83M | const SUINT z2= temp[4*1+i] - temp[4*3+i]; |
278 | 4.83M | const SUINT z3= temp[4*1+i] + temp[4*3+i]; |
279 | | |
280 | 4.83M | output[stride* 0+offset]= (int)((z0 + z3)*qmul + 128 ) >> 8; |
281 | 4.83M | output[stride* 1+offset]= (int)((z1 + z2)*qmul + 128 ) >> 8; |
282 | 4.83M | output[stride* 4+offset]= (int)((z1 - z2)*qmul + 128 ) >> 8; |
283 | 4.83M | output[stride* 5+offset]= (int)((z0 - z3)*qmul + 128 ) >> 8; |
284 | 4.83M | } |
285 | 1.20M | #undef stride |
286 | 1.20M | } ff_h264_luma_dc_dequant_idct_8_c Line | Count | Source | 253 | 842k | { | 254 | 842k | #define stride 16 | 255 | 842k | int i; | 256 | 842k | int temp[16]; | 257 | 842k | static const uint8_t x_offset[4]={0, 2*stride, 8*stride, 10*stride}; | 258 | 842k | dctcoef *input = (dctcoef*)_input; | 259 | 842k | dctcoef *output = (dctcoef*)_output; | 260 | | | 261 | 4.21M | for(i=0; i<4; i++){ | 262 | 3.37M | const int z0= input[4*i+0] + input[4*i+1]; | 263 | 3.37M | const int z1= input[4*i+0] - input[4*i+1]; | 264 | 3.37M | const int z2= input[4*i+2] - input[4*i+3]; | 265 | 3.37M | const int z3= input[4*i+2] + input[4*i+3]; | 266 | | | 267 | 3.37M | temp[4*i+0]= z0+z3; | 268 | 3.37M | temp[4*i+1]= z0-z3; | 269 | 3.37M | temp[4*i+2]= z1-z2; | 270 | 3.37M | temp[4*i+3]= z1+z2; | 271 | 3.37M | } | 272 | | | 273 | 4.21M | for(i=0; i<4; i++){ | 274 | 3.37M | const int offset= x_offset[i]; | 275 | 3.37M | const SUINT z0= temp[4*0+i] + temp[4*2+i]; | 276 | 3.37M | const SUINT z1= temp[4*0+i] - temp[4*2+i]; | 277 | 3.37M | const SUINT z2= temp[4*1+i] - temp[4*3+i]; | 278 | 3.37M | const SUINT z3= temp[4*1+i] + temp[4*3+i]; | 279 | | | 280 | 3.37M | output[stride* 0+offset]= (int)((z0 + z3)*qmul + 128 ) >> 8; | 281 | 3.37M | output[stride* 1+offset]= (int)((z1 + z2)*qmul + 128 ) >> 8; | 282 | 3.37M | output[stride* 4+offset]= (int)((z1 - z2)*qmul + 128 ) >> 8; | 283 | 3.37M | output[stride* 5+offset]= (int)((z0 - z3)*qmul + 128 ) >> 8; | 284 | 3.37M | } | 285 | 842k | #undef stride | 286 | 842k | } |
ff_h264_luma_dc_dequant_idct_16_c Line | Count | Source | 253 | 366k | { | 254 | 366k | #define stride 16 | 255 | 366k | int i; | 256 | 366k | int temp[16]; | 257 | 366k | static const uint8_t x_offset[4]={0, 2*stride, 8*stride, 10*stride}; | 258 | 366k | dctcoef *input = (dctcoef*)_input; | 259 | 366k | dctcoef *output = (dctcoef*)_output; | 260 | | | 261 | 1.83M | for(i=0; i<4; i++){ | 262 | 1.46M | const int z0= input[4*i+0] + input[4*i+1]; | 263 | 1.46M | const int z1= input[4*i+0] - input[4*i+1]; | 264 | 1.46M | const int z2= input[4*i+2] - input[4*i+3]; | 265 | 1.46M | const int z3= input[4*i+2] + input[4*i+3]; | 266 | | | 267 | 1.46M | temp[4*i+0]= z0+z3; | 268 | 1.46M | temp[4*i+1]= z0-z3; | 269 | 1.46M | temp[4*i+2]= z1-z2; | 270 | 1.46M | temp[4*i+3]= z1+z2; | 271 | 1.46M | } | 272 | | | 273 | 1.83M | for(i=0; i<4; i++){ | 274 | 1.46M | const int offset= x_offset[i]; | 275 | 1.46M | const SUINT z0= temp[4*0+i] + temp[4*2+i]; | 276 | 1.46M | const SUINT z1= temp[4*0+i] - temp[4*2+i]; | 277 | 1.46M | const SUINT z2= temp[4*1+i] - temp[4*3+i]; | 278 | 1.46M | const SUINT z3= temp[4*1+i] + temp[4*3+i]; | 279 | | | 280 | 1.46M | output[stride* 0+offset]= (int)((z0 + z3)*qmul + 128 ) >> 8; | 281 | 1.46M | output[stride* 1+offset]= (int)((z1 + z2)*qmul + 128 ) >> 8; | 282 | 1.46M | output[stride* 4+offset]= (int)((z1 - z2)*qmul + 128 ) >> 8; | 283 | 1.46M | output[stride* 5+offset]= (int)((z0 - z3)*qmul + 128 ) >> 8; | 284 | 1.46M | } | 285 | 366k | #undef stride | 286 | 366k | } |
|
287 | | |
288 | | void FUNCC2(ff_h264_chroma422_dc_dequant_idct)(int16_t *_block, int qmul) |
289 | 1.39M | { |
290 | 1.39M | const int stride= 16*2; |
291 | 1.39M | const int xStride= 16; |
292 | 1.39M | int i; |
293 | 1.39M | unsigned temp[8]; |
294 | 1.39M | static const uint8_t x_offset[2]={0, 16}; |
295 | 1.39M | dctcoef *block = (dctcoef*)_block; |
296 | | |
297 | 6.95M | for(i=0; i<4; i++){ |
298 | 5.56M | temp[2*i+0] = block[stride*i + xStride*0] + (unsigned)block[stride*i + xStride*1]; |
299 | 5.56M | temp[2*i+1] = block[stride*i + xStride*0] - (unsigned)block[stride*i + xStride*1]; |
300 | 5.56M | } |
301 | | |
302 | 4.17M | for(i=0; i<2; i++){ |
303 | 2.78M | const int offset= x_offset[i]; |
304 | 2.78M | const SUINT z0= temp[2*0+i] + temp[2*2+i]; |
305 | 2.78M | const SUINT z1= temp[2*0+i] - temp[2*2+i]; |
306 | 2.78M | const SUINT z2= temp[2*1+i] - temp[2*3+i]; |
307 | 2.78M | const SUINT z3= temp[2*1+i] + temp[2*3+i]; |
308 | | |
309 | 2.78M | block[stride*0+offset]= (int)((z0 + z3)*qmul + 128) >> 8; |
310 | 2.78M | block[stride*1+offset]= (int)((z1 + z2)*qmul + 128) >> 8; |
311 | 2.78M | block[stride*2+offset]= (int)((z1 - z2)*qmul + 128) >> 8; |
312 | 2.78M | block[stride*3+offset]= (int)((z0 - z3)*qmul + 128) >> 8; |
313 | 2.78M | } |
314 | 1.39M | } ff_h264_chroma422_dc_dequant_idct_8_c Line | Count | Source | 289 | 94.2k | { | 290 | 94.2k | const int stride= 16*2; | 291 | 94.2k | const int xStride= 16; | 292 | 94.2k | int i; | 293 | 94.2k | unsigned temp[8]; | 294 | 94.2k | static const uint8_t x_offset[2]={0, 16}; | 295 | 94.2k | dctcoef *block = (dctcoef*)_block; | 296 | | | 297 | 471k | for(i=0; i<4; i++){ | 298 | 376k | temp[2*i+0] = block[stride*i + xStride*0] + (unsigned)block[stride*i + xStride*1]; | 299 | 376k | temp[2*i+1] = block[stride*i + xStride*0] - (unsigned)block[stride*i + xStride*1]; | 300 | 376k | } | 301 | | | 302 | 282k | for(i=0; i<2; i++){ | 303 | 188k | const int offset= x_offset[i]; | 304 | 188k | const SUINT z0= temp[2*0+i] + temp[2*2+i]; | 305 | 188k | const SUINT z1= temp[2*0+i] - temp[2*2+i]; | 306 | 188k | const SUINT z2= temp[2*1+i] - temp[2*3+i]; | 307 | 188k | const SUINT z3= temp[2*1+i] + temp[2*3+i]; | 308 | | | 309 | 188k | block[stride*0+offset]= (int)((z0 + z3)*qmul + 128) >> 8; | 310 | 188k | block[stride*1+offset]= (int)((z1 + z2)*qmul + 128) >> 8; | 311 | 188k | block[stride*2+offset]= (int)((z1 - z2)*qmul + 128) >> 8; | 312 | 188k | block[stride*3+offset]= (int)((z0 - z3)*qmul + 128) >> 8; | 313 | 188k | } | 314 | 94.2k | } |
ff_h264_chroma422_dc_dequant_idct_16_c Line | Count | Source | 289 | 1.29M | { | 290 | 1.29M | const int stride= 16*2; | 291 | 1.29M | const int xStride= 16; | 292 | 1.29M | int i; | 293 | 1.29M | unsigned temp[8]; | 294 | 1.29M | static const uint8_t x_offset[2]={0, 16}; | 295 | 1.29M | dctcoef *block = (dctcoef*)_block; | 296 | | | 297 | 6.48M | for(i=0; i<4; i++){ | 298 | 5.18M | temp[2*i+0] = block[stride*i + xStride*0] + (unsigned)block[stride*i + xStride*1]; | 299 | 5.18M | temp[2*i+1] = block[stride*i + xStride*0] - (unsigned)block[stride*i + xStride*1]; | 300 | 5.18M | } | 301 | | | 302 | 3.89M | for(i=0; i<2; i++){ | 303 | 2.59M | const int offset= x_offset[i]; | 304 | 2.59M | const SUINT z0= temp[2*0+i] + temp[2*2+i]; | 305 | 2.59M | const SUINT z1= temp[2*0+i] - temp[2*2+i]; | 306 | 2.59M | const SUINT z2= temp[2*1+i] - temp[2*3+i]; | 307 | 2.59M | const SUINT z3= temp[2*1+i] + temp[2*3+i]; | 308 | | | 309 | 2.59M | block[stride*0+offset]= (int)((z0 + z3)*qmul + 128) >> 8; | 310 | 2.59M | block[stride*1+offset]= (int)((z1 + z2)*qmul + 128) >> 8; | 311 | 2.59M | block[stride*2+offset]= (int)((z1 - z2)*qmul + 128) >> 8; | 312 | 2.59M | block[stride*3+offset]= (int)((z0 - z3)*qmul + 128) >> 8; | 313 | 2.59M | } | 314 | 1.29M | } |
|
315 | | |
316 | | void FUNCC2(ff_h264_chroma_dc_dequant_idct)(int16_t *_block, int qmul) |
317 | 3.17M | { |
318 | 3.17M | const int stride= 16*2; |
319 | 3.17M | const int xStride= 16; |
320 | 3.17M | SUINT a,b,c,d,e; |
321 | 3.17M | dctcoef *block = (dctcoef*)_block; |
322 | | |
323 | 3.17M | a= block[stride*0 + xStride*0]; |
324 | 3.17M | b= block[stride*0 + xStride*1]; |
325 | 3.17M | c= block[stride*1 + xStride*0]; |
326 | 3.17M | d= block[stride*1 + xStride*1]; |
327 | | |
328 | 3.17M | e= a-b; |
329 | 3.17M | a= a+b; |
330 | 3.17M | b= c-d; |
331 | 3.17M | c= c+d; |
332 | | |
333 | 3.17M | block[stride*0 + xStride*0]= (int)((a+c)*qmul) >> 7; |
334 | 3.17M | block[stride*0 + xStride*1]= (int)((e+b)*qmul) >> 7; |
335 | 3.17M | block[stride*1 + xStride*0]= (int)((a-c)*qmul) >> 7; |
336 | 3.17M | block[stride*1 + xStride*1]= (int)((e-b)*qmul) >> 7; |
337 | 3.17M | } ff_h264_chroma_dc_dequant_idct_8_c Line | Count | Source | 317 | 1.27M | { | 318 | 1.27M | const int stride= 16*2; | 319 | 1.27M | const int xStride= 16; | 320 | 1.27M | SUINT a,b,c,d,e; | 321 | 1.27M | dctcoef *block = (dctcoef*)_block; | 322 | | | 323 | 1.27M | a= block[stride*0 + xStride*0]; | 324 | 1.27M | b= block[stride*0 + xStride*1]; | 325 | 1.27M | c= block[stride*1 + xStride*0]; | 326 | 1.27M | d= block[stride*1 + xStride*1]; | 327 | | | 328 | 1.27M | e= a-b; | 329 | 1.27M | a= a+b; | 330 | 1.27M | b= c-d; | 331 | 1.27M | c= c+d; | 332 | | | 333 | 1.27M | block[stride*0 + xStride*0]= (int)((a+c)*qmul) >> 7; | 334 | 1.27M | block[stride*0 + xStride*1]= (int)((e+b)*qmul) >> 7; | 335 | 1.27M | block[stride*1 + xStride*0]= (int)((a-c)*qmul) >> 7; | 336 | 1.27M | block[stride*1 + xStride*1]= (int)((e-b)*qmul) >> 7; | 337 | 1.27M | } |
ff_h264_chroma_dc_dequant_idct_16_c Line | Count | Source | 317 | 1.89M | { | 318 | 1.89M | const int stride= 16*2; | 319 | 1.89M | const int xStride= 16; | 320 | 1.89M | SUINT a,b,c,d,e; | 321 | 1.89M | dctcoef *block = (dctcoef*)_block; | 322 | | | 323 | 1.89M | a= block[stride*0 + xStride*0]; | 324 | 1.89M | b= block[stride*0 + xStride*1]; | 325 | 1.89M | c= block[stride*1 + xStride*0]; | 326 | 1.89M | d= block[stride*1 + xStride*1]; | 327 | | | 328 | 1.89M | e= a-b; | 329 | 1.89M | a= a+b; | 330 | 1.89M | b= c-d; | 331 | 1.89M | c= c+d; | 332 | | | 333 | 1.89M | block[stride*0 + xStride*0]= (int)((a+c)*qmul) >> 7; | 334 | 1.89M | block[stride*0 + xStride*1]= (int)((e+b)*qmul) >> 7; | 335 | 1.89M | block[stride*1 + xStride*0]= (int)((a-c)*qmul) >> 7; | 336 | 1.89M | block[stride*1 + xStride*1]= (int)((e-b)*qmul) >> 7; | 337 | 1.89M | } |
|
338 | | #endif |