/src/mozilla-central/media/libtheora/lib/idct.c
Line | Count | Source (jump to first uncovered line) |
1 | | /******************************************************************** |
2 | | * * |
3 | | * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * |
4 | | * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * |
5 | | * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * |
6 | | * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * |
7 | | * * |
8 | | * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * |
9 | | * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * |
10 | | * * |
11 | | ******************************************************************** |
12 | | |
13 | | function: |
14 | | last mod: $Id: idct.c 17410 2010-09-21 21:53:48Z tterribe $ |
15 | | |
16 | | ********************************************************************/ |
17 | | |
18 | | #include <string.h> |
19 | | #include "internal.h" |
20 | | #include "dct.h" |
21 | | |
22 | | /*Performs an inverse 8 point Type-II DCT transform. |
23 | | The output is scaled by a factor of 2 relative to the orthonormal version of |
24 | | the transform. |
25 | | _y: The buffer to store the result in. |
26 | | Data will be placed in every 8th entry (e.g., in a column of an 8x8 |
27 | | block). |
28 | | _x: The input coefficients. |
29 | | The first 8 entries are used (e.g., from a row of an 8x8 block).*/ |
30 | 0 | static void idct8(ogg_int16_t *_y,const ogg_int16_t _x[8]){ |
31 | 0 | ogg_int32_t t[8]; |
32 | 0 | ogg_int32_t r; |
33 | 0 | /*Stage 1:*/ |
34 | 0 | /*0-1 butterfly.*/ |
35 | 0 | t[0]=OC_C4S4*(ogg_int16_t)(_x[0]+_x[4])>>16; |
36 | 0 | t[1]=OC_C4S4*(ogg_int16_t)(_x[0]-_x[4])>>16; |
37 | 0 | /*2-3 rotation by 6pi/16.*/ |
38 | 0 | t[2]=(OC_C6S2*_x[2]>>16)-(OC_C2S6*_x[6]>>16); |
39 | 0 | t[3]=(OC_C2S6*_x[2]>>16)+(OC_C6S2*_x[6]>>16); |
40 | 0 | /*4-7 rotation by 7pi/16.*/ |
41 | 0 | t[4]=(OC_C7S1*_x[1]>>16)-(OC_C1S7*_x[7]>>16); |
42 | 0 | /*5-6 rotation by 3pi/16.*/ |
43 | 0 | t[5]=(OC_C3S5*_x[5]>>16)-(OC_C5S3*_x[3]>>16); |
44 | 0 | t[6]=(OC_C5S3*_x[5]>>16)+(OC_C3S5*_x[3]>>16); |
45 | 0 | t[7]=(OC_C1S7*_x[1]>>16)+(OC_C7S1*_x[7]>>16); |
46 | 0 | /*Stage 2:*/ |
47 | 0 | /*4-5 butterfly.*/ |
48 | 0 | r=t[4]+t[5]; |
49 | 0 | t[5]=OC_C4S4*(ogg_int16_t)(t[4]-t[5])>>16; |
50 | 0 | t[4]=r; |
51 | 0 | /*7-6 butterfly.*/ |
52 | 0 | r=t[7]+t[6]; |
53 | 0 | t[6]=OC_C4S4*(ogg_int16_t)(t[7]-t[6])>>16; |
54 | 0 | t[7]=r; |
55 | 0 | /*Stage 3:*/ |
56 | 0 | /*0-3 butterfly.*/ |
57 | 0 | r=t[0]+t[3]; |
58 | 0 | t[3]=t[0]-t[3]; |
59 | 0 | t[0]=r; |
60 | 0 | /*1-2 butterfly.*/ |
61 | 0 | r=t[1]+t[2]; |
62 | 0 | t[2]=t[1]-t[2]; |
63 | 0 | t[1]=r; |
64 | 0 | /*6-5 butterfly.*/ |
65 | 0 | r=t[6]+t[5]; |
66 | 0 | t[5]=t[6]-t[5]; |
67 | 0 | t[6]=r; |
68 | 0 | /*Stage 4:*/ |
69 | 0 | /*0-7 butterfly.*/ |
70 | 0 | _y[0<<3]=(ogg_int16_t)(t[0]+t[7]); |
71 | 0 | /*1-6 butterfly.*/ |
72 | 0 | _y[1<<3]=(ogg_int16_t)(t[1]+t[6]); |
73 | 0 | /*2-5 butterfly.*/ |
74 | 0 | _y[2<<3]=(ogg_int16_t)(t[2]+t[5]); |
75 | 0 | /*3-4 butterfly.*/ |
76 | 0 | _y[3<<3]=(ogg_int16_t)(t[3]+t[4]); |
77 | 0 | _y[4<<3]=(ogg_int16_t)(t[3]-t[4]); |
78 | 0 | _y[5<<3]=(ogg_int16_t)(t[2]-t[5]); |
79 | 0 | _y[6<<3]=(ogg_int16_t)(t[1]-t[6]); |
80 | 0 | _y[7<<3]=(ogg_int16_t)(t[0]-t[7]); |
81 | 0 | } |
82 | | |
83 | | /*Performs an inverse 8 point Type-II DCT transform. |
84 | | The output is scaled by a factor of 2 relative to the orthonormal version of |
85 | | the transform. |
86 | | _y: The buffer to store the result in. |
87 | | Data will be placed in every 8th entry (e.g., in a column of an 8x8 |
88 | | block). |
89 | | _x: The input coefficients. |
90 | | Only the first 4 entries are used. |
91 | | The other 4 are assumed to be 0.*/ |
92 | 0 | static void idct8_4(ogg_int16_t *_y,const ogg_int16_t _x[8]){ |
93 | 0 | ogg_int32_t t[8]; |
94 | 0 | ogg_int32_t r; |
95 | 0 | /*Stage 1:*/ |
96 | 0 | t[0]=OC_C4S4*_x[0]>>16; |
97 | 0 | t[2]=OC_C6S2*_x[2]>>16; |
98 | 0 | t[3]=OC_C2S6*_x[2]>>16; |
99 | 0 | t[4]=OC_C7S1*_x[1]>>16; |
100 | 0 | t[5]=-(OC_C5S3*_x[3]>>16); |
101 | 0 | t[6]=OC_C3S5*_x[3]>>16; |
102 | 0 | t[7]=OC_C1S7*_x[1]>>16; |
103 | 0 | /*Stage 2:*/ |
104 | 0 | r=t[4]+t[5]; |
105 | 0 | t[5]=OC_C4S4*(ogg_int16_t)(t[4]-t[5])>>16; |
106 | 0 | t[4]=r; |
107 | 0 | r=t[7]+t[6]; |
108 | 0 | t[6]=OC_C4S4*(ogg_int16_t)(t[7]-t[6])>>16; |
109 | 0 | t[7]=r; |
110 | 0 | /*Stage 3:*/ |
111 | 0 | t[1]=t[0]+t[2]; |
112 | 0 | t[2]=t[0]-t[2]; |
113 | 0 | r=t[0]+t[3]; |
114 | 0 | t[3]=t[0]-t[3]; |
115 | 0 | t[0]=r; |
116 | 0 | r=t[6]+t[5]; |
117 | 0 | t[5]=t[6]-t[5]; |
118 | 0 | t[6]=r; |
119 | 0 | /*Stage 4:*/ |
120 | 0 | _y[0<<3]=(ogg_int16_t)(t[0]+t[7]); |
121 | 0 | _y[1<<3]=(ogg_int16_t)(t[1]+t[6]); |
122 | 0 | _y[2<<3]=(ogg_int16_t)(t[2]+t[5]); |
123 | 0 | _y[3<<3]=(ogg_int16_t)(t[3]+t[4]); |
124 | 0 | _y[4<<3]=(ogg_int16_t)(t[3]-t[4]); |
125 | 0 | _y[5<<3]=(ogg_int16_t)(t[2]-t[5]); |
126 | 0 | _y[6<<3]=(ogg_int16_t)(t[1]-t[6]); |
127 | 0 | _y[7<<3]=(ogg_int16_t)(t[0]-t[7]); |
128 | 0 | } |
129 | | |
130 | | /*Performs an inverse 8 point Type-II DCT transform. |
131 | | The output is scaled by a factor of 2 relative to the orthonormal version of |
132 | | the transform. |
133 | | _y: The buffer to store the result in. |
134 | | Data will be placed in every 8th entry (e.g., in a column of an 8x8 |
135 | | block). |
136 | | _x: The input coefficients. |
137 | | Only the first 3 entries are used. |
138 | | The other 5 are assumed to be 0.*/ |
139 | 0 | static void idct8_3(ogg_int16_t *_y,const ogg_int16_t _x[8]){ |
140 | 0 | ogg_int32_t t[8]; |
141 | 0 | ogg_int32_t r; |
142 | 0 | /*Stage 1:*/ |
143 | 0 | t[0]=OC_C4S4*_x[0]>>16; |
144 | 0 | t[2]=OC_C6S2*_x[2]>>16; |
145 | 0 | t[3]=OC_C2S6*_x[2]>>16; |
146 | 0 | t[4]=OC_C7S1*_x[1]>>16; |
147 | 0 | t[7]=OC_C1S7*_x[1]>>16; |
148 | 0 | /*Stage 2:*/ |
149 | 0 | t[5]=OC_C4S4*t[4]>>16; |
150 | 0 | t[6]=OC_C4S4*t[7]>>16; |
151 | 0 | /*Stage 3:*/ |
152 | 0 | t[1]=t[0]+t[2]; |
153 | 0 | t[2]=t[0]-t[2]; |
154 | 0 | r=t[0]+t[3]; |
155 | 0 | t[3]=t[0]-t[3]; |
156 | 0 | t[0]=r; |
157 | 0 | r=t[6]+t[5]; |
158 | 0 | t[5]=t[6]-t[5]; |
159 | 0 | t[6]=r; |
160 | 0 | /*Stage 4:*/ |
161 | 0 | _y[0<<3]=(ogg_int16_t)(t[0]+t[7]); |
162 | 0 | _y[1<<3]=(ogg_int16_t)(t[1]+t[6]); |
163 | 0 | _y[2<<3]=(ogg_int16_t)(t[2]+t[5]); |
164 | 0 | _y[3<<3]=(ogg_int16_t)(t[3]+t[4]); |
165 | 0 | _y[4<<3]=(ogg_int16_t)(t[3]-t[4]); |
166 | 0 | _y[5<<3]=(ogg_int16_t)(t[2]-t[5]); |
167 | 0 | _y[6<<3]=(ogg_int16_t)(t[1]-t[6]); |
168 | 0 | _y[7<<3]=(ogg_int16_t)(t[0]-t[7]); |
169 | 0 | } |
170 | | |
171 | | /*Performs an inverse 8 point Type-II DCT transform. |
172 | | The output is scaled by a factor of 2 relative to the orthonormal version of |
173 | | the transform. |
174 | | _y: The buffer to store the result in. |
175 | | Data will be placed in every 8th entry (e.g., in a column of an 8x8 |
176 | | block). |
177 | | _x: The input coefficients. |
178 | | Only the first 2 entries are used. |
179 | | The other 6 are assumed to be 0.*/ |
180 | 0 | static void idct8_2(ogg_int16_t *_y,const ogg_int16_t _x[8]){ |
181 | 0 | ogg_int32_t t[8]; |
182 | 0 | ogg_int32_t r; |
183 | 0 | /*Stage 1:*/ |
184 | 0 | t[0]=OC_C4S4*_x[0]>>16; |
185 | 0 | t[4]=OC_C7S1*_x[1]>>16; |
186 | 0 | t[7]=OC_C1S7*_x[1]>>16; |
187 | 0 | /*Stage 2:*/ |
188 | 0 | t[5]=OC_C4S4*t[4]>>16; |
189 | 0 | t[6]=OC_C4S4*t[7]>>16; |
190 | 0 | /*Stage 3:*/ |
191 | 0 | r=t[6]+t[5]; |
192 | 0 | t[5]=t[6]-t[5]; |
193 | 0 | t[6]=r; |
194 | 0 | /*Stage 4:*/ |
195 | 0 | _y[0<<3]=(ogg_int16_t)(t[0]+t[7]); |
196 | 0 | _y[1<<3]=(ogg_int16_t)(t[0]+t[6]); |
197 | 0 | _y[2<<3]=(ogg_int16_t)(t[0]+t[5]); |
198 | 0 | _y[3<<3]=(ogg_int16_t)(t[0]+t[4]); |
199 | 0 | _y[4<<3]=(ogg_int16_t)(t[0]-t[4]); |
200 | 0 | _y[5<<3]=(ogg_int16_t)(t[0]-t[5]); |
201 | 0 | _y[6<<3]=(ogg_int16_t)(t[0]-t[6]); |
202 | 0 | _y[7<<3]=(ogg_int16_t)(t[0]-t[7]); |
203 | 0 | } |
204 | | |
205 | | /*Performs an inverse 8 point Type-II DCT transform. |
206 | | The output is scaled by a factor of 2 relative to the orthonormal version of |
207 | | the transform. |
208 | | _y: The buffer to store the result in. |
209 | | Data will be placed in every 8th entry (e.g., in a column of an 8x8 |
210 | | block). |
211 | | _x: The input coefficients. |
212 | | Only the first entry is used. |
213 | | The other 7 are assumed to be 0.*/ |
214 | 0 | static void idct8_1(ogg_int16_t *_y,const ogg_int16_t _x[1]){ |
215 | 0 | _y[0<<3]=_y[1<<3]=_y[2<<3]=_y[3<<3]= |
216 | 0 | _y[4<<3]=_y[5<<3]=_y[6<<3]=_y[7<<3]=(ogg_int16_t)(OC_C4S4*_x[0]>>16); |
217 | 0 | } |
218 | | |
219 | | /*Performs an inverse 8x8 Type-II DCT transform. |
220 | | The input is assumed to be scaled by a factor of 4 relative to orthonormal |
221 | | version of the transform. |
222 | | All coefficients but the first 3 in zig-zag scan order are assumed to be 0: |
223 | | x x 0 0 0 0 0 0 |
224 | | x 0 0 0 0 0 0 0 |
225 | | 0 0 0 0 0 0 0 0 |
226 | | 0 0 0 0 0 0 0 0 |
227 | | 0 0 0 0 0 0 0 0 |
228 | | 0 0 0 0 0 0 0 0 |
229 | | 0 0 0 0 0 0 0 0 |
230 | | 0 0 0 0 0 0 0 0 |
231 | | _y: The buffer to store the result in. |
232 | | This may be the same as _x. |
233 | | _x: The input coefficients.*/ |
234 | 0 | static void oc_idct8x8_3(ogg_int16_t _y[64],ogg_int16_t _x[64]){ |
235 | 0 | ogg_int16_t w[64]; |
236 | 0 | int i; |
237 | 0 | /*Transform rows of x into columns of w.*/ |
238 | 0 | idct8_2(w,_x); |
239 | 0 | idct8_1(w+1,_x+8); |
240 | 0 | /*Transform rows of w into columns of y.*/ |
241 | 0 | for(i=0;i<8;i++)idct8_2(_y+i,w+i*8); |
242 | 0 | /*Adjust for the scale factor.*/ |
243 | 0 | for(i=0;i<64;i++)_y[i]=(ogg_int16_t)(_y[i]+8>>4); |
244 | 0 | /*Clear input data for next block (decoder only).*/ |
245 | 0 | if(_x!=_y)_x[0]=_x[1]=_x[8]=0; |
246 | 0 | } |
247 | | |
248 | | /*Performs an inverse 8x8 Type-II DCT transform. |
249 | | The input is assumed to be scaled by a factor of 4 relative to orthonormal |
250 | | version of the transform. |
251 | | All coefficients but the first 10 in zig-zag scan order are assumed to be 0: |
252 | | x x x x 0 0 0 0 |
253 | | x x x 0 0 0 0 0 |
254 | | x x 0 0 0 0 0 0 |
255 | | x 0 0 0 0 0 0 0 |
256 | | 0 0 0 0 0 0 0 0 |
257 | | 0 0 0 0 0 0 0 0 |
258 | | 0 0 0 0 0 0 0 0 |
259 | | 0 0 0 0 0 0 0 0 |
260 | | _y: The buffer to store the result in. |
261 | | This may be the same as _x. |
262 | | _x: The input coefficients.*/ |
263 | 0 | static void oc_idct8x8_10(ogg_int16_t _y[64],ogg_int16_t _x[64]){ |
264 | 0 | ogg_int16_t w[64]; |
265 | 0 | int i; |
266 | 0 | /*Transform rows of x into columns of w.*/ |
267 | 0 | idct8_4(w,_x); |
268 | 0 | idct8_3(w+1,_x+8); |
269 | 0 | idct8_2(w+2,_x+16); |
270 | 0 | idct8_1(w+3,_x+24); |
271 | 0 | /*Transform rows of w into columns of y.*/ |
272 | 0 | for(i=0;i<8;i++)idct8_4(_y+i,w+i*8); |
273 | 0 | /*Adjust for the scale factor.*/ |
274 | 0 | for(i=0;i<64;i++)_y[i]=(ogg_int16_t)(_y[i]+8>>4); |
275 | 0 | /*Clear input data for next block (decoder only).*/ |
276 | 0 | if(_x!=_y)_x[0]=_x[1]=_x[2]=_x[3]=_x[8]=_x[9]=_x[10]=_x[16]=_x[17]=_x[24]=0; |
277 | 0 | } |
278 | | |
279 | | /*Performs an inverse 8x8 Type-II DCT transform. |
280 | | The input is assumed to be scaled by a factor of 4 relative to orthonormal |
281 | | version of the transform. |
282 | | _y: The buffer to store the result in. |
283 | | This may be the same as _x. |
284 | | _x: The input coefficients.*/ |
285 | 0 | static void oc_idct8x8_slow(ogg_int16_t _y[64],ogg_int16_t _x[64]){ |
286 | 0 | ogg_int16_t w[64]; |
287 | 0 | int i; |
288 | 0 | /*Transform rows of x into columns of w.*/ |
289 | 0 | for(i=0;i<8;i++)idct8(w+i,_x+i*8); |
290 | 0 | /*Transform rows of w into columns of y.*/ |
291 | 0 | for(i=0;i<8;i++)idct8(_y+i,w+i*8); |
292 | 0 | /*Adjust for the scale factor.*/ |
293 | 0 | for(i=0;i<64;i++)_y[i]=(ogg_int16_t)(_y[i]+8>>4); |
294 | 0 | if(_x!=_y)for(i=0;i<64;i++)_x[i]=0; |
295 | 0 | } |
296 | | |
297 | | /*Performs an inverse 8x8 Type-II DCT transform. |
298 | | The input is assumed to be scaled by a factor of 4 relative to orthonormal |
299 | | version of the transform.*/ |
300 | 0 | void oc_idct8x8_c(ogg_int16_t _y[64],ogg_int16_t _x[64],int _last_zzi){ |
301 | 0 | /*_last_zzi is subtly different from an actual count of the number of |
302 | 0 | coefficients we decoded for this block. |
303 | 0 | It contains the value of zzi BEFORE the final token in the block was |
304 | 0 | decoded. |
305 | 0 | In most cases this is an EOB token (the continuation of an EOB run from a |
306 | 0 | previous block counts), and so this is the same as the coefficient count. |
307 | 0 | However, in the case that the last token was NOT an EOB token, but filled |
308 | 0 | the block up with exactly 64 coefficients, _last_zzi will be less than 64. |
309 | 0 | Provided the last token was not a pure zero run, the minimum value it can |
310 | 0 | be is 46, and so that doesn't affect any of the cases in this routine. |
311 | 0 | However, if the last token WAS a pure zero run of length 63, then _last_zzi |
312 | 0 | will be 1 while the number of coefficients decoded is 64. |
313 | 0 | Thus, we will trigger the following special case, where the real |
314 | 0 | coefficient count would not. |
315 | 0 | Note also that a zero run of length 64 will give _last_zzi a value of 0, |
316 | 0 | but we still process the DC coefficient, which might have a non-zero value |
317 | 0 | due to DC prediction. |
318 | 0 | Although convoluted, this is arguably the correct behavior: it allows us to |
319 | 0 | use a smaller transform when the block ends with a long zero run instead |
320 | 0 | of a normal EOB token. |
321 | 0 | It could be smarter... multiple separate zero runs at the end of a block |
322 | 0 | will fool it, but an encoder that generates these really deserves what it |
323 | 0 | gets. |
324 | 0 | Needless to say we inherited this approach from VP3.*/ |
325 | 0 | /*Then perform the iDCT.*/ |
326 | 0 | if(_last_zzi<=3)oc_idct8x8_3(_y,_x); |
327 | 0 | else if(_last_zzi<=10)oc_idct8x8_10(_y,_x); |
328 | 0 | else oc_idct8x8_slow(_y,_x); |
329 | 0 | } |