/src/libjpeg-turbo.main/src/jidctred.c
Line | Count | Source |
1 | | /* |
2 | | * jidctred.c |
3 | | * |
4 | | * This file was part of the Independent JPEG Group's software: |
5 | | * Copyright (C) 1994-1998, Thomas G. Lane. |
6 | | * libjpeg-turbo Modifications: |
7 | | * Copyright (C) 2015, 2022, 2026, D. R. Commander. |
8 | | * For conditions of distribution and use, see the accompanying README.ijg |
9 | | * file. |
10 | | * |
11 | | * This file contains inverse-DCT routines that produce reduced-size output: |
12 | | * either 4x4, 2x2, or 1x1 samples from an 8x8 DCT block. |
13 | | * |
14 | | * The implementation is based on the Loeffler, Ligtenberg and Moschytz (LL&M) |
15 | | * algorithm used in jidctint.c. We simply replace each 8-to-8 1-D IDCT step |
16 | | * with an 8-to-4 step that produces the four averages of two adjacent outputs |
17 | | * (or an 8-to-2 step producing two averages of four outputs, for 2x2 output). |
18 | | * These steps were derived by computing the corresponding values at the end |
19 | | * of the normal LL&M code, then simplifying as much as possible. |
20 | | * |
21 | | * 1x1 is trivial: just take the DC coefficient divided by 8. |
22 | | * |
23 | | * See jidctint.c for additional comments. |
24 | | */ |
25 | | |
26 | | #define JPEG_INTERNALS |
27 | | #include "jinclude.h" |
28 | | #include "jpeglib.h" |
29 | | #include "jdct.h" /* Private declarations for DCT subsystem */ |
30 | | |
31 | | #ifdef IDCT_SCALING_SUPPORTED |
32 | | |
33 | | |
34 | | /* |
35 | | * This module is specialized to the case DCTSIZE = 8. |
36 | | */ |
37 | | |
38 | | #if DCTSIZE != 8 |
39 | | Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */ |
40 | | #endif |
41 | | |
42 | | |
/* Scaling is the same as in jidctint.c.  CONST_BITS does not depend on the
 * sample size; only PASS1_BITS, the number of extra fractional bits kept in
 * the workspace between the two passes, does.
 */

#define CONST_BITS  13

#if BITS_IN_JSAMPLE == 8
#define PASS1_BITS  2
#else
#define PASS1_BITS  1   /* lose a little precision to avoid overflow */
#endif
52 | | |
/* Fixed-point multiplier constants.
 *
 * Some C compilers fail to reduce "FIX(constant)" at compile time, which
 * causes a lot of useless floating-point work at run time.  To avoid that,
 * pre-calculated values are supplied for CONST_BITS == 13.  If you change
 * CONST_BITS you may want to add appropriate values; otherwise the FIX()
 * macro is used as a fallback.
 *
 * The constants are grouped by the routine that uses them.
 */

#if CONST_BITS == 13

/* 4x4 IDCT, even part */
#define FIX_0_765366865  ((JLONG)6270)          /* FIX(0.765366865) */
#define FIX_1_847759065  ((JLONG)15137)         /* FIX(1.847759065) */

/* 4x4 IDCT, odd part */
#define FIX_0_211164243  ((JLONG)1730)          /* FIX(0.211164243) */
#define FIX_0_509795579  ((JLONG)4176)          /* FIX(0.509795579) */
#define FIX_0_601344887  ((JLONG)4926)          /* FIX(0.601344887) */
#define FIX_0_899976223  ((JLONG)7373)          /* FIX(0.899976223) */
#define FIX_1_061594337  ((JLONG)8697)          /* FIX(1.061594337) */
#define FIX_1_451774981  ((JLONG)11893)         /* FIX(1.451774981) */
#define FIX_2_172734803  ((JLONG)17799)         /* FIX(2.172734803) */
#define FIX_2_562915447  ((JLONG)20995)         /* FIX(2.562915447) */

/* 2x2 IDCT, odd part */
#define FIX_0_720959822  ((JLONG)5906)          /* FIX(0.720959822) */
#define FIX_0_850430095  ((JLONG)6967)          /* FIX(0.850430095) */
#define FIX_1_272758580  ((JLONG)10426)         /* FIX(1.272758580) */
#define FIX_3_624509785  ((JLONG)29692)         /* FIX(3.624509785) */

#else

/* 4x4 IDCT, even part */
#define FIX_0_765366865  FIX(0.765366865)
#define FIX_1_847759065  FIX(1.847759065)

/* 4x4 IDCT, odd part */
#define FIX_0_211164243  FIX(0.211164243)
#define FIX_0_509795579  FIX(0.509795579)
#define FIX_0_601344887  FIX(0.601344887)
#define FIX_0_899976223  FIX(0.899976223)
#define FIX_1_061594337  FIX(1.061594337)
#define FIX_1_451774981  FIX(1.451774981)
#define FIX_2_172734803  FIX(2.172734803)
#define FIX_2_562915447  FIX(2.562915447)

/* 2x2 IDCT, odd part */
#define FIX_0_720959822  FIX(0.720959822)
#define FIX_0_850430095  FIX(0.850430095)
#define FIX_1_272758580  FIX(1.272758580)
#define FIX_3_624509785  FIX(3.624509785)

#endif
91 | | |
92 | | |
/* MULTIPLY(): multiply a JLONG variable by a JLONG constant, yielding a JLONG
 * result.  For 8-bit samples with the recommended scaling, all the variable
 * and constant values involved are no more than 16 bits wide, so a
 * 16x16->32 bit multiply (MULTIPLY16C16) can stand in for a full 32x32
 * multiply.  For 12-bit samples, a full 32-bit multiplication is needed.
 */

#if BITS_IN_JSAMPLE != 8
#define MULTIPLY(value, coef)  ((value) * (coef))
#else
#define MULTIPLY(value, coef)  MULTIPLY16C16(value, coef)
#endif
105 | | |
106 | | |
/* When decompressing an 8-bit-per-sample lossy JPEG image, the caller may
 * request 12-bit-per-sample output in order to facilitate shadow recovery in
 * underexposed images.  That is accomplished by using the 12-bit-per-sample
 * decompression pipeline and multiplying the DCT coefficients from the
 * 8-bit-per-sample JPEG image by 16 (the equivalent of left shifting by 4
 * bits.)
 *
 * SCALING_FACTOR belongs in the declaration section of a function that has
 * `cinfo` in scope.  In the 12-bit build it declares the local
 * `scaling_factor` (16 or 1); in every other build it expands to nothing.
 */

#if BITS_IN_JSAMPLE != 12
#define SCALING_FACTOR
#else
#define SCALING_FACTOR \
  JLONG scaling_factor = \
    (cinfo->master->jpeg_data_precision == 8 && \
     cinfo->data_precision == 12) ? 16 : 1;
#endif
122 | | |
123 | | |
/* DEQUANTIZE(): dequantize a coefficient by multiplying it by its
 * multiplier-table entry; produce an int result.  In this module, both inputs
 * and result are 16 bits or less, so either int or short multiply will work.
 *
 * Outside the 8-bit build the product is additionally multiplied by the local
 * variable `scaling_factor`, so the macro may be used only where that variable
 * is in scope.
 */

#if BITS_IN_JSAMPLE != 8
#define DEQUANTIZE(block_coef, multiplier) \
  (((ISLOW_MULT_TYPE)(block_coef)) * (multiplier) * scaling_factor)
#else
#define DEQUANTIZE(block_coef, multiplier) \
  (((ISLOW_MULT_TYPE)(block_coef)) * (multiplier))
#endif
135 | | |
136 | | |
137 | | /* |
138 | | * Perform dequantization and inverse DCT on one block of coefficients, |
139 | | * producing a reduced-size 4x4 output block. |
140 | | */ |
141 | | |
142 | | GLOBAL(void) |
143 | | _jpeg_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr, |
144 | | JCOEFPTR coef_block, _JSAMPARRAY output_buf, |
145 | | JDIMENSION output_col) |
146 | 6.22M | { |
147 | 6.22M | JLONG tmp0, tmp2, tmp10, tmp12; |
148 | 6.22M | JLONG z1, z2, z3, z4; |
149 | 6.22M | JCOEFPTR inptr; |
150 | 6.22M | ISLOW_MULT_TYPE *quantptr; |
151 | 6.22M | int *wsptr; |
152 | 6.22M | _JSAMPROW outptr; |
153 | 6.22M | _JSAMPLE *range_limit = IDCT_range_limit(cinfo); |
154 | 6.22M | int ctr; |
155 | 6.22M | int workspace[DCTSIZE * 4]; /* buffers data between passes */ |
156 | | SHIFT_TEMPS |
157 | 6.22M | SCALING_FACTOR |
158 | | |
159 | | /* Pass 1: process columns from input, store into work array. */ |
160 | | |
161 | 6.22M | inptr = coef_block; |
162 | 6.22M | quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table; |
163 | 6.22M | wsptr = workspace; |
164 | 55.9M | for (ctr = DCTSIZE; ctr > 0; inptr++, quantptr++, wsptr++, ctr--) { |
165 | | /* Don't bother to process column 4, because second pass won't use it */ |
166 | 49.7M | if (ctr == DCTSIZE - 4) |
167 | 6.22M | continue; |
168 | 43.5M | if (inptr[DCTSIZE * 1] == 0 && inptr[DCTSIZE * 2] == 0 && |
169 | 39.7M | inptr[DCTSIZE * 3] == 0 && inptr[DCTSIZE * 5] == 0 && |
170 | 39.6M | inptr[DCTSIZE * 6] == 0 && inptr[DCTSIZE * 7] == 0) { |
171 | | /* AC terms all zero; we need not examine term 4 for 4x4 output */ |
172 | 39.6M | int dcval = LEFT_SHIFT(DEQUANTIZE(inptr[DCTSIZE * 0], |
173 | 39.6M | quantptr[DCTSIZE * 0]), PASS1_BITS); |
174 | | |
175 | 39.6M | wsptr[DCTSIZE * 0] = dcval; |
176 | 39.6M | wsptr[DCTSIZE * 1] = dcval; |
177 | 39.6M | wsptr[DCTSIZE * 2] = dcval; |
178 | 39.6M | wsptr[DCTSIZE * 3] = dcval; |
179 | | |
180 | 39.6M | continue; |
181 | 39.6M | } |
182 | | |
183 | | /* Even part */ |
184 | | |
185 | 3.91M | tmp0 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]); |
186 | 3.91M | tmp0 = LEFT_SHIFT(tmp0, CONST_BITS + 1); |
187 | | |
188 | 3.91M | z2 = DEQUANTIZE(inptr[DCTSIZE * 2], quantptr[DCTSIZE * 2]); |
189 | 3.91M | z3 = DEQUANTIZE(inptr[DCTSIZE * 6], quantptr[DCTSIZE * 6]); |
190 | | |
191 | 3.91M | tmp2 = MULTIPLY(z2, FIX_1_847759065) + MULTIPLY(z3, -FIX_0_765366865); |
192 | | |
193 | 3.91M | tmp10 = tmp0 + tmp2; |
194 | 3.91M | tmp12 = tmp0 - tmp2; |
195 | | |
196 | | /* Odd part */ |
197 | | |
198 | 3.91M | z1 = DEQUANTIZE(inptr[DCTSIZE * 7], quantptr[DCTSIZE * 7]); |
199 | 3.91M | z2 = DEQUANTIZE(inptr[DCTSIZE * 5], quantptr[DCTSIZE * 5]); |
200 | 3.91M | z3 = DEQUANTIZE(inptr[DCTSIZE * 3], quantptr[DCTSIZE * 3]); |
201 | 3.91M | z4 = DEQUANTIZE(inptr[DCTSIZE * 1], quantptr[DCTSIZE * 1]); |
202 | | |
203 | 3.91M | tmp0 = MULTIPLY(z1, -FIX_0_211164243) + /* sqrt(2) * ( c3-c1) */ |
204 | 3.91M | MULTIPLY(z2, FIX_1_451774981) + /* sqrt(2) * ( c3+c7) */ |
205 | 3.91M | MULTIPLY(z3, -FIX_2_172734803) + /* sqrt(2) * (-c1-c5) */ |
206 | 3.91M | MULTIPLY(z4, FIX_1_061594337); /* sqrt(2) * ( c5+c7) */ |
207 | | |
208 | 3.91M | tmp2 = MULTIPLY(z1, -FIX_0_509795579) + /* sqrt(2) * (c7-c5) */ |
209 | 3.91M | MULTIPLY(z2, -FIX_0_601344887) + /* sqrt(2) * (c5-c1) */ |
210 | 3.91M | MULTIPLY(z3, FIX_0_899976223) + /* sqrt(2) * (c3-c7) */ |
211 | 3.91M | MULTIPLY(z4, FIX_2_562915447); /* sqrt(2) * (c1+c3) */ |
212 | | |
213 | | /* Final output stage */ |
214 | | |
215 | 3.91M | wsptr[DCTSIZE * 0] = |
216 | 3.91M | (int)DESCALE(tmp10 + tmp2, CONST_BITS - PASS1_BITS + 1); |
217 | 3.91M | wsptr[DCTSIZE * 3] = |
218 | 3.91M | (int)DESCALE(tmp10 - tmp2, CONST_BITS - PASS1_BITS + 1); |
219 | 3.91M | wsptr[DCTSIZE * 1] = |
220 | 3.91M | (int)DESCALE(tmp12 + tmp0, CONST_BITS - PASS1_BITS + 1); |
221 | 3.91M | wsptr[DCTSIZE * 2] = |
222 | 3.91M | (int)DESCALE(tmp12 - tmp0, CONST_BITS - PASS1_BITS + 1); |
223 | 3.91M | } |
224 | | |
225 | | /* Pass 2: process 4 rows from work array, store into output array. */ |
226 | | |
227 | 6.22M | wsptr = workspace; |
228 | 31.1M | for (ctr = 0; ctr < 4; ctr++) { |
229 | 24.8M | outptr = output_buf[ctr] + output_col; |
230 | | /* It's not clear whether a zero row test is worthwhile here ... */ |
231 | | |
232 | 24.8M | #ifndef NO_ZERO_ROW_TEST |
233 | 24.8M | if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && |
234 | 17.6M | wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) { |
235 | | /* AC terms all zero */ |
236 | 17.4M | _JSAMPLE dcval = range_limit[(int)DESCALE((JLONG)wsptr[0], |
237 | 17.4M | PASS1_BITS + 3) & RANGE_MASK]; |
238 | | |
239 | 17.4M | outptr[0] = dcval; |
240 | 17.4M | outptr[1] = dcval; |
241 | 17.4M | outptr[2] = dcval; |
242 | 17.4M | outptr[3] = dcval; |
243 | | |
244 | 17.4M | wsptr += DCTSIZE; /* advance pointer to next row */ |
245 | 17.4M | continue; |
246 | 17.4M | } |
247 | 7.46M | #endif |
248 | | |
249 | | /* Even part */ |
250 | | |
251 | 7.46M | tmp0 = LEFT_SHIFT((JLONG)wsptr[0], CONST_BITS + 1); |
252 | | |
253 | 7.46M | tmp2 = MULTIPLY((JLONG)wsptr[2], FIX_1_847759065) + |
254 | 7.46M | MULTIPLY((JLONG)wsptr[6], -FIX_0_765366865); |
255 | | |
256 | 7.46M | tmp10 = tmp0 + tmp2; |
257 | 7.46M | tmp12 = tmp0 - tmp2; |
258 | | |
259 | | /* Odd part */ |
260 | | |
261 | 7.46M | z1 = (JLONG)wsptr[7]; |
262 | 7.46M | z2 = (JLONG)wsptr[5]; |
263 | 7.46M | z3 = (JLONG)wsptr[3]; |
264 | 7.46M | z4 = (JLONG)wsptr[1]; |
265 | | |
266 | 7.46M | tmp0 = MULTIPLY(z1, -FIX_0_211164243) + /* sqrt(2) * ( c3-c1) */ |
267 | 7.46M | MULTIPLY(z2, FIX_1_451774981) + /* sqrt(2) * ( c3+c7) */ |
268 | 7.46M | MULTIPLY(z3, -FIX_2_172734803) + /* sqrt(2) * (-c1-c5) */ |
269 | 7.46M | MULTIPLY(z4, FIX_1_061594337); /* sqrt(2) * ( c5+c7) */ |
270 | | |
271 | 7.46M | tmp2 = MULTIPLY(z1, -FIX_0_509795579) + /* sqrt(2) * (c7-c5) */ |
272 | 7.46M | MULTIPLY(z2, -FIX_0_601344887) + /* sqrt(2) * (c5-c1) */ |
273 | 7.46M | MULTIPLY(z3, FIX_0_899976223) + /* sqrt(2) * (c3-c7) */ |
274 | 7.46M | MULTIPLY(z4, FIX_2_562915447); /* sqrt(2) * (c1+c3) */ |
275 | | |
276 | | /* Final output stage */ |
277 | | |
278 | 7.46M | outptr[0] = range_limit[(int)DESCALE(tmp10 + tmp2, |
279 | 7.46M | CONST_BITS + PASS1_BITS + 3 + 1) & |
280 | 7.46M | RANGE_MASK]; |
281 | 7.46M | outptr[3] = range_limit[(int)DESCALE(tmp10 - tmp2, |
282 | 7.46M | CONST_BITS + PASS1_BITS + 3 + 1) & |
283 | 7.46M | RANGE_MASK]; |
284 | 7.46M | outptr[1] = range_limit[(int)DESCALE(tmp12 + tmp0, |
285 | 7.46M | CONST_BITS + PASS1_BITS + 3 + 1) & |
286 | 7.46M | RANGE_MASK]; |
287 | 7.46M | outptr[2] = range_limit[(int)DESCALE(tmp12 - tmp0, |
288 | 7.46M | CONST_BITS + PASS1_BITS + 3 + 1) & |
289 | 7.46M | RANGE_MASK]; |
290 | | |
291 | 7.46M | wsptr += DCTSIZE; /* advance pointer to next row */ |
292 | 7.46M | } |
293 | 6.22M | } Unexecuted instantiation: jpeg_idct_4x4 Line | Count | Source | 146 | 6.22M | { | 147 | 6.22M | JLONG tmp0, tmp2, tmp10, tmp12; | 148 | 6.22M | JLONG z1, z2, z3, z4; | 149 | 6.22M | JCOEFPTR inptr; | 150 | 6.22M | ISLOW_MULT_TYPE *quantptr; | 151 | 6.22M | int *wsptr; | 152 | 6.22M | _JSAMPROW outptr; | 153 | 6.22M | _JSAMPLE *range_limit = IDCT_range_limit(cinfo); | 154 | 6.22M | int ctr; | 155 | 6.22M | int workspace[DCTSIZE * 4]; /* buffers data between passes */ | 156 | 6.22M | SHIFT_TEMPS | 157 | 6.22M | SCALING_FACTOR | 158 | | | 159 | | /* Pass 1: process columns from input, store into work array. */ | 160 | | | 161 | 6.22M | inptr = coef_block; | 162 | 6.22M | quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table; | 163 | 6.22M | wsptr = workspace; | 164 | 55.9M | for (ctr = DCTSIZE; ctr > 0; inptr++, quantptr++, wsptr++, ctr--) { | 165 | | /* Don't bother to process column 4, because second pass won't use it */ | 166 | 49.7M | if (ctr == DCTSIZE - 4) | 167 | 6.22M | continue; | 168 | 43.5M | if (inptr[DCTSIZE * 1] == 0 && inptr[DCTSIZE * 2] == 0 && | 169 | 39.7M | inptr[DCTSIZE * 3] == 0 && inptr[DCTSIZE * 5] == 0 && | 170 | 39.6M | inptr[DCTSIZE * 6] == 0 && inptr[DCTSIZE * 7] == 0) { | 171 | | /* AC terms all zero; we need not examine term 4 for 4x4 output */ | 172 | 39.6M | int dcval = LEFT_SHIFT(DEQUANTIZE(inptr[DCTSIZE * 0], | 173 | 39.6M | quantptr[DCTSIZE * 0]), PASS1_BITS); | 174 | | | 175 | 39.6M | wsptr[DCTSIZE * 0] = dcval; | 176 | 39.6M | wsptr[DCTSIZE * 1] = dcval; | 177 | 39.6M | wsptr[DCTSIZE * 2] = dcval; | 178 | 39.6M | wsptr[DCTSIZE * 3] = dcval; | 179 | | | 180 | 39.6M | continue; | 181 | 39.6M | } | 182 | | | 183 | | /* Even part */ | 184 | | | 185 | 3.91M | tmp0 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]); | 186 | 3.91M | tmp0 = LEFT_SHIFT(tmp0, CONST_BITS + 1); | 187 | | | 188 | 3.91M | z2 = DEQUANTIZE(inptr[DCTSIZE * 2], quantptr[DCTSIZE * 2]); | 189 | 3.91M | z3 = DEQUANTIZE(inptr[DCTSIZE * 6], 
quantptr[DCTSIZE * 6]); | 190 | | | 191 | 3.91M | tmp2 = MULTIPLY(z2, FIX_1_847759065) + MULTIPLY(z3, -FIX_0_765366865); | 192 | | | 193 | 3.91M | tmp10 = tmp0 + tmp2; | 194 | 3.91M | tmp12 = tmp0 - tmp2; | 195 | | | 196 | | /* Odd part */ | 197 | | | 198 | 3.91M | z1 = DEQUANTIZE(inptr[DCTSIZE * 7], quantptr[DCTSIZE * 7]); | 199 | 3.91M | z2 = DEQUANTIZE(inptr[DCTSIZE * 5], quantptr[DCTSIZE * 5]); | 200 | 3.91M | z3 = DEQUANTIZE(inptr[DCTSIZE * 3], quantptr[DCTSIZE * 3]); | 201 | 3.91M | z4 = DEQUANTIZE(inptr[DCTSIZE * 1], quantptr[DCTSIZE * 1]); | 202 | | | 203 | 3.91M | tmp0 = MULTIPLY(z1, -FIX_0_211164243) + /* sqrt(2) * ( c3-c1) */ | 204 | 3.91M | MULTIPLY(z2, FIX_1_451774981) + /* sqrt(2) * ( c3+c7) */ | 205 | 3.91M | MULTIPLY(z3, -FIX_2_172734803) + /* sqrt(2) * (-c1-c5) */ | 206 | 3.91M | MULTIPLY(z4, FIX_1_061594337); /* sqrt(2) * ( c5+c7) */ | 207 | | | 208 | 3.91M | tmp2 = MULTIPLY(z1, -FIX_0_509795579) + /* sqrt(2) * (c7-c5) */ | 209 | 3.91M | MULTIPLY(z2, -FIX_0_601344887) + /* sqrt(2) * (c5-c1) */ | 210 | 3.91M | MULTIPLY(z3, FIX_0_899976223) + /* sqrt(2) * (c3-c7) */ | 211 | 3.91M | MULTIPLY(z4, FIX_2_562915447); /* sqrt(2) * (c1+c3) */ | 212 | | | 213 | | /* Final output stage */ | 214 | | | 215 | 3.91M | wsptr[DCTSIZE * 0] = | 216 | 3.91M | (int)DESCALE(tmp10 + tmp2, CONST_BITS - PASS1_BITS + 1); | 217 | 3.91M | wsptr[DCTSIZE * 3] = | 218 | 3.91M | (int)DESCALE(tmp10 - tmp2, CONST_BITS - PASS1_BITS + 1); | 219 | 3.91M | wsptr[DCTSIZE * 1] = | 220 | 3.91M | (int)DESCALE(tmp12 + tmp0, CONST_BITS - PASS1_BITS + 1); | 221 | 3.91M | wsptr[DCTSIZE * 2] = | 222 | 3.91M | (int)DESCALE(tmp12 - tmp0, CONST_BITS - PASS1_BITS + 1); | 223 | 3.91M | } | 224 | | | 225 | | /* Pass 2: process 4 rows from work array, store into output array. */ | 226 | | | 227 | 6.22M | wsptr = workspace; | 228 | 31.1M | for (ctr = 0; ctr < 4; ctr++) { | 229 | 24.8M | outptr = output_buf[ctr] + output_col; | 230 | | /* It's not clear whether a zero row test is worthwhile here ... 
*/ | 231 | | | 232 | 24.8M | #ifndef NO_ZERO_ROW_TEST | 233 | 24.8M | if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && | 234 | 17.6M | wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) { | 235 | | /* AC terms all zero */ | 236 | 17.4M | _JSAMPLE dcval = range_limit[(int)DESCALE((JLONG)wsptr[0], | 237 | 17.4M | PASS1_BITS + 3) & RANGE_MASK]; | 238 | | | 239 | 17.4M | outptr[0] = dcval; | 240 | 17.4M | outptr[1] = dcval; | 241 | 17.4M | outptr[2] = dcval; | 242 | 17.4M | outptr[3] = dcval; | 243 | | | 244 | 17.4M | wsptr += DCTSIZE; /* advance pointer to next row */ | 245 | 17.4M | continue; | 246 | 17.4M | } | 247 | 7.46M | #endif | 248 | | | 249 | | /* Even part */ | 250 | | | 251 | 7.46M | tmp0 = LEFT_SHIFT((JLONG)wsptr[0], CONST_BITS + 1); | 252 | | | 253 | 7.46M | tmp2 = MULTIPLY((JLONG)wsptr[2], FIX_1_847759065) + | 254 | 7.46M | MULTIPLY((JLONG)wsptr[6], -FIX_0_765366865); | 255 | | | 256 | 7.46M | tmp10 = tmp0 + tmp2; | 257 | 7.46M | tmp12 = tmp0 - tmp2; | 258 | | | 259 | | /* Odd part */ | 260 | | | 261 | 7.46M | z1 = (JLONG)wsptr[7]; | 262 | 7.46M | z2 = (JLONG)wsptr[5]; | 263 | 7.46M | z3 = (JLONG)wsptr[3]; | 264 | 7.46M | z4 = (JLONG)wsptr[1]; | 265 | | | 266 | 7.46M | tmp0 = MULTIPLY(z1, -FIX_0_211164243) + /* sqrt(2) * ( c3-c1) */ | 267 | 7.46M | MULTIPLY(z2, FIX_1_451774981) + /* sqrt(2) * ( c3+c7) */ | 268 | 7.46M | MULTIPLY(z3, -FIX_2_172734803) + /* sqrt(2) * (-c1-c5) */ | 269 | 7.46M | MULTIPLY(z4, FIX_1_061594337); /* sqrt(2) * ( c5+c7) */ | 270 | | | 271 | 7.46M | tmp2 = MULTIPLY(z1, -FIX_0_509795579) + /* sqrt(2) * (c7-c5) */ | 272 | 7.46M | MULTIPLY(z2, -FIX_0_601344887) + /* sqrt(2) * (c5-c1) */ | 273 | 7.46M | MULTIPLY(z3, FIX_0_899976223) + /* sqrt(2) * (c3-c7) */ | 274 | 7.46M | MULTIPLY(z4, FIX_2_562915447); /* sqrt(2) * (c1+c3) */ | 275 | | | 276 | | /* Final output stage */ | 277 | | | 278 | 7.46M | outptr[0] = range_limit[(int)DESCALE(tmp10 + tmp2, | 279 | 7.46M | CONST_BITS + PASS1_BITS + 3 + 1) & | 280 | 7.46M | RANGE_MASK]; | 
281 | 7.46M | outptr[3] = range_limit[(int)DESCALE(tmp10 - tmp2, | 282 | 7.46M | CONST_BITS + PASS1_BITS + 3 + 1) & | 283 | 7.46M | RANGE_MASK]; | 284 | 7.46M | outptr[1] = range_limit[(int)DESCALE(tmp12 + tmp0, | 285 | 7.46M | CONST_BITS + PASS1_BITS + 3 + 1) & | 286 | 7.46M | RANGE_MASK]; | 287 | 7.46M | outptr[2] = range_limit[(int)DESCALE(tmp12 - tmp0, | 288 | 7.46M | CONST_BITS + PASS1_BITS + 3 + 1) & | 289 | 7.46M | RANGE_MASK]; | 290 | | | 291 | 7.46M | wsptr += DCTSIZE; /* advance pointer to next row */ | 292 | 7.46M | } | 293 | 6.22M | } |
|
294 | | |
295 | | |
296 | | /* |
297 | | * Perform dequantization and inverse DCT on one block of coefficients, |
298 | | * producing a reduced-size 2x2 output block. |
299 | | */ |
300 | | |
301 | | GLOBAL(void) |
302 | | _jpeg_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr, |
303 | | JCOEFPTR coef_block, _JSAMPARRAY output_buf, |
304 | | JDIMENSION output_col) |
305 | 578k | { |
306 | 578k | JLONG tmp0, tmp10, z1; |
307 | 578k | JCOEFPTR inptr; |
308 | 578k | ISLOW_MULT_TYPE *quantptr; |
309 | 578k | int *wsptr; |
310 | 578k | _JSAMPROW outptr; |
311 | 578k | _JSAMPLE *range_limit = IDCT_range_limit(cinfo); |
312 | 578k | int ctr; |
313 | 578k | int workspace[DCTSIZE * 2]; /* buffers data between passes */ |
314 | | SHIFT_TEMPS |
315 | 578k | SCALING_FACTOR |
316 | | |
317 | | /* Pass 1: process columns from input, store into work array. */ |
318 | | |
319 | 578k | inptr = coef_block; |
320 | 578k | quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table; |
321 | 578k | wsptr = workspace; |
322 | 5.20M | for (ctr = DCTSIZE; ctr > 0; inptr++, quantptr++, wsptr++, ctr--) { |
323 | | /* Don't bother to process columns 2,4,6 */ |
324 | 4.62M | if (ctr == DCTSIZE - 2 || ctr == DCTSIZE - 4 || ctr == DCTSIZE - 6) |
325 | 1.73M | continue; |
326 | 2.89M | if (inptr[DCTSIZE * 1] == 0 && inptr[DCTSIZE * 3] == 0 && |
327 | 2.79M | inptr[DCTSIZE * 5] == 0 && inptr[DCTSIZE * 7] == 0) { |
328 | | /* AC terms all zero; we need not examine terms 2,4,6 for 2x2 output */ |
329 | 2.79M | int dcval = LEFT_SHIFT(DEQUANTIZE(inptr[DCTSIZE * 0], |
330 | 2.79M | quantptr[DCTSIZE * 0]), PASS1_BITS); |
331 | | |
332 | 2.79M | wsptr[DCTSIZE * 0] = dcval; |
333 | 2.79M | wsptr[DCTSIZE * 1] = dcval; |
334 | | |
335 | 2.79M | continue; |
336 | 2.79M | } |
337 | | |
338 | | /* Even part */ |
339 | | |
340 | 95.1k | z1 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]); |
341 | 95.1k | tmp10 = LEFT_SHIFT(z1, CONST_BITS + 2); |
342 | | |
343 | | /* Odd part */ |
344 | | |
345 | 95.1k | z1 = DEQUANTIZE(inptr[DCTSIZE * 7], quantptr[DCTSIZE * 7]); |
346 | 95.1k | tmp0 = MULTIPLY(z1, -FIX_0_720959822); /* sqrt(2) * ( c7-c5+c3-c1) */ |
347 | 95.1k | z1 = DEQUANTIZE(inptr[DCTSIZE * 5], quantptr[DCTSIZE * 5]); |
348 | 95.1k | tmp0 += MULTIPLY(z1, FIX_0_850430095); /* sqrt(2) * (-c1+c3+c5+c7) */ |
349 | 95.1k | z1 = DEQUANTIZE(inptr[DCTSIZE * 3], quantptr[DCTSIZE * 3]); |
350 | 95.1k | tmp0 += MULTIPLY(z1, -FIX_1_272758580); /* sqrt(2) * (-c1+c3-c5-c7) */ |
351 | 95.1k | z1 = DEQUANTIZE(inptr[DCTSIZE * 1], quantptr[DCTSIZE * 1]); |
352 | 95.1k | tmp0 += MULTIPLY(z1, FIX_3_624509785); /* sqrt(2) * ( c1+c3+c5+c7) */ |
353 | | |
354 | | /* Final output stage */ |
355 | | |
356 | 95.1k | wsptr[DCTSIZE * 0] = |
357 | 95.1k | (int)DESCALE(tmp10 + tmp0, CONST_BITS - PASS1_BITS + 2); |
358 | 95.1k | wsptr[DCTSIZE * 1] = |
359 | 95.1k | (int)DESCALE(tmp10 - tmp0, CONST_BITS - PASS1_BITS + 2); |
360 | 95.1k | } |
361 | | |
362 | | /* Pass 2: process 2 rows from work array, store into output array. */ |
363 | | |
364 | 578k | wsptr = workspace; |
365 | 1.73M | for (ctr = 0; ctr < 2; ctr++) { |
366 | 1.15M | outptr = output_buf[ctr] + output_col; |
367 | | /* It's not clear whether a zero row test is worthwhile here ... */ |
368 | | |
369 | 1.15M | #ifndef NO_ZERO_ROW_TEST |
370 | 1.15M | if (wsptr[1] == 0 && wsptr[3] == 0 && wsptr[5] == 0 && wsptr[7] == 0) { |
371 | | /* AC terms all zero */ |
372 | 1.01M | _JSAMPLE dcval = range_limit[(int)DESCALE((JLONG)wsptr[0], |
373 | 1.01M | PASS1_BITS + 3) & RANGE_MASK]; |
374 | | |
375 | 1.01M | outptr[0] = dcval; |
376 | 1.01M | outptr[1] = dcval; |
377 | | |
378 | 1.01M | wsptr += DCTSIZE; /* advance pointer to next row */ |
379 | 1.01M | continue; |
380 | 1.01M | } |
381 | 146k | #endif |
382 | | |
383 | | /* Even part */ |
384 | | |
385 | 146k | tmp10 = LEFT_SHIFT((JLONG)wsptr[0], CONST_BITS + 2); |
386 | | |
387 | | /* Odd part */ |
388 | | |
389 | 146k | tmp0 = MULTIPLY((JLONG)wsptr[7], -FIX_0_720959822) + /* sqrt(2) * ( c7-c5+c3-c1) */ |
390 | 146k | MULTIPLY((JLONG)wsptr[5], FIX_0_850430095) + /* sqrt(2) * (-c1+c3+c5+c7) */ |
391 | 146k | MULTIPLY((JLONG)wsptr[3], -FIX_1_272758580) + /* sqrt(2) * (-c1+c3-c5-c7) */ |
392 | 146k | MULTIPLY((JLONG)wsptr[1], FIX_3_624509785); /* sqrt(2) * ( c1+c3+c5+c7) */ |
393 | | |
394 | | /* Final output stage */ |
395 | | |
396 | 146k | outptr[0] = range_limit[(int)DESCALE(tmp10 + tmp0, |
397 | 146k | CONST_BITS + PASS1_BITS + 3 + 2) & |
398 | 146k | RANGE_MASK]; |
399 | 146k | outptr[1] = range_limit[(int)DESCALE(tmp10 - tmp0, |
400 | 146k | CONST_BITS + PASS1_BITS + 3 + 2) & |
401 | 146k | RANGE_MASK]; |
402 | | |
403 | 146k | wsptr += DCTSIZE; /* advance pointer to next row */ |
404 | 146k | } |
405 | 578k | } Unexecuted instantiation: jpeg_idct_2x2 Line | Count | Source | 305 | 578k | { | 306 | 578k | JLONG tmp0, tmp10, z1; | 307 | 578k | JCOEFPTR inptr; | 308 | 578k | ISLOW_MULT_TYPE *quantptr; | 309 | 578k | int *wsptr; | 310 | 578k | _JSAMPROW outptr; | 311 | 578k | _JSAMPLE *range_limit = IDCT_range_limit(cinfo); | 312 | 578k | int ctr; | 313 | 578k | int workspace[DCTSIZE * 2]; /* buffers data between passes */ | 314 | 578k | SHIFT_TEMPS | 315 | 578k | SCALING_FACTOR | 316 | | | 317 | | /* Pass 1: process columns from input, store into work array. */ | 318 | | | 319 | 578k | inptr = coef_block; | 320 | 578k | quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table; | 321 | 578k | wsptr = workspace; | 322 | 5.20M | for (ctr = DCTSIZE; ctr > 0; inptr++, quantptr++, wsptr++, ctr--) { | 323 | | /* Don't bother to process columns 2,4,6 */ | 324 | 4.62M | if (ctr == DCTSIZE - 2 || ctr == DCTSIZE - 4 || ctr == DCTSIZE - 6) | 325 | 1.73M | continue; | 326 | 2.89M | if (inptr[DCTSIZE * 1] == 0 && inptr[DCTSIZE * 3] == 0 && | 327 | 2.79M | inptr[DCTSIZE * 5] == 0 && inptr[DCTSIZE * 7] == 0) { | 328 | | /* AC terms all zero; we need not examine terms 2,4,6 for 2x2 output */ | 329 | 2.79M | int dcval = LEFT_SHIFT(DEQUANTIZE(inptr[DCTSIZE * 0], | 330 | 2.79M | quantptr[DCTSIZE * 0]), PASS1_BITS); | 331 | | | 332 | 2.79M | wsptr[DCTSIZE * 0] = dcval; | 333 | 2.79M | wsptr[DCTSIZE * 1] = dcval; | 334 | | | 335 | 2.79M | continue; | 336 | 2.79M | } | 337 | | | 338 | | /* Even part */ | 339 | | | 340 | 95.1k | z1 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]); | 341 | 95.1k | tmp10 = LEFT_SHIFT(z1, CONST_BITS + 2); | 342 | | | 343 | | /* Odd part */ | 344 | | | 345 | 95.1k | z1 = DEQUANTIZE(inptr[DCTSIZE * 7], quantptr[DCTSIZE * 7]); | 346 | 95.1k | tmp0 = MULTIPLY(z1, -FIX_0_720959822); /* sqrt(2) * ( c7-c5+c3-c1) */ | 347 | 95.1k | z1 = DEQUANTIZE(inptr[DCTSIZE * 5], quantptr[DCTSIZE * 5]); | 348 | 95.1k | tmp0 += MULTIPLY(z1, FIX_0_850430095); /* sqrt(2) * 
(-c1+c3+c5+c7) */ | 349 | 95.1k | z1 = DEQUANTIZE(inptr[DCTSIZE * 3], quantptr[DCTSIZE * 3]); | 350 | 95.1k | tmp0 += MULTIPLY(z1, -FIX_1_272758580); /* sqrt(2) * (-c1+c3-c5-c7) */ | 351 | 95.1k | z1 = DEQUANTIZE(inptr[DCTSIZE * 1], quantptr[DCTSIZE * 1]); | 352 | 95.1k | tmp0 += MULTIPLY(z1, FIX_3_624509785); /* sqrt(2) * ( c1+c3+c5+c7) */ | 353 | | | 354 | | /* Final output stage */ | 355 | | | 356 | 95.1k | wsptr[DCTSIZE * 0] = | 357 | 95.1k | (int)DESCALE(tmp10 + tmp0, CONST_BITS - PASS1_BITS + 2); | 358 | 95.1k | wsptr[DCTSIZE * 1] = | 359 | 95.1k | (int)DESCALE(tmp10 - tmp0, CONST_BITS - PASS1_BITS + 2); | 360 | 95.1k | } | 361 | | | 362 | | /* Pass 2: process 2 rows from work array, store into output array. */ | 363 | | | 364 | 578k | wsptr = workspace; | 365 | 1.73M | for (ctr = 0; ctr < 2; ctr++) { | 366 | 1.15M | outptr = output_buf[ctr] + output_col; | 367 | | /* It's not clear whether a zero row test is worthwhile here ... */ | 368 | | | 369 | 1.15M | #ifndef NO_ZERO_ROW_TEST | 370 | 1.15M | if (wsptr[1] == 0 && wsptr[3] == 0 && wsptr[5] == 0 && wsptr[7] == 0) { | 371 | | /* AC terms all zero */ | 372 | 1.01M | _JSAMPLE dcval = range_limit[(int)DESCALE((JLONG)wsptr[0], | 373 | 1.01M | PASS1_BITS + 3) & RANGE_MASK]; | 374 | | | 375 | 1.01M | outptr[0] = dcval; | 376 | 1.01M | outptr[1] = dcval; | 377 | | | 378 | 1.01M | wsptr += DCTSIZE; /* advance pointer to next row */ | 379 | 1.01M | continue; | 380 | 1.01M | } | 381 | 146k | #endif | 382 | | | 383 | | /* Even part */ | 384 | | | 385 | 146k | tmp10 = LEFT_SHIFT((JLONG)wsptr[0], CONST_BITS + 2); | 386 | | | 387 | | /* Odd part */ | 388 | | | 389 | 146k | tmp0 = MULTIPLY((JLONG)wsptr[7], -FIX_0_720959822) + /* sqrt(2) * ( c7-c5+c3-c1) */ | 390 | 146k | MULTIPLY((JLONG)wsptr[5], FIX_0_850430095) + /* sqrt(2) * (-c1+c3+c5+c7) */ | 391 | 146k | MULTIPLY((JLONG)wsptr[3], -FIX_1_272758580) + /* sqrt(2) * (-c1+c3-c5-c7) */ | 392 | 146k | MULTIPLY((JLONG)wsptr[1], FIX_3_624509785); /* sqrt(2) * ( c1+c3+c5+c7) 
*/ | 393 | | | 394 | | /* Final output stage */ | 395 | | | 396 | 146k | outptr[0] = range_limit[(int)DESCALE(tmp10 + tmp0, | 397 | 146k | CONST_BITS + PASS1_BITS + 3 + 2) & | 398 | 146k | RANGE_MASK]; | 399 | 146k | outptr[1] = range_limit[(int)DESCALE(tmp10 - tmp0, | 400 | 146k | CONST_BITS + PASS1_BITS + 3 + 2) & | 401 | 146k | RANGE_MASK]; | 402 | | | 403 | 146k | wsptr += DCTSIZE; /* advance pointer to next row */ | 404 | 146k | } | 405 | 578k | } |
|
406 | | |
407 | | |
408 | | /* |
409 | | * Perform dequantization and inverse DCT on one block of coefficients, |
410 | | * producing a reduced-size 1x1 output block. |
411 | | */ |
412 | | |
413 | | GLOBAL(void) |
414 | | _jpeg_idct_1x1(j_decompress_ptr cinfo, jpeg_component_info *compptr, |
415 | | JCOEFPTR coef_block, _JSAMPARRAY output_buf, |
416 | | JDIMENSION output_col) |
417 | 26.3M | { |
418 | 26.3M | int dcval; |
419 | 26.3M | ISLOW_MULT_TYPE *quantptr; |
420 | 26.3M | _JSAMPLE *range_limit = IDCT_range_limit(cinfo); |
421 | 26.3M | SHIFT_TEMPS |
422 | 26.3M | SCALING_FACTOR |
423 | | |
424 | | /* We hardly need an inverse DCT routine for this: just take the |
425 | | * average sample value, which is one-eighth of the DC coefficient. |
426 | | */ |
427 | 26.3M | quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table; |
428 | 26.3M | dcval = DEQUANTIZE(coef_block[0], quantptr[0]); |
429 | 26.3M | dcval = (int)DESCALE((JLONG)dcval, 3); |
430 | | |
431 | 26.3M | output_buf[0][output_col] = range_limit[dcval & RANGE_MASK]; |
432 | 26.3M | } Line | Count | Source | 417 | 16.8M | { | 418 | 16.8M | int dcval; | 419 | 16.8M | ISLOW_MULT_TYPE *quantptr; | 420 | 16.8M | _JSAMPLE *range_limit = IDCT_range_limit(cinfo); | 421 | 16.8M | SHIFT_TEMPS | 422 | 16.8M | SCALING_FACTOR | 423 | | | 424 | | /* We hardly need an inverse DCT routine for this: just take the | 425 | | * average sample value, which is one-eighth of the DC coefficient. | 426 | | */ | 427 | 16.8M | quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table; | 428 | 16.8M | dcval = DEQUANTIZE(coef_block[0], quantptr[0]); | 429 | 16.8M | dcval = (int)DESCALE((JLONG)dcval, 3); | 430 | | | 431 | 16.8M | output_buf[0][output_col] = range_limit[dcval & RANGE_MASK]; | 432 | 16.8M | } |
Line | Count | Source | 417 | 9.56M | { | 418 | 9.56M | int dcval; | 419 | 9.56M | ISLOW_MULT_TYPE *quantptr; | 420 | 9.56M | _JSAMPLE *range_limit = IDCT_range_limit(cinfo); | 421 | 9.56M | SHIFT_TEMPS | 422 | 9.56M | SCALING_FACTOR | 423 | | | 424 | | /* We hardly need an inverse DCT routine for this: just take the | 425 | | * average sample value, which is one-eighth of the DC coefficient. | 426 | | */ | 427 | 9.56M | quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table; | 428 | 9.56M | dcval = DEQUANTIZE(coef_block[0], quantptr[0]); | 429 | 9.56M | dcval = (int)DESCALE((JLONG)dcval, 3); | 430 | | | 431 | 9.56M | output_buf[0][output_col] = range_limit[dcval & RANGE_MASK]; | 432 | 9.56M | } |
|
433 | | |
434 | | #endif /* IDCT_SCALING_SUPPORTED */ |