Coverage Report

Created: 2026-04-01 07:17

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/ghostpdl/obj/jidctint.c
Line
Count
Source
1
/*
2
 * jidctint.c
3
 *
4
 * Copyright (C) 1991-1998, Thomas G. Lane.
5
 * Modification developed 2002-2026 by Guido Vollbeding.
6
 * This file is part of the Independent JPEG Group's software.
7
 * For conditions of distribution and use, see the accompanying README file.
8
 *
9
 * This file contains a slow-but-accurate integer implementation of the
10
 * inverse DCT (Discrete Cosine Transform).  In the IJG code, this routine
11
 * must also perform dequantization of the input coefficients.
12
 *
13
 * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
14
 * on each row (or vice versa, but it's more convenient to emit a row at
15
 * a time).  Direct algorithms are also available, but they are much more
16
 * complex and seem not to be any faster when reduced to code.
17
 *
18
 * This implementation is based on an algorithm described in
19
 *   C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
20
 *   Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
21
 *   Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
22
 * The primary algorithm described there uses 11 multiplies and 29 adds.
23
 * We use their alternate method with 12 multiplies and 32 adds.
24
 * The advantage of this method is that no data path contains more than one
25
 * multiplication; this allows a very simple and accurate implementation in
26
 * scaled fixed-point arithmetic, with a minimal number of shifts.
27
 *
28
 * We also provide IDCT routines with various output sample block sizes for
29
 * direct resolution reduction or enlargement and for direct resolving the
30
 * common 2x1 and 1x2 subsampling cases without additional resampling: NxN
31
 * (N=1...16), 2NxN, and Nx2N (N=1...8) samples for one 8x8 input DCT block.
32
 *
33
 * For N<8 we simply take the corresponding low-frequency coefficients of
34
 * the 8x8 input DCT block and apply an NxN point IDCT on the sub-block
35
 * to yield the downscaled outputs.
36
 * This can be seen as direct low-pass downsampling from the DCT domain
37
 * point of view rather than the usual spatial domain point of view,
38
 * yielding significant computational savings and results at least
39
 * as good as common bilinear (averaging) spatial downsampling.
40
 *
41
 * For N>8 we apply a partial NxN IDCT on the 8 input coefficients as
42
 * lower frequencies and higher frequencies assumed to be zero.
43
 * It turns out that the computational effort is similar to the 8x8 IDCT
44
 * regarding the output size.
45
 * Furthermore, the scaling and descaling is the same for all IDCT sizes.
46
 *
47
 * CAUTION: We rely on the FIX() macro except for the N=1,2,4,8 cases
48
 * since there would be too many additional constants to pre-calculate.
49
 */
50
51
#define JPEG_INTERNALS
52
#include "jinclude.h"
53
#include "jpeglib.h"
54
#include "jdct.h"   /* Private declarations for DCT subsystem */
55
56
#ifdef DCT_ISLOW_SUPPORTED
57
58
59
/*
60
 * This module is specialized to the case DCTSIZE = 8.
61
 */
62
63
#if DCTSIZE != 8
64
  Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */
65
#endif
66
67
68
/*
69
 * The poop on this scaling stuff is as follows:
70
 *
71
 * Each 1-D IDCT step produces outputs which are a factor of sqrt(N)
72
 * larger than the true IDCT outputs.  The final outputs are therefore
73
 * a factor of N larger than desired; since N=8 this can be cured by
74
 * a simple right shift at the end of the algorithm.  The advantage of
75
 * this arrangement is that we save two multiplications per 1-D IDCT,
76
 * because the y0 and y4 inputs need not be divided by sqrt(N).
77
 *
78
 * We have to do addition and subtraction of the integer inputs, which
79
 * is no problem, and multiplication by fractional constants, which is
80
 * a problem to do in integer arithmetic.  We multiply all the constants
81
 * by CONST_SCALE and convert them to integer constants (thus retaining
82
 * CONST_BITS bits of precision in the constants).  After doing a
83
 * multiplication we have to divide the product by CONST_SCALE, with
84
 * proper rounding, to produce the correct output.  This division can
85
 * be done cheaply as a right shift of CONST_BITS bits.  We postpone
86
 * shifting as long as possible so that partial sums can be added
87
 * together with full fractional precision.
88
 *
89
 * The outputs of the first pass are scaled up by PASS1_BITS bits so that
90
 * they are represented to better-than-integral precision.  These outputs
91
 * require JPEG_DATA_PRECISION + PASS1_BITS + 3 bits; this fits in a
92
 * 16-bit word with the recommended scaling.  (To scale up higher bit
93
 * depths further, an intermediate INT32 array would be needed.)
94
 *
95
 * To avoid overflow of the 32-bit intermediate results in pass 2, we
96
 * must have JPEG_DATA_PRECISION + CONST_BITS + PASS1_BITS <= 26.  Error
97
 * analysis shows that the values given below are the most effective.
98
 */
99
100
/* Select the fixed-point scaling for the configured sample precision.
 * CONST_BITS is the fractional precision of the multiplier constants,
 * PASS1_BITS the extra scaling carried out of pass 1, and PASS2_BITS the
 * descale applied at the end of pass 2.
 * Every routine in this module needs all three, so an unsupported
 * precision is rejected here instead of failing later on an undefined name.
 */
#if JPEG_DATA_PRECISION <= 10 && BITS_IN_JSAMPLE <= 13
#define CONST_BITS  13
#define PASS1_BITS  (10 - JPEG_DATA_PRECISION)
#define PASS2_BITS  (13 - BITS_IN_JSAMPLE)
#elif JPEG_DATA_PRECISION <= 13 && BITS_IN_JSAMPLE <= 16
#define CONST_BITS  13
#define PASS1_BITS  (13 - JPEG_DATA_PRECISION)
#define PASS2_BITS  (16 - BITS_IN_JSAMPLE)
#else
#error "jidctint.c: no fixed-point scaling defined for this JPEG_DATA_PRECISION / BITS_IN_JSAMPLE"
#endif
111
112
/* Multiplier constants for the 8-point kernel.
 * FIX(constant) is not folded at compile time by every C compiler, which
 * would leave floating-point work to be done at run time.  For the usual
 * CONST_BITS value the results are therefore written out as integers;
 * any other CONST_BITS falls back on the FIX() macro.
 * If you change CONST_BITS you may want to add a matching table here.
 */

#if CONST_BITS != 13
/* No pre-calculated table for this precision: rely on FIX(). */
#define FIX_0_298631336  FIX(0.298631336)
#define FIX_0_390180644  FIX(0.390180644)
#define FIX_0_541196100  FIX(0.541196100)
#define FIX_0_765366865  FIX(0.765366865)
#define FIX_0_899976223  FIX(0.899976223)
#define FIX_1_175875602  FIX(1.175875602)
#define FIX_1_501321110  FIX(1.501321110)
#define FIX_1_847759065  FIX(1.847759065)
#define FIX_1_961570560  FIX(1.961570560)
#define FIX_2_053119869  FIX(2.053119869)
#define FIX_2_562915447  FIX(2.562915447)
#define FIX_3_072711026  FIX(3.072711026)
#else
/* Pre-calculated values for CONST_BITS == 13. */
#define FIX_0_298631336  ((INT32)  2446)  /* FIX(0.298631336) */
#define FIX_0_390180644  ((INT32)  3196)  /* FIX(0.390180644) */
#define FIX_0_541196100  ((INT32)  4433)  /* FIX(0.541196100) */
#define FIX_0_765366865  ((INT32)  6270)  /* FIX(0.765366865) */
#define FIX_0_899976223  ((INT32)  7373)  /* FIX(0.899976223) */
#define FIX_1_175875602  ((INT32)  9633)  /* FIX(1.175875602) */
#define FIX_1_501321110  ((INT32)  12299) /* FIX(1.501321110) */
#define FIX_1_847759065  ((INT32)  15137) /* FIX(1.847759065) */
#define FIX_1_961570560  ((INT32)  16069) /* FIX(1.961570560) */
#define FIX_2_053119869  ((INT32)  16819) /* FIX(2.053119869) */
#define FIX_2_562915447  ((INT32)  20995) /* FIX(2.562915447) */
#define FIX_3_072711026  ((INT32)  25172) /* FIX(3.072711026) */
#endif
146
147
148
/* MULTIPLY(var,cnst): product of an INT32 variable and an INT32 constant,
 * yielding an INT32 result.
 * For up to 10-bit data with the recommended scaling, both operands are
 * no more than 16 bits wide, so the 16x16->32 bit multiply can stand in
 * for a full 32x32 multiply.  Higher bit depths need the full 32-bit
 * multiplication.
 */

#if JPEG_DATA_PRECISION > 10 || BITS_IN_JSAMPLE > 13
#define MULTIPLY(var,cnst)  ((var) * (cnst))
#else
#define MULTIPLY(var,cnst)  MULTIPLY16C16(var,cnst)
#endif
160
161
162
/* Dequantize a coefficient by multiplying it by the multiplier-table
163
 * entry; produce an int result.  In this module, both inputs and result
164
 * are 16 bits or less, so either int or short multiply will work.
165
 */
166
167
747M
/* The coefficient is converted to ISLOW_MULT_TYPE before the multiply. */
#define DEQUANTIZE(coefficient,multiplier) \
  (((ISLOW_MULT_TYPE) (coefficient)) * (multiplier))
168
169
170
/* Pass 2 range center and fudge factor for final descale and range-limit.
 *
 * PASS2_OFFSET is RANGE_CENTER scaled up by PASS2_BITS, plus one half unit
 * at that scale (the rounding term) whenever PASS2_BITS is nonzero.
 * Every expansion is fully parenthesized so the macro behaves as a single
 * operand wherever it is used.
 */

#if PASS2_BITS > 1
#define PASS2_OFFSET  \
  ((((INT32) RANGE_CENTER) << PASS2_BITS) + (ONE << (PASS2_BITS-1)))
#elif PASS2_BITS > 0
#define PASS2_OFFSET  ((((INT32) RANGE_CENTER) << 1) + ONE)
#else
#define PASS2_OFFSET  ((INT32) RANGE_CENTER)
#endif
182
183
184
/*
185
 * Perform dequantization and inverse DCT on one block of coefficients.
186
 *
187
 * Optimized algorithm with 12 multiplications in the 1-D kernel.
188
 * cK represents sqrt(2) * cos(K*pi/16).
189
 */
190
191
GLOBAL(void)
192
jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
193
     JCOEFPTR coef_block,
194
     JSAMPARRAY output_buf, JDIMENSION output_col)
195
55.0M
{
196
55.0M
  INT32 tmp0, tmp1, tmp2, tmp3;
197
55.0M
  INT32 tmp10, tmp11, tmp12, tmp13;
198
55.0M
  INT32 z1, z2, z3;
199
55.0M
  JCOEFPTR inptr;
200
55.0M
  ISLOW_MULT_TYPE * quantptr;
201
55.0M
  int * wsptr;
202
55.0M
  JSAMPROW outptr;
203
55.0M
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
204
55.0M
  int ctr;
205
55.0M
  int workspace[DCTSIZE2];  /* buffers data between passes */
206
  SHIFT_TEMPS
207
208
  /* Pass 1: process columns from input, store into work array.
209
   * Note results are scaled up by sqrt(8) compared to a true IDCT;
210
   * furthermore, we scale the results by 2**PASS1_BITS.
211
   */
212
213
55.0M
  inptr = coef_block;
214
55.0M
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
215
55.0M
  wsptr = workspace;
216
495M
  for (ctr = DCTSIZE; ctr > 0; ctr--) {
217
    /* Due to quantization, we will usually find that many of the input
218
     * coefficients are zero, especially the AC terms.  We can exploit this
219
     * by short-circuiting the IDCT calculation for any column in which all
220
     * the AC terms are zero.  In that case each output is equal to the
221
     * DC coefficient (with scale factor as needed).
222
     * With typical images and quantization tables, half or more of the
223
     * column DCT calculations can be simplified this way.
224
     */
225
226
440M
    if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
227
398M
  inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
228
397M
  inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
229
396M
  inptr[DCTSIZE*7] == 0) {
230
      /* AC terms all zero */
231
396M
#if PASS1_BITS > 0
232
396M
      int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
233
#else
234
      int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
235
#endif
236
237
396M
      wsptr[DCTSIZE*0] = dcval;
238
396M
      wsptr[DCTSIZE*1] = dcval;
239
396M
      wsptr[DCTSIZE*2] = dcval;
240
396M
      wsptr[DCTSIZE*3] = dcval;
241
396M
      wsptr[DCTSIZE*4] = dcval;
242
396M
      wsptr[DCTSIZE*5] = dcval;
243
396M
      wsptr[DCTSIZE*6] = dcval;
244
396M
      wsptr[DCTSIZE*7] = dcval;
245
246
396M
      inptr++;      /* advance pointers to next column */
247
396M
      quantptr++;
248
396M
      wsptr++;
249
396M
      continue;
250
396M
    }
251
252
    /* Even part: reverse the even part of the forward DCT.
253
     * The rotator is c(-6).
254
     */
255
256
43.9M
    z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
257
43.9M
    z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
258
43.9M
    z2 <<= CONST_BITS;
259
43.9M
    z3 <<= CONST_BITS;
260
    /* Add fudge factor here for final descale. */
261
43.9M
    z2 += ONE << (CONST_BITS-PASS1_BITS-1);
262
263
43.9M
    tmp0 = z2 + z3;
264
43.9M
    tmp1 = z2 - z3;
265
266
43.9M
    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
267
43.9M
    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
268
269
43.9M
    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
270
43.9M
    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
271
43.9M
    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
272
273
43.9M
    tmp10 = tmp0 + tmp2;
274
43.9M
    tmp13 = tmp0 - tmp2;
275
43.9M
    tmp11 = tmp1 + tmp3;
276
43.9M
    tmp12 = tmp1 - tmp3;
277
278
    /* Odd part per figure 8; the matrix is unitary and hence its
279
     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
280
     */
281
282
43.9M
    tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
283
43.9M
    tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
284
43.9M
    tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
285
43.9M
    tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
286
287
43.9M
    z2 = tmp0 + tmp2;
288
43.9M
    z3 = tmp1 + tmp3;
289
290
43.9M
    z1 = MULTIPLY(z2 + z3, FIX_1_175875602);       /*  c3 */
291
43.9M
    z2 = MULTIPLY(z2, - FIX_1_961570560);          /* -c3-c5 */
292
43.9M
    z3 = MULTIPLY(z3, - FIX_0_390180644);          /* -c3+c5 */
293
43.9M
    z2 += z1;
294
43.9M
    z3 += z1;
295
296
43.9M
    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
297
43.9M
    tmp0 = MULTIPLY(tmp0, FIX_0_298631336);        /* -c1+c3+c5-c7 */
298
43.9M
    tmp3 = MULTIPLY(tmp3, FIX_1_501321110);        /*  c1+c3-c5-c7 */
299
43.9M
    tmp0 += z1 + z2;
300
43.9M
    tmp3 += z1 + z3;
301
302
43.9M
    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
303
43.9M
    tmp1 = MULTIPLY(tmp1, FIX_2_053119869);        /*  c1+c3-c5+c7 */
304
43.9M
    tmp2 = MULTIPLY(tmp2, FIX_3_072711026);        /*  c1+c3+c5-c7 */
305
43.9M
    tmp1 += z1 + z3;
306
43.9M
    tmp2 += z1 + z2;
307
308
    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
309
310
43.9M
    wsptr[DCTSIZE*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
311
43.9M
    wsptr[DCTSIZE*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
312
43.9M
    wsptr[DCTSIZE*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
313
43.9M
    wsptr[DCTSIZE*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
314
43.9M
    wsptr[DCTSIZE*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
315
43.9M
    wsptr[DCTSIZE*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
316
43.9M
    wsptr[DCTSIZE*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
317
43.9M
    wsptr[DCTSIZE*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
318
319
43.9M
    inptr++;      /* advance pointers to next column */
320
43.9M
    quantptr++;
321
43.9M
    wsptr++;
322
43.9M
  }
323
324
  /* Pass 2: process rows from work array, store into output array.
325
   * Note that we must descale the results by a factor of 8 == 2**3,
326
   * which is folded into the PASS2_BITS value.
327
   */
328
329
55.0M
  wsptr = workspace;
330
495M
  for (ctr = 0; ctr < DCTSIZE; ctr++) {
331
440M
    outptr = output_buf[ctr] + output_col;
332
333
    /* Add range center and fudge factor for final descale and range-limit. */
334
440M
    z2 = (INT32) wsptr[0] + PASS2_OFFSET;
335
336
    /* Rows of zeroes can be exploited in the same way as we did with columns.
337
     * However, the column calculation has created many nonzero AC terms, so
338
     * the simplification applies less often (typically 5% to 10% of the time).
339
     * On machines with very fast multiplication, it's possible that the
340
     * test takes more time than it's worth.  In that case this section
341
     * may be commented out.
342
     */
343
344
440M
#ifndef NO_ZERO_ROW_TEST
345
440M
    if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 &&
346
362M
  wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
347
      /* AC terms all zero */
348
361M
#if PASS2_BITS > 0
349
361M
      JSAMPLE dcval = range_limit[(int) RIGHT_SHIFT(z2, PASS2_BITS)
350
361M
          & RANGE_MASK];
351
#else
352
      JSAMPLE dcval = range_limit[(int) z2 & RANGE_MASK];
353
#endif
354
355
361M
      outptr[0] = dcval;
356
361M
      outptr[1] = dcval;
357
361M
      outptr[2] = dcval;
358
361M
      outptr[3] = dcval;
359
361M
      outptr[4] = dcval;
360
361M
      outptr[5] = dcval;
361
361M
      outptr[6] = dcval;
362
361M
      outptr[7] = dcval;
363
364
361M
      wsptr += DCTSIZE;   /* advance pointer to next row */
365
361M
      continue;
366
361M
    }
367
79.0M
#endif
368
369
    /* Even part: reverse the even part of the forward DCT.
370
     * The rotator is c(-6).
371
     */
372
373
79.0M
    z3 = (INT32) wsptr[4];
374
79.0M
    z2 <<= CONST_BITS;
375
79.0M
    z3 <<= CONST_BITS;
376
#if PASS2_BITS == 0
377
    /* Add fudge factor here for final descale. */
378
    z2 += ONE << (CONST_BITS-1);
379
#endif
380
381
79.0M
    tmp0 = z2 + z3;
382
79.0M
    tmp1 = z2 - z3;
383
384
79.0M
    z2 = (INT32) wsptr[2];
385
79.0M
    z3 = (INT32) wsptr[6];
386
387
79.0M
    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
388
79.0M
    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
389
79.0M
    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
390
391
79.0M
    tmp10 = tmp0 + tmp2;
392
79.0M
    tmp13 = tmp0 - tmp2;
393
79.0M
    tmp11 = tmp1 + tmp3;
394
79.0M
    tmp12 = tmp1 - tmp3;
395
396
    /* Odd part per figure 8; the matrix is unitary and hence its
397
     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
398
     */
399
400
79.0M
    tmp0 = (INT32) wsptr[7];
401
79.0M
    tmp1 = (INT32) wsptr[5];
402
79.0M
    tmp2 = (INT32) wsptr[3];
403
79.0M
    tmp3 = (INT32) wsptr[1];
404
405
79.0M
    z2 = tmp0 + tmp2;
406
79.0M
    z3 = tmp1 + tmp3;
407
408
79.0M
    z1 = MULTIPLY(z2 + z3, FIX_1_175875602);       /*  c3 */
409
79.0M
    z2 = MULTIPLY(z2, - FIX_1_961570560);          /* -c3-c5 */
410
79.0M
    z3 = MULTIPLY(z3, - FIX_0_390180644);          /* -c3+c5 */
411
79.0M
    z2 += z1;
412
79.0M
    z3 += z1;
413
414
79.0M
    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
415
79.0M
    tmp0 = MULTIPLY(tmp0, FIX_0_298631336);        /* -c1+c3+c5-c7 */
416
79.0M
    tmp3 = MULTIPLY(tmp3, FIX_1_501321110);        /*  c1+c3-c5-c7 */
417
79.0M
    tmp0 += z1 + z2;
418
79.0M
    tmp3 += z1 + z3;
419
420
79.0M
    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
421
79.0M
    tmp1 = MULTIPLY(tmp1, FIX_2_053119869);        /*  c1+c3-c5+c7 */
422
79.0M
    tmp2 = MULTIPLY(tmp2, FIX_3_072711026);        /*  c1+c3+c5-c7 */
423
79.0M
    tmp1 += z1 + z3;
424
79.0M
    tmp2 += z1 + z2;
425
426
    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
427
428
79.0M
    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
429
79.0M
                CONST_BITS+PASS2_BITS)
430
79.0M
          & RANGE_MASK];
431
79.0M
    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
432
79.0M
                CONST_BITS+PASS2_BITS)
433
79.0M
          & RANGE_MASK];
434
79.0M
    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
435
79.0M
                CONST_BITS+PASS2_BITS)
436
79.0M
          & RANGE_MASK];
437
79.0M
    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
438
79.0M
                CONST_BITS+PASS2_BITS)
439
79.0M
          & RANGE_MASK];
440
79.0M
    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
441
79.0M
                CONST_BITS+PASS2_BITS)
442
79.0M
          & RANGE_MASK];
443
79.0M
    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
444
79.0M
                CONST_BITS+PASS2_BITS)
445
79.0M
          & RANGE_MASK];
446
79.0M
    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
447
79.0M
                CONST_BITS+PASS2_BITS)
448
79.0M
          & RANGE_MASK];
449
79.0M
    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
450
79.0M
                CONST_BITS+PASS2_BITS)
451
79.0M
          & RANGE_MASK];
452
453
79.0M
    wsptr += DCTSIZE;   /* advance pointer to next row */
454
79.0M
  }
455
55.0M
}
456
457
#ifdef IDCT_SCALING_SUPPORTED
458
459
460
/*
461
 * Perform dequantization and inverse DCT on one block of coefficients,
462
 * producing a reduced-size 7x7 output block.
463
 *
464
 * Optimized algorithm with 12 multiplications in the 1-D kernel.
465
 * cK represents sqrt(2) * cos(K*pi/14).
466
 */
467
468
GLOBAL(void)
469
jpeg_idct_7x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
470
         JCOEFPTR coef_block,
471
         JSAMPARRAY output_buf, JDIMENSION output_col)
472
{
473
  INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12, tmp13;
474
  INT32 z1, z2, z3;
475
  JCOEFPTR inptr;
476
  ISLOW_MULT_TYPE * quantptr;
477
  int * wsptr;
478
  JSAMPROW outptr;
479
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
480
  int ctr;
481
  int workspace[7*7]; /* buffers data between passes */
482
  SHIFT_TEMPS
483
484
  /* Pass 1: process columns from input, store into work array. */
485
486
  inptr = coef_block;
487
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
488
  wsptr = workspace;
489
  for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) {
490
    /* Even part */
491
492
    tmp13 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
493
    tmp13 <<= CONST_BITS;
494
    /* Add fudge factor here for final descale. */
495
    tmp13 += ONE << (CONST_BITS-PASS1_BITS-1);
496
497
    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
498
    z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
499
    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
500
501
    tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734));     /* c4 */
502
    tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123));     /* c6 */
503
    tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
504
    tmp0 = z1 + z3;
505
    z2 -= tmp0;
506
    tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
507
    tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536));  /* c2-c4-c6 */
508
    tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249));  /* c2+c4+c6 */
509
    tmp13 += MULTIPLY(z2, FIX(1.414213562));         /* c0 */
510
511
    /* Odd part */
512
513
    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
514
    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
515
    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
516
517
    tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347));      /* (c3+c1-c5)/2 */
518
    tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339));      /* (c3+c5-c1)/2 */
519
    tmp0 = tmp1 - tmp2;
520
    tmp1 += tmp2;
521
    tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276));    /* -c1 */
522
    tmp1 += tmp2;
523
    z2 = MULTIPLY(z1 + z3, FIX(0.613604268));        /* c5 */
524
    tmp0 += z2;
525
    tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693));     /* c3+c1-c5 */
526
527
    /* Final output stage */
528
529
    wsptr[7*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
530
    wsptr[7*6] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
531
    wsptr[7*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
532
    wsptr[7*5] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
533
    wsptr[7*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
534
    wsptr[7*4] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
535
    wsptr[7*3] = (int) RIGHT_SHIFT(tmp13, CONST_BITS-PASS1_BITS);
536
  }
537
538
  /* Pass 2: process 7 rows from work array, store into output array. */
539
540
  wsptr = workspace;
541
  for (ctr = 0; ctr < 7; ctr++) {
542
    outptr = output_buf[ctr] + output_col;
543
544
    /* Even part */
545
546
    /* Add range center and fudge factor for final descale and range-limit. */
547
    tmp13 = (INT32) wsptr[0] + PASS2_OFFSET;
548
    tmp13 <<= CONST_BITS;
549
#if PASS2_BITS == 0
550
    tmp13 += ONE << (CONST_BITS-1);
551
#endif
552
553
    z1 = (INT32) wsptr[2];
554
    z2 = (INT32) wsptr[4];
555
    z3 = (INT32) wsptr[6];
556
557
    tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734));     /* c4 */
558
    tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123));     /* c6 */
559
    tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
560
    tmp0 = z1 + z3;
561
    z2 -= tmp0;
562
    tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
563
    tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536));  /* c2-c4-c6 */
564
    tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249));  /* c2+c4+c6 */
565
    tmp13 += MULTIPLY(z2, FIX(1.414213562));         /* c0 */
566
567
    /* Odd part */
568
569
    z1 = (INT32) wsptr[1];
570
    z2 = (INT32) wsptr[3];
571
    z3 = (INT32) wsptr[5];
572
573
    tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347));      /* (c3+c1-c5)/2 */
574
    tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339));      /* (c3+c5-c1)/2 */
575
    tmp0 = tmp1 - tmp2;
576
    tmp1 += tmp2;
577
    tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276));    /* -c1 */
578
    tmp1 += tmp2;
579
    z2 = MULTIPLY(z1 + z3, FIX(0.613604268));        /* c5 */
580
    tmp0 += z2;
581
    tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693));     /* c3+c1-c5 */
582
583
    /* Final output stage */
584
585
    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
586
                CONST_BITS+PASS2_BITS)
587
          & RANGE_MASK];
588
    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
589
                CONST_BITS+PASS2_BITS)
590
          & RANGE_MASK];
591
    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
592
                CONST_BITS+PASS2_BITS)
593
          & RANGE_MASK];
594
    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
595
                CONST_BITS+PASS2_BITS)
596
          & RANGE_MASK];
597
    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
598
                CONST_BITS+PASS2_BITS)
599
          & RANGE_MASK];
600
    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
601
                CONST_BITS+PASS2_BITS)
602
          & RANGE_MASK];
603
    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13,
604
                CONST_BITS+PASS2_BITS)
605
          & RANGE_MASK];
606
607
    wsptr += 7;   /* advance pointer to next row */
608
  }
609
}
610
611
612
/*
613
 * Perform dequantization and inverse DCT on one block of coefficients,
614
 * producing a reduced-size 6x6 output block.
615
 *
616
 * Optimized algorithm with 3 multiplications in the 1-D kernel.
617
 * cK represents sqrt(2) * cos(K*pi/12).
618
 */
619
620
GLOBAL(void)
621
jpeg_idct_6x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
622
         JCOEFPTR coef_block,
623
         JSAMPARRAY output_buf, JDIMENSION output_col)
624
{
625
  INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
626
  INT32 z1, z2, z3;
627
  JCOEFPTR inptr;
628
  ISLOW_MULT_TYPE * quantptr;
629
  int * wsptr;
630
  JSAMPROW outptr;
631
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
632
  int ctr;
633
  int workspace[6*6]; /* buffers data between passes */
634
  SHIFT_TEMPS
635
636
  /* Pass 1: process columns from input, store into work array. */
637
638
  inptr = coef_block;
639
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
640
  wsptr = workspace;
641
  for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
642
    /* Even part */
643
644
    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
645
    tmp0 <<= CONST_BITS;
646
    /* Add fudge factor here for final descale. */
647
    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
648
    tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
649
    tmp10 = MULTIPLY(tmp2, FIX(0.707106781));   /* c4 */
650
    tmp1 = tmp0 + tmp10;
651
    tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS);
652
    tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
653
    tmp0 = MULTIPLY(tmp10, FIX(1.224744871));   /* c2 */
654
    tmp10 = tmp1 + tmp0;
655
    tmp12 = tmp1 - tmp0;
656
657
    /* Odd part */
658
659
    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
660
    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
661
    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
662
    tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
663
    tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
664
    tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
665
#if PASS1_BITS > 0
666
    tmp1 = (z1 - z2 - z3) << PASS1_BITS;
667
#else
668
    tmp1 = z1 - z2 - z3;
669
#endif
670
671
    /* Final output stage */
672
673
    wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
674
    wsptr[6*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
675
    wsptr[6*1] = (int) (tmp11 + tmp1);
676
    wsptr[6*4] = (int) (tmp11 - tmp1);
677
    wsptr[6*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
678
    wsptr[6*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
679
  }
680
681
  /* Pass 2: process 6 rows from work array, store into output array. */
682
683
  wsptr = workspace;
684
  for (ctr = 0; ctr < 6; ctr++) {
685
    outptr = output_buf[ctr] + output_col;
686
687
    /* Even part */
688
689
    /* Add range center and fudge factor for final descale and range-limit. */
690
    tmp0 = (INT32) wsptr[0] + PASS2_OFFSET;
691
    tmp0 <<= CONST_BITS;
692
#if PASS2_BITS == 0
693
    tmp0 += ONE << (CONST_BITS-1);
694
#endif
695
    tmp2 = (INT32) wsptr[4];
696
    tmp10 = MULTIPLY(tmp2, FIX(0.707106781));   /* c4 */
697
    tmp1 = tmp0 + tmp10;
698
    tmp11 = tmp0 - tmp10 - tmp10;
699
    tmp10 = (INT32) wsptr[2];
700
    tmp0 = MULTIPLY(tmp10, FIX(1.224744871));   /* c2 */
701
    tmp10 = tmp1 + tmp0;
702
    tmp12 = tmp1 - tmp0;
703
704
    /* Odd part */
705
706
    z1 = (INT32) wsptr[1];
707
    z2 = (INT32) wsptr[3];
708
    z3 = (INT32) wsptr[5];
709
    tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
710
    tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
711
    tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
712
    tmp1 = (z1 - z2 - z3) << CONST_BITS;
713
714
    /* Final output stage */
715
716
    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
717
                CONST_BITS+PASS2_BITS)
718
          & RANGE_MASK];
719
    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
720
                CONST_BITS+PASS2_BITS)
721
          & RANGE_MASK];
722
    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
723
                CONST_BITS+PASS2_BITS)
724
          & RANGE_MASK];
725
    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
726
                CONST_BITS+PASS2_BITS)
727
          & RANGE_MASK];
728
    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
729
                CONST_BITS+PASS2_BITS)
730
          & RANGE_MASK];
731
    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
732
                CONST_BITS+PASS2_BITS)
733
          & RANGE_MASK];
734
735
    wsptr += 6;   /* advance pointer to next row */
736
  }
737
}
738
739
740
/*
741
 * Perform dequantization and inverse DCT on one block of coefficients,
742
 * producing a reduced-size 5x5 output block.
743
 *
744
 * Optimized algorithm with 5 multiplications in the 1-D kernel.
745
 * cK represents sqrt(2) * cos(K*pi/10).
746
 */
747
748
GLOBAL(void)
749
jpeg_idct_5x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
750
         JCOEFPTR coef_block,
751
         JSAMPARRAY output_buf, JDIMENSION output_col)
752
{
753
  INT32 tmp0, tmp1, tmp10, tmp11, tmp12;
754
  INT32 z1, z2, z3;
755
  JCOEFPTR inptr;
756
  ISLOW_MULT_TYPE * quantptr;
757
  int * wsptr;
758
  JSAMPROW outptr;
759
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
760
  int ctr;
761
  int workspace[5*5]; /* buffers data between passes */
762
  SHIFT_TEMPS
763
764
  /* Pass 1: process columns from input, store into work array. */
765
766
  inptr = coef_block;
767
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
768
  wsptr = workspace;
769
  for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) {
770
    /* Even part */
771
772
    tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
773
    tmp12 <<= CONST_BITS;
774
    /* Add fudge factor here for final descale. */
775
    tmp12 += ONE << (CONST_BITS-PASS1_BITS-1);
776
    tmp0 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
777
    tmp1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
778
    z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
779
    z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
780
    z3 = tmp12 + z2;
781
    tmp10 = z3 + z1;
782
    tmp11 = z3 - z1;
783
    tmp12 -= z2 << 2;
784
785
    /* Odd part */
786
787
    z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
788
    z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
789
790
    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));     /* c3 */
791
    tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148));   /* c1-c3 */
792
    tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899));   /* c1+c3 */
793
794
    /* Final output stage */
795
796
    wsptr[5*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
797
    wsptr[5*4] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
798
    wsptr[5*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
799
    wsptr[5*3] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
800
    wsptr[5*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS);
801
  }
802
803
  /* Pass 2: process 5 rows from work array, store into output array. */
804
805
  wsptr = workspace;
806
  for (ctr = 0; ctr < 5; ctr++) {
807
    outptr = output_buf[ctr] + output_col;
808
809
    /* Even part */
810
811
    /* Add range center and fudge factor for final descale and range-limit. */
812
    tmp12 = (INT32) wsptr[0] + PASS2_OFFSET;
813
    tmp12 <<= CONST_BITS;
814
#if PASS2_BITS == 0
815
    tmp12 += ONE << (CONST_BITS-1);
816
#endif
817
    tmp0 = (INT32) wsptr[2];
818
    tmp1 = (INT32) wsptr[4];
819
    z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
820
    z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
821
    z3 = tmp12 + z2;
822
    tmp10 = z3 + z1;
823
    tmp11 = z3 - z1;
824
    tmp12 -= z2 << 2;
825
826
    /* Odd part */
827
828
    z2 = (INT32) wsptr[1];
829
    z3 = (INT32) wsptr[3];
830
831
    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));     /* c3 */
832
    tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148));   /* c1-c3 */
833
    tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899));   /* c1+c3 */
834
835
    /* Final output stage */
836
837
    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
838
                CONST_BITS+PASS2_BITS)
839
          & RANGE_MASK];
840
    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
841
                CONST_BITS+PASS2_BITS)
842
          & RANGE_MASK];
843
    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
844
                CONST_BITS+PASS2_BITS)
845
          & RANGE_MASK];
846
    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
847
                CONST_BITS+PASS2_BITS)
848
          & RANGE_MASK];
849
    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12,
850
                CONST_BITS+PASS2_BITS)
851
          & RANGE_MASK];
852
853
    wsptr += 5;   /* advance pointer to next row */
854
  }
855
}
856
857
858
/*
859
 * Perform dequantization and inverse DCT on one block of coefficients,
860
 * producing a reduced-size 4x4 output block.
861
 *
862
 * Optimized algorithm with 3 multiplications in the 1-D kernel.
863
 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
864
 */
865
866
/*
 * jpeg_idct_4x4: dequantize one coefficient block and emit a 4x4 block of
 * samples, using two separable 1-D passes over a 4x4 int workspace.
 *
 * cinfo      - handed to IDCT_range_limit() to obtain the output lookup table.
 * compptr    - its dct_table member supplies the dequantization multipliers.
 * coef_block - input coefficients; only rows 0..3 of columns 0..3 are read
 *              (row stride is DCTSIZE).
 * output_buf - array of output row pointers; rows 0..3 are written.
 * output_col - column offset added to every output row pointer.
 */
GLOBAL(void)
867
jpeg_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
868
         JCOEFPTR coef_block,
869
         JSAMPARRAY output_buf, JDIMENSION output_col)
870
{
871
  INT32 tmp0, tmp2, tmp10, tmp12;
872
  INT32 z1, z2, z3;
873
  JCOEFPTR inptr;
874
  ISLOW_MULT_TYPE * quantptr;
875
  int * wsptr;
876
  JSAMPROW outptr;
877
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
878
  int ctr;
879
  int workspace[4*4]; /* buffers data between passes */
880
  SHIFT_TEMPS
881
882
  /* Pass 1: process columns from input, store into work array. */
  /* inptr, quantptr and wsptr advance together, one column per iteration. */
883
884
  inptr = coef_block;
885
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
886
  wsptr = workspace;
887
  for (ctr = 0; ctr < 4; ctr++, inptr++, quantptr++, wsptr++) {
888
    /* Even part */
    /* No multiplication here: rows 0 and 2 are only added/subtracted and
     * then shifted up by PASS1_BITS (when that is non-zero).
     */
889
890
    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
891
    tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
892
    
893
#if PASS1_BITS > 0
    /* NOTE(review): if INT32 is signed and the sum is negative, this left
     * shift is formally undefined in ISO C - confirm for target compilers.
     */
894
    tmp10 = (tmp0 + tmp2) << PASS1_BITS;
895
    tmp12 = (tmp0 - tmp2) << PASS1_BITS;
896
#else
897
    tmp10 = tmp0 + tmp2;
898
    tmp12 = tmp0 - tmp2;
899
#endif
900
901
    /* Odd part */
902
    /* Same rotation as in the even part of the 8x8 LL&M IDCT */
903
904
    z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
905
    z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
906
907
    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);               /* c6 */
908
    /* Add fudge factor here for final descale. */
    /* z1 feeds both tmp0 and tmp2, so one addition covers both right shifts. */
909
    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
    /* tmp0/tmp2 are reused: from here on they hold the descaled odd terms. */
910
    tmp0 = RIGHT_SHIFT(z1 + MULTIPLY(z2, FIX_0_765366865), /* c2-c6 */
911
           CONST_BITS-PASS1_BITS);
912
    tmp2 = RIGHT_SHIFT(z1 - MULTIPLY(z3, FIX_1_847759065), /* c2+c6 */
913
           CONST_BITS-PASS1_BITS);
914
915
    /* Final output stage */
    /* wsptr[4*k] is row k of the current workspace column. */
916
917
    wsptr[4*0] = (int) (tmp10 + tmp0);
918
    wsptr[4*3] = (int) (tmp10 - tmp0);
919
    wsptr[4*1] = (int) (tmp12 + tmp2);
920
    wsptr[4*2] = (int) (tmp12 - tmp2);
921
  }
922
923
  /* Pass 2: process 4 rows from work array, store into output array. */
924
925
  wsptr = workspace;
926
  for (ctr = 0; ctr < 4; ctr++) {
927
    outptr = output_buf[ctr] + output_col;
928
929
    /* Even part */
930
931
    /* Add range center and fudge factor for final descale and range-limit. */
932
    tmp0 = (INT32) wsptr[0] + PASS2_OFFSET;
933
    tmp2 = (INT32) wsptr[2];
    /* Shift the even inputs up by CONST_BITS so they can be summed with the
     * MULTIPLY products of the odd part; the common descale is the
     * RIGHT_SHIFT by CONST_BITS+PASS2_BITS in the output stage.
     * NOTE(review): left shift of a negative signed value is formally
     * undefined in ISO C - confirm for target compilers.
     */
934
    tmp0 <<= CONST_BITS;
935
    tmp2 <<= CONST_BITS;
936
#if PASS2_BITS == 0
    /* With no PASS2_BITS the rounding term is added separately here. */
937
    tmp0 += ONE << (CONST_BITS-1);
938
#endif
939
940
    tmp10 = tmp0 + tmp2;
941
    tmp12 = tmp0 - tmp2;
942
943
    /* Odd part */
944
    /* Same rotation as in the even part of the 8x8 LL&M IDCT */
945
946
    z2 = (INT32) wsptr[1];
947
    z3 = (INT32) wsptr[3];
948
949
    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);   /* c6 */
950
    tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
951
    tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
952
953
    /* Final output stage */
    /* Each index is descaled and masked with RANGE_MASK before the
     * range_limit[] table lookup.
     */
954
955
    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
956
                CONST_BITS+PASS2_BITS)
957
          & RANGE_MASK];
958
    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
959
                CONST_BITS+PASS2_BITS)
960
          & RANGE_MASK];
961
    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
962
                CONST_BITS+PASS2_BITS)
963
          & RANGE_MASK];
964
    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
965
                CONST_BITS+PASS2_BITS)
966
          & RANGE_MASK];
967
968
    wsptr += 4;   /* advance pointer to next row */
969
  }
970
}
971
972
973
/*
974
 * Perform dequantization and inverse DCT on one block of coefficients,
975
 * producing a reduced-size 3x3 output block.
976
 *
977
 * Optimized algorithm with 2 multiplications in the 1-D kernel.
978
 * cK represents sqrt(2) * cos(K*pi/6).
979
 */
980
981
/*
 * jpeg_idct_3x3: dequantize one coefficient block and emit a 3x3 block of
 * samples, using two separable 1-D passes over a 3x3 int workspace.
 *
 * cinfo      - handed to IDCT_range_limit() to obtain the output lookup table.
 * compptr    - its dct_table member supplies the dequantization multipliers.
 * coef_block - input coefficients; only rows 0..2 of columns 0..2 are read
 *              (row stride is DCTSIZE).
 * output_buf - array of output row pointers; rows 0..2 are written.
 * output_col - column offset added to every output row pointer.
 */
GLOBAL(void)
982
jpeg_idct_3x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
983
         JCOEFPTR coef_block,
984
         JSAMPARRAY output_buf, JDIMENSION output_col)
985
{
986
  INT32 tmp0, tmp2, tmp10, tmp12;
987
  JCOEFPTR inptr;
988
  ISLOW_MULT_TYPE * quantptr;
989
  int * wsptr;
990
  JSAMPROW outptr;
991
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
992
  int ctr;
993
  int workspace[3*3]; /* buffers data between passes */
994
  SHIFT_TEMPS
995
996
  /* Pass 1: process columns from input, store into work array. */
  /* inptr, quantptr and wsptr advance together, one column per iteration. */
997
998
  inptr = coef_block;
999
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1000
  wsptr = workspace;
1001
  for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) {
1002
    /* Even part */
1003
1004
    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
    /* Bring the row-0 term to the scale of the MULTIPLY products it is
     * summed with below.
     * NOTE(review): left shift of a negative signed value is formally
     * undefined in ISO C - confirm for target compilers.
     */
1005
    tmp0 <<= CONST_BITS;
1006
    /* Add fudge factor here for final descale. */
1007
    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
1008
    tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1009
    tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
1010
    tmp10 = tmp0 + tmp12;
    /* Middle output: row-0 term minus twice the c2 product. */
1011
    tmp2 = tmp0 - tmp12 - tmp12;
1012
1013
    /* Odd part */
    /* tmp12 and tmp0 are reused as scratch; the even results live on in
     * tmp10 and tmp2.
     */
1014
1015
    tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1016
    tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
1017
1018
    /* Final output stage */
    /* wsptr[3*k] is row k of the current workspace column. */
1019
1020
    wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
1021
    wsptr[3*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
1022
    wsptr[3*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
1023
  }
1024
1025
  /* Pass 2: process 3 rows from work array, store into output array. */
1026
1027
  wsptr = workspace;
1028
  for (ctr = 0; ctr < 3; ctr++) {
1029
    outptr = output_buf[ctr] + output_col;
1030
1031
    /* Even part */
1032
1033
    /* Add range center and fudge factor for final descale and range-limit. */
1034
    tmp0 = (INT32) wsptr[0] + PASS2_OFFSET;
    /* NOTE(review): same signed left-shift caveat as in pass 1. */
1035
    tmp0 <<= CONST_BITS;
1036
#if PASS2_BITS == 0
    /* With no PASS2_BITS the rounding term is added separately here. */
1037
    tmp0 += ONE << (CONST_BITS-1);
1038
#endif
1039
    tmp2 = (INT32) wsptr[2];
1040
    tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
1041
    tmp10 = tmp0 + tmp12;
1042
    tmp2 = tmp0 - tmp12 - tmp12;
1043
1044
    /* Odd part */
1045
1046
    tmp12 = (INT32) wsptr[1];
1047
    tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
1048
1049
    /* Final output stage */
    /* Each index is descaled and masked with RANGE_MASK before the
     * range_limit[] table lookup.
     */
1050
1051
    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
1052
                CONST_BITS+PASS2_BITS)
1053
          & RANGE_MASK];
1054
    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
1055
                CONST_BITS+PASS2_BITS)
1056
          & RANGE_MASK];
1057
    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2,
1058
                CONST_BITS+PASS2_BITS)
1059
          & RANGE_MASK];
1060
1061
    wsptr += 3;   /* advance pointer to next row */
1062
  }
1063
}
1064
1065
1066
/*
1067
 * Perform dequantization and inverse DCT on one block of coefficients,
1068
 * producing a reduced-size 2x2 output block.
1069
 *
1070
 * Multiplication-less algorithm.
1071
 */
1072
1073
/*
 * jpeg_idct_2x2: dequantize the four lowest-frequency coefficients and emit
 * a 2x2 block of samples using only additions, subtractions and shifts.
 * No workspace array is used; the column results stay in tmp0..tmp3.
 *
 * cinfo      - handed to IDCT_range_limit() to obtain the output lookup table.
 * compptr    - its dct_table member supplies the dequantization multipliers.
 * coef_block - input coefficients; only indices DCTSIZE*0, DCTSIZE*1,
 *              DCTSIZE*0+1 and DCTSIZE*1+1 are read.
 * output_buf - array of output row pointers; rows 0 and 1 are written.
 * output_col - column offset added to both output row pointers.
 *
 * The body is selected at compile time from three variants:
 *   PASS2_BITS >  PASS1_BITS : outputs are right-shifted by the difference;
 *   PASS2_BITS == PASS1_BITS : outputs are used unshifted;
 *   PASS2_BITS <  PASS1_BITS : inputs are left-shifted by the difference.
 */
GLOBAL(void)
1074
jpeg_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1075
         JCOEFPTR coef_block,
1076
         JSAMPARRAY output_buf, JDIMENSION output_col)
1077
{
1078
  DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
1079
  ISLOW_MULT_TYPE * quantptr;
1080
  JSAMPROW outptr;
1081
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1082
  ISHIFT_TEMPS
1083
1084
  /* Pass 1: process columns from input. */
1085
1086
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1087
1088
  /* Column 0 */
1089
  tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]);
1090
  tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]);
1091
1092
#if PASS2_BITS > PASS1_BITS
1093
  /* Add range center and fudge factor for final downscale and range-limit. */
  /* The offset is added to column 0 only; since tmp0 and tmp2 both derive
   * from tmp4, it reaches all four outputs.  It is pre-scaled by the amount
   * the outputs are shifted down afterwards.
   */
1094
#if PASS2_BITS > PASS1_BITS + 1
1095
  tmp4 += (((DCTELEM) RANGE_CENTER) << (PASS2_BITS-PASS1_BITS)) +
1096
    (1 << (PASS2_BITS-PASS1_BITS-1));
1097
#else
  /* Shift distance is exactly 1 here, so the constants are written out. */
1098
  tmp4 += (((DCTELEM) RANGE_CENTER) << 1) + 1;
1099
#endif
1100
1101
  tmp0 = tmp4 + tmp5;
1102
  tmp2 = tmp4 - tmp5;
1103
1104
  /* Column 1 */
1105
  tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0+1], quantptr[DCTSIZE*0+1]);
1106
  tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1+1], quantptr[DCTSIZE*1+1]);
1107
1108
  tmp1 = tmp4 + tmp5;
1109
  tmp3 = tmp4 - tmp5;
1110
1111
  /* Pass 2: process 2 rows, store into output array. */
  /* Each index is shifted down and masked with RANGE_MASK before the
   * range_limit[] table lookup.
   */
1112
1113
  /* Row 0 */
1114
  outptr = output_buf[0] + output_col;
1115
1116
  outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1,
1117
               PASS2_BITS-PASS1_BITS)
1118
        & RANGE_MASK];
1119
  outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1,
1120
               PASS2_BITS-PASS1_BITS)
1121
        & RANGE_MASK];
1122
1123
  /* Row 1 */
1124
  outptr = output_buf[1] + output_col;
1125
1126
  outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp2 + tmp3,
1127
               PASS2_BITS-PASS1_BITS)
1128
        & RANGE_MASK];
1129
  outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp2 - tmp3,
1130
               PASS2_BITS-PASS1_BITS)
1131
        & RANGE_MASK];
1132
#else
  /* PASS2_BITS <= PASS1_BITS: no final right shift, hence no rounding term. */
1133
#if PASS2_BITS == PASS1_BITS
1134
  tmp4 += (DCTELEM) RANGE_CENTER; /* add range center for final range-limit */
1135
1136
  tmp0 = tmp4 + tmp5;
1137
  tmp2 = tmp4 - tmp5;
1138
1139
  /* Column 1 */
1140
  tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0+1], quantptr[DCTSIZE*0+1]);
1141
  tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1+1], quantptr[DCTSIZE*1+1]);
1142
#else
  /* NOTE(review): if DCTELEM is signed and the value is negative, these
   * left shifts are formally undefined in ISO C - confirm for target
   * compilers.
   */
1143
  tmp4 <<= (PASS1_BITS-PASS2_BITS); /* upscale */
1144
  tmp5 <<= (PASS1_BITS-PASS2_BITS); /* upscale */
1145
1146
  tmp4 += (DCTELEM) RANGE_CENTER; /* add range center for final range-limit */
1147
1148
  tmp0 = tmp4 + tmp5;
1149
  tmp2 = tmp4 - tmp5;
1150
1151
  /* Column 1 */
1152
  tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0+1], quantptr[DCTSIZE*0+1]);
1153
  tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1+1], quantptr[DCTSIZE*1+1]);
1154
1155
  tmp4 <<= (PASS1_BITS-PASS2_BITS); /* upscale */
1156
  tmp5 <<= (PASS1_BITS-PASS2_BITS); /* upscale */
1157
#endif
1158
  /* Shared tail of both sub-variants: finish column 1, then emit. */
1159
  tmp1 = tmp4 + tmp5;
1160
  tmp3 = tmp4 - tmp5;
1161
1162
  /* Pass 2: process 2 rows, store into output array. */
1163
1164
  /* Row 0 */
1165
  outptr = output_buf[0] + output_col;
1166
1167
  outptr[0] = range_limit[(int) (tmp0 + tmp1) & RANGE_MASK];
1168
  outptr[1] = range_limit[(int) (tmp0 - tmp1) & RANGE_MASK];
1169
1170
  /* Row 1 */
1171
  outptr = output_buf[1] + output_col;
1172
1173
  outptr[0] = range_limit[(int) (tmp2 + tmp3) & RANGE_MASK];
1174
  outptr[1] = range_limit[(int) (tmp2 - tmp3) & RANGE_MASK];
1175
#endif
1176
}
1177
1178
1179
/*
1180
 * Perform dequantization and inverse DCT on one block of coefficients,
1181
 * producing a reduced-size 1x1 output block.
1182
 *
1183
 * This is just a rescale of the DC coefficient.
1184
 */
1185
1186
/*
 * jpeg_idct_1x1: dequantize coefficient 0 of the block and emit a single
 * output sample derived from it.
 *
 * cinfo      - handed to IDCT_range_limit() to obtain the output lookup table.
 * compptr    - its dct_table member supplies the dequantization multiplier
 *              (only entry 0 is read).
 * coef_block - input coefficients; only coef_block[0] is read.
 * output_buf - array of output row pointers; only row 0 is written.
 * output_col - index of the sample written within row 0.
 */
GLOBAL(void)
1187
jpeg_idct_1x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1188
         JCOEFPTR coef_block,
1189
         JSAMPARRAY output_buf, JDIMENSION output_col)
1190
{
1191
  DCTELEM dcval;
1192
  ISLOW_MULT_TYPE * quantptr;
1193
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1194
  ISHIFT_TEMPS
1195
1196
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1197
1198
  dcval = DEQUANTIZE(coef_block[0], quantptr[0]);
1199
1200
#if PASS2_BITS > PASS1_BITS
1201
  /* Add range center and fudge factor for downscale and range-limit. */
  /* The range center is pre-scaled by the amount shifted out below. */
1202
#if PASS2_BITS > PASS1_BITS + 1
1203
  dcval += (((DCTELEM) RANGE_CENTER) << (PASS2_BITS-PASS1_BITS)) +
1204
     (1 << (PASS2_BITS-PASS1_BITS-1));
1205
#else
  /* Shift distance is exactly 1 here, so the constants are written out. */
1206
  dcval += (((DCTELEM) RANGE_CENTER) << 1) + 1;
1207
#endif
1208
1209
  output_buf[0][output_col] =
1210
    range_limit[(int) IRIGHT_SHIFT(dcval, PASS2_BITS-PASS1_BITS) & RANGE_MASK];
1211
#else
  /* PASS2_BITS <= PASS1_BITS: no right shift, so RANGE_CENTER is added
   * unscaled at lookup time.
   */
1212
#if PASS2_BITS < PASS1_BITS
  /* NOTE(review): if DCTELEM is signed and dcval is negative, this left
   * shift is formally undefined in ISO C - confirm for target compilers.
   */
1213
  dcval <<= (PASS1_BITS-PASS2_BITS);  /* upscale */
1214
#endif
1215
1216
  output_buf[0][output_col] =
1217
    range_limit[((int) dcval + RANGE_CENTER) & RANGE_MASK];
1218
#endif
1219
}
1220
1221
1222
/*
1223
 * Perform dequantization and inverse DCT on one block of coefficients,
1224
 * producing a 9x9 output block.
1225
 *
1226
 * Optimized algorithm with 10 multiplications in the 1-D kernel.
1227
 * cK represents sqrt(2) * cos(K*pi/18).
1228
 */
1229
1230
/*
 * jpeg_idct_9x9: dequantize one coefficient block and emit a 9x9 block of
 * samples, using two separable 1-D passes over an int workspace of
 * 9 rows by 8 columns.
 *
 * cinfo      - handed to IDCT_range_limit() to obtain the output lookup table.
 * compptr    - its dct_table member supplies the dequantization multipliers.
 * coef_block - input coefficients; rows 0..7 of columns 0..7 are read
 *              (row stride is DCTSIZE).
 * output_buf - array of output row pointers; rows 0..8 are written.
 * output_col - column offset added to every output row pointer.
 */
GLOBAL(void)
1231
jpeg_idct_9x9 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1232
         JCOEFPTR coef_block,
1233
         JSAMPARRAY output_buf, JDIMENSION output_col)
1234
{
1235
  INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13, tmp14;
1236
  INT32 z1, z2, z3, z4;
1237
  JCOEFPTR inptr;
1238
  ISLOW_MULT_TYPE * quantptr;
1239
  int * wsptr;
1240
  JSAMPROW outptr;
1241
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1242
  int ctr;
1243
  int workspace[8*9]; /* buffers data between passes */
1244
  SHIFT_TEMPS
1245
1246
  /* Pass 1: process columns from input, store into work array. */
  /* Each of the 8 input columns yields 9 values, stored at wsptr[8*0] to
   * wsptr[8*8] (row stride 8).
   */
1247
1248
  inptr = coef_block;
1249
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1250
  wsptr = workspace;
1251
  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1252
    /* Even part */
1253
1254
    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
    /* Bring the row-0 term to the scale of the MULTIPLY products it is
     * summed with below.
     * NOTE(review): left shift of a negative signed value is formally
     * undefined in ISO C - confirm for target compilers.
     */
1255
    tmp0 <<= CONST_BITS;
1256
    /* Add fudge factor here for final descale. */
1257
    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
1258
1259
    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1260
    z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1261
    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1262
    /* tmp0..tmp3 are scratch and are reassigned several times below; the
     * even results that survive are tmp10..tmp14.
     */
1263
    tmp3 = MULTIPLY(z3, FIX(0.707106781));      /* c6 */
1264
    tmp1 = tmp0 + tmp3;
1265
    tmp2 = tmp0 - tmp3 - tmp3;
1266
1267
    tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
1268
    tmp11 = tmp2 + tmp0;
1269
    tmp14 = tmp2 - tmp0 - tmp0;
1270
1271
    tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
1272
    tmp2 = MULTIPLY(z1, FIX(1.083350441));      /* c4 */
1273
    tmp3 = MULTIPLY(z2, FIX(0.245575608));      /* c8 */
1274
1275
    tmp10 = tmp1 + tmp0 - tmp3;
1276
    tmp12 = tmp1 - tmp0 + tmp2;
1277
    tmp13 = tmp1 - tmp2 + tmp3;
1278
1279
    /* Odd part */
    /* Produces tmp0..tmp3, paired with tmp10..tmp13 in the output stage. */
1280
1281
    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1282
    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1283
    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1284
    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1285
    /* z2 is overwritten with its product by -c3 and reused three times. */
1286
    z2 = MULTIPLY(z2, - FIX(1.224744871));           /* -c3 */
1287
1288
    tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955));      /* c5 */
1289
    tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525));      /* c7 */
1290
    tmp0 = tmp2 + tmp3 - z2;
    /* tmp1 is used as scratch here and gets its final value further down. */
1291
    tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481));      /* c1 */
1292
    tmp2 += z2 - tmp1;
1293
    tmp3 += z2 + tmp1;
1294
    tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */
1295
1296
    /* Final output stage */
    /* Rows k and 8-k take the sum and difference of one even/odd pair; the
     * middle row 4 takes the even term tmp14 alone.
     */
1297
1298
    wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
1299
    wsptr[8*8] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
1300
    wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
1301
    wsptr[8*7] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
1302
    wsptr[8*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
1303
    wsptr[8*6] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
1304
    wsptr[8*3] = (int) RIGHT_SHIFT(tmp13 + tmp3, CONST_BITS-PASS1_BITS);
1305
    wsptr[8*5] = (int) RIGHT_SHIFT(tmp13 - tmp3, CONST_BITS-PASS1_BITS);
1306
    wsptr[8*4] = (int) RIGHT_SHIFT(tmp14, CONST_BITS-PASS1_BITS);
1307
  }
1308
1309
  /* Pass 2: process 9 rows from work array, store into output array. */
  /* Same 1-D kernel as pass 1, now reading the 8 entries of one workspace
   * row and writing 9 output samples.
   */
1310
1311
  wsptr = workspace;
1312
  for (ctr = 0; ctr < 9; ctr++) {
1313
    outptr = output_buf[ctr] + output_col;
1314
1315
    /* Even part */
1316
1317
    /* Add range center and fudge factor for final descale and range-limit. */
1318
    tmp0 = (INT32) wsptr[0] + PASS2_OFFSET;
    /* NOTE(review): same signed left-shift caveat as in pass 1. */
1319
    tmp0 <<= CONST_BITS;
1320
#if PASS2_BITS == 0
    /* With no PASS2_BITS the rounding term is added separately here. */
1321
    tmp0 += ONE << (CONST_BITS-1);
1322
#endif
1323
1324
    z1 = (INT32) wsptr[2];
1325
    z2 = (INT32) wsptr[4];
1326
    z3 = (INT32) wsptr[6];
1327
1328
    tmp3 = MULTIPLY(z3, FIX(0.707106781));      /* c6 */
1329
    tmp1 = tmp0 + tmp3;
1330
    tmp2 = tmp0 - tmp3 - tmp3;
1331
1332
    tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
1333
    tmp11 = tmp2 + tmp0;
1334
    tmp14 = tmp2 - tmp0 - tmp0;
1335
1336
    tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
1337
    tmp2 = MULTIPLY(z1, FIX(1.083350441));      /* c4 */
1338
    tmp3 = MULTIPLY(z2, FIX(0.245575608));      /* c8 */
1339
1340
    tmp10 = tmp1 + tmp0 - tmp3;
1341
    tmp12 = tmp1 - tmp0 + tmp2;
1342
    tmp13 = tmp1 - tmp2 + tmp3;
1343
1344
    /* Odd part */
1345
1346
    z1 = (INT32) wsptr[1];
1347
    z2 = (INT32) wsptr[3];
1348
    z3 = (INT32) wsptr[5];
1349
    z4 = (INT32) wsptr[7];
1350
1351
    z2 = MULTIPLY(z2, - FIX(1.224744871));           /* -c3 */
1352
1353
    tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955));      /* c5 */
1354
    tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525));      /* c7 */
1355
    tmp0 = tmp2 + tmp3 - z2;
1356
    tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481));      /* c1 */
1357
    tmp2 += z2 - tmp1;
1358
    tmp3 += z2 + tmp1;
1359
    tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */
1360
1361
    /* Final output stage */
    /* Each index is descaled and masked with RANGE_MASK before the
     * range_limit[] table lookup.
     */
1362
1363
    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
1364
                CONST_BITS+PASS2_BITS)
1365
          & RANGE_MASK];
1366
    outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
1367
                CONST_BITS+PASS2_BITS)
1368
          & RANGE_MASK];
1369
    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
1370
                CONST_BITS+PASS2_BITS)
1371
          & RANGE_MASK];
1372
    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
1373
                CONST_BITS+PASS2_BITS)
1374
          & RANGE_MASK];
1375
    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
1376
                CONST_BITS+PASS2_BITS)
1377
          & RANGE_MASK];
1378
    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
1379
                CONST_BITS+PASS2_BITS)
1380
          & RANGE_MASK];
1381
    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp3,
1382
                CONST_BITS+PASS2_BITS)
1383
          & RANGE_MASK];
1384
    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp3,
1385
                CONST_BITS+PASS2_BITS)
1386
          & RANGE_MASK];
1387
    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp14,
1388
                CONST_BITS+PASS2_BITS)
1389
          & RANGE_MASK];
1390
1391
    wsptr += 8;   /* advance pointer to next row */
1392
  }
1393
}
1394
1395
1396
/*
1397
 * Perform dequantization and inverse DCT on one block of coefficients,
1398
 * producing a 10x10 output block.
1399
 *
1400
 * Optimized algorithm with 12 multiplications in the 1-D kernel.
1401
 * cK represents sqrt(2) * cos(K*pi/20).
1402
 */
1403
1404
/*
 * jpeg_idct_10x10: dequantize one coefficient block and emit a 10x10 block
 * of samples, using two separable 1-D passes over an int workspace of
 * 10 rows by 8 columns.
 *
 * cinfo      - handed to IDCT_range_limit() to obtain the output lookup table.
 * compptr    - its dct_table member supplies the dequantization multipliers.
 * coef_block - input coefficients; rows 0..7 of columns 0..7 are read
 *              (row stride is DCTSIZE).
 * output_buf - array of output row pointers; rows 0..9 are written.
 * output_col - column offset added to every output row pointer.
 */
GLOBAL(void)
1405
jpeg_idct_10x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1406
     JCOEFPTR coef_block,
1407
     JSAMPARRAY output_buf, JDIMENSION output_col)
1408
{
1409
  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
1410
  INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
1411
  INT32 z1, z2, z3, z4, z5;
1412
  JCOEFPTR inptr;
1413
  ISLOW_MULT_TYPE * quantptr;
1414
  int * wsptr;
1415
  JSAMPROW outptr;
1416
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1417
  int ctr;
1418
  int workspace[8*10];  /* buffers data between passes */
1419
  SHIFT_TEMPS
1420
1421
  /* Pass 1: process columns from input, store into work array. */
  /* Each of the 8 input columns yields 10 values, stored at wsptr[8*0] to
   * wsptr[8*9] (row stride 8).
   */
1422
1423
  inptr = coef_block;
1424
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1425
  wsptr = workspace;
1426
  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1427
    /* Even part */
    /* Results are collected in tmp20..tmp24; tmp10..tmp13 are scratch here
     * and are reused by the odd part.
     */
1428
1429
    z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
    /* NOTE(review): left shift of a negative signed value is formally
     * undefined in ISO C - confirm for target compilers.
     */
1430
    z3 <<= CONST_BITS;
1431
    /* Add fudge factor here for final descale. */
1432
    z3 += ONE << (CONST_BITS-PASS1_BITS-1);
1433
    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1434
    z1 = MULTIPLY(z4, FIX(1.144122806));         /* c4 */
1435
    z2 = MULTIPLY(z4, FIX(0.437016024));         /* c8 */
1436
    tmp10 = z3 + z1;
1437
    tmp11 = z3 - z2;
1438
    /* tmp22 is descaled right away (it already contains the fudge factor
     * via z3), because its odd partner tmp12 is built at output scale
     * without any MULTIPLY.
     */
1439
    tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1),   /* c0 = (c4-c8)*2 */
1440
      CONST_BITS-PASS1_BITS);
1441
1442
    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1443
    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1444
1445
    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));    /* c6 */
1446
    tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
1447
    tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
1448
1449
    tmp20 = tmp10 + tmp12;
1450
    tmp24 = tmp10 - tmp12;
1451
    tmp21 = tmp11 + tmp13;
1452
    tmp23 = tmp11 - tmp13;
1453
1454
    /* Odd part */
    /* Results are collected in tmp10..tmp14. */
1455
1456
    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1457
    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1458
    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1459
    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1460
1461
    tmp11 = z2 + z4;
1462
    tmp13 = z2 - z4;
1463
1464
    tmp12 = MULTIPLY(tmp13, FIX(0.309016994));        /* (c3-c7)/2 */
    /* z5 is z3 shifted up so it can be summed with MULTIPLY products; the
     * unshifted z3 is still needed for tmp12 below.
     */
1465
    z5 = z3 << CONST_BITS;
1466
1467
    z2 = MULTIPLY(tmp11, FIX(0.951056516));           /* (c3+c7)/2 */
1468
    z4 = z5 + tmp12;
1469
1470
    tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
1471
    tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
1472
1473
    z2 = MULTIPLY(tmp11, FIX(0.587785252));           /* (c1-c9)/2 */
1474
    z4 = z5 - tmp12 - (tmp13 << (CONST_BITS - 1));
1475
    /* tmp12 is formed from unscaled inputs and shifted straight to the
     * pass-1 output scale, so rows 2 and 7 are stored without RIGHT_SHIFT.
     */
1476
#if PASS1_BITS > 0
1477
    tmp12 = (z1 - tmp13 - z3) << PASS1_BITS;
1478
#else
1479
    tmp12 = z1 - tmp13 - z3;
1480
#endif
1481
1482
    tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
1483
    tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
1484
1485
    /* Final output stage */
    /* Rows k and 9-k take the sum and difference of one even/odd pair. */
1486
1487
    wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
1488
    wsptr[8*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
1489
    wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
1490
    wsptr[8*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1491
    wsptr[8*2] = (int) (tmp22 + tmp12);
1492
    wsptr[8*7] = (int) (tmp22 - tmp12);
1493
    wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
1494
    wsptr[8*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
1495
    wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1496
    wsptr[8*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1497
  }
1498
1499
  /* Pass 2: process 10 rows from work array, store into output array. */
  /* Reads the 8 entries of one workspace row and writes 10 output samples.
   * Unlike pass 1, every output (including 2 and 7) goes through the same
   * RIGHT_SHIFT, so tmp22 and tmp12 are kept at full scale here.
   */
1500
1501
  wsptr = workspace;
1502
  for (ctr = 0; ctr < 10; ctr++) {
1503
    outptr = output_buf[ctr] + output_col;
1504
1505
    /* Even part */
1506
1507
    /* Add range center and fudge factor for final descale and range-limit. */
1508
    z3 = (INT32) wsptr[0] + PASS2_OFFSET;
    /* NOTE(review): same signed left-shift caveat as in pass 1. */
1509
    z3 <<= CONST_BITS;
1510
#if PASS2_BITS == 0
    /* With no PASS2_BITS the rounding term is added separately here. */
1511
    z3 += ONE << (CONST_BITS-1);
1512
#endif
1513
    z4 = (INT32) wsptr[4];
1514
    z1 = MULTIPLY(z4, FIX(1.144122806));         /* c4 */
1515
    z2 = MULTIPLY(z4, FIX(0.437016024));         /* c8 */
1516
    tmp10 = z3 + z1;
1517
    tmp11 = z3 - z2;
1518
1519
    tmp22 = z3 - ((z1 - z2) << 1);               /* c0 = (c4-c8)*2 */
1520
1521
    z2 = (INT32) wsptr[2];
1522
    z3 = (INT32) wsptr[6];
1523
1524
    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));    /* c6 */
1525
    tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
1526
    tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
1527
1528
    tmp20 = tmp10 + tmp12;
1529
    tmp24 = tmp10 - tmp12;
1530
    tmp21 = tmp11 + tmp13;
1531
    tmp23 = tmp11 - tmp13;
1532
1533
    /* Odd part */
1534
1535
    z1 = (INT32) wsptr[1];
1536
    z2 = (INT32) wsptr[3];
1537
    z3 = (INT32) wsptr[5];
    /* z3 is shifted in place here (pass 1 used a separate z5); every later
     * use of z3 in this pass expects the shifted value.
     */
1538
    z3 <<= CONST_BITS;
1539
    z4 = (INT32) wsptr[7];
1540
1541
    tmp11 = z2 + z4;
1542
    tmp13 = z2 - z4;
1543
1544
    tmp12 = MULTIPLY(tmp13, FIX(0.309016994));        /* (c3-c7)/2 */
1545
1546
    z2 = MULTIPLY(tmp11, FIX(0.951056516));           /* (c3+c7)/2 */
1547
    z4 = z3 + tmp12;
1548
1549
    tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
1550
    tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
1551
1552
    z2 = MULTIPLY(tmp11, FIX(0.587785252));           /* (c1-c9)/2 */
1553
    z4 = z3 - tmp12 - (tmp13 << (CONST_BITS - 1));
1554
    /* z3 already carries the CONST_BITS shift, so only z1 - tmp13 is
     * shifted before it is subtracted.
     */
1555
    tmp12 = ((z1 - tmp13) << CONST_BITS) - z3;
1556
1557
    tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
1558
    tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
1559
1560
    /* Final output stage */
    /* Each index is descaled and masked with RANGE_MASK before the
     * range_limit[] table lookup.
     */
1561
1562
    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
1563
                CONST_BITS+PASS2_BITS)
1564
          & RANGE_MASK];
1565
    outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
1566
                CONST_BITS+PASS2_BITS)
1567
          & RANGE_MASK];
1568
    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
1569
                CONST_BITS+PASS2_BITS)
1570
          & RANGE_MASK];
1571
    outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
1572
                CONST_BITS+PASS2_BITS)
1573
          & RANGE_MASK];
1574
    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
1575
                CONST_BITS+PASS2_BITS)
1576
          & RANGE_MASK];
1577
    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
1578
                CONST_BITS+PASS2_BITS)
1579
          & RANGE_MASK];
1580
    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
1581
                CONST_BITS+PASS2_BITS)
1582
          & RANGE_MASK];
1583
    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
1584
                CONST_BITS+PASS2_BITS)
1585
          & RANGE_MASK];
1586
    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
1587
                CONST_BITS+PASS2_BITS)
1588
          & RANGE_MASK];
1589
    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
1590
                CONST_BITS+PASS2_BITS)
1591
          & RANGE_MASK];
1592
1593
    wsptr += 8;   /* advance pointer to next row */
1594
  }
1595
}
1596
1597
1598
/*
1599
 * Perform dequantization and inverse DCT on one block of coefficients,
1600
 * producing an 11x11 output block.
1601
 *
1602
 * Optimized algorithm with 24 multiplications in the 1-D kernel.
1603
 * cK represents sqrt(2) * cos(K*pi/22).
1604
 */
1605
1606
GLOBAL(void)
1607
jpeg_idct_11x11 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1608
     JCOEFPTR coef_block,
1609
     JSAMPARRAY output_buf, JDIMENSION output_col)
1610
{
1611
  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
1612
  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
1613
  INT32 z1, z2, z3, z4;
1614
  JCOEFPTR inptr;
1615
  ISLOW_MULT_TYPE * quantptr;
1616
  int * wsptr;
1617
  JSAMPROW outptr;
1618
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1619
  int ctr;
1620
  int workspace[8*11];  /* buffers data between passes */
1621
  SHIFT_TEMPS
1622
1623
  /* Pass 1: process columns from input, store into work array. */
1624
1625
  inptr = coef_block;
1626
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1627
  wsptr = workspace;
1628
  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1629
    /* Even part */
1630
1631
    tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1632
    tmp10 <<= CONST_BITS;
1633
    /* Add fudge factor here for final descale. */
1634
    tmp10 += ONE << (CONST_BITS-PASS1_BITS-1);
1635
1636
    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1637
    z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1638
    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1639
1640
    tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132));     /* c2+c4 */
1641
    tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045));     /* c2-c6 */
1642
    z4 = z1 + z3;
1643
    tmp24 = MULTIPLY(z4, - FIX(1.155664402));        /* -(c2-c10) */
1644
    z4 -= z2;
1645
    tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976));  /* c2 */
1646
    tmp21 = tmp20 + tmp23 + tmp25 -
1647
      MULTIPLY(z2, FIX(1.821790775));          /* c2+c4+c10-c6 */
1648
    tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
1649
    tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
1650
    tmp24 += tmp25;
1651
    tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120));  /* c8+c10 */
1652
    tmp24 += MULTIPLY(z2, FIX(1.944413522)) -        /* c2+c8 */
1653
       MULTIPLY(z1, FIX(1.390975730));         /* c4+c10 */
1654
    tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562));  /* c0 */
1655
1656
    /* Odd part */
1657
1658
    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1659
    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1660
    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1661
    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1662
1663
    tmp11 = z1 + z2;
1664
    tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */
1665
    tmp11 = MULTIPLY(tmp11, FIX(0.887983902));           /* c3-c9 */
1666
    tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295));         /* c5-c9 */
1667
    tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */
1668
    tmp10 = tmp11 + tmp12 + tmp13 -
1669
      MULTIPLY(z1, FIX(0.923107866));              /* c7+c5+c3-c1-2*c9 */
1670
    z1    = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */
1671
    tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588));        /* c1+c7+3*c9-c3 */
1672
    tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623));        /* c3+c5-c7-c9 */
1673
    z1    = MULTIPLY(z2 + z4, - FIX(1.798248910));       /* -(c1+c9) */
1674
    tmp11 += z1;
1675
    tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632));        /* c1+c5+c9-c7 */
1676
    tmp14 += MULTIPLY(z2, - FIX(1.467221301)) +          /* -(c5+c9) */
1677
       MULTIPLY(z3, FIX(1.001388905)) -            /* c1-c9 */
1678
       MULTIPLY(z4, FIX(1.684843907));             /* c3+c9 */
1679
1680
    /* Final output stage */
1681
1682
    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
1683
    wsptr[8*10] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
1684
    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
1685
    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1686
    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
1687
    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
1688
    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
1689
    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
1690
    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1691
    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1692
    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25, CONST_BITS-PASS1_BITS);
1693
  }
1694
1695
  /* Pass 2: process 11 rows from work array, store into output array. */
1696
1697
  wsptr = workspace;
1698
  for (ctr = 0; ctr < 11; ctr++) {
1699
    outptr = output_buf[ctr] + output_col;
1700
1701
    /* Even part */
1702
1703
    /* Add range center and fudge factor for final descale and range-limit. */
1704
    tmp10 = (INT32) wsptr[0] + PASS2_OFFSET;
1705
    tmp10 <<= CONST_BITS;
1706
#if PASS2_BITS == 0
1707
    tmp10 += ONE << (CONST_BITS-1);
1708
#endif
1709
1710
    z1 = (INT32) wsptr[2];
1711
    z2 = (INT32) wsptr[4];
1712
    z3 = (INT32) wsptr[6];
1713
1714
    tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132));     /* c2+c4 */
1715
    tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045));     /* c2-c6 */
1716
    z4 = z1 + z3;
1717
    tmp24 = MULTIPLY(z4, - FIX(1.155664402));        /* -(c2-c10) */
1718
    z4 -= z2;
1719
    tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976));  /* c2 */
1720
    tmp21 = tmp20 + tmp23 + tmp25 -
1721
      MULTIPLY(z2, FIX(1.821790775));          /* c2+c4+c10-c6 */
1722
    tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
1723
    tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
1724
    tmp24 += tmp25;
1725
    tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120));  /* c8+c10 */
1726
    tmp24 += MULTIPLY(z2, FIX(1.944413522)) -        /* c2+c8 */
1727
       MULTIPLY(z1, FIX(1.390975730));         /* c4+c10 */
1728
    tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562));  /* c0 */
1729
1730
    /* Odd part */
1731
1732
    z1 = (INT32) wsptr[1];
1733
    z2 = (INT32) wsptr[3];
1734
    z3 = (INT32) wsptr[5];
1735
    z4 = (INT32) wsptr[7];
1736
1737
    tmp11 = z1 + z2;
1738
    tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */
1739
    tmp11 = MULTIPLY(tmp11, FIX(0.887983902));           /* c3-c9 */
1740
    tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295));         /* c5-c9 */
1741
    tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */
1742
    tmp10 = tmp11 + tmp12 + tmp13 -
1743
      MULTIPLY(z1, FIX(0.923107866));              /* c7+c5+c3-c1-2*c9 */
1744
    z1    = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */
1745
    tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588));        /* c1+c7+3*c9-c3 */
1746
    tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623));        /* c3+c5-c7-c9 */
1747
    z1    = MULTIPLY(z2 + z4, - FIX(1.798248910));       /* -(c1+c9) */
1748
    tmp11 += z1;
1749
    tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632));        /* c1+c5+c9-c7 */
1750
    tmp14 += MULTIPLY(z2, - FIX(1.467221301)) +          /* -(c5+c9) */
1751
       MULTIPLY(z3, FIX(1.001388905)) -            /* c1-c9 */
1752
       MULTIPLY(z4, FIX(1.684843907));             /* c3+c9 */
1753
1754
    /* Final output stage */
1755
1756
    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
1757
                 CONST_BITS+PASS2_BITS)
1758
           & RANGE_MASK];
1759
    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
1760
                 CONST_BITS+PASS2_BITS)
1761
           & RANGE_MASK];
1762
    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
1763
                 CONST_BITS+PASS2_BITS)
1764
           & RANGE_MASK];
1765
    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
1766
                 CONST_BITS+PASS2_BITS)
1767
           & RANGE_MASK];
1768
    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
1769
                 CONST_BITS+PASS2_BITS)
1770
           & RANGE_MASK];
1771
    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
1772
                 CONST_BITS+PASS2_BITS)
1773
           & RANGE_MASK];
1774
    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
1775
                 CONST_BITS+PASS2_BITS)
1776
           & RANGE_MASK];
1777
    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
1778
                 CONST_BITS+PASS2_BITS)
1779
           & RANGE_MASK];
1780
    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
1781
                 CONST_BITS+PASS2_BITS)
1782
           & RANGE_MASK];
1783
    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
1784
                 CONST_BITS+PASS2_BITS)
1785
           & RANGE_MASK];
1786
    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25,
1787
                 CONST_BITS+PASS2_BITS)
1788
           & RANGE_MASK];
1789
1790
    wsptr += 8;   /* advance pointer to next row */
1791
  }
1792
}
1793
1794
1795
/*
1796
 * Perform dequantization and inverse DCT on one block of coefficients,
1797
 * producing a 12x12 output block.
1798
 *
1799
 * Optimized algorithm with 15 multiplications in the 1-D kernel.
1800
 * cK represents sqrt(2) * cos(K*pi/24).
1801
 */
1802
1803
GLOBAL(void)
1804
jpeg_idct_12x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1805
     JCOEFPTR coef_block,
1806
     JSAMPARRAY output_buf, JDIMENSION output_col)
1807
{
1808
  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
1809
  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
1810
  INT32 z1, z2, z3, z4;
1811
  JCOEFPTR inptr;
1812
  ISLOW_MULT_TYPE * quantptr;
1813
  int * wsptr;
1814
  JSAMPROW outptr;
1815
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1816
  int ctr;
1817
  int workspace[8*12];  /* buffers data between passes */
1818
  SHIFT_TEMPS
1819
1820
  /* Pass 1: process columns from input, store into work array. */
1821
1822
  inptr = coef_block;
1823
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1824
  wsptr = workspace;
1825
  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1826
    /* Even part */
1827
1828
    z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1829
    z3 <<= CONST_BITS;
1830
    /* Add fudge factor here for final descale. */
1831
    z3 += ONE << (CONST_BITS-PASS1_BITS-1);
1832
1833
    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1834
    z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
1835
1836
    tmp10 = z3 + z4;
1837
    tmp11 = z3 - z4;
1838
1839
    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1840
    z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
1841
    z1 <<= CONST_BITS;
1842
    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1843
    z2 <<= CONST_BITS;
1844
1845
    tmp12 = z1 - z2;
1846
1847
    tmp21 = z3 + tmp12;
1848
    tmp24 = z3 - tmp12;
1849
1850
    tmp12 = z4 + z2;
1851
1852
    tmp20 = tmp10 + tmp12;
1853
    tmp25 = tmp10 - tmp12;
1854
1855
    tmp12 = z4 - z1 - z2;
1856
1857
    tmp22 = tmp11 + tmp12;
1858
    tmp23 = tmp11 - tmp12;
1859
1860
    /* Odd part */
1861
1862
    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1863
    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1864
    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1865
    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1866
1867
    tmp11 = MULTIPLY(z2, FIX(1.306562965));                  /* c3 */
1868
    tmp14 = MULTIPLY(z2, - FIX_0_541196100);                 /* -c9 */
1869
1870
    tmp10 = z1 + z3;
1871
    tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669));          /* c7 */
1872
    tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384));       /* c5-c7 */
1873
    tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716));  /* c1-c5 */
1874
    tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580));           /* -(c7+c11) */
1875
    tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
1876
    tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
1877
    tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) -        /* c7-c11 */
1878
       MULTIPLY(z4, FIX(1.982889723));                 /* c5+c7 */
1879
1880
    z1 -= z4;
1881
    z2 -= z3;
1882
    z3 = MULTIPLY(z1 + z2, FIX_0_541196100);                 /* c9 */
1883
    tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865);              /* c3-c9 */
1884
    tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065);              /* c3+c9 */
1885
1886
    /* Final output stage */
1887
1888
    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
1889
    wsptr[8*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
1890
    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
1891
    wsptr[8*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1892
    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
1893
    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
1894
    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
1895
    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
1896
    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1897
    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1898
    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
1899
    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
1900
  }
1901
1902
  /* Pass 2: process 12 rows from work array, store into output array. */
1903
1904
  wsptr = workspace;
1905
  for (ctr = 0; ctr < 12; ctr++) {
1906
    outptr = output_buf[ctr] + output_col;
1907
1908
    /* Even part */
1909
1910
    /* Add range center and fudge factor for final descale and range-limit. */
1911
    z3 = (INT32) wsptr[0] + PASS2_OFFSET;
1912
    z3 <<= CONST_BITS;
1913
#if PASS2_BITS == 0
1914
    z3 += ONE << (CONST_BITS-1);
1915
#endif
1916
1917
    z4 = (INT32) wsptr[4];
1918
    z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
1919
1920
    tmp10 = z3 + z4;
1921
    tmp11 = z3 - z4;
1922
1923
    z1 = (INT32) wsptr[2];
1924
    z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
1925
    z1 <<= CONST_BITS;
1926
    z2 = (INT32) wsptr[6];
1927
    z2 <<= CONST_BITS;
1928
1929
    tmp12 = z1 - z2;
1930
1931
    tmp21 = z3 + tmp12;
1932
    tmp24 = z3 - tmp12;
1933
1934
    tmp12 = z4 + z2;
1935
1936
    tmp20 = tmp10 + tmp12;
1937
    tmp25 = tmp10 - tmp12;
1938
1939
    tmp12 = z4 - z1 - z2;
1940
1941
    tmp22 = tmp11 + tmp12;
1942
    tmp23 = tmp11 - tmp12;
1943
1944
    /* Odd part */
1945
1946
    z1 = (INT32) wsptr[1];
1947
    z2 = (INT32) wsptr[3];
1948
    z3 = (INT32) wsptr[5];
1949
    z4 = (INT32) wsptr[7];
1950
1951
    tmp11 = MULTIPLY(z2, FIX(1.306562965));                  /* c3 */
1952
    tmp14 = MULTIPLY(z2, - FIX_0_541196100);                 /* -c9 */
1953
1954
    tmp10 = z1 + z3;
1955
    tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669));          /* c7 */
1956
    tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384));       /* c5-c7 */
1957
    tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716));  /* c1-c5 */
1958
    tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580));           /* -(c7+c11) */
1959
    tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
1960
    tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
1961
    tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) -        /* c7-c11 */
1962
       MULTIPLY(z4, FIX(1.982889723));                 /* c5+c7 */
1963
1964
    z1 -= z4;
1965
    z2 -= z3;
1966
    z3 = MULTIPLY(z1 + z2, FIX_0_541196100);                 /* c9 */
1967
    tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865);              /* c3-c9 */
1968
    tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065);              /* c3+c9 */
1969
1970
    /* Final output stage */
1971
1972
    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
1973
                 CONST_BITS+PASS2_BITS)
1974
           & RANGE_MASK];
1975
    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
1976
                 CONST_BITS+PASS2_BITS)
1977
           & RANGE_MASK];
1978
    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
1979
                 CONST_BITS+PASS2_BITS)
1980
           & RANGE_MASK];
1981
    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
1982
                 CONST_BITS+PASS2_BITS)
1983
           & RANGE_MASK];
1984
    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
1985
                 CONST_BITS+PASS2_BITS)
1986
           & RANGE_MASK];
1987
    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
1988
                 CONST_BITS+PASS2_BITS)
1989
           & RANGE_MASK];
1990
    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
1991
                 CONST_BITS+PASS2_BITS)
1992
           & RANGE_MASK];
1993
    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
1994
                 CONST_BITS+PASS2_BITS)
1995
           & RANGE_MASK];
1996
    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
1997
                 CONST_BITS+PASS2_BITS)
1998
           & RANGE_MASK];
1999
    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
2000
                 CONST_BITS+PASS2_BITS)
2001
           & RANGE_MASK];
2002
    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
2003
                 CONST_BITS+PASS2_BITS)
2004
           & RANGE_MASK];
2005
    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
2006
                 CONST_BITS+PASS2_BITS)
2007
           & RANGE_MASK];
2008
2009
    wsptr += 8;   /* advance pointer to next row */
2010
  }
2011
}
2012
2013
2014
/*
2015
 * Perform dequantization and inverse DCT on one block of coefficients,
2016
 * producing a 13x13 output block.
2017
 *
2018
 * Optimized algorithm with 29 multiplications in the 1-D kernel.
2019
 * cK represents sqrt(2) * cos(K*pi/26).
2020
 */
2021
2022
GLOBAL(void)
2023
jpeg_idct_13x13 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
2024
     JCOEFPTR coef_block,
2025
     JSAMPARRAY output_buf, JDIMENSION output_col)
2026
{
2027
  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
2028
  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
2029
  INT32 z1, z2, z3, z4;
2030
  JCOEFPTR inptr;
2031
  ISLOW_MULT_TYPE * quantptr;
2032
  int * wsptr;
2033
  JSAMPROW outptr;
2034
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
2035
  int ctr;
2036
  int workspace[8*13];  /* buffers data between passes */
2037
  SHIFT_TEMPS
2038
2039
  /* Pass 1: process columns from input, store into work array. */
2040
2041
  inptr = coef_block;
2042
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
2043
  wsptr = workspace;
2044
  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
2045
    /* Even part */
2046
2047
    z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
2048
    z1 <<= CONST_BITS;
2049
    /* Add fudge factor here for final descale. */
2050
    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
2051
2052
    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
2053
    z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
2054
    z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
2055
2056
    tmp10 = z3 + z4;
2057
    tmp11 = z3 - z4;
2058
2059
    tmp12 = MULTIPLY(tmp10, FIX(1.155388986));                /* (c4+c6)/2 */
2060
    tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1;           /* (c4-c6)/2 */
2061
2062
    tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13;   /* c2 */
2063
    tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13;   /* c10 */
2064
2065
    tmp12 = MULTIPLY(tmp10, FIX(0.316450131));                /* (c8-c12)/2 */
2066
    tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1;           /* (c8+c12)/2 */
2067
2068
    tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13;   /* c6 */
2069
    tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */
2070
2071
    tmp12 = MULTIPLY(tmp10, FIX(0.435816023));                /* (c2-c10)/2 */
2072
    tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1;           /* (c2+c10)/2 */
2073
2074
    tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */
2075
    tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */
2076
2077
    tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1;      /* c0 */
2078
2079
    /* Odd part */
2080
2081
    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
2082
    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
2083
    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
2084
    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
2085
2086
    tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651));     /* c3 */
2087
    tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945));     /* c5 */
2088
    tmp15 = z1 + z4;
2089
    tmp13 = MULTIPLY(tmp15, FIX(0.937797057));       /* c7 */
2090
    tmp10 = tmp11 + tmp12 + tmp13 -
2091
      MULTIPLY(z1, FIX(2.020082300));          /* c7+c5+c3-c1 */
2092
    tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458));   /* -c11 */
2093
    tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */
2094
    tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */
2095
    tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945));   /* -c5 */
2096
    tmp11 += tmp14;
2097
    tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */
2098
    tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813));   /* -c9 */
2099
    tmp12 += tmp14;
2100
    tmp13 += tmp14;
2101
    tmp15 = MULTIPLY(tmp15, FIX(0.338443458));       /* c11 */
2102
    tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */
2103
      MULTIPLY(z2, FIX(0.466105296));          /* c1-c7 */
2104
    z1    = MULTIPLY(z3 - z2, FIX(0.937797057));     /* c7 */
2105
    tmp14 += z1;
2106
    tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) -   /* c3-c7 */
2107
       MULTIPLY(z4, FIX(1.742345811));         /* c1+c11 */
2108
2109
    /* Final output stage */
2110
2111
    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
2112
    wsptr[8*12] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
2113
    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
2114
    wsptr[8*11] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
2115
    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
2116
    wsptr[8*10] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
2117
    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
2118
    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
2119
    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
2120
    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
2121
    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
2122
    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
2123
    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26, CONST_BITS-PASS1_BITS);
2124
  }
2125
2126
  /* Pass 2: process 13 rows from work array, store into output array. */
2127
2128
  wsptr = workspace;
2129
  for (ctr = 0; ctr < 13; ctr++) {
2130
    outptr = output_buf[ctr] + output_col;
2131
2132
    /* Even part */
2133
2134
    /* Add range center and fudge factor for final descale and range-limit. */
2135
    z1 = (INT32) wsptr[0] + PASS2_OFFSET;
2136
    z1 <<= CONST_BITS;
2137
#if PASS2_BITS == 0
2138
    z1 += ONE << (CONST_BITS-1);
2139
#endif
2140
2141
    z2 = (INT32) wsptr[2];
2142
    z3 = (INT32) wsptr[4];
2143
    z4 = (INT32) wsptr[6];
2144
2145
    tmp10 = z3 + z4;
2146
    tmp11 = z3 - z4;
2147
2148
    tmp12 = MULTIPLY(tmp10, FIX(1.155388986));                /* (c4+c6)/2 */
2149
    tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1;           /* (c4-c6)/2 */
2150
2151
    tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13;   /* c2 */
2152
    tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13;   /* c10 */
2153
2154
    tmp12 = MULTIPLY(tmp10, FIX(0.316450131));                /* (c8-c12)/2 */
2155
    tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1;           /* (c8+c12)/2 */
2156
2157
    tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13;   /* c6 */
2158
    tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */
2159
2160
    tmp12 = MULTIPLY(tmp10, FIX(0.435816023));                /* (c2-c10)/2 */
2161
    tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1;           /* (c2+c10)/2 */
2162
2163
    tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */
2164
    tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */
2165
2166
    tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1;      /* c0 */
2167
2168
    /* Odd part */
2169
2170
    z1 = (INT32) wsptr[1];
2171
    z2 = (INT32) wsptr[3];
2172
    z3 = (INT32) wsptr[5];
2173
    z4 = (INT32) wsptr[7];
2174
2175
    tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651));     /* c3 */
2176
    tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945));     /* c5 */
2177
    tmp15 = z1 + z4;
2178
    tmp13 = MULTIPLY(tmp15, FIX(0.937797057));       /* c7 */
2179
    tmp10 = tmp11 + tmp12 + tmp13 -
2180
      MULTIPLY(z1, FIX(2.020082300));          /* c7+c5+c3-c1 */
2181
    tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458));   /* -c11 */
2182
    tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */
2183
    tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */
2184
    tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945));   /* -c5 */
2185
    tmp11 += tmp14;
2186
    tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */
2187
    tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813));   /* -c9 */
2188
    tmp12 += tmp14;
2189
    tmp13 += tmp14;
2190
    tmp15 = MULTIPLY(tmp15, FIX(0.338443458));       /* c11 */
2191
    tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */
2192
      MULTIPLY(z2, FIX(0.466105296));          /* c1-c7 */
2193
    z1    = MULTIPLY(z3 - z2, FIX(0.937797057));     /* c7 */
2194
    tmp14 += z1;
2195
    tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) -   /* c3-c7 */
2196
       MULTIPLY(z4, FIX(1.742345811));         /* c1+c11 */
2197
2198
    /* Final output stage */
2199
2200
    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
2201
                 CONST_BITS+PASS2_BITS)
2202
           & RANGE_MASK];
2203
    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
2204
                 CONST_BITS+PASS2_BITS)
2205
           & RANGE_MASK];
2206
    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
2207
                 CONST_BITS+PASS2_BITS)
2208
           & RANGE_MASK];
2209
    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
2210
                 CONST_BITS+PASS2_BITS)
2211
           & RANGE_MASK];
2212
    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
2213
                 CONST_BITS+PASS2_BITS)
2214
           & RANGE_MASK];
2215
    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
2216
                 CONST_BITS+PASS2_BITS)
2217
           & RANGE_MASK];
2218
    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
2219
                 CONST_BITS+PASS2_BITS)
2220
           & RANGE_MASK];
2221
    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
2222
                 CONST_BITS+PASS2_BITS)
2223
           & RANGE_MASK];
2224
    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
2225
                 CONST_BITS+PASS2_BITS)
2226
           & RANGE_MASK];
2227
    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
2228
                 CONST_BITS+PASS2_BITS)
2229
           & RANGE_MASK];
2230
    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
2231
                 CONST_BITS+PASS2_BITS)
2232
           & RANGE_MASK];
2233
    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
2234
                 CONST_BITS+PASS2_BITS)
2235
           & RANGE_MASK];
2236
    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26,
2237
                 CONST_BITS+PASS2_BITS)
2238
           & RANGE_MASK];
2239
2240
    wsptr += 8;   /* advance pointer to next row */
2241
  }
2242
}
2243
2244
2245
/*
2246
 * Perform dequantization and inverse DCT on one block of coefficients,
2247
 * producing a 14x14 output block.
2248
 *
2249
 * Optimized algorithm with 20 multiplications in the 1-D kernel.
2250
 * cK represents sqrt(2) * cos(K*pi/28).
2251
 */
2252
2253
GLOBAL(void)
2254
jpeg_idct_14x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
2255
     JCOEFPTR coef_block,
2256
     JSAMPARRAY output_buf, JDIMENSION output_col)
2257
{
2258
  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
2259
  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
2260
  INT32 z1, z2, z3, z4;
2261
  JCOEFPTR inptr;
2262
  ISLOW_MULT_TYPE * quantptr;
2263
  int * wsptr;
2264
  JSAMPROW outptr;
2265
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
2266
  int ctr;
2267
  int workspace[8*14];  /* buffers data between passes */
2268
  SHIFT_TEMPS
2269
2270
  /* Pass 1: process columns from input, store into work array. */
2271
2272
  inptr = coef_block;
2273
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
2274
  wsptr = workspace;
2275
  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
2276
    /* Even part */
2277
2278
    z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
2279
    z1 <<= CONST_BITS;
2280
    /* Add fudge factor here for final descale. */
2281
    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
2282
    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
2283
    z2 = MULTIPLY(z4, FIX(1.274162392));         /* c4 */
2284
    z3 = MULTIPLY(z4, FIX(0.314692123));         /* c12 */
2285
    z4 = MULTIPLY(z4, FIX(0.881747734));         /* c8 */
2286
2287
    tmp10 = z1 + z2;
2288
    tmp11 = z1 + z3;
2289
    tmp12 = z1 - z4;
2290
2291
    tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */
2292
      CONST_BITS-PASS1_BITS);
2293
2294
    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
2295
    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
2296
2297
    z3 = MULTIPLY(z1 + z2, FIX(1.105676686));    /* c6 */
2298
2299
    tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
2300
    tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
2301
    tmp15 = MULTIPLY(z1, FIX(0.613604268)) -     /* c10 */
2302
      MULTIPLY(z2, FIX(1.378756276));      /* c2 */
2303
2304
    tmp20 = tmp10 + tmp13;
2305
    tmp26 = tmp10 - tmp13;
2306
    tmp21 = tmp11 + tmp14;
2307
    tmp25 = tmp11 - tmp14;
2308
    tmp22 = tmp12 + tmp15;
2309
    tmp24 = tmp12 - tmp15;
2310
2311
    /* Odd part */
2312
2313
    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
2314
    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
2315
    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
2316
    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
2317
    tmp13 = z4 << CONST_BITS;
2318
2319
    tmp14 = z1 + z3;
2320
    tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607));           /* c3 */
2321
    tmp12 = MULTIPLY(tmp14, FIX(1.197448846));             /* c5 */
2322
    tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
2323
    tmp14 = MULTIPLY(tmp14, FIX(0.752406978));             /* c9 */
2324
    tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426));        /* c9+c11-c13 */
2325
    z1    -= z2;
2326
    tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13;        /* c11 */
2327
    tmp16 += tmp15;
2328
    z1    += z4;
2329
    z4    = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */
2330
    tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948));          /* c3-c9-c13 */
2331
    tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773));          /* c3+c5-c13 */
2332
    z4    = MULTIPLY(z3 - z2, FIX(1.405321284));           /* c1 */
2333
    tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
2334
    tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567));          /* c1+c11-c5 */
2335
2336
#if PASS1_BITS > 0
2337
    tmp13 = (z1 - z3) << PASS1_BITS;
2338
#else
2339
    tmp13 = z1 - z3;
2340
#endif
2341
2342
    /* Final output stage */
2343
2344
    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
2345
    wsptr[8*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
2346
    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
2347
    wsptr[8*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
2348
    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
2349
    wsptr[8*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
2350
    wsptr[8*3]  = (int) (tmp23 + tmp13);
2351
    wsptr[8*10] = (int) (tmp23 - tmp13);
2352
    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
2353
    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
2354
    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
2355
    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
2356
    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
2357
    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
2358
  }
2359
2360
  /* Pass 2: process 14 rows from work array, store into output array. */
2361
2362
  wsptr = workspace;
2363
  for (ctr = 0; ctr < 14; ctr++) {
2364
    outptr = output_buf[ctr] + output_col;
2365
2366
    /* Even part */
2367
2368
    /* Add range center and fudge factor for final descale and range-limit. */
2369
    z1 = (INT32) wsptr[0] + PASS2_OFFSET;
2370
    z1 <<= CONST_BITS;
2371
#if PASS2_BITS == 0
2372
    z1 += ONE << (CONST_BITS-1);
2373
#endif
2374
    z4 = (INT32) wsptr[4];
2375
    z2 = MULTIPLY(z4, FIX(1.274162392));         /* c4 */
2376
    z3 = MULTIPLY(z4, FIX(0.314692123));         /* c12 */
2377
    z4 = MULTIPLY(z4, FIX(0.881747734));         /* c8 */
2378
2379
    tmp10 = z1 + z2;
2380
    tmp11 = z1 + z3;
2381
    tmp12 = z1 - z4;
2382
2383
    tmp23 = z1 - ((z2 + z3 - z4) << 1);          /* c0 = (c4+c12-c8)*2 */
2384
2385
    z1 = (INT32) wsptr[2];
2386
    z2 = (INT32) wsptr[6];
2387
2388
    z3 = MULTIPLY(z1 + z2, FIX(1.105676686));    /* c6 */
2389
2390
    tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
2391
    tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
2392
    tmp15 = MULTIPLY(z1, FIX(0.613604268)) -     /* c10 */
2393
      MULTIPLY(z2, FIX(1.378756276));      /* c2 */
2394
2395
    tmp20 = tmp10 + tmp13;
2396
    tmp26 = tmp10 - tmp13;
2397
    tmp21 = tmp11 + tmp14;
2398
    tmp25 = tmp11 - tmp14;
2399
    tmp22 = tmp12 + tmp15;
2400
    tmp24 = tmp12 - tmp15;
2401
2402
    /* Odd part */
2403
2404
    z1 = (INT32) wsptr[1];
2405
    z2 = (INT32) wsptr[3];
2406
    z3 = (INT32) wsptr[5];
2407
    z4 = (INT32) wsptr[7];
2408
    z4 <<= CONST_BITS;
2409
2410
    tmp14 = z1 + z3;
2411
    tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607));           /* c3 */
2412
    tmp12 = MULTIPLY(tmp14, FIX(1.197448846));             /* c5 */
2413
    tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
2414
    tmp14 = MULTIPLY(tmp14, FIX(0.752406978));             /* c9 */
2415
    tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426));        /* c9+c11-c13 */
2416
    z1    -= z2;
2417
    tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4;           /* c11 */
2418
    tmp16 += tmp15;
2419
    tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4;    /* -c13 */
2420
    tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948));       /* c3-c9-c13 */
2421
    tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773));       /* c3+c5-c13 */
2422
    tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284));           /* c1 */
2423
    tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
2424
    tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567));       /* c1+c11-c5 */
2425
2426
    tmp13 = ((z1 - z3) << CONST_BITS) + z4;
2427
2428
    /* Final output stage */
2429
2430
    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
2431
                 CONST_BITS+PASS2_BITS)
2432
           & RANGE_MASK];
2433
    outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
2434
                 CONST_BITS+PASS2_BITS)
2435
           & RANGE_MASK];
2436
    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
2437
                 CONST_BITS+PASS2_BITS)
2438
           & RANGE_MASK];
2439
    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
2440
                 CONST_BITS+PASS2_BITS)
2441
           & RANGE_MASK];
2442
    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
2443
                 CONST_BITS+PASS2_BITS)
2444
           & RANGE_MASK];
2445
    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
2446
                 CONST_BITS+PASS2_BITS)
2447
           & RANGE_MASK];
2448
    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
2449
                 CONST_BITS+PASS2_BITS)
2450
           & RANGE_MASK];
2451
    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
2452
                 CONST_BITS+PASS2_BITS)
2453
           & RANGE_MASK];
2454
    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
2455
                 CONST_BITS+PASS2_BITS)
2456
           & RANGE_MASK];
2457
    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
2458
                 CONST_BITS+PASS2_BITS)
2459
           & RANGE_MASK];
2460
    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
2461
                 CONST_BITS+PASS2_BITS)
2462
           & RANGE_MASK];
2463
    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
2464
                 CONST_BITS+PASS2_BITS)
2465
           & RANGE_MASK];
2466
    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
2467
                 CONST_BITS+PASS2_BITS)
2468
           & RANGE_MASK];
2469
    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
2470
                 CONST_BITS+PASS2_BITS)
2471
           & RANGE_MASK];
2472
2473
    wsptr += 8;   /* advance pointer to next row */
2474
  }
2475
}
2476
2477
2478
/*
2479
 * Perform dequantization and inverse DCT on one block of coefficients,
2480
 * producing a 15x15 output block.
2481
 *
2482
 * Optimized algorithm with 22 multiplications in the 1-D kernel.
2483
 * cK represents sqrt(2) * cos(K*pi/30).
 *
 * Inputs as used by this routine:
 *   cinfo       passed to IDCT_range_limit() to obtain the range_limit table.
 *   compptr     its dct_table member is the multiplier table handed to
 *               DEQUANTIZE together with each coefficient.
 *   coef_block  input coefficients, read as 8 rows at a stride of DCTSIZE.
 *   output_buf  rows output_buf[0..14] receive the result.
 *   output_col  first column written in each output row (15 samples wide).
 *
 * Pass 1 applies the 1-D kernel to each of the 8 input columns and stores
 * 15 values per column in a local 8x15 int workspace; pass 2 applies the
 * same kernel to each of the 15 workspace rows.
2484
 */
2485
2486
GLOBAL(void)
2487
jpeg_idct_15x15 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
2488
     JCOEFPTR coef_block,
2489
     JSAMPARRAY output_buf, JDIMENSION output_col)
2490
{
2491
  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
2492
  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
2493
  INT32 z1, z2, z3, z4;
2494
  JCOEFPTR inptr;
2495
  ISLOW_MULT_TYPE * quantptr;
2496
  int * wsptr;
2497
  JSAMPROW outptr;
2498
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
2499
  int ctr;
2500
  int workspace[8*15];  /* buffers data between passes */
2501
  SHIFT_TEMPS
2502
2503
  /* Pass 1: process columns from input, store into work array. */
  /* Each iteration handles one of the 8 input columns: inptr, quantptr and
   * wsptr all advance by one element, and the 15 results for that column
   * are written down the workspace at a stride of 8 ints.
   */
2504
2505
  inptr = coef_block;
2506
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
2507
  wsptr = workspace;
2508
  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
2509
    /* Even part */
    /* Uses input rows 0, 2, 4 and 6; row 0 is scaled up by 2**CONST_BITS. */
2510
2511
    z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
2512
    z1 <<= CONST_BITS;
2513
    /* Add fudge factor here for final descale. */
2514
    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
2515
2516
    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
2517
    z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
2518
    z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
2519
2520
    tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
2521
    tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
2522
2523
    tmp12 = z1 - tmp10;
2524
    tmp13 = z1 + tmp11;
2525
    z1 -= (tmp11 - tmp10) << 1;             /* c0 = (c6-c12)*2 */
2526
2527
    z4 = z2 - z3;
2528
    z3 += z2;
2529
    tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
2530
    tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
2531
    z2 = MULTIPLY(z2, FIX(1.439773946));    /* c4+c14 */
2532
2533
    tmp20 = tmp13 + tmp10 + tmp11;
2534
    tmp23 = tmp12 - tmp10 + tmp11 + z2;
2535
2536
    tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */
2537
    tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */
2538
2539
    tmp25 = tmp13 - tmp10 - tmp11;
2540
    tmp26 = tmp12 + tmp10 - tmp11 - z2;
2541
2542
    tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */
2543
    tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */
2544
2545
    tmp21 = tmp12 + tmp10 + tmp11;
2546
    tmp24 = tmp13 - tmp10 + tmp11;
2547
    tmp11 += tmp11;
2548
    tmp22 = z1 + tmp11;                     /* c10 = c6-c12 */
2549
    tmp27 = z1 - tmp11 - tmp11;             /* c0 = (c6-c12)*2 */
    /* tmp20..tmp27 now carry the even-part results; tmp10..tmp16 and
     * z1..z4 are overwritten below and reused for the odd part.
     */
2550
2551
    /* Odd part */
    /* Uses input rows 1, 3, 5 and 7; z4 is loaded twice (row 5, then row 7)
     * after the row-5 product has been saved in z3.
     */
2552
2553
    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
2554
    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
2555
    z4 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
2556
    z3 = MULTIPLY(z4, FIX(1.224744871));                    /* c5 */
2557
    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
2558
2559
    tmp13 = z2 - z4;
2560
    tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876));         /* c9 */
2561
    tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148));         /* c3-c9 */
2562
    tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899));      /* c3+c9 */
2563
2564
    tmp13 = MULTIPLY(z2, - FIX(0.831253876));               /* -c9 */
2565
    tmp15 = MULTIPLY(z2, - FIX(1.344997024));               /* -c3 */
2566
    z2 = z1 - z4;
2567
    tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353));            /* c1 */
2568
2569
    tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */
2570
    tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */
2571
    tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3;            /* c5 */
2572
    z2 = MULTIPLY(z1 + z4, FIX(0.575212477));               /* c11 */
2573
    tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3;      /* c7-c11 */
2574
    tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3;      /* c11+c13 */
2575
2576
    /* Final output stage */
    /* Workspace rows k and 14-k (k = 0..6) are the sum and difference of
     * even term tmp2k and odd term tmp1k; the middle row 7 takes the even
     * term tmp27 alone.  Every value is descaled by CONST_BITS-PASS1_BITS.
     */
2577
2578
    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
2579
    wsptr[8*14] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
2580
    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
2581
    wsptr[8*13] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
2582
    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
2583
    wsptr[8*12] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
2584
    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
2585
    wsptr[8*11] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
2586
    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
2587
    wsptr[8*10] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
2588
    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
2589
    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
2590
    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
2591
    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
2592
    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp27, CONST_BITS-PASS1_BITS);
2593
  }
2594
2595
  /* Pass 2: process 15 rows from work array, store into output array. */
  /* Each iteration consumes one workspace row of 8 ints (wsptr advances by
   * 8 at the bottom of the loop) and writes 15 samples into output row ctr,
   * starting at column output_col.  The arithmetic mirrors pass 1, with
   * workspace values in place of dequantized coefficients.
   */
2596
2597
  wsptr = workspace;
2598
  for (ctr = 0; ctr < 15; ctr++) {
2599
    outptr = output_buf[ctr] + output_col;
2600
2601
    /* Even part */
2602
2603
    /* Add range center and fudge factor for final descale and range-limit. */
    /* NOTE(review): PASS2_OFFSET is defined outside this view; presumably it
     * bundles the range center (and rounding when PASS2_BITS > 0) - confirm.
     */
2604
    z1 = (INT32) wsptr[0] + PASS2_OFFSET;
2605
    z1 <<= CONST_BITS;
2606
#if PASS2_BITS == 0
2607
    z1 += ONE << (CONST_BITS-1);
2608
#endif
2609
2610
    z2 = (INT32) wsptr[2];
2611
    z3 = (INT32) wsptr[4];
2612
    z4 = (INT32) wsptr[6];
2613
2614
    tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
2615
    tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
2616
2617
    tmp12 = z1 - tmp10;
2618
    tmp13 = z1 + tmp11;
2619
    z1 -= (tmp11 - tmp10) << 1;             /* c0 = (c6-c12)*2 */
2620
2621
    z4 = z2 - z3;
2622
    z3 += z2;
2623
    tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
2624
    tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
2625
    z2 = MULTIPLY(z2, FIX(1.439773946));    /* c4+c14 */
2626
2627
    tmp20 = tmp13 + tmp10 + tmp11;
2628
    tmp23 = tmp12 - tmp10 + tmp11 + z2;
2629
2630
    tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */
2631
    tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */
2632
2633
    tmp25 = tmp13 - tmp10 - tmp11;
2634
    tmp26 = tmp12 + tmp10 - tmp11 - z2;
2635
2636
    tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */
2637
    tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */
2638
2639
    tmp21 = tmp12 + tmp10 + tmp11;
2640
    tmp24 = tmp13 - tmp10 + tmp11;
2641
    tmp11 += tmp11;
2642
    tmp22 = z1 + tmp11;                     /* c10 = c6-c12 */
2643
    tmp27 = z1 - tmp11 - tmp11;             /* c0 = (c6-c12)*2 */
2644
2645
    /* Odd part */
2646
2647
    z1 = (INT32) wsptr[1];
2648
    z2 = (INT32) wsptr[3];
2649
    z4 = (INT32) wsptr[5];
2650
    z3 = MULTIPLY(z4, FIX(1.224744871));                    /* c5 */
2651
    z4 = (INT32) wsptr[7];
2652
2653
    tmp13 = z2 - z4;
2654
    tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876));         /* c9 */
2655
    tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148));         /* c3-c9 */
2656
    tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899));      /* c3+c9 */
2657
2658
    tmp13 = MULTIPLY(z2, - FIX(0.831253876));               /* -c9 */
2659
    tmp15 = MULTIPLY(z2, - FIX(1.344997024));               /* -c3 */
2660
    z2 = z1 - z4;
2661
    tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353));            /* c1 */
2662
2663
    tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */
2664
    tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */
2665
    tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3;            /* c5 */
2666
    z2 = MULTIPLY(z1 + z4, FIX(0.575212477));               /* c11 */
2667
    tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3;      /* c7-c11 */
2668
    tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3;      /* c11+c13 */
2669
2670
    /* Final output stage */
    /* Samples k and 14-k are even term +/- odd term, sample 7 is tmp27 alone.
     * Each sum is shifted right by CONST_BITS+PASS2_BITS, masked with
     * RANGE_MASK, and used as an index into range_limit to get the sample.
     */
2671
2672
    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
2673
                 CONST_BITS+PASS2_BITS)
2674
           & RANGE_MASK];
2675
    outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
2676
                 CONST_BITS+PASS2_BITS)
2677
           & RANGE_MASK];
2678
    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
2679
                 CONST_BITS+PASS2_BITS)
2680
           & RANGE_MASK];
2681
    outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
2682
                 CONST_BITS+PASS2_BITS)
2683
           & RANGE_MASK];
2684
    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
2685
                 CONST_BITS+PASS2_BITS)
2686
           & RANGE_MASK];
2687
    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
2688
                 CONST_BITS+PASS2_BITS)
2689
           & RANGE_MASK];
2690
    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
2691
                 CONST_BITS+PASS2_BITS)
2692
           & RANGE_MASK];
2693
    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
2694
                 CONST_BITS+PASS2_BITS)
2695
           & RANGE_MASK];
2696
    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
2697
                 CONST_BITS+PASS2_BITS)
2698
           & RANGE_MASK];
2699
    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
2700
                 CONST_BITS+PASS2_BITS)
2701
           & RANGE_MASK];
2702
    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
2703
                 CONST_BITS+PASS2_BITS)
2704
           & RANGE_MASK];
2705
    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
2706
                 CONST_BITS+PASS2_BITS)
2707
           & RANGE_MASK];
2708
    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
2709
                 CONST_BITS+PASS2_BITS)
2710
           & RANGE_MASK];
2711
    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
2712
                 CONST_BITS+PASS2_BITS)
2713
           & RANGE_MASK];
2714
    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp27,
2715
                 CONST_BITS+PASS2_BITS)
2716
           & RANGE_MASK];
2717
2718
    wsptr += 8;   /* advance pointer to next row */
2719
  }
2720
}
2721
2722
2723
/*
2724
 * Perform dequantization and inverse DCT on one block of coefficients,
2725
 * producing a 16x16 output block.
2726
 *
2727
 * Optimized algorithm with 28 multiplications in the 1-D kernel.
2728
 * cK represents sqrt(2) * cos(K*pi/32).
 *
 * Inputs as used by this routine:
 *   cinfo       passed to IDCT_range_limit() to obtain the range_limit table.
 *   compptr     its dct_table member is the multiplier table handed to
 *               DEQUANTIZE together with each coefficient.
 *   coef_block  input coefficients, read as 8 rows at a stride of DCTSIZE.
 *   output_buf  rows output_buf[0..15] receive the result.
 *   output_col  first column written in each output row (16 samples wide).
 *
 * Pass 1 applies the 1-D kernel to each of the 8 input columns and stores
 * 16 values per column in a local 8x16 int workspace; pass 2 applies the
 * same kernel to each of the 16 workspace rows.
2729
 */
2730
2731
GLOBAL(void)
2732
jpeg_idct_16x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
2733
     JCOEFPTR coef_block,
2734
     JSAMPARRAY output_buf, JDIMENSION output_col)
2735
{
2736
  INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
2737
  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
2738
  INT32 z1, z2, z3, z4;
2739
  JCOEFPTR inptr;
2740
  ISLOW_MULT_TYPE * quantptr;
2741
  int * wsptr;
2742
  JSAMPROW outptr;
2743
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
2744
  int ctr;
2745
  int workspace[8*16];  /* buffers data between passes */
2746
  SHIFT_TEMPS
2747
2748
  /* Pass 1: process columns from input, store into work array. */
  /* Each iteration handles one of the 8 input columns: inptr, quantptr and
   * wsptr all advance by one element, and the 16 results for that column
   * are written down the workspace at a stride of 8 ints.
   */
2749
2750
  inptr = coef_block;
2751
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
2752
  wsptr = workspace;
2753
  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
2754
    /* Even part */
    /* Uses input rows 0, 4, 2 and 6.  tmp0 first holds the scaled row-0
     * term and is then reused, with tmp1..tmp3, for the rows-2/6 terms.
     */
2755
2756
    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
2757
    tmp0 <<= CONST_BITS;
2758
    /* Add fudge factor here for final descale. */
2759
    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
2760
2761
    z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
2762
    tmp1 = MULTIPLY(z1, FIX(1.306562965));      /* c4[16] = c2[8] */
2763
    tmp2 = MULTIPLY(z1, FIX_0_541196100);       /* c12[16] = c6[8] */
2764
2765
    tmp10 = tmp0 + tmp1;
2766
    tmp11 = tmp0 - tmp1;
2767
    tmp12 = tmp0 + tmp2;
2768
    tmp13 = tmp0 - tmp2;
2769
2770
    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
2771
    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
2772
    z3 = z1 - z2;
2773
    z4 = MULTIPLY(z3, FIX(0.275899379));        /* c14[16] = c7[8] */
2774
    z3 = MULTIPLY(z3, FIX(1.387039845));        /* c2[16] = c1[8] */
2775
2776
    tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447);  /* (c6+c2)[16] = (c3+c1)[8] */
2777
    tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223);  /* (c6-c14)[16] = (c3-c7)[8] */
2778
    tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
2779
    tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
2780
2781
    tmp20 = tmp10 + tmp0;
2782
    tmp27 = tmp10 - tmp0;
2783
    tmp21 = tmp12 + tmp1;
2784
    tmp26 = tmp12 - tmp1;
2785
    tmp22 = tmp13 + tmp2;
2786
    tmp25 = tmp13 - tmp2;
2787
    tmp23 = tmp11 + tmp3;
2788
    tmp24 = tmp11 - tmp3;
    /* tmp20..tmp27 now carry the even-part results; tmp0..tmp3 and
     * tmp10..tmp13 are overwritten below and reused as the eight odd terms.
     */
2789
2790
    /* Odd part */
    /* Uses input rows 1, 3, 5 and 7.  z1 and z2 are recycled as scratch for
     * shared products once their original inputs are no longer needed.
     */
2791
2792
    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
2793
    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
2794
    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
2795
    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
2796
2797
    tmp11 = z1 + z3;
2798
2799
    tmp1  = MULTIPLY(z1 + z2, FIX(1.353318001));   /* c3 */
2800
    tmp2  = MULTIPLY(tmp11,   FIX(1.247225013));   /* c5 */
2801
    tmp3  = MULTIPLY(z1 + z4, FIX(1.093201867));   /* c7 */
2802
    tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586));   /* c9 */
2803
    tmp11 = MULTIPLY(tmp11,   FIX(0.666655658));   /* c11 */
2804
    tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528));   /* c13 */
2805
    tmp0  = tmp1 + tmp2 + tmp3 -
2806
      MULTIPLY(z1, FIX(2.286341144));        /* c7+c5+c3-c1 */
2807
    tmp13 = tmp10 + tmp11 + tmp12 -
2808
      MULTIPLY(z1, FIX(1.835730603));        /* c9+c11+c13-c15 */
2809
    z1    = MULTIPLY(z2 + z3, FIX(0.138617169));   /* c15 */
2810
    tmp1  += z1 + MULTIPLY(z2, FIX(0.071888074));  /* c9+c11-c3-c15 */
2811
    tmp2  += z1 - MULTIPLY(z3, FIX(1.125726048));  /* c5+c7+c15-c3 */
2812
    z1    = MULTIPLY(z3 - z2, FIX(1.407403738));   /* c1 */
2813
    tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282));  /* c1+c11-c9-c13 */
2814
    tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411));  /* c1+c5+c13-c7 */
2815
    z2    += z4;
2816
    z1    = MULTIPLY(z2, - FIX(0.666655658));      /* -c11 */
2817
    tmp1  += z1;
2818
    tmp3  += z1 + MULTIPLY(z4, FIX(1.065388962));  /* c3+c11+c15-c7 */
2819
    z2    = MULTIPLY(z2, - FIX(1.247225013));      /* -c5 */
2820
    tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809));  /* c1+c5+c9-c13 */
2821
    tmp12 += z2;
2822
    z2    = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
2823
    tmp2  += z2;
2824
    tmp3  += z2;
2825
    z2    = MULTIPLY(z4 - z3, FIX(0.410524528));   /* c13 */
2826
    tmp10 += z2;
2827
    tmp11 += z2;
2828
2829
    /* Final output stage */
    /* Workspace rows k and 15-k (k = 0..7) are the sum and difference of
     * even term tmp2k and the k-th odd term, taken in the order tmp0, tmp1,
     * tmp2, tmp3, tmp10, tmp11, tmp12, tmp13.  Every value is descaled by
     * CONST_BITS-PASS1_BITS.
     */
2830
2831
    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp0,  CONST_BITS-PASS1_BITS);
2832
    wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0,  CONST_BITS-PASS1_BITS);
2833
    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp1,  CONST_BITS-PASS1_BITS);
2834
    wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1,  CONST_BITS-PASS1_BITS);
2835
    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp2,  CONST_BITS-PASS1_BITS);
2836
    wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2,  CONST_BITS-PASS1_BITS);
2837
    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp3,  CONST_BITS-PASS1_BITS);
2838
    wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3,  CONST_BITS-PASS1_BITS);
2839
    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS);
2840
    wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS);
2841
    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS);
2842
    wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS);
2843
    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS);
2844
    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS);
2845
    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS);
2846
    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS);
2847
  }
2848
2849
  /* Pass 2: process 16 rows from work array, store into output array. */
  /* Each iteration consumes one workspace row of 8 ints (wsptr advances by
   * 8 at the bottom of the loop) and writes 16 samples into output row ctr,
   * starting at column output_col.  The arithmetic mirrors pass 1, with
   * workspace values in place of dequantized coefficients.
   */
2850
2851
  wsptr = workspace;
2852
  for (ctr = 0; ctr < 16; ctr++) {
2853
    outptr = output_buf[ctr] + output_col;
2854
2855
    /* Even part */
2856
2857
    /* Add range center and fudge factor for final descale and range-limit. */
    /* NOTE(review): PASS2_OFFSET is defined outside this view; presumably it
     * bundles the range center (and rounding when PASS2_BITS > 0) - confirm.
     */
2858
    tmp0 = (INT32) wsptr[0] + PASS2_OFFSET;
2859
    tmp0 <<= CONST_BITS;
2860
#if PASS2_BITS == 0
2861
    tmp0 += ONE << (CONST_BITS-1);
2862
#endif
2863
2864
    z1 = (INT32) wsptr[4];
2865
    tmp1 = MULTIPLY(z1, FIX(1.306562965));      /* c4[16] = c2[8] */
2866
    tmp2 = MULTIPLY(z1, FIX_0_541196100);       /* c12[16] = c6[8] */
2867
2868
    tmp10 = tmp0 + tmp1;
2869
    tmp11 = tmp0 - tmp1;
2870
    tmp12 = tmp0 + tmp2;
2871
    tmp13 = tmp0 - tmp2;
2872
2873
    z1 = (INT32) wsptr[2];
2874
    z2 = (INT32) wsptr[6];
2875
    z3 = z1 - z2;
2876
    z4 = MULTIPLY(z3, FIX(0.275899379));        /* c14[16] = c7[8] */
2877
    z3 = MULTIPLY(z3, FIX(1.387039845));        /* c2[16] = c1[8] */
2878
2879
    tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447);  /* (c6+c2)[16] = (c3+c1)[8] */
2880
    tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223);  /* (c6-c14)[16] = (c3-c7)[8] */
2881
    tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
2882
    tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
2883
2884
    tmp20 = tmp10 + tmp0;
2885
    tmp27 = tmp10 - tmp0;
2886
    tmp21 = tmp12 + tmp1;
2887
    tmp26 = tmp12 - tmp1;
2888
    tmp22 = tmp13 + tmp2;
2889
    tmp25 = tmp13 - tmp2;
2890
    tmp23 = tmp11 + tmp3;
2891
    tmp24 = tmp11 - tmp3;
2892
2893
    /* Odd part */
2894
2895
    z1 = (INT32) wsptr[1];
2896
    z2 = (INT32) wsptr[3];
2897
    z3 = (INT32) wsptr[5];
2898
    z4 = (INT32) wsptr[7];
2899
2900
    tmp11 = z1 + z3;
2901
2902
    tmp1  = MULTIPLY(z1 + z2, FIX(1.353318001));   /* c3 */
2903
    tmp2  = MULTIPLY(tmp11,   FIX(1.247225013));   /* c5 */
2904
    tmp3  = MULTIPLY(z1 + z4, FIX(1.093201867));   /* c7 */
2905
    tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586));   /* c9 */
2906
    tmp11 = MULTIPLY(tmp11,   FIX(0.666655658));   /* c11 */
2907
    tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528));   /* c13 */
2908
    tmp0  = tmp1 + tmp2 + tmp3 -
2909
      MULTIPLY(z1, FIX(2.286341144));        /* c7+c5+c3-c1 */
2910
    tmp13 = tmp10 + tmp11 + tmp12 -
2911
      MULTIPLY(z1, FIX(1.835730603));        /* c9+c11+c13-c15 */
2912
    z1    = MULTIPLY(z2 + z3, FIX(0.138617169));   /* c15 */
2913
    tmp1  += z1 + MULTIPLY(z2, FIX(0.071888074));  /* c9+c11-c3-c15 */
2914
    tmp2  += z1 - MULTIPLY(z3, FIX(1.125726048));  /* c5+c7+c15-c3 */
2915
    z1    = MULTIPLY(z3 - z2, FIX(1.407403738));   /* c1 */
2916
    tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282));  /* c1+c11-c9-c13 */
2917
    tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411));  /* c1+c5+c13-c7 */
2918
    z2    += z4;
2919
    z1    = MULTIPLY(z2, - FIX(0.666655658));      /* -c11 */
2920
    tmp1  += z1;
2921
    tmp3  += z1 + MULTIPLY(z4, FIX(1.065388962));  /* c3+c11+c15-c7 */
2922
    z2    = MULTIPLY(z2, - FIX(1.247225013));      /* -c5 */
2923
    tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809));  /* c1+c5+c9-c13 */
2924
    tmp12 += z2;
2925
    z2    = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
2926
    tmp2  += z2;
2927
    tmp3  += z2;
2928
    z2    = MULTIPLY(z4 - z3, FIX(0.410524528));   /* c13 */
2929
    tmp10 += z2;
2930
    tmp11 += z2;
2931
2932
    /* Final output stage */
    /* Samples k and 15-k are even term +/- odd term.  Each sum is shifted
     * right by CONST_BITS+PASS2_BITS, masked with RANGE_MASK, and used as an
     * index into range_limit to get the sample.
     */
2933
2934
    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0,
2935
                 CONST_BITS+PASS2_BITS)
2936
           & RANGE_MASK];
2937
    outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0,
2938
                 CONST_BITS+PASS2_BITS)
2939
           & RANGE_MASK];
2940
    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1,
2941
                 CONST_BITS+PASS2_BITS)
2942
           & RANGE_MASK];
2943
    outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1,
2944
                 CONST_BITS+PASS2_BITS)
2945
           & RANGE_MASK];
2946
    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2,
2947
                 CONST_BITS+PASS2_BITS)
2948
           & RANGE_MASK];
2949
    outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2,
2950
                 CONST_BITS+PASS2_BITS)
2951
           & RANGE_MASK];
2952
    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3,
2953
                 CONST_BITS+PASS2_BITS)
2954
           & RANGE_MASK];
2955
    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3,
2956
                 CONST_BITS+PASS2_BITS)
2957
           & RANGE_MASK];
2958
    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10,
2959
                 CONST_BITS+PASS2_BITS)
2960
           & RANGE_MASK];
2961
    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10,
2962
                 CONST_BITS+PASS2_BITS)
2963
           & RANGE_MASK];
2964
    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11,
2965
                 CONST_BITS+PASS2_BITS)
2966
           & RANGE_MASK];
2967
    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11,
2968
                 CONST_BITS+PASS2_BITS)
2969
           & RANGE_MASK];
2970
    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12,
2971
                 CONST_BITS+PASS2_BITS)
2972
           & RANGE_MASK];
2973
    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12,
2974
                 CONST_BITS+PASS2_BITS)
2975
           & RANGE_MASK];
2976
    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13,
2977
                 CONST_BITS+PASS2_BITS)
2978
           & RANGE_MASK];
2979
    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13,
2980
                 CONST_BITS+PASS2_BITS)
2981
           & RANGE_MASK];
2982
2983
    wsptr += 8;   /* advance pointer to next row */
2984
  }
2985
}
2986
2987
2988
/*
2989
 * Perform dequantization and inverse DCT on one block of coefficients,
2990
 * producing a 16x8 output block.
2991
 *
2992
 * 8-point IDCT in pass 1 (columns), 16-point in pass 2 (rows).
2993
 */
2994
2995
GLOBAL(void)
2996
jpeg_idct_16x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
2997
    JCOEFPTR coef_block,
2998
    JSAMPARRAY output_buf, JDIMENSION output_col)
2999
{
3000
  INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
3001
  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
3002
  INT32 z1, z2, z3, z4;
3003
  JCOEFPTR inptr;
3004
  ISLOW_MULT_TYPE * quantptr;
3005
  int * wsptr;
3006
  JSAMPROW outptr;
3007
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3008
  int ctr;
3009
  int workspace[8*8]; /* buffers data between passes */
3010
  SHIFT_TEMPS
3011
3012
  /* Pass 1: process columns from input, store into work array.
3013
   * Note results are scaled up by sqrt(8) compared to a true IDCT;
3014
   * furthermore, we scale the results by 2**PASS1_BITS.
3015
   * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
3016
   */
3017
3018
  inptr = coef_block;
3019
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3020
  wsptr = workspace;
3021
  for (ctr = DCTSIZE; ctr > 0; ctr--) {
3022
    /* Due to quantization, we will usually find that many of the input
3023
     * coefficients are zero, especially the AC terms.  We can exploit this
3024
     * by short-circuiting the IDCT calculation for any column in which all
3025
     * the AC terms are zero.  In that case each output is equal to the
3026
     * DC coefficient (with scale factor as needed).
3027
     * With typical images and quantization tables, half or more of the
3028
     * column DCT calculations can be simplified this way.
3029
     */
3030
3031
    if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
3032
  inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
3033
  inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
3034
  inptr[DCTSIZE*7] == 0) {
3035
      /* AC terms all zero */
3036
#if PASS1_BITS > 0
3037
      int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
3038
#else
3039
      int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3040
#endif
3041
3042
      wsptr[DCTSIZE*0] = dcval;
3043
      wsptr[DCTSIZE*1] = dcval;
3044
      wsptr[DCTSIZE*2] = dcval;
3045
      wsptr[DCTSIZE*3] = dcval;
3046
      wsptr[DCTSIZE*4] = dcval;
3047
      wsptr[DCTSIZE*5] = dcval;
3048
      wsptr[DCTSIZE*6] = dcval;
3049
      wsptr[DCTSIZE*7] = dcval;
3050
3051
      inptr++;      /* advance pointers to next column */
3052
      quantptr++;
3053
      wsptr++;
3054
      continue;
3055
    }
3056
3057
    /* Even part: reverse the even part of the forward DCT.
3058
     * The rotator is c(-6).
3059
     */
3060
3061
    z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3062
    z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
3063
    z2 <<= CONST_BITS;
3064
    z3 <<= CONST_BITS;
3065
    /* Add fudge factor here for final descale. */
3066
    z2 += ONE << (CONST_BITS-PASS1_BITS-1);
3067
3068
    tmp0 = z2 + z3;
3069
    tmp1 = z2 - z3;
3070
3071
    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3072
    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
3073
3074
    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
3075
    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
3076
    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
3077
3078
    tmp10 = tmp0 + tmp2;
3079
    tmp13 = tmp0 - tmp2;
3080
    tmp11 = tmp1 + tmp3;
3081
    tmp12 = tmp1 - tmp3;
3082
3083
    /* Odd part per figure 8; the matrix is unitary and hence its
3084
     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
3085
     */
3086
3087
    tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
3088
    tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
3089
    tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
3090
    tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3091
3092
    z2 = tmp0 + tmp2;
3093
    z3 = tmp1 + tmp3;
3094
3095
    z1 = MULTIPLY(z2 + z3, FIX_1_175875602);       /*  c3 */
3096
    z2 = MULTIPLY(z2, - FIX_1_961570560);          /* -c3-c5 */
3097
    z3 = MULTIPLY(z3, - FIX_0_390180644);          /* -c3+c5 */
3098
    z2 += z1;
3099
    z3 += z1;
3100
3101
    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
3102
    tmp0 = MULTIPLY(tmp0, FIX_0_298631336);        /* -c1+c3+c5-c7 */
3103
    tmp3 = MULTIPLY(tmp3, FIX_1_501321110);        /*  c1+c3-c5-c7 */
3104
    tmp0 += z1 + z2;
3105
    tmp3 += z1 + z3;
3106
3107
    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
3108
    tmp1 = MULTIPLY(tmp1, FIX_2_053119869);        /*  c1+c3-c5+c7 */
3109
    tmp2 = MULTIPLY(tmp2, FIX_3_072711026);        /*  c1+c3+c5-c7 */
3110
    tmp1 += z1 + z3;
3111
    tmp2 += z1 + z2;
3112
3113
    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
3114
3115
    wsptr[DCTSIZE*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
3116
    wsptr[DCTSIZE*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
3117
    wsptr[DCTSIZE*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
3118
    wsptr[DCTSIZE*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
3119
    wsptr[DCTSIZE*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
3120
    wsptr[DCTSIZE*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
3121
    wsptr[DCTSIZE*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
3122
    wsptr[DCTSIZE*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
3123
3124
    inptr++;      /* advance pointers to next column */
3125
    quantptr++;
3126
    wsptr++;
3127
  }
3128
3129
  /* Pass 2: process 8 rows from work array, store into output array.
3130
   * 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
3131
   */
3132
3133
  wsptr = workspace;
3134
  for (ctr = 0; ctr < 8; ctr++) {
3135
    outptr = output_buf[ctr] + output_col;
3136
3137
    /* Even part */
3138
3139
    /* Add range center and fudge factor for final descale and range-limit. */
3140
    tmp0 = (INT32) wsptr[0] + PASS2_OFFSET;
3141
    tmp0 <<= CONST_BITS;
3142
#if PASS2_BITS == 0
3143
    tmp0 += ONE << (CONST_BITS-1);
3144
#endif
3145
3146
    z1 = (INT32) wsptr[4];
3147
    tmp1 = MULTIPLY(z1, FIX(1.306562965));      /* c4[16] = c2[8] */
3148
    tmp2 = MULTIPLY(z1, FIX_0_541196100);       /* c12[16] = c6[8] */
3149
3150
    tmp10 = tmp0 + tmp1;
3151
    tmp11 = tmp0 - tmp1;
3152
    tmp12 = tmp0 + tmp2;
3153
    tmp13 = tmp0 - tmp2;
3154
3155
    z1 = (INT32) wsptr[2];
3156
    z2 = (INT32) wsptr[6];
3157
    z3 = z1 - z2;
3158
    z4 = MULTIPLY(z3, FIX(0.275899379));        /* c14[16] = c7[8] */
3159
    z3 = MULTIPLY(z3, FIX(1.387039845));        /* c2[16] = c1[8] */
3160
3161
    tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447);  /* (c6+c2)[16] = (c3+c1)[8] */
3162
    tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223);  /* (c6-c14)[16] = (c3-c7)[8] */
3163
    tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
3164
    tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
3165
3166
    tmp20 = tmp10 + tmp0;
3167
    tmp27 = tmp10 - tmp0;
3168
    tmp21 = tmp12 + tmp1;
3169
    tmp26 = tmp12 - tmp1;
3170
    tmp22 = tmp13 + tmp2;
3171
    tmp25 = tmp13 - tmp2;
3172
    tmp23 = tmp11 + tmp3;
3173
    tmp24 = tmp11 - tmp3;
3174
3175
    /* Odd part */
3176
3177
    z1 = (INT32) wsptr[1];
3178
    z2 = (INT32) wsptr[3];
3179
    z3 = (INT32) wsptr[5];
3180
    z4 = (INT32) wsptr[7];
3181
3182
    tmp11 = z1 + z3;
3183
3184
    tmp1  = MULTIPLY(z1 + z2, FIX(1.353318001));   /* c3 */
3185
    tmp2  = MULTIPLY(tmp11,   FIX(1.247225013));   /* c5 */
3186
    tmp3  = MULTIPLY(z1 + z4, FIX(1.093201867));   /* c7 */
3187
    tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586));   /* c9 */
3188
    tmp11 = MULTIPLY(tmp11,   FIX(0.666655658));   /* c11 */
3189
    tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528));   /* c13 */
3190
    tmp0  = tmp1 + tmp2 + tmp3 -
3191
      MULTIPLY(z1, FIX(2.286341144));        /* c7+c5+c3-c1 */
3192
    tmp13 = tmp10 + tmp11 + tmp12 -
3193
      MULTIPLY(z1, FIX(1.835730603));        /* c9+c11+c13-c15 */
3194
    z1    = MULTIPLY(z2 + z3, FIX(0.138617169));   /* c15 */
3195
    tmp1  += z1 + MULTIPLY(z2, FIX(0.071888074));  /* c9+c11-c3-c15 */
3196
    tmp2  += z1 - MULTIPLY(z3, FIX(1.125726048));  /* c5+c7+c15-c3 */
3197
    z1    = MULTIPLY(z3 - z2, FIX(1.407403738));   /* c1 */
3198
    tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282));  /* c1+c11-c9-c13 */
3199
    tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411));  /* c1+c5+c13-c7 */
3200
    z2    += z4;
3201
    z1    = MULTIPLY(z2, - FIX(0.666655658));      /* -c11 */
3202
    tmp1  += z1;
3203
    tmp3  += z1 + MULTIPLY(z4, FIX(1.065388962));  /* c3+c11+c15-c7 */
3204
    z2    = MULTIPLY(z2, - FIX(1.247225013));      /* -c5 */
3205
    tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809));  /* c1+c5+c9-c13 */
3206
    tmp12 += z2;
3207
    z2    = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
3208
    tmp2  += z2;
3209
    tmp3  += z2;
3210
    z2    = MULTIPLY(z4 - z3, FIX(0.410524528));   /* c13 */
3211
    tmp10 += z2;
3212
    tmp11 += z2;
3213
3214
    /* Final output stage */
3215
3216
    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0,
3217
                 CONST_BITS+PASS2_BITS)
3218
           & RANGE_MASK];
3219
    outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0,
3220
                 CONST_BITS+PASS2_BITS)
3221
           & RANGE_MASK];
3222
    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1,
3223
                 CONST_BITS+PASS2_BITS)
3224
           & RANGE_MASK];
3225
    outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1,
3226
                 CONST_BITS+PASS2_BITS)
3227
           & RANGE_MASK];
3228
    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2,
3229
                 CONST_BITS+PASS2_BITS)
3230
           & RANGE_MASK];
3231
    outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2,
3232
                 CONST_BITS+PASS2_BITS)
3233
           & RANGE_MASK];
3234
    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3,
3235
                 CONST_BITS+PASS2_BITS)
3236
           & RANGE_MASK];
3237
    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3,
3238
                 CONST_BITS+PASS2_BITS)
3239
           & RANGE_MASK];
3240
    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10,
3241
                 CONST_BITS+PASS2_BITS)
3242
           & RANGE_MASK];
3243
    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10,
3244
                 CONST_BITS+PASS2_BITS)
3245
           & RANGE_MASK];
3246
    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11,
3247
                 CONST_BITS+PASS2_BITS)
3248
           & RANGE_MASK];
3249
    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11,
3250
                 CONST_BITS+PASS2_BITS)
3251
           & RANGE_MASK];
3252
    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12,
3253
                 CONST_BITS+PASS2_BITS)
3254
           & RANGE_MASK];
3255
    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12,
3256
                 CONST_BITS+PASS2_BITS)
3257
           & RANGE_MASK];
3258
    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13,
3259
                 CONST_BITS+PASS2_BITS)
3260
           & RANGE_MASK];
3261
    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13,
3262
                 CONST_BITS+PASS2_BITS)
3263
           & RANGE_MASK];
3264
3265
    wsptr += 8;   /* advance pointer to next row */
3266
  }
3267
}
3268
3269
3270
/*
3271
 * Perform dequantization and inverse DCT on one block of coefficients,
3272
 * producing a 14x7 output block.
3273
 *
3274
 * 7-point IDCT in pass 1 (columns), 14-point in pass 2 (rows).
 *
 * Parameters as used by this routine:
 *   cinfo      - only passed to IDCT_range_limit() to obtain the
 *                range-limit lookup table.
 *   compptr    - compptr->dct_table supplies the multipliers handed to
 *                DEQUANTIZE together with each coefficient.
 *   coef_block - input coefficients; entries [DCTSIZE*0]..[DCTSIZE*6]
 *                of each of 8 consecutive columns are read.
 *   output_buf - rows output_buf[0]..output_buf[6] are written.
 *   output_col - offset added to each row pointer; 14 samples are
 *                stored per row starting there.
 *
 * NOTE(review): several intermediate values are scaled with "<<" on
 * signed INT32 operands; left-shifting a negative value is undefined
 * in ISO C, so this relies on the usual two's-complement behavior of
 * the target compilers -- confirm if strict UB-freedom is required.
3275
 */
3276
3277
GLOBAL(void)
3278
jpeg_idct_14x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3279
    JCOEFPTR coef_block,
3280
    JSAMPARRAY output_buf, JDIMENSION output_col)
3281
{
3282
  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
3283
  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
3284
  INT32 z1, z2, z3, z4;
3285
  JCOEFPTR inptr;
3286
  ISLOW_MULT_TYPE * quantptr;
3287
  int * wsptr;
3288
  JSAMPROW outptr;
3289
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3290
  int ctr;
3291
  /* 7 rows of 8 ints: pass 1 stores row k of a column at wsptr[8*k]. */
  int workspace[8*7]; /* buffers data between passes */
3292
  SHIFT_TEMPS
3293
3294
  /* Pass 1: process columns from input, store into work array.
3295
   * 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
3296
   */
3297
3298
  inptr = coef_block;
3299
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3300
  wsptr = workspace;
3301
  /* All three pointers advance by one element per column. */
  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
3302
    /* Even part */
3303
3304
    tmp23 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3305
    tmp23 <<= CONST_BITS;
3306
    /* Add fudge factor here for final descale. */
3307
    tmp23 += ONE << (CONST_BITS-PASS1_BITS-1);
3308
3309
    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3310
    z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
3311
    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
3312
3313
    tmp20 = MULTIPLY(z2 - z3, FIX(0.881747734));       /* c4 */
3314
    tmp22 = MULTIPLY(z1 - z2, FIX(0.314692123));       /* c6 */
3315
    tmp21 = tmp20 + tmp22 + tmp23 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
3316
    /* tmp10 is first the plain sum z1+z3, then reused for its product. */
    tmp10 = z1 + z3;
3317
    z2 -= tmp10;
3318
    tmp10 = MULTIPLY(tmp10, FIX(1.274162392)) + tmp23; /* c2 */
3319
    tmp20 += tmp10 - MULTIPLY(z3, FIX(0.077722536));   /* c2-c4-c6 */
3320
    tmp22 += tmp10 - MULTIPLY(z1, FIX(2.470602249));   /* c2+c4+c6 */
3321
    tmp23 += MULTIPLY(z2, FIX(1.414213562));           /* c0 */
3322
3323
    /* Odd part */
3324
3325
    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3326
    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
3327
    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
3328
3329
    tmp11 = MULTIPLY(z1 + z2, FIX(0.935414347));       /* (c3+c1-c5)/2 */
3330
    tmp12 = MULTIPLY(z1 - z2, FIX(0.170262339));       /* (c3+c5-c1)/2 */
3331
    tmp10 = tmp11 - tmp12;
3332
    tmp11 += tmp12;
3333
    tmp12 = MULTIPLY(z2 + z3, - FIX(1.378756276));     /* -c1 */
3334
    tmp11 += tmp12;
3335
    z2 = MULTIPLY(z1 + z3, FIX(0.613604268));          /* c5 */
3336
    tmp10 += z2;
3337
    tmp12 += z2 + MULTIPLY(z3, FIX(1.870828693));      /* c3+c1-c5 */
3338
3339
    /* Final output stage */
3340
3341
    /* Rows k and 6-k are the sum and difference of the same even/odd pair. */
    wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
3342
    wsptr[8*6] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
3343
    wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
3344
    wsptr[8*5] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
3345
    wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
3346
    wsptr[8*4] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
3347
    /* The middle row takes the even-part term only. */
    wsptr[8*3] = (int) RIGHT_SHIFT(tmp23, CONST_BITS-PASS1_BITS);
3348
  }
3349
3350
  /* Pass 2: process 7 rows from work array, store into output array.
3351
   * 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
3352
   */
3353
3354
  wsptr = workspace;
3355
  for (ctr = 0; ctr < 7; ctr++) {
3356
    outptr = output_buf[ctr] + output_col;
3357
3358
    /* Even part */
3359
3360
    /* Add range center and fudge factor for final descale and range-limit. */
3361
    z1 = (INT32) wsptr[0] + PASS2_OFFSET;
3362
    z1 <<= CONST_BITS;
3363
#if PASS2_BITS == 0
3364
    z1 += ONE << (CONST_BITS-1);
3365
#endif
3366
    z4 = (INT32) wsptr[4];
3367
    z2 = MULTIPLY(z4, FIX(1.274162392));         /* c4 */
3368
    z3 = MULTIPLY(z4, FIX(0.314692123));         /* c12 */
3369
    z4 = MULTIPLY(z4, FIX(0.881747734));         /* c8 */
3370
3371
    tmp10 = z1 + z2;
3372
    tmp11 = z1 + z3;
3373
    tmp12 = z1 - z4;
3374
3375
    tmp23 = z1 - ((z2 + z3 - z4) << 1);          /* c0 = (c4+c12-c8)*2 */
3376
3377
    z1 = (INT32) wsptr[2];
3378
    z2 = (INT32) wsptr[6];
3379
3380
    z3 = MULTIPLY(z1 + z2, FIX(1.105676686));    /* c6 */
3381
3382
    tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
3383
    tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
3384
    tmp15 = MULTIPLY(z1, FIX(0.613604268)) -     /* c10 */
3385
      MULTIPLY(z2, FIX(1.378756276));      /* c2 */
3386
3387
    tmp20 = tmp10 + tmp13;
3388
    tmp26 = tmp10 - tmp13;
3389
    tmp21 = tmp11 + tmp14;
3390
    tmp25 = tmp11 - tmp14;
3391
    tmp22 = tmp12 + tmp15;
3392
    tmp24 = tmp12 - tmp15;
3393
3394
    /* Odd part */
3395
3396
    z1 = (INT32) wsptr[1];
3397
    z2 = (INT32) wsptr[3];
3398
    z3 = (INT32) wsptr[5];
3399
    z4 = (INT32) wsptr[7];
3400
    /* z4 is added to and subtracted from MULTIPLY products below without a
     * multiply of its own; the shift presumably brings it to the same scale
     * (FIX/MULTIPLY are defined outside this chunk -- confirm).
     */
    z4 <<= CONST_BITS;
3401
3402
    /* tmp14 holds the plain sum z1+z3 until it is replaced by its c9 product. */
    tmp14 = z1 + z3;
3403
    tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607));           /* c3 */
3404
    tmp12 = MULTIPLY(tmp14, FIX(1.197448846));             /* c5 */
3405
    tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
3406
    tmp14 = MULTIPLY(tmp14, FIX(0.752406978));             /* c9 */
3407
    tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426));        /* c9+c11-c13 */
3408
    /* From here on z1 is wsptr[1] - wsptr[3]. */
    z1    -= z2;
3409
    tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4;           /* c11 */
3410
    tmp16 += tmp15;
3411
    tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4;    /* -c13 */
3412
    tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948));       /* c3-c9-c13 */
3413
    tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773));       /* c3+c5-c13 */
3414
    tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284));           /* c1 */
3415
    tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
3416
    tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567));       /* c1+c11-c5 */
3417
3418
    /* (wsptr[1] - wsptr[3] - wsptr[5]) shifted up, plus the shifted wsptr[7];
     * no multiplication is needed for this term.
     */
    tmp13 = ((z1 - z3) << CONST_BITS) + z4;
3419
3420
    /* Final output stage */
3421
3422
    /* Samples are emitted in mirrored pairs: outptr[k] = tmp2k + tmp1k and
     * outptr[13-k] = tmp2k - tmp1k.  Each descaled value is masked with
     * RANGE_MASK before it indexes range_limit[].
     */
    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
3423
                 CONST_BITS+PASS2_BITS)
3424
           & RANGE_MASK];
3425
    outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
3426
                 CONST_BITS+PASS2_BITS)
3427
           & RANGE_MASK];
3428
    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
3429
                 CONST_BITS+PASS2_BITS)
3430
           & RANGE_MASK];
3431
    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
3432
                 CONST_BITS+PASS2_BITS)
3433
           & RANGE_MASK];
3434
    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
3435
                 CONST_BITS+PASS2_BITS)
3436
           & RANGE_MASK];
3437
    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
3438
                 CONST_BITS+PASS2_BITS)
3439
           & RANGE_MASK];
3440
    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
3441
                 CONST_BITS+PASS2_BITS)
3442
           & RANGE_MASK];
3443
    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
3444
                 CONST_BITS+PASS2_BITS)
3445
           & RANGE_MASK];
3446
    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
3447
                 CONST_BITS+PASS2_BITS)
3448
           & RANGE_MASK];
3449
    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
3450
                 CONST_BITS+PASS2_BITS)
3451
           & RANGE_MASK];
3452
    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
3453
                 CONST_BITS+PASS2_BITS)
3454
           & RANGE_MASK];
3455
    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
3456
                 CONST_BITS+PASS2_BITS)
3457
           & RANGE_MASK];
3458
    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
3459
                 CONST_BITS+PASS2_BITS)
3460
           & RANGE_MASK];
3461
    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
3462
                 CONST_BITS+PASS2_BITS)
3463
           & RANGE_MASK];
3464
3465
    wsptr += 8;   /* advance pointer to next row */
3466
  }
3467
}
3468
3469
3470
/*
3471
 * Perform dequantization and inverse DCT on one block of coefficients,
3472
 * producing a 12x6 output block.
3473
 *
3474
 * 6-point IDCT in pass 1 (columns), 12-point in pass 2 (rows).
 *
 * Parameters as used by this routine:
 *   cinfo      - only passed to IDCT_range_limit() to obtain the
 *                range-limit lookup table.
 *   compptr    - compptr->dct_table supplies the multipliers handed to
 *                DEQUANTIZE together with each coefficient.
 *   coef_block - input coefficients; entries [DCTSIZE*0]..[DCTSIZE*5]
 *                of each of 8 consecutive columns are read.
 *   output_buf - rows output_buf[0]..output_buf[5] are written.
 *   output_col - offset added to each row pointer; 12 samples are
 *                stored per row starting there.
 *
 * NOTE(review): several intermediate values are scaled with "<<" on
 * signed INT32 operands; left-shifting a negative value is undefined
 * in ISO C, so this relies on the usual two's-complement behavior of
 * the target compilers -- confirm if strict UB-freedom is required.
3475
 */
3476
3477
GLOBAL(void)
3478
jpeg_idct_12x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3479
    JCOEFPTR coef_block,
3480
    JSAMPARRAY output_buf, JDIMENSION output_col)
3481
{
3482
  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
3483
  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
3484
  INT32 z1, z2, z3, z4;
3485
  JCOEFPTR inptr;
3486
  ISLOW_MULT_TYPE * quantptr;
3487
  int * wsptr;
3488
  JSAMPROW outptr;
3489
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3490
  int ctr;
3491
  /* 6 rows of 8 ints: pass 1 stores row k of a column at wsptr[8*k]. */
  int workspace[8*6]; /* buffers data between passes */
3492
  SHIFT_TEMPS
3493
3494
  /* Pass 1: process columns from input, store into work array.
3495
   * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
3496
   */
3497
3498
  inptr = coef_block;
3499
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3500
  wsptr = workspace;
3501
  /* All three pointers advance by one element per column. */
  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
3502
    /* Even part */
3503
3504
    tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3505
    tmp10 <<= CONST_BITS;
3506
    /* Add fudge factor here for final descale. */
3507
    tmp10 += ONE << (CONST_BITS-PASS1_BITS-1);
3508
    tmp12 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
3509
    tmp20 = MULTIPLY(tmp12, FIX(0.707106781));   /* c4 */
3510
    tmp11 = tmp10 + tmp20;
3511
    /* tmp21 is descaled here already: rows 1 and 4 below add/subtract it
     * with tmp11, which is only shifted up by PASS1_BITS, and store the
     * result without a further RIGHT_SHIFT.
     */
    tmp21 = RIGHT_SHIFT(tmp10 - tmp20 - tmp20, CONST_BITS-PASS1_BITS);
3512
    /* tmp20 and tmp10 are reused: coefficient 2, then its c2 product. */
    tmp20 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3513
    tmp10 = MULTIPLY(tmp20, FIX(1.224744871));   /* c2 */
3514
    tmp20 = tmp11 + tmp10;
3515
    tmp22 = tmp11 - tmp10;
3516
3517
    /* Odd part */
3518
3519
    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3520
    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
3521
    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
3522
    tmp11 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
3523
    tmp10 = tmp11 + ((z1 + z2) << CONST_BITS);
3524
    tmp12 = tmp11 + ((z3 - z2) << CONST_BITS);
3525
#if PASS1_BITS > 0
3526
    tmp11 = (z1 - z2 - z3) << PASS1_BITS;
3527
#else
3528
    tmp11 = z1 - z2 - z3;
3529
#endif
3530
3531
    /* Final output stage */
3532
3533
    /* Rows k and 5-k are the sum and difference of the same even/odd pair. */
    wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
3534
    wsptr[8*5] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
3535
    wsptr[8*1] = (int) (tmp21 + tmp11);
3536
    wsptr[8*4] = (int) (tmp21 - tmp11);
3537
    wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
3538
    wsptr[8*3] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
3539
  }
3540
3541
  /* Pass 2: process 6 rows from work array, store into output array.
3542
   * 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
3543
   */
3544
3545
  wsptr = workspace;
3546
  for (ctr = 0; ctr < 6; ctr++) {
3547
    outptr = output_buf[ctr] + output_col;
3548
3549
    /* Even part */
3550
3551
    /* Add range center and fudge factor for final descale and range-limit. */
3552
    z3 = (INT32) wsptr[0] + PASS2_OFFSET;
3553
    z3 <<= CONST_BITS;
3554
#if PASS2_BITS == 0
3555
    z3 += ONE << (CONST_BITS-1);
3556
#endif
3557
3558
    z4 = (INT32) wsptr[4];
3559
    z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
3560
3561
    tmp10 = z3 + z4;
3562
    tmp11 = z3 - z4;
3563
3564
    z1 = (INT32) wsptr[2];
3565
    /* z4 is reused: from here it is the c2 product of wsptr[2]. */
    z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
3566
    z1 <<= CONST_BITS;
3567
    z2 = (INT32) wsptr[6];
3568
    z2 <<= CONST_BITS;
3569
3570
    /* tmp12 is a scratch value, recomputed three times below. */
    tmp12 = z1 - z2;
3571
3572
    tmp21 = z3 + tmp12;
3573
    tmp24 = z3 - tmp12;
3574
3575
    tmp12 = z4 + z2;
3576
3577
    tmp20 = tmp10 + tmp12;
3578
    tmp25 = tmp10 - tmp12;
3579
3580
    tmp12 = z4 - z1 - z2;
3581
3582
    tmp22 = tmp11 + tmp12;
3583
    tmp23 = tmp11 - tmp12;
3584
3585
    /* Odd part */
3586
3587
    z1 = (INT32) wsptr[1];
3588
    z2 = (INT32) wsptr[3];
3589
    z3 = (INT32) wsptr[5];
3590
    z4 = (INT32) wsptr[7];
3591
3592
    tmp11 = MULTIPLY(z2, FIX(1.306562965));                  /* c3 */
3593
    tmp14 = MULTIPLY(z2, - FIX_0_541196100);                 /* -c9 */
3594
3595
    tmp10 = z1 + z3;
3596
    tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669));          /* c7 */
3597
    tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384));       /* c5-c7 */
3598
    tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716));  /* c1-c5 */
3599
    tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580));           /* -(c7+c11) */
3600
    tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
3601
    tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
3602
    tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) -        /* c7-c11 */
3603
       MULTIPLY(z4, FIX(1.982889723));                 /* c5+c7 */
3604
3605
    /* tmp10, tmp12, tmp13 and tmp15 are final at this point; tmp11 and
     * tmp14 are overwritten below from the differences z1-z4 and z2-z3
     * (z3 is then reused for their shared c9 product).
     */
    z1 -= z4;
3606
    z2 -= z3;
3607
    z3 = MULTIPLY(z1 + z2, FIX_0_541196100);                 /* c9 */
3608
    tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865);              /* c3-c9 */
3609
    tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065);              /* c3+c9 */
3610
3611
    /* Final output stage */
3612
3613
    /* Samples are emitted in mirrored pairs: outptr[k] = tmp2k + tmp1k and
     * outptr[11-k] = tmp2k - tmp1k.  Each descaled value is masked with
     * RANGE_MASK before it indexes range_limit[].
     */
    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
3614
                 CONST_BITS+PASS2_BITS)
3615
           & RANGE_MASK];
3616
    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
3617
                 CONST_BITS+PASS2_BITS)
3618
           & RANGE_MASK];
3619
    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
3620
                 CONST_BITS+PASS2_BITS)
3621
           & RANGE_MASK];
3622
    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
3623
                 CONST_BITS+PASS2_BITS)
3624
           & RANGE_MASK];
3625
    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
3626
                 CONST_BITS+PASS2_BITS)
3627
           & RANGE_MASK];
3628
    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
3629
                 CONST_BITS+PASS2_BITS)
3630
           & RANGE_MASK];
3631
    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
3632
                 CONST_BITS+PASS2_BITS)
3633
           & RANGE_MASK];
3634
    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
3635
                 CONST_BITS+PASS2_BITS)
3636
           & RANGE_MASK];
3637
    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
3638
                 CONST_BITS+PASS2_BITS)
3639
           & RANGE_MASK];
3640
    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
3641
                 CONST_BITS+PASS2_BITS)
3642
           & RANGE_MASK];
3643
    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
3644
                 CONST_BITS+PASS2_BITS)
3645
           & RANGE_MASK];
3646
    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
3647
                 CONST_BITS+PASS2_BITS)
3648
           & RANGE_MASK];
3649
3650
    wsptr += 8;   /* advance pointer to next row */
3651
  }
3652
}
3653
3654
3655
/*
3656
 * Perform dequantization and inverse DCT on one block of coefficients,
3657
 * producing a 10x5 output block.
3658
 *
3659
 * 5-point IDCT in pass 1 (columns), 10-point in pass 2 (rows).
 *
 * Parameters as used by this routine:
 *   cinfo      - only passed to IDCT_range_limit() to obtain the
 *                range-limit lookup table.
 *   compptr    - compptr->dct_table supplies the multipliers handed to
 *                DEQUANTIZE together with each coefficient.
 *   coef_block - input coefficients; entries [DCTSIZE*0]..[DCTSIZE*4]
 *                of each of 8 consecutive columns are read.
 *   output_buf - rows output_buf[0]..output_buf[4] are written.
 *   output_col - offset added to each row pointer; 10 samples are
 *                stored per row starting there.
 *
 * NOTE(review): several intermediate values are scaled with "<<" on
 * signed INT32 operands; left-shifting a negative value is undefined
 * in ISO C, so this relies on the usual two's-complement behavior of
 * the target compilers -- confirm if strict UB-freedom is required.
3660
 */
3661
3662
GLOBAL(void)
3663
jpeg_idct_10x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3664
    JCOEFPTR coef_block,
3665
    JSAMPARRAY output_buf, JDIMENSION output_col)
3666
{
3667
  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
3668
  INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
3669
  INT32 z1, z2, z3, z4;
3670
  JCOEFPTR inptr;
3671
  ISLOW_MULT_TYPE * quantptr;
3672
  int * wsptr;
3673
  JSAMPROW outptr;
3674
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3675
  int ctr;
3676
  /* 5 rows of 8 ints: pass 1 stores row k of a column at wsptr[8*k]. */
  int workspace[8*5]; /* buffers data between passes */
3677
  SHIFT_TEMPS
3678
3679
  /* Pass 1: process columns from input, store into work array.
3680
   * 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
3681
   */
3682
3683
  inptr = coef_block;
3684
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3685
  wsptr = workspace;
3686
  /* All three pointers advance by one element per column. */
  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
3687
    /* Even part */
3688
3689
    tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3690
    tmp12 <<= CONST_BITS;
3691
    /* Add fudge factor here for final descale. */
3692
    tmp12 += ONE << (CONST_BITS-PASS1_BITS-1);
3693
    tmp13 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3694
    tmp14 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
3695
    z1 = MULTIPLY(tmp13 + tmp14, FIX(0.790569415)); /* (c2+c4)/2 */
3696
    z2 = MULTIPLY(tmp13 - tmp14, FIX(0.353553391)); /* (c2-c4)/2 */
3697
    z3 = tmp12 + z2;
3698
    tmp10 = z3 + z1;
3699
    tmp11 = z3 - z1;
3700
    /* tmp12 becomes the middle-row value, which is stored without an
     * odd-part contribution in the output stage below.
     */
    tmp12 -= z2 << 2;
3701
3702
    /* Odd part */
3703
3704
    z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3705
    z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
3706
3707
    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));       /* c3 */
3708
    tmp13 = z1 + MULTIPLY(z2, FIX(0.513743148));    /* c1-c3 */
3709
    tmp14 = z1 - MULTIPLY(z3, FIX(2.176250899));    /* c1+c3 */
3710
3711
    /* Final output stage */
3712
3713
    /* Rows k and 4-k are the sum and difference of the same even/odd pair. */
    wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp13, CONST_BITS-PASS1_BITS);
3714
    wsptr[8*4] = (int) RIGHT_SHIFT(tmp10 - tmp13, CONST_BITS-PASS1_BITS);
3715
    wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp14, CONST_BITS-PASS1_BITS);
3716
    wsptr[8*3] = (int) RIGHT_SHIFT(tmp11 - tmp14, CONST_BITS-PASS1_BITS);
3717
    wsptr[8*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS);
3718
  }
3719
3720
  /* Pass 2: process 5 rows from work array, store into output array.
3721
   * 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
3722
   */
3723
3724
  wsptr = workspace;
3725
  for (ctr = 0; ctr < 5; ctr++) {
3726
    outptr = output_buf[ctr] + output_col;
3727
3728
    /* Even part */
3729
3730
    /* Add range center and fudge factor for final descale and range-limit. */
3731
    z3 = (INT32) wsptr[0] + PASS2_OFFSET;
3732
    z3 <<= CONST_BITS;
3733
#if PASS2_BITS == 0
3734
    z3 += ONE << (CONST_BITS-1);
3735
#endif
3736
    z4 = (INT32) wsptr[4];
3737
    z1 = MULTIPLY(z4, FIX(1.144122806));         /* c4 */
3738
    z2 = MULTIPLY(z4, FIX(0.437016024));         /* c8 */
3739
    tmp10 = z3 + z1;
3740
    tmp11 = z3 - z2;
3741
3742
    tmp22 = z3 - ((z1 - z2) << 1);               /* c0 = (c4-c8)*2 */
3743
3744
    z2 = (INT32) wsptr[2];
3745
    z3 = (INT32) wsptr[6];
3746
3747
    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));    /* c6 */
3748
    tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
3749
    tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
3750
3751
    tmp20 = tmp10 + tmp12;
3752
    tmp24 = tmp10 - tmp12;
3753
    tmp21 = tmp11 + tmp13;
3754
    tmp23 = tmp11 - tmp13;
3755
3756
    /* Odd part */
3757
3758
    z1 = (INT32) wsptr[1];
3759
    z2 = (INT32) wsptr[3];
3760
    z3 = (INT32) wsptr[5];
3761
    /* z3 is only ever added or subtracted below, never multiplied; the
     * shift presumably brings it to the scale of the MULTIPLY products
     * (FIX/MULTIPLY are defined outside this chunk -- confirm).
     */
    z3 <<= CONST_BITS;
3762
    z4 = (INT32) wsptr[7];
3763
3764
    /* tmp11 = wsptr[3] + wsptr[7], tmp13 = wsptr[3] - wsptr[7]; both are
     * overwritten with final odd-part terms further down, after their
     * last use as inputs.
     */
    tmp11 = z2 + z4;
3765
    tmp13 = z2 - z4;
3766
3767
    tmp12 = MULTIPLY(tmp13, FIX(0.309016994));        /* (c3-c7)/2 */
3768
3769
    /* z2 and z4 are reused as scratch from here on. */
    z2 = MULTIPLY(tmp11, FIX(0.951056516));           /* (c3+c7)/2 */
3770
    z4 = z3 + tmp12;
3771
3772
    tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
3773
    tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
3774
3775
    z2 = MULTIPLY(tmp11, FIX(0.587785252));           /* (c1-c9)/2 */
3776
    z4 = z3 - tmp12 - (tmp13 << (CONST_BITS - 1));
3777
3778
    /* (wsptr[1] - wsptr[3] + wsptr[7]) shifted up, minus the shifted
     * wsptr[5]; no multiplication is needed for this term.
     */
    tmp12 = ((z1 - tmp13) << CONST_BITS) - z3;
3779
3780
    tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
3781
    tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
3782
3783
    /* Final output stage */
3784
3785
    /* Samples are emitted in mirrored pairs: outptr[k] = tmp2k + tmp1k and
     * outptr[9-k] = tmp2k - tmp1k.  Each descaled value is masked with
     * RANGE_MASK before it indexes range_limit[].
     */
    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
3786
                CONST_BITS+PASS2_BITS)
3787
          & RANGE_MASK];
3788
    outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
3789
                CONST_BITS+PASS2_BITS)
3790
          & RANGE_MASK];
3791
    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
3792
                CONST_BITS+PASS2_BITS)
3793
          & RANGE_MASK];
3794
    outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
3795
                CONST_BITS+PASS2_BITS)
3796
          & RANGE_MASK];
3797
    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
3798
                CONST_BITS+PASS2_BITS)
3799
          & RANGE_MASK];
3800
    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
3801
                CONST_BITS+PASS2_BITS)
3802
          & RANGE_MASK];
3803
    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
3804
                CONST_BITS+PASS2_BITS)
3805
          & RANGE_MASK];
3806
    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
3807
                CONST_BITS+PASS2_BITS)
3808
          & RANGE_MASK];
3809
    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
3810
                CONST_BITS+PASS2_BITS)
3811
          & RANGE_MASK];
3812
    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
3813
                CONST_BITS+PASS2_BITS)
3814
          & RANGE_MASK];
3815
3816
    wsptr += 8;   /* advance pointer to next row */
3817
  }
3818
}
3819
3820
3821
/*
3822
 * Perform dequantization and inverse DCT on one block of coefficients,
3823
 * producing an 8x4 output block.
3824
 *
3825
 * 4-point IDCT in pass 1 (columns), 8-point in pass 2 (rows).
3826
 */
3827
3828
GLOBAL(void)
3829
jpeg_idct_8x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3830
         JCOEFPTR coef_block,
3831
         JSAMPARRAY output_buf, JDIMENSION output_col)
3832
{
3833
  INT32 tmp0, tmp1, tmp2, tmp3;
3834
  INT32 tmp10, tmp11, tmp12, tmp13;
3835
  INT32 z1, z2, z3;
3836
  JCOEFPTR inptr;
3837
  ISLOW_MULT_TYPE * quantptr;
3838
  int * wsptr;
3839
  JSAMPROW outptr;
3840
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3841
  int ctr;
3842
  int workspace[8*4]; /* buffers data between passes */
3843
  SHIFT_TEMPS
3844
3845
  /* Pass 1: process columns from input, store into work array.
3846
   * 4-point IDCT kernel,
3847
   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
3848
   */
3849
3850
  inptr = coef_block;
3851
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3852
  wsptr = workspace;
3853
  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
3854
    /* Even part */
3855
3856
    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3857
    tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3858
3859
#if PASS1_BITS > 0
3860
    tmp10 = (tmp0 + tmp2) << PASS1_BITS;
3861
    tmp12 = (tmp0 - tmp2) << PASS1_BITS;
3862
#else
3863
    tmp10 = tmp0 + tmp2;
3864
    tmp12 = tmp0 - tmp2;
3865
#endif
3866
3867
    /* Odd part */
3868
    /* Same rotation as in the even part of the 8x8 LL&M IDCT */
3869
3870
    z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3871
    z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
3872
3873
    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);               /* c6 */
3874
    /* Add fudge factor here for final descale. */
3875
    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
3876
    tmp0 = RIGHT_SHIFT(z1 + MULTIPLY(z2, FIX_0_765366865), /* c2-c6 */
3877
           CONST_BITS-PASS1_BITS);
3878
    tmp2 = RIGHT_SHIFT(z1 - MULTIPLY(z3, FIX_1_847759065), /* c2+c6 */
3879
           CONST_BITS-PASS1_BITS);
3880
3881
    /* Final output stage */
3882
3883
    wsptr[8*0] = (int) (tmp10 + tmp0);
3884
    wsptr[8*3] = (int) (tmp10 - tmp0);
3885
    wsptr[8*1] = (int) (tmp12 + tmp2);
3886
    wsptr[8*2] = (int) (tmp12 - tmp2);
3887
  }
3888
3889
  /* Pass 2: process rows from work array, store into output array.
3890
   * Note that we must descale the results by a factor of 8 == 2**3,
3891
   * which is folded into the PASS2_BITS value.
3892
   * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
3893
   */
3894
3895
  wsptr = workspace;
3896
  for (ctr = 0; ctr < 4; ctr++) {
3897
    outptr = output_buf[ctr] + output_col;
3898
3899
    /* Even part: reverse the even part of the forward DCT.
3900
     * The rotator is c(-6).
3901
     */
3902
3903
    /* Add range center and fudge factor for final descale and range-limit. */
3904
    z2 = (INT32) wsptr[0] + PASS2_OFFSET;
3905
    z3 = (INT32) wsptr[4];
3906
    z2 <<= CONST_BITS;
3907
    z3 <<= CONST_BITS;
3908
#if PASS2_BITS == 0
3909
    /* Add fudge factor here for final descale. */
3910
    z2 += ONE << (CONST_BITS-1);
3911
#endif
3912
3913
    tmp0 = z2 + z3;
3914
    tmp1 = z2 - z3;
3915
3916
    z2 = (INT32) wsptr[2];
3917
    z3 = (INT32) wsptr[6];
3918
3919
    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
3920
    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
3921
    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
3922
3923
    tmp10 = tmp0 + tmp2;
3924
    tmp13 = tmp0 - tmp2;
3925
    tmp11 = tmp1 + tmp3;
3926
    tmp12 = tmp1 - tmp3;
3927
3928
    /* Odd part per figure 8; the matrix is unitary and hence its
3929
     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
3930
     */
3931
3932
    tmp0 = (INT32) wsptr[7];
3933
    tmp1 = (INT32) wsptr[5];
3934
    tmp2 = (INT32) wsptr[3];
3935
    tmp3 = (INT32) wsptr[1];
3936
3937
    z2 = tmp0 + tmp2;
3938
    z3 = tmp1 + tmp3;
3939
3940
    z1 = MULTIPLY(z2 + z3, FIX_1_175875602);       /*  c3 */
3941
    z2 = MULTIPLY(z2, - FIX_1_961570560);          /* -c3-c5 */
3942
    z3 = MULTIPLY(z3, - FIX_0_390180644);          /* -c3+c5 */
3943
    z2 += z1;
3944
    z3 += z1;
3945
3946
    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
3947
    tmp0 = MULTIPLY(tmp0, FIX_0_298631336);        /* -c1+c3+c5-c7 */
3948
    tmp3 = MULTIPLY(tmp3, FIX_1_501321110);        /*  c1+c3-c5-c7 */
3949
    tmp0 += z1 + z2;
3950
    tmp3 += z1 + z3;
3951
3952
    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
3953
    tmp1 = MULTIPLY(tmp1, FIX_2_053119869);        /*  c1+c3-c5+c7 */
3954
    tmp2 = MULTIPLY(tmp2, FIX_3_072711026);        /*  c1+c3+c5-c7 */
3955
    tmp1 += z1 + z3;
3956
    tmp2 += z1 + z2;
3957
3958
    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
3959
3960
    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
3961
                CONST_BITS+PASS2_BITS)
3962
          & RANGE_MASK];
3963
    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
3964
                CONST_BITS+PASS2_BITS)
3965
          & RANGE_MASK];
3966
    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
3967
                CONST_BITS+PASS2_BITS)
3968
          & RANGE_MASK];
3969
    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
3970
                CONST_BITS+PASS2_BITS)
3971
          & RANGE_MASK];
3972
    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
3973
                CONST_BITS+PASS2_BITS)
3974
          & RANGE_MASK];
3975
    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
3976
                CONST_BITS+PASS2_BITS)
3977
          & RANGE_MASK];
3978
    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
3979
                CONST_BITS+PASS2_BITS)
3980
          & RANGE_MASK];
3981
    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
3982
                CONST_BITS+PASS2_BITS)
3983
          & RANGE_MASK];
3984
3985
    wsptr += DCTSIZE;   /* advance pointer to next row */
3986
  }
3987
}
3988
3989
3990
/*
3991
 * Perform dequantization and inverse DCT on one block of coefficients,
3992
 * producing a 6x3 output block.
3993
 *
3994
 * 3-point IDCT in pass 1 (columns), 6-point in pass 2 (rows).
3995
 */
3996
3997
GLOBAL(void)
3998
jpeg_idct_6x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3999
         JCOEFPTR coef_block,
4000
         JSAMPARRAY output_buf, JDIMENSION output_col)
4001
{
4002
  INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
4003
  INT32 z1, z2, z3;
4004
  JCOEFPTR inptr;
4005
  ISLOW_MULT_TYPE * quantptr;
4006
  int * wsptr;
4007
  JSAMPROW outptr;
4008
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4009
  int ctr;
4010
  int workspace[6*3]; /* buffers data between passes */
4011
  SHIFT_TEMPS
4012
4013
  /* Pass 1: process columns from input, store into work array.
4014
   * 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
4015
   */
4016
4017
  inptr = coef_block;
4018
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4019
  wsptr = workspace;
4020
  for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
4021
    /* Even part */
4022
4023
    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4024
    tmp0 <<= CONST_BITS;
4025
    /* Add fudge factor here for final descale. */
4026
    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
4027
    tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4028
    tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
4029
    tmp10 = tmp0 + tmp12;
4030
    tmp2 = tmp0 - tmp12 - tmp12;
4031
4032
    /* Odd part */
4033
4034
    tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4035
    tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
4036
4037
    /* Final output stage */
4038
4039
    wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
4040
    wsptr[6*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
4041
    wsptr[6*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
4042
  }
4043
  
4044
  /* Pass 2: process 3 rows from work array, store into output array.
4045
   * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
4046
   */
4047
4048
  wsptr = workspace;
4049
  for (ctr = 0; ctr < 3; ctr++) {
4050
    outptr = output_buf[ctr] + output_col;
4051
4052
    /* Even part */
4053
4054
    /* Add range center and fudge factor for final descale and range-limit. */
4055
    tmp0 = (INT32) wsptr[0] + PASS2_OFFSET;
4056
    tmp0 <<= CONST_BITS;
4057
#if PASS2_BITS == 0
4058
    tmp0 += ONE << (CONST_BITS-1);
4059
#endif
4060
    tmp2 = (INT32) wsptr[4];
4061
    tmp10 = MULTIPLY(tmp2, FIX(0.707106781));   /* c4 */
4062
    tmp1 = tmp0 + tmp10;
4063
    tmp11 = tmp0 - tmp10 - tmp10;
4064
    tmp10 = (INT32) wsptr[2];
4065
    tmp0 = MULTIPLY(tmp10, FIX(1.224744871));   /* c2 */
4066
    tmp10 = tmp1 + tmp0;
4067
    tmp12 = tmp1 - tmp0;
4068
4069
    /* Odd part */
4070
4071
    z1 = (INT32) wsptr[1];
4072
    z2 = (INT32) wsptr[3];
4073
    z3 = (INT32) wsptr[5];
4074
    tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
4075
    tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
4076
    tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
4077
    tmp1 = (z1 - z2 - z3) << CONST_BITS;
4078
4079
    /* Final output stage */
4080
4081
    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
4082
                CONST_BITS+PASS2_BITS)
4083
          & RANGE_MASK];
4084
    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
4085
                CONST_BITS+PASS2_BITS)
4086
          & RANGE_MASK];
4087
    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
4088
                CONST_BITS+PASS2_BITS)
4089
          & RANGE_MASK];
4090
    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
4091
                CONST_BITS+PASS2_BITS)
4092
          & RANGE_MASK];
4093
    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
4094
                CONST_BITS+PASS2_BITS)
4095
          & RANGE_MASK];
4096
    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
4097
                CONST_BITS+PASS2_BITS)
4098
          & RANGE_MASK];
4099
4100
    wsptr += 6;   /* advance pointer to next row */
4101
  }
4102
}
4103
4104
4105
/*
4106
 * Perform dequantization and inverse DCT on one block of coefficients,
4107
 * producing a 4x2 output block.
4108
 *
4109
 * 2-point IDCT in pass 1 (columns), 4-point in pass 2 (rows).
4110
 */
4111
4112
GLOBAL(void)
4113
jpeg_idct_4x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4114
         JCOEFPTR coef_block,
4115
         JSAMPARRAY output_buf, JDIMENSION output_col)
4116
{
4117
  INT32 tmp0, tmp2, tmp10, tmp12;
4118
  INT32 z1, z2, z3;
4119
  JCOEFPTR inptr;
4120
  ISLOW_MULT_TYPE * quantptr;
4121
  INT32 * wsptr;
4122
  JSAMPROW outptr;
4123
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4124
  int ctr;
4125
  INT32 workspace[4*2]; /* buffers data between passes */
4126
  SHIFT_TEMPS
4127
4128
  /* Pass 1: process columns from input, store into work array. */
4129
4130
  inptr = coef_block;
4131
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4132
  wsptr = workspace;
4133
  for (ctr = 0; ctr < 4; ctr++, inptr++, quantptr++, wsptr++) {
4134
    /* Even part */
4135
4136
    tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4137
4138
    /* Odd part */
4139
4140
    tmp0 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4141
4142
    /* Final output stage */
4143
4144
    wsptr[4*0] = tmp10 + tmp0;
4145
    wsptr[4*1] = tmp10 - tmp0;
4146
  }
4147
4148
  /* Pass 2: process 2 rows from work array, store into output array.
4149
   * 4-point IDCT kernel,
4150
   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
4151
   */
4152
4153
  wsptr = workspace;
4154
  for (ctr = 0; ctr < 2; ctr++) {
4155
    outptr = output_buf[ctr] + output_col;
4156
4157
    /* Even part */
4158
4159
    tmp0 = wsptr[0];
4160
    tmp2 = wsptr[2];
4161
4162
    /* Add range center and fudge factor for final descale and range-limit. */
4163
#if PASS2_BITS > PASS1_BITS
4164
#if PASS2_BITS > PASS1_BITS + 1
4165
    tmp0 += (((INT32) RANGE_CENTER) << (PASS2_BITS-PASS1_BITS)) +
4166
      (ONE << (PASS2_BITS-PASS1_BITS-1));
4167
#else
4168
    tmp0 += (((INT32) RANGE_CENTER) << 1) + ONE;
4169
#endif
4170
    tmp0 <<= CONST_BITS;
4171
#else
4172
#if PASS2_BITS == PASS1_BITS
4173
    tmp0 += (INT32) RANGE_CENTER;
4174
    tmp0 <<= CONST_BITS;
4175
    tmp0 += ONE << (CONST_BITS-1);
4176
#else
4177
    tmp0 <<= CONST_BITS;
4178
    tmp0 += (((INT32) RANGE_CENTER) << (CONST_BITS+PASS2_BITS-PASS1_BITS)) +
4179
      (ONE << (CONST_BITS+PASS2_BITS-PASS1_BITS-1));
4180
#endif
4181
#endif
4182
4183
    tmp2 <<= CONST_BITS;
4184
4185
    tmp10 = tmp0 + tmp2;
4186
    tmp12 = tmp0 - tmp2;
4187
4188
    /* Odd part */
4189
    /* Same rotation as in the even part of the 8x8 LL&M IDCT */
4190
4191
    z2 = wsptr[1];
4192
    z3 = wsptr[3];
4193
4194
    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);   /* c6 */
4195
    tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
4196
    tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
4197
4198
    /* Final output stage */
4199
4200
    outptr[0] =
4201
      range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
4202
            CONST_BITS+PASS2_BITS-PASS1_BITS)
4203
      & RANGE_MASK];
4204
    outptr[3] =
4205
      range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
4206
            CONST_BITS+PASS2_BITS-PASS1_BITS)
4207
      & RANGE_MASK];
4208
    outptr[1] =
4209
      range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
4210
            CONST_BITS+PASS2_BITS-PASS1_BITS)
4211
      & RANGE_MASK];
4212
    outptr[2] =
4213
      range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
4214
            CONST_BITS+PASS2_BITS-PASS1_BITS)
4215
      & RANGE_MASK];
4216
4217
    wsptr += 4;   /* advance pointer to next row */
4218
  }
4219
}
4220
4221
4222
/*
4223
 * Perform dequantization and inverse DCT on one block of coefficients,
4224
 * producing a 2x1 output block.
4225
 *
4226
 * 1-point IDCT in pass 1 (columns), 2-point in pass 2 (rows).
4227
 */
4228
4229
GLOBAL(void)
4230
jpeg_idct_2x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4231
         JCOEFPTR coef_block,
4232
         JSAMPARRAY output_buf, JDIMENSION output_col)
4233
{
4234
  DCTELEM tmp0, tmp1;
4235
  ISLOW_MULT_TYPE * quantptr;
4236
  JSAMPROW outptr;
4237
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4238
  ISHIFT_TEMPS
4239
4240
  /* Pass 1: empty. */
4241
4242
  /* Pass 2: process 1 row from input, store into output array. */
4243
4244
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4245
  outptr = output_buf[0] + output_col;
4246
4247
  /* Even part */
4248
4249
  tmp0 = DEQUANTIZE(coef_block[0], quantptr[0]);
4250
4251
  /* Odd part */
4252
4253
  tmp1 = DEQUANTIZE(coef_block[1], quantptr[1]);
4254
4255
  /* Final output stage */
4256
4257
#if PASS2_BITS > PASS1_BITS
4258
  /* Add range center and fudge factor for downscale and range-limit. */
4259
#if PASS2_BITS > PASS1_BITS + 1
4260
  tmp0 += (((DCTELEM) RANGE_CENTER) << (PASS2_BITS-PASS1_BITS)) +
4261
    (1 << (PASS2_BITS-PASS1_BITS-1));
4262
#else
4263
  tmp0 += (((DCTELEM) RANGE_CENTER) << 1) + 1;
4264
#endif
4265
4266
  outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1,
4267
               PASS2_BITS-PASS1_BITS)
4268
        & RANGE_MASK];
4269
  outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1,
4270
               PASS2_BITS-PASS1_BITS)
4271
        & RANGE_MASK];
4272
#else
4273
#if PASS2_BITS < PASS1_BITS
4274
  tmp0 <<= (PASS1_BITS-PASS2_BITS); /* upscale */
4275
  tmp1 <<= (PASS1_BITS-PASS2_BITS); /* upscale */
4276
#endif
4277
4278
  tmp0 += (DCTELEM) RANGE_CENTER; /* add range center for range-limit */
4279
4280
  outptr[0] = range_limit[(int) (tmp0 + tmp1) & RANGE_MASK];
4281
  outptr[1] = range_limit[(int) (tmp0 - tmp1) & RANGE_MASK];
4282
#endif
4283
}
4284
4285
4286
/*
4287
 * Perform dequantization and inverse DCT on one block of coefficients,
4288
 * producing an 8x16 output block.
4289
 *
4290
 * 16-point IDCT in pass 1 (columns), 8-point in pass 2 (rows).
4291
 */
4292
4293
GLOBAL(void)
4294
jpeg_idct_8x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4295
    JCOEFPTR coef_block,
4296
    JSAMPARRAY output_buf, JDIMENSION output_col)
4297
{
4298
  INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
4299
  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
4300
  INT32 z1, z2, z3, z4;
4301
  JCOEFPTR inptr;
4302
  ISLOW_MULT_TYPE * quantptr;
4303
  int * wsptr;
4304
  JSAMPROW outptr;
4305
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4306
  int ctr;
4307
  int workspace[8*16];  /* buffers data between passes */
4308
  SHIFT_TEMPS
4309
4310
  /* Pass 1: process columns from input, store into work array.
4311
   * 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
4312
   */
4313
4314
  inptr = coef_block;
4315
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4316
  wsptr = workspace;
4317
  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
4318
    /* Even part */
4319
4320
    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4321
    tmp0 <<= CONST_BITS;
4322
    /* Add fudge factor here for final descale. */
4323
    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
4324
4325
    z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4326
    tmp1 = MULTIPLY(z1, FIX(1.306562965));      /* c4[16] = c2[8] */
4327
    tmp2 = MULTIPLY(z1, FIX_0_541196100);       /* c12[16] = c6[8] */
4328
4329
    tmp10 = tmp0 + tmp1;
4330
    tmp11 = tmp0 - tmp1;
4331
    tmp12 = tmp0 + tmp2;
4332
    tmp13 = tmp0 - tmp2;
4333
4334
    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4335
    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
4336
    z3 = z1 - z2;
4337
    z4 = MULTIPLY(z3, FIX(0.275899379));        /* c14[16] = c7[8] */
4338
    z3 = MULTIPLY(z3, FIX(1.387039845));        /* c2[16] = c1[8] */
4339
4340
    tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447);  /* (c6+c2)[16] = (c3+c1)[8] */
4341
    tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223);  /* (c6-c14)[16] = (c3-c7)[8] */
4342
    tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
4343
    tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
4344
4345
    tmp20 = tmp10 + tmp0;
4346
    tmp27 = tmp10 - tmp0;
4347
    tmp21 = tmp12 + tmp1;
4348
    tmp26 = tmp12 - tmp1;
4349
    tmp22 = tmp13 + tmp2;
4350
    tmp25 = tmp13 - tmp2;
4351
    tmp23 = tmp11 + tmp3;
4352
    tmp24 = tmp11 - tmp3;
4353
4354
    /* Odd part */
4355
4356
    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4357
    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
4358
    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
4359
    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
4360
4361
    tmp11 = z1 + z3;
4362
4363
    tmp1  = MULTIPLY(z1 + z2, FIX(1.353318001));   /* c3 */
4364
    tmp2  = MULTIPLY(tmp11,   FIX(1.247225013));   /* c5 */
4365
    tmp3  = MULTIPLY(z1 + z4, FIX(1.093201867));   /* c7 */
4366
    tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586));   /* c9 */
4367
    tmp11 = MULTIPLY(tmp11,   FIX(0.666655658));   /* c11 */
4368
    tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528));   /* c13 */
4369
    tmp0  = tmp1 + tmp2 + tmp3 -
4370
      MULTIPLY(z1, FIX(2.286341144));        /* c7+c5+c3-c1 */
4371
    tmp13 = tmp10 + tmp11 + tmp12 -
4372
      MULTIPLY(z1, FIX(1.835730603));        /* c9+c11+c13-c15 */
4373
    z1    = MULTIPLY(z2 + z3, FIX(0.138617169));   /* c15 */
4374
    tmp1  += z1 + MULTIPLY(z2, FIX(0.071888074));  /* c9+c11-c3-c15 */
4375
    tmp2  += z1 - MULTIPLY(z3, FIX(1.125726048));  /* c5+c7+c15-c3 */
4376
    z1    = MULTIPLY(z3 - z2, FIX(1.407403738));   /* c1 */
4377
    tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282));  /* c1+c11-c9-c13 */
4378
    tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411));  /* c1+c5+c13-c7 */
4379
    z2    += z4;
4380
    z1    = MULTIPLY(z2, - FIX(0.666655658));      /* -c11 */
4381
    tmp1  += z1;
4382
    tmp3  += z1 + MULTIPLY(z4, FIX(1.065388962));  /* c3+c11+c15-c7 */
4383
    z2    = MULTIPLY(z2, - FIX(1.247225013));      /* -c5 */
4384
    tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809));  /* c1+c5+c9-c13 */
4385
    tmp12 += z2;
4386
    z2    = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
4387
    tmp2  += z2;
4388
    tmp3  += z2;
4389
    z2    = MULTIPLY(z4 - z3, FIX(0.410524528));   /* c13 */
4390
    tmp10 += z2;
4391
    tmp11 += z2;
4392
4393
    /* Final output stage */
4394
4395
    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp0,  CONST_BITS-PASS1_BITS);
4396
    wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0,  CONST_BITS-PASS1_BITS);
4397
    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp1,  CONST_BITS-PASS1_BITS);
4398
    wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1,  CONST_BITS-PASS1_BITS);
4399
    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp2,  CONST_BITS-PASS1_BITS);
4400
    wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2,  CONST_BITS-PASS1_BITS);
4401
    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp3,  CONST_BITS-PASS1_BITS);
4402
    wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3,  CONST_BITS-PASS1_BITS);
4403
    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS);
4404
    wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS);
4405
    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS);
4406
    wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS);
4407
    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS);
4408
    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS);
4409
    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS);
4410
    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS);
4411
  }
4412
4413
  /* Pass 2: process rows from work array, store into output array.
4414
   * Note that we must descale the results by a factor of 8 == 2**3,
4415
   * which is folded into the PASS2_BITS value.
4416
   * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
4417
   */
4418
4419
  wsptr = workspace;
4420
  for (ctr = 0; ctr < 16; ctr++) {
4421
    outptr = output_buf[ctr] + output_col;
4422
4423
    /* Even part: reverse the even part of the forward DCT.
4424
     * The rotator is c(-6).
4425
     */
4426
4427
    /* Add range center and fudge factor for final descale and range-limit. */
4428
    z2 = (INT32) wsptr[0] + PASS2_OFFSET;
4429
    z3 = (INT32) wsptr[4];
4430
    z2 <<= CONST_BITS;
4431
    z3 <<= CONST_BITS;
4432
#if PASS2_BITS == 0
4433
    /* Add fudge factor here for final descale. */
4434
    z2 += ONE << (CONST_BITS-1);
4435
#endif
4436
4437
    tmp0 = z2 + z3;
4438
    tmp1 = z2 - z3;
4439
4440
    z2 = (INT32) wsptr[2];
4441
    z3 = (INT32) wsptr[6];
4442
4443
    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
4444
    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
4445
    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
4446
4447
    tmp10 = tmp0 + tmp2;
4448
    tmp13 = tmp0 - tmp2;
4449
    tmp11 = tmp1 + tmp3;
4450
    tmp12 = tmp1 - tmp3;
4451
4452
    /* Odd part per figure 8; the matrix is unitary and hence its
4453
     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
4454
     */
4455
4456
    tmp0 = (INT32) wsptr[7];
4457
    tmp1 = (INT32) wsptr[5];
4458
    tmp2 = (INT32) wsptr[3];
4459
    tmp3 = (INT32) wsptr[1];
4460
4461
    z2 = tmp0 + tmp2;
4462
    z3 = tmp1 + tmp3;
4463
4464
    z1 = MULTIPLY(z2 + z3, FIX_1_175875602);       /*  c3 */
4465
    z2 = MULTIPLY(z2, - FIX_1_961570560);          /* -c3-c5 */
4466
    z3 = MULTIPLY(z3, - FIX_0_390180644);          /* -c3+c5 */
4467
    z2 += z1;
4468
    z3 += z1;
4469
4470
    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
4471
    tmp0 = MULTIPLY(tmp0, FIX_0_298631336);        /* -c1+c3+c5-c7 */
4472
    tmp3 = MULTIPLY(tmp3, FIX_1_501321110);        /*  c1+c3-c5-c7 */
4473
    tmp0 += z1 + z2;
4474
    tmp3 += z1 + z3;
4475
4476
    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
4477
    tmp1 = MULTIPLY(tmp1, FIX_2_053119869);        /*  c1+c3-c5+c7 */
4478
    tmp2 = MULTIPLY(tmp2, FIX_3_072711026);        /*  c1+c3+c5-c7 */
4479
    tmp1 += z1 + z3;
4480
    tmp2 += z1 + z2;
4481
4482
    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
4483
4484
    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
4485
                CONST_BITS+PASS2_BITS)
4486
          & RANGE_MASK];
4487
    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
4488
                CONST_BITS+PASS2_BITS)
4489
          & RANGE_MASK];
4490
    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
4491
                CONST_BITS+PASS2_BITS)
4492
          & RANGE_MASK];
4493
    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
4494
                CONST_BITS+PASS2_BITS)
4495
          & RANGE_MASK];
4496
    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
4497
                CONST_BITS+PASS2_BITS)
4498
          & RANGE_MASK];
4499
    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
4500
                CONST_BITS+PASS2_BITS)
4501
          & RANGE_MASK];
4502
    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
4503
                CONST_BITS+PASS2_BITS)
4504
          & RANGE_MASK];
4505
    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
4506
                CONST_BITS+PASS2_BITS)
4507
          & RANGE_MASK];
4508
4509
    wsptr += DCTSIZE;   /* advance pointer to next row */
4510
  }
4511
}
4512
4513
4514
/*
4515
 * Perform dequantization and inverse DCT on one block of coefficients,
4516
 * producing a 7x14 output block.
4517
 *
4518
 * 14-point IDCT in pass 1 (columns), 7-point in pass 2 (rows).
4519
 */
4520
4521
GLOBAL(void)
4522
jpeg_idct_7x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4523
    JCOEFPTR coef_block,
4524
    JSAMPARRAY output_buf, JDIMENSION output_col)
4525
{
4526
  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
4527
  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
4528
  INT32 z1, z2, z3, z4;
4529
  JCOEFPTR inptr;
4530
  ISLOW_MULT_TYPE * quantptr;
4531
  int * wsptr;
4532
  JSAMPROW outptr;
4533
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4534
  int ctr;
4535
  int workspace[7*14];  /* buffers data between passes */
4536
  SHIFT_TEMPS
4537
4538
  /* Pass 1: process columns from input, store into work array.
4539
   * 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
4540
   */
4541
4542
  inptr = coef_block;
4543
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4544
  wsptr = workspace;
4545
  for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) {
4546
    /* Even part */
4547
4548
    z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4549
    z1 <<= CONST_BITS;
4550
    /* Add fudge factor here for final descale. */
4551
    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
4552
    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4553
    z2 = MULTIPLY(z4, FIX(1.274162392));         /* c4 */
4554
    z3 = MULTIPLY(z4, FIX(0.314692123));         /* c12 */
4555
    z4 = MULTIPLY(z4, FIX(0.881747734));         /* c8 */
4556
4557
    tmp10 = z1 + z2;
4558
    tmp11 = z1 + z3;
4559
    tmp12 = z1 - z4;
4560
4561
    tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */
4562
      CONST_BITS-PASS1_BITS);
4563
4564
    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4565
    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
4566
4567
    z3 = MULTIPLY(z1 + z2, FIX(1.105676686));    /* c6 */
4568
4569
    tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
4570
    tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
4571
    tmp15 = MULTIPLY(z1, FIX(0.613604268)) -     /* c10 */
4572
      MULTIPLY(z2, FIX(1.378756276));      /* c2 */
4573
4574
    tmp20 = tmp10 + tmp13;
4575
    tmp26 = tmp10 - tmp13;
4576
    tmp21 = tmp11 + tmp14;
4577
    tmp25 = tmp11 - tmp14;
4578
    tmp22 = tmp12 + tmp15;
4579
    tmp24 = tmp12 - tmp15;
4580
4581
    /* Odd part */
4582
4583
    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4584
    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
4585
    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
4586
    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
4587
    tmp13 = z4 << CONST_BITS;
4588
4589
    tmp14 = z1 + z3;
4590
    tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607));           /* c3 */
4591
    tmp12 = MULTIPLY(tmp14, FIX(1.197448846));             /* c5 */
4592
    tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
4593
    tmp14 = MULTIPLY(tmp14, FIX(0.752406978));             /* c9 */
4594
    tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426));        /* c9+c11-c13 */
4595
    z1    -= z2;
4596
    tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13;        /* c11 */
4597
    tmp16 += tmp15;
4598
    z1    += z4;
4599
    z4    = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */
4600
    tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948));          /* c3-c9-c13 */
4601
    tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773));          /* c3+c5-c13 */
4602
    z4    = MULTIPLY(z3 - z2, FIX(1.405321284));           /* c1 */
4603
    tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
4604
    tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567));          /* c1+c11-c5 */
4605
4606
#if PASS1_BITS > 0
4607
    tmp13 = (z1 - z3) << PASS1_BITS;
4608
#else
4609
    tmp13 = z1 - z3;
4610
#endif
4611
4612
    /* Final output stage */
4613
4614
    wsptr[7*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
4615
    wsptr[7*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
4616
    wsptr[7*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
4617
    wsptr[7*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
4618
    wsptr[7*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
4619
    wsptr[7*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
4620
    wsptr[7*3]  = (int) (tmp23 + tmp13);
4621
    wsptr[7*10] = (int) (tmp23 - tmp13);
4622
    wsptr[7*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
4623
    wsptr[7*9]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
4624
    wsptr[7*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
4625
    wsptr[7*8]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
4626
    wsptr[7*6]  = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
4627
    wsptr[7*7]  = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
4628
  }
4629
4630
  /* Pass 2: process 14 rows from work array, store into output array.
4631
   * 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
4632
   */
4633
4634
  wsptr = workspace;
4635
  for (ctr = 0; ctr < 14; ctr++) {
4636
    outptr = output_buf[ctr] + output_col;
4637
4638
    /* Even part */
4639
4640
    /* Add range center and fudge factor for final descale and range-limit. */
4641
    tmp23 = (INT32) wsptr[0] + PASS2_OFFSET;
4642
    tmp23 <<= CONST_BITS;
4643
#if PASS2_BITS == 0
4644
    tmp23 += ONE << (CONST_BITS-1);
4645
#endif
4646
4647
    z1 = (INT32) wsptr[2];
4648
    z2 = (INT32) wsptr[4];
4649
    z3 = (INT32) wsptr[6];
4650
4651
    tmp20 = MULTIPLY(z2 - z3, FIX(0.881747734));       /* c4 */
4652
    tmp22 = MULTIPLY(z1 - z2, FIX(0.314692123));       /* c6 */
4653
    tmp21 = tmp20 + tmp22 + tmp23 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
4654
    tmp10 = z1 + z3;
4655
    z2 -= tmp10;
4656
    tmp10 = MULTIPLY(tmp10, FIX(1.274162392)) + tmp23; /* c2 */
4657
    tmp20 += tmp10 - MULTIPLY(z3, FIX(0.077722536));   /* c2-c4-c6 */
4658
    tmp22 += tmp10 - MULTIPLY(z1, FIX(2.470602249));   /* c2+c4+c6 */
4659
    tmp23 += MULTIPLY(z2, FIX(1.414213562));           /* c0 */
4660
4661
    /* Odd part */
4662
4663
    z1 = (INT32) wsptr[1];
4664
    z2 = (INT32) wsptr[3];
4665
    z3 = (INT32) wsptr[5];
4666
4667
    tmp11 = MULTIPLY(z1 + z2, FIX(0.935414347));       /* (c3+c1-c5)/2 */
4668
    tmp12 = MULTIPLY(z1 - z2, FIX(0.170262339));       /* (c3+c5-c1)/2 */
4669
    tmp10 = tmp11 - tmp12;
4670
    tmp11 += tmp12;
4671
    tmp12 = MULTIPLY(z2 + z3, - FIX(1.378756276));     /* -c1 */
4672
    tmp11 += tmp12;
4673
    z2 = MULTIPLY(z1 + z3, FIX(0.613604268));          /* c5 */
4674
    tmp10 += z2;
4675
    tmp12 += z2 + MULTIPLY(z3, FIX(1.870828693));      /* c3+c1-c5 */
4676
4677
    /* Final output stage */
4678
4679
    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
4680
                CONST_BITS+PASS2_BITS)
4681
          & RANGE_MASK];
4682
    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
4683
                CONST_BITS+PASS2_BITS)
4684
          & RANGE_MASK];
4685
    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
4686
                CONST_BITS+PASS2_BITS)
4687
          & RANGE_MASK];
4688
    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
4689
                CONST_BITS+PASS2_BITS)
4690
          & RANGE_MASK];
4691
    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
4692
                CONST_BITS+PASS2_BITS)
4693
          & RANGE_MASK];
4694
    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
4695
                CONST_BITS+PASS2_BITS)
4696
          & RANGE_MASK];
4697
    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23,
4698
                CONST_BITS+PASS2_BITS)
4699
          & RANGE_MASK];
4700
4701
    wsptr += 7;   /* advance pointer to next row */
4702
  }
4703
}
4704
4705
4706
/*
4707
 * Perform dequantization and inverse DCT on one block of coefficients,
4708
 * producing a 6x12 output block.
4709
 *
4710
 * 12-point IDCT in pass 1 (columns), 6-point in pass 2 (rows).
4711
 */
4712
4713
GLOBAL(void)
4714
jpeg_idct_6x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4715
    JCOEFPTR coef_block,
4716
    JSAMPARRAY output_buf, JDIMENSION output_col)
4717
{
4718
  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
4719
  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
4720
  INT32 z1, z2, z3, z4;
4721
  JCOEFPTR inptr;
4722
  ISLOW_MULT_TYPE * quantptr;
4723
  int * wsptr;
4724
  JSAMPROW outptr;
4725
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4726
  int ctr;
4727
  int workspace[6*12];  /* buffers data between passes */
4728
  SHIFT_TEMPS
4729
4730
  /* Pass 1: process columns from input, store into work array.
4731
   * 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
4732
   */
4733
4734
  inptr = coef_block;
4735
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4736
  wsptr = workspace;
4737
  for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
4738
    /* Even part */
4739
4740
    z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4741
    z3 <<= CONST_BITS;
4742
    /* Add fudge factor here for final descale. */
4743
    z3 += ONE << (CONST_BITS-PASS1_BITS-1);
4744
4745
    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4746
    z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
4747
4748
    tmp10 = z3 + z4;
4749
    tmp11 = z3 - z4;
4750
4751
    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4752
    z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
4753
    z1 <<= CONST_BITS;
4754
    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
4755
    z2 <<= CONST_BITS;
4756
4757
    tmp12 = z1 - z2;
4758
4759
    tmp21 = z3 + tmp12;
4760
    tmp24 = z3 - tmp12;
4761
4762
    tmp12 = z4 + z2;
4763
4764
    tmp20 = tmp10 + tmp12;
4765
    tmp25 = tmp10 - tmp12;
4766
4767
    tmp12 = z4 - z1 - z2;
4768
4769
    tmp22 = tmp11 + tmp12;
4770
    tmp23 = tmp11 - tmp12;
4771
4772
    /* Odd part */
4773
4774
    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4775
    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
4776
    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
4777
    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
4778
4779
    tmp11 = MULTIPLY(z2, FIX(1.306562965));                  /* c3 */
4780
    tmp14 = MULTIPLY(z2, - FIX_0_541196100);                 /* -c9 */
4781
4782
    tmp10 = z1 + z3;
4783
    tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669));          /* c7 */
4784
    tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384));       /* c5-c7 */
4785
    tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716));  /* c1-c5 */
4786
    tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580));           /* -(c7+c11) */
4787
    tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
4788
    tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
4789
    tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) -        /* c7-c11 */
4790
       MULTIPLY(z4, FIX(1.982889723));                 /* c5+c7 */
4791
4792
    z1 -= z4;
4793
    z2 -= z3;
4794
    z3 = MULTIPLY(z1 + z2, FIX_0_541196100);                 /* c9 */
4795
    tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865);              /* c3-c9 */
4796
    tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065);              /* c3+c9 */
4797
4798
    /* Final output stage */
4799
4800
    wsptr[6*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
4801
    wsptr[6*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
4802
    wsptr[6*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
4803
    wsptr[6*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
4804
    wsptr[6*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
4805
    wsptr[6*9]  = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
4806
    wsptr[6*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
4807
    wsptr[6*8]  = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
4808
    wsptr[6*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
4809
    wsptr[6*7]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
4810
    wsptr[6*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
4811
    wsptr[6*6]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
4812
  }
4813
4814
  /* Pass 2: process 12 rows from work array, store into output array.
4815
   * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
4816
   */
4817
4818
  wsptr = workspace;
4819
  for (ctr = 0; ctr < 12; ctr++) {
4820
    outptr = output_buf[ctr] + output_col;
4821
4822
    /* Even part */
4823
4824
    /* Add range center and fudge factor for final descale and range-limit. */
4825
    tmp10 = (INT32) wsptr[0] + PASS2_OFFSET;
4826
    tmp10 <<= CONST_BITS;
4827
#if PASS2_BITS == 0
4828
    tmp10 += ONE << (CONST_BITS-1);
4829
#endif
4830
    tmp12 = (INT32) wsptr[4];
4831
    tmp20 = MULTIPLY(tmp12, FIX(0.707106781));   /* c4 */
4832
    tmp11 = tmp10 + tmp20;
4833
    tmp21 = tmp10 - tmp20 - tmp20;
4834
    tmp20 = (INT32) wsptr[2];
4835
    tmp10 = MULTIPLY(tmp20, FIX(1.224744871));   /* c2 */
4836
    tmp20 = tmp11 + tmp10;
4837
    tmp22 = tmp11 - tmp10;
4838
4839
    /* Odd part */
4840
4841
    z1 = (INT32) wsptr[1];
4842
    z2 = (INT32) wsptr[3];
4843
    z3 = (INT32) wsptr[5];
4844
    tmp11 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
4845
    tmp10 = tmp11 + ((z1 + z2) << CONST_BITS);
4846
    tmp12 = tmp11 + ((z3 - z2) << CONST_BITS);
4847
    tmp11 = (z1 - z2 - z3) << CONST_BITS;
4848
4849
    /* Final output stage */
4850
4851
    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
4852
                CONST_BITS+PASS2_BITS)
4853
          & RANGE_MASK];
4854
    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
4855
                CONST_BITS+PASS2_BITS)
4856
          & RANGE_MASK];
4857
    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
4858
                CONST_BITS+PASS2_BITS)
4859
          & RANGE_MASK];
4860
    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
4861
                CONST_BITS+PASS2_BITS)
4862
          & RANGE_MASK];
4863
    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
4864
                CONST_BITS+PASS2_BITS)
4865
          & RANGE_MASK];
4866
    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
4867
                CONST_BITS+PASS2_BITS)
4868
          & RANGE_MASK];
4869
4870
    wsptr += 6;   /* advance pointer to next row */
4871
  }
4872
}
4873
4874
4875
/*
4876
 * Perform dequantization and inverse DCT on one block of coefficients,
4877
 * producing a 5x10 output block.
4878
 *
4879
 * 10-point IDCT in pass 1 (columns), 5-point in pass 2 (rows).
 *
 * cinfo       is only used to fetch the range-limit lookup table.
 * compptr     supplies dct_table, read here as ISLOW_MULT_TYPE entries.
 * coef_block  holds the input coefficients; rows 0..7 of the first
 *             5 columns are read (row stride DCTSIZE).
 * output_buf  is indexed by row; rows 0..9 each receive 5 samples.
 * output_col  is added to every row pointer before samples are stored.
4880
 */
4881
4882
GLOBAL(void)
4883
jpeg_idct_5x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4884
    JCOEFPTR coef_block,
4885
    JSAMPARRAY output_buf, JDIMENSION output_col)
4886
{
4887
  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
4888
  INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
4889
  INT32 z1, z2, z3, z4, z5;
4890
  JCOEFPTR inptr;
4891
  ISLOW_MULT_TYPE * quantptr;
4892
  int * wsptr;
4893
  JSAMPROW outptr;
4894
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4895
  int ctr;
4896
  int workspace[5*10];  /* buffers data between passes */
4897
  SHIFT_TEMPS
4898
4899
  /* Pass 1: process columns from input, store into work array.
4900
   * 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
   * Each iteration handles one column and writes its 10 results to
   * wsptr[5*0] .. wsptr[5*9], so the work array holds 5 entries per row.
4901
   */
4902
4903
  inptr = coef_block;
4904
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4905
  wsptr = workspace;
4906
  for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) {
4907
    /* Even part */
4908
4909
    z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4910
    z3 <<= CONST_BITS;
4911
    /* Add fudge factor here for final descale. */
4912
    z3 += ONE << (CONST_BITS-PASS1_BITS-1);
4913
    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4914
    z1 = MULTIPLY(z4, FIX(1.144122806));         /* c4 */
4915
    z2 = MULTIPLY(z4, FIX(0.437016024));         /* c8 */
4916
    tmp10 = z3 + z1;
4917
    tmp11 = z3 - z2;
4918
    /* tmp22 is descaled here already; it is stored below without another shift. */
4919
    tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1),   /* c0 = (c4-c8)*2 */
4920
      CONST_BITS-PASS1_BITS);
4921
4922
    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4923
    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
4924
4925
    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));    /* c6 */
4926
    tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
4927
    tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
4928
4929
    tmp20 = tmp10 + tmp12;
4930
    tmp24 = tmp10 - tmp12;
4931
    tmp21 = tmp11 + tmp13;
4932
    tmp23 = tmp11 - tmp13;
4933
4934
    /* Odd part */
4935
4936
    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4937
    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
4938
    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
4939
    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
4940
4941
    tmp11 = z2 + z4;
4942
    tmp13 = z2 - z4;
4943
4944
    tmp12 = MULTIPLY(tmp13, FIX(0.309016994));        /* (c3-c7)/2 */
4945
    z5 = z3 << CONST_BITS;
4946
4947
    z2 = MULTIPLY(tmp11, FIX(0.951056516));           /* (c3+c7)/2 */
4948
    z4 = z5 + tmp12;
4949
4950
    tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
4951
    tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
4952
4953
    z2 = MULTIPLY(tmp11, FIX(0.587785252));           /* (c1-c9)/2 */
4954
    z4 = z5 - tmp12 - (tmp13 << (CONST_BITS - 1));
4955
    /* tmp12 gets only the PASS1_BITS scaling so that it matches the descaled tmp22. */
4956
#if PASS1_BITS > 0
4957
    tmp12 = (z1 - tmp13 - z3) << PASS1_BITS;
4958
#else
4959
    tmp12 = z1 - tmp13 - z3;
4960
#endif
4961
4962
    tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
4963
    tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
4964
4965
    /* Final output stage */
4966
4967
    wsptr[5*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
4968
    wsptr[5*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
4969
    wsptr[5*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
4970
    wsptr[5*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
4971
    wsptr[5*2] = (int) (tmp22 + tmp12);   /* no shift: both terms are at workspace scale */
4972
    wsptr[5*7] = (int) (tmp22 - tmp12);   /* no shift: both terms are at workspace scale */
4973
    wsptr[5*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
4974
    wsptr[5*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
4975
    wsptr[5*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
4976
    wsptr[5*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
4977
  }
4978
4979
  /* Pass 2: process 10 rows from work array, store into output array.
4980
   * 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
   * Each iteration reads wsptr[0..4] and stores outptr[0..4].
4981
   */
4982
4983
  wsptr = workspace;
4984
  for (ctr = 0; ctr < 10; ctr++) {
4985
    outptr = output_buf[ctr] + output_col;
4986
4987
    /* Even part */
4988
4989
    /* Add range center and fudge factor for final descale and range-limit. */
4990
    tmp12 = (INT32) wsptr[0] + PASS2_OFFSET;
4991
    tmp12 <<= CONST_BITS;
4992
#if PASS2_BITS == 0
4993
    tmp12 += ONE << (CONST_BITS-1);
4994
#endif
4995
    tmp13 = (INT32) wsptr[2];
4996
    tmp14 = (INT32) wsptr[4];
4997
    z1 = MULTIPLY(tmp13 + tmp14, FIX(0.790569415)); /* (c2+c4)/2 */
4998
    z2 = MULTIPLY(tmp13 - tmp14, FIX(0.353553391)); /* (c2-c4)/2 */
4999
    z3 = tmp12 + z2;
5000
    tmp10 = z3 + z1;
5001
    tmp11 = z3 - z1;
5002
    tmp12 -= z2 << 2;   /* the centre sample outptr[2] is built from this term alone */
5003
5004
    /* Odd part */
5005
5006
    z2 = (INT32) wsptr[1];
5007
    z3 = (INT32) wsptr[3];
5008
5009
    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));       /* c3 */
5010
    tmp13 = z1 + MULTIPLY(z2, FIX(0.513743148));    /* c1-c3 */
5011
    tmp14 = z1 - MULTIPLY(z3, FIX(2.176250899));    /* c1+c3 */
5012
5013
    /* Final output stage */
5014
    /* Each descaled value is masked with RANGE_MASK and mapped through range_limit. */
5015
    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp13,
5016
                CONST_BITS+PASS2_BITS)
5017
          & RANGE_MASK];
5018
    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp13,
5019
                CONST_BITS+PASS2_BITS)
5020
          & RANGE_MASK];
5021
    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp14,
5022
                CONST_BITS+PASS2_BITS)
5023
          & RANGE_MASK];
5024
    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp14,
5025
                CONST_BITS+PASS2_BITS)
5026
          & RANGE_MASK];
5027
    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12,
5028
                CONST_BITS+PASS2_BITS)
5029
          & RANGE_MASK];
5030
5031
    wsptr += 5;   /* advance pointer to next row */
5032
  }
5033
}
5034
5035
5036
/*
5037
 * Perform dequantization and inverse DCT on one block of coefficients,
5038
 * producing a 4x8 output block.
5039
 *
5040
 * 8-point IDCT in pass 1 (columns), 4-point in pass 2 (rows).
 *
 * cinfo       is only used to fetch the range-limit lookup table.
 * compptr     supplies dct_table, read here as ISLOW_MULT_TYPE entries.
 * coef_block  holds the input coefficients; rows 0..7 of the first
 *             4 columns are read (row stride DCTSIZE).
 * output_buf  is indexed by row; rows 0..7 each receive 4 samples.
 * output_col  is added to every row pointer before samples are stored.
5041
 */
5042
5043
GLOBAL(void)
5044
jpeg_idct_4x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
5045
         JCOEFPTR coef_block,
5046
         JSAMPARRAY output_buf, JDIMENSION output_col)
5047
{
5048
  INT32 tmp0, tmp1, tmp2, tmp3;
5049
  INT32 tmp10, tmp11, tmp12, tmp13;
5050
  INT32 z1, z2, z3;
5051
  JCOEFPTR inptr;
5052
  ISLOW_MULT_TYPE * quantptr;
5053
  int * wsptr;
5054
  JSAMPROW outptr;
5055
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
5056
  int ctr;
5057
  int workspace[4*8]; /* buffers data between passes */
5058
  SHIFT_TEMPS
5059
5060
  /* Pass 1: process columns from input, store into work array.
5061
   * Note results are scaled up by sqrt(8) compared to a true IDCT;
5062
   * furthermore, we scale the results by 2**PASS1_BITS.
5063
   * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
   * Each iteration handles one column and writes its 8 results to
   * wsptr[4*0] .. wsptr[4*7], so the work array holds 4 entries per row.
5064
   */
5065
5066
  inptr = coef_block;
5067
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
5068
  wsptr = workspace;
5069
  for (ctr = 4; ctr > 0; ctr--) {   /* counts down: ctr == 4 is the first column */
5070
    /* Due to quantization, we will usually find that many of the input
5071
     * coefficients are zero, especially the AC terms.  We can exploit this
5072
     * by short-circuiting the IDCT calculation for any column in which all
5073
     * the AC terms are zero.  In that case each output is equal to the
5074
     * DC coefficient (with scale factor as needed).
5075
     * With typical images and quantization tables, half or more of the
5076
     * column DCT calculations can be simplified this way.
5077
     */
5078
5079
    if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
5080
  inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
5081
  inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
5082
  inptr[DCTSIZE*7] == 0) {
5083
      /* AC terms all zero */
5084
#if PASS1_BITS > 0
5085
      int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
5086
#else
5087
      int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
5088
#endif
5089
      /* The same value fills all 8 work-array rows of this column. */
5090
      wsptr[4*0] = dcval;
5091
      wsptr[4*1] = dcval;
5092
      wsptr[4*2] = dcval;
5093
      wsptr[4*3] = dcval;
5094
      wsptr[4*4] = dcval;
5095
      wsptr[4*5] = dcval;
5096
      wsptr[4*6] = dcval;
5097
      wsptr[4*7] = dcval;
5098
5099
      inptr++;      /* advance pointers to next column */
5100
      quantptr++;
5101
      wsptr++;
5102
      continue;
5103
    }
5104
5105
    /* Even part: reverse the even part of the forward DCT.
5106
     * The rotator is c(-6).
5107
     */
5108
    /* NOTE(review): the all-AC-zero shortcut above never reaches the CLAMP_DC below -- confirm intended. */
5109
    z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
5110
    if (ctr == 4)       /* first column only, i.e. z2 comes from coef_block[0] */
5111
      CLAMP_DC(z2);     /* macro defined outside this chunk */
5112
    z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
5113
    z2 <<= CONST_BITS;
5114
    z3 <<= CONST_BITS;
5115
    /* Add fudge factor here for final descale. */
5116
    z2 += ONE << (CONST_BITS-PASS1_BITS-1);
5117
5118
    tmp0 = z2 + z3;
5119
    tmp1 = z2 - z3;
5120
5121
    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
5122
    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
5123
5124
    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
5125
    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
5126
    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
5127
5128
    tmp10 = tmp0 + tmp2;
5129
    tmp13 = tmp0 - tmp2;
5130
    tmp11 = tmp1 + tmp3;
5131
    tmp12 = tmp1 - tmp3;
5132
5133
    /* Odd part per figure 8; the matrix is unitary and hence its
5134
     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
5135
     */
5136
    /* tmp0..tmp3 are reused from here on for the odd-row inputs. */
5137
    tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
5138
    tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
5139
    tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
5140
    tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
5141
5142
    z2 = tmp0 + tmp2;
5143
    z3 = tmp1 + tmp3;
5144
5145
    z1 = MULTIPLY(z2 + z3, FIX_1_175875602);       /*  c3 */
5146
    z2 = MULTIPLY(z2, - FIX_1_961570560);          /* -c3-c5 */
5147
    z3 = MULTIPLY(z3, - FIX_0_390180644);          /* -c3+c5 */
5148
    z2 += z1;
5149
    z3 += z1;
5150
5151
    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
5152
    tmp0 = MULTIPLY(tmp0, FIX_0_298631336);        /* -c1+c3+c5-c7 */
5153
    tmp3 = MULTIPLY(tmp3, FIX_1_501321110);        /*  c1+c3-c5-c7 */
5154
    tmp0 += z1 + z2;
5155
    tmp3 += z1 + z3;
5156
5157
    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
5158
    tmp1 = MULTIPLY(tmp1, FIX_2_053119869);        /*  c1+c3-c5+c7 */
5159
    tmp2 = MULTIPLY(tmp2, FIX_3_072711026);        /*  c1+c3+c5-c7 */
5160
    tmp1 += z1 + z3;
5161
    tmp2 += z1 + z2;
5162
5163
    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
5164
5165
    wsptr[4*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
5166
    wsptr[4*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
5167
    wsptr[4*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
5168
    wsptr[4*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
5169
    wsptr[4*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
5170
    wsptr[4*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
5171
    wsptr[4*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
5172
    wsptr[4*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
5173
5174
    inptr++;      /* advance pointers to next column */
5175
    quantptr++;
5176
    wsptr++;
5177
  }
5178
5179
  /* Pass 2: process 8 rows from work array, store into output array.
5180
   * 4-point IDCT kernel,
5181
   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
   * Each iteration reads wsptr[0..3] and stores outptr[0..3].
5182
   */
5183
5184
  wsptr = workspace;
5185
  for (ctr = 0; ctr < 8; ctr++) {
5186
    outptr = output_buf[ctr] + output_col;
5187
5188
    /* Even part */
5189
5190
    /* Add range center and fudge factor for final descale and range-limit. */
5191
    tmp0 = (INT32) wsptr[0] + PASS2_OFFSET;
5192
    tmp2 = (INT32) wsptr[2];
5193
    tmp0 <<= CONST_BITS;
5194
    tmp2 <<= CONST_BITS;
5195
#if PASS2_BITS == 0
5196
    tmp0 += ONE << (CONST_BITS-1);
5197
#endif
5198
5199
    tmp10 = tmp0 + tmp2;
5200
    tmp12 = tmp0 - tmp2;
5201
5202
    /* Odd part */
5203
    /* Same rotation as in the even part of the 8x8 LL&M IDCT */
5204
5205
    z2 = (INT32) wsptr[1];
5206
    z3 = (INT32) wsptr[3];
5207
5208
    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);   /* c6 */
5209
    tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
5210
    tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
5211
5212
    /* Final output stage */
5213
    /* Each descaled value is masked with RANGE_MASK and mapped through range_limit. */
5214
    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
5215
                CONST_BITS+PASS2_BITS)
5216
          & RANGE_MASK];
5217
    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
5218
                CONST_BITS+PASS2_BITS)
5219
          & RANGE_MASK];
5220
    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
5221
                CONST_BITS+PASS2_BITS)
5222
          & RANGE_MASK];
5223
    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
5224
                CONST_BITS+PASS2_BITS)
5225
          & RANGE_MASK];
5226
5227
    wsptr += 4;   /* advance pointer to next row */
5228
  }
5229
}
5230
5231
5232
/*
5233
 * Perform dequantization and inverse DCT on one block of coefficients,
5234
 * producing a 3x6 output block.
5235
 *
5236
 * 6-point IDCT in pass 1 (columns), 3-point in pass 2 (rows).
 *
 * cinfo       is only used to fetch the range-limit lookup table.
 * compptr     supplies dct_table, read here as ISLOW_MULT_TYPE entries.
 * coef_block  holds the input coefficients; rows 0..5 of the first
 *             3 columns are read (row stride DCTSIZE).
 * output_buf  is indexed by row; rows 0..5 each receive 3 samples.
 * output_col  is added to every row pointer before samples are stored.
5237
 */
5238
5239
GLOBAL(void)
5240
jpeg_idct_3x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
5241
         JCOEFPTR coef_block,
5242
         JSAMPARRAY output_buf, JDIMENSION output_col)
5243
{
5244
  INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
5245
  INT32 z1, z2, z3;
5246
  JCOEFPTR inptr;
5247
  ISLOW_MULT_TYPE * quantptr;
5248
  int * wsptr;
5249
  JSAMPROW outptr;
5250
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
5251
  int ctr;
5252
  int workspace[3*6]; /* buffers data between passes */
5253
  SHIFT_TEMPS
5254
5255
  /* Pass 1: process columns from input, store into work array.
5256
   * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
   * Each iteration handles one column and writes its 6 results to
   * wsptr[3*0] .. wsptr[3*5], so the work array holds 3 entries per row.
5257
   */
5258
5259
  inptr = coef_block;
5260
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
5261
  wsptr = workspace;
5262
  for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) {
5263
    /* Even part */
5264
5265
    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
5266
    tmp0 <<= CONST_BITS;
5267
    /* Add fudge factor here for final descale. */
5268
    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
5269
    tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
5270
    tmp10 = MULTIPLY(tmp2, FIX(0.707106781));   /* c4 */
5271
    tmp1 = tmp0 + tmp10;
5272
    tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS); /* descaled here already */
5273
    tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
5274
    tmp0 = MULTIPLY(tmp10, FIX(1.224744871));   /* c2 */
5275
    tmp10 = tmp1 + tmp0;
5276
    tmp12 = tmp1 - tmp0;
5277
5278
    /* Odd part */
5279
5280
    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
5281
    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
5282
    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
5283
    tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
5284
    tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
5285
    tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
5286
#if PASS1_BITS > 0
5287
    tmp1 = (z1 - z2 - z3) << PASS1_BITS;   /* only PASS1_BITS scaling, to match the descaled tmp11 */
5288
#else
5289
    tmp1 = z1 - z2 - z3;
5290
#endif
5291
5292
    /* Final output stage */
5293
5294
    wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
5295
    wsptr[3*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
5296
    wsptr[3*1] = (int) (tmp11 + tmp1);   /* no shift: both terms are at workspace scale */
5297
    wsptr[3*4] = (int) (tmp11 - tmp1);   /* no shift: both terms are at workspace scale */
5298
    wsptr[3*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
5299
    wsptr[3*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
5300
  }
5301
5302
  /* Pass 2: process 6 rows from work array, store into output array.
5303
   * 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
   * Each iteration reads wsptr[0..2] and stores outptr[0..2].
5304
   */
5305
5306
  wsptr = workspace;
5307
  for (ctr = 0; ctr < 6; ctr++) {
5308
    outptr = output_buf[ctr] + output_col;
5309
5310
    /* Even part */
5311
5312
    /* Add range center and fudge factor for final descale and range-limit. */
5313
    tmp0 = (INT32) wsptr[0] + PASS2_OFFSET;
5314
    tmp0 <<= CONST_BITS;
5315
#if PASS2_BITS == 0
5316
    tmp0 += ONE << (CONST_BITS-1);
5317
#endif
5318
    tmp2 = (INT32) wsptr[2];
5319
    tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
5320
    tmp10 = tmp0 + tmp12;
5321
    tmp2 = tmp0 - tmp12 - tmp12;   /* the centre sample outptr[1] is built from this term alone */
5322
5323
    /* Odd part */
5324
5325
    tmp12 = (INT32) wsptr[1];
5326
    tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
5327
5328
    /* Final output stage */
5329
    /* Each descaled value is masked with RANGE_MASK and mapped through range_limit. */
5330
    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
5331
                CONST_BITS+PASS2_BITS)
5332
          & RANGE_MASK];
5333
    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
5334
                CONST_BITS+PASS2_BITS)
5335
          & RANGE_MASK];
5336
    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2,
5337
                CONST_BITS+PASS2_BITS)
5338
          & RANGE_MASK];
5339
5340
    wsptr += 3;   /* advance pointer to next row */
5341
  }
5342
}
5343
5344
5345
/*
5346
 * Perform dequantization and inverse DCT on one block of coefficients,
5347
 * producing a 2x4 output block.
5348
 *
5349
 * 4-point IDCT in pass 1 (columns), 2-point in pass 2 (rows).
 *
 * cinfo       is only used to fetch the range-limit lookup table.
 * compptr     supplies dct_table, read here as ISLOW_MULT_TYPE entries.
 * coef_block  holds the input coefficients; rows 0..3 of the first
 *             2 columns are read (row stride DCTSIZE).
 * output_buf  is indexed by row; rows 0..3 each receive 2 samples.
 * output_col  is added to every row pointer before samples are stored.
5350
 */
5351
5352
GLOBAL(void)
5353
jpeg_idct_2x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
5354
         JCOEFPTR coef_block,
5355
         JSAMPARRAY output_buf, JDIMENSION output_col)
5356
{
5357
  INT32 tmp0, tmp2, tmp10, tmp12;
5358
  INT32 z1, z2, z3;
5359
  JCOEFPTR inptr;
5360
  ISLOW_MULT_TYPE * quantptr;
5361
  INT32 * wsptr;
5362
  JSAMPROW outptr;
5363
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
5364
  int ctr;
5365
  INT32 workspace[2*4]; /* buffers data between passes */
5366
  SHIFT_TEMPS
5367
5368
  /* Pass 1: process columns from input, store into work array.
5369
   * 4-point IDCT kernel,
5370
   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
   * Each iteration handles one column and writes its 4 results to
   * wsptr[2*0] .. wsptr[2*3], so the work array holds 2 entries per row.
5371
   */
5372
5373
  inptr = coef_block;
5374
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
5375
  wsptr = workspace;
5376
  for (ctr = 0; ctr < 2; ctr++, inptr++, quantptr++, wsptr++) {
5377
    /* Even part */
5378
5379
    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
5380
    tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
5381
5382
    tmp10 = (tmp0 + tmp2) << CONST_BITS;
5383
    tmp12 = (tmp0 - tmp2) << CONST_BITS;
5384
5385
    /* Odd part */
5386
    /* Same rotation as in the even part of the 8x8 LL&M IDCT */
5387
5388
    z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
5389
    z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
5390
5391
    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);   /* c6 */
5392
    tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
5393
    tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
5394
5395
    /* Final output stage */
5396
    /* Stored without descaling; pass 2 applies the only right shift. */
5397
    wsptr[2*0] = tmp10 + tmp0;
5398
    wsptr[2*3] = tmp10 - tmp0;
5399
    wsptr[2*1] = tmp12 + tmp2;
5400
    wsptr[2*2] = tmp12 - tmp2;
5401
  }
5402
5403
  /* Pass 2: process 4 rows from work array, store into output array. */
5404
5405
  wsptr = workspace;
5406
  for (ctr = 0; ctr < 4; ctr++) {
5407
    outptr = output_buf[ctr] + output_col;
5408
5409
    /* Even part */
5410
5411
    /* Add range center and fudge factor for final descale and range-limit. */
5412
    tmp10 = wsptr[0] +
5413
      ((((INT32) RANGE_CENTER) << (CONST_BITS+PASS2_BITS-PASS1_BITS)) +
5414
       (ONE << (CONST_BITS+PASS2_BITS-PASS1_BITS-1)));
5415
5416
    /* Odd part */
5417
5418
    tmp0 = wsptr[1];
5419
5420
    /* Final output stage */
5421
    /* Sum and difference of the two row entries give the two output samples. */
5422
    outptr[0] =
5423
      range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
5424
            CONST_BITS+PASS2_BITS-PASS1_BITS)
5425
      & RANGE_MASK];
5426
    outptr[1] =
5427
      range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
5428
            CONST_BITS+PASS2_BITS-PASS1_BITS)
5429
      & RANGE_MASK];
5430
5431
    wsptr += 2;   /* advance pointer to next row */
5432
  }
5433
}
5434
5435
5436
/*
5437
 * Perform dequantization and inverse DCT on one block of coefficients,
5438
 * producing a 1x2 output block.
5439
 *
5440
 * 2-point IDCT in pass 1 (columns), 1-point in pass 2 (rows).
 *
 * cinfo       is only used to fetch the range-limit lookup table.
 * compptr     supplies dct_table, read here as ISLOW_MULT_TYPE entries.
 * coef_block  holds the input coefficients; only coef_block[DCTSIZE*0]
 *             and coef_block[DCTSIZE*1] are read.
 * output_buf  is indexed by row; rows 0 and 1 each receive one sample.
 * output_col  is the index written within each of those two rows.
5441
 */
5442
5443
GLOBAL(void)
5444
jpeg_idct_1x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
5445
         JCOEFPTR coef_block,
5446
         JSAMPARRAY output_buf, JDIMENSION output_col)
5447
{
5448
  DCTELEM tmp0, tmp1;
5449
  ISLOW_MULT_TYPE * quantptr;
5450
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
5451
  ISHIFT_TEMPS
5452
5453
  /* Process 1 column from input, store into output array. */
5454
5455
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
5456
5457
  /* Even part */
5458
5459
  tmp0 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]);
5460
5461
  /* Odd part */
5462
5463
  tmp1 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]);
5464
5465
  /* Final output stage */
5466
  /* The scaling path is selected at compile time from PASS1_BITS and PASS2_BITS. */
5467
#if PASS2_BITS > PASS1_BITS
5468
  /* Add range center and fudge factor for downscale and range-limit. */
5469
#if PASS2_BITS > PASS1_BITS + 1
5470
  tmp0 += (((DCTELEM) RANGE_CENTER) << (PASS2_BITS-PASS1_BITS)) +
5471
    (1 << (PASS2_BITS-PASS1_BITS-1));
5472
#else
5473
  tmp0 += (((DCTELEM) RANGE_CENTER) << 1) + 1;
5474
#endif
5475
  /* Sum and difference are shifted down by the bit-count difference before lookup. */
5476
  output_buf[0][output_col] =
5477
    range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, PASS2_BITS-PASS1_BITS)
5478
    & RANGE_MASK];
5479
  output_buf[1][output_col] =
5480
    range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, PASS2_BITS-PASS1_BITS)
5481
    & RANGE_MASK];
5482
#else
5483
#if PASS2_BITS < PASS1_BITS
5484
  tmp0 <<= (PASS1_BITS-PASS2_BITS); /* upscale */
5485
  tmp1 <<= (PASS1_BITS-PASS2_BITS); /* upscale */
5486
#endif
5487
  /* No downscale on this path, so no rounding term is added. */
5488
  tmp0 += (DCTELEM) RANGE_CENTER; /* add range center for range-limit */
5489
5490
  output_buf[0][output_col] =
5491
    range_limit[(int) (tmp0 + tmp1) & RANGE_MASK];
5492
  output_buf[1][output_col] =
5493
    range_limit[(int) (tmp0 - tmp1) & RANGE_MASK];
5494
#endif
5495
}
5496
5497
#endif /* IDCT_SCALING_SUPPORTED */
5498
#endif /* DCT_ISLOW_SUPPORTED */