Coverage Report

Created: 2023-12-08 06:53

/src/freeimage-svn/FreeImage/trunk/Source/LibJPEG/jidctint.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * jidctint.c
3
 *
4
 * Copyright (C) 1991-1998, Thomas G. Lane.
5
 * Modification developed 2002-2018 by Guido Vollbeding.
6
 * This file is part of the Independent JPEG Group's software.
7
 * For conditions of distribution and use, see the accompanying README file.
8
 *
9
 * This file contains a slow-but-accurate integer implementation of the
10
 * inverse DCT (Discrete Cosine Transform).  In the IJG code, this routine
11
 * must also perform dequantization of the input coefficients.
12
 *
13
 * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
14
 * on each row (or vice versa, but it's more convenient to emit a row at
15
 * a time).  Direct algorithms are also available, but they are much more
16
 * complex and seem not to be any faster when reduced to code.
17
 *
18
 * This implementation is based on an algorithm described in
19
 *   C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
20
 *   Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
21
 *   Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
22
 * The primary algorithm described there uses 11 multiplies and 29 adds.
23
 * We use their alternate method with 12 multiplies and 32 adds.
24
 * The advantage of this method is that no data path contains more than one
25
 * multiplication; this allows a very simple and accurate implementation in
26
 * scaled fixed-point arithmetic, with a minimal number of shifts.
27
 *
28
 * We also provide IDCT routines with various output sample block sizes for
29
 * direct resolution reduction or enlargement and for direct resolving the
30
 * common 2x1 and 1x2 subsampling cases without additional resampling: NxN
31
 * (N=1...16), 2NxN, and Nx2N (N=1...8) pixels for one 8x8 input DCT block.
32
 *
33
 * For N<8 we simply take the corresponding low-frequency coefficients of
34
 * the 8x8 input DCT block and apply an NxN point IDCT on the sub-block
35
 * to yield the downscaled outputs.
36
 * This can be seen as direct low-pass downsampling from the DCT domain
37
 * point of view rather than the usual spatial domain point of view,
38
 * yielding significant computational savings and results at least
39
 * as good as common bilinear (averaging) spatial downsampling.
40
 *
41
 * For N>8 we apply a partial NxN IDCT on the 8 input coefficients as
42
 * lower frequencies and higher frequencies assumed to be zero.
43
 * It turns out that the computational effort is similar to the 8x8 IDCT
44
 * regarding the output size.
45
 * Furthermore, the scaling and descaling is the same for all IDCT sizes.
46
 *
47
 * CAUTION: We rely on the FIX() macro except for the N=1,2,4,8 cases
48
 * since there would be too many additional constants to pre-calculate.
49
 */
50
51
#define JPEG_INTERNALS
52
#include "jinclude.h"
53
#include "jpeglib.h"
54
#include "jdct.h"   /* Private declarations for DCT subsystem */
55
56
#ifdef DCT_ISLOW_SUPPORTED
57
58
59
/*
60
 * This module is specialized to the case DCTSIZE = 8.
61
 */
62
63
#if DCTSIZE != 8
64
  Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */
65
#endif
66
67
68
/*
69
 * The poop on this scaling stuff is as follows:
70
 *
71
 * Each 1-D IDCT step produces outputs which are a factor of sqrt(N)
72
 * larger than the true IDCT outputs.  The final outputs are therefore
73
 * a factor of N larger than desired; since N=8 this can be cured by
74
 * a simple right shift at the end of the algorithm.  The advantage of
75
 * this arrangement is that we save two multiplications per 1-D IDCT,
76
 * because the y0 and y4 inputs need not be divided by sqrt(N).
77
 *
78
 * We have to do addition and subtraction of the integer inputs, which
79
 * is no problem, and multiplication by fractional constants, which is
80
 * a problem to do in integer arithmetic.  We multiply all the constants
81
 * by CONST_SCALE and convert them to integer constants (thus retaining
82
 * CONST_BITS bits of precision in the constants).  After doing a
83
 * multiplication we have to divide the product by CONST_SCALE, with proper
84
 * rounding, to produce the correct output.  This division can be done
85
 * cheaply as a right shift of CONST_BITS bits.  We postpone shifting
86
 * as long as possible so that partial sums can be added together with
87
 * full fractional precision.
88
 *
89
 * The outputs of the first pass are scaled up by PASS1_BITS bits so that
90
 * they are represented to better-than-integral precision.  These outputs
91
 * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
92
 * with the recommended scaling.  (To scale up 12-bit sample data further, an
93
 * intermediate INT32 array would be needed.)
94
 *
95
 * To avoid overflow of the 32-bit intermediate results in pass 2, we must
96
 * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26.  Error analysis
97
 * shows that the values given below are the most effective.
98
 */
99
100
/* Fixed-point precision settings.  CONST_BITS is the same for every sample
 * depth; only the pass-1 scaling depends on BITS_IN_JSAMPLE.
 */
#define CONST_BITS  13
#if BITS_IN_JSAMPLE == 8
#define PASS1_BITS  2
#else
#define PASS1_BITS  1   /* lose a little precision to avoid overflow */
#endif
107
108
/* Fixed-point multiplier constants for the 8x8 kernel.
 *
 * Some C compilers fail to fold "FIX(constant)" at compile time and would
 * therefore perform useless floating-point work at run time.  For the
 * standard setting CONST_BITS == 13 the values are therefore spelled out as
 * pre-calculated integers; for any other CONST_BITS we fall back to FIX().
 * If you change CONST_BITS you may want to add appropriate values.
 */

#if CONST_BITS != 13
#define FIX_0_298631336  FIX(0.298631336)
#define FIX_0_390180644  FIX(0.390180644)
#define FIX_0_541196100  FIX(0.541196100)
#define FIX_0_765366865  FIX(0.765366865)
#define FIX_0_899976223  FIX(0.899976223)
#define FIX_1_175875602  FIX(1.175875602)
#define FIX_1_501321110  FIX(1.501321110)
#define FIX_1_847759065  FIX(1.847759065)
#define FIX_1_961570560  FIX(1.961570560)
#define FIX_2_053119869  FIX(2.053119869)
#define FIX_2_562915447  FIX(2.562915447)
#define FIX_3_072711026  FIX(3.072711026)
#else
#define FIX_0_298631336  ((INT32)  2446)  /* FIX(0.298631336) */
#define FIX_0_390180644  ((INT32)  3196)  /* FIX(0.390180644) */
#define FIX_0_541196100  ((INT32)  4433)  /* FIX(0.541196100) */
#define FIX_0_765366865  ((INT32)  6270)  /* FIX(0.765366865) */
#define FIX_0_899976223  ((INT32)  7373)  /* FIX(0.899976223) */
#define FIX_1_175875602  ((INT32)  9633)  /* FIX(1.175875602) */
#define FIX_1_501321110  ((INT32)  12299) /* FIX(1.501321110) */
#define FIX_1_847759065  ((INT32)  15137) /* FIX(1.847759065) */
#define FIX_1_961570560  ((INT32)  16069) /* FIX(1.961570560) */
#define FIX_2_053119869  ((INT32)  16819) /* FIX(2.053119869) */
#define FIX_2_562915447  ((INT32)  20995) /* FIX(2.562915447) */
#define FIX_3_072711026  ((INT32)  25172) /* FIX(3.072711026) */
#endif
142
143
144
/* MULTIPLY(v,c): product of an INT32 variable and an INT32 constant,
 * yielding an INT32 result.
 * For 12-bit samples a full 32-bit multiplication is needed.
 * For 8-bit samples with the recommended scaling, every variable and
 * constant involved is at most 16 bits wide, so the 16x16->32 bit
 * multiply MULTIPLY16C16 can be used instead.
 */

#if BITS_IN_JSAMPLE != 8
#define MULTIPLY(v,c)  ((v) * (c))
#else
#define MULTIPLY(v,c)  MULTIPLY16C16(v,c)
#endif
156
157
158
/* DEQUANTIZE(c,q): dequantize coefficient c by multiplying it with its
 * multiplier-table entry q.  The coefficient is first converted to
 * ISLOW_MULT_TYPE.  In this module both operands and the result are
 * 16 bits or less, so either an int or a short multiply will work.
 */

#define DEQUANTIZE(c,q)  (((ISLOW_MULT_TYPE) (c)) * (q))
164
165
166
/*
167
 * Perform dequantization and inverse DCT on one block of coefficients.
168
 *
169
 * Optimized algorithm with 12 multiplications in the 1-D kernel.
170
 * cK represents sqrt(2) * cos(K*pi/16).
171
 */
172
173
GLOBAL(void)
174
jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
175
     JCOEFPTR coef_block,
176
     JSAMPARRAY output_buf, JDIMENSION output_col)
177
0
{
178
0
  INT32 tmp0, tmp1, tmp2, tmp3;
179
0
  INT32 tmp10, tmp11, tmp12, tmp13;
180
0
  INT32 z1, z2, z3;
181
0
  JCOEFPTR inptr;
182
0
  ISLOW_MULT_TYPE * quantptr;
183
0
  int * wsptr;
184
0
  JSAMPROW outptr;
185
0
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
186
0
  int ctr;
187
0
  int workspace[DCTSIZE2];  /* buffers data between passes */
188
  SHIFT_TEMPS
189
190
  /* Pass 1: process columns from input, store into work array.
191
   * Note results are scaled up by sqrt(8) compared to a true IDCT;
192
   * furthermore, we scale the results by 2**PASS1_BITS.
193
   */
194
195
0
  inptr = coef_block;
196
0
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
197
0
  wsptr = workspace;
198
0
  for (ctr = DCTSIZE; ctr > 0; ctr--) {
199
    /* Due to quantization, we will usually find that many of the input
200
     * coefficients are zero, especially the AC terms.  We can exploit this
201
     * by short-circuiting the IDCT calculation for any column in which all
202
     * the AC terms are zero.  In that case each output is equal to the
203
     * DC coefficient (with scale factor as needed).
204
     * With typical images and quantization tables, half or more of the
205
     * column DCT calculations can be simplified this way.
206
     */
207
208
0
    if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
209
0
  inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
210
0
  inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
211
0
  inptr[DCTSIZE*7] == 0) {
212
      /* AC terms all zero */
213
0
      int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
214
215
0
      wsptr[DCTSIZE*0] = dcval;
216
0
      wsptr[DCTSIZE*1] = dcval;
217
0
      wsptr[DCTSIZE*2] = dcval;
218
0
      wsptr[DCTSIZE*3] = dcval;
219
0
      wsptr[DCTSIZE*4] = dcval;
220
0
      wsptr[DCTSIZE*5] = dcval;
221
0
      wsptr[DCTSIZE*6] = dcval;
222
0
      wsptr[DCTSIZE*7] = dcval;
223
224
0
      inptr++;      /* advance pointers to next column */
225
0
      quantptr++;
226
0
      wsptr++;
227
0
      continue;
228
0
    }
229
230
    /* Even part: reverse the even part of the forward DCT.
231
     * The rotator is c(-6).
232
     */
233
234
0
    z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
235
0
    z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
236
0
    z2 <<= CONST_BITS;
237
0
    z3 <<= CONST_BITS;
238
    /* Add fudge factor here for final descale. */
239
0
    z2 += ONE << (CONST_BITS-PASS1_BITS-1);
240
241
0
    tmp0 = z2 + z3;
242
0
    tmp1 = z2 - z3;
243
244
0
    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
245
0
    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
246
247
0
    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
248
0
    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
249
0
    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
250
251
0
    tmp10 = tmp0 + tmp2;
252
0
    tmp13 = tmp0 - tmp2;
253
0
    tmp11 = tmp1 + tmp3;
254
0
    tmp12 = tmp1 - tmp3;
255
256
    /* Odd part per figure 8; the matrix is unitary and hence its
257
     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
258
     */
259
260
0
    tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
261
0
    tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
262
0
    tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
263
0
    tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
264
265
0
    z2 = tmp0 + tmp2;
266
0
    z3 = tmp1 + tmp3;
267
268
0
    z1 = MULTIPLY(z2 + z3, FIX_1_175875602);       /*  c3 */
269
0
    z2 = MULTIPLY(z2, - FIX_1_961570560);          /* -c3-c5 */
270
0
    z3 = MULTIPLY(z3, - FIX_0_390180644);          /* -c3+c5 */
271
0
    z2 += z1;
272
0
    z3 += z1;
273
274
0
    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
275
0
    tmp0 = MULTIPLY(tmp0, FIX_0_298631336);        /* -c1+c3+c5-c7 */
276
0
    tmp3 = MULTIPLY(tmp3, FIX_1_501321110);        /*  c1+c3-c5-c7 */
277
0
    tmp0 += z1 + z2;
278
0
    tmp3 += z1 + z3;
279
280
0
    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
281
0
    tmp1 = MULTIPLY(tmp1, FIX_2_053119869);        /*  c1+c3-c5+c7 */
282
0
    tmp2 = MULTIPLY(tmp2, FIX_3_072711026);        /*  c1+c3+c5-c7 */
283
0
    tmp1 += z1 + z3;
284
0
    tmp2 += z1 + z2;
285
286
    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
287
288
0
    wsptr[DCTSIZE*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
289
0
    wsptr[DCTSIZE*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
290
0
    wsptr[DCTSIZE*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
291
0
    wsptr[DCTSIZE*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
292
0
    wsptr[DCTSIZE*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
293
0
    wsptr[DCTSIZE*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
294
0
    wsptr[DCTSIZE*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
295
0
    wsptr[DCTSIZE*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
296
297
0
    inptr++;      /* advance pointers to next column */
298
0
    quantptr++;
299
0
    wsptr++;
300
0
  }
301
302
  /* Pass 2: process rows from work array, store into output array.
303
   * Note that we must descale the results by a factor of 8 == 2**3,
304
   * and also undo the PASS1_BITS scaling.
305
   */
306
307
0
  wsptr = workspace;
308
0
  for (ctr = 0; ctr < DCTSIZE; ctr++) {
309
0
    outptr = output_buf[ctr] + output_col;
310
311
    /* Add range center and fudge factor for final descale and range-limit. */
312
0
    z2 = (INT32) wsptr[0] +
313
0
     ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
314
0
      (ONE << (PASS1_BITS+2)));
315
316
    /* Rows of zeroes can be exploited in the same way as we did with columns.
317
     * However, the column calculation has created many nonzero AC terms, so
318
     * the simplification applies less often (typically 5% to 10% of the time).
319
     * On machines with very fast multiplication, it's possible that the
320
     * test takes more time than it's worth.  In that case this section
321
     * may be commented out.
322
     */
323
324
0
#ifndef NO_ZERO_ROW_TEST
325
0
    if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 &&
326
0
  wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
327
      /* AC terms all zero */
328
0
      JSAMPLE dcval = range_limit[(int) RIGHT_SHIFT(z2, PASS1_BITS+3)
329
0
          & RANGE_MASK];
330
331
0
      outptr[0] = dcval;
332
0
      outptr[1] = dcval;
333
0
      outptr[2] = dcval;
334
0
      outptr[3] = dcval;
335
0
      outptr[4] = dcval;
336
0
      outptr[5] = dcval;
337
0
      outptr[6] = dcval;
338
0
      outptr[7] = dcval;
339
340
0
      wsptr += DCTSIZE;   /* advance pointer to next row */
341
0
      continue;
342
0
    }
343
0
#endif
344
345
    /* Even part: reverse the even part of the forward DCT.
346
     * The rotator is c(-6).
347
     */
348
349
0
    z3 = (INT32) wsptr[4];
350
351
0
    tmp0 = (z2 + z3) << CONST_BITS;
352
0
    tmp1 = (z2 - z3) << CONST_BITS;
353
354
0
    z2 = (INT32) wsptr[2];
355
0
    z3 = (INT32) wsptr[6];
356
357
0
    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
358
0
    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
359
0
    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
360
361
0
    tmp10 = tmp0 + tmp2;
362
0
    tmp13 = tmp0 - tmp2;
363
0
    tmp11 = tmp1 + tmp3;
364
0
    tmp12 = tmp1 - tmp3;
365
366
    /* Odd part per figure 8; the matrix is unitary and hence its
367
     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
368
     */
369
370
0
    tmp0 = (INT32) wsptr[7];
371
0
    tmp1 = (INT32) wsptr[5];
372
0
    tmp2 = (INT32) wsptr[3];
373
0
    tmp3 = (INT32) wsptr[1];
374
375
0
    z2 = tmp0 + tmp2;
376
0
    z3 = tmp1 + tmp3;
377
378
0
    z1 = MULTIPLY(z2 + z3, FIX_1_175875602);       /*  c3 */
379
0
    z2 = MULTIPLY(z2, - FIX_1_961570560);          /* -c3-c5 */
380
0
    z3 = MULTIPLY(z3, - FIX_0_390180644);          /* -c3+c5 */
381
0
    z2 += z1;
382
0
    z3 += z1;
383
384
0
    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
385
0
    tmp0 = MULTIPLY(tmp0, FIX_0_298631336);        /* -c1+c3+c5-c7 */
386
0
    tmp3 = MULTIPLY(tmp3, FIX_1_501321110);        /*  c1+c3-c5-c7 */
387
0
    tmp0 += z1 + z2;
388
0
    tmp3 += z1 + z3;
389
390
0
    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
391
0
    tmp1 = MULTIPLY(tmp1, FIX_2_053119869);        /*  c1+c3-c5+c7 */
392
0
    tmp2 = MULTIPLY(tmp2, FIX_3_072711026);        /*  c1+c3+c5-c7 */
393
0
    tmp1 += z1 + z3;
394
0
    tmp2 += z1 + z2;
395
396
    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
397
398
0
    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
399
0
                CONST_BITS+PASS1_BITS+3)
400
0
          & RANGE_MASK];
401
0
    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
402
0
                CONST_BITS+PASS1_BITS+3)
403
0
          & RANGE_MASK];
404
0
    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
405
0
                CONST_BITS+PASS1_BITS+3)
406
0
          & RANGE_MASK];
407
0
    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
408
0
                CONST_BITS+PASS1_BITS+3)
409
0
          & RANGE_MASK];
410
0
    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
411
0
                CONST_BITS+PASS1_BITS+3)
412
0
          & RANGE_MASK];
413
0
    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
414
0
                CONST_BITS+PASS1_BITS+3)
415
0
          & RANGE_MASK];
416
0
    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
417
0
                CONST_BITS+PASS1_BITS+3)
418
0
          & RANGE_MASK];
419
0
    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
420
0
                CONST_BITS+PASS1_BITS+3)
421
0
          & RANGE_MASK];
422
423
0
    wsptr += DCTSIZE;   /* advance pointer to next row */
424
0
  }
425
0
}
426
427
#ifdef IDCT_SCALING_SUPPORTED
428
429
430
/*
431
 * Perform dequantization and inverse DCT on one block of coefficients,
432
 * producing a reduced-size 7x7 output block.
433
 *
434
 * Optimized algorithm with 12 multiplications in the 1-D kernel.
435
 * cK represents sqrt(2) * cos(K*pi/14).
436
 */
437
438
GLOBAL(void)
439
jpeg_idct_7x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
440
         JCOEFPTR coef_block,
441
         JSAMPARRAY output_buf, JDIMENSION output_col)
442
0
{
443
0
  INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12, tmp13;
444
0
  INT32 z1, z2, z3;
445
0
  JCOEFPTR inptr;
446
0
  ISLOW_MULT_TYPE * quantptr;
447
0
  int * wsptr;
448
0
  JSAMPROW outptr;
449
0
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
450
0
  int ctr;
451
0
  int workspace[7*7]; /* buffers data between passes */
452
  SHIFT_TEMPS
453
454
  /* Pass 1: process columns from input, store into work array. */
455
456
0
  inptr = coef_block;
457
0
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
458
0
  wsptr = workspace;
459
0
  for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) {
460
    /* Even part */
461
462
0
    tmp13 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
463
0
    tmp13 <<= CONST_BITS;
464
    /* Add fudge factor here for final descale. */
465
0
    tmp13 += ONE << (CONST_BITS-PASS1_BITS-1);
466
467
0
    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
468
0
    z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
469
0
    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
470
471
0
    tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734));     /* c4 */
472
0
    tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123));     /* c6 */
473
0
    tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
474
0
    tmp0 = z1 + z3;
475
0
    z2 -= tmp0;
476
0
    tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
477
0
    tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536));  /* c2-c4-c6 */
478
0
    tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249));  /* c2+c4+c6 */
479
0
    tmp13 += MULTIPLY(z2, FIX(1.414213562));         /* c0 */
480
481
    /* Odd part */
482
483
0
    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
484
0
    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
485
0
    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
486
487
0
    tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347));      /* (c3+c1-c5)/2 */
488
0
    tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339));      /* (c3+c5-c1)/2 */
489
0
    tmp0 = tmp1 - tmp2;
490
0
    tmp1 += tmp2;
491
0
    tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276));    /* -c1 */
492
0
    tmp1 += tmp2;
493
0
    z2 = MULTIPLY(z1 + z3, FIX(0.613604268));        /* c5 */
494
0
    tmp0 += z2;
495
0
    tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693));     /* c3+c1-c5 */
496
497
    /* Final output stage */
498
499
0
    wsptr[7*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
500
0
    wsptr[7*6] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
501
0
    wsptr[7*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
502
0
    wsptr[7*5] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
503
0
    wsptr[7*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
504
0
    wsptr[7*4] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
505
0
    wsptr[7*3] = (int) RIGHT_SHIFT(tmp13, CONST_BITS-PASS1_BITS);
506
0
  }
507
508
  /* Pass 2: process 7 rows from work array, store into output array. */
509
510
0
  wsptr = workspace;
511
0
  for (ctr = 0; ctr < 7; ctr++) {
512
0
    outptr = output_buf[ctr] + output_col;
513
514
    /* Even part */
515
516
    /* Add range center and fudge factor for final descale and range-limit. */
517
0
    tmp13 = (INT32) wsptr[0] +
518
0
        ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
519
0
         (ONE << (PASS1_BITS+2)));
520
0
    tmp13 <<= CONST_BITS;
521
522
0
    z1 = (INT32) wsptr[2];
523
0
    z2 = (INT32) wsptr[4];
524
0
    z3 = (INT32) wsptr[6];
525
526
0
    tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734));     /* c4 */
527
0
    tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123));     /* c6 */
528
0
    tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
529
0
    tmp0 = z1 + z3;
530
0
    z2 -= tmp0;
531
0
    tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
532
0
    tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536));  /* c2-c4-c6 */
533
0
    tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249));  /* c2+c4+c6 */
534
0
    tmp13 += MULTIPLY(z2, FIX(1.414213562));         /* c0 */
535
536
    /* Odd part */
537
538
0
    z1 = (INT32) wsptr[1];
539
0
    z2 = (INT32) wsptr[3];
540
0
    z3 = (INT32) wsptr[5];
541
542
0
    tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347));      /* (c3+c1-c5)/2 */
543
0
    tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339));      /* (c3+c5-c1)/2 */
544
0
    tmp0 = tmp1 - tmp2;
545
0
    tmp1 += tmp2;
546
0
    tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276));    /* -c1 */
547
0
    tmp1 += tmp2;
548
0
    z2 = MULTIPLY(z1 + z3, FIX(0.613604268));        /* c5 */
549
0
    tmp0 += z2;
550
0
    tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693));     /* c3+c1-c5 */
551
552
    /* Final output stage */
553
554
0
    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
555
0
                CONST_BITS+PASS1_BITS+3)
556
0
          & RANGE_MASK];
557
0
    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
558
0
                CONST_BITS+PASS1_BITS+3)
559
0
          & RANGE_MASK];
560
0
    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
561
0
                CONST_BITS+PASS1_BITS+3)
562
0
          & RANGE_MASK];
563
0
    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
564
0
                CONST_BITS+PASS1_BITS+3)
565
0
          & RANGE_MASK];
566
0
    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
567
0
                CONST_BITS+PASS1_BITS+3)
568
0
          & RANGE_MASK];
569
0
    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
570
0
                CONST_BITS+PASS1_BITS+3)
571
0
          & RANGE_MASK];
572
0
    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13,
573
0
                CONST_BITS+PASS1_BITS+3)
574
0
          & RANGE_MASK];
575
576
0
    wsptr += 7;   /* advance pointer to next row */
577
0
  }
578
0
}
579
580
581
/*
582
 * Perform dequantization and inverse DCT on one block of coefficients,
583
 * producing a reduced-size 6x6 output block.
584
 *
585
 * Optimized algorithm with 3 multiplications in the 1-D kernel.
586
 * cK represents sqrt(2) * cos(K*pi/12).
587
 */
588
589
GLOBAL(void)
590
jpeg_idct_6x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
591
         JCOEFPTR coef_block,
592
         JSAMPARRAY output_buf, JDIMENSION output_col)
593
0
{
594
0
  INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
595
0
  INT32 z1, z2, z3;
596
0
  JCOEFPTR inptr;
597
0
  ISLOW_MULT_TYPE * quantptr;
598
0
  int * wsptr;
599
0
  JSAMPROW outptr;
600
0
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
601
0
  int ctr;
602
0
  int workspace[6*6]; /* buffers data between passes */
603
  SHIFT_TEMPS
604
605
  /* Pass 1: process columns from input, store into work array. */
606
607
0
  inptr = coef_block;
608
0
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
609
0
  wsptr = workspace;
610
0
  for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
611
    /* Even part */
612
613
0
    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
614
0
    tmp0 <<= CONST_BITS;
615
    /* Add fudge factor here for final descale. */
616
0
    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
617
0
    tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
618
0
    tmp10 = MULTIPLY(tmp2, FIX(0.707106781));   /* c4 */
619
0
    tmp1 = tmp0 + tmp10;
620
0
    tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS);
621
0
    tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
622
0
    tmp0 = MULTIPLY(tmp10, FIX(1.224744871));   /* c2 */
623
0
    tmp10 = tmp1 + tmp0;
624
0
    tmp12 = tmp1 - tmp0;
625
626
    /* Odd part */
627
628
0
    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
629
0
    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
630
0
    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
631
0
    tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
632
0
    tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
633
0
    tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
634
0
    tmp1 = (z1 - z2 - z3) << PASS1_BITS;
635
636
    /* Final output stage */
637
638
0
    wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
639
0
    wsptr[6*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
640
0
    wsptr[6*1] = (int) (tmp11 + tmp1);
641
0
    wsptr[6*4] = (int) (tmp11 - tmp1);
642
0
    wsptr[6*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
643
0
    wsptr[6*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
644
0
  }
645
646
  /* Pass 2: process 6 rows from work array, store into output array. */
647
648
0
  wsptr = workspace;
649
0
  for (ctr = 0; ctr < 6; ctr++) {
650
0
    outptr = output_buf[ctr] + output_col;
651
652
    /* Even part */
653
654
    /* Add range center and fudge factor for final descale and range-limit. */
655
0
    tmp0 = (INT32) wsptr[0] +
656
0
       ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
657
0
        (ONE << (PASS1_BITS+2)));
658
0
    tmp0 <<= CONST_BITS;
659
0
    tmp2 = (INT32) wsptr[4];
660
0
    tmp10 = MULTIPLY(tmp2, FIX(0.707106781));   /* c4 */
661
0
    tmp1 = tmp0 + tmp10;
662
0
    tmp11 = tmp0 - tmp10 - tmp10;
663
0
    tmp10 = (INT32) wsptr[2];
664
0
    tmp0 = MULTIPLY(tmp10, FIX(1.224744871));   /* c2 */
665
0
    tmp10 = tmp1 + tmp0;
666
0
    tmp12 = tmp1 - tmp0;
667
668
    /* Odd part */
669
670
0
    z1 = (INT32) wsptr[1];
671
0
    z2 = (INT32) wsptr[3];
672
0
    z3 = (INT32) wsptr[5];
673
0
    tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
674
0
    tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
675
0
    tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
676
0
    tmp1 = (z1 - z2 - z3) << CONST_BITS;
677
678
    /* Final output stage */
679
680
0
    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
681
0
                CONST_BITS+PASS1_BITS+3)
682
0
          & RANGE_MASK];
683
0
    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
684
0
                CONST_BITS+PASS1_BITS+3)
685
0
          & RANGE_MASK];
686
0
    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
687
0
                CONST_BITS+PASS1_BITS+3)
688
0
          & RANGE_MASK];
689
0
    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
690
0
                CONST_BITS+PASS1_BITS+3)
691
0
          & RANGE_MASK];
692
0
    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
693
0
                CONST_BITS+PASS1_BITS+3)
694
0
          & RANGE_MASK];
695
0
    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
696
0
                CONST_BITS+PASS1_BITS+3)
697
0
          & RANGE_MASK];
698
699
0
    wsptr += 6;   /* advance pointer to next row */
700
0
  }
701
0
}
702
703
704
/*
705
 * Perform dequantization and inverse DCT on one block of coefficients,
706
 * producing a reduced-size 5x5 output block.
707
 *
708
 * Optimized algorithm with 5 multiplications in the 1-D kernel.
709
 * cK represents sqrt(2) * cos(K*pi/10).
710
 */
711
712
GLOBAL(void)
713
jpeg_idct_5x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
714
         JCOEFPTR coef_block,
715
         JSAMPARRAY output_buf, JDIMENSION output_col)
716
0
{
717
0
  INT32 tmp0, tmp1, tmp10, tmp11, tmp12;
718
0
  INT32 z1, z2, z3;
719
0
  JCOEFPTR inptr;
720
0
  ISLOW_MULT_TYPE * quantptr;
721
0
  int * wsptr;
722
0
  JSAMPROW outptr;
723
0
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
724
0
  int ctr;
725
0
  int workspace[5*5]; /* buffers data between passes */
726
  SHIFT_TEMPS
727
728
  /* Pass 1: process columns from input, store into work array. */
729
730
0
  inptr = coef_block;
731
0
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
732
0
  wsptr = workspace;
733
0
  for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) {
734
    /* Even part */
735
736
0
    tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
737
0
    tmp12 <<= CONST_BITS;
738
    /* Add fudge factor here for final descale. */
739
0
    tmp12 += ONE << (CONST_BITS-PASS1_BITS-1);
740
0
    tmp0 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
741
0
    tmp1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
742
0
    z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
743
0
    z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
744
0
    z3 = tmp12 + z2;
745
0
    tmp10 = z3 + z1;
746
0
    tmp11 = z3 - z1;
747
0
    tmp12 -= z2 << 2;
748
749
    /* Odd part */
750
751
0
    z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
752
0
    z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
753
754
0
    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));     /* c3 */
755
0
    tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148));   /* c1-c3 */
756
0
    tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899));   /* c1+c3 */
757
758
    /* Final output stage */
759
760
0
    wsptr[5*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
761
0
    wsptr[5*4] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
762
0
    wsptr[5*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
763
0
    wsptr[5*3] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
764
0
    wsptr[5*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS);
765
0
  }
766
767
  /* Pass 2: process 5 rows from work array, store into output array. */
768
769
0
  wsptr = workspace;
770
0
  for (ctr = 0; ctr < 5; ctr++) {
771
0
    outptr = output_buf[ctr] + output_col;
772
773
    /* Even part */
774
775
    /* Add range center and fudge factor for final descale and range-limit. */
776
0
    tmp12 = (INT32) wsptr[0] +
777
0
        ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
778
0
         (ONE << (PASS1_BITS+2)));
779
0
    tmp12 <<= CONST_BITS;
780
0
    tmp0 = (INT32) wsptr[2];
781
0
    tmp1 = (INT32) wsptr[4];
782
0
    z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
783
0
    z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
784
0
    z3 = tmp12 + z2;
785
0
    tmp10 = z3 + z1;
786
0
    tmp11 = z3 - z1;
787
0
    tmp12 -= z2 << 2;
788
789
    /* Odd part */
790
791
0
    z2 = (INT32) wsptr[1];
792
0
    z3 = (INT32) wsptr[3];
793
794
0
    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));     /* c3 */
795
0
    tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148));   /* c1-c3 */
796
0
    tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899));   /* c1+c3 */
797
798
    /* Final output stage */
799
800
0
    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
801
0
                CONST_BITS+PASS1_BITS+3)
802
0
          & RANGE_MASK];
803
0
    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
804
0
                CONST_BITS+PASS1_BITS+3)
805
0
          & RANGE_MASK];
806
0
    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
807
0
                CONST_BITS+PASS1_BITS+3)
808
0
          & RANGE_MASK];
809
0
    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
810
0
                CONST_BITS+PASS1_BITS+3)
811
0
          & RANGE_MASK];
812
0
    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12,
813
0
                CONST_BITS+PASS1_BITS+3)
814
0
          & RANGE_MASK];
815
816
0
    wsptr += 5;   /* advance pointer to next row */
817
0
  }
818
0
}
819
820
821
/*
822
 * Perform dequantization and inverse DCT on one block of coefficients,
823
 * producing a reduced-size 4x4 output block.
824
 *
825
 * Optimized algorithm with 3 multiplications in the 1-D kernel.
826
 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
827
 */
828
829
GLOBAL(void)
830
jpeg_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
831
         JCOEFPTR coef_block,
832
         JSAMPARRAY output_buf, JDIMENSION output_col)
833
0
{
834
0
  INT32 tmp0, tmp2, tmp10, tmp12;
835
0
  INT32 z1, z2, z3;
836
0
  JCOEFPTR inptr;
837
0
  ISLOW_MULT_TYPE * quantptr;
838
0
  int * wsptr;
839
0
  JSAMPROW outptr;
840
0
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
841
0
  int ctr;
842
0
  int workspace[4*4]; /* buffers data between passes */
843
  SHIFT_TEMPS
844
845
  /* Pass 1: process columns from input, store into work array. */
846
847
0
  inptr = coef_block;
848
0
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
849
0
  wsptr = workspace;
850
0
  for (ctr = 0; ctr < 4; ctr++, inptr++, quantptr++, wsptr++) {
851
    /* Even part */
852
853
0
    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
854
0
    tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
855
    
856
0
    tmp10 = (tmp0 + tmp2) << PASS1_BITS;
857
0
    tmp12 = (tmp0 - tmp2) << PASS1_BITS;
858
859
    /* Odd part */
860
    /* Same rotation as in the even part of the 8x8 LL&M IDCT */
861
862
0
    z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
863
0
    z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
864
865
0
    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);               /* c6 */
866
    /* Add fudge factor here for final descale. */
867
0
    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
868
0
    tmp0 = RIGHT_SHIFT(z1 + MULTIPLY(z2, FIX_0_765366865), /* c2-c6 */
869
0
           CONST_BITS-PASS1_BITS);
870
0
    tmp2 = RIGHT_SHIFT(z1 - MULTIPLY(z3, FIX_1_847759065), /* c2+c6 */
871
0
           CONST_BITS-PASS1_BITS);
872
873
    /* Final output stage */
874
875
0
    wsptr[4*0] = (int) (tmp10 + tmp0);
876
0
    wsptr[4*3] = (int) (tmp10 - tmp0);
877
0
    wsptr[4*1] = (int) (tmp12 + tmp2);
878
0
    wsptr[4*2] = (int) (tmp12 - tmp2);
879
0
  }
880
881
  /* Pass 2: process 4 rows from work array, store into output array. */
882
883
0
  wsptr = workspace;
884
0
  for (ctr = 0; ctr < 4; ctr++) {
885
0
    outptr = output_buf[ctr] + output_col;
886
887
    /* Even part */
888
889
    /* Add range center and fudge factor for final descale and range-limit. */
890
0
    tmp0 = (INT32) wsptr[0] +
891
0
       ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
892
0
        (ONE << (PASS1_BITS+2)));
893
0
    tmp2 = (INT32) wsptr[2];
894
895
0
    tmp10 = (tmp0 + tmp2) << CONST_BITS;
896
0
    tmp12 = (tmp0 - tmp2) << CONST_BITS;
897
898
    /* Odd part */
899
    /* Same rotation as in the even part of the 8x8 LL&M IDCT */
900
901
0
    z2 = (INT32) wsptr[1];
902
0
    z3 = (INT32) wsptr[3];
903
904
0
    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);   /* c6 */
905
0
    tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
906
0
    tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
907
908
    /* Final output stage */
909
910
0
    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
911
0
                CONST_BITS+PASS1_BITS+3)
912
0
          & RANGE_MASK];
913
0
    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
914
0
                CONST_BITS+PASS1_BITS+3)
915
0
          & RANGE_MASK];
916
0
    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
917
0
                CONST_BITS+PASS1_BITS+3)
918
0
          & RANGE_MASK];
919
0
    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
920
0
                CONST_BITS+PASS1_BITS+3)
921
0
          & RANGE_MASK];
922
923
0
    wsptr += 4;   /* advance pointer to next row */
924
0
  }
925
0
}
926
927
928
/*
929
 * Perform dequantization and inverse DCT on one block of coefficients,
930
 * producing a reduced-size 3x3 output block.
931
 *
932
 * Optimized algorithm with 2 multiplications in the 1-D kernel.
933
 * cK represents sqrt(2) * cos(K*pi/6).
934
 */
935
936
GLOBAL(void)
937
jpeg_idct_3x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
938
         JCOEFPTR coef_block,
939
         JSAMPARRAY output_buf, JDIMENSION output_col)
940
0
{
941
0
  INT32 tmp0, tmp2, tmp10, tmp12;
942
0
  JCOEFPTR inptr;
943
0
  ISLOW_MULT_TYPE * quantptr;
944
0
  int * wsptr;
945
0
  JSAMPROW outptr;
946
0
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
947
0
  int ctr;
948
0
  int workspace[3*3]; /* buffers data between passes */
949
  SHIFT_TEMPS
950
951
  /* Pass 1: process columns from input, store into work array. */
952
953
0
  inptr = coef_block;
954
0
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
955
0
  wsptr = workspace;
956
0
  for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) {
957
    /* Even part */
958
959
0
    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
960
0
    tmp0 <<= CONST_BITS;
961
    /* Add fudge factor here for final descale. */
962
0
    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
963
0
    tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
964
0
    tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
965
0
    tmp10 = tmp0 + tmp12;
966
0
    tmp2 = tmp0 - tmp12 - tmp12;
967
968
    /* Odd part */
969
970
0
    tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
971
0
    tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
972
973
    /* Final output stage */
974
975
0
    wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
976
0
    wsptr[3*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
977
0
    wsptr[3*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
978
0
  }
979
980
  /* Pass 2: process 3 rows from work array, store into output array. */
981
982
0
  wsptr = workspace;
983
0
  for (ctr = 0; ctr < 3; ctr++) {
984
0
    outptr = output_buf[ctr] + output_col;
985
986
    /* Even part */
987
988
    /* Add range center and fudge factor for final descale and range-limit. */
989
0
    tmp0 = (INT32) wsptr[0] +
990
0
       ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
991
0
        (ONE << (PASS1_BITS+2)));
992
0
    tmp0 <<= CONST_BITS;
993
0
    tmp2 = (INT32) wsptr[2];
994
0
    tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
995
0
    tmp10 = tmp0 + tmp12;
996
0
    tmp2 = tmp0 - tmp12 - tmp12;
997
998
    /* Odd part */
999
1000
0
    tmp12 = (INT32) wsptr[1];
1001
0
    tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
1002
1003
    /* Final output stage */
1004
1005
0
    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
1006
0
                CONST_BITS+PASS1_BITS+3)
1007
0
          & RANGE_MASK];
1008
0
    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
1009
0
                CONST_BITS+PASS1_BITS+3)
1010
0
          & RANGE_MASK];
1011
0
    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2,
1012
0
                CONST_BITS+PASS1_BITS+3)
1013
0
          & RANGE_MASK];
1014
1015
0
    wsptr += 3;   /* advance pointer to next row */
1016
0
  }
1017
0
}
1018
1019
1020
/*
1021
 * Perform dequantization and inverse DCT on one block of coefficients,
1022
 * producing a reduced-size 2x2 output block.
1023
 *
1024
 * Multiplication-less algorithm.
1025
 */
1026
1027
GLOBAL(void)
1028
jpeg_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1029
         JCOEFPTR coef_block,
1030
         JSAMPARRAY output_buf, JDIMENSION output_col)
1031
0
{
1032
0
  DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
1033
0
  ISLOW_MULT_TYPE * quantptr;
1034
0
  JSAMPROW outptr;
1035
0
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1036
0
  ISHIFT_TEMPS
1037
1038
  /* Pass 1: process columns from input. */
1039
1040
0
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1041
1042
  /* Column 0 */
1043
0
  tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]);
1044
0
  tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]);
1045
  /* Add range center and fudge factor for final descale and range-limit. */
1046
0
  tmp4 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);
1047
1048
0
  tmp0 = tmp4 + tmp5;
1049
0
  tmp2 = tmp4 - tmp5;
1050
1051
  /* Column 1 */
1052
0
  tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0+1], quantptr[DCTSIZE*0+1]);
1053
0
  tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1+1], quantptr[DCTSIZE*1+1]);
1054
1055
0
  tmp1 = tmp4 + tmp5;
1056
0
  tmp3 = tmp4 - tmp5;
1057
1058
  /* Pass 2: process 2 rows, store into output array. */
1059
1060
  /* Row 0 */
1061
0
  outptr = output_buf[0] + output_col;
1062
1063
0
  outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK];
1064
0
  outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK];
1065
1066
  /* Row 1 */
1067
0
  outptr = output_buf[1] + output_col;
1068
1069
0
  outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp2 + tmp3, 3) & RANGE_MASK];
1070
0
  outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp2 - tmp3, 3) & RANGE_MASK];
1071
0
}
1072
1073
1074
/*
1075
 * Perform dequantization and inverse DCT on one block of coefficients,
1076
 * producing a reduced-size 1x1 output block.
1077
 *
1078
 * We hardly need an inverse DCT routine for this: just take the
1079
 * average pixel value, which is one-eighth of the DC coefficient.
1080
 */
1081
1082
GLOBAL(void)
1083
jpeg_idct_1x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1084
         JCOEFPTR coef_block,
1085
         JSAMPARRAY output_buf, JDIMENSION output_col)
1086
0
{
1087
0
  DCTELEM dcval;
1088
0
  ISLOW_MULT_TYPE * quantptr;
1089
0
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1090
0
  ISHIFT_TEMPS
1091
1092
  /* 1x1 is trivial: just take the DC coefficient divided by 8. */
1093
1094
0
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1095
1096
0
  dcval = DEQUANTIZE(coef_block[0], quantptr[0]);
1097
  /* Add range center and fudge factor for descale and range-limit. */
1098
0
  dcval += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);
1099
1100
0
  output_buf[0][output_col] =
1101
0
    range_limit[(int) IRIGHT_SHIFT(dcval, 3) & RANGE_MASK];
1102
0
}
1103
1104
1105
/*
1106
 * Perform dequantization and inverse DCT on one block of coefficients,
1107
 * producing a 9x9 output block.
1108
 *
1109
 * Optimized algorithm with 10 multiplications in the 1-D kernel.
1110
 * cK represents sqrt(2) * cos(K*pi/18).
1111
 */
1112
1113
GLOBAL(void)
1114
jpeg_idct_9x9 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1115
         JCOEFPTR coef_block,
1116
         JSAMPARRAY output_buf, JDIMENSION output_col)
1117
0
{
1118
0
  INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13, tmp14;
1119
0
  INT32 z1, z2, z3, z4;
1120
0
  JCOEFPTR inptr;
1121
0
  ISLOW_MULT_TYPE * quantptr;
1122
0
  int * wsptr;
1123
0
  JSAMPROW outptr;
1124
0
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1125
0
  int ctr;
1126
0
  int workspace[8*9]; /* buffers data between passes */
1127
  SHIFT_TEMPS
1128
1129
  /* Pass 1: process columns from input, store into work array. */
1130
1131
0
  inptr = coef_block;
1132
0
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1133
0
  wsptr = workspace;
1134
0
  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1135
    /* Even part */
1136
1137
0
    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1138
0
    tmp0 <<= CONST_BITS;
1139
    /* Add fudge factor here for final descale. */
1140
0
    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
1141
1142
0
    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1143
0
    z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1144
0
    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1145
1146
0
    tmp3 = MULTIPLY(z3, FIX(0.707106781));      /* c6 */
1147
0
    tmp1 = tmp0 + tmp3;
1148
0
    tmp2 = tmp0 - tmp3 - tmp3;
1149
1150
0
    tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
1151
0
    tmp11 = tmp2 + tmp0;
1152
0
    tmp14 = tmp2 - tmp0 - tmp0;
1153
1154
0
    tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
1155
0
    tmp2 = MULTIPLY(z1, FIX(1.083350441));      /* c4 */
1156
0
    tmp3 = MULTIPLY(z2, FIX(0.245575608));      /* c8 */
1157
1158
0
    tmp10 = tmp1 + tmp0 - tmp3;
1159
0
    tmp12 = tmp1 - tmp0 + tmp2;
1160
0
    tmp13 = tmp1 - tmp2 + tmp3;
1161
1162
    /* Odd part */
1163
1164
0
    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1165
0
    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1166
0
    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1167
0
    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1168
1169
0
    z2 = MULTIPLY(z2, - FIX(1.224744871));           /* -c3 */
1170
1171
0
    tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955));      /* c5 */
1172
0
    tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525));      /* c7 */
1173
0
    tmp0 = tmp2 + tmp3 - z2;
1174
0
    tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481));      /* c1 */
1175
0
    tmp2 += z2 - tmp1;
1176
0
    tmp3 += z2 + tmp1;
1177
0
    tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */
1178
1179
    /* Final output stage */
1180
1181
0
    wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
1182
0
    wsptr[8*8] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
1183
0
    wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
1184
0
    wsptr[8*7] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
1185
0
    wsptr[8*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
1186
0
    wsptr[8*6] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
1187
0
    wsptr[8*3] = (int) RIGHT_SHIFT(tmp13 + tmp3, CONST_BITS-PASS1_BITS);
1188
0
    wsptr[8*5] = (int) RIGHT_SHIFT(tmp13 - tmp3, CONST_BITS-PASS1_BITS);
1189
0
    wsptr[8*4] = (int) RIGHT_SHIFT(tmp14, CONST_BITS-PASS1_BITS);
1190
0
  }
1191
1192
  /* Pass 2: process 9 rows from work array, store into output array. */
1193
1194
0
  wsptr = workspace;
1195
0
  for (ctr = 0; ctr < 9; ctr++) {
1196
0
    outptr = output_buf[ctr] + output_col;
1197
1198
    /* Even part */
1199
1200
    /* Add range center and fudge factor for final descale and range-limit. */
1201
0
    tmp0 = (INT32) wsptr[0] +
1202
0
       ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
1203
0
        (ONE << (PASS1_BITS+2)));
1204
0
    tmp0 <<= CONST_BITS;
1205
1206
0
    z1 = (INT32) wsptr[2];
1207
0
    z2 = (INT32) wsptr[4];
1208
0
    z3 = (INT32) wsptr[6];
1209
1210
0
    tmp3 = MULTIPLY(z3, FIX(0.707106781));      /* c6 */
1211
0
    tmp1 = tmp0 + tmp3;
1212
0
    tmp2 = tmp0 - tmp3 - tmp3;
1213
1214
0
    tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
1215
0
    tmp11 = tmp2 + tmp0;
1216
0
    tmp14 = tmp2 - tmp0 - tmp0;
1217
1218
0
    tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
1219
0
    tmp2 = MULTIPLY(z1, FIX(1.083350441));      /* c4 */
1220
0
    tmp3 = MULTIPLY(z2, FIX(0.245575608));      /* c8 */
1221
1222
0
    tmp10 = tmp1 + tmp0 - tmp3;
1223
0
    tmp12 = tmp1 - tmp0 + tmp2;
1224
0
    tmp13 = tmp1 - tmp2 + tmp3;
1225
1226
    /* Odd part */
1227
1228
0
    z1 = (INT32) wsptr[1];
1229
0
    z2 = (INT32) wsptr[3];
1230
0
    z3 = (INT32) wsptr[5];
1231
0
    z4 = (INT32) wsptr[7];
1232
1233
0
    z2 = MULTIPLY(z2, - FIX(1.224744871));           /* -c3 */
1234
1235
0
    tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955));      /* c5 */
1236
0
    tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525));      /* c7 */
1237
0
    tmp0 = tmp2 + tmp3 - z2;
1238
0
    tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481));      /* c1 */
1239
0
    tmp2 += z2 - tmp1;
1240
0
    tmp3 += z2 + tmp1;
1241
0
    tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */
1242
1243
    /* Final output stage */
1244
1245
0
    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
1246
0
                CONST_BITS+PASS1_BITS+3)
1247
0
          & RANGE_MASK];
1248
0
    outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
1249
0
                CONST_BITS+PASS1_BITS+3)
1250
0
          & RANGE_MASK];
1251
0
    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
1252
0
                CONST_BITS+PASS1_BITS+3)
1253
0
          & RANGE_MASK];
1254
0
    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
1255
0
                CONST_BITS+PASS1_BITS+3)
1256
0
          & RANGE_MASK];
1257
0
    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
1258
0
                CONST_BITS+PASS1_BITS+3)
1259
0
          & RANGE_MASK];
1260
0
    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
1261
0
                CONST_BITS+PASS1_BITS+3)
1262
0
          & RANGE_MASK];
1263
0
    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp3,
1264
0
                CONST_BITS+PASS1_BITS+3)
1265
0
          & RANGE_MASK];
1266
0
    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp3,
1267
0
                CONST_BITS+PASS1_BITS+3)
1268
0
          & RANGE_MASK];
1269
0
    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp14,
1270
0
                CONST_BITS+PASS1_BITS+3)
1271
0
          & RANGE_MASK];
1272
1273
0
    wsptr += 8;   /* advance pointer to next row */
1274
0
  }
1275
0
}
1276
1277
1278
/*
1279
 * Perform dequantization and inverse DCT on one block of coefficients,
1280
 * producing a 10x10 output block.
1281
 *
1282
 * Optimized algorithm with 12 multiplications in the 1-D kernel.
1283
 * cK represents sqrt(2) * cos(K*pi/20).
1284
 */
1285
1286
GLOBAL(void)
1287
jpeg_idct_10x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1288
     JCOEFPTR coef_block,
1289
     JSAMPARRAY output_buf, JDIMENSION output_col)
1290
0
{
1291
0
  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
1292
0
  INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
1293
0
  INT32 z1, z2, z3, z4, z5;
1294
0
  JCOEFPTR inptr;
1295
0
  ISLOW_MULT_TYPE * quantptr;
1296
0
  int * wsptr;
1297
0
  JSAMPROW outptr;
1298
0
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1299
0
  int ctr;
1300
0
  int workspace[8*10];  /* buffers data between passes */
1301
  SHIFT_TEMPS
1302
1303
  /* Pass 1: process columns from input, store into work array. */
1304
1305
0
  inptr = coef_block;
1306
0
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1307
0
  wsptr = workspace;
1308
0
  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1309
    /* Even part */
1310
1311
0
    z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1312
0
    z3 <<= CONST_BITS;
1313
    /* Add fudge factor here for final descale. */
1314
0
    z3 += ONE << (CONST_BITS-PASS1_BITS-1);
1315
0
    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1316
0
    z1 = MULTIPLY(z4, FIX(1.144122806));         /* c4 */
1317
0
    z2 = MULTIPLY(z4, FIX(0.437016024));         /* c8 */
1318
0
    tmp10 = z3 + z1;
1319
0
    tmp11 = z3 - z2;
1320
1321
0
    tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1),   /* c0 = (c4-c8)*2 */
1322
0
      CONST_BITS-PASS1_BITS);
1323
1324
0
    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1325
0
    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1326
1327
0
    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));    /* c6 */
1328
0
    tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
1329
0
    tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
1330
1331
0
    tmp20 = tmp10 + tmp12;
1332
0
    tmp24 = tmp10 - tmp12;
1333
0
    tmp21 = tmp11 + tmp13;
1334
0
    tmp23 = tmp11 - tmp13;
1335
1336
    /* Odd part */
1337
1338
0
    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1339
0
    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1340
0
    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1341
0
    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1342
1343
0
    tmp11 = z2 + z4;
1344
0
    tmp13 = z2 - z4;
1345
1346
0
    tmp12 = MULTIPLY(tmp13, FIX(0.309016994));        /* (c3-c7)/2 */
1347
0
    z5 = z3 << CONST_BITS;
1348
1349
0
    z2 = MULTIPLY(tmp11, FIX(0.951056516));           /* (c3+c7)/2 */
1350
0
    z4 = z5 + tmp12;
1351
1352
0
    tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
1353
0
    tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
1354
1355
0
    z2 = MULTIPLY(tmp11, FIX(0.587785252));           /* (c1-c9)/2 */
1356
0
    z4 = z5 - tmp12 - (tmp13 << (CONST_BITS - 1));
1357
1358
0
    tmp12 = (z1 - tmp13 - z3) << PASS1_BITS;
1359
1360
0
    tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
1361
0
    tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
1362
1363
    /* Final output stage */
1364
1365
0
    wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
1366
0
    wsptr[8*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
1367
0
    wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
1368
0
    wsptr[8*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1369
0
    wsptr[8*2] = (int) (tmp22 + tmp12);
1370
0
    wsptr[8*7] = (int) (tmp22 - tmp12);
1371
0
    wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
1372
0
    wsptr[8*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
1373
0
    wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1374
0
    wsptr[8*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1375
0
  }
1376
1377
  /* Pass 2: process 10 rows from work array, store into output array. */
1378
1379
0
  wsptr = workspace;
1380
0
  for (ctr = 0; ctr < 10; ctr++) {
1381
0
    outptr = output_buf[ctr] + output_col;
1382
1383
    /* Even part */
1384
1385
    /* Add range center and fudge factor for final descale and range-limit. */
1386
0
    z3 = (INT32) wsptr[0] +
1387
0
     ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
1388
0
      (ONE << (PASS1_BITS+2)));
1389
0
    z3 <<= CONST_BITS;
1390
0
    z4 = (INT32) wsptr[4];
1391
0
    z1 = MULTIPLY(z4, FIX(1.144122806));         /* c4 */
1392
0
    z2 = MULTIPLY(z4, FIX(0.437016024));         /* c8 */
1393
0
    tmp10 = z3 + z1;
1394
0
    tmp11 = z3 - z2;
1395
1396
0
    tmp22 = z3 - ((z1 - z2) << 1);               /* c0 = (c4-c8)*2 */
1397
1398
0
    z2 = (INT32) wsptr[2];
1399
0
    z3 = (INT32) wsptr[6];
1400
1401
0
    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));    /* c6 */
1402
0
    tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
1403
0
    tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
1404
1405
0
    tmp20 = tmp10 + tmp12;
1406
0
    tmp24 = tmp10 - tmp12;
1407
0
    tmp21 = tmp11 + tmp13;
1408
0
    tmp23 = tmp11 - tmp13;
1409
1410
    /* Odd part */
1411
1412
0
    z1 = (INT32) wsptr[1];
1413
0
    z2 = (INT32) wsptr[3];
1414
0
    z3 = (INT32) wsptr[5];
1415
0
    z3 <<= CONST_BITS;
1416
0
    z4 = (INT32) wsptr[7];
1417
1418
0
    tmp11 = z2 + z4;
1419
0
    tmp13 = z2 - z4;
1420
1421
0
    tmp12 = MULTIPLY(tmp13, FIX(0.309016994));        /* (c3-c7)/2 */
1422
1423
0
    z2 = MULTIPLY(tmp11, FIX(0.951056516));           /* (c3+c7)/2 */
1424
0
    z4 = z3 + tmp12;
1425
1426
0
    tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
1427
0
    tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
1428
1429
0
    z2 = MULTIPLY(tmp11, FIX(0.587785252));           /* (c1-c9)/2 */
1430
0
    z4 = z3 - tmp12 - (tmp13 << (CONST_BITS - 1));
1431
1432
0
    tmp12 = ((z1 - tmp13) << CONST_BITS) - z3;
1433
1434
0
    tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
1435
0
    tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
1436
1437
    /* Final output stage */
1438
1439
0
    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
1440
0
                CONST_BITS+PASS1_BITS+3)
1441
0
          & RANGE_MASK];
1442
0
    outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
1443
0
                CONST_BITS+PASS1_BITS+3)
1444
0
          & RANGE_MASK];
1445
0
    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
1446
0
                CONST_BITS+PASS1_BITS+3)
1447
0
          & RANGE_MASK];
1448
0
    outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
1449
0
                CONST_BITS+PASS1_BITS+3)
1450
0
          & RANGE_MASK];
1451
0
    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
1452
0
                CONST_BITS+PASS1_BITS+3)
1453
0
          & RANGE_MASK];
1454
0
    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
1455
0
                CONST_BITS+PASS1_BITS+3)
1456
0
          & RANGE_MASK];
1457
0
    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
1458
0
                CONST_BITS+PASS1_BITS+3)
1459
0
          & RANGE_MASK];
1460
0
    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
1461
0
                CONST_BITS+PASS1_BITS+3)
1462
0
          & RANGE_MASK];
1463
0
    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
1464
0
                CONST_BITS+PASS1_BITS+3)
1465
0
          & RANGE_MASK];
1466
0
    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
1467
0
                CONST_BITS+PASS1_BITS+3)
1468
0
          & RANGE_MASK];
1469
1470
0
    wsptr += 8;   /* advance pointer to next row */
1471
0
  }
1472
0
}
1473
1474
1475
/*
1476
 * Perform dequantization and inverse DCT on one block of coefficients,
1477
 * producing an 11x11 output block.
1478
 *
1479
 * Optimized algorithm with 24 multiplications in the 1-D kernel.
1480
 * cK represents sqrt(2) * cos(K*pi/22).
1481
 */
1482
1483
GLOBAL(void)
1484
jpeg_idct_11x11 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1485
     JCOEFPTR coef_block,
1486
     JSAMPARRAY output_buf, JDIMENSION output_col)
1487
0
{
1488
0
  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
1489
0
  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
1490
0
  INT32 z1, z2, z3, z4;
1491
0
  JCOEFPTR inptr;
1492
0
  ISLOW_MULT_TYPE * quantptr;
1493
0
  int * wsptr;
1494
0
  JSAMPROW outptr;
1495
0
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1496
0
  int ctr;
1497
0
  int workspace[8*11];  /* buffers data between passes */
1498
  SHIFT_TEMPS
1499
1500
  /* Pass 1: process columns from input, store into work array. */
1501
1502
0
  inptr = coef_block;
1503
0
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1504
0
  wsptr = workspace;
1505
0
  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1506
    /* Even part */
1507
1508
0
    tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1509
0
    tmp10 <<= CONST_BITS;
1510
    /* Add fudge factor here for final descale. */
1511
0
    tmp10 += ONE << (CONST_BITS-PASS1_BITS-1);
1512
1513
0
    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1514
0
    z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1515
0
    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1516
1517
0
    tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132));     /* c2+c4 */
1518
0
    tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045));     /* c2-c6 */
1519
0
    z4 = z1 + z3;
1520
0
    tmp24 = MULTIPLY(z4, - FIX(1.155664402));        /* -(c2-c10) */
1521
0
    z4 -= z2;
1522
0
    tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976));  /* c2 */
1523
0
    tmp21 = tmp20 + tmp23 + tmp25 -
1524
0
      MULTIPLY(z2, FIX(1.821790775));          /* c2+c4+c10-c6 */
1525
0
    tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
1526
0
    tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
1527
0
    tmp24 += tmp25;
1528
0
    tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120));  /* c8+c10 */
1529
0
    tmp24 += MULTIPLY(z2, FIX(1.944413522)) -        /* c2+c8 */
1530
0
       MULTIPLY(z1, FIX(1.390975730));         /* c4+c10 */
1531
0
    tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562));  /* c0 */
1532
1533
    /* Odd part */
1534
1535
0
    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1536
0
    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1537
0
    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1538
0
    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1539
1540
0
    tmp11 = z1 + z2;
1541
0
    tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */
1542
0
    tmp11 = MULTIPLY(tmp11, FIX(0.887983902));           /* c3-c9 */
1543
0
    tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295));         /* c5-c9 */
1544
0
    tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */
1545
0
    tmp10 = tmp11 + tmp12 + tmp13 -
1546
0
      MULTIPLY(z1, FIX(0.923107866));              /* c7+c5+c3-c1-2*c9 */
1547
0
    z1    = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */
1548
0
    tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588));        /* c1+c7+3*c9-c3 */
1549
0
    tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623));        /* c3+c5-c7-c9 */
1550
0
    z1    = MULTIPLY(z2 + z4, - FIX(1.798248910));       /* -(c1+c9) */
1551
0
    tmp11 += z1;
1552
0
    tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632));        /* c1+c5+c9-c7 */
1553
0
    tmp14 += MULTIPLY(z2, - FIX(1.467221301)) +          /* -(c5+c9) */
1554
0
       MULTIPLY(z3, FIX(1.001388905)) -            /* c1-c9 */
1555
0
       MULTIPLY(z4, FIX(1.684843907));             /* c3+c9 */
1556
1557
    /* Final output stage */
1558
1559
0
    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
1560
0
    wsptr[8*10] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
1561
0
    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
1562
0
    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1563
0
    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
1564
0
    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
1565
0
    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
1566
0
    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
1567
0
    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1568
0
    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1569
0
    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25, CONST_BITS-PASS1_BITS);
1570
0
  }
1571
1572
  /* Pass 2: process 11 rows from work array, store into output array. */
1573
1574
0
  wsptr = workspace;
1575
0
  for (ctr = 0; ctr < 11; ctr++) {
1576
0
    outptr = output_buf[ctr] + output_col;
1577
1578
    /* Even part */
1579
1580
    /* Add range center and fudge factor for final descale and range-limit. */
1581
0
    tmp10 = (INT32) wsptr[0] +
1582
0
        ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
1583
0
         (ONE << (PASS1_BITS+2)));
1584
0
    tmp10 <<= CONST_BITS;
1585
1586
0
    z1 = (INT32) wsptr[2];
1587
0
    z2 = (INT32) wsptr[4];
1588
0
    z3 = (INT32) wsptr[6];
1589
1590
0
    tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132));     /* c2+c4 */
1591
0
    tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045));     /* c2-c6 */
1592
0
    z4 = z1 + z3;
1593
0
    tmp24 = MULTIPLY(z4, - FIX(1.155664402));        /* -(c2-c10) */
1594
0
    z4 -= z2;
1595
0
    tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976));  /* c2 */
1596
0
    tmp21 = tmp20 + tmp23 + tmp25 -
1597
0
      MULTIPLY(z2, FIX(1.821790775));          /* c2+c4+c10-c6 */
1598
0
    tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
1599
0
    tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
1600
0
    tmp24 += tmp25;
1601
0
    tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120));  /* c8+c10 */
1602
0
    tmp24 += MULTIPLY(z2, FIX(1.944413522)) -        /* c2+c8 */
1603
0
       MULTIPLY(z1, FIX(1.390975730));         /* c4+c10 */
1604
0
    tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562));  /* c0 */
1605
1606
    /* Odd part */
1607
1608
0
    z1 = (INT32) wsptr[1];
1609
0
    z2 = (INT32) wsptr[3];
1610
0
    z3 = (INT32) wsptr[5];
1611
0
    z4 = (INT32) wsptr[7];
1612
1613
0
    tmp11 = z1 + z2;
1614
0
    tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */
1615
0
    tmp11 = MULTIPLY(tmp11, FIX(0.887983902));           /* c3-c9 */
1616
0
    tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295));         /* c5-c9 */
1617
0
    tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */
1618
0
    tmp10 = tmp11 + tmp12 + tmp13 -
1619
0
      MULTIPLY(z1, FIX(0.923107866));              /* c7+c5+c3-c1-2*c9 */
1620
0
    z1    = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */
1621
0
    tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588));        /* c1+c7+3*c9-c3 */
1622
0
    tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623));        /* c3+c5-c7-c9 */
1623
0
    z1    = MULTIPLY(z2 + z4, - FIX(1.798248910));       /* -(c1+c9) */
1624
0
    tmp11 += z1;
1625
0
    tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632));        /* c1+c5+c9-c7 */
1626
0
    tmp14 += MULTIPLY(z2, - FIX(1.467221301)) +          /* -(c5+c9) */
1627
0
       MULTIPLY(z3, FIX(1.001388905)) -            /* c1-c9 */
1628
0
       MULTIPLY(z4, FIX(1.684843907));             /* c3+c9 */
1629
1630
    /* Final output stage */
1631
1632
0
    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
1633
0
                 CONST_BITS+PASS1_BITS+3)
1634
0
           & RANGE_MASK];
1635
0
    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
1636
0
                 CONST_BITS+PASS1_BITS+3)
1637
0
           & RANGE_MASK];
1638
0
    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
1639
0
                 CONST_BITS+PASS1_BITS+3)
1640
0
           & RANGE_MASK];
1641
0
    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
1642
0
                 CONST_BITS+PASS1_BITS+3)
1643
0
           & RANGE_MASK];
1644
0
    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
1645
0
                 CONST_BITS+PASS1_BITS+3)
1646
0
           & RANGE_MASK];
1647
0
    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
1648
0
                 CONST_BITS+PASS1_BITS+3)
1649
0
           & RANGE_MASK];
1650
0
    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
1651
0
                 CONST_BITS+PASS1_BITS+3)
1652
0
           & RANGE_MASK];
1653
0
    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
1654
0
                 CONST_BITS+PASS1_BITS+3)
1655
0
           & RANGE_MASK];
1656
0
    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
1657
0
                 CONST_BITS+PASS1_BITS+3)
1658
0
           & RANGE_MASK];
1659
0
    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
1660
0
                 CONST_BITS+PASS1_BITS+3)
1661
0
           & RANGE_MASK];
1662
0
    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25,
1663
0
                 CONST_BITS+PASS1_BITS+3)
1664
0
           & RANGE_MASK];
1665
1666
0
    wsptr += 8;   /* advance pointer to next row */
1667
0
  }
1668
0
}
1669
1670
1671
/*
1672
 * Perform dequantization and inverse DCT on one block of coefficients,
1673
 * producing a 12x12 output block.
1674
 *
1675
 * Optimized algorithm with 15 multiplications in the 1-D kernel.
1676
 * cK represents sqrt(2) * cos(K*pi/24).
1677
 */
1678
1679
/*
 * Parameter usage in this routine:
 *   cinfo      - handed to IDCT_range_limit() to obtain the range_limit table.
 *   compptr    - its dct_table member is read as an array of ISLOW_MULT_TYPE.
 *   coef_block - input coefficients; inptr[DCTSIZE*k], k = 0..7, is read for
 *                each of 8 consecutive columns.
 *   output_buf - rows output_buf[0] .. output_buf[11] are written.
 *   output_col - offset added to every output row pointer; 12 samples are
 *                stored starting at that offset.
 *
 * DEQUANTIZE, MULTIPLY, FIX, RIGHT_SHIFT and the FIX_x_xxx constants are
 * defined outside this block; presumably DEQUANTIZE combines a coefficient
 * with its quantization multiplier and MULTIPLY/FIX yield products scaled
 * by 2^CONST_BITS -- confirm against their definitions.
 */
GLOBAL(void)
1680
jpeg_idct_12x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1681
     JCOEFPTR coef_block,
1682
     JSAMPARRAY output_buf, JDIMENSION output_col)
1683
0
{
1684
0
  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
1685
0
  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
1686
0
  INT32 z1, z2, z3, z4;
1687
0
  JCOEFPTR inptr;
1688
0
  ISLOW_MULT_TYPE * quantptr;
1689
0
  int * wsptr;
1690
0
  JSAMPROW outptr;
1691
0
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1692
0
  int ctr;
1693
0
  int workspace[8*12];  /* buffers data between passes: 12 rows of 8 values */
1694
  SHIFT_TEMPS
1695
1696
  /* Pass 1: process columns from input, store into work array. */
  /* One iteration per input column: inptr, quantptr and wsptr advance
   * together, and each iteration fills one workspace column
   * (12 entries, stride 8).
   */
1697
1698
0
  inptr = coef_block;
1699
0
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1700
0
  wsptr = workspace;
1701
0
  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1702
    /* Even part */
1703
    /* z3 carries the term from coefficient 0; it is part of every even
     * output tmp20..tmp25 below.
     * NOTE(review): this and the other "<<= CONST_BITS" statements left-shift
     * a value that may be negative; if INT32 is a signed type that is
     * undefined behaviour in ISO C -- confirm how the target compilers
     * treat it.
     */
1704
0
    z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1705
0
    z3 <<= CONST_BITS;
1706
    /* Add fudge factor here for final descale. */
    /* (It equals half of 2^(CONST_BITS-PASS1_BITS), the amount shifted out
     * by RIGHT_SHIFT in the output stage of this pass, i.e. a rounding term.)
     */
1707
0
    z3 += ONE << (CONST_BITS-PASS1_BITS-1);
1708
1709
0
    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1710
0
    z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
1711
1712
0
    tmp10 = z3 + z4;
1713
0
    tmp11 = z3 - z4;
1714
    /* z4 is reused: from here it holds the c2 product of coefficient 2,
     * while z1 keeps the same coefficient shifted up by CONST_BITS.
     */
1715
0
    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1716
0
    z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
1717
0
    z1 <<= CONST_BITS;
1718
0
    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1719
0
    z2 <<= CONST_BITS;
1720
    /* tmp12 is scratch: it is reassigned three times below, once for each
     * pair of even outputs (tmp21/tmp24, tmp20/tmp25, tmp22/tmp23).
     */
1721
0
    tmp12 = z1 - z2;
1722
1723
0
    tmp21 = z3 + tmp12;
1724
0
    tmp24 = z3 - tmp12;
1725
1726
0
    tmp12 = z4 + z2;
1727
1728
0
    tmp20 = tmp10 + tmp12;
1729
0
    tmp25 = tmp10 - tmp12;
1730
1731
0
    tmp12 = z4 - z1 - z2;
1732
1733
0
    tmp22 = tmp11 + tmp12;
1734
0
    tmp23 = tmp11 - tmp12;
1735
1736
    /* Odd part */
1737
1738
0
    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1739
0
    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1740
0
    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1741
0
    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1742
    /* tmp11 and tmp14 hold partial products of coefficient 3 at this point;
     * both are overwritten with their final values at the end of the odd
     * part.
     */
1743
0
    tmp11 = MULTIPLY(z2, FIX(1.306562965));                  /* c3 */
1744
0
    tmp14 = MULTIPLY(z2, - FIX_0_541196100);                 /* -c9 */
1745
    /* tmp10 temporarily holds z1 + z3 before it receives the final odd
     * term used for outputs 0 and 11.
     */
1746
0
    tmp10 = z1 + z3;
1747
0
    tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669));          /* c7 */
1748
0
    tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384));       /* c5-c7 */
1749
0
    tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716));  /* c1-c5 */
1750
0
    tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580));           /* -(c7+c11) */
1751
0
    tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
1752
0
    tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
1753
0
    tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) -        /* c7-c11 */
1754
0
       MULTIPLY(z4, FIX(1.982889723));                 /* c5+c7 */
1755
    /* From here on z1, z2 and z3 no longer hold the raw coefficients:
     * z1 and z2 become differences and z3 becomes a shared product.
     */
1756
0
    z1 -= z4;
1757
0
    z2 -= z3;
1758
0
    z3 = MULTIPLY(z1 + z2, FIX_0_541196100);                 /* c9 */
1759
0
    tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865);              /* c3-c9 */
1760
0
    tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065);              /* c3+c9 */
1761
1762
    /* Final output stage */
    /* Workspace rows k and 11-k receive the sum and the difference of the
     * even term tmp2k and the odd term tmp1k (k = 0..5).
     */
1763
1764
0
    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
1765
0
    wsptr[8*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
1766
0
    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
1767
0
    wsptr[8*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1768
0
    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
1769
0
    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
1770
0
    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
1771
0
    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
1772
0
    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1773
0
    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1774
0
    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
1775
0
    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
1776
0
  }
1777
1778
  /* Pass 2: process 12 rows from work array, store into output array. */
  /* Each iteration reads the 8 values wsptr[0..7] of one workspace row,
   * applies the same 1-D kernel as pass 1, and writes 12 samples.
   */
1779
1780
0
  wsptr = workspace;
1781
0
  for (ctr = 0; ctr < 12; ctr++) {
1782
0
    outptr = output_buf[ctr] + output_col;
1783
1784
    /* Even part */
1785
1786
    /* Add range center and fudge factor for final descale and range-limit. */
    /* After the "<<= CONST_BITS" below, the ONE term equals half of
     * 2^(CONST_BITS+PASS1_BITS+3), the amount shifted out in the output
     * stage, and the RANGE_CENTER term survives that shift as RANGE_CENTER.
     */
1787
0
    z3 = (INT32) wsptr[0] +
1788
0
     ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
1789
0
      (ONE << (PASS1_BITS+2)));
1790
0
    z3 <<= CONST_BITS;
1791
1792
0
    z4 = (INT32) wsptr[4];
1793
0
    z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
1794
1795
0
    tmp10 = z3 + z4;
1796
0
    tmp11 = z3 - z4;
1797
    /* As in pass 1, z4 is reused for the c2 product and tmp12 is scratch. */
1798
0
    z1 = (INT32) wsptr[2];
1799
0
    z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
1800
0
    z1 <<= CONST_BITS;
1801
0
    z2 = (INT32) wsptr[6];
1802
0
    z2 <<= CONST_BITS;
1803
1804
0
    tmp12 = z1 - z2;
1805
1806
0
    tmp21 = z3 + tmp12;
1807
0
    tmp24 = z3 - tmp12;
1808
1809
0
    tmp12 = z4 + z2;
1810
1811
0
    tmp20 = tmp10 + tmp12;
1812
0
    tmp25 = tmp10 - tmp12;
1813
1814
0
    tmp12 = z4 - z1 - z2;
1815
1816
0
    tmp22 = tmp11 + tmp12;
1817
0
    tmp23 = tmp11 - tmp12;
1818
1819
    /* Odd part */
    /* Same sequence as the odd part of pass 1, with the inputs taken from
     * the workspace row instead of the dequantized coefficients.
     */
1820
1821
0
    z1 = (INT32) wsptr[1];
1822
0
    z2 = (INT32) wsptr[3];
1823
0
    z3 = (INT32) wsptr[5];
1824
0
    z4 = (INT32) wsptr[7];
1825
1826
0
    tmp11 = MULTIPLY(z2, FIX(1.306562965));                  /* c3 */
1827
0
    tmp14 = MULTIPLY(z2, - FIX_0_541196100);                 /* -c9 */
1828
1829
0
    tmp10 = z1 + z3;
1830
0
    tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669));          /* c7 */
1831
0
    tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384));       /* c5-c7 */
1832
0
    tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716));  /* c1-c5 */
1833
0
    tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580));           /* -(c7+c11) */
1834
0
    tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
1835
0
    tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
1836
0
    tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) -        /* c7-c11 */
1837
0
       MULTIPLY(z4, FIX(1.982889723));                 /* c5+c7 */
1838
1839
0
    z1 -= z4;
1840
0
    z2 -= z3;
1841
0
    z3 = MULTIPLY(z1 + z2, FIX_0_541196100);                 /* c9 */
1842
0
    tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865);              /* c3-c9 */
1843
0
    tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065);              /* c3+c9 */
1844
1845
    /* Final output stage */
    /* Samples k and 11-k are built from tmp2k + tmp1k and tmp2k - tmp1k.
     * Each descaled value is masked with RANGE_MASK before it indexes
     * range_limit, so the table index is confined to the bits of RANGE_MASK.
     */
1846
1847
0
    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
1848
0
                 CONST_BITS+PASS1_BITS+3)
1849
0
           & RANGE_MASK];
1850
0
    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
1851
0
                 CONST_BITS+PASS1_BITS+3)
1852
0
           & RANGE_MASK];
1853
0
    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
1854
0
                 CONST_BITS+PASS1_BITS+3)
1855
0
           & RANGE_MASK];
1856
0
    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
1857
0
                 CONST_BITS+PASS1_BITS+3)
1858
0
           & RANGE_MASK];
1859
0
    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
1860
0
                 CONST_BITS+PASS1_BITS+3)
1861
0
           & RANGE_MASK];
1862
0
    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
1863
0
                 CONST_BITS+PASS1_BITS+3)
1864
0
           & RANGE_MASK];
1865
0
    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
1866
0
                 CONST_BITS+PASS1_BITS+3)
1867
0
           & RANGE_MASK];
1868
0
    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
1869
0
                 CONST_BITS+PASS1_BITS+3)
1870
0
           & RANGE_MASK];
1871
0
    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
1872
0
                 CONST_BITS+PASS1_BITS+3)
1873
0
           & RANGE_MASK];
1874
0
    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
1875
0
                 CONST_BITS+PASS1_BITS+3)
1876
0
           & RANGE_MASK];
1877
0
    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
1878
0
                 CONST_BITS+PASS1_BITS+3)
1879
0
           & RANGE_MASK];
1880
0
    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
1881
0
                 CONST_BITS+PASS1_BITS+3)
1882
0
           & RANGE_MASK];
1883
1884
0
    wsptr += 8;   /* advance pointer to next row */
1885
0
  }
1886
0
}
1887
1888
1889
/*
1890
 * Perform dequantization and inverse DCT on one block of coefficients,
1891
 * producing a 13x13 output block.
1892
 *
1893
 * Optimized algorithm with 29 multiplications in the 1-D kernel.
1894
 * cK represents sqrt(2) * cos(K*pi/26).
1895
 */
1896
1897
/*
 * Parameter usage in this routine:
 *   cinfo      - handed to IDCT_range_limit() to obtain the range_limit table.
 *   compptr    - its dct_table member is read as an array of ISLOW_MULT_TYPE.
 *   coef_block - input coefficients; inptr[DCTSIZE*k], k = 0..7, is read for
 *                each of 8 consecutive columns.
 *   output_buf - rows output_buf[0] .. output_buf[12] are written.
 *   output_col - offset added to every output row pointer; 13 samples are
 *                stored starting at that offset.
 *
 * DEQUANTIZE, MULTIPLY, FIX and RIGHT_SHIFT are defined outside this block;
 * presumably DEQUANTIZE combines a coefficient with its quantization
 * multiplier and MULTIPLY/FIX yield products scaled by 2^CONST_BITS --
 * confirm against their definitions.
 */
GLOBAL(void)
1898
jpeg_idct_13x13 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1899
     JCOEFPTR coef_block,
1900
     JSAMPARRAY output_buf, JDIMENSION output_col)
1901
0
{
1902
0
  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
1903
0
  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
1904
0
  INT32 z1, z2, z3, z4;
1905
0
  JCOEFPTR inptr;
1906
0
  ISLOW_MULT_TYPE * quantptr;
1907
0
  int * wsptr;
1908
0
  JSAMPROW outptr;
1909
0
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1910
0
  int ctr;
1911
0
  int workspace[8*13];  /* buffers data between passes: 13 rows of 8 values */
1912
  SHIFT_TEMPS
1913
1914
  /* Pass 1: process columns from input, store into work array. */
  /* One iteration per input column: inptr, quantptr and wsptr advance
   * together, and each iteration fills one workspace column
   * (13 entries, stride 8).
   */
1915
1916
0
  inptr = coef_block;
1917
0
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1918
0
  wsptr = workspace;
1919
0
  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1920
    /* Even part */
1921
    /* z1 carries the term from coefficient 0; it enters every even output
     * tmp20..tmp26 below.
     * NOTE(review): "<<= CONST_BITS" left-shifts a value that may be
     * negative; if INT32 is a signed type that is undefined behaviour in
     * ISO C -- confirm how the target compilers treat it.
     */
1922
0
    z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1923
0
    z1 <<= CONST_BITS;
1924
    /* Add fudge factor here for final descale. */
    /* (It equals half of 2^(CONST_BITS-PASS1_BITS), the amount shifted out
     * by RIGHT_SHIFT in the output stage of this pass, i.e. a rounding term.)
     */
1925
0
    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
1926
1927
0
    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1928
0
    z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1929
0
    z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1930
    /* Sum and difference of coefficients 4 and 6; both are shared by the
     * three groups that follow.
     */
1931
0
    tmp10 = z3 + z4;
1932
0
    tmp11 = z3 - z4;
1933
    /* tmp12 and tmp13 are scratch values, recomputed for each of the three
     * groups of even outputs; tmp13 also carries the z1 term.
     */
1934
0
    tmp12 = MULTIPLY(tmp10, FIX(1.155388986));                /* (c4+c6)/2 */
1935
0
    tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1;           /* (c4-c6)/2 */
1936
1937
0
    tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13;   /* c2 */
1938
0
    tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13;   /* c10 */
1939
1940
0
    tmp12 = MULTIPLY(tmp10, FIX(0.316450131));                /* (c8-c12)/2 */
1941
0
    tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1;           /* (c8+c12)/2 */
1942
1943
0
    tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13;   /* c6 */
1944
0
    tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */
1945
    /* In this group tmp13 holds "product - z1" and is then subtracted, so
     * z1 still enters tmp23 and tmp24 with a positive sign.
     */
1946
0
    tmp12 = MULTIPLY(tmp10, FIX(0.435816023));                /* (c2-c10)/2 */
1947
0
    tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1;           /* (c2+c10)/2 */
1948
1949
0
    tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */
1950
0
    tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */
1951
    /* tmp26 is the middle output (workspace row 6); no odd term is added
     * to it in the output stage.
     */
1952
0
    tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1;      /* c0 */
1953
1954
    /* Odd part */
1955
1956
0
    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1957
0
    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1958
0
    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1959
0
    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1960
    /* tmp15 holds the plain sum z1 + z4 until it is replaced by its c11
     * product further down. tmp14 is scratch for three cross terms before
     * it receives its final value.
     */
1961
0
    tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651));     /* c3 */
1962
0
    tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945));     /* c5 */
1963
0
    tmp15 = z1 + z4;
1964
0
    tmp13 = MULTIPLY(tmp15, FIX(0.937797057));       /* c7 */
1965
0
    tmp10 = tmp11 + tmp12 + tmp13 -
1966
0
      MULTIPLY(z1, FIX(2.020082300));          /* c7+c5+c3-c1 */
1967
0
    tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458));   /* -c11 */
1968
0
    tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */
1969
0
    tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */
1970
0
    tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945));   /* -c5 */
1971
0
    tmp11 += tmp14;
1972
0
    tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */
1973
0
    tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813));   /* -c9 */
1974
0
    tmp12 += tmp14;
1975
0
    tmp13 += tmp14;
1976
0
    tmp15 = MULTIPLY(tmp15, FIX(0.338443458));       /* c11 */
1977
0
    tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */
1978
0
      MULTIPLY(z2, FIX(0.466105296));          /* c1-c7 */
    /* z1 stops holding coefficient 1 here and becomes a shared product. */
1979
0
    z1    = MULTIPLY(z3 - z2, FIX(0.937797057));     /* c7 */
1980
0
    tmp14 += z1;
1981
0
    tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) -   /* c3-c7 */
1982
0
       MULTIPLY(z4, FIX(1.742345811));         /* c1+c11 */
1983
1984
    /* Final output stage */
    /* Workspace rows k and 12-k receive the sum and the difference of the
     * even term tmp2k and the odd term tmp1k (k = 0..5); row 6 receives
     * tmp26 alone.
     */
1985
1986
0
    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
1987
0
    wsptr[8*12] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
1988
0
    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
1989
0
    wsptr[8*11] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1990
0
    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
1991
0
    wsptr[8*10] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
1992
0
    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
1993
0
    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
1994
0
    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1995
0
    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1996
0
    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
1997
0
    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
1998
0
    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26, CONST_BITS-PASS1_BITS);
1999
0
  }
2000
2001
  /* Pass 2: process 13 rows from work array, store into output array. */
  /* Each iteration reads the 8 values wsptr[0..7] of one workspace row,
   * applies the same 1-D kernel as pass 1, and writes 13 samples.
   */
2002
2003
0
  wsptr = workspace;
2004
0
  for (ctr = 0; ctr < 13; ctr++) {
2005
0
    outptr = output_buf[ctr] + output_col;
2006
2007
    /* Even part */
2008
2009
    /* Add range center and fudge factor for final descale and range-limit. */
    /* After the "<<= CONST_BITS" below, the ONE term equals half of
     * 2^(CONST_BITS+PASS1_BITS+3), the amount shifted out in the output
     * stage, and the RANGE_CENTER term survives that shift as RANGE_CENTER.
     */
2010
0
    z1 = (INT32) wsptr[0] +
2011
0
     ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
2012
0
      (ONE << (PASS1_BITS+2)));
2013
0
    z1 <<= CONST_BITS;
2014
2015
0
    z2 = (INT32) wsptr[2];
2016
0
    z3 = (INT32) wsptr[4];
2017
0
    z4 = (INT32) wsptr[6];
2018
2019
0
    tmp10 = z3 + z4;
2020
0
    tmp11 = z3 - z4;
2021
    /* As in pass 1, tmp12 and tmp13 are scratch values recomputed for each
     * of the three groups of even outputs.
     */
2022
0
    tmp12 = MULTIPLY(tmp10, FIX(1.155388986));                /* (c4+c6)/2 */
2023
0
    tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1;           /* (c4-c6)/2 */
2024
2025
0
    tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13;   /* c2 */
2026
0
    tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13;   /* c10 */
2027
2028
0
    tmp12 = MULTIPLY(tmp10, FIX(0.316450131));                /* (c8-c12)/2 */
2029
0
    tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1;           /* (c8+c12)/2 */
2030
2031
0
    tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13;   /* c6 */
2032
0
    tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */
2033
2034
0
    tmp12 = MULTIPLY(tmp10, FIX(0.435816023));                /* (c2-c10)/2 */
2035
0
    tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1;           /* (c2+c10)/2 */
2036
2037
0
    tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */
2038
0
    tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */
2039
2040
0
    tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1;      /* c0 */
2041
2042
    /* Odd part */
    /* Same sequence as the odd part of pass 1, with the inputs taken from
     * the workspace row instead of the dequantized coefficients.
     */
2043
2044
0
    z1 = (INT32) wsptr[1];
2045
0
    z2 = (INT32) wsptr[3];
2046
0
    z3 = (INT32) wsptr[5];
2047
0
    z4 = (INT32) wsptr[7];
2048
2049
0
    tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651));     /* c3 */
2050
0
    tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945));     /* c5 */
2051
0
    tmp15 = z1 + z4;
2052
0
    tmp13 = MULTIPLY(tmp15, FIX(0.937797057));       /* c7 */
2053
0
    tmp10 = tmp11 + tmp12 + tmp13 -
2054
0
      MULTIPLY(z1, FIX(2.020082300));          /* c7+c5+c3-c1 */
2055
0
    tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458));   /* -c11 */
2056
0
    tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */
2057
0
    tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */
2058
0
    tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945));   /* -c5 */
2059
0
    tmp11 += tmp14;
2060
0
    tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */
2061
0
    tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813));   /* -c9 */
2062
0
    tmp12 += tmp14;
2063
0
    tmp13 += tmp14;
2064
0
    tmp15 = MULTIPLY(tmp15, FIX(0.338443458));       /* c11 */
2065
0
    tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */
2066
0
      MULTIPLY(z2, FIX(0.466105296));          /* c1-c7 */
2067
0
    z1    = MULTIPLY(z3 - z2, FIX(0.937797057));     /* c7 */
2068
0
    tmp14 += z1;
2069
0
    tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) -   /* c3-c7 */
2070
0
       MULTIPLY(z4, FIX(1.742345811));         /* c1+c11 */
2071
2072
    /* Final output stage */
    /* Samples k and 12-k are built from tmp2k + tmp1k and tmp2k - tmp1k;
     * sample 6 is built from tmp26 alone. Each descaled value is masked
     * with RANGE_MASK before it indexes range_limit, so the table index is
     * confined to the bits of RANGE_MASK.
     */
2073
2074
0
    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
2075
0
                 CONST_BITS+PASS1_BITS+3)
2076
0
           & RANGE_MASK];
2077
0
    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
2078
0
                 CONST_BITS+PASS1_BITS+3)
2079
0
           & RANGE_MASK];
2080
0
    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
2081
0
                 CONST_BITS+PASS1_BITS+3)
2082
0
           & RANGE_MASK];
2083
0
    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
2084
0
                 CONST_BITS+PASS1_BITS+3)
2085
0
           & RANGE_MASK];
2086
0
    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
2087
0
                 CONST_BITS+PASS1_BITS+3)
2088
0
           & RANGE_MASK];
2089
0
    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
2090
0
                 CONST_BITS+PASS1_BITS+3)
2091
0
           & RANGE_MASK];
2092
0
    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
2093
0
                 CONST_BITS+PASS1_BITS+3)
2094
0
           & RANGE_MASK];
2095
0
    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
2096
0
                 CONST_BITS+PASS1_BITS+3)
2097
0
           & RANGE_MASK];
2098
0
    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
2099
0
                 CONST_BITS+PASS1_BITS+3)
2100
0
           & RANGE_MASK];
2101
0
    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
2102
0
                 CONST_BITS+PASS1_BITS+3)
2103
0
           & RANGE_MASK];
2104
0
    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
2105
0
                 CONST_BITS+PASS1_BITS+3)
2106
0
           & RANGE_MASK];
2107
0
    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
2108
0
                 CONST_BITS+PASS1_BITS+3)
2109
0
           & RANGE_MASK];
2110
0
    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26,
2111
0
                 CONST_BITS+PASS1_BITS+3)
2112
0
           & RANGE_MASK];
2113
2114
0
    wsptr += 8;   /* advance pointer to next row */
2115
0
  }
2116
0
}
2117
2118
2119
/*
2120
 * Perform dequantization and inverse DCT on one block of coefficients,
2121
 * producing a 14x14 output block.
2122
 *
2123
 * Optimized algorithm with 20 multiplications in the 1-D kernel.
2124
 * cK represents sqrt(2) * cos(K*pi/28).
2125
 */
2126
2127
GLOBAL(void)
2128
jpeg_idct_14x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
2129
     JCOEFPTR coef_block,
2130
     JSAMPARRAY output_buf, JDIMENSION output_col)
2131
0
{
2132
0
  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
2133
0
  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
2134
0
  INT32 z1, z2, z3, z4;
2135
0
  JCOEFPTR inptr;
2136
0
  ISLOW_MULT_TYPE * quantptr;
2137
0
  int * wsptr;
2138
0
  JSAMPROW outptr;
2139
0
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
2140
0
  int ctr;
2141
0
  int workspace[8*14];  /* buffers data between passes */
2142
  SHIFT_TEMPS
2143
2144
  /* Pass 1: process columns from input, store into work array. */
2145
2146
0
  inptr = coef_block;
2147
0
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
2148
0
  wsptr = workspace;
2149
0
  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
2150
    /* Even part */
2151
2152
0
    z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
2153
0
    z1 <<= CONST_BITS;
2154
    /* Add fudge factor here for final descale. */
2155
0
    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
2156
0
    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
2157
0
    z2 = MULTIPLY(z4, FIX(1.274162392));         /* c4 */
2158
0
    z3 = MULTIPLY(z4, FIX(0.314692123));         /* c12 */
2159
0
    z4 = MULTIPLY(z4, FIX(0.881747734));         /* c8 */
2160
2161
0
    tmp10 = z1 + z2;
2162
0
    tmp11 = z1 + z3;
2163
0
    tmp12 = z1 - z4;
2164
2165
0
    tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */
2166
0
      CONST_BITS-PASS1_BITS);
2167
2168
0
    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
2169
0
    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
2170
2171
0
    z3 = MULTIPLY(z1 + z2, FIX(1.105676686));    /* c6 */
2172
2173
0
    tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
2174
0
    tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
2175
0
    tmp15 = MULTIPLY(z1, FIX(0.613604268)) -     /* c10 */
2176
0
      MULTIPLY(z2, FIX(1.378756276));      /* c2 */
2177
2178
0
    tmp20 = tmp10 + tmp13;
2179
0
    tmp26 = tmp10 - tmp13;
2180
0
    tmp21 = tmp11 + tmp14;
2181
0
    tmp25 = tmp11 - tmp14;
2182
0
    tmp22 = tmp12 + tmp15;
2183
0
    tmp24 = tmp12 - tmp15;
2184
2185
    /* Odd part */
2186
2187
0
    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
2188
0
    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
2189
0
    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
2190
0
    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
2191
0
    tmp13 = z4 << CONST_BITS;
2192
2193
0
    tmp14 = z1 + z3;
2194
0
    tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607));           /* c3 */
2195
0
    tmp12 = MULTIPLY(tmp14, FIX(1.197448846));             /* c5 */
2196
0
    tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
2197
0
    tmp14 = MULTIPLY(tmp14, FIX(0.752406978));             /* c9 */
2198
0
    tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426));        /* c9+c11-c13 */
2199
0
    z1    -= z2;
2200
0
    tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13;        /* c11 */
2201
0
    tmp16 += tmp15;
2202
0
    z1    += z4;
2203
0
    z4    = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */
2204
0
    tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948));          /* c3-c9-c13 */
2205
0
    tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773));          /* c3+c5-c13 */
2206
0
    z4    = MULTIPLY(z3 - z2, FIX(1.405321284));           /* c1 */
2207
0
    tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
2208
0
    tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567));          /* c1+c11-c5 */
2209
2210
0
    tmp13 = (z1 - z3) << PASS1_BITS;
2211
2212
    /* Final output stage */
2213
2214
0
    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
2215
0
    wsptr[8*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
2216
0
    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
2217
0
    wsptr[8*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
2218
0
    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
2219
0
    wsptr[8*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
2220
0
    wsptr[8*3]  = (int) (tmp23 + tmp13);
2221
0
    wsptr[8*10] = (int) (tmp23 - tmp13);
2222
0
    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
2223
0
    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
2224
0
    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
2225
0
    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
2226
0
    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
2227
0
    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
2228
0
  }
2229
2230
  /* Pass 2: process 14 rows from work array, store into output array. */
2231
2232
0
  wsptr = workspace;
2233
0
  for (ctr = 0; ctr < 14; ctr++) {
2234
0
    outptr = output_buf[ctr] + output_col;
2235
2236
    /* Even part */
2237
2238
    /* Add range center and fudge factor for final descale and range-limit. */
2239
0
    z1 = (INT32) wsptr[0] +
2240
0
     ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
2241
0
      (ONE << (PASS1_BITS+2)));
2242
0
    z1 <<= CONST_BITS;
2243
0
    z4 = (INT32) wsptr[4];
2244
0
    z2 = MULTIPLY(z4, FIX(1.274162392));         /* c4 */
2245
0
    z3 = MULTIPLY(z4, FIX(0.314692123));         /* c12 */
2246
0
    z4 = MULTIPLY(z4, FIX(0.881747734));         /* c8 */
2247
2248
0
    tmp10 = z1 + z2;
2249
0
    tmp11 = z1 + z3;
2250
0
    tmp12 = z1 - z4;
2251
2252
0
    tmp23 = z1 - ((z2 + z3 - z4) << 1);          /* c0 = (c4+c12-c8)*2 */
2253
2254
0
    z1 = (INT32) wsptr[2];
2255
0
    z2 = (INT32) wsptr[6];
2256
2257
0
    z3 = MULTIPLY(z1 + z2, FIX(1.105676686));    /* c6 */
2258
2259
0
    tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
2260
0
    tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
2261
0
    tmp15 = MULTIPLY(z1, FIX(0.613604268)) -     /* c10 */
2262
0
      MULTIPLY(z2, FIX(1.378756276));      /* c2 */
2263
2264
0
    tmp20 = tmp10 + tmp13;
2265
0
    tmp26 = tmp10 - tmp13;
2266
0
    tmp21 = tmp11 + tmp14;
2267
0
    tmp25 = tmp11 - tmp14;
2268
0
    tmp22 = tmp12 + tmp15;
2269
0
    tmp24 = tmp12 - tmp15;
2270
2271
    /* Odd part */
2272
2273
0
    z1 = (INT32) wsptr[1];
2274
0
    z2 = (INT32) wsptr[3];
2275
0
    z3 = (INT32) wsptr[5];
2276
0
    z4 = (INT32) wsptr[7];
2277
0
    z4 <<= CONST_BITS;
2278
2279
0
    tmp14 = z1 + z3;
2280
0
    tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607));           /* c3 */
2281
0
    tmp12 = MULTIPLY(tmp14, FIX(1.197448846));             /* c5 */
2282
0
    tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
2283
0
    tmp14 = MULTIPLY(tmp14, FIX(0.752406978));             /* c9 */
2284
0
    tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426));        /* c9+c11-c13 */
2285
0
    z1    -= z2;
2286
0
    tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4;           /* c11 */
2287
0
    tmp16 += tmp15;
2288
0
    tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4;    /* -c13 */
2289
0
    tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948));       /* c3-c9-c13 */
2290
0
    tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773));       /* c3+c5-c13 */
2291
0
    tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284));           /* c1 */
2292
0
    tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
2293
0
    tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567));       /* c1+c11-c5 */
2294
2295
0
    tmp13 = ((z1 - z3) << CONST_BITS) + z4;
2296
2297
    /* Final output stage */
2298
2299
0
    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
2300
0
                 CONST_BITS+PASS1_BITS+3)
2301
0
           & RANGE_MASK];
2302
0
    outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
2303
0
                 CONST_BITS+PASS1_BITS+3)
2304
0
           & RANGE_MASK];
2305
0
    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
2306
0
                 CONST_BITS+PASS1_BITS+3)
2307
0
           & RANGE_MASK];
2308
0
    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
2309
0
                 CONST_BITS+PASS1_BITS+3)
2310
0
           & RANGE_MASK];
2311
0
    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
2312
0
                 CONST_BITS+PASS1_BITS+3)
2313
0
           & RANGE_MASK];
2314
0
    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
2315
0
                 CONST_BITS+PASS1_BITS+3)
2316
0
           & RANGE_MASK];
2317
0
    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
2318
0
                 CONST_BITS+PASS1_BITS+3)
2319
0
           & RANGE_MASK];
2320
0
    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
2321
0
                 CONST_BITS+PASS1_BITS+3)
2322
0
           & RANGE_MASK];
2323
0
    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
2324
0
                 CONST_BITS+PASS1_BITS+3)
2325
0
           & RANGE_MASK];
2326
0
    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
2327
0
                 CONST_BITS+PASS1_BITS+3)
2328
0
           & RANGE_MASK];
2329
0
    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
2330
0
                 CONST_BITS+PASS1_BITS+3)
2331
0
           & RANGE_MASK];
2332
0
    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
2333
0
                 CONST_BITS+PASS1_BITS+3)
2334
0
           & RANGE_MASK];
2335
0
    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
2336
0
                 CONST_BITS+PASS1_BITS+3)
2337
0
           & RANGE_MASK];
2338
0
    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
2339
0
                 CONST_BITS+PASS1_BITS+3)
2340
0
           & RANGE_MASK];
2341
2342
0
    wsptr += 8;   /* advance pointer to next row */
2343
0
  }
2344
0
}
2345
2346
2347
/*
2348
 * Perform dequantization and inverse DCT on one block of coefficients,
2349
 * producing a 15x15 output block.
2350
 *
2351
 * Optimized algorithm with 22 multiplications in the 1-D kernel.
2352
 * cK represents sqrt(2) * cos(K*pi/30).
2353
 */
2354
2355
GLOBAL(void)
2356
jpeg_idct_15x15 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
2357
     JCOEFPTR coef_block,
2358
     JSAMPARRAY output_buf, JDIMENSION output_col)
2359
0
{
2360
0
  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
2361
0
  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
2362
0
  INT32 z1, z2, z3, z4;
2363
0
  JCOEFPTR inptr;
2364
0
  ISLOW_MULT_TYPE * quantptr;
2365
0
  int * wsptr;
2366
0
  JSAMPROW outptr;
2367
0
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
2368
0
  int ctr;
2369
0
  int workspace[8*15];  /* buffers data between passes */
2370
  SHIFT_TEMPS
2371
2372
  /* Pass 1: process columns from input, store into work array. */
2373
2374
0
  inptr = coef_block;
2375
0
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
2376
0
  wsptr = workspace;
2377
0
  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
2378
    /* Even part */
2379
2380
0
    z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
2381
0
    z1 <<= CONST_BITS;
2382
    /* Add fudge factor here for final descale. */
2383
0
    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
2384
2385
0
    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
2386
0
    z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
2387
0
    z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
2388
2389
0
    tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
2390
0
    tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
2391
2392
0
    tmp12 = z1 - tmp10;
2393
0
    tmp13 = z1 + tmp11;
2394
0
    z1 -= (tmp11 - tmp10) << 1;             /* c0 = (c6-c12)*2 */
2395
2396
0
    z4 = z2 - z3;
2397
0
    z3 += z2;
2398
0
    tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
2399
0
    tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
2400
0
    z2 = MULTIPLY(z2, FIX(1.439773946));    /* c4+c14 */
2401
2402
0
    tmp20 = tmp13 + tmp10 + tmp11;
2403
0
    tmp23 = tmp12 - tmp10 + tmp11 + z2;
2404
2405
0
    tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */
2406
0
    tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */
2407
2408
0
    tmp25 = tmp13 - tmp10 - tmp11;
2409
0
    tmp26 = tmp12 + tmp10 - tmp11 - z2;
2410
2411
0
    tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */
2412
0
    tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */
2413
2414
0
    tmp21 = tmp12 + tmp10 + tmp11;
2415
0
    tmp24 = tmp13 - tmp10 + tmp11;
2416
0
    tmp11 += tmp11;
2417
0
    tmp22 = z1 + tmp11;                     /* c10 = c6-c12 */
2418
0
    tmp27 = z1 - tmp11 - tmp11;             /* c0 = (c6-c12)*2 */
2419
2420
    /* Odd part */
2421
2422
0
    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
2423
0
    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
2424
0
    z4 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
2425
0
    z3 = MULTIPLY(z4, FIX(1.224744871));                    /* c5 */
2426
0
    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
2427
2428
0
    tmp13 = z2 - z4;
2429
0
    tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876));         /* c9 */
2430
0
    tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148));         /* c3-c9 */
2431
0
    tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899));      /* c3+c9 */
2432
2433
0
    tmp13 = MULTIPLY(z2, - FIX(0.831253876));               /* -c9 */
2434
0
    tmp15 = MULTIPLY(z2, - FIX(1.344997024));               /* -c3 */
2435
0
    z2 = z1 - z4;
2436
0
    tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353));            /* c1 */
2437
2438
0
    tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */
2439
0
    tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */
2440
0
    tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3;            /* c5 */
2441
0
    z2 = MULTIPLY(z1 + z4, FIX(0.575212477));               /* c11 */
2442
0
    tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3;      /* c7-c11 */
2443
0
    tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3;      /* c11+c13 */
2444
2445
    /* Final output stage */
2446
2447
0
    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
2448
0
    wsptr[8*14] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
2449
0
    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
2450
0
    wsptr[8*13] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
2451
0
    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
2452
0
    wsptr[8*12] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
2453
0
    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
2454
0
    wsptr[8*11] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
2455
0
    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
2456
0
    wsptr[8*10] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
2457
0
    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
2458
0
    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
2459
0
    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
2460
0
    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
2461
0
    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp27, CONST_BITS-PASS1_BITS);
2462
0
  }
2463
2464
  /* Pass 2: process 15 rows from work array, store into output array. */
2465
2466
0
  wsptr = workspace;
2467
0
  for (ctr = 0; ctr < 15; ctr++) {
2468
0
    outptr = output_buf[ctr] + output_col;
2469
2470
    /* Even part */
2471
2472
    /* Add range center and fudge factor for final descale and range-limit. */
2473
0
    z1 = (INT32) wsptr[0] +
2474
0
     ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
2475
0
      (ONE << (PASS1_BITS+2)));
2476
0
    z1 <<= CONST_BITS;
2477
2478
0
    z2 = (INT32) wsptr[2];
2479
0
    z3 = (INT32) wsptr[4];
2480
0
    z4 = (INT32) wsptr[6];
2481
2482
0
    tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
2483
0
    tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
2484
2485
0
    tmp12 = z1 - tmp10;
2486
0
    tmp13 = z1 + tmp11;
2487
0
    z1 -= (tmp11 - tmp10) << 1;             /* c0 = (c6-c12)*2 */
2488
2489
0
    z4 = z2 - z3;
2490
0
    z3 += z2;
2491
0
    tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
2492
0
    tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
2493
0
    z2 = MULTIPLY(z2, FIX(1.439773946));    /* c4+c14 */
2494
2495
0
    tmp20 = tmp13 + tmp10 + tmp11;
2496
0
    tmp23 = tmp12 - tmp10 + tmp11 + z2;
2497
2498
0
    tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */
2499
0
    tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */
2500
2501
0
    tmp25 = tmp13 - tmp10 - tmp11;
2502
0
    tmp26 = tmp12 + tmp10 - tmp11 - z2;
2503
2504
0
    tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */
2505
0
    tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */
2506
2507
0
    tmp21 = tmp12 + tmp10 + tmp11;
2508
0
    tmp24 = tmp13 - tmp10 + tmp11;
2509
0
    tmp11 += tmp11;
2510
0
    tmp22 = z1 + tmp11;                     /* c10 = c6-c12 */
2511
0
    tmp27 = z1 - tmp11 - tmp11;             /* c0 = (c6-c12)*2 */
2512
2513
    /* Odd part */
2514
2515
0
    z1 = (INT32) wsptr[1];
2516
0
    z2 = (INT32) wsptr[3];
2517
0
    z4 = (INT32) wsptr[5];
2518
0
    z3 = MULTIPLY(z4, FIX(1.224744871));                    /* c5 */
2519
0
    z4 = (INT32) wsptr[7];
2520
2521
0
    tmp13 = z2 - z4;
2522
0
    tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876));         /* c9 */
2523
0
    tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148));         /* c3-c9 */
2524
0
    tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899));      /* c3+c9 */
2525
2526
0
    tmp13 = MULTIPLY(z2, - FIX(0.831253876));               /* -c9 */
2527
0
    tmp15 = MULTIPLY(z2, - FIX(1.344997024));               /* -c3 */
2528
0
    z2 = z1 - z4;
2529
0
    tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353));            /* c1 */
2530
2531
0
    tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */
2532
0
    tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */
2533
0
    tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3;            /* c5 */
2534
0
    z2 = MULTIPLY(z1 + z4, FIX(0.575212477));               /* c11 */
2535
0
    tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3;      /* c7-c11 */
2536
0
    tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3;      /* c11+c13 */
2537
2538
    /* Final output stage */
2539
2540
0
    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
2541
0
                 CONST_BITS+PASS1_BITS+3)
2542
0
           & RANGE_MASK];
2543
0
    outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
2544
0
                 CONST_BITS+PASS1_BITS+3)
2545
0
           & RANGE_MASK];
2546
0
    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
2547
0
                 CONST_BITS+PASS1_BITS+3)
2548
0
           & RANGE_MASK];
2549
0
    outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
2550
0
                 CONST_BITS+PASS1_BITS+3)
2551
0
           & RANGE_MASK];
2552
0
    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
2553
0
                 CONST_BITS+PASS1_BITS+3)
2554
0
           & RANGE_MASK];
2555
0
    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
2556
0
                 CONST_BITS+PASS1_BITS+3)
2557
0
           & RANGE_MASK];
2558
0
    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
2559
0
                 CONST_BITS+PASS1_BITS+3)
2560
0
           & RANGE_MASK];
2561
0
    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
2562
0
                 CONST_BITS+PASS1_BITS+3)
2563
0
           & RANGE_MASK];
2564
0
    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
2565
0
                 CONST_BITS+PASS1_BITS+3)
2566
0
           & RANGE_MASK];
2567
0
    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
2568
0
                 CONST_BITS+PASS1_BITS+3)
2569
0
           & RANGE_MASK];
2570
0
    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
2571
0
                 CONST_BITS+PASS1_BITS+3)
2572
0
           & RANGE_MASK];
2573
0
    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
2574
0
                 CONST_BITS+PASS1_BITS+3)
2575
0
           & RANGE_MASK];
2576
0
    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
2577
0
                 CONST_BITS+PASS1_BITS+3)
2578
0
           & RANGE_MASK];
2579
0
    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
2580
0
                 CONST_BITS+PASS1_BITS+3)
2581
0
           & RANGE_MASK];
2582
0
    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp27,
2583
0
                 CONST_BITS+PASS1_BITS+3)
2584
0
           & RANGE_MASK];
2585
2586
0
    wsptr += 8;   /* advance pointer to next row */
2587
0
  }
2588
0
}
2589
2590
2591
/*
2592
 * Perform dequantization and inverse DCT on one block of coefficients,
2593
 * producing a 16x16 output block.
2594
 *
2595
 * Optimized algorithm with 28 multiplications in the 1-D kernel.
2596
 * cK represents sqrt(2) * cos(K*pi/32).
2597
 */
2598
2599
GLOBAL(void)
2600
jpeg_idct_16x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
2601
     JCOEFPTR coef_block,
2602
     JSAMPARRAY output_buf, JDIMENSION output_col)
2603
0
{
2604
0
  INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
2605
0
  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
2606
0
  INT32 z1, z2, z3, z4;
2607
0
  JCOEFPTR inptr;
2608
0
  ISLOW_MULT_TYPE * quantptr;
2609
0
  int * wsptr;
2610
0
  JSAMPROW outptr;
2611
0
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
2612
0
  int ctr;
2613
0
  int workspace[8*16];  /* buffers data between passes */
2614
  SHIFT_TEMPS
2615
2616
  /* Pass 1: process columns from input, store into work array. */
2617
2618
0
  inptr = coef_block;
2619
0
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
2620
0
  wsptr = workspace;
2621
0
  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
2622
    /* Even part */
2623
2624
0
    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
2625
0
    tmp0 <<= CONST_BITS;
2626
    /* Add fudge factor here for final descale. */
2627
0
    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
2628
2629
0
    z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
2630
0
    tmp1 = MULTIPLY(z1, FIX(1.306562965));      /* c4[16] = c2[8] */
2631
0
    tmp2 = MULTIPLY(z1, FIX_0_541196100);       /* c12[16] = c6[8] */
2632
2633
0
    tmp10 = tmp0 + tmp1;
2634
0
    tmp11 = tmp0 - tmp1;
2635
0
    tmp12 = tmp0 + tmp2;
2636
0
    tmp13 = tmp0 - tmp2;
2637
2638
0
    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
2639
0
    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
2640
0
    z3 = z1 - z2;
2641
0
    z4 = MULTIPLY(z3, FIX(0.275899379));        /* c14[16] = c7[8] */
2642
0
    z3 = MULTIPLY(z3, FIX(1.387039845));        /* c2[16] = c1[8] */
2643
2644
0
    tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447);  /* (c6+c2)[16] = (c3+c1)[8] */
2645
0
    tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223);  /* (c6-c14)[16] = (c3-c7)[8] */
2646
0
    tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
2647
0
    tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
2648
2649
0
    tmp20 = tmp10 + tmp0;
2650
0
    tmp27 = tmp10 - tmp0;
2651
0
    tmp21 = tmp12 + tmp1;
2652
0
    tmp26 = tmp12 - tmp1;
2653
0
    tmp22 = tmp13 + tmp2;
2654
0
    tmp25 = tmp13 - tmp2;
2655
0
    tmp23 = tmp11 + tmp3;
2656
0
    tmp24 = tmp11 - tmp3;
2657
2658
    /* Odd part */
2659
2660
0
    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
2661
0
    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
2662
0
    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
2663
0
    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
2664
2665
0
    tmp11 = z1 + z3;
2666
2667
0
    tmp1  = MULTIPLY(z1 + z2, FIX(1.353318001));   /* c3 */
2668
0
    tmp2  = MULTIPLY(tmp11,   FIX(1.247225013));   /* c5 */
2669
0
    tmp3  = MULTIPLY(z1 + z4, FIX(1.093201867));   /* c7 */
2670
0
    tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586));   /* c9 */
2671
0
    tmp11 = MULTIPLY(tmp11,   FIX(0.666655658));   /* c11 */
2672
0
    tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528));   /* c13 */
2673
0
    tmp0  = tmp1 + tmp2 + tmp3 -
2674
0
      MULTIPLY(z1, FIX(2.286341144));        /* c7+c5+c3-c1 */
2675
0
    tmp13 = tmp10 + tmp11 + tmp12 -
2676
0
      MULTIPLY(z1, FIX(1.835730603));        /* c9+c11+c13-c15 */
2677
0
    z1    = MULTIPLY(z2 + z3, FIX(0.138617169));   /* c15 */
2678
0
    tmp1  += z1 + MULTIPLY(z2, FIX(0.071888074));  /* c9+c11-c3-c15 */
2679
0
    tmp2  += z1 - MULTIPLY(z3, FIX(1.125726048));  /* c5+c7+c15-c3 */
2680
0
    z1    = MULTIPLY(z3 - z2, FIX(1.407403738));   /* c1 */
2681
0
    tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282));  /* c1+c11-c9-c13 */
2682
0
    tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411));  /* c1+c5+c13-c7 */
2683
0
    z2    += z4;
2684
0
    z1    = MULTIPLY(z2, - FIX(0.666655658));      /* -c11 */
2685
0
    tmp1  += z1;
2686
0
    tmp3  += z1 + MULTIPLY(z4, FIX(1.065388962));  /* c3+c11+c15-c7 */
2687
0
    z2    = MULTIPLY(z2, - FIX(1.247225013));      /* -c5 */
2688
0
    tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809));  /* c1+c5+c9-c13 */
2689
0
    tmp12 += z2;
2690
0
    z2    = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
2691
0
    tmp2  += z2;
2692
0
    tmp3  += z2;
2693
0
    z2    = MULTIPLY(z4 - z3, FIX(0.410524528));   /* c13 */
2694
0
    tmp10 += z2;
2695
0
    tmp11 += z2;
2696
2697
    /* Final output stage */
2698
2699
0
    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp0,  CONST_BITS-PASS1_BITS);
2700
0
    wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0,  CONST_BITS-PASS1_BITS);
2701
0
    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp1,  CONST_BITS-PASS1_BITS);
2702
0
    wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1,  CONST_BITS-PASS1_BITS);
2703
0
    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp2,  CONST_BITS-PASS1_BITS);
2704
0
    wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2,  CONST_BITS-PASS1_BITS);
2705
0
    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp3,  CONST_BITS-PASS1_BITS);
2706
0
    wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3,  CONST_BITS-PASS1_BITS);
2707
0
    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS);
2708
0
    wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS);
2709
0
    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS);
2710
0
    wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS);
2711
0
    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS);
2712
0
    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS);
2713
0
    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS);
2714
0
    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS);
2715
0
  }
2716
2717
  /* Pass 2: process 16 rows from work array, store into output array. */
2718
2719
0
  wsptr = workspace;
2720
0
  for (ctr = 0; ctr < 16; ctr++) {
2721
0
    outptr = output_buf[ctr] + output_col;
2722
2723
    /* Even part */
2724
2725
    /* Add range center and fudge factor for final descale and range-limit. */
2726
0
    tmp0 = (INT32) wsptr[0] +
2727
0
       ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
2728
0
        (ONE << (PASS1_BITS+2)));
2729
0
    tmp0 <<= CONST_BITS;
2730
2731
0
    z1 = (INT32) wsptr[4];
2732
0
    tmp1 = MULTIPLY(z1, FIX(1.306562965));      /* c4[16] = c2[8] */
2733
0
    tmp2 = MULTIPLY(z1, FIX_0_541196100);       /* c12[16] = c6[8] */
2734
2735
0
    tmp10 = tmp0 + tmp1;
2736
0
    tmp11 = tmp0 - tmp1;
2737
0
    tmp12 = tmp0 + tmp2;
2738
0
    tmp13 = tmp0 - tmp2;
2739
2740
0
    z1 = (INT32) wsptr[2];
2741
0
    z2 = (INT32) wsptr[6];
2742
0
    z3 = z1 - z2;
2743
0
    z4 = MULTIPLY(z3, FIX(0.275899379));        /* c14[16] = c7[8] */
2744
0
    z3 = MULTIPLY(z3, FIX(1.387039845));        /* c2[16] = c1[8] */
2745
2746
0
    tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447);  /* (c6+c2)[16] = (c3+c1)[8] */
2747
0
    tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223);  /* (c6-c14)[16] = (c3-c7)[8] */
2748
0
    tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
2749
0
    tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
2750
2751
0
    tmp20 = tmp10 + tmp0;
2752
0
    tmp27 = tmp10 - tmp0;
2753
0
    tmp21 = tmp12 + tmp1;
2754
0
    tmp26 = tmp12 - tmp1;
2755
0
    tmp22 = tmp13 + tmp2;
2756
0
    tmp25 = tmp13 - tmp2;
2757
0
    tmp23 = tmp11 + tmp3;
2758
0
    tmp24 = tmp11 - tmp3;
2759
2760
    /* Odd part */
2761
2762
0
    z1 = (INT32) wsptr[1];
2763
0
    z2 = (INT32) wsptr[3];
2764
0
    z3 = (INT32) wsptr[5];
2765
0
    z4 = (INT32) wsptr[7];
2766
2767
0
    tmp11 = z1 + z3;
2768
2769
0
    tmp1  = MULTIPLY(z1 + z2, FIX(1.353318001));   /* c3 */
2770
0
    tmp2  = MULTIPLY(tmp11,   FIX(1.247225013));   /* c5 */
2771
0
    tmp3  = MULTIPLY(z1 + z4, FIX(1.093201867));   /* c7 */
2772
0
    tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586));   /* c9 */
2773
0
    tmp11 = MULTIPLY(tmp11,   FIX(0.666655658));   /* c11 */
2774
0
    tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528));   /* c13 */
2775
0
    tmp0  = tmp1 + tmp2 + tmp3 -
2776
0
      MULTIPLY(z1, FIX(2.286341144));        /* c7+c5+c3-c1 */
2777
0
    tmp13 = tmp10 + tmp11 + tmp12 -
2778
0
      MULTIPLY(z1, FIX(1.835730603));        /* c9+c11+c13-c15 */
2779
0
    z1    = MULTIPLY(z2 + z3, FIX(0.138617169));   /* c15 */
2780
0
    tmp1  += z1 + MULTIPLY(z2, FIX(0.071888074));  /* c9+c11-c3-c15 */
2781
0
    tmp2  += z1 - MULTIPLY(z3, FIX(1.125726048));  /* c5+c7+c15-c3 */
2782
0
    z1    = MULTIPLY(z3 - z2, FIX(1.407403738));   /* c1 */
2783
0
    tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282));  /* c1+c11-c9-c13 */
2784
0
    tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411));  /* c1+c5+c13-c7 */
2785
0
    z2    += z4;
2786
0
    z1    = MULTIPLY(z2, - FIX(0.666655658));      /* -c11 */
2787
0
    tmp1  += z1;
2788
0
    tmp3  += z1 + MULTIPLY(z4, FIX(1.065388962));  /* c3+c11+c15-c7 */
2789
0
    z2    = MULTIPLY(z2, - FIX(1.247225013));      /* -c5 */
2790
0
    tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809));  /* c1+c5+c9-c13 */
2791
0
    tmp12 += z2;
2792
0
    z2    = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
2793
0
    tmp2  += z2;
2794
0
    tmp3  += z2;
2795
0
    z2    = MULTIPLY(z4 - z3, FIX(0.410524528));   /* c13 */
2796
0
    tmp10 += z2;
2797
0
    tmp11 += z2;
2798
2799
    /* Final output stage */
2800
2801
0
    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0,
2802
0
                 CONST_BITS+PASS1_BITS+3)
2803
0
           & RANGE_MASK];
2804
0
    outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0,
2805
0
                 CONST_BITS+PASS1_BITS+3)
2806
0
           & RANGE_MASK];
2807
0
    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1,
2808
0
                 CONST_BITS+PASS1_BITS+3)
2809
0
           & RANGE_MASK];
2810
0
    outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1,
2811
0
                 CONST_BITS+PASS1_BITS+3)
2812
0
           & RANGE_MASK];
2813
0
    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2,
2814
0
                 CONST_BITS+PASS1_BITS+3)
2815
0
           & RANGE_MASK];
2816
0
    outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2,
2817
0
                 CONST_BITS+PASS1_BITS+3)
2818
0
           & RANGE_MASK];
2819
0
    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3,
2820
0
                 CONST_BITS+PASS1_BITS+3)
2821
0
           & RANGE_MASK];
2822
0
    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3,
2823
0
                 CONST_BITS+PASS1_BITS+3)
2824
0
           & RANGE_MASK];
2825
0
    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10,
2826
0
                 CONST_BITS+PASS1_BITS+3)
2827
0
           & RANGE_MASK];
2828
0
    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10,
2829
0
                 CONST_BITS+PASS1_BITS+3)
2830
0
           & RANGE_MASK];
2831
0
    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11,
2832
0
                 CONST_BITS+PASS1_BITS+3)
2833
0
           & RANGE_MASK];
2834
0
    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11,
2835
0
                 CONST_BITS+PASS1_BITS+3)
2836
0
           & RANGE_MASK];
2837
0
    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12,
2838
0
                 CONST_BITS+PASS1_BITS+3)
2839
0
           & RANGE_MASK];
2840
0
    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12,
2841
0
                 CONST_BITS+PASS1_BITS+3)
2842
0
           & RANGE_MASK];
2843
0
    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13,
2844
0
                 CONST_BITS+PASS1_BITS+3)
2845
0
           & RANGE_MASK];
2846
0
    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13,
2847
0
                 CONST_BITS+PASS1_BITS+3)
2848
0
           & RANGE_MASK];
2849
2850
0
    wsptr += 8;   /* advance pointer to next row */
2851
0
  }
2852
0
}
2853
2854
2855
/*
2856
 * Perform dequantization and inverse DCT on one block of coefficients,
2857
 * producing a 16x8 output block.
2858
 *
2859
 * 8-point IDCT in pass 1 (columns), 16-point in pass 2 (rows).
2860
 */
2861
2862
GLOBAL(void)
2863
jpeg_idct_16x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
2864
    JCOEFPTR coef_block,
2865
    JSAMPARRAY output_buf, JDIMENSION output_col)
2866
0
{
2867
0
  INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
2868
0
  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
2869
0
  INT32 z1, z2, z3, z4;
2870
0
  JCOEFPTR inptr;
2871
0
  ISLOW_MULT_TYPE * quantptr;
2872
0
  int * wsptr;
2873
0
  JSAMPROW outptr;
2874
0
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
2875
0
  int ctr;
2876
0
  int workspace[8*8]; /* buffers data between passes */
2877
  SHIFT_TEMPS
2878
2879
  /* Pass 1: process columns from input, store into work array.
2880
   * Note results are scaled up by sqrt(8) compared to a true IDCT;
2881
   * furthermore, we scale the results by 2**PASS1_BITS.
2882
   * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
2883
   */
2884
2885
0
  inptr = coef_block;
2886
0
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
2887
0
  wsptr = workspace;
2888
0
  for (ctr = DCTSIZE; ctr > 0; ctr--) {
2889
    /* Due to quantization, we will usually find that many of the input
2890
     * coefficients are zero, especially the AC terms.  We can exploit this
2891
     * by short-circuiting the IDCT calculation for any column in which all
2892
     * the AC terms are zero.  In that case each output is equal to the
2893
     * DC coefficient (with scale factor as needed).
2894
     * With typical images and quantization tables, half or more of the
2895
     * column DCT calculations can be simplified this way.
2896
     */
2897
2898
0
    if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
2899
0
  inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
2900
0
  inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
2901
0
  inptr[DCTSIZE*7] == 0) {
2902
      /* AC terms all zero */
2903
0
      int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
2904
2905
0
      wsptr[DCTSIZE*0] = dcval;
2906
0
      wsptr[DCTSIZE*1] = dcval;
2907
0
      wsptr[DCTSIZE*2] = dcval;
2908
0
      wsptr[DCTSIZE*3] = dcval;
2909
0
      wsptr[DCTSIZE*4] = dcval;
2910
0
      wsptr[DCTSIZE*5] = dcval;
2911
0
      wsptr[DCTSIZE*6] = dcval;
2912
0
      wsptr[DCTSIZE*7] = dcval;
2913
2914
0
      inptr++;      /* advance pointers to next column */
2915
0
      quantptr++;
2916
0
      wsptr++;
2917
0
      continue;
2918
0
    }
2919
2920
    /* Even part: reverse the even part of the forward DCT.
2921
     * The rotator is c(-6).
2922
     */
2923
2924
0
    z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
2925
0
    z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
2926
0
    z2 <<= CONST_BITS;
2927
0
    z3 <<= CONST_BITS;
2928
    /* Add fudge factor here for final descale. */
2929
0
    z2 += ONE << (CONST_BITS-PASS1_BITS-1);
2930
2931
0
    tmp0 = z2 + z3;
2932
0
    tmp1 = z2 - z3;
2933
2934
0
    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
2935
0
    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
2936
2937
0
    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
2938
0
    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
2939
0
    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
2940
2941
0
    tmp10 = tmp0 + tmp2;
2942
0
    tmp13 = tmp0 - tmp2;
2943
0
    tmp11 = tmp1 + tmp3;
2944
0
    tmp12 = tmp1 - tmp3;
2945
2946
    /* Odd part per figure 8; the matrix is unitary and hence its
2947
     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
2948
     */
2949
2950
0
    tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
2951
0
    tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
2952
0
    tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
2953
0
    tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
2954
2955
0
    z2 = tmp0 + tmp2;
2956
0
    z3 = tmp1 + tmp3;
2957
2958
0
    z1 = MULTIPLY(z2 + z3, FIX_1_175875602);       /*  c3 */
2959
0
    z2 = MULTIPLY(z2, - FIX_1_961570560);          /* -c3-c5 */
2960
0
    z3 = MULTIPLY(z3, - FIX_0_390180644);          /* -c3+c5 */
2961
0
    z2 += z1;
2962
0
    z3 += z1;
2963
2964
0
    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
2965
0
    tmp0 = MULTIPLY(tmp0, FIX_0_298631336);        /* -c1+c3+c5-c7 */
2966
0
    tmp3 = MULTIPLY(tmp3, FIX_1_501321110);        /*  c1+c3-c5-c7 */
2967
0
    tmp0 += z1 + z2;
2968
0
    tmp3 += z1 + z3;
2969
2970
0
    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
2971
0
    tmp1 = MULTIPLY(tmp1, FIX_2_053119869);        /*  c1+c3-c5+c7 */
2972
0
    tmp2 = MULTIPLY(tmp2, FIX_3_072711026);        /*  c1+c3+c5-c7 */
2973
0
    tmp1 += z1 + z3;
2974
0
    tmp2 += z1 + z2;
2975
2976
    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
2977
2978
0
    wsptr[DCTSIZE*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
2979
0
    wsptr[DCTSIZE*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
2980
0
    wsptr[DCTSIZE*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
2981
0
    wsptr[DCTSIZE*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
2982
0
    wsptr[DCTSIZE*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
2983
0
    wsptr[DCTSIZE*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
2984
0
    wsptr[DCTSIZE*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
2985
0
    wsptr[DCTSIZE*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
2986
2987
0
    inptr++;      /* advance pointers to next column */
2988
0
    quantptr++;
2989
0
    wsptr++;
2990
0
  }
2991
2992
  /* Pass 2: process 8 rows from work array, store into output array.
2993
   * 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
2994
   */
2995
2996
0
  wsptr = workspace;
2997
0
  for (ctr = 0; ctr < 8; ctr++) {
2998
0
    outptr = output_buf[ctr] + output_col;
2999
3000
    /* Even part */
3001
3002
    /* Add range center and fudge factor for final descale and range-limit. */
3003
0
    tmp0 = (INT32) wsptr[0] +
3004
0
       ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
3005
0
        (ONE << (PASS1_BITS+2)));
3006
0
    tmp0 <<= CONST_BITS;
3007
3008
0
    z1 = (INT32) wsptr[4];
3009
0
    tmp1 = MULTIPLY(z1, FIX(1.306562965));      /* c4[16] = c2[8] */
3010
0
    tmp2 = MULTIPLY(z1, FIX_0_541196100);       /* c12[16] = c6[8] */
3011
3012
0
    tmp10 = tmp0 + tmp1;
3013
0
    tmp11 = tmp0 - tmp1;
3014
0
    tmp12 = tmp0 + tmp2;
3015
0
    tmp13 = tmp0 - tmp2;
3016
3017
0
    z1 = (INT32) wsptr[2];
3018
0
    z2 = (INT32) wsptr[6];
3019
0
    z3 = z1 - z2;
3020
0
    z4 = MULTIPLY(z3, FIX(0.275899379));        /* c14[16] = c7[8] */
3021
0
    z3 = MULTIPLY(z3, FIX(1.387039845));        /* c2[16] = c1[8] */
3022
3023
0
    tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447);  /* (c6+c2)[16] = (c3+c1)[8] */
3024
0
    tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223);  /* (c6-c14)[16] = (c3-c7)[8] */
3025
0
    tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
3026
0
    tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
3027
3028
0
    tmp20 = tmp10 + tmp0;
3029
0
    tmp27 = tmp10 - tmp0;
3030
0
    tmp21 = tmp12 + tmp1;
3031
0
    tmp26 = tmp12 - tmp1;
3032
0
    tmp22 = tmp13 + tmp2;
3033
0
    tmp25 = tmp13 - tmp2;
3034
0
    tmp23 = tmp11 + tmp3;
3035
0
    tmp24 = tmp11 - tmp3;
3036
3037
    /* Odd part */
3038
3039
0
    z1 = (INT32) wsptr[1];
3040
0
    z2 = (INT32) wsptr[3];
3041
0
    z3 = (INT32) wsptr[5];
3042
0
    z4 = (INT32) wsptr[7];
3043
3044
0
    tmp11 = z1 + z3;
3045
3046
0
    tmp1  = MULTIPLY(z1 + z2, FIX(1.353318001));   /* c3 */
3047
0
    tmp2  = MULTIPLY(tmp11,   FIX(1.247225013));   /* c5 */
3048
0
    tmp3  = MULTIPLY(z1 + z4, FIX(1.093201867));   /* c7 */
3049
0
    tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586));   /* c9 */
3050
0
    tmp11 = MULTIPLY(tmp11,   FIX(0.666655658));   /* c11 */
3051
0
    tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528));   /* c13 */
3052
0
    tmp0  = tmp1 + tmp2 + tmp3 -
3053
0
      MULTIPLY(z1, FIX(2.286341144));        /* c7+c5+c3-c1 */
3054
0
    tmp13 = tmp10 + tmp11 + tmp12 -
3055
0
      MULTIPLY(z1, FIX(1.835730603));        /* c9+c11+c13-c15 */
3056
0
    z1    = MULTIPLY(z2 + z3, FIX(0.138617169));   /* c15 */
3057
0
    tmp1  += z1 + MULTIPLY(z2, FIX(0.071888074));  /* c9+c11-c3-c15 */
3058
0
    tmp2  += z1 - MULTIPLY(z3, FIX(1.125726048));  /* c5+c7+c15-c3 */
3059
0
    z1    = MULTIPLY(z3 - z2, FIX(1.407403738));   /* c1 */
3060
0
    tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282));  /* c1+c11-c9-c13 */
3061
0
    tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411));  /* c1+c5+c13-c7 */
3062
0
    z2    += z4;
3063
0
    z1    = MULTIPLY(z2, - FIX(0.666655658));      /* -c11 */
3064
0
    tmp1  += z1;
3065
0
    tmp3  += z1 + MULTIPLY(z4, FIX(1.065388962));  /* c3+c11+c15-c7 */
3066
0
    z2    = MULTIPLY(z2, - FIX(1.247225013));      /* -c5 */
3067
0
    tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809));  /* c1+c5+c9-c13 */
3068
0
    tmp12 += z2;
3069
0
    z2    = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
3070
0
    tmp2  += z2;
3071
0
    tmp3  += z2;
3072
0
    z2    = MULTIPLY(z4 - z3, FIX(0.410524528));   /* c13 */
3073
0
    tmp10 += z2;
3074
0
    tmp11 += z2;
3075
3076
    /* Final output stage */
3077
3078
0
    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0,
3079
0
                 CONST_BITS+PASS1_BITS+3)
3080
0
           & RANGE_MASK];
3081
0
    outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0,
3082
0
                 CONST_BITS+PASS1_BITS+3)
3083
0
           & RANGE_MASK];
3084
0
    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1,
3085
0
                 CONST_BITS+PASS1_BITS+3)
3086
0
           & RANGE_MASK];
3087
0
    outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1,
3088
0
                 CONST_BITS+PASS1_BITS+3)
3089
0
           & RANGE_MASK];
3090
0
    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2,
3091
0
                 CONST_BITS+PASS1_BITS+3)
3092
0
           & RANGE_MASK];
3093
0
    outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2,
3094
0
                 CONST_BITS+PASS1_BITS+3)
3095
0
           & RANGE_MASK];
3096
0
    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3,
3097
0
                 CONST_BITS+PASS1_BITS+3)
3098
0
           & RANGE_MASK];
3099
0
    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3,
3100
0
                 CONST_BITS+PASS1_BITS+3)
3101
0
           & RANGE_MASK];
3102
0
    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10,
3103
0
                 CONST_BITS+PASS1_BITS+3)
3104
0
           & RANGE_MASK];
3105
0
    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10,
3106
0
                 CONST_BITS+PASS1_BITS+3)
3107
0
           & RANGE_MASK];
3108
0
    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11,
3109
0
                 CONST_BITS+PASS1_BITS+3)
3110
0
           & RANGE_MASK];
3111
0
    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11,
3112
0
                 CONST_BITS+PASS1_BITS+3)
3113
0
           & RANGE_MASK];
3114
0
    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12,
3115
0
                 CONST_BITS+PASS1_BITS+3)
3116
0
           & RANGE_MASK];
3117
0
    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12,
3118
0
                 CONST_BITS+PASS1_BITS+3)
3119
0
           & RANGE_MASK];
3120
0
    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13,
3121
0
                 CONST_BITS+PASS1_BITS+3)
3122
0
           & RANGE_MASK];
3123
0
    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13,
3124
0
                 CONST_BITS+PASS1_BITS+3)
3125
0
           & RANGE_MASK];
3126
3127
0
    wsptr += 8;   /* advance pointer to next row */
3128
0
  }
3129
0
}
3130
3131
3132
/*
3133
 * Perform dequantization and inverse DCT on one block of coefficients,
3134
 * producing a 14x7 output block.
3135
 *
3136
 * 7-point IDCT in pass 1 (columns), 14-point in pass 2 (rows).
3137
 */
3138
3139
GLOBAL(void)
3140
jpeg_idct_14x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3141
    JCOEFPTR coef_block,
3142
    JSAMPARRAY output_buf, JDIMENSION output_col)
3143
0
{
3144
0
  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
3145
0
  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
3146
0
  INT32 z1, z2, z3, z4;
3147
0
  JCOEFPTR inptr;
3148
0
  ISLOW_MULT_TYPE * quantptr;
3149
0
  int * wsptr;
3150
0
  JSAMPROW outptr;
3151
0
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3152
0
  int ctr;
3153
0
  int workspace[8*7]; /* buffers data between passes */
3154
  SHIFT_TEMPS
3155
3156
  /* Pass 1: process columns from input, store into work array.
3157
   * 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
3158
   */
3159
3160
0
  inptr = coef_block;
3161
0
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3162
0
  wsptr = workspace;
3163
0
  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
3164
    /* Even part */
3165
3166
0
    tmp23 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3167
0
    tmp23 <<= CONST_BITS;
3168
    /* Add fudge factor here for final descale. */
3169
0
    tmp23 += ONE << (CONST_BITS-PASS1_BITS-1);
3170
3171
0
    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3172
0
    z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
3173
0
    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
3174
3175
0
    tmp20 = MULTIPLY(z2 - z3, FIX(0.881747734));       /* c4 */
3176
0
    tmp22 = MULTIPLY(z1 - z2, FIX(0.314692123));       /* c6 */
3177
0
    tmp21 = tmp20 + tmp22 + tmp23 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
3178
0
    tmp10 = z1 + z3;
3179
0
    z2 -= tmp10;
3180
0
    tmp10 = MULTIPLY(tmp10, FIX(1.274162392)) + tmp23; /* c2 */
3181
0
    tmp20 += tmp10 - MULTIPLY(z3, FIX(0.077722536));   /* c2-c4-c6 */
3182
0
    tmp22 += tmp10 - MULTIPLY(z1, FIX(2.470602249));   /* c2+c4+c6 */
3183
0
    tmp23 += MULTIPLY(z2, FIX(1.414213562));           /* c0 */
3184
3185
    /* Odd part */
3186
3187
0
    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3188
0
    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
3189
0
    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
3190
3191
0
    tmp11 = MULTIPLY(z1 + z2, FIX(0.935414347));       /* (c3+c1-c5)/2 */
3192
0
    tmp12 = MULTIPLY(z1 - z2, FIX(0.170262339));       /* (c3+c5-c1)/2 */
3193
0
    tmp10 = tmp11 - tmp12;
3194
0
    tmp11 += tmp12;
3195
0
    tmp12 = MULTIPLY(z2 + z3, - FIX(1.378756276));     /* -c1 */
3196
0
    tmp11 += tmp12;
3197
0
    z2 = MULTIPLY(z1 + z3, FIX(0.613604268));          /* c5 */
3198
0
    tmp10 += z2;
3199
0
    tmp12 += z2 + MULTIPLY(z3, FIX(1.870828693));      /* c3+c1-c5 */
3200
3201
    /* Final output stage */
3202
3203
0
    wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
3204
0
    wsptr[8*6] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
3205
0
    wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
3206
0
    wsptr[8*5] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
3207
0
    wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
3208
0
    wsptr[8*4] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
3209
0
    wsptr[8*3] = (int) RIGHT_SHIFT(tmp23, CONST_BITS-PASS1_BITS);
3210
0
  }
3211
3212
  /* Pass 2: process 7 rows from work array, store into output array.
3213
   * 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
3214
   */
3215
3216
0
  wsptr = workspace;
3217
0
  for (ctr = 0; ctr < 7; ctr++) {
3218
0
    outptr = output_buf[ctr] + output_col;
3219
3220
    /* Even part */
3221
3222
    /* Add range center and fudge factor for final descale and range-limit. */
3223
0
    z1 = (INT32) wsptr[0] +
3224
0
     ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
3225
0
      (ONE << (PASS1_BITS+2)));
3226
0
    z1 <<= CONST_BITS;
3227
0
    z4 = (INT32) wsptr[4];
3228
0
    z2 = MULTIPLY(z4, FIX(1.274162392));         /* c4 */
3229
0
    z3 = MULTIPLY(z4, FIX(0.314692123));         /* c12 */
3230
0
    z4 = MULTIPLY(z4, FIX(0.881747734));         /* c8 */
3231
3232
0
    tmp10 = z1 + z2;
3233
0
    tmp11 = z1 + z3;
3234
0
    tmp12 = z1 - z4;
3235
3236
0
    tmp23 = z1 - ((z2 + z3 - z4) << 1);          /* c0 = (c4+c12-c8)*2 */
3237
3238
0
    z1 = (INT32) wsptr[2];
3239
0
    z2 = (INT32) wsptr[6];
3240
3241
0
    z3 = MULTIPLY(z1 + z2, FIX(1.105676686));    /* c6 */
3242
3243
0
    tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
3244
0
    tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
3245
0
    tmp15 = MULTIPLY(z1, FIX(0.613604268)) -     /* c10 */
3246
0
      MULTIPLY(z2, FIX(1.378756276));      /* c2 */
3247
3248
0
    tmp20 = tmp10 + tmp13;
3249
0
    tmp26 = tmp10 - tmp13;
3250
0
    tmp21 = tmp11 + tmp14;
3251
0
    tmp25 = tmp11 - tmp14;
3252
0
    tmp22 = tmp12 + tmp15;
3253
0
    tmp24 = tmp12 - tmp15;
3254
3255
    /* Odd part */
3256
3257
0
    z1 = (INT32) wsptr[1];
3258
0
    z2 = (INT32) wsptr[3];
3259
0
    z3 = (INT32) wsptr[5];
3260
0
    z4 = (INT32) wsptr[7];
3261
0
    z4 <<= CONST_BITS;
3262
3263
0
    tmp14 = z1 + z3;
3264
0
    tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607));           /* c3 */
3265
0
    tmp12 = MULTIPLY(tmp14, FIX(1.197448846));             /* c5 */
3266
0
    tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
3267
0
    tmp14 = MULTIPLY(tmp14, FIX(0.752406978));             /* c9 */
3268
0
    tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426));        /* c9+c11-c13 */
3269
0
    z1    -= z2;
3270
0
    tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4;           /* c11 */
3271
0
    tmp16 += tmp15;
3272
0
    tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4;    /* -c13 */
3273
0
    tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948));       /* c3-c9-c13 */
3274
0
    tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773));       /* c3+c5-c13 */
3275
0
    tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284));           /* c1 */
3276
0
    tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
3277
0
    tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567));       /* c1+c11-c5 */
3278
3279
0
    tmp13 = ((z1 - z3) << CONST_BITS) + z4;
3280
3281
    /* Final output stage */
3282
3283
0
    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
3284
0
                 CONST_BITS+PASS1_BITS+3)
3285
0
           & RANGE_MASK];
3286
0
    outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
3287
0
                 CONST_BITS+PASS1_BITS+3)
3288
0
           & RANGE_MASK];
3289
0
    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
3290
0
                 CONST_BITS+PASS1_BITS+3)
3291
0
           & RANGE_MASK];
3292
0
    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
3293
0
                 CONST_BITS+PASS1_BITS+3)
3294
0
           & RANGE_MASK];
3295
0
    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
3296
0
                 CONST_BITS+PASS1_BITS+3)
3297
0
           & RANGE_MASK];
3298
0
    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
3299
0
                 CONST_BITS+PASS1_BITS+3)
3300
0
           & RANGE_MASK];
3301
0
    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
3302
0
                 CONST_BITS+PASS1_BITS+3)
3303
0
           & RANGE_MASK];
3304
0
    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
3305
0
                 CONST_BITS+PASS1_BITS+3)
3306
0
           & RANGE_MASK];
3307
0
    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
3308
0
                 CONST_BITS+PASS1_BITS+3)
3309
0
           & RANGE_MASK];
3310
0
    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
3311
0
                 CONST_BITS+PASS1_BITS+3)
3312
0
           & RANGE_MASK];
3313
0
    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
3314
0
                 CONST_BITS+PASS1_BITS+3)
3315
0
           & RANGE_MASK];
3316
0
    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
3317
0
                 CONST_BITS+PASS1_BITS+3)
3318
0
           & RANGE_MASK];
3319
0
    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
3320
0
                 CONST_BITS+PASS1_BITS+3)
3321
0
           & RANGE_MASK];
3322
0
    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
3323
0
                 CONST_BITS+PASS1_BITS+3)
3324
0
           & RANGE_MASK];
3325
3326
0
    wsptr += 8;   /* advance pointer to next row */
3327
0
  }
3328
0
}
3329
3330
3331
/*
3332
 * Perform dequantization and inverse DCT on one block of coefficients,
3333
 * producing a 12x6 output block.
3334
 *
3335
 * 6-point IDCT in pass 1 (columns), 12-point in pass 2 (rows).
3336
 */
3337
3338
GLOBAL(void)
3339
jpeg_idct_12x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3340
    JCOEFPTR coef_block,
3341
    JSAMPARRAY output_buf, JDIMENSION output_col)
3342
0
{
3343
0
  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
3344
0
  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
3345
0
  INT32 z1, z2, z3, z4;
3346
0
  JCOEFPTR inptr;
3347
0
  ISLOW_MULT_TYPE * quantptr;
3348
0
  int * wsptr;
3349
0
  JSAMPROW outptr;
3350
0
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3351
0
  int ctr;
3352
0
  int workspace[8*6]; /* buffers data between passes */
3353
  SHIFT_TEMPS
3354
3355
  /* Pass 1: process columns from input, store into work array.
3356
   * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
3357
   */
3358
3359
0
  inptr = coef_block;
3360
0
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3361
0
  wsptr = workspace;
3362
0
  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
3363
    /* Even part */
3364
3365
0
    tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3366
0
    tmp10 <<= CONST_BITS;
3367
    /* Add fudge factor here for final descale. */
3368
0
    tmp10 += ONE << (CONST_BITS-PASS1_BITS-1);
3369
0
    tmp12 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
3370
0
    tmp20 = MULTIPLY(tmp12, FIX(0.707106781));   /* c4 */
3371
0
    tmp11 = tmp10 + tmp20;
3372
0
    tmp21 = RIGHT_SHIFT(tmp10 - tmp20 - tmp20, CONST_BITS-PASS1_BITS);
3373
0
    tmp20 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3374
0
    tmp10 = MULTIPLY(tmp20, FIX(1.224744871));   /* c2 */
3375
0
    tmp20 = tmp11 + tmp10;
3376
0
    tmp22 = tmp11 - tmp10;
3377
3378
    /* Odd part */
3379
3380
0
    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3381
0
    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
3382
0
    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
3383
0
    tmp11 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
3384
0
    tmp10 = tmp11 + ((z1 + z2) << CONST_BITS);
3385
0
    tmp12 = tmp11 + ((z3 - z2) << CONST_BITS);
3386
0
    tmp11 = (z1 - z2 - z3) << PASS1_BITS;
3387
3388
    /* Final output stage */
3389
3390
0
    wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
3391
0
    wsptr[8*5] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
3392
0
    wsptr[8*1] = (int) (tmp21 + tmp11);
3393
0
    wsptr[8*4] = (int) (tmp21 - tmp11);
3394
0
    wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
3395
0
    wsptr[8*3] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
3396
0
  }
3397
3398
  /* Pass 2: process 6 rows from work array, store into output array.
3399
   * 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
3400
   */
3401
3402
0
  wsptr = workspace;
3403
0
  for (ctr = 0; ctr < 6; ctr++) {
3404
0
    outptr = output_buf[ctr] + output_col;
3405
3406
    /* Even part */
3407
3408
    /* Add range center and fudge factor for final descale and range-limit. */
3409
0
    z3 = (INT32) wsptr[0] +
3410
0
     ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
3411
0
      (ONE << (PASS1_BITS+2)));
3412
0
    z3 <<= CONST_BITS;
3413
3414
0
    z4 = (INT32) wsptr[4];
3415
0
    z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
3416
3417
0
    tmp10 = z3 + z4;
3418
0
    tmp11 = z3 - z4;
3419
3420
0
    z1 = (INT32) wsptr[2];
3421
0
    z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
3422
0
    z1 <<= CONST_BITS;
3423
0
    z2 = (INT32) wsptr[6];
3424
0
    z2 <<= CONST_BITS;
3425
3426
0
    tmp12 = z1 - z2;
3427
3428
0
    tmp21 = z3 + tmp12;
3429
0
    tmp24 = z3 - tmp12;
3430
3431
0
    tmp12 = z4 + z2;
3432
3433
0
    tmp20 = tmp10 + tmp12;
3434
0
    tmp25 = tmp10 - tmp12;
3435
3436
0
    tmp12 = z4 - z1 - z2;
3437
3438
0
    tmp22 = tmp11 + tmp12;
3439
0
    tmp23 = tmp11 - tmp12;
3440
3441
    /* Odd part */
3442
3443
0
    z1 = (INT32) wsptr[1];
3444
0
    z2 = (INT32) wsptr[3];
3445
0
    z3 = (INT32) wsptr[5];
3446
0
    z4 = (INT32) wsptr[7];
3447
3448
0
    tmp11 = MULTIPLY(z2, FIX(1.306562965));                  /* c3 */
3449
0
    tmp14 = MULTIPLY(z2, - FIX_0_541196100);                 /* -c9 */
3450
3451
0
    tmp10 = z1 + z3;
3452
0
    tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669));          /* c7 */
3453
0
    tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384));       /* c5-c7 */
3454
0
    tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716));  /* c1-c5 */
3455
0
    tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580));           /* -(c7+c11) */
3456
0
    tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
3457
0
    tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
3458
0
    tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) -        /* c7-c11 */
3459
0
       MULTIPLY(z4, FIX(1.982889723));                 /* c5+c7 */
3460
3461
0
    z1 -= z4;
3462
0
    z2 -= z3;
3463
0
    z3 = MULTIPLY(z1 + z2, FIX_0_541196100);                 /* c9 */
3464
0
    tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865);              /* c3-c9 */
3465
0
    tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065);              /* c3+c9 */
3466
3467
    /* Final output stage */
3468
3469
0
    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
3470
0
                 CONST_BITS+PASS1_BITS+3)
3471
0
           & RANGE_MASK];
3472
0
    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
3473
0
                 CONST_BITS+PASS1_BITS+3)
3474
0
           & RANGE_MASK];
3475
0
    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
3476
0
                 CONST_BITS+PASS1_BITS+3)
3477
0
           & RANGE_MASK];
3478
0
    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
3479
0
                 CONST_BITS+PASS1_BITS+3)
3480
0
           & RANGE_MASK];
3481
0
    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
3482
0
                 CONST_BITS+PASS1_BITS+3)
3483
0
           & RANGE_MASK];
3484
0
    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
3485
0
                 CONST_BITS+PASS1_BITS+3)
3486
0
           & RANGE_MASK];
3487
0
    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
3488
0
                 CONST_BITS+PASS1_BITS+3)
3489
0
           & RANGE_MASK];
3490
0
    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
3491
0
                 CONST_BITS+PASS1_BITS+3)
3492
0
           & RANGE_MASK];
3493
0
    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
3494
0
                 CONST_BITS+PASS1_BITS+3)
3495
0
           & RANGE_MASK];
3496
0
    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
3497
0
                 CONST_BITS+PASS1_BITS+3)
3498
0
           & RANGE_MASK];
3499
0
    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
3500
0
                 CONST_BITS+PASS1_BITS+3)
3501
0
           & RANGE_MASK];
3502
0
    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
3503
0
                 CONST_BITS+PASS1_BITS+3)
3504
0
           & RANGE_MASK];
3505
3506
0
    wsptr += 8;   /* advance pointer to next row */
3507
0
  }
3508
0
}
3509
3510
3511
/*
3512
 * Perform dequantization and inverse DCT on one block of coefficients,
3513
 * producing a 10x5 output block.
3514
 *
3515
 * 5-point IDCT in pass 1 (columns), 10-point in pass 2 (rows).
3516
 */
3517
3518
GLOBAL(void)
3519
jpeg_idct_10x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3520
    JCOEFPTR coef_block,
3521
    JSAMPARRAY output_buf, JDIMENSION output_col)
3522
0
{
3523
0
  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
3524
0
  INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
3525
0
  INT32 z1, z2, z3, z4;
3526
0
  JCOEFPTR inptr;
3527
0
  ISLOW_MULT_TYPE * quantptr;
3528
0
  int * wsptr;
3529
0
  JSAMPROW outptr;
3530
0
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3531
0
  int ctr;
3532
0
  int workspace[8*5]; /* buffers data between passes */
3533
  SHIFT_TEMPS
3534
3535
  /* Pass 1: process columns from input, store into work array.
3536
   * 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
3537
   */
3538
3539
0
  inptr = coef_block;
3540
0
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3541
0
  wsptr = workspace;
3542
0
  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
3543
    /* Even part */
3544
3545
0
    tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3546
0
    tmp12 <<= CONST_BITS;
3547
    /* Add fudge factor here for final descale. */
3548
0
    tmp12 += ONE << (CONST_BITS-PASS1_BITS-1);
3549
0
    tmp13 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3550
0
    tmp14 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
3551
0
    z1 = MULTIPLY(tmp13 + tmp14, FIX(0.790569415)); /* (c2+c4)/2 */
3552
0
    z2 = MULTIPLY(tmp13 - tmp14, FIX(0.353553391)); /* (c2-c4)/2 */
3553
0
    z3 = tmp12 + z2;
3554
0
    tmp10 = z3 + z1;
3555
0
    tmp11 = z3 - z1;
3556
0
    tmp12 -= z2 << 2;
3557
3558
    /* Odd part */
3559
3560
0
    z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3561
0
    z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
3562
3563
0
    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));       /* c3 */
3564
0
    tmp13 = z1 + MULTIPLY(z2, FIX(0.513743148));    /* c1-c3 */
3565
0
    tmp14 = z1 - MULTIPLY(z3, FIX(2.176250899));    /* c1+c3 */
3566
3567
    /* Final output stage */
3568
3569
0
    wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp13, CONST_BITS-PASS1_BITS);
3570
0
    wsptr[8*4] = (int) RIGHT_SHIFT(tmp10 - tmp13, CONST_BITS-PASS1_BITS);
3571
0
    wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp14, CONST_BITS-PASS1_BITS);
3572
0
    wsptr[8*3] = (int) RIGHT_SHIFT(tmp11 - tmp14, CONST_BITS-PASS1_BITS);
3573
0
    wsptr[8*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS);
3574
0
  }
3575
3576
  /* Pass 2: process 5 rows from work array, store into output array.
3577
   * 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
3578
   */
3579
3580
0
  wsptr = workspace;
3581
0
  for (ctr = 0; ctr < 5; ctr++) {
3582
0
    outptr = output_buf[ctr] + output_col;
3583
3584
    /* Even part */
3585
3586
    /* Add range center and fudge factor for final descale and range-limit. */
3587
0
    z3 = (INT32) wsptr[0] +
3588
0
     ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
3589
0
      (ONE << (PASS1_BITS+2)));
3590
0
    z3 <<= CONST_BITS;
3591
0
    z4 = (INT32) wsptr[4];
3592
0
    z1 = MULTIPLY(z4, FIX(1.144122806));         /* c4 */
3593
0
    z2 = MULTIPLY(z4, FIX(0.437016024));         /* c8 */
3594
0
    tmp10 = z3 + z1;
3595
0
    tmp11 = z3 - z2;
3596
3597
0
    tmp22 = z3 - ((z1 - z2) << 1);               /* c0 = (c4-c8)*2 */
3598
3599
0
    z2 = (INT32) wsptr[2];
3600
0
    z3 = (INT32) wsptr[6];
3601
3602
0
    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));    /* c6 */
3603
0
    tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
3604
0
    tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
3605
3606
0
    tmp20 = tmp10 + tmp12;
3607
0
    tmp24 = tmp10 - tmp12;
3608
0
    tmp21 = tmp11 + tmp13;
3609
0
    tmp23 = tmp11 - tmp13;
3610
3611
    /* Odd part */
3612
3613
0
    z1 = (INT32) wsptr[1];
3614
0
    z2 = (INT32) wsptr[3];
3615
0
    z3 = (INT32) wsptr[5];
3616
0
    z3 <<= CONST_BITS;
3617
0
    z4 = (INT32) wsptr[7];
3618
3619
0
    tmp11 = z2 + z4;
3620
0
    tmp13 = z2 - z4;
3621
3622
0
    tmp12 = MULTIPLY(tmp13, FIX(0.309016994));        /* (c3-c7)/2 */
3623
3624
0
    z2 = MULTIPLY(tmp11, FIX(0.951056516));           /* (c3+c7)/2 */
3625
0
    z4 = z3 + tmp12;
3626
3627
0
    tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
3628
0
    tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
3629
3630
0
    z2 = MULTIPLY(tmp11, FIX(0.587785252));           /* (c1-c9)/2 */
3631
0
    z4 = z3 - tmp12 - (tmp13 << (CONST_BITS - 1));
3632
3633
0
    tmp12 = ((z1 - tmp13) << CONST_BITS) - z3;
3634
3635
0
    tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
3636
0
    tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
3637
3638
    /* Final output stage */
3639
3640
0
    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
3641
0
                CONST_BITS+PASS1_BITS+3)
3642
0
          & RANGE_MASK];
3643
0
    outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
3644
0
                CONST_BITS+PASS1_BITS+3)
3645
0
          & RANGE_MASK];
3646
0
    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
3647
0
                CONST_BITS+PASS1_BITS+3)
3648
0
          & RANGE_MASK];
3649
0
    outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
3650
0
                CONST_BITS+PASS1_BITS+3)
3651
0
          & RANGE_MASK];
3652
0
    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
3653
0
                CONST_BITS+PASS1_BITS+3)
3654
0
          & RANGE_MASK];
3655
0
    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
3656
0
                CONST_BITS+PASS1_BITS+3)
3657
0
          & RANGE_MASK];
3658
0
    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
3659
0
                CONST_BITS+PASS1_BITS+3)
3660
0
          & RANGE_MASK];
3661
0
    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
3662
0
                CONST_BITS+PASS1_BITS+3)
3663
0
          & RANGE_MASK];
3664
0
    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
3665
0
                CONST_BITS+PASS1_BITS+3)
3666
0
          & RANGE_MASK];
3667
0
    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
3668
0
                CONST_BITS+PASS1_BITS+3)
3669
0
          & RANGE_MASK];
3670
3671
0
    wsptr += 8;   /* advance pointer to next row */
3672
0
  }
3673
0
}
3674
3675
3676
/*
3677
 * Perform dequantization and inverse DCT on one block of coefficients,
3678
 * producing an 8x4 output block.
3679
 *
3680
 * 4-point IDCT in pass 1 (columns), 8-point in pass 2 (rows).
3681
 */
3682
3683
GLOBAL(void)
3684
jpeg_idct_8x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3685
         JCOEFPTR coef_block,
3686
         JSAMPARRAY output_buf, JDIMENSION output_col)
3687
0
{
3688
0
  INT32 tmp0, tmp1, tmp2, tmp3;
3689
0
  INT32 tmp10, tmp11, tmp12, tmp13;
3690
0
  INT32 z1, z2, z3;
3691
0
  JCOEFPTR inptr;
3692
0
  ISLOW_MULT_TYPE * quantptr;
3693
0
  int * wsptr;
3694
0
  JSAMPROW outptr;
3695
0
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3696
0
  int ctr;
3697
0
  int workspace[8*4]; /* buffers data between passes */
3698
  SHIFT_TEMPS
3699
3700
  /* Pass 1: process columns from input, store into work array.
3701
   * 4-point IDCT kernel,
3702
   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
3703
   */
3704
3705
0
  inptr = coef_block;
3706
0
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3707
0
  wsptr = workspace;
3708
0
  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
3709
    /* Even part */
3710
3711
0
    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3712
0
    tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3713
3714
0
    tmp10 = (tmp0 + tmp2) << PASS1_BITS;
3715
0
    tmp12 = (tmp0 - tmp2) << PASS1_BITS;
3716
3717
    /* Odd part */
3718
    /* Same rotation as in the even part of the 8x8 LL&M IDCT */
3719
3720
0
    z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3721
0
    z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
3722
3723
0
    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);               /* c6 */
3724
    /* Add fudge factor here for final descale. */
3725
0
    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
3726
0
    tmp0 = RIGHT_SHIFT(z1 + MULTIPLY(z2, FIX_0_765366865), /* c2-c6 */
3727
0
           CONST_BITS-PASS1_BITS);
3728
0
    tmp2 = RIGHT_SHIFT(z1 - MULTIPLY(z3, FIX_1_847759065), /* c2+c6 */
3729
0
           CONST_BITS-PASS1_BITS);
3730
3731
    /* Final output stage */
3732
3733
0
    wsptr[8*0] = (int) (tmp10 + tmp0);
3734
0
    wsptr[8*3] = (int) (tmp10 - tmp0);
3735
0
    wsptr[8*1] = (int) (tmp12 + tmp2);
3736
0
    wsptr[8*2] = (int) (tmp12 - tmp2);
3737
0
  }
3738
3739
  /* Pass 2: process rows from work array, store into output array.
3740
   * Note that we must descale the results by a factor of 8 == 2**3,
3741
   * and also undo the PASS1_BITS scaling.
3742
   * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
3743
   */
3744
3745
0
  wsptr = workspace;
3746
0
  for (ctr = 0; ctr < 4; ctr++) {
3747
0
    outptr = output_buf[ctr] + output_col;
3748
3749
    /* Even part: reverse the even part of the forward DCT.
3750
     * The rotator is c(-6).
3751
     */
3752
3753
    /* Add range center and fudge factor for final descale and range-limit. */
3754
0
    z2 = (INT32) wsptr[0] +
3755
0
     ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
3756
0
      (ONE << (PASS1_BITS+2)));
3757
0
    z3 = (INT32) wsptr[4];
3758
3759
0
    tmp0 = (z2 + z3) << CONST_BITS;
3760
0
    tmp1 = (z2 - z3) << CONST_BITS;
3761
3762
0
    z2 = (INT32) wsptr[2];
3763
0
    z3 = (INT32) wsptr[6];
3764
3765
0
    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
3766
0
    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
3767
0
    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
3768
3769
0
    tmp10 = tmp0 + tmp2;
3770
0
    tmp13 = tmp0 - tmp2;
3771
0
    tmp11 = tmp1 + tmp3;
3772
0
    tmp12 = tmp1 - tmp3;
3773
3774
    /* Odd part per figure 8; the matrix is unitary and hence its
3775
     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
3776
     */
3777
3778
0
    tmp0 = (INT32) wsptr[7];
3779
0
    tmp1 = (INT32) wsptr[5];
3780
0
    tmp2 = (INT32) wsptr[3];
3781
0
    tmp3 = (INT32) wsptr[1];
3782
3783
0
    z2 = tmp0 + tmp2;
3784
0
    z3 = tmp1 + tmp3;
3785
3786
0
    z1 = MULTIPLY(z2 + z3, FIX_1_175875602);       /*  c3 */
3787
0
    z2 = MULTIPLY(z2, - FIX_1_961570560);          /* -c3-c5 */
3788
0
    z3 = MULTIPLY(z3, - FIX_0_390180644);          /* -c3+c5 */
3789
0
    z2 += z1;
3790
0
    z3 += z1;
3791
3792
0
    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
3793
0
    tmp0 = MULTIPLY(tmp0, FIX_0_298631336);        /* -c1+c3+c5-c7 */
3794
0
    tmp3 = MULTIPLY(tmp3, FIX_1_501321110);        /*  c1+c3-c5-c7 */
3795
0
    tmp0 += z1 + z2;
3796
0
    tmp3 += z1 + z3;
3797
3798
0
    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
3799
0
    tmp1 = MULTIPLY(tmp1, FIX_2_053119869);        /*  c1+c3-c5+c7 */
3800
0
    tmp2 = MULTIPLY(tmp2, FIX_3_072711026);        /*  c1+c3+c5-c7 */
3801
0
    tmp1 += z1 + z3;
3802
0
    tmp2 += z1 + z2;
3803
3804
    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
3805
3806
0
    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
3807
0
                CONST_BITS+PASS1_BITS+3)
3808
0
          & RANGE_MASK];
3809
0
    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
3810
0
                CONST_BITS+PASS1_BITS+3)
3811
0
          & RANGE_MASK];
3812
0
    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
3813
0
                CONST_BITS+PASS1_BITS+3)
3814
0
          & RANGE_MASK];
3815
0
    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
3816
0
                CONST_BITS+PASS1_BITS+3)
3817
0
          & RANGE_MASK];
3818
0
    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
3819
0
                CONST_BITS+PASS1_BITS+3)
3820
0
          & RANGE_MASK];
3821
0
    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
3822
0
                CONST_BITS+PASS1_BITS+3)
3823
0
          & RANGE_MASK];
3824
0
    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
3825
0
                CONST_BITS+PASS1_BITS+3)
3826
0
          & RANGE_MASK];
3827
0
    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
3828
0
                CONST_BITS+PASS1_BITS+3)
3829
0
          & RANGE_MASK];
3830
3831
0
    wsptr += DCTSIZE;   /* advance pointer to next row */
3832
0
  }
3833
0
}
3834
3835
3836
/*
3837
 * Perform dequantization and inverse DCT on one block of coefficients,
3838
 * producing a 6x3 output block.
3839
 *
3840
 * 3-point IDCT in pass 1 (columns), 6-point in pass 2 (rows).
 *
 * Parameters, as used by the code below:
 *   cinfo      - only passed to IDCT_range_limit() to obtain the
 *                range-limit lookup table.
 *   compptr    - compptr->dct_table is read as an ISLOW_MULT_TYPE table of
 *                dequantization multipliers.
 *   coef_block - input coefficients, addressed with a row stride of DCTSIZE;
 *                only rows 0..2 of the first 6 columns are read.
 *   output_buf - output sample rows; rows 0..2 are written.
 *   output_col - offset added to each row pointer; 6 samples are stored
 *                starting at that offset.
3841
 */
3842
3843
GLOBAL(void)
3844
jpeg_idct_6x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3845
         JCOEFPTR coef_block,
3846
         JSAMPARRAY output_buf, JDIMENSION output_col)
3847
0
{
3848
0
  INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
3849
0
  INT32 z1, z2, z3;
3850
0
  JCOEFPTR inptr;
3851
0
  ISLOW_MULT_TYPE * quantptr;
3852
0
  int * wsptr;
3853
0
  JSAMPROW outptr;
3854
0
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3855
0
  int ctr;
3856
0
  int workspace[6*3]; /* buffers data between passes */
3857
  SHIFT_TEMPS
3858
3859
  /* Pass 1: process columns from input, store into work array.
3860
   * 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
3861
   */
3862
3863
0
  inptr = coef_block;
3864
0
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3865
0
  wsptr = workspace;
3866
0
  for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
3867
    /* Even part */
3868
3869
0
    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3870
0
    tmp0 <<= CONST_BITS;
    /* NOTE(review): tmp0 is a signed INT32 and is presumably negative for a
     * negative DC term; left-shifting a negative value is undefined in
     * ISO C99 and later -- confirm this is acceptable for the compilers
     * this build targets.
     */
3871
    /* Add fudge factor here for final descale. */
3872
0
    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
3873
0
    tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3874
0
    tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
3875
0
    tmp10 = tmp0 + tmp12;
3876
0
    tmp2 = tmp0 - tmp12 - tmp12;
3877
3878
    /* Odd part */
3879
3880
0
    tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3881
0
    tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
3882
3883
    /* Final output stage */
    /* The DC path was shifted left by CONST_BITS above, so shifting right by
     * CONST_BITS-PASS1_BITS leaves the stored values scaled up by
     * 2**PASS1_BITS.  The workspace is laid out with a row stride of 6.
     */
3884
3885
0
    wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
3886
0
    wsptr[6*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
3887
0
    wsptr[6*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
3888
0
  }
3889
  
3890
  /* Pass 2: process 3 rows from work array, store into output array.
3891
   * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
3892
   */
3893
3894
0
  wsptr = workspace;
3895
0
  for (ctr = 0; ctr < 3; ctr++) {
3896
0
    outptr = output_buf[ctr] + output_col;
3897
3898
    /* Even part */
3899
3900
    /* Add range center and fudge factor for final descale and range-limit. */
3901
0
    tmp0 = (INT32) wsptr[0] +
3902
0
       ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
3903
0
        (ONE << (PASS1_BITS+2)));
3904
0
    tmp0 <<= CONST_BITS;
3905
0
    tmp2 = (INT32) wsptr[4];
3906
0
    tmp10 = MULTIPLY(tmp2, FIX(0.707106781));   /* c4 */
3907
0
    tmp1 = tmp0 + tmp10;
3908
0
    tmp11 = tmp0 - tmp10 - tmp10;
3909
0
    tmp10 = (INT32) wsptr[2];
3910
0
    tmp0 = MULTIPLY(tmp10, FIX(1.224744871));   /* c2 */
3911
0
    tmp10 = tmp1 + tmp0;
3912
0
    tmp12 = tmp1 - tmp0;
3913
3914
    /* Odd part */
3915
3916
0
    z1 = (INT32) wsptr[1];
3917
0
    z2 = (INT32) wsptr[3];
3918
0
    z3 = (INT32) wsptr[5];
3919
0
    tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
3920
0
    tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
3921
0
    tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
3922
0
    tmp1 = (z1 - z2 - z3) << CONST_BITS;
3923
3924
    /* Final output stage */
    /* Each sample is the even part plus or minus the odd part, shifted right
     * by CONST_BITS+PASS1_BITS+3; the shifted value, masked with RANGE_MASK,
     * is used as the index into the range_limit table.
     */
3925
3926
0
    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
3927
0
                CONST_BITS+PASS1_BITS+3)
3928
0
          & RANGE_MASK];
3929
0
    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
3930
0
                CONST_BITS+PASS1_BITS+3)
3931
0
          & RANGE_MASK];
3932
0
    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
3933
0
                CONST_BITS+PASS1_BITS+3)
3934
0
          & RANGE_MASK];
3935
0
    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
3936
0
                CONST_BITS+PASS1_BITS+3)
3937
0
          & RANGE_MASK];
3938
0
    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
3939
0
                CONST_BITS+PASS1_BITS+3)
3940
0
          & RANGE_MASK];
3941
0
    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
3942
0
                CONST_BITS+PASS1_BITS+3)
3943
0
          & RANGE_MASK];
3944
3945
0
    wsptr += 6;   /* advance pointer to next row */
3946
0
  }
3947
0
}
3948
3949
3950
/*
3951
 * Perform dequantization and inverse DCT on one block of coefficients,
3952
 * producing a 4x2 output block.
3953
 *
3954
 * 2-point IDCT in pass 1 (columns), 4-point in pass 2 (rows).
 *
 * Parameters, as used by the code below:
 *   cinfo      - only passed to IDCT_range_limit() to obtain the
 *                range-limit lookup table.
 *   compptr    - compptr->dct_table is read as an ISLOW_MULT_TYPE table of
 *                dequantization multipliers.
 *   coef_block - input coefficients, addressed with a row stride of DCTSIZE;
 *                only rows 0..1 of the first 4 columns are read.
 *   output_buf - output sample rows; rows 0..1 are written.
 *   output_col - offset added to each row pointer; 4 samples are stored
 *                starting at that offset.
3955
 */
3956
3957
GLOBAL(void)
3958
jpeg_idct_4x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3959
         JCOEFPTR coef_block,
3960
         JSAMPARRAY output_buf, JDIMENSION output_col)
3961
0
{
3962
0
  INT32 tmp0, tmp2, tmp10, tmp12;
3963
0
  INT32 z1, z2, z3;
3964
0
  JCOEFPTR inptr;
3965
0
  ISLOW_MULT_TYPE * quantptr;
3966
0
  INT32 * wsptr;
3967
0
  JSAMPROW outptr;
3968
0
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3969
0
  int ctr;
3970
0
  INT32 workspace[4*2]; /* buffers data between passes */
3971
  SHIFT_TEMPS
3972
3973
  /* Pass 1: process columns from input, store into work array. */
  /* The 2-point transform is just a sum and a difference of the two
   * dequantized coefficients; no CONST_BITS or PASS1_BITS scaling is applied
   * here, which is why pass 2 descales by CONST_BITS+3 only.
   */
3974
3975
0
  inptr = coef_block;
3976
0
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3977
0
  wsptr = workspace;
3978
0
  for (ctr = 0; ctr < 4; ctr++, inptr++, quantptr++, wsptr++) {
3979
    /* Even part */
3980
3981
0
    tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3982
3983
    /* Odd part */
3984
3985
0
    tmp0 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3986
3987
    /* Final output stage */
3988
3989
0
    wsptr[4*0] = tmp10 + tmp0;
3990
0
    wsptr[4*1] = tmp10 - tmp0;
3991
0
  }
3992
3993
  /* Pass 2: process 2 rows from work array, store into output array.
3994
   * 4-point IDCT kernel,
3995
   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
3996
   */
3997
3998
0
  wsptr = workspace;
3999
0
  for (ctr = 0; ctr < 2; ctr++) {
4000
0
    outptr = output_buf[ctr] + output_col;
4001
4002
    /* Even part */
4003
4004
    /* Add range center and fudge factor for final descale and range-limit. */
4005
0
    tmp0 = wsptr[0] + ((((INT32) RANGE_CENTER) << 3) + (ONE << 2));
4006
0
    tmp2 = wsptr[2];
4007
4008
0
    tmp10 = (tmp0 + tmp2) << CONST_BITS;
4009
0
    tmp12 = (tmp0 - tmp2) << CONST_BITS;
    /* NOTE(review): the operands are signed INT32; if a sum or difference
     * can be negative here, the left shift is undefined in ISO C99 and
     * later -- confirm against the value range of the workspace.
     */
4010
4011
    /* Odd part */
4012
    /* Same rotation as in the even part of the 8x8 LL&M IDCT */
4013
4014
0
    z2 = wsptr[1];
4015
0
    z3 = wsptr[3];
4016
4017
0
    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);   /* c6 */
4018
0
    tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
4019
0
    tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
4020
4021
    /* Final output stage */
    /* Each sample is the even part plus or minus the odd part, shifted right
     * by CONST_BITS+3; the shifted value, masked with RANGE_MASK, is used as
     * the index into the range_limit table.
     */
4022
4023
0
    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
4024
0
                CONST_BITS+3)
4025
0
          & RANGE_MASK];
4026
0
    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
4027
0
                CONST_BITS+3)
4028
0
          & RANGE_MASK];
4029
0
    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
4030
0
                CONST_BITS+3)
4031
0
          & RANGE_MASK];
4032
0
    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
4033
0
                CONST_BITS+3)
4034
0
          & RANGE_MASK];
4035
4036
0
    wsptr += 4;   /* advance pointer to next row */
4037
0
  }
4038
0
}
4039
4040
4041
/*
4042
 * Perform dequantization and inverse DCT on one block of coefficients,
4043
 * producing a 2x1 output block.
4044
 *
4045
 * 1-point IDCT in pass 1 (columns), 2-point in pass 2 (rows).
 *
 * Parameters, as used by the code below:
 *   cinfo      - only passed to IDCT_range_limit() to obtain the
 *                range-limit lookup table.
 *   compptr    - compptr->dct_table is read as an ISLOW_MULT_TYPE table of
 *                dequantization multipliers; entries 0 and 1 are used.
 *   coef_block - input coefficients; only coef_block[0] and coef_block[1]
 *                are read.
 *   output_buf - output sample rows; only row 0 is written.
 *   output_col - offset added to the row pointer; 2 samples are stored
 *                starting at that offset.
4046
 */
4047
4048
GLOBAL(void)
4049
jpeg_idct_2x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4050
         JCOEFPTR coef_block,
4051
         JSAMPARRAY output_buf, JDIMENSION output_col)
4052
0
{
4053
0
  DCTELEM tmp0, tmp1;
4054
0
  ISLOW_MULT_TYPE * quantptr;
4055
0
  JSAMPROW outptr;
4056
0
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4057
0
  ISHIFT_TEMPS
4058
4059
  /* Pass 1: empty. */
  /* There is no workspace: the two coefficients are dequantized directly
   * in pass 2 below.
   */
4060
4061
  /* Pass 2: process 1 row from input, store into output array. */
4062
4063
0
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4064
0
  outptr = output_buf[0] + output_col;
4065
4066
  /* Even part */
4067
4068
0
  tmp0 = DEQUANTIZE(coef_block[0], quantptr[0]);
4069
  /* Add range center and fudge factor for final descale and range-limit. */
4070
0
  tmp0 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);
4071
4072
  /* Odd part */
4073
4074
0
  tmp1 = DEQUANTIZE(coef_block[1], quantptr[1]);
4075
4076
  /* Final output stage */
  /* The two samples are the sum and the difference of the even and odd
   * terms, shifted right by 3; the shifted value, masked with RANGE_MASK,
   * is used as the index into the range_limit table.
   */
4077
4078
0
  outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK];
4079
0
  outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK];
4080
0
}
4081
4082
4083
/*
4084
 * Perform dequantization and inverse DCT on one block of coefficients,
4085
 * producing an 8x16 output block.
4086
 *
4087
 * 16-point IDCT in pass 1 (columns), 8-point in pass 2 (rows).
 *
 * Parameters, as used by the code below:
 *   cinfo      - only passed to IDCT_range_limit() to obtain the
 *                range-limit lookup table.
 *   compptr    - compptr->dct_table is read as an ISLOW_MULT_TYPE table of
 *                dequantization multipliers.
 *   coef_block - input coefficients, addressed with a row stride of DCTSIZE;
 *                rows 0..7 of the first 8 columns are read.
 *   output_buf - output sample rows; rows 0..15 are written.
 *   output_col - offset added to each row pointer; 8 samples are stored
 *                starting at that offset.
4088
 */
4089
4090
GLOBAL(void)
4091
jpeg_idct_8x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4092
    JCOEFPTR coef_block,
4093
    JSAMPARRAY output_buf, JDIMENSION output_col)
4094
0
{
4095
0
  INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
4096
0
  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
4097
0
  INT32 z1, z2, z3, z4;
4098
0
  JCOEFPTR inptr;
4099
0
  ISLOW_MULT_TYPE * quantptr;
4100
0
  int * wsptr;
4101
0
  JSAMPROW outptr;
4102
0
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4103
0
  int ctr;
4104
0
  int workspace[8*16];  /* buffers data between passes */
4105
  SHIFT_TEMPS
4106
4107
  /* Pass 1: process columns from input, store into work array.
4108
   * 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
4109
   */
4110
4111
0
  inptr = coef_block;
4112
0
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4113
0
  wsptr = workspace;
4114
0
  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
4115
    /* Even part */
4116
4117
0
    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4118
0
    tmp0 <<= CONST_BITS;
    /* NOTE(review): tmp0 is a signed INT32 and is presumably negative for a
     * negative DC term; left-shifting a negative value is undefined in
     * ISO C99 and later -- confirm this is acceptable for the compilers
     * this build targets.
     */
4119
    /* Add fudge factor here for final descale. */
4120
0
    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
4121
4122
0
    z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4123
0
    tmp1 = MULTIPLY(z1, FIX(1.306562965));      /* c4[16] = c2[8] */
4124
0
    tmp2 = MULTIPLY(z1, FIX_0_541196100);       /* c12[16] = c6[8] */
4125
4126
0
    tmp10 = tmp0 + tmp1;
4127
0
    tmp11 = tmp0 - tmp1;
4128
0
    tmp12 = tmp0 + tmp2;
4129
0
    tmp13 = tmp0 - tmp2;
4130
4131
0
    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4132
0
    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
4133
0
    z3 = z1 - z2;
4134
0
    z4 = MULTIPLY(z3, FIX(0.275899379));        /* c14[16] = c7[8] */
4135
0
    z3 = MULTIPLY(z3, FIX(1.387039845));        /* c2[16] = c1[8] */
4136
4137
0
    tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447);  /* (c6+c2)[16] = (c3+c1)[8] */
4138
0
    tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223);  /* (c6-c14)[16] = (c3-c7)[8] */
4139
0
    tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
4140
0
    tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
4141
4142
0
    tmp20 = tmp10 + tmp0;
4143
0
    tmp27 = tmp10 - tmp0;
4144
0
    tmp21 = tmp12 + tmp1;
4145
0
    tmp26 = tmp12 - tmp1;
4146
0
    tmp22 = tmp13 + tmp2;
4147
0
    tmp25 = tmp13 - tmp2;
4148
0
    tmp23 = tmp11 + tmp3;
4149
0
    tmp24 = tmp11 - tmp3;
4150
4151
    /* Odd part */
4152
4153
0
    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4154
0
    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
4155
0
    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
4156
0
    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
4157
4158
0
    tmp11 = z1 + z3;
4159
4160
0
    tmp1  = MULTIPLY(z1 + z2, FIX(1.353318001));   /* c3 */
4161
0
    tmp2  = MULTIPLY(tmp11,   FIX(1.247225013));   /* c5 */
4162
0
    tmp3  = MULTIPLY(z1 + z4, FIX(1.093201867));   /* c7 */
4163
0
    tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586));   /* c9 */
4164
0
    tmp11 = MULTIPLY(tmp11,   FIX(0.666655658));   /* c11 */
4165
0
    tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528));   /* c13 */
4166
0
    tmp0  = tmp1 + tmp2 + tmp3 -
4167
0
      MULTIPLY(z1, FIX(2.286341144));        /* c7+c5+c3-c1 */
4168
0
    tmp13 = tmp10 + tmp11 + tmp12 -
4169
0
      MULTIPLY(z1, FIX(1.835730603));        /* c9+c11+c13-c15 */
4170
0
    z1    = MULTIPLY(z2 + z3, FIX(0.138617169));   /* c15 */
4171
0
    tmp1  += z1 + MULTIPLY(z2, FIX(0.071888074));  /* c9+c11-c3-c15 */
4172
0
    tmp2  += z1 - MULTIPLY(z3, FIX(1.125726048));  /* c5+c7+c15-c3 */
4173
0
    z1    = MULTIPLY(z3 - z2, FIX(1.407403738));   /* c1 */
4174
0
    tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282));  /* c1+c11-c9-c13 */
4175
0
    tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411));  /* c1+c5+c13-c7 */
4176
0
    z2    += z4;
4177
0
    z1    = MULTIPLY(z2, - FIX(0.666655658));      /* -c11 */
4178
0
    tmp1  += z1;
4179
0
    tmp3  += z1 + MULTIPLY(z4, FIX(1.065388962));  /* c3+c11+c15-c7 */
4180
0
    z2    = MULTIPLY(z2, - FIX(1.247225013));      /* -c5 */
4181
0
    tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809));  /* c1+c5+c9-c13 */
4182
0
    tmp12 += z2;
4183
0
    z2    = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
4184
0
    tmp2  += z2;
4185
0
    tmp3  += z2;
4186
0
    z2    = MULTIPLY(z4 - z3, FIX(0.410524528));   /* c13 */
4187
0
    tmp10 += z2;
4188
0
    tmp11 += z2;
4189
4190
    /* Final output stage */
    /* Workspace rows k and 15-k are the sum and the difference of the same
     * even-part term (tmp20..tmp27) and odd-part term (tmp0..tmp3,
     * tmp10..tmp13).  The workspace row stride is 8, and every value is
     * shifted right by CONST_BITS-PASS1_BITS before being stored.
     */
4191
4192
0
    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp0,  CONST_BITS-PASS1_BITS);
4193
0
    wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0,  CONST_BITS-PASS1_BITS);
4194
0
    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp1,  CONST_BITS-PASS1_BITS);
4195
0
    wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1,  CONST_BITS-PASS1_BITS);
4196
0
    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp2,  CONST_BITS-PASS1_BITS);
4197
0
    wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2,  CONST_BITS-PASS1_BITS);
4198
0
    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp3,  CONST_BITS-PASS1_BITS);
4199
0
    wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3,  CONST_BITS-PASS1_BITS);
4200
0
    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS);
4201
0
    wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS);
4202
0
    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS);
4203
0
    wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS);
4204
0
    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS);
4205
0
    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS);
4206
0
    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS);
4207
0
    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS);
4208
0
  }
4209
4210
  /* Pass 2: process rows from work array, store into output array.
4211
   * Note that we must descale the results by a factor of 8 == 2**3,
4212
   * and also undo the PASS1_BITS scaling.
4213
   * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
4214
   */
4215
4216
0
  wsptr = workspace;
4217
0
  for (ctr = 0; ctr < 16; ctr++) {
4218
0
    outptr = output_buf[ctr] + output_col;
4219
4220
    /* Even part: reverse the even part of the forward DCT.
4221
     * The rotator is c(-6).
4222
     */
4223
4224
    /* Add range center and fudge factor for final descale and range-limit. */
4225
0
    z2 = (INT32) wsptr[0] +
4226
0
     ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
4227
0
      (ONE << (PASS1_BITS+2)));
4228
0
    z3 = (INT32) wsptr[4];
4229
4230
0
    tmp0 = (z2 + z3) << CONST_BITS;
4231
0
    tmp1 = (z2 - z3) << CONST_BITS;
4232
4233
0
    z2 = (INT32) wsptr[2];
4234
0
    z3 = (INT32) wsptr[6];
4235
4236
0
    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
4237
0
    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
4238
0
    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
4239
4240
0
    tmp10 = tmp0 + tmp2;
4241
0
    tmp13 = tmp0 - tmp2;
4242
0
    tmp11 = tmp1 + tmp3;
4243
0
    tmp12 = tmp1 - tmp3;
4244
4245
    /* Odd part per figure 8; the matrix is unitary and hence its
4246
     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
4247
     */
4248
4249
0
    tmp0 = (INT32) wsptr[7];
4250
0
    tmp1 = (INT32) wsptr[5];
4251
0
    tmp2 = (INT32) wsptr[3];
4252
0
    tmp3 = (INT32) wsptr[1];
4253
4254
0
    z2 = tmp0 + tmp2;
4255
0
    z3 = tmp1 + tmp3;
4256
4257
0
    z1 = MULTIPLY(z2 + z3, FIX_1_175875602);       /*  c3 */
4258
0
    z2 = MULTIPLY(z2, - FIX_1_961570560);          /* -c3-c5 */
4259
0
    z3 = MULTIPLY(z3, - FIX_0_390180644);          /* -c3+c5 */
4260
0
    z2 += z1;
4261
0
    z3 += z1;
4262
4263
0
    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
4264
0
    tmp0 = MULTIPLY(tmp0, FIX_0_298631336);        /* -c1+c3+c5-c7 */
4265
0
    tmp3 = MULTIPLY(tmp3, FIX_1_501321110);        /*  c1+c3-c5-c7 */
4266
0
    tmp0 += z1 + z2;
4267
0
    tmp3 += z1 + z3;
4268
4269
0
    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
4270
0
    tmp1 = MULTIPLY(tmp1, FIX_2_053119869);        /*  c1+c3-c5+c7 */
4271
0
    tmp2 = MULTIPLY(tmp2, FIX_3_072711026);        /*  c1+c3+c5-c7 */
4272
0
    tmp1 += z1 + z3;
4273
0
    tmp2 += z1 + z2;
4274
4275
    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
4276
4277
0
    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
4278
0
                CONST_BITS+PASS1_BITS+3)
4279
0
          & RANGE_MASK];
4280
0
    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
4281
0
                CONST_BITS+PASS1_BITS+3)
4282
0
          & RANGE_MASK];
4283
0
    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
4284
0
                CONST_BITS+PASS1_BITS+3)
4285
0
          & RANGE_MASK];
4286
0
    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
4287
0
                CONST_BITS+PASS1_BITS+3)
4288
0
          & RANGE_MASK];
4289
0
    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
4290
0
                CONST_BITS+PASS1_BITS+3)
4291
0
          & RANGE_MASK];
4292
0
    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
4293
0
                CONST_BITS+PASS1_BITS+3)
4294
0
          & RANGE_MASK];
4295
0
    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
4296
0
                CONST_BITS+PASS1_BITS+3)
4297
0
          & RANGE_MASK];
4298
0
    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
4299
0
                CONST_BITS+PASS1_BITS+3)
4300
0
          & RANGE_MASK];
4301
4302
0
    wsptr += DCTSIZE;   /* advance pointer to next row */
4303
0
  }
4304
0
}
4305
4306
4307
/*
4308
 * Perform dequantization and inverse DCT on one block of coefficients,
4309
 * producing a 7x14 output block.
4310
 *
4311
 * 14-point IDCT in pass 1 (columns), 7-point in pass 2 (rows).
 *
 * Parameters, as used by the code below:
 *   cinfo      - only passed to IDCT_range_limit() to obtain the
 *                range-limit lookup table.
 *   compptr    - compptr->dct_table is read as an ISLOW_MULT_TYPE table of
 *                dequantization multipliers.
 *   coef_block - input coefficients, addressed with a row stride of DCTSIZE;
 *                rows 0..7 of the first 7 columns are read.
 *   output_buf - output sample rows; rows 0..13 are written.
 *   output_col - offset added to each row pointer; 7 samples are stored
 *                starting at that offset.
4312
 */
4313
4314
GLOBAL(void)
4315
jpeg_idct_7x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4316
    JCOEFPTR coef_block,
4317
    JSAMPARRAY output_buf, JDIMENSION output_col)
4318
0
{
4319
0
  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
4320
0
  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
4321
0
  INT32 z1, z2, z3, z4;
4322
0
  JCOEFPTR inptr;
4323
0
  ISLOW_MULT_TYPE * quantptr;
4324
0
  int * wsptr;
4325
0
  JSAMPROW outptr;
4326
0
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4327
0
  int ctr;
4328
0
  int workspace[7*14];  /* buffers data between passes */
4329
  SHIFT_TEMPS
4330
4331
  /* Pass 1: process columns from input, store into work array.
4332
   * 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
4333
   */
4334
4335
0
  inptr = coef_block;
4336
0
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4337
0
  wsptr = workspace;
4338
0
  for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) {
4339
    /* Even part */
4340
4341
0
    z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4342
0
    z1 <<= CONST_BITS;
    /* NOTE(review): z1 is a signed INT32 and is presumably negative for a
     * negative DC term; left-shifting a negative value is undefined in
     * ISO C99 and later -- confirm this is acceptable for the compilers
     * this build targets.
     */
4343
    /* Add fudge factor here for final descale. */
4344
0
    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
4345
0
    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4346
0
    z2 = MULTIPLY(z4, FIX(1.274162392));         /* c4 */
4347
0
    z3 = MULTIPLY(z4, FIX(0.314692123));         /* c12 */
4348
0
    z4 = MULTIPLY(z4, FIX(0.881747734));         /* c8 */
4349
4350
0
    tmp10 = z1 + z2;
4351
0
    tmp11 = z1 + z3;
4352
0
    tmp12 = z1 - z4;
4353
    /* Unlike tmp20..tmp22 and tmp24..tmp26, tmp23 is shifted down to
     * workspace precision right here; that is why workspace rows 3 and 10
     * are stored below without a further RIGHT_SHIFT.
     */
4354
0
    tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */
4355
0
      CONST_BITS-PASS1_BITS);
4356
4357
0
    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4358
0
    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
4359
4360
0
    z3 = MULTIPLY(z1 + z2, FIX(1.105676686));    /* c6 */
4361
4362
0
    tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
4363
0
    tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
4364
0
    tmp15 = MULTIPLY(z1, FIX(0.613604268)) -     /* c10 */
4365
0
      MULTIPLY(z2, FIX(1.378756276));      /* c2 */
4366
4367
0
    tmp20 = tmp10 + tmp13;
4368
0
    tmp26 = tmp10 - tmp13;
4369
0
    tmp21 = tmp11 + tmp14;
4370
0
    tmp25 = tmp11 - tmp14;
4371
0
    tmp22 = tmp12 + tmp15;
4372
0
    tmp24 = tmp12 - tmp15;
4373
4374
    /* Odd part */
4375
4376
0
    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4377
0
    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
4378
0
    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
4379
0
    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
4380
0
    tmp13 = z4 << CONST_BITS;
4381
4382
0
    tmp14 = z1 + z3;
4383
0
    tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607));           /* c3 */
4384
0
    tmp12 = MULTIPLY(tmp14, FIX(1.197448846));             /* c5 */
4385
0
    tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
4386
0
    tmp14 = MULTIPLY(tmp14, FIX(0.752406978));             /* c9 */
4387
0
    tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426));        /* c9+c11-c13 */
4388
0
    z1    -= z2;
4389
0
    tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13;        /* c11 */
4390
0
    tmp16 += tmp15;
4391
0
    z1    += z4;
4392
0
    z4    = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */
4393
0
    tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948));          /* c3-c9-c13 */
4394
0
    tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773));          /* c3+c5-c13 */
4395
0
    z4    = MULTIPLY(z3 - z2, FIX(1.405321284));           /* c1 */
    /* NOTE(review): the constant on the next line is written with ten
     * decimal digits, whereas the neighbouring FIX() arguments use nine;
     * presumably harmless, but confirm it is intentional.
     */
4396
0
    tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
4397
0
    tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567));          /* c1+c11-c5 */
4398
    /* At this point z1 holds the original z1 - z2 + z4 (see the two updates
     * of z1 above).  This odd term is scaled by PASS1_BITS only, matching
     * the already-descaled tmp23.
     */
4399
0
    tmp13 = (z1 - z3) << PASS1_BITS;
4400
4401
    /* Final output stage */
    /* Workspace rows k and 13-k are the sum and the difference of the same
     * even-part and odd-part terms; the workspace row stride is 7.  Rows 3
     * and 10 use tmp23 and tmp13, which are already at workspace precision.
     */
4402
4403
0
    wsptr[7*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
4404
0
    wsptr[7*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
4405
0
    wsptr[7*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
4406
0
    wsptr[7*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
4407
0
    wsptr[7*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
4408
0
    wsptr[7*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
4409
0
    wsptr[7*3]  = (int) (tmp23 + tmp13);
4410
0
    wsptr[7*10] = (int) (tmp23 - tmp13);
4411
0
    wsptr[7*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
4412
0
    wsptr[7*9]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
4413
0
    wsptr[7*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
4414
0
    wsptr[7*8]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
4415
0
    wsptr[7*6]  = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
4416
0
    wsptr[7*7]  = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
4417
0
  }
4418
4419
  /* Pass 2: process 14 rows from work array, store into output array.
4420
   * 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
4421
   */
4422
4423
0
  wsptr = workspace;
4424
0
  for (ctr = 0; ctr < 14; ctr++) {
4425
0
    outptr = output_buf[ctr] + output_col;
4426
4427
    /* Even part */
4428
4429
    /* Add range center and fudge factor for final descale and range-limit. */
4430
0
    tmp23 = (INT32) wsptr[0] +
4431
0
        ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
4432
0
         (ONE << (PASS1_BITS+2)));
4433
0
    tmp23 <<= CONST_BITS;
4434
4435
0
    z1 = (INT32) wsptr[2];
4436
0
    z2 = (INT32) wsptr[4];
4437
0
    z3 = (INT32) wsptr[6];
4438
4439
0
    tmp20 = MULTIPLY(z2 - z3, FIX(0.881747734));       /* c4 */
4440
0
    tmp22 = MULTIPLY(z1 - z2, FIX(0.314692123));       /* c6 */
4441
0
    tmp21 = tmp20 + tmp22 + tmp23 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
4442
0
    tmp10 = z1 + z3;
4443
0
    z2 -= tmp10;
4444
0
    tmp10 = MULTIPLY(tmp10, FIX(1.274162392)) + tmp23; /* c2 */
4445
0
    tmp20 += tmp10 - MULTIPLY(z3, FIX(0.077722536));   /* c2-c4-c6 */
4446
0
    tmp22 += tmp10 - MULTIPLY(z1, FIX(2.470602249));   /* c2+c4+c6 */
4447
0
    tmp23 += MULTIPLY(z2, FIX(1.414213562));           /* c0 */
4448
4449
    /* Odd part */
4450
4451
0
    z1 = (INT32) wsptr[1];
4452
0
    z2 = (INT32) wsptr[3];
4453
0
    z3 = (INT32) wsptr[5];
4454
4455
0
    tmp11 = MULTIPLY(z1 + z2, FIX(0.935414347));       /* (c3+c1-c5)/2 */
4456
0
    tmp12 = MULTIPLY(z1 - z2, FIX(0.170262339));       /* (c3+c5-c1)/2 */
4457
0
    tmp10 = tmp11 - tmp12;
4458
0
    tmp11 += tmp12;
4459
0
    tmp12 = MULTIPLY(z2 + z3, - FIX(1.378756276));     /* -c1 */
4460
0
    tmp11 += tmp12;
4461
0
    z2 = MULTIPLY(z1 + z3, FIX(0.613604268));          /* c5 */
4462
0
    tmp10 += z2;
4463
0
    tmp12 += z2 + MULTIPLY(z3, FIX(1.870828693));      /* c3+c1-c5 */
4464
4465
    /* Final output stage */
    /* Samples k and 6-k are the sum and the difference of an even-part and
     * an odd-part term; the centre sample outptr[3] is taken from the even
     * part tmp23 alone.
     */
4466
4467
0
    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
4468
0
                CONST_BITS+PASS1_BITS+3)
4469
0
          & RANGE_MASK];
4470
0
    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
4471
0
                CONST_BITS+PASS1_BITS+3)
4472
0
          & RANGE_MASK];
4473
0
    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
4474
0
                CONST_BITS+PASS1_BITS+3)
4475
0
          & RANGE_MASK];
4476
0
    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
4477
0
                CONST_BITS+PASS1_BITS+3)
4478
0
          & RANGE_MASK];
4479
0
    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
4480
0
                CONST_BITS+PASS1_BITS+3)
4481
0
          & RANGE_MASK];
4482
0
    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
4483
0
                CONST_BITS+PASS1_BITS+3)
4484
0
          & RANGE_MASK];
4485
0
    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23,
4486
0
                CONST_BITS+PASS1_BITS+3)
4487
0
          & RANGE_MASK];
4488
4489
0
    wsptr += 7;   /* advance pointer to next row */
4490
0
  }
4491
0
}
4492
4493
4494
/*
4495
 * Perform dequantization and inverse DCT on one block of coefficients,
4496
 * producing a 6x12 output block.
4497
 *
4498
 * 12-point IDCT in pass 1 (columns), 6-point in pass 2 (rows).
4499
 */
4500
4501
GLOBAL(void)
4502
jpeg_idct_6x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4503
    JCOEFPTR coef_block,
4504
    JSAMPARRAY output_buf, JDIMENSION output_col)
4505
0
{
4506
0
  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
4507
0
  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
4508
0
  INT32 z1, z2, z3, z4;
4509
0
  JCOEFPTR inptr;
4510
0
  ISLOW_MULT_TYPE * quantptr;
4511
0
  int * wsptr;
4512
0
  JSAMPROW outptr;
4513
0
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4514
0
  int ctr;
4515
0
  int workspace[6*12];  /* buffers data between passes */
4516
  SHIFT_TEMPS
4517
4518
  /* Pass 1: process columns from input, store into work array.
4519
   * 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
4520
   */
4521
4522
0
  inptr = coef_block;
4523
0
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4524
0
  wsptr = workspace;
4525
0
  for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
4526
    /* Even part */
4527
4528
0
    z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4529
0
    z3 <<= CONST_BITS;
4530
    /* Add fudge factor here for final descale. */
4531
0
    z3 += ONE << (CONST_BITS-PASS1_BITS-1);
4532
4533
0
    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4534
0
    z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
4535
4536
0
    tmp10 = z3 + z4;
4537
0
    tmp11 = z3 - z4;
4538
4539
0
    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4540
0
    z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
4541
0
    z1 <<= CONST_BITS;
4542
0
    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
4543
0
    z2 <<= CONST_BITS;
4544
4545
0
    tmp12 = z1 - z2;
4546
4547
0
    tmp21 = z3 + tmp12;
4548
0
    tmp24 = z3 - tmp12;
4549
4550
0
    tmp12 = z4 + z2;
4551
4552
0
    tmp20 = tmp10 + tmp12;
4553
0
    tmp25 = tmp10 - tmp12;
4554
4555
0
    tmp12 = z4 - z1 - z2;
4556
4557
0
    tmp22 = tmp11 + tmp12;
4558
0
    tmp23 = tmp11 - tmp12;
4559
4560
    /* Odd part */
4561
4562
0
    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4563
0
    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
4564
0
    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
4565
0
    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
4566
4567
0
    tmp11 = MULTIPLY(z2, FIX(1.306562965));                  /* c3 */
4568
0
    tmp14 = MULTIPLY(z2, - FIX_0_541196100);                 /* -c9 */
4569
4570
0
    tmp10 = z1 + z3;
4571
0
    tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669));          /* c7 */
4572
0
    tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384));       /* c5-c7 */
4573
0
    tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716));  /* c1-c5 */
4574
0
    tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580));           /* -(c7+c11) */
4575
0
    tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
4576
0
    tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
4577
0
    tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) -        /* c7-c11 */
4578
0
       MULTIPLY(z4, FIX(1.982889723));                 /* c5+c7 */
4579
4580
0
    z1 -= z4;
4581
0
    z2 -= z3;
4582
0
    z3 = MULTIPLY(z1 + z2, FIX_0_541196100);                 /* c9 */
4583
0
    tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865);              /* c3-c9 */
4584
0
    tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065);              /* c3+c9 */
4585
4586
    /* Final output stage */
4587
4588
0
    wsptr[6*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
4589
0
    wsptr[6*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
4590
0
    wsptr[6*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
4591
0
    wsptr[6*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
4592
0
    wsptr[6*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
4593
0
    wsptr[6*9]  = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
4594
0
    wsptr[6*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
4595
0
    wsptr[6*8]  = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
4596
0
    wsptr[6*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
4597
0
    wsptr[6*7]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
4598
0
    wsptr[6*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
4599
0
    wsptr[6*6]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
4600
0
  }
4601
4602
  /* Pass 2: process 12 rows from work array, store into output array.
4603
   * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
4604
   */
4605
4606
0
  wsptr = workspace;
4607
0
  for (ctr = 0; ctr < 12; ctr++) {
4608
0
    outptr = output_buf[ctr] + output_col;
4609
4610
    /* Even part */
4611
4612
    /* Add range center and fudge factor for final descale and range-limit. */
4613
0
    tmp10 = (INT32) wsptr[0] +
4614
0
        ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
4615
0
         (ONE << (PASS1_BITS+2)));
4616
0
    tmp10 <<= CONST_BITS;
4617
0
    tmp12 = (INT32) wsptr[4];
4618
0
    tmp20 = MULTIPLY(tmp12, FIX(0.707106781));   /* c4 */
4619
0
    tmp11 = tmp10 + tmp20;
4620
0
    tmp21 = tmp10 - tmp20 - tmp20;
4621
0
    tmp20 = (INT32) wsptr[2];
4622
0
    tmp10 = MULTIPLY(tmp20, FIX(1.224744871));   /* c2 */
4623
0
    tmp20 = tmp11 + tmp10;
4624
0
    tmp22 = tmp11 - tmp10;
4625
4626
    /* Odd part */
4627
4628
0
    z1 = (INT32) wsptr[1];
4629
0
    z2 = (INT32) wsptr[3];
4630
0
    z3 = (INT32) wsptr[5];
4631
0
    tmp11 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
4632
0
    tmp10 = tmp11 + ((z1 + z2) << CONST_BITS);
4633
0
    tmp12 = tmp11 + ((z3 - z2) << CONST_BITS);
4634
0
    tmp11 = (z1 - z2 - z3) << CONST_BITS;
4635
4636
    /* Final output stage */
4637
4638
0
    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
4639
0
                CONST_BITS+PASS1_BITS+3)
4640
0
          & RANGE_MASK];
4641
0
    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
4642
0
                CONST_BITS+PASS1_BITS+3)
4643
0
          & RANGE_MASK];
4644
0
    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
4645
0
                CONST_BITS+PASS1_BITS+3)
4646
0
          & RANGE_MASK];
4647
0
    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
4648
0
                CONST_BITS+PASS1_BITS+3)
4649
0
          & RANGE_MASK];
4650
0
    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
4651
0
                CONST_BITS+PASS1_BITS+3)
4652
0
          & RANGE_MASK];
4653
0
    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
4654
0
                CONST_BITS+PASS1_BITS+3)
4655
0
          & RANGE_MASK];
4656
4657
0
    wsptr += 6;   /* advance pointer to next row */
4658
0
  }
4659
0
}
4660
4661
4662
/*
4663
 * Perform dequantization and inverse DCT on one block of coefficients,
4664
 * producing a 5x10 output block.
4665
 *
4666
 * 10-point IDCT in pass 1 (columns), 5-point in pass 2 (rows).
 *
 * Parameters, as used by the code below:
 *   cinfo       passed to IDCT_range_limit() to obtain the range_limit
 *               lookup table used for the final sample values.
 *   compptr     its dct_table member is read as the ISLOW_MULT_TYPE
 *               multiplier table for dequantization.
 *   coef_block  input coefficients; rows 0..7 of the first 5 columns are
 *               read, with a row stride of DCTSIZE.
 *   output_buf  output rows; output_buf[0] .. output_buf[9] are written.
 *   output_col  offset added to each output row pointer; 5 samples are
 *               stored per row starting at that offset.
 *
 * NOTE(review): several intermediates are left-shifted (<<) although they
 * can presumably be negative for negative coefficients; whether that is
 * well defined depends on the INT32 type and the compiler -- confirm.
4667
 */
4668
4669
GLOBAL(void)
4670
jpeg_idct_5x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4671
    JCOEFPTR coef_block,
4672
    JSAMPARRAY output_buf, JDIMENSION output_col)
4673
0
{
4674
0
  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
4675
0
  INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
4676
0
  INT32 z1, z2, z3, z4, z5;
4677
0
  JCOEFPTR inptr;
4678
0
  ISLOW_MULT_TYPE * quantptr;
4679
0
  int * wsptr;
4680
0
  JSAMPROW outptr;
4681
0
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4682
0
  int ctr;
4683
0
  /* Laid out as 10 rows of 5 ints: pass 1 fills one column of every row
   * per iteration (wsptr[5*row]), pass 2 consumes one row at a time.
   */
  int workspace[5*10];  /* buffers data between passes */
4684
  SHIFT_TEMPS
4685
4686
  /* Pass 1: process columns from input, store into work array.
4687
   * 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
4688
   */
4689
4690
0
  inptr = coef_block;
4691
0
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4692
0
  wsptr = workspace;
4693
0
  /* One iteration per input column; all three pointers step by one element. */
  for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) {
4694
    /* Even part */
4695
4696
0
    z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4697
0
    z3 <<= CONST_BITS;
4698
    /* Add fudge factor here for final descale. */
4699
0
    z3 += ONE << (CONST_BITS-PASS1_BITS-1);
4700
0
    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4701
0
    z1 = MULTIPLY(z4, FIX(1.144122806));         /* c4 */
4702
0
    z2 = MULTIPLY(z4, FIX(0.437016024));         /* c8 */
4703
0
    tmp10 = z3 + z1;
4704
0
    tmp11 = z3 - z2;
4705
4706
0
    /* This even term is descaled immediately; the two outputs built from
     * it (rows 2 and 7) are therefore stored below without another shift.
     */
    tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1),   /* c0 = (c4-c8)*2 */
4707
0
      CONST_BITS-PASS1_BITS);
4708
4709
0
    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4710
0
    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
4711
4712
0
    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));    /* c6 */
4713
0
    tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
4714
0
    tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
4715
4716
0
    tmp20 = tmp10 + tmp12;
4717
0
    tmp24 = tmp10 - tmp12;
4718
0
    tmp21 = tmp11 + tmp13;
4719
0
    tmp23 = tmp11 - tmp13;
4720
4721
    /* Odd part */
4722
4723
0
    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4724
0
    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
4725
0
    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
4726
0
    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
4727
4728
0
    tmp11 = z2 + z4;
4729
0
    tmp13 = z2 - z4;
4730
4731
0
    tmp12 = MULTIPLY(tmp13, FIX(0.309016994));        /* (c3-c7)/2 */
4732
0
    z5 = z3 << CONST_BITS;
4733
4734
0
    z2 = MULTIPLY(tmp11, FIX(0.951056516));           /* (c3+c7)/2 */
4735
0
    z4 = z5 + tmp12;
4736
4737
0
    tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
4738
0
    tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
4739
4740
0
    z2 = MULTIPLY(tmp11, FIX(0.587785252));           /* (c1-c9)/2 */
4741
0
    z4 = z5 - tmp12 - (tmp13 << (CONST_BITS - 1));
4742
4743
0
    /* Odd partner of tmp22: scaled by PASS1_BITS only, so it matches the
     * already descaled tmp22 when the two are added/subtracted below.
     */
    tmp12 = (z1 - tmp13 - z3) << PASS1_BITS;
4744
4745
0
    tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
4746
0
    tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
4747
4748
    /* Final output stage */
    /* Rows r and 9-r are the sum and difference of the same even (tmp2x)
     * and odd (tmp1x) terms.
     */
4749
4750
0
    wsptr[5*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
4751
0
    wsptr[5*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
4752
0
    wsptr[5*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
4753
0
    wsptr[5*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
4754
0
    wsptr[5*2] = (int) (tmp22 + tmp12);
4755
0
    wsptr[5*7] = (int) (tmp22 - tmp12);
4756
0
    wsptr[5*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
4757
0
    wsptr[5*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
4758
0
    wsptr[5*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
4759
0
    wsptr[5*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
4760
0
  }
4761
4762
  /* Pass 2: process 10 rows from work array, store into output array.
4763
   * 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
4764
   */
4765
4766
0
  wsptr = workspace;
4767
0
  for (ctr = 0; ctr < 10; ctr++) {
4768
0
    outptr = output_buf[ctr] + output_col;
4769
4770
    /* Even part */
4771
4772
    /* Add range center and fudge factor for final descale and range-limit. */
4773
0
    tmp12 = (INT32) wsptr[0] +
4774
0
        ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
4775
0
         (ONE << (PASS1_BITS+2)));
4776
0
    tmp12 <<= CONST_BITS;
4777
0
    tmp13 = (INT32) wsptr[2];
4778
0
    tmp14 = (INT32) wsptr[4];
4779
0
    z1 = MULTIPLY(tmp13 + tmp14, FIX(0.790569415)); /* (c2+c4)/2 */
4780
0
    z2 = MULTIPLY(tmp13 - tmp14, FIX(0.353553391)); /* (c2-c4)/2 */
4781
0
    z3 = tmp12 + z2;
4782
0
    tmp10 = z3 + z1;
4783
0
    tmp11 = z3 - z1;
4784
0
    /* After this line tmp12 is the value of the centre sample, outptr[2]. */
    tmp12 -= z2 << 2;
4785
4786
    /* Odd part */
4787
4788
0
    z2 = (INT32) wsptr[1];
4789
0
    z3 = (INT32) wsptr[3];
4790
4791
0
    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));       /* c3 */
4792
0
    tmp13 = z1 + MULTIPLY(z2, FIX(0.513743148));    /* c1-c3 */
4793
0
    tmp14 = z1 - MULTIPLY(z3, FIX(2.176250899));    /* c1+c3 */
4794
4795
    /* Final output stage */
    /* Each value is shifted right by CONST_BITS+PASS1_BITS+3, masked with
     * RANGE_MASK and used as an index into range_limit[] to get the sample.
     */
4796
4797
0
    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp13,
4798
0
                CONST_BITS+PASS1_BITS+3)
4799
0
          & RANGE_MASK];
4800
0
    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp13,
4801
0
                CONST_BITS+PASS1_BITS+3)
4802
0
          & RANGE_MASK];
4803
0
    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp14,
4804
0
                CONST_BITS+PASS1_BITS+3)
4805
0
          & RANGE_MASK];
4806
0
    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp14,
4807
0
                CONST_BITS+PASS1_BITS+3)
4808
0
          & RANGE_MASK];
4809
0
    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12,
4810
0
                CONST_BITS+PASS1_BITS+3)
4811
0
          & RANGE_MASK];
4812
4813
0
    wsptr += 5;   /* advance pointer to next row */
4814
0
  }
4815
0
}
4816
4817
4818
/*
4819
 * Perform dequantization and inverse DCT on one block of coefficients,
4820
 * producing a 4x8 output block.
4821
 *
4822
 * 8-point IDCT in pass 1 (columns), 4-point in pass 2 (rows).
 *
 * Parameters, as used by the code below:
 *   cinfo       passed to IDCT_range_limit() to obtain the range_limit
 *               lookup table used for the final sample values.
 *   compptr     its dct_table member is read as the ISLOW_MULT_TYPE
 *               multiplier table for dequantization.
 *   coef_block  input coefficients; rows 0..7 of the first 4 columns are
 *               read, with a row stride of DCTSIZE.
 *   output_buf  output rows; output_buf[0] .. output_buf[7] are written.
 *   output_col  offset added to each output row pointer; 4 samples are
 *               stored per row starting at that offset.
 *
 * NOTE(review): several intermediates (including dcval in the shortcut
 * path) are left-shifted (<<) although they can presumably be negative
 * for negative coefficients; whether that is well defined depends on the
 * integer types and the compiler -- confirm.
4823
 */
4824
4825
GLOBAL(void)
4826
jpeg_idct_4x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4827
         JCOEFPTR coef_block,
4828
         JSAMPARRAY output_buf, JDIMENSION output_col)
4829
0
{
4830
0
  INT32 tmp0, tmp1, tmp2, tmp3;
4831
0
  INT32 tmp10, tmp11, tmp12, tmp13;
4832
0
  INT32 z1, z2, z3;
4833
0
  JCOEFPTR inptr;
4834
0
  ISLOW_MULT_TYPE * quantptr;
4835
0
  int * wsptr;
4836
0
  JSAMPROW outptr;
4837
0
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4838
0
  int ctr;
4839
0
  /* Laid out as 8 rows of 4 ints: pass 1 fills one column of every row
   * per iteration (wsptr[4*row]), pass 2 consumes one row at a time.
   */
  int workspace[4*8]; /* buffers data between passes */
4840
  SHIFT_TEMPS
4841
4842
  /* Pass 1: process columns from input, store into work array.
4843
   * Note results are scaled up by sqrt(8) compared to a true IDCT;
4844
   * furthermore, we scale the results by 2**PASS1_BITS.
4845
   * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
4846
   */
4847
4848
0
  inptr = coef_block;
4849
0
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4850
0
  wsptr = workspace;
4851
0
  /* ctr only counts the 4 columns.  inptr, quantptr and wsptr are advanced
   * by hand at the end of the loop body and also before the early
   * "continue" of the all-AC-zero shortcut.
   */
  for (ctr = 4; ctr > 0; ctr--) {
4852
    /* Due to quantization, we will usually find that many of the input
4853
     * coefficients are zero, especially the AC terms.  We can exploit this
4854
     * by short-circuiting the IDCT calculation for any column in which all
4855
     * the AC terms are zero.  In that case each output is equal to the
4856
     * DC coefficient (with scale factor as needed).
4857
     * With typical images and quantization tables, half or more of the
4858
     * column DCT calculations can be simplified this way.
4859
     */
4860
4861
0
    if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
4862
0
  inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
4863
0
  inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
4864
0
  inptr[DCTSIZE*7] == 0) {
4865
      /* AC terms all zero */
      /* All 8 rows of this workspace column receive the same value: the
       * dequantized DC term scaled up by PASS1_BITS.
       */
4866
0
      int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
4867
4868
0
      wsptr[4*0] = dcval;
4869
0
      wsptr[4*1] = dcval;
4870
0
      wsptr[4*2] = dcval;
4871
0
      wsptr[4*3] = dcval;
4872
0
      wsptr[4*4] = dcval;
4873
0
      wsptr[4*5] = dcval;
4874
0
      wsptr[4*6] = dcval;
4875
0
      wsptr[4*7] = dcval;
4876
4877
0
      inptr++;      /* advance pointers to next column */
4878
0
      quantptr++;
4879
0
      wsptr++;
4880
0
      continue;
4881
0
    }
4882
4883
    /* Even part: reverse the even part of the forward DCT.
4884
     * The rotator is c(-6).
4885
     */
4886
4887
0
    z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4888
0
    z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4889
0
    z2 <<= CONST_BITS;
4890
0
    z3 <<= CONST_BITS;
4891
    /* Add fudge factor here for final descale. */
4892
0
    z2 += ONE << (CONST_BITS-PASS1_BITS-1);
4893
4894
0
    tmp0 = z2 + z3;
4895
0
    tmp1 = z2 - z3;
4896
4897
0
    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4898
0
    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
4899
4900
0
    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
4901
0
    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
4902
0
    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
4903
4904
0
    tmp10 = tmp0 + tmp2;
4905
0
    tmp13 = tmp0 - tmp2;
4906
0
    tmp11 = tmp1 + tmp3;
4907
0
    tmp12 = tmp1 - tmp3;
4908
4909
    /* Odd part per figure 8; the matrix is unitary and hence its
4910
     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
4911
     */
    /* tmp0..tmp3 are reused from here on: they now hold the dequantized
     * odd coefficients (rows 7, 5, 3, 1), no longer the even-part values.
     */
4912
4913
0
    tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
4914
0
    tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
4915
0
    tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
4916
0
    tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4917
4918
0
    z2 = tmp0 + tmp2;
4919
0
    z3 = tmp1 + tmp3;
4920
4921
0
    z1 = MULTIPLY(z2 + z3, FIX_1_175875602);       /*  c3 */
4922
0
    z2 = MULTIPLY(z2, - FIX_1_961570560);          /* -c3-c5 */
4923
0
    z3 = MULTIPLY(z3, - FIX_0_390180644);          /* -c3+c5 */
4924
0
    z2 += z1;
4925
0
    z3 += z1;
4926
4927
0
    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
4928
0
    tmp0 = MULTIPLY(tmp0, FIX_0_298631336);        /* -c1+c3+c5-c7 */
4929
0
    tmp3 = MULTIPLY(tmp3, FIX_1_501321110);        /*  c1+c3-c5-c7 */
4930
0
    tmp0 += z1 + z2;
4931
0
    tmp3 += z1 + z3;
4932
4933
0
    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
4934
0
    tmp1 = MULTIPLY(tmp1, FIX_2_053119869);        /*  c1+c3-c5+c7 */
4935
0
    tmp2 = MULTIPLY(tmp2, FIX_3_072711026);        /*  c1+c3+c5-c7 */
4936
0
    tmp1 += z1 + z3;
4937
0
    tmp2 += z1 + z2;
4938
4939
    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
    /* Rows r and 7-r are the sum and difference of the same pair of terms. */
4940
4941
0
    wsptr[4*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
4942
0
    wsptr[4*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
4943
0
    wsptr[4*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
4944
0
    wsptr[4*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
4945
0
    wsptr[4*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
4946
0
    wsptr[4*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
4947
0
    wsptr[4*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
4948
0
    wsptr[4*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
4949
4950
0
    inptr++;      /* advance pointers to next column */
4951
0
    quantptr++;
4952
0
    wsptr++;
4953
0
  }
4954
4955
  /* Pass 2: process 8 rows from work array, store into output array.
4956
   * 4-point IDCT kernel,
4957
   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
4958
   */
4959
4960
0
  wsptr = workspace;
4961
0
  for (ctr = 0; ctr < 8; ctr++) {
4962
0
    outptr = output_buf[ctr] + output_col;
4963
4964
    /* Even part */
4965
4966
    /* Add range center and fudge factor for final descale and range-limit. */
4967
0
    tmp0 = (INT32) wsptr[0] +
4968
0
       ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
4969
0
        (ONE << (PASS1_BITS+2)));
4970
0
    tmp2 = (INT32) wsptr[2];
4971
4972
0
    tmp10 = (tmp0 + tmp2) << CONST_BITS;
4973
0
    tmp12 = (tmp0 - tmp2) << CONST_BITS;
4974
4975
    /* Odd part */
4976
    /* Same rotation as in the even part of the 8x8 LL&M IDCT */
4977
4978
0
    z2 = (INT32) wsptr[1];
4979
0
    z3 = (INT32) wsptr[3];
4980
4981
0
    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);   /* c6 */
4982
0
    tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
4983
0
    tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
4984
4985
    /* Final output stage */
    /* Each value is shifted right by CONST_BITS+PASS1_BITS+3, masked with
     * RANGE_MASK and used as an index into range_limit[] to get the sample.
     */
4986
4987
0
    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
4988
0
                CONST_BITS+PASS1_BITS+3)
4989
0
          & RANGE_MASK];
4990
0
    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
4991
0
                CONST_BITS+PASS1_BITS+3)
4992
0
          & RANGE_MASK];
4993
0
    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
4994
0
                CONST_BITS+PASS1_BITS+3)
4995
0
          & RANGE_MASK];
4996
0
    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
4997
0
                CONST_BITS+PASS1_BITS+3)
4998
0
          & RANGE_MASK];
4999
5000
0
    wsptr += 4;   /* advance pointer to next row */
5001
0
  }
5002
0
}
5003
5004
5005
/*
5006
 * Perform dequantization and inverse DCT on one block of coefficients,
5007
 * producing a 3x6 output block.
5008
 *
5009
 * 6-point IDCT in pass 1 (columns), 3-point in pass 2 (rows).
 *
 * Parameters, as used by the code below:
 *   cinfo       passed to IDCT_range_limit() to obtain the range_limit
 *               lookup table used for the final sample values.
 *   compptr     its dct_table member is read as the ISLOW_MULT_TYPE
 *               multiplier table for dequantization.
 *   coef_block  input coefficients; rows 0..5 of the first 3 columns are
 *               read, with a row stride of DCTSIZE.
 *   output_buf  output rows; output_buf[0] .. output_buf[5] are written.
 *   output_col  offset added to each output row pointer; 3 samples are
 *               stored per row starting at that offset.
 *
 * NOTE(review): several intermediates are left-shifted (<<) although they
 * can presumably be negative for negative coefficients; whether that is
 * well defined depends on the INT32 type and the compiler -- confirm.
5010
 */
5011
5012
GLOBAL(void)
5013
jpeg_idct_3x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
5014
         JCOEFPTR coef_block,
5015
         JSAMPARRAY output_buf, JDIMENSION output_col)
5016
0
{
5017
0
  INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
5018
0
  INT32 z1, z2, z3;
5019
0
  JCOEFPTR inptr;
5020
0
  ISLOW_MULT_TYPE * quantptr;
5021
0
  int * wsptr;
5022
0
  JSAMPROW outptr;
5023
0
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
5024
0
  int ctr;
5025
0
  /* Laid out as 6 rows of 3 ints: pass 1 fills one column of every row
   * per iteration (wsptr[3*row]), pass 2 consumes one row at a time.
   */
  int workspace[3*6]; /* buffers data between passes */
5026
  SHIFT_TEMPS
5027
5028
  /* Pass 1: process columns from input, store into work array.
5029
   * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
5030
   */
5031
5032
0
  inptr = coef_block;
5033
0
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
5034
0
  wsptr = workspace;
5035
0
  /* One iteration per input column; all three pointers step by one element. */
  for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) {
5036
    /* Even part */
5037
5038
0
    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
5039
0
    tmp0 <<= CONST_BITS;
5040
    /* Add fudge factor here for final descale. */
5041
0
    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
5042
0
    tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
5043
0
    tmp10 = MULTIPLY(tmp2, FIX(0.707106781));   /* c4 */
5044
0
    tmp1 = tmp0 + tmp10;
5045
0
    /* This even term is descaled immediately; the two outputs built from
     * it (rows 1 and 4) are therefore stored below without another shift.
     */
    tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS);
5046
0
    tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
5047
0
    tmp0 = MULTIPLY(tmp10, FIX(1.224744871));   /* c2 */
5048
0
    tmp10 = tmp1 + tmp0;
5049
0
    tmp12 = tmp1 - tmp0;
5050
5051
    /* Odd part */
    /* tmp0, tmp1 and tmp2 are reused below for the odd-part results. */
5052
5053
0
    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
5054
0
    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
5055
0
    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
5056
0
    tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
5057
0
    tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
5058
0
    tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
5059
0
    /* Odd partner of tmp11: scaled by PASS1_BITS only, so it matches the
     * already descaled tmp11.
     */
    tmp1 = (z1 - z2 - z3) << PASS1_BITS;
5060
5061
    /* Final output stage */
    /* Rows r and 5-r are the sum and difference of the same pair of terms. */
5062
5063
0
    wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
5064
0
    wsptr[3*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
5065
0
    wsptr[3*1] = (int) (tmp11 + tmp1);
5066
0
    wsptr[3*4] = (int) (tmp11 - tmp1);
5067
0
    wsptr[3*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
5068
0
    wsptr[3*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
5069
0
  }
5070
5071
  /* Pass 2: process 6 rows from work array, store into output array.
5072
   * 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
5073
   */
5074
5075
0
  wsptr = workspace;
5076
0
  for (ctr = 0; ctr < 6; ctr++) {
5077
0
    outptr = output_buf[ctr] + output_col;
5078
5079
    /* Even part */
5080
5081
    /* Add range center and fudge factor for final descale and range-limit. */
5082
0
    tmp0 = (INT32) wsptr[0] +
5083
0
       ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
5084
0
        (ONE << (PASS1_BITS+2)));
5085
0
    tmp0 <<= CONST_BITS;
5086
0
    tmp2 = (INT32) wsptr[2];
5087
0
    tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
5088
0
    tmp10 = tmp0 + tmp12;
5089
0
    /* tmp2 is reused: it now holds the value of the centre sample, outptr[1]. */
    tmp2 = tmp0 - tmp12 - tmp12;
5090
5091
    /* Odd part */
5092
5093
0
    tmp12 = (INT32) wsptr[1];
5094
0
    tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
5095
5096
    /* Final output stage */
    /* Each value is shifted right by CONST_BITS+PASS1_BITS+3, masked with
     * RANGE_MASK and used as an index into range_limit[] to get the sample.
     */
5097
5098
0
    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
5099
0
                CONST_BITS+PASS1_BITS+3)
5100
0
          & RANGE_MASK];
5101
0
    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
5102
0
                CONST_BITS+PASS1_BITS+3)
5103
0
          & RANGE_MASK];
5104
0
    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2,
5105
0
                CONST_BITS+PASS1_BITS+3)
5106
0
          & RANGE_MASK];
5107
5108
0
    wsptr += 3;   /* advance pointer to next row */
5109
0
  }
5110
0
}
5111
5112
5113
/*
5114
 * Perform dequantization and inverse DCT on one block of coefficients,
5115
 * producing a 2x4 output block.
5116
 *
5117
 * 4-point IDCT in pass 1 (columns), 2-point in pass 2 (rows).
 *
 * Parameters, as used by the code below:
 *   cinfo       passed to IDCT_range_limit() to obtain the range_limit
 *               lookup table used for the final sample values.
 *   compptr     its dct_table member is read as the ISLOW_MULT_TYPE
 *               multiplier table for dequantization.
 *   coef_block  input coefficients; rows 0..3 of the first 2 columns are
 *               read, with a row stride of DCTSIZE.
 *   output_buf  output rows; output_buf[0] .. output_buf[3] are written.
 *   output_col  offset added to each output row pointer; 2 samples are
 *               stored per row starting at that offset.
 *
 * Unlike the larger kernels in this file, pass 1 stores its results
 * without any right shift (they stay scaled by 2**CONST_BITS), which is
 * why the workspace is INT32 and pass 2 descales by CONST_BITS+3 only.
 *
 * NOTE(review): (tmp0 +/- tmp2) is left-shifted (<<) although it can
 * presumably be negative for negative coefficients; whether that is well
 * defined depends on the INT32 type and the compiler -- confirm.
5118
 */
5119
5120
GLOBAL(void)
5121
jpeg_idct_2x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
5122
         JCOEFPTR coef_block,
5123
         JSAMPARRAY output_buf, JDIMENSION output_col)
5124
0
{
5125
0
  INT32 tmp0, tmp2, tmp10, tmp12;
5126
0
  INT32 z1, z2, z3;
5127
0
  JCOEFPTR inptr;
5128
0
  ISLOW_MULT_TYPE * quantptr;
5129
0
  INT32 * wsptr;
5130
0
  JSAMPROW outptr;
5131
0
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
5132
0
  int ctr;
5133
0
  /* Laid out as 4 rows of 2 values: pass 1 fills one column of every row
   * per iteration (wsptr[2*row]), pass 2 consumes one row at a time.
   */
  INT32 workspace[2*4]; /* buffers data between passes */
5134
  SHIFT_TEMPS
5135
5136
  /* Pass 1: process columns from input, store into work array.
5137
   * 4-point IDCT kernel,
5138
   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
5139
   */
5140
5141
0
  inptr = coef_block;
5142
0
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
5143
0
  wsptr = workspace;
5144
0
  /* One iteration per input column; all three pointers step by one element. */
  for (ctr = 0; ctr < 2; ctr++, inptr++, quantptr++, wsptr++) {
5145
    /* Even part */
5146
5147
0
    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
5148
0
    tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
5149
5150
0
    tmp10 = (tmp0 + tmp2) << CONST_BITS;
5151
0
    tmp12 = (tmp0 - tmp2) << CONST_BITS;
5152
5153
    /* Odd part */
5154
    /* Same rotation as in the even part of the 8x8 LL&M IDCT */
    /* tmp0 and tmp2 are reused below for the odd-part results. */
5155
5156
0
    z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
5157
0
    z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
5158
5159
0
    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);   /* c6 */
5160
0
    tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
5161
0
    tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
5162
5163
    /* Final output stage */
    /* Stored at full CONST_BITS scale: no descale and no fudge factor here. */
5164
5165
0
    wsptr[2*0] = tmp10 + tmp0;
5166
0
    wsptr[2*3] = tmp10 - tmp0;
5167
0
    wsptr[2*1] = tmp12 + tmp2;
5168
0
    wsptr[2*2] = tmp12 - tmp2;
5169
0
  }
5170
5171
  /* Pass 2: process 4 rows from work array, store into output array. */
5172
5173
0
  wsptr = workspace;
5174
0
  for (ctr = 0; ctr < 4; ctr++) {
5175
0
    outptr = output_buf[ctr] + output_col;
5176
5177
    /* Even part */
5178
5179
    /* Add range center and fudge factor for final descale and range-limit. */
    /* Both constants are expressed at the CONST_BITS+3 scale that the
     * final shift below removes.
     */
5180
0
    tmp10 = wsptr[0] +
5181
0
        ((((INT32) RANGE_CENTER) << (CONST_BITS+3)) +
5182
0
         (ONE << (CONST_BITS+2)));
5183
5184
    /* Odd part */
5185
5186
0
    tmp0 = wsptr[1];
5187
5188
    /* Final output stage */
    /* The two samples of a row are the sum and difference of the row's two
     * workspace values, descaled, masked with RANGE_MASK and mapped through
     * range_limit[].
     */
5189
5190
0
    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS+3)
5191
0
          & RANGE_MASK];
5192
0
    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS+3)
5193
0
          & RANGE_MASK];
5194
5195
0
    wsptr += 2;   /* advance pointer to next row */
5196
0
  }
5197
0
}
5198
5199
5200
/*
5201
 * Perform dequantization and inverse DCT on one block of coefficients,
5202
 * producing a 1x2 output block.
5203
 *
5204
 * 2-point IDCT in pass 1 (columns), 1-point in pass 2 (rows).
 *
 * Parameters, as used by the code below:
 *   cinfo       passed to IDCT_range_limit() to obtain the range_limit
 *               lookup table used for the final sample values.
 *   compptr     its dct_table member is read as the ISLOW_MULT_TYPE
 *               multiplier table for dequantization.
 *   coef_block  input coefficients; only coef_block[DCTSIZE*0] and
 *               coef_block[DCTSIZE*1] are read.
 *   output_buf  output rows; one sample is written to each of
 *               output_buf[0] and output_buf[1].
 *   output_col  index of the sample written within each of those rows.
 *
 * No workspace is used: the single column is computed and stored directly.
5205
 */
5206
5207
GLOBAL(void)
5208
jpeg_idct_1x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
5209
         JCOEFPTR coef_block,
5210
         JSAMPARRAY output_buf, JDIMENSION output_col)
5211
0
{
5212
0
  DCTELEM tmp0, tmp1;
5213
0
  ISLOW_MULT_TYPE * quantptr;
5214
0
  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
5215
0
  ISHIFT_TEMPS
5216
5217
  /* Process 1 column from input, store into output array. */
5218
5219
0
  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
5220
5221
  /* Even part */
5222
5223
0
  tmp0 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]);
5224
  /* Add range center and fudge factor for final descale and range-limit. */
  /* Both constants are expressed at the scale removed by the final
   * right shift by 3 below.
   */
5225
0
  tmp0 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);
5226
5227
  /* Odd part */
5228
5229
0
  tmp1 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]);
5230
5231
  /* Final output stage */
  /* The two output samples are the sum and difference of the even and odd
   * terms, shifted right by 3, masked with RANGE_MASK and mapped through
   * range_limit[].
   */
5232
5233
0
  output_buf[0][output_col] =
5234
0
    range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK];
5235
0
  output_buf[1][output_col] =
5236
0
    range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK];
5237
0
}
5238
5239
#endif /* IDCT_SCALING_SUPPORTED */
5240
#endif /* DCT_ISLOW_SUPPORTED */