Coverage Report

Created: 2026-04-01 07:17

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/ghostpdl/obj/jfdctint.c
Line
Count
Source
1
/*
2
 * jfdctint.c
3
 *
4
 * Copyright (C) 1991-1996, Thomas G. Lane.
5
 * Modification developed 2003-2026 by Guido Vollbeding.
6
 * This file is part of the Independent JPEG Group's software.
7
 * For conditions of distribution and use, see the accompanying README file.
8
 *
9
 * This file contains a slow-but-accurate integer implementation of the
10
 * forward DCT (Discrete Cosine Transform).
11
 *
12
 * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
13
 * on each column.  Direct algorithms are also available, but they are
14
 * much more complex and seem not to be any faster when reduced to code.
15
 *
16
 * This implementation is based on an algorithm described in
17
 *   C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
18
 *   Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
19
 *   Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
20
 * The primary algorithm described there uses 11 multiplies and 29 adds.
21
 * We use their alternate method with 12 multiplies and 32 adds.
22
 * The advantage of this method is that no data path contains more than one
23
 * multiplication; this allows a very simple and accurate implementation in
24
 * scaled fixed-point arithmetic, with a minimal number of shifts.
25
 *
26
 * We also provide FDCT routines with various input sample block sizes for
27
 * direct resolution reduction or enlargement and for direct resolving the
28
 * common 2x1 and 1x2 subsampling cases without additional resampling: NxN
29
 * (N=1...16), 2NxN, and Nx2N (N=1...8) samples for one 8x8 output DCT block.
30
 *
31
 * For N<8 we fill the remaining block coefficients with zero.
32
 * For N>8 we apply a partial N-point FDCT on the input samples, computing
33
 * just the lower 8 frequency coefficients and discarding the rest.
34
 *
35
 * We must scale the output coefficients of the N-point FDCT appropriately
36
 * to the standard 8-point FDCT level by 8/N per 1-D pass.  This scaling
37
 * is folded into the constant multipliers (pass 2) and/or final/initial
38
 * shifting.
39
 *
40
 * CAUTION: We rely on the FIX() macro except for the N=1,2,4,8 cases
41
 * since there would be too many additional constants to pre-calculate.
42
 */
43
44
#define JPEG_INTERNALS
45
#include "jinclude.h"
46
#include "jpeglib.h"
47
#include "jdct.h"   /* Private declarations for DCT subsystem */
48
49
#ifdef DCT_ISLOW_SUPPORTED
50
51
52
/*
53
 * This module is specialized to the case DCTSIZE = 8.
54
 */
55
56
#if DCTSIZE != 8
57
  Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */
58
#endif
59
60
61
/*
62
 * The poop on this scaling stuff is as follows:
63
 *
64
 * Each 1-D DCT step produces outputs which are a factor of sqrt(N)
65
 * larger than the true DCT outputs.  The final outputs are therefore
66
 * a factor of N larger than desired; since N=8 this can be cured by
67
 * a simple right shift at the end of the algorithm.  The advantage of
68
 * this arrangement is that we save two multiplications per 1-D DCT,
69
 * because the y0 and y4 outputs need not be divided by sqrt(N).
70
 * In the IJG code, this factor of 8 is removed by the quantization step
71
 * (in jcdctmgr.c), NOT in this module.
72
 *
73
 * We have to do addition and subtraction of the integer inputs, which
74
 * is no problem, and multiplication by fractional constants, which is
75
 * a problem to do in integer arithmetic.  We multiply all the constants
76
 * by CONST_SCALE and convert them to integer constants (thus retaining
77
 * CONST_BITS bits of precision in the constants).  After doing a
78
 * multiplication we have to divide the product by CONST_SCALE, with
79
 * proper rounding, to produce the correct output.  This division can
80
 * be done cheaply as a right shift of CONST_BITS bits.  We postpone
81
 * shifting as long as possible so that partial sums can be added
82
 * together with full fractional precision.
83
 *
84
 * The outputs of the first pass are scaled up by PASS1_BITS bits so that
85
 * they are represented to better-than-integral precision.  These outputs
86
 * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit
87
 * word with the recommended scaling.  (For higher bit depths, the
88
 * intermediate array is INT32 anyway.)
89
 *
90
 * To avoid overflow of the 32-bit intermediate results in pass 2, we
91
 * must have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26.  Error
92
 * analysis shows that the values given below are the most effective.
93
 */
94
95
#if BITS_IN_JSAMPLE <= 10 && JPEG_DATA_PRECISION <= 10
96
50.6M
#define CONST_BITS  13
97
62.9M
#define PASS1_BITS  (10 - BITS_IN_JSAMPLE)
98
37.9M
#define PASS2_BITS  (10 - JPEG_DATA_PRECISION)
99
#else
100
#if BITS_IN_JSAMPLE <= 13 && JPEG_DATA_PRECISION <= 13
101
#define CONST_BITS  13
102
#define PASS1_BITS  (13 - BITS_IN_JSAMPLE)
103
#define PASS2_BITS  (13 - JPEG_DATA_PRECISION)
104
#endif
105
#endif
106
107
/* Some C compilers fail to reduce "FIX(constant)" at compile time,
108
 * thus causing a lot of useless floating-point operations at run time.
109
 * To get around this we use the following pre-calculated constants.
110
 * If you change CONST_BITS you may want to add appropriate values.
111
 * (With a reasonable C compiler, you can just rely on the FIX() macro...)
112
 */
113
114
#if CONST_BITS == 13
115
#define FIX_0_298631336  ((INT32)  2446)  /* FIX(0.298631336) */
116
#define FIX_0_390180644  ((INT32)  3196)  /* FIX(0.390180644) */
117
#define FIX_0_541196100  ((INT32)  4433)  /* FIX(0.541196100) */
118
#define FIX_0_765366865  ((INT32)  6270)  /* FIX(0.765366865) */
119
#define FIX_0_899976223  ((INT32)  7373)  /* FIX(0.899976223) */
120
#define FIX_1_175875602  ((INT32)  9633)  /* FIX(1.175875602) */
121
#define FIX_1_501321110  ((INT32)  12299) /* FIX(1.501321110) */
122
#define FIX_1_847759065  ((INT32)  15137) /* FIX(1.847759065) */
123
#define FIX_1_961570560  ((INT32)  16069) /* FIX(1.961570560) */
124
#define FIX_2_053119869  ((INT32)  16819) /* FIX(2.053119869) */
125
#define FIX_2_562915447  ((INT32)  20995) /* FIX(2.562915447) */
126
#define FIX_3_072711026  ((INT32)  25172) /* FIX(3.072711026) */
127
#else
128
#define FIX_0_298631336  FIX(0.298631336)
129
#define FIX_0_390180644  FIX(0.390180644)
130
#define FIX_0_541196100  FIX(0.541196100)
131
#define FIX_0_765366865  FIX(0.765366865)
132
#define FIX_0_899976223  FIX(0.899976223)
133
#define FIX_1_175875602  FIX(1.175875602)
134
#define FIX_1_501321110  FIX(1.501321110)
135
#define FIX_1_847759065  FIX(1.847759065)
136
#define FIX_1_961570560  FIX(1.961570560)
137
#define FIX_2_053119869  FIX(2.053119869)
138
#define FIX_2_562915447  FIX(2.562915447)
139
#define FIX_3_072711026  FIX(3.072711026)
140
#endif
141
142
143
/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
144
 * For up to 10-bit data with the recommended scaling, all the variable
145
 * and constant values involved are no more than 16 bits wide, so a
146
 * 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
147
 * For higher bit depths, a full 32-bit multiplication will be needed.
148
 */
149
150
#if BITS_IN_JSAMPLE <= 10 && JPEG_DATA_PRECISION <= 10
151
660M
#define MULTIPLY(var,const)  MULTIPLY16C16(var,const)
152
#else
153
#define MULTIPLY(var,const)  ((var) * (const))
154
#endif
155
156
157
/* Pass 1 output: smart scale up. */
158
159
#if PASS1_BITS > 0
160
37.6M
#define PASS1_OUTPUT(x)  (DCTELEM) ((x) << PASS1_BITS)
161
#else
162
#define PASS1_OUTPUT(x)  (DCTELEM) (x)
163
#endif
164
165
166
/* Pass 2 output: smart scale down. */
167
168
#if PASS2_BITS > 0
169
25.3M
#define PASS2_OUTPUT(x)  (DCTELEM) RIGHT_SHIFT(x, PASS2_BITS)
170
#else
171
#define PASS2_OUTPUT(x)  (DCTELEM) (x)
172
#endif
173
174
175
/*
176
 * Perform the forward DCT on one block of samples.
177
 */
178
179
GLOBAL(void)
180
jpeg_fdct_islow (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
181
1.58M
{
182
1.58M
  INT32 tmp0, tmp1, tmp2, tmp3;
183
1.58M
  INT32 tmp10, tmp11, tmp12, tmp13;
184
1.58M
  INT32 z1;
185
1.58M
  DCTELEM *dataptr;
186
1.58M
  JSAMPROW elemptr;
187
1.58M
  int ctr;
188
1.58M
  SHIFT_TEMPS
189
190
  /* Pass 1: process rows.
191
   * Note results are scaled up by sqrt(8) compared to a true DCT;
192
   * furthermore, we scale the results by 2**PASS1_BITS.
193
   * cK represents sqrt(2) * cos(K*pi/16).
194
   */
195
196
1.58M
  dataptr = data;
197
14.2M
  for (ctr = 0; ctr < DCTSIZE; ctr++) {
198
12.6M
    elemptr = sample_data[ctr] + start_col;
199
200
    /* Even part per LL&M figure 1 --- note that published figure is faulty;
201
     * rotator "c1" should be "c6".
202
     */
203
204
12.6M
    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]);
205
12.6M
    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]);
206
12.6M
    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]);
207
12.6M
    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]);
208
209
12.6M
    tmp10 = tmp0 + tmp3;
210
12.6M
    tmp12 = tmp0 - tmp3;
211
12.6M
    tmp11 = tmp1 + tmp2;
212
12.6M
    tmp13 = tmp1 - tmp2;
213
214
12.6M
    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]);
215
12.6M
    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]);
216
12.6M
    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]);
217
12.6M
    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]);
218
219
    /* Apply unsigned->signed conversion. */
220
12.6M
    dataptr[0] = PASS1_OUTPUT(tmp10 + tmp11 - 8 * CENTERJSAMPLE);
221
12.6M
    dataptr[4] = PASS1_OUTPUT(tmp10 - tmp11);
222
223
12.6M
    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);       /* c6 */
224
    /* Add fudge factor here for final descale. */
225
12.6M
    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
226
227
12.6M
    dataptr[2] = (DCTELEM)
228
12.6M
      RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
229
12.6M
      CONST_BITS-PASS1_BITS);
230
12.6M
    dataptr[6] = (DCTELEM)
231
12.6M
      RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
232
12.6M
      CONST_BITS-PASS1_BITS);
233
234
    /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
235
     * i0..i3 in the paper are tmp0..tmp3 here.
236
     */
237
238
12.6M
    tmp12 = tmp0 + tmp2;
239
12.6M
    tmp13 = tmp1 + tmp3;
240
241
12.6M
    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602);       /*  c3 */
242
    /* Add fudge factor here for final descale. */
243
12.6M
    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
244
245
12.6M
    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);          /* -c3+c5 */
246
12.6M
    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);          /* -c3-c5 */
247
12.6M
    tmp12 += z1;
248
12.6M
    tmp13 += z1;
249
250
12.6M
    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223);       /* -c3+c7 */
251
12.6M
    tmp0 = MULTIPLY(tmp0, FIX_1_501321110);              /*  c1+c3-c5-c7 */
252
12.6M
    tmp3 = MULTIPLY(tmp3, FIX_0_298631336);              /* -c1+c3+c5-c7 */
253
12.6M
    tmp0 += z1 + tmp12;
254
12.6M
    tmp3 += z1 + tmp13;
255
256
12.6M
    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447);       /* -c1-c3 */
257
12.6M
    tmp1 = MULTIPLY(tmp1, FIX_3_072711026);              /*  c1+c3+c5-c7 */
258
12.6M
    tmp2 = MULTIPLY(tmp2, FIX_2_053119869);              /*  c1+c3-c5+c7 */
259
12.6M
    tmp1 += z1 + tmp13;
260
12.6M
    tmp2 += z1 + tmp12;
261
262
12.6M
    dataptr[1] = (DCTELEM) RIGHT_SHIFT(tmp0, CONST_BITS-PASS1_BITS);
263
12.6M
    dataptr[3] = (DCTELEM) RIGHT_SHIFT(tmp1, CONST_BITS-PASS1_BITS);
264
12.6M
    dataptr[5] = (DCTELEM) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
265
12.6M
    dataptr[7] = (DCTELEM) RIGHT_SHIFT(tmp3, CONST_BITS-PASS1_BITS);
266
267
12.6M
    dataptr += DCTSIZE;   /* advance pointer to next row */
268
12.6M
  }
269
270
  /* Pass 2: process columns.
271
   * We apply the PASS2_BITS scaling, but leave the
272
   * results scaled up by an overall factor of 8.
273
   * cK represents sqrt(2) * cos(K*pi/16).
274
   */
275
276
1.58M
  dataptr = data;
277
14.2M
  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
278
    /* Even part per LL&M figure 1 --- note that published figure is faulty;
279
     * rotator "c1" should be "c6".
280
     */
281
282
12.6M
    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
283
12.6M
    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
284
12.6M
    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
285
12.6M
    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
286
287
    /* Add fudge factor here for final descale. */
288
12.6M
#if PASS2_BITS > 1
289
12.6M
    tmp10 = tmp0 + tmp3 + (ONE << (PASS2_BITS-1));
290
#else
291
#if PASS2_BITS > 0
292
    tmp10 = tmp0 + tmp3 + ONE;
293
#else
294
    tmp10 = tmp0 + tmp3;
295
#endif
296
#endif
297
12.6M
    tmp12 = tmp0 - tmp3;
298
12.6M
    tmp11 = tmp1 + tmp2;
299
12.6M
    tmp13 = tmp1 - tmp2;
300
301
12.6M
    tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
302
12.6M
    tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
303
12.6M
    tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
304
12.6M
    tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
305
306
12.6M
    dataptr[DCTSIZE*0] = PASS2_OUTPUT(tmp10 + tmp11);
307
12.6M
    dataptr[DCTSIZE*4] = PASS2_OUTPUT(tmp10 - tmp11);
308
309
12.6M
    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);       /* c6 */
310
    /* Add fudge factor here for final descale. */
311
12.6M
    z1 += ONE << (CONST_BITS+PASS2_BITS-1);
312
313
12.6M
    dataptr[DCTSIZE*2] = (DCTELEM)
314
12.6M
      RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
315
12.6M
      CONST_BITS+PASS2_BITS);
316
12.6M
    dataptr[DCTSIZE*6] = (DCTELEM)
317
12.6M
      RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
318
12.6M
      CONST_BITS+PASS2_BITS);
319
320
    /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
321
     * i0..i3 in the paper are tmp0..tmp3 here.
322
     */
323
324
12.6M
    tmp12 = tmp0 + tmp2;
325
12.6M
    tmp13 = tmp1 + tmp3;
326
327
12.6M
    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602);       /*  c3 */
328
    /* Add fudge factor here for final descale. */
329
12.6M
    z1 += ONE << (CONST_BITS+PASS2_BITS-1);
330
331
12.6M
    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);          /* -c3+c5 */
332
12.6M
    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);          /* -c3-c5 */
333
12.6M
    tmp12 += z1;
334
12.6M
    tmp13 += z1;
335
336
12.6M
    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223);       /* -c3+c7 */
337
12.6M
    tmp0 = MULTIPLY(tmp0, FIX_1_501321110);              /*  c1+c3-c5-c7 */
338
12.6M
    tmp3 = MULTIPLY(tmp3, FIX_0_298631336);              /* -c1+c3+c5-c7 */
339
12.6M
    tmp0 += z1 + tmp12;
340
12.6M
    tmp3 += z1 + tmp13;
341
342
12.6M
    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447);       /* -c1-c3 */
343
12.6M
    tmp1 = MULTIPLY(tmp1, FIX_3_072711026);              /*  c1+c3+c5-c7 */
344
12.6M
    tmp2 = MULTIPLY(tmp2, FIX_2_053119869);              /*  c1+c3-c5+c7 */
345
12.6M
    tmp1 += z1 + tmp13;
346
12.6M
    tmp2 += z1 + tmp12;
347
348
12.6M
    dataptr[DCTSIZE*1] = (DCTELEM) RIGHT_SHIFT(tmp0, CONST_BITS+PASS2_BITS);
349
12.6M
    dataptr[DCTSIZE*3] = (DCTELEM) RIGHT_SHIFT(tmp1, CONST_BITS+PASS2_BITS);
350
12.6M
    dataptr[DCTSIZE*5] = (DCTELEM) RIGHT_SHIFT(tmp2, CONST_BITS+PASS2_BITS);
351
12.6M
    dataptr[DCTSIZE*7] = (DCTELEM) RIGHT_SHIFT(tmp3, CONST_BITS+PASS2_BITS);
352
353
12.6M
    dataptr++;      /* advance pointer to next column */
354
12.6M
  }
355
1.58M
}
356
357
#ifdef DCT_SCALING_SUPPORTED
358
359
360
/*
361
 * Perform the forward DCT on a 7x7 sample block.
362
 */
363
364
GLOBAL(void)
365
jpeg_fdct_7x7 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
366
0
{
367
0
  INT32 tmp0, tmp1, tmp2, tmp3;
368
0
  INT32 tmp10, tmp11, tmp12;
369
0
  INT32 z1, z2, z3;
370
0
  DCTELEM *dataptr;
371
0
  JSAMPROW elemptr;
372
0
  int ctr;
373
0
  SHIFT_TEMPS
374
375
  /* Pre-zero output coefficient block. */
376
0
  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
377
378
  /* Pass 1: process rows.
379
   * Note results are scaled up by sqrt(8) compared to a true DCT;
380
   * furthermore, we scale the results by 2**PASS1_BITS.
381
   * cK represents sqrt(2) * cos(K*pi/14).
382
   */
383
384
0
  dataptr = data;
385
0
  for (ctr = 0; ctr < 7; ctr++) {
386
0
    elemptr = sample_data[ctr] + start_col;
387
388
    /* Even part */
389
390
0
    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[6]);
391
0
    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[5]);
392
0
    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[4]);
393
0
    tmp3 = GETJSAMPLE(elemptr[3]);
394
395
0
    tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[6]);
396
0
    tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[5]);
397
0
    tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[4]);
398
399
0
    z1 = tmp0 + tmp2;
400
    /* Apply unsigned->signed conversion. */
401
0
    dataptr[0] = PASS1_OUTPUT(z1 + tmp1 + tmp3 - 7 * CENTERJSAMPLE);
402
0
    tmp3 += tmp3;
403
0
    z1 -= tmp3;
404
0
    z1 -= tmp3;
405
0
    z1 = MULTIPLY(z1, FIX(0.353553391));                /* (c2+c6-c4)/2 */
406
0
    z2 = MULTIPLY(tmp0 - tmp2, FIX(0.920609002));       /* (c2+c4-c6)/2 */
407
0
    z3 = MULTIPLY(tmp1 - tmp2, FIX(0.314692123));       /* c6 */
408
0
    dataptr[2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS-PASS1_BITS);
409
0
    z1 -= z2;
410
0
    z2 = MULTIPLY(tmp0 - tmp1, FIX(0.881747734));       /* c4 */
411
0
    dataptr[4] = (DCTELEM)
412
0
      DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.707106781)), /* c2+c6-c4 */
413
0
        CONST_BITS-PASS1_BITS);
414
0
    dataptr[6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS-PASS1_BITS);
415
416
    /* Odd part */
417
418
0
    tmp1 = MULTIPLY(tmp10 + tmp11, FIX(0.935414347));   /* (c3+c1-c5)/2 */
419
0
    tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.170262339));   /* (c3+c5-c1)/2 */
420
0
    tmp0 = tmp1 - tmp2;
421
0
    tmp1 += tmp2;
422
0
    tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.378756276)); /* -c1 */
423
0
    tmp1 += tmp2;
424
0
    tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.613604268));   /* c5 */
425
0
    tmp0 += tmp3;
426
0
    tmp2 += tmp3 + MULTIPLY(tmp12, FIX(1.870828693));   /* c3+c1-c5 */
427
428
0
    dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS-PASS1_BITS);
429
0
    dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS-PASS1_BITS);
430
0
    dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS-PASS1_BITS);
431
432
0
    dataptr += DCTSIZE;   /* advance pointer to next row */
433
0
  }
434
435
  /* Pass 2: process columns.
436
   * We apply the PASS2_BITS scaling, but leave the
437
   * results scaled up by an overall factor of 8.
438
   * We must also scale the output by (8/7)**2 = 64/49,
439
   * which we fold into the constant multipliers:
440
   * cK now represents sqrt(2) * cos(K*pi/14) * 64/49.
441
   */
442
443
0
  dataptr = data;
444
0
  for (ctr = 0; ctr < 7; ctr++) {
445
    /* Even part */
446
447
0
    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*6];
448
0
    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*5];
449
0
    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*4];
450
0
    tmp3 = dataptr[DCTSIZE*3];
451
452
0
    tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*6];
453
0
    tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*5];
454
0
    tmp12 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*4];
455
456
0
    z1 = tmp0 + tmp2;
457
0
    dataptr[DCTSIZE*0] = (DCTELEM)
458
0
      DESCALE(MULTIPLY(z1 + tmp1 + tmp3, FIX(1.306122449)), /* 64/49 */
459
0
        CONST_BITS+PASS2_BITS);
460
0
    tmp3 += tmp3;
461
0
    z1 -= tmp3;
462
0
    z1 -= tmp3;
463
0
    z1 = MULTIPLY(z1, FIX(0.461784020));                /* (c2+c6-c4)/2 */
464
0
    z2 = MULTIPLY(tmp0 - tmp2, FIX(1.202428084));       /* (c2+c4-c6)/2 */
465
0
    z3 = MULTIPLY(tmp1 - tmp2, FIX(0.411026446));       /* c6 */
466
0
    dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS+PASS2_BITS);
467
0
    z1 -= z2;
468
0
    z2 = MULTIPLY(tmp0 - tmp1, FIX(1.151670509));       /* c4 */
469
0
    dataptr[DCTSIZE*4] = (DCTELEM)
470
0
      DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.923568041)), /* c2+c6-c4 */
471
0
        CONST_BITS+PASS2_BITS);
472
0
    dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS+PASS2_BITS);
473
474
    /* Odd part */
475
476
0
    tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.221765677));   /* (c3+c1-c5)/2 */
477
0
    tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.222383464));   /* (c3+c5-c1)/2 */
478
0
    tmp0 = tmp1 - tmp2;
479
0
    tmp1 += tmp2;
480
0
    tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.800824523)); /* -c1 */
481
0
    tmp1 += tmp2;
482
0
    tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.801442310));   /* c5 */
483
0
    tmp0 += tmp3;
484
0
    tmp2 += tmp3 + MULTIPLY(tmp12, FIX(2.443531355));   /* c3+c1-c5 */
485
486
0
    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+PASS2_BITS);
487
0
    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+PASS2_BITS);
488
0
    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+PASS2_BITS);
489
490
0
    dataptr++;      /* advance pointer to next column */
491
0
  }
492
0
}
493
494
495
/*
496
 * Perform the forward DCT on a 6x6 sample block.
497
 */
498
499
GLOBAL(void)
500
jpeg_fdct_6x6 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
501
0
{
502
0
  INT32 tmp0, tmp1, tmp2;
503
0
  INT32 tmp10, tmp11, tmp12;
504
0
  DCTELEM *dataptr;
505
0
  JSAMPROW elemptr;
506
0
  int ctr;
507
0
  SHIFT_TEMPS
508
509
  /* Pre-zero output coefficient block. */
510
0
  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
511
512
  /* Pass 1: process rows.
513
   * Note results are scaled up by sqrt(8) compared to a true DCT;
514
   * furthermore, we scale the results by 2**PASS1_BITS.
515
   * cK represents sqrt(2) * cos(K*pi/12).
516
   */
517
518
0
  dataptr = data;
519
0
  for (ctr = 0; ctr < 6; ctr++) {
520
0
    elemptr = sample_data[ctr] + start_col;
521
522
    /* Even part */
523
524
0
    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[5]);
525
0
    tmp11 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[4]);
526
0
    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[3]);
527
528
0
    tmp10 = tmp0 + tmp2;
529
0
    tmp12 = tmp0 - tmp2;
530
531
0
    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[5]);
532
0
    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[4]);
533
0
    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[3]);
534
535
    /* Apply unsigned->signed conversion. */
536
0
    dataptr[0] = PASS1_OUTPUT(tmp10 + tmp11 - 6 * CENTERJSAMPLE);
537
0
    dataptr[2] = (DCTELEM)
538
0
      DESCALE(MULTIPLY(tmp12, FIX(1.224744871)),                 /* c2 */
539
0
        CONST_BITS-PASS1_BITS);
540
0
    dataptr[4] = (DCTELEM)
541
0
      DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(0.707106781)), /* c4 */
542
0
        CONST_BITS-PASS1_BITS);
543
544
    /* Odd part */
545
546
0
    tmp10 = DESCALE(MULTIPLY(tmp0 + tmp2, FIX(0.366025404)),     /* c5 */
547
0
        CONST_BITS-PASS1_BITS);
548
549
0
#if PASS1_BITS > 0
550
0
    dataptr[1] = (DCTELEM) (tmp10 + ((tmp0 + tmp1) << PASS1_BITS));
551
0
    dataptr[3] = (DCTELEM) ((tmp0 - tmp1 - tmp2) << PASS1_BITS);
552
0
    dataptr[5] = (DCTELEM) (tmp10 + ((tmp2 - tmp1) << PASS1_BITS));
553
#else
554
    dataptr[1] = (DCTELEM) (tmp10 + tmp0 + tmp1);
555
    dataptr[3] = (DCTELEM) (tmp0 - tmp1 - tmp2);
556
    dataptr[5] = (DCTELEM) (tmp10 + tmp2 - tmp1);
557
#endif
558
559
0
    dataptr += DCTSIZE;   /* advance pointer to next row */
560
0
  }
561
562
  /* Pass 2: process columns.
563
   * We apply the PASS2_BITS scaling, but leave the
564
   * results scaled up by an overall factor of 8.
565
   * We must also scale the output by (8/6)**2 = 16/9,
566
   * which we fold into the constant multipliers:
567
   * cK now represents sqrt(2) * cos(K*pi/12) * 16/9.
568
   */
569
570
0
  dataptr = data;
571
0
  for (ctr = 0; ctr < 6; ctr++) {
572
    /* Even part */
573
574
0
    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*5];
575
0
    tmp11 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*4];
576
0
    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
577
578
0
    tmp10 = tmp0 + tmp2;
579
0
    tmp12 = tmp0 - tmp2;
580
581
0
    tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*5];
582
0
    tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*4];
583
0
    tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
584
585
0
    dataptr[DCTSIZE*0] = (DCTELEM)
586
0
      DESCALE(MULTIPLY(tmp10 + tmp11, FIX(1.777777778)),         /* 16/9 */
587
0
        CONST_BITS+PASS2_BITS);
588
0
    dataptr[DCTSIZE*2] = (DCTELEM)
589
0
      DESCALE(MULTIPLY(tmp12, FIX(2.177324216)),                 /* c2 */
590
0
        CONST_BITS+PASS2_BITS);
591
0
    dataptr[DCTSIZE*4] = (DCTELEM)
592
0
      DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(1.257078722)), /* c4 */
593
0
        CONST_BITS+PASS2_BITS);
594
595
    /* Odd part */
596
597
0
    tmp10 = MULTIPLY(tmp0 + tmp2, FIX(0.650711829));             /* c5 */
598
599
0
    dataptr[DCTSIZE*1] = (DCTELEM)
600
0
      DESCALE(tmp10 + MULTIPLY(tmp0 + tmp1, FIX(1.777777778)),   /* 16/9 */
601
0
        CONST_BITS+PASS2_BITS);
602
0
    dataptr[DCTSIZE*3] = (DCTELEM)
603
0
      DESCALE(MULTIPLY(tmp0 - tmp1 - tmp2, FIX(1.777777778)),    /* 16/9 */
604
0
        CONST_BITS+PASS2_BITS);
605
0
    dataptr[DCTSIZE*5] = (DCTELEM)
606
0
      DESCALE(tmp10 + MULTIPLY(tmp2 - tmp1, FIX(1.777777778)),   /* 16/9 */
607
0
        CONST_BITS+PASS2_BITS);
608
609
0
    dataptr++;      /* advance pointer to next column */
610
0
  }
611
0
}
612
613
614
/*
615
 * Perform the forward DCT on a 5x5 sample block.
616
 */
617
618
GLOBAL(void)
619
jpeg_fdct_5x5 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
620
0
{
621
0
  INT32 tmp0, tmp1, tmp2;
622
0
  INT32 tmp10, tmp11;
623
0
  DCTELEM *dataptr;
624
0
  JSAMPROW elemptr;
625
0
  int ctr;
626
0
  SHIFT_TEMPS
627
628
  /* Pre-zero output coefficient block. */
629
0
  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
630
631
  /* Pass 1: process rows.
632
   * Note results are scaled up by sqrt(8) compared to a true DCT;
633
   * furthermore, we scale the results by 2**PASS1_BITS.
634
   * We scale the results further by 2 as part of output adaption
635
   * scaling for different DCT size.
636
   * cK represents sqrt(2) * cos(K*pi/10).
637
   */
638
639
0
  dataptr = data;
640
0
  for (ctr = 0; ctr < 5; ctr++) {
641
0
    elemptr = sample_data[ctr] + start_col;
642
643
    /* Even part */
644
645
0
    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[4]);
646
0
    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[3]);
647
0
    tmp2 = GETJSAMPLE(elemptr[2]);
648
649
0
    tmp10 = tmp0 + tmp1;
650
0
    tmp11 = tmp0 - tmp1;
651
652
0
    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[4]);
653
0
    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[3]);
654
655
    /* Apply unsigned->signed conversion. */
656
0
    dataptr[0] = (DCTELEM)
657
0
      ((tmp10 + tmp2 - 5 * CENTERJSAMPLE) << (PASS1_BITS+1));
658
0
    tmp11 = MULTIPLY(tmp11, FIX(0.790569415));          /* (c2+c4)/2 */
659
0
    tmp10 -= tmp2 << 2;
660
0
    tmp10 = MULTIPLY(tmp10, FIX(0.353553391));          /* (c2-c4)/2 */
661
0
    dataptr[2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS-PASS1_BITS-1);
662
0
    dataptr[4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS-PASS1_BITS-1);
663
664
    /* Odd part */
665
666
0
    tmp10 = MULTIPLY(tmp0 + tmp1, FIX(0.831253876));    /* c3 */
667
668
0
    dataptr[1] = (DCTELEM)
669
0
      DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.513743148)), /* c1-c3 */
670
0
        CONST_BITS-PASS1_BITS-1);
671
0
    dataptr[3] = (DCTELEM)
672
0
      DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.176250899)), /* c1+c3 */
673
0
        CONST_BITS-PASS1_BITS-1);
674
675
0
    dataptr += DCTSIZE;   /* advance pointer to next row */
676
0
  }
677
678
  /* Pass 2: process columns.
679
   * We apply the PASS2_BITS scaling, but leave the
680
   * results scaled up by an overall factor of 8.
681
   * We must also scale the output by (8/5)**2 = 64/25, which we partially
682
   * fold into the constant multipliers (other part was done in pass 1):
683
   * cK now represents sqrt(2) * cos(K*pi/10) * 32/25.
684
   */
685
686
0
  dataptr = data;
687
0
  for (ctr = 0; ctr < 5; ctr++) {
688
    /* Even part */
689
690
0
    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*4];
691
0
    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*3];
692
0
    tmp2 = dataptr[DCTSIZE*2];
693
694
0
    tmp10 = tmp0 + tmp1;
695
0
    tmp11 = tmp0 - tmp1;
696
697
0
    tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*4];
698
0
    tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*3];
699
700
0
    dataptr[DCTSIZE*0] = (DCTELEM)
701
0
      DESCALE(MULTIPLY(tmp10 + tmp2, FIX(1.28)),        /* 32/25 */
702
0
        CONST_BITS+PASS2_BITS);
703
0
    tmp11 = MULTIPLY(tmp11, FIX(1.011928851));          /* (c2+c4)/2 */
704
0
    tmp10 -= tmp2 << 2;
705
0
    tmp10 = MULTIPLY(tmp10, FIX(0.452548340));          /* (c2-c4)/2 */
706
0
    dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS+PASS2_BITS);
707
0
    dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS+PASS2_BITS);
708
709
    /* Odd part */
710
711
0
    tmp10 = MULTIPLY(tmp0 + tmp1, FIX(1.064004961));    /* c3 */
712
713
0
    dataptr[DCTSIZE*1] = (DCTELEM)
714
0
      DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.657591230)), /* c1-c3 */
715
0
        CONST_BITS+PASS2_BITS);
716
0
    dataptr[DCTSIZE*3] = (DCTELEM)
717
0
      DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.785601151)), /* c1+c3 */
718
0
        CONST_BITS+PASS2_BITS);
719
720
0
    dataptr++;      /* advance pointer to next column */
721
0
  }
722
0
}
723
724
725
/*
726
 * Perform the forward DCT on a 4x4 sample block.
727
 */
728
729
GLOBAL(void)
730
jpeg_fdct_4x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
731
0
{
732
0
  INT32 tmp0, tmp1;
733
0
  INT32 tmp10, tmp11;
734
0
  DCTELEM *dataptr;
735
0
  JSAMPROW elemptr;
736
0
  int ctr;
737
0
  SHIFT_TEMPS
738
739
  /* Pre-zero output coefficient block. */
740
0
  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
741
742
  /* Pass 1: process rows.
743
   * Note results are scaled up by sqrt(8) compared to a true DCT;
744
   * furthermore, we scale the results by 2**PASS1_BITS.
745
   * We must also scale the output by (8/4)**2 = 2**2, which we add here.
746
   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
747
   */
748
749
0
  dataptr = data;
750
0
  for (ctr = 0; ctr < 4; ctr++) {
751
0
    elemptr = sample_data[ctr] + start_col;
752
753
    /* Even part */
754
755
0
    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[3]);
756
0
    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]);
757
758
0
    tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]);
759
0
    tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]);
760
761
    /* Apply unsigned->signed conversion. */
762
0
    dataptr[0] = (DCTELEM)
763
0
      ((tmp0 + tmp1 - 4 * CENTERJSAMPLE) << (PASS1_BITS+2));
764
0
    dataptr[2] = (DCTELEM) ((tmp0 - tmp1) << (PASS1_BITS+2));
765
766
    /* Odd part */
767
768
0
    tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100);       /* c6 */
769
    /* Add fudge factor here for final descale. */
770
0
    tmp0 += ONE << (CONST_BITS-PASS1_BITS-3);
771
772
0
    dataptr[1] = (DCTELEM)
773
0
      RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
774
0
      CONST_BITS-PASS1_BITS-2);
775
0
    dataptr[3] = (DCTELEM)
776
0
      RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
777
0
      CONST_BITS-PASS1_BITS-2);
778
779
0
    dataptr += DCTSIZE;   /* advance pointer to next row */
780
0
  }
781
782
  /* Pass 2: process columns.
783
   * We apply the PASS2_BITS scaling, but leave the
784
   * results scaled up by an overall factor of 8.
785
   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
786
   */
787
788
0
  dataptr = data;
789
0
  for (ctr = 0; ctr < 4; ctr++) {
790
    /* Even part */
791
792
    /* Add fudge factor here for final descale. */
793
0
#if PASS2_BITS > 1
794
0
    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3] + (ONE << (PASS2_BITS-1));
795
#else
796
#if PASS2_BITS > 0
797
    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3] + ONE;
798
#else
799
    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3];
800
#endif
801
#endif
802
0
    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*2];
803
804
0
    tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*3];
805
0
    tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*2];
806
807
0
    dataptr[DCTSIZE*0] = PASS2_OUTPUT(tmp0 + tmp1);
808
0
    dataptr[DCTSIZE*2] = PASS2_OUTPUT(tmp0 - tmp1);
809
810
    /* Odd part */
811
812
0
    tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100);       /* c6 */
813
    /* Add fudge factor here for final descale. */
814
0
    tmp0 += ONE << (CONST_BITS+PASS2_BITS-1);
815
816
0
    dataptr[DCTSIZE*1] = (DCTELEM)
817
0
      RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
818
0
      CONST_BITS+PASS2_BITS);
819
0
    dataptr[DCTSIZE*3] = (DCTELEM)
820
0
      RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
821
0
      CONST_BITS+PASS2_BITS);
822
823
0
    dataptr++;      /* advance pointer to next column */
824
0
  }
825
0
}
826
827
828
/*
829
 * Perform the forward DCT on a 3x3 sample block.
830
 */
831
832
GLOBAL(void)
833
jpeg_fdct_3x3 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
834
0
{
835
0
  INT32 tmp0, tmp1, tmp2;
836
0
  DCTELEM *dataptr;
837
0
  JSAMPROW elemptr;
838
0
  int ctr;
839
0
  SHIFT_TEMPS
840
841
  /* Pre-zero output coefficient block. */
842
0
  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
843
844
  /* Pass 1: process rows.
845
   * Note results are scaled up by sqrt(8) compared to a true DCT;
846
   * furthermore, we scale the results by 2**PASS1_BITS.
847
   * We scale the results further by 2**2 as part of output adaption
848
   * scaling for different DCT size.
849
   * cK represents sqrt(2) * cos(K*pi/6).
850
   */
851
852
0
  dataptr = data;
853
0
  for (ctr = 0; ctr < 3; ctr++) {
854
0
    elemptr = sample_data[ctr] + start_col;
855
856
    /* Even part */
857
858
0
    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[2]);
859
0
    tmp1 = GETJSAMPLE(elemptr[1]);
860
861
0
    tmp2 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[2]);
862
863
    /* Apply unsigned->signed conversion. */
864
0
    dataptr[0] = (DCTELEM)
865
0
      ((tmp0 + tmp1 - 3 * CENTERJSAMPLE) << (PASS1_BITS+2));
866
0
    dataptr[2] = (DCTELEM)
867
0
      DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(0.707106781)), /* c2 */
868
0
        CONST_BITS-PASS1_BITS-2);
869
870
    /* Odd part */
871
872
0
    dataptr[1] = (DCTELEM)
873
0
      DESCALE(MULTIPLY(tmp2, FIX(1.224744871)),               /* c1 */
874
0
        CONST_BITS-PASS1_BITS-2);
875
876
0
    dataptr += DCTSIZE;   /* advance pointer to next row */
877
0
  }
878
879
  /* Pass 2: process columns.
880
   * We apply the PASS2_BITS scaling, but leave the
881
   * results scaled up by an overall factor of 8.
882
   * We must also scale the output by (8/3)**2 = 64/9, which we partially
883
   * fold into the constant multipliers (other part was done in pass 1):
884
   * cK now represents sqrt(2) * cos(K*pi/6) * 16/9.
885
   */
886
887
0
  dataptr = data;
888
0
  for (ctr = 0; ctr < 3; ctr++) {
889
    /* Even part */
890
891
0
    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*2];
892
0
    tmp1 = dataptr[DCTSIZE*1];
893
894
0
    tmp2 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*2];
895
896
0
    dataptr[DCTSIZE*0] = (DCTELEM)
897
0
      DESCALE(MULTIPLY(tmp0 + tmp1, FIX(1.777777778)),        /* 16/9 */
898
0
        CONST_BITS+PASS2_BITS);
899
0
    dataptr[DCTSIZE*2] = (DCTELEM)
900
0
      DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(1.257078722)), /* c2 */
901
0
        CONST_BITS+PASS2_BITS);
902
903
    /* Odd part */
904
905
0
    dataptr[DCTSIZE*1] = (DCTELEM)
906
0
      DESCALE(MULTIPLY(tmp2, FIX(2.177324216)),               /* c1 */
907
0
        CONST_BITS+PASS2_BITS);
908
909
0
    dataptr++;      /* advance pointer to next column */
910
0
  }
911
0
}
912
913
914
/*
915
 * Perform the forward DCT on a 2x2 sample block.
916
 */
917
918
GLOBAL(void)
919
jpeg_fdct_2x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
920
0
{
921
0
  DCTELEM tmp0, tmp1, tmp2, tmp3;
922
0
  JSAMPROW elemptr;
923
924
  /* Pre-zero output coefficient block. */
925
0
  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
926
927
  /* Pass 1: process rows.
928
   * Note results are scaled up by sqrt(8) compared to a true DCT.
929
   */
930
931
  /* Row 0 */
932
0
  elemptr = sample_data[0] + start_col;
933
934
0
  tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[1]);
935
0
  tmp1 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[1]);
936
937
  /* Row 1 */
938
0
  elemptr = sample_data[1] + start_col;
939
940
0
  tmp2 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[1]);
941
0
  tmp3 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[1]);
942
943
  /* Pass 2: process columns.
944
   * We leave the results scaled up by an overall factor of 8.
945
   * We must also scale the output by (8/2)**2 = 2**4.
946
   */
947
948
  /* Column 0 */
949
  /* Apply unsigned->signed conversion. */
950
951
0
#if PASS2_BITS < PASS1_BITS + 4
952
0
  data[DCTSIZE*0] =
953
0
    (tmp0 + tmp2 - 4 * CENTERJSAMPLE) << (4+PASS1_BITS-PASS2_BITS);
954
0
  data[DCTSIZE*1] = (tmp0 - tmp2) << (4+PASS1_BITS-PASS2_BITS);
955
956
  /* Column 1 */
957
0
  data[DCTSIZE*0+1] = (tmp1 + tmp3) << (4+PASS1_BITS-PASS2_BITS);
958
0
  data[DCTSIZE*1+1] = (tmp1 - tmp3) << (4+PASS1_BITS-PASS2_BITS);
959
#else
960
  data[DCTSIZE*0] = tmp0 + tmp2 - 4 * CENTERJSAMPLE;
961
  data[DCTSIZE*1] = tmp0 - tmp2;
962
963
  /* Column 1 */
964
  data[DCTSIZE*0+1] = tmp1 + tmp3;
965
  data[DCTSIZE*1+1] = tmp1 - tmp3;
966
#endif
967
0
}
968
969
970
/*
971
 * Perform the forward DCT on a 1x1 sample block.
972
 */
973
974
GLOBAL(void)
975
jpeg_fdct_1x1 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
976
0
{
977
0
  DCTELEM dcval;
978
979
  /* Pre-zero output coefficient block. */
980
0
  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
981
982
0
  dcval = GETJSAMPLE(sample_data[0][start_col]);
983
984
  /* We leave the result scaled up by an overall factor of 8. */
985
  /* We must also scale the output by (8/1)**2 = 2**6. */
986
  /* Apply unsigned->signed conversion. */
987
0
  data[0] = (dcval - CENTERJSAMPLE) << (6+PASS1_BITS-PASS2_BITS);
988
0
}
989
990
991
/* Pass 1 bits decrement scaling for block sizes 9, 10, 11.
 *
 * PASS1_DECR is PASS1_BITS reduced by one, but never below zero.
 * PASS1_OUTDEC(x) scales x up by 2**PASS1_DECR and converts it to DCTELEM.
 * The scaling is written as a multiplication because x may be negative and
 * left-shifting a negative value is undefined behavior in C.
 * Both expansions are fully parenthesized so that the macro can be used
 * safely inside a larger expression.
 */

#if PASS1_BITS > 0
#define PASS1_DECR  (PASS1_BITS - 1)
#else
#define PASS1_DECR  0
#endif

#if PASS1_DECR > 0
#define PASS1_OUTDEC(x)  ((DCTELEM) ((x) * (1 << PASS1_DECR)))
#else
#define PASS1_OUTDEC(x)  ((DCTELEM) (x))
#endif
1004
1005
1006
/*
1007
 * Perform the forward DCT on a 9x9 sample block.
1008
 */
1009
1010
GLOBAL(void)
1011
jpeg_fdct_9x9 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
1012
0
{
1013
0
  INT32 tmp0, tmp1, tmp2, tmp3, tmp4;
1014
0
  INT32 tmp10, tmp11, tmp12, tmp13;
1015
0
  INT32 z1, z2;
1016
0
  DCTELEM workspace[8];
1017
0
  DCTELEM *dataptr;
1018
0
  DCTELEM *wsptr;
1019
0
  JSAMPROW elemptr;
1020
0
  int ctr;
1021
0
  SHIFT_TEMPS
1022
1023
  /* Pass 1: process rows.
1024
   * Note results are scaled up by sqrt(8) compared to a true DCT;
1025
   * furthermore, we scale the results by 2**PASS1_DECR.
1026
   * cK represents sqrt(2) * cos(K*pi/18).
1027
   */
1028
1029
0
  dataptr = data;
1030
0
  ctr = 0;
1031
0
  for (;;) {
1032
0
    elemptr = sample_data[ctr] + start_col;
1033
1034
    /* Even part */
1035
1036
0
    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[8]);
1037
0
    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[7]);
1038
0
    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[6]);
1039
0
    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[5]);
1040
0
    tmp4 = GETJSAMPLE(elemptr[4]);
1041
1042
0
    tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[8]);
1043
0
    tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[7]);
1044
0
    tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[6]);
1045
0
    tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[5]);
1046
1047
0
    z1 = tmp0 + tmp2 + tmp3;
1048
0
    z2 = tmp1 + tmp4;
1049
    /* Apply unsigned->signed conversion. */
1050
0
    dataptr[0] = PASS1_OUTDEC(z1 + z2 - 9 * CENTERJSAMPLE);
1051
0
    dataptr[6] = (DCTELEM)
1052
0
      DESCALE(MULTIPLY(z1 - z2 - z2, FIX(0.707106781)),  /* c6 */
1053
0
        CONST_BITS-PASS1_DECR);
1054
0
    z1 = MULTIPLY(tmp0 - tmp2, FIX(1.328926049));        /* c2 */
1055
0
    z2 = MULTIPLY(tmp1 - tmp4 - tmp4, FIX(0.707106781)); /* c6 */
1056
0
    dataptr[2] = (DCTELEM)
1057
0
      DESCALE(MULTIPLY(tmp2 - tmp3, FIX(1.083350441))    /* c4 */
1058
0
        + z1 + z2, CONST_BITS-PASS1_DECR);
1059
0
    dataptr[4] = (DCTELEM)
1060
0
      DESCALE(MULTIPLY(tmp3 - tmp0, FIX(0.245575608))    /* c8 */
1061
0
        + z1 - z2, CONST_BITS-PASS1_DECR);
1062
1063
    /* Odd part */
1064
1065
0
    dataptr[3] = (DCTELEM)
1066
0
      DESCALE(MULTIPLY(tmp10 - tmp12 - tmp13, FIX(1.224744871)), /* c3 */
1067
0
        CONST_BITS-PASS1_DECR);
1068
1069
0
    tmp11 = MULTIPLY(tmp11, FIX(1.224744871));        /* c3 */
1070
0
    tmp0 = MULTIPLY(tmp10 + tmp12, FIX(0.909038955)); /* c5 */
1071
0
    tmp1 = MULTIPLY(tmp10 + tmp13, FIX(0.483689525)); /* c7 */
1072
1073
0
    dataptr[1] = (DCTELEM)
1074
0
      DESCALE(tmp11 + tmp0 + tmp1, CONST_BITS-PASS1_DECR);
1075
1076
0
    tmp2 = MULTIPLY(tmp12 - tmp13, FIX(1.392728481)); /* c1 */
1077
1078
0
    dataptr[5] = (DCTELEM)
1079
0
      DESCALE(tmp0 - tmp11 - tmp2, CONST_BITS-PASS1_DECR);
1080
0
    dataptr[7] = (DCTELEM)
1081
0
      DESCALE(tmp1 - tmp11 + tmp2, CONST_BITS-PASS1_DECR);
1082
1083
0
    ctr++;
1084
1085
0
    if (ctr != DCTSIZE) {
1086
0
      if (ctr == 9)
1087
0
  break;     /* Done. */
1088
0
      dataptr += DCTSIZE; /* advance pointer to next row */
1089
0
    } else
1090
0
      dataptr = workspace; /* switch pointer to extended workspace */
1091
0
  }
1092
1093
  /* Pass 2: process columns.
1094
   * We remove the PASS1_DECR scaling, but leave the results scaled up
1095
   * by an overall factor of 8.
1096
   * We must also scale the output by (8/9)**2 = 64/81, which we partially
1097
   * fold into the constant multipliers and final shifting:
1098
   * cK now represents sqrt(2) * cos(K*pi/18) * 128/81.
1099
   */
1100
1101
0
  dataptr = data;
1102
0
  wsptr = workspace;
1103
0
  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
1104
    /* Even part */
1105
1106
0
    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*0];
1107
0
    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*7];
1108
0
    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*6];
1109
0
    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*5];
1110
0
    tmp4 = dataptr[DCTSIZE*4];
1111
1112
0
    tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*0];
1113
0
    tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*7];
1114
0
    tmp12 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*6];
1115
0
    tmp13 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*5];
1116
1117
0
    z1 = tmp0 + tmp2 + tmp3;
1118
0
    z2 = tmp1 + tmp4;
1119
0
    dataptr[DCTSIZE*0] = (DCTELEM)
1120
0
      DESCALE(MULTIPLY(z1 + z2, FIX(1.580246914)),       /* 128/81 */
1121
0
        CONST_BITS+PASS1_DECR+1+PASS2_BITS-PASS1_BITS);
1122
0
    dataptr[DCTSIZE*6] = (DCTELEM)
1123
0
      DESCALE(MULTIPLY(z1 - z2 - z2, FIX(1.117403309)),  /* c6 */
1124
0
        CONST_BITS+PASS1_DECR+1+PASS2_BITS-PASS1_BITS);
1125
0
    z1 = MULTIPLY(tmp0 - tmp2, FIX(2.100031287));        /* c2 */
1126
0
    z2 = MULTIPLY(tmp1 - tmp4 - tmp4, FIX(1.117403309)); /* c6 */
1127
0
    dataptr[DCTSIZE*2] = (DCTELEM)
1128
0
      DESCALE(MULTIPLY(tmp2 - tmp3, FIX(1.711961190))    /* c4 */
1129
0
        + z1 + z2, CONST_BITS+PASS1_DECR+1+PASS2_BITS-PASS1_BITS);
1130
0
    dataptr[DCTSIZE*4] = (DCTELEM)
1131
0
      DESCALE(MULTIPLY(tmp3 - tmp0, FIX(0.388070096))    /* c8 */
1132
0
        + z1 - z2, CONST_BITS+PASS1_DECR+1+PASS2_BITS-PASS1_BITS);
1133
1134
    /* Odd part */
1135
1136
0
    dataptr[DCTSIZE*3] = (DCTELEM)
1137
0
      DESCALE(MULTIPLY(tmp10 - tmp12 - tmp13, FIX(1.935399303)), /* c3 */
1138
0
        CONST_BITS+PASS1_DECR+1+PASS2_BITS-PASS1_BITS);
1139
1140
0
    tmp11 = MULTIPLY(tmp11, FIX(1.935399303));        /* c3 */
1141
0
    tmp0 = MULTIPLY(tmp10 + tmp12, FIX(1.436506004)); /* c5 */
1142
0
    tmp1 = MULTIPLY(tmp10 + tmp13, FIX(0.764348879)); /* c7 */
1143
1144
0
    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp11 + tmp0 + tmp1,
1145
0
      CONST_BITS+PASS1_DECR+1+PASS2_BITS-PASS1_BITS);
1146
1147
0
    tmp2 = MULTIPLY(tmp12 - tmp13, FIX(2.200854883)); /* c1 */
1148
1149
0
    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp0 - tmp11 - tmp2,
1150
0
      CONST_BITS+PASS1_DECR+1+PASS2_BITS-PASS1_BITS);
1151
0
    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp1 - tmp11 + tmp2,
1152
0
      CONST_BITS+PASS1_DECR+1+PASS2_BITS-PASS1_BITS);
1153
1154
0
    dataptr++;      /* advance pointer to next column */
1155
0
    wsptr++;      /* advance pointer to next column */
1156
0
  }
1157
0
}
1158
1159
1160
/*
1161
 * Perform the forward DCT on a 10x10 sample block.
1162
 */
1163
1164
GLOBAL(void)
1165
jpeg_fdct_10x10 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
1166
0
{
1167
0
  INT32 tmp0, tmp1, tmp2, tmp3, tmp4;
1168
0
  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
1169
0
  DCTELEM workspace[8*2];
1170
0
  DCTELEM *dataptr;
1171
0
  DCTELEM *wsptr;
1172
0
  JSAMPROW elemptr;
1173
0
  int ctr;
1174
0
  SHIFT_TEMPS
1175
1176
  /* Pass 1: process rows.
1177
   * Note results are scaled up by sqrt(8) compared to a true DCT;
1178
   * furthermore, we scale the results by 2**PASS1_DECR.
1179
   * cK represents sqrt(2) * cos(K*pi/20).
1180
   */
1181
1182
0
  dataptr = data;
1183
0
  ctr = 0;
1184
0
  for (;;) {
1185
0
    elemptr = sample_data[ctr] + start_col;
1186
1187
    /* Even part */
1188
1189
0
    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[9]);
1190
0
    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[8]);
1191
0
    tmp12 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[7]);
1192
0
    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[6]);
1193
0
    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[5]);
1194
1195
0
    tmp10 = tmp0 + tmp4;
1196
0
    tmp13 = tmp0 - tmp4;
1197
0
    tmp11 = tmp1 + tmp3;
1198
0
    tmp14 = tmp1 - tmp3;
1199
1200
0
    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[9]);
1201
0
    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[8]);
1202
0
    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[7]);
1203
0
    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[6]);
1204
0
    tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[5]);
1205
1206
    /* Apply unsigned->signed conversion. */
1207
0
    dataptr[0] =
1208
0
      PASS1_OUTDEC(tmp10 + tmp11 + tmp12 - 10 * CENTERJSAMPLE);
1209
0
    tmp12 += tmp12;
1210
0
    dataptr[4] = (DCTELEM)
1211
0
      DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.144122806)) - /* c4 */
1212
0
        MULTIPLY(tmp11 - tmp12, FIX(0.437016024)),  /* c8 */
1213
0
        CONST_BITS-PASS1_DECR);
1214
0
    tmp10 = MULTIPLY(tmp13 + tmp14, FIX(0.831253876));    /* c6 */
1215
0
    dataptr[2] = (DCTELEM)
1216
0
      DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.513743148)),  /* c2-c6 */
1217
0
        CONST_BITS-PASS1_DECR);
1218
0
    dataptr[6] = (DCTELEM)
1219
0
      DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.176250899)),  /* c2+c6 */
1220
0
        CONST_BITS-PASS1_DECR);
1221
1222
    /* Odd part */
1223
1224
0
    tmp10 = tmp0 + tmp4;
1225
0
    tmp11 = tmp1 - tmp3;
1226
0
    dataptr[5] = PASS1_OUTDEC(tmp10 - tmp11 - tmp2);
1227
0
    tmp2 <<= CONST_BITS;
1228
0
    dataptr[1] = (DCTELEM)
1229
0
      DESCALE(MULTIPLY(tmp0, FIX(1.396802247)) +          /* c1 */
1230
0
        MULTIPLY(tmp1, FIX(1.260073511)) + tmp2 +   /* c3 */
1231
0
        MULTIPLY(tmp3, FIX(0.642039522)) +          /* c7 */
1232
0
        MULTIPLY(tmp4, FIX(0.221231742)),           /* c9 */
1233
0
        CONST_BITS-PASS1_DECR);
1234
0
    tmp12 = MULTIPLY(tmp0 - tmp4, FIX(0.951056516)) -     /* (c3+c7)/2 */
1235
0
      MULTIPLY(tmp1 + tmp3, FIX(0.587785252));      /* (c1-c9)/2 */
1236
0
    tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.309016994)) +   /* (c3-c7)/2 */
1237
0
      (tmp11 << (CONST_BITS - 1)) - tmp2;
1238
0
    dataptr[3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS-PASS1_DECR);
1239
0
    dataptr[7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS-PASS1_DECR);
1240
1241
0
    ctr++;
1242
1243
0
    if (ctr != DCTSIZE) {
1244
0
      if (ctr == 10)
1245
0
  break;     /* Done. */
1246
0
      dataptr += DCTSIZE; /* advance pointer to next row */
1247
0
    } else
1248
0
      dataptr = workspace; /* switch pointer to extended workspace */
1249
0
  }
1250
1251
  /* Pass 2: process columns.
1252
   * We remove the PASS1_DECR scaling, but leave the results scaled up
1253
   * by an overall factor of 8.
1254
   * We must also scale the output by (8/10)**2 = 16/25, which we partially
1255
   * fold into the constant multipliers and final shifting:
1256
   * cK now represents sqrt(2) * cos(K*pi/20) * 32/25.
1257
   */
1258
1259
0
  dataptr = data;
1260
0
  wsptr = workspace;
1261
0
  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
1262
    /* Even part */
1263
1264
0
    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*1];
1265
0
    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*0];
1266
0
    tmp12 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*7];
1267
0
    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*6];
1268
0
    tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5];
1269
1270
0
    tmp10 = tmp0 + tmp4;
1271
0
    tmp13 = tmp0 - tmp4;
1272
0
    tmp11 = tmp1 + tmp3;
1273
0
    tmp14 = tmp1 - tmp3;
1274
1275
0
    tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*1];
1276
0
    tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*0];
1277
0
    tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*7];
1278
0
    tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*6];
1279
0
    tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5];
1280
1281
0
    dataptr[DCTSIZE*0] = (DCTELEM)
1282
0
      DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(1.28)), /* 32/25 */
1283
0
        CONST_BITS+PASS1_DECR+1+PASS2_BITS-PASS1_BITS);
1284
0
    tmp12 += tmp12;
1285
0
    dataptr[DCTSIZE*4] = (DCTELEM)
1286
0
      DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.464477191)) - /* c4 */
1287
0
        MULTIPLY(tmp11 - tmp12, FIX(0.559380511)),  /* c8 */
1288
0
        CONST_BITS+PASS1_DECR+1+PASS2_BITS-PASS1_BITS);
1289
0
    tmp10 = MULTIPLY(tmp13 + tmp14, FIX(1.064004961));    /* c6 */
1290
0
    dataptr[DCTSIZE*2] = (DCTELEM)
1291
0
      DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.657591230)),  /* c2-c6 */
1292
0
        CONST_BITS+PASS1_DECR+1+PASS2_BITS-PASS1_BITS);
1293
0
    dataptr[DCTSIZE*6] = (DCTELEM)
1294
0
      DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.785601151)),  /* c2+c6 */
1295
0
        CONST_BITS+PASS1_DECR+1+PASS2_BITS-PASS1_BITS);
1296
1297
    /* Odd part */
1298
1299
0
    tmp10 = tmp0 + tmp4;
1300
0
    tmp11 = tmp1 - tmp3;
1301
0
    dataptr[DCTSIZE*5] = (DCTELEM)
1302
0
      DESCALE(MULTIPLY(tmp10 - tmp11 - tmp2, FIX(1.28)),  /* 32/25 */
1303
0
        CONST_BITS+PASS1_DECR+1+PASS2_BITS-PASS1_BITS);
1304
0
    tmp2 = MULTIPLY(tmp2, FIX(1.28));                     /* 32/25 */
1305
0
    dataptr[DCTSIZE*1] = (DCTELEM)
1306
0
      DESCALE(MULTIPLY(tmp0, FIX(1.787906876)) +          /* c1 */
1307
0
        MULTIPLY(tmp1, FIX(1.612894094)) + tmp2 +   /* c3 */
1308
0
        MULTIPLY(tmp3, FIX(0.821810588)) +          /* c7 */
1309
0
        MULTIPLY(tmp4, FIX(0.283176630)),           /* c9 */
1310
0
        CONST_BITS+PASS1_DECR+1+PASS2_BITS-PASS1_BITS);
1311
0
    tmp12 = MULTIPLY(tmp0 - tmp4, FIX(1.217352341)) -     /* (c3+c7)/2 */
1312
0
      MULTIPLY(tmp1 + tmp3, FIX(0.752365123));      /* (c1-c9)/2 */
1313
0
    tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.395541753)) +   /* (c3-c7)/2 */
1314
0
      MULTIPLY(tmp11, FIX(0.64)) - tmp2;            /* 16/25 */
1315
0
    dataptr[DCTSIZE*3] = (DCTELEM)
1316
0
      DESCALE(tmp12 + tmp13, CONST_BITS+PASS1_DECR+1+PASS2_BITS-PASS1_BITS);
1317
0
    dataptr[DCTSIZE*7] = (DCTELEM)
1318
0
      DESCALE(tmp12 - tmp13, CONST_BITS+PASS1_DECR+1+PASS2_BITS-PASS1_BITS);
1319
1320
0
    dataptr++;      /* advance pointer to next column */
1321
0
    wsptr++;      /* advance pointer to next column */
1322
0
  }
1323
0
}
1324
1325
1326
/*
1327
 * Perform the forward DCT on an 11x11 sample block.
1328
 */
1329
1330
GLOBAL(void)
1331
jpeg_fdct_11x11 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
1332
0
{
1333
0
  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
1334
0
  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
1335
0
  INT32 z1, z2, z3;
1336
0
  DCTELEM workspace[8*3];
1337
0
  DCTELEM *dataptr;
1338
0
  DCTELEM *wsptr;
1339
0
  JSAMPROW elemptr;
1340
0
  int ctr;
1341
0
  SHIFT_TEMPS
1342
1343
  /* Pass 1: process rows.
1344
   * Note results are scaled up by sqrt(8) compared to a true DCT;
1345
   * furthermore, we scale the results by 2**PASS1_DECR.
1346
   * cK represents sqrt(2) * cos(K*pi/22).
1347
   */
1348
1349
0
  dataptr = data;
1350
0
  ctr = 0;
1351
0
  for (;;) {
1352
0
    elemptr = sample_data[ctr] + start_col;
1353
1354
    /* Even part */
1355
1356
0
    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[10]);
1357
0
    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[9]);
1358
0
    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[8]);
1359
0
    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[7]);
1360
0
    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[6]);
1361
0
    tmp5 = GETJSAMPLE(elemptr[5]);
1362
1363
0
    tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[10]);
1364
0
    tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[9]);
1365
0
    tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[8]);
1366
0
    tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[7]);
1367
0
    tmp14 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[6]);
1368
1369
    /* Apply unsigned->signed conversion. */
1370
0
    dataptr[0] =
1371
0
      PASS1_OUTDEC(tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5 - 11 * CENTERJSAMPLE);
1372
0
    tmp5 += tmp5;
1373
0
    tmp0 -= tmp5;
1374
0
    tmp1 -= tmp5;
1375
0
    tmp2 -= tmp5;
1376
0
    tmp3 -= tmp5;
1377
0
    tmp4 -= tmp5;
1378
0
    z1 = MULTIPLY(tmp0 + tmp3, FIX(1.356927976)) +       /* c2 */
1379
0
   MULTIPLY(tmp2 + tmp4, FIX(0.201263574));        /* c10 */
1380
0
    z2 = MULTIPLY(tmp1 - tmp3, FIX(0.926112931));        /* c6 */
1381
0
    z3 = MULTIPLY(tmp0 - tmp1, FIX(1.189712156));        /* c4 */
1382
0
    dataptr[2] = (DCTELEM)
1383
0
      DESCALE(z1 + z2 - MULTIPLY(tmp3, FIX(1.018300590)) /* c2+c8-c6 */
1384
0
        - MULTIPLY(tmp4, FIX(1.390975730)),        /* c4+c10 */
1385
0
        CONST_BITS-PASS1_DECR);
1386
0
    dataptr[4] = (DCTELEM)
1387
0
      DESCALE(z2 + z3 + MULTIPLY(tmp1, FIX(0.062335650)) /* c4-c6-c10 */
1388
0
        - MULTIPLY(tmp2, FIX(1.356927976))         /* c2 */
1389
0
        + MULTIPLY(tmp4, FIX(0.587485545)),        /* c8 */
1390
0
        CONST_BITS-PASS1_DECR);
1391
0
    dataptr[6] = (DCTELEM)
1392
0
      DESCALE(z1 + z3 - MULTIPLY(tmp0, FIX(1.620527200)) /* c2+c4-c6 */
1393
0
        - MULTIPLY(tmp2, FIX(0.788749120)),        /* c8+c10 */
1394
0
        CONST_BITS-PASS1_DECR);
1395
1396
    /* Odd part */
1397
1398
0
    tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.286413905));    /* c3 */
1399
0
    tmp2 = MULTIPLY(tmp10 + tmp12, FIX(1.068791298));    /* c5 */
1400
0
    tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.764581576));    /* c7 */
1401
0
    tmp0 = tmp1 + tmp2 + tmp3 - MULTIPLY(tmp10, FIX(1.719967871)) /* c7+c5+c3-c1 */
1402
0
     + MULTIPLY(tmp14, FIX(0.398430003));          /* c9 */
1403
0
    tmp4 = MULTIPLY(tmp11 + tmp12, - FIX(0.764581576));  /* -c7 */
1404
0
    tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(1.399818907));  /* -c1 */
1405
0
    tmp1 += tmp4 + tmp5 + MULTIPLY(tmp11, FIX(1.276416582)) /* c9+c7+c1-c3 */
1406
0
      - MULTIPLY(tmp14, FIX(1.068791298));         /* c5 */
1407
0
    tmp10 = MULTIPLY(tmp12 + tmp13, FIX(0.398430003));   /* c9 */
1408
0
    tmp2 += tmp4 + tmp10 - MULTIPLY(tmp12, FIX(1.989053629)) /* c9+c5+c3-c7 */
1409
0
      + MULTIPLY(tmp14, FIX(1.399818907));         /* c1 */
1410
0
    tmp3 += tmp5 + tmp10 + MULTIPLY(tmp13, FIX(1.305598626)) /* c1+c5-c9-c7 */
1411
0
      - MULTIPLY(tmp14, FIX(1.286413905));         /* c3 */
1412
1413
0
    dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS-PASS1_DECR);
1414
0
    dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS-PASS1_DECR);
1415
0
    dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS-PASS1_DECR);
1416
0
    dataptr[7] = (DCTELEM) DESCALE(tmp3, CONST_BITS-PASS1_DECR);
1417
1418
0
    ctr++;
1419
1420
0
    if (ctr != DCTSIZE) {
1421
0
      if (ctr == 11)
1422
0
  break;     /* Done. */
1423
0
      dataptr += DCTSIZE; /* advance pointer to next row */
1424
0
    } else
1425
0
      dataptr = workspace; /* switch pointer to extended workspace */
1426
0
  }
1427
1428
  /* Pass 2: process columns.
1429
   * We remove the PASS1_DECR scaling, but leave the results scaled up
1430
   * by an overall factor of 8.
1431
   * We must also scale the output by (8/11)**2 = 64/121, which we partially
1432
   * fold into the constant multipliers and final shifting:
1433
   * cK now represents sqrt(2) * cos(K*pi/22) * 128/121.
1434
   */
1435
1436
0
  dataptr = data;
1437
0
  wsptr = workspace;
1438
0
  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
1439
    /* Even part */
1440
1441
0
    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*2];
1442
0
    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*1];
1443
0
    tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*0];
1444
0
    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*7];
1445
0
    tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*6];
1446
0
    tmp5 = dataptr[DCTSIZE*5];
1447
1448
0
    tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*2];
1449
0
    tmp11 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*1];
1450
0
    tmp12 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*0];
1451
0
    tmp13 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*7];
1452
0
    tmp14 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*6];
1453
1454
0
    dataptr[DCTSIZE*0] = (DCTELEM)
1455
0
      DESCALE(MULTIPLY(tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5,
1456
0
           FIX(1.057851240)),                /* 128/121 */
1457
0
        CONST_BITS+PASS1_DECR+1+PASS2_BITS-PASS1_BITS);
1458
0
    tmp5 += tmp5;
1459
0
    tmp0 -= tmp5;
1460
0
    tmp1 -= tmp5;
1461
0
    tmp2 -= tmp5;
1462
0
    tmp3 -= tmp5;
1463
0
    tmp4 -= tmp5;
1464
0
    z1 = MULTIPLY(tmp0 + tmp3, FIX(1.435427942)) +       /* c2 */
1465
0
   MULTIPLY(tmp2 + tmp4, FIX(0.212906922));        /* c10 */
1466
0
    z2 = MULTIPLY(tmp1 - tmp3, FIX(0.979689713));        /* c6 */
1467
0
    z3 = MULTIPLY(tmp0 - tmp1, FIX(1.258538479));        /* c4 */
1468
0
    dataptr[DCTSIZE*2] = (DCTELEM)
1469
0
      DESCALE(z1 + z2 - MULTIPLY(tmp3, FIX(1.077210542)) /* c2+c8-c6 */
1470
0
        - MULTIPLY(tmp4, FIX(1.471445400)),        /* c4+c10 */
1471
0
        CONST_BITS+PASS1_DECR+1+PASS2_BITS-PASS1_BITS);
1472
0
    dataptr[DCTSIZE*4] = (DCTELEM)
1473
0
      DESCALE(z2 + z3 + MULTIPLY(tmp1, FIX(0.065941844)) /* c4-c6-c10 */
1474
0
        - MULTIPLY(tmp2, FIX(1.435427942))         /* c2 */
1475
0
        + MULTIPLY(tmp4, FIX(0.621472312)),        /* c8 */
1476
0
        CONST_BITS+PASS1_DECR+1+PASS2_BITS-PASS1_BITS);
1477
0
    dataptr[DCTSIZE*6] = (DCTELEM)
1478
0
      DESCALE(z1 + z3 - MULTIPLY(tmp0, FIX(1.714276708)) /* c2+c4-c6 */
1479
0
        - MULTIPLY(tmp2, FIX(0.834379234)),        /* c8+c10 */
1480
0
        CONST_BITS+PASS1_DECR+1+PASS2_BITS-PASS1_BITS);
1481
1482
    /* Odd part */
1483
1484
0
    tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.360834544));    /* c3 */
1485
0
    tmp2 = MULTIPLY(tmp10 + tmp12, FIX(1.130622199));    /* c5 */
1486
0
    tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.808813568));    /* c7 */
1487
0
    tmp0 = tmp1 + tmp2 + tmp3 - MULTIPLY(tmp10, FIX(1.819470145)) /* c7+c5+c3-c1 */
1488
0
     + MULTIPLY(tmp14, FIX(0.421479672));          /* c9 */
1489
0
    tmp4 = MULTIPLY(tmp11 + tmp12, - FIX(0.808813568));  /* -c7 */
1490
0
    tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(1.480800167));  /* -c1 */
1491
0
    tmp1 += tmp4 + tmp5 + MULTIPLY(tmp11, FIX(1.350258864)) /* c9+c7+c1-c3 */
1492
0
      - MULTIPLY(tmp14, FIX(1.130622199));         /* c5 */
1493
0
    tmp10 = MULTIPLY(tmp12 + tmp13, FIX(0.421479672));   /* c9 */
1494
0
    tmp2 += tmp4 + tmp10 - MULTIPLY(tmp12, FIX(2.104122847)) /* c9+c5+c3-c7 */
1495
0
      + MULTIPLY(tmp14, FIX(1.480800167));         /* c1 */
1496
0
    tmp3 += tmp5 + tmp10 + MULTIPLY(tmp13, FIX(1.381129125)) /* c1+c5-c9-c7 */
1497
0
      - MULTIPLY(tmp14, FIX(1.360834544));         /* c3 */
1498
1499
0
    dataptr[DCTSIZE*1] = (DCTELEM)
1500
0
      DESCALE(tmp0, CONST_BITS+PASS1_DECR+1+PASS2_BITS-PASS1_BITS);
1501
0
    dataptr[DCTSIZE*3] = (DCTELEM)
1502
0
      DESCALE(tmp1, CONST_BITS+PASS1_DECR+1+PASS2_BITS-PASS1_BITS);
1503
0
    dataptr[DCTSIZE*5] = (DCTELEM)
1504
0
      DESCALE(tmp2, CONST_BITS+PASS1_DECR+1+PASS2_BITS-PASS1_BITS);
1505
0
    dataptr[DCTSIZE*7] = (DCTELEM)
1506
0
      DESCALE(tmp3, CONST_BITS+PASS1_DECR+1+PASS2_BITS-PASS1_BITS);
1507
1508
0
    dataptr++;      /* advance pointer to next column */
1509
0
    wsptr++;      /* advance pointer to next column */
1510
0
  }
1511
0
}
1512
1513
1514
/*
1515
 * Perform the forward DCT on a 12x12 sample block.
1516
 */
1517
1518
GLOBAL(void)
1519
jpeg_fdct_12x12 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
1520
0
{
1521
0
  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
1522
0
  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
1523
0
  DCTELEM workspace[8*4];
1524
0
  DCTELEM *dataptr;
1525
0
  DCTELEM *wsptr;
1526
0
  JSAMPROW elemptr;
1527
0
  int ctr;
1528
0
  SHIFT_TEMPS
1529
1530
  /* Pass 1: process rows.
1531
   * Note results are scaled up by sqrt(8) compared to a true DCT.
1532
   * cK represents sqrt(2) * cos(K*pi/24).
1533
   */
1534
1535
0
  dataptr = data;
1536
0
  ctr = 0;
1537
0
  for (;;) {
1538
0
    elemptr = sample_data[ctr] + start_col;
1539
1540
    /* Even part */
1541
1542
0
    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[11]);
1543
0
    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[10]);
1544
0
    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[9]);
1545
0
    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[8]);
1546
0
    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[7]);
1547
0
    tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[6]);
1548
1549
0
    tmp10 = tmp0 + tmp5;
1550
0
    tmp13 = tmp0 - tmp5;
1551
0
    tmp11 = tmp1 + tmp4;
1552
0
    tmp14 = tmp1 - tmp4;
1553
0
    tmp12 = tmp2 + tmp3;
1554
0
    tmp15 = tmp2 - tmp3;
1555
1556
0
    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[11]);
1557
0
    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[10]);
1558
0
    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[9]);
1559
0
    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[8]);
1560
0
    tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[7]);
1561
0
    tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[6]);
1562
1563
    /* Apply unsigned->signed conversion. */
1564
0
    dataptr[0] = (DCTELEM) (tmp10 + tmp11 + tmp12 - 12 * CENTERJSAMPLE);
1565
0
    dataptr[6] = (DCTELEM) (tmp13 - tmp14 - tmp15);
1566
0
    dataptr[4] = (DCTELEM)
1567
0
      DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.224744871)), /* c4 */
1568
0
        CONST_BITS);
1569
0
    dataptr[2] = (DCTELEM)
1570
0
      DESCALE(tmp14 - tmp15 + MULTIPLY(tmp13 + tmp15, FIX(1.366025404)), /* c2 */
1571
0
        CONST_BITS);
1572
1573
    /* Odd part */
1574
1575
0
    tmp10 = MULTIPLY(tmp1 + tmp4, FIX_0_541196100);    /* c9 */
1576
0
    tmp14 = tmp10 + MULTIPLY(tmp1, FIX_0_765366865);   /* c3-c9 */
1577
0
    tmp15 = tmp10 - MULTIPLY(tmp4, FIX_1_847759065);   /* c3+c9 */
1578
0
    tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.121971054));   /* c5 */
1579
0
    tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.860918669));   /* c7 */
1580
0
    tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.580774953)) /* c5+c7-c1 */
1581
0
      + MULTIPLY(tmp5, FIX(0.184591911));        /* c11 */
1582
0
    tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.184591911)); /* -c11 */
1583
0
    tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.339493912)) /* c1+c5-c11 */
1584
0
      + MULTIPLY(tmp5, FIX(0.860918669));        /* c7 */
1585
0
    tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.725788011)) /* c1+c11-c7 */
1586
0
      - MULTIPLY(tmp5, FIX(1.121971054));        /* c5 */
1587
0
    tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.306562965)) /* c3 */
1588
0
      - MULTIPLY(tmp2 + tmp5, FIX_0_541196100);  /* c9 */
1589
1590
0
    dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS);
1591
0
    dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS);
1592
0
    dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS);
1593
0
    dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS);
1594
1595
0
    ctr++;
1596
1597
0
    if (ctr != DCTSIZE) {
1598
0
      if (ctr == 12)
1599
0
  break;     /* Done. */
1600
0
      dataptr += DCTSIZE; /* advance pointer to next row */
1601
0
    } else
1602
0
      dataptr = workspace; /* switch pointer to extended workspace */
1603
0
  }
1604
1605
  /* Pass 2: process columns.
1606
   * We leave the results scaled up by an overall factor of 8.
1607
   * We must also scale the output by (8/12)**2 = 4/9, which we partially
1608
   * fold into the constant multipliers and final shifting:
1609
   * cK now represents sqrt(2) * cos(K*pi/24) * 8/9.
1610
   */
1611
1612
0
  dataptr = data;
1613
0
  wsptr = workspace;
1614
0
  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
1615
    /* Even part */
1616
1617
0
    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*3];
1618
0
    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*2];
1619
0
    tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*1];
1620
0
    tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*0];
1621
0
    tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*7];
1622
0
    tmp5 = dataptr[DCTSIZE*5] + dataptr[DCTSIZE*6];
1623
1624
0
    tmp10 = tmp0 + tmp5;
1625
0
    tmp13 = tmp0 - tmp5;
1626
0
    tmp11 = tmp1 + tmp4;
1627
0
    tmp14 = tmp1 - tmp4;
1628
0
    tmp12 = tmp2 + tmp3;
1629
0
    tmp15 = tmp2 - tmp3;
1630
1631
0
    tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*3];
1632
0
    tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*2];
1633
0
    tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*1];
1634
0
    tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*0];
1635
0
    tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*7];
1636
0
    tmp5 = dataptr[DCTSIZE*5] - dataptr[DCTSIZE*6];
1637
1638
0
    dataptr[DCTSIZE*0] = (DCTELEM)
1639
0
      DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(0.888888889)), /* 8/9 */
1640
0
        CONST_BITS+1+PASS2_BITS-PASS1_BITS);
1641
0
    dataptr[DCTSIZE*6] = (DCTELEM)
1642
0
      DESCALE(MULTIPLY(tmp13 - tmp14 - tmp15, FIX(0.888888889)), /* 8/9 */
1643
0
        CONST_BITS+1+PASS2_BITS-PASS1_BITS);
1644
0
    dataptr[DCTSIZE*4] = (DCTELEM)
1645
0
      DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.088662108)),         /* c4 */
1646
0
        CONST_BITS+1+PASS2_BITS-PASS1_BITS);
1647
0
    dataptr[DCTSIZE*2] = (DCTELEM)
1648
0
      DESCALE(MULTIPLY(tmp14 - tmp15, FIX(0.888888889)) +        /* 8/9 */
1649
0
        MULTIPLY(tmp13 + tmp15, FIX(1.214244803)),         /* c2 */
1650
0
        CONST_BITS+1+PASS2_BITS-PASS1_BITS);
1651
1652
    /* Odd part */
1653
1654
0
    tmp10 = MULTIPLY(tmp1 + tmp4, FIX(0.481063200));   /* c9 */
1655
0
    tmp14 = tmp10 + MULTIPLY(tmp1, FIX(0.680326102));  /* c3-c9 */
1656
0
    tmp15 = tmp10 - MULTIPLY(tmp4, FIX(1.642452502));  /* c3+c9 */
1657
0
    tmp12 = MULTIPLY(tmp0 + tmp2, FIX(0.997307603));   /* c5 */
1658
0
    tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.765261039));   /* c7 */
1659
0
    tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.516244403)) /* c5+c7-c1 */
1660
0
      + MULTIPLY(tmp5, FIX(0.164081699));        /* c11 */
1661
0
    tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.164081699)); /* -c11 */
1662
0
    tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.079550144)) /* c1+c5-c11 */
1663
0
      + MULTIPLY(tmp5, FIX(0.765261039));        /* c7 */
1664
0
    tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.645144899)) /* c1+c11-c7 */
1665
0
      - MULTIPLY(tmp5, FIX(0.997307603));        /* c5 */
1666
0
    tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.161389302)) /* c3 */
1667
0
      - MULTIPLY(tmp2 + tmp5, FIX(0.481063200)); /* c9 */
1668
1669
0
    dataptr[DCTSIZE*1] = (DCTELEM)
1670
0
      DESCALE(tmp10, CONST_BITS+1+PASS2_BITS-PASS1_BITS);
1671
0
    dataptr[DCTSIZE*3] = (DCTELEM)
1672
0
      DESCALE(tmp11, CONST_BITS+1+PASS2_BITS-PASS1_BITS);
1673
0
    dataptr[DCTSIZE*5] = (DCTELEM)
1674
0
      DESCALE(tmp12, CONST_BITS+1+PASS2_BITS-PASS1_BITS);
1675
0
    dataptr[DCTSIZE*7] = (DCTELEM)
1676
0
      DESCALE(tmp13, CONST_BITS+1+PASS2_BITS-PASS1_BITS);
1677
1678
0
    dataptr++;      /* advance pointer to next column */
1679
0
    wsptr++;      /* advance pointer to next column */
1680
0
  }
1681
0
}
1682
1683
1684
/*
1685
 * Perform the forward DCT on a 13x13 sample block.
1686
 */
1687
1688
GLOBAL(void)
1689
jpeg_fdct_13x13 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
1690
0
{
1691
0
  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
1692
0
  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
1693
0
  INT32 z1, z2;
1694
0
  DCTELEM workspace[8*5];
1695
0
  DCTELEM *dataptr;
1696
0
  DCTELEM *wsptr;
1697
0
  JSAMPROW elemptr;
1698
0
  int ctr;
1699
0
  SHIFT_TEMPS
1700
1701
  /* Pass 1: process rows.
1702
   * Note results are scaled up by sqrt(8) compared to a true DCT.
1703
   * cK represents sqrt(2) * cos(K*pi/26).
1704
   */
1705
1706
0
  dataptr = data;
1707
0
  ctr = 0;
1708
0
  for (;;) {
1709
0
    elemptr = sample_data[ctr] + start_col;
1710
1711
    /* Even part */
1712
1713
0
    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[12]);
1714
0
    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[11]);
1715
0
    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[10]);
1716
0
    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[9]);
1717
0
    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[8]);
1718
0
    tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[7]);
1719
0
    tmp6 = GETJSAMPLE(elemptr[6]);
1720
1721
0
    tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[12]);
1722
0
    tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[11]);
1723
0
    tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[10]);
1724
0
    tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[9]);
1725
0
    tmp14 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[8]);
1726
0
    tmp15 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[7]);
1727
1728
    /* Apply unsigned->signed conversion. */
1729
0
    dataptr[0] = (DCTELEM)
1730
0
      (tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5 + tmp6 - 13 * CENTERJSAMPLE);
1731
0
    tmp6 += tmp6;
1732
0
    tmp0 -= tmp6;
1733
0
    tmp1 -= tmp6;
1734
0
    tmp2 -= tmp6;
1735
0
    tmp3 -= tmp6;
1736
0
    tmp4 -= tmp6;
1737
0
    tmp5 -= tmp6;
1738
0
    dataptr[2] = (DCTELEM)
1739
0
      DESCALE(MULTIPLY(tmp0, FIX(1.373119086)) +   /* c2 */
1740
0
        MULTIPLY(tmp1, FIX(1.058554052)) +   /* c6 */
1741
0
        MULTIPLY(tmp2, FIX(0.501487041)) -   /* c10 */
1742
0
        MULTIPLY(tmp3, FIX(0.170464608)) -   /* c12 */
1743
0
        MULTIPLY(tmp4, FIX(0.803364869)) -   /* c8 */
1744
0
        MULTIPLY(tmp5, FIX(1.252223920)),    /* c4 */
1745
0
        CONST_BITS);
1746
0
    z1 = MULTIPLY(tmp0 - tmp2, FIX(1.155388986)) - /* (c4+c6)/2 */
1747
0
   MULTIPLY(tmp3 - tmp4, FIX(0.435816023)) - /* (c2-c10)/2 */
1748
0
   MULTIPLY(tmp1 - tmp5, FIX(0.316450131));  /* (c8-c12)/2 */
1749
0
    z2 = MULTIPLY(tmp0 + tmp2, FIX(0.096834934)) - /* (c4-c6)/2 */
1750
0
   MULTIPLY(tmp3 + tmp4, FIX(0.937303064)) + /* (c2+c10)/2 */
1751
0
   MULTIPLY(tmp1 + tmp5, FIX(0.486914739));  /* (c8+c12)/2 */
1752
1753
0
    dataptr[4] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS);
1754
0
    dataptr[6] = (DCTELEM) DESCALE(z1 - z2, CONST_BITS);
1755
1756
    /* Odd part */
1757
1758
0
    tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.322312651));   /* c3 */
1759
0
    tmp2 = MULTIPLY(tmp10 + tmp12, FIX(1.163874945));   /* c5 */
1760
0
    tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.937797057)) +  /* c7 */
1761
0
     MULTIPLY(tmp14 + tmp15, FIX(0.338443458));   /* c11 */
1762
0
    tmp0 = tmp1 + tmp2 + tmp3 -
1763
0
     MULTIPLY(tmp10, FIX(2.020082300)) +          /* c3+c5+c7-c1 */
1764
0
     MULTIPLY(tmp14, FIX(0.318774355));           /* c9-c11 */
1765
0
    tmp4 = MULTIPLY(tmp14 - tmp15, FIX(0.937797057)) -  /* c7 */
1766
0
     MULTIPLY(tmp11 + tmp12, FIX(0.338443458));   /* c11 */
1767
0
    tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(1.163874945)); /* -c5 */
1768
0
    tmp1 += tmp4 + tmp5 +
1769
0
      MULTIPLY(tmp11, FIX(0.837223564)) -         /* c5+c9+c11-c3 */
1770
0
      MULTIPLY(tmp14, FIX(2.341699410));          /* c1+c7 */
1771
0
    tmp6 = MULTIPLY(tmp12 + tmp13, - FIX(0.657217813)); /* -c9 */
1772
0
    tmp2 += tmp4 + tmp6 -
1773
0
      MULTIPLY(tmp12, FIX(1.572116027)) +         /* c1+c5-c9-c11 */
1774
0
      MULTIPLY(tmp15, FIX(2.260109708));          /* c3+c7 */
1775
0
    tmp3 += tmp5 + tmp6 +
1776
0
      MULTIPLY(tmp13, FIX(2.205608352)) -         /* c3+c5+c9-c7 */
1777
0
      MULTIPLY(tmp15, FIX(1.742345811));          /* c1+c11 */
1778
1779
0
    dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS);
1780
0
    dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS);
1781
0
    dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS);
1782
0
    dataptr[7] = (DCTELEM) DESCALE(tmp3, CONST_BITS);
1783
1784
0
    ctr++;
1785
1786
0
    if (ctr != DCTSIZE) {
1787
0
      if (ctr == 13)
1788
0
  break;     /* Done. */
1789
0
      dataptr += DCTSIZE; /* advance pointer to next row */
1790
0
    } else
1791
0
      dataptr = workspace; /* switch pointer to extended workspace */
1792
0
  }
1793
1794
  /* Pass 2: process columns.
1795
   * We leave the results scaled up by an overall factor of 8.
1796
   * We must also scale the output by (8/13)**2 = 64/169, which we partially
1797
   * fold into the constant multipliers and final shifting:
1798
   * cK now represents sqrt(2) * cos(K*pi/26) * 128/169.
1799
   */
1800
1801
0
  dataptr = data;
1802
0
  wsptr = workspace;
1803
0
  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
1804
    /* Even part */
1805
1806
0
    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*4];
1807
0
    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*3];
1808
0
    tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*2];
1809
0
    tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*1];
1810
0
    tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*0];
1811
0
    tmp5 = dataptr[DCTSIZE*5] + dataptr[DCTSIZE*7];
1812
0
    tmp6 = dataptr[DCTSIZE*6];
1813
1814
0
    tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*4];
1815
0
    tmp11 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*3];
1816
0
    tmp12 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*2];
1817
0
    tmp13 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*1];
1818
0
    tmp14 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*0];
1819
0
    tmp15 = dataptr[DCTSIZE*5] - dataptr[DCTSIZE*7];
1820
1821
0
    dataptr[DCTSIZE*0] = (DCTELEM)
1822
0
      DESCALE(MULTIPLY(tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5 + tmp6,
1823
0
           FIX(0.757396450)),          /* 128/169 */
1824
0
        CONST_BITS+1+PASS2_BITS-PASS1_BITS);
1825
0
    tmp6 += tmp6;
1826
0
    tmp0 -= tmp6;
1827
0
    tmp1 -= tmp6;
1828
0
    tmp2 -= tmp6;
1829
0
    tmp3 -= tmp6;
1830
0
    tmp4 -= tmp6;
1831
0
    tmp5 -= tmp6;
1832
0
    dataptr[DCTSIZE*2] = (DCTELEM)
1833
0
      DESCALE(MULTIPLY(tmp0, FIX(1.039995521)) +   /* c2 */
1834
0
        MULTIPLY(tmp1, FIX(0.801745081)) +   /* c6 */
1835
0
        MULTIPLY(tmp2, FIX(0.379824504)) -   /* c10 */
1836
0
        MULTIPLY(tmp3, FIX(0.129109289)) -   /* c12 */
1837
0
        MULTIPLY(tmp4, FIX(0.608465700)) -   /* c8 */
1838
0
        MULTIPLY(tmp5, FIX(0.948429952)),    /* c4 */
1839
0
        CONST_BITS+1+PASS2_BITS-PASS1_BITS);
1840
0
    z1 = MULTIPLY(tmp0 - tmp2, FIX(0.875087516)) - /* (c4+c6)/2 */
1841
0
   MULTIPLY(tmp3 - tmp4, FIX(0.330085509)) - /* (c2-c10)/2 */
1842
0
   MULTIPLY(tmp1 - tmp5, FIX(0.239678205));  /* (c8-c12)/2 */
1843
0
    z2 = MULTIPLY(tmp0 + tmp2, FIX(0.073342435)) - /* (c4-c6)/2 */
1844
0
   MULTIPLY(tmp3 + tmp4, FIX(0.709910013)) + /* (c2+c10)/2 */
1845
0
   MULTIPLY(tmp1 + tmp5, FIX(0.368787494));  /* (c8+c12)/2 */
1846
1847
0
    dataptr[DCTSIZE*4] = (DCTELEM)
1848
0
      DESCALE(z1 + z2, CONST_BITS+1+PASS2_BITS-PASS1_BITS);
1849
0
    dataptr[DCTSIZE*6] = (DCTELEM)
1850
0
      DESCALE(z1 - z2, CONST_BITS+1+PASS2_BITS-PASS1_BITS);
1851
1852
    /* Odd part */
1853
1854
0
    tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.001514908));   /* c3 */
1855
0
    tmp2 = MULTIPLY(tmp10 + tmp12, FIX(0.881514751));   /* c5 */
1856
0
    tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.710284161)) +  /* c7 */
1857
0
     MULTIPLY(tmp14 + tmp15, FIX(0.256335874));   /* c11 */
1858
0
    tmp0 = tmp1 + tmp2 + tmp3 -
1859
0
     MULTIPLY(tmp10, FIX(1.530003162)) +          /* c3+c5+c7-c1 */
1860
0
     MULTIPLY(tmp14, FIX(0.241438564));           /* c9-c11 */
1861
0
    tmp4 = MULTIPLY(tmp14 - tmp15, FIX(0.710284161)) -  /* c7 */
1862
0
     MULTIPLY(tmp11 + tmp12, FIX(0.256335874));   /* c11 */
1863
0
    tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(0.881514751)); /* -c5 */
1864
0
    tmp1 += tmp4 + tmp5 +
1865
0
      MULTIPLY(tmp11, FIX(0.634110155)) -         /* c5+c9+c11-c3 */
1866
0
      MULTIPLY(tmp14, FIX(1.773594819));          /* c1+c7 */
1867
0
    tmp6 = MULTIPLY(tmp12 + tmp13, - FIX(0.497774438)); /* -c9 */
1868
0
    tmp2 += tmp4 + tmp6 -
1869
0
      MULTIPLY(tmp12, FIX(1.190715098)) +         /* c1+c5-c9-c11 */
1870
0
      MULTIPLY(tmp15, FIX(1.711799069));          /* c3+c7 */
1871
0
    tmp3 += tmp5 + tmp6 +
1872
0
      MULTIPLY(tmp13, FIX(1.670519935)) -         /* c3+c5+c9-c7 */
1873
0
      MULTIPLY(tmp15, FIX(1.319646532));          /* c1+c11 */
1874
1875
0
    dataptr[DCTSIZE*1] = (DCTELEM)
1876
0
      DESCALE(tmp0, CONST_BITS+1+PASS2_BITS-PASS1_BITS);
1877
0
    dataptr[DCTSIZE*3] = (DCTELEM)
1878
0
      DESCALE(tmp1, CONST_BITS+1+PASS2_BITS-PASS1_BITS);
1879
0
    dataptr[DCTSIZE*5] = (DCTELEM)
1880
0
      DESCALE(tmp2, CONST_BITS+1+PASS2_BITS-PASS1_BITS);
1881
0
    dataptr[DCTSIZE*7] = (DCTELEM)
1882
0
      DESCALE(tmp3, CONST_BITS+1+PASS2_BITS-PASS1_BITS);
1883
1884
0
    dataptr++;      /* advance pointer to next column */
1885
0
    wsptr++;      /* advance pointer to next column */
1886
0
  }
1887
0
}
1888
1889
1890
/*
1891
 * Perform the forward DCT on a 14x14 sample block.
1892
 */
1893
1894
GLOBAL(void)
1895
jpeg_fdct_14x14 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
1896
0
{
1897
0
  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
1898
0
  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
1899
0
  DCTELEM workspace[8*6];
1900
0
  DCTELEM *dataptr;
1901
0
  DCTELEM *wsptr;
1902
0
  JSAMPROW elemptr;
1903
0
  int ctr;
1904
0
  SHIFT_TEMPS
1905
1906
  /* Pass 1: process rows.
1907
   * Note results are scaled up by sqrt(8) compared to a true DCT.
1908
   * cK represents sqrt(2) * cos(K*pi/28).
1909
   */
1910
1911
0
  dataptr = data;
1912
0
  ctr = 0;
1913
0
  for (;;) {
1914
0
    elemptr = sample_data[ctr] + start_col;
1915
1916
    /* Even part */
1917
1918
0
    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[13]);
1919
0
    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[12]);
1920
0
    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[11]);
1921
0
    tmp13 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[10]);
1922
0
    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[9]);
1923
0
    tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[8]);
1924
0
    tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[7]);
1925
1926
0
    tmp10 = tmp0 + tmp6;
1927
0
    tmp14 = tmp0 - tmp6;
1928
0
    tmp11 = tmp1 + tmp5;
1929
0
    tmp15 = tmp1 - tmp5;
1930
0
    tmp12 = tmp2 + tmp4;
1931
0
    tmp16 = tmp2 - tmp4;
1932
1933
0
    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[13]);
1934
0
    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[12]);
1935
0
    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[11]);
1936
0
    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[10]);
1937
0
    tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[9]);
1938
0
    tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[8]);
1939
0
    tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[7]);
1940
1941
    /* Apply unsigned->signed conversion. */
1942
0
    dataptr[0] = (DCTELEM)
1943
0
      (tmp10 + tmp11 + tmp12 + tmp13 - 14 * CENTERJSAMPLE);
1944
0
    tmp13 += tmp13;
1945
0
    dataptr[4] = (DCTELEM)
1946
0
      DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.274162392)) + /* c4 */
1947
0
        MULTIPLY(tmp11 - tmp13, FIX(0.314692123)) - /* c12 */
1948
0
        MULTIPLY(tmp12 - tmp13, FIX(0.881747734)),  /* c8 */
1949
0
        CONST_BITS);
1950
1951
0
    tmp10 = MULTIPLY(tmp14 + tmp15, FIX(1.105676686));    /* c6 */
1952
1953
0
    dataptr[2] = (DCTELEM)
1954
0
      DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.273079590))   /* c2-c6 */
1955
0
        + MULTIPLY(tmp16, FIX(0.613604268)),        /* c10 */
1956
0
        CONST_BITS);
1957
0
    dataptr[6] = (DCTELEM)
1958
0
      DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.719280954))   /* c6+c10 */
1959
0
        - MULTIPLY(tmp16, FIX(1.378756276)),        /* c2 */
1960
0
        CONST_BITS);
1961
1962
    /* Odd part */
1963
1964
0
    tmp10 = tmp1 + tmp2;
1965
0
    tmp11 = tmp5 - tmp4;
1966
0
    dataptr[7] = (DCTELEM) (tmp0 - tmp10 + tmp3 - tmp11 - tmp6);
1967
0
    tmp3 <<= CONST_BITS;
1968
0
    tmp10 = MULTIPLY(tmp10, - FIX(0.158341681));          /* -c13 */
1969
0
    tmp11 = MULTIPLY(tmp11, FIX(1.405321284));            /* c1 */
1970
0
    tmp10 += tmp11 - tmp3;
1971
0
    tmp11 = MULTIPLY(tmp0 + tmp2, FIX(1.197448846)) +     /* c5 */
1972
0
      MULTIPLY(tmp4 + tmp6, FIX(0.752406978));      /* c9 */
1973
0
    dataptr[5] = (DCTELEM)
1974
0
      DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(2.373959773)) /* c3+c5-c13 */
1975
0
        + MULTIPLY(tmp4, FIX(1.119999435)),         /* c1+c11-c9 */
1976
0
        CONST_BITS);
1977
0
    tmp12 = MULTIPLY(tmp0 + tmp1, FIX(1.334852607)) +     /* c3 */
1978
0
      MULTIPLY(tmp5 - tmp6, FIX(0.467085129));      /* c11 */
1979
0
    dataptr[3] = (DCTELEM)
1980
0
      DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.424103948)) /* c3-c9-c13 */
1981
0
        - MULTIPLY(tmp5, FIX(3.069855259)),         /* c1+c5+c11 */
1982
0
        CONST_BITS);
1983
0
    dataptr[1] = (DCTELEM)
1984
0
      DESCALE(tmp11 + tmp12 + tmp3 + tmp6 -
1985
0
        MULTIPLY(tmp0 + tmp6, FIX(1.126980169)),    /* c3+c5-c1 */
1986
0
        CONST_BITS);
1987
1988
0
    ctr++;
1989
1990
0
    if (ctr != DCTSIZE) {
1991
0
      if (ctr == 14)
1992
0
  break;     /* Done. */
1993
0
      dataptr += DCTSIZE; /* advance pointer to next row */
1994
0
    } else
1995
0
      dataptr = workspace; /* switch pointer to extended workspace */
1996
0
  }
1997
1998
  /* Pass 2: process columns.
1999
   * We leave the results scaled up by an overall factor of 8.
2000
   * We must also scale the output by (8/14)**2 = 16/49, which we partially
2001
   * fold into the constant multipliers and final shifting:
2002
   * cK now represents sqrt(2) * cos(K*pi/28) * 32/49.
2003
   */
2004
2005
0
  dataptr = data;
2006
0
  wsptr = workspace;
2007
0
  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
2008
    /* Even part */
2009
2010
0
    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*5];
2011
0
    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*4];
2012
0
    tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*3];
2013
0
    tmp13 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*2];
2014
0
    tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*1];
2015
0
    tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*0];
2016
0
    tmp6 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7];
2017
2018
0
    tmp10 = tmp0 + tmp6;
2019
0
    tmp14 = tmp0 - tmp6;
2020
0
    tmp11 = tmp1 + tmp5;
2021
0
    tmp15 = tmp1 - tmp5;
2022
0
    tmp12 = tmp2 + tmp4;
2023
0
    tmp16 = tmp2 - tmp4;
2024
2025
0
    tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*5];
2026
0
    tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*4];
2027
0
    tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*3];
2028
0
    tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*2];
2029
0
    tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*1];
2030
0
    tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*0];
2031
0
    tmp6 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7];
2032
2033
0
    dataptr[DCTSIZE*0] = (DCTELEM)
2034
0
      DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12 + tmp13,
2035
0
           FIX(0.653061224)),                 /* 32/49 */
2036
0
        CONST_BITS+1+PASS2_BITS-PASS1_BITS);
2037
0
    tmp13 += tmp13;
2038
0
    dataptr[DCTSIZE*4] = (DCTELEM)
2039
0
      DESCALE(MULTIPLY(tmp10 - tmp13, FIX(0.832106052)) + /* c4 */
2040
0
        MULTIPLY(tmp11 - tmp13, FIX(0.205513223)) - /* c12 */
2041
0
        MULTIPLY(tmp12 - tmp13, FIX(0.575835255)),  /* c8 */
2042
0
        CONST_BITS+1+PASS2_BITS-PASS1_BITS);
2043
2044
0
    tmp10 = MULTIPLY(tmp14 + tmp15, FIX(0.722074570));    /* c6 */
2045
2046
0
    dataptr[DCTSIZE*2] = (DCTELEM)
2047
0
      DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.178337691))   /* c2-c6 */
2048
0
        + MULTIPLY(tmp16, FIX(0.400721155)),        /* c10 */
2049
0
        CONST_BITS+1+PASS2_BITS-PASS1_BITS);
2050
0
    dataptr[DCTSIZE*6] = (DCTELEM)
2051
0
      DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.122795725))   /* c6+c10 */
2052
0
        - MULTIPLY(tmp16, FIX(0.900412262)),        /* c2 */
2053
0
        CONST_BITS+1+PASS2_BITS-PASS1_BITS);
2054
2055
    /* Odd part */
2056
2057
0
    tmp10 = tmp1 + tmp2;
2058
0
    tmp11 = tmp5 - tmp4;
2059
0
    dataptr[DCTSIZE*7] = (DCTELEM)
2060
0
      DESCALE(MULTIPLY(tmp0 - tmp10 + tmp3 - tmp11 - tmp6,
2061
0
           FIX(0.653061224)),                 /* 32/49 */
2062
0
        CONST_BITS+1+PASS2_BITS-PASS1_BITS);
2063
0
    tmp3  = MULTIPLY(tmp3 , FIX(0.653061224));            /* 32/49 */
2064
0
    tmp10 = MULTIPLY(tmp10, - FIX(0.103406812));          /* -c13 */
2065
0
    tmp11 = MULTIPLY(tmp11, FIX(0.917760839));            /* c1 */
2066
0
    tmp10 += tmp11 - tmp3;
2067
0
    tmp11 = MULTIPLY(tmp0 + tmp2, FIX(0.782007410)) +     /* c5 */
2068
0
      MULTIPLY(tmp4 + tmp6, FIX(0.491367823));      /* c9 */
2069
0
    dataptr[DCTSIZE*5] = (DCTELEM)
2070
0
      DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(1.550341076)) /* c3+c5-c13 */
2071
0
        + MULTIPLY(tmp4, FIX(0.731428202)),         /* c1+c11-c9 */
2072
0
        CONST_BITS+1+PASS2_BITS-PASS1_BITS);
2073
0
    tmp12 = MULTIPLY(tmp0 + tmp1, FIX(0.871740478)) +     /* c3 */
2074
0
      MULTIPLY(tmp5 - tmp6, FIX(0.305035186));      /* c11 */
2075
0
    dataptr[DCTSIZE*3] = (DCTELEM)
2076
0
      DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.276965844)) /* c3-c9-c13 */
2077
0
        - MULTIPLY(tmp5, FIX(2.004803435)),         /* c1+c5+c11 */
2078
0
        CONST_BITS+1+PASS2_BITS-PASS1_BITS);
2079
0
    dataptr[DCTSIZE*1] = (DCTELEM)
2080
0
      DESCALE(tmp11 + tmp12 + tmp3
2081
0
        - MULTIPLY(tmp0, FIX(0.735987049))          /* c3+c5-c1 */
2082
0
        - MULTIPLY(tmp6, FIX(0.082925825)),         /* c9-c11-c13 */
2083
0
        CONST_BITS+1+PASS2_BITS-PASS1_BITS);
2084
2085
0
    dataptr++;      /* advance pointer to next column */
2086
0
    wsptr++;      /* advance pointer to next column */
2087
0
  }
2088
0
}
2089
2090
2091
/*
2092
 * Perform the forward DCT on a 15x15 sample block.
2093
 */
2094
2095
GLOBAL(void)
2096
jpeg_fdct_15x15 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
2097
0
{
2098
0
  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
2099
0
  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
2100
0
  INT32 z1, z2, z3;
2101
0
  DCTELEM workspace[8*7];
2102
0
  DCTELEM *dataptr;
2103
0
  DCTELEM *wsptr;
2104
0
  JSAMPROW elemptr;
2105
0
  int ctr;
2106
0
  SHIFT_TEMPS
2107
2108
  /* Pass 1: process rows.
2109
   * Note results are scaled up by sqrt(8) compared to a true DCT.
2110
   * cK represents sqrt(2) * cos(K*pi/30).
2111
   */
2112
2113
0
  dataptr = data;
2114
0
  ctr = 0;
2115
0
  for (;;) {
2116
0
    elemptr = sample_data[ctr] + start_col;
2117
2118
    /* Even part */
2119
2120
0
    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[14]);
2121
0
    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[13]);
2122
0
    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[12]);
2123
0
    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[11]);
2124
0
    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[10]);
2125
0
    tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[9]);
2126
0
    tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[8]);
2127
0
    tmp7 = GETJSAMPLE(elemptr[7]);
2128
2129
0
    tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[14]);
2130
0
    tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[13]);
2131
0
    tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[12]);
2132
0
    tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[11]);
2133
0
    tmp14 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[10]);
2134
0
    tmp15 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[9]);
2135
0
    tmp16 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[8]);
2136
2137
0
    z1 = tmp0 + tmp4 + tmp5;
2138
0
    z2 = tmp1 + tmp3 + tmp6;
2139
0
    z3 = tmp2 + tmp7;
2140
    /* Apply unsigned->signed conversion. */
2141
0
    dataptr[0] = (DCTELEM) (z1 + z2 + z3 - 15 * CENTERJSAMPLE);
2142
0
    z3 += z3;
2143
0
    dataptr[6] = (DCTELEM)
2144
0
      DESCALE(MULTIPLY(z1 - z3, FIX(1.144122806)) - /* c6 */
2145
0
        MULTIPLY(z2 - z3, FIX(0.437016024)),  /* c12 */
2146
0
        CONST_BITS);
2147
0
    tmp2 += ((tmp1 + tmp4) >> 1) - tmp7 - tmp7;
2148
0
    z1 = MULTIPLY(tmp3 - tmp2, FIX(1.531135173)) -  /* c2+c14 */
2149
0
         MULTIPLY(tmp6 - tmp2, FIX(2.238241955));   /* c4+c8 */
2150
0
    z2 = MULTIPLY(tmp5 - tmp2, FIX(0.798468008)) -  /* c8-c14 */
2151
0
   MULTIPLY(tmp0 - tmp2, FIX(0.091361227));   /* c2-c4 */
2152
0
    z3 = MULTIPLY(tmp0 - tmp3, FIX(1.383309603)) +  /* c2 */
2153
0
   MULTIPLY(tmp6 - tmp5, FIX(0.946293579)) +  /* c8 */
2154
0
   MULTIPLY(tmp1 - tmp4, FIX(0.790569415));   /* (c6+c12)/2 */
2155
2156
0
    dataptr[2] = (DCTELEM) DESCALE(z1 + z3, CONST_BITS);
2157
0
    dataptr[4] = (DCTELEM) DESCALE(z2 + z3, CONST_BITS);
2158
2159
    /* Odd part */
2160
2161
0
    tmp2 = MULTIPLY(tmp10 - tmp12 - tmp13 + tmp15 + tmp16,
2162
0
        FIX(1.224744871));                         /* c5 */
2163
0
    tmp1 = MULTIPLY(tmp10 - tmp14 - tmp15, FIX(1.344997024)) + /* c3 */
2164
0
     MULTIPLY(tmp11 - tmp13 - tmp16, FIX(0.831253876));  /* c9 */
2165
0
    tmp12 = MULTIPLY(tmp12, FIX(1.224744871));                 /* c5 */
2166
0
    tmp4 = MULTIPLY(tmp10 - tmp16, FIX(1.406466353)) +         /* c1 */
2167
0
     MULTIPLY(tmp11 + tmp14, FIX(1.344997024)) +         /* c3 */
2168
0
     MULTIPLY(tmp13 + tmp15, FIX(0.575212477));          /* c11 */
2169
0
    tmp0 = MULTIPLY(tmp13, FIX(0.475753014)) -                 /* c7-c11 */
2170
0
     MULTIPLY(tmp14, FIX(0.513743148)) +                 /* c3-c9 */
2171
0
     MULTIPLY(tmp16, FIX(1.700497885)) + tmp4 + tmp12;   /* c1+c13 */
2172
0
    tmp3 = MULTIPLY(tmp10, - FIX(0.355500862)) -               /* -(c1-c7) */
2173
0
     MULTIPLY(tmp11, FIX(2.176250899)) -                 /* c3+c9 */
2174
0
     MULTIPLY(tmp15, FIX(0.869244010)) + tmp4 - tmp12;   /* c11+c13 */
2175
2176
0
    dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS);
2177
0
    dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS);
2178
0
    dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS);
2179
0
    dataptr[7] = (DCTELEM) DESCALE(tmp3, CONST_BITS);
2180
2181
0
    ctr++;
2182
2183
0
    if (ctr != DCTSIZE) {
2184
0
      if (ctr == 15)
2185
0
  break;     /* Done. */
2186
0
      dataptr += DCTSIZE; /* advance pointer to next row */
2187
0
    } else
2188
0
      dataptr = workspace; /* switch pointer to extended workspace */
2189
0
  }
2190
2191
  /* Pass 2: process columns.
2192
   * We leave the results scaled up by an overall factor of 8.
2193
   * We must also scale the output by (8/15)**2 = 64/225, which we partially
2194
   * fold into the constant multipliers and final shifting:
2195
   * cK now represents sqrt(2) * cos(K*pi/30) * 256/225.
2196
   */
2197
2198
0
  dataptr = data;
2199
0
  wsptr = workspace;
2200
0
  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
2201
    /* Even part */
2202
2203
0
    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*6];
2204
0
    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*5];
2205
0
    tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*4];
2206
0
    tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*3];
2207
0
    tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*2];
2208
0
    tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*1];
2209
0
    tmp6 = dataptr[DCTSIZE*6] + wsptr[DCTSIZE*0];
2210
0
    tmp7 = dataptr[DCTSIZE*7];
2211
2212
0
    tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*6];
2213
0
    tmp11 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*5];
2214
0
    tmp12 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*4];
2215
0
    tmp13 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*3];
2216
0
    tmp14 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*2];
2217
0
    tmp15 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*1];
2218
0
    tmp16 = dataptr[DCTSIZE*6] - wsptr[DCTSIZE*0];
2219
2220
0
    z1 = tmp0 + tmp4 + tmp5;
2221
0
    z2 = tmp1 + tmp3 + tmp6;
2222
0
    z3 = tmp2 + tmp7;
2223
0
    dataptr[DCTSIZE*0] = (DCTELEM)
2224
0
      DESCALE(MULTIPLY(z1 + z2 + z3, FIX(1.137777778)), /* 256/225 */
2225
0
        CONST_BITS+2+PASS2_BITS-PASS1_BITS);
2226
0
    z3 += z3;
2227
0
    dataptr[DCTSIZE*6] = (DCTELEM)
2228
0
      DESCALE(MULTIPLY(z1 - z3, FIX(1.301757503)) - /* c6 */
2229
0
        MULTIPLY(z2 - z3, FIX(0.497227121)),  /* c12 */
2230
0
        CONST_BITS+2+PASS2_BITS-PASS1_BITS);
2231
0
    tmp2 += ((tmp1 + tmp4) >> 1) - tmp7 - tmp7;
2232
0
    z1 = MULTIPLY(tmp3 - tmp2, FIX(1.742091575)) -  /* c2+c14 */
2233
0
         MULTIPLY(tmp6 - tmp2, FIX(2.546621957));   /* c4+c8 */
2234
0
    z2 = MULTIPLY(tmp5 - tmp2, FIX(0.908479156)) -  /* c8-c14 */
2235
0
   MULTIPLY(tmp0 - tmp2, FIX(0.103948774));   /* c2-c4 */
2236
0
    z3 = MULTIPLY(tmp0 - tmp3, FIX(1.573898926)) +  /* c2 */
2237
0
   MULTIPLY(tmp6 - tmp5, FIX(1.076671805)) +  /* c8 */
2238
0
   MULTIPLY(tmp1 - tmp4, FIX(0.899492312));   /* (c6+c12)/2 */
2239
2240
0
    dataptr[DCTSIZE*2] = (DCTELEM)
2241
0
      DESCALE(z1 + z3, CONST_BITS+2+PASS2_BITS-PASS1_BITS);
2242
0
    dataptr[DCTSIZE*4] = (DCTELEM)
2243
0
      DESCALE(z2 + z3, CONST_BITS+2+PASS2_BITS-PASS1_BITS);
2244
2245
    /* Odd part */
2246
2247
0
    tmp2 = MULTIPLY(tmp10 - tmp12 - tmp13 + tmp15 + tmp16,
2248
0
        FIX(1.393487498));                         /* c5 */
2249
0
    tmp1 = MULTIPLY(tmp10 - tmp14 - tmp15, FIX(1.530307725)) + /* c3 */
2250
0
     MULTIPLY(tmp11 - tmp13 - tmp16, FIX(0.945782187));  /* c9 */
2251
0
    tmp12 = MULTIPLY(tmp12, FIX(1.393487498));                 /* c5 */
2252
0
    tmp4 = MULTIPLY(tmp10 - tmp16, FIX(1.600246161)) +         /* c1 */
2253
0
     MULTIPLY(tmp11 + tmp14, FIX(1.530307725)) +         /* c3 */
2254
0
     MULTIPLY(tmp13 + tmp15, FIX(0.654463974));          /* c11 */
2255
0
    tmp0 = MULTIPLY(tmp13, FIX(0.541301207)) -                 /* c7-c11 */
2256
0
     MULTIPLY(tmp14, FIX(0.584525538)) +                 /* c3-c9 */
2257
0
     MULTIPLY(tmp16, FIX(1.934788705)) + tmp4 + tmp12;   /* c1+c13 */
2258
0
    tmp3 = MULTIPLY(tmp10, - FIX(0.404480980)) -               /* -(c1-c7) */
2259
0
     MULTIPLY(tmp11, FIX(2.476089912)) -                 /* c3+c9 */
2260
0
     MULTIPLY(tmp15, FIX(0.989006518)) + tmp4 - tmp12;   /* c11+c13 */
2261
2262
0
    dataptr[DCTSIZE*1] = (DCTELEM)
2263
0
      DESCALE(tmp0, CONST_BITS+2+PASS2_BITS-PASS1_BITS);
2264
0
    dataptr[DCTSIZE*3] = (DCTELEM)
2265
0
      DESCALE(tmp1, CONST_BITS+2+PASS2_BITS-PASS1_BITS);
2266
0
    dataptr[DCTSIZE*5] = (DCTELEM)
2267
0
      DESCALE(tmp2, CONST_BITS+2+PASS2_BITS-PASS1_BITS);
2268
0
    dataptr[DCTSIZE*7] = (DCTELEM)
2269
0
      DESCALE(tmp3, CONST_BITS+2+PASS2_BITS-PASS1_BITS);
2270
2271
0
    dataptr++;      /* advance pointer to next column */
2272
0
    wsptr++;      /* advance pointer to next column */
2273
0
  }
2274
0
}
2275
2276
2277
/*
2278
 * Perform the forward DCT on a 16x16 sample block.
2279
 */
2280
2281
GLOBAL(void)
2282
jpeg_fdct_16x16 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
2283
771k
{
2284
771k
  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
2285
771k
  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17;
2286
771k
  DCTELEM workspace[DCTSIZE2];
2287
771k
  DCTELEM *dataptr;
2288
771k
  DCTELEM *wsptr;
2289
771k
  JSAMPROW elemptr;
2290
771k
  int ctr;
2291
771k
  SHIFT_TEMPS
2292
2293
  /* Pass 1: process rows.
2294
   * Note results are scaled up by sqrt(8) compared to a true DCT;
2295
   * furthermore, we scale the results by 2**PASS1_BITS.
2296
   * cK represents sqrt(2) * cos(K*pi/32).
2297
   */
2298
2299
771k
  dataptr = data;
2300
771k
  ctr = 0;
2301
12.3M
  for (;;) {
2302
12.3M
    elemptr = sample_data[ctr] + start_col;
2303
2304
    /* Even part */
2305
2306
12.3M
    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[15]);
2307
12.3M
    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[14]);
2308
12.3M
    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[13]);
2309
12.3M
    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[12]);
2310
12.3M
    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[11]);
2311
12.3M
    tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[10]);
2312
12.3M
    tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[9]);
2313
12.3M
    tmp7 = GETJSAMPLE(elemptr[7]) + GETJSAMPLE(elemptr[8]);
2314
2315
12.3M
    tmp10 = tmp0 + tmp7;
2316
12.3M
    tmp14 = tmp0 - tmp7;
2317
12.3M
    tmp11 = tmp1 + tmp6;
2318
12.3M
    tmp15 = tmp1 - tmp6;
2319
12.3M
    tmp12 = tmp2 + tmp5;
2320
12.3M
    tmp16 = tmp2 - tmp5;
2321
12.3M
    tmp13 = tmp3 + tmp4;
2322
12.3M
    tmp17 = tmp3 - tmp4;
2323
2324
12.3M
    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[15]);
2325
12.3M
    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[14]);
2326
12.3M
    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[13]);
2327
12.3M
    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[12]);
2328
12.3M
    tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[11]);
2329
12.3M
    tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[10]);
2330
12.3M
    tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[9]);
2331
12.3M
    tmp7 = GETJSAMPLE(elemptr[7]) - GETJSAMPLE(elemptr[8]);
2332
2333
    /* Apply unsigned->signed conversion. */
2334
12.3M
    dataptr[0] =
2335
12.3M
      PASS1_OUTPUT(tmp10 + tmp11 + tmp12 + tmp13 - 16 * CENTERJSAMPLE);
2336
12.3M
    dataptr[4] = (DCTELEM)
2337
12.3M
      DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */
2338
12.3M
        MULTIPLY(tmp11 - tmp12, FIX_0_541196100),   /* c12[16] = c6[8] */
2339
12.3M
        CONST_BITS-PASS1_BITS);
2340
2341
12.3M
    tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) +   /* c14[16] = c7[8] */
2342
12.3M
      MULTIPLY(tmp14 - tmp16, FIX(1.387039845));    /* c2[16] = c1[8] */
2343
2344
12.3M
    dataptr[2] = (DCTELEM)
2345
12.3M
      DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982))   /* c6+c14 */
2346
12.3M
        + MULTIPLY(tmp16, FIX(2.172734804)),        /* c2+c10 */
2347
12.3M
        CONST_BITS-PASS1_BITS);
2348
12.3M
    dataptr[6] = (DCTELEM)
2349
12.3M
      DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243))   /* c2-c6 */
2350
12.3M
        - MULTIPLY(tmp17, FIX(1.061594338)),        /* c10+c14 */
2351
12.3M
        CONST_BITS-PASS1_BITS);
2352
2353
    /* Odd part */
2354
2355
12.3M
    tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) +         /* c3 */
2356
12.3M
      MULTIPLY(tmp6 - tmp7, FIX(0.410524528));          /* c13 */
2357
12.3M
    tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) +         /* c5 */
2358
12.3M
      MULTIPLY(tmp5 + tmp7, FIX(0.666655658));          /* c11 */
2359
12.3M
    tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) +         /* c7 */
2360
12.3M
      MULTIPLY(tmp4 - tmp7, FIX(0.897167586));          /* c9 */
2361
12.3M
    tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) +         /* c15 */
2362
12.3M
      MULTIPLY(tmp6 - tmp5, FIX(1.407403738));          /* c1 */
2363
12.3M
    tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) +       /* -c11 */
2364
12.3M
      MULTIPLY(tmp4 + tmp6, - FIX(1.247225013));        /* -c5 */
2365
12.3M
    tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) +       /* -c3 */
2366
12.3M
      MULTIPLY(tmp5 - tmp4, FIX(0.410524528));          /* c13 */
2367
12.3M
    tmp10 = tmp11 + tmp12 + tmp13 -
2368
12.3M
      MULTIPLY(tmp0, FIX(2.286341144)) +                /* c7+c5+c3-c1 */
2369
12.3M
      MULTIPLY(tmp7, FIX(0.779653625));                 /* c15+c13-c11+c9 */
2370
12.3M
    tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */
2371
12.3M
       - MULTIPLY(tmp6, FIX(1.663905119));              /* c7+c13+c1-c5 */
2372
12.3M
    tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */
2373
12.3M
       + MULTIPLY(tmp5, FIX(1.227391138));              /* c9-c11+c1-c13 */
2374
12.3M
    tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */
2375
12.3M
       + MULTIPLY(tmp4, FIX(2.167985692));              /* c1+c13+c5-c9 */
2376
2377
12.3M
    dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS);
2378
12.3M
    dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS);
2379
12.3M
    dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS);
2380
12.3M
    dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS);
2381
2382
12.3M
    ctr++;
2383
2384
12.3M
    if (ctr != DCTSIZE) {
2385
11.5M
      if (ctr == DCTSIZE * 2)
2386
771k
  break;      /* Done. */
2387
10.7M
      dataptr += DCTSIZE; /* advance pointer to next row */
2388
10.7M
    } else
2389
771k
      dataptr = workspace; /* switch pointer to extended workspace */
2390
12.3M
  }
2391
2392
  /* Pass 2: process columns.
2393
   * We apply the PASS2_BITS scaling, but leave the
2394
   * results scaled up by an overall factor of 8.
2395
   * We must also scale the output by (8/16)**2 = 1/2**2.
2396
   * cK represents sqrt(2) * cos(K*pi/32).
2397
   */
2398
2399
771k
  dataptr = data;
2400
771k
  wsptr = workspace;
2401
6.94M
  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
2402
    /* Even part */
2403
2404
6.17M
    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*7];
2405
6.17M
    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*6];
2406
6.17M
    tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*5];
2407
6.17M
    tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*4];
2408
6.17M
    tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*3];
2409
6.17M
    tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*2];
2410
6.17M
    tmp6 = dataptr[DCTSIZE*6] + wsptr[DCTSIZE*1];
2411
6.17M
    tmp7 = dataptr[DCTSIZE*7] + wsptr[DCTSIZE*0];
2412
2413
6.17M
    tmp10 = tmp0 + tmp7;
2414
6.17M
    tmp14 = tmp0 - tmp7;
2415
6.17M
    tmp11 = tmp1 + tmp6;
2416
6.17M
    tmp15 = tmp1 - tmp6;
2417
6.17M
    tmp12 = tmp2 + tmp5;
2418
6.17M
    tmp16 = tmp2 - tmp5;
2419
6.17M
    tmp13 = tmp3 + tmp4;
2420
6.17M
    tmp17 = tmp3 - tmp4;
2421
2422
6.17M
    tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*7];
2423
6.17M
    tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*6];
2424
6.17M
    tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*5];
2425
6.17M
    tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*4];
2426
6.17M
    tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*3];
2427
6.17M
    tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*2];
2428
6.17M
    tmp6 = dataptr[DCTSIZE*6] - wsptr[DCTSIZE*1];
2429
6.17M
    tmp7 = dataptr[DCTSIZE*7] - wsptr[DCTSIZE*0];
2430
2431
6.17M
    dataptr[DCTSIZE*0] = (DCTELEM)
2432
6.17M
      DESCALE(tmp10 + tmp11 + tmp12 + tmp13, PASS2_BITS+2);
2433
6.17M
    dataptr[DCTSIZE*4] = (DCTELEM)
2434
6.17M
      DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */
2435
6.17M
        MULTIPLY(tmp11 - tmp12, FIX_0_541196100),   /* c12[16] = c6[8] */
2436
6.17M
        CONST_BITS+PASS2_BITS+2);
2437
2438
6.17M
    tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) +   /* c14[16] = c7[8] */
2439
6.17M
      MULTIPLY(tmp14 - tmp16, FIX(1.387039845));    /* c2[16] = c1[8] */
2440
2441
6.17M
    dataptr[DCTSIZE*2] = (DCTELEM)
2442
6.17M
      DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982))   /* c6+c14 */
2443
6.17M
        + MULTIPLY(tmp16, FIX(2.172734804)),        /* c2+c10 */
2444
6.17M
        CONST_BITS+PASS2_BITS+2);
2445
6.17M
    dataptr[DCTSIZE*6] = (DCTELEM)
2446
6.17M
      DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243))   /* c2-c6 */
2447
6.17M
        - MULTIPLY(tmp17, FIX(1.061594338)),        /* c10+c14 */
2448
6.17M
        CONST_BITS+PASS2_BITS+2);
2449
2450
    /* Odd part */
2451
2452
6.17M
    tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) +         /* c3 */
2453
6.17M
      MULTIPLY(tmp6 - tmp7, FIX(0.410524528));          /* c13 */
2454
6.17M
    tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) +         /* c5 */
2455
6.17M
      MULTIPLY(tmp5 + tmp7, FIX(0.666655658));          /* c11 */
2456
6.17M
    tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) +         /* c7 */
2457
6.17M
      MULTIPLY(tmp4 - tmp7, FIX(0.897167586));          /* c9 */
2458
6.17M
    tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) +         /* c15 */
2459
6.17M
      MULTIPLY(tmp6 - tmp5, FIX(1.407403738));          /* c1 */
2460
6.17M
    tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) +       /* -c11 */
2461
6.17M
      MULTIPLY(tmp4 + tmp6, - FIX(1.247225013));        /* -c5 */
2462
6.17M
    tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) +       /* -c3 */
2463
6.17M
      MULTIPLY(tmp5 - tmp4, FIX(0.410524528));          /* c13 */
2464
6.17M
    tmp10 = tmp11 + tmp12 + tmp13 -
2465
6.17M
      MULTIPLY(tmp0, FIX(2.286341144)) +                /* c7+c5+c3-c1 */
2466
6.17M
      MULTIPLY(tmp7, FIX(0.779653625));                 /* c15+c13-c11+c9 */
2467
6.17M
    tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */
2468
6.17M
       - MULTIPLY(tmp6, FIX(1.663905119));              /* c7+c13+c1-c5 */
2469
6.17M
    tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */
2470
6.17M
       + MULTIPLY(tmp5, FIX(1.227391138));              /* c9-c11+c1-c13 */
2471
6.17M
    tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */
2472
6.17M
       + MULTIPLY(tmp4, FIX(2.167985692));              /* c1+c13+c5-c9 */
2473
2474
6.17M
    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+PASS2_BITS+2);
2475
6.17M
    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+PASS2_BITS+2);
2476
6.17M
    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+PASS2_BITS+2);
2477
6.17M
    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+PASS2_BITS+2);
2478
2479
6.17M
    dataptr++;      /* advance pointer to next column */
2480
6.17M
    wsptr++;      /* advance pointer to next column */
2481
6.17M
  }
2482
771k
}
2483
2484
2485
/*
2486
 * Perform the forward DCT on a 16x8 sample block.
2487
 *
2488
 * 16-point FDCT in pass 1 (rows), 8-point in pass 2 (columns).
2489
 */
2490
2491
GLOBAL(void)
2492
jpeg_fdct_16x8 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
2493
0
{
2494
0
  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
2495
0
  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17;
2496
0
  INT32 z1;
2497
0
  DCTELEM *dataptr;
2498
0
  JSAMPROW elemptr;
2499
0
  int ctr;
2500
0
  SHIFT_TEMPS
2501
2502
  /* Pass 1: process rows.
2503
   * Note results are scaled up by sqrt(8) compared to a true DCT;
2504
   * furthermore, we scale the results by 2**PASS1_BITS.
2505
   * 16-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
2506
   */
2507
2508
0
  dataptr = data;
2509
0
  ctr = 0;
2510
0
  for (ctr = 0; ctr < DCTSIZE; ctr++) {
2511
0
    elemptr = sample_data[ctr] + start_col;
2512
2513
    /* Even part */
2514
2515
0
    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[15]);
2516
0
    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[14]);
2517
0
    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[13]);
2518
0
    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[12]);
2519
0
    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[11]);
2520
0
    tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[10]);
2521
0
    tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[9]);
2522
0
    tmp7 = GETJSAMPLE(elemptr[7]) + GETJSAMPLE(elemptr[8]);
2523
2524
0
    tmp10 = tmp0 + tmp7;
2525
0
    tmp14 = tmp0 - tmp7;
2526
0
    tmp11 = tmp1 + tmp6;
2527
0
    tmp15 = tmp1 - tmp6;
2528
0
    tmp12 = tmp2 + tmp5;
2529
0
    tmp16 = tmp2 - tmp5;
2530
0
    tmp13 = tmp3 + tmp4;
2531
0
    tmp17 = tmp3 - tmp4;
2532
2533
0
    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[15]);
2534
0
    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[14]);
2535
0
    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[13]);
2536
0
    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[12]);
2537
0
    tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[11]);
2538
0
    tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[10]);
2539
0
    tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[9]);
2540
0
    tmp7 = GETJSAMPLE(elemptr[7]) - GETJSAMPLE(elemptr[8]);
2541
2542
    /* Apply unsigned->signed conversion. */
2543
0
    dataptr[0] =
2544
0
      PASS1_OUTPUT(tmp10 + tmp11 + tmp12 + tmp13 - 16 * CENTERJSAMPLE);
2545
0
    dataptr[4] = (DCTELEM)
2546
0
      DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */
2547
0
        MULTIPLY(tmp11 - tmp12, FIX_0_541196100),   /* c12[16] = c6[8] */
2548
0
        CONST_BITS-PASS1_BITS);
2549
2550
0
    tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) +   /* c14[16] = c7[8] */
2551
0
      MULTIPLY(tmp14 - tmp16, FIX(1.387039845));    /* c2[16] = c1[8] */
2552
2553
0
    dataptr[2] = (DCTELEM)
2554
0
      DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982))   /* c6+c14 */
2555
0
        + MULTIPLY(tmp16, FIX(2.172734804)),        /* c2+c10 */
2556
0
        CONST_BITS-PASS1_BITS);
2557
0
    dataptr[6] = (DCTELEM)
2558
0
      DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243))   /* c2-c6 */
2559
0
        - MULTIPLY(tmp17, FIX(1.061594338)),        /* c10+c14 */
2560
0
        CONST_BITS-PASS1_BITS);
2561
2562
    /* Odd part */
2563
2564
0
    tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) +         /* c3 */
2565
0
      MULTIPLY(tmp6 - tmp7, FIX(0.410524528));          /* c13 */
2566
0
    tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) +         /* c5 */
2567
0
      MULTIPLY(tmp5 + tmp7, FIX(0.666655658));          /* c11 */
2568
0
    tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) +         /* c7 */
2569
0
      MULTIPLY(tmp4 - tmp7, FIX(0.897167586));          /* c9 */
2570
0
    tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) +         /* c15 */
2571
0
      MULTIPLY(tmp6 - tmp5, FIX(1.407403738));          /* c1 */
2572
0
    tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) +       /* -c11 */
2573
0
      MULTIPLY(tmp4 + tmp6, - FIX(1.247225013));        /* -c5 */
2574
0
    tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) +       /* -c3 */
2575
0
      MULTIPLY(tmp5 - tmp4, FIX(0.410524528));          /* c13 */
2576
0
    tmp10 = tmp11 + tmp12 + tmp13 -
2577
0
      MULTIPLY(tmp0, FIX(2.286341144)) +                /* c7+c5+c3-c1 */
2578
0
      MULTIPLY(tmp7, FIX(0.779653625));                 /* c15+c13-c11+c9 */
2579
0
    tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */
2580
0
       - MULTIPLY(tmp6, FIX(1.663905119));              /* c7+c13+c1-c5 */
2581
0
    tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */
2582
0
       + MULTIPLY(tmp5, FIX(1.227391138));              /* c9-c11+c1-c13 */
2583
0
    tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */
2584
0
       + MULTIPLY(tmp4, FIX(2.167985692));              /* c1+c13+c5-c9 */
2585
2586
0
    dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS);
2587
0
    dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS);
2588
0
    dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS);
2589
0
    dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS);
2590
2591
0
    dataptr += DCTSIZE;   /* advance pointer to next row */
2592
0
  }
2593
2594
  /* Pass 2: process columns.
2595
   * We apply the PASS2_BITS scaling, but leave the
2596
   * results scaled up by an overall factor of 8.
2597
   * We must also scale the output by 8/16 = 1/2.
2598
   * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
2599
   */
2600
2601
0
  dataptr = data;
2602
0
  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
2603
    /* Even part per LL&M figure 1 --- note that published figure is faulty;
2604
     * rotator "c1" should be "c6".
2605
     */
2606
2607
0
    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
2608
0
    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
2609
0
    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
2610
0
    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
2611
2612
    /* Add fudge factor here for final descale. */
2613
0
#if PASS2_BITS > 0
2614
0
    tmp10 = tmp0 + tmp3 + (ONE << PASS2_BITS);
2615
#else
2616
    tmp10 = tmp0 + tmp3 + ONE;
2617
#endif
2618
0
    tmp12 = tmp0 - tmp3;
2619
0
    tmp11 = tmp1 + tmp2;
2620
0
    tmp13 = tmp1 - tmp2;
2621
2622
0
    tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
2623
0
    tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
2624
0
    tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
2625
0
    tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
2626
2627
0
    dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp10 + tmp11, PASS2_BITS+1);
2628
0
    dataptr[DCTSIZE*4] = (DCTELEM) RIGHT_SHIFT(tmp10 - tmp11, PASS2_BITS+1);
2629
2630
0
    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);       /* c6 */
2631
    /* Add fudge factor here for final descale. */
2632
0
    z1 += ONE << (CONST_BITS+PASS2_BITS);
2633
2634
0
    dataptr[DCTSIZE*2] = (DCTELEM)
2635
0
      RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
2636
0
      CONST_BITS+PASS2_BITS+1);
2637
0
    dataptr[DCTSIZE*6] = (DCTELEM)
2638
0
      RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
2639
0
      CONST_BITS+PASS2_BITS+1);
2640
2641
    /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
2642
     * i0..i3 in the paper are tmp0..tmp3 here.
2643
     */
2644
2645
0
    tmp12 = tmp0 + tmp2;
2646
0
    tmp13 = tmp1 + tmp3;
2647
2648
0
    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602);       /*  c3 */
2649
    /* Add fudge factor here for final descale. */
2650
0
    z1 += ONE << (CONST_BITS+PASS2_BITS);
2651
2652
0
    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);          /* -c3+c5 */
2653
0
    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);          /* -c3-c5 */
2654
0
    tmp12 += z1;
2655
0
    tmp13 += z1;
2656
2657
0
    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223);       /* -c3+c7 */
2658
0
    tmp0 = MULTIPLY(tmp0, FIX_1_501321110);              /*  c1+c3-c5-c7 */
2659
0
    tmp3 = MULTIPLY(tmp3, FIX_0_298631336);              /* -c1+c3+c5-c7 */
2660
0
    tmp0 += z1 + tmp12;
2661
0
    tmp3 += z1 + tmp13;
2662
2663
0
    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447);       /* -c1-c3 */
2664
0
    tmp1 = MULTIPLY(tmp1, FIX_3_072711026);              /*  c1+c3+c5-c7 */
2665
0
    tmp2 = MULTIPLY(tmp2, FIX_2_053119869);              /*  c1+c3-c5+c7 */
2666
0
    tmp1 += z1 + tmp13;
2667
0
    tmp2 += z1 + tmp12;
2668
2669
0
    dataptr[DCTSIZE*1] = (DCTELEM) RIGHT_SHIFT(tmp0, CONST_BITS+PASS2_BITS+1);
2670
0
    dataptr[DCTSIZE*3] = (DCTELEM) RIGHT_SHIFT(tmp1, CONST_BITS+PASS2_BITS+1);
2671
0
    dataptr[DCTSIZE*5] = (DCTELEM) RIGHT_SHIFT(tmp2, CONST_BITS+PASS2_BITS+1);
2672
0
    dataptr[DCTSIZE*7] = (DCTELEM) RIGHT_SHIFT(tmp3, CONST_BITS+PASS2_BITS+1);
2673
2674
0
    dataptr++;      /* advance pointer to next column */
2675
0
  }
2676
0
}
2677
2678
2679
/*
2680
 * Perform the forward DCT on a 14x7 sample block.
2681
 *
2682
 * 14-point FDCT in pass 1 (rows), 7-point in pass 2 (columns).
2683
 */
2684
2685
GLOBAL(void)
2686
jpeg_fdct_14x7 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
2687
0
{
2688
0
  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
2689
0
  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
2690
0
  INT32 z1, z2, z3;
2691
0
  DCTELEM *dataptr;
2692
0
  JSAMPROW elemptr;
2693
0
  int ctr;
2694
0
  SHIFT_TEMPS
2695
2696
  /* Zero bottom row of output coefficient block. */
2697
0
  MEMZERO(&data[DCTSIZE*7], SIZEOF(DCTELEM) * DCTSIZE);
2698
2699
  /* Pass 1: process rows.
2700
   * Note results are scaled up by sqrt(8) compared to a true DCT;
2701
   * furthermore, we scale the results by 2**PASS1_BITS.
2702
   * 14-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
2703
   */
2704
2705
0
  dataptr = data;
2706
0
  for (ctr = 0; ctr < 7; ctr++) {
2707
0
    elemptr = sample_data[ctr] + start_col;
2708
2709
    /* Even part */
2710
2711
0
    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[13]);
2712
0
    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[12]);
2713
0
    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[11]);
2714
0
    tmp13 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[10]);
2715
0
    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[9]);
2716
0
    tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[8]);
2717
0
    tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[7]);
2718
2719
0
    tmp10 = tmp0 + tmp6;
2720
0
    tmp14 = tmp0 - tmp6;
2721
0
    tmp11 = tmp1 + tmp5;
2722
0
    tmp15 = tmp1 - tmp5;
2723
0
    tmp12 = tmp2 + tmp4;
2724
0
    tmp16 = tmp2 - tmp4;
2725
2726
0
    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[13]);
2727
0
    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[12]);
2728
0
    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[11]);
2729
0
    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[10]);
2730
0
    tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[9]);
2731
0
    tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[8]);
2732
0
    tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[7]);
2733
2734
    /* Apply unsigned->signed conversion. */
2735
0
    dataptr[0] =
2736
0
      PASS1_OUTPUT(tmp10 + tmp11 + tmp12 + tmp13 - 14 * CENTERJSAMPLE);
2737
0
    tmp13 += tmp13;
2738
0
    dataptr[4] = (DCTELEM)
2739
0
      DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.274162392)) + /* c4 */
2740
0
        MULTIPLY(tmp11 - tmp13, FIX(0.314692123)) - /* c12 */
2741
0
        MULTIPLY(tmp12 - tmp13, FIX(0.881747734)),  /* c8 */
2742
0
        CONST_BITS-PASS1_BITS);
2743
2744
0
    tmp10 = MULTIPLY(tmp14 + tmp15, FIX(1.105676686));    /* c6 */
2745
2746
0
    dataptr[2] = (DCTELEM)
2747
0
      DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.273079590))   /* c2-c6 */
2748
0
        + MULTIPLY(tmp16, FIX(0.613604268)),        /* c10 */
2749
0
        CONST_BITS-PASS1_BITS);
2750
0
    dataptr[6] = (DCTELEM)
2751
0
      DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.719280954))   /* c6+c10 */
2752
0
        - MULTIPLY(tmp16, FIX(1.378756276)),        /* c2 */
2753
0
        CONST_BITS-PASS1_BITS);
2754
2755
    /* Odd part */
2756
2757
0
    tmp10 = tmp1 + tmp2;
2758
0
    tmp11 = tmp5 - tmp4;
2759
0
    dataptr[7] = PASS1_OUTPUT(tmp0 - tmp10 + tmp3 - tmp11 - tmp6);
2760
0
    tmp3 <<= CONST_BITS;
2761
0
    tmp10 = MULTIPLY(tmp10, - FIX(0.158341681));          /* -c13 */
2762
0
    tmp11 = MULTIPLY(tmp11, FIX(1.405321284));            /* c1 */
2763
0
    tmp10 += tmp11 - tmp3;
2764
0
    tmp11 = MULTIPLY(tmp0 + tmp2, FIX(1.197448846)) +     /* c5 */
2765
0
      MULTIPLY(tmp4 + tmp6, FIX(0.752406978));      /* c9 */
2766
0
    dataptr[5] = (DCTELEM)
2767
0
      DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(2.373959773)) /* c3+c5-c13 */
2768
0
        + MULTIPLY(tmp4, FIX(1.119999435)),         /* c1+c11-c9 */
2769
0
        CONST_BITS-PASS1_BITS);
2770
0
    tmp12 = MULTIPLY(tmp0 + tmp1, FIX(1.334852607)) +     /* c3 */
2771
0
      MULTIPLY(tmp5 - tmp6, FIX(0.467085129));      /* c11 */
2772
0
    dataptr[3] = (DCTELEM)
2773
0
      DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.424103948)) /* c3-c9-c13 */
2774
0
        - MULTIPLY(tmp5, FIX(3.069855259)),         /* c1+c5+c11 */
2775
0
        CONST_BITS-PASS1_BITS);
2776
0
    dataptr[1] = (DCTELEM)
2777
0
      DESCALE(tmp11 + tmp12 + tmp3 + tmp6 -
2778
0
        MULTIPLY(tmp0 + tmp6, FIX(1.126980169)),    /* c3+c5-c1 */
2779
0
        CONST_BITS-PASS1_BITS);
2780
2781
0
    dataptr += DCTSIZE;   /* advance pointer to next row */
2782
0
  }
2783
2784
  /* Pass 2: process columns.
2785
   * We apply the PASS2_BITS scaling, but leave the
2786
   * results scaled up by an overall factor of 8.
2787
   * We must also scale the output by (8/14)*(8/7) = 32/49, which we
2788
   * partially fold into the constant multipliers and final shifting:
2789
   * 7-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/14) * 64/49.
2790
   */
2791
2792
0
  dataptr = data;
2793
0
  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
2794
    /* Even part */
2795
2796
0
    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*6];
2797
0
    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*5];
2798
0
    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*4];
2799
0
    tmp3 = dataptr[DCTSIZE*3];
2800
2801
0
    tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*6];
2802
0
    tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*5];
2803
0
    tmp12 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*4];
2804
2805
0
    z1 = tmp0 + tmp2;
2806
0
    dataptr[DCTSIZE*0] = (DCTELEM)
2807
0
      DESCALE(MULTIPLY(z1 + tmp1 + tmp3, FIX(1.306122449)), /* 64/49 */
2808
0
        CONST_BITS+PASS2_BITS+1);
2809
0
    tmp3 += tmp3;
2810
0
    z1 -= tmp3;
2811
0
    z1 -= tmp3;
2812
0
    z1 = MULTIPLY(z1, FIX(0.461784020));                /* (c2+c6-c4)/2 */
2813
0
    z2 = MULTIPLY(tmp0 - tmp2, FIX(1.202428084));       /* (c2+c4-c6)/2 */
2814
0
    z3 = MULTIPLY(tmp1 - tmp2, FIX(0.411026446));       /* c6 */
2815
0
    dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS+PASS2_BITS+1);
2816
0
    z1 -= z2;
2817
0
    z2 = MULTIPLY(tmp0 - tmp1, FIX(1.151670509));       /* c4 */
2818
0
    dataptr[DCTSIZE*4] = (DCTELEM)
2819
0
      DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.923568041)), /* c2+c6-c4 */
2820
0
        CONST_BITS+PASS2_BITS+1);
2821
0
    dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS+PASS2_BITS+1);
2822
2823
    /* Odd part */
2824
2825
0
    tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.221765677));   /* (c3+c1-c5)/2 */
2826
0
    tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.222383464));   /* (c3+c5-c1)/2 */
2827
0
    tmp0 = tmp1 - tmp2;
2828
0
    tmp1 += tmp2;
2829
0
    tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.800824523)); /* -c1 */
2830
0
    tmp1 += tmp2;
2831
0
    tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.801442310));   /* c5 */
2832
0
    tmp0 += tmp3;
2833
0
    tmp2 += tmp3 + MULTIPLY(tmp12, FIX(2.443531355));   /* c3+c1-c5 */
2834
2835
0
    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+PASS2_BITS+1);
2836
0
    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+PASS2_BITS+1);
2837
0
    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+PASS2_BITS+1);
2838
2839
0
    dataptr++;      /* advance pointer to next column */
2840
0
  }
2841
0
}
2842
2843
2844
/*
2845
 * Perform the forward DCT on a 12x6 sample block.
2846
 *
2847
 * 12-point FDCT in pass 1 (rows), 6-point in pass 2 (columns).
2848
 */
2849
2850
GLOBAL(void)
2851
jpeg_fdct_12x6 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
2852
0
{
2853
0
  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
2854
0
  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
2855
0
  DCTELEM *dataptr;
2856
0
  JSAMPROW elemptr;
2857
0
  int ctr;
2858
0
  SHIFT_TEMPS
2859
2860
  /* Zero 2 bottom rows of output coefficient block. */
2861
0
  MEMZERO(&data[DCTSIZE*6], SIZEOF(DCTELEM) * DCTSIZE * 2);
2862
2863
  /* Pass 1: process rows.
2864
   * Note results are scaled up by sqrt(8) compared to a true DCT;
2865
   * furthermore, we scale the results by 2**PASS1_BITS.
2866
   * 12-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
2867
   */
2868
2869
0
  dataptr = data;
2870
0
  for (ctr = 0; ctr < 6; ctr++) {
2871
0
    elemptr = sample_data[ctr] + start_col;
2872
2873
    /* Even part */
2874
2875
0
    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[11]);
2876
0
    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[10]);
2877
0
    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[9]);
2878
0
    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[8]);
2879
0
    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[7]);
2880
0
    tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[6]);
2881
2882
0
    tmp10 = tmp0 + tmp5;
2883
0
    tmp13 = tmp0 - tmp5;
2884
0
    tmp11 = tmp1 + tmp4;
2885
0
    tmp14 = tmp1 - tmp4;
2886
0
    tmp12 = tmp2 + tmp3;
2887
0
    tmp15 = tmp2 - tmp3;
2888
2889
0
    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[11]);
2890
0
    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[10]);
2891
0
    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[9]);
2892
0
    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[8]);
2893
0
    tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[7]);
2894
0
    tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[6]);
2895
2896
    /* Apply unsigned->signed conversion. */
2897
0
    dataptr[0] =
2898
0
      PASS1_OUTPUT(tmp10 + tmp11 + tmp12 - 12 * CENTERJSAMPLE);
2899
0
    dataptr[6] = PASS1_OUTPUT(tmp13 - tmp14 - tmp15);
2900
0
    dataptr[4] = (DCTELEM)
2901
0
      DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.224744871)), /* c4 */
2902
0
        CONST_BITS-PASS1_BITS);
2903
0
    dataptr[2] = (DCTELEM)
2904
0
      DESCALE(tmp14 - tmp15 + MULTIPLY(tmp13 + tmp15, FIX(1.366025404)), /* c2 */
2905
0
        CONST_BITS-PASS1_BITS);
2906
2907
    /* Odd part */
2908
2909
0
    tmp10 = MULTIPLY(tmp1 + tmp4, FIX_0_541196100);    /* c9 */
2910
0
    tmp14 = tmp10 + MULTIPLY(tmp1, FIX_0_765366865);   /* c3-c9 */
2911
0
    tmp15 = tmp10 - MULTIPLY(tmp4, FIX_1_847759065);   /* c3+c9 */
2912
0
    tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.121971054));   /* c5 */
2913
0
    tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.860918669));   /* c7 */
2914
0
    tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.580774953)) /* c5+c7-c1 */
2915
0
      + MULTIPLY(tmp5, FIX(0.184591911));        /* c11 */
2916
0
    tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.184591911)); /* -c11 */
2917
0
    tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.339493912)) /* c1+c5-c11 */
2918
0
      + MULTIPLY(tmp5, FIX(0.860918669));        /* c7 */
2919
0
    tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.725788011)) /* c1+c11-c7 */
2920
0
      - MULTIPLY(tmp5, FIX(1.121971054));        /* c5 */
2921
0
    tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.306562965)) /* c3 */
2922
0
      - MULTIPLY(tmp2 + tmp5, FIX_0_541196100);  /* c9 */
2923
2924
0
    dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS);
2925
0
    dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS);
2926
0
    dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS);
2927
0
    dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS);
2928
2929
0
    dataptr += DCTSIZE;   /* advance pointer to next row */
2930
0
  }
2931
2932
  /* Pass 2: process columns.
2933
   * We apply the PASS2_BITS scaling, but leave the
2934
   * results scaled up by an overall factor of 8.
2935
   * We must also scale the output by (8/12)*(8/6) = 8/9, which we
2936
   * partially fold into the constant multipliers and final shifting:
2937
   * 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12) * 16/9.
2938
   */
2939
2940
0
  dataptr = data;
2941
0
  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
2942
    /* Even part */
2943
2944
0
    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*5];
2945
0
    tmp11 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*4];
2946
0
    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
2947
2948
0
    tmp10 = tmp0 + tmp2;
2949
0
    tmp12 = tmp0 - tmp2;
2950
2951
0
    tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*5];
2952
0
    tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*4];
2953
0
    tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
2954
2955
0
    dataptr[DCTSIZE*0] = (DCTELEM)
2956
0
      DESCALE(MULTIPLY(tmp10 + tmp11, FIX(1.777777778)),         /* 16/9 */
2957
0
        CONST_BITS+PASS2_BITS+1);
2958
0
    dataptr[DCTSIZE*2] = (DCTELEM)
2959
0
      DESCALE(MULTIPLY(tmp12, FIX(2.177324216)),                 /* c2 */
2960
0
        CONST_BITS+PASS2_BITS+1);
2961
0
    dataptr[DCTSIZE*4] = (DCTELEM)
2962
0
      DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(1.257078722)), /* c4 */
2963
0
        CONST_BITS+PASS2_BITS+1);
2964
2965
    /* Odd part */
2966
2967
0
    tmp10 = MULTIPLY(tmp0 + tmp2, FIX(0.650711829));             /* c5 */
2968
2969
0
    dataptr[DCTSIZE*1] = (DCTELEM)
2970
0
      DESCALE(tmp10 + MULTIPLY(tmp0 + tmp1, FIX(1.777777778)),   /* 16/9 */
2971
0
        CONST_BITS+PASS2_BITS+1);
2972
0
    dataptr[DCTSIZE*3] = (DCTELEM)
2973
0
      DESCALE(MULTIPLY(tmp0 - tmp1 - tmp2, FIX(1.777777778)),    /* 16/9 */
2974
0
        CONST_BITS+PASS2_BITS+1);
2975
0
    dataptr[DCTSIZE*5] = (DCTELEM)
2976
0
      DESCALE(tmp10 + MULTIPLY(tmp2 - tmp1, FIX(1.777777778)),   /* 16/9 */
2977
0
        CONST_BITS+PASS2_BITS+1);
2978
2979
0
    dataptr++;      /* advance pointer to next column */
2980
0
  }
2981
0
}
2982
2983
2984
/*
2985
 * Perform the forward DCT on a 10x5 sample block.
2986
 *
2987
 * 10-point FDCT in pass 1 (rows), 5-point in pass 2 (columns).
2988
 */
2989
2990
GLOBAL(void)
2991
jpeg_fdct_10x5 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
2992
0
{
2993
0
  INT32 tmp0, tmp1, tmp2, tmp3, tmp4;
2994
0
  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
2995
0
  DCTELEM *dataptr;
2996
0
  JSAMPROW elemptr;
2997
0
  int ctr;
2998
0
  SHIFT_TEMPS
2999
3000
  /* Zero 3 bottom rows of output coefficient block. */
3001
0
  MEMZERO(&data[DCTSIZE*5], SIZEOF(DCTELEM) * DCTSIZE * 3);
3002
3003
  /* Pass 1: process rows.
3004
   * Note results are scaled up by sqrt(8) compared to a true DCT;
3005
   * furthermore, we scale the results by 2**PASS1_BITS.
3006
   * 10-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
3007
   */
3008
3009
0
  dataptr = data;
3010
0
  for (ctr = 0; ctr < 5; ctr++) {
3011
0
    elemptr = sample_data[ctr] + start_col;
3012
3013
    /* Even part */
3014
3015
0
    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[9]);
3016
0
    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[8]);
3017
0
    tmp12 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[7]);
3018
0
    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[6]);
3019
0
    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[5]);
3020
3021
0
    tmp10 = tmp0 + tmp4;
3022
0
    tmp13 = tmp0 - tmp4;
3023
0
    tmp11 = tmp1 + tmp3;
3024
0
    tmp14 = tmp1 - tmp3;
3025
3026
0
    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[9]);
3027
0
    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[8]);
3028
0
    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[7]);
3029
0
    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[6]);
3030
0
    tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[5]);
3031
3032
    /* Apply unsigned->signed conversion. */
3033
0
    dataptr[0] =
3034
0
      PASS1_OUTPUT(tmp10 + tmp11 + tmp12 - 10 * CENTERJSAMPLE);
3035
0
    tmp12 += tmp12;
3036
0
    dataptr[4] = (DCTELEM)
3037
0
      DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.144122806)) - /* c4 */
3038
0
        MULTIPLY(tmp11 - tmp12, FIX(0.437016024)),  /* c8 */
3039
0
        CONST_BITS-PASS1_BITS);
3040
0
    tmp10 = MULTIPLY(tmp13 + tmp14, FIX(0.831253876));    /* c6 */
3041
0
    dataptr[2] = (DCTELEM)
3042
0
      DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.513743148)),  /* c2-c6 */
3043
0
        CONST_BITS-PASS1_BITS);
3044
0
    dataptr[6] = (DCTELEM)
3045
0
      DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.176250899)),  /* c2+c6 */
3046
0
        CONST_BITS-PASS1_BITS);
3047
3048
    /* Odd part */
3049
3050
0
    tmp10 = tmp0 + tmp4;
3051
0
    tmp11 = tmp1 - tmp3;
3052
0
    dataptr[5] = PASS1_OUTPUT(tmp10 - tmp11 - tmp2);
3053
0
    tmp2 <<= CONST_BITS;
3054
0
    dataptr[1] = (DCTELEM)
3055
0
      DESCALE(MULTIPLY(tmp0, FIX(1.396802247)) +          /* c1 */
3056
0
        MULTIPLY(tmp1, FIX(1.260073511)) + tmp2 +   /* c3 */
3057
0
        MULTIPLY(tmp3, FIX(0.642039522)) +          /* c7 */
3058
0
        MULTIPLY(tmp4, FIX(0.221231742)),           /* c9 */
3059
0
        CONST_BITS-PASS1_BITS);
3060
0
    tmp12 = MULTIPLY(tmp0 - tmp4, FIX(0.951056516)) -     /* (c3+c7)/2 */
3061
0
      MULTIPLY(tmp1 + tmp3, FIX(0.587785252));      /* (c1-c9)/2 */
3062
0
    tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.309016994)) +   /* (c3-c7)/2 */
3063
0
      (tmp11 << (CONST_BITS - 1)) - tmp2;
3064
0
    dataptr[3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS-PASS1_BITS);
3065
0
    dataptr[7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS-PASS1_BITS);
3066
3067
0
    dataptr += DCTSIZE;   /* advance pointer to next row */
3068
0
  }
3069
3070
  /* Pass 2: process columns.
3071
   * We apply the PASS2_BITS scaling, but leave the
3072
   * results scaled up by an overall factor of 8.
3073
   * We must also scale the output by (8/10)*(8/5) = 32/25,
3074
   * which we fold into the constant multipliers:
3075
   * 5-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/10) * 32/25.
3076
   */
3077
3078
0
  dataptr = data;
3079
0
  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
3080
    /* Even part */
3081
3082
0
    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*4];
3083
0
    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*3];
3084
0
    tmp2 = dataptr[DCTSIZE*2];
3085
3086
0
    tmp10 = tmp0 + tmp1;
3087
0
    tmp11 = tmp0 - tmp1;
3088
3089
0
    tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*4];
3090
0
    tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*3];
3091
3092
0
    dataptr[DCTSIZE*0] = (DCTELEM)
3093
0
      DESCALE(MULTIPLY(tmp10 + tmp2, FIX(1.28)),        /* 32/25 */
3094
0
        CONST_BITS+PASS2_BITS);
3095
0
    tmp11 = MULTIPLY(tmp11, FIX(1.011928851));          /* (c2+c4)/2 */
3096
0
    tmp10 -= tmp2 << 2;
3097
0
    tmp10 = MULTIPLY(tmp10, FIX(0.452548340));          /* (c2-c4)/2 */
3098
0
    dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS+PASS2_BITS);
3099
0
    dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS+PASS2_BITS);
3100
3101
    /* Odd part */
3102
3103
0
    tmp10 = MULTIPLY(tmp0 + tmp1, FIX(1.064004961));    /* c3 */
3104
3105
0
    dataptr[DCTSIZE*1] = (DCTELEM)
3106
0
      DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.657591230)), /* c1-c3 */
3107
0
        CONST_BITS+PASS2_BITS);
3108
0
    dataptr[DCTSIZE*3] = (DCTELEM)
3109
0
      DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.785601151)), /* c1+c3 */
3110
0
        CONST_BITS+PASS2_BITS);
3111
3112
0
    dataptr++;      /* advance pointer to next column */
3113
0
  }
3114
0
}
3115
3116
3117
/*
3118
 * Perform the forward DCT on an 8x4 sample block.
3119
 *
3120
 * 8-point FDCT in pass 1 (rows), 4-point in pass 2 (columns).
3121
 */
3122
3123
GLOBAL(void)
3124
jpeg_fdct_8x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
3125
0
{
3126
0
  INT32 tmp0, tmp1, tmp2, tmp3;
3127
0
  INT32 tmp10, tmp11, tmp12, tmp13;
3128
0
  INT32 z1;
3129
0
  DCTELEM *dataptr;
3130
0
  JSAMPROW elemptr;
3131
0
  int ctr;
3132
0
  SHIFT_TEMPS
3133
3134
  /* Zero 4 bottom rows of output coefficient block. */
3135
0
  MEMZERO(&data[DCTSIZE*4], SIZEOF(DCTELEM) * DCTSIZE * 4);
3136
3137
  /* Pass 1: process rows.
3138
   * Note results are scaled up by sqrt(8) compared to a true DCT;
3139
   * furthermore, we scale the results by 2**PASS1_BITS.
3140
   * We must also scale the output by 8/4 = 2, which we add here.
3141
   * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
3142
   */
3143
3144
0
  dataptr = data;
3145
0
  for (ctr = 0; ctr < 4; ctr++) {
3146
0
    elemptr = sample_data[ctr] + start_col;
3147
3148
    /* Even part per LL&M figure 1 --- note that published figure is faulty;
3149
     * rotator "c1" should be "c6".
3150
     */
3151
3152
0
    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]);
3153
0
    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]);
3154
0
    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]);
3155
0
    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]);
3156
3157
0
    tmp10 = tmp0 + tmp3;
3158
0
    tmp12 = tmp0 - tmp3;
3159
0
    tmp11 = tmp1 + tmp2;
3160
0
    tmp13 = tmp1 - tmp2;
3161
3162
0
    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]);
3163
0
    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]);
3164
0
    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]);
3165
0
    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]);
3166
3167
    /* Apply unsigned->signed conversion. */
3168
0
    dataptr[0] = (DCTELEM)
3169
0
      ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << (PASS1_BITS+1));
3170
0
    dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << (PASS1_BITS+1));
3171
3172
0
    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);       /* c6 */
3173
    /* Add fudge factor here for final descale. */
3174
0
    z1 += ONE << (CONST_BITS-PASS1_BITS-2);
3175
3176
0
    dataptr[2] = (DCTELEM)
3177
0
      RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
3178
0
      CONST_BITS-PASS1_BITS-1);
3179
0
    dataptr[6] = (DCTELEM)
3180
0
      RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
3181
0
      CONST_BITS-PASS1_BITS-1);
3182
3183
    /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
3184
     * i0..i3 in the paper are tmp0..tmp3 here.
3185
     */
3186
3187
0
    tmp12 = tmp0 + tmp2;
3188
0
    tmp13 = tmp1 + tmp3;
3189
3190
0
    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602);       /*  c3 */
3191
    /* Add fudge factor here for final descale. */
3192
0
    z1 += ONE << (CONST_BITS-PASS1_BITS-2);
3193
3194
0
    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);          /* -c3+c5 */
3195
0
    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);          /* -c3-c5 */
3196
0
    tmp12 += z1;
3197
0
    tmp13 += z1;
3198
3199
0
    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223);       /* -c3+c7 */
3200
0
    tmp0 = MULTIPLY(tmp0, FIX_1_501321110);              /*  c1+c3-c5-c7 */
3201
0
    tmp3 = MULTIPLY(tmp3, FIX_0_298631336);              /* -c1+c3+c5-c7 */
3202
0
    tmp0 += z1 + tmp12;
3203
0
    tmp3 += z1 + tmp13;
3204
3205
0
    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447);       /* -c1-c3 */
3206
0
    tmp1 = MULTIPLY(tmp1, FIX_3_072711026);              /*  c1+c3+c5-c7 */
3207
0
    tmp2 = MULTIPLY(tmp2, FIX_2_053119869);              /*  c1+c3-c5+c7 */
3208
0
    tmp1 += z1 + tmp13;
3209
0
    tmp2 += z1 + tmp12;
3210
3211
0
    dataptr[1] = (DCTELEM) RIGHT_SHIFT(tmp0, CONST_BITS-PASS1_BITS-1);
3212
0
    dataptr[3] = (DCTELEM) RIGHT_SHIFT(tmp1, CONST_BITS-PASS1_BITS-1);
3213
0
    dataptr[5] = (DCTELEM) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS-1);
3214
0
    dataptr[7] = (DCTELEM) RIGHT_SHIFT(tmp3, CONST_BITS-PASS1_BITS-1);
3215
3216
0
    dataptr += DCTSIZE;   /* advance pointer to next row */
3217
0
  }
3218
3219
  /* Pass 2: process columns.
3220
   * We apply the PASS2_BITS scaling, but leave the
3221
   * results scaled up by an overall factor of 8.
3222
   * 4-point FDCT kernel,
3223
   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
3224
   */
3225
3226
0
  dataptr = data;
3227
0
  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
3228
    /* Even part */
3229
3230
    /* Add fudge factor here for final descale. */
3231
0
#if PASS2_BITS > 1
3232
0
    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3] + (ONE << (PASS2_BITS-1));
3233
#else
3234
#if PASS2_BITS > 0
3235
    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3] + ONE;
3236
#else
3237
    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3];
3238
#endif
3239
#endif
3240
0
    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*2];
3241
3242
0
    tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*3];
3243
0
    tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*2];
3244
3245
0
    dataptr[DCTSIZE*0] = PASS2_OUTPUT(tmp0 + tmp1);
3246
0
    dataptr[DCTSIZE*2] = PASS2_OUTPUT(tmp0 - tmp1);
3247
3248
    /* Odd part */
3249
3250
0
    tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100);       /* c6 */
3251
    /* Add fudge factor here for final descale. */
3252
0
    tmp0 += ONE << (CONST_BITS+PASS2_BITS-1);
3253
3254
0
    dataptr[DCTSIZE*1] = (DCTELEM)
3255
0
      RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
3256
0
      CONST_BITS+PASS2_BITS);
3257
0
    dataptr[DCTSIZE*3] = (DCTELEM)
3258
0
      RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
3259
0
      CONST_BITS+PASS2_BITS);
3260
3261
0
    dataptr++;      /* advance pointer to next column */
3262
0
  }
3263
0
}
3264
3265
3266
/*
3267
 * Perform the forward DCT on a 6x3 sample block.
3268
 *
3269
 * 6-point FDCT in pass 1 (rows), 3-point in pass 2 (columns).
3270
 */
3271
3272
GLOBAL(void)
3273
jpeg_fdct_6x3 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
3274
0
{
3275
0
  INT32 tmp0, tmp1, tmp2;
3276
0
  INT32 tmp10, tmp11, tmp12;
3277
0
  DCTELEM *dataptr;
3278
0
  JSAMPROW elemptr;
3279
0
  int ctr;
3280
0
  SHIFT_TEMPS
3281
3282
  /* Pre-zero output coefficient block. */
3283
0
  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
3284
3285
  /* Pass 1: process rows.
3286
   * Note results are scaled up by sqrt(8) compared to a true DCT;
3287
   * furthermore, we scale the results by 2**PASS1_BITS.
3288
   * We scale the results further by 2 as part of output adaption
3289
   * scaling for different DCT size.
3290
   * 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
3291
   */
3292
3293
0
  dataptr = data;
3294
0
  for (ctr = 0; ctr < 3; ctr++) {
3295
0
    elemptr = sample_data[ctr] + start_col;
3296
3297
    /* Even part */
3298
3299
0
    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[5]);
3300
0
    tmp11 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[4]);
3301
0
    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[3]);
3302
3303
0
    tmp10 = tmp0 + tmp2;
3304
0
    tmp12 = tmp0 - tmp2;
3305
3306
0
    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[5]);
3307
0
    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[4]);
3308
0
    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[3]);
3309
3310
    /* Apply unsigned->signed conversion. */
3311
0
    dataptr[0] = (DCTELEM)
3312
0
      ((tmp10 + tmp11 - 6 * CENTERJSAMPLE) << (PASS1_BITS+1));
3313
0
    dataptr[2] = (DCTELEM)
3314
0
      DESCALE(MULTIPLY(tmp12, FIX(1.224744871)),                 /* c2 */
3315
0
        CONST_BITS-PASS1_BITS-1);
3316
0
    dataptr[4] = (DCTELEM)
3317
0
      DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(0.707106781)), /* c4 */
3318
0
        CONST_BITS-PASS1_BITS-1);
3319
3320
    /* Odd part */
3321
3322
0
    tmp10 = DESCALE(MULTIPLY(tmp0 + tmp2, FIX(0.366025404)),     /* c5 */
3323
0
        CONST_BITS-PASS1_BITS-1);
3324
3325
0
    dataptr[1] = (DCTELEM) (tmp10 + ((tmp0 + tmp1) << (PASS1_BITS+1)));
3326
0
    dataptr[3] = (DCTELEM) ((tmp0 - tmp1 - tmp2) << (PASS1_BITS+1));
3327
0
    dataptr[5] = (DCTELEM) (tmp10 + ((tmp2 - tmp1) << (PASS1_BITS+1)));
3328
3329
0
    dataptr += DCTSIZE;   /* advance pointer to next row */
3330
0
  }
3331
3332
  /* Pass 2: process columns.
3333
   * We apply the PASS2_BITS scaling, but leave the
3334
   * results scaled up by an overall factor of 8.
3335
   * We must also scale the output by (8/6)*(8/3) = 32/9, which we partially
3336
   * fold into the constant multipliers (other part was done in pass 1):
3337
   * 3-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/6) * 16/9.
3338
   */
3339
3340
0
  dataptr = data;
3341
0
  for (ctr = 0; ctr < 6; ctr++) {
3342
    /* Even part */
3343
3344
0
    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*2];
3345
0
    tmp1 = dataptr[DCTSIZE*1];
3346
3347
0
    tmp2 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*2];
3348
3349
0
    dataptr[DCTSIZE*0] = (DCTELEM)
3350
0
      DESCALE(MULTIPLY(tmp0 + tmp1, FIX(1.777777778)),        /* 16/9 */
3351
0
        CONST_BITS+PASS2_BITS);
3352
0
    dataptr[DCTSIZE*2] = (DCTELEM)
3353
0
      DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(1.257078722)), /* c2 */
3354
0
        CONST_BITS+PASS2_BITS);
3355
3356
    /* Odd part */
3357
3358
0
    dataptr[DCTSIZE*1] = (DCTELEM)
3359
0
      DESCALE(MULTIPLY(tmp2, FIX(2.177324216)),               /* c1 */
3360
0
        CONST_BITS+PASS2_BITS);
3361
3362
0
    dataptr++;      /* advance pointer to next column */
3363
0
  }
3364
0
}
3365
3366
3367
/*
3368
 * Perform the forward DCT on a 4x2 sample block.
3369
 *
3370
 * 4-point FDCT in pass 1 (rows), 2-point in pass 2 (columns).
3371
 */
3372
3373
GLOBAL(void)
3374
jpeg_fdct_4x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
3375
0
{
3376
0
  DCTELEM tmp0, tmp2, tmp10, tmp12, tmp4, tmp5;
3377
0
  INT32 tmp1, tmp3, tmp11, tmp13;
3378
0
  INT32 z1, z2, z3;
3379
0
  JSAMPROW elemptr;
3380
0
  SHIFT_TEMPS
3381
#if PASS2_BITS > PASS1_BITS + 3
3382
  ISHIFT_TEMPS
3383
#endif
3384
3385
  /* Pre-zero output coefficient block. */
3386
0
  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
3387
3388
  /* Pass 1: process rows.
3389
   * Note results are scaled up by sqrt(8) compared to a true DCT.
3390
   * 4-point FDCT kernel,
3391
   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
3392
   */
3393
3394
  /* Row 0 */
3395
0
  elemptr = sample_data[0] + start_col;
3396
3397
  /* Even part */
3398
3399
0
  tmp4 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[3]);
3400
0
  tmp5 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]);
3401
3402
#if PASS2_BITS > PASS1_BITS + 3
3403
  /* Add fudge factor here for final downscale. */
3404
#if PASS2_BITS > PASS1_BITS + 4
3405
  tmp4 += 1 << (PASS2_BITS-PASS1_BITS-3-1);
3406
#else
3407
  tmp4 += 1;
3408
#endif
3409
#endif
3410
3411
0
  tmp0 = tmp4 + tmp5;
3412
0
  tmp2 = tmp4 - tmp5;
3413
3414
  /* Odd part */
3415
3416
0
  z2 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]);
3417
0
  z3 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]);
3418
3419
0
  z1 = MULTIPLY(z2 + z3, FIX_0_541196100);    /* c6 */
3420
  /* Add fudge factor here for final descale. */
3421
0
  z1 += ONE << (CONST_BITS+PASS2_BITS-PASS1_BITS-3-1);
3422
0
  tmp1 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
3423
0
  tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
3424
3425
  /* Row 1 */
3426
0
  elemptr = sample_data[1] + start_col;
3427
3428
  /* Even part */
3429
3430
0
  tmp4 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[3]);
3431
0
  tmp5 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]);
3432
3433
0
  tmp10 = tmp4 + tmp5;
3434
0
  tmp12 = tmp4 - tmp5;
3435
3436
  /* Odd part */
3437
3438
0
  z2 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]);
3439
0
  z3 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]);
3440
3441
0
  z1 = MULTIPLY(z2 + z3, FIX_0_541196100);    /* c6 */
3442
0
  tmp11 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
3443
0
  tmp13 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
3444
3445
  /* Pass 2: process columns.
3446
   * We leave the results scaled up by an overall factor of 8.
3447
   * We must also scale the output by (8/4)*(8/2) = 2**3.
3448
   */
3449
3450
  /* Column 0 */
3451
  /* Apply unsigned->signed conversion. */
3452
3453
0
#if PASS2_BITS < PASS1_BITS + 3
3454
0
  data[DCTSIZE*0] =
3455
0
    (tmp0 + tmp10 - 8 * CENTERJSAMPLE) << (3+PASS1_BITS-PASS2_BITS);
3456
0
  data[DCTSIZE*1] = (tmp0 - tmp10) << (3+PASS1_BITS-PASS2_BITS);
3457
3458
  /* Column 2 */
3459
0
  data[DCTSIZE*0+2] = (tmp2 + tmp12) << (3+PASS1_BITS-PASS2_BITS);
3460
0
  data[DCTSIZE*1+2] = (tmp2 - tmp12) << (3+PASS1_BITS-PASS2_BITS);
3461
#else
3462
#if PASS2_BITS == PASS1_BITS + 3
3463
  data[DCTSIZE*0] = tmp0 + tmp10 - 8 * CENTERJSAMPLE;
3464
  data[DCTSIZE*1] = tmp0 - tmp10;
3465
3466
  /* Column 2 */
3467
  data[DCTSIZE*0+2] = tmp2 + tmp12;
3468
  data[DCTSIZE*1+2] = tmp2 - tmp12;
3469
#else
3470
  data[DCTSIZE*0] =
3471
    IRIGHT_SHIFT(tmp0 + tmp10 - 8 * CENTERJSAMPLE,
3472
     PASS2_BITS-PASS1_BITS-3);
3473
  data[DCTSIZE*1] =
3474
    IRIGHT_SHIFT(tmp0 - tmp10, PASS2_BITS-PASS1_BITS-3);
3475
3476
  /* Column 2 */
3477
  data[DCTSIZE*0+2] =
3478
    IRIGHT_SHIFT(tmp2 + tmp12, PASS2_BITS-PASS1_BITS-3);
3479
  data[DCTSIZE*1+2] =
3480
    IRIGHT_SHIFT(tmp2 - tmp12, PASS2_BITS-PASS1_BITS-3);
3481
#endif
3482
#endif
3483
3484
  /* Column 1 */
3485
0
  data[DCTSIZE*0+1] = (DCTELEM)
3486
0
    RIGHT_SHIFT(tmp1 + tmp11, CONST_BITS+PASS2_BITS-PASS1_BITS-3);
3487
0
  data[DCTSIZE*1+1] = (DCTELEM)
3488
0
    RIGHT_SHIFT(tmp1 - tmp11, CONST_BITS+PASS2_BITS-PASS1_BITS-3);
3489
3490
  /* Column 3 */
3491
0
  data[DCTSIZE*0+3] = (DCTELEM)
3492
0
    RIGHT_SHIFT(tmp3 + tmp13, CONST_BITS+PASS2_BITS-PASS1_BITS-3);
3493
0
  data[DCTSIZE*1+3] = (DCTELEM)
3494
0
    RIGHT_SHIFT(tmp3 - tmp13, CONST_BITS+PASS2_BITS-PASS1_BITS-3);
3495
0
}
3496
3497
3498
/*
3499
 * Perform the forward DCT on a 2x1 sample block.
3500
 *
3501
 * 2-point FDCT in pass 1 (rows), 1-point in pass 2 (columns).
3502
 */
3503
3504
GLOBAL(void)
3505
jpeg_fdct_2x1 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
3506
0
{
3507
0
  DCTELEM tmp0, tmp1;
3508
0
  JSAMPROW elemptr;
3509
3510
  /* Pre-zero output coefficient block. */
3511
0
  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
3512
3513
0
  elemptr = sample_data[0] + start_col;
3514
3515
0
  tmp0 = GETJSAMPLE(elemptr[0]);
3516
0
  tmp1 = GETJSAMPLE(elemptr[1]);
3517
3518
  /* We leave the results scaled up by an overall factor of 8.
3519
   * We must also scale the output by (8/2)*(8/1) = 2**5.
3520
   */
3521
3522
  /* Even part */
3523
3524
  /* Apply unsigned->signed conversion. */
3525
0
  data[0] =
3526
0
    (tmp0 + tmp1 - 2 * CENTERJSAMPLE) << (5+PASS1_BITS-PASS2_BITS);
3527
3528
  /* Odd part */
3529
3530
0
  data[1] = (tmp0 - tmp1) << (5+PASS1_BITS-PASS2_BITS);
3531
0
}
3532
3533
3534
/*
3535
 * Perform the forward DCT on an 8x16 sample block.
3536
 *
3537
 * 8-point FDCT in pass 1 (rows), 16-point in pass 2 (columns).
3538
 */
3539
3540
GLOBAL(void)
3541
jpeg_fdct_8x16 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
3542
0
{
3543
0
  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
3544
0
  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17;
3545
0
  INT32 z1;
3546
0
  DCTELEM workspace[DCTSIZE2];
3547
0
  DCTELEM *dataptr;
3548
0
  DCTELEM *wsptr;
3549
0
  JSAMPROW elemptr;
3550
0
  int ctr;
3551
0
  SHIFT_TEMPS
3552
3553
  /* Pass 1: process rows.
3554
   * Note results are scaled up by sqrt(8) compared to a true DCT;
3555
   * furthermore, we scale the results by 2**PASS1_BITS.
3556
   * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
3557
   */
3558
3559
0
  dataptr = data;
3560
0
  ctr = 0;
3561
0
  for (;;) {
3562
0
    elemptr = sample_data[ctr] + start_col;
3563
3564
    /* Even part per LL&M figure 1 --- note that published figure is faulty;
3565
     * rotator "c1" should be "c6".
3566
     */
3567
3568
0
    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]);
3569
0
    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]);
3570
0
    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]);
3571
0
    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]);
3572
3573
0
    tmp10 = tmp0 + tmp3;
3574
0
    tmp12 = tmp0 - tmp3;
3575
0
    tmp11 = tmp1 + tmp2;
3576
0
    tmp13 = tmp1 - tmp2;
3577
3578
0
    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]);
3579
0
    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]);
3580
0
    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]);
3581
0
    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]);
3582
3583
    /* Apply unsigned->signed conversion. */
3584
0
    dataptr[0] = PASS1_OUTPUT(tmp10 + tmp11 - 8 * CENTERJSAMPLE);
3585
0
    dataptr[4] = PASS1_OUTPUT(tmp10 - tmp11);
3586
3587
0
    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);       /* c6 */
3588
    /* Add fudge factor here for final descale. */
3589
0
    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
3590
3591
0
    dataptr[2] = (DCTELEM)
3592
0
      RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
3593
0
      CONST_BITS-PASS1_BITS);
3594
0
    dataptr[6] = (DCTELEM)
3595
0
      RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
3596
0
      CONST_BITS-PASS1_BITS);
3597
3598
    /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
3599
     * i0..i3 in the paper are tmp0..tmp3 here.
3600
     */
3601
3602
0
    tmp12 = tmp0 + tmp2;
3603
0
    tmp13 = tmp1 + tmp3;
3604
3605
0
    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602);       /*  c3 */
3606
    /* Add fudge factor here for final descale. */
3607
0
    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
3608
3609
0
    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);          /* -c3+c5 */
3610
0
    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);          /* -c3-c5 */
3611
0
    tmp12 += z1;
3612
0
    tmp13 += z1;
3613
3614
0
    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223);       /* -c3+c7 */
3615
0
    tmp0 = MULTIPLY(tmp0, FIX_1_501321110);              /*  c1+c3-c5-c7 */
3616
0
    tmp3 = MULTIPLY(tmp3, FIX_0_298631336);              /* -c1+c3+c5-c7 */
3617
0
    tmp0 += z1 + tmp12;
3618
0
    tmp3 += z1 + tmp13;
3619
3620
0
    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447);       /* -c1-c3 */
3621
0
    tmp1 = MULTIPLY(tmp1, FIX_3_072711026);              /*  c1+c3+c5-c7 */
3622
0
    tmp2 = MULTIPLY(tmp2, FIX_2_053119869);              /*  c1+c3-c5+c7 */
3623
0
    tmp1 += z1 + tmp13;
3624
0
    tmp2 += z1 + tmp12;
3625
3626
0
    dataptr[1] = (DCTELEM) RIGHT_SHIFT(tmp0, CONST_BITS-PASS1_BITS);
3627
0
    dataptr[3] = (DCTELEM) RIGHT_SHIFT(tmp1, CONST_BITS-PASS1_BITS);
3628
0
    dataptr[5] = (DCTELEM) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
3629
0
    dataptr[7] = (DCTELEM) RIGHT_SHIFT(tmp3, CONST_BITS-PASS1_BITS);
3630
3631
0
    ctr++;
3632
3633
0
    if (ctr != DCTSIZE) {
3634
0
      if (ctr == DCTSIZE * 2)
3635
0
  break;     /* Done. */
3636
0
      dataptr += DCTSIZE; /* advance pointer to next row */
3637
0
    } else
3638
0
      dataptr = workspace; /* switch pointer to extended workspace */
3639
0
  }
3640
3641
  /* Pass 2: process columns.
3642
   * We apply the PASS2_BITS scaling, but leave the
3643
   * results scaled up by an overall factor of 8.
3644
   * We must also scale the output by 8/16 = 1/2.
3645
   * 16-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
3646
   */
3647
3648
0
  dataptr = data;
3649
0
  wsptr = workspace;
3650
0
  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
3651
    /* Even part */
3652
3653
0
    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*7];
3654
0
    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*6];
3655
0
    tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*5];
3656
0
    tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*4];
3657
0
    tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*3];
3658
0
    tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*2];
3659
0
    tmp6 = dataptr[DCTSIZE*6] + wsptr[DCTSIZE*1];
3660
0
    tmp7 = dataptr[DCTSIZE*7] + wsptr[DCTSIZE*0];
3661
3662
0
    tmp10 = tmp0 + tmp7;
3663
0
    tmp14 = tmp0 - tmp7;
3664
0
    tmp11 = tmp1 + tmp6;
3665
0
    tmp15 = tmp1 - tmp6;
3666
0
    tmp12 = tmp2 + tmp5;
3667
0
    tmp16 = tmp2 - tmp5;
3668
0
    tmp13 = tmp3 + tmp4;
3669
0
    tmp17 = tmp3 - tmp4;
3670
3671
0
    tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*7];
3672
0
    tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*6];
3673
0
    tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*5];
3674
0
    tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*4];
3675
0
    tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*3];
3676
0
    tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*2];
3677
0
    tmp6 = dataptr[DCTSIZE*6] - wsptr[DCTSIZE*1];
3678
0
    tmp7 = dataptr[DCTSIZE*7] - wsptr[DCTSIZE*0];
3679
3680
0
    dataptr[DCTSIZE*0] = (DCTELEM)
3681
0
#if PASS2_BITS > 0
3682
0
      RIGHT_SHIFT(tmp10 + tmp11 + tmp12 + tmp13 + (ONE << PASS2_BITS),
3683
0
      PASS2_BITS+1);
3684
#else
3685
      RIGHT_SHIFT(tmp10 + tmp11 + tmp12 + tmp13 + ONE, 1);
3686
#endif
3687
0
    dataptr[DCTSIZE*4] = (DCTELEM)
3688
0
      DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */
3689
0
        MULTIPLY(tmp11 - tmp12, FIX_0_541196100),   /* c12[16] = c6[8] */
3690
0
        CONST_BITS+PASS2_BITS+1);
3691
3692
0
    tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) +   /* c14[16] = c7[8] */
3693
0
      MULTIPLY(tmp14 - tmp16, FIX(1.387039845));    /* c2[16] = c1[8] */
3694
3695
0
    dataptr[DCTSIZE*2] = (DCTELEM)
3696
0
      DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982))   /* c6+c14 */
3697
0
        + MULTIPLY(tmp16, FIX(2.172734804)),        /* c2+c10 */
3698
0
        CONST_BITS+PASS2_BITS+1);
3699
0
    dataptr[DCTSIZE*6] = (DCTELEM)
3700
0
      DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243))   /* c2-c6 */
3701
0
        - MULTIPLY(tmp17, FIX(1.061594338)),        /* c10+c14 */
3702
0
        CONST_BITS+PASS2_BITS+1);
3703
3704
    /* Odd part */
3705
3706
0
    tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) +         /* c3 */
3707
0
      MULTIPLY(tmp6 - tmp7, FIX(0.410524528));          /* c13 */
3708
0
    tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) +         /* c5 */
3709
0
      MULTIPLY(tmp5 + tmp7, FIX(0.666655658));          /* c11 */
3710
0
    tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) +         /* c7 */
3711
0
      MULTIPLY(tmp4 - tmp7, FIX(0.897167586));          /* c9 */
3712
0
    tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) +         /* c15 */
3713
0
      MULTIPLY(tmp6 - tmp5, FIX(1.407403738));          /* c1 */
3714
0
    tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) +       /* -c11 */
3715
0
      MULTIPLY(tmp4 + tmp6, - FIX(1.247225013));        /* -c5 */
3716
0
    tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) +       /* -c3 */
3717
0
      MULTIPLY(tmp5 - tmp4, FIX(0.410524528));          /* c13 */
3718
0
    tmp10 = tmp11 + tmp12 + tmp13 -
3719
0
      MULTIPLY(tmp0, FIX(2.286341144)) +                /* c7+c5+c3-c1 */
3720
0
      MULTIPLY(tmp7, FIX(0.779653625));                 /* c15+c13-c11+c9 */
3721
0
    tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */
3722
0
       - MULTIPLY(tmp6, FIX(1.663905119));              /* c7+c13+c1-c5 */
3723
0
    tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */
3724
0
       + MULTIPLY(tmp5, FIX(1.227391138));              /* c9-c11+c1-c13 */
3725
0
    tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */
3726
0
       + MULTIPLY(tmp4, FIX(2.167985692));              /* c1+c13+c5-c9 */
3727
3728
0
    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+PASS2_BITS+1);
3729
0
    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+PASS2_BITS+1);
3730
0
    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+PASS2_BITS+1);
3731
0
    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+PASS2_BITS+1);
3732
3733
0
    dataptr++;      /* advance pointer to next column */
3734
0
    wsptr++;      /* advance pointer to next column */
3735
0
  }
3736
0
}
3737
3738
3739
/*
3740
 * Perform the forward DCT on a 7x14 sample block.
3741
 *
3742
 * 7-point FDCT in pass 1 (rows), 14-point in pass 2 (columns).
3743
 */
3744
3745
GLOBAL(void)
3746
jpeg_fdct_7x14 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
3747
0
{
3748
0
  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
3749
0
  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
3750
0
  INT32 z1, z2, z3;
3751
0
  DCTELEM workspace[8*6];
3752
0
  DCTELEM *dataptr;
3753
0
  DCTELEM *wsptr;
3754
0
  JSAMPROW elemptr;
3755
0
  int ctr;
3756
0
  SHIFT_TEMPS
3757
3758
  /* Pre-zero output coefficient block. */
3759
0
  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
3760
3761
  /* Pass 1: process rows.
3762
   * Note results are scaled up by sqrt(8) compared to a true DCT;
3763
   * furthermore, we scale the results by 2**PASS1_BITS.
3764
   * 7-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
3765
   */
3766
3767
0
  dataptr = data;
3768
0
  ctr = 0;
3769
0
  for (;;) {
3770
0
    elemptr = sample_data[ctr] + start_col;
3771
3772
    /* Even part */
3773
3774
0
    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[6]);
3775
0
    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[5]);
3776
0
    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[4]);
3777
0
    tmp3 = GETJSAMPLE(elemptr[3]);
3778
3779
0
    tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[6]);
3780
0
    tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[5]);
3781
0
    tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[4]);
3782
3783
0
    z1 = tmp0 + tmp2;
3784
    /* Apply unsigned->signed conversion. */
3785
0
    dataptr[0] = PASS1_OUTPUT(z1 + tmp1 + tmp3 - 7 * CENTERJSAMPLE);
3786
0
    tmp3 += tmp3;
3787
0
    z1 -= tmp3;
3788
0
    z1 -= tmp3;
3789
0
    z1 = MULTIPLY(z1, FIX(0.353553391));                /* (c2+c6-c4)/2 */
3790
0
    z2 = MULTIPLY(tmp0 - tmp2, FIX(0.920609002));       /* (c2+c4-c6)/2 */
3791
0
    z3 = MULTIPLY(tmp1 - tmp2, FIX(0.314692123));       /* c6 */
3792
0
    dataptr[2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS-PASS1_BITS);
3793
0
    z1 -= z2;
3794
0
    z2 = MULTIPLY(tmp0 - tmp1, FIX(0.881747734));       /* c4 */
3795
0
    dataptr[4] = (DCTELEM)
3796
0
      DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.707106781)), /* c2+c6-c4 */
3797
0
        CONST_BITS-PASS1_BITS);
3798
0
    dataptr[6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS-PASS1_BITS);
3799
3800
    /* Odd part */
3801
3802
0
    tmp1 = MULTIPLY(tmp10 + tmp11, FIX(0.935414347));   /* (c3+c1-c5)/2 */
3803
0
    tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.170262339));   /* (c3+c5-c1)/2 */
3804
0
    tmp0 = tmp1 - tmp2;
3805
0
    tmp1 += tmp2;
3806
0
    tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.378756276)); /* -c1 */
3807
0
    tmp1 += tmp2;
3808
0
    tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.613604268));   /* c5 */
3809
0
    tmp0 += tmp3;
3810
0
    tmp2 += tmp3 + MULTIPLY(tmp12, FIX(1.870828693));   /* c3+c1-c5 */
3811
3812
0
    dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS-PASS1_BITS);
3813
0
    dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS-PASS1_BITS);
3814
0
    dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS-PASS1_BITS);
3815
3816
0
    ctr++;
3817
3818
0
    if (ctr != DCTSIZE) {
3819
0
      if (ctr == 14)
3820
0
  break;     /* Done. */
3821
0
      dataptr += DCTSIZE; /* advance pointer to next row */
3822
0
    } else
3823
0
      dataptr = workspace; /* switch pointer to extended workspace */
3824
0
  }
3825
3826
  /* Pass 2: process columns.
3827
   * We apply the PASS2_BITS scaling, but leave the
3828
   * results scaled up by an overall factor of 8.
3829
   * We must also scale the output by (8/7)*(8/14) = 32/49,
3830
   * which we fold into the constant multipliers:
3831
   * 14-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/28) * 32/49.
3832
   */
3833
3834
0
  dataptr = data;
3835
0
  wsptr = workspace;
3836
0
  for (ctr = 0; ctr < 7; ctr++) {
3837
    /* Even part */
3838
3839
0
    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*5];
3840
0
    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*4];
3841
0
    tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*3];
3842
0
    tmp13 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*2];
3843
0
    tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*1];
3844
0
    tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*0];
3845
0
    tmp6 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7];
3846
3847
0
    tmp10 = tmp0 + tmp6;
3848
0
    tmp14 = tmp0 - tmp6;
3849
0
    tmp11 = tmp1 + tmp5;
3850
0
    tmp15 = tmp1 - tmp5;
3851
0
    tmp12 = tmp2 + tmp4;
3852
0
    tmp16 = tmp2 - tmp4;
3853
3854
0
    tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*5];
3855
0
    tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*4];
3856
0
    tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*3];
3857
0
    tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*2];
3858
0
    tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*1];
3859
0
    tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*0];
3860
0
    tmp6 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7];
3861
3862
0
    dataptr[DCTSIZE*0] = (DCTELEM)
3863
0
      DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12 + tmp13,
3864
0
           FIX(0.653061224)),                 /* 32/49 */
3865
0
        CONST_BITS+PASS2_BITS);
3866
0
    tmp13 += tmp13;
3867
0
    dataptr[DCTSIZE*4] = (DCTELEM)
3868
0
      DESCALE(MULTIPLY(tmp10 - tmp13, FIX(0.832106052)) + /* c4 */
3869
0
        MULTIPLY(tmp11 - tmp13, FIX(0.205513223)) - /* c12 */
3870
0
        MULTIPLY(tmp12 - tmp13, FIX(0.575835255)),  /* c8 */
3871
0
        CONST_BITS+PASS2_BITS);
3872
3873
0
    tmp10 = MULTIPLY(tmp14 + tmp15, FIX(0.722074570));    /* c6 */
3874
3875
0
    dataptr[DCTSIZE*2] = (DCTELEM)
3876
0
      DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.178337691))   /* c2-c6 */
3877
0
        + MULTIPLY(tmp16, FIX(0.400721155)),        /* c10 */
3878
0
        CONST_BITS+PASS2_BITS);
3879
0
    dataptr[DCTSIZE*6] = (DCTELEM)
3880
0
      DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.122795725))   /* c6+c10 */
3881
0
        - MULTIPLY(tmp16, FIX(0.900412262)),        /* c2 */
3882
0
        CONST_BITS+PASS2_BITS);
3883
3884
    /* Odd part */
3885
3886
0
    tmp10 = tmp1 + tmp2;
3887
0
    tmp11 = tmp5 - tmp4;
3888
0
    dataptr[DCTSIZE*7] = (DCTELEM)
3889
0
      DESCALE(MULTIPLY(tmp0 - tmp10 + tmp3 - tmp11 - tmp6,
3890
0
           FIX(0.653061224)),                 /* 32/49 */
3891
0
        CONST_BITS+PASS2_BITS);
3892
0
    tmp3  = MULTIPLY(tmp3 , FIX(0.653061224));            /* 32/49 */
3893
0
    tmp10 = MULTIPLY(tmp10, - FIX(0.103406812));          /* -c13 */
3894
0
    tmp11 = MULTIPLY(tmp11, FIX(0.917760839));            /* c1 */
3895
0
    tmp10 += tmp11 - tmp3;
3896
0
    tmp11 = MULTIPLY(tmp0 + tmp2, FIX(0.782007410)) +     /* c5 */
3897
0
      MULTIPLY(tmp4 + tmp6, FIX(0.491367823));      /* c9 */
3898
0
    dataptr[DCTSIZE*5] = (DCTELEM)
3899
0
      DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(1.550341076)) /* c3+c5-c13 */
3900
0
        + MULTIPLY(tmp4, FIX(0.731428202)),         /* c1+c11-c9 */
3901
0
        CONST_BITS+PASS2_BITS);
3902
0
    tmp12 = MULTIPLY(tmp0 + tmp1, FIX(0.871740478)) +     /* c3 */
3903
0
      MULTIPLY(tmp5 - tmp6, FIX(0.305035186));      /* c11 */
3904
0
    dataptr[DCTSIZE*3] = (DCTELEM)
3905
0
      DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.276965844)) /* c3-c9-c13 */
3906
0
        - MULTIPLY(tmp5, FIX(2.004803435)),         /* c1+c5+c11 */
3907
0
        CONST_BITS+PASS2_BITS);
3908
0
    dataptr[DCTSIZE*1] = (DCTELEM)
3909
0
      DESCALE(tmp11 + tmp12 + tmp3
3910
0
        - MULTIPLY(tmp0, FIX(0.735987049))          /* c3+c5-c1 */
3911
0
        - MULTIPLY(tmp6, FIX(0.082925825)),         /* c9-c11-c13 */
3912
0
        CONST_BITS+PASS2_BITS);
3913
3914
0
    dataptr++;      /* advance pointer to next column */
3915
0
    wsptr++;      /* advance pointer to next column */
3916
0
  }
3917
0
}
3918
3919
3920
/*
3921
 * Perform the forward DCT on a 6x12 sample block.
3922
 *
3923
 * 6-point FDCT in pass 1 (rows), 12-point in pass 2 (columns).
3924
 */
3925
3926
GLOBAL(void)
3927
jpeg_fdct_6x12 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
3928
0
{
3929
0
  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
3930
0
  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
3931
0
  DCTELEM workspace[8*4];
3932
0
  DCTELEM *dataptr;
3933
0
  DCTELEM *wsptr;
3934
0
  JSAMPROW elemptr;
3935
0
  int ctr;
3936
0
  SHIFT_TEMPS
3937
3938
  /* Pre-zero output coefficient block. */
3939
0
  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
3940
3941
  /* Pass 1: process rows.
3942
   * Note results are scaled up by sqrt(8) compared to a true DCT;
3943
   * furthermore, we scale the results by 2**PASS1_BITS.
3944
   * 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
3945
   */
3946
3947
0
  dataptr = data;
3948
0
  ctr = 0;
3949
0
  for (;;) {
3950
0
    elemptr = sample_data[ctr] + start_col;
3951
3952
    /* Even part */
3953
3954
0
    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[5]);
3955
0
    tmp11 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[4]);
3956
0
    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[3]);
3957
3958
0
    tmp10 = tmp0 + tmp2;
3959
0
    tmp12 = tmp0 - tmp2;
3960
3961
0
    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[5]);
3962
0
    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[4]);
3963
0
    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[3]);
3964
3965
    /* Apply unsigned->signed conversion. */
3966
0
    dataptr[0] = PASS1_OUTPUT(tmp10 + tmp11 - 6 * CENTERJSAMPLE);
3967
0
    dataptr[2] = (DCTELEM)
3968
0
      DESCALE(MULTIPLY(tmp12, FIX(1.224744871)),                 /* c2 */
3969
0
        CONST_BITS-PASS1_BITS);
3970
0
    dataptr[4] = (DCTELEM)
3971
0
      DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(0.707106781)), /* c4 */
3972
0
        CONST_BITS-PASS1_BITS);
3973
3974
    /* Odd part */
3975
3976
0
    tmp10 = DESCALE(MULTIPLY(tmp0 + tmp2, FIX(0.366025404)),     /* c5 */
3977
0
        CONST_BITS-PASS1_BITS);
3978
3979
0
#if PASS1_BITS > 0
3980
0
    dataptr[1] = (DCTELEM) (tmp10 + ((tmp0 + tmp1) << PASS1_BITS));
3981
0
    dataptr[3] = (DCTELEM) ((tmp0 - tmp1 - tmp2) << PASS1_BITS);
3982
0
    dataptr[5] = (DCTELEM) (tmp10 + ((tmp2 - tmp1) << PASS1_BITS));
3983
#else
3984
    dataptr[1] = (DCTELEM) (tmp10 + tmp0 + tmp1);
3985
    dataptr[3] = (DCTELEM) (tmp0 - tmp1 - tmp2);
3986
    dataptr[5] = (DCTELEM) (tmp10 + tmp2 - tmp1);
3987
#endif
3988
3989
0
    ctr++;
3990
3991
0
    if (ctr != DCTSIZE) {
3992
0
      if (ctr == 12)
3993
0
  break;     /* Done. */
3994
0
      dataptr += DCTSIZE; /* advance pointer to next row */
3995
0
    } else
3996
0
      dataptr = workspace; /* switch pointer to extended workspace */
3997
0
  }
3998
3999
  /* Pass 2: process columns.
4000
   * We apply the PASS2_BITS scaling, but leave the
4001
   * results scaled up by an overall factor of 8.
4002
   * We must also scale the output by (8/6)*(8/12) = 8/9,
4003
   * which we fold into the constant multipliers:
4004
   * 12-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/24) * 8/9.
4005
   */
4006
4007
0
  dataptr = data;
4008
0
  wsptr = workspace;
4009
0
  for (ctr = 0; ctr < 6; ctr++) {
4010
    /* Even part */
4011
4012
0
    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*3];
4013
0
    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*2];
4014
0
    tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*1];
4015
0
    tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*0];
4016
0
    tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*7];
4017
0
    tmp5 = dataptr[DCTSIZE*5] + dataptr[DCTSIZE*6];
4018
4019
0
    tmp10 = tmp0 + tmp5;
4020
0
    tmp13 = tmp0 - tmp5;
4021
0
    tmp11 = tmp1 + tmp4;
4022
0
    tmp14 = tmp1 - tmp4;
4023
0
    tmp12 = tmp2 + tmp3;
4024
0
    tmp15 = tmp2 - tmp3;
4025
4026
0
    tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*3];
4027
0
    tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*2];
4028
0
    tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*1];
4029
0
    tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*0];
4030
0
    tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*7];
4031
0
    tmp5 = dataptr[DCTSIZE*5] - dataptr[DCTSIZE*6];
4032
4033
0
    dataptr[DCTSIZE*0] = (DCTELEM)
4034
0
      DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(0.888888889)), /* 8/9 */
4035
0
        CONST_BITS+PASS2_BITS);
4036
0
    dataptr[DCTSIZE*6] = (DCTELEM)
4037
0
      DESCALE(MULTIPLY(tmp13 - tmp14 - tmp15, FIX(0.888888889)), /* 8/9 */
4038
0
        CONST_BITS+PASS2_BITS);
4039
0
    dataptr[DCTSIZE*4] = (DCTELEM)
4040
0
      DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.088662108)),         /* c4 */
4041
0
        CONST_BITS+PASS2_BITS);
4042
0
    dataptr[DCTSIZE*2] = (DCTELEM)
4043
0
      DESCALE(MULTIPLY(tmp14 - tmp15, FIX(0.888888889)) +        /* 8/9 */
4044
0
        MULTIPLY(tmp13 + tmp15, FIX(1.214244803)),         /* c2 */
4045
0
        CONST_BITS+PASS2_BITS);
4046
4047
    /* Odd part */
4048
4049
0
    tmp10 = MULTIPLY(tmp1 + tmp4, FIX(0.481063200));   /* c9 */
4050
0
    tmp14 = tmp10 + MULTIPLY(tmp1, FIX(0.680326102));  /* c3-c9 */
4051
0
    tmp15 = tmp10 - MULTIPLY(tmp4, FIX(1.642452502));  /* c3+c9 */
4052
0
    tmp12 = MULTIPLY(tmp0 + tmp2, FIX(0.997307603));   /* c5 */
4053
0
    tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.765261039));   /* c7 */
4054
0
    tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.516244403)) /* c5+c7-c1 */
4055
0
      + MULTIPLY(tmp5, FIX(0.164081699));        /* c11 */
4056
0
    tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.164081699)); /* -c11 */
4057
0
    tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.079550144)) /* c1+c5-c11 */
4058
0
      + MULTIPLY(tmp5, FIX(0.765261039));        /* c7 */
4059
0
    tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.645144899)) /* c1+c11-c7 */
4060
0
      - MULTIPLY(tmp5, FIX(0.997307603));        /* c5 */
4061
0
    tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.161389302)) /* c3 */
4062
0
      - MULTIPLY(tmp2 + tmp5, FIX(0.481063200)); /* c9 */
4063
4064
0
    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+PASS2_BITS);
4065
0
    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+PASS2_BITS);
4066
0
    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+PASS2_BITS);
4067
0
    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+PASS2_BITS);
4068
4069
0
    dataptr++;      /* advance pointer to next column */
4070
0
    wsptr++;      /* advance pointer to next column */
4071
0
  }
4072
0
}
4073
4074
4075
/*
4076
 * Perform the forward DCT on a 5x10 sample block.
4077
 *
4078
 * 5-point FDCT in pass 1 (rows), 10-point in pass 2 (columns).
4079
 */
4080
4081
GLOBAL(void)
4082
jpeg_fdct_5x10 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
4083
0
{
4084
0
  INT32 tmp0, tmp1, tmp2, tmp3, tmp4;
4085
0
  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
4086
0
  DCTELEM workspace[8*2];
4087
0
  DCTELEM *dataptr;
4088
0
  DCTELEM *wsptr;
4089
0
  JSAMPROW elemptr;
4090
0
  int ctr;
4091
0
  SHIFT_TEMPS
4092
4093
  /* Pre-zero output coefficient block. */
4094
0
  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
4095
4096
  /* Pass 1: process rows.
4097
   * Note results are scaled up by sqrt(8) compared to a true DCT;
4098
   * furthermore, we scale the results by 2**PASS1_BITS.
4099
   * 5-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
4100
   */
4101
4102
0
  dataptr = data;
4103
0
  ctr = 0;
4104
0
  for (;;) {
4105
0
    elemptr = sample_data[ctr] + start_col;
4106
4107
    /* Even part */
4108
4109
0
    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[4]);
4110
0
    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[3]);
4111
0
    tmp2 = GETJSAMPLE(elemptr[2]);
4112
4113
0
    tmp10 = tmp0 + tmp1;
4114
0
    tmp11 = tmp0 - tmp1;
4115
4116
0
    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[4]);
4117
0
    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[3]);
4118
4119
    /* Apply unsigned->signed conversion. */
4120
0
    dataptr[0] = PASS1_OUTPUT(tmp10 + tmp2 - 5 * CENTERJSAMPLE);
4121
0
    tmp11 = MULTIPLY(tmp11, FIX(0.790569415));          /* (c2+c4)/2 */
4122
0
    tmp10 -= tmp2 << 2;
4123
0
    tmp10 = MULTIPLY(tmp10, FIX(0.353553391));          /* (c2-c4)/2 */
4124
0
    dataptr[2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS-PASS1_BITS);
4125
0
    dataptr[4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS-PASS1_BITS);
4126
4127
    /* Odd part */
4128
4129
0
    tmp10 = MULTIPLY(tmp0 + tmp1, FIX(0.831253876));    /* c3 */
4130
4131
0
    dataptr[1] = (DCTELEM)
4132
0
      DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.513743148)), /* c1-c3 */
4133
0
        CONST_BITS-PASS1_BITS);
4134
0
    dataptr[3] = (DCTELEM)
4135
0
      DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.176250899)), /* c1+c3 */
4136
0
        CONST_BITS-PASS1_BITS);
4137
4138
0
    ctr++;
4139
4140
0
    if (ctr != DCTSIZE) {
4141
0
      if (ctr == 10)
4142
0
  break;     /* Done. */
4143
0
      dataptr += DCTSIZE; /* advance pointer to next row */
4144
0
    } else
4145
0
      dataptr = workspace; /* switch pointer to extended workspace */
4146
0
  }
4147
4148
  /* Pass 2: process columns.
4149
   * We apply the PASS2_BITS scaling, but leave the
4150
   * results scaled up by an overall factor of 8.
4151
   * We must also scale the output by (8/5)*(8/10) = 32/25,
4152
   * which we fold into the constant multipliers:
4153
   * 10-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/20) * 32/25.
4154
   */
4155
4156
0
  dataptr = data;
4157
0
  wsptr = workspace;
4158
0
  for (ctr = 0; ctr < 5; ctr++) {
4159
    /* Even part */
4160
4161
0
    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*1];
4162
0
    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*0];
4163
0
    tmp12 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*7];
4164
0
    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*6];
4165
0
    tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5];
4166
4167
0
    tmp10 = tmp0 + tmp4;
4168
0
    tmp13 = tmp0 - tmp4;
4169
0
    tmp11 = tmp1 + tmp3;
4170
0
    tmp14 = tmp1 - tmp3;
4171
4172
0
    tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*1];
4173
0
    tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*0];
4174
0
    tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*7];
4175
0
    tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*6];
4176
0
    tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5];
4177
4178
0
    dataptr[DCTSIZE*0] = (DCTELEM)
4179
0
      DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(1.28)), /* 32/25 */
4180
0
        CONST_BITS+PASS2_BITS);
4181
0
    tmp12 += tmp12;
4182
0
    dataptr[DCTSIZE*4] = (DCTELEM)
4183
0
      DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.464477191)) - /* c4 */
4184
0
        MULTIPLY(tmp11 - tmp12, FIX(0.559380511)),  /* c8 */
4185
0
        CONST_BITS+PASS2_BITS);
4186
0
    tmp10 = MULTIPLY(tmp13 + tmp14, FIX(1.064004961));    /* c6 */
4187
0
    dataptr[DCTSIZE*2] = (DCTELEM)
4188
0
      DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.657591230)),  /* c2-c6 */
4189
0
        CONST_BITS+PASS2_BITS);
4190
0
    dataptr[DCTSIZE*6] = (DCTELEM)
4191
0
      DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.785601151)),  /* c2+c6 */
4192
0
        CONST_BITS+PASS2_BITS);
4193
4194
    /* Odd part */
4195
4196
0
    tmp10 = tmp0 + tmp4;
4197
0
    tmp11 = tmp1 - tmp3;
4198
0
    dataptr[DCTSIZE*5] = (DCTELEM)
4199
0
      DESCALE(MULTIPLY(tmp10 - tmp11 - tmp2, FIX(1.28)),  /* 32/25 */
4200
0
        CONST_BITS+PASS2_BITS);
4201
0
    tmp2 = MULTIPLY(tmp2, FIX(1.28));                     /* 32/25 */
4202
0
    dataptr[DCTSIZE*1] = (DCTELEM)
4203
0
      DESCALE(MULTIPLY(tmp0, FIX(1.787906876)) +          /* c1 */
4204
0
        MULTIPLY(tmp1, FIX(1.612894094)) + tmp2 +   /* c3 */
4205
0
        MULTIPLY(tmp3, FIX(0.821810588)) +          /* c7 */
4206
0
        MULTIPLY(tmp4, FIX(0.283176630)),           /* c9 */
4207
0
        CONST_BITS+PASS2_BITS);
4208
0
    tmp12 = MULTIPLY(tmp0 - tmp4, FIX(1.217352341)) -     /* (c3+c7)/2 */
4209
0
      MULTIPLY(tmp1 + tmp3, FIX(0.752365123));      /* (c1-c9)/2 */
4210
0
    tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.395541753)) +   /* (c3-c7)/2 */
4211
0
      MULTIPLY(tmp11, FIX(0.64)) - tmp2;            /* 16/25 */
4212
0
    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS+PASS2_BITS);
4213
0
    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS+PASS2_BITS);
4214
4215
0
    dataptr++;      /* advance pointer to next column */
4216
0
    wsptr++;      /* advance pointer to next column */
4217
0
  }
4218
0
}
4219
4220
4221
/*
4222
 * Perform the forward DCT on a 4x8 sample block.
4223
 *
4224
 * 4-point FDCT in pass 1 (rows), 8-point in pass 2 (columns).
4225
 */
4226
4227
GLOBAL(void)
4228
jpeg_fdct_4x8 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
4229
0
{
4230
0
  INT32 tmp0, tmp1, tmp2, tmp3;
4231
0
  INT32 tmp10, tmp11, tmp12, tmp13;
4232
0
  INT32 z1;
4233
0
  DCTELEM *dataptr;
4234
0
  JSAMPROW elemptr;
4235
0
  int ctr;
4236
0
  SHIFT_TEMPS
4237
4238
  /* Pre-zero output coefficient block. */
4239
0
  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
4240
4241
  /* Pass 1: process rows.
4242
   * Note results are scaled up by sqrt(8) compared to a true DCT;
4243
   * furthermore, we scale the results by 2**PASS1_BITS.
4244
   * We must also scale the output by 8/4 = 2, which we add here.
4245
   * 4-point FDCT kernel,
4246
   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
4247
   */
4248
4249
0
  dataptr = data;
4250
0
  for (ctr = 0; ctr < DCTSIZE; ctr++) {
4251
0
    elemptr = sample_data[ctr] + start_col;
4252
4253
    /* Even part */
4254
4255
0
    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[3]);
4256
0
    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]);
4257
4258
0
    tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]);
4259
0
    tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]);
4260
4261
    /* Apply unsigned->signed conversion. */
4262
0
    dataptr[0] = (DCTELEM)
4263
0
      ((tmp0 + tmp1 - 4 * CENTERJSAMPLE) << (PASS1_BITS+1));
4264
0
    dataptr[2] = (DCTELEM) ((tmp0 - tmp1) << (PASS1_BITS+1));
4265
4266
    /* Odd part */
4267
4268
0
    tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100);       /* c6 */
4269
    /* Add fudge factor here for final descale. */
4270
0
    tmp0 += ONE << (CONST_BITS-PASS1_BITS-2);
4271
4272
0
    dataptr[1] = (DCTELEM)
4273
0
      RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
4274
0
      CONST_BITS-PASS1_BITS-1);
4275
0
    dataptr[3] = (DCTELEM)
4276
0
      RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
4277
0
      CONST_BITS-PASS1_BITS-1);
4278
4279
0
    dataptr += DCTSIZE;   /* advance pointer to next row */
4280
0
  }
4281
4282
  /* Pass 2: process columns.
4283
   * We apply the PASS2_BITS scaling, but leave the
4284
   * results scaled up by an overall factor of 8.
4285
   * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
4286
   */
4287
4288
0
  dataptr = data;
4289
0
  for (ctr = 0; ctr < 4; ctr++) {
4290
    /* Even part per LL&M figure 1 --- note that published figure is faulty;
4291
     * rotator "c1" should be "c6".
4292
     */
4293
4294
0
    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
4295
0
    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
4296
0
    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
4297
0
    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
4298
4299
    /* Add fudge factor here for final descale. */
4300
0
#if PASS2_BITS > 1
4301
0
    tmp10 = tmp0 + tmp3 + (ONE << (PASS2_BITS-1));
4302
#else
4303
#if PASS2_BITS > 0
4304
    tmp10 = tmp0 + tmp3 + ONE;
4305
#else
4306
    tmp10 = tmp0 + tmp3;
4307
#endif
4308
#endif
4309
0
    tmp12 = tmp0 - tmp3;
4310
0
    tmp11 = tmp1 + tmp2;
4311
0
    tmp13 = tmp1 - tmp2;
4312
4313
0
    tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
4314
0
    tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
4315
0
    tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
4316
0
    tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
4317
4318
0
    dataptr[DCTSIZE*0] = PASS2_OUTPUT(tmp10 + tmp11);
4319
0
    dataptr[DCTSIZE*4] = PASS2_OUTPUT(tmp10 - tmp11);
4320
4321
0
    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);       /* c6 */
4322
    /* Add fudge factor here for final descale. */
4323
0
    z1 += ONE << (CONST_BITS+PASS2_BITS-1);
4324
4325
0
    dataptr[DCTSIZE*2] = (DCTELEM)
4326
0
      RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
4327
0
      CONST_BITS+PASS2_BITS);
4328
0
    dataptr[DCTSIZE*6] = (DCTELEM)
4329
0
      RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
4330
0
      CONST_BITS+PASS2_BITS);
4331
4332
    /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
4333
     * i0..i3 in the paper are tmp0..tmp3 here.
4334
     */
4335
4336
0
    tmp12 = tmp0 + tmp2;
4337
0
    tmp13 = tmp1 + tmp3;
4338
4339
0
    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602);       /*  c3 */
4340
    /* Add fudge factor here for final descale. */
4341
0
    z1 += ONE << (CONST_BITS+PASS2_BITS-1);
4342
4343
0
    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);          /* -c3+c5 */
4344
0
    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);          /* -c3-c5 */
4345
0
    tmp12 += z1;
4346
0
    tmp13 += z1;
4347
4348
0
    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223);       /* -c3+c7 */
4349
0
    tmp0 = MULTIPLY(tmp0, FIX_1_501321110);              /*  c1+c3-c5-c7 */
4350
0
    tmp3 = MULTIPLY(tmp3, FIX_0_298631336);              /* -c1+c3+c5-c7 */
4351
0
    tmp0 += z1 + tmp12;
4352
0
    tmp3 += z1 + tmp13;
4353
4354
0
    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447);       /* -c1-c3 */
4355
0
    tmp1 = MULTIPLY(tmp1, FIX_3_072711026);              /*  c1+c3+c5-c7 */
4356
0
    tmp2 = MULTIPLY(tmp2, FIX_2_053119869);              /*  c1+c3-c5+c7 */
4357
0
    tmp1 += z1 + tmp13;
4358
0
    tmp2 += z1 + tmp12;
4359
4360
0
    dataptr[DCTSIZE*1] = (DCTELEM) RIGHT_SHIFT(tmp0, CONST_BITS+PASS2_BITS);
4361
0
    dataptr[DCTSIZE*3] = (DCTELEM) RIGHT_SHIFT(tmp1, CONST_BITS+PASS2_BITS);
4362
0
    dataptr[DCTSIZE*5] = (DCTELEM) RIGHT_SHIFT(tmp2, CONST_BITS+PASS2_BITS);
4363
0
    dataptr[DCTSIZE*7] = (DCTELEM) RIGHT_SHIFT(tmp3, CONST_BITS+PASS2_BITS);
4364
4365
0
    dataptr++;      /* advance pointer to next column */
4366
0
  }
4367
0
}
4368
4369
4370
/*
4371
 * Perform the forward DCT on a 3x6 sample block.
4372
 *
4373
 * 3-point FDCT in pass 1 (rows), 6-point in pass 2 (columns).
4374
 */
4375
4376
GLOBAL(void)
4377
jpeg_fdct_3x6 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
4378
0
{
4379
0
  INT32 tmp0, tmp1, tmp2;
4380
0
  INT32 tmp10, tmp11, tmp12;
4381
0
  DCTELEM *dataptr;
4382
0
  JSAMPROW elemptr;
4383
0
  int ctr;
4384
0
  SHIFT_TEMPS
4385
4386
  /* Pre-zero output coefficient block. */
4387
0
  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
4388
4389
  /* Pass 1: process rows.
4390
   * Note results are scaled up by sqrt(8) compared to a true DCT;
4391
   * furthermore, we scale the results by 2**PASS1_BITS.
4392
   * We scale the results further by 2 as part of output adaption
4393
   * scaling for different DCT size.
4394
   * 3-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
4395
   */
4396
4397
0
  dataptr = data;
4398
0
  for (ctr = 0; ctr < 6; ctr++) {
4399
0
    elemptr = sample_data[ctr] + start_col;
4400
4401
    /* Even part */
4402
4403
0
    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[2]);
4404
0
    tmp1 = GETJSAMPLE(elemptr[1]);
4405
4406
0
    tmp2 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[2]);
4407
4408
    /* Apply unsigned->signed conversion. */
4409
0
    dataptr[0] = (DCTELEM)
4410
0
      ((tmp0 + tmp1 - 3 * CENTERJSAMPLE) << (PASS1_BITS+1));
4411
0
    dataptr[2] = (DCTELEM)
4412
0
      DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(0.707106781)), /* c2 */
4413
0
        CONST_BITS-PASS1_BITS-1);
4414
4415
    /* Odd part */
4416
4417
0
    dataptr[1] = (DCTELEM)
4418
0
      DESCALE(MULTIPLY(tmp2, FIX(1.224744871)),               /* c1 */
4419
0
        CONST_BITS-PASS1_BITS-1);
4420
4421
0
    dataptr += DCTSIZE;   /* advance pointer to next row */
4422
0
  }
4423
4424
  /* Pass 2: process columns.
4425
   * We apply the PASS2_BITS scaling, but leave the
4426
   * results scaled up by an overall factor of 8.
4427
   * We must also scale the output by (8/6)*(8/3) = 32/9, which we partially
4428
   * fold into the constant multipliers (other part was done in pass 1):
4429
   * 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12) * 16/9.
4430
   */
4431
4432
0
  dataptr = data;
4433
0
  for (ctr = 0; ctr < 3; ctr++) {
4434
    /* Even part */
4435
4436
0
    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*5];
4437
0
    tmp11 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*4];
4438
0
    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
4439
4440
0
    tmp10 = tmp0 + tmp2;
4441
0
    tmp12 = tmp0 - tmp2;
4442
4443
0
    tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*5];
4444
0
    tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*4];
4445
0
    tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
4446
4447
0
    dataptr[DCTSIZE*0] = (DCTELEM)
4448
0
      DESCALE(MULTIPLY(tmp10 + tmp11, FIX(1.777777778)),         /* 16/9 */
4449
0
        CONST_BITS+PASS2_BITS);
4450
0
    dataptr[DCTSIZE*2] = (DCTELEM)
4451
0
      DESCALE(MULTIPLY(tmp12, FIX(2.177324216)),                 /* c2 */
4452
0
        CONST_BITS+PASS2_BITS);
4453
0
    dataptr[DCTSIZE*4] = (DCTELEM)
4454
0
      DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(1.257078722)), /* c4 */
4455
0
        CONST_BITS+PASS2_BITS);
4456
4457
    /* Odd part */
4458
4459
0
    tmp10 = MULTIPLY(tmp0 + tmp2, FIX(0.650711829));             /* c5 */
4460
4461
0
    dataptr[DCTSIZE*1] = (DCTELEM)
4462
0
      DESCALE(tmp10 + MULTIPLY(tmp0 + tmp1, FIX(1.777777778)),   /* 16/9 */
4463
0
        CONST_BITS+PASS2_BITS);
4464
0
    dataptr[DCTSIZE*3] = (DCTELEM)
4465
0
      DESCALE(MULTIPLY(tmp0 - tmp1 - tmp2, FIX(1.777777778)),    /* 16/9 */
4466
0
        CONST_BITS+PASS2_BITS);
4467
0
    dataptr[DCTSIZE*5] = (DCTELEM)
4468
0
      DESCALE(tmp10 + MULTIPLY(tmp2 - tmp1, FIX(1.777777778)),   /* 16/9 */
4469
0
        CONST_BITS+PASS2_BITS);
4470
4471
0
    dataptr++;      /* advance pointer to next column */
4472
0
  }
4473
0
}
4474
4475
4476
/*
4477
 * Perform the forward DCT on a 2x4 sample block.
4478
 *
4479
 * 2-point FDCT in pass 1 (rows), 4-point in pass 2 (columns).
 *
 * data receives the coefficients: the whole DCTSIZE2-element block is
 * cleared first, then only the first 2 columns of the first 4 rows
 * (row stride DCTSIZE) are written.
 * sample_data supplies 4 sample rows; each row is read at columns
 * start_col and start_col+1.
4480
 */
4481
4482
GLOBAL(void)
4483
jpeg_fdct_2x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
4484
0
{
4485
0
  INT32 tmp0, tmp1;
4486
0
  INT32 tmp10, tmp11;
4487
0
  DCTELEM *dataptr;
4488
0
  JSAMPROW elemptr;
4489
0
  int ctr;
4490
0
  SHIFT_TEMPS
4491
4492
  /* Pre-zero output coefficient block. */
4493
0
  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
4494
4495
  /* Pass 1: process rows.
4496
   * Note results are scaled up by sqrt(8) compared to a true DCT.
4497
   */
4498
4499
0
  dataptr = data;
4500
0
  /* One iteration per input row: 4 rows of 2 samples each. */
  for (ctr = 0; ctr < 4; ctr++) {
4501
0
    elemptr = sample_data[ctr] + start_col;
4502
4503
    /* Even part */
4504
4505
0
    tmp0 = GETJSAMPLE(elemptr[0]);
4506
0
    tmp1 = GETJSAMPLE(elemptr[1]);
4507
4508
    /* Apply unsigned->signed conversion. */
4509
0
    /* Two samples are summed, hence the 2 * CENTERJSAMPLE offset. */
    dataptr[0] = (DCTELEM) (tmp0 + tmp1 - 2 * CENTERJSAMPLE);
4510
4511
    /* Odd part */
4512
4513
0
    /* The offset cancels in a difference, so no conversion term here. */
    dataptr[1] = (DCTELEM) (tmp0 - tmp1);
4514
4515
0
    dataptr += DCTSIZE;   /* advance pointer to next row */
4516
0
  }
4517
4518
  /* Pass 2: process columns.
4519
   * We leave the results scaled up by an overall factor of 8.
4520
   * We must also scale the output by (8/2)*(8/4) = 2**3.
4521
   * 4-point FDCT kernel,
4522
   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
4523
   */
4524
4525
0
  dataptr = data;
4526
0
  /* One iteration per column written by pass 1 (2 columns). */
  for (ctr = 0; ctr < 2; ctr++) {
4527
    /* Even part */
4528
4529
0
    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3];
4530
0
    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*2];
4531
4532
0
    tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*3];
4533
0
    tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*2];
4534
4535
0
    /* The scaling of the even outputs is chosen at compile time from the
     * relation between PASS2_BITS and PASS1_BITS + 3: a left shift, no
     * shift at all, or a rounded right shift.
     */
    /* NOTE(review): in the left-shift variant the shifted INT32 values
     * (tmp0 + tmp1, tmp0 - tmp1) can be negative; ISO C leaves a left
     * shift of a negative signed value undefined -- confirm the supported
     * compilers behave as intended here.
     */
#if PASS2_BITS < PASS1_BITS + 3
4536
0
    dataptr[DCTSIZE*0] = (DCTELEM)
4537
0
      ((tmp0 + tmp1) << (3+PASS1_BITS-PASS2_BITS));
4538
0
    dataptr[DCTSIZE*2] = (DCTELEM)
4539
0
      ((tmp0 - tmp1) << (3+PASS1_BITS-PASS2_BITS));
4540
#else
4541
#if PASS2_BITS == PASS1_BITS + 3
4542
    dataptr[DCTSIZE*0] = (DCTELEM) (tmp0 + tmp1);
4543
    dataptr[DCTSIZE*2] = (DCTELEM) (tmp0 - tmp1);
4544
#else
4545
    /* Add fudge factor for descale. */
4546
    /* Adding it to tmp0 alone suffices: both tmp0 + tmp1 and tmp0 - tmp1
     * below contain tmp0 and therefore the rounding term.
     */
    tmp0 += ONE << (PASS2_BITS-PASS1_BITS-3-1);
4547
4548
    dataptr[DCTSIZE*0] = (DCTELEM)
4549
      RIGHT_SHIFT(tmp0 + tmp1, PASS2_BITS-PASS1_BITS-3);
4550
    dataptr[DCTSIZE*2] = (DCTELEM)
4551
      RIGHT_SHIFT(tmp0 - tmp1, PASS2_BITS-PASS1_BITS-3);
4552
#endif
4553
#endif
4554
4555
    /* Odd part */
4556
4557
0
    /* tmp0 is reused here for the product shared by both odd outputs. */
    tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100);       /* c6 */
4558
    /* Add fudge factor for descale. */
4559
0
    /* Added once to the shared term so both RIGHT_SHIFT results get it. */
    tmp0 += ONE << (CONST_BITS+PASS2_BITS-PASS1_BITS-3-1);
4560
4561
0
    dataptr[DCTSIZE*1] = (DCTELEM)
4562
0
      RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
4563
0
      CONST_BITS+PASS2_BITS-PASS1_BITS-3);
4564
0
    dataptr[DCTSIZE*3] = (DCTELEM)
4565
0
      RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
4566
0
      CONST_BITS+PASS2_BITS-PASS1_BITS-3);
4567
4568
0
    dataptr++;      /* advance pointer to next column */
4569
0
  }
4570
0
}
4571
4572
4573
/*
4574
 * Perform the forward DCT on a 1x2 sample block.
4575
 *
4576
 * 1-point FDCT in pass 1 (rows), 2-point in pass 2 (columns).
 *
 * data receives the coefficients: the whole DCTSIZE2-element block is
 * cleared first, then only data[DCTSIZE*0] and data[DCTSIZE*1] (the first
 * element of the first two rows) are written.
 * sample_data supplies 2 sample rows; each is read at column start_col only.
4577
 */
4578
4579
GLOBAL(void)
4580
jpeg_fdct_1x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
4581
0
{
4582
0
  DCTELEM tmp0, tmp1;
4583
4584
  /* Pre-zero output coefficient block. */
4585
0
  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
4586
4587
  /* Pass 1: empty. */
4588
4589
  /* Pass 2: process columns.
4590
   * We leave the results scaled up by an overall factor of 8.
4591
   * We must also scale the output by (8/1)*(8/2) = 2**5.
4592
   */
4593
4594
  /* Even part */
4595
4596
0
  tmp0 = GETJSAMPLE(sample_data[0][start_col]);
4597
0
  tmp1 = GETJSAMPLE(sample_data[1][start_col]);
4598
4599
  /* Apply unsigned->signed conversion. */
4600
0
  /* Two samples are summed, hence the 2 * CENTERJSAMPLE offset. */
  /* NOTE(review): the shift count 5+PASS1_BITS-PASS2_BITS is used without
   * any preprocessor guard; this assumes it is non-negative -- TODO confirm
   * against the PASS1_BITS / PASS2_BITS definitions.  The shifted value can
   * also be negative, and ISO C leaves a left shift of a negative signed
   * value undefined -- confirm the supported compilers behave as intended.
   */
  data[DCTSIZE*0] =
4601
0
    (tmp0 + tmp1 - 2 * CENTERJSAMPLE) << (5+PASS1_BITS-PASS2_BITS);
4602
4603
  /* Odd part */
4604
4605
0
  /* The offset cancels in a difference, so no conversion term here.
   * NOTE(review): tmp0 - tmp1 is negative whenever the second sample is
   * larger, so the same left-shift caveat applies to this statement.
   */
  data[DCTSIZE*1] = (tmp0 - tmp1) << (5+PASS1_BITS-PASS2_BITS);
4606
0
}
4607
4608
#endif /* DCT_SCALING_SUPPORTED */
4609
#endif /* DCT_ISLOW_SUPPORTED */