Coverage Report

Created: 2026-06-12 06:28

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libjpeg-turbo.main/src/jcdctmgr.c
Line
Count
Source
1
/*
2
 * jcdctmgr.c
3
 *
4
 * This file was part of the Independent JPEG Group's software:
5
 * Copyright (C) 1994-1996, Thomas G. Lane.
6
 * libjpeg-turbo Modifications:
7
 * Copyright (C) 1999-2006, MIYASAKA Masaru.
8
 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
9
 * Copyright (C) 2011, 2014-2015, 2022, 2024-2026, D. R. Commander.
10
 * For conditions of distribution and use, see the accompanying README.ijg
11
 * file.
12
 *
13
 * This file contains the forward-DCT management logic.
14
 * This code selects a particular DCT implementation to be used,
15
 * and it performs related housekeeping chores including coefficient
16
 * quantization.
17
 */
18
19
#define JPEG_INTERNALS
20
#include "jinclude.h"
21
#include "jpeglib.h"
22
#include "jdct.h"               /* Private declarations for DCT subsystem */
23
#ifdef WITH_SIMD
24
#include "../simd/jsimddct.h"
25
#endif
26
#ifdef WITH_PROFILE
27
#include "tjutil.h"
28
#endif
29
30
31
#if defined(DCT_ISLOW_SUPPORTED) || defined(DCT_IFAST_SUPPORTED) || \
32
    defined(DCT_FLOAT_SUPPORTED)
33
34
/* Private subobject for this module */
35
36
METHODDEF(void) quantize(JCOEFPTR, DCTELEM *, DCTELEM *);
37
38
typedef struct {
39
  struct jpeg_forward_dct pub;  /* public fields */
40
41
  /* Pointer to the DCT routine actually in use */
42
  forward_DCT_method_ptr dct;
43
  convsamp_method_ptr convsamp;
44
  quantize_method_ptr quantize;
45
46
  /* The actual post-DCT divisors --- not identical to the quant table
47
   * entries, because of scaling (especially for an unnormalized DCT).
48
   * Each table is given in normal array order.
49
   */
50
  DCTELEM *divisors[NUM_QUANT_TBLS];
51
52
  /* work area for FDCT subroutine */
53
  DCTELEM *workspace;
54
55
#ifdef DCT_FLOAT_SUPPORTED
56
  /* Same as above for the floating-point case. */
57
  float_DCT_method_ptr float_dct;
58
  float_convsamp_method_ptr float_convsamp;
59
  float_quantize_method_ptr float_quantize;
60
  FAST_FLOAT *float_divisors[NUM_QUANT_TBLS];
61
  FAST_FLOAT *float_workspace;
62
#endif
63
} my_fdct_controller;
64
65
typedef my_fdct_controller *my_fdct_ptr;
66
67
68
#if BITS_IN_JSAMPLE == 8
69
70
/*
71
 * Find the highest bit in an integer through binary search.
72
 */
73
74
LOCAL(int)
75
flss(UINT16 val)
76
10.5M
{
77
10.5M
  int bit;
78
79
10.5M
  bit = 16;
80
81
10.5M
  if (!val)
82
0
    return 0;
83
84
10.5M
  if (!(val & 0xff00)) {
85
5.26M
    bit -= 8;
86
5.26M
    val <<= 8;
87
5.26M
  }
88
10.5M
  if (!(val & 0xf000)) {
89
7.13M
    bit -= 4;
90
7.13M
    val <<= 4;
91
7.13M
  }
92
10.5M
  if (!(val & 0xc000)) {
93
3.66M
    bit -= 2;
94
3.66M
    val <<= 2;
95
3.66M
  }
96
10.5M
  if (!(val & 0x8000)) {
97
4.59M
    bit -= 1;
98
4.59M
    val <<= 1;
99
4.59M
  }
100
101
10.5M
  return bit;
102
10.5M
}
103
104
105
/*
106
 * Compute values to do a division using reciprocal.
107
 *
108
 * This implementation is based on an algorithm described in
109
 *   "Optimizing subroutines in assembly language:
110
 *   An optimization guide for x86 platforms" (https://agner.org/optimize).
111
 * More information about the basic algorithm can be found in
112
 * the paper "Integer Division Using Reciprocals" by Robert Alverson.
113
 *
114
 * The basic idea is to replace x/d by x * d^-1. In order to store
115
 * d^-1 with enough precision we shift it left a few places. It turns
116
 * out that this algorithm gives just enough precision, and also fits
117
 * into DCTELEM:
118
 *
119
 *   b = (the number of significant bits in divisor) - 1
120
 *   r = (word size) + b
121
 *   f = 2^r / divisor
122
 *
123
 * f will not be an integer for most cases, so we need to compensate
124
 * for the rounding error introduced:
125
 *
126
 *   no fractional part:
127
 *
128
 *       result = input >> r
129
 *
130
 *   fractional part of f < 0.5:
131
 *
132
 *       round f down to nearest integer
133
 *       result = ((input + 1) * f) >> r
134
 *
135
 *   fractional part of f > 0.5:
136
 *
137
 *       round f up to nearest integer
138
 *       result = (input * f) >> r
139
 *
140
 * This is the original algorithm that gives truncated results. But we
141
 * want properly rounded results, so we replace "input" with
142
 * "input + divisor/2".
143
 *
144
 * In order to allow SIMD implementations we also tweak the values to
145
 * allow the same calculation to be made at all times:
146
 *
147
 *   dctbl[0] = f rounded to nearest integer
148
 *   dctbl[1] = divisor / 2 (+ 1 if fractional part of f < 0.5)
149
 *   dctbl[2] = 1 << ((word size) * 2 - r)
150
 *   dctbl[3] = r - (word size)
151
 *
152
 * dctbl[2] is for stupid instruction sets where the shift operation
153
 * isn't member-wise (e.g. MMX).
154
 *
155
 * The reason dctbl[2] and dctbl[3] reduce the shift with (word size)
156
 * is that most SIMD implementations have a "multiply and store top
157
 * half" operation.
158
 *
159
 * Lastly, we store each of the values in their own table instead
160
 * of in a consecutive manner, yet again in order to allow SIMD
161
 * routines.
162
 */
163
164
LOCAL(int)
165
compute_reciprocal(UINT16 divisor, DCTELEM *dtbl)
166
10.5M
{
167
10.5M
  UDCTELEM2 fq, fr;
168
10.5M
  UDCTELEM c;
169
10.5M
  int b, r;
170
171
10.5M
  if (divisor <= 1) {
172
    /* divisor == 1 means unquantized, so these reciprocal/correction/shift
173
     * values will cause the C quantization algorithm to act like the
174
     * identity function.  Since only the C quantization algorithm is used in
175
     * these cases, the scale value is irrelevant.
176
     *
177
     * divisor == 0 can never happen in a normal program, because
178
     * jpeg_add_quant_table() clamps values < 1.  However, a program could
179
     * abuse the API by manually modifying the exposed quantization table just
180
     * before calling jpeg_start_compress().  Thus, we effectively clamp
181
     * values < 1 here as well, to avoid dividing by 0.
182
     */
183
35.6k
    dtbl[DCTSIZE2 * 0] = (DCTELEM)1;                        /* reciprocal */
184
35.6k
    dtbl[DCTSIZE2 * 1] = (DCTELEM)0;                        /* correction */
185
35.6k
    dtbl[DCTSIZE2 * 2] = (DCTELEM)1;                        /* scale */
186
35.6k
    dtbl[DCTSIZE2 * 3] = -(DCTELEM)(sizeof(DCTELEM) * 8);   /* shift */
187
35.6k
    return 0;
188
35.6k
  }
189
190
10.5M
  b = flss(divisor) - 1;
191
10.5M
  r  = sizeof(DCTELEM) * 8 + b;
192
193
10.5M
  fq = ((UDCTELEM2)1 << r) / divisor;
194
10.5M
  fr = ((UDCTELEM2)1 << r) % divisor;
195
196
10.5M
  c = divisor / 2;                      /* for rounding */
197
198
10.5M
  if (fr == 0) {                        /* divisor is power of two */
199
    /* fq will be one bit too large to fit in DCTELEM, so adjust */
200
1.69M
    fq >>= 1;
201
1.69M
    r--;
202
8.81M
  } else if (fr <= (divisor / 2U)) {    /* fractional part is < 0.5 */
203
2.35M
    c++;
204
6.45M
  } else {                              /* fractional part is > 0.5 */
205
6.45M
    fq++;
206
6.45M
  }
207
208
10.5M
  dtbl[DCTSIZE2 * 0] = (DCTELEM)fq;     /* reciprocal */
209
10.5M
  dtbl[DCTSIZE2 * 1] = (DCTELEM)c;      /* correction + roundfactor */
210
10.5M
#ifdef WITH_SIMD
211
10.5M
  dtbl[DCTSIZE2 * 2] = (DCTELEM)(1 << (sizeof(DCTELEM) * 8 * 2 - r)); /* scale */
212
#else
213
  dtbl[DCTSIZE2 * 2] = 1;
214
#endif
215
10.5M
  dtbl[DCTSIZE2 * 3] = (DCTELEM)r - sizeof(DCTELEM) * 8; /* shift */
216
217
10.5M
  if (r <= 16) return 0;
218
10.4M
  else return 1;
219
10.5M
}
220
221
#endif
222
223
224
/*
225
 * Initialize for a processing pass.
226
 * Verify that all referenced Q-tables are present, and set up
227
 * the divisor table for each one.
228
 * In the current implementation, DCT of all components is done during
229
 * the first pass, even if only some components will be output in the
230
 * first scan.  Hence all components should be examined here.
231
 */
232
233
METHODDEF(void)
234
start_pass_fdctmgr(j_compress_ptr cinfo)
235
80.5k
{
236
80.5k
  my_fdct_ptr fdct = (my_fdct_ptr)cinfo->fdct;
237
80.5k
  int ci, qtblno, i;
238
80.5k
  jpeg_component_info *compptr;
239
80.5k
  JQUANT_TBL *qtbl;
240
80.5k
  DCTELEM *dtbl;
241
242
278k
  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
243
198k
       ci++, compptr++) {
244
198k
    qtblno = compptr->quant_tbl_no;
245
    /* Make sure specified quantization table is present */
246
198k
    if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
247
198k
        cinfo->quant_tbl_ptrs[qtblno] == NULL)
248
0
      ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
249
198k
    qtbl = cinfo->quant_tbl_ptrs[qtblno];
250
    /* Compute divisors for this quant table */
251
    /* We may do this more than once for same table, but it's not a big deal */
252
198k
    switch (cinfo->dct_method) {
253
0
#ifdef DCT_ISLOW_SUPPORTED
254
154k
    case JDCT_ISLOW:
255
      /* For LL&M DCT method, divisors are equal to raw quantization
256
       * coefficients multiplied by 8 (to counteract scaling).
257
       */
258
154k
      if (fdct->divisors[qtblno] == NULL) {
259
95.4k
        fdct->divisors[qtblno] = (DCTELEM *)
260
95.4k
          (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
261
95.4k
                                      (DCTSIZE2 * 4) * sizeof(DCTELEM));
262
95.4k
      }
263
154k
      dtbl = fdct->divisors[qtblno];
264
10.0M
      for (i = 0; i < DCTSIZE2; i++) {
265
#if BITS_IN_JSAMPLE == 8
266
#ifdef WITH_SIMD
267
5.06M
        if (!compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i]) &&
268
0
            fdct->quantize != quantize)
269
0
          fdct->quantize = quantize;
270
#else
271
        compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i]);
272
#endif
273
#else
274
        dtbl[i] = ((DCTELEM)qtbl->quantval[i]) << 3;
275
#endif
276
9.87M
      }
277
154k
      break;
278
0
#endif
279
0
#ifdef DCT_IFAST_SUPPORTED
280
35.2k
    case JDCT_IFAST:
281
35.2k
      {
282
        /* For AA&N DCT method, divisors are equal to quantization
283
         * coefficients scaled by scalefactor[row]*scalefactor[col], where
284
         *   scalefactor[0] = 1
285
         *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
286
         * We apply a further scale factor of 8.
287
         */
288
35.2k
#define CONST_BITS  14
289
35.2k
        static const INT16 aanscales[DCTSIZE2] = {
290
          /* precomputed values scaled up by 14 bits */
291
35.2k
          16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
292
35.2k
          22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
293
35.2k
          21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
294
35.2k
          19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
295
35.2k
          16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
296
35.2k
          12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
297
35.2k
           8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
298
35.2k
           4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
299
35.2k
        };
300
35.2k
        SHIFT_TEMPS
301
302
35.2k
        if (fdct->divisors[qtblno] == NULL) {
303
24.4k
          fdct->divisors[qtblno] = (DCTELEM *)
304
24.4k
            (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
305
24.4k
                                        (DCTSIZE2 * 4) * sizeof(DCTELEM));
306
24.4k
        }
307
35.2k
        dtbl = fdct->divisors[qtblno];
308
2.29M
        for (i = 0; i < DCTSIZE2; i++) {
309
#if BITS_IN_JSAMPLE == 8
310
#ifdef WITH_SIMD
311
1.28M
          if (!compute_reciprocal(
312
1.28M
                DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
313
1.28M
                                      (JLONG)aanscales[i]),
314
1.28M
                        CONST_BITS - 3), &dtbl[i]) &&
315
64.2k
              fdct->quantize != quantize)
316
2.14k
            fdct->quantize = quantize;
317
#else
318
          compute_reciprocal(
319
            DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
320
                                  (JLONG)aanscales[i]),
321
                    CONST_BITS-3), &dtbl[i]);
322
#endif
323
#else
324
          dtbl[i] = (DCTELEM)
325
968k
            DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
326
                                  (JLONG)aanscales[i]),
327
                    CONST_BITS - 3);
328
#endif
329
2.25M
        }
330
35.2k
      }
331
35.2k
      break;
332
0
#endif
333
0
#ifdef DCT_FLOAT_SUPPORTED
334
8.67k
    case JDCT_FLOAT:
335
8.67k
      {
336
        /* For float AA&N DCT method, divisors are equal to quantization
337
         * coefficients scaled by scalefactor[row]*scalefactor[col], where
338
         *   scalefactor[0] = 1
339
         *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
340
         * We apply a further scale factor of 8.
341
         * What's actually stored is 1/divisor so that the inner loop can
342
         * use a multiplication rather than a division.
343
         */
344
8.67k
        FAST_FLOAT *fdtbl;
345
8.67k
        int row, col;
346
8.67k
        static const double aanscalefactor[DCTSIZE] = {
347
8.67k
          1.0, 1.387039845, 1.306562965, 1.175875602,
348
8.67k
          1.0, 0.785694958, 0.541196100, 0.275899379
349
8.67k
        };
350
351
8.67k
        if (fdct->float_divisors[qtblno] == NULL) {
352
6.43k
          fdct->float_divisors[qtblno] = (FAST_FLOAT *)
353
6.43k
            (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
354
6.43k
                                        DCTSIZE2 * sizeof(FAST_FLOAT));
355
6.43k
        }
356
8.67k
        fdtbl = fdct->float_divisors[qtblno];
357
8.67k
        i = 0;
358
78.0k
        for (row = 0; row < DCTSIZE; row++) {
359
624k
          for (col = 0; col < DCTSIZE; col++) {
360
554k
            fdtbl[i] = (FAST_FLOAT)
361
554k
              (1.0 / (((double)qtbl->quantval[i] *
362
554k
                       aanscalefactor[row] * aanscalefactor[col] * 8.0)));
363
554k
            i++;
364
554k
          }
365
69.3k
        }
366
8.67k
      }
367
8.67k
      break;
368
0
#endif
369
0
    default:
370
0
      ERREXIT(cinfo, JERR_NOT_COMPILED);
371
0
      break;
372
198k
    }
373
198k
  }
374
80.5k
}
jcdctmgr-8.c:start_pass_fdctmgr
Line
Count
Source
235
45.0k
{
236
45.0k
  my_fdct_ptr fdct = (my_fdct_ptr)cinfo->fdct;
237
45.0k
  int ci, qtblno, i;
238
45.0k
  jpeg_component_info *compptr;
239
45.0k
  JQUANT_TBL *qtbl;
240
45.0k
  DCTELEM *dtbl;
241
242
153k
  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
243
108k
       ci++, compptr++) {
244
108k
    qtblno = compptr->quant_tbl_no;
245
    /* Make sure specified quantization table is present */
246
108k
    if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
247
108k
        cinfo->quant_tbl_ptrs[qtblno] == NULL)
248
0
      ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
249
108k
    qtbl = cinfo->quant_tbl_ptrs[qtblno];
250
    /* Compute divisors for this quant table */
251
    /* We may do this more than once for same table, but it's not a big deal */
252
108k
    switch (cinfo->dct_method) {
253
0
#ifdef DCT_ISLOW_SUPPORTED
254
79.2k
    case JDCT_ISLOW:
255
      /* For LL&M DCT method, divisors are equal to raw quantization
256
       * coefficients multiplied by 8 (to counteract scaling).
257
       */
258
79.2k
      if (fdct->divisors[qtblno] == NULL) {
259
50.0k
        fdct->divisors[qtblno] = (DCTELEM *)
260
50.0k
          (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
261
50.0k
                                      (DCTSIZE2 * 4) * sizeof(DCTELEM));
262
50.0k
      }
263
79.2k
      dtbl = fdct->divisors[qtblno];
264
5.14M
      for (i = 0; i < DCTSIZE2; i++) {
265
5.06M
#if BITS_IN_JSAMPLE == 8
266
5.06M
#ifdef WITH_SIMD
267
5.06M
        if (!compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i]) &&
268
0
            fdct->quantize != quantize)
269
0
          fdct->quantize = quantize;
270
#else
271
        compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i]);
272
#endif
273
#else
274
        dtbl[i] = ((DCTELEM)qtbl->quantval[i]) << 3;
275
#endif
276
5.06M
      }
277
79.2k
      break;
278
0
#endif
279
0
#ifdef DCT_IFAST_SUPPORTED
280
20.1k
    case JDCT_IFAST:
281
20.1k
      {
282
        /* For AA&N DCT method, divisors are equal to quantization
283
         * coefficients scaled by scalefactor[row]*scalefactor[col], where
284
         *   scalefactor[0] = 1
285
         *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
286
         * We apply a further scale factor of 8.
287
         */
288
20.1k
#define CONST_BITS  14
289
20.1k
        static const INT16 aanscales[DCTSIZE2] = {
290
          /* precomputed values scaled up by 14 bits */
291
20.1k
          16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
292
20.1k
          22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
293
20.1k
          21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
294
20.1k
          19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
295
20.1k
          16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
296
20.1k
          12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
297
20.1k
           8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
298
20.1k
           4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
299
20.1k
        };
300
20.1k
        SHIFT_TEMPS
301
302
20.1k
        if (fdct->divisors[qtblno] == NULL) {
303
14.3k
          fdct->divisors[qtblno] = (DCTELEM *)
304
14.3k
            (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
305
14.3k
                                        (DCTSIZE2 * 4) * sizeof(DCTELEM));
306
14.3k
        }
307
20.1k
        dtbl = fdct->divisors[qtblno];
308
1.30M
        for (i = 0; i < DCTSIZE2; i++) {
309
1.28M
#if BITS_IN_JSAMPLE == 8
310
1.28M
#ifdef WITH_SIMD
311
1.28M
          if (!compute_reciprocal(
312
1.28M
                DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
313
1.28M
                                      (JLONG)aanscales[i]),
314
1.28M
                        CONST_BITS - 3), &dtbl[i]) &&
315
64.2k
              fdct->quantize != quantize)
316
2.14k
            fdct->quantize = quantize;
317
#else
318
          compute_reciprocal(
319
            DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
320
                                  (JLONG)aanscales[i]),
321
                    CONST_BITS-3), &dtbl[i]);
322
#endif
323
#else
324
          dtbl[i] = (DCTELEM)
325
            DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
326
                                  (JLONG)aanscales[i]),
327
                    CONST_BITS - 3);
328
#endif
329
1.28M
        }
330
20.1k
      }
331
20.1k
      break;
332
0
#endif
333
0
#ifdef DCT_FLOAT_SUPPORTED
334
8.67k
    case JDCT_FLOAT:
335
8.67k
      {
336
        /* For float AA&N DCT method, divisors are equal to quantization
337
         * coefficients scaled by scalefactor[row]*scalefactor[col], where
338
         *   scalefactor[0] = 1
339
         *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
340
         * We apply a further scale factor of 8.
341
         * What's actually stored is 1/divisor so that the inner loop can
342
         * use a multiplication rather than a division.
343
         */
344
8.67k
        FAST_FLOAT *fdtbl;
345
8.67k
        int row, col;
346
8.67k
        static const double aanscalefactor[DCTSIZE] = {
347
8.67k
          1.0, 1.387039845, 1.306562965, 1.175875602,
348
8.67k
          1.0, 0.785694958, 0.541196100, 0.275899379
349
8.67k
        };
350
351
8.67k
        if (fdct->float_divisors[qtblno] == NULL) {
352
6.43k
          fdct->float_divisors[qtblno] = (FAST_FLOAT *)
353
6.43k
            (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
354
6.43k
                                        DCTSIZE2 * sizeof(FAST_FLOAT));
355
6.43k
        }
356
8.67k
        fdtbl = fdct->float_divisors[qtblno];
357
8.67k
        i = 0;
358
78.0k
        for (row = 0; row < DCTSIZE; row++) {
359
624k
          for (col = 0; col < DCTSIZE; col++) {
360
554k
            fdtbl[i] = (FAST_FLOAT)
361
554k
              (1.0 / (((double)qtbl->quantval[i] *
362
554k
                       aanscalefactor[row] * aanscalefactor[col] * 8.0)));
363
554k
            i++;
364
554k
          }
365
69.3k
        }
366
8.67k
      }
367
8.67k
      break;
368
0
#endif
369
0
    default:
370
0
      ERREXIT(cinfo, JERR_NOT_COMPILED);
371
0
      break;
372
108k
    }
373
108k
  }
374
45.0k
}
jcdctmgr-12.c:start_pass_fdctmgr
Line
Count
Source
235
35.5k
{
236
35.5k
  my_fdct_ptr fdct = (my_fdct_ptr)cinfo->fdct;
237
35.5k
  int ci, qtblno, i;
238
35.5k
  jpeg_component_info *compptr;
239
35.5k
  JQUANT_TBL *qtbl;
240
35.5k
  DCTELEM *dtbl;
241
242
125k
  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
243
90.1k
       ci++, compptr++) {
244
90.1k
    qtblno = compptr->quant_tbl_no;
245
    /* Make sure specified quantization table is present */
246
90.1k
    if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
247
90.1k
        cinfo->quant_tbl_ptrs[qtblno] == NULL)
248
0
      ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
249
90.1k
    qtbl = cinfo->quant_tbl_ptrs[qtblno];
250
    /* Compute divisors for this quant table */
251
    /* We may do this more than once for same table, but it's not a big deal */
252
90.1k
    switch (cinfo->dct_method) {
253
0
#ifdef DCT_ISLOW_SUPPORTED
254
75.0k
    case JDCT_ISLOW:
255
      /* For LL&M DCT method, divisors are equal to raw quantization
256
       * coefficients multiplied by 8 (to counteract scaling).
257
       */
258
75.0k
      if (fdct->divisors[qtblno] == NULL) {
259
45.3k
        fdct->divisors[qtblno] = (DCTELEM *)
260
45.3k
          (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
261
45.3k
                                      (DCTSIZE2 * 4) * sizeof(DCTELEM));
262
45.3k
      }
263
75.0k
      dtbl = fdct->divisors[qtblno];
264
4.87M
      for (i = 0; i < DCTSIZE2; i++) {
265
#if BITS_IN_JSAMPLE == 8
266
#ifdef WITH_SIMD
267
        if (!compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i]) &&
268
            fdct->quantize != quantize)
269
          fdct->quantize = quantize;
270
#else
271
        compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i]);
272
#endif
273
#else
274
4.80M
        dtbl[i] = ((DCTELEM)qtbl->quantval[i]) << 3;
275
4.80M
#endif
276
4.80M
      }
277
75.0k
      break;
278
0
#endif
279
0
#ifdef DCT_IFAST_SUPPORTED
280
15.1k
    case JDCT_IFAST:
281
15.1k
      {
282
        /* For AA&N DCT method, divisors are equal to quantization
283
         * coefficients scaled by scalefactor[row]*scalefactor[col], where
284
         *   scalefactor[0] = 1
285
         *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
286
         * We apply a further scale factor of 8.
287
         */
288
15.1k
#define CONST_BITS  14
289
15.1k
        static const INT16 aanscales[DCTSIZE2] = {
290
          /* precomputed values scaled up by 14 bits */
291
15.1k
          16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
292
15.1k
          22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
293
15.1k
          21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
294
15.1k
          19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
295
15.1k
          16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
296
15.1k
          12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
297
15.1k
           8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
298
15.1k
           4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
299
15.1k
        };
300
15.1k
        SHIFT_TEMPS
301
302
15.1k
        if (fdct->divisors[qtblno] == NULL) {
303
10.0k
          fdct->divisors[qtblno] = (DCTELEM *)
304
10.0k
            (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
305
10.0k
                                        (DCTSIZE2 * 4) * sizeof(DCTELEM));
306
10.0k
        }
307
15.1k
        dtbl = fdct->divisors[qtblno];
308
983k
        for (i = 0; i < DCTSIZE2; i++) {
309
#if BITS_IN_JSAMPLE == 8
310
#ifdef WITH_SIMD
311
          if (!compute_reciprocal(
312
                DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
313
                                      (JLONG)aanscales[i]),
314
                        CONST_BITS - 3), &dtbl[i]) &&
315
              fdct->quantize != quantize)
316
            fdct->quantize = quantize;
317
#else
318
          compute_reciprocal(
319
            DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
320
                                  (JLONG)aanscales[i]),
321
                    CONST_BITS-3), &dtbl[i]);
322
#endif
323
#else
324
968k
          dtbl[i] = (DCTELEM)
325
968k
            DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
326
968k
                                  (JLONG)aanscales[i]),
327
968k
                    CONST_BITS - 3);
328
968k
#endif
329
968k
        }
330
15.1k
      }
331
15.1k
      break;
332
0
#endif
333
0
#ifdef DCT_FLOAT_SUPPORTED
334
0
    case JDCT_FLOAT:
335
0
      {
336
        /* For float AA&N DCT method, divisors are equal to quantization
337
         * coefficients scaled by scalefactor[row]*scalefactor[col], where
338
         *   scalefactor[0] = 1
339
         *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
340
         * We apply a further scale factor of 8.
341
         * What's actually stored is 1/divisor so that the inner loop can
342
         * use a multiplication rather than a division.
343
         */
344
0
        FAST_FLOAT *fdtbl;
345
0
        int row, col;
346
0
        static const double aanscalefactor[DCTSIZE] = {
347
0
          1.0, 1.387039845, 1.306562965, 1.175875602,
348
0
          1.0, 0.785694958, 0.541196100, 0.275899379
349
0
        };
350
351
0
        if (fdct->float_divisors[qtblno] == NULL) {
352
0
          fdct->float_divisors[qtblno] = (FAST_FLOAT *)
353
0
            (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
354
0
                                        DCTSIZE2 * sizeof(FAST_FLOAT));
355
0
        }
356
0
        fdtbl = fdct->float_divisors[qtblno];
357
0
        i = 0;
358
0
        for (row = 0; row < DCTSIZE; row++) {
359
0
          for (col = 0; col < DCTSIZE; col++) {
360
0
            fdtbl[i] = (FAST_FLOAT)
361
0
              (1.0 / (((double)qtbl->quantval[i] *
362
0
                       aanscalefactor[row] * aanscalefactor[col] * 8.0)));
363
0
            i++;
364
0
          }
365
0
        }
366
0
      }
367
0
      break;
368
0
#endif
369
0
    default:
370
0
      ERREXIT(cinfo, JERR_NOT_COMPILED);
371
0
      break;
372
90.1k
    }
373
90.1k
  }
374
35.5k
}
375
376
377
/*
378
 * Load data into workspace, applying unsigned->signed conversion.
379
 */
380
381
METHODDEF(void)
382
convsamp(_JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM *workspace)
383
38.8M
{
384
38.8M
  register DCTELEM *workspaceptr;
385
38.8M
  register _JSAMPROW elemptr;
386
38.8M
  register int elemr;
387
388
38.8M
  workspaceptr = workspace;
389
349M
  for (elemr = 0; elemr < DCTSIZE; elemr++) {
390
310M
    elemptr = sample_data[elemr] + start_col;
391
392
310M
#if DCTSIZE == 8                /* unroll the inner loop */
393
310M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
394
310M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
395
310M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
396
310M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
397
310M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
398
310M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
399
310M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
400
310M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
401
#else
402
    {
403
      register int elemc;
404
      for (elemc = DCTSIZE; elemc > 0; elemc--)
405
        *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
406
    }
407
#endif
408
310M
  }
409
38.8M
}
Unexecuted instantiation: jcdctmgr-8.c:convsamp
jcdctmgr-12.c:convsamp
Line
Count
Source
383
38.8M
{
384
38.8M
  register DCTELEM *workspaceptr;
385
38.8M
  register _JSAMPROW elemptr;
386
38.8M
  register int elemr;
387
388
38.8M
  workspaceptr = workspace;
389
349M
  for (elemr = 0; elemr < DCTSIZE; elemr++) {
390
310M
    elemptr = sample_data[elemr] + start_col;
391
392
310M
#if DCTSIZE == 8                /* unroll the inner loop */
393
310M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
394
310M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
395
310M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
396
310M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
397
310M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
398
310M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
399
310M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
400
310M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
401
#else
402
    {
403
      register int elemc;
404
      for (elemc = DCTSIZE; elemc > 0; elemc--)
405
        *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
406
    }
407
#endif
408
310M
  }
409
38.8M
}
410
411
412
/*
413
 * Quantize/descale the coefficients, and store into coef_block[].
414
 */
415
416
METHODDEF(void)
417
quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
418
47.2M
{
419
47.2M
  int i;
420
47.2M
  DCTELEM temp;
421
47.2M
  JCOEFPTR output_ptr = coef_block;
422
423
#if BITS_IN_JSAMPLE == 8
424
425
  UDCTELEM recip, corr;
426
  int shift;
427
  UDCTELEM2 product;
428
429
542M
  for (i = 0; i < DCTSIZE2; i++) {
430
534M
    temp = workspace[i];
431
534M
    recip = divisors[i + DCTSIZE2 * 0];
432
534M
    corr =  divisors[i + DCTSIZE2 * 1];
433
534M
    shift = divisors[i + DCTSIZE2 * 3];
434
435
534M
    if (temp < 0) {
436
35.9M
      temp = -temp;
437
35.9M
      product = (UDCTELEM2)(temp + corr) * recip;
438
35.9M
      product >>= shift + sizeof(DCTELEM) * 8;
439
35.9M
      temp = (DCTELEM)product;
440
35.9M
      temp = -temp;
441
498M
    } else {
442
498M
      product = (UDCTELEM2)(temp + corr) * recip;
443
498M
      product >>= shift + sizeof(DCTELEM) * 8;
444
498M
      temp = (DCTELEM)product;
445
498M
    }
446
534M
    output_ptr[i] = (JCOEF)temp;
447
534M
  }
448
449
#else
450
451
  register DCTELEM qval;
452
453
2.52G
  for (i = 0; i < DCTSIZE2; i++) {
454
2.48G
    qval = divisors[i];
455
2.48G
    temp = workspace[i];
456
    /* Divide the coefficient value by qval, ensuring proper rounding.
457
     * Since C does not specify the direction of rounding for negative
458
     * quotients, we have to force the dividend positive for portability.
459
     *
460
     * In most files, at least half of the output values will be zero
461
     * (at default quantization settings, more like three-quarters...)
462
     * so we should ensure that this case is fast.  On many machines,
463
     * a comparison is enough cheaper than a divide to make a special test
464
     * a win.  Since both inputs will be nonnegative, we need only test
465
     * for a < b to discover whether a/b is 0.
466
     * If your machine's division is fast enough, define FAST_DIVIDE.
467
     */
468
#ifdef FAST_DIVIDE
469
#define DIVIDE_BY(a, b)  a /= b
470
#else
471
2.48G
#define DIVIDE_BY(a, b)  if (a >= b) a /= b;  else a = 0
472
2.48G
#endif
473
2.48G
    if (temp < 0) {
474
290M
      temp = -temp;
475
290M
      temp += qval >> 1;        /* for rounding */
476
290M
      DIVIDE_BY(temp, qval);
477
290M
      temp = -temp;
478
2.19G
    } else {
479
2.19G
      temp += qval >> 1;        /* for rounding */
480
2.19G
      DIVIDE_BY(temp, qval);
481
2.19G
    }
482
2.48G
    output_ptr[i] = (JCOEF)temp;
483
2.48G
  }
484
485
#endif
486
487
47.2M
}
jcdctmgr-8.c:quantize
Line
Count
Source
418
8.34M
{
419
8.34M
  int i;
420
8.34M
  DCTELEM temp;
421
8.34M
  JCOEFPTR output_ptr = coef_block;
422
423
8.34M
#if BITS_IN_JSAMPLE == 8
424
425
8.34M
  UDCTELEM recip, corr;
426
8.34M
  int shift;
427
8.34M
  UDCTELEM2 product;
428
429
542M
  for (i = 0; i < DCTSIZE2; i++) {
430
534M
    temp = workspace[i];
431
534M
    recip = divisors[i + DCTSIZE2 * 0];
432
534M
    corr =  divisors[i + DCTSIZE2 * 1];
433
534M
    shift = divisors[i + DCTSIZE2 * 3];
434
435
534M
    if (temp < 0) {
436
35.9M
      temp = -temp;
437
35.9M
      product = (UDCTELEM2)(temp + corr) * recip;
438
35.9M
      product >>= shift + sizeof(DCTELEM) * 8;
439
35.9M
      temp = (DCTELEM)product;
440
35.9M
      temp = -temp;
441
498M
    } else {
442
498M
      product = (UDCTELEM2)(temp + corr) * recip;
443
498M
      product >>= shift + sizeof(DCTELEM) * 8;
444
498M
      temp = (DCTELEM)product;
445
498M
    }
446
534M
    output_ptr[i] = (JCOEF)temp;
447
534M
  }
448
449
#else
450
451
  register DCTELEM qval;
452
453
  for (i = 0; i < DCTSIZE2; i++) {
454
    qval = divisors[i];
455
    temp = workspace[i];
456
    /* Divide the coefficient value by qval, ensuring proper rounding.
457
     * Since C does not specify the direction of rounding for negative
458
     * quotients, we have to force the dividend positive for portability.
459
     *
460
     * In most files, at least half of the output values will be zero
461
     * (at default quantization settings, more like three-quarters...)
462
     * so we should ensure that this case is fast.  On many machines,
463
     * a comparison is enough cheaper than a divide to make a special test
464
     * a win.  Since both inputs will be nonnegative, we need only test
465
     * for a < b to discover whether a/b is 0.
466
     * If your machine's division is fast enough, define FAST_DIVIDE.
467
     */
468
#ifdef FAST_DIVIDE
469
#define DIVIDE_BY(a, b)  a /= b
470
#else
471
#define DIVIDE_BY(a, b)  if (a >= b) a /= b;  else a = 0
472
#endif
473
    if (temp < 0) {
474
      temp = -temp;
475
      temp += qval >> 1;        /* for rounding */
476
      DIVIDE_BY(temp, qval);
477
      temp = -temp;
478
    } else {
479
      temp += qval >> 1;        /* for rounding */
480
      DIVIDE_BY(temp, qval);
481
    }
482
    output_ptr[i] = (JCOEF)temp;
483
  }
484
485
#endif
486
487
8.34M
}
jcdctmgr-12.c:quantize
Line
Count
Source
418
38.8M
{
419
38.8M
  int i;
420
38.8M
  DCTELEM temp;
421
38.8M
  JCOEFPTR output_ptr = coef_block;
422
423
#if BITS_IN_JSAMPLE == 8
424
425
  UDCTELEM recip, corr;
426
  int shift;
427
  UDCTELEM2 product;
428
429
  for (i = 0; i < DCTSIZE2; i++) {
430
    temp = workspace[i];
431
    recip = divisors[i + DCTSIZE2 * 0];
432
    corr =  divisors[i + DCTSIZE2 * 1];
433
    shift = divisors[i + DCTSIZE2 * 3];
434
435
    if (temp < 0) {
436
      temp = -temp;
437
      product = (UDCTELEM2)(temp + corr) * recip;
438
      product >>= shift + sizeof(DCTELEM) * 8;
439
      temp = (DCTELEM)product;
440
      temp = -temp;
441
    } else {
442
      product = (UDCTELEM2)(temp + corr) * recip;
443
      product >>= shift + sizeof(DCTELEM) * 8;
444
      temp = (DCTELEM)product;
445
    }
446
    output_ptr[i] = (JCOEF)temp;
447
  }
448
449
#else
450
451
38.8M
  register DCTELEM qval;
452
453
2.52G
  for (i = 0; i < DCTSIZE2; i++) {
454
2.48G
    qval = divisors[i];
455
2.48G
    temp = workspace[i];
456
    /* Divide the coefficient value by qval, ensuring proper rounding.
457
     * Since C does not specify the direction of rounding for negative
458
     * quotients, we have to force the dividend positive for portability.
459
     *
460
     * In most files, at least half of the output values will be zero
461
     * (at default quantization settings, more like three-quarters...)
462
     * so we should ensure that this case is fast.  On many machines,
463
     * a comparison is enough cheaper than a divide to make a special test
464
     * a win.  Since both inputs will be nonnegative, we need only test
465
     * for a < b to discover whether a/b is 0.
466
     * If your machine's division is fast enough, define FAST_DIVIDE.
467
     */
468
#ifdef FAST_DIVIDE
469
#define DIVIDE_BY(a, b)  a /= b
470
#else
471
2.48G
#define DIVIDE_BY(a, b)  if (a >= b) a /= b;  else a = 0
472
2.48G
#endif
473
2.48G
    if (temp < 0) {
474
290M
      temp = -temp;
475
290M
      temp += qval >> 1;        /* for rounding */
476
290M
      DIVIDE_BY(temp, qval);
477
290M
      temp = -temp;
478
2.19G
    } else {
479
2.19G
      temp += qval >> 1;        /* for rounding */
480
2.19G
      DIVIDE_BY(temp, qval);
481
2.19G
    }
482
2.48G
    output_ptr[i] = (JCOEF)temp;
483
2.48G
  }
484
485
38.8M
#endif
486
487
38.8M
}
488
489
490
/*
491
 * Perform forward DCT on one or more blocks of a component.
492
 *
493
 * The input samples are taken from the sample_data[] array starting at
494
 * position start_row/start_col, and moving to the right for any additional
495
 * blocks. The quantized coefficients are returned in coef_blocks[].
496
 */
497
498
METHODDEF(void)
499
forward_DCT(j_compress_ptr cinfo, jpeg_component_info *compptr,
500
            _JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
501
            JDIMENSION start_row, JDIMENSION start_col, JDIMENSION num_blocks)
502
/* This version is used for integer DCT implementations. */
503
73.5M
{
504
  /* This routine is heavily used, so it's worth coding it tightly. */
505
73.5M
  my_fdct_ptr fdct = (my_fdct_ptr)cinfo->fdct;
506
73.5M
  DCTELEM *divisors = fdct->divisors[compptr->quant_tbl_no];
507
73.5M
  DCTELEM *workspace;
508
73.5M
  JDIMENSION bi;
509
510
  /* Make sure the compiler doesn't look up these every pass */
511
73.5M
  forward_DCT_method_ptr do_dct = fdct->dct;
512
73.5M
  convsamp_method_ptr do_convsamp = fdct->convsamp;
513
73.5M
  quantize_method_ptr do_quantize = fdct->quantize;
514
73.5M
  workspace = fdct->workspace;
515
516
73.5M
  sample_data += start_row;     /* fold in the vertical offset once */
517
518
186M
  for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
519
    /* Load data into workspace, applying unsigned->signed conversion */
520
#ifdef WITH_PROFILE
521
    cinfo->master->start = getTime();
522
#endif
523
113M
    (*do_convsamp) (sample_data, start_col, workspace);
524
#ifdef WITH_PROFILE
525
    cinfo->master->convsamp_elapsed += getTime() - cinfo->master->start;
526
    cinfo->master->convsamp_msamples += (double)DCTSIZE2 / 1000000.;
527
#endif
528
529
    /* Perform the DCT */
530
#ifdef WITH_PROFILE
531
    cinfo->master->start = getTime();
532
#endif
533
113M
    (*do_dct) (workspace);
534
#ifdef WITH_PROFILE
535
    cinfo->master->fdct_elapsed += getTime() - cinfo->master->start;
536
    cinfo->master->fdct_mcoeffs += (double)DCTSIZE2 / 1000000.;
537
#endif
538
539
    /* Quantize/descale the coefficients, and store into coef_blocks[] */
540
#ifdef WITH_PROFILE
541
    cinfo->master->start = getTime();
542
#endif
543
113M
    (*do_quantize) (coef_blocks[bi], divisors, workspace);
544
#ifdef WITH_PROFILE
545
    cinfo->master->quantize_elapsed += getTime() - cinfo->master->start;
546
    cinfo->master->quantize_mcoeffs += (double)DCTSIZE2 / 1000000.;
547
#endif
548
113M
  }
549
73.5M
}
jcdctmgr-8.c:forward_DCT
Line
Count
Source
503
53.1M
{
504
  /* This routine is heavily used, so it's worth coding it tightly. */
505
53.1M
  my_fdct_ptr fdct = (my_fdct_ptr)cinfo->fdct;
506
53.1M
  DCTELEM *divisors = fdct->divisors[compptr->quant_tbl_no];
507
53.1M
  DCTELEM *workspace;
508
53.1M
  JDIMENSION bi;
509
510
  /* Make sure the compiler doesn't look up these every pass */
511
53.1M
  forward_DCT_method_ptr do_dct = fdct->dct;
512
53.1M
  convsamp_method_ptr do_convsamp = fdct->convsamp;
513
53.1M
  quantize_method_ptr do_quantize = fdct->quantize;
514
53.1M
  workspace = fdct->workspace;
515
516
53.1M
  sample_data += start_row;     /* fold in the vertical offset once */
517
518
127M
  for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
519
    /* Load data into workspace, applying unsigned->signed conversion */
520
#ifdef WITH_PROFILE
521
    cinfo->master->start = getTime();
522
#endif
523
74.2M
    (*do_convsamp) (sample_data, start_col, workspace);
524
#ifdef WITH_PROFILE
525
    cinfo->master->convsamp_elapsed += getTime() - cinfo->master->start;
526
    cinfo->master->convsamp_msamples += (double)DCTSIZE2 / 1000000.;
527
#endif
528
529
    /* Perform the DCT */
530
#ifdef WITH_PROFILE
531
    cinfo->master->start = getTime();
532
#endif
533
74.2M
    (*do_dct) (workspace);
534
#ifdef WITH_PROFILE
535
    cinfo->master->fdct_elapsed += getTime() - cinfo->master->start;
536
    cinfo->master->fdct_mcoeffs += (double)DCTSIZE2 / 1000000.;
537
#endif
538
539
    /* Quantize/descale the coefficients, and store into coef_blocks[] */
540
#ifdef WITH_PROFILE
541
    cinfo->master->start = getTime();
542
#endif
543
74.2M
    (*do_quantize) (coef_blocks[bi], divisors, workspace);
544
#ifdef WITH_PROFILE
545
    cinfo->master->quantize_elapsed += getTime() - cinfo->master->start;
546
    cinfo->master->quantize_mcoeffs += (double)DCTSIZE2 / 1000000.;
547
#endif
548
74.2M
  }
549
53.1M
}
jcdctmgr-12.c:forward_DCT
Line
Count
Source
503
20.4M
{
504
  /* This routine is heavily used, so it's worth coding it tightly. */
505
20.4M
  my_fdct_ptr fdct = (my_fdct_ptr)cinfo->fdct;
506
20.4M
  DCTELEM *divisors = fdct->divisors[compptr->quant_tbl_no];
507
20.4M
  DCTELEM *workspace;
508
20.4M
  JDIMENSION bi;
509
510
  /* Make sure the compiler doesn't look up these every pass */
511
20.4M
  forward_DCT_method_ptr do_dct = fdct->dct;
512
20.4M
  convsamp_method_ptr do_convsamp = fdct->convsamp;
513
20.4M
  quantize_method_ptr do_quantize = fdct->quantize;
514
20.4M
  workspace = fdct->workspace;
515
516
20.4M
  sample_data += start_row;     /* fold in the vertical offset once */
517
518
59.2M
  for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
519
    /* Load data into workspace, applying unsigned->signed conversion */
520
#ifdef WITH_PROFILE
521
    cinfo->master->start = getTime();
522
#endif
523
38.8M
    (*do_convsamp) (sample_data, start_col, workspace);
524
#ifdef WITH_PROFILE
525
    cinfo->master->convsamp_elapsed += getTime() - cinfo->master->start;
526
    cinfo->master->convsamp_msamples += (double)DCTSIZE2 / 1000000.;
527
#endif
528
529
    /* Perform the DCT */
530
#ifdef WITH_PROFILE
531
    cinfo->master->start = getTime();
532
#endif
533
38.8M
    (*do_dct) (workspace);
534
#ifdef WITH_PROFILE
535
    cinfo->master->fdct_elapsed += getTime() - cinfo->master->start;
536
    cinfo->master->fdct_mcoeffs += (double)DCTSIZE2 / 1000000.;
537
#endif
538
539
    /* Quantize/descale the coefficients, and store into coef_blocks[] */
540
#ifdef WITH_PROFILE
541
    cinfo->master->start = getTime();
542
#endif
543
38.8M
    (*do_quantize) (coef_blocks[bi], divisors, workspace);
544
#ifdef WITH_PROFILE
545
    cinfo->master->quantize_elapsed += getTime() - cinfo->master->start;
546
    cinfo->master->quantize_mcoeffs += (double)DCTSIZE2 / 1000000.;
547
#endif
548
38.8M
  }
549
20.4M
}
550
551
552
#ifdef DCT_FLOAT_SUPPORTED
553
554
METHODDEF(void)
555
convsamp_float(_JSAMPARRAY sample_data, JDIMENSION start_col,
556
               FAST_FLOAT *workspace)
557
0
{
558
0
  register FAST_FLOAT *workspaceptr;
559
0
  register _JSAMPROW elemptr;
560
0
  register int elemr;
561
562
0
  workspaceptr = workspace;
563
0
  for (elemr = 0; elemr < DCTSIZE; elemr++) {
564
0
    elemptr = sample_data[elemr] + start_col;
565
0
#if DCTSIZE == 8                /* unroll the inner loop */
566
0
    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
567
0
    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
568
0
    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
569
0
    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
570
0
    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
571
0
    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
572
0
    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
573
0
    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
574
#else
575
    {
576
      register int elemc;
577
      for (elemc = DCTSIZE; elemc > 0; elemc--)
578
        *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
579
    }
580
#endif
581
0
  }
582
0
}
Unexecuted instantiation: jcdctmgr-8.c:convsamp_float
Unexecuted instantiation: jcdctmgr-12.c:convsamp_float
583
584
585
METHODDEF(void)
586
quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
587
               FAST_FLOAT *workspace)
588
0
{
589
0
  register FAST_FLOAT temp;
590
0
  register int i;
591
0
  register JCOEFPTR output_ptr = coef_block;
592
593
0
  for (i = 0; i < DCTSIZE2; i++) {
594
    /* Apply the quantization and scaling factor */
595
0
    temp = workspace[i] * divisors[i];
596
597
    /* Round to nearest integer.
598
     * Since C does not specify the direction of rounding for negative
599
     * quotients, we have to force the dividend positive for portability.
600
     * The maximum coefficient size is +-16K (for 12-bit data), so this
601
     * code should work for either 16-bit or 32-bit ints.
602
     */
603
0
    output_ptr[i] = (JCOEF)((int)(temp + (FAST_FLOAT)16384.5) - 16384);
604
0
  }
605
0
}
Unexecuted instantiation: jcdctmgr-8.c:quantize_float
Unexecuted instantiation: jcdctmgr-12.c:quantize_float
606
607
608
METHODDEF(void)
609
forward_DCT_float(j_compress_ptr cinfo, jpeg_component_info *compptr,
610
                  _JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
611
                  JDIMENSION start_row, JDIMENSION start_col,
612
                  JDIMENSION num_blocks)
613
/* This version is used for floating-point DCT implementations. */
614
5.06M
{
615
  /* This routine is heavily used, so it's worth coding it tightly. */
616
5.06M
  my_fdct_ptr fdct = (my_fdct_ptr)cinfo->fdct;
617
5.06M
  FAST_FLOAT *divisors = fdct->float_divisors[compptr->quant_tbl_no];
618
5.06M
  FAST_FLOAT *workspace;
619
5.06M
  JDIMENSION bi;
620
621
622
  /* Make sure the compiler doesn't look up these every pass */
623
5.06M
  float_DCT_method_ptr do_dct = fdct->float_dct;
624
5.06M
  float_convsamp_method_ptr do_convsamp = fdct->float_convsamp;
625
5.06M
  float_quantize_method_ptr do_quantize = fdct->float_quantize;
626
5.06M
  workspace = fdct->float_workspace;
627
628
5.06M
  sample_data += start_row;     /* fold in the vertical offset once */
629
630
11.6M
  for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
631
    /* Load data into workspace, applying unsigned->signed conversion */
632
#ifdef WITH_PROFILE
633
    cinfo->master->start = getTime();
634
#endif
635
6.61M
    (*do_convsamp) (sample_data, start_col, workspace);
636
#ifdef WITH_PROFILE
637
    cinfo->master->convsamp_elapsed += getTime() - cinfo->master->start;
638
    cinfo->master->convsamp_msamples += (double)DCTSIZE2 / 1000000.;
639
#endif
640
641
    /* Perform the DCT */
642
#ifdef WITH_PROFILE
643
    cinfo->master->start = getTime();
644
#endif
645
6.61M
    (*do_dct) (workspace);
646
#ifdef WITH_PROFILE
647
    cinfo->master->fdct_elapsed += getTime() - cinfo->master->start;
648
    cinfo->master->fdct_mcoeffs += (double)DCTSIZE2 / 1000000.;
649
#endif
650
651
    /* Quantize/descale the coefficients, and store into coef_blocks[] */
652
#ifdef WITH_PROFILE
653
    cinfo->master->start = getTime();
654
#endif
655
6.61M
    (*do_quantize) (coef_blocks[bi], divisors, workspace);
656
#ifdef WITH_PROFILE
657
    cinfo->master->quantize_elapsed += getTime() - cinfo->master->start;
658
    cinfo->master->quantize_mcoeffs += (double)DCTSIZE2 / 1000000.;
659
#endif
660
6.61M
  }
661
5.06M
}
jcdctmgr-8.c:forward_DCT_float
Line
Count
Source
614
5.06M
{
615
  /* This routine is heavily used, so it's worth coding it tightly. */
616
5.06M
  my_fdct_ptr fdct = (my_fdct_ptr)cinfo->fdct;
617
5.06M
  FAST_FLOAT *divisors = fdct->float_divisors[compptr->quant_tbl_no];
618
5.06M
  FAST_FLOAT *workspace;
619
5.06M
  JDIMENSION bi;
620
621
622
  /* Make sure the compiler doesn't look up these every pass */
623
5.06M
  float_DCT_method_ptr do_dct = fdct->float_dct;
624
5.06M
  float_convsamp_method_ptr do_convsamp = fdct->float_convsamp;
625
5.06M
  float_quantize_method_ptr do_quantize = fdct->float_quantize;
626
5.06M
  workspace = fdct->float_workspace;
627
628
5.06M
  sample_data += start_row;     /* fold in the vertical offset once */
629
630
11.6M
  for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
631
    /* Load data into workspace, applying unsigned->signed conversion */
632
#ifdef WITH_PROFILE
633
    cinfo->master->start = getTime();
634
#endif
635
6.61M
    (*do_convsamp) (sample_data, start_col, workspace);
636
#ifdef WITH_PROFILE
637
    cinfo->master->convsamp_elapsed += getTime() - cinfo->master->start;
638
    cinfo->master->convsamp_msamples += (double)DCTSIZE2 / 1000000.;
639
#endif
640
641
    /* Perform the DCT */
642
#ifdef WITH_PROFILE
643
    cinfo->master->start = getTime();
644
#endif
645
6.61M
    (*do_dct) (workspace);
646
#ifdef WITH_PROFILE
647
    cinfo->master->fdct_elapsed += getTime() - cinfo->master->start;
648
    cinfo->master->fdct_mcoeffs += (double)DCTSIZE2 / 1000000.;
649
#endif
650
651
    /* Quantize/descale the coefficients, and store into coef_blocks[] */
652
#ifdef WITH_PROFILE
653
    cinfo->master->start = getTime();
654
#endif
655
6.61M
    (*do_quantize) (coef_blocks[bi], divisors, workspace);
656
#ifdef WITH_PROFILE
657
    cinfo->master->quantize_elapsed += getTime() - cinfo->master->start;
658
    cinfo->master->quantize_mcoeffs += (double)DCTSIZE2 / 1000000.;
659
#endif
660
6.61M
  }
661
5.06M
}
Unexecuted instantiation: jcdctmgr-12.c:forward_DCT_float
662
663
#endif /* DCT_FLOAT_SUPPORTED */
664
665
666
/*
667
 * Initialize FDCT manager.
668
 */
669
670
GLOBAL(void)
671
_jinit_forward_dct(j_compress_ptr cinfo)
672
94.7k
{
673
94.7k
  my_fdct_ptr fdct;
674
94.7k
  int i;
675
676
94.7k
  if (cinfo->data_precision != BITS_IN_JSAMPLE)
677
0
    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
678
679
94.7k
  fdct = (my_fdct_ptr)
680
94.7k
    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
681
94.7k
                                sizeof(my_fdct_controller));
682
94.7k
  cinfo->fdct = (struct jpeg_forward_dct *)fdct;
683
94.7k
  fdct->pub.start_pass = start_pass_fdctmgr;
684
685
  /* First determine the DCT... */
686
94.7k
  switch (cinfo->dct_method) {
687
0
#ifdef DCT_ISLOW_SUPPORTED
688
74.9k
  case JDCT_ISLOW:
689
74.9k
    fdct->pub._forward_DCT = forward_DCT;
690
#ifdef WITH_SIMD
691
32.3k
    if (!jsimd_set_fdct_islow(cinfo, &fdct->dct))
692
0
#endif
693
42.5k
      fdct->dct = _jpeg_fdct_islow;
694
74.9k
    break;
695
0
#endif
696
0
#ifdef DCT_IFAST_SUPPORTED
697
15.6k
  case JDCT_IFAST:
698
15.6k
    fdct->pub._forward_DCT = forward_DCT;
699
#ifdef WITH_SIMD
700
8.50k
    if (!jsimd_set_fdct_ifast(cinfo, &fdct->dct))
701
0
#endif
702
7.11k
      fdct->dct = _jpeg_fdct_ifast;
703
15.6k
    break;
704
0
#endif
705
0
#ifdef DCT_FLOAT_SUPPORTED
706
4.20k
  case JDCT_FLOAT:
707
4.20k
    fdct->pub._forward_DCT = forward_DCT_float;
708
#ifdef WITH_SIMD
709
4.20k
    if (!jsimd_set_fdct_float(cinfo, &fdct->float_dct))
710
0
#endif
711
0
      fdct->float_dct = jpeg_fdct_float;
712
4.20k
    break;
713
0
#endif
714
0
  default:
715
0
    ERREXIT(cinfo, JERR_NOT_COMPILED);
716
0
    break;
717
94.7k
  }
718
719
  /* ...then the supporting stages. */
720
94.7k
  switch (cinfo->dct_method) {
721
0
#ifdef DCT_ISLOW_SUPPORTED
722
74.9k
  case JDCT_ISLOW:
723
74.9k
#endif
724
74.9k
#ifdef DCT_IFAST_SUPPORTED
725
90.5k
  case JDCT_IFAST:
726
90.5k
#endif
727
90.5k
#if defined(DCT_ISLOW_SUPPORTED) || defined(DCT_IFAST_SUPPORTED)
728
#ifdef WITH_SIMD
729
40.8k
    if (!jsimd_set_convsamp(cinfo, &fdct->convsamp))
730
0
#endif
731
0
      fdct->convsamp = convsamp;
732
#ifdef WITH_SIMD
733
40.8k
    if (!jsimd_set_quantize(cinfo, &fdct->quantize))
734
0
#endif
735
0
      fdct->quantize = quantize;
736
90.5k
    break;
737
0
#endif
738
0
#ifdef DCT_FLOAT_SUPPORTED
739
4.20k
  case JDCT_FLOAT:
740
#ifdef WITH_SIMD
741
4.20k
    if (!jsimd_set_convsamp_float(cinfo, &fdct->float_convsamp))
742
0
#endif
743
0
      fdct->float_convsamp = convsamp_float;
744
#ifdef WITH_SIMD
745
4.20k
    if (!jsimd_set_quantize_float(cinfo, &fdct->float_quantize))
746
0
#endif
747
0
      fdct->float_quantize = quantize_float;
748
4.20k
    break;
749
0
#endif
750
0
  default:
751
0
    ERREXIT(cinfo, JERR_NOT_COMPILED);
752
0
    break;
753
94.7k
  }
754
755
  /* Allocate workspace memory */
756
94.7k
#ifdef DCT_FLOAT_SUPPORTED
757
94.7k
  if (cinfo->dct_method == JDCT_FLOAT)
758
4.20k
    fdct->float_workspace = (FAST_FLOAT *)
759
4.20k
      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
760
4.20k
                                  sizeof(FAST_FLOAT) * DCTSIZE2);
761
90.5k
  else
762
90.5k
#endif
763
90.5k
    fdct->workspace = (DCTELEM *)
764
90.5k
      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
765
90.5k
                                  sizeof(DCTELEM) * DCTSIZE2);
766
767
  /* Mark divisor tables unallocated */
768
473k
  for (i = 0; i < NUM_QUANT_TBLS; i++) {
769
378k
    fdct->divisors[i] = NULL;
770
378k
#ifdef DCT_FLOAT_SUPPORTED
771
    fdct->float_divisors[i] = NULL;
772
378k
#endif
773
378k
  }
774
94.7k
}
jinit_forward_dct
Line
Count
Source
672
45.0k
{
673
45.0k
  my_fdct_ptr fdct;
674
45.0k
  int i;
675
676
45.0k
  if (cinfo->data_precision != BITS_IN_JSAMPLE)
677
0
    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
678
679
45.0k
  fdct = (my_fdct_ptr)
680
45.0k
    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
681
45.0k
                                sizeof(my_fdct_controller));
682
45.0k
  cinfo->fdct = (struct jpeg_forward_dct *)fdct;
683
45.0k
  fdct->pub.start_pass = start_pass_fdctmgr;
684
685
  /* First determine the DCT... */
686
45.0k
  switch (cinfo->dct_method) {
687
0
#ifdef DCT_ISLOW_SUPPORTED
688
32.3k
  case JDCT_ISLOW:
689
32.3k
    fdct->pub._forward_DCT = forward_DCT;
690
32.3k
#ifdef WITH_SIMD
691
32.3k
    if (!jsimd_set_fdct_islow(cinfo, &fdct->dct))
692
0
#endif
693
0
      fdct->dct = _jpeg_fdct_islow;
694
32.3k
    break;
695
0
#endif
696
0
#ifdef DCT_IFAST_SUPPORTED
697
8.50k
  case JDCT_IFAST:
698
8.50k
    fdct->pub._forward_DCT = forward_DCT;
699
8.50k
#ifdef WITH_SIMD
700
8.50k
    if (!jsimd_set_fdct_ifast(cinfo, &fdct->dct))
701
0
#endif
702
0
      fdct->dct = _jpeg_fdct_ifast;
703
8.50k
    break;
704
0
#endif
705
0
#ifdef DCT_FLOAT_SUPPORTED
706
4.20k
  case JDCT_FLOAT:
707
4.20k
    fdct->pub._forward_DCT = forward_DCT_float;
708
4.20k
#ifdef WITH_SIMD
709
4.20k
    if (!jsimd_set_fdct_float(cinfo, &fdct->float_dct))
710
0
#endif
711
0
      fdct->float_dct = jpeg_fdct_float;
712
4.20k
    break;
713
0
#endif
714
0
  default:
715
0
    ERREXIT(cinfo, JERR_NOT_COMPILED);
716
0
    break;
717
45.0k
  }
718
719
  /* ...then the supporting stages. */
720
45.0k
  switch (cinfo->dct_method) {
721
0
#ifdef DCT_ISLOW_SUPPORTED
722
32.3k
  case JDCT_ISLOW:
723
32.3k
#endif
724
32.3k
#ifdef DCT_IFAST_SUPPORTED
725
40.8k
  case JDCT_IFAST:
726
40.8k
#endif
727
40.8k
#if defined(DCT_ISLOW_SUPPORTED) || defined(DCT_IFAST_SUPPORTED)
728
40.8k
#ifdef WITH_SIMD
729
40.8k
    if (!jsimd_set_convsamp(cinfo, &fdct->convsamp))
730
0
#endif
731
0
      fdct->convsamp = convsamp;
732
40.8k
#ifdef WITH_SIMD
733
40.8k
    if (!jsimd_set_quantize(cinfo, &fdct->quantize))
734
0
#endif
735
0
      fdct->quantize = quantize;
736
40.8k
    break;
737
0
#endif
738
0
#ifdef DCT_FLOAT_SUPPORTED
739
4.20k
  case JDCT_FLOAT:
740
4.20k
#ifdef WITH_SIMD
741
4.20k
    if (!jsimd_set_convsamp_float(cinfo, &fdct->float_convsamp))
742
0
#endif
743
0
      fdct->float_convsamp = convsamp_float;
744
4.20k
#ifdef WITH_SIMD
745
4.20k
    if (!jsimd_set_quantize_float(cinfo, &fdct->float_quantize))
746
0
#endif
747
0
      fdct->float_quantize = quantize_float;
748
4.20k
    break;
749
0
#endif
750
0
  default:
751
0
    ERREXIT(cinfo, JERR_NOT_COMPILED);
752
0
    break;
753
45.0k
  }
754
755
  /* Allocate workspace memory */
756
45.0k
#ifdef DCT_FLOAT_SUPPORTED
757
45.0k
  if (cinfo->dct_method == JDCT_FLOAT)
758
4.20k
    fdct->float_workspace = (FAST_FLOAT *)
759
4.20k
      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
760
4.20k
                                  sizeof(FAST_FLOAT) * DCTSIZE2);
761
40.8k
  else
762
40.8k
#endif
763
40.8k
    fdct->workspace = (DCTELEM *)
764
40.8k
      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
765
40.8k
                                  sizeof(DCTELEM) * DCTSIZE2);
766
767
  /* Mark divisor tables unallocated */
768
225k
  for (i = 0; i < NUM_QUANT_TBLS; i++) {
769
180k
    fdct->divisors[i] = NULL;
770
180k
#ifdef DCT_FLOAT_SUPPORTED
771
    fdct->float_divisors[i] = NULL;
772
180k
#endif
773
180k
  }
774
45.0k
}
j12init_forward_dct
Line
Count
Source
672
49.6k
{
673
49.6k
  my_fdct_ptr fdct;
674
49.6k
  int i;
675
676
49.6k
  if (cinfo->data_precision != BITS_IN_JSAMPLE)
677
0
    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
678
679
49.6k
  fdct = (my_fdct_ptr)
680
49.6k
    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
681
49.6k
                                sizeof(my_fdct_controller));
682
49.6k
  cinfo->fdct = (struct jpeg_forward_dct *)fdct;
683
49.6k
  fdct->pub.start_pass = start_pass_fdctmgr;
684
685
  /* First determine the DCT... */
686
49.6k
  switch (cinfo->dct_method) {
687
0
#ifdef DCT_ISLOW_SUPPORTED
688
42.5k
  case JDCT_ISLOW:
689
42.5k
    fdct->pub._forward_DCT = forward_DCT;
690
#ifdef WITH_SIMD
691
    if (!jsimd_set_fdct_islow(cinfo, &fdct->dct))
692
#endif
693
42.5k
      fdct->dct = _jpeg_fdct_islow;
694
42.5k
    break;
695
0
#endif
696
0
#ifdef DCT_IFAST_SUPPORTED
697
7.11k
  case JDCT_IFAST:
698
7.11k
    fdct->pub._forward_DCT = forward_DCT;
699
#ifdef WITH_SIMD
700
    if (!jsimd_set_fdct_ifast(cinfo, &fdct->dct))
701
#endif
702
7.11k
      fdct->dct = _jpeg_fdct_ifast;
703
7.11k
    break;
704
0
#endif
705
0
#ifdef DCT_FLOAT_SUPPORTED
706
0
  case JDCT_FLOAT:
707
0
    fdct->pub._forward_DCT = forward_DCT_float;
708
#ifdef WITH_SIMD
709
    if (!jsimd_set_fdct_float(cinfo, &fdct->float_dct))
710
#endif
711
0
      fdct->float_dct = jpeg_fdct_float;
712
0
    break;
713
0
#endif
714
0
  default:
715
0
    ERREXIT(cinfo, JERR_NOT_COMPILED);
716
0
    break;
717
49.6k
  }
718
719
  /* ...then the supporting stages. */
720
49.6k
  switch (cinfo->dct_method) {
721
0
#ifdef DCT_ISLOW_SUPPORTED
722
42.5k
  case JDCT_ISLOW:
723
42.5k
#endif
724
42.5k
#ifdef DCT_IFAST_SUPPORTED
725
49.6k
  case JDCT_IFAST:
726
49.6k
#endif
727
49.6k
#if defined(DCT_ISLOW_SUPPORTED) || defined(DCT_IFAST_SUPPORTED)
728
#ifdef WITH_SIMD
729
    if (!jsimd_set_convsamp(cinfo, &fdct->convsamp))
730
#endif
731
49.6k
      fdct->convsamp = convsamp;
732
#ifdef WITH_SIMD
733
    if (!jsimd_set_quantize(cinfo, &fdct->quantize))
734
#endif
735
49.6k
      fdct->quantize = quantize;
736
49.6k
    break;
737
0
#endif
738
0
#ifdef DCT_FLOAT_SUPPORTED
739
0
  case JDCT_FLOAT:
740
#ifdef WITH_SIMD
741
    if (!jsimd_set_convsamp_float(cinfo, &fdct->float_convsamp))
742
#endif
743
0
      fdct->float_convsamp = convsamp_float;
744
#ifdef WITH_SIMD
745
    if (!jsimd_set_quantize_float(cinfo, &fdct->float_quantize))
746
#endif
747
0
      fdct->float_quantize = quantize_float;
748
0
    break;
749
0
#endif
750
0
  default:
751
0
    ERREXIT(cinfo, JERR_NOT_COMPILED);
752
0
    break;
753
49.6k
  }
754
755
  /* Allocate workspace memory */
756
49.6k
#ifdef DCT_FLOAT_SUPPORTED
757
49.6k
  if (cinfo->dct_method == JDCT_FLOAT)
758
0
    fdct->float_workspace = (FAST_FLOAT *)
759
0
      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
760
0
                                  sizeof(FAST_FLOAT) * DCTSIZE2);
761
49.6k
  else
762
49.6k
#endif
763
49.6k
    fdct->workspace = (DCTELEM *)
764
49.6k
      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
765
49.6k
                                  sizeof(DCTELEM) * DCTSIZE2);
766
767
  /* Mark divisor tables unallocated */
768
248k
  for (i = 0; i < NUM_QUANT_TBLS; i++) {
769
198k
    fdct->divisors[i] = NULL;
770
198k
#ifdef DCT_FLOAT_SUPPORTED
771
    fdct->float_divisors[i] = NULL;
772
198k
#endif
773
198k
  }
774
49.6k
}
775
776
#endif /* defined(DCT_ISLOW_SUPPORTED) || defined(DCT_IFAST_SUPPORTED) ||
777
          defined(DCT_FLOAT_SUPPORTED) */