Coverage Report

Created: 2026-06-10 06:18

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libjpeg-turbo.main/src/jcdctmgr.c
Line
Count
Source
1
/*
2
 * jcdctmgr.c
3
 *
4
 * This file was part of the Independent JPEG Group's software:
5
 * Copyright (C) 1994-1996, Thomas G. Lane.
6
 * libjpeg-turbo Modifications:
7
 * Copyright (C) 1999-2006, MIYASAKA Masaru.
8
 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
9
 * Copyright (C) 2011, 2014-2015, 2022, 2024-2026, D. R. Commander.
10
 * For conditions of distribution and use, see the accompanying README.ijg
11
 * file.
12
 *
13
 * This file contains the forward-DCT management logic.
14
 * This code selects a particular DCT implementation to be used,
15
 * and it performs related housekeeping chores including coefficient
16
 * quantization.
17
 */
18
19
#define JPEG_INTERNALS
20
#include "jinclude.h"
21
#include "jpeglib.h"
22
#include "jdct.h"               /* Private declarations for DCT subsystem */
23
#ifdef WITH_SIMD
24
#include "../simd/jsimddct.h"
25
#endif
26
#ifdef WITH_PROFILE
27
#include "tjutil.h"
28
#endif
29
30
31
#if defined(DCT_ISLOW_SUPPORTED) || defined(DCT_IFAST_SUPPORTED) || \
32
    defined(DCT_FLOAT_SUPPORTED)
33
34
/* Private subobject for this module */
35
36
METHODDEF(void) quantize(JCOEFPTR, DCTELEM *, DCTELEM *);
37
38
typedef struct {
39
  struct jpeg_forward_dct pub;  /* public fields */
40
41
  /* Pointer to the DCT routine actually in use */
42
  forward_DCT_method_ptr dct;
43
  convsamp_method_ptr convsamp;
44
  quantize_method_ptr quantize;
45
46
  /* The actual post-DCT divisors --- not identical to the quant table
47
   * entries, because of scaling (especially for an unnormalized DCT).
48
   * Each table is given in normal array order.
49
   */
50
  DCTELEM *divisors[NUM_QUANT_TBLS];
51
52
  /* work area for FDCT subroutine */
53
  DCTELEM *workspace;
54
55
#ifdef DCT_FLOAT_SUPPORTED
56
  /* Same as above for the floating-point case. */
57
  float_DCT_method_ptr float_dct;
58
  float_convsamp_method_ptr float_convsamp;
59
  float_quantize_method_ptr float_quantize;
60
  FAST_FLOAT *float_divisors[NUM_QUANT_TBLS];
61
  FAST_FLOAT *float_workspace;
62
#endif
63
} my_fdct_controller;
64
65
typedef my_fdct_controller *my_fdct_ptr;
66
67
68
#if BITS_IN_JSAMPLE == 8
69
70
/*
71
 * Find the highest bit in an integer through binary search.
72
 */
73
74
LOCAL(int)
75
flss(UINT16 val)
76
10.4M
{
77
10.4M
  int bit;
78
79
10.4M
  bit = 16;
80
81
10.4M
  if (!val)
82
0
    return 0;
83
84
10.4M
  if (!(val & 0xff00)) {
85
5.24M
    bit -= 8;
86
5.24M
    val <<= 8;
87
5.24M
  }
88
10.4M
  if (!(val & 0xf000)) {
89
7.11M
    bit -= 4;
90
7.11M
    val <<= 4;
91
7.11M
  }
92
10.4M
  if (!(val & 0xc000)) {
93
3.64M
    bit -= 2;
94
3.64M
    val <<= 2;
95
3.64M
  }
96
10.4M
  if (!(val & 0x8000)) {
97
4.58M
    bit -= 1;
98
4.58M
    val <<= 1;
99
4.58M
  }
100
101
10.4M
  return bit;
102
10.4M
}
103
104
105
/*
106
 * Compute values to do a division using reciprocal.
107
 *
108
 * This implementation is based on an algorithm described in
109
 *   "Optimizing subroutines in assembly language:
110
 *   An optimization guide for x86 platforms" (https://agner.org/optimize).
111
 * More information about the basic algorithm can be found in
112
 * the paper "Integer Division Using Reciprocals" by Robert Alverson.
113
 *
114
 * The basic idea is to replace x/d by x * d^-1. In order to store
115
 * d^-1 with enough precision we shift it left a few places. It turns
116
 * out that this algorithm gives just enough precision, and also fits
117
 * into DCTELEM:
118
 *
119
 *   b = (the number of significant bits in divisor) - 1
120
 *   r = (word size) + b
121
 *   f = 2^r / divisor
122
 *
123
 * f will not be an integer for most cases, so we need to compensate
124
 * for the rounding error introduced:
125
 *
126
 *   no fractional part:
127
 *
128
 *       result = input >> r
129
 *
130
 *   fractional part of f < 0.5:
131
 *
132
 *       round f down to nearest integer
133
 *       result = ((input + 1) * f) >> r
134
 *
135
 *   fractional part of f > 0.5:
136
 *
137
 *       round f up to nearest integer
138
 *       result = (input * f) >> r
139
 *
140
 * This is the original algorithm that gives truncated results. But we
141
 * want properly rounded results, so we replace "input" with
142
 * "input + divisor/2".
143
 *
144
 * In order to allow SIMD implementations we also tweak the values to
145
 * allow the same calculation to be made at all times:
146
 *
147
 *   dctbl[0] = f rounded to nearest integer
148
 *   dctbl[1] = divisor / 2 (+ 1 if fractional part of f < 0.5)
149
 *   dctbl[2] = 1 << ((word size) * 2 - r)
150
 *   dctbl[3] = r - (word size)
151
 *
152
 * dctbl[2] is for stupid instruction sets where the shift operation
153
 * isn't member-wise (e.g. MMX).
154
 *
155
 * The reason dctbl[2] and dctbl[3] reduce the shift with (word size)
156
 * is that most SIMD implementations have a "multiply and store top
157
 * half" operation.
158
 *
159
 * Lastly, we store each of the values in their own table instead
160
 * of in a consecutive manner, yet again in order to allow SIMD
161
 * routines.
162
 */
163
164
LOCAL(int)
165
compute_reciprocal(UINT16 divisor, DCTELEM *dtbl)
166
10.5M
{
167
10.5M
  UDCTELEM2 fq, fr;
168
10.5M
  UDCTELEM c;
169
10.5M
  int b, r;
170
171
10.5M
  if (divisor <= 1) {
172
    /* divisor == 1 means unquantized, so these reciprocal/correction/shift
173
     * values will cause the C quantization algorithm to act like the
174
     * identity function.  Since only the C quantization algorithm is used in
175
     * these cases, the scale value is irrelevant.
176
     *
177
     * divisor == 0 can never happen in a normal program, because
178
     * jpeg_add_quant_table() clamps values < 1.  However, a program could
179
     * abuse the API by manually modifying the exposed quantization table just
180
     * before calling jpeg_start_compress().  Thus, we effectively clamp
181
     * values < 1 here as well, to avoid dividing by 0.
182
     */
183
35.3k
    dtbl[DCTSIZE2 * 0] = (DCTELEM)1;                        /* reciprocal */
184
35.3k
    dtbl[DCTSIZE2 * 1] = (DCTELEM)0;                        /* correction */
185
35.3k
    dtbl[DCTSIZE2 * 2] = (DCTELEM)1;                        /* scale */
186
35.3k
    dtbl[DCTSIZE2 * 3] = -(DCTELEM)(sizeof(DCTELEM) * 8);   /* shift */
187
35.3k
    return 0;
188
35.3k
  }
189
190
10.4M
  b = flss(divisor) - 1;
191
10.4M
  r  = sizeof(DCTELEM) * 8 + b;
192
193
10.4M
  fq = ((UDCTELEM2)1 << r) / divisor;
194
10.4M
  fr = ((UDCTELEM2)1 << r) % divisor;
195
196
10.4M
  c = divisor / 2;                      /* for rounding */
197
198
10.4M
  if (fr == 0) {                        /* divisor is power of two */
199
    /* fq will be one bit too large to fit in DCTELEM, so adjust */
200
1.68M
    fq >>= 1;
201
1.68M
    r--;
202
8.78M
  } else if (fr <= (divisor / 2U)) {    /* fractional part is < 0.5 */
203
2.34M
    c++;
204
6.43M
  } else {                              /* fractional part is > 0.5 */
205
6.43M
    fq++;
206
6.43M
  }
207
208
10.4M
  dtbl[DCTSIZE2 * 0] = (DCTELEM)fq;     /* reciprocal */
209
10.4M
  dtbl[DCTSIZE2 * 1] = (DCTELEM)c;      /* correction + roundfactor */
210
10.4M
#ifdef WITH_SIMD
211
10.4M
  dtbl[DCTSIZE2 * 2] = (DCTELEM)(1 << (sizeof(DCTELEM) * 8 * 2 - r)); /* scale */
212
#else
213
  dtbl[DCTSIZE2 * 2] = 1;
214
#endif
215
10.4M
  dtbl[DCTSIZE2 * 3] = (DCTELEM)r - sizeof(DCTELEM) * 8; /* shift */
216
217
10.4M
  if (r <= 16) return 0;
218
10.3M
  else return 1;
219
10.4M
}
220
221
#endif
222
223
224
/*
225
 * Initialize for a processing pass.
226
 * Verify that all referenced Q-tables are present, and set up
227
 * the divisor table for each one.
228
 * In the current implementation, DCT of all components is done during
229
 * the first pass, even if only some components will be output in the
230
 * first scan.  Hence all components should be examined here.
231
 */
232
233
METHODDEF(void)
234
start_pass_fdctmgr(j_compress_ptr cinfo)
235
79.1k
{
236
79.1k
  my_fdct_ptr fdct = (my_fdct_ptr)cinfo->fdct;
237
79.1k
  int ci, qtblno, i;
238
79.1k
  jpeg_component_info *compptr;
239
79.1k
  JQUANT_TBL *qtbl;
240
79.1k
  DCTELEM *dtbl;
241
242
273k
  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
243
194k
       ci++, compptr++) {
244
194k
    qtblno = compptr->quant_tbl_no;
245
    /* Make sure specified quantization table is present */
246
194k
    if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
247
194k
        cinfo->quant_tbl_ptrs[qtblno] == NULL)
248
0
      ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
249
194k
    qtbl = cinfo->quant_tbl_ptrs[qtblno];
250
    /* Compute divisors for this quant table */
251
    /* We may do this more than once for same table, but it's not a big deal */
252
194k
    switch (cinfo->dct_method) {
253
0
#ifdef DCT_ISLOW_SUPPORTED
254
151k
    case JDCT_ISLOW:
255
      /* For LL&M IDCT method, divisors are equal to raw quantization
256
       * coefficients multiplied by 8 (to counteract scaling).
257
       */
258
151k
      if (fdct->divisors[qtblno] == NULL) {
259
93.9k
        fdct->divisors[qtblno] = (DCTELEM *)
260
93.9k
          (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
261
93.9k
                                      (DCTSIZE2 * 4) * sizeof(DCTELEM));
262
93.9k
      }
263
151k
      dtbl = fdct->divisors[qtblno];
264
9.86M
      for (i = 0; i < DCTSIZE2; i++) {
265
#if BITS_IN_JSAMPLE == 8
266
#ifdef WITH_SIMD
267
5.05M
        if (!compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i]) &&
268
0
            fdct->quantize != quantize)
269
0
          fdct->quantize = quantize;
270
#else
271
        compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i]);
272
#endif
273
#else
274
        dtbl[i] = ((DCTELEM)qtbl->quantval[i]) << 3;
275
#endif
276
9.71M
      }
277
151k
      break;
278
0
#endif
279
0
#ifdef DCT_IFAST_SUPPORTED
280
34.7k
    case JDCT_IFAST:
281
34.7k
      {
282
        /* For AA&N IDCT method, divisors are equal to quantization
283
         * coefficients scaled by scalefactor[row]*scalefactor[col], where
284
         *   scalefactor[0] = 1
285
         *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
286
         * We apply a further scale factor of 8.
287
         */
288
34.7k
#define CONST_BITS  14
289
34.7k
        static const INT16 aanscales[DCTSIZE2] = {
290
          /* precomputed values scaled up by 14 bits */
291
34.7k
          16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
292
34.7k
          22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
293
34.7k
          21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
294
34.7k
          19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
295
34.7k
          16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
296
34.7k
          12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
297
34.7k
           8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
298
34.7k
           4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
299
34.7k
        };
300
34.7k
        SHIFT_TEMPS
301
302
34.7k
        if (fdct->divisors[qtblno] == NULL) {
303
24.0k
          fdct->divisors[qtblno] = (DCTELEM *)
304
24.0k
            (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
305
24.0k
                                        (DCTSIZE2 * 4) * sizeof(DCTELEM));
306
24.0k
        }
307
34.7k
        dtbl = fdct->divisors[qtblno];
308
2.25M
        for (i = 0; i < DCTSIZE2; i++) {
309
#if BITS_IN_JSAMPLE == 8
310
#ifdef WITH_SIMD
311
1.28M
          if (!compute_reciprocal(
312
1.28M
                DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
313
1.28M
                                      (JLONG)aanscales[i]),
314
1.28M
                        CONST_BITS - 3), &dtbl[i]) &&
315
63.6k
              fdct->quantize != quantize)
316
2.12k
            fdct->quantize = quantize;
317
#else
318
          compute_reciprocal(
319
            DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
320
                                  (JLONG)aanscales[i]),
321
                    CONST_BITS-3), &dtbl[i]);
322
#endif
323
#else
324
          dtbl[i] = (DCTELEM)
325
939k
            DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
326
                                  (JLONG)aanscales[i]),
327
                    CONST_BITS - 3);
328
#endif
329
2.22M
        }
330
34.7k
      }
331
34.7k
      break;
332
0
#endif
333
0
#ifdef DCT_FLOAT_SUPPORTED
334
8.15k
    case JDCT_FLOAT:
335
8.15k
      {
336
        /* For float AA&N IDCT method, divisors are equal to quantization
337
         * coefficients scaled by scalefactor[row]*scalefactor[col], where
338
         *   scalefactor[0] = 1
339
         *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
340
         * We apply a further scale factor of 8.
341
         * What's actually stored is 1/divisor so that the inner loop can
342
         * use a multiplication rather than a division.
343
         */
344
8.15k
        FAST_FLOAT *fdtbl;
345
8.15k
        int row, col;
346
8.15k
        static const double aanscalefactor[DCTSIZE] = {
347
8.15k
          1.0, 1.387039845, 1.306562965, 1.175875602,
348
8.15k
          1.0, 0.785694958, 0.541196100, 0.275899379
349
8.15k
        };
350
351
8.15k
        if (fdct->float_divisors[qtblno] == NULL) {
352
6.04k
          fdct->float_divisors[qtblno] = (FAST_FLOAT *)
353
6.04k
            (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
354
6.04k
                                        DCTSIZE2 * sizeof(FAST_FLOAT));
355
6.04k
        }
356
8.15k
        fdtbl = fdct->float_divisors[qtblno];
357
8.15k
        i = 0;
358
73.3k
        for (row = 0; row < DCTSIZE; row++) {
359
587k
          for (col = 0; col < DCTSIZE; col++) {
360
521k
            fdtbl[i] = (FAST_FLOAT)
361
521k
              (1.0 / (((double)qtbl->quantval[i] *
362
521k
                       aanscalefactor[row] * aanscalefactor[col] * 8.0)));
363
521k
            i++;
364
521k
          }
365
65.2k
        }
366
8.15k
      }
367
8.15k
      break;
368
0
#endif
369
0
    default:
370
0
      ERREXIT(cinfo, JERR_NOT_COMPILED);
371
0
      break;
372
194k
    }
373
194k
  }
374
79.1k
}
jcdctmgr-8.c:start_pass_fdctmgr
Line
Count
Source
235
44.6k
{
236
44.6k
  my_fdct_ptr fdct = (my_fdct_ptr)cinfo->fdct;
237
44.6k
  int ci, qtblno, i;
238
44.6k
  jpeg_component_info *compptr;
239
44.6k
  JQUANT_TBL *qtbl;
240
44.6k
  DCTELEM *dtbl;
241
242
151k
  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
243
107k
       ci++, compptr++) {
244
107k
    qtblno = compptr->quant_tbl_no;
245
    /* Make sure specified quantization table is present */
246
107k
    if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
247
107k
        cinfo->quant_tbl_ptrs[qtblno] == NULL)
248
0
      ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
249
107k
    qtbl = cinfo->quant_tbl_ptrs[qtblno];
250
    /* Compute divisors for this quant table */
251
    /* We may do this more than once for same table, but it's not a big deal */
252
107k
    switch (cinfo->dct_method) {
253
0
#ifdef DCT_ISLOW_SUPPORTED
254
79.0k
    case JDCT_ISLOW:
255
      /* For LL&M IDCT method, divisors are equal to raw quantization
256
       * coefficients multiplied by 8 (to counteract scaling).
257
       */
258
79.0k
      if (fdct->divisors[qtblno] == NULL) {
259
49.9k
        fdct->divisors[qtblno] = (DCTELEM *)
260
49.9k
          (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
261
49.9k
                                      (DCTSIZE2 * 4) * sizeof(DCTELEM));
262
49.9k
      }
263
79.0k
      dtbl = fdct->divisors[qtblno];
264
5.13M
      for (i = 0; i < DCTSIZE2; i++) {
265
5.05M
#if BITS_IN_JSAMPLE == 8
266
5.05M
#ifdef WITH_SIMD
267
5.05M
        if (!compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i]) &&
268
0
            fdct->quantize != quantize)
269
0
          fdct->quantize = quantize;
270
#else
271
        compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i]);
272
#endif
273
#else
274
        dtbl[i] = ((DCTELEM)qtbl->quantval[i]) << 3;
275
#endif
276
5.05M
      }
277
79.0k
      break;
278
0
#endif
279
0
#ifdef DCT_IFAST_SUPPORTED
280
20.0k
    case JDCT_IFAST:
281
20.0k
      {
282
        /* For AA&N IDCT method, divisors are equal to quantization
283
         * coefficients scaled by scalefactor[row]*scalefactor[col], where
284
         *   scalefactor[0] = 1
285
         *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
286
         * We apply a further scale factor of 8.
287
         */
288
20.0k
#define CONST_BITS  14
289
20.0k
        static const INT16 aanscales[DCTSIZE2] = {
290
          /* precomputed values scaled up by 14 bits */
291
20.0k
          16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
292
20.0k
          22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
293
20.0k
          21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
294
20.0k
          19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
295
20.0k
          16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
296
20.0k
          12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
297
20.0k
           8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
298
20.0k
           4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
299
20.0k
        };
300
20.0k
        SHIFT_TEMPS
301
302
20.0k
        if (fdct->divisors[qtblno] == NULL) {
303
14.2k
          fdct->divisors[qtblno] = (DCTELEM *)
304
14.2k
            (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
305
14.2k
                                        (DCTSIZE2 * 4) * sizeof(DCTELEM));
306
14.2k
        }
307
20.0k
        dtbl = fdct->divisors[qtblno];
308
1.30M
        for (i = 0; i < DCTSIZE2; i++) {
309
1.28M
#if BITS_IN_JSAMPLE == 8
310
1.28M
#ifdef WITH_SIMD
311
1.28M
          if (!compute_reciprocal(
312
1.28M
                DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
313
1.28M
                                      (JLONG)aanscales[i]),
314
1.28M
                        CONST_BITS - 3), &dtbl[i]) &&
315
63.6k
              fdct->quantize != quantize)
316
2.12k
            fdct->quantize = quantize;
317
#else
318
          compute_reciprocal(
319
            DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
320
                                  (JLONG)aanscales[i]),
321
                    CONST_BITS-3), &dtbl[i]);
322
#endif
323
#else
324
          dtbl[i] = (DCTELEM)
325
            DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
326
                                  (JLONG)aanscales[i]),
327
                    CONST_BITS - 3);
328
#endif
329
1.28M
        }
330
20.0k
      }
331
20.0k
      break;
332
0
#endif
333
0
#ifdef DCT_FLOAT_SUPPORTED
334
8.15k
    case JDCT_FLOAT:
335
8.15k
      {
336
        /* For float AA&N IDCT method, divisors are equal to quantization
337
         * coefficients scaled by scalefactor[row]*scalefactor[col], where
338
         *   scalefactor[0] = 1
339
         *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
340
         * We apply a further scale factor of 8.
341
         * What's actually stored is 1/divisor so that the inner loop can
342
         * use a multiplication rather than a division.
343
         */
344
8.15k
        FAST_FLOAT *fdtbl;
345
8.15k
        int row, col;
346
8.15k
        static const double aanscalefactor[DCTSIZE] = {
347
8.15k
          1.0, 1.387039845, 1.306562965, 1.175875602,
348
8.15k
          1.0, 0.785694958, 0.541196100, 0.275899379
349
8.15k
        };
350
351
8.15k
        if (fdct->float_divisors[qtblno] == NULL) {
352
6.04k
          fdct->float_divisors[qtblno] = (FAST_FLOAT *)
353
6.04k
            (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
354
6.04k
                                        DCTSIZE2 * sizeof(FAST_FLOAT));
355
6.04k
        }
356
8.15k
        fdtbl = fdct->float_divisors[qtblno];
357
8.15k
        i = 0;
358
73.3k
        for (row = 0; row < DCTSIZE; row++) {
359
587k
          for (col = 0; col < DCTSIZE; col++) {
360
521k
            fdtbl[i] = (FAST_FLOAT)
361
521k
              (1.0 / (((double)qtbl->quantval[i] *
362
521k
                       aanscalefactor[row] * aanscalefactor[col] * 8.0)));
363
521k
            i++;
364
521k
          }
365
65.2k
        }
366
8.15k
      }
367
8.15k
      break;
368
0
#endif
369
0
    default:
370
0
      ERREXIT(cinfo, JERR_NOT_COMPILED);
371
0
      break;
372
107k
    }
373
107k
  }
374
44.6k
}
jcdctmgr-12.c:start_pass_fdctmgr
Line
Count
Source
235
34.4k
{
236
34.4k
  my_fdct_ptr fdct = (my_fdct_ptr)cinfo->fdct;
237
34.4k
  int ci, qtblno, i;
238
34.4k
  jpeg_component_info *compptr;
239
34.4k
  JQUANT_TBL *qtbl;
240
34.4k
  DCTELEM *dtbl;
241
242
121k
  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
243
87.4k
       ci++, compptr++) {
244
87.4k
    qtblno = compptr->quant_tbl_no;
245
    /* Make sure specified quantization table is present */
246
87.4k
    if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
247
87.4k
        cinfo->quant_tbl_ptrs[qtblno] == NULL)
248
0
      ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
249
87.4k
    qtbl = cinfo->quant_tbl_ptrs[qtblno];
250
    /* Compute divisors for this quant table */
251
    /* We may do this more than once for same table, but it's not a big deal */
252
87.4k
    switch (cinfo->dct_method) {
253
0
#ifdef DCT_ISLOW_SUPPORTED
254
72.7k
    case JDCT_ISLOW:
255
      /* For LL&M IDCT method, divisors are equal to raw quantization
256
       * coefficients multiplied by 8 (to counteract scaling).
257
       */
258
72.7k
      if (fdct->divisors[qtblno] == NULL) {
259
44.0k
        fdct->divisors[qtblno] = (DCTELEM *)
260
44.0k
          (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
261
44.0k
                                      (DCTSIZE2 * 4) * sizeof(DCTELEM));
262
44.0k
      }
263
72.7k
      dtbl = fdct->divisors[qtblno];
264
4.73M
      for (i = 0; i < DCTSIZE2; i++) {
265
#if BITS_IN_JSAMPLE == 8
266
#ifdef WITH_SIMD
267
        if (!compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i]) &&
268
            fdct->quantize != quantize)
269
          fdct->quantize = quantize;
270
#else
271
        compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i]);
272
#endif
273
#else
274
4.65M
        dtbl[i] = ((DCTELEM)qtbl->quantval[i]) << 3;
275
4.65M
#endif
276
4.65M
      }
277
72.7k
      break;
278
0
#endif
279
0
#ifdef DCT_IFAST_SUPPORTED
280
14.6k
    case JDCT_IFAST:
281
14.6k
      {
282
        /* For AA&N IDCT method, divisors are equal to quantization
283
         * coefficients scaled by scalefactor[row]*scalefactor[col], where
284
         *   scalefactor[0] = 1
285
         *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
286
         * We apply a further scale factor of 8.
287
         */
288
14.6k
#define CONST_BITS  14
289
14.6k
        static const INT16 aanscales[DCTSIZE2] = {
290
          /* precomputed values scaled up by 14 bits */
291
14.6k
          16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
292
14.6k
          22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
293
14.6k
          21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
294
14.6k
          19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
295
14.6k
          16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
296
14.6k
          12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
297
14.6k
           8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
298
14.6k
           4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
299
14.6k
        };
300
14.6k
        SHIFT_TEMPS
301
302
14.6k
        if (fdct->divisors[qtblno] == NULL) {
303
9.78k
          fdct->divisors[qtblno] = (DCTELEM *)
304
9.78k
            (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
305
9.78k
                                        (DCTSIZE2 * 4) * sizeof(DCTELEM));
306
9.78k
        }
307
14.6k
        dtbl = fdct->divisors[qtblno];
308
953k
        for (i = 0; i < DCTSIZE2; i++) {
309
#if BITS_IN_JSAMPLE == 8
310
#ifdef WITH_SIMD
311
          if (!compute_reciprocal(
312
                DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
313
                                      (JLONG)aanscales[i]),
314
                        CONST_BITS - 3), &dtbl[i]) &&
315
              fdct->quantize != quantize)
316
            fdct->quantize = quantize;
317
#else
318
          compute_reciprocal(
319
            DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
320
                                  (JLONG)aanscales[i]),
321
                    CONST_BITS-3), &dtbl[i]);
322
#endif
323
#else
324
939k
          dtbl[i] = (DCTELEM)
325
939k
            DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
326
939k
                                  (JLONG)aanscales[i]),
327
939k
                    CONST_BITS - 3);
328
939k
#endif
329
939k
        }
330
14.6k
      }
331
14.6k
      break;
332
0
#endif
333
0
#ifdef DCT_FLOAT_SUPPORTED
334
0
    case JDCT_FLOAT:
335
0
      {
336
        /* For float AA&N IDCT method, divisors are equal to quantization
337
         * coefficients scaled by scalefactor[row]*scalefactor[col], where
338
         *   scalefactor[0] = 1
339
         *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
340
         * We apply a further scale factor of 8.
341
         * What's actually stored is 1/divisor so that the inner loop can
342
         * use a multiplication rather than a division.
343
         */
344
0
        FAST_FLOAT *fdtbl;
345
0
        int row, col;
346
0
        static const double aanscalefactor[DCTSIZE] = {
347
0
          1.0, 1.387039845, 1.306562965, 1.175875602,
348
0
          1.0, 0.785694958, 0.541196100, 0.275899379
349
0
        };
350
351
0
        if (fdct->float_divisors[qtblno] == NULL) {
352
0
          fdct->float_divisors[qtblno] = (FAST_FLOAT *)
353
0
            (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
354
0
                                        DCTSIZE2 * sizeof(FAST_FLOAT));
355
0
        }
356
0
        fdtbl = fdct->float_divisors[qtblno];
357
0
        i = 0;
358
0
        for (row = 0; row < DCTSIZE; row++) {
359
0
          for (col = 0; col < DCTSIZE; col++) {
360
0
            fdtbl[i] = (FAST_FLOAT)
361
0
              (1.0 / (((double)qtbl->quantval[i] *
362
0
                       aanscalefactor[row] * aanscalefactor[col] * 8.0)));
363
0
            i++;
364
0
          }
365
0
        }
366
0
      }
367
0
      break;
368
0
#endif
369
0
    default:
370
0
      ERREXIT(cinfo, JERR_NOT_COMPILED);
371
0
      break;
372
87.4k
    }
373
87.4k
  }
374
34.4k
}
375
376
377
/*
378
 * Load data into workspace, applying unsigned->signed conversion.
379
 */
380
381
METHODDEF(void)
382
convsamp(_JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM *workspace)
383
38.7M
{
384
38.7M
  register DCTELEM *workspaceptr;
385
38.7M
  register _JSAMPROW elemptr;
386
38.7M
  register int elemr;
387
388
38.7M
  workspaceptr = workspace;
389
348M
  for (elemr = 0; elemr < DCTSIZE; elemr++) {
390
309M
    elemptr = sample_data[elemr] + start_col;
391
392
309M
#if DCTSIZE == 8                /* unroll the inner loop */
393
309M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
394
309M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
395
309M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
396
309M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
397
309M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
398
309M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
399
309M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
400
309M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
401
#else
402
    {
403
      register int elemc;
404
      for (elemc = DCTSIZE; elemc > 0; elemc--)
405
        *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
406
    }
407
#endif
408
309M
  }
409
38.7M
}
Unexecuted instantiation: jcdctmgr-8.c:convsamp
jcdctmgr-12.c:convsamp
Line
Count
Source
383
38.7M
{
384
38.7M
  register DCTELEM *workspaceptr;
385
38.7M
  register _JSAMPROW elemptr;
386
38.7M
  register int elemr;
387
388
38.7M
  workspaceptr = workspace;
389
348M
  for (elemr = 0; elemr < DCTSIZE; elemr++) {
390
309M
    elemptr = sample_data[elemr] + start_col;
391
392
309M
#if DCTSIZE == 8                /* unroll the inner loop */
393
309M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
394
309M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
395
309M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
396
309M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
397
309M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
398
309M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
399
309M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
400
309M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
401
#else
402
    {
403
      register int elemc;
404
      for (elemc = DCTSIZE; elemc > 0; elemc--)
405
        *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
406
    }
407
#endif
408
309M
  }
409
38.7M
}
410
411
412
/*
413
 * Quantize/descale the coefficients, and store into coef_blocks[].
414
 */
415
416
METHODDEF(void)
417
quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
418
46.7M
{
419
46.7M
  int i;
420
46.7M
  DCTELEM temp;
421
46.7M
  JCOEFPTR output_ptr = coef_block;
422
423
#if BITS_IN_JSAMPLE == 8
424
425
  UDCTELEM recip, corr;
426
  int shift;
427
  UDCTELEM2 product;
428
429
519M
  for (i = 0; i < DCTSIZE2; i++) {
430
511M
    temp = workspace[i];
431
511M
    recip = divisors[i + DCTSIZE2 * 0];
432
511M
    corr =  divisors[i + DCTSIZE2 * 1];
433
511M
    shift = divisors[i + DCTSIZE2 * 3];
434
435
511M
    if (temp < 0) {
436
33.4M
      temp = -temp;
437
33.4M
      product = (UDCTELEM2)(temp + corr) * recip;
438
33.4M
      product >>= shift + sizeof(DCTELEM) * 8;
439
33.4M
      temp = (DCTELEM)product;
440
33.4M
      temp = -temp;
441
478M
    } else {
442
478M
      product = (UDCTELEM2)(temp + corr) * recip;
443
478M
      product >>= shift + sizeof(DCTELEM) * 8;
444
478M
      temp = (DCTELEM)product;
445
478M
    }
446
511M
    output_ptr[i] = (JCOEF)temp;
447
511M
  }
448
449
#else
450
451
  register DCTELEM qval;
452
453
2.51G
  for (i = 0; i < DCTSIZE2; i++) {
454
2.47G
    qval = divisors[i];
455
2.47G
    temp = workspace[i];
456
    /* Divide the coefficient value by qval, ensuring proper rounding.
457
     * Since C does not specify the direction of rounding for negative
458
     * quotients, we have to force the dividend positive for portability.
459
     *
460
     * In most files, at least half of the output values will be zero
461
     * (at default quantization settings, more like three-quarters...)
462
     * so we should ensure that this case is fast.  On many machines,
463
     * a comparison is enough cheaper than a divide to make a special test
464
     * a win.  Since both inputs will be nonnegative, we need only test
465
     * for a < b to discover whether a/b is 0.
466
     * If your machine's division is fast enough, define FAST_DIVIDE.
467
     */
468
#ifdef FAST_DIVIDE
469
#define DIVIDE_BY(a, b)  a /= b
470
#else
471
2.47G
#define DIVIDE_BY(a, b)  if (a >= b) a /= b;  else a = 0
472
2.47G
#endif
473
2.47G
    if (temp < 0) {
474
287M
      temp = -temp;
475
287M
      temp += qval >> 1;        /* for rounding */
476
287M
      DIVIDE_BY(temp, qval);
477
287M
      temp = -temp;
478
2.18G
    } else {
479
2.18G
      temp += qval >> 1;        /* for rounding */
480
2.18G
      DIVIDE_BY(temp, qval);
481
2.18G
    }
482
2.47G
    output_ptr[i] = (JCOEF)temp;
483
2.47G
  }
484
485
#endif
486
487
46.7M
}
jcdctmgr-8.c:quantize
Line
Count
Source
418
7.99M
{
419
7.99M
  int i;
420
7.99M
  DCTELEM temp;
421
7.99M
  JCOEFPTR output_ptr = coef_block;
422
423
7.99M
#if BITS_IN_JSAMPLE == 8
424
425
7.99M
  UDCTELEM recip, corr;
426
7.99M
  int shift;
427
7.99M
  UDCTELEM2 product;
428
429
519M
  for (i = 0; i < DCTSIZE2; i++) {
430
511M
    temp = workspace[i];
431
511M
    recip = divisors[i + DCTSIZE2 * 0];
432
511M
    corr =  divisors[i + DCTSIZE2 * 1];
433
511M
    shift = divisors[i + DCTSIZE2 * 3];
434
435
511M
    if (temp < 0) {
436
33.4M
      temp = -temp;
437
33.4M
      product = (UDCTELEM2)(temp + corr) * recip;
438
33.4M
      product >>= shift + sizeof(DCTELEM) * 8;
439
33.4M
      temp = (DCTELEM)product;
440
33.4M
      temp = -temp;
441
478M
    } else {
442
478M
      product = (UDCTELEM2)(temp + corr) * recip;
443
478M
      product >>= shift + sizeof(DCTELEM) * 8;
444
478M
      temp = (DCTELEM)product;
445
478M
    }
446
511M
    output_ptr[i] = (JCOEF)temp;
447
511M
  }
448
449
#else
450
451
  register DCTELEM qval;
452
453
  for (i = 0; i < DCTSIZE2; i++) {
454
    qval = divisors[i];
455
    temp = workspace[i];
456
    /* Divide the coefficient value by qval, ensuring proper rounding.
457
     * Since C does not specify the direction of rounding for negative
458
     * quotients, we have to force the dividend positive for portability.
459
     *
460
     * In most files, at least half of the output values will be zero
461
     * (at default quantization settings, more like three-quarters...)
462
     * so we should ensure that this case is fast.  On many machines,
463
     * a comparison is enough cheaper than a divide to make a special test
464
     * a win.  Since both inputs will be nonnegative, we need only test
465
     * for a < b to discover whether a/b is 0.
466
     * If your machine's division is fast enough, define FAST_DIVIDE.
467
     */
468
#ifdef FAST_DIVIDE
469
#define DIVIDE_BY(a, b)  a /= b
470
#else
471
#define DIVIDE_BY(a, b)  if (a >= b) a /= b;  else a = 0
472
#endif
473
    if (temp < 0) {
474
      temp = -temp;
475
      temp += qval >> 1;        /* for rounding */
476
      DIVIDE_BY(temp, qval);
477
      temp = -temp;
478
    } else {
479
      temp += qval >> 1;        /* for rounding */
480
      DIVIDE_BY(temp, qval);
481
    }
482
    output_ptr[i] = (JCOEF)temp;
483
  }
484
485
#endif
486
487
7.99M
}
jcdctmgr-12.c:quantize
Line
Count
Source
418
38.7M
{
419
38.7M
  int i;
420
38.7M
  DCTELEM temp;
421
38.7M
  JCOEFPTR output_ptr = coef_block;
422
423
#if BITS_IN_JSAMPLE == 8
424
425
  UDCTELEM recip, corr;
426
  int shift;
427
  UDCTELEM2 product;
428
429
  for (i = 0; i < DCTSIZE2; i++) {
430
    temp = workspace[i];
431
    recip = divisors[i + DCTSIZE2 * 0];
432
    corr =  divisors[i + DCTSIZE2 * 1];
433
    shift = divisors[i + DCTSIZE2 * 3];
434
435
    if (temp < 0) {
436
      temp = -temp;
437
      product = (UDCTELEM2)(temp + corr) * recip;
438
      product >>= shift + sizeof(DCTELEM) * 8;
439
      temp = (DCTELEM)product;
440
      temp = -temp;
441
    } else {
442
      product = (UDCTELEM2)(temp + corr) * recip;
443
      product >>= shift + sizeof(DCTELEM) * 8;
444
      temp = (DCTELEM)product;
445
    }
446
    output_ptr[i] = (JCOEF)temp;
447
  }
448
449
#else
450
451
38.7M
  register DCTELEM qval;
452
453
2.51G
  for (i = 0; i < DCTSIZE2; i++) {
454
2.47G
    qval = divisors[i];
455
2.47G
    temp = workspace[i];
456
    /* Divide the coefficient value by qval, ensuring proper rounding.
457
     * Since C does not specify the direction of rounding for negative
458
     * quotients, we have to force the dividend positive for portability.
459
     *
460
     * In most files, at least half of the output values will be zero
461
     * (at default quantization settings, more like three-quarters...)
462
     * so we should ensure that this case is fast.  On many machines,
463
     * a comparison is enough cheaper than a divide to make a special test
464
     * a win.  Since both inputs will be nonnegative, we need only test
465
     * for a < b to discover whether a/b is 0.
466
     * If your machine's division is fast enough, define FAST_DIVIDE.
467
     */
468
#ifdef FAST_DIVIDE
469
#define DIVIDE_BY(a, b)  a /= b
470
#else
471
2.47G
#define DIVIDE_BY(a, b)  if (a >= b) a /= b;  else a = 0
472
2.47G
#endif
473
2.47G
    if (temp < 0) {
474
287M
      temp = -temp;
475
287M
      temp += qval >> 1;        /* for rounding */
476
287M
      DIVIDE_BY(temp, qval);
477
287M
      temp = -temp;
478
2.18G
    } else {
479
2.18G
      temp += qval >> 1;        /* for rounding */
480
2.18G
      DIVIDE_BY(temp, qval);
481
2.18G
    }
482
2.47G
    output_ptr[i] = (JCOEF)temp;
483
2.47G
  }
484
485
38.7M
#endif
486
487
38.7M
}
488
489
490
/*
491
 * Perform forward DCT on one or more blocks of a component.
492
 *
493
 * The input samples are taken from the sample_data[] array starting at
494
 * position start_row/start_col, and moving to the right for any additional
495
 * blocks. The quantized coefficients are returned in coef_blocks[].
496
 */
497
498
METHODDEF(void)
499
forward_DCT(j_compress_ptr cinfo, jpeg_component_info *compptr,
500
            _JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
501
            JDIMENSION start_row, JDIMENSION start_col, JDIMENSION num_blocks)
502
/* This version is used for integer DCT implementations. */
503
72.7M
{
504
  /* This routine is heavily used, so it's worth coding it tightly. */
505
72.7M
  my_fdct_ptr fdct = (my_fdct_ptr)cinfo->fdct;
506
72.7M
  DCTELEM *divisors = fdct->divisors[compptr->quant_tbl_no];
507
72.7M
  DCTELEM *workspace;
508
72.7M
  JDIMENSION bi;
509
510
  /* Make sure the compiler doesn't look up these every pass */
511
72.7M
  forward_DCT_method_ptr do_dct = fdct->dct;
512
72.7M
  convsamp_method_ptr do_convsamp = fdct->convsamp;
513
72.7M
  quantize_method_ptr do_quantize = fdct->quantize;
514
72.7M
  workspace = fdct->workspace;
515
516
72.7M
  sample_data += start_row;     /* fold in the vertical offset once */
517
518
185M
  for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
519
    /* Load data into workspace, applying unsigned->signed conversion */
520
#ifdef WITH_PROFILE
521
    cinfo->master->start = getTime();
522
#endif
523
112M
    (*do_convsamp) (sample_data, start_col, workspace);
524
#ifdef WITH_PROFILE
525
    cinfo->master->convsamp_elapsed += getTime() - cinfo->master->start;
526
    cinfo->master->convsamp_msamples += (double)DCTSIZE2 / 1000000.;
527
#endif
528
529
    /* Perform the DCT */
530
#ifdef WITH_PROFILE
531
    cinfo->master->start = getTime();
532
#endif
533
112M
    (*do_dct) (workspace);
534
#ifdef WITH_PROFILE
535
    cinfo->master->fdct_elapsed += getTime() - cinfo->master->start;
536
    cinfo->master->fdct_mcoeffs += (double)DCTSIZE2 / 1000000.;
537
#endif
538
539
    /* Quantize/descale the coefficients, and store into coef_blocks[] */
540
#ifdef WITH_PROFILE
541
    cinfo->master->start = getTime();
542
#endif
543
112M
    (*do_quantize) (coef_blocks[bi], divisors, workspace);
544
#ifdef WITH_PROFILE
545
    cinfo->master->quantize_elapsed += getTime() - cinfo->master->start;
546
    cinfo->master->quantize_mcoeffs += (double)DCTSIZE2 / 1000000.;
547
#endif
548
112M
  }
549
72.7M
}
jcdctmgr-8.c:forward_DCT
Line
Count
Source
503
52.3M
{
504
  /* This routine is heavily used, so it's worth coding it tightly. */
505
52.3M
  my_fdct_ptr fdct = (my_fdct_ptr)cinfo->fdct;
506
52.3M
  DCTELEM *divisors = fdct->divisors[compptr->quant_tbl_no];
507
52.3M
  DCTELEM *workspace;
508
52.3M
  JDIMENSION bi;
509
510
  /* Make sure the compiler doesn't look up these every pass */
511
52.3M
  forward_DCT_method_ptr do_dct = fdct->dct;
512
52.3M
  convsamp_method_ptr do_convsamp = fdct->convsamp;
513
52.3M
  quantize_method_ptr do_quantize = fdct->quantize;
514
52.3M
  workspace = fdct->workspace;
515
516
52.3M
  sample_data += start_row;     /* fold in the vertical offset once */
517
518
126M
  for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
519
    /* Load data into workspace, applying unsigned->signed conversion */
520
#ifdef WITH_PROFILE
521
    cinfo->master->start = getTime();
522
#endif
523
73.6M
    (*do_convsamp) (sample_data, start_col, workspace);
524
#ifdef WITH_PROFILE
525
    cinfo->master->convsamp_elapsed += getTime() - cinfo->master->start;
526
    cinfo->master->convsamp_msamples += (double)DCTSIZE2 / 1000000.;
527
#endif
528
529
    /* Perform the DCT */
530
#ifdef WITH_PROFILE
531
    cinfo->master->start = getTime();
532
#endif
533
73.6M
    (*do_dct) (workspace);
534
#ifdef WITH_PROFILE
535
    cinfo->master->fdct_elapsed += getTime() - cinfo->master->start;
536
    cinfo->master->fdct_mcoeffs += (double)DCTSIZE2 / 1000000.;
537
#endif
538
539
    /* Quantize/descale the coefficients, and store into coef_blocks[] */
540
#ifdef WITH_PROFILE
541
    cinfo->master->start = getTime();
542
#endif
543
73.6M
    (*do_quantize) (coef_blocks[bi], divisors, workspace);
544
#ifdef WITH_PROFILE
545
    cinfo->master->quantize_elapsed += getTime() - cinfo->master->start;
546
    cinfo->master->quantize_mcoeffs += (double)DCTSIZE2 / 1000000.;
547
#endif
548
73.6M
  }
549
52.3M
}
jcdctmgr-12.c:forward_DCT
Line
Count
Source
503
20.3M
{
504
  /* This routine is heavily used, so it's worth coding it tightly. */
505
20.3M
  my_fdct_ptr fdct = (my_fdct_ptr)cinfo->fdct;
506
20.3M
  DCTELEM *divisors = fdct->divisors[compptr->quant_tbl_no];
507
20.3M
  DCTELEM *workspace;
508
20.3M
  JDIMENSION bi;
509
510
  /* Make sure the compiler doesn't look up these every pass */
511
20.3M
  forward_DCT_method_ptr do_dct = fdct->dct;
512
20.3M
  convsamp_method_ptr do_convsamp = fdct->convsamp;
513
20.3M
  quantize_method_ptr do_quantize = fdct->quantize;
514
20.3M
  workspace = fdct->workspace;
515
516
20.3M
  sample_data += start_row;     /* fold in the vertical offset once */
517
518
59.1M
  for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
519
    /* Load data into workspace, applying unsigned->signed conversion */
520
#ifdef WITH_PROFILE
521
    cinfo->master->start = getTime();
522
#endif
523
38.7M
    (*do_convsamp) (sample_data, start_col, workspace);
524
#ifdef WITH_PROFILE
525
    cinfo->master->convsamp_elapsed += getTime() - cinfo->master->start;
526
    cinfo->master->convsamp_msamples += (double)DCTSIZE2 / 1000000.;
527
#endif
528
529
    /* Perform the DCT */
530
#ifdef WITH_PROFILE
531
    cinfo->master->start = getTime();
532
#endif
533
38.7M
    (*do_dct) (workspace);
534
#ifdef WITH_PROFILE
535
    cinfo->master->fdct_elapsed += getTime() - cinfo->master->start;
536
    cinfo->master->fdct_mcoeffs += (double)DCTSIZE2 / 1000000.;
537
#endif
538
539
    /* Quantize/descale the coefficients, and store into coef_blocks[] */
540
#ifdef WITH_PROFILE
541
    cinfo->master->start = getTime();
542
#endif
543
38.7M
    (*do_quantize) (coef_blocks[bi], divisors, workspace);
544
#ifdef WITH_PROFILE
545
    cinfo->master->quantize_elapsed += getTime() - cinfo->master->start;
546
    cinfo->master->quantize_mcoeffs += (double)DCTSIZE2 / 1000000.;
547
#endif
548
38.7M
  }
549
20.3M
}
550
551
552
#ifdef DCT_FLOAT_SUPPORTED
553
554
METHODDEF(void)
555
convsamp_float(_JSAMPARRAY sample_data, JDIMENSION start_col,
556
               FAST_FLOAT *workspace)
557
0
{
558
0
  register FAST_FLOAT *workspaceptr;
559
0
  register _JSAMPROW elemptr;
560
0
  register int elemr;
561
562
0
  workspaceptr = workspace;
563
0
  for (elemr = 0; elemr < DCTSIZE; elemr++) {
564
0
    elemptr = sample_data[elemr] + start_col;
565
0
#if DCTSIZE == 8                /* unroll the inner loop */
566
0
    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
567
0
    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
568
0
    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
569
0
    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
570
0
    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
571
0
    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
572
0
    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
573
0
    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
574
#else
575
    {
576
      register int elemc;
577
      for (elemc = DCTSIZE; elemc > 0; elemc--)
578
        *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
579
    }
580
#endif
581
0
  }
582
0
}
Unexecuted instantiation: jcdctmgr-8.c:convsamp_float
Unexecuted instantiation: jcdctmgr-12.c:convsamp_float
583
584
585
METHODDEF(void)
586
quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
587
               FAST_FLOAT *workspace)
588
0
{
589
0
  register FAST_FLOAT temp;
590
0
  register int i;
591
0
  register JCOEFPTR output_ptr = coef_block;
592
593
0
  for (i = 0; i < DCTSIZE2; i++) {
594
    /* Apply the quantization and scaling factor */
595
0
    temp = workspace[i] * divisors[i];
596
597
    /* Round to nearest integer.
598
     * Since C does not specify the direction of rounding for negative
599
     * quotients, we have to force the dividend positive for portability.
600
     * The maximum coefficient size is +-16K (for 12-bit data), so this
601
     * code should work for either 16-bit or 32-bit ints.
602
     */
603
0
    output_ptr[i] = (JCOEF)((int)(temp + (FAST_FLOAT)16384.5) - 16384);
604
0
  }
605
0
}
Unexecuted instantiation: jcdctmgr-8.c:quantize_float
Unexecuted instantiation: jcdctmgr-12.c:quantize_float
606
607
608
METHODDEF(void)
609
forward_DCT_float(j_compress_ptr cinfo, jpeg_component_info *compptr,
610
                  _JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
611
                  JDIMENSION start_row, JDIMENSION start_col,
612
                  JDIMENSION num_blocks)
613
/* This version is used for floating-point DCT implementations. */
614
5.04M
{
615
  /* This routine is heavily used, so it's worth coding it tightly. */
616
5.04M
  my_fdct_ptr fdct = (my_fdct_ptr)cinfo->fdct;
617
5.04M
  FAST_FLOAT *divisors = fdct->float_divisors[compptr->quant_tbl_no];
618
5.04M
  FAST_FLOAT *workspace;
619
5.04M
  JDIMENSION bi;
620
621
622
  /* Make sure the compiler doesn't look up these every pass */
623
5.04M
  float_DCT_method_ptr do_dct = fdct->float_dct;
624
5.04M
  float_convsamp_method_ptr do_convsamp = fdct->float_convsamp;
625
5.04M
  float_quantize_method_ptr do_quantize = fdct->float_quantize;
626
5.04M
  workspace = fdct->float_workspace;
627
628
5.04M
  sample_data += start_row;     /* fold in the vertical offset once */
629
630
11.5M
  for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
631
    /* Load data into workspace, applying unsigned->signed conversion */
632
#ifdef WITH_PROFILE
633
    cinfo->master->start = getTime();
634
#endif
635
6.54M
    (*do_convsamp) (sample_data, start_col, workspace);
636
#ifdef WITH_PROFILE
637
    cinfo->master->convsamp_elapsed += getTime() - cinfo->master->start;
638
    cinfo->master->convsamp_msamples += (double)DCTSIZE2 / 1000000.;
639
#endif
640
641
    /* Perform the DCT */
642
#ifdef WITH_PROFILE
643
    cinfo->master->start = getTime();
644
#endif
645
6.54M
    (*do_dct) (workspace);
646
#ifdef WITH_PROFILE
647
    cinfo->master->fdct_elapsed += getTime() - cinfo->master->start;
648
    cinfo->master->fdct_mcoeffs += (double)DCTSIZE2 / 1000000.;
649
#endif
650
651
    /* Quantize/descale the coefficients, and store into coef_blocks[] */
652
#ifdef WITH_PROFILE
653
    cinfo->master->start = getTime();
654
#endif
655
6.54M
    (*do_quantize) (coef_blocks[bi], divisors, workspace);
656
#ifdef WITH_PROFILE
657
    cinfo->master->quantize_elapsed += getTime() - cinfo->master->start;
658
    cinfo->master->quantize_mcoeffs += (double)DCTSIZE2 / 1000000.;
659
#endif
660
6.54M
  }
661
5.04M
}
jcdctmgr-8.c:forward_DCT_float
Line
Count
Source
614
5.04M
{
615
  /* This routine is heavily used, so it's worth coding it tightly. */
616
5.04M
  my_fdct_ptr fdct = (my_fdct_ptr)cinfo->fdct;
617
5.04M
  FAST_FLOAT *divisors = fdct->float_divisors[compptr->quant_tbl_no];
618
5.04M
  FAST_FLOAT *workspace;
619
5.04M
  JDIMENSION bi;
620
621
622
  /* Make sure the compiler doesn't look up these every pass */
623
5.04M
  float_DCT_method_ptr do_dct = fdct->float_dct;
624
5.04M
  float_convsamp_method_ptr do_convsamp = fdct->float_convsamp;
625
5.04M
  float_quantize_method_ptr do_quantize = fdct->float_quantize;
626
5.04M
  workspace = fdct->float_workspace;
627
628
5.04M
  sample_data += start_row;     /* fold in the vertical offset once */
629
630
11.5M
  for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
631
    /* Load data into workspace, applying unsigned->signed conversion */
632
#ifdef WITH_PROFILE
633
    cinfo->master->start = getTime();
634
#endif
635
6.54M
    (*do_convsamp) (sample_data, start_col, workspace);
636
#ifdef WITH_PROFILE
637
    cinfo->master->convsamp_elapsed += getTime() - cinfo->master->start;
638
    cinfo->master->convsamp_msamples += (double)DCTSIZE2 / 1000000.;
639
#endif
640
641
    /* Perform the DCT */
642
#ifdef WITH_PROFILE
643
    cinfo->master->start = getTime();
644
#endif
645
6.54M
    (*do_dct) (workspace);
646
#ifdef WITH_PROFILE
647
    cinfo->master->fdct_elapsed += getTime() - cinfo->master->start;
648
    cinfo->master->fdct_mcoeffs += (double)DCTSIZE2 / 1000000.;
649
#endif
650
651
    /* Quantize/descale the coefficients, and store into coef_blocks[] */
652
#ifdef WITH_PROFILE
653
    cinfo->master->start = getTime();
654
#endif
655
6.54M
    (*do_quantize) (coef_blocks[bi], divisors, workspace);
656
#ifdef WITH_PROFILE
657
    cinfo->master->quantize_elapsed += getTime() - cinfo->master->start;
658
    cinfo->master->quantize_mcoeffs += (double)DCTSIZE2 / 1000000.;
659
#endif
660
6.54M
  }
661
5.04M
}
Unexecuted instantiation: jcdctmgr-12.c:forward_DCT_float
662
663
#endif /* DCT_FLOAT_SUPPORTED */
664
665
666
/*
667
 * Initialize FDCT manager.
668
 */
669
670
GLOBAL(void)
671
_jinit_forward_dct(j_compress_ptr cinfo)
672
93.6k
{
673
93.6k
  my_fdct_ptr fdct;
674
93.6k
  int i;
675
676
93.6k
  if (cinfo->data_precision != BITS_IN_JSAMPLE)
677
0
    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
678
679
93.6k
  fdct = (my_fdct_ptr)
680
93.6k
    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
681
93.6k
                                sizeof(my_fdct_controller));
682
93.6k
  cinfo->fdct = (struct jpeg_forward_dct *)fdct;
683
93.6k
  fdct->pub.start_pass = start_pass_fdctmgr;
684
685
  /* First determine the DCT... */
686
93.6k
  switch (cinfo->dct_method) {
687
0
#ifdef DCT_ISLOW_SUPPORTED
688
74.2k
  case JDCT_ISLOW:
689
74.2k
    fdct->pub._forward_DCT = forward_DCT;
690
#ifdef WITH_SIMD
691
32.2k
    if (!jsimd_set_fdct_islow(cinfo, &fdct->dct))
692
0
#endif
693
41.9k
      fdct->dct = _jpeg_fdct_islow;
694
74.2k
    break;
695
0
#endif
696
0
#ifdef DCT_IFAST_SUPPORTED
697
15.4k
  case JDCT_IFAST:
698
15.4k
    fdct->pub._forward_DCT = forward_DCT;
699
#ifdef WITH_SIMD
700
8.47k
    if (!jsimd_set_fdct_ifast(cinfo, &fdct->dct))
701
0
#endif
702
7.00k
      fdct->dct = _jpeg_fdct_ifast;
703
15.4k
    break;
704
0
#endif
705
0
#ifdef DCT_FLOAT_SUPPORTED
706
3.93k
  case JDCT_FLOAT:
707
3.93k
    fdct->pub._forward_DCT = forward_DCT_float;
708
#ifdef WITH_SIMD
709
3.93k
    if (!jsimd_set_fdct_float(cinfo, &fdct->float_dct))
710
0
#endif
711
0
      fdct->float_dct = jpeg_fdct_float;
712
3.93k
    break;
713
0
#endif
714
0
  default:
715
0
    ERREXIT(cinfo, JERR_NOT_COMPILED);
716
0
    break;
717
93.6k
  }
718
719
  /* ...then the supporting stages. */
720
93.6k
  switch (cinfo->dct_method) {
721
0
#ifdef DCT_ISLOW_SUPPORTED
722
74.2k
  case JDCT_ISLOW:
723
74.2k
#endif
724
74.2k
#ifdef DCT_IFAST_SUPPORTED
725
89.6k
  case JDCT_IFAST:
726
89.6k
#endif
727
89.6k
#if defined(DCT_ISLOW_SUPPORTED) || defined(DCT_IFAST_SUPPORTED)
728
#ifdef WITH_SIMD
729
40.7k
    if (!jsimd_set_convsamp(cinfo, &fdct->convsamp))
730
0
#endif
731
0
      fdct->convsamp = convsamp;
732
#ifdef WITH_SIMD
733
40.7k
    if (!jsimd_set_quantize(cinfo, &fdct->quantize))
734
0
#endif
735
0
      fdct->quantize = quantize;
736
89.6k
    break;
737
0
#endif
738
0
#ifdef DCT_FLOAT_SUPPORTED
739
3.93k
  case JDCT_FLOAT:
740
#ifdef WITH_SIMD
741
3.93k
    if (!jsimd_set_convsamp_float(cinfo, &fdct->float_convsamp))
742
0
#endif
743
0
      fdct->float_convsamp = convsamp_float;
744
#ifdef WITH_SIMD
745
3.93k
    if (!jsimd_set_quantize_float(cinfo, &fdct->float_quantize))
746
0
#endif
747
0
      fdct->float_quantize = quantize_float;
748
3.93k
    break;
749
0
#endif
750
0
  default:
751
0
    ERREXIT(cinfo, JERR_NOT_COMPILED);
752
0
    break;
753
93.6k
  }
754
755
  /* Allocate workspace memory */
756
93.6k
#ifdef DCT_FLOAT_SUPPORTED
757
93.6k
  if (cinfo->dct_method == JDCT_FLOAT)
758
3.93k
    fdct->float_workspace = (FAST_FLOAT *)
759
3.93k
      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
760
3.93k
                                  sizeof(FAST_FLOAT) * DCTSIZE2);
761
89.6k
  else
762
89.6k
#endif
763
89.6k
    fdct->workspace = (DCTELEM *)
764
89.6k
      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
765
89.6k
                                  sizeof(DCTELEM) * DCTSIZE2);
766
767
  /* Mark divisor tables unallocated */
768
468k
  for (i = 0; i < NUM_QUANT_TBLS; i++) {
769
374k
    fdct->divisors[i] = NULL;
770
374k
#ifdef DCT_FLOAT_SUPPORTED
771
    fdct->float_divisors[i] = NULL;
772
374k
#endif
773
374k
  }
774
93.6k
}
jinit_forward_dct
Line
Count
Source
672
44.6k
{
673
44.6k
  my_fdct_ptr fdct;
674
44.6k
  int i;
675
676
44.6k
  if (cinfo->data_precision != BITS_IN_JSAMPLE)
677
0
    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
678
679
44.6k
  fdct = (my_fdct_ptr)
680
44.6k
    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
681
44.6k
                                sizeof(my_fdct_controller));
682
44.6k
  cinfo->fdct = (struct jpeg_forward_dct *)fdct;
683
44.6k
  fdct->pub.start_pass = start_pass_fdctmgr;
684
685
  /* First determine the DCT... */
686
44.6k
  switch (cinfo->dct_method) {
687
0
#ifdef DCT_ISLOW_SUPPORTED
688
32.2k
  case JDCT_ISLOW:
689
32.2k
    fdct->pub._forward_DCT = forward_DCT;
690
32.2k
#ifdef WITH_SIMD
691
32.2k
    if (!jsimd_set_fdct_islow(cinfo, &fdct->dct))
692
0
#endif
693
0
      fdct->dct = _jpeg_fdct_islow;
694
32.2k
    break;
695
0
#endif
696
0
#ifdef DCT_IFAST_SUPPORTED
697
8.47k
  case JDCT_IFAST:
698
8.47k
    fdct->pub._forward_DCT = forward_DCT;
699
8.47k
#ifdef WITH_SIMD
700
8.47k
    if (!jsimd_set_fdct_ifast(cinfo, &fdct->dct))
701
0
#endif
702
0
      fdct->dct = _jpeg_fdct_ifast;
703
8.47k
    break;
704
0
#endif
705
0
#ifdef DCT_FLOAT_SUPPORTED
706
3.93k
  case JDCT_FLOAT:
707
3.93k
    fdct->pub._forward_DCT = forward_DCT_float;
708
3.93k
#ifdef WITH_SIMD
709
3.93k
    if (!jsimd_set_fdct_float(cinfo, &fdct->float_dct))
710
0
#endif
711
0
      fdct->float_dct = jpeg_fdct_float;
712
3.93k
    break;
713
0
#endif
714
0
  default:
715
0
    ERREXIT(cinfo, JERR_NOT_COMPILED);
716
0
    break;
717
44.6k
  }
718
719
  /* ...then the supporting stages. */
720
44.6k
  switch (cinfo->dct_method) {
721
0
#ifdef DCT_ISLOW_SUPPORTED
722
32.2k
  case JDCT_ISLOW:
723
32.2k
#endif
724
32.2k
#ifdef DCT_IFAST_SUPPORTED
725
40.7k
  case JDCT_IFAST:
726
40.7k
#endif
727
40.7k
#if defined(DCT_ISLOW_SUPPORTED) || defined(DCT_IFAST_SUPPORTED)
728
40.7k
#ifdef WITH_SIMD
729
40.7k
    if (!jsimd_set_convsamp(cinfo, &fdct->convsamp))
730
0
#endif
731
0
      fdct->convsamp = convsamp;
732
40.7k
#ifdef WITH_SIMD
733
40.7k
    if (!jsimd_set_quantize(cinfo, &fdct->quantize))
734
0
#endif
735
0
      fdct->quantize = quantize;
736
40.7k
    break;
737
0
#endif
738
0
#ifdef DCT_FLOAT_SUPPORTED
739
3.93k
  case JDCT_FLOAT:
740
3.93k
#ifdef WITH_SIMD
741
3.93k
    if (!jsimd_set_convsamp_float(cinfo, &fdct->float_convsamp))
742
0
#endif
743
0
      fdct->float_convsamp = convsamp_float;
744
3.93k
#ifdef WITH_SIMD
745
3.93k
    if (!jsimd_set_quantize_float(cinfo, &fdct->float_quantize))
746
0
#endif
747
0
      fdct->float_quantize = quantize_float;
748
3.93k
    break;
749
0
#endif
750
0
  default:
751
0
    ERREXIT(cinfo, JERR_NOT_COMPILED);
752
0
    break;
753
44.6k
  }
754
755
  /* Allocate workspace memory */
756
44.6k
#ifdef DCT_FLOAT_SUPPORTED
757
44.6k
  if (cinfo->dct_method == JDCT_FLOAT)
758
3.93k
    fdct->float_workspace = (FAST_FLOAT *)
759
3.93k
      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
760
3.93k
                                  sizeof(FAST_FLOAT) * DCTSIZE2);
761
40.7k
  else
762
40.7k
#endif
763
40.7k
    fdct->workspace = (DCTELEM *)
764
40.7k
      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
765
40.7k
                                  sizeof(DCTELEM) * DCTSIZE2);
766
767
  /* Mark divisor tables unallocated */
768
223k
  for (i = 0; i < NUM_QUANT_TBLS; i++) {
769
178k
    fdct->divisors[i] = NULL;
770
178k
#ifdef DCT_FLOAT_SUPPORTED
771
    fdct->float_divisors[i] = NULL;
772
178k
#endif
773
178k
  }
774
44.6k
}
j12init_forward_dct
Line
Count
Source
672
48.9k
{
673
48.9k
  my_fdct_ptr fdct;
674
48.9k
  int i;
675
676
48.9k
  if (cinfo->data_precision != BITS_IN_JSAMPLE)
677
0
    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
678
679
48.9k
  fdct = (my_fdct_ptr)
680
48.9k
    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
681
48.9k
                                sizeof(my_fdct_controller));
682
48.9k
  cinfo->fdct = (struct jpeg_forward_dct *)fdct;
683
48.9k
  fdct->pub.start_pass = start_pass_fdctmgr;
684
685
  /* First determine the DCT... */
686
48.9k
  switch (cinfo->dct_method) {
687
0
#ifdef DCT_ISLOW_SUPPORTED
688
41.9k
  case JDCT_ISLOW:
689
41.9k
    fdct->pub._forward_DCT = forward_DCT;
690
#ifdef WITH_SIMD
691
    if (!jsimd_set_fdct_islow(cinfo, &fdct->dct))
692
#endif
693
41.9k
      fdct->dct = _jpeg_fdct_islow;
694
41.9k
    break;
695
0
#endif
696
0
#ifdef DCT_IFAST_SUPPORTED
697
7.00k
  case JDCT_IFAST:
698
7.00k
    fdct->pub._forward_DCT = forward_DCT;
699
#ifdef WITH_SIMD
700
    if (!jsimd_set_fdct_ifast(cinfo, &fdct->dct))
701
#endif
702
7.00k
      fdct->dct = _jpeg_fdct_ifast;
703
7.00k
    break;
704
0
#endif
705
0
#ifdef DCT_FLOAT_SUPPORTED
706
0
  case JDCT_FLOAT:
707
0
    fdct->pub._forward_DCT = forward_DCT_float;
708
#ifdef WITH_SIMD
709
    if (!jsimd_set_fdct_float(cinfo, &fdct->float_dct))
710
#endif
711
0
      fdct->float_dct = jpeg_fdct_float;
712
0
    break;
713
0
#endif
714
0
  default:
715
0
    ERREXIT(cinfo, JERR_NOT_COMPILED);
716
0
    break;
717
48.9k
  }
718
719
  /* ...then the supporting stages. */
720
48.9k
  switch (cinfo->dct_method) {
721
0
#ifdef DCT_ISLOW_SUPPORTED
722
41.9k
  case JDCT_ISLOW:
723
41.9k
#endif
724
41.9k
#ifdef DCT_IFAST_SUPPORTED
725
48.9k
  case JDCT_IFAST:
726
48.9k
#endif
727
48.9k
#if defined(DCT_ISLOW_SUPPORTED) || defined(DCT_IFAST_SUPPORTED)
728
#ifdef WITH_SIMD
729
    if (!jsimd_set_convsamp(cinfo, &fdct->convsamp))
730
#endif
731
48.9k
      fdct->convsamp = convsamp;
732
#ifdef WITH_SIMD
733
    if (!jsimd_set_quantize(cinfo, &fdct->quantize))
734
#endif
735
48.9k
      fdct->quantize = quantize;
736
48.9k
    break;
737
0
#endif
738
0
#ifdef DCT_FLOAT_SUPPORTED
739
0
  case JDCT_FLOAT:
740
#ifdef WITH_SIMD
741
    if (!jsimd_set_convsamp_float(cinfo, &fdct->float_convsamp))
742
#endif
743
0
      fdct->float_convsamp = convsamp_float;
744
#ifdef WITH_SIMD
745
    if (!jsimd_set_quantize_float(cinfo, &fdct->float_quantize))
746
#endif
747
0
      fdct->float_quantize = quantize_float;
748
0
    break;
749
0
#endif
750
0
  default:
751
0
    ERREXIT(cinfo, JERR_NOT_COMPILED);
752
0
    break;
753
48.9k
  }
754
755
  /* Allocate workspace memory */
756
48.9k
#ifdef DCT_FLOAT_SUPPORTED
757
48.9k
  if (cinfo->dct_method == JDCT_FLOAT)
758
0
    fdct->float_workspace = (FAST_FLOAT *)
759
0
      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
760
0
                                  sizeof(FAST_FLOAT) * DCTSIZE2);
761
48.9k
  else
762
48.9k
#endif
763
48.9k
    fdct->workspace = (DCTELEM *)
764
48.9k
      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
765
48.9k
                                  sizeof(DCTELEM) * DCTSIZE2);
766
767
  /* Mark divisor tables unallocated */
768
244k
  for (i = 0; i < NUM_QUANT_TBLS; i++) {
769
195k
    fdct->divisors[i] = NULL;
770
195k
#ifdef DCT_FLOAT_SUPPORTED
771
    fdct->float_divisors[i] = NULL;
772
195k
#endif
773
195k
  }
774
48.9k
}
775
776
#endif /* defined(DCT_ISLOW_SUPPORTED) || defined(DCT_IFAST_SUPPORTED) ||
777
          defined(DCT_FLOAT_SUPPORTED) */