Coverage Report

Created: 2026-02-26 07:10

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libjpeg-turbo.main/src/jcdctmgr.c
Line
Count
Source
1
/*
2
 * jcdctmgr.c
3
 *
4
 * This file was part of the Independent JPEG Group's software:
5
 * Copyright (C) 1994-1996, Thomas G. Lane.
6
 * libjpeg-turbo Modifications:
7
 * Copyright (C) 1999-2006, MIYASAKA Masaru.
8
 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
9
 * Copyright (C) 2011, 2014-2015, 2022, 2024, 2026, D. R. Commander.
10
 * For conditions of distribution and use, see the accompanying README.ijg
11
 * file.
12
 *
13
 * This file contains the forward-DCT management logic.
14
 * This code selects a particular DCT implementation to be used,
15
 * and it performs related housekeeping chores including coefficient
16
 * quantization.
17
 */
18
19
#define JPEG_INTERNALS
20
#include "jinclude.h"
21
#include "jpeglib.h"
22
#include "jdct.h"               /* Private declarations for DCT subsystem */
23
#include "jsimddct.h"
24
25
26
/* Private subobject for this module */
27
28
/* Signature of an integer forward-DCT routine operating on a block of
 * DCTELEM values.
 */
typedef void (*forward_DCT_method_ptr) (DCTELEM *data);
29
/* Same, for the floating-point path (block of FAST_FLOAT values). */
typedef void (*float_DCT_method_ptr) (FAST_FLOAT *data);
30
31
/* Signature of a routine that loads samples from sample_data, beginning at
 * column start_col, into a DCTELEM workspace (see convsamp() in this file).
 */
typedef void (*convsamp_method_ptr) (_JSAMPARRAY sample_data,
32
                                     JDIMENSION start_col,
33
                                     DCTELEM *workspace);
34
/* Same, with a FAST_FLOAT workspace. */
typedef void (*float_convsamp_method_ptr) (_JSAMPARRAY sample_data,
35
                                           JDIMENSION start_col,
36
                                           FAST_FLOAT *workspace);
37
38
/* Signature of a routine that quantizes the values in workspace using the
 * divisors table and stores the results in coef_block (see quantize() in
 * this file).
 */
typedef void (*quantize_method_ptr) (JCOEFPTR coef_block, DCTELEM *divisors,
39
                                     DCTELEM *workspace);
40
/* Same, with FAST_FLOAT divisors and workspace. */
typedef void (*float_quantize_method_ptr) (JCOEFPTR coef_block,
41
                                           FAST_FLOAT *divisors,
42
                                           FAST_FLOAT *workspace);
43
44
/* Forward declaration: quantize() is defined after start_pass_fdctmgr(),
 * which refers to it by name.
 */
METHODDEF(void) quantize(JCOEFPTR, DCTELEM *, DCTELEM *);
45
46
/* Module-private state of the forward-DCT manager.  start_pass_fdctmgr()
 * obtains it by casting cinfo->fdct to my_fdct_ptr.
 */
typedef struct {
47
  struct jpeg_forward_dct pub;  /* public fields */
48
49
  /* Pointer to the DCT routine actually in use */
50
  forward_DCT_method_ptr dct;
51
  convsamp_method_ptr convsamp;         /* sample-loading routine in use */
52
  quantize_method_ptr quantize;         /* quantizer in use; start_pass_fdctmgr() may swap jsimd_quantize for quantize() */
53
54
  /* The actual post-DCT divisors --- not identical to the quant table
55
   * entries, because of scaling (especially for an unnormalized DCT).
56
   * Each table is given in normal array order.
57
   */
58
  DCTELEM *divisors[NUM_QUANT_TBLS];    /* each entry is allocated by start_pass_fdctmgr() when found NULL (DCTSIZE2 * 4 elements) */
59
60
  /* work area for FDCT subroutine */
61
  DCTELEM *workspace;
62
63
#ifdef DCT_FLOAT_SUPPORTED
64
  /* Same as above for the floating-point case. */
65
  float_DCT_method_ptr float_dct;
66
  float_convsamp_method_ptr float_convsamp;
67
  float_quantize_method_ptr float_quantize;
68
  FAST_FLOAT *float_divisors[NUM_QUANT_TBLS];   /* each entry holds DCTSIZE2 reciprocals, allocated by start_pass_fdctmgr() */
69
  FAST_FLOAT *float_workspace;
70
#endif
71
} my_fdct_controller;
72
73
/* Pointer alias used when casting cinfo->fdct to the private type. */
typedef my_fdct_controller *my_fdct_ptr;
74
75
76
#if BITS_IN_JSAMPLE == 8
77
78
/*
79
 * Find the highest bit in an integer through binary search.
80
 */
81
82
/* Return the 1-based position of the most significant set bit of val
 * (1 for val == 1, 16 for val >= 0x8000), or 0 when val is 0.
 *
 * The value is normalized towards the top of a 16-bit word: each test checks
 * whether the upper 8/4/2/1 bits are all clear and, if so, deducts that many
 * positions from the answer and shifts val left by the same amount.
 */
LOCAL(int)
83
flss(UINT16 val)
84
7.33M
{
85
7.33M
  int bit;
86
87
7.33M
  bit = 16;                     /* assume the top bit is set until shown otherwise */
88
89
7.33M
  if (!val)
90
0
    return 0;                   /* no bit set at all */
91
92
7.33M
  if (!(val & 0xff00)) {        /* upper 8 bits clear */
93
3.73M
    bit -= 8;
94
3.73M
    val <<= 8;
95
3.73M
  }
96
7.33M
  if (!(val & 0xf000)) {        /* upper 4 bits clear */
97
4.95M
    bit -= 4;
98
4.95M
    val <<= 4;
99
4.95M
  }
100
7.33M
  if (!(val & 0xc000)) {        /* upper 2 bits clear */
101
2.66M
    bit -= 2;
102
2.66M
    val <<= 2;
103
2.66M
  }
104
7.33M
  if (!(val & 0x8000)) {        /* top bit clear */
105
3.17M
    bit -= 1;
106
3.17M
    val <<= 1;                  /* val is not read again after this shift */
107
3.17M
  }
108
109
7.33M
  return bit;
110
7.33M
}
111
112
113
/*
114
 * Compute values to do a division using reciprocal.
115
 *
116
 * This implementation is based on an algorithm described in
117
 *   "Optimizing subroutines in assembly language:
118
 *   An optimization guide for x86 platforms" (https://agner.org/optimize).
119
 * More information about the basic algorithm can be found in
120
 * the paper "Integer Division Using Reciprocals" by Robert Alverson.
121
 *
122
 * The basic idea is to replace x/d by x * d^-1. In order to store
123
 * d^-1 with enough precision we shift it left a few places. It turns
124
 * out that this algorithm gives just enough precision, and also fits
125
 * into DCTELEM:
126
 *
127
 *   b = (the number of significant bits in divisor) - 1
128
 *   r = (word size) + b
129
 *   f = 2^r / divisor
130
 *
131
 * f will not be an integer for most cases, so we need to compensate
132
 * for the rounding error introduced:
133
 *
134
 *   no fractional part:
135
 *
136
 *       result = input >> r
137
 *
138
 *   fractional part of f < 0.5:
139
 *
140
 *       round f down to nearest integer
141
 *       result = ((input + 1) * f) >> r
142
 *
143
 *   fractional part of f > 0.5:
144
 *
145
 *       round f up to nearest integer
146
 *       result = (input * f) >> r
147
 *
148
 * This is the original algorithm that gives truncated results. But we
149
 * want properly rounded results, so we replace "input" with
150
 * "input + divisor/2".
151
 *
152
 * In order to allow SIMD implementations we also tweak the values to
153
 * allow the same calculation to be made at all times:
154
 *
155
 *   dctbl[0] = f rounded to nearest integer
156
 *   dctbl[1] = divisor / 2 (+ 1 if fractional part of f < 0.5)
157
 *   dctbl[2] = 1 << ((word size) * 2 - r)
158
 *   dctbl[3] = r - (word size)
159
 *
160
 * dctbl[2] is for stupid instruction sets where the shift operation
161
 * isn't member wise (e.g. MMX).
162
 *
163
 * The reason dctbl[2] and dctbl[3] reduce the shift with (word size)
164
 * is that most SIMD implementations have a "multiply and store top
165
 * half" operation.
166
 *
167
 * Lastly, we store each of the values in their own table instead
168
 * of in a consecutive manner, yet again in order to allow SIMD
169
 * routines.
170
 */
171
172
/* Fill in the reciprocal-division entries for one quantization divisor.
 *
 * divisor: the divisor to replace by a multiply-and-shift; values <= 1 take
 *          the identity path below.
 * dtbl:    address of this coefficient's slot in the first sub-table.  The
 *          four outputs (reciprocal, correction, scale, shift) are written
 *          DCTSIZE2 elements apart.
 *
 * Returns 0 if divisor <= 1 or if the final r is <= 16, and 1 otherwise.
 * Under WITH_SIMD, start_pass_fdctmgr() reacts to a 0 return by replacing
 * jsimd_quantize with the C quantize().
 */
LOCAL(int)
173
compute_reciprocal(UINT16 divisor, DCTELEM *dtbl)
174
7.36M
{
175
7.36M
  UDCTELEM2 fq, fr;             /* quotient and remainder of 2^r / divisor */
176
7.36M
  UDCTELEM c;                   /* rounding correction added before the multiply */
177
7.36M
  int b, r;
178
179
7.36M
  if (divisor <= 1) {
180
    /* divisor == 1 means unquantized, so these reciprocal/correction/shift
181
     * values will cause the C quantization algorithm to act like the
182
     * identity function.  Since only the C quantization algorithm is used in
183
     * these cases, the scale value is irrelevant.
184
     *
185
     * divisor == 0 can never happen in a normal program, because
186
     * jpeg_add_quant_table() clamps values < 1.  However, a program could
187
     * abuse the API by manually modifying the exposed quantization table just
188
     * before calling jpeg_start_compress().  Thus, we effectively clamp
189
     * values < 1 here as well, to avoid dividing by 0.
190
     */
191
30.2k
    dtbl[DCTSIZE2 * 0] = (DCTELEM)1;                        /* reciprocal */
192
30.2k
    dtbl[DCTSIZE2 * 1] = (DCTELEM)0;                        /* correction */
193
30.2k
    dtbl[DCTSIZE2 * 2] = (DCTELEM)1;                        /* scale */
194
30.2k
    dtbl[DCTSIZE2 * 3] = -(DCTELEM)(sizeof(DCTELEM) * 8);   /* shift */
195
30.2k
    return 0;
196
30.2k
  }
197
198
7.33M
  b = flss(divisor) - 1;        /* zero-based index of the highest set bit */
199
7.33M
  r  = sizeof(DCTELEM) * 8 + b; /* total shift: word size plus b */
200
201
7.33M
  fq = ((UDCTELEM2)1 << r) / divisor;
202
7.33M
  fr = ((UDCTELEM2)1 << r) % divisor;
203
204
7.33M
  c = divisor / 2;                      /* for rounding */
205
206
7.33M
  if (fr == 0) {                        /* divisor is power of two */
207
    /* fq will be one bit too large to fit in DCTELEM, so adjust */
208
1.13M
    fq >>= 1;
209
1.13M
    r--;
210
6.19M
  } else if (fr <= (divisor / 2U)) {    /* fractional part is < 0.5 */
211
1.71M
    c++;
212
4.47M
  } else {                              /* fractional part is > 0.5 */
213
4.47M
    fq++;
214
4.47M
  }
215
216
7.33M
  dtbl[DCTSIZE2 * 0] = (DCTELEM)fq;     /* reciprocal */
217
7.33M
  dtbl[DCTSIZE2 * 1] = (DCTELEM)c;      /* correction + roundfactor */
218
7.33M
#ifdef WITH_SIMD
219
7.33M
  dtbl[DCTSIZE2 * 2] = (DCTELEM)(1 << (sizeof(DCTELEM) * 8 * 2 - r)); /* scale */
220
#else
221
  dtbl[DCTSIZE2 * 2] = 1;               /* scale is not read by the C quantize() */
222
#endif
223
  /* NOTE(review): the cast binds to r only, so the subtraction is carried out
   * in the type of the sizeof expression and converted to DCTELEM on
   * assignment.
   */
7.33M
  dtbl[DCTSIZE2 * 3] = (DCTELEM)r - sizeof(DCTELEM) * 8; /* shift */
224
225
7.33M
  if (r <= 16) return 0;        /* 0 makes the WITH_SIMD caller fall back to the C quantize() */
226
7.26M
  else return 1;
227
7.33M
}
228
229
#endif
230
231
232
/*
233
 * Initialize for a processing pass.
234
 * Verify that all referenced Q-tables are present, and set up
235
 * the divisor table for each one.
236
 * In the current implementation, DCT of all components is done during
237
 * the first pass, even if only some components will be output in the
238
 * first scan.  Hence all components should be examined here.
239
 */
240
241
/* Per-pass setup of the divisor tables.
 *
 * cinfo: compression instance; cinfo->fdct is cast to the private
 *        my_fdct_ptr type.
 *
 * For every component the referenced quantization table is validated and the
 * divisor table matching cinfo->dct_method is allocated (only if its pointer
 * is still NULL) and filled in.
 *
 * Errors: ERREXIT1 with JERR_NO_QUANT_TABLE if quant_tbl_no is out of range
 * or the table pointer is NULL; ERREXIT with JERR_NOT_COMPILED if dct_method
 * matches none of the compiled-in cases.
 */
METHODDEF(void)
242
start_pass_fdctmgr(j_compress_ptr cinfo)
243
52.5k
{
244
52.5k
  my_fdct_ptr fdct = (my_fdct_ptr)cinfo->fdct;
245
52.5k
  int ci, qtblno, i;
246
52.5k
  jpeg_component_info *compptr;
247
52.5k
  JQUANT_TBL *qtbl;
248
52.5k
  DCTELEM *dtbl;
249
250
184k
  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
251
131k
       ci++, compptr++) {
252
131k
    qtblno = compptr->quant_tbl_no;
253
    /* Make sure specified quantization table is present */
254
131k
    if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
255
131k
        cinfo->quant_tbl_ptrs[qtblno] == NULL)
256
0
      ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
257
131k
    qtbl = cinfo->quant_tbl_ptrs[qtblno];
258
    /* Compute divisors for this quant table */
259
    /* We may do this more than once for same table, but it's not a big deal */
260
131k
    switch (cinfo->dct_method) {
261
0
#ifdef DCT_ISLOW_SUPPORTED
262
101k
    case JDCT_ISLOW:
263
      /* For LL&M IDCT method, divisors are equal to raw quantization
264
       * coefficients multiplied by 8 (to counteract scaling).
265
       */
266
101k
      if (fdct->divisors[qtblno] == NULL) {     /* allocate only when not yet present */
267
62.8k
        fdct->divisors[qtblno] = (DCTELEM *)
268
62.8k
          (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
269
62.8k
                                      (DCTSIZE2 * 4) * sizeof(DCTELEM));  /* room for four DCTSIZE2-element sub-tables */
270
62.8k
      }
271
101k
      dtbl = fdct->divisors[qtblno];
272
6.62M
      for (i = 0; i < DCTSIZE2; i++) {
273
#if BITS_IN_JSAMPLE == 8
274
#ifdef WITH_SIMD
275
2.89M
        if (!compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i]) &&
276
0
            fdct->quantize == jsimd_quantize)
277
0
          fdct->quantize = quantize;    /* compute_reciprocal() returned 0: switch to the C quantizer */
278
#else
279
        compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i]);
280
#endif
281
#else
282
        dtbl[i] = ((DCTELEM)qtbl->quantval[i]) << 3;
283
#endif
284
6.52M
      }
285
101k
      break;
286
0
#endif
287
0
#ifdef DCT_IFAST_SUPPORTED
288
23.7k
    case JDCT_IFAST:
289
23.7k
      {
290
        /* For AA&N IDCT method, divisors are equal to quantization
291
         * coefficients scaled by scalefactor[row]*scalefactor[col], where
292
         *   scalefactor[0] = 1
293
         *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
294
         * We apply a further scale factor of 8.
295
         */
296
23.7k
#define CONST_BITS  14
297
23.7k
        static const INT16 aanscales[DCTSIZE2] = {
298
          /* precomputed values scaled up by 14 bits */
299
23.7k
          16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
300
23.7k
          22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
301
23.7k
          21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
302
23.7k
          19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
303
23.7k
          16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
304
23.7k
          12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
305
23.7k
           8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
306
23.7k
           4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
307
23.7k
        };
308
23.7k
        SHIFT_TEMPS
309
310
23.7k
        if (fdct->divisors[qtblno] == NULL) {   /* allocate only when not yet present */
311
16.4k
          fdct->divisors[qtblno] = (DCTELEM *)
312
16.4k
            (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
313
16.4k
                                        (DCTSIZE2 * 4) * sizeof(DCTELEM));
314
16.4k
        }
315
23.7k
        dtbl = fdct->divisors[qtblno];
316
1.54M
        for (i = 0; i < DCTSIZE2; i++) {
317
#if BITS_IN_JSAMPLE == 8
318
#ifdef WITH_SIMD
319
789k
          if (!compute_reciprocal(
320
789k
                DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
321
789k
                                      (JLONG)aanscales[i]),
322
789k
                        CONST_BITS - 3), &dtbl[i]) &&
323
51.1k
              fdct->quantize == jsimd_quantize)
324
1.70k
            fdct->quantize = quantize;  /* compute_reciprocal() returned 0: switch to the C quantizer */
325
#else
326
          compute_reciprocal(
327
            DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
328
                                  (JLONG)aanscales[i]),
329
                    CONST_BITS-3), &dtbl[i]);
330
#endif
331
#else
332
          dtbl[i] = (DCTELEM)
333
732k
            DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
334
                                  (JLONG)aanscales[i]),
335
                    CONST_BITS - 3);
336
#endif
337
1.52M
        }
338
23.7k
      }
339
23.7k
      break;
340
0
#endif
341
0
#ifdef DCT_FLOAT_SUPPORTED
342
6.24k
    case JDCT_FLOAT:
343
6.24k
      {
344
        /* For float AA&N IDCT method, divisors are equal to quantization
345
         * coefficients scaled by scalefactor[row]*scalefactor[col], where
346
         *   scalefactor[0] = 1
347
         *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
348
         * We apply a further scale factor of 8.
349
         * What's actually stored is 1/divisor so that the inner loop can
350
         * use a multiplication rather than a division.
351
         */
352
6.24k
        FAST_FLOAT *fdtbl;
353
6.24k
        int row, col;
354
6.24k
        static const double aanscalefactor[DCTSIZE] = {
355
6.24k
          1.0, 1.387039845, 1.306562965, 1.175875602,
356
6.24k
          1.0, 0.785694958, 0.541196100, 0.275899379
357
6.24k
        };
358
359
6.24k
        if (fdct->float_divisors[qtblno] == NULL) {     /* allocate only when not yet present */
360
4.47k
          fdct->float_divisors[qtblno] = (FAST_FLOAT *)
361
4.47k
            (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
362
4.47k
                                        DCTSIZE2 * sizeof(FAST_FLOAT));
363
4.47k
        }
364
6.24k
        fdtbl = fdct->float_divisors[qtblno];
365
6.24k
        i = 0;                  /* running index over the DCTSIZE x DCTSIZE table */
366
56.2k
        for (row = 0; row < DCTSIZE; row++) {
367
449k
          for (col = 0; col < DCTSIZE; col++) {
368
399k
            fdtbl[i] = (FAST_FLOAT)
369
399k
              (1.0 / (((double)qtbl->quantval[i] *
370
399k
                       aanscalefactor[row] * aanscalefactor[col] * 8.0)));
371
399k
            i++;
372
399k
          }
373
49.9k
        }
374
6.24k
      }
375
6.24k
      break;
376
0
#endif
377
0
    default:
378
0
      ERREXIT(cinfo, JERR_NOT_COMPILED);
379
0
      break;
380
131k
    }
381
131k
  }
382
52.5k
}
jcdctmgr-8.c:start_pass_fdctmgr
Line
Count
Source
243
26.0k
{
244
26.0k
  my_fdct_ptr fdct = (my_fdct_ptr)cinfo->fdct;
245
26.0k
  int ci, qtblno, i;
246
26.0k
  jpeg_component_info *compptr;
247
26.0k
  JQUANT_TBL *qtbl;
248
26.0k
  DCTELEM *dtbl;
249
250
89.8k
  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
251
63.8k
       ci++, compptr++) {
252
63.8k
    qtblno = compptr->quant_tbl_no;
253
    /* Make sure specified quantization table is present */
254
63.8k
    if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
255
63.8k
        cinfo->quant_tbl_ptrs[qtblno] == NULL)
256
0
      ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
257
63.8k
    qtbl = cinfo->quant_tbl_ptrs[qtblno];
258
    /* Compute divisors for this quant table */
259
    /* We may do this more than once for same table, but it's not a big deal */
260
63.8k
    switch (cinfo->dct_method) {
261
0
#ifdef DCT_ISLOW_SUPPORTED
262
45.2k
    case JDCT_ISLOW:
263
      /* For LL&M IDCT method, divisors are equal to raw quantization
264
       * coefficients multiplied by 8 (to counteract scaling).
265
       */
266
45.2k
      if (fdct->divisors[qtblno] == NULL) {
267
28.8k
        fdct->divisors[qtblno] = (DCTELEM *)
268
28.8k
          (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
269
28.8k
                                      (DCTSIZE2 * 4) * sizeof(DCTELEM));
270
28.8k
      }
271
45.2k
      dtbl = fdct->divisors[qtblno];
272
2.94M
      for (i = 0; i < DCTSIZE2; i++) {
273
2.89M
#if BITS_IN_JSAMPLE == 8
274
2.89M
#ifdef WITH_SIMD
275
2.89M
        if (!compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i]) &&
276
0
            fdct->quantize == jsimd_quantize)
277
0
          fdct->quantize = quantize;
278
#else
279
        compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i]);
280
#endif
281
#else
282
        dtbl[i] = ((DCTELEM)qtbl->quantval[i]) << 3;
283
#endif
284
2.89M
      }
285
45.2k
      break;
286
0
#endif
287
0
#ifdef DCT_IFAST_SUPPORTED
288
12.3k
    case JDCT_IFAST:
289
12.3k
      {
290
        /* For AA&N IDCT method, divisors are equal to quantization
291
         * coefficients scaled by scalefactor[row]*scalefactor[col], where
292
         *   scalefactor[0] = 1
293
         *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
294
         * We apply a further scale factor of 8.
295
         */
296
12.3k
#define CONST_BITS  14
297
12.3k
        static const INT16 aanscales[DCTSIZE2] = {
298
          /* precomputed values scaled up by 14 bits */
299
12.3k
          16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
300
12.3k
          22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
301
12.3k
          21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
302
12.3k
          19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
303
12.3k
          16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
304
12.3k
          12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
305
12.3k
           8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
306
12.3k
           4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
307
12.3k
        };
308
12.3k
        SHIFT_TEMPS
309
310
12.3k
        if (fdct->divisors[qtblno] == NULL) {
311
8.78k
          fdct->divisors[qtblno] = (DCTELEM *)
312
8.78k
            (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
313
8.78k
                                        (DCTSIZE2 * 4) * sizeof(DCTELEM));
314
8.78k
        }
315
12.3k
        dtbl = fdct->divisors[qtblno];
316
801k
        for (i = 0; i < DCTSIZE2; i++) {
317
789k
#if BITS_IN_JSAMPLE == 8
318
789k
#ifdef WITH_SIMD
319
789k
          if (!compute_reciprocal(
320
789k
                DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
321
789k
                                      (JLONG)aanscales[i]),
322
789k
                        CONST_BITS - 3), &dtbl[i]) &&
323
51.1k
              fdct->quantize == jsimd_quantize)
324
1.70k
            fdct->quantize = quantize;
325
#else
326
          compute_reciprocal(
327
            DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
328
                                  (JLONG)aanscales[i]),
329
                    CONST_BITS-3), &dtbl[i]);
330
#endif
331
#else
332
          dtbl[i] = (DCTELEM)
333
            DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
334
                                  (JLONG)aanscales[i]),
335
                    CONST_BITS - 3);
336
#endif
337
789k
        }
338
12.3k
      }
339
12.3k
      break;
340
0
#endif
341
0
#ifdef DCT_FLOAT_SUPPORTED
342
6.24k
    case JDCT_FLOAT:
343
6.24k
      {
344
        /* For float AA&N IDCT method, divisors are equal to quantization
345
         * coefficients scaled by scalefactor[row]*scalefactor[col], where
346
         *   scalefactor[0] = 1
347
         *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
348
         * We apply a further scale factor of 8.
349
         * What's actually stored is 1/divisor so that the inner loop can
350
         * use a multiplication rather than a division.
351
         */
352
6.24k
        FAST_FLOAT *fdtbl;
353
6.24k
        int row, col;
354
6.24k
        static const double aanscalefactor[DCTSIZE] = {
355
6.24k
          1.0, 1.387039845, 1.306562965, 1.175875602,
356
6.24k
          1.0, 0.785694958, 0.541196100, 0.275899379
357
6.24k
        };
358
359
6.24k
        if (fdct->float_divisors[qtblno] == NULL) {
360
4.47k
          fdct->float_divisors[qtblno] = (FAST_FLOAT *)
361
4.47k
            (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
362
4.47k
                                        DCTSIZE2 * sizeof(FAST_FLOAT));
363
4.47k
        }
364
6.24k
        fdtbl = fdct->float_divisors[qtblno];
365
6.24k
        i = 0;
366
56.2k
        for (row = 0; row < DCTSIZE; row++) {
367
449k
          for (col = 0; col < DCTSIZE; col++) {
368
399k
            fdtbl[i] = (FAST_FLOAT)
369
399k
              (1.0 / (((double)qtbl->quantval[i] *
370
399k
                       aanscalefactor[row] * aanscalefactor[col] * 8.0)));
371
399k
            i++;
372
399k
          }
373
49.9k
        }
374
6.24k
      }
375
6.24k
      break;
376
0
#endif
377
0
    default:
378
0
      ERREXIT(cinfo, JERR_NOT_COMPILED);
379
0
      break;
380
63.8k
    }
381
63.8k
  }
382
26.0k
}
jcdctmgr-12.c:start_pass_fdctmgr
Line
Count
Source
243
26.4k
{
244
26.4k
  my_fdct_ptr fdct = (my_fdct_ptr)cinfo->fdct;
245
26.4k
  int ci, qtblno, i;
246
26.4k
  jpeg_component_info *compptr;
247
26.4k
  JQUANT_TBL *qtbl;
248
26.4k
  DCTELEM *dtbl;
249
250
94.6k
  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
251
68.1k
       ci++, compptr++) {
252
68.1k
    qtblno = compptr->quant_tbl_no;
253
    /* Make sure specified quantization table is present */
254
68.1k
    if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
255
68.1k
        cinfo->quant_tbl_ptrs[qtblno] == NULL)
256
0
      ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
257
68.1k
    qtbl = cinfo->quant_tbl_ptrs[qtblno];
258
    /* Compute divisors for this quant table */
259
    /* We may do this more than once for same table, but it's not a big deal */
260
68.1k
    switch (cinfo->dct_method) {
261
0
#ifdef DCT_ISLOW_SUPPORTED
262
56.6k
    case JDCT_ISLOW:
263
      /* For LL&M IDCT method, divisors are equal to raw quantization
264
       * coefficients multiplied by 8 (to counteract scaling).
265
       */
266
56.6k
      if (fdct->divisors[qtblno] == NULL) {
267
34.0k
        fdct->divisors[qtblno] = (DCTELEM *)
268
34.0k
          (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
269
34.0k
                                      (DCTSIZE2 * 4) * sizeof(DCTELEM));
270
34.0k
      }
271
56.6k
      dtbl = fdct->divisors[qtblno];
272
3.68M
      for (i = 0; i < DCTSIZE2; i++) {
273
#if BITS_IN_JSAMPLE == 8
274
#ifdef WITH_SIMD
275
        if (!compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i]) &&
276
            fdct->quantize == jsimd_quantize)
277
          fdct->quantize = quantize;
278
#else
279
        compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i]);
280
#endif
281
#else
282
3.62M
        dtbl[i] = ((DCTELEM)qtbl->quantval[i]) << 3;
283
3.62M
#endif
284
3.62M
      }
285
56.6k
      break;
286
0
#endif
287
0
#ifdef DCT_IFAST_SUPPORTED
288
11.4k
    case JDCT_IFAST:
289
11.4k
      {
290
        /* For AA&N IDCT method, divisors are equal to quantization
291
         * coefficients scaled by scalefactor[row]*scalefactor[col], where
292
         *   scalefactor[0] = 1
293
         *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
294
         * We apply a further scale factor of 8.
295
         */
296
11.4k
#define CONST_BITS  14
297
11.4k
        static const INT16 aanscales[DCTSIZE2] = {
298
          /* precomputed values scaled up by 14 bits */
299
11.4k
          16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
300
11.4k
          22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
301
11.4k
          21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
302
11.4k
          19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
303
11.4k
          16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
304
11.4k
          12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
305
11.4k
           8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
306
11.4k
           4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
307
11.4k
        };
308
11.4k
        SHIFT_TEMPS
309
310
11.4k
        if (fdct->divisors[qtblno] == NULL) {
311
7.63k
          fdct->divisors[qtblno] = (DCTELEM *)
312
7.63k
            (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
313
7.63k
                                        (DCTSIZE2 * 4) * sizeof(DCTELEM));
314
7.63k
        }
315
11.4k
        dtbl = fdct->divisors[qtblno];
316
744k
        for (i = 0; i < DCTSIZE2; i++) {
317
#if BITS_IN_JSAMPLE == 8
318
#ifdef WITH_SIMD
319
          if (!compute_reciprocal(
320
                DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
321
                                      (JLONG)aanscales[i]),
322
                        CONST_BITS - 3), &dtbl[i]) &&
323
              fdct->quantize == jsimd_quantize)
324
            fdct->quantize = quantize;
325
#else
326
          compute_reciprocal(
327
            DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
328
                                  (JLONG)aanscales[i]),
329
                    CONST_BITS-3), &dtbl[i]);
330
#endif
331
#else
332
732k
          dtbl[i] = (DCTELEM)
333
732k
            DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
334
732k
                                  (JLONG)aanscales[i]),
335
732k
                    CONST_BITS - 3);
336
732k
#endif
337
732k
        }
338
11.4k
      }
339
11.4k
      break;
340
0
#endif
341
0
#ifdef DCT_FLOAT_SUPPORTED
342
0
    case JDCT_FLOAT:
343
0
      {
344
        /* For float AA&N IDCT method, divisors are equal to quantization
345
         * coefficients scaled by scalefactor[row]*scalefactor[col], where
346
         *   scalefactor[0] = 1
347
         *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
348
         * We apply a further scale factor of 8.
349
         * What's actually stored is 1/divisor so that the inner loop can
350
         * use a multiplication rather than a division.
351
         */
352
0
        FAST_FLOAT *fdtbl;
353
0
        int row, col;
354
0
        static const double aanscalefactor[DCTSIZE] = {
355
0
          1.0, 1.387039845, 1.306562965, 1.175875602,
356
0
          1.0, 0.785694958, 0.541196100, 0.275899379
357
0
        };
358
359
0
        if (fdct->float_divisors[qtblno] == NULL) {
360
0
          fdct->float_divisors[qtblno] = (FAST_FLOAT *)
361
0
            (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
362
0
                                        DCTSIZE2 * sizeof(FAST_FLOAT));
363
0
        }
364
0
        fdtbl = fdct->float_divisors[qtblno];
365
0
        i = 0;
366
0
        for (row = 0; row < DCTSIZE; row++) {
367
0
          for (col = 0; col < DCTSIZE; col++) {
368
0
            fdtbl[i] = (FAST_FLOAT)
369
0
              (1.0 / (((double)qtbl->quantval[i] *
370
0
                       aanscalefactor[row] * aanscalefactor[col] * 8.0)));
371
0
            i++;
372
0
          }
373
0
        }
374
0
      }
375
0
      break;
376
0
#endif
377
0
    default:
378
0
      ERREXIT(cinfo, JERR_NOT_COMPILED);
379
0
      break;
380
68.1k
    }
381
68.1k
  }
382
26.4k
}
383
384
385
/*
386
 * Load data into workspace, applying unsigned->signed conversion.
387
 */
388
389
/* sample_data: array of sample rows; rows 0 .. DCTSIZE-1 are read.
 * start_col:   column of the first sample to read in each row.
 * workspace:   receives DCTSIZE * DCTSIZE values, one row after another.
 *
 * Each stored value is the sample minus _CENTERJSAMPLE.
 */
METHODDEF(void)
390
convsamp(_JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM *workspace)
391
47.7M
{
392
47.7M
  register DCTELEM *workspaceptr;
393
47.7M
  register _JSAMPROW elemptr;
394
47.7M
  register int elemr;
395
396
47.7M
  workspaceptr = workspace;
397
429M
  for (elemr = 0; elemr < DCTSIZE; elemr++) {
398
382M
    elemptr = sample_data[elemr] + start_col;   /* first sample of this row */
399
400
382M
#if DCTSIZE == 8                /* unroll the inner loop */
401
382M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
402
382M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
403
382M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
404
382M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
405
382M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
406
382M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
407
382M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
408
382M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
409
#else
410
    {
411
      register int elemc;
412
      for (elemc = DCTSIZE; elemc > 0; elemc--)         /* generic form of the unrolled code above */
413
        *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
414
    }
415
#endif
416
382M
  }
417
47.7M
}
Unexecuted instantiation: jcdctmgr-8.c:convsamp
jcdctmgr-12.c:convsamp
Line
Count
Source
391
47.7M
{
392
47.7M
  register DCTELEM *workspaceptr;
393
47.7M
  register _JSAMPROW elemptr;
394
47.7M
  register int elemr;
395
396
47.7M
  workspaceptr = workspace;
397
429M
  for (elemr = 0; elemr < DCTSIZE; elemr++) {
398
382M
    elemptr = sample_data[elemr] + start_col;
399
400
382M
#if DCTSIZE == 8                /* unroll the inner loop */
401
382M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
402
382M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
403
382M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
404
382M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
405
382M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
406
382M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
407
382M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
408
382M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
409
#else
410
    {
411
      register int elemc;
412
      for (elemc = DCTSIZE; elemc > 0; elemc--)
413
        *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
414
    }
415
#endif
416
382M
  }
417
47.7M
}
418
419
420
/*
421
 * Quantize/descale the coefficients, and store into coef_blocks[].
422
 */
423
424
/* coef_block: receives DCTSIZE2 quantized coefficients.
 * divisors:   divisor table as laid out by start_pass_fdctmgr().
 * workspace:  the DCTSIZE2 values to quantize.
 *
 * With BITS_IN_JSAMPLE == 8 the division is done by multiply-and-shift using
 * the reciprocal, correction, and shift sub-tables written by
 * compute_reciprocal(); the scale sub-table is not read here.  Otherwise
 * divisors holds DCTSIZE2 plain divisors and a rounded integer division is
 * performed.  In both paths negative inputs are negated, quantized as
 * magnitudes, and negated back.
 *
 * NOTE(review): DIVIDE_BY expands to a bare if/else statement; that is safe
 * here only because each use stands alone inside a braced block.
 */
METHODDEF(void)
425
quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
426
56.1M
{
427
56.1M
  int i;
428
56.1M
  DCTELEM temp;
429
56.1M
  JCOEFPTR output_ptr = coef_block;
430
431
#if BITS_IN_JSAMPLE == 8
432
433
  UDCTELEM recip, corr;
434
  int shift;
435
  UDCTELEM2 product;
436
437
543M
  for (i = 0; i < DCTSIZE2; i++) {
438
535M
    temp = workspace[i];
439
535M
    recip = divisors[i + DCTSIZE2 * 0];
440
535M
    corr =  divisors[i + DCTSIZE2 * 1];
441
535M
    shift = divisors[i + DCTSIZE2 * 3];
442
443
535M
    if (temp < 0) {
444
26.4M
      temp = -temp;             /* quantize the magnitude; sign is restored below */
445
26.4M
      product = (UDCTELEM2)(temp + corr) * recip;
446
26.4M
      product >>= shift + sizeof(DCTELEM) * 8;  /* stored shift excludes the word size, so add it back */
447
26.4M
      temp = (DCTELEM)product;
448
26.4M
      temp = -temp;
449
508M
    } else {
450
508M
      product = (UDCTELEM2)(temp + corr) * recip;
451
508M
      product >>= shift + sizeof(DCTELEM) * 8;  /* stored shift excludes the word size, so add it back */
452
508M
      temp = (DCTELEM)product;
453
508M
    }
454
535M
    output_ptr[i] = (JCOEF)temp;
455
535M
  }
456
457
#else
458
459
  register DCTELEM qval;
460
461
3.10G
  for (i = 0; i < DCTSIZE2; i++) {
462
3.05G
    qval = divisors[i];
463
3.05G
    temp = workspace[i];
464
    /* Divide the coefficient value by qval, ensuring proper rounding.
465
     * Since C does not specify the direction of rounding for negative
466
     * quotients, we have to force the dividend positive for portability.
467
     *
468
     * In most files, at least half of the output values will be zero
469
     * (at default quantization settings, more like three-quarters...)
470
     * so we should ensure that this case is fast.  On many machines,
471
     * a comparison is enough cheaper than a divide to make a special test
472
     * a win.  Since both inputs will be nonnegative, we need only test
473
     * for a < b to discover whether a/b is 0.
474
     * If your machine's division is fast enough, define FAST_DIVIDE.
475
     */
476
#ifdef FAST_DIVIDE
477
#define DIVIDE_BY(a, b)  a /= b
478
#else
479
3.05G
#define DIVIDE_BY(a, b)  if (a >= b) a /= b;  else a = 0
480
3.05G
#endif
481
3.05G
    if (temp < 0) {
482
383M
      temp = -temp;             /* quantize the magnitude; sign is restored below */
483
383M
      temp += qval >> 1;        /* for rounding */
484
383M
      DIVIDE_BY(temp, qval);
485
383M
      temp = -temp;
486
2.67G
    } else {
487
2.67G
      temp += qval >> 1;        /* for rounding */
488
2.67G
      DIVIDE_BY(temp, qval);
489
2.67G
    }
490
3.05G
    output_ptr[i] = (JCOEF)temp;
491
3.05G
  }
492
493
#endif
494
495
56.1M
}
jcdctmgr-8.c:quantize
Line
Count
Source
426
8.36M
{
427
8.36M
  int i;
428
8.36M
  DCTELEM temp;
429
8.36M
  JCOEFPTR output_ptr = coef_block;
430
431
8.36M
#if BITS_IN_JSAMPLE == 8
432
433
8.36M
  UDCTELEM recip, corr;
434
8.36M
  int shift;
435
8.36M
  UDCTELEM2 product;
436
437
543M
  for (i = 0; i < DCTSIZE2; i++) {
438
535M
    temp = workspace[i];
439
535M
    recip = divisors[i + DCTSIZE2 * 0];
440
535M
    corr =  divisors[i + DCTSIZE2 * 1];
441
535M
    shift = divisors[i + DCTSIZE2 * 3];
442
443
535M
    if (temp < 0) {
444
26.4M
      temp = -temp;
445
26.4M
      product = (UDCTELEM2)(temp + corr) * recip;
446
26.4M
      product >>= shift + sizeof(DCTELEM) * 8;
447
26.4M
      temp = (DCTELEM)product;
448
26.4M
      temp = -temp;
449
508M
    } else {
450
508M
      product = (UDCTELEM2)(temp + corr) * recip;
451
508M
      product >>= shift + sizeof(DCTELEM) * 8;
452
508M
      temp = (DCTELEM)product;
453
508M
    }
454
535M
    output_ptr[i] = (JCOEF)temp;
455
535M
  }
456
457
#else
458
459
  register DCTELEM qval;
460
461
  for (i = 0; i < DCTSIZE2; i++) {
462
    qval = divisors[i];
463
    temp = workspace[i];
464
    /* Divide the coefficient value by qval, ensuring proper rounding.
465
     * Since C does not specify the direction of rounding for negative
466
     * quotients, we have to force the dividend positive for portability.
467
     *
468
     * In most files, at least half of the output values will be zero
469
     * (at default quantization settings, more like three-quarters...)
470
     * so we should ensure that this case is fast.  On many machines,
471
     * a comparison is enough cheaper than a divide to make a special test
472
     * a win.  Since both inputs will be nonnegative, we need only test
473
     * for a < b to discover whether a/b is 0.
474
     * If your machine's division is fast enough, define FAST_DIVIDE.
475
     */
476
#ifdef FAST_DIVIDE
477
#define DIVIDE_BY(a, b)  a /= b
478
#else
479
#define DIVIDE_BY(a, b)  if (a >= b) a /= b;  else a = 0
480
#endif
481
    if (temp < 0) {
482
      temp = -temp;
483
      temp += qval >> 1;        /* for rounding */
484
      DIVIDE_BY(temp, qval);
485
      temp = -temp;
486
    } else {
487
      temp += qval >> 1;        /* for rounding */
488
      DIVIDE_BY(temp, qval);
489
    }
490
    output_ptr[i] = (JCOEF)temp;
491
  }
492
493
#endif
494
495
8.36M
}
jcdctmgr-12.c:quantize
Line
Count
Source
426
47.7M
{
427
47.7M
  int i;
428
47.7M
  DCTELEM temp;
429
47.7M
  JCOEFPTR output_ptr = coef_block;
430
431
#if BITS_IN_JSAMPLE == 8
432
433
  UDCTELEM recip, corr;
434
  int shift;
435
  UDCTELEM2 product;
436
437
  for (i = 0; i < DCTSIZE2; i++) {
438
    temp = workspace[i];
439
    recip = divisors[i + DCTSIZE2 * 0];
440
    corr =  divisors[i + DCTSIZE2 * 1];
441
    shift = divisors[i + DCTSIZE2 * 3];
442
443
    if (temp < 0) {
444
      temp = -temp;
445
      product = (UDCTELEM2)(temp + corr) * recip;
446
      product >>= shift + sizeof(DCTELEM) * 8;
447
      temp = (DCTELEM)product;
448
      temp = -temp;
449
    } else {
450
      product = (UDCTELEM2)(temp + corr) * recip;
451
      product >>= shift + sizeof(DCTELEM) * 8;
452
      temp = (DCTELEM)product;
453
    }
454
    output_ptr[i] = (JCOEF)temp;
455
  }
456
457
#else
458
459
47.7M
  register DCTELEM qval;
460
461
3.10G
  for (i = 0; i < DCTSIZE2; i++) {
462
3.05G
    qval = divisors[i];
463
3.05G
    temp = workspace[i];
464
    /* Divide the coefficient value by qval, ensuring proper rounding.
465
     * Since C does not specify the direction of rounding for negative
466
     * quotients, we have to force the dividend positive for portability.
467
     *
468
     * In most files, at least half of the output values will be zero
469
     * (at default quantization settings, more like three-quarters...)
470
     * so we should ensure that this case is fast.  On many machines,
471
     * a comparison is enough cheaper than a divide to make a special test
472
     * a win.  Since both inputs will be nonnegative, we need only test
473
     * for a < b to discover whether a/b is 0.
474
     * If your machine's division is fast enough, define FAST_DIVIDE.
475
     */
476
#ifdef FAST_DIVIDE
477
#define DIVIDE_BY(a, b)  a /= b
478
#else
479
3.05G
#define DIVIDE_BY(a, b)  if (a >= b) a /= b;  else a = 0
480
3.05G
#endif
481
3.05G
    if (temp < 0) {
482
383M
      temp = -temp;
483
383M
      temp += qval >> 1;        /* for rounding */
484
383M
      DIVIDE_BY(temp, qval);
485
383M
      temp = -temp;
486
2.67G
    } else {
487
2.67G
      temp += qval >> 1;        /* for rounding */
488
2.67G
      DIVIDE_BY(temp, qval);
489
2.67G
    }
490
3.05G
    output_ptr[i] = (JCOEF)temp;
491
3.05G
  }
492
493
47.7M
#endif
494
495
47.7M
}
496
497
498
/*
499
 * Perform forward DCT on one or more blocks of a component.
500
 *
501
 * The input samples are taken from the sample_data[] array starting at
502
 * position start_row/start_col, and moving to the right for any additional
503
 * blocks. The quantized coefficients are returned in coef_blocks[].
504
 */
505
506
METHODDEF(void)
507
forward_DCT(j_compress_ptr cinfo, jpeg_component_info *compptr,
508
            _JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
509
            JDIMENSION start_row, JDIMENSION start_col, JDIMENSION num_blocks)
510
/* This version is used for integer DCT implementations. */
511
85.4M
{
512
  /* This routine is heavily used, so it's worth coding it tightly. */
513
85.4M
  my_fdct_ptr fdct = (my_fdct_ptr)cinfo->fdct;
514
85.4M
  DCTELEM *divisors = fdct->divisors[compptr->quant_tbl_no];
515
85.4M
  DCTELEM *workspace;
516
85.4M
  JDIMENSION bi;
517
518
  /* Make sure the compiler doesn't look up these every pass */
519
85.4M
  forward_DCT_method_ptr do_dct = fdct->dct;
520
85.4M
  convsamp_method_ptr do_convsamp = fdct->convsamp;
521
85.4M
  quantize_method_ptr do_quantize = fdct->quantize;
522
85.4M
  workspace = fdct->workspace;
523
524
85.4M
  sample_data += start_row;     /* fold in the vertical offset once */
525
526
206M
  for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
527
    /* Load data into workspace, applying unsigned->signed conversion */
528
120M
    (*do_convsamp) (sample_data, start_col, workspace);
529
530
    /* Perform the DCT */
531
120M
    (*do_dct) (workspace);
532
533
    /* Quantize/descale the coefficients, and store into coef_blocks[] */
534
120M
    (*do_quantize) (coef_blocks[bi], divisors, workspace);
535
120M
  }
536
85.4M
}
jcdctmgr-8.c:forward_DCT
Line
Count
Source
511
56.7M
{
512
  /* This routine is heavily used, so it's worth coding it tightly. */
513
56.7M
  my_fdct_ptr fdct = (my_fdct_ptr)cinfo->fdct;
514
56.7M
  DCTELEM *divisors = fdct->divisors[compptr->quant_tbl_no];
515
56.7M
  DCTELEM *workspace;
516
56.7M
  JDIMENSION bi;
517
518
  /* Make sure the compiler doesn't look up these every pass */
519
56.7M
  forward_DCT_method_ptr do_dct = fdct->dct;
520
56.7M
  convsamp_method_ptr do_convsamp = fdct->convsamp;
521
56.7M
  quantize_method_ptr do_quantize = fdct->quantize;
522
56.7M
  workspace = fdct->workspace;
523
524
56.7M
  sample_data += start_row;     /* fold in the vertical offset once */
525
526
129M
  for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
527
    /* Load data into workspace, applying unsigned->signed conversion */
528
73.1M
    (*do_convsamp) (sample_data, start_col, workspace);
529
530
    /* Perform the DCT */
531
73.1M
    (*do_dct) (workspace);
532
533
    /* Quantize/descale the coefficients, and store into coef_blocks[] */
534
73.1M
    (*do_quantize) (coef_blocks[bi], divisors, workspace);
535
73.1M
  }
536
56.7M
}
jcdctmgr-12.c:forward_DCT
Line
Count
Source
511
28.7M
{
512
  /* This routine is heavily used, so it's worth coding it tightly. */
513
28.7M
  my_fdct_ptr fdct = (my_fdct_ptr)cinfo->fdct;
514
28.7M
  DCTELEM *divisors = fdct->divisors[compptr->quant_tbl_no];
515
28.7M
  DCTELEM *workspace;
516
28.7M
  JDIMENSION bi;
517
518
  /* Make sure the compiler doesn't look up these every pass */
519
28.7M
  forward_DCT_method_ptr do_dct = fdct->dct;
520
28.7M
  convsamp_method_ptr do_convsamp = fdct->convsamp;
521
28.7M
  quantize_method_ptr do_quantize = fdct->quantize;
522
28.7M
  workspace = fdct->workspace;
523
524
28.7M
  sample_data += start_row;     /* fold in the vertical offset once */
525
526
76.4M
  for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
527
    /* Load data into workspace, applying unsigned->signed conversion */
528
47.7M
    (*do_convsamp) (sample_data, start_col, workspace);
529
530
    /* Perform the DCT */
531
47.7M
    (*do_dct) (workspace);
532
533
    /* Quantize/descale the coefficients, and store into coef_blocks[] */
534
47.7M
    (*do_quantize) (coef_blocks[bi], divisors, workspace);
535
47.7M
  }
536
28.7M
}
537
538
539
#ifdef DCT_FLOAT_SUPPORTED
540
541
METHODDEF(void)
542
convsamp_float(_JSAMPARRAY sample_data, JDIMENSION start_col,
543
               FAST_FLOAT *workspace)
544
0
{
545
0
  register FAST_FLOAT *workspaceptr;
546
0
  register _JSAMPROW elemptr;
547
0
  register int elemr;
548
549
0
  workspaceptr = workspace;
550
0
  for (elemr = 0; elemr < DCTSIZE; elemr++) {
551
0
    elemptr = sample_data[elemr] + start_col;
552
0
#if DCTSIZE == 8                /* unroll the inner loop */
553
0
    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
554
0
    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
555
0
    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
556
0
    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
557
0
    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
558
0
    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
559
0
    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
560
0
    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
561
#else
562
    {
563
      register int elemc;
564
      for (elemc = DCTSIZE; elemc > 0; elemc--)
565
        *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
566
    }
567
#endif
568
0
  }
569
0
}
Unexecuted instantiation: jcdctmgr-8.c:convsamp_float
Unexecuted instantiation: jcdctmgr-12.c:convsamp_float
570
571
572
METHODDEF(void)
573
quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
574
               FAST_FLOAT *workspace)
575
0
{
576
0
  register FAST_FLOAT temp;
577
0
  register int i;
578
0
  register JCOEFPTR output_ptr = coef_block;
579
580
0
  for (i = 0; i < DCTSIZE2; i++) {
581
    /* Apply the quantization and scaling factor */
582
0
    temp = workspace[i] * divisors[i];
583
584
    /* Round to nearest integer.
585
     * Since C does not specify the direction of rounding for negative
586
     * quotients, we have to force the dividend positive for portability.
587
     * The maximum coefficient size is +-16K (for 12-bit data), so this
588
     * code should work for either 16-bit or 32-bit ints.
589
     */
590
0
    output_ptr[i] = (JCOEF)((int)(temp + (FAST_FLOAT)16384.5) - 16384);
591
0
  }
592
0
}
Unexecuted instantiation: jcdctmgr-8.c:quantize_float
Unexecuted instantiation: jcdctmgr-12.c:quantize_float
593
594
595
METHODDEF(void)
596
forward_DCT_float(j_compress_ptr cinfo, jpeg_component_info *compptr,
597
                  _JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
598
                  JDIMENSION start_row, JDIMENSION start_col,
599
                  JDIMENSION num_blocks)
600
/* This version is used for floating-point DCT implementations. */
601
4.77M
{
602
  /* This routine is heavily used, so it's worth coding it tightly. */
603
4.77M
  my_fdct_ptr fdct = (my_fdct_ptr)cinfo->fdct;
604
4.77M
  FAST_FLOAT *divisors = fdct->float_divisors[compptr->quant_tbl_no];
605
4.77M
  FAST_FLOAT *workspace;
606
4.77M
  JDIMENSION bi;
607
608
609
  /* Make sure the compiler doesn't look up these every pass */
610
4.77M
  float_DCT_method_ptr do_dct = fdct->float_dct;
611
4.77M
  float_convsamp_method_ptr do_convsamp = fdct->float_convsamp;
612
4.77M
  float_quantize_method_ptr do_quantize = fdct->float_quantize;
613
4.77M
  workspace = fdct->float_workspace;
614
615
4.77M
  sample_data += start_row;     /* fold in the vertical offset once */
616
617
11.0M
  for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
618
    /* Load data into workspace, applying unsigned->signed conversion */
619
6.28M
    (*do_convsamp) (sample_data, start_col, workspace);
620
621
    /* Perform the DCT */
622
6.28M
    (*do_dct) (workspace);
623
624
    /* Quantize/descale the coefficients, and store into coef_blocks[] */
625
6.28M
    (*do_quantize) (coef_blocks[bi], divisors, workspace);
626
6.28M
  }
627
4.77M
}
jcdctmgr-8.c:forward_DCT_float
Line
Count
Source
601
4.77M
{
602
  /* This routine is heavily used, so it's worth coding it tightly. */
603
4.77M
  my_fdct_ptr fdct = (my_fdct_ptr)cinfo->fdct;
604
4.77M
  FAST_FLOAT *divisors = fdct->float_divisors[compptr->quant_tbl_no];
605
4.77M
  FAST_FLOAT *workspace;
606
4.77M
  JDIMENSION bi;
607
608
609
  /* Make sure the compiler doesn't look up these every pass */
610
4.77M
  float_DCT_method_ptr do_dct = fdct->float_dct;
611
4.77M
  float_convsamp_method_ptr do_convsamp = fdct->float_convsamp;
612
4.77M
  float_quantize_method_ptr do_quantize = fdct->float_quantize;
613
4.77M
  workspace = fdct->float_workspace;
614
615
4.77M
  sample_data += start_row;     /* fold in the vertical offset once */
616
617
11.0M
  for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
618
    /* Load data into workspace, applying unsigned->signed conversion */
619
6.28M
    (*do_convsamp) (sample_data, start_col, workspace);
620
621
    /* Perform the DCT */
622
6.28M
    (*do_dct) (workspace);
623
624
    /* Quantize/descale the coefficients, and store into coef_blocks[] */
625
6.28M
    (*do_quantize) (coef_blocks[bi], divisors, workspace);
626
6.28M
  }
627
4.77M
}
Unexecuted instantiation: jcdctmgr-12.c:forward_DCT_float
628
629
#endif /* DCT_FLOAT_SUPPORTED */
630
631
632
/*
633
 * Initialize FDCT manager.
634
 */
635
636
GLOBAL(void)
637
_jinit_forward_dct(j_compress_ptr cinfo)
638
95.3k
{
639
95.3k
  my_fdct_ptr fdct;
640
95.3k
  int i;
641
642
95.3k
  if (cinfo->data_precision != BITS_IN_JSAMPLE)
643
0
    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
644
645
95.3k
  fdct = (my_fdct_ptr)
646
95.3k
    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
647
95.3k
                                sizeof(my_fdct_controller));
648
95.3k
  cinfo->fdct = (struct jpeg_forward_dct *)fdct;
649
95.3k
  fdct->pub.start_pass = start_pass_fdctmgr;
650
651
  /* First determine the DCT... */
652
95.3k
  switch (cinfo->dct_method) {
653
0
#ifdef DCT_ISLOW_SUPPORTED
654
73.7k
  case JDCT_ISLOW:
655
73.7k
    fdct->pub._forward_DCT = forward_DCT;
656
#ifdef WITH_SIMD
657
39.2k
    if (jsimd_can_fdct_islow())
658
39.2k
      fdct->dct = jsimd_fdct_islow;
659
0
    else
660
0
#endif
661
34.5k
      fdct->dct = _jpeg_fdct_islow;
662
73.7k
    break;
663
0
#endif
664
0
#ifdef DCT_IFAST_SUPPORTED
665
15.0k
  case JDCT_IFAST:
666
15.0k
    fdct->pub._forward_DCT = forward_DCT;
667
#ifdef WITH_SIMD
668
9.18k
    if (jsimd_can_fdct_ifast())
669
9.18k
      fdct->dct = jsimd_fdct_ifast;
670
0
    else
671
0
#endif
672
5.84k
      fdct->dct = _jpeg_fdct_ifast;
673
15.0k
    break;
674
0
#endif
675
0
#ifdef DCT_FLOAT_SUPPORTED
676
6.49k
  case JDCT_FLOAT:
677
6.49k
    fdct->pub._forward_DCT = forward_DCT_float;
678
#ifdef WITH_SIMD
679
6.49k
    if (jsimd_can_fdct_float())
680
6.49k
      fdct->float_dct = jsimd_fdct_float;
681
0
    else
682
0
#endif
683
0
      fdct->float_dct = jpeg_fdct_float;
684
6.49k
    break;
685
0
#endif
686
0
  default:
687
0
    ERREXIT(cinfo, JERR_NOT_COMPILED);
688
0
    break;
689
95.3k
  }
690
691
  /* ...then the supporting stages. */
692
95.3k
  switch (cinfo->dct_method) {
693
0
#ifdef DCT_ISLOW_SUPPORTED
694
73.7k
  case JDCT_ISLOW:
695
73.7k
#endif
696
73.7k
#ifdef DCT_IFAST_SUPPORTED
697
88.8k
  case JDCT_IFAST:
698
88.8k
#endif
699
88.8k
#if defined(DCT_ISLOW_SUPPORTED) || defined(DCT_IFAST_SUPPORTED)
700
#ifdef WITH_SIMD
701
48.4k
    if (jsimd_can_convsamp())
702
48.4k
      fdct->convsamp = jsimd_convsamp;
703
0
    else
704
0
#endif
705
0
      fdct->convsamp = convsamp;
706
#ifdef WITH_SIMD
707
48.4k
    if (jsimd_can_quantize())
708
48.4k
      fdct->quantize = jsimd_quantize;
709
0
    else
710
0
#endif
711
0
      fdct->quantize = quantize;
712
88.8k
    break;
713
0
#endif
714
0
#ifdef DCT_FLOAT_SUPPORTED
715
6.49k
  case JDCT_FLOAT:
716
#ifdef WITH_SIMD
717
6.49k
    if (jsimd_can_convsamp_float())
718
6.49k
      fdct->float_convsamp = jsimd_convsamp_float;
719
0
    else
720
0
#endif
721
0
      fdct->float_convsamp = convsamp_float;
722
#ifdef WITH_SIMD
723
6.49k
    if (jsimd_can_quantize_float())
724
6.49k
      fdct->float_quantize = jsimd_quantize_float;
725
0
    else
726
0
#endif
727
0
      fdct->float_quantize = quantize_float;
728
6.49k
    break;
729
0
#endif
730
0
  default:
731
0
    ERREXIT(cinfo, JERR_NOT_COMPILED);
732
0
    break;
733
95.3k
  }
734
735
  /* Allocate workspace memory */
736
95.3k
#ifdef DCT_FLOAT_SUPPORTED
737
95.3k
  if (cinfo->dct_method == JDCT_FLOAT)
738
6.49k
    fdct->float_workspace = (FAST_FLOAT *)
739
6.49k
      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
740
6.49k
                                  sizeof(FAST_FLOAT) * DCTSIZE2);
741
88.8k
  else
742
88.8k
#endif
743
88.8k
    fdct->workspace = (DCTELEM *)
744
88.8k
      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
745
88.8k
                                  sizeof(DCTELEM) * DCTSIZE2);
746
747
  /* Mark divisor tables unallocated */
748
476k
  for (i = 0; i < NUM_QUANT_TBLS; i++) {
749
381k
    fdct->divisors[i] = NULL;
750
381k
#ifdef DCT_FLOAT_SUPPORTED
751
    fdct->float_divisors[i] = NULL;
752
381k
#endif
753
381k
  }
754
95.3k
}
jinit_forward_dct
Line
Count
Source
638
54.9k
{
639
54.9k
  my_fdct_ptr fdct;
640
54.9k
  int i;
641
642
54.9k
  if (cinfo->data_precision != BITS_IN_JSAMPLE)
643
0
    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
644
645
54.9k
  fdct = (my_fdct_ptr)
646
54.9k
    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
647
54.9k
                                sizeof(my_fdct_controller));
648
54.9k
  cinfo->fdct = (struct jpeg_forward_dct *)fdct;
649
54.9k
  fdct->pub.start_pass = start_pass_fdctmgr;
650
651
  /* First determine the DCT... */
652
54.9k
  switch (cinfo->dct_method) {
653
0
#ifdef DCT_ISLOW_SUPPORTED
654
39.2k
  case JDCT_ISLOW:
655
39.2k
    fdct->pub._forward_DCT = forward_DCT;
656
39.2k
#ifdef WITH_SIMD
657
39.2k
    if (jsimd_can_fdct_islow())
658
39.2k
      fdct->dct = jsimd_fdct_islow;
659
0
    else
660
0
#endif
661
0
      fdct->dct = _jpeg_fdct_islow;
662
39.2k
    break;
663
0
#endif
664
0
#ifdef DCT_IFAST_SUPPORTED
665
9.18k
  case JDCT_IFAST:
666
9.18k
    fdct->pub._forward_DCT = forward_DCT;
667
9.18k
#ifdef WITH_SIMD
668
9.18k
    if (jsimd_can_fdct_ifast())
669
9.18k
      fdct->dct = jsimd_fdct_ifast;
670
0
    else
671
0
#endif
672
0
      fdct->dct = _jpeg_fdct_ifast;
673
9.18k
    break;
674
0
#endif
675
0
#ifdef DCT_FLOAT_SUPPORTED
676
6.49k
  case JDCT_FLOAT:
677
6.49k
    fdct->pub._forward_DCT = forward_DCT_float;
678
6.49k
#ifdef WITH_SIMD
679
6.49k
    if (jsimd_can_fdct_float())
680
6.49k
      fdct->float_dct = jsimd_fdct_float;
681
0
    else
682
0
#endif
683
0
      fdct->float_dct = jpeg_fdct_float;
684
6.49k
    break;
685
0
#endif
686
0
  default:
687
0
    ERREXIT(cinfo, JERR_NOT_COMPILED);
688
0
    break;
689
54.9k
  }
690
691
  /* ...then the supporting stages. */
692
54.9k
  switch (cinfo->dct_method) {
693
0
#ifdef DCT_ISLOW_SUPPORTED
694
39.2k
  case JDCT_ISLOW:
695
39.2k
#endif
696
39.2k
#ifdef DCT_IFAST_SUPPORTED
697
48.4k
  case JDCT_IFAST:
698
48.4k
#endif
699
48.4k
#if defined(DCT_ISLOW_SUPPORTED) || defined(DCT_IFAST_SUPPORTED)
700
48.4k
#ifdef WITH_SIMD
701
48.4k
    if (jsimd_can_convsamp())
702
48.4k
      fdct->convsamp = jsimd_convsamp;
703
0
    else
704
0
#endif
705
0
      fdct->convsamp = convsamp;
706
48.4k
#ifdef WITH_SIMD
707
48.4k
    if (jsimd_can_quantize())
708
48.4k
      fdct->quantize = jsimd_quantize;
709
0
    else
710
0
#endif
711
0
      fdct->quantize = quantize;
712
48.4k
    break;
713
0
#endif
714
0
#ifdef DCT_FLOAT_SUPPORTED
715
6.49k
  case JDCT_FLOAT:
716
6.49k
#ifdef WITH_SIMD
717
6.49k
    if (jsimd_can_convsamp_float())
718
6.49k
      fdct->float_convsamp = jsimd_convsamp_float;
719
0
    else
720
0
#endif
721
0
      fdct->float_convsamp = convsamp_float;
722
6.49k
#ifdef WITH_SIMD
723
6.49k
    if (jsimd_can_quantize_float())
724
6.49k
      fdct->float_quantize = jsimd_quantize_float;
725
0
    else
726
0
#endif
727
0
      fdct->float_quantize = quantize_float;
728
6.49k
    break;
729
0
#endif
730
0
  default:
731
0
    ERREXIT(cinfo, JERR_NOT_COMPILED);
732
0
    break;
733
54.9k
  }
734
735
  /* Allocate workspace memory */
736
54.9k
#ifdef DCT_FLOAT_SUPPORTED
737
54.9k
  if (cinfo->dct_method == JDCT_FLOAT)
738
6.49k
    fdct->float_workspace = (FAST_FLOAT *)
739
6.49k
      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
740
6.49k
                                  sizeof(FAST_FLOAT) * DCTSIZE2);
741
48.4k
  else
742
48.4k
#endif
743
48.4k
    fdct->workspace = (DCTELEM *)
744
48.4k
      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
745
48.4k
                                  sizeof(DCTELEM) * DCTSIZE2);
746
747
  /* Mark divisor tables unallocated */
748
274k
  for (i = 0; i < NUM_QUANT_TBLS; i++) {
749
219k
    fdct->divisors[i] = NULL;
750
219k
#ifdef DCT_FLOAT_SUPPORTED
751
    fdct->float_divisors[i] = NULL;
752
219k
#endif
753
219k
  }
754
54.9k
}
j12init_forward_dct
Line
Count
Source
638
40.3k
{
639
40.3k
  my_fdct_ptr fdct;
640
40.3k
  int i;
641
642
40.3k
  if (cinfo->data_precision != BITS_IN_JSAMPLE)
643
0
    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
644
645
40.3k
  fdct = (my_fdct_ptr)
646
40.3k
    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
647
40.3k
                                sizeof(my_fdct_controller));
648
40.3k
  cinfo->fdct = (struct jpeg_forward_dct *)fdct;
649
40.3k
  fdct->pub.start_pass = start_pass_fdctmgr;
650
651
  /* First determine the DCT... */
652
40.3k
  switch (cinfo->dct_method) {
653
0
#ifdef DCT_ISLOW_SUPPORTED
654
34.5k
  case JDCT_ISLOW:
655
34.5k
    fdct->pub._forward_DCT = forward_DCT;
656
#ifdef WITH_SIMD
657
    if (jsimd_can_fdct_islow())
658
      fdct->dct = jsimd_fdct_islow;
659
    else
660
#endif
661
34.5k
      fdct->dct = _jpeg_fdct_islow;
662
34.5k
    break;
663
0
#endif
664
0
#ifdef DCT_IFAST_SUPPORTED
665
5.84k
  case JDCT_IFAST:
666
5.84k
    fdct->pub._forward_DCT = forward_DCT;
667
#ifdef WITH_SIMD
668
    if (jsimd_can_fdct_ifast())
669
      fdct->dct = jsimd_fdct_ifast;
670
    else
671
#endif
672
5.84k
      fdct->dct = _jpeg_fdct_ifast;
673
5.84k
    break;
674
0
#endif
675
0
#ifdef DCT_FLOAT_SUPPORTED
676
0
  case JDCT_FLOAT:
677
0
    fdct->pub._forward_DCT = forward_DCT_float;
678
#ifdef WITH_SIMD
679
    if (jsimd_can_fdct_float())
680
      fdct->float_dct = jsimd_fdct_float;
681
    else
682
#endif
683
0
      fdct->float_dct = jpeg_fdct_float;
684
0
    break;
685
0
#endif
686
0
  default:
687
0
    ERREXIT(cinfo, JERR_NOT_COMPILED);
688
0
    break;
689
40.3k
  }
690
691
  /* ...then the supporting stages. */
692
40.3k
  switch (cinfo->dct_method) {
693
0
#ifdef DCT_ISLOW_SUPPORTED
694
34.5k
  case JDCT_ISLOW:
695
34.5k
#endif
696
34.5k
#ifdef DCT_IFAST_SUPPORTED
697
40.3k
  case JDCT_IFAST:
698
40.3k
#endif
699
40.3k
#if defined(DCT_ISLOW_SUPPORTED) || defined(DCT_IFAST_SUPPORTED)
700
#ifdef WITH_SIMD
701
    if (jsimd_can_convsamp())
702
      fdct->convsamp = jsimd_convsamp;
703
    else
704
#endif
705
40.3k
      fdct->convsamp = convsamp;
706
#ifdef WITH_SIMD
707
    if (jsimd_can_quantize())
708
      fdct->quantize = jsimd_quantize;
709
    else
710
#endif
711
40.3k
      fdct->quantize = quantize;
712
40.3k
    break;
713
0
#endif
714
0
#ifdef DCT_FLOAT_SUPPORTED
715
0
  case JDCT_FLOAT:
716
#ifdef WITH_SIMD
717
    if (jsimd_can_convsamp_float())
718
      fdct->float_convsamp = jsimd_convsamp_float;
719
    else
720
#endif
721
0
      fdct->float_convsamp = convsamp_float;
722
#ifdef WITH_SIMD
723
    if (jsimd_can_quantize_float())
724
      fdct->float_quantize = jsimd_quantize_float;
725
    else
726
#endif
727
0
      fdct->float_quantize = quantize_float;
728
0
    break;
729
0
#endif
730
0
  default:
731
0
    ERREXIT(cinfo, JERR_NOT_COMPILED);
732
0
    break;
733
40.3k
  }
734
735
  /* Allocate workspace memory */
736
40.3k
#ifdef DCT_FLOAT_SUPPORTED
737
40.3k
  if (cinfo->dct_method == JDCT_FLOAT)
738
0
    fdct->float_workspace = (FAST_FLOAT *)
739
0
      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
740
0
                                  sizeof(FAST_FLOAT) * DCTSIZE2);
741
40.3k
  else
742
40.3k
#endif
743
40.3k
    fdct->workspace = (DCTELEM *)
744
40.3k
      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
745
40.3k
                                  sizeof(DCTELEM) * DCTSIZE2);
746
747
  /* Mark divisor tables unallocated */
748
201k
  for (i = 0; i < NUM_QUANT_TBLS; i++) {
749
161k
    fdct->divisors[i] = NULL;
750
161k
#ifdef DCT_FLOAT_SUPPORTED
751
    fdct->float_divisors[i] = NULL;
752
161k
#endif
753
161k
  }
754
40.3k
}