Coverage Report

Created: 2025-11-11 06:32

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libjpeg-turbo.dev/src/jcdctmgr.c
Line
Count
Source
1
/*
2
 * jcdctmgr.c
3
 *
4
 * This file was part of the Independent JPEG Group's software:
5
 * Copyright (C) 1994-1996, Thomas G. Lane.
6
 * libjpeg-turbo Modifications:
7
 * Copyright (C) 1999-2006, MIYASAKA Masaru.
8
 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
9
 * Copyright (C) 2011, 2014-2015, 2022, 2024-2025, D. R. Commander.
10
 * For conditions of distribution and use, see the accompanying README.ijg
11
 * file.
12
 *
13
 * This file contains the forward-DCT management logic.
14
 * This code selects a particular DCT implementation to be used,
15
 * and it performs related housekeeping chores including coefficient
16
 * quantization.
17
 */
18
19
#define JPEG_INTERNALS
20
#include "jinclude.h"
21
#include "jpeglib.h"
22
#include "jdct.h"               /* Private declarations for DCT subsystem */
23
#ifdef WITH_SIMD
24
#include "../simd/jsimddct.h"
25
#endif
26
#ifdef WITH_PROFILE
27
#include "tjutil.h"
28
#endif
29
30
31
/* Private subobject for this module */
32
33
METHODDEF(void) quantize(JCOEFPTR, DCTELEM *, DCTELEM *);
34
35
typedef struct {
36
  struct jpeg_forward_dct pub;  /* public fields */
37
38
  /* Pointer to the DCT routine actually in use */
39
  forward_DCT_method_ptr dct;
40
  convsamp_method_ptr convsamp;
41
  quantize_method_ptr quantize;
42
43
  /* The actual post-DCT divisors --- not identical to the quant table
44
   * entries, because of scaling (especially for an unnormalized DCT).
45
   * Each table is given in normal array order.
46
   */
47
  DCTELEM *divisors[NUM_QUANT_TBLS];
48
49
  /* work area for FDCT subroutine */
50
  DCTELEM *workspace;
51
52
#ifdef DCT_FLOAT_SUPPORTED
53
  /* Same as above for the floating-point case. */
54
  float_DCT_method_ptr float_dct;
55
  float_convsamp_method_ptr float_convsamp;
56
  float_quantize_method_ptr float_quantize;
57
  FAST_FLOAT *float_divisors[NUM_QUANT_TBLS];
58
  FAST_FLOAT *float_workspace;
59
#endif
60
} my_fdct_controller;
61
62
typedef my_fdct_controller *my_fdct_ptr;
63
64
65
#if BITS_IN_JSAMPLE == 8
66
67
/*
68
 * Find the highest bit in an integer through binary search.
69
 */
70
71
LOCAL(int)
72
flss(UINT16 val)
73
7.87M
{
74
7.87M
  int bit;
75
76
7.87M
  bit = 16;
77
78
7.87M
  if (!val)
79
0
    return 0;
80
81
7.87M
  if (!(val & 0xff00)) {
82
4.39M
    bit -= 8;
83
4.39M
    val <<= 8;
84
4.39M
  }
85
7.87M
  if (!(val & 0xf000)) {
86
4.98M
    bit -= 4;
87
4.98M
    val <<= 4;
88
4.98M
  }
89
7.87M
  if (!(val & 0xc000)) {
90
4.20M
    bit -= 2;
91
4.20M
    val <<= 2;
92
4.20M
  }
93
7.87M
  if (!(val & 0x8000)) {
94
3.66M
    bit -= 1;
95
3.66M
    val <<= 1;
96
3.66M
  }
97
98
7.87M
  return bit;
99
7.87M
}
100
101
102
/*
103
 * Compute values to do a division using reciprocal.
104
 *
105
 * This implementation is based on an algorithm described in
106
 *   "Optimizing subroutines in assembly language:
107
 *   An optimization guide for x86 platforms" (https://agner.org/optimize).
108
 * More information about the basic algorithm can be found in
109
 * the paper "Integer Division Using Reciprocals" by Robert Alverson.
110
 *
111
 * The basic idea is to replace x/d by x * d^-1. In order to store
112
 * d^-1 with enough precision we shift it left a few places. It turns
113
 * out that this algorithm gives just enough precision, and also fits
114
 * into DCTELEM:
115
 *
116
 *   b = (the number of significant bits in divisor) - 1
117
 *   r = (word size) + b
118
 *   f = 2^r / divisor
119
 *
120
 * f will not be an integer for most cases, so we need to compensate
121
 * for the rounding error introduced:
122
 *
123
 *   no fractional part:
124
 *
125
 *       result = input >> r
126
 *
127
 *   fractional part of f < 0.5:
128
 *
129
 *       round f down to nearest integer
130
 *       result = ((input + 1) * f) >> r
131
 *
132
 *   fractional part of f > 0.5:
133
 *
134
 *       round f up to nearest integer
135
 *       result = (input * f) >> r
136
 *
137
 * This is the original algorithm that gives truncated results. But we
138
 * want properly rounded results, so we replace "input" with
139
 * "input + divisor/2".
140
 *
141
 * In order to allow SIMD implementations we also tweak the values to
142
 * allow the same calculation to be made at all times:
143
 *
144
 *   dctbl[0] = f rounded to nearest integer
145
 *   dctbl[1] = divisor / 2 (+ 1 if fractional part of f < 0.5)
146
 *   dctbl[2] = 1 << ((word size) * 2 - r)
147
 *   dctbl[3] = r - (word size)
148
 *
149
 * dctbl[2] is for stupid instruction sets where the shift operation
150
 * isn't member-wise (e.g. MMX).
151
 *
152
 * The reason dctbl[2] and dctbl[3] reduce the shift with (word size)
153
 * is that most SIMD implementations have a "multiply and store top
154
 * half" operation.
155
 *
156
 * Lastly, we store each of the values in their own table instead
157
 * of in a consecutive manner, yet again in order to allow SIMD
158
 * routines.
159
 */
160
161
LOCAL(int)
162
compute_reciprocal(UINT16 divisor, DCTELEM *dtbl)
163
7.87M
{
164
7.87M
  UDCTELEM2 fq, fr;
165
7.87M
  UDCTELEM c;
166
7.87M
  int b, r;
167
168
7.87M
  if (divisor == 1) {
169
    /* divisor == 1 means unquantized, so these reciprocal/correction/shift
170
     * values will cause the C quantization algorithm to act like the
171
     * identity function.  Since only the C quantization algorithm is used in
172
     * these cases, the scale value is irrelevant.
173
     */
174
0
    dtbl[DCTSIZE2 * 0] = (DCTELEM)1;                        /* reciprocal */
175
0
    dtbl[DCTSIZE2 * 1] = (DCTELEM)0;                        /* correction */
176
0
    dtbl[DCTSIZE2 * 2] = (DCTELEM)1;                        /* scale */
177
0
    dtbl[DCTSIZE2 * 3] = -(DCTELEM)(sizeof(DCTELEM) * 8);   /* shift */
178
0
    return 0;
179
0
  }
180
181
7.87M
  b = flss(divisor) - 1;
182
7.87M
  r  = sizeof(DCTELEM) * 8 + b;
183
184
7.87M
  fq = ((UDCTELEM2)1 << r) / divisor;
185
7.87M
  fr = ((UDCTELEM2)1 << r) % divisor;
186
187
7.87M
  c = divisor / 2;                      /* for rounding */
188
189
7.87M
  if (fr == 0) {                        /* divisor is power of two */
190
    /* fq will be one bit too large to fit in DCTELEM, so adjust */
191
1.72M
    fq >>= 1;
192
1.72M
    r--;
193
6.14M
  } else if (fr <= (divisor / 2U)) {    /* fractional part is < 0.5 */
194
2.25M
    c++;
195
3.89M
  } else {                              /* fractional part is > 0.5 */
196
3.89M
    fq++;
197
3.89M
  }
198
199
7.87M
  dtbl[DCTSIZE2 * 0] = (DCTELEM)fq;     /* reciprocal */
200
7.87M
  dtbl[DCTSIZE2 * 1] = (DCTELEM)c;      /* correction + roundfactor */
201
7.87M
#ifdef WITH_SIMD
202
7.87M
  dtbl[DCTSIZE2 * 2] = (DCTELEM)(1 << (sizeof(DCTELEM) * 8 * 2 - r)); /* scale */
203
#else
204
  dtbl[DCTSIZE2 * 2] = 1;
205
#endif
206
7.87M
  dtbl[DCTSIZE2 * 3] = (DCTELEM)r - sizeof(DCTELEM) * 8; /* shift */
207
208
7.87M
  if (r <= 16) return 0;
209
7.87M
  else return 1;
210
7.87M
}
211
212
#endif
213
214
215
/*
216
 * Initialize for a processing pass.
217
 * Verify that all referenced Q-tables are present, and set up
218
 * the divisor table for each one.
219
 * In the current implementation, DCT of all components is done during
220
 * the first pass, even if only some components will be output in the
221
 * first scan.  Hence all components should be examined here.
222
 */
223
224
METHODDEF(void)
225
start_pass_fdctmgr(j_compress_ptr cinfo)
226
49.8k
{
227
49.8k
  my_fdct_ptr fdct = (my_fdct_ptr)cinfo->fdct;
228
49.8k
  int ci, qtblno, i;
229
49.8k
  jpeg_component_info *compptr;
230
49.8k
  JQUANT_TBL *qtbl;
231
49.8k
  DCTELEM *dtbl;
232
233
177k
  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
234
127k
       ci++, compptr++) {
235
127k
    qtblno = compptr->quant_tbl_no;
236
    /* Make sure specified quantization table is present */
237
127k
    if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
238
127k
        cinfo->quant_tbl_ptrs[qtblno] == NULL)
239
0
      ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
240
127k
    qtbl = cinfo->quant_tbl_ptrs[qtblno];
241
    /* Compute divisors for this quant table */
242
    /* We may do this more than once for same table, but it's not a big deal */
243
127k
    switch (cinfo->dct_method) {
244
0
#ifdef DCT_ISLOW_SUPPORTED
245
104k
    case JDCT_ISLOW:
246
      /* For LL&M IDCT method, divisors are equal to raw quantization
247
       * coefficients multiplied by 8 (to counteract scaling).
248
       */
249
104k
      if (fdct->divisors[qtblno] == NULL) {
250
70.6k
        fdct->divisors[qtblno] = (DCTELEM *)
251
70.6k
          (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
252
70.6k
                                      (DCTSIZE2 * 4) * sizeof(DCTELEM));
253
70.6k
      }
254
104k
      dtbl = fdct->divisors[qtblno];
255
6.81M
      for (i = 0; i < DCTSIZE2; i++) {
256
#if BITS_IN_JSAMPLE == 8
257
#ifdef WITH_SIMD
258
3.08M
        if (!compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i]) &&
259
0
            fdct->quantize != quantize)
260
0
          fdct->quantize = quantize;
261
#else
262
        compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i]);
263
#endif
264
#else
265
        dtbl[i] = ((DCTELEM)qtbl->quantval[i]) << 3;
266
#endif
267
6.70M
      }
268
104k
      break;
269
0
#endif
270
0
#ifdef DCT_IFAST_SUPPORTED
271
22.6k
    case JDCT_IFAST:
272
22.6k
      {
273
        /* For AA&N IDCT method, divisors are equal to quantization
274
         * coefficients scaled by scalefactor[row]*scalefactor[col], where
275
         *   scalefactor[0] = 1
276
         *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
277
         * We apply a further scale factor of 8.
278
         */
279
22.6k
#define CONST_BITS  14
280
22.6k
        static const INT16 aanscales[DCTSIZE2] = {
281
          /* precomputed values scaled up by 14 bits */
282
22.6k
          16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
283
22.6k
          22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
284
22.6k
          21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
285
22.6k
          19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
286
22.6k
          16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
287
22.6k
          12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
288
22.6k
           8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
289
22.6k
           4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
290
22.6k
        };
291
22.6k
        SHIFT_TEMPS
292
293
22.6k
        if (fdct->divisors[qtblno] == NULL) {
294
15.1k
          fdct->divisors[qtblno] = (DCTELEM *)
295
15.1k
            (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
296
15.1k
                                        (DCTSIZE2 * 4) * sizeof(DCTELEM));
297
15.1k
        }
298
22.6k
        dtbl = fdct->divisors[qtblno];
299
1.47M
        for (i = 0; i < DCTSIZE2; i++) {
300
#if BITS_IN_JSAMPLE == 8
301
#ifdef WITH_SIMD
302
718k
          if (!compute_reciprocal(
303
718k
                DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
304
718k
                                      (JLONG)aanscales[i]),
305
718k
                        CONST_BITS - 3), &dtbl[i]) &&
306
0
              fdct->quantize != quantize)
307
0
            fdct->quantize = quantize;
308
#else
309
          compute_reciprocal(
310
            DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
311
                                  (JLONG)aanscales[i]),
312
                    CONST_BITS-3), &dtbl[i]);
313
#endif
314
#else
315
          dtbl[i] = (DCTELEM)
316
732k
            DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
317
                                  (JLONG)aanscales[i]),
318
                    CONST_BITS - 3);
319
#endif
320
1.45M
        }
321
22.6k
      }
322
22.6k
      break;
323
0
#endif
324
0
#ifdef DCT_FLOAT_SUPPORTED
325
0
    case JDCT_FLOAT:
326
0
      {
327
        /* For float AA&N IDCT method, divisors are equal to quantization
328
         * coefficients scaled by scalefactor[row]*scalefactor[col], where
329
         *   scalefactor[0] = 1
330
         *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
331
         * We apply a further scale factor of 8.
332
         * What's actually stored is 1/divisor so that the inner loop can
333
         * use a multiplication rather than a division.
334
         */
335
0
        FAST_FLOAT *fdtbl;
336
0
        int row, col;
337
0
        static const double aanscalefactor[DCTSIZE] = {
338
0
          1.0, 1.387039845, 1.306562965, 1.175875602,
339
0
          1.0, 0.785694958, 0.541196100, 0.275899379
340
0
        };
341
342
0
        if (fdct->float_divisors[qtblno] == NULL) {
343
0
          fdct->float_divisors[qtblno] = (FAST_FLOAT *)
344
0
            (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
345
0
                                        DCTSIZE2 * sizeof(FAST_FLOAT));
346
0
        }
347
0
        fdtbl = fdct->float_divisors[qtblno];
348
0
        i = 0;
349
0
        for (row = 0; row < DCTSIZE; row++) {
350
0
          for (col = 0; col < DCTSIZE; col++) {
351
0
            fdtbl[i] = (FAST_FLOAT)
352
0
              (1.0 / (((double)qtbl->quantval[i] *
353
0
                       aanscalefactor[row] * aanscalefactor[col] * 8.0)));
354
0
            i++;
355
0
          }
356
0
        }
357
0
      }
358
0
      break;
359
0
#endif
360
0
    default:
361
0
      ERREXIT(cinfo, JERR_NOT_COMPILED);
362
0
      break;
363
127k
    }
364
127k
  }
365
49.8k
}
jcdctmgr-8.c:start_pass_fdctmgr
Line
Count
Source
226
23.6k
{
227
23.6k
  my_fdct_ptr fdct = (my_fdct_ptr)cinfo->fdct;
228
23.6k
  int ci, qtblno, i;
229
23.6k
  jpeg_component_info *compptr;
230
23.6k
  JQUANT_TBL *qtbl;
231
23.6k
  DCTELEM *dtbl;
232
233
83.0k
  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
234
59.3k
       ci++, compptr++) {
235
59.3k
    qtblno = compptr->quant_tbl_no;
236
    /* Make sure specified quantization table is present */
237
59.3k
    if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
238
59.3k
        cinfo->quant_tbl_ptrs[qtblno] == NULL)
239
0
      ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
240
59.3k
    qtbl = cinfo->quant_tbl_ptrs[qtblno];
241
    /* Compute divisors for this quant table */
242
    /* We may do this more than once for same table, but it's not a big deal */
243
59.3k
    switch (cinfo->dct_method) {
244
0
#ifdef DCT_ISLOW_SUPPORTED
245
48.1k
    case JDCT_ISLOW:
246
      /* For LL&M IDCT method, divisors are equal to raw quantization
247
       * coefficients multiplied by 8 (to counteract scaling).
248
       */
249
48.1k
      if (fdct->divisors[qtblno] == NULL) {
250
33.0k
        fdct->divisors[qtblno] = (DCTELEM *)
251
33.0k
          (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
252
33.0k
                                      (DCTSIZE2 * 4) * sizeof(DCTELEM));
253
33.0k
      }
254
48.1k
      dtbl = fdct->divisors[qtblno];
255
3.13M
      for (i = 0; i < DCTSIZE2; i++) {
256
3.08M
#if BITS_IN_JSAMPLE == 8
257
3.08M
#ifdef WITH_SIMD
258
3.08M
        if (!compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i]) &&
259
0
            fdct->quantize != quantize)
260
0
          fdct->quantize = quantize;
261
#else
262
        compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i]);
263
#endif
264
#else
265
        dtbl[i] = ((DCTELEM)qtbl->quantval[i]) << 3;
266
#endif
267
3.08M
      }
268
48.1k
      break;
269
0
#endif
270
0
#ifdef DCT_IFAST_SUPPORTED
271
11.2k
    case JDCT_IFAST:
272
11.2k
      {
273
        /* For AA&N IDCT method, divisors are equal to quantization
274
         * coefficients scaled by scalefactor[row]*scalefactor[col], where
275
         *   scalefactor[0] = 1
276
         *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
277
         * We apply a further scale factor of 8.
278
         */
279
11.2k
#define CONST_BITS  14
280
11.2k
        static const INT16 aanscales[DCTSIZE2] = {
281
          /* precomputed values scaled up by 14 bits */
282
11.2k
          16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
283
11.2k
          22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
284
11.2k
          21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
285
11.2k
          19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
286
11.2k
          16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
287
11.2k
          12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
288
11.2k
           8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
289
11.2k
           4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
290
11.2k
        };
291
11.2k
        SHIFT_TEMPS
292
293
11.2k
        if (fdct->divisors[qtblno] == NULL) {
294
7.48k
          fdct->divisors[qtblno] = (DCTELEM *)
295
7.48k
            (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
296
7.48k
                                        (DCTSIZE2 * 4) * sizeof(DCTELEM));
297
7.48k
        }
298
11.2k
        dtbl = fdct->divisors[qtblno];
299
729k
        for (i = 0; i < DCTSIZE2; i++) {
300
718k
#if BITS_IN_JSAMPLE == 8
301
718k
#ifdef WITH_SIMD
302
718k
          if (!compute_reciprocal(
303
718k
                DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
304
718k
                                      (JLONG)aanscales[i]),
305
718k
                        CONST_BITS - 3), &dtbl[i]) &&
306
0
              fdct->quantize != quantize)
307
0
            fdct->quantize = quantize;
308
#else
309
          compute_reciprocal(
310
            DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
311
                                  (JLONG)aanscales[i]),
312
                    CONST_BITS-3), &dtbl[i]);
313
#endif
314
#else
315
          dtbl[i] = (DCTELEM)
316
            DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
317
                                  (JLONG)aanscales[i]),
318
                    CONST_BITS - 3);
319
#endif
320
718k
        }
321
11.2k
      }
322
11.2k
      break;
323
0
#endif
324
0
#ifdef DCT_FLOAT_SUPPORTED
325
0
    case JDCT_FLOAT:
326
0
      {
327
        /* For float AA&N IDCT method, divisors are equal to quantization
328
         * coefficients scaled by scalefactor[row]*scalefactor[col], where
329
         *   scalefactor[0] = 1
330
         *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
331
         * We apply a further scale factor of 8.
332
         * What's actually stored is 1/divisor so that the inner loop can
333
         * use a multiplication rather than a division.
334
         */
335
0
        FAST_FLOAT *fdtbl;
336
0
        int row, col;
337
0
        static const double aanscalefactor[DCTSIZE] = {
338
0
          1.0, 1.387039845, 1.306562965, 1.175875602,
339
0
          1.0, 0.785694958, 0.541196100, 0.275899379
340
0
        };
341
342
0
        if (fdct->float_divisors[qtblno] == NULL) {
343
0
          fdct->float_divisors[qtblno] = (FAST_FLOAT *)
344
0
            (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
345
0
                                        DCTSIZE2 * sizeof(FAST_FLOAT));
346
0
        }
347
0
        fdtbl = fdct->float_divisors[qtblno];
348
0
        i = 0;
349
0
        for (row = 0; row < DCTSIZE; row++) {
350
0
          for (col = 0; col < DCTSIZE; col++) {
351
0
            fdtbl[i] = (FAST_FLOAT)
352
0
              (1.0 / (((double)qtbl->quantval[i] *
353
0
                       aanscalefactor[row] * aanscalefactor[col] * 8.0)));
354
0
            i++;
355
0
          }
356
0
        }
357
0
      }
358
0
      break;
359
0
#endif
360
0
    default:
361
0
      ERREXIT(cinfo, JERR_NOT_COMPILED);
362
0
      break;
363
59.3k
    }
364
59.3k
  }
365
23.6k
}
jcdctmgr-12.c:start_pass_fdctmgr
Line
Count
Source
226
26.1k
{
227
26.1k
  my_fdct_ptr fdct = (my_fdct_ptr)cinfo->fdct;
228
26.1k
  int ci, qtblno, i;
229
26.1k
  jpeg_component_info *compptr;
230
26.1k
  JQUANT_TBL *qtbl;
231
26.1k
  DCTELEM *dtbl;
232
233
94.2k
  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
234
68.1k
       ci++, compptr++) {
235
68.1k
    qtblno = compptr->quant_tbl_no;
236
    /* Make sure specified quantization table is present */
237
68.1k
    if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
238
68.1k
        cinfo->quant_tbl_ptrs[qtblno] == NULL)
239
0
      ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
240
68.1k
    qtbl = cinfo->quant_tbl_ptrs[qtblno];
241
    /* Compute divisors for this quant table */
242
    /* We may do this more than once for same table, but it's not a big deal */
243
68.1k
    switch (cinfo->dct_method) {
244
0
#ifdef DCT_ISLOW_SUPPORTED
245
56.6k
    case JDCT_ISLOW:
246
      /* For LL&M IDCT method, divisors are equal to raw quantization
247
       * coefficients multiplied by 8 (to counteract scaling).
248
       */
249
56.6k
      if (fdct->divisors[qtblno] == NULL) {
250
37.5k
        fdct->divisors[qtblno] = (DCTELEM *)
251
37.5k
          (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
252
37.5k
                                      (DCTSIZE2 * 4) * sizeof(DCTELEM));
253
37.5k
      }
254
56.6k
      dtbl = fdct->divisors[qtblno];
255
3.68M
      for (i = 0; i < DCTSIZE2; i++) {
256
#if BITS_IN_JSAMPLE == 8
257
#ifdef WITH_SIMD
258
        if (!compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i]) &&
259
            fdct->quantize != quantize)
260
          fdct->quantize = quantize;
261
#else
262
        compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i]);
263
#endif
264
#else
265
3.62M
        dtbl[i] = ((DCTELEM)qtbl->quantval[i]) << 3;
266
3.62M
#endif
267
3.62M
      }
268
56.6k
      break;
269
0
#endif
270
0
#ifdef DCT_IFAST_SUPPORTED
271
11.4k
    case JDCT_IFAST:
272
11.4k
      {
273
        /* For AA&N IDCT method, divisors are equal to quantization
274
         * coefficients scaled by scalefactor[row]*scalefactor[col], where
275
         *   scalefactor[0] = 1
276
         *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
277
         * We apply a further scale factor of 8.
278
         */
279
11.4k
#define CONST_BITS  14
280
11.4k
        static const INT16 aanscales[DCTSIZE2] = {
281
          /* precomputed values scaled up by 14 bits */
282
11.4k
          16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
283
11.4k
          22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
284
11.4k
          21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
285
11.4k
          19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
286
11.4k
          16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
287
11.4k
          12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
288
11.4k
           8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
289
11.4k
           4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
290
11.4k
        };
291
11.4k
        SHIFT_TEMPS
292
293
11.4k
        if (fdct->divisors[qtblno] == NULL) {
294
7.63k
          fdct->divisors[qtblno] = (DCTELEM *)
295
7.63k
            (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
296
7.63k
                                        (DCTSIZE2 * 4) * sizeof(DCTELEM));
297
7.63k
        }
298
11.4k
        dtbl = fdct->divisors[qtblno];
299
744k
        for (i = 0; i < DCTSIZE2; i++) {
300
#if BITS_IN_JSAMPLE == 8
301
#ifdef WITH_SIMD
302
          if (!compute_reciprocal(
303
                DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
304
                                      (JLONG)aanscales[i]),
305
                        CONST_BITS - 3), &dtbl[i]) &&
306
              fdct->quantize != quantize)
307
            fdct->quantize = quantize;
308
#else
309
          compute_reciprocal(
310
            DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
311
                                  (JLONG)aanscales[i]),
312
                    CONST_BITS-3), &dtbl[i]);
313
#endif
314
#else
315
732k
          dtbl[i] = (DCTELEM)
316
732k
            DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
317
732k
                                  (JLONG)aanscales[i]),
318
732k
                    CONST_BITS - 3);
319
732k
#endif
320
732k
        }
321
11.4k
      }
322
11.4k
      break;
323
0
#endif
324
0
#ifdef DCT_FLOAT_SUPPORTED
325
0
    case JDCT_FLOAT:
326
0
      {
327
        /* For float AA&N IDCT method, divisors are equal to quantization
328
         * coefficients scaled by scalefactor[row]*scalefactor[col], where
329
         *   scalefactor[0] = 1
330
         *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
331
         * We apply a further scale factor of 8.
332
         * What's actually stored is 1/divisor so that the inner loop can
333
         * use a multiplication rather than a division.
334
         */
335
0
        FAST_FLOAT *fdtbl;
336
0
        int row, col;
337
0
        static const double aanscalefactor[DCTSIZE] = {
338
0
          1.0, 1.387039845, 1.306562965, 1.175875602,
339
0
          1.0, 0.785694958, 0.541196100, 0.275899379
340
0
        };
341
342
0
        if (fdct->float_divisors[qtblno] == NULL) {
343
0
          fdct->float_divisors[qtblno] = (FAST_FLOAT *)
344
0
            (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
345
0
                                        DCTSIZE2 * sizeof(FAST_FLOAT));
346
0
        }
347
0
        fdtbl = fdct->float_divisors[qtblno];
348
0
        i = 0;
349
0
        for (row = 0; row < DCTSIZE; row++) {
350
0
          for (col = 0; col < DCTSIZE; col++) {
351
0
            fdtbl[i] = (FAST_FLOAT)
352
0
              (1.0 / (((double)qtbl->quantval[i] *
353
0
                       aanscalefactor[row] * aanscalefactor[col] * 8.0)));
354
0
            i++;
355
0
          }
356
0
        }
357
0
      }
358
0
      break;
359
0
#endif
360
0
    default:
361
0
      ERREXIT(cinfo, JERR_NOT_COMPILED);
362
0
      break;
363
68.1k
    }
364
68.1k
  }
365
26.1k
}
366
367
368
/*
369
 * Load data into workspace, applying unsigned->signed conversion.
370
 */
371
372
METHODDEF(void)
373
convsamp(_JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM *workspace)
374
46.9M
{
375
46.9M
  register DCTELEM *workspaceptr;
376
46.9M
  register _JSAMPROW elemptr;
377
46.9M
  register int elemr;
378
379
46.9M
  workspaceptr = workspace;
380
422M
  for (elemr = 0; elemr < DCTSIZE; elemr++) {
381
375M
    elemptr = sample_data[elemr] + start_col;
382
383
375M
#if DCTSIZE == 8                /* unroll the inner loop */
384
375M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
385
375M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
386
375M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
387
375M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
388
375M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
389
375M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
390
375M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
391
375M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
392
#else
393
    {
394
      register int elemc;
395
      for (elemc = DCTSIZE; elemc > 0; elemc--)
396
        *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
397
    }
398
#endif
399
375M
  }
400
46.9M
}
Unexecuted instantiation: jcdctmgr-8.c:convsamp
jcdctmgr-12.c:convsamp
Line
Count
Source
374
46.9M
{
375
46.9M
  register DCTELEM *workspaceptr;
376
46.9M
  register _JSAMPROW elemptr;
377
46.9M
  register int elemr;
378
379
46.9M
  workspaceptr = workspace;
380
422M
  for (elemr = 0; elemr < DCTSIZE; elemr++) {
381
375M
    elemptr = sample_data[elemr] + start_col;
382
383
375M
#if DCTSIZE == 8                /* unroll the inner loop */
384
375M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
385
375M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
386
375M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
387
375M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
388
375M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
389
375M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
390
375M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
391
375M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
392
#else
393
    {
394
      register int elemc;
395
      for (elemc = DCTSIZE; elemc > 0; elemc--)
396
        *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
397
    }
398
#endif
399
375M
  }
400
46.9M
}
401
402
403
/*
404
 * Quantize/descale the coefficients, and store into coef_blocks[].
405
 */
406
407
METHODDEF(void)
408
quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
409
46.9M
{
410
46.9M
  int i;
411
46.9M
  DCTELEM temp;
412
46.9M
  JCOEFPTR output_ptr = coef_block;
413
414
#if BITS_IN_JSAMPLE == 8
415
416
  UDCTELEM recip, corr;
417
  int shift;
418
  UDCTELEM2 product;
419
420
0
  for (i = 0; i < DCTSIZE2; i++) {
421
0
    temp = workspace[i];
422
0
    recip = divisors[i + DCTSIZE2 * 0];
423
0
    corr =  divisors[i + DCTSIZE2 * 1];
424
0
    shift = divisors[i + DCTSIZE2 * 3];
425
426
0
    if (temp < 0) {
427
0
      temp = -temp;
428
0
      product = (UDCTELEM2)(temp + corr) * recip;
429
0
      product >>= shift + sizeof(DCTELEM) * 8;
430
0
      temp = (DCTELEM)product;
431
0
      temp = -temp;
432
0
    } else {
433
0
      product = (UDCTELEM2)(temp + corr) * recip;
434
0
      product >>= shift + sizeof(DCTELEM) * 8;
435
0
      temp = (DCTELEM)product;
436
0
    }
437
0
    output_ptr[i] = (JCOEF)temp;
438
0
  }
439
440
#else
441
442
  register DCTELEM qval;
443
444
3.05G
  for (i = 0; i < DCTSIZE2; i++) {
445
3.00G
    qval = divisors[i];
446
3.00G
    temp = workspace[i];
447
    /* Divide the coefficient value by qval, ensuring proper rounding.
448
     * Since C does not specify the direction of rounding for negative
449
     * quotients, we have to force the dividend positive for portability.
450
     *
451
     * In most files, at least half of the output values will be zero
452
     * (at default quantization settings, more like three-quarters...)
453
     * so we should ensure that this case is fast.  On many machines,
454
     * a comparison is enough cheaper than a divide to make a special test
455
     * a win.  Since both inputs will be nonnegative, we need only test
456
     * for a < b to discover whether a/b is 0.
457
     * If your machine's division is fast enough, define FAST_DIVIDE.
458
     */
459
#ifdef FAST_DIVIDE
460
#define DIVIDE_BY(a, b)  a /= b
461
#else
462
3.00G
#define DIVIDE_BY(a, b)  if (a >= b) a /= b;  else a = 0
463
3.00G
#endif
464
3.00G
    if (temp < 0) {
465
122M
      temp = -temp;
466
122M
      temp += qval >> 1;        /* for rounding */
467
122M
      DIVIDE_BY(temp, qval);
468
122M
      temp = -temp;
469
2.88G
    } else {
470
2.88G
      temp += qval >> 1;        /* for rounding */
471
2.88G
      DIVIDE_BY(temp, qval);
472
2.88G
    }
473
3.00G
    output_ptr[i] = (JCOEF)temp;
474
3.00G
  }
475
476
#endif
477
478
46.9M
}
Unexecuted instantiation: jcdctmgr-8.c:quantize
jcdctmgr-12.c:quantize
Line
Count
Source
409
46.9M
{
410
46.9M
  int i;
411
46.9M
  DCTELEM temp;
412
46.9M
  JCOEFPTR output_ptr = coef_block;
413
414
#if BITS_IN_JSAMPLE == 8
415
416
  UDCTELEM recip, corr;
417
  int shift;
418
  UDCTELEM2 product;
419
420
  for (i = 0; i < DCTSIZE2; i++) {
421
    temp = workspace[i];
422
    recip = divisors[i + DCTSIZE2 * 0];
423
    corr =  divisors[i + DCTSIZE2 * 1];
424
    shift = divisors[i + DCTSIZE2 * 3];
425
426
    if (temp < 0) {
427
      temp = -temp;
428
      product = (UDCTELEM2)(temp + corr) * recip;
429
      product >>= shift + sizeof(DCTELEM) * 8;
430
      temp = (DCTELEM)product;
431
      temp = -temp;
432
    } else {
433
      product = (UDCTELEM2)(temp + corr) * recip;
434
      product >>= shift + sizeof(DCTELEM) * 8;
435
      temp = (DCTELEM)product;
436
    }
437
    output_ptr[i] = (JCOEF)temp;
438
  }
439
440
#else
441
442
46.9M
  register DCTELEM qval;
443
444
3.05G
  for (i = 0; i < DCTSIZE2; i++) {
445
3.00G
    qval = divisors[i];
446
3.00G
    temp = workspace[i];
447
    /* Divide the coefficient value by qval, ensuring proper rounding.
448
     * Since C does not specify the direction of rounding for negative
449
     * quotients, we have to force the dividend positive for portability.
450
     *
451
     * In most files, at least half of the output values will be zero
452
     * (at default quantization settings, more like three-quarters...)
453
     * so we should ensure that this case is fast.  On many machines,
454
     * a comparison is enough cheaper than a divide to make a special test
455
     * a win.  Since both inputs will be nonnegative, we need only test
456
     * for a < b to discover whether a/b is 0.
457
     * If your machine's division is fast enough, define FAST_DIVIDE.
458
     */
459
#ifdef FAST_DIVIDE
460
#define DIVIDE_BY(a, b)  a /= b
461
#else
462
3.00G
#define DIVIDE_BY(a, b)  if (a >= b) a /= b;  else a = 0
463
3.00G
#endif
464
3.00G
    if (temp < 0) {
465
122M
      temp = -temp;
466
122M
      temp += qval >> 1;        /* for rounding */
467
122M
      DIVIDE_BY(temp, qval);
468
122M
      temp = -temp;
469
2.88G
    } else {
470
2.88G
      temp += qval >> 1;        /* for rounding */
471
2.88G
      DIVIDE_BY(temp, qval);
472
2.88G
    }
473
3.00G
    output_ptr[i] = (JCOEF)temp;
474
3.00G
  }
475
476
46.9M
#endif
477
478
46.9M
}
479
480
481
/*
482
 * Perform forward DCT on one or more blocks of a component.
483
 *
484
 * The input samples are taken from the sample_data[] array starting at
485
 * position start_row/start_col, and moving to the right for any additional
486
 * blocks. The quantized coefficients are returned in coef_blocks[].
487
 */
488
489
METHODDEF(void)
490
forward_DCT(j_compress_ptr cinfo, jpeg_component_info *compptr,
491
            _JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
492
            JDIMENSION start_row, JDIMENSION start_col, JDIMENSION num_blocks)
493
/* This version is used for integer DCT implementations. */
494
90.8M
{
495
  /* This routine is heavily used, so it's worth coding it tightly. */
496
90.8M
  my_fdct_ptr fdct = (my_fdct_ptr)cinfo->fdct;
497
90.8M
  DCTELEM *divisors = fdct->divisors[compptr->quant_tbl_no];
498
90.8M
  DCTELEM *workspace;
499
90.8M
  JDIMENSION bi;
500
501
  /* Make sure the compiler doesn't look up these every pass */
502
90.8M
  forward_DCT_method_ptr do_dct = fdct->dct;
503
90.8M
  convsamp_method_ptr do_convsamp = fdct->convsamp;
504
90.8M
  quantize_method_ptr do_quantize = fdct->quantize;
505
90.8M
  workspace = fdct->workspace;
506
507
90.8M
  sample_data += start_row;     /* fold in the vertical offset once */
508
509
209M
  for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
510
    /* Load data into workspace, applying unsigned->signed conversion */
511
#ifdef WITH_PROFILE
512
    cinfo->master->start = getTime();
513
#endif
514
118M
    (*do_convsamp) (sample_data, start_col, workspace);
515
#ifdef WITH_PROFILE
516
    cinfo->master->convsamp_elapsed += getTime() - cinfo->master->start;
517
    cinfo->master->convsamp_msamples += (double)DCTSIZE2 / 1000000.;
518
#endif
519
520
    /* Perform the DCT */
521
#ifdef WITH_PROFILE
522
    cinfo->master->start = getTime();
523
#endif
524
118M
    (*do_dct) (workspace);
525
#ifdef WITH_PROFILE
526
    cinfo->master->fdct_elapsed += getTime() - cinfo->master->start;
527
    cinfo->master->fdct_mcoeffs += (double)DCTSIZE2 / 1000000.;
528
#endif
529
530
    /* Quantize/descale the coefficients, and store into coef_blocks[] */
531
#ifdef WITH_PROFILE
532
    cinfo->master->start = getTime();
533
#endif
534
118M
    (*do_quantize) (coef_blocks[bi], divisors, workspace);
535
#ifdef WITH_PROFILE
536
    cinfo->master->quantize_elapsed += getTime() - cinfo->master->start;
537
    cinfo->master->quantize_mcoeffs += (double)DCTSIZE2 / 1000000.;
538
#endif
539
118M
  }
540
90.8M
}
jcdctmgr-8.c:forward_DCT
Line
Count
Source
494
57.0M
{
495
  /* This routine is heavily used, so it's worth coding it tightly. */
496
57.0M
  my_fdct_ptr fdct = (my_fdct_ptr)cinfo->fdct;
497
57.0M
  DCTELEM *divisors = fdct->divisors[compptr->quant_tbl_no];
498
57.0M
  DCTELEM *workspace;
499
57.0M
  JDIMENSION bi;
500
501
  /* Make sure the compiler doesn't look up these every pass */
502
57.0M
  forward_DCT_method_ptr do_dct = fdct->dct;
503
57.0M
  convsamp_method_ptr do_convsamp = fdct->convsamp;
504
57.0M
  quantize_method_ptr do_quantize = fdct->quantize;
505
57.0M
  workspace = fdct->workspace;
506
507
57.0M
  sample_data += start_row;     /* fold in the vertical offset once */
508
509
128M
  for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
510
    /* Load data into workspace, applying unsigned->signed conversion */
511
#ifdef WITH_PROFILE
512
    cinfo->master->start = getTime();
513
#endif
514
71.2M
    (*do_convsamp) (sample_data, start_col, workspace);
515
#ifdef WITH_PROFILE
516
    cinfo->master->convsamp_elapsed += getTime() - cinfo->master->start;
517
    cinfo->master->convsamp_msamples += (double)DCTSIZE2 / 1000000.;
518
#endif
519
520
    /* Perform the DCT */
521
#ifdef WITH_PROFILE
522
    cinfo->master->start = getTime();
523
#endif
524
71.2M
    (*do_dct) (workspace);
525
#ifdef WITH_PROFILE
526
    cinfo->master->fdct_elapsed += getTime() - cinfo->master->start;
527
    cinfo->master->fdct_mcoeffs += (double)DCTSIZE2 / 1000000.;
528
#endif
529
530
    /* Quantize/descale the coefficients, and store into coef_blocks[] */
531
#ifdef WITH_PROFILE
532
    cinfo->master->start = getTime();
533
#endif
534
71.2M
    (*do_quantize) (coef_blocks[bi], divisors, workspace);
535
#ifdef WITH_PROFILE
536
    cinfo->master->quantize_elapsed += getTime() - cinfo->master->start;
537
    cinfo->master->quantize_mcoeffs += (double)DCTSIZE2 / 1000000.;
538
#endif
539
71.2M
  }
540
57.0M
}
jcdctmgr-12.c:forward_DCT
Line
Count
Source
494
33.7M
{
495
  /* This routine is heavily used, so it's worth coding it tightly. */
496
33.7M
  my_fdct_ptr fdct = (my_fdct_ptr)cinfo->fdct;
497
33.7M
  DCTELEM *divisors = fdct->divisors[compptr->quant_tbl_no];
498
33.7M
  DCTELEM *workspace;
499
33.7M
  JDIMENSION bi;
500
501
  /* Make sure the compiler doesn't look up these every pass */
502
33.7M
  forward_DCT_method_ptr do_dct = fdct->dct;
503
33.7M
  convsamp_method_ptr do_convsamp = fdct->convsamp;
504
33.7M
  quantize_method_ptr do_quantize = fdct->quantize;
505
33.7M
  workspace = fdct->workspace;
506
507
33.7M
  sample_data += start_row;     /* fold in the vertical offset once */
508
509
80.7M
  for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
510
    /* Load data into workspace, applying unsigned->signed conversion */
511
#ifdef WITH_PROFILE
512
    cinfo->master->start = getTime();
513
#endif
514
46.9M
    (*do_convsamp) (sample_data, start_col, workspace);
515
#ifdef WITH_PROFILE
516
    cinfo->master->convsamp_elapsed += getTime() - cinfo->master->start;
517
    cinfo->master->convsamp_msamples += (double)DCTSIZE2 / 1000000.;
518
#endif
519
520
    /* Perform the DCT */
521
#ifdef WITH_PROFILE
522
    cinfo->master->start = getTime();
523
#endif
524
46.9M
    (*do_dct) (workspace);
525
#ifdef WITH_PROFILE
526
    cinfo->master->fdct_elapsed += getTime() - cinfo->master->start;
527
    cinfo->master->fdct_mcoeffs += (double)DCTSIZE2 / 1000000.;
528
#endif
529
530
    /* Quantize/descale the coefficients, and store into coef_blocks[] */
531
#ifdef WITH_PROFILE
532
    cinfo->master->start = getTime();
533
#endif
534
46.9M
    (*do_quantize) (coef_blocks[bi], divisors, workspace);
535
#ifdef WITH_PROFILE
536
    cinfo->master->quantize_elapsed += getTime() - cinfo->master->start;
537
    cinfo->master->quantize_mcoeffs += (double)DCTSIZE2 / 1000000.;
538
#endif
539
46.9M
  }
540
33.7M
}
541
542
543
#ifdef DCT_FLOAT_SUPPORTED
544
545
METHODDEF(void)
546
convsamp_float(_JSAMPARRAY sample_data, JDIMENSION start_col,
547
               FAST_FLOAT *workspace)
548
0
{
549
0
  register FAST_FLOAT *workspaceptr;
550
0
  register _JSAMPROW elemptr;
551
0
  register int elemr;
552
553
0
  workspaceptr = workspace;
554
0
  for (elemr = 0; elemr < DCTSIZE; elemr++) {
555
0
    elemptr = sample_data[elemr] + start_col;
556
0
#if DCTSIZE == 8                /* unroll the inner loop */
557
0
    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
558
0
    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
559
0
    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
560
0
    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
561
0
    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
562
0
    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
563
0
    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
564
0
    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
565
#else
566
    {
567
      register int elemc;
568
      for (elemc = DCTSIZE; elemc > 0; elemc--)
569
        *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
570
    }
571
#endif
572
0
  }
573
0
}
Unexecuted instantiation: jcdctmgr-8.c:convsamp_float
Unexecuted instantiation: jcdctmgr-12.c:convsamp_float
574
575
576
METHODDEF(void)
577
quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
578
               FAST_FLOAT *workspace)
579
0
{
580
0
  register FAST_FLOAT temp;
581
0
  register int i;
582
0
  register JCOEFPTR output_ptr = coef_block;
583
584
0
  for (i = 0; i < DCTSIZE2; i++) {
585
    /* Apply the quantization and scaling factor */
586
0
    temp = workspace[i] * divisors[i];
587
588
    /* Round to nearest integer.
589
     * Since C does not specify the direction of rounding for negative
590
     * quotients, we have to force the dividend positive for portability.
591
     * The maximum coefficient size is +-16K (for 12-bit data), so this
592
     * code should work for either 16-bit or 32-bit ints.
593
     */
594
0
    output_ptr[i] = (JCOEF)((int)(temp + (FAST_FLOAT)16384.5) - 16384);
595
0
  }
596
0
}
Unexecuted instantiation: jcdctmgr-8.c:quantize_float
Unexecuted instantiation: jcdctmgr-12.c:quantize_float
597
598
599
METHODDEF(void)
600
forward_DCT_float(j_compress_ptr cinfo, jpeg_component_info *compptr,
601
                  _JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
602
                  JDIMENSION start_row, JDIMENSION start_col,
603
                  JDIMENSION num_blocks)
604
/* This version is used for floating-point DCT implementations. */
605
0
{
606
  /* This routine is heavily used, so it's worth coding it tightly. */
607
0
  my_fdct_ptr fdct = (my_fdct_ptr)cinfo->fdct;
608
0
  FAST_FLOAT *divisors = fdct->float_divisors[compptr->quant_tbl_no];
609
0
  FAST_FLOAT *workspace;
610
0
  JDIMENSION bi;
611
612
613
  /* Make sure the compiler doesn't look up these every pass */
614
0
  float_DCT_method_ptr do_dct = fdct->float_dct;
615
0
  float_convsamp_method_ptr do_convsamp = fdct->float_convsamp;
616
0
  float_quantize_method_ptr do_quantize = fdct->float_quantize;
617
0
  workspace = fdct->float_workspace;
618
619
0
  sample_data += start_row;     /* fold in the vertical offset once */
620
621
0
  for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
622
    /* Load data into workspace, applying unsigned->signed conversion */
623
#ifdef WITH_PROFILE
624
    cinfo->master->start = getTime();
625
#endif
626
0
    (*do_convsamp) (sample_data, start_col, workspace);
627
#ifdef WITH_PROFILE
628
    cinfo->master->convsamp_elapsed += getTime() - cinfo->master->start;
629
    cinfo->master->convsamp_msamples += (double)DCTSIZE2 / 1000000.;
630
#endif
631
632
    /* Perform the DCT */
633
#ifdef WITH_PROFILE
634
    cinfo->master->start = getTime();
635
#endif
636
0
    (*do_dct) (workspace);
637
#ifdef WITH_PROFILE
638
    cinfo->master->fdct_elapsed += getTime() - cinfo->master->start;
639
    cinfo->master->fdct_mcoeffs += (double)DCTSIZE2 / 1000000.;
640
#endif
641
642
    /* Quantize/descale the coefficients, and store into coef_blocks[] */
643
#ifdef WITH_PROFILE
644
    cinfo->master->start = getTime();
645
#endif
646
0
    (*do_quantize) (coef_blocks[bi], divisors, workspace);
647
#ifdef WITH_PROFILE
648
    cinfo->master->quantize_elapsed += getTime() - cinfo->master->start;
649
    cinfo->master->quantize_mcoeffs += (double)DCTSIZE2 / 1000000.;
650
#endif
651
0
  }
652
0
}
Unexecuted instantiation: jcdctmgr-8.c:forward_DCT_float
Unexecuted instantiation: jcdctmgr-12.c:forward_DCT_float
653
654
#endif /* DCT_FLOAT_SUPPORTED */
655
656
657
/*
658
 * Initialize FDCT manager.
659
 */
660
661
GLOBAL(void)
662
_jinit_forward_dct(j_compress_ptr cinfo)
663
63.1k
{
664
63.1k
  my_fdct_ptr fdct;
665
63.1k
  int i;
666
667
63.1k
  if (cinfo->data_precision != BITS_IN_JSAMPLE)
668
0
    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
669
670
63.1k
  fdct = (my_fdct_ptr)
671
63.1k
    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
672
63.1k
                                sizeof(my_fdct_controller));
673
63.1k
  cinfo->fdct = (struct jpeg_forward_dct *)fdct;
674
63.1k
  fdct->pub.start_pass = start_pass_fdctmgr;
675
676
  /* First determine the DCT... */
677
63.1k
  switch (cinfo->dct_method) {
678
0
#ifdef DCT_ISLOW_SUPPORTED
679
53.6k
  case JDCT_ISLOW:
680
53.6k
    fdct->pub._forward_DCT = forward_DCT;
681
#ifdef WITH_SIMD
682
19.9k
    if (!jsimd_set_fdct_islow(cinfo, &fdct->dct))
683
0
#endif
684
33.7k
      fdct->dct = _jpeg_fdct_islow;
685
53.6k
    break;
686
0
#endif
687
0
#ifdef DCT_IFAST_SUPPORTED
688
9.50k
  case JDCT_IFAST:
689
9.50k
    fdct->pub._forward_DCT = forward_DCT;
690
#ifdef WITH_SIMD
691
3.74k
    if (!jsimd_set_fdct_ifast(cinfo, &fdct->dct))
692
0
#endif
693
5.76k
      fdct->dct = _jpeg_fdct_ifast;
694
9.50k
    break;
695
0
#endif
696
0
#ifdef DCT_FLOAT_SUPPORTED
697
0
  case JDCT_FLOAT:
698
0
    fdct->pub._forward_DCT = forward_DCT_float;
699
#ifdef WITH_SIMD
700
0
    if (!jsimd_set_fdct_float(cinfo, &fdct->float_dct))
701
0
#endif
702
0
      fdct->float_dct = jpeg_fdct_float;
703
0
    break;
704
0
#endif
705
0
  default:
706
0
    ERREXIT(cinfo, JERR_NOT_COMPILED);
707
0
    break;
708
63.1k
  }
709
710
  /* ...then the supporting stages. */
711
63.1k
  switch (cinfo->dct_method) {
712
0
#ifdef DCT_ISLOW_SUPPORTED
713
53.6k
  case JDCT_ISLOW:
714
53.6k
#endif
715
53.6k
#ifdef DCT_IFAST_SUPPORTED
716
63.1k
  case JDCT_IFAST:
717
63.1k
#endif
718
63.1k
#if defined(DCT_ISLOW_SUPPORTED) || defined(DCT_IFAST_SUPPORTED)
719
#ifdef WITH_SIMD
720
23.6k
    if (!jsimd_set_convsamp(cinfo, &fdct->convsamp))
721
0
#endif
722
0
      fdct->convsamp = convsamp;
723
#ifdef WITH_SIMD
724
23.6k
    if (!jsimd_set_quantize(cinfo, &fdct->quantize))
725
0
#endif
726
0
      fdct->quantize = quantize;
727
63.1k
    break;
728
0
#endif
729
0
#ifdef DCT_FLOAT_SUPPORTED
730
0
  case JDCT_FLOAT:
731
#ifdef WITH_SIMD
732
0
    if (!jsimd_set_convsamp_float(cinfo, &fdct->float_convsamp))
733
0
#endif
734
0
      fdct->float_convsamp = convsamp_float;
735
#ifdef WITH_SIMD
736
0
    if (!jsimd_set_quantize_float(cinfo, &fdct->float_quantize))
737
0
#endif
738
0
      fdct->float_quantize = quantize_float;
739
0
    break;
740
0
#endif
741
0
  default:
742
0
    ERREXIT(cinfo, JERR_NOT_COMPILED);
743
0
    break;
744
63.1k
  }
745
746
  /* Allocate workspace memory */
747
63.1k
#ifdef DCT_FLOAT_SUPPORTED
748
63.1k
  if (cinfo->dct_method == JDCT_FLOAT)
749
0
    fdct->float_workspace = (FAST_FLOAT *)
750
0
      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
751
0
                                  sizeof(FAST_FLOAT) * DCTSIZE2);
752
63.1k
  else
753
63.1k
#endif
754
63.1k
    fdct->workspace = (DCTELEM *)
755
63.1k
      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
756
63.1k
                                  sizeof(DCTELEM) * DCTSIZE2);
757
758
  /* Mark divisor tables unallocated */
759
315k
  for (i = 0; i < NUM_QUANT_TBLS; i++) {
760
252k
    fdct->divisors[i] = NULL;
761
252k
#ifdef DCT_FLOAT_SUPPORTED
762
    fdct->float_divisors[i] = NULL;
763
252k
#endif
764
252k
  }
765
63.1k
}
jinit_forward_dct
Line
Count
Source
663
23.6k
{
664
23.6k
  my_fdct_ptr fdct;
665
23.6k
  int i;
666
667
23.6k
  if (cinfo->data_precision != BITS_IN_JSAMPLE)
668
0
    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
669
670
23.6k
  fdct = (my_fdct_ptr)
671
23.6k
    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
672
23.6k
                                sizeof(my_fdct_controller));
673
23.6k
  cinfo->fdct = (struct jpeg_forward_dct *)fdct;
674
23.6k
  fdct->pub.start_pass = start_pass_fdctmgr;
675
676
  /* First determine the DCT... */
677
23.6k
  switch (cinfo->dct_method) {
678
0
#ifdef DCT_ISLOW_SUPPORTED
679
19.9k
  case JDCT_ISLOW:
680
19.9k
    fdct->pub._forward_DCT = forward_DCT;
681
19.9k
#ifdef WITH_SIMD
682
19.9k
    if (!jsimd_set_fdct_islow(cinfo, &fdct->dct))
683
0
#endif
684
0
      fdct->dct = _jpeg_fdct_islow;
685
19.9k
    break;
686
0
#endif
687
0
#ifdef DCT_IFAST_SUPPORTED
688
3.74k
  case JDCT_IFAST:
689
3.74k
    fdct->pub._forward_DCT = forward_DCT;
690
3.74k
#ifdef WITH_SIMD
691
3.74k
    if (!jsimd_set_fdct_ifast(cinfo, &fdct->dct))
692
0
#endif
693
0
      fdct->dct = _jpeg_fdct_ifast;
694
3.74k
    break;
695
0
#endif
696
0
#ifdef DCT_FLOAT_SUPPORTED
697
0
  case JDCT_FLOAT:
698
0
    fdct->pub._forward_DCT = forward_DCT_float;
699
0
#ifdef WITH_SIMD
700
0
    if (!jsimd_set_fdct_float(cinfo, &fdct->float_dct))
701
0
#endif
702
0
      fdct->float_dct = jpeg_fdct_float;
703
0
    break;
704
0
#endif
705
0
  default:
706
0
    ERREXIT(cinfo, JERR_NOT_COMPILED);
707
0
    break;
708
23.6k
  }
709
710
  /* ...then the supporting stages. */
711
23.6k
  switch (cinfo->dct_method) {
712
0
#ifdef DCT_ISLOW_SUPPORTED
713
19.9k
  case JDCT_ISLOW:
714
19.9k
#endif
715
19.9k
#ifdef DCT_IFAST_SUPPORTED
716
23.6k
  case JDCT_IFAST:
717
23.6k
#endif
718
23.6k
#if defined(DCT_ISLOW_SUPPORTED) || defined(DCT_IFAST_SUPPORTED)
719
23.6k
#ifdef WITH_SIMD
720
23.6k
    if (!jsimd_set_convsamp(cinfo, &fdct->convsamp))
721
0
#endif
722
0
      fdct->convsamp = convsamp;
723
23.6k
#ifdef WITH_SIMD
724
23.6k
    if (!jsimd_set_quantize(cinfo, &fdct->quantize))
725
0
#endif
726
0
      fdct->quantize = quantize;
727
23.6k
    break;
728
0
#endif
729
0
#ifdef DCT_FLOAT_SUPPORTED
730
0
  case JDCT_FLOAT:
731
0
#ifdef WITH_SIMD
732
0
    if (!jsimd_set_convsamp_float(cinfo, &fdct->float_convsamp))
733
0
#endif
734
0
      fdct->float_convsamp = convsamp_float;
735
0
#ifdef WITH_SIMD
736
0
    if (!jsimd_set_quantize_float(cinfo, &fdct->float_quantize))
737
0
#endif
738
0
      fdct->float_quantize = quantize_float;
739
0
    break;
740
0
#endif
741
0
  default:
742
0
    ERREXIT(cinfo, JERR_NOT_COMPILED);
743
0
    break;
744
23.6k
  }
745
746
  /* Allocate workspace memory */
747
23.6k
#ifdef DCT_FLOAT_SUPPORTED
748
23.6k
  if (cinfo->dct_method == JDCT_FLOAT)
749
0
    fdct->float_workspace = (FAST_FLOAT *)
750
0
      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
751
0
                                  sizeof(FAST_FLOAT) * DCTSIZE2);
752
23.6k
  else
753
23.6k
#endif
754
23.6k
    fdct->workspace = (DCTELEM *)
755
23.6k
      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
756
23.6k
                                  sizeof(DCTELEM) * DCTSIZE2);
757
758
  /* Mark divisor tables unallocated */
759
118k
  for (i = 0; i < NUM_QUANT_TBLS; i++) {
760
94.7k
    fdct->divisors[i] = NULL;
761
94.7k
#ifdef DCT_FLOAT_SUPPORTED
762
    fdct->float_divisors[i] = NULL;
763
94.7k
#endif
764
94.7k
  }
765
23.6k
}
j12init_forward_dct
Line
Count
Source
663
39.4k
{
664
39.4k
  my_fdct_ptr fdct;
665
39.4k
  int i;
666
667
39.4k
  if (cinfo->data_precision != BITS_IN_JSAMPLE)
668
0
    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
669
670
39.4k
  fdct = (my_fdct_ptr)
671
39.4k
    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
672
39.4k
                                sizeof(my_fdct_controller));
673
39.4k
  cinfo->fdct = (struct jpeg_forward_dct *)fdct;
674
39.4k
  fdct->pub.start_pass = start_pass_fdctmgr;
675
676
  /* First determine the DCT... */
677
39.4k
  switch (cinfo->dct_method) {
678
0
#ifdef DCT_ISLOW_SUPPORTED
679
33.7k
  case JDCT_ISLOW:
680
33.7k
    fdct->pub._forward_DCT = forward_DCT;
681
#ifdef WITH_SIMD
682
    if (!jsimd_set_fdct_islow(cinfo, &fdct->dct))
683
#endif
684
33.7k
      fdct->dct = _jpeg_fdct_islow;
685
33.7k
    break;
686
0
#endif
687
0
#ifdef DCT_IFAST_SUPPORTED
688
5.76k
  case JDCT_IFAST:
689
5.76k
    fdct->pub._forward_DCT = forward_DCT;
690
#ifdef WITH_SIMD
691
    if (!jsimd_set_fdct_ifast(cinfo, &fdct->dct))
692
#endif
693
5.76k
      fdct->dct = _jpeg_fdct_ifast;
694
5.76k
    break;
695
0
#endif
696
0
#ifdef DCT_FLOAT_SUPPORTED
697
0
  case JDCT_FLOAT:
698
0
    fdct->pub._forward_DCT = forward_DCT_float;
699
#ifdef WITH_SIMD
700
    if (!jsimd_set_fdct_float(cinfo, &fdct->float_dct))
701
#endif
702
0
      fdct->float_dct = jpeg_fdct_float;
703
0
    break;
704
0
#endif
705
0
  default:
706
0
    ERREXIT(cinfo, JERR_NOT_COMPILED);
707
0
    break;
708
39.4k
  }
709
710
  /* ...then the supporting stages. */
711
39.4k
  switch (cinfo->dct_method) {
712
0
#ifdef DCT_ISLOW_SUPPORTED
713
33.7k
  case JDCT_ISLOW:
714
33.7k
#endif
715
33.7k
#ifdef DCT_IFAST_SUPPORTED
716
39.4k
  case JDCT_IFAST:
717
39.4k
#endif
718
39.4k
#if defined(DCT_ISLOW_SUPPORTED) || defined(DCT_IFAST_SUPPORTED)
719
#ifdef WITH_SIMD
720
    if (!jsimd_set_convsamp(cinfo, &fdct->convsamp))
721
#endif
722
39.4k
      fdct->convsamp = convsamp;
723
#ifdef WITH_SIMD
724
    if (!jsimd_set_quantize(cinfo, &fdct->quantize))
725
#endif
726
39.4k
      fdct->quantize = quantize;
727
39.4k
    break;
728
0
#endif
729
0
#ifdef DCT_FLOAT_SUPPORTED
730
0
  case JDCT_FLOAT:
731
#ifdef WITH_SIMD
732
    if (!jsimd_set_convsamp_float(cinfo, &fdct->float_convsamp))
733
#endif
734
0
      fdct->float_convsamp = convsamp_float;
735
#ifdef WITH_SIMD
736
    if (!jsimd_set_quantize_float(cinfo, &fdct->float_quantize))
737
#endif
738
0
      fdct->float_quantize = quantize_float;
739
0
    break;
740
0
#endif
741
0
  default:
742
0
    ERREXIT(cinfo, JERR_NOT_COMPILED);
743
0
    break;
744
39.4k
  }
745
746
  /* Allocate workspace memory */
747
39.4k
#ifdef DCT_FLOAT_SUPPORTED
748
39.4k
  if (cinfo->dct_method == JDCT_FLOAT)
749
0
    fdct->float_workspace = (FAST_FLOAT *)
750
0
      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
751
0
                                  sizeof(FAST_FLOAT) * DCTSIZE2);
752
39.4k
  else
753
39.4k
#endif
754
39.4k
    fdct->workspace = (DCTELEM *)
755
39.4k
      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
756
39.4k
                                  sizeof(DCTELEM) * DCTSIZE2);
757
758
  /* Mark divisor tables unallocated */
759
197k
  for (i = 0; i < NUM_QUANT_TBLS; i++) {
760
157k
    fdct->divisors[i] = NULL;
761
157k
#ifdef DCT_FLOAT_SUPPORTED
762
    fdct->float_divisors[i] = NULL;
763
157k
#endif
764
157k
  }
765
39.4k
}