Coverage Report

Created: 2026-02-26 07:10

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libjpeg-turbo.3.0.x/jcdctmgr.c
Line
Count
Source
1
/*
2
 * jcdctmgr.c
3
 *
4
 * This file was part of the Independent JPEG Group's software:
5
 * Copyright (C) 1994-1996, Thomas G. Lane.
6
 * libjpeg-turbo Modifications:
7
 * Copyright (C) 1999-2006, MIYASAKA Masaru.
8
 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
9
 * Copyright (C) 2011, 2014-2015, 2022, 2024, 2026, D. R. Commander.
10
 * For conditions of distribution and use, see the accompanying README.ijg
11
 * file.
12
 *
13
 * This file contains the forward-DCT management logic.
14
 * This code selects a particular DCT implementation to be used,
15
 * and it performs related housekeeping chores including coefficient
16
 * quantization.
17
 */
18
19
#define JPEG_INTERNALS
20
#include "jinclude.h"
21
#include "jpeglib.h"
22
#include "jdct.h"               /* Private declarations for DCT subsystem */
23
#include "jsimddct.h"
24
25
26
/* Private subobject for this module */
27
28
typedef void (*forward_DCT_method_ptr) (DCTELEM *data);
29
typedef void (*float_DCT_method_ptr) (FAST_FLOAT *data);
30
31
typedef void (*convsamp_method_ptr) (_JSAMPARRAY sample_data,
32
                                     JDIMENSION start_col,
33
                                     DCTELEM *workspace);
34
typedef void (*float_convsamp_method_ptr) (_JSAMPARRAY sample_data,
35
                                           JDIMENSION start_col,
36
                                           FAST_FLOAT *workspace);
37
38
typedef void (*quantize_method_ptr) (JCOEFPTR coef_block, DCTELEM *divisors,
39
                                     DCTELEM *workspace);
40
typedef void (*float_quantize_method_ptr) (JCOEFPTR coef_block,
41
                                           FAST_FLOAT *divisors,
42
                                           FAST_FLOAT *workspace);
43
44
METHODDEF(void) quantize(JCOEFPTR, DCTELEM *, DCTELEM *);
45
46
typedef struct {
47
  struct jpeg_forward_dct pub;  /* public fields */
48
49
  /* Pointer to the DCT routine actually in use */
50
  forward_DCT_method_ptr dct;
51
  convsamp_method_ptr convsamp;
52
  quantize_method_ptr quantize;
53
54
  /* The actual post-DCT divisors --- not identical to the quant table
55
   * entries, because of scaling (especially for an unnormalized DCT).
56
   * Each table is given in normal array order.
57
   */
58
  DCTELEM *divisors[NUM_QUANT_TBLS];
59
60
  /* work area for FDCT subroutine */
61
  DCTELEM *workspace;
62
63
#ifdef DCT_FLOAT_SUPPORTED
64
  /* Same as above for the floating-point case. */
65
  float_DCT_method_ptr float_dct;
66
  float_convsamp_method_ptr float_convsamp;
67
  float_quantize_method_ptr float_quantize;
68
  FAST_FLOAT *float_divisors[NUM_QUANT_TBLS];
69
  FAST_FLOAT *float_workspace;
70
#endif
71
} my_fdct_controller;
72
73
typedef my_fdct_controller *my_fdct_ptr;
74
75
76
#if BITS_IN_JSAMPLE == 8
77
78
/*
79
 * Find the highest bit in an integer through binary search.
80
 */
81
82
LOCAL(int)
83
flss(UINT16 val)
84
0
{
85
0
  int bit;
86
87
0
  bit = 16;
88
89
0
  if (!val)
90
0
    return 0;
91
92
0
  if (!(val & 0xff00)) {
93
0
    bit -= 8;
94
0
    val <<= 8;
95
0
  }
96
0
  if (!(val & 0xf000)) {
97
0
    bit -= 4;
98
0
    val <<= 4;
99
0
  }
100
0
  if (!(val & 0xc000)) {
101
0
    bit -= 2;
102
0
    val <<= 2;
103
0
  }
104
0
  if (!(val & 0x8000)) {
105
0
    bit -= 1;
106
0
    val <<= 1;
107
0
  }
108
109
0
  return bit;
110
0
}
111
112
113
/*
114
 * Compute values to do a division using reciprocal.
115
 *
116
 * This implementation is based on an algorithm described in
117
 *   "Optimizing subroutines in assembly language:
118
 *   An optimization guide for x86 platforms" (https://agner.org/optimize).
119
 * More information about the basic algorithm can be found in
120
 * the paper "Integer Division Using Reciprocals" by Robert Alverson.
121
 *
122
 * The basic idea is to replace x/d by x * d^-1. In order to store
123
 * d^-1 with enough precision we shift it left a few places. It turns
124
 * out that this algorithm gives just enough precision, and also fits
125
 * into DCTELEM:
126
 *
127
 *   b = (the number of significant bits in divisor) - 1
128
 *   r = (word size) + b
129
 *   f = 2^r / divisor
130
 *
131
 * f will not be an integer for most cases, so we need to compensate
132
 * for the rounding error introduced:
133
 *
134
 *   no fractional part:
135
 *
136
 *       result = input >> r
137
 *
138
 *   fractional part of f < 0.5:
139
 *
140
 *       round f down to nearest integer
141
 *       result = ((input + 1) * f) >> r
142
 *
143
 *   fractional part of f > 0.5:
144
 *
145
 *       round f up to nearest integer
146
 *       result = (input * f) >> r
147
 *
148
 * This is the original algorithm that gives truncated results. But we
149
 * want properly rounded results, so we replace "input" with
150
 * "input + divisor/2".
151
 *
152
 * In order to allow SIMD implementations we also tweak the values to
153
 * allow the same calculation to be made at all times:
154
 *
155
 *   dctbl[0] = f rounded to nearest integer
156
 *   dctbl[1] = divisor / 2 (+ 1 if fractional part of f < 0.5)
157
 *   dctbl[2] = 1 << ((word size) * 2 - r)
158
 *   dctbl[3] = r - (word size)
159
 *
160
 * dctbl[2] is for stupid instruction sets where the shift operation
161
 * isn't member wise (e.g. MMX).
162
 *
163
 * The reason dctbl[2] and dctbl[3] reduce the shift with (word size)
164
 * is that most SIMD implementations have a "multiply and store top
165
 * half" operation.
166
 *
167
 * Lastly, we store each of the values in their own table instead
168
 * of in a consecutive manner, yet again in order to allow SIMD
169
 * routines.
170
 */
171
172
LOCAL(int)
173
compute_reciprocal(UINT16 divisor, DCTELEM *dtbl)
174
0
{
175
0
  UDCTELEM2 fq, fr;
176
0
  UDCTELEM c;
177
0
  int b, r;
178
179
0
  if (divisor <= 1) {
180
    /* divisor == 1 means unquantized, so these reciprocal/correction/shift
181
     * values will cause the C quantization algorithm to act like the
182
     * identity function.  Since only the C quantization algorithm is used in
183
     * these cases, the scale value is irrelevant.
184
     *
185
     * divisor == 0 can never happen in a normal program, because
186
     * jpeg_add_quant_table() clamps values < 1.  However, a program could
187
     * abuse the API by manually modifying the exposed quantization table just
188
     * before calling jpeg_start_compress().  Thus, we effectively clamp
189
     * values < 1 here as well, to avoid dividing by 0.
190
     */
191
0
    dtbl[DCTSIZE2 * 0] = (DCTELEM)1;                        /* reciprocal */
192
0
    dtbl[DCTSIZE2 * 1] = (DCTELEM)0;                        /* correction */
193
0
    dtbl[DCTSIZE2 * 2] = (DCTELEM)1;                        /* scale */
194
0
    dtbl[DCTSIZE2 * 3] = -(DCTELEM)(sizeof(DCTELEM) * 8);   /* shift */
195
0
    return 0;
196
0
  }
197
198
0
  b = flss(divisor) - 1;
199
0
  r  = sizeof(DCTELEM) * 8 + b;
200
201
0
  fq = ((UDCTELEM2)1 << r) / divisor;
202
0
  fr = ((UDCTELEM2)1 << r) % divisor;
203
204
0
  c = divisor / 2;                      /* for rounding */
205
206
0
  if (fr == 0) {                        /* divisor is power of two */
207
    /* fq will be one bit too large to fit in DCTELEM, so adjust */
208
0
    fq >>= 1;
209
0
    r--;
210
0
  } else if (fr <= (divisor / 2U)) {    /* fractional part is < 0.5 */
211
0
    c++;
212
0
  } else {                              /* fractional part is > 0.5 */
213
0
    fq++;
214
0
  }
215
216
0
  dtbl[DCTSIZE2 * 0] = (DCTELEM)fq;     /* reciprocal */
217
0
  dtbl[DCTSIZE2 * 1] = (DCTELEM)c;      /* correction + roundfactor */
218
0
#ifdef WITH_SIMD
219
0
  dtbl[DCTSIZE2 * 2] = (DCTELEM)(1 << (sizeof(DCTELEM) * 8 * 2 - r)); /* scale */
220
#else
221
  dtbl[DCTSIZE2 * 2] = 1;
222
#endif
223
0
  dtbl[DCTSIZE2 * 3] = (DCTELEM)r - sizeof(DCTELEM) * 8; /* shift */
224
225
0
  if (r <= 16) return 0;
226
0
  else return 1;
227
0
}
228
229
#endif
230
231
232
/*
233
 * Initialize for a processing pass.
234
 * Verify that all referenced Q-tables are present, and set up
235
 * the divisor table for each one.
236
 * In the current implementation, DCT of all components is done during
237
 * the first pass, even if only some components will be output in the
238
 * first scan.  Hence all components should be examined here.
239
 */
240
241
METHODDEF(void)
242
start_pass_fdctmgr(j_compress_ptr cinfo)
243
13.8k
{
244
13.8k
  my_fdct_ptr fdct = (my_fdct_ptr)cinfo->fdct;
245
13.8k
  int ci, qtblno, i;
246
13.8k
  jpeg_component_info *compptr;
247
13.8k
  JQUANT_TBL *qtbl;
248
13.8k
  DCTELEM *dtbl;
249
250
50.1k
  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
251
36.2k
       ci++, compptr++) {
252
36.2k
    qtblno = compptr->quant_tbl_no;
253
    /* Make sure specified quantization table is present */
254
36.2k
    if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
255
36.2k
        cinfo->quant_tbl_ptrs[qtblno] == NULL)
256
0
      ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
257
36.2k
    qtbl = cinfo->quant_tbl_ptrs[qtblno];
258
    /* Compute divisors for this quant table */
259
    /* We may do this more than once for same table, but it's not a big deal */
260
36.2k
    switch (cinfo->dct_method) {
261
0
#ifdef DCT_ISLOW_SUPPORTED
262
30.1k
    case JDCT_ISLOW:
263
      /* For LL&M DCT method, divisors are equal to raw quantization
264
       * coefficients multiplied by 8 (to counteract scaling).
265
       */
266
30.1k
      if (fdct->divisors[qtblno] == NULL) {
267
19.9k
        fdct->divisors[qtblno] = (DCTELEM *)
268
19.9k
          (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
269
19.9k
                                      (DCTSIZE2 * 4) * sizeof(DCTELEM));
270
19.9k
      }
271
30.1k
      dtbl = fdct->divisors[qtblno];
272
1.95M
      for (i = 0; i < DCTSIZE2; i++) {
273
#if BITS_IN_JSAMPLE == 8
274
#ifdef WITH_SIMD
275
        if (!compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i]) &&
276
            fdct->quantize == jsimd_quantize)
277
          fdct->quantize = quantize;
278
#else
279
        compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i]);
280
#endif
281
#else
282
1.92M
        dtbl[i] = ((DCTELEM)qtbl->quantval[i]) << 3;
283
1.92M
#endif
284
1.92M
      }
285
30.1k
      break;
286
0
#endif
287
0
#ifdef DCT_IFAST_SUPPORTED
288
6.09k
    case JDCT_IFAST:
289
6.09k
      {
290
        /* For AA&N DCT method, divisors are equal to quantization
291
         * coefficients scaled by scalefactor[row]*scalefactor[col], where
292
         *   scalefactor[0] = 1
293
         *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
294
         * We apply a further scale factor of 8.
295
         */
296
6.09k
#define CONST_BITS  14
297
6.09k
        static const INT16 aanscales[DCTSIZE2] = {
298
          /* precomputed values scaled up by 14 bits */
299
6.09k
          16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
300
6.09k
          22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
301
6.09k
          21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
302
6.09k
          19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
303
6.09k
          16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
304
6.09k
          12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
305
6.09k
           8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
306
6.09k
           4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
307
6.09k
        };
308
6.09k
        SHIFT_TEMPS
309
310
6.09k
        if (fdct->divisors[qtblno] == NULL) {
311
4.06k
          fdct->divisors[qtblno] = (DCTELEM *)
312
4.06k
            (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
313
4.06k
                                        (DCTSIZE2 * 4) * sizeof(DCTELEM));
314
4.06k
        }
315
6.09k
        dtbl = fdct->divisors[qtblno];
316
396k
        for (i = 0; i < DCTSIZE2; i++) {
317
#if BITS_IN_JSAMPLE == 8
318
#ifdef WITH_SIMD
319
          if (!compute_reciprocal(
320
                DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
321
                                      (JLONG)aanscales[i]),
322
                        CONST_BITS - 3), &dtbl[i]) &&
323
              fdct->quantize == jsimd_quantize)
324
            fdct->quantize = quantize;
325
#else
326
          compute_reciprocal(
327
            DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
328
                                  (JLONG)aanscales[i]),
329
                    CONST_BITS-3), &dtbl[i]);
330
#endif
331
#else
332
389k
          dtbl[i] = (DCTELEM)
333
389k
            DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
334
389k
                                  (JLONG)aanscales[i]),
335
389k
                    CONST_BITS - 3);
336
389k
#endif
337
389k
        }
338
6.09k
      }
339
6.09k
      break;
340
0
#endif
341
0
#ifdef DCT_FLOAT_SUPPORTED
342
0
    case JDCT_FLOAT:
343
0
      {
344
        /* For float AA&N DCT method, divisors are equal to quantization
345
         * coefficients scaled by scalefactor[row]*scalefactor[col], where
346
         *   scalefactor[0] = 1
347
         *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
348
         * We apply a further scale factor of 8.
349
         * What's actually stored is 1/divisor so that the inner loop can
350
         * use a multiplication rather than a division.
351
         */
352
0
        FAST_FLOAT *fdtbl;
353
0
        int row, col;
354
0
        static const double aanscalefactor[DCTSIZE] = {
355
0
          1.0, 1.387039845, 1.306562965, 1.175875602,
356
0
          1.0, 0.785694958, 0.541196100, 0.275899379
357
0
        };
358
359
0
        if (fdct->float_divisors[qtblno] == NULL) {
360
0
          fdct->float_divisors[qtblno] = (FAST_FLOAT *)
361
0
            (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
362
0
                                        DCTSIZE2 * sizeof(FAST_FLOAT));
363
0
        }
364
0
        fdtbl = fdct->float_divisors[qtblno];
365
0
        i = 0;
366
0
        for (row = 0; row < DCTSIZE; row++) {
367
0
          for (col = 0; col < DCTSIZE; col++) {
368
0
            fdtbl[i] = (FAST_FLOAT)
369
0
              (1.0 / (((double)qtbl->quantval[i] *
370
0
                       aanscalefactor[row] * aanscalefactor[col] * 8.0)));
371
0
            i++;
372
0
          }
373
0
        }
374
0
      }
375
0
      break;
376
0
#endif
377
0
    default:
378
0
      ERREXIT(cinfo, JERR_NOT_COMPILED);
379
0
      break;
380
36.2k
    }
381
36.2k
  }
382
13.8k
}
383
384
385
/*
386
 * Load data into workspace, applying unsigned->signed conversion.
387
 */
388
389
METHODDEF(void)
390
convsamp(_JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM *workspace)
391
23.0M
{
392
23.0M
  register DCTELEM *workspaceptr;
393
23.0M
  register _JSAMPROW elemptr;
394
23.0M
  register int elemr;
395
396
23.0M
  workspaceptr = workspace;
397
207M
  for (elemr = 0; elemr < DCTSIZE; elemr++) {
398
184M
    elemptr = sample_data[elemr] + start_col;
399
400
184M
#if DCTSIZE == 8                /* unroll the inner loop */
401
184M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
402
184M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
403
184M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
404
184M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
405
184M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
406
184M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
407
184M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
408
184M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
409
#else
410
    {
411
      register int elemc;
412
      for (elemc = DCTSIZE; elemc > 0; elemc--)
413
        *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
414
    }
415
#endif
416
184M
  }
417
23.0M
}
418
419
420
/*
421
 * Quantize/descale the coefficients, and store into coef_blocks[].
422
 */
423
424
METHODDEF(void)
425
quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
426
23.0M
{
427
23.0M
  int i;
428
23.0M
  DCTELEM temp;
429
23.0M
  JCOEFPTR output_ptr = coef_block;
430
431
#if BITS_IN_JSAMPLE == 8
432
433
  UDCTELEM recip, corr;
434
  int shift;
435
  UDCTELEM2 product;
436
437
  for (i = 0; i < DCTSIZE2; i++) {
438
    temp = workspace[i];
439
    recip = divisors[i + DCTSIZE2 * 0];
440
    corr =  divisors[i + DCTSIZE2 * 1];
441
    shift = divisors[i + DCTSIZE2 * 3];
442
443
    if (temp < 0) {
444
      temp = -temp;
445
      product = (UDCTELEM2)(temp + corr) * recip;
446
      product >>= shift + sizeof(DCTELEM) * 8;
447
      temp = (DCTELEM)product;
448
      temp = -temp;
449
    } else {
450
      product = (UDCTELEM2)(temp + corr) * recip;
451
      product >>= shift + sizeof(DCTELEM) * 8;
452
      temp = (DCTELEM)product;
453
    }
454
    output_ptr[i] = (JCOEF)temp;
455
  }
456
457
#else
458
459
23.0M
  register DCTELEM qval;
460
461
1.49G
  for (i = 0; i < DCTSIZE2; i++) {
462
1.47G
    qval = divisors[i];
463
1.47G
    temp = workspace[i];
464
    /* Divide the coefficient value by qval, ensuring proper rounding.
465
     * Since C does not specify the direction of rounding for negative
466
     * quotients, we have to force the dividend positive for portability.
467
     *
468
     * In most files, at least half of the output values will be zero
469
     * (at default quantization settings, more like three-quarters...)
470
     * so we should ensure that this case is fast.  On many machines,
471
     * a comparison is enough cheaper than a divide to make a special test
472
     * a win.  Since both inputs will be nonnegative, we need only test
473
     * for a < b to discover whether a/b is 0.
474
     * If your machine's division is fast enough, define FAST_DIVIDE.
475
     */
476
#ifdef FAST_DIVIDE
477
#define DIVIDE_BY(a, b)  a /= b
478
#else
479
1.47G
#define DIVIDE_BY(a, b)  if (a >= b) a /= b;  else a = 0
480
1.47G
#endif
481
1.47G
    if (temp < 0) {
482
74.3M
      temp = -temp;
483
74.3M
      temp += qval >> 1;        /* for rounding */
484
74.3M
      DIVIDE_BY(temp, qval);
485
74.3M
      temp = -temp;
486
1.39G
    } else {
487
1.39G
      temp += qval >> 1;        /* for rounding */
488
1.39G
      DIVIDE_BY(temp, qval);
489
1.39G
    }
490
1.47G
    output_ptr[i] = (JCOEF)temp;
491
1.47G
  }
492
493
23.0M
#endif
494
495
23.0M
}
496
497
498
/*
499
 * Perform forward DCT on one or more blocks of a component.
500
 *
501
 * The input samples are taken from the sample_data[] array starting at
502
 * position start_row/start_col, and moving to the right for any additional
503
 * blocks. The quantized coefficients are returned in coef_blocks[].
504
 */
505
506
METHODDEF(void)
507
forward_DCT(j_compress_ptr cinfo, jpeg_component_info *compptr,
508
            _JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
509
            JDIMENSION start_row, JDIMENSION start_col, JDIMENSION num_blocks)
510
/* This version is used for integer DCT implementations. */
511
16.4M
{
512
  /* This routine is heavily used, so it's worth coding it tightly. */
513
16.4M
  my_fdct_ptr fdct = (my_fdct_ptr)cinfo->fdct;
514
16.4M
  DCTELEM *divisors = fdct->divisors[compptr->quant_tbl_no];
515
16.4M
  DCTELEM *workspace;
516
16.4M
  JDIMENSION bi;
517
518
  /* Make sure the compiler doesn't look up these every pass */
519
16.4M
  forward_DCT_method_ptr do_dct = fdct->dct;
520
16.4M
  convsamp_method_ptr do_convsamp = fdct->convsamp;
521
16.4M
  quantize_method_ptr do_quantize = fdct->quantize;
522
16.4M
  workspace = fdct->workspace;
523
524
16.4M
  sample_data += start_row;     /* fold in the vertical offset once */
525
526
39.5M
  for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
527
    /* Load data into workspace, applying unsigned->signed conversion */
528
23.0M
    (*do_convsamp) (sample_data, start_col, workspace);
529
530
    /* Perform the DCT */
531
23.0M
    (*do_dct) (workspace);
532
533
    /* Quantize/descale the coefficients, and store into coef_blocks[] */
534
23.0M
    (*do_quantize) (coef_blocks[bi], divisors, workspace);
535
23.0M
  }
536
16.4M
}
537
538
539
#ifdef DCT_FLOAT_SUPPORTED
540
541
METHODDEF(void)
542
convsamp_float(_JSAMPARRAY sample_data, JDIMENSION start_col,
543
               FAST_FLOAT *workspace)
544
0
{
545
0
  register FAST_FLOAT *workspaceptr;
546
0
  register _JSAMPROW elemptr;
547
0
  register int elemr;
548
549
0
  workspaceptr = workspace;
550
0
  for (elemr = 0; elemr < DCTSIZE; elemr++) {
551
0
    elemptr = sample_data[elemr] + start_col;
552
0
#if DCTSIZE == 8                /* unroll the inner loop */
553
0
    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
554
0
    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
555
0
    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
556
0
    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
557
0
    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
558
0
    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
559
0
    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
560
0
    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
561
#else
562
    {
563
      register int elemc;
564
      for (elemc = DCTSIZE; elemc > 0; elemc--)
565
        *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
566
    }
567
#endif
568
0
  }
569
0
}
570
571
572
METHODDEF(void)
573
quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
574
               FAST_FLOAT *workspace)
575
0
{
576
0
  register FAST_FLOAT temp;
577
0
  register int i;
578
0
  register JCOEFPTR output_ptr = coef_block;
579
580
0
  for (i = 0; i < DCTSIZE2; i++) {
581
    /* Apply the quantization and scaling factor */
582
0
    temp = workspace[i] * divisors[i];
583
584
    /* Round to nearest integer.
585
     * Since C does not specify the direction of rounding for negative
586
     * quotients, we have to force the dividend positive for portability.
587
     * The maximum coefficient size is +-16K (for 12-bit data), so this
588
     * code should work for either 16-bit or 32-bit ints.
589
     */
590
0
    output_ptr[i] = (JCOEF)((int)(temp + (FAST_FLOAT)16384.5) - 16384);
591
0
  }
592
0
}
593
594
595
METHODDEF(void)
596
forward_DCT_float(j_compress_ptr cinfo, jpeg_component_info *compptr,
597
                  _JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
598
                  JDIMENSION start_row, JDIMENSION start_col,
599
                  JDIMENSION num_blocks)
600
/* This version is used for floating-point DCT implementations. */
601
0
{
602
  /* This routine is heavily used, so it's worth coding it tightly. */
603
0
  my_fdct_ptr fdct = (my_fdct_ptr)cinfo->fdct;
604
0
  FAST_FLOAT *divisors = fdct->float_divisors[compptr->quant_tbl_no];
605
0
  FAST_FLOAT *workspace;
606
0
  JDIMENSION bi;
607
608
609
  /* Make sure the compiler doesn't look up these every pass */
610
0
  float_DCT_method_ptr do_dct = fdct->float_dct;
611
0
  float_convsamp_method_ptr do_convsamp = fdct->float_convsamp;
612
0
  float_quantize_method_ptr do_quantize = fdct->float_quantize;
613
0
  workspace = fdct->float_workspace;
614
615
0
  sample_data += start_row;     /* fold in the vertical offset once */
616
617
0
  for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
618
    /* Load data into workspace, applying unsigned->signed conversion */
619
0
    (*do_convsamp) (sample_data, start_col, workspace);
620
621
    /* Perform the DCT */
622
0
    (*do_dct) (workspace);
623
624
    /* Quantize/descale the coefficients, and store into coef_blocks[] */
625
0
    (*do_quantize) (coef_blocks[bi], divisors, workspace);
626
0
  }
627
0
}
628
629
#endif /* DCT_FLOAT_SUPPORTED */
630
631
632
/*
633
 * Initialize FDCT manager.
634
 */
635
636
GLOBAL(void)
637
_jinit_forward_dct(j_compress_ptr cinfo)
638
13.8k
{
639
13.8k
  my_fdct_ptr fdct;
640
13.8k
  int i;
641
642
13.8k
  if (cinfo->data_precision != BITS_IN_JSAMPLE)
643
0
    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
644
645
13.8k
  fdct = (my_fdct_ptr)
646
13.8k
    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
647
13.8k
                                sizeof(my_fdct_controller));
648
13.8k
  cinfo->fdct = (struct jpeg_forward_dct *)fdct;
649
13.8k
  fdct->pub.start_pass = start_pass_fdctmgr;
650
651
  /* First determine the DCT... */
652
13.8k
  switch (cinfo->dct_method) {
653
0
#ifdef DCT_ISLOW_SUPPORTED
654
11.8k
  case JDCT_ISLOW:
655
11.8k
    fdct->pub._forward_DCT = forward_DCT;
656
#ifdef WITH_SIMD
657
0
    if (jsimd_can_fdct_islow())
658
0
      fdct->dct = jsimd_fdct_islow;
659
0
    else
660
0
#endif
661
11.8k
      fdct->dct = _jpeg_fdct_islow;
662
11.8k
    break;
663
0
#endif
664
0
#ifdef DCT_IFAST_SUPPORTED
665
2.03k
  case JDCT_IFAST:
666
2.03k
    fdct->pub._forward_DCT = forward_DCT;
667
#ifdef WITH_SIMD
668
0
    if (jsimd_can_fdct_ifast())
669
0
      fdct->dct = jsimd_fdct_ifast;
670
0
    else
671
0
#endif
672
2.03k
      fdct->dct = _jpeg_fdct_ifast;
673
2.03k
    break;
674
0
#endif
675
0
#ifdef DCT_FLOAT_SUPPORTED
676
0
  case JDCT_FLOAT:
677
0
    fdct->pub._forward_DCT = forward_DCT_float;
678
#ifdef WITH_SIMD
679
0
    if (jsimd_can_fdct_float())
680
0
      fdct->float_dct = jsimd_fdct_float;
681
0
    else
682
0
#endif
683
0
      fdct->float_dct = jpeg_fdct_float;
684
0
    break;
685
0
#endif
686
0
  default:
687
0
    ERREXIT(cinfo, JERR_NOT_COMPILED);
688
0
    break;
689
13.8k
  }
690
691
  /* ...then the supporting stages. */
692
13.8k
  switch (cinfo->dct_method) {
693
0
#ifdef DCT_ISLOW_SUPPORTED
694
11.8k
  case JDCT_ISLOW:
695
11.8k
#endif
696
11.8k
#ifdef DCT_IFAST_SUPPORTED
697
13.8k
  case JDCT_IFAST:
698
13.8k
#endif
699
13.8k
#if defined(DCT_ISLOW_SUPPORTED) || defined(DCT_IFAST_SUPPORTED)
700
#ifdef WITH_SIMD
701
0
    if (jsimd_can_convsamp())
702
0
      fdct->convsamp = jsimd_convsamp;
703
0
    else
704
0
#endif
705
0
      fdct->convsamp = convsamp;
706
#ifdef WITH_SIMD
707
0
    if (jsimd_can_quantize())
708
0
      fdct->quantize = jsimd_quantize;
709
0
    else
710
0
#endif
711
0
      fdct->quantize = quantize;
712
13.8k
    break;
713
0
#endif
714
0
#ifdef DCT_FLOAT_SUPPORTED
715
0
  case JDCT_FLOAT:
716
#ifdef WITH_SIMD
717
0
    if (jsimd_can_convsamp_float())
718
0
      fdct->float_convsamp = jsimd_convsamp_float;
719
0
    else
720
0
#endif
721
0
      fdct->float_convsamp = convsamp_float;
722
#ifdef WITH_SIMD
723
0
    if (jsimd_can_quantize_float())
724
0
      fdct->float_quantize = jsimd_quantize_float;
725
0
    else
726
0
#endif
727
0
      fdct->float_quantize = quantize_float;
728
0
    break;
729
0
#endif
730
0
  default:
731
0
    ERREXIT(cinfo, JERR_NOT_COMPILED);
732
0
    break;
733
13.8k
  }
734
735
  /* Allocate workspace memory */
736
13.8k
#ifdef DCT_FLOAT_SUPPORTED
737
13.8k
  if (cinfo->dct_method == JDCT_FLOAT)
738
0
    fdct->float_workspace = (FAST_FLOAT *)
739
0
      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
740
0
                                  sizeof(FAST_FLOAT) * DCTSIZE2);
741
13.8k
  else
742
13.8k
#endif
743
13.8k
    fdct->workspace = (DCTELEM *)
744
13.8k
      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
745
13.8k
                                  sizeof(DCTELEM) * DCTSIZE2);
746
747
  /* Mark divisor tables unallocated */
748
69.4k
  for (i = 0; i < NUM_QUANT_TBLS; i++) {
749
55.5k
    fdct->divisors[i] = NULL;
750
55.5k
#ifdef DCT_FLOAT_SUPPORTED
751
    fdct->float_divisors[i] = NULL;
752
55.5k
#endif
753
55.5k
  }
754
13.8k
}
j12init_forward_dct
Line
Count
Source
638
13.8k
{
639
13.8k
  my_fdct_ptr fdct;
640
13.8k
  int i;
641
642
13.8k
  if (cinfo->data_precision != BITS_IN_JSAMPLE)
643
0
    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
644
645
13.8k
  fdct = (my_fdct_ptr)
646
13.8k
    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
647
13.8k
                                sizeof(my_fdct_controller));
648
13.8k
  cinfo->fdct = (struct jpeg_forward_dct *)fdct;
649
13.8k
  fdct->pub.start_pass = start_pass_fdctmgr;
650
651
  /* First determine the DCT... */
652
13.8k
  switch (cinfo->dct_method) {
653
0
#ifdef DCT_ISLOW_SUPPORTED
654
11.8k
  case JDCT_ISLOW:
655
11.8k
    fdct->pub._forward_DCT = forward_DCT;
656
#ifdef WITH_SIMD
657
    if (jsimd_can_fdct_islow())
658
      fdct->dct = jsimd_fdct_islow;
659
    else
660
#endif
661
11.8k
      fdct->dct = _jpeg_fdct_islow;
662
11.8k
    break;
663
0
#endif
664
0
#ifdef DCT_IFAST_SUPPORTED
665
2.03k
  case JDCT_IFAST:
666
2.03k
    fdct->pub._forward_DCT = forward_DCT;
667
#ifdef WITH_SIMD
668
    if (jsimd_can_fdct_ifast())
669
      fdct->dct = jsimd_fdct_ifast;
670
    else
671
#endif
672
2.03k
      fdct->dct = _jpeg_fdct_ifast;
673
2.03k
    break;
674
0
#endif
675
0
#ifdef DCT_FLOAT_SUPPORTED
676
0
  case JDCT_FLOAT:
677
0
    fdct->pub._forward_DCT = forward_DCT_float;
678
#ifdef WITH_SIMD
679
    if (jsimd_can_fdct_float())
680
      fdct->float_dct = jsimd_fdct_float;
681
    else
682
#endif
683
0
      fdct->float_dct = jpeg_fdct_float;
684
0
    break;
685
0
#endif
686
0
  default:
687
0
    ERREXIT(cinfo, JERR_NOT_COMPILED);
688
0
    break;
689
13.8k
  }
690
691
  /* ...then the supporting stages. */
692
13.8k
  switch (cinfo->dct_method) {
693
0
#ifdef DCT_ISLOW_SUPPORTED
694
11.8k
  case JDCT_ISLOW:
695
11.8k
#endif
696
11.8k
#ifdef DCT_IFAST_SUPPORTED
697
13.8k
  case JDCT_IFAST:
698
13.8k
#endif
699
13.8k
#if defined(DCT_ISLOW_SUPPORTED) || defined(DCT_IFAST_SUPPORTED)
700
#ifdef WITH_SIMD
701
    if (jsimd_can_convsamp())
702
      fdct->convsamp = jsimd_convsamp;
703
    else
704
#endif
705
13.8k
      fdct->convsamp = convsamp;
706
#ifdef WITH_SIMD
707
    if (jsimd_can_quantize())
708
      fdct->quantize = jsimd_quantize;
709
    else
710
#endif
711
13.8k
      fdct->quantize = quantize;
712
13.8k
    break;
713
0
#endif
714
0
#ifdef DCT_FLOAT_SUPPORTED
715
0
  case JDCT_FLOAT:
716
#ifdef WITH_SIMD
717
    if (jsimd_can_convsamp_float())
718
      fdct->float_convsamp = jsimd_convsamp_float;
719
    else
720
#endif
721
0
      fdct->float_convsamp = convsamp_float;
722
#ifdef WITH_SIMD
723
    if (jsimd_can_quantize_float())
724
      fdct->float_quantize = jsimd_quantize_float;
725
    else
726
#endif
727
0
      fdct->float_quantize = quantize_float;
728
0
    break;
729
0
#endif
730
0
  default:
731
0
    ERREXIT(cinfo, JERR_NOT_COMPILED);
732
0
    break;
733
13.8k
  }
734
735
  /* Allocate workspace memory */
736
13.8k
#ifdef DCT_FLOAT_SUPPORTED
737
13.8k
  if (cinfo->dct_method == JDCT_FLOAT)
738
0
    fdct->float_workspace = (FAST_FLOAT *)
739
0
      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
740
0
                                  sizeof(FAST_FLOAT) * DCTSIZE2);
741
13.8k
  else
742
13.8k
#endif
743
13.8k
    fdct->workspace = (DCTELEM *)
744
13.8k
      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
745
13.8k
                                  sizeof(DCTELEM) * DCTSIZE2);
746
747
  /* Mark divisor tables unallocated */
748
69.4k
  for (i = 0; i < NUM_QUANT_TBLS; i++) {
749
55.5k
    fdct->divisors[i] = NULL;
750
55.5k
#ifdef DCT_FLOAT_SUPPORTED
751
    fdct->float_divisors[i] = NULL;
752
55.5k
#endif
753
55.5k
  }
754
13.8k
}
Unexecuted instantiation: jinit_forward_dct