Coverage Report

Created: 2026-02-26 07:10

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libjpeg-turbo.3.0.x/jcdctmgr.c
Line
Count
Source
1
/*
2
 * jcdctmgr.c
3
 *
4
 * This file was part of the Independent JPEG Group's software:
5
 * Copyright (C) 1994-1996, Thomas G. Lane.
6
 * libjpeg-turbo Modifications:
7
 * Copyright (C) 1999-2006, MIYASAKA Masaru.
8
 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
9
 * Copyright (C) 2011, 2014-2015, 2022, 2024, 2026, D. R. Commander.
10
 * For conditions of distribution and use, see the accompanying README.ijg
11
 * file.
12
 *
13
 * This file contains the forward-DCT management logic.
14
 * This code selects a particular DCT implementation to be used,
15
 * and it performs related housekeeping chores including coefficient
16
 * quantization.
17
 */
18
19
#define JPEG_INTERNALS
20
#include "jinclude.h"
21
#include "jpeglib.h"
22
#include "jdct.h"               /* Private declarations for DCT subsystem */
23
#include "jsimddct.h"
24
25
26
/* Private subobject for this module */
27
28
typedef void (*forward_DCT_method_ptr) (DCTELEM *data);
29
typedef void (*float_DCT_method_ptr) (FAST_FLOAT *data);
30
31
typedef void (*convsamp_method_ptr) (_JSAMPARRAY sample_data,
32
                                     JDIMENSION start_col,
33
                                     DCTELEM *workspace);
34
typedef void (*float_convsamp_method_ptr) (_JSAMPARRAY sample_data,
35
                                           JDIMENSION start_col,
36
                                           FAST_FLOAT *workspace);
37
38
typedef void (*quantize_method_ptr) (JCOEFPTR coef_block, DCTELEM *divisors,
39
                                     DCTELEM *workspace);
40
typedef void (*float_quantize_method_ptr) (JCOEFPTR coef_block,
41
                                           FAST_FLOAT *divisors,
42
                                           FAST_FLOAT *workspace);
43
44
METHODDEF(void) quantize(JCOEFPTR, DCTELEM *, DCTELEM *);
45
46
typedef struct {
47
  struct jpeg_forward_dct pub;  /* public fields */
48
49
  /* Pointer to the DCT routine actually in use */
50
  forward_DCT_method_ptr dct;
51
  convsamp_method_ptr convsamp;
52
  quantize_method_ptr quantize;
53
54
  /* The actual post-DCT divisors --- not identical to the quant table
55
   * entries, because of scaling (especially for an unnormalized DCT).
56
   * Each table is given in normal array order.
57
   */
58
  DCTELEM *divisors[NUM_QUANT_TBLS];
59
60
  /* work area for FDCT subroutine */
61
  DCTELEM *workspace;
62
63
#ifdef DCT_FLOAT_SUPPORTED
64
  /* Same as above for the floating-point case. */
65
  float_DCT_method_ptr float_dct;
66
  float_convsamp_method_ptr float_convsamp;
67
  float_quantize_method_ptr float_quantize;
68
  FAST_FLOAT *float_divisors[NUM_QUANT_TBLS];
69
  FAST_FLOAT *float_workspace;
70
#endif
71
} my_fdct_controller;
72
73
typedef my_fdct_controller *my_fdct_ptr;
74
75
76
#if BITS_IN_JSAMPLE == 8
77
78
/*
79
 * Find the highest bit in an integer through binary search.
80
 */
81
82
LOCAL(int)
83
flss(UINT16 val)
84
4.01M
{
85
4.01M
  int bit;
86
87
4.01M
  bit = 16;
88
89
4.01M
  if (!val)
90
0
    return 0;
91
92
4.01M
  if (!(val & 0xff00)) {
93
2.24M
    bit -= 8;
94
2.24M
    val <<= 8;
95
2.24M
  }
96
4.01M
  if (!(val & 0xf000)) {
97
2.53M
    bit -= 4;
98
2.53M
    val <<= 4;
99
2.53M
  }
100
4.01M
  if (!(val & 0xc000)) {
101
2.14M
    bit -= 2;
102
2.14M
    val <<= 2;
103
2.14M
  }
104
4.01M
  if (!(val & 0x8000)) {
105
1.87M
    bit -= 1;
106
1.87M
    val <<= 1;
107
1.87M
  }
108
109
4.01M
  return bit;
110
4.01M
}
111
112
113
/*
114
 * Compute values to do a division using reciprocal.
115
 *
116
 * This implementation is based on an algorithm described in
117
 *   "Optimizing subroutines in assembly language:
118
 *   An optimization guide for x86 platforms" (https://agner.org/optimize).
119
 * More information about the basic algorithm can be found in
120
 * the paper "Integer Division Using Reciprocals" by Robert Alverson.
121
 *
122
 * The basic idea is to replace x/d by x * d^-1. In order to store
123
 * d^-1 with enough precision we shift it left a few places. It turns
124
 * out that this algorithm gives just enough precision, and also fits
125
 * into DCTELEM:
126
 *
127
 *   b = (the number of significant bits in divisor) - 1
128
 *   r = (word size) + b
129
 *   f = 2^r / divisor
130
 *
131
 * f will not be an integer for most cases, so we need to compensate
132
 * for the rounding error introduced:
133
 *
134
 *   no fractional part:
135
 *
136
 *       result = input >> r
137
 *
138
 *   fractional part of f < 0.5:
139
 *
140
 *       round f down to nearest integer
141
 *       result = ((input + 1) * f) >> r
142
 *
143
 *   fractional part of f > 0.5:
144
 *
145
 *       round f up to nearest integer
146
 *       result = (input * f) >> r
147
 *
148
 * This is the original algorithm that gives truncated results. But we
149
 * want properly rounded results, so we replace "input" with
150
 * "input + divisor/2".
151
 *
152
 * In order to allow SIMD implementations we also tweak the values to
153
 * allow the same calculation to be made at all times:
154
 *
155
 *   dctbl[0] = f rounded to nearest integer
156
 *   dctbl[1] = divisor / 2 (+ 1 if fractional part of f < 0.5)
157
 *   dctbl[2] = 1 << ((word size) * 2 - r)
158
 *   dctbl[3] = r - (word size)
159
 *
160
 * dctbl[2] is for stupid instruction sets where the shift operation
161
 * isn't member wise (e.g. MMX).
162
 *
163
 * The reason dctbl[2] and dctbl[3] reduce the shift with (word size)
164
 * is that most SIMD implementations have a "multiply and store top
165
 * half" operation.
166
 *
167
 * Lastly, we store each of the values in their own table instead
168
 * of in a consecutive manner, yet again in order to allow SIMD
169
 * routines.
170
 */
171
172
LOCAL(int)
173
compute_reciprocal(UINT16 divisor, DCTELEM *dtbl)
174
4.01M
{
175
4.01M
  UDCTELEM2 fq, fr;
176
4.01M
  UDCTELEM c;
177
4.01M
  int b, r;
178
179
4.01M
  if (divisor <= 1) {
180
    /* divisor == 1 means unquantized, so these reciprocal/correction/shift
181
     * values will cause the C quantization algorithm to act like the
182
     * identity function.  Since only the C quantization algorithm is used in
183
     * these cases, the scale value is irrelevant.
184
     *
185
     * divisor == 0 can never happen in a normal program, because
186
     * jpeg_add_quant_table() clamps values < 1.  However, a program could
187
     * abuse the API by manually modifying the exposed quantization table just
188
     * before calling jpeg_start_compress().  Thus, we effectively clamp
189
     * values < 1 here as well, to avoid dividing by 0.
190
     */
191
0
    dtbl[DCTSIZE2 * 0] = (DCTELEM)1;                        /* reciprocal */
192
0
    dtbl[DCTSIZE2 * 1] = (DCTELEM)0;                        /* correction */
193
0
    dtbl[DCTSIZE2 * 2] = (DCTELEM)1;                        /* scale */
194
0
    dtbl[DCTSIZE2 * 3] = -(DCTELEM)(sizeof(DCTELEM) * 8);   /* shift */
195
0
    return 0;
196
0
  }
197
198
4.01M
  b = flss(divisor) - 1;
199
4.01M
  r  = sizeof(DCTELEM) * 8 + b;
200
201
4.01M
  fq = ((UDCTELEM2)1 << r) / divisor;
202
4.01M
  fr = ((UDCTELEM2)1 << r) % divisor;
203
204
4.01M
  c = divisor / 2;                      /* for rounding */
205
206
4.01M
  if (fr == 0) {                        /* divisor is power of two */
207
    /* fq will be one bit too large to fit in DCTELEM, so adjust */
208
882k
    fq >>= 1;
209
882k
    r--;
210
3.13M
  } else if (fr <= (divisor / 2U)) {    /* fractional part is < 0.5 */
211
1.14M
    c++;
212
1.98M
  } else {                              /* fractional part is > 0.5 */
213
1.98M
    fq++;
214
1.98M
  }
215
216
4.01M
  dtbl[DCTSIZE2 * 0] = (DCTELEM)fq;     /* reciprocal */
217
4.01M
  dtbl[DCTSIZE2 * 1] = (DCTELEM)c;      /* correction + roundfactor */
218
4.01M
#ifdef WITH_SIMD
219
4.01M
  dtbl[DCTSIZE2 * 2] = (DCTELEM)(1 << (sizeof(DCTELEM) * 8 * 2 - r)); /* scale */
220
#else
221
  dtbl[DCTSIZE2 * 2] = 1;
222
#endif
223
4.01M
  dtbl[DCTSIZE2 * 3] = (DCTELEM)r - sizeof(DCTELEM) * 8; /* shift */
224
225
4.01M
  if (r <= 16) return 0;
226
4.01M
  else return 1;
227
4.01M
}
228
229
#endif
230
231
232
/*
233
 * Initialize for a processing pass.
234
 * Verify that all referenced Q-tables are present, and set up
235
 * the divisor table for each one.
236
 * In the current implementation, DCT of all components is done during
237
 * the first pass, even if only some components will be output in the
238
 * first scan.  Hence all components should be examined here.
239
 */
240
241
METHODDEF(void)
242
start_pass_fdctmgr(j_compress_ptr cinfo)
243
13.8k
{
244
13.8k
  my_fdct_ptr fdct = (my_fdct_ptr)cinfo->fdct;
245
13.8k
  int ci, qtblno, i;
246
13.8k
  jpeg_component_info *compptr;
247
13.8k
  JQUANT_TBL *qtbl;
248
13.8k
  DCTELEM *dtbl;
249
250
50.1k
  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
251
36.2k
       ci++, compptr++) {
252
36.2k
    qtblno = compptr->quant_tbl_no;
253
    /* Make sure specified quantization table is present */
254
36.2k
    if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
255
36.2k
        cinfo->quant_tbl_ptrs[qtblno] == NULL)
256
0
      ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
257
36.2k
    qtbl = cinfo->quant_tbl_ptrs[qtblno];
258
    /* Compute divisors for this quant table */
259
    /* We may do this more than once for same table, but it's not a big deal */
260
36.2k
    switch (cinfo->dct_method) {
261
0
#ifdef DCT_ISLOW_SUPPORTED
262
30.1k
    case JDCT_ISLOW:
263
      /* For LL&M IDCT method, divisors are equal to raw quantization
264
       * coefficients multiplied by 8 (to counteract scaling).
265
       */
266
30.1k
      if (fdct->divisors[qtblno] == NULL) {
267
19.9k
        fdct->divisors[qtblno] = (DCTELEM *)
268
19.9k
          (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
269
19.9k
                                      (DCTSIZE2 * 4) * sizeof(DCTELEM));
270
19.9k
      }
271
30.1k
      dtbl = fdct->divisors[qtblno];
272
1.95M
      for (i = 0; i < DCTSIZE2; i++) {
273
#if BITS_IN_JSAMPLE == 8
274
#ifdef WITH_SIMD
275
        if (!compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i]) &&
276
            fdct->quantize == jsimd_quantize)
277
          fdct->quantize = quantize;
278
#else
279
        compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i]);
280
#endif
281
#else
282
1.92M
        dtbl[i] = ((DCTELEM)qtbl->quantval[i]) << 3;
283
1.92M
#endif
284
1.92M
      }
285
30.1k
      break;
286
0
#endif
287
0
#ifdef DCT_IFAST_SUPPORTED
288
6.09k
    case JDCT_IFAST:
289
6.09k
      {
290
        /* For AA&N IDCT method, divisors are equal to quantization
291
         * coefficients scaled by scalefactor[row]*scalefactor[col], where
292
         *   scalefactor[0] = 1
293
         *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
294
         * We apply a further scale factor of 8.
295
         */
296
6.09k
#define CONST_BITS  14
297
6.09k
        static const INT16 aanscales[DCTSIZE2] = {
298
          /* precomputed values scaled up by 14 bits */
299
6.09k
          16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
300
6.09k
          22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
301
6.09k
          21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
302
6.09k
          19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
303
6.09k
          16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
304
6.09k
          12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
305
6.09k
           8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
306
6.09k
           4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
307
6.09k
        };
308
6.09k
        SHIFT_TEMPS
309
310
6.09k
        if (fdct->divisors[qtblno] == NULL) {
311
4.06k
          fdct->divisors[qtblno] = (DCTELEM *)
312
4.06k
            (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
313
4.06k
                                        (DCTSIZE2 * 4) * sizeof(DCTELEM));
314
4.06k
        }
315
6.09k
        dtbl = fdct->divisors[qtblno];
316
396k
        for (i = 0; i < DCTSIZE2; i++) {
317
#if BITS_IN_JSAMPLE == 8
318
#ifdef WITH_SIMD
319
          if (!compute_reciprocal(
320
                DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
321
                                      (JLONG)aanscales[i]),
322
                        CONST_BITS - 3), &dtbl[i]) &&
323
              fdct->quantize == jsimd_quantize)
324
            fdct->quantize = quantize;
325
#else
326
          compute_reciprocal(
327
            DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
328
                                  (JLONG)aanscales[i]),
329
                    CONST_BITS-3), &dtbl[i]);
330
#endif
331
#else
332
389k
          dtbl[i] = (DCTELEM)
333
389k
            DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
334
389k
                                  (JLONG)aanscales[i]),
335
389k
                    CONST_BITS - 3);
336
389k
#endif
337
389k
        }
338
6.09k
      }
339
6.09k
      break;
340
0
#endif
341
0
#ifdef DCT_FLOAT_SUPPORTED
342
0
    case JDCT_FLOAT:
343
0
      {
344
        /* For float AA&N IDCT method, divisors are equal to quantization
345
         * coefficients scaled by scalefactor[row]*scalefactor[col], where
346
         *   scalefactor[0] = 1
347
         *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
348
         * We apply a further scale factor of 8.
349
         * What's actually stored is 1/divisor so that the inner loop can
350
         * use a multiplication rather than a division.
351
         */
352
0
        FAST_FLOAT *fdtbl;
353
0
        int row, col;
354
0
        static const double aanscalefactor[DCTSIZE] = {
355
0
          1.0, 1.387039845, 1.306562965, 1.175875602,
356
0
          1.0, 0.785694958, 0.541196100, 0.275899379
357
0
        };
358
359
0
        if (fdct->float_divisors[qtblno] == NULL) {
360
0
          fdct->float_divisors[qtblno] = (FAST_FLOAT *)
361
0
            (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
362
0
                                        DCTSIZE2 * sizeof(FAST_FLOAT));
363
0
        }
364
0
        fdtbl = fdct->float_divisors[qtblno];
365
0
        i = 0;
366
0
        for (row = 0; row < DCTSIZE; row++) {
367
0
          for (col = 0; col < DCTSIZE; col++) {
368
0
            fdtbl[i] = (FAST_FLOAT)
369
0
              (1.0 / (((double)qtbl->quantval[i] *
370
0
                       aanscalefactor[row] * aanscalefactor[col] * 8.0)));
371
0
            i++;
372
0
          }
373
0
        }
374
0
      }
375
0
      break;
376
0
#endif
377
0
    default:
378
0
      ERREXIT(cinfo, JERR_NOT_COMPILED);
379
0
      break;
380
36.2k
    }
381
36.2k
  }
382
13.8k
}
383
384
385
/*
386
 * Load data into workspace, applying unsigned->signed conversion.
387
 */
388
389
METHODDEF(void)
390
convsamp(_JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM *workspace)
391
23.0M
{
392
23.0M
  register DCTELEM *workspaceptr;
393
23.0M
  register _JSAMPROW elemptr;
394
23.0M
  register int elemr;
395
396
23.0M
  workspaceptr = workspace;
397
207M
  for (elemr = 0; elemr < DCTSIZE; elemr++) {
398
184M
    elemptr = sample_data[elemr] + start_col;
399
400
184M
#if DCTSIZE == 8                /* unroll the inner loop */
401
184M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
402
184M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
403
184M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
404
184M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
405
184M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
406
184M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
407
184M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
408
184M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
409
#else
410
    {
411
      register int elemc;
412
      for (elemc = DCTSIZE; elemc > 0; elemc--)
413
        *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
414
    }
415
#endif
416
184M
  }
417
23.0M
}
418
419
420
/*
421
 * Quantize/descale the coefficients, and store into coef_blocks[].
422
 */
423
424
METHODDEF(void)
425
quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
426
23.0M
{
427
23.0M
  int i;
428
23.0M
  DCTELEM temp;
429
23.0M
  JCOEFPTR output_ptr = coef_block;
430
431
#if BITS_IN_JSAMPLE == 8
432
433
  UDCTELEM recip, corr;
434
  int shift;
435
  UDCTELEM2 product;
436
437
  for (i = 0; i < DCTSIZE2; i++) {
438
    temp = workspace[i];
439
    recip = divisors[i + DCTSIZE2 * 0];
440
    corr =  divisors[i + DCTSIZE2 * 1];
441
    shift = divisors[i + DCTSIZE2 * 3];
442
443
    if (temp < 0) {
444
      temp = -temp;
445
      product = (UDCTELEM2)(temp + corr) * recip;
446
      product >>= shift + sizeof(DCTELEM) * 8;
447
      temp = (DCTELEM)product;
448
      temp = -temp;
449
    } else {
450
      product = (UDCTELEM2)(temp + corr) * recip;
451
      product >>= shift + sizeof(DCTELEM) * 8;
452
      temp = (DCTELEM)product;
453
    }
454
    output_ptr[i] = (JCOEF)temp;
455
  }
456
457
#else
458
459
23.0M
  register DCTELEM qval;
460
461
1.49G
  for (i = 0; i < DCTSIZE2; i++) {
462
1.47G
    qval = divisors[i];
463
1.47G
    temp = workspace[i];
464
    /* Divide the coefficient value by qval, ensuring proper rounding.
465
     * Since C does not specify the direction of rounding for negative
466
     * quotients, we have to force the dividend positive for portability.
467
     *
468
     * In most files, at least half of the output values will be zero
469
     * (at default quantization settings, more like three-quarters...)
470
     * so we should ensure that this case is fast.  On many machines,
471
     * a comparison is enough cheaper than a divide to make a special test
472
     * a win.  Since both inputs will be nonnegative, we need only test
473
     * for a < b to discover whether a/b is 0.
474
     * If your machine's division is fast enough, define FAST_DIVIDE.
475
     */
476
#ifdef FAST_DIVIDE
477
#define DIVIDE_BY(a, b)  a /= b
478
#else
479
1.47G
#define DIVIDE_BY(a, b)  if (a >= b) a /= b;  else a = 0
480
1.47G
#endif
481
1.47G
    if (temp < 0) {
482
74.3M
      temp = -temp;
483
74.3M
      temp += qval >> 1;        /* for rounding */
484
74.3M
      DIVIDE_BY(temp, qval);
485
74.3M
      temp = -temp;
486
1.39G
    } else {
487
1.39G
      temp += qval >> 1;        /* for rounding */
488
1.39G
      DIVIDE_BY(temp, qval);
489
1.39G
    }
490
1.47G
    output_ptr[i] = (JCOEF)temp;
491
1.47G
  }
492
493
23.0M
#endif
494
495
23.0M
}
496
497
498
/*
499
 * Perform forward DCT on one or more blocks of a component.
500
 *
501
 * The input samples are taken from the sample_data[] array starting at
502
 * position start_row/start_col, and moving to the right for any additional
503
 * blocks. The quantized coefficients are returned in coef_blocks[].
504
 */
505
506
METHODDEF(void)
507
forward_DCT(j_compress_ptr cinfo, jpeg_component_info *compptr,
508
            _JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
509
            JDIMENSION start_row, JDIMENSION start_col, JDIMENSION num_blocks)
510
/* This version is used for integer DCT implementations. */
511
47.1M
{
512
  /* This routine is heavily used, so it's worth coding it tightly. */
513
47.1M
  my_fdct_ptr fdct = (my_fdct_ptr)cinfo->fdct;
514
47.1M
  DCTELEM *divisors = fdct->divisors[compptr->quant_tbl_no];
515
47.1M
  DCTELEM *workspace;
516
47.1M
  JDIMENSION bi;
517
518
  /* Make sure the compiler doesn't look up these every pass */
519
47.1M
  forward_DCT_method_ptr do_dct = fdct->dct;
520
47.1M
  convsamp_method_ptr do_convsamp = fdct->convsamp;
521
47.1M
  quantize_method_ptr do_quantize = fdct->quantize;
522
47.1M
  workspace = fdct->workspace;
523
524
47.1M
  sample_data += start_row;     /* fold in the vertical offset once */
525
526
108M
  for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
527
    /* Load data into workspace, applying unsigned->signed conversion */
528
60.9M
    (*do_convsamp) (sample_data, start_col, workspace);
529
530
    /* Perform the DCT */
531
60.9M
    (*do_dct) (workspace);
532
533
    /* Quantize/descale the coefficients, and store into coef_blocks[] */
534
60.9M
    (*do_quantize) (coef_blocks[bi], divisors, workspace);
535
60.9M
  }
536
47.1M
}
537
538
539
#ifdef DCT_FLOAT_SUPPORTED
540
541
METHODDEF(void)
542
convsamp_float(_JSAMPARRAY sample_data, JDIMENSION start_col,
543
               FAST_FLOAT *workspace)
544
0
{
545
0
  register FAST_FLOAT *workspaceptr;
546
0
  register _JSAMPROW elemptr;
547
0
  register int elemr;
548
549
0
  workspaceptr = workspace;
550
0
  for (elemr = 0; elemr < DCTSIZE; elemr++) {
551
0
    elemptr = sample_data[elemr] + start_col;
552
0
#if DCTSIZE == 8                /* unroll the inner loop */
553
0
    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
554
0
    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
555
0
    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
556
0
    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
557
0
    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
558
0
    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
559
0
    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
560
0
    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
561
#else
562
    {
563
      register int elemc;
564
      for (elemc = DCTSIZE; elemc > 0; elemc--)
565
        *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
566
    }
567
#endif
568
0
  }
569
0
}
570
571
572
METHODDEF(void)
573
quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
574
               FAST_FLOAT *workspace)
575
0
{
576
0
  register FAST_FLOAT temp;
577
0
  register int i;
578
0
  register JCOEFPTR output_ptr = coef_block;
579
580
0
  for (i = 0; i < DCTSIZE2; i++) {
581
    /* Apply the quantization and scaling factor */
582
0
    temp = workspace[i] * divisors[i];
583
584
    /* Round to nearest integer.
585
     * Since C does not specify the direction of rounding for negative
586
     * quotients, we have to force the dividend positive for portability.
587
     * The maximum coefficient size is +-16K (for 12-bit data), so this
588
     * code should work for either 16-bit or 32-bit ints.
589
     */
590
0
    output_ptr[i] = (JCOEF)((int)(temp + (FAST_FLOAT)16384.5) - 16384);
591
0
  }
592
0
}
593
594
595
METHODDEF(void)
596
forward_DCT_float(j_compress_ptr cinfo, jpeg_component_info *compptr,
597
                  _JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
598
                  JDIMENSION start_row, JDIMENSION start_col,
599
                  JDIMENSION num_blocks)
600
/* This version is used for floating-point DCT implementations. */
601
8.43M
{
602
  /* This routine is heavily used, so it's worth coding it tightly. */
603
8.43M
  my_fdct_ptr fdct = (my_fdct_ptr)cinfo->fdct;
604
8.43M
  FAST_FLOAT *divisors = fdct->float_divisors[compptr->quant_tbl_no];
605
8.43M
  FAST_FLOAT *workspace;
606
8.43M
  JDIMENSION bi;
607
608
609
  /* Make sure the compiler doesn't look up these every pass */
610
8.43M
  float_DCT_method_ptr do_dct = fdct->float_dct;
611
8.43M
  float_convsamp_method_ptr do_convsamp = fdct->float_convsamp;
612
8.43M
  float_quantize_method_ptr do_quantize = fdct->float_quantize;
613
8.43M
  workspace = fdct->float_workspace;
614
615
8.43M
  sample_data += start_row;     /* fold in the vertical offset once */
616
617
20.5M
  for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
618
    /* Load data into workspace, applying unsigned->signed conversion */
619
12.0M
    (*do_convsamp) (sample_data, start_col, workspace);
620
621
    /* Perform the DCT */
622
12.0M
    (*do_dct) (workspace);
623
624
    /* Quantize/descale the coefficients, and store into coef_blocks[] */
625
12.0M
    (*do_quantize) (coef_blocks[bi], divisors, workspace);
626
12.0M
  }
627
8.43M
}
628
629
#endif /* DCT_FLOAT_SUPPORTED */
630
631
632
/*
633
 * Initialize FDCT manager.
634
 */
635
636
GLOBAL(void)
637
_jinit_forward_dct(j_compress_ptr cinfo)
638
95.3k
{
639
95.3k
  my_fdct_ptr fdct;
640
95.3k
  int i;
641
642
95.3k
  if (cinfo->data_precision != BITS_IN_JSAMPLE)
643
0
    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
644
645
95.3k
  fdct = (my_fdct_ptr)
646
95.3k
    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
647
95.3k
                                sizeof(my_fdct_controller));
648
95.3k
  cinfo->fdct = (struct jpeg_forward_dct *)fdct;
649
95.3k
  fdct->pub.start_pass = start_pass_fdctmgr;
650
651
  /* First determine the DCT... */
652
95.3k
  switch (cinfo->dct_method) {
653
0
#ifdef DCT_ISLOW_SUPPORTED
654
73.7k
  case JDCT_ISLOW:
655
73.7k
    fdct->pub._forward_DCT = forward_DCT;
656
#ifdef WITH_SIMD
657
39.2k
    if (jsimd_can_fdct_islow())
658
39.2k
      fdct->dct = jsimd_fdct_islow;
659
0
    else
660
0
#endif
661
34.5k
      fdct->dct = _jpeg_fdct_islow;
662
73.7k
    break;
663
0
#endif
664
0
#ifdef DCT_IFAST_SUPPORTED
665
15.0k
  case JDCT_IFAST:
666
15.0k
    fdct->pub._forward_DCT = forward_DCT;
667
#ifdef WITH_SIMD
668
9.18k
    if (jsimd_can_fdct_ifast())
669
9.18k
      fdct->dct = jsimd_fdct_ifast;
670
0
    else
671
0
#endif
672
5.84k
      fdct->dct = _jpeg_fdct_ifast;
673
15.0k
    break;
674
0
#endif
675
0
#ifdef DCT_FLOAT_SUPPORTED
676
6.49k
  case JDCT_FLOAT:
677
6.49k
    fdct->pub._forward_DCT = forward_DCT_float;
678
#ifdef WITH_SIMD
679
6.49k
    if (jsimd_can_fdct_float())
680
6.49k
      fdct->float_dct = jsimd_fdct_float;
681
0
    else
682
0
#endif
683
0
      fdct->float_dct = jpeg_fdct_float;
684
6.49k
    break;
685
0
#endif
686
0
  default:
687
0
    ERREXIT(cinfo, JERR_NOT_COMPILED);
688
0
    break;
689
95.3k
  }
690
691
  /* ...then the supporting stages. */
692
95.3k
  switch (cinfo->dct_method) {
693
0
#ifdef DCT_ISLOW_SUPPORTED
694
73.7k
  case JDCT_ISLOW:
695
73.7k
#endif
696
73.7k
#ifdef DCT_IFAST_SUPPORTED
697
88.8k
  case JDCT_IFAST:
698
88.8k
#endif
699
88.8k
#if defined(DCT_ISLOW_SUPPORTED) || defined(DCT_IFAST_SUPPORTED)
700
#ifdef WITH_SIMD
701
48.4k
    if (jsimd_can_convsamp())
702
48.4k
      fdct->convsamp = jsimd_convsamp;
703
0
    else
704
0
#endif
705
0
      fdct->convsamp = convsamp;
706
#ifdef WITH_SIMD
707
48.4k
    if (jsimd_can_quantize())
708
48.4k
      fdct->quantize = jsimd_quantize;
709
0
    else
710
0
#endif
711
0
      fdct->quantize = quantize;
712
88.8k
    break;
713
0
#endif
714
0
#ifdef DCT_FLOAT_SUPPORTED
715
6.49k
  case JDCT_FLOAT:
716
#ifdef WITH_SIMD
717
6.49k
    if (jsimd_can_convsamp_float())
718
6.49k
      fdct->float_convsamp = jsimd_convsamp_float;
719
0
    else
720
0
#endif
721
0
      fdct->float_convsamp = convsamp_float;
722
#ifdef WITH_SIMD
723
6.49k
    if (jsimd_can_quantize_float())
724
6.49k
      fdct->float_quantize = jsimd_quantize_float;
725
0
    else
726
0
#endif
727
0
      fdct->float_quantize = quantize_float;
728
6.49k
    break;
729
0
#endif
730
0
  default:
731
0
    ERREXIT(cinfo, JERR_NOT_COMPILED);
732
0
    break;
733
95.3k
  }
734
735
  /* Allocate workspace memory */
736
95.3k
#ifdef DCT_FLOAT_SUPPORTED
737
95.3k
  if (cinfo->dct_method == JDCT_FLOAT)
738
6.49k
    fdct->float_workspace = (FAST_FLOAT *)
739
6.49k
      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
740
6.49k
                                  sizeof(FAST_FLOAT) * DCTSIZE2);
741
88.8k
  else
742
88.8k
#endif
743
88.8k
    fdct->workspace = (DCTELEM *)
744
88.8k
      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
745
88.8k
                                  sizeof(DCTELEM) * DCTSIZE2);
746
747
  /* Mark divisor tables unallocated */
748
476k
  for (i = 0; i < NUM_QUANT_TBLS; i++) {
749
381k
    fdct->divisors[i] = NULL;
750
381k
#ifdef DCT_FLOAT_SUPPORTED
751
    fdct->float_divisors[i] = NULL;
752
381k
#endif
753
381k
  }
754
95.3k
}
j12init_forward_dct
Line
Count
Source
638
40.3k
{
639
40.3k
  my_fdct_ptr fdct;
640
40.3k
  int i;
641
642
40.3k
  if (cinfo->data_precision != BITS_IN_JSAMPLE)
643
0
    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
644
645
40.3k
  fdct = (my_fdct_ptr)
646
40.3k
    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
647
40.3k
                                sizeof(my_fdct_controller));
648
40.3k
  cinfo->fdct = (struct jpeg_forward_dct *)fdct;
649
40.3k
  fdct->pub.start_pass = start_pass_fdctmgr;
650
651
  /* First determine the DCT... */
652
40.3k
  switch (cinfo->dct_method) {
653
0
#ifdef DCT_ISLOW_SUPPORTED
654
34.5k
  case JDCT_ISLOW:
655
34.5k
    fdct->pub._forward_DCT = forward_DCT;
656
#ifdef WITH_SIMD
657
    if (jsimd_can_fdct_islow())
658
      fdct->dct = jsimd_fdct_islow;
659
    else
660
#endif
661
34.5k
      fdct->dct = _jpeg_fdct_islow;
662
34.5k
    break;
663
0
#endif
664
0
#ifdef DCT_IFAST_SUPPORTED
665
5.84k
  case JDCT_IFAST:
666
5.84k
    fdct->pub._forward_DCT = forward_DCT;
667
#ifdef WITH_SIMD
668
    if (jsimd_can_fdct_ifast())
669
      fdct->dct = jsimd_fdct_ifast;
670
    else
671
#endif
672
5.84k
      fdct->dct = _jpeg_fdct_ifast;
673
5.84k
    break;
674
0
#endif
675
0
#ifdef DCT_FLOAT_SUPPORTED
676
0
  case JDCT_FLOAT:
677
0
    fdct->pub._forward_DCT = forward_DCT_float;
678
#ifdef WITH_SIMD
679
    if (jsimd_can_fdct_float())
680
      fdct->float_dct = jsimd_fdct_float;
681
    else
682
#endif
683
0
      fdct->float_dct = jpeg_fdct_float;
684
0
    break;
685
0
#endif
686
0
  default:
687
0
    ERREXIT(cinfo, JERR_NOT_COMPILED);
688
0
    break;
689
40.3k
  }
690
691
  /* ...then the supporting stages. */
692
40.3k
  switch (cinfo->dct_method) {
693
0
#ifdef DCT_ISLOW_SUPPORTED
694
34.5k
  case JDCT_ISLOW:
695
34.5k
#endif
696
34.5k
#ifdef DCT_IFAST_SUPPORTED
697
40.3k
  case JDCT_IFAST:
698
40.3k
#endif
699
40.3k
#if defined(DCT_ISLOW_SUPPORTED) || defined(DCT_IFAST_SUPPORTED)
700
#ifdef WITH_SIMD
701
    if (jsimd_can_convsamp())
702
      fdct->convsamp = jsimd_convsamp;
703
    else
704
#endif
705
40.3k
      fdct->convsamp = convsamp;
706
#ifdef WITH_SIMD
707
    if (jsimd_can_quantize())
708
      fdct->quantize = jsimd_quantize;
709
    else
710
#endif
711
40.3k
      fdct->quantize = quantize;
712
40.3k
    break;
713
0
#endif
714
0
#ifdef DCT_FLOAT_SUPPORTED
715
0
  case JDCT_FLOAT:
716
#ifdef WITH_SIMD
717
    if (jsimd_can_convsamp_float())
718
      fdct->float_convsamp = jsimd_convsamp_float;
719
    else
720
#endif
721
0
      fdct->float_convsamp = convsamp_float;
722
#ifdef WITH_SIMD
723
    if (jsimd_can_quantize_float())
724
      fdct->float_quantize = jsimd_quantize_float;
725
    else
726
#endif
727
0
      fdct->float_quantize = quantize_float;
728
0
    break;
729
0
#endif
730
0
  default:
731
0
    ERREXIT(cinfo, JERR_NOT_COMPILED);
732
0
    break;
733
40.3k
  }
734
735
  /* Allocate workspace memory */
736
40.3k
#ifdef DCT_FLOAT_SUPPORTED
737
40.3k
  if (cinfo->dct_method == JDCT_FLOAT)
738
0
    fdct->float_workspace = (FAST_FLOAT *)
739
0
      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
740
0
                                  sizeof(FAST_FLOAT) * DCTSIZE2);
741
40.3k
  else
742
40.3k
#endif
743
40.3k
    fdct->workspace = (DCTELEM *)
744
40.3k
      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
745
40.3k
                                  sizeof(DCTELEM) * DCTSIZE2);
746
747
  /* Mark divisor tables unallocated */
748
201k
  for (i = 0; i < NUM_QUANT_TBLS; i++) {
749
161k
    fdct->divisors[i] = NULL;
750
161k
#ifdef DCT_FLOAT_SUPPORTED
751
    fdct->float_divisors[i] = NULL;
752
161k
#endif
753
161k
  }
754
40.3k
}
jinit_forward_dct
Line
Count
Source
638
54.9k
{
639
54.9k
  my_fdct_ptr fdct;
640
54.9k
  int i;
641
642
54.9k
  if (cinfo->data_precision != BITS_IN_JSAMPLE)
643
0
    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
644
645
54.9k
  fdct = (my_fdct_ptr)
646
54.9k
    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
647
54.9k
                                sizeof(my_fdct_controller));
648
54.9k
  cinfo->fdct = (struct jpeg_forward_dct *)fdct;
649
54.9k
  fdct->pub.start_pass = start_pass_fdctmgr;
650
651
  /* First determine the DCT... */
652
54.9k
  switch (cinfo->dct_method) {
653
0
#ifdef DCT_ISLOW_SUPPORTED
654
39.2k
  case JDCT_ISLOW:
655
39.2k
    fdct->pub._forward_DCT = forward_DCT;
656
39.2k
#ifdef WITH_SIMD
657
39.2k
    if (jsimd_can_fdct_islow())
658
39.2k
      fdct->dct = jsimd_fdct_islow;
659
0
    else
660
0
#endif
661
0
      fdct->dct = _jpeg_fdct_islow;
662
39.2k
    break;
663
0
#endif
664
0
#ifdef DCT_IFAST_SUPPORTED
665
9.18k
  case JDCT_IFAST:
666
9.18k
    fdct->pub._forward_DCT = forward_DCT;
667
9.18k
#ifdef WITH_SIMD
668
9.18k
    if (jsimd_can_fdct_ifast())
669
9.18k
      fdct->dct = jsimd_fdct_ifast;
670
0
    else
671
0
#endif
672
0
      fdct->dct = _jpeg_fdct_ifast;
673
9.18k
    break;
674
0
#endif
675
0
#ifdef DCT_FLOAT_SUPPORTED
676
6.49k
  case JDCT_FLOAT:
677
6.49k
    fdct->pub._forward_DCT = forward_DCT_float;
678
6.49k
#ifdef WITH_SIMD
679
6.49k
    if (jsimd_can_fdct_float())
680
6.49k
      fdct->float_dct = jsimd_fdct_float;
681
0
    else
682
0
#endif
683
0
      fdct->float_dct = jpeg_fdct_float;
684
6.49k
    break;
685
0
#endif
686
0
  default:
687
0
    ERREXIT(cinfo, JERR_NOT_COMPILED);
688
0
    break;
689
54.9k
  }
690
691
  /* ...then the supporting stages. */
692
54.9k
  switch (cinfo->dct_method) {
693
0
#ifdef DCT_ISLOW_SUPPORTED
694
39.2k
  case JDCT_ISLOW:
695
39.2k
#endif
696
39.2k
#ifdef DCT_IFAST_SUPPORTED
697
48.4k
  case JDCT_IFAST:
698
48.4k
#endif
699
48.4k
#if defined(DCT_ISLOW_SUPPORTED) || defined(DCT_IFAST_SUPPORTED)
700
48.4k
#ifdef WITH_SIMD
701
48.4k
    if (jsimd_can_convsamp())
702
48.4k
      fdct->convsamp = jsimd_convsamp;
703
0
    else
704
0
#endif
705
0
      fdct->convsamp = convsamp;
706
48.4k
#ifdef WITH_SIMD
707
48.4k
    if (jsimd_can_quantize())
708
48.4k
      fdct->quantize = jsimd_quantize;
709
0
    else
710
0
#endif
711
0
      fdct->quantize = quantize;
712
48.4k
    break;
713
0
#endif
714
0
#ifdef DCT_FLOAT_SUPPORTED
715
6.49k
  case JDCT_FLOAT:
716
6.49k
#ifdef WITH_SIMD
717
6.49k
    if (jsimd_can_convsamp_float())
718
6.49k
      fdct->float_convsamp = jsimd_convsamp_float;
719
0
    else
720
0
#endif
721
0
      fdct->float_convsamp = convsamp_float;
722
6.49k
#ifdef WITH_SIMD
723
6.49k
    if (jsimd_can_quantize_float())
724
6.49k
      fdct->float_quantize = jsimd_quantize_float;
725
0
    else
726
0
#endif
727
0
      fdct->float_quantize = quantize_float;
728
6.49k
    break;
729
0
#endif
730
0
  default:
731
0
    ERREXIT(cinfo, JERR_NOT_COMPILED);
732
0
    break;
733
54.9k
  }
734
735
  /* Allocate workspace memory */
736
54.9k
#ifdef DCT_FLOAT_SUPPORTED
737
54.9k
  if (cinfo->dct_method == JDCT_FLOAT)
738
6.49k
    fdct->float_workspace = (FAST_FLOAT *)
739
6.49k
      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
740
6.49k
                                  sizeof(FAST_FLOAT) * DCTSIZE2);
741
48.4k
  else
742
48.4k
#endif
743
48.4k
    fdct->workspace = (DCTELEM *)
744
48.4k
      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
745
48.4k
                                  sizeof(DCTELEM) * DCTSIZE2);
746
747
  /* Mark divisor tables unallocated */
748
274k
  for (i = 0; i < NUM_QUANT_TBLS; i++) {
749
219k
    fdct->divisors[i] = NULL;
750
219k
#ifdef DCT_FLOAT_SUPPORTED
751
    fdct->float_divisors[i] = NULL;
752
219k
#endif
753
219k
  }
754
54.9k
}