Coverage Report

Created: 2026-04-12 06:05

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libjpeg-turbo.3.0.x/jcdctmgr.c
Line
Count
Source
1
/*
2
 * jcdctmgr.c
3
 *
4
 * This file was part of the Independent JPEG Group's software:
5
 * Copyright (C) 1994-1996, Thomas G. Lane.
6
 * libjpeg-turbo Modifications:
7
 * Copyright (C) 1999-2006, MIYASAKA Masaru.
8
 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
9
 * Copyright (C) 2011, 2014-2015, 2022, 2024, 2026, D. R. Commander.
10
 * For conditions of distribution and use, see the accompanying README.ijg
11
 * file.
12
 *
13
 * This file contains the forward-DCT management logic.
14
 * This code selects a particular DCT implementation to be used,
15
 * and it performs related housekeeping chores including coefficient
16
 * quantization.
17
 */
18
19
#define JPEG_INTERNALS
20
#include "jinclude.h"
21
#include "jpeglib.h"
22
#include "jdct.h"               /* Private declarations for DCT subsystem */
23
#include "jsimddct.h"
24
25
26
#if defined(DCT_ISLOW_SUPPORTED) || defined(DCT_IFAST_SUPPORTED) || \
27
    defined(DCT_FLOAT_SUPPORTED)
28
29
/* Private subobject for this module */
30
31
typedef void (*forward_DCT_method_ptr) (DCTELEM *data);
32
typedef void (*float_DCT_method_ptr) (FAST_FLOAT *data);
33
34
typedef void (*convsamp_method_ptr) (_JSAMPARRAY sample_data,
35
                                     JDIMENSION start_col,
36
                                     DCTELEM *workspace);
37
typedef void (*float_convsamp_method_ptr) (_JSAMPARRAY sample_data,
38
                                           JDIMENSION start_col,
39
                                           FAST_FLOAT *workspace);
40
41
typedef void (*quantize_method_ptr) (JCOEFPTR coef_block, DCTELEM *divisors,
42
                                     DCTELEM *workspace);
43
typedef void (*float_quantize_method_ptr) (JCOEFPTR coef_block,
44
                                           FAST_FLOAT *divisors,
45
                                           FAST_FLOAT *workspace);
46
47
METHODDEF(void) quantize(JCOEFPTR, DCTELEM *, DCTELEM *);
48
49
typedef struct {
50
  struct jpeg_forward_dct pub;  /* public fields */
51
52
  /* Pointer to the DCT routine actually in use */
53
  forward_DCT_method_ptr dct;
54
  convsamp_method_ptr convsamp;
55
  quantize_method_ptr quantize;
56
57
  /* The actual post-DCT divisors --- not identical to the quant table
58
   * entries, because of scaling (especially for an unnormalized DCT).
59
   * Each table is given in normal array order.
60
   */
61
  DCTELEM *divisors[NUM_QUANT_TBLS];
62
63
  /* work area for FDCT subroutine */
64
  DCTELEM *workspace;
65
66
#ifdef DCT_FLOAT_SUPPORTED
67
  /* Same as above for the floating-point case. */
68
  float_DCT_method_ptr float_dct;
69
  float_convsamp_method_ptr float_convsamp;
70
  float_quantize_method_ptr float_quantize;
71
  FAST_FLOAT *float_divisors[NUM_QUANT_TBLS];
72
  FAST_FLOAT *float_workspace;
73
#endif
74
} my_fdct_controller;
75
76
typedef my_fdct_controller *my_fdct_ptr;
77
78
79
#if BITS_IN_JSAMPLE == 8
80
81
/*
82
 * Find the highest bit in an integer through binary search.
83
 */
84
85
LOCAL(int)
86
flss(UINT16 val)
87
3.96M
{
88
3.96M
  int bit;
89
90
3.96M
  bit = 16;
91
92
3.96M
  if (!val)
93
0
    return 0;
94
95
3.96M
  if (!(val & 0xff00)) {
96
2.21M
    bit -= 8;
97
2.21M
    val <<= 8;
98
2.21M
  }
99
3.96M
  if (!(val & 0xf000)) {
100
2.50M
    bit -= 4;
101
2.50M
    val <<= 4;
102
2.50M
  }
103
3.96M
  if (!(val & 0xc000)) {
104
2.11M
    bit -= 2;
105
2.11M
    val <<= 2;
106
2.11M
  }
107
3.96M
  if (!(val & 0x8000)) {
108
1.84M
    bit -= 1;
109
1.84M
    val <<= 1;
110
1.84M
  }
111
112
3.96M
  return bit;
113
3.96M
}
114
115
116
/*
117
 * Compute values to do a division using reciprocal.
118
 *
119
 * This implementation is based on an algorithm described in
120
 *   "Optimizing subroutines in assembly language:
121
 *   An optimization guide for x86 platforms" (https://agner.org/optimize).
122
 * More information about the basic algorithm can be found in
123
 * the paper "Integer Division Using Reciprocals" by Robert Alverson.
124
 *
125
 * The basic idea is to replace x/d by x * d^-1. In order to store
126
 * d^-1 with enough precision we shift it left a few places. It turns
127
 * out that this algorithm gives just enough precision, and also fits
128
 * into DCTELEM:
129
 *
130
 *   b = (the number of significant bits in divisor) - 1
131
 *   r = (word size) + b
132
 *   f = 2^r / divisor
133
 *
134
 * f will not be an integer for most cases, so we need to compensate
135
 * for the rounding error introduced:
136
 *
137
 *   no fractional part:
138
 *
139
 *       result = input >> r
140
 *
141
 *   fractional part of f < 0.5:
142
 *
143
 *       round f down to nearest integer
144
 *       result = ((input + 1) * f) >> r
145
 *
146
 *   fractional part of f > 0.5:
147
 *
148
 *       round f up to nearest integer
149
 *       result = (input * f) >> r
150
 *
151
 * This is the original algorithm that gives truncated results. But we
152
 * want properly rounded results, so we replace "input" with
153
 * "input + divisor/2".
154
 *
155
 * In order to allow SIMD implementations we also tweak the values to
156
 * allow the same calculation to be made at all times:
157
 *
158
 *   dctbl[0] = f rounded to nearest integer
159
 *   dctbl[1] = divisor / 2 (+ 1 if fractional part of f < 0.5)
160
 *   dctbl[2] = 1 << ((word size) * 2 - r)
161
 *   dctbl[3] = r - (word size)
162
 *
163
 * dctbl[2] is for stupid instruction sets where the shift operation
164
 * isn't member wise (e.g. MMX).
165
 *
166
 * The reason dctbl[2] and dctbl[3] reduce the shift with (word size)
167
 * is that most SIMD implementations have a "multiply and store top
168
 * half" operation.
169
 *
170
 * Lastly, we store each of the values in their own table instead
171
 * of in a consecutive manner, yet again in order to allow SIMD
172
 * routines.
173
 */
174
175
LOCAL(int)
176
compute_reciprocal(UINT16 divisor, DCTELEM *dtbl)
177
3.96M
{
178
3.96M
  UDCTELEM2 fq, fr;
179
3.96M
  UDCTELEM c;
180
3.96M
  int b, r;
181
182
3.96M
  if (divisor <= 1) {
183
    /* divisor == 1 means unquantized, so these reciprocal/correction/shift
184
     * values will cause the C quantization algorithm to act like the
185
     * identity function.  Since only the C quantization algorithm is used in
186
     * these cases, the scale value is irrelevant.
187
     *
188
     * divisor == 0 can never happen in a normal program, because
189
     * jpeg_add_quant_table() clamps values < 1.  However, a program could
190
     * abuse the API by manually modifying the exposed quantization table just
191
     * before calling jpeg_start_compress().  Thus, we effectively clamp
192
     * values < 1 here as well, to avoid dividing by 0.
193
     */
194
0
    dtbl[DCTSIZE2 * 0] = (DCTELEM)1;                        /* reciprocal */
195
0
    dtbl[DCTSIZE2 * 1] = (DCTELEM)0;                        /* correction */
196
0
    dtbl[DCTSIZE2 * 2] = (DCTELEM)1;                        /* scale */
197
0
    dtbl[DCTSIZE2 * 3] = -(DCTELEM)(sizeof(DCTELEM) * 8);   /* shift */
198
0
    return 0;
199
0
  }
200
201
3.96M
  b = flss(divisor) - 1;
202
3.96M
  r  = sizeof(DCTELEM) * 8 + b;
203
204
3.96M
  fq = ((UDCTELEM2)1 << r) / divisor;
205
3.96M
  fr = ((UDCTELEM2)1 << r) % divisor;
206
207
3.96M
  c = divisor / 2;                      /* for rounding */
208
209
3.96M
  if (fr == 0) {                        /* divisor is power of two */
210
    /* fq will be one bit too large to fit in DCTELEM, so adjust */
211
870k
    fq >>= 1;
212
870k
    r--;
213
3.09M
  } else if (fr <= (divisor / 2U)) {    /* fractional part is < 0.5 */
214
1.13M
    c++;
215
1.95M
  } else {                              /* fractional part is > 0.5 */
216
1.95M
    fq++;
217
1.95M
  }
218
219
3.96M
  dtbl[DCTSIZE2 * 0] = (DCTELEM)fq;     /* reciprocal */
220
3.96M
  dtbl[DCTSIZE2 * 1] = (DCTELEM)c;      /* correction + roundfactor */
221
3.96M
#ifdef WITH_SIMD
222
3.96M
  dtbl[DCTSIZE2 * 2] = (DCTELEM)(1 << (sizeof(DCTELEM) * 8 * 2 - r)); /* scale */
223
#else
224
  dtbl[DCTSIZE2 * 2] = 1;
225
#endif
226
3.96M
  dtbl[DCTSIZE2 * 3] = (DCTELEM)r - sizeof(DCTELEM) * 8; /* shift */
227
228
3.96M
  if (r <= 16) return 0;
229
3.96M
  else return 1;
230
3.96M
}
231
232
#endif
233
234
235
/*
236
 * Initialize for a processing pass.
237
 * Verify that all referenced Q-tables are present, and set up
238
 * the divisor table for each one.
239
 * In the current implementation, DCT of all components is done during
240
 * the first pass, even if only some components will be output in the
241
 * first scan.  Hence all components should be examined here.
242
 */
243
244
METHODDEF(void)
245
start_pass_fdctmgr(j_compress_ptr cinfo)
246
13.7k
{
247
13.7k
  my_fdct_ptr fdct = (my_fdct_ptr)cinfo->fdct;
248
13.7k
  int ci, qtblno, i;
249
13.7k
  jpeg_component_info *compptr;
250
13.7k
  JQUANT_TBL *qtbl;
251
13.7k
  DCTELEM *dtbl;
252
253
49.7k
  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
254
35.9k
       ci++, compptr++) {
255
35.9k
    qtblno = compptr->quant_tbl_no;
256
    /* Make sure specified quantization table is present */
257
35.9k
    if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
258
35.9k
        cinfo->quant_tbl_ptrs[qtblno] == NULL)
259
0
      ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
260
35.9k
    qtbl = cinfo->quant_tbl_ptrs[qtblno];
261
    /* Compute divisors for this quant table */
262
    /* We may do this more than once for same table, but it's not a big deal */
263
35.9k
    switch (cinfo->dct_method) {
264
0
#ifdef DCT_ISLOW_SUPPORTED
265
29.9k
    case JDCT_ISLOW:
266
      /* For LL&M IDCT method, divisors are equal to raw quantization
267
       * coefficients multiplied by 8 (to counteract scaling).
268
       */
269
29.9k
      if (fdct->divisors[qtblno] == NULL) {
270
19.8k
        fdct->divisors[qtblno] = (DCTELEM *)
271
19.8k
          (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
272
19.8k
                                      (DCTSIZE2 * 4) * sizeof(DCTELEM));
273
19.8k
      }
274
29.9k
      dtbl = fdct->divisors[qtblno];
275
1.94M
      for (i = 0; i < DCTSIZE2; i++) {
276
#if BITS_IN_JSAMPLE == 8
277
#ifdef WITH_SIMD
278
        if (!compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i]) &&
279
            fdct->quantize == jsimd_quantize)
280
          fdct->quantize = quantize;
281
#else
282
        compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i]);
283
#endif
284
#else
285
1.91M
        dtbl[i] = ((DCTELEM)qtbl->quantval[i]) << 3;
286
1.91M
#endif
287
1.91M
      }
288
29.9k
      break;
289
0
#endif
290
0
#ifdef DCT_IFAST_SUPPORTED
291
6.05k
    case JDCT_IFAST:
292
6.05k
      {
293
        /* For AA&N IDCT method, divisors are equal to quantization
294
         * coefficients scaled by scalefactor[row]*scalefactor[col], where
295
         *   scalefactor[0] = 1
296
         *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
297
         * We apply a further scale factor of 8.
298
         */
299
6.05k
#define CONST_BITS  14
300
6.05k
        static const INT16 aanscales[DCTSIZE2] = {
301
          /* precomputed values scaled up by 14 bits */
302
6.05k
          16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
303
6.05k
          22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
304
6.05k
          21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
305
6.05k
          19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
306
6.05k
          16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
307
6.05k
          12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
308
6.05k
           8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
309
6.05k
           4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
310
6.05k
        };
311
6.05k
        SHIFT_TEMPS
312
313
6.05k
        if (fdct->divisors[qtblno] == NULL) {
314
4.03k
          fdct->divisors[qtblno] = (DCTELEM *)
315
4.03k
            (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
316
4.03k
                                        (DCTSIZE2 * 4) * sizeof(DCTELEM));
317
4.03k
        }
318
6.05k
        dtbl = fdct->divisors[qtblno];
319
393k
        for (i = 0; i < DCTSIZE2; i++) {
320
#if BITS_IN_JSAMPLE == 8
321
#ifdef WITH_SIMD
322
          if (!compute_reciprocal(
323
                DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
324
                                      (JLONG)aanscales[i]),
325
                        CONST_BITS - 3), &dtbl[i]) &&
326
              fdct->quantize == jsimd_quantize)
327
            fdct->quantize = quantize;
328
#else
329
          compute_reciprocal(
330
            DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
331
                                  (JLONG)aanscales[i]),
332
                    CONST_BITS-3), &dtbl[i]);
333
#endif
334
#else
335
387k
          dtbl[i] = (DCTELEM)
336
387k
            DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
337
387k
                                  (JLONG)aanscales[i]),
338
387k
                    CONST_BITS - 3);
339
387k
#endif
340
387k
        }
341
6.05k
      }
342
6.05k
      break;
343
0
#endif
344
0
#ifdef DCT_FLOAT_SUPPORTED
345
0
    case JDCT_FLOAT:
346
0
      {
347
        /* For float AA&N IDCT method, divisors are equal to quantization
348
         * coefficients scaled by scalefactor[row]*scalefactor[col], where
349
         *   scalefactor[0] = 1
350
         *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
351
         * We apply a further scale factor of 8.
352
         * What's actually stored is 1/divisor so that the inner loop can
353
         * use a multiplication rather than a division.
354
         */
355
0
        FAST_FLOAT *fdtbl;
356
0
        int row, col;
357
0
        static const double aanscalefactor[DCTSIZE] = {
358
0
          1.0, 1.387039845, 1.306562965, 1.175875602,
359
0
          1.0, 0.785694958, 0.541196100, 0.275899379
360
0
        };
361
362
0
        if (fdct->float_divisors[qtblno] == NULL) {
363
0
          fdct->float_divisors[qtblno] = (FAST_FLOAT *)
364
0
            (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
365
0
                                        DCTSIZE2 * sizeof(FAST_FLOAT));
366
0
        }
367
0
        fdtbl = fdct->float_divisors[qtblno];
368
0
        i = 0;
369
0
        for (row = 0; row < DCTSIZE; row++) {
370
0
          for (col = 0; col < DCTSIZE; col++) {
371
0
            fdtbl[i] = (FAST_FLOAT)
372
0
              (1.0 / (((double)qtbl->quantval[i] *
373
0
                       aanscalefactor[row] * aanscalefactor[col] * 8.0)));
374
0
            i++;
375
0
          }
376
0
        }
377
0
      }
378
0
      break;
379
0
#endif
380
0
    default:
381
0
      ERREXIT(cinfo, JERR_NOT_COMPILED);
382
0
      break;
383
35.9k
    }
384
35.9k
  }
385
13.7k
}
386
387
388
/*
389
 * Load data into workspace, applying unsigned->signed conversion.
390
 */
391
392
METHODDEF(void)
393
convsamp(_JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM *workspace)
394
22.9M
{
395
22.9M
  register DCTELEM *workspaceptr;
396
22.9M
  register _JSAMPROW elemptr;
397
22.9M
  register int elemr;
398
399
22.9M
  workspaceptr = workspace;
400
206M
  for (elemr = 0; elemr < DCTSIZE; elemr++) {
401
183M
    elemptr = sample_data[elemr] + start_col;
402
403
183M
#if DCTSIZE == 8                /* unroll the inner loop */
404
183M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
405
183M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
406
183M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
407
183M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
408
183M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
409
183M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
410
183M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
411
183M
    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
412
#else
413
    {
414
      register int elemc;
415
      for (elemc = DCTSIZE; elemc > 0; elemc--)
416
        *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
417
    }
418
#endif
419
183M
  }
420
22.9M
}
421
422
423
/*
424
 * Quantize/descale the coefficients, and store into coef_blocks[].
425
 */
426
427
METHODDEF(void)
428
quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
429
22.9M
{
430
22.9M
  int i;
431
22.9M
  DCTELEM temp;
432
22.9M
  JCOEFPTR output_ptr = coef_block;
433
434
#if BITS_IN_JSAMPLE == 8
435
436
  UDCTELEM recip, corr;
437
  int shift;
438
  UDCTELEM2 product;
439
440
  for (i = 0; i < DCTSIZE2; i++) {
441
    temp = workspace[i];
442
    recip = divisors[i + DCTSIZE2 * 0];
443
    corr =  divisors[i + DCTSIZE2 * 1];
444
    shift = divisors[i + DCTSIZE2 * 3];
445
446
    if (temp < 0) {
447
      temp = -temp;
448
      product = (UDCTELEM2)(temp + corr) * recip;
449
      product >>= shift + sizeof(DCTELEM) * 8;
450
      temp = (DCTELEM)product;
451
      temp = -temp;
452
    } else {
453
      product = (UDCTELEM2)(temp + corr) * recip;
454
      product >>= shift + sizeof(DCTELEM) * 8;
455
      temp = (DCTELEM)product;
456
    }
457
    output_ptr[i] = (JCOEF)temp;
458
  }
459
460
#else
461
462
22.9M
  register DCTELEM qval;
463
464
1.49G
  for (i = 0; i < DCTSIZE2; i++) {
465
1.46G
    qval = divisors[i];
466
1.46G
    temp = workspace[i];
467
    /* Divide the coefficient value by qval, ensuring proper rounding.
468
     * Since C does not specify the direction of rounding for negative
469
     * quotients, we have to force the dividend positive for portability.
470
     *
471
     * In most files, at least half of the output values will be zero
472
     * (at default quantization settings, more like three-quarters...)
473
     * so we should ensure that this case is fast.  On many machines,
474
     * a comparison is enough cheaper than a divide to make a special test
475
     * a win.  Since both inputs will be nonnegative, we need only test
476
     * for a < b to discover whether a/b is 0.
477
     * If your machine's division is fast enough, define FAST_DIVIDE.
478
     */
479
#ifdef FAST_DIVIDE
480
#define DIVIDE_BY(a, b)  a /= b
481
#else
482
1.46G
#define DIVIDE_BY(a, b)  if (a >= b) a /= b;  else a = 0
483
1.46G
#endif
484
1.46G
    if (temp < 0) {
485
73.6M
      temp = -temp;
486
73.6M
      temp += qval >> 1;        /* for rounding */
487
73.6M
      DIVIDE_BY(temp, qval);
488
73.6M
      temp = -temp;
489
1.39G
    } else {
490
1.39G
      temp += qval >> 1;        /* for rounding */
491
1.39G
      DIVIDE_BY(temp, qval);
492
1.39G
    }
493
1.46G
    output_ptr[i] = (JCOEF)temp;
494
1.46G
  }
495
496
22.9M
#endif
497
498
22.9M
}
499
500
501
/*
502
 * Perform forward DCT on one or more blocks of a component.
503
 *
504
 * The input samples are taken from the sample_data[] array starting at
505
 * position start_row/start_col, and moving to the right for any additional
506
 * blocks. The quantized coefficients are returned in coef_blocks[].
507
 */
508
509
METHODDEF(void)
510
forward_DCT(j_compress_ptr cinfo, jpeg_component_info *compptr,
511
            _JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
512
            JDIMENSION start_row, JDIMENSION start_col, JDIMENSION num_blocks)
513
/* This version is used for integer DCT implementations. */
514
45.6M
{
515
  /* This routine is heavily used, so it's worth coding it tightly. */
516
45.6M
  my_fdct_ptr fdct = (my_fdct_ptr)cinfo->fdct;
517
45.6M
  DCTELEM *divisors = fdct->divisors[compptr->quant_tbl_no];
518
45.6M
  DCTELEM *workspace;
519
45.6M
  JDIMENSION bi;
520
521
  /* Make sure the compiler doesn't look up these every pass */
522
45.6M
  forward_DCT_method_ptr do_dct = fdct->dct;
523
45.6M
  convsamp_method_ptr do_convsamp = fdct->convsamp;
524
45.6M
  quantize_method_ptr do_quantize = fdct->quantize;
525
45.6M
  workspace = fdct->workspace;
526
527
45.6M
  sample_data += start_row;     /* fold in the vertical offset once */
528
529
105M
  for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
530
    /* Load data into workspace, applying unsigned->signed conversion */
531
59.9M
    (*do_convsamp) (sample_data, start_col, workspace);
532
533
    /* Perform the DCT */
534
59.9M
    (*do_dct) (workspace);
535
536
    /* Quantize/descale the coefficients, and store into coef_blocks[] */
537
59.9M
    (*do_quantize) (coef_blocks[bi], divisors, workspace);
538
59.9M
  }
539
45.6M
}
540
541
542
#ifdef DCT_FLOAT_SUPPORTED
543
544
METHODDEF(void)
545
convsamp_float(_JSAMPARRAY sample_data, JDIMENSION start_col,
546
               FAST_FLOAT *workspace)
547
0
{
548
0
  register FAST_FLOAT *workspaceptr;
549
0
  register _JSAMPROW elemptr;
550
0
  register int elemr;
551
552
0
  workspaceptr = workspace;
553
0
  for (elemr = 0; elemr < DCTSIZE; elemr++) {
554
0
    elemptr = sample_data[elemr] + start_col;
555
0
#if DCTSIZE == 8                /* unroll the inner loop */
556
0
    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
557
0
    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
558
0
    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
559
0
    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
560
0
    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
561
0
    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
562
0
    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
563
0
    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
564
#else
565
    {
566
      register int elemc;
567
      for (elemc = DCTSIZE; elemc > 0; elemc--)
568
        *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
569
    }
570
#endif
571
0
  }
572
0
}
573
574
575
METHODDEF(void)
576
quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
577
               FAST_FLOAT *workspace)
578
0
{
579
0
  register FAST_FLOAT temp;
580
0
  register int i;
581
0
  register JCOEFPTR output_ptr = coef_block;
582
583
0
  for (i = 0; i < DCTSIZE2; i++) {
584
    /* Apply the quantization and scaling factor */
585
0
    temp = workspace[i] * divisors[i];
586
587
    /* Round to nearest integer.
588
     * Since C does not specify the direction of rounding for negative
589
     * quotients, we have to force the dividend positive for portability.
590
     * The maximum coefficient size is +-16K (for 12-bit data), so this
591
     * code should work for either 16-bit or 32-bit ints.
592
     */
593
0
    output_ptr[i] = (JCOEF)((int)(temp + (FAST_FLOAT)16384.5) - 16384);
594
0
  }
595
0
}
596
597
598
METHODDEF(void)
599
forward_DCT_float(j_compress_ptr cinfo, jpeg_component_info *compptr,
600
                  _JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
601
                  JDIMENSION start_row, JDIMENSION start_col,
602
                  JDIMENSION num_blocks)
603
/* This version is used for floating-point DCT implementations. */
604
7.95M
{
605
  /* This routine is heavily used, so it's worth coding it tightly. */
606
7.95M
  my_fdct_ptr fdct = (my_fdct_ptr)cinfo->fdct;
607
7.95M
  FAST_FLOAT *divisors = fdct->float_divisors[compptr->quant_tbl_no];
608
7.95M
  FAST_FLOAT *workspace;
609
7.95M
  JDIMENSION bi;
610
611
612
  /* Make sure the compiler doesn't look up these every pass */
613
7.95M
  float_DCT_method_ptr do_dct = fdct->float_dct;
614
7.95M
  float_convsamp_method_ptr do_convsamp = fdct->float_convsamp;
615
7.95M
  float_quantize_method_ptr do_quantize = fdct->float_quantize;
616
7.95M
  workspace = fdct->float_workspace;
617
618
7.95M
  sample_data += start_row;     /* fold in the vertical offset once */
619
620
18.7M
  for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
621
    /* Load data into workspace, applying unsigned->signed conversion */
622
10.8M
    (*do_convsamp) (sample_data, start_col, workspace);
623
624
    /* Perform the DCT */
625
10.8M
    (*do_dct) (workspace);
626
627
    /* Quantize/descale the coefficients, and store into coef_blocks[] */
628
10.8M
    (*do_quantize) (coef_blocks[bi], divisors, workspace);
629
10.8M
  }
630
7.95M
}
631
632
#endif /* DCT_FLOAT_SUPPORTED */
633
634
635
/*
636
 * Initialize FDCT manager.
637
 */
638
639
GLOBAL(void)
640
_jinit_forward_dct(j_compress_ptr cinfo)
641
100k
{
642
100k
  my_fdct_ptr fdct;
643
100k
  int i;
644
645
100k
  if (cinfo->data_precision != BITS_IN_JSAMPLE)
646
0
    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
647
648
100k
  fdct = (my_fdct_ptr)
649
100k
    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
650
100k
                                sizeof(my_fdct_controller));
651
100k
  cinfo->fdct = (struct jpeg_forward_dct *)fdct;
652
100k
  fdct->pub.start_pass = start_pass_fdctmgr;
653
654
  /* First determine the DCT... */
655
100k
  switch (cinfo->dct_method) {
656
0
#ifdef DCT_ISLOW_SUPPORTED
657
78.4k
  case JDCT_ISLOW:
658
78.4k
    fdct->pub._forward_DCT = forward_DCT;
659
#ifdef WITH_SIMD
660
38.7k
    if (jsimd_can_fdct_islow())
661
38.7k
      fdct->dct = jsimd_fdct_islow;
662
0
    else
663
0
#endif
664
39.7k
      fdct->dct = _jpeg_fdct_islow;
665
78.4k
    break;
666
0
#endif
667
0
#ifdef DCT_IFAST_SUPPORTED
668
15.5k
  case JDCT_IFAST:
669
15.5k
    fdct->pub._forward_DCT = forward_DCT;
670
#ifdef WITH_SIMD
671
8.79k
    if (jsimd_can_fdct_ifast())
672
8.79k
      fdct->dct = jsimd_fdct_ifast;
673
0
    else
674
0
#endif
675
6.71k
      fdct->dct = _jpeg_fdct_ifast;
676
15.5k
    break;
677
0
#endif
678
0
#ifdef DCT_FLOAT_SUPPORTED
679
6.87k
  case JDCT_FLOAT:
680
6.87k
    fdct->pub._forward_DCT = forward_DCT_float;
681
#ifdef WITH_SIMD
682
6.87k
    if (jsimd_can_fdct_float())
683
6.87k
      fdct->float_dct = jsimd_fdct_float;
684
0
    else
685
0
#endif
686
0
      fdct->float_dct = jpeg_fdct_float;
687
6.87k
    break;
688
0
#endif
689
0
  default:
690
0
    ERREXIT(cinfo, JERR_NOT_COMPILED);
691
0
    break;
692
100k
  }
693
694
  /* ...then the supporting stages. */
695
100k
  switch (cinfo->dct_method) {
696
0
#ifdef DCT_ISLOW_SUPPORTED
697
78.4k
  case JDCT_ISLOW:
698
78.4k
#endif
699
78.4k
#ifdef DCT_IFAST_SUPPORTED
700
93.9k
  case JDCT_IFAST:
701
93.9k
#endif
702
93.9k
#if defined(DCT_ISLOW_SUPPORTED) || defined(DCT_IFAST_SUPPORTED)
703
#ifdef WITH_SIMD
704
47.5k
    if (jsimd_can_convsamp())
705
47.5k
      fdct->convsamp = jsimd_convsamp;
706
0
    else
707
0
#endif
708
0
      fdct->convsamp = convsamp;
709
#ifdef WITH_SIMD
710
47.5k
    if (jsimd_can_quantize())
711
47.5k
      fdct->quantize = jsimd_quantize;
712
0
    else
713
0
#endif
714
0
      fdct->quantize = quantize;
715
93.9k
    break;
716
0
#endif
717
0
#ifdef DCT_FLOAT_SUPPORTED
718
6.87k
  case JDCT_FLOAT:
719
#ifdef WITH_SIMD
720
6.87k
    if (jsimd_can_convsamp_float())
721
6.87k
      fdct->float_convsamp = jsimd_convsamp_float;
722
0
    else
723
0
#endif
724
0
      fdct->float_convsamp = convsamp_float;
725
#ifdef WITH_SIMD
726
6.87k
    if (jsimd_can_quantize_float())
727
6.87k
      fdct->float_quantize = jsimd_quantize_float;
728
0
    else
729
0
#endif
730
0
      fdct->float_quantize = quantize_float;
731
6.87k
    break;
732
0
#endif
733
0
  default:
734
0
    ERREXIT(cinfo, JERR_NOT_COMPILED);
735
0
    break;
736
100k
  }
737
738
  /* Allocate workspace memory */
739
100k
#ifdef DCT_FLOAT_SUPPORTED
740
100k
  if (cinfo->dct_method == JDCT_FLOAT)
741
6.87k
    fdct->float_workspace = (FAST_FLOAT *)
742
6.87k
      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
743
6.87k
                                  sizeof(FAST_FLOAT) * DCTSIZE2);
744
93.9k
  else
745
93.9k
#endif
746
93.9k
    fdct->workspace = (DCTELEM *)
747
93.9k
      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
748
93.9k
                                  sizeof(DCTELEM) * DCTSIZE2);
749
750
  /* Mark divisor tables unallocated */
751
504k
  for (i = 0; i < NUM_QUANT_TBLS; i++) {
752
403k
    fdct->divisors[i] = NULL;
753
403k
#ifdef DCT_FLOAT_SUPPORTED
754
    fdct->float_divisors[i] = NULL;
755
403k
#endif
756
403k
  }
757
100k
}
j12init_forward_dct
Line
Count
Source
641
46.4k
{
642
46.4k
  my_fdct_ptr fdct;
643
46.4k
  int i;
644
645
46.4k
  if (cinfo->data_precision != BITS_IN_JSAMPLE)
646
0
    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
647
648
46.4k
  fdct = (my_fdct_ptr)
649
46.4k
    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
650
46.4k
                                sizeof(my_fdct_controller));
651
46.4k
  cinfo->fdct = (struct jpeg_forward_dct *)fdct;
652
46.4k
  fdct->pub.start_pass = start_pass_fdctmgr;
653
654
  /* First determine the DCT... */
655
46.4k
  switch (cinfo->dct_method) {
656
0
#ifdef DCT_ISLOW_SUPPORTED
657
39.7k
  case JDCT_ISLOW:
658
39.7k
    fdct->pub._forward_DCT = forward_DCT;
659
#ifdef WITH_SIMD
660
    if (jsimd_can_fdct_islow())
661
      fdct->dct = jsimd_fdct_islow;
662
    else
663
#endif
664
39.7k
      fdct->dct = _jpeg_fdct_islow;
665
39.7k
    break;
666
0
#endif
667
0
#ifdef DCT_IFAST_SUPPORTED
668
6.71k
  case JDCT_IFAST:
669
6.71k
    fdct->pub._forward_DCT = forward_DCT;
670
#ifdef WITH_SIMD
671
    if (jsimd_can_fdct_ifast())
672
      fdct->dct = jsimd_fdct_ifast;
673
    else
674
#endif
675
6.71k
      fdct->dct = _jpeg_fdct_ifast;
676
6.71k
    break;
677
0
#endif
678
0
#ifdef DCT_FLOAT_SUPPORTED
679
0
  case JDCT_FLOAT:
680
0
    fdct->pub._forward_DCT = forward_DCT_float;
681
#ifdef WITH_SIMD
682
    if (jsimd_can_fdct_float())
683
      fdct->float_dct = jsimd_fdct_float;
684
    else
685
#endif
686
0
      fdct->float_dct = jpeg_fdct_float;
687
0
    break;
688
0
#endif
689
0
  default:
690
0
    ERREXIT(cinfo, JERR_NOT_COMPILED);
691
0
    break;
692
46.4k
  }
693
694
  /* ...then the supporting stages. */
695
46.4k
  switch (cinfo->dct_method) {
696
0
#ifdef DCT_ISLOW_SUPPORTED
697
39.7k
  case JDCT_ISLOW:
698
39.7k
#endif
699
39.7k
#ifdef DCT_IFAST_SUPPORTED
700
46.4k
  case JDCT_IFAST:
701
46.4k
#endif
702
46.4k
#if defined(DCT_ISLOW_SUPPORTED) || defined(DCT_IFAST_SUPPORTED)
703
#ifdef WITH_SIMD
704
    if (jsimd_can_convsamp())
705
      fdct->convsamp = jsimd_convsamp;
706
    else
707
#endif
708
46.4k
      fdct->convsamp = convsamp;
709
#ifdef WITH_SIMD
710
    if (jsimd_can_quantize())
711
      fdct->quantize = jsimd_quantize;
712
    else
713
#endif
714
46.4k
      fdct->quantize = quantize;
715
46.4k
    break;
716
0
#endif
717
0
#ifdef DCT_FLOAT_SUPPORTED
718
0
  case JDCT_FLOAT:
719
#ifdef WITH_SIMD
720
    if (jsimd_can_convsamp_float())
721
      fdct->float_convsamp = jsimd_convsamp_float;
722
    else
723
#endif
724
0
      fdct->float_convsamp = convsamp_float;
725
#ifdef WITH_SIMD
726
    if (jsimd_can_quantize_float())
727
      fdct->float_quantize = jsimd_quantize_float;
728
    else
729
#endif
730
0
      fdct->float_quantize = quantize_float;
731
0
    break;
732
0
#endif
733
0
  default:
734
0
    ERREXIT(cinfo, JERR_NOT_COMPILED);
735
0
    break;
736
46.4k
  }
737
738
  /* Allocate workspace memory */
739
46.4k
#ifdef DCT_FLOAT_SUPPORTED
740
46.4k
  if (cinfo->dct_method == JDCT_FLOAT)
741
0
    fdct->float_workspace = (FAST_FLOAT *)
742
0
      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
743
0
                                  sizeof(FAST_FLOAT) * DCTSIZE2);
744
46.4k
  else
745
46.4k
#endif
746
46.4k
    fdct->workspace = (DCTELEM *)
747
46.4k
      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
748
46.4k
                                  sizeof(DCTELEM) * DCTSIZE2);
749
750
  /* Mark divisor tables unallocated */
751
232k
  for (i = 0; i < NUM_QUANT_TBLS; i++) {
752
185k
    fdct->divisors[i] = NULL;
753
185k
#ifdef DCT_FLOAT_SUPPORTED
754
    fdct->float_divisors[i] = NULL;
755
185k
#endif
756
185k
  }
757
46.4k
}
jinit_forward_dct
Line
Count
Source
641
54.3k
{
642
54.3k
  my_fdct_ptr fdct;
643
54.3k
  int i;
644
645
54.3k
  if (cinfo->data_precision != BITS_IN_JSAMPLE)
646
0
    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
647
648
54.3k
  fdct = (my_fdct_ptr)
649
54.3k
    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
650
54.3k
                                sizeof(my_fdct_controller));
651
54.3k
  cinfo->fdct = (struct jpeg_forward_dct *)fdct;
652
54.3k
  fdct->pub.start_pass = start_pass_fdctmgr;
653
654
  /* First determine the DCT... */
655
54.3k
  switch (cinfo->dct_method) {
656
0
#ifdef DCT_ISLOW_SUPPORTED
657
38.7k
  case JDCT_ISLOW:
658
38.7k
    fdct->pub._forward_DCT = forward_DCT;
659
38.7k
#ifdef WITH_SIMD
660
38.7k
    if (jsimd_can_fdct_islow())
661
38.7k
      fdct->dct = jsimd_fdct_islow;
662
0
    else
663
0
#endif
664
0
      fdct->dct = _jpeg_fdct_islow;
665
38.7k
    break;
666
0
#endif
667
0
#ifdef DCT_IFAST_SUPPORTED
668
8.79k
  case JDCT_IFAST:
669
8.79k
    fdct->pub._forward_DCT = forward_DCT;
670
8.79k
#ifdef WITH_SIMD
671
8.79k
    if (jsimd_can_fdct_ifast())
672
8.79k
      fdct->dct = jsimd_fdct_ifast;
673
0
    else
674
0
#endif
675
0
      fdct->dct = _jpeg_fdct_ifast;
676
8.79k
    break;
677
0
#endif
678
0
#ifdef DCT_FLOAT_SUPPORTED
679
6.87k
  case JDCT_FLOAT:
680
6.87k
    fdct->pub._forward_DCT = forward_DCT_float;
681
6.87k
#ifdef WITH_SIMD
682
6.87k
    if (jsimd_can_fdct_float())
683
6.87k
      fdct->float_dct = jsimd_fdct_float;
684
0
    else
685
0
#endif
686
0
      fdct->float_dct = jpeg_fdct_float;
687
6.87k
    break;
688
0
#endif
689
0
  default:
690
0
    ERREXIT(cinfo, JERR_NOT_COMPILED);
691
0
    break;
692
54.3k
  }
693
694
  /* ...then the supporting stages. */
695
54.3k
  switch (cinfo->dct_method) {
696
0
#ifdef DCT_ISLOW_SUPPORTED
697
38.7k
  case JDCT_ISLOW:
698
38.7k
#endif
699
38.7k
#ifdef DCT_IFAST_SUPPORTED
700
47.5k
  case JDCT_IFAST:
701
47.5k
#endif
702
47.5k
#if defined(DCT_ISLOW_SUPPORTED) || defined(DCT_IFAST_SUPPORTED)
703
47.5k
#ifdef WITH_SIMD
704
47.5k
    if (jsimd_can_convsamp())
705
47.5k
      fdct->convsamp = jsimd_convsamp;
706
0
    else
707
0
#endif
708
0
      fdct->convsamp = convsamp;
709
47.5k
#ifdef WITH_SIMD
710
47.5k
    if (jsimd_can_quantize())
711
47.5k
      fdct->quantize = jsimd_quantize;
712
0
    else
713
0
#endif
714
0
      fdct->quantize = quantize;
715
47.5k
    break;
716
0
#endif
717
0
#ifdef DCT_FLOAT_SUPPORTED
718
6.87k
  case JDCT_FLOAT:
719
6.87k
#ifdef WITH_SIMD
720
6.87k
    if (jsimd_can_convsamp_float())
721
6.87k
      fdct->float_convsamp = jsimd_convsamp_float;
722
0
    else
723
0
#endif
724
0
      fdct->float_convsamp = convsamp_float;
725
6.87k
#ifdef WITH_SIMD
726
6.87k
    if (jsimd_can_quantize_float())
727
6.87k
      fdct->float_quantize = jsimd_quantize_float;
728
0
    else
729
0
#endif
730
0
      fdct->float_quantize = quantize_float;
731
6.87k
    break;
732
0
#endif
733
0
  default:
734
0
    ERREXIT(cinfo, JERR_NOT_COMPILED);
735
0
    break;
736
54.3k
  }
737
738
  /* Allocate workspace memory */
739
54.3k
#ifdef DCT_FLOAT_SUPPORTED
740
54.3k
  if (cinfo->dct_method == JDCT_FLOAT)
741
6.87k
    fdct->float_workspace = (FAST_FLOAT *)
742
6.87k
      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
743
6.87k
                                  sizeof(FAST_FLOAT) * DCTSIZE2);
744
47.5k
  else
745
47.5k
#endif
746
47.5k
    fdct->workspace = (DCTELEM *)
747
47.5k
      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
748
47.5k
                                  sizeof(DCTELEM) * DCTSIZE2);
749
750
  /* Mark divisor tables unallocated */
751
271k
  for (i = 0; i < NUM_QUANT_TBLS; i++) {
752
217k
    fdct->divisors[i] = NULL;
753
217k
#ifdef DCT_FLOAT_SUPPORTED
754
    fdct->float_divisors[i] = NULL;
755
217k
#endif
756
217k
  }
757
54.3k
}
758
759
#endif /* defined(DCT_ISLOW_SUPPORTED) || defined(DCT_IFAST_SUPPORTED) ||
760
          defined(DCT_FLOAT_SUPPORTED) */