/src/libjpeg-turbo.main/jcdctmgr.c
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | /*  | 
2  |  |  * jcdctmgr.c  | 
3  |  |  *  | 
4  |  |  * This file was part of the Independent JPEG Group's software:  | 
5  |  |  * Copyright (C) 1994-1996, Thomas G. Lane.  | 
6  |  |  * libjpeg-turbo Modifications:  | 
7  |  |  * Copyright (C) 1999-2006, MIYASAKA Masaru.  | 
8  |  |  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB  | 
9  |  |  * Copyright (C) 2011, 2014-2015, 2022, D. R. Commander.  | 
10  |  |  * For conditions of distribution and use, see the accompanying README.ijg  | 
11  |  |  * file.  | 
12  |  |  *  | 
13  |  |  * This file contains the forward-DCT management logic.  | 
14  |  |  * This code selects a particular DCT implementation to be used,  | 
15  |  |  * and it performs related housekeeping chores including coefficient  | 
16  |  |  * quantization.  | 
17  |  |  */  | 
18  |  |  | 
19  |  | #define JPEG_INTERNALS  | 
20  |  | #include "jinclude.h"  | 
21  |  | #include "jpeglib.h"  | 
22  |  | #include "jdct.h"               /* Private declarations for DCT subsystem */  | 
23  |  | #include "jsimddct.h"  | 
24  |  |  | 
25  |  |  | 
26  |  | /* Private subobject for this module */  | 
27  |  |  | 
28  |  | typedef void (*forward_DCT_method_ptr) (DCTELEM *data);  | 
29  |  | typedef void (*float_DCT_method_ptr) (FAST_FLOAT *data);  | 
30  |  |  | 
31  |  | typedef void (*convsamp_method_ptr) (_JSAMPARRAY sample_data,  | 
32  |  |                                      JDIMENSION start_col,  | 
33  |  |                                      DCTELEM *workspace);  | 
34  |  | typedef void (*float_convsamp_method_ptr) (_JSAMPARRAY sample_data,  | 
35  |  |                                            JDIMENSION start_col,  | 
36  |  |                                            FAST_FLOAT *workspace);  | 
37  |  |  | 
38  |  | typedef void (*quantize_method_ptr) (JCOEFPTR coef_block, DCTELEM *divisors,  | 
39  |  |                                      DCTELEM *workspace);  | 
40  |  | typedef void (*float_quantize_method_ptr) (JCOEFPTR coef_block,  | 
41  |  |                                            FAST_FLOAT *divisors,  | 
42  |  |                                            FAST_FLOAT *workspace);  | 
43  |  |  | 
44  |  | METHODDEF(void) quantize(JCOEFPTR, DCTELEM *, DCTELEM *);  | 
45  |  |  | 
46  |  | typedef struct { | 
47  |  |   struct jpeg_forward_dct pub;  /* public fields */  | 
48  |  |  | 
49  |  |   /* Pointer to the DCT routine actually in use */  | 
50  |  |   forward_DCT_method_ptr dct;  | 
51  |  |   convsamp_method_ptr convsamp;  | 
52  |  |   quantize_method_ptr quantize;  | 
53  |  |  | 
54  |  |   /* The actual post-DCT divisors --- not identical to the quant table  | 
55  |  |    * entries, because of scaling (especially for an unnormalized DCT).  | 
56  |  |    * Each table is given in normal array order.  | 
57  |  |    */  | 
58  |  |   DCTELEM *divisors[NUM_QUANT_TBLS];  | 
59  |  |  | 
60  |  |   /* work area for FDCT subroutine */  | 
61  |  |   DCTELEM *workspace;  | 
62  |  |  | 
63  |  | #ifdef DCT_FLOAT_SUPPORTED  | 
64  |  |   /* Same as above for the floating-point case. */  | 
65  |  |   float_DCT_method_ptr float_dct;  | 
66  |  |   float_convsamp_method_ptr float_convsamp;  | 
67  |  |   float_quantize_method_ptr float_quantize;  | 
68  |  |   FAST_FLOAT *float_divisors[NUM_QUANT_TBLS];  | 
69  |  |   FAST_FLOAT *float_workspace;  | 
70  |  | #endif  | 
71  |  | } my_fdct_controller;  | 
72  |  |  | 
73  |  | typedef my_fdct_controller *my_fdct_ptr;  | 
74  |  |  | 
75  |  |  | 
76  |  | #if BITS_IN_JSAMPLE == 8  | 
77  |  |  | 
78  |  | /*  | 
79  |  |  * Find the highest bit in an integer through binary search.  | 
80  |  |  */  | 
81  |  |  | 
82  |  | LOCAL(int)  | 
83  |  | flss(UINT16 val)  | 
84  | 0  | { | 
85  | 0  |   int bit;  | 
86  |  | 
  | 
87  | 0  |   bit = 16;  | 
88  |  | 
  | 
89  | 0  |   if (!val)  | 
90  | 0  |     return 0;  | 
91  |  |  | 
92  | 0  |   if (!(val & 0xff00)) { | 
93  | 0  |     bit -= 8;  | 
94  | 0  |     val <<= 8;  | 
95  | 0  |   }  | 
96  | 0  |   if (!(val & 0xf000)) { | 
97  | 0  |     bit -= 4;  | 
98  | 0  |     val <<= 4;  | 
99  | 0  |   }  | 
100  | 0  |   if (!(val & 0xc000)) { | 
101  | 0  |     bit -= 2;  | 
102  | 0  |     val <<= 2;  | 
103  | 0  |   }  | 
104  | 0  |   if (!(val & 0x8000)) { | 
105  | 0  |     bit -= 1;  | 
106  | 0  |     val <<= 1;  | 
107  | 0  |   }  | 
108  |  | 
  | 
109  | 0  |   return bit;  | 
110  | 0  | }  | 
111  |  |  | 
112  |  |  | 
113  |  | /*  | 
114  |  |  * Compute values to do a division using reciprocal.  | 
115  |  |  *  | 
116  |  |  * This implementation is based on an algorithm described in  | 
117  |  |  *   "How to optimize for the Pentium family of microprocessors"  | 
118  |  |  *   (http://www.agner.org/assem/).  | 
119  |  |  * More information about the basic algorithm can be found in  | 
120  |  |  * the paper "Integer Division Using Reciprocals" by Robert Alverson.  | 
121  |  |  *  | 
122  |  |  * The basic idea is to replace x/d by x * d^-1. In order to store  | 
123  |  |  * d^-1 with enough precision we shift it left a few places. It turns  | 
124  |  |  * out that this algorithm gives just enough precision, and also fits  | 
125  |  |  * into DCTELEM:  | 
126  |  |  *  | 
127  |  |  *   b = (the number of significant bits in divisor) - 1  | 
128  |  |  *   r = (word size) + b  | 
129  |  |  *   f = 2^r / divisor  | 
130  |  |  *  | 
131  |  |  * f will not be an integer for most cases, so we need to compensate  | 
132  |  |  * for the rounding error introduced:  | 
133  |  |  *  | 
134  |  |  *   no fractional part:  | 
135  |  |  *  | 
136  |  |  *       result = input >> r  | 
137  |  |  *  | 
138  |  |  *   fractional part of f < 0.5:  | 
139  |  |  *  | 
140  |  |  *       round f down to nearest integer  | 
141  |  |  *       result = ((input + 1) * f) >> r  | 
142  |  |  *  | 
143  |  |  *   fractional part of f > 0.5:  | 
144  |  |  *  | 
145  |  |  *       round f up to nearest integer  | 
146  |  |  *       result = (input * f) >> r  | 
147  |  |  *  | 
148  |  |  * This is the original algorithm that gives truncated results. But we  | 
149  |  |  * want properly rounded results, so we replace "input" with  | 
150  |  |  * "input + divisor/2".  | 
151  |  |  *  | 
152  |  |  * In order to allow SIMD implementations we also tweak the values to  | 
153  |  |  * allow the same calculation to be made at all times:  | 
154  |  |  *  | 
155  |  |  *   dtbl[DCTSIZE2 * 0] = f rounded to nearest integer  | 
156  |  |  *   dtbl[DCTSIZE2 * 1] = divisor / 2 (+ 1 if fractional part of f < 0.5)  | 
157  |  |  *   dtbl[DCTSIZE2 * 2] = 1 << ((word size) * 2 - r)  | 
158  |  |  *   dtbl[DCTSIZE2 * 3] = r - (word size)  | 
159  |  |  *  | 
160  |  |  * dtbl[DCTSIZE2 * 2] is for stupid instruction sets where the shift  | 
161  |  |  * operation isn't member wise (e.g. MMX).  | 
162  |  |  *  | 
163  |  |  * The reason dtbl[DCTSIZE2 * 2] and dtbl[DCTSIZE2 * 3] reduce the shift with (word size)  | 
164  |  |  * is that most SIMD implementations have a "multiply and store top  | 
165  |  |  * half" operation.  | 
166  |  |  *  | 
167  |  |  * Lastly, we store each of the values in their own table instead  | 
168  |  |  * of in a consecutive manner, yet again in order to allow SIMD  | 
169  |  |  * routines.  | 
170  |  |  */  | 
171  |  |  | 
172  |  | LOCAL(int)  | 
173  |  | compute_reciprocal(UINT16 divisor, DCTELEM *dtbl)  | 
174  | 0  | { | 
175  | 0  |   UDCTELEM2 fq, fr;  | 
176  | 0  |   UDCTELEM c;  | 
177  | 0  |   int b, r;  | 
178  |  | 
  | 
179  | 0  |   if (divisor == 1) { | 
180  |  |     /* divisor == 1 means unquantized, so these reciprocal/correction/shift  | 
181  |  |      * values will cause the C quantization algorithm to act like the  | 
182  |  |      * identity function.  Since only the C quantization algorithm is used in  | 
183  |  |      * these cases, the scale value is irrelevant.  | 
184  |  |      */  | 
185  | 0  |     dtbl[DCTSIZE2 * 0] = (DCTELEM)1;                        /* reciprocal */  | 
186  | 0  |     dtbl[DCTSIZE2 * 1] = (DCTELEM)0;                        /* correction */  | 
187  | 0  |     dtbl[DCTSIZE2 * 2] = (DCTELEM)1;                        /* scale */  | 
188  | 0  |     dtbl[DCTSIZE2 * 3] = -(DCTELEM)(sizeof(DCTELEM) * 8);   /* shift */  | 
189  | 0  |     return 0;  | 
190  | 0  |   }  | 
191  |  |  | 
192  | 0  |   b = flss(divisor) - 1;  | 
193  | 0  |   r  = sizeof(DCTELEM) * 8 + b;  | 
194  |  | 
  | 
195  | 0  |   fq = ((UDCTELEM2)1 << r) / divisor;  | 
196  | 0  |   fr = ((UDCTELEM2)1 << r) % divisor;  | 
197  |  | 
  | 
198  | 0  |   c = divisor / 2;                      /* for rounding */  | 
199  |  | 
  | 
200  | 0  |   if (fr == 0) {                        /* divisor is power of two */ | 
201  |  |     /* fq will be one bit too large to fit in DCTELEM, so adjust */  | 
202  | 0  |     fq >>= 1;  | 
203  | 0  |     r--;  | 
204  | 0  |   } else if (fr <= (divisor / 2U)) {    /* fractional part is < 0.5 */ | 
205  | 0  |     c++;  | 
206  | 0  |   } else {                              /* fractional part is > 0.5 */ | 
207  | 0  |     fq++;  | 
208  | 0  |   }  | 
209  |  | 
  | 
210  | 0  |   dtbl[DCTSIZE2 * 0] = (DCTELEM)fq;     /* reciprocal */  | 
211  | 0  |   dtbl[DCTSIZE2 * 1] = (DCTELEM)c;      /* correction + roundfactor */  | 
212  | 0  | #ifdef WITH_SIMD  | 
213  | 0  |   dtbl[DCTSIZE2 * 2] = (DCTELEM)(1 << (sizeof(DCTELEM) * 8 * 2 - r)); /* scale */  | 
214  |  | #else  | 
215  |  |   dtbl[DCTSIZE2 * 2] = 1;  | 
216  |  | #endif  | 
217  | 0  |   dtbl[DCTSIZE2 * 3] = (DCTELEM)r - sizeof(DCTELEM) * 8; /* shift */  | 
218  |  | 
  | 
219  | 0  |   if (r <= 16) return 0;  | 
220  | 0  |   else return 1;  | 
221  | 0  | }  | 
222  |  |  | 
223  |  | #endif  | 
224  |  |  | 
225  |  |  | 
226  |  | /*  | 
227  |  |  * Initialize for a processing pass.  | 
228  |  |  * Verify that all referenced Q-tables are present, and set up  | 
229  |  |  * the divisor table for each one.  | 
230  |  |  * In the current implementation, DCT of all components is done during  | 
231  |  |  * the first pass, even if only some components will be output in the  | 
232  |  |  * first scan.  Hence all components should be examined here.  | 
233  |  |  */  | 
234  |  |  | 
235  |  | METHODDEF(void)  | 
236  |  | start_pass_fdctmgr(j_compress_ptr cinfo)  | 
237  | 12.9k  | { | 
238  | 12.9k  |   my_fdct_ptr fdct = (my_fdct_ptr)cinfo->fdct;  | 
239  | 12.9k  |   int ci, qtblno, i;  | 
240  | 12.9k  |   jpeg_component_info *compptr;  | 
241  | 12.9k  |   JQUANT_TBL *qtbl;  | 
242  | 12.9k  |   DCTELEM *dtbl;  | 
243  |  |  | 
244  | 46.5k  |   for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;  | 
245  | 33.6k  |        ci++, compptr++) { | 
246  | 33.6k  |     qtblno = compptr->quant_tbl_no;  | 
247  |  |     /* Make sure specified quantization table is present */  | 
248  | 33.6k  |     if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||  | 
249  | 33.6k  |         cinfo->quant_tbl_ptrs[qtblno] == NULL)  | 
250  | 0  |       ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);  | 
251  | 33.6k  |     qtbl = cinfo->quant_tbl_ptrs[qtblno];  | 
252  |  |     /* Compute divisors for this quant table */  | 
253  |  |     /* We may do this more than once for same table, but it's not a big deal */  | 
254  | 33.6k  |     switch (cinfo->dct_method) { | 
255  | 0  | #ifdef DCT_ISLOW_SUPPORTED  | 
256  | 28.0k  |     case JDCT_ISLOW:  | 
257  |  |       /* For LL&M IDCT method, divisors are equal to raw quantization  | 
258  |  |        * coefficients multiplied by 8 (to counteract scaling).  | 
259  |  |        */  | 
260  | 28.0k  |       if (fdct->divisors[qtblno] == NULL) { | 
261  | 18.5k  |         fdct->divisors[qtblno] = (DCTELEM *)  | 
262  | 18.5k  |           (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,  | 
263  | 18.5k  |                                       (DCTSIZE2 * 4) * sizeof(DCTELEM));  | 
264  | 18.5k  |       }  | 
265  | 28.0k  |       dtbl = fdct->divisors[qtblno];  | 
266  | 1.82M  |       for (i = 0; i < DCTSIZE2; i++) { | 
267  |  | #if BITS_IN_JSAMPLE == 8  | 
268  |  | #ifdef WITH_SIMD  | 
269  |  |         if (!compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i]) &&  | 
270  |  |             fdct->quantize == jsimd_quantize)  | 
271  |  |           fdct->quantize = quantize;  | 
272  |  | #else  | 
273  |  |         compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i]);  | 
274  |  | #endif  | 
275  |  | #else  | 
276  | 1.79M  |         dtbl[i] = ((DCTELEM)qtbl->quantval[i]) << 3;  | 
277  | 1.79M  | #endif  | 
278  | 1.79M  |       }  | 
279  | 28.0k  |       break;  | 
280  | 0  | #endif  | 
281  | 0  | #ifdef DCT_IFAST_SUPPORTED  | 
282  | 5.65k  |     case JDCT_IFAST:  | 
283  | 5.65k  |       { | 
284  |  |         /* For AA&N IDCT method, divisors are equal to quantization  | 
285  |  |          * coefficients scaled by scalefactor[row]*scalefactor[col], where  | 
286  |  |          *   scalefactor[0] = 1  | 
287  |  |          *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7  | 
288  |  |          * We apply a further scale factor of 8.  | 
289  |  |          */  | 
290  | 5.65k  | #define CONST_BITS  14  | 
291  | 5.65k  |         static const INT16 aanscales[DCTSIZE2] = { | 
292  |  |           /* precomputed values scaled up by 14 bits */  | 
293  | 5.65k  |           16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,  | 
294  | 5.65k  |           22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,  | 
295  | 5.65k  |           21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,  | 
296  | 5.65k  |           19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,  | 
297  | 5.65k  |           16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,  | 
298  | 5.65k  |           12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,  | 
299  | 5.65k  |            8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,  | 
300  | 5.65k  |            4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247  | 
301  | 5.65k  |         };  | 
302  | 5.65k  |         SHIFT_TEMPS  | 
303  |  |  | 
304  | 5.65k  |         if (fdct->divisors[qtblno] == NULL) { | 
305  | 3.77k  |           fdct->divisors[qtblno] = (DCTELEM *)  | 
306  | 3.77k  |             (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,  | 
307  | 3.77k  |                                         (DCTSIZE2 * 4) * sizeof(DCTELEM));  | 
308  | 3.77k  |         }  | 
309  | 5.65k  |         dtbl = fdct->divisors[qtblno];  | 
310  | 367k  |         for (i = 0; i < DCTSIZE2; i++) { | 
311  |  | #if BITS_IN_JSAMPLE == 8  | 
312  |  | #ifdef WITH_SIMD  | 
313  |  |           if (!compute_reciprocal(  | 
314  |  |                 DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],  | 
315  |  |                                       (JLONG)aanscales[i]),  | 
316  |  |                         CONST_BITS - 3), &dtbl[i]) &&  | 
317  |  |               fdct->quantize == jsimd_quantize)  | 
318  |  |             fdct->quantize = quantize;  | 
319  |  | #else  | 
320  |  |           compute_reciprocal(  | 
321  |  |             DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],  | 
322  |  |                                   (JLONG)aanscales[i]),  | 
323  |  |                     CONST_BITS-3), &dtbl[i]);  | 
324  |  | #endif  | 
325  |  | #else  | 
326  | 362k  |           dtbl[i] = (DCTELEM)  | 
327  | 362k  |             DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],  | 
328  | 362k  |                                   (JLONG)aanscales[i]),  | 
329  | 362k  |                     CONST_BITS - 3);  | 
330  | 362k  | #endif  | 
331  | 362k  |         }  | 
332  | 5.65k  |       }  | 
333  | 5.65k  |       break;  | 
334  | 0  | #endif  | 
335  | 0  | #ifdef DCT_FLOAT_SUPPORTED  | 
336  | 0  |     case JDCT_FLOAT:  | 
337  | 0  |       { | 
338  |  |         /* For float AA&N IDCT method, divisors are equal to quantization  | 
339  |  |          * coefficients scaled by scalefactor[row]*scalefactor[col], where  | 
340  |  |          *   scalefactor[0] = 1  | 
341  |  |          *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7  | 
342  |  |          * We apply a further scale factor of 8.  | 
343  |  |          * What's actually stored is 1/divisor so that the inner loop can  | 
344  |  |          * use a multiplication rather than a division.  | 
345  |  |          */  | 
346  | 0  |         FAST_FLOAT *fdtbl;  | 
347  | 0  |         int row, col;  | 
348  | 0  |         static const double aanscalefactor[DCTSIZE] = { | 
349  | 0  |           1.0, 1.387039845, 1.306562965, 1.175875602,  | 
350  | 0  |           1.0, 0.785694958, 0.541196100, 0.275899379  | 
351  | 0  |         };  | 
352  |  | 
  | 
353  | 0  |         if (fdct->float_divisors[qtblno] == NULL) { | 
354  | 0  |           fdct->float_divisors[qtblno] = (FAST_FLOAT *)  | 
355  | 0  |             (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,  | 
356  | 0  |                                         DCTSIZE2 * sizeof(FAST_FLOAT));  | 
357  | 0  |         }  | 
358  | 0  |         fdtbl = fdct->float_divisors[qtblno];  | 
359  | 0  |         i = 0;  | 
360  | 0  |         for (row = 0; row < DCTSIZE; row++) { | 
361  | 0  |           for (col = 0; col < DCTSIZE; col++) { | 
362  | 0  |             fdtbl[i] = (FAST_FLOAT)  | 
363  | 0  |               (1.0 / (((double)qtbl->quantval[i] *  | 
364  | 0  |                        aanscalefactor[row] * aanscalefactor[col] * 8.0)));  | 
365  | 0  |             i++;  | 
366  | 0  |           }  | 
367  | 0  |         }  | 
368  | 0  |       }  | 
369  | 0  |       break;  | 
370  | 0  | #endif  | 
371  | 0  |     default:  | 
372  | 0  |       ERREXIT(cinfo, JERR_NOT_COMPILED);  | 
373  | 0  |       break;  | 
374  | 33.6k  |     }  | 
375  | 33.6k  |   }  | 
376  | 12.9k  | }  | 
377  |  |  | 
378  |  |  | 
379  |  | /*  | 
380  |  |  * Load data into workspace, applying unsigned->signed conversion.  | 
381  |  |  */  | 
382  |  |  | 
383  |  | METHODDEF(void)  | 
384  |  | convsamp(_JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM *workspace)  | 
385  | 18.3M  | { | 
386  | 18.3M  |   register DCTELEM *workspaceptr;  | 
387  | 18.3M  |   register _JSAMPROW elemptr;  | 
388  | 18.3M  |   register int elemr;  | 
389  |  |  | 
390  | 18.3M  |   workspaceptr = workspace;  | 
391  | 164M  |   for (elemr = 0; elemr < DCTSIZE; elemr++) { | 
392  | 146M  |     elemptr = sample_data[elemr] + start_col;  | 
393  |  |  | 
394  | 146M  | #if DCTSIZE == 8                /* unroll the inner loop */  | 
395  | 146M  |     *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;  | 
396  | 146M  |     *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;  | 
397  | 146M  |     *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;  | 
398  | 146M  |     *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;  | 
399  | 146M  |     *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;  | 
400  | 146M  |     *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;  | 
401  | 146M  |     *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;  | 
402  | 146M  |     *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;  | 
403  |  | #else  | 
404  |  |     { | 
405  |  |       register int elemc;  | 
406  |  |       for (elemc = DCTSIZE; elemc > 0; elemc--)  | 
407  |  |         *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;  | 
408  |  |     }  | 
409  |  | #endif  | 
410  | 146M  |   }  | 
411  | 18.3M  | }  | 
412  |  |  | 
413  |  |  | 
414  |  | /*  | 
415  |  |  * Quantize/descale the coefficients, and store into coef_blocks[].  | 
416  |  |  */  | 
417  |  |  | 
418  |  | METHODDEF(void)  | 
419  |  | quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)  | 
420  | 18.3M  | { | 
421  | 18.3M  |   int i;  | 
422  | 18.3M  |   DCTELEM temp;  | 
423  | 18.3M  |   JCOEFPTR output_ptr = coef_block;  | 
424  |  |  | 
425  |  | #if BITS_IN_JSAMPLE == 8  | 
426  |  |  | 
427  |  |   UDCTELEM recip, corr;  | 
428  |  |   int shift;  | 
429  |  |   UDCTELEM2 product;  | 
430  |  |  | 
431  |  |   for (i = 0; i < DCTSIZE2; i++) { | 
432  |  |     temp = workspace[i];  | 
433  |  |     recip = divisors[i + DCTSIZE2 * 0];  | 
434  |  |     corr =  divisors[i + DCTSIZE2 * 1];  | 
435  |  |     shift = divisors[i + DCTSIZE2 * 3];  | 
436  |  |  | 
437  |  |     if (temp < 0) { | 
438  |  |       temp = -temp;  | 
439  |  |       product = (UDCTELEM2)(temp + corr) * recip;  | 
440  |  |       product >>= shift + sizeof(DCTELEM) * 8;  | 
441  |  |       temp = (DCTELEM)product;  | 
442  |  |       temp = -temp;  | 
443  |  |     } else { | 
444  |  |       product = (UDCTELEM2)(temp + corr) * recip;  | 
445  |  |       product >>= shift + sizeof(DCTELEM) * 8;  | 
446  |  |       temp = (DCTELEM)product;  | 
447  |  |     }  | 
448  |  |     output_ptr[i] = (JCOEF)temp;  | 
449  |  |   }  | 
450  |  |  | 
451  |  | #else  | 
452  |  |  | 
453  | 18.3M  |   register DCTELEM qval;  | 
454  |  |  | 
455  | 1.19G  |   for (i = 0; i < DCTSIZE2; i++) { | 
456  | 1.17G  |     qval = divisors[i];  | 
457  | 1.17G  |     temp = workspace[i];  | 
458  |  |     /* Divide the coefficient value by qval, ensuring proper rounding.  | 
459  |  |      * Since C does not specify the direction of rounding for negative  | 
460  |  |      * quotients, we have to force the dividend positive for portability.  | 
461  |  |      *  | 
462  |  |      * In most files, at least half of the output values will be zero  | 
463  |  |      * (at default quantization settings, more like three-quarters...)  | 
464  |  |      * so we should ensure that this case is fast.  On many machines,  | 
465  |  |      * a comparison is enough cheaper than a divide to make a special test  | 
466  |  |      * a win.  Since both inputs will be nonnegative, we need only test  | 
467  |  |      * for a < b to discover whether a/b is 0.  | 
468  |  |      * If your machine's division is fast enough, define FAST_DIVIDE.  | 
469  |  |      */  | 
470  |  | #ifdef FAST_DIVIDE  | 
471  |  | #define DIVIDE_BY(a, b)  a /= b  | 
472  |  | #else  | 
473  | 1.17G  | #define DIVIDE_BY(a, b)  if (a >= b) a /= b;  else a = 0  | 
474  | 1.17G  | #endif  | 
475  | 1.17G  |     if (temp < 0) { | 
476  | 43.8M  |       temp = -temp;  | 
477  | 43.8M  |       temp += qval >> 1;        /* for rounding */  | 
478  | 43.8M  |       DIVIDE_BY(temp, qval);  | 
479  | 43.8M  |       temp = -temp;  | 
480  | 1.12G  |     } else { | 
481  | 1.12G  |       temp += qval >> 1;        /* for rounding */  | 
482  | 1.12G  |       DIVIDE_BY(temp, qval);  | 
483  | 1.12G  |     }  | 
484  | 1.17G  |     output_ptr[i] = (JCOEF)temp;  | 
485  | 1.17G  |   }  | 
486  |  |  | 
487  | 18.3M  | #endif  | 
488  |  |  | 
489  | 18.3M  | }  | 
490  |  |  | 
491  |  |  | 
492  |  | /*  | 
493  |  |  * Perform forward DCT on one or more blocks of a component.  | 
494  |  |  *  | 
495  |  |  * The input samples are taken from the sample_data[] array starting at  | 
496  |  |  * position start_row/start_col, and moving to the right for any additional  | 
497  |  |  * blocks. The quantized coefficients are returned in coef_blocks[].  | 
498  |  |  */  | 
499  |  |  | 
500  |  | METHODDEF(void)  | 
501  |  | forward_DCT(j_compress_ptr cinfo, jpeg_component_info *compptr,  | 
502  |  |             _JSAMPARRAY sample_data, JBLOCKROW coef_blocks,  | 
503  |  |             JDIMENSION start_row, JDIMENSION start_col, JDIMENSION num_blocks)  | 
504  |  | /* This version is used for integer DCT implementations. */  | 
505  | 15.1M  | { | 
506  |  |   /* This routine is heavily used, so it's worth coding it tightly. */  | 
507  | 15.1M  |   my_fdct_ptr fdct = (my_fdct_ptr)cinfo->fdct;  | 
508  | 15.1M  |   DCTELEM *divisors = fdct->divisors[compptr->quant_tbl_no];  | 
509  | 15.1M  |   DCTELEM *workspace;  | 
510  | 15.1M  |   JDIMENSION bi;  | 
511  |  |  | 
512  |  |   /* Make sure the compiler doesn't look up these every pass */  | 
513  | 15.1M  |   forward_DCT_method_ptr do_dct = fdct->dct;  | 
514  | 15.1M  |   convsamp_method_ptr do_convsamp = fdct->convsamp;  | 
515  | 15.1M  |   quantize_method_ptr do_quantize = fdct->quantize;  | 
516  | 15.1M  |   workspace = fdct->workspace;  | 
517  |  |  | 
518  | 15.1M  |   sample_data += start_row;     /* fold in the vertical offset once */  | 
519  |  |  | 
520  | 33.4M  |   for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) { | 
521  |  |     /* Load data into workspace, applying unsigned->signed conversion */  | 
522  | 18.3M  |     (*do_convsamp) (sample_data, start_col, workspace);  | 
523  |  |  | 
524  |  |     /* Perform the DCT */  | 
525  | 18.3M  |     (*do_dct) (workspace);  | 
526  |  |  | 
527  |  |     /* Quantize/descale the coefficients, and store into coef_blocks[] */  | 
528  | 18.3M  |     (*do_quantize) (coef_blocks[bi], divisors, workspace);  | 
529  | 18.3M  |   }  | 
530  | 15.1M  | }  | 
531  |  |  | 
532  |  |  | 
533  |  | #ifdef DCT_FLOAT_SUPPORTED  | 
534  |  |  | 
535  |  | METHODDEF(void)  | 
536  |  | convsamp_float(_JSAMPARRAY sample_data, JDIMENSION start_col,  | 
537  |  |                FAST_FLOAT *workspace)  | 
538  | 0  | { | 
539  | 0  |   register FAST_FLOAT *workspaceptr;  | 
540  | 0  |   register _JSAMPROW elemptr;  | 
541  | 0  |   register int elemr;  | 
542  |  | 
  | 
543  | 0  |   workspaceptr = workspace;  | 
544  | 0  |   for (elemr = 0; elemr < DCTSIZE; elemr++) { | 
545  | 0  |     elemptr = sample_data[elemr] + start_col;  | 
546  | 0  | #if DCTSIZE == 8                /* unroll the inner loop */  | 
547  | 0  |     *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);  | 
548  | 0  |     *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);  | 
549  | 0  |     *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);  | 
550  | 0  |     *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);  | 
551  | 0  |     *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);  | 
552  | 0  |     *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);  | 
553  | 0  |     *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);  | 
554  | 0  |     *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);  | 
555  |  | #else  | 
556  |  |     { | 
557  |  |       register int elemc;  | 
558  |  |       for (elemc = DCTSIZE; elemc > 0; elemc--)  | 
559  |  |         *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);  | 
560  |  |     }  | 
561  |  | #endif  | 
562  | 0  |   }  | 
563  | 0  | }  | 
564  |  |  | 
565  |  |  | 
566  |  | METHODDEF(void)  | 
567  |  | quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,  | 
568  |  |                FAST_FLOAT *workspace)  | 
569  | 0  | { | 
570  | 0  |   register FAST_FLOAT temp;  | 
571  | 0  |   register int i;  | 
572  | 0  |   register JCOEFPTR output_ptr = coef_block;  | 
573  |  | 
  | 
574  | 0  |   for (i = 0; i < DCTSIZE2; i++) { | 
575  |  |     /* Apply the quantization and scaling factor */  | 
576  | 0  |     temp = workspace[i] * divisors[i];  | 
577  |  |  | 
578  |  |     /* Round to nearest integer.  | 
579  |  |      * Since C does not specify the direction of rounding for negative  | 
580  |  |      * quotients, we have to force the dividend positive for portability.  | 
581  |  |      * The maximum coefficient size is +-16K (for 12-bit data), so this  | 
582  |  |      * code should work for either 16-bit or 32-bit ints.  | 
583  |  |      */  | 
584  | 0  |     output_ptr[i] = (JCOEF)((int)(temp + (FAST_FLOAT)16384.5) - 16384);  | 
585  | 0  |   }  | 
586  | 0  | }  | 
587  |  |  | 
588  |  |  | 
589  |  | METHODDEF(void)  | 
590  |  | forward_DCT_float(j_compress_ptr cinfo, jpeg_component_info *compptr,  | 
591  |  |                   _JSAMPARRAY sample_data, JBLOCKROW coef_blocks,  | 
592  |  |                   JDIMENSION start_row, JDIMENSION start_col,  | 
593  |  |                   JDIMENSION num_blocks)  | 
594  |  | /* This version is used for floating-point DCT implementations. */  | 
595  | 0  | { | 
596  |  |   /* This routine is heavily used, so it's worth coding it tightly. */  | 
597  | 0  |   my_fdct_ptr fdct = (my_fdct_ptr)cinfo->fdct;  | 
598  | 0  |   FAST_FLOAT *divisors = fdct->float_divisors[compptr->quant_tbl_no];  | 
599  | 0  |   FAST_FLOAT *workspace;  | 
600  | 0  |   JDIMENSION bi;  | 
601  |  |  | 
602  |  |  | 
603  |  |   /* Make sure the compiler doesn't look up these every pass */  | 
604  | 0  |   float_DCT_method_ptr do_dct = fdct->float_dct;  | 
605  | 0  |   float_convsamp_method_ptr do_convsamp = fdct->float_convsamp;  | 
606  | 0  |   float_quantize_method_ptr do_quantize = fdct->float_quantize;  | 
607  | 0  |   workspace = fdct->float_workspace;  | 
608  |  | 
  | 
609  | 0  |   sample_data += start_row;     /* fold in the vertical offset once */  | 
610  |  | 
  | 
611  | 0  |   for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) { | 
612  |  |     /* Load data into workspace, applying unsigned->signed conversion */  | 
613  | 0  |     (*do_convsamp) (sample_data, start_col, workspace);  | 
614  |  |  | 
615  |  |     /* Perform the DCT */  | 
616  | 0  |     (*do_dct) (workspace);  | 
617  |  |  | 
618  |  |     /* Quantize/descale the coefficients, and store into coef_blocks[] */  | 
619  | 0  |     (*do_quantize) (coef_blocks[bi], divisors, workspace);  | 
620  | 0  |   }  | 
621  | 0  | }  | 
622  |  |  | 
623  |  | #endif /* DCT_FLOAT_SUPPORTED */  | 
624  |  |  | 
625  |  |  | 
626  |  | /*  | 
627  |  |  * Initialize FDCT manager.  | 
628  |  |  */  | 
629  |  |  | 
630  |  | GLOBAL(void)  | 
631  |  | _jinit_forward_dct(j_compress_ptr cinfo)  | 
632  | 12.9k  | { | 
633  | 12.9k  |   my_fdct_ptr fdct;  | 
634  | 12.9k  |   int i;  | 
635  |  |  | 
636  | 12.9k  |   if (cinfo->data_precision != BITS_IN_JSAMPLE)  | 
637  | 0  |     ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);  | 
638  |  |  | 
639  | 12.9k  |   fdct = (my_fdct_ptr)  | 
640  | 12.9k  |     (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,  | 
641  | 12.9k  |                                 sizeof(my_fdct_controller));  | 
642  | 12.9k  |   cinfo->fdct = (struct jpeg_forward_dct *)fdct;  | 
643  | 12.9k  |   fdct->pub.start_pass = start_pass_fdctmgr;  | 
644  |  |  | 
645  |  |   /* First determine the DCT... */  | 
646  | 12.9k  |   switch (cinfo->dct_method) { | 
647  | 0  | #ifdef DCT_ISLOW_SUPPORTED  | 
648  | 11.0k  |   case JDCT_ISLOW:  | 
649  | 11.0k  |     fdct->pub._forward_DCT = forward_DCT;  | 
650  |  | #ifdef WITH_SIMD  | 
651  | 0  |     if (jsimd_can_fdct_islow())  | 
652  | 0  |       fdct->dct = jsimd_fdct_islow;  | 
653  | 0  |     else  | 
654  | 0  | #endif  | 
655  | 11.0k  |       fdct->dct = _jpeg_fdct_islow;  | 
656  | 11.0k  |     break;  | 
657  | 0  | #endif  | 
658  | 0  | #ifdef DCT_IFAST_SUPPORTED  | 
659  | 1.88k  |   case JDCT_IFAST:  | 
660  | 1.88k  |     fdct->pub._forward_DCT = forward_DCT;  | 
661  |  | #ifdef WITH_SIMD  | 
662  | 0  |     if (jsimd_can_fdct_ifast())  | 
663  | 0  |       fdct->dct = jsimd_fdct_ifast;  | 
664  | 0  |     else  | 
665  | 0  | #endif  | 
666  | 1.88k  |       fdct->dct = _jpeg_fdct_ifast;  | 
667  | 1.88k  |     break;  | 
668  | 0  | #endif  | 
669  | 0  | #ifdef DCT_FLOAT_SUPPORTED  | 
670  | 0  |   case JDCT_FLOAT:  | 
671  | 0  |     fdct->pub._forward_DCT = forward_DCT_float;  | 
672  |  | #ifdef WITH_SIMD  | 
673  | 0  |     if (jsimd_can_fdct_float())  | 
674  | 0  |       fdct->float_dct = jsimd_fdct_float;  | 
675  | 0  |     else  | 
676  | 0  | #endif  | 
677  | 0  |       fdct->float_dct = jpeg_fdct_float;  | 
678  | 0  |     break;  | 
679  | 0  | #endif  | 
680  | 0  |   default:  | 
681  | 0  |     ERREXIT(cinfo, JERR_NOT_COMPILED);  | 
682  | 0  |     break;  | 
683  | 12.9k  |   }  | 
684  |  |  | 
685  |  |   /* ...then the supporting stages. */  | 
686  | 12.9k  |   switch (cinfo->dct_method) { | 
687  | 0  | #ifdef DCT_ISLOW_SUPPORTED  | 
688  | 11.0k  |   case JDCT_ISLOW:  | 
689  | 11.0k  | #endif  | 
690  | 11.0k  | #ifdef DCT_IFAST_SUPPORTED  | 
691  | 12.9k  |   case JDCT_IFAST:  | 
692  | 12.9k  | #endif  | 
693  | 12.9k  | #if defined(DCT_ISLOW_SUPPORTED) || defined(DCT_IFAST_SUPPORTED)  | 
694  |  | #ifdef WITH_SIMD  | 
695  | 0  |     if (jsimd_can_convsamp())  | 
696  | 0  |       fdct->convsamp = jsimd_convsamp;  | 
697  | 0  |     else  | 
698  | 0  | #endif  | 
699  | 0  |       fdct->convsamp = convsamp;  | 
700  |  | #ifdef WITH_SIMD  | 
701  | 0  |     if (jsimd_can_quantize())  | 
702  | 0  |       fdct->quantize = jsimd_quantize;  | 
703  | 0  |     else  | 
704  | 0  | #endif  | 
705  | 0  |       fdct->quantize = quantize;  | 
706  | 12.9k  |     break;  | 
707  | 0  | #endif  | 
708  | 0  | #ifdef DCT_FLOAT_SUPPORTED  | 
709  | 0  |   case JDCT_FLOAT:  | 
710  |  | #ifdef WITH_SIMD  | 
711  | 0  |     if (jsimd_can_convsamp_float())  | 
712  | 0  |       fdct->float_convsamp = jsimd_convsamp_float;  | 
713  | 0  |     else  | 
714  | 0  | #endif  | 
715  | 0  |       fdct->float_convsamp = convsamp_float;  | 
716  |  | #ifdef WITH_SIMD  | 
717  | 0  |     if (jsimd_can_quantize_float())  | 
718  | 0  |       fdct->float_quantize = jsimd_quantize_float;  | 
719  | 0  |     else  | 
720  | 0  | #endif  | 
721  | 0  |       fdct->float_quantize = quantize_float;  | 
722  | 0  |     break;  | 
723  | 0  | #endif  | 
724  | 0  |   default:  | 
725  | 0  |     ERREXIT(cinfo, JERR_NOT_COMPILED);  | 
726  | 0  |     break;  | 
727  | 12.9k  |   }  | 
728  |  |  | 
729  |  |   /* Allocate workspace memory */  | 
730  | 12.9k  | #ifdef DCT_FLOAT_SUPPORTED  | 
731  | 12.9k  |   if (cinfo->dct_method == JDCT_FLOAT)  | 
732  | 0  |     fdct->float_workspace = (FAST_FLOAT *)  | 
733  | 0  |       (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,  | 
734  | 0  |                                   sizeof(FAST_FLOAT) * DCTSIZE2);  | 
735  | 12.9k  |   else  | 
736  | 12.9k  | #endif  | 
737  | 12.9k  |     fdct->workspace = (DCTELEM *)  | 
738  | 12.9k  |       (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,  | 
739  | 12.9k  |                                   sizeof(DCTELEM) * DCTSIZE2);  | 
740  |  |  | 
741  |  |   /* Mark divisor tables unallocated */  | 
742  | 64.5k  |   for (i = 0; i < NUM_QUANT_TBLS; i++) { | 
743  | 51.6k  |     fdct->divisors[i] = NULL;  | 
744  | 51.6k  | #ifdef DCT_FLOAT_SUPPORTED  | 
745  | 51.6k  |     fdct->float_divisors[i] = NULL;  | 
746  | 51.6k  | #endif  | 
747  | 51.6k  |   }  | 
748  | 12.9k  | } Line  | Count  | Source  |  632  | 12.9k  | { |  633  | 12.9k  |   my_fdct_ptr fdct;  |  634  | 12.9k  |   int i;  |  635  |  |  |  636  | 12.9k  |   if (cinfo->data_precision != BITS_IN_JSAMPLE)  |  637  | 0  |     ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);  |  638  |  |  |  639  | 12.9k  |   fdct = (my_fdct_ptr)  |  640  | 12.9k  |     (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,  |  641  | 12.9k  |                                 sizeof(my_fdct_controller));  |  642  | 12.9k  |   cinfo->fdct = (struct jpeg_forward_dct *)fdct;  |  643  | 12.9k  |   fdct->pub.start_pass = start_pass_fdctmgr;  |  644  |  |  |  645  |  |   /* First determine the DCT... */  |  646  | 12.9k  |   switch (cinfo->dct_method) { |  647  | 0  | #ifdef DCT_ISLOW_SUPPORTED  |  648  | 11.0k  |   case JDCT_ISLOW:  |  649  | 11.0k  |     fdct->pub._forward_DCT = forward_DCT;  |  650  |  | #ifdef WITH_SIMD  |  651  |  |     if (jsimd_can_fdct_islow())  |  652  |  |       fdct->dct = jsimd_fdct_islow;  |  653  |  |     else  |  654  |  | #endif  |  655  | 11.0k  |       fdct->dct = _jpeg_fdct_islow;  |  656  | 11.0k  |     break;  |  657  | 0  | #endif  |  658  | 0  | #ifdef DCT_IFAST_SUPPORTED  |  659  | 1.88k  |   case JDCT_IFAST:  |  660  | 1.88k  |     fdct->pub._forward_DCT = forward_DCT;  |  661  |  | #ifdef WITH_SIMD  |  662  |  |     if (jsimd_can_fdct_ifast())  |  663  |  |       fdct->dct = jsimd_fdct_ifast;  |  664  |  |     else  |  665  |  | #endif  |  666  | 1.88k  |       fdct->dct = _jpeg_fdct_ifast;  |  667  | 1.88k  |     break;  |  668  | 0  | #endif  |  669  | 0  | #ifdef DCT_FLOAT_SUPPORTED  |  670  | 0  |   case JDCT_FLOAT:  |  671  | 0  |     fdct->pub._forward_DCT = forward_DCT_float;  |  672  |  | #ifdef WITH_SIMD  |  673  |  |     if (jsimd_can_fdct_float())  |  674  |  |       fdct->float_dct = jsimd_fdct_float;  |  675  |  |     else  |  676  |  | #endif  |  677  | 0  |       fdct->float_dct = jpeg_fdct_float;  |  678  
| 0  |     break;  |  679  | 0  | #endif  |  680  | 0  |   default:  |  681  | 0  |     ERREXIT(cinfo, JERR_NOT_COMPILED);  |  682  | 0  |     break;  |  683  | 12.9k  |   }  |  684  |  |  |  685  |  |   /* ...then the supporting stages. */  |  686  | 12.9k  |   switch (cinfo->dct_method) { |  687  | 0  | #ifdef DCT_ISLOW_SUPPORTED  |  688  | 11.0k  |   case JDCT_ISLOW:  |  689  | 11.0k  | #endif  |  690  | 11.0k  | #ifdef DCT_IFAST_SUPPORTED  |  691  | 12.9k  |   case JDCT_IFAST:  |  692  | 12.9k  | #endif  |  693  | 12.9k  | #if defined(DCT_ISLOW_SUPPORTED) || defined(DCT_IFAST_SUPPORTED)  |  694  |  | #ifdef WITH_SIMD  |  695  |  |     if (jsimd_can_convsamp())  |  696  |  |       fdct->convsamp = jsimd_convsamp;  |  697  |  |     else  |  698  |  | #endif  |  699  | 12.9k  |       fdct->convsamp = convsamp;  |  700  |  | #ifdef WITH_SIMD  |  701  |  |     if (jsimd_can_quantize())  |  702  |  |       fdct->quantize = jsimd_quantize;  |  703  |  |     else  |  704  |  | #endif  |  705  | 12.9k  |       fdct->quantize = quantize;  |  706  | 12.9k  |     break;  |  707  | 0  | #endif  |  708  | 0  | #ifdef DCT_FLOAT_SUPPORTED  |  709  | 0  |   case JDCT_FLOAT:  |  710  |  | #ifdef WITH_SIMD  |  711  |  |     if (jsimd_can_convsamp_float())  |  712  |  |       fdct->float_convsamp = jsimd_convsamp_float;  |  713  |  |     else  |  714  |  | #endif  |  715  | 0  |       fdct->float_convsamp = convsamp_float;  |  716  |  | #ifdef WITH_SIMD  |  717  |  |     if (jsimd_can_quantize_float())  |  718  |  |       fdct->float_quantize = jsimd_quantize_float;  |  719  |  |     else  |  720  |  | #endif  |  721  | 0  |       fdct->float_quantize = quantize_float;  |  722  | 0  |     break;  |  723  | 0  | #endif  |  724  | 0  |   default:  |  725  | 0  |     ERREXIT(cinfo, JERR_NOT_COMPILED);  |  726  | 0  |     break;  |  727  | 12.9k  |   }  |  728  |  |  |  729  |  |   /* Allocate workspace memory */  |  730  | 12.9k  | #ifdef DCT_FLOAT_SUPPORTED  |  731  | 12.9k  |   if 
(cinfo->dct_method == JDCT_FLOAT)  |  732  | 0  |     fdct->float_workspace = (FAST_FLOAT *)  |  733  | 0  |       (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,  |  734  | 0  |                                   sizeof(FAST_FLOAT) * DCTSIZE2);  |  735  | 12.9k  |   else  |  736  | 12.9k  | #endif  |  737  | 12.9k  |     fdct->workspace = (DCTELEM *)  |  738  | 12.9k  |       (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,  |  739  | 12.9k  |                                   sizeof(DCTELEM) * DCTSIZE2);  |  740  |  |  |  741  |  |   /* Mark divisor tables unallocated */  |  742  | 64.5k  |   for (i = 0; i < NUM_QUANT_TBLS; i++) { |  743  | 51.6k  |     fdct->divisors[i] = NULL;  |  744  | 51.6k  | #ifdef DCT_FLOAT_SUPPORTED  |  745  | 51.6k  |     fdct->float_divisors[i] = NULL;  |  746  | 51.6k  | #endif  |  747  | 51.6k  |   }  |  748  | 12.9k  | }  |  
 Unexecuted instantiation: jinit_forward_dct  |