/src/libjpeg-turbo.main/jcphuff.c
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | /*  | 
2  |  |  * jcphuff.c  | 
3  |  |  *  | 
4  |  |  * This file was part of the Independent JPEG Group's software:  | 
5  |  |  * Copyright (C) 1995-1997, Thomas G. Lane.  | 
6  |  |  * Lossless JPEG Modifications:  | 
7  |  |  * Copyright (C) 1999, Ken Murchison.  | 
8  |  |  * libjpeg-turbo Modifications:  | 
9  |  |  * Copyright (C) 2011, 2015, 2018, 2021-2022, D. R. Commander.  | 
10  |  |  * Copyright (C) 2016, 2018, 2022, Matthieu Darbois.  | 
11  |  |  * Copyright (C) 2020, Arm Limited.  | 
12  |  |  * Copyright (C) 2021, Alex Richardson.  | 
13  |  |  * For conditions of distribution and use, see the accompanying README.ijg  | 
14  |  |  * file.  | 
15  |  |  *  | 
16  |  |  * This file contains Huffman entropy encoding routines for progressive JPEG.  | 
17  |  |  *  | 
18  |  |  * We do not support output suspension in this module, since the library  | 
19  |  |  * currently does not allow multiple-scan files to be written with output  | 
20  |  |  * suspension.  | 
21  |  |  */  | 
22  |  |  | 
23  |  | #define JPEG_INTERNALS  | 
24  |  | #include "jinclude.h"  | 
25  |  | #include "jpeglib.h"  | 
26  |  | #ifdef WITH_SIMD  | 
27  |  | #include "jsimd.h"  | 
28  |  | #else  | 
29  |  | #include "jchuff.h"             /* Declarations shared with jc*huff.c */  | 
30  |  | #endif  | 
31  |  | #include <limits.h>  | 
32  |  |  | 
33  |  | #ifdef HAVE_INTRIN_H  | 
34  |  | #include <intrin.h>  | 
35  |  | #ifdef _MSC_VER  | 
36  |  | #ifdef HAVE_BITSCANFORWARD64  | 
37  |  | #pragma intrinsic(_BitScanForward64)  | 
38  |  | #endif  | 
39  |  | #ifdef HAVE_BITSCANFORWARD  | 
40  |  | #pragma intrinsic(_BitScanForward)  | 
41  |  | #endif  | 
42  |  | #endif  | 
43  |  | #endif  | 
44  |  |  | 
45  |  | #ifdef C_PROGRESSIVE_SUPPORTED  | 
46  |  |  | 
47  |  | /*  | 
48  |  |  * NOTE: If USE_CLZ_INTRINSIC is defined, then clz/bsr instructions will be  | 
49  |  |  * used for bit counting rather than the lookup table.  This will reduce the  | 
50  |  |  * memory footprint by 64k, which is important for some mobile applications  | 
51  |  |  * that create many isolated instances of libjpeg-turbo (web browsers, for  | 
52  |  |  * instance.)  This may improve performance on some mobile platforms as well.  | 
53  |  |  * This feature is enabled by default only on Arm processors, because some x86  | 
54  |  |  * chips have a slow implementation of bsr, and the use of clz/bsr cannot be  | 
55  |  |  * shown to have a significant performance impact even on the x86 chips that  | 
56  |  |  * have a fast implementation of it.  When building for Armv6, you can  | 
57  |  |  * explicitly disable the use of clz/bsr by adding -mthumb to the compiler  | 
58  |  |  * flags (this defines __thumb__).  | 
59  |  |  */  | 
60  |  |  | 
61  |  | /* NOTE: Both GCC and Clang define __GNUC__ */  | 
62  |  | #if (defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__))) || \  | 
63  |  |     defined(_M_ARM) || defined(_M_ARM64)  | 
64  |  | #if !defined(__thumb__) || defined(__thumb2__)  | 
65  |  | #define USE_CLZ_INTRINSIC  | 
66  |  | #endif  | 
67  |  | #endif  | 
68  |  |  | 
69  |  | #ifdef USE_CLZ_INTRINSIC  | 
70  |  | #if defined(_MSC_VER) && !defined(__clang__)  | 
71  |  | #define JPEG_NBITS_NONZERO(x)  (32 - _CountLeadingZeros(x))  | 
72  |  | #else  | 
73  |  | #define JPEG_NBITS_NONZERO(x)  (32 - __builtin_clz(x))  | 
74  |  | #endif  | 
75  |  | #define JPEG_NBITS(x)          (x ? JPEG_NBITS_NONZERO(x) : 0)  | 
76  |  | #else  | 
77  |  | #include "jpeg_nbits_table.h"  | 
78  | 31.6M  | #define JPEG_NBITS(x)          (jpeg_nbits_table[x])  | 
79  | 10.0M  | #define JPEG_NBITS_NONZERO(x)  JPEG_NBITS(x)  | 
80  |  | #endif  | 
81  |  |  | 
82  |  |  | 
83  |  | /* Expanded entropy encoder object for progressive Huffman encoding. */  | 
84  |  |  | 
/* Private state of the progressive Huffman entropy encoder.  The object is
 * reached through cinfo->entropy and cast to phuff_entropy_ptr by every
 * routine in this file.
 */
typedef struct {
  struct jpeg_entropy_encoder pub; /* public fields */

  /* Pointer to routine to prepare data for encode_mcu_AC_first().
   * start_pass_phuff() points this at either the SIMD or the C
   * implementation.
   */
  void (*AC_first_prepare) (const JCOEF *block,
                            const int *jpeg_natural_order_start, int Sl,
                            int Al, UJCOEF *values, size_t *zerobits);
  /* Pointer to routine to prepare data for encode_mcu_AC_refine();
   * selected the same way, but only for AC refinement scans.
   */
  int (*AC_refine_prepare) (const JCOEF *block,
                            const int *jpeg_natural_order_start, int Sl,
                            int Al, UJCOEF *absvalues, size_t *bits);

  /* Mode flag: TRUE for optimization, FALSE for actual data output.
   * When TRUE, emit_bits()/emit_buffered_bits() write nothing and
   * emit_symbol() only increments the matching count_ptrs[] entry.
   */
  boolean gather_statistics;

  /* Bit-level coding status.
   * next_output_byte/free_in_buffer are local copies of cinfo->dest fields;
   * the encode_mcu_* routines load them on entry and store them back on exit.
   */
  JOCTET *next_output_byte;     /* => next byte to write in buffer */
  size_t free_in_buffer;        /* # of byte spaces remaining in buffer */
  size_t put_buffer;            /* current bit-accumulation buffer */
  int put_bits;                 /* # of bits now in it */
  j_compress_ptr cinfo;         /* link to cinfo (needed for dump_buffer) */

  /* Coding status for DC components (indexed by position within the scan) */
  int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */

  /* Coding status for AC components */
  int ac_tbl_no;                /* the table number of the single component */
  unsigned int EOBRUN;          /* run length of EOBs */
  unsigned int BE;              /* # of buffered correction bits before MCU */
  char *bit_buffer;             /* buffer for correction bits (1 per char);
                                 * MAX_CORR_BITS entries, allocated on first
                                 * AC refinement scan */
  /* packing correction bits tightly would save some space but cost time... */

  unsigned int restarts_to_go;  /* MCUs left in this restart interval */
  int next_restart_num;         /* next restart number to write (0-7) */

  /* Pointers to derived tables (these workspaces have image lifespan).
   * Since any one scan codes only DC or only AC, we only need one set
   * of tables, not one for DC and one for AC.
   */
  c_derived_tbl *derived_tbls[NUM_HUFF_TBLS];

  /* Statistics tables for optimization; again, one set is enough.
   * Each non-NULL entry addresses 257 counters.
   */
  long *count_ptrs[NUM_HUFF_TBLS];
} phuff_entropy_encoder;

/* Convenience pointer type used for the cast from cinfo->entropy */
typedef phuff_entropy_encoder *phuff_entropy_ptr;
133  |  |  | 
134  |  | /* MAX_CORR_BITS is the number of bits the AC refinement correction-bit  | 
135  |  |  * buffer can hold.  Larger sizes may slightly improve compression, but  | 
136  |  |  * 1000 is already well into the realm of overkill.  | 
137  |  |  * The minimum safe size is 64 bits.  | 
138  |  |  */  | 
139  |  |  | 
140  | 40.1M  | #define MAX_CORR_BITS  1000     /* Max # of correction bits I can buffer */  | 
141  |  |  | 
/* IRIGHT_SHIFT is like RIGHT_SHIFT, but works on int rather than JLONG.
 * We assume that int right shift is unsigned if JLONG right shift is,
 * which should be safe.
 *
 * ISHIFT_TEMPS must appear among the local declarations of any function
 * that uses IRIGHT_SHIFT; it declares the scratch variable needed by the
 * RIGHT_SHIFT_IS_UNSIGNED variant and expands to nothing otherwise.
 */

#ifdef RIGHT_SHIFT_IS_UNSIGNED
#define ISHIFT_TEMPS    int ishift_temp;
#define IRIGHT_SHIFT(x, shft) \
  ((ishift_temp = (x)) < 0 ? \
   (ishift_temp >> (shft)) | ((~0) << (16 - (shft))) : \
   (ishift_temp >> (shft)))
#else
#define ISHIFT_TEMPS
#define IRIGHT_SHIFT(x, shft)   ((x) >> (shft))
#endif

/* Round v up to the next multiple of p, where p is a power of 2.
 * Both arguments are fully parenthesized so that expression arguments
 * (e.g. "a & mask") are evaluated as a unit before the addition.
 */
#define PAD(v, p)  (((v) + (p) - 1) & (~((p) - 1)))
159  |  |  | 
160  |  | /* Forward declarations */  | 
161  |  | METHODDEF(boolean) encode_mcu_DC_first(j_compress_ptr cinfo,  | 
162  |  |                                        JBLOCKROW *MCU_data);  | 
163  |  | METHODDEF(void) encode_mcu_AC_first_prepare  | 
164  |  |   (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,  | 
165  |  |    UJCOEF *values, size_t *zerobits);  | 
166  |  | METHODDEF(boolean) encode_mcu_AC_first(j_compress_ptr cinfo,  | 
167  |  |                                        JBLOCKROW *MCU_data);  | 
168  |  | METHODDEF(boolean) encode_mcu_DC_refine(j_compress_ptr cinfo,  | 
169  |  |                                         JBLOCKROW *MCU_data);  | 
170  |  | METHODDEF(int) encode_mcu_AC_refine_prepare  | 
171  |  |   (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,  | 
172  |  |    UJCOEF *absvalues, size_t *bits);  | 
173  |  | METHODDEF(boolean) encode_mcu_AC_refine(j_compress_ptr cinfo,  | 
174  |  |                                         JBLOCKROW *MCU_data);  | 
175  |  | METHODDEF(void) finish_pass_phuff(j_compress_ptr cinfo);  | 
176  |  | METHODDEF(void) finish_pass_gather_phuff(j_compress_ptr cinfo);  | 
177  |  |  | 
178  |  |  | 
179  |  | /* Count bit loop zeroes */  | 
180  |  | INLINE  | 
181  |  | METHODDEF(int)  | 
182  |  | count_zeroes(size_t *x)  | 
183  | 52.6M  | { | 
184  | 52.6M  | #if defined(HAVE_BUILTIN_CTZL)  | 
185  | 52.6M  |   int result;  | 
186  | 52.6M  |   result = __builtin_ctzl(*x);  | 
187  | 52.6M  |   *x >>= result;  | 
188  |  | #elif defined(HAVE_BITSCANFORWARD64)  | 
189  |  |   unsigned long result;  | 
190  |  |   _BitScanForward64(&result, *x);  | 
191  |  |   *x >>= result;  | 
192  |  | #elif defined(HAVE_BITSCANFORWARD)  | 
193  |  |   unsigned long result;  | 
194  |  |   _BitScanForward(&result, *x);  | 
195  |  |   *x >>= result;  | 
196  |  | #else  | 
197  |  |   int result = 0;  | 
198  |  |   while ((*x & 1) == 0) { | 
199  |  |     ++result;  | 
200  |  |     *x >>= 1;  | 
201  |  |   }  | 
202  |  | #endif  | 
203  | 52.6M  |   return (int)result;  | 
204  | 52.6M  | }  | 
205  |  |  | 
206  |  |  | 
207  |  | /*  | 
208  |  |  * Initialize for a Huffman-compressed scan using progressive JPEG.  | 
209  |  |  */  | 
210  |  |  | 
211  |  | METHODDEF(void)  | 
212  |  | start_pass_phuff(j_compress_ptr cinfo, boolean gather_statistics)  | 
213  | 33.9k  | { | 
214  | 33.9k  |   phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;  | 
215  | 33.9k  |   boolean is_DC_band;  | 
216  | 33.9k  |   int ci, tbl;  | 
217  | 33.9k  |   jpeg_component_info *compptr;  | 
218  |  |  | 
219  | 33.9k  |   entropy->cinfo = cinfo;  | 
220  | 33.9k  |   entropy->gather_statistics = gather_statistics;  | 
221  |  |  | 
222  | 33.9k  |   is_DC_band = (cinfo->Ss == 0);  | 
223  |  |  | 
224  |  |   /* We assume jcmaster.c already validated the scan parameters. */  | 
225  |  |  | 
226  |  |   /* Select execution routines */  | 
227  | 33.9k  |   if (cinfo->Ah == 0) { | 
228  | 18.5k  |     if (is_DC_band)  | 
229  | 5.58k  |       entropy->pub.encode_mcu = encode_mcu_DC_first;  | 
230  | 12.9k  |     else  | 
231  | 12.9k  |       entropy->pub.encode_mcu = encode_mcu_AC_first;  | 
232  | 18.5k  | #ifdef WITH_SIMD  | 
233  | 18.5k  |     if (jsimd_can_encode_mcu_AC_first_prepare())  | 
234  | 18.5k  |       entropy->AC_first_prepare = jsimd_encode_mcu_AC_first_prepare;  | 
235  | 0  |     else  | 
236  | 0  | #endif  | 
237  | 0  |       entropy->AC_first_prepare = encode_mcu_AC_first_prepare;  | 
238  | 18.5k  |   } else { | 
239  | 15.3k  |     if (is_DC_band)  | 
240  | 2.63k  |       entropy->pub.encode_mcu = encode_mcu_DC_refine;  | 
241  | 12.7k  |     else { | 
242  | 12.7k  |       entropy->pub.encode_mcu = encode_mcu_AC_refine;  | 
243  | 12.7k  | #ifdef WITH_SIMD  | 
244  | 12.7k  |       if (jsimd_can_encode_mcu_AC_refine_prepare())  | 
245  | 12.7k  |         entropy->AC_refine_prepare = jsimd_encode_mcu_AC_refine_prepare;  | 
246  | 0  |       else  | 
247  | 0  | #endif  | 
248  | 0  |         entropy->AC_refine_prepare = encode_mcu_AC_refine_prepare;  | 
249  |  |       /* AC refinement needs a correction bit buffer */  | 
250  | 12.7k  |       if (entropy->bit_buffer == NULL)  | 
251  | 2.63k  |         entropy->bit_buffer = (char *)  | 
252  | 2.63k  |           (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,  | 
253  | 2.63k  |                                       MAX_CORR_BITS * sizeof(char));  | 
254  | 12.7k  |     }  | 
255  | 15.3k  |   }  | 
256  | 33.9k  |   if (gather_statistics)  | 
257  | 15.7k  |     entropy->pub.finish_pass = finish_pass_gather_phuff;  | 
258  | 18.1k  |   else  | 
259  | 18.1k  |     entropy->pub.finish_pass = finish_pass_phuff;  | 
260  |  |  | 
261  |  |   /* Only DC coefficients may be interleaved, so cinfo->comps_in_scan = 1  | 
262  |  |    * for AC coefficients.  | 
263  |  |    */  | 
264  | 70.1k  |   for (ci = 0; ci < cinfo->comps_in_scan; ci++) { | 
265  | 36.2k  |     compptr = cinfo->cur_comp_info[ci];  | 
266  |  |     /* Initialize DC predictions to 0 */  | 
267  | 36.2k  |     entropy->last_dc_val[ci] = 0;  | 
268  |  |     /* Get table index */  | 
269  | 36.2k  |     if (is_DC_band) { | 
270  | 10.5k  |       if (cinfo->Ah != 0)       /* DC refinement needs no table */  | 
271  | 3.36k  |         continue;  | 
272  | 7.17k  |       tbl = compptr->dc_tbl_no;  | 
273  | 25.6k  |     } else { | 
274  | 25.6k  |       entropy->ac_tbl_no = tbl = compptr->ac_tbl_no;  | 
275  | 25.6k  |     }  | 
276  | 32.8k  |     if (gather_statistics) { | 
277  |  |       /* Check for invalid table index */  | 
278  |  |       /* (make_c_derived_tbl does this in the other path) */  | 
279  | 16.5k  |       if (tbl < 0 || tbl >= NUM_HUFF_TBLS)  | 
280  | 0  |         ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tbl);  | 
281  |  |       /* Allocate and zero the statistics tables */  | 
282  |  |       /* Note that jpeg_gen_optimal_table expects 257 entries in each table! */  | 
283  | 16.5k  |       if (entropy->count_ptrs[tbl] == NULL)  | 
284  | 3.07k  |         entropy->count_ptrs[tbl] = (long *)  | 
285  | 3.07k  |           (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,  | 
286  | 3.07k  |                                       257 * sizeof(long));  | 
287  | 16.5k  |       memset(entropy->count_ptrs[tbl], 0, 257 * sizeof(long));  | 
288  | 16.5k  |     } else { | 
289  |  |       /* Compute derived values for Huffman table */  | 
290  |  |       /* We may do this more than once for a table, but it's not expensive */  | 
291  | 16.3k  |       jpeg_make_c_derived_tbl(cinfo, is_DC_band, tbl,  | 
292  | 16.3k  |                               &entropy->derived_tbls[tbl]);  | 
293  | 16.3k  |     }  | 
294  | 32.8k  |   }  | 
295  |  |  | 
296  |  |   /* Initialize AC stuff */  | 
297  | 33.9k  |   entropy->EOBRUN = 0;  | 
298  | 33.9k  |   entropy->BE = 0;  | 
299  |  |  | 
300  |  |   /* Initialize bit buffer to empty */  | 
301  | 33.9k  |   entropy->put_buffer = 0;  | 
302  | 33.9k  |   entropy->put_bits = 0;  | 
303  |  |  | 
304  |  |   /* Initialize restart stuff */  | 
305  | 33.9k  |   entropy->restarts_to_go = cinfo->restart_interval;  | 
306  | 33.9k  |   entropy->next_restart_num = 0;  | 
307  | 33.9k  | }  | 
308  |  |  | 
309  |  |  | 
310  |  | /* Outputting bytes to the file.  | 
311  |  |  * NB: these must be called only when actually outputting,  | 
312  |  |  * that is, entropy->gather_statistics == FALSE.  | 
313  |  |  */  | 
314  |  |  | 
/* Store one byte in the output buffer and hand the buffer to dump_buffer()
 * as soon as it becomes full.  Note that "entropy" is evaluated more than
 * once.
 */
#define emit_byte(entropy, val) { \
  *(entropy)->next_output_byte++ = (JOCTET)(val); \
  (entropy)->free_in_buffer--; \
  if ((entropy)->free_in_buffer == 0) \
    dump_buffer(entropy); \
}
321  |  |  | 
322  |  |  | 
323  |  | LOCAL(void)  | 
324  |  | dump_buffer(phuff_entropy_ptr entropy)  | 
325  |  | /* Empty the output buffer; we do not support suspension in this module. */  | 
326  | 294  | { | 
327  | 294  |   struct jpeg_destination_mgr *dest = entropy->cinfo->dest;  | 
328  |  |  | 
329  | 294  |   if (!(*dest->empty_output_buffer) (entropy->cinfo))  | 
330  | 0  |     ERREXIT(entropy->cinfo, JERR_CANT_SUSPEND);  | 
331  |  |   /* After a successful buffer dump, must reset buffer pointers */  | 
332  | 294  |   entropy->next_output_byte = dest->next_output_byte;  | 
333  | 294  |   entropy->free_in_buffer = dest->free_in_buffer;  | 
334  | 294  | }  | 
335  |  |  | 
336  |  |  | 
337  |  | /* Outputting bits to the file */  | 
338  |  |  | 
339  |  | /* Only the right 24 bits of put_buffer are used; the valid bits are  | 
340  |  |  * left-justified in this part.  At most 16 bits can be passed to emit_bits  | 
341  |  |  * in one call, and we never retain more than 7 bits in put_buffer  | 
342  |  |  * between calls, so 24 bits are sufficient.  | 
343  |  |  */  | 
344  |  |  | 
345  |  | LOCAL(void)  | 
346  |  | emit_bits(phuff_entropy_ptr entropy, unsigned int code, int size)  | 
347  |  | /* Emit some bits, unless we are in gather mode */  | 
348  | 81.1M  | { | 
349  |  |   /* This routine is heavily used, so it's worth coding tightly. */  | 
350  | 81.1M  |   register size_t put_buffer = (size_t)code;  | 
351  | 81.1M  |   register int put_bits = entropy->put_bits;  | 
352  |  |  | 
353  |  |   /* if size is 0, caller used an invalid Huffman table entry */  | 
354  | 81.1M  |   if (size == 0)  | 
355  | 0  |     ERREXIT(entropy->cinfo, JERR_HUFF_MISSING_CODE);  | 
356  |  |  | 
357  | 81.1M  |   if (entropy->gather_statistics)  | 
358  | 16.2M  |     return;                     /* do nothing if we're only getting stats */  | 
359  |  |  | 
360  | 64.8M  |   put_buffer &= (((size_t)1) << size) - 1; /* mask off any extra bits in code */  | 
361  |  |  | 
362  | 64.8M  |   put_bits += size;             /* new number of bits in buffer */  | 
363  |  |  | 
364  | 64.8M  |   put_buffer <<= 24 - put_bits; /* align incoming bits */  | 
365  |  |  | 
366  | 64.8M  |   put_buffer |= entropy->put_buffer; /* and merge with old buffer contents */  | 
367  |  |  | 
368  | 75.0M  |   while (put_bits >= 8) { | 
369  | 10.1M  |     int c = (int)((put_buffer >> 16) & 0xFF);  | 
370  |  |  | 
371  | 10.1M  |     emit_byte(entropy, c);  | 
372  | 10.1M  |     if (c == 0xFF) {            /* need to stuff a zero byte? */ | 
373  | 523k  |       emit_byte(entropy, 0);  | 
374  | 523k  |     }  | 
375  | 10.1M  |     put_buffer <<= 8;  | 
376  | 10.1M  |     put_bits -= 8;  | 
377  | 10.1M  |   }  | 
378  |  |  | 
379  | 64.8M  |   entropy->put_buffer = put_buffer; /* update variables */  | 
380  | 64.8M  |   entropy->put_bits = put_bits;  | 
381  | 64.8M  | }  | 
382  |  |  | 
383  |  |  | 
384  |  | LOCAL(void)  | 
385  |  | flush_bits(phuff_entropy_ptr entropy)  | 
386  | 18.1k  | { | 
387  | 18.1k  |   emit_bits(entropy, 0x7F, 7); /* fill any partial byte with ones */  | 
388  | 18.1k  |   entropy->put_buffer = 0;     /* and reset bit-buffer to empty */  | 
389  | 18.1k  |   entropy->put_bits = 0;  | 
390  | 18.1k  | }  | 
391  |  |  | 
392  |  |  | 
393  |  | /*  | 
394  |  |  * Emit (or just count) a Huffman symbol.  | 
395  |  |  */  | 
396  |  |  | 
397  |  | LOCAL(void)  | 
398  |  | emit_symbol(phuff_entropy_ptr entropy, int tbl_no, int symbol)  | 
399  | 55.9M  | { | 
400  | 55.9M  |   if (entropy->gather_statistics)  | 
401  | 27.9M  |     entropy->count_ptrs[tbl_no][symbol]++;  | 
402  | 27.9M  |   else { | 
403  | 27.9M  |     c_derived_tbl *tbl = entropy->derived_tbls[tbl_no];  | 
404  | 27.9M  |     emit_bits(entropy, tbl->ehufco[symbol], tbl->ehufsi[symbol]);  | 
405  | 27.9M  |   }  | 
406  | 55.9M  | }  | 
407  |  |  | 
408  |  |  | 
409  |  | /*  | 
410  |  |  * Emit bits from a correction bit buffer.  | 
411  |  |  */  | 
412  |  |  | 
413  |  | LOCAL(void)  | 
414  |  | emit_buffered_bits(phuff_entropy_ptr entropy, char *bufstart,  | 
415  |  |                    unsigned int nbits)  | 
416  | 25.3M  | { | 
417  | 25.3M  |   if (entropy->gather_statistics)  | 
418  | 12.6M  |     return;                     /* no real work */  | 
419  |  |  | 
420  | 22.9M  |   while (nbits > 0) { | 
421  | 10.2M  |     emit_bits(entropy, (unsigned int)(*bufstart), 1);  | 
422  | 10.2M  |     bufstart++;  | 
423  | 10.2M  |     nbits--;  | 
424  | 10.2M  |   }  | 
425  | 12.6M  | }  | 
426  |  |  | 
427  |  |  | 
428  |  | /*  | 
429  |  |  * Emit any pending EOBRUN symbol.  | 
430  |  |  */  | 
431  |  |  | 
432  |  | LOCAL(void)  | 
433  |  | emit_eobrun(phuff_entropy_ptr entropy)  | 
434  | 24.5M  | { | 
435  | 24.5M  |   register int temp, nbits;  | 
436  |  |  | 
437  | 24.5M  |   if (entropy->EOBRUN > 0) {    /* if there is any pending EOBRUN */ | 
438  | 1.39M  |     temp = entropy->EOBRUN;  | 
439  | 1.39M  |     nbits = JPEG_NBITS_NONZERO(temp) - 1;  | 
440  |  |     /* safety check: shouldn't happen given limited correction-bit buffer */  | 
441  | 1.39M  |     if (nbits > 14)  | 
442  | 0  |       ERREXIT(entropy->cinfo, JERR_HUFF_MISSING_CODE);  | 
443  |  |  | 
444  | 1.39M  |     emit_symbol(entropy, entropy->ac_tbl_no, nbits << 4);  | 
445  | 1.39M  |     if (nbits)  | 
446  | 168k  |       emit_bits(entropy, entropy->EOBRUN, nbits);  | 
447  |  |  | 
448  | 1.39M  |     entropy->EOBRUN = 0;  | 
449  |  |  | 
450  |  |     /* Emit any buffered correction bits */  | 
451  | 1.39M  |     emit_buffered_bits(entropy, entropy->bit_buffer, entropy->BE);  | 
452  | 1.39M  |     entropy->BE = 0;  | 
453  | 1.39M  |   }  | 
454  | 24.5M  | }  | 
455  |  |  | 
456  |  |  | 
457  |  | /*  | 
458  |  |  * Emit a restart marker & resynchronize predictions.  | 
459  |  |  */  | 
460  |  |  | 
461  |  | LOCAL(void)  | 
462  |  | emit_restart(phuff_entropy_ptr entropy, int restart_num)  | 
463  | 0  | { | 
464  | 0  |   int ci;  | 
465  |  | 
  | 
466  | 0  |   emit_eobrun(entropy);  | 
467  |  | 
  | 
468  | 0  |   if (!entropy->gather_statistics) { | 
469  | 0  |     flush_bits(entropy);  | 
470  | 0  |     emit_byte(entropy, 0xFF);  | 
471  | 0  |     emit_byte(entropy, JPEG_RST0 + restart_num);  | 
472  | 0  |   }  | 
473  |  | 
  | 
474  | 0  |   if (entropy->cinfo->Ss == 0) { | 
475  |  |     /* Re-initialize DC predictions to 0 */  | 
476  | 0  |     for (ci = 0; ci < entropy->cinfo->comps_in_scan; ci++)  | 
477  | 0  |       entropy->last_dc_val[ci] = 0;  | 
478  | 0  |   } else { | 
479  |  |     /* Re-initialize all AC-related fields to 0 */  | 
480  | 0  |     entropy->EOBRUN = 0;  | 
481  | 0  |     entropy->BE = 0;  | 
482  | 0  |   }  | 
483  | 0  | }  | 
484  |  |  | 
485  |  |  | 
486  |  | /*  | 
487  |  |  * MCU encoding for DC initial scan (either spectral selection,  | 
488  |  |  * or first pass of successive approximation).  | 
489  |  |  */  | 
490  |  |  | 
491  |  | METHODDEF(boolean)  | 
492  |  | encode_mcu_DC_first(j_compress_ptr cinfo, JBLOCKROW *MCU_data)  | 
493  | 17.5M  | { | 
494  | 17.5M  |   phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;  | 
495  | 17.5M  |   register int temp, temp2, temp3;  | 
496  | 17.5M  |   register int nbits;  | 
497  | 17.5M  |   int blkn, ci;  | 
498  | 17.5M  |   int Al = cinfo->Al;  | 
499  | 17.5M  |   JBLOCKROW block;  | 
500  | 17.5M  |   jpeg_component_info *compptr;  | 
501  | 17.5M  |   ISHIFT_TEMPS  | 
502  | 17.5M  |   int max_coef_bits = cinfo->data_precision + 2;  | 
503  |  |  | 
504  | 17.5M  |   entropy->next_output_byte = cinfo->dest->next_output_byte;  | 
505  | 17.5M  |   entropy->free_in_buffer = cinfo->dest->free_in_buffer;  | 
506  |  |  | 
507  |  |   /* Emit restart marker if needed */  | 
508  | 17.5M  |   if (cinfo->restart_interval)  | 
509  | 0  |     if (entropy->restarts_to_go == 0)  | 
510  | 0  |       emit_restart(entropy, entropy->next_restart_num);  | 
511  |  |  | 
512  |  |   /* Encode the MCU data blocks */  | 
513  | 39.1M  |   for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { | 
514  | 21.6M  |     block = MCU_data[blkn];  | 
515  | 21.6M  |     ci = cinfo->MCU_membership[blkn];  | 
516  | 21.6M  |     compptr = cinfo->cur_comp_info[ci];  | 
517  |  |  | 
518  |  |     /* Compute the DC value after the required point transform by Al.  | 
519  |  |      * This is simply an arithmetic right shift.  | 
520  |  |      */  | 
521  | 21.6M  |     temp2 = IRIGHT_SHIFT((int)((*block)[0]), Al);  | 
522  |  |  | 
523  |  |     /* DC differences are figured on the point-transformed values. */  | 
524  | 21.6M  |     temp = temp2 - entropy->last_dc_val[ci];  | 
525  | 21.6M  |     entropy->last_dc_val[ci] = temp2;  | 
526  |  |  | 
527  |  |     /* Encode the DC coefficient difference per section G.1.2.1 */  | 
528  |  |  | 
529  |  |     /* This is a well-known technique for obtaining the absolute value without  | 
530  |  |      * a branch.  It is derived from an assembly language technique presented  | 
531  |  |      * in "How to Optimize for the Pentium Processors", Copyright (c) 1996,  | 
532  |  |      * 1997 by Agner Fog.  | 
533  |  |      */  | 
534  | 21.6M  |     temp3 = temp >> (CHAR_BIT * sizeof(int) - 1);  | 
535  | 21.6M  |     temp ^= temp3;  | 
536  | 21.6M  |     temp -= temp3;              /* temp is abs value of input */  | 
537  |  |     /* For a negative input, want temp2 = bitwise complement of abs(input) */  | 
538  | 21.6M  |     temp2 = temp ^ temp3;  | 
539  |  |  | 
540  |  |     /* Find the number of bits needed for the magnitude of the coefficient */  | 
541  | 21.6M  |     nbits = JPEG_NBITS(temp);  | 
542  |  |     /* Check for out-of-range coefficient values.  | 
543  |  |      * Since we're encoding a difference, the range limit is twice as much.  | 
544  |  |      */  | 
545  | 21.6M  |     if (nbits > max_coef_bits + 1)  | 
546  | 128  |       ERREXIT(cinfo, JERR_BAD_DCT_COEF);  | 
547  |  |  | 
548  |  |     /* Count/emit the Huffman-coded symbol for the number of bits */  | 
549  | 21.6M  |     emit_symbol(entropy, compptr->dc_tbl_no, nbits);  | 
550  |  |  | 
551  |  |     /* Emit that number of bits of the value, if positive, */  | 
552  |  |     /* or the complement of its magnitude, if negative. */  | 
553  | 21.6M  |     if (nbits)                  /* emit_bits rejects calls with size 0 */  | 
554  | 248k  |       emit_bits(entropy, (unsigned int)temp2, nbits);  | 
555  | 21.6M  |   }  | 
556  |  |  | 
557  | 17.5M  |   cinfo->dest->next_output_byte = entropy->next_output_byte;  | 
558  | 17.5M  |   cinfo->dest->free_in_buffer = entropy->free_in_buffer;  | 
559  |  |  | 
560  |  |   /* Update restart-interval state too */  | 
561  | 17.5M  |   if (cinfo->restart_interval) { | 
562  | 0  |     if (entropy->restarts_to_go == 0) { | 
563  | 0  |       entropy->restarts_to_go = cinfo->restart_interval;  | 
564  | 0  |       entropy->next_restart_num++;  | 
565  | 0  |       entropy->next_restart_num &= 7;  | 
566  | 0  |     }  | 
567  | 0  |     entropy->restarts_to_go--;  | 
568  | 0  |   }  | 
569  |  |  | 
570  | 17.5M  |   return TRUE;  | 
571  | 17.5M  | }  | 
572  |  |  | 
573  |  |  | 
574  |  | /*  | 
575  |  |  * Data preparation for encode_mcu_AC_first().  | 
576  |  |  */  | 
577  |  |  | 
/* Scan the first Sl coefficients of "block" (in the order given by
 * jpeg_natural_order_start) and, for each one that is still nonzero after
 * the point transform, store its magnitude in values[k], its output bits in
 * values[k + DCTSIZE2], and set bit k of zerobits.
 * Uses k, temp, temp2, block, jpeg_natural_order_start, Al, values and
 * zerobits from the enclosing scope.
 */
#define COMPUTE_ABSVALUES_AC_FIRST(Sl) { \
  for (k = 0; k < Sl; k++) { \
    temp = block[jpeg_natural_order_start[k]]; \
    if (temp != 0) { \
      /* We must apply the point transform by Al.  For AC coefficients this \
       * is an integer division with rounding towards 0.  To do this \
       * portably in C, we shift after obtaining the absolute value, so the \
       * sign mask (temp2) is extracted first and reused for the output bits. \
       */ \
      temp2 = temp >> (CHAR_BIT * sizeof(int) - 1); \
      temp = ((temp ^ temp2) - temp2) >> Al;  /* abs value, then transform */ \
      /* A nonzero coef may have become zero after the point transform */ \
      if (temp != 0) { \
        /* For a negative coef, want temp2 = bitwise complement of abs(coef) */ \
        temp2 ^= temp; \
        values[k] = (UJCOEF)temp; \
        values[k + DCTSIZE2] = (UJCOEF)temp2; \
        zerobits |= ((size_t)1U) << k; \
      } \
    } \
  } \
}
602  |  |  | 
603  |  | METHODDEF(void)  | 
604  |  | encode_mcu_AC_first_prepare(const JCOEF *block,  | 
605  |  |                             const int *jpeg_natural_order_start, int Sl,  | 
606  |  |                             int Al, UJCOEF *values, size_t *bits)  | 
607  | 0  | { | 
608  | 0  |   register int k, temp, temp2;  | 
609  | 0  |   size_t zerobits = 0U;  | 
610  | 0  |   int Sl0 = Sl;  | 
611  |  | 
  | 
612  |  | #if SIZEOF_SIZE_T == 4  | 
613  |  |   if (Sl0 > 32)  | 
614  |  |     Sl0 = 32;  | 
615  |  | #endif  | 
616  |  | 
  | 
617  | 0  |   COMPUTE_ABSVALUES_AC_FIRST(Sl0);  | 
618  |  | 
  | 
619  | 0  |   bits[0] = zerobits;  | 
620  |  | #if SIZEOF_SIZE_T == 4  | 
621  |  |   zerobits = 0U;  | 
622  |  |  | 
623  |  |   if (Sl > 32) { | 
624  |  |     Sl -= 32;  | 
625  |  |     jpeg_natural_order_start += 32;  | 
626  |  |     values += 32;  | 
627  |  |  | 
628  |  |     COMPUTE_ABSVALUES_AC_FIRST(Sl);  | 
629  |  |   }  | 
630  |  |   bits[1] = zerobits;  | 
631  |  | #endif  | 
632  | 0  | }  | 
633  |  |  | 
634  |  | /*  | 
635  |  |  * MCU encoding for AC initial scan (either spectral selection,  | 
636  |  |  * or first pass of successive approximation).  | 
637  |  |  */  | 
638  |  |  | 
/* Emit every coefficient flagged in zerobits.
 *
 * The macro works on variables of the enclosing function: zerobits (bitmap
 * of coefficients still to be coded, consumed from the least significant
 * bit), cvalue (cursor into the prepared values; cvalue[0] is the magnitude
 * and cvalue[DCTSIZE2] the bits to output), r (run of zero coefficients
 * skipped), temp, temp2, nbits, max_coef_bits, entropy and cinfo.
 *
 * "label" is pasted verbatim right after the cursor has been advanced past
 * the zero run; the invocation visible here passes a no-op statement.
 * NOTE(review): presumably another invocation supplies a goto target so the
 * loop body can be entered with a carried-over run length -- confirm against
 * the rest of encode_mcu_AC_first().
 */
#define ENCODE_COEFS_AC_FIRST(label) { \
  while (zerobits) { \
    /* Skip over the zero coefficients preceding the next nonzero one */ \
    r = count_zeroes(&zerobits); \
    cvalue += r; \
label \
    temp  = cvalue[0]; \
    temp2 = cvalue[DCTSIZE2]; \
    \
    /* if run length > 15, must emit special run-length-16 codes (0xF0) */ \
    while (r > 15) { \
      emit_symbol(entropy, entropy->ac_tbl_no, 0xF0); \
      r -= 16; \
    } \
    \
    /* Find the number of bits needed for the magnitude of the coefficient */ \
    nbits = JPEG_NBITS_NONZERO(temp);  /* there must be at least one 1 bit */ \
    /* Check for out-of-range coefficient values */ \
    if (nbits > max_coef_bits) \
      ERREXIT(cinfo, JERR_BAD_DCT_COEF); \
    \
    /* Count/emit Huffman symbol for run length / number of bits */ \
    emit_symbol(entropy, entropy->ac_tbl_no, (r << 4) + nbits); \
    \
    /* Emit that number of bits of the value, if positive, */ \
    /* or the complement of its magnitude, if negative. */ \
    emit_bits(entropy, (unsigned int)temp2, nbits); \
    \
    /* Step past the coefficient just coded, in both cursor and bitmap */ \
    cvalue++; \
    zerobits >>= 1; \
  } \
}
670  |  |  | 
671  |  | METHODDEF(boolean)  | 
672  |  | encode_mcu_AC_first(j_compress_ptr cinfo, JBLOCKROW *MCU_data)  | 
673  | 40.9M  | { | 
674  | 40.9M  |   phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;  | 
675  | 40.9M  |   register int temp, temp2;  | 
676  | 40.9M  |   register int nbits, r;  | 
677  | 40.9M  |   int Sl = cinfo->Se - cinfo->Ss + 1;  | 
678  | 40.9M  |   int Al = cinfo->Al;  | 
679  | 40.9M  |   UJCOEF values_unaligned[2 * DCTSIZE2 + 15];  | 
680  | 40.9M  |   UJCOEF *values;  | 
681  | 40.9M  |   const UJCOEF *cvalue;  | 
682  | 40.9M  |   size_t zerobits;  | 
683  | 40.9M  |   size_t bits[8 / SIZEOF_SIZE_T];  | 
684  | 40.9M  |   int max_coef_bits = cinfo->data_precision + 2;  | 
685  |  |  | 
686  | 40.9M  |   entropy->next_output_byte = cinfo->dest->next_output_byte;  | 
687  | 40.9M  |   entropy->free_in_buffer = cinfo->dest->free_in_buffer;  | 
688  |  |  | 
689  |  |   /* Emit restart marker if needed */  | 
690  | 40.9M  |   if (cinfo->restart_interval)  | 
691  | 0  |     if (entropy->restarts_to_go == 0)  | 
692  | 0  |       emit_restart(entropy, entropy->next_restart_num);  | 
693  |  |  | 
694  | 40.9M  | #ifdef WITH_SIMD  | 
695  | 40.9M  |   cvalue = values = (UJCOEF *)PAD((JUINTPTR)values_unaligned, 16);  | 
696  |  | #else  | 
697  |  |   /* Not using SIMD, so alignment is not needed */  | 
698  |  |   cvalue = values = values_unaligned;  | 
699  |  | #endif  | 
700  |  |  | 
701  |  |   /* Prepare data */  | 
702  | 40.9M  |   entropy->AC_first_prepare(MCU_data[0][0], jpeg_natural_order + cinfo->Ss,  | 
703  | 40.9M  |                             Sl, Al, values, bits);  | 
704  |  |  | 
705  | 40.9M  |   zerobits = bits[0];  | 
706  |  | #if SIZEOF_SIZE_T == 4  | 
707  |  |   zerobits |= bits[1];  | 
708  |  | #endif  | 
709  |  |  | 
710  |  |   /* Emit any pending EOBRUN */  | 
711  | 40.9M  |   if (zerobits && (entropy->EOBRUN > 0))  | 
712  | 549k  |     emit_eobrun(entropy);  | 
713  |  |  | 
714  |  | #if SIZEOF_SIZE_T == 4  | 
715  |  |   zerobits = bits[0];  | 
716  |  | #endif  | 
717  |  |  | 
718  |  |   /* Encode the AC coefficients per section G.1.2.2, fig. G.3 */  | 
719  |  |  | 
720  | 40.9M  |   ENCODE_COEFS_AC_FIRST((void)0;);  | 
721  |  |  | 
722  |  | #if SIZEOF_SIZE_T == 4  | 
723  |  |   zerobits = bits[1];  | 
724  |  |   if (zerobits) { | 
725  |  |     int diff = ((values + DCTSIZE2 / 2) - cvalue);  | 
726  |  |     r = count_zeroes(&zerobits);  | 
727  |  |     r += diff;  | 
728  |  |     cvalue += r;  | 
729  |  |     goto first_iter_ac_first;  | 
730  |  |   }  | 
731  |  |  | 
732  |  |   ENCODE_COEFS_AC_FIRST(first_iter_ac_first:);  | 
733  |  | #endif  | 
734  |  |  | 
735  | 40.9M  |   if (cvalue < (values + Sl)) { /* If there are trailing zeroes, */ | 
736  | 40.7M  |     entropy->EOBRUN++;          /* count an EOB */  | 
737  | 40.7M  |     if (entropy->EOBRUN == 0x7FFF)  | 
738  | 0  |       emit_eobrun(entropy);     /* force it out to avoid overflow */  | 
739  | 40.7M  |   }  | 
740  |  |  | 
741  | 40.9M  |   cinfo->dest->next_output_byte = entropy->next_output_byte;  | 
742  | 40.9M  |   cinfo->dest->free_in_buffer = entropy->free_in_buffer;  | 
743  |  |  | 
744  |  |   /* Update restart-interval state too */  | 
745  | 40.9M  |   if (cinfo->restart_interval) { | 
746  | 0  |     if (entropy->restarts_to_go == 0) { | 
747  | 0  |       entropy->restarts_to_go = cinfo->restart_interval;  | 
748  | 0  |       entropy->next_restart_num++;  | 
749  | 0  |       entropy->next_restart_num &= 7;  | 
750  | 0  |     }  | 
751  | 0  |     entropy->restarts_to_go--;  | 
752  | 0  |   }  | 
753  |  |  | 
754  | 40.9M  |   return TRUE;  | 
755  | 40.9M  | }  | 
756  |  |  | 
757  |  |  | 
758  |  | /*  | 
759  |  |  * MCU encoding for DC successive approximation refinement scan.  | 
760  |  |  * Note: we assume such scans can be multi-component, although the spec  | 
761  |  |  * is not very clear on the point.  | 
762  |  |  */  | 
763  |  |  | 
764  |  | METHODDEF(boolean)  | 
765  |  | encode_mcu_DC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data)  | 
766  | 8.57M  | { | 
767  | 8.57M  |   phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;  | 
768  | 8.57M  |   register int temp;  | 
769  | 8.57M  |   int blkn;  | 
770  | 8.57M  |   int Al = cinfo->Al;  | 
771  | 8.57M  |   JBLOCKROW block;  | 
772  |  |  | 
773  | 8.57M  |   entropy->next_output_byte = cinfo->dest->next_output_byte;  | 
774  | 8.57M  |   entropy->free_in_buffer = cinfo->dest->free_in_buffer;  | 
775  |  |  | 
776  |  |   /* Emit restart marker if needed */  | 
777  | 8.57M  |   if (cinfo->restart_interval)  | 
778  | 0  |     if (entropy->restarts_to_go == 0)  | 
779  | 0  |       emit_restart(entropy, entropy->next_restart_num);  | 
780  |  |  | 
781  |  |   /* Encode the MCU data blocks */  | 
782  | 18.9M  |   for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { | 
783  | 10.4M  |     block = MCU_data[blkn];  | 
784  |  |  | 
785  |  |     /* We simply emit the Al'th bit of the DC coefficient value. */  | 
786  | 10.4M  |     temp = (*block)[0];  | 
787  | 10.4M  |     emit_bits(entropy, (unsigned int)(temp >> Al), 1);  | 
788  | 10.4M  |   }  | 
789  |  |  | 
790  | 8.57M  |   cinfo->dest->next_output_byte = entropy->next_output_byte;  | 
791  | 8.57M  |   cinfo->dest->free_in_buffer = entropy->free_in_buffer;  | 
792  |  |  | 
793  |  |   /* Update restart-interval state too */  | 
794  | 8.57M  |   if (cinfo->restart_interval) { | 
795  | 0  |     if (entropy->restarts_to_go == 0) { | 
796  | 0  |       entropy->restarts_to_go = cinfo->restart_interval;  | 
797  | 0  |       entropy->next_restart_num++;  | 
798  | 0  |       entropy->next_restart_num &= 7;  | 
799  | 0  |     }  | 
800  | 0  |     entropy->restarts_to_go--;  | 
801  | 0  |   }  | 
802  |  |  | 
803  | 8.57M  |   return TRUE;  | 
804  | 8.57M  | }  | 
805  |  |  | 
806  |  |  | 
807  |  | /*  | 
808  |  |  * Data preparation for encode_mcu_AC_refine().  | 
809  |  |  */  | 
810  |  |  | 
811  | 0  | #define COMPUTE_ABSVALUES_AC_REFINE(Sl, koffset) { \ | 
812  | 0  |   /* It is convenient to make a pre-pass to determine the transformed \  | 
813  | 0  |    * coefficients' absolute values and the EOB position. \  | 
814  | 0  |    */ \  | 
815  | 0  |   for (k = 0; k < Sl; k++) { \ | 
816  | 0  |     temp = block[jpeg_natural_order_start[k]]; \  | 
817  | 0  |     /* We must apply the point transform by Al.  For AC coefficients this \  | 
818  | 0  |      * is an integer division with rounding towards 0.  To do this portably \  | 
819  | 0  |      * in C, we shift after obtaining the absolute value. \  | 
820  | 0  |      */ \  | 
821  | 0  |     temp2 = temp >> (CHAR_BIT * sizeof(int) - 1); \  | 
822  | 0  |     temp ^= temp2; \  | 
823  | 0  |     temp -= temp2;              /* temp is abs value of input */ \  | 
824  | 0  |     temp >>= Al;                /* apply the point transform */ \  | 
825  | 0  |     if (temp != 0) { \ | 
826  | 0  |       zerobits |= ((size_t)1U) << k; \  | 
827  | 0  |       signbits |= ((size_t)(temp2 + 1)) << k; \  | 
828  | 0  |     } \  | 
829  | 0  |     absvalues[k] = (UJCOEF)temp; /* save abs value for main pass */ \  | 
830  | 0  |     if (temp == 1) \  | 
831  | 0  |       EOB = k + koffset;        /* EOB = index of last newly-nonzero coef */ \  | 
832  | 0  |   } \  | 
833  | 0  | }  | 
834  |  |  | 
835  |  | METHODDEF(int)  | 
836  |  | encode_mcu_AC_refine_prepare(const JCOEF *block,  | 
837  |  |                              const int *jpeg_natural_order_start, int Sl,  | 
838  |  |                              int Al, UJCOEF *absvalues, size_t *bits)  | 
839  | 0  | { | 
840  | 0  |   register int k, temp, temp2;  | 
841  | 0  |   int EOB = 0;  | 
842  | 0  |   size_t zerobits = 0U, signbits = 0U;  | 
843  | 0  |   int Sl0 = Sl;  | 
844  |  |  | 
845  |  | #if SIZEOF_SIZE_T == 4  | 
846  |  |   if (Sl0 > 32)  | 
847  |  |     Sl0 = 32;  | 
848  |  | #endif  | 
849  |  |  | 
850  | 0  |   COMPUTE_ABSVALUES_AC_REFINE(Sl0, 0);  | 
851  |  |  | 
852  | 0  |   bits[0] = zerobits;  | 
853  | 0  | #if SIZEOF_SIZE_T == 8  | 
854  | 0  |   bits[1] = signbits;  | 
855  |  | #else  | 
856  |  |   bits[2] = signbits;  | 
857  |  |  | 
858  |  |   zerobits = 0U;  | 
859  |  |   signbits = 0U;  | 
860  |  |  | 
861  |  |   if (Sl > 32) { | 
862  |  |     Sl -= 32;  | 
863  |  |     jpeg_natural_order_start += 32;  | 
864  |  |     absvalues += 32;  | 
865  |  |  | 
866  |  |     COMPUTE_ABSVALUES_AC_REFINE(Sl, 32);  | 
867  |  |   }  | 
868  |  |  | 
869  |  |   bits[1] = zerobits;  | 
870  |  |   bits[3] = signbits;  | 
871  |  | #endif  | 
872  |  |  | 
873  | 0  |   return EOB;  | 
874  | 0  | }  | 
875  |  |  | 
876  |  |  | 
877  |  | /*  | 
878  |  |  * MCU encoding for AC successive approximation refinement scan.  | 
879  |  |  */  | 
880  |  |  | 
881  | 40.4M  | #define ENCODE_COEFS_AC_REFINE(label) { \ | 
882  | 84.4M  |   while (zerobits) { \ | 
883  | 43.9M  |     idx = count_zeroes(&zerobits); \  | 
884  | 43.9M  |     r += idx; \  | 
885  | 43.9M  |     cabsvalue += idx; \  | 
886  | 43.9M  |     signbits >>= idx; \  | 
887  | 43.9M  | label \  | 
888  | 43.9M  |     /* Emit any required ZRLs, but not if they can be folded into EOB */ \  | 
889  | 44.4M  |     while (r > 15 && (cabsvalue <= EOBPTR)) { \ | 
890  | 464k  |       /* emit any pending EOBRUN and the BE correction bits */ \  | 
891  | 464k  |       emit_eobrun(entropy); \  | 
892  | 464k  |       /* Emit ZRL */ \  | 
893  | 464k  |       emit_symbol(entropy, entropy->ac_tbl_no, 0xF0); \  | 
894  | 464k  |       r -= 16; \  | 
895  | 464k  |       /* Emit buffered correction bits that must be associated with ZRL */ \  | 
896  | 464k  |       emit_buffered_bits(entropy, BR_buffer, BR); \  | 
897  | 464k  |       BR_buffer = entropy->bit_buffer; /* BE bits are gone now */ \  | 
898  | 464k  |       BR = 0; \  | 
899  | 464k  |     } \  | 
900  | 43.9M  |     \  | 
901  | 43.9M  |     temp = *cabsvalue++; \  | 
902  | 43.9M  |     \  | 
903  | 43.9M  |     /* If the coef was previously nonzero, it only needs a correction bit. \  | 
904  | 43.9M  |      * NOTE: a straight translation of the spec's figure G.7 would suggest \  | 
905  | 43.9M  |      * that we also need to test r > 15.  But if r > 15, we can only get here \  | 
906  | 43.9M  |      * if k > EOB, which implies that this coefficient is not 1. \  | 
907  | 43.9M  |      */ \  | 
908  | 43.9M  |     if (temp > 1) { \ | 
909  | 20.4M  |       /* The correction bit is the next bit of the absolute value. */ \  | 
910  | 20.4M  |       BR_buffer[BR++] = (char)(temp & 1); \  | 
911  | 20.4M  |       signbits >>= 1; \  | 
912  | 20.4M  |       zerobits >>= 1; \  | 
913  | 20.4M  |       continue; \  | 
914  | 20.4M  |     } \  | 
915  | 43.9M  |     \  | 
916  | 43.9M  |     /* Emit any pending EOBRUN and the BE correction bits */ \  | 
917  | 43.9M  |     emit_eobrun(entropy); \  | 
918  | 23.5M  |     \  | 
919  | 23.5M  |     /* Count/emit Huffman symbol for run length / number of bits */ \  | 
920  | 23.5M  |     emit_symbol(entropy, entropy->ac_tbl_no, (r << 4) + 1); \  | 
921  | 23.5M  |     \  | 
922  | 23.5M  |     /* Emit output bit for newly-nonzero coef */ \  | 
923  | 23.5M  |     temp = signbits & 1; /* ((*block)[jpeg_natural_order_start[k]] < 0) ? 0 : 1 */ \  | 
924  | 23.5M  |     emit_bits(entropy, (unsigned int)temp, 1); \  | 
925  | 23.5M  |     \  | 
926  | 23.5M  |     /* Emit buffered correction bits that must be associated with this code */ \  | 
927  | 23.5M  |     emit_buffered_bits(entropy, BR_buffer, BR); \  | 
928  | 23.5M  |     BR_buffer = entropy->bit_buffer; /* BE bits are gone now */ \  | 
929  | 23.5M  |     BR = 0; \  | 
930  | 23.5M  |     r = 0;                      /* reset zero run length */ \  | 
931  | 23.5M  |     signbits >>= 1; \  | 
932  | 23.5M  |     zerobits >>= 1; \  | 
933  | 23.5M  |   } \  | 
934  | 40.4M  | }  | 
935  |  |  | 
936  |  | METHODDEF(boolean)  | 
937  |  | encode_mcu_AC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data)  | 
938  | 40.4M  | { | 
939  | 40.4M  |   phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;  | 
940  | 40.4M  |   register int temp, r, idx;  | 
941  | 40.4M  |   char *BR_buffer;  | 
942  | 40.4M  |   unsigned int BR;  | 
943  | 40.4M  |   int Sl = cinfo->Se - cinfo->Ss + 1;  | 
944  | 40.4M  |   int Al = cinfo->Al;  | 
945  | 40.4M  |   UJCOEF absvalues_unaligned[DCTSIZE2 + 15];  | 
946  | 40.4M  |   UJCOEF *absvalues;  | 
947  | 40.4M  |   const UJCOEF *cabsvalue, *EOBPTR;  | 
948  | 40.4M  |   size_t zerobits, signbits;  | 
949  | 40.4M  |   size_t bits[16 / SIZEOF_SIZE_T];  | 
950  |  |  | 
951  | 40.4M  |   entropy->next_output_byte = cinfo->dest->next_output_byte;  | 
952  | 40.4M  |   entropy->free_in_buffer = cinfo->dest->free_in_buffer;  | 
953  |  |  | 
954  |  |   /* Emit restart marker if needed */  | 
955  | 40.4M  |   if (cinfo->restart_interval)  | 
956  | 0  |     if (entropy->restarts_to_go == 0)  | 
957  | 0  |       emit_restart(entropy, entropy->next_restart_num);  | 
958  |  |  | 
959  | 40.4M  | #ifdef WITH_SIMD  | 
960  | 40.4M  |   cabsvalue = absvalues = (UJCOEF *)PAD((JUINTPTR)absvalues_unaligned, 16);  | 
961  |  | #else  | 
962  |  |   /* Not using SIMD, so alignment is not needed */  | 
963  |  |   cabsvalue = absvalues = absvalues_unaligned;  | 
964  |  | #endif  | 
965  |  |  | 
966  |  |   /* Prepare data */  | 
967  | 40.4M  |   EOBPTR = absvalues +  | 
968  | 40.4M  |     entropy->AC_refine_prepare(MCU_data[0][0], jpeg_natural_order + cinfo->Ss,  | 
969  | 40.4M  |                                Sl, Al, absvalues, bits);  | 
970  |  |  | 
971  |  |   /* Encode the AC coefficients per section G.1.2.3, fig. G.7 */  | 
972  |  |  | 
973  | 40.4M  |   r = 0;                        /* r = run length of zeros */  | 
974  | 40.4M  |   BR = 0;                       /* BR = count of buffered bits added now */  | 
975  | 40.4M  |   BR_buffer = entropy->bit_buffer + entropy->BE; /* Append bits to buffer */  | 
976  |  |  | 
977  | 40.4M  |   zerobits = bits[0];  | 
978  | 40.4M  | #if SIZEOF_SIZE_T == 8  | 
979  | 40.4M  |   signbits = bits[1];  | 
980  |  | #else  | 
981  |  |   signbits = bits[2];  | 
982  |  | #endif  | 
983  | 40.4M  |   ENCODE_COEFS_AC_REFINE((void)0;);  | 
984  |  |  | 
985  |  | #if SIZEOF_SIZE_T == 4  | 
986  |  |   zerobits = bits[1];  | 
987  |  |   signbits = bits[3];  | 
988  |  |  | 
989  |  |   if (zerobits) { | 
990  |  |     int diff = ((absvalues + DCTSIZE2 / 2) - cabsvalue);  | 
991  |  |     idx = count_zeroes(&zerobits);  | 
992  |  |     signbits >>= idx;  | 
993  |  |     idx += diff;  | 
994  |  |     r += idx;  | 
995  |  |     cabsvalue += idx;  | 
996  |  |     goto first_iter_ac_refine;  | 
997  |  |   }  | 
998  |  |  | 
999  |  |   ENCODE_COEFS_AC_REFINE(first_iter_ac_refine:);  | 
1000  |  | #endif  | 
1001  |  |  | 
1002  | 40.4M  |   r |= (int)((absvalues + Sl) - cabsvalue);  | 
1003  |  |  | 
1004  | 40.4M  |   if (r > 0 || BR > 0) {        /* If there are trailing zeroes, */ | 
1005  | 40.1M  |     entropy->EOBRUN++;          /* count an EOB */  | 
1006  | 40.1M  |     entropy->BE += BR;          /* concat my correction bits to older ones */  | 
1007  |  |     /* We force out the EOB if we risk either:  | 
1008  |  |      * 1. overflow of the EOB counter;  | 
1009  |  |      * 2. overflow of the correction bit buffer during the next MCU.  | 
1010  |  |      */  | 
1011  | 40.1M  |     if (entropy->EOBRUN == 0x7FFF ||  | 
1012  | 40.1M  |         entropy->BE > (MAX_CORR_BITS - DCTSIZE2 + 1))  | 
1013  | 3.60k  |       emit_eobrun(entropy);  | 
1014  | 40.1M  |   }  | 
1015  |  |  | 
1016  | 40.4M  |   cinfo->dest->next_output_byte = entropy->next_output_byte;  | 
1017  | 40.4M  |   cinfo->dest->free_in_buffer = entropy->free_in_buffer;  | 
1018  |  |  | 
1019  |  |   /* Update restart-interval state too */  | 
1020  | 40.4M  |   if (cinfo->restart_interval) { | 
1021  | 0  |     if (entropy->restarts_to_go == 0) { | 
1022  | 0  |       entropy->restarts_to_go = cinfo->restart_interval;  | 
1023  | 0  |       entropy->next_restart_num++;  | 
1024  | 0  |       entropy->next_restart_num &= 7;  | 
1025  | 0  |     }  | 
1026  | 0  |     entropy->restarts_to_go--;  | 
1027  | 0  |   }  | 
1028  |  |  | 
1029  | 40.4M  |   return TRUE;  | 
1030  | 40.4M  | }  | 
1031  |  |  | 
1032  |  |  | 
1033  |  | /*  | 
1034  |  |  * Finish up at the end of a Huffman-compressed progressive scan.  | 
1035  |  |  */  | 
1036  |  |  | 
1037  |  | METHODDEF(void)  | 
1038  |  | finish_pass_phuff(j_compress_ptr cinfo)  | 
1039  | 18.1k  | { | 
1040  | 18.1k  |   phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;  | 
1041  |  |  | 
1042  | 18.1k  |   entropy->next_output_byte = cinfo->dest->next_output_byte;  | 
1043  | 18.1k  |   entropy->free_in_buffer = cinfo->dest->free_in_buffer;  | 
1044  |  |  | 
1045  |  |   /* Flush out any buffered data */  | 
1046  | 18.1k  |   emit_eobrun(entropy);  | 
1047  | 18.1k  |   flush_bits(entropy);  | 
1048  |  |  | 
1049  | 18.1k  |   cinfo->dest->next_output_byte = entropy->next_output_byte;  | 
1050  | 18.1k  |   cinfo->dest->free_in_buffer = entropy->free_in_buffer;  | 
1051  | 18.1k  | }  | 
1052  |  |  | 
1053  |  |  | 
1054  |  | /*  | 
1055  |  |  * Finish up a statistics-gathering pass and create the new Huffman tables.  | 
1056  |  |  */  | 
1057  |  |  | 
1058  |  | METHODDEF(void)  | 
1059  |  | finish_pass_gather_phuff(j_compress_ptr cinfo)  | 
1060  | 15.5k  | { | 
1061  | 15.5k  |   phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;  | 
1062  | 15.5k  |   boolean is_DC_band;  | 
1063  | 15.5k  |   int ci, tbl;  | 
1064  | 15.5k  |   jpeg_component_info *compptr;  | 
1065  | 15.5k  |   JHUFF_TBL **htblptr;  | 
1066  | 15.5k  |   boolean did[NUM_HUFF_TBLS];  | 
1067  |  |  | 
1068  |  |   /* Flush out buffered data (all we care about is counting the EOB symbol) */  | 
1069  | 15.5k  |   emit_eobrun(entropy);  | 
1070  |  |  | 
1071  | 15.5k  |   is_DC_band = (cinfo->Ss == 0);  | 
1072  |  |  | 
1073  |  |   /* It's important not to apply jpeg_gen_optimal_table more than once  | 
1074  |  |    * per table, because it clobbers the input frequency counts!  | 
1075  |  |    */  | 
1076  | 15.5k  |   memset(did, 0, sizeof(did));  | 
1077  |  |  | 
1078  | 31.8k  |   for (ci = 0; ci < cinfo->comps_in_scan; ci++) { | 
1079  | 16.3k  |     compptr = cinfo->cur_comp_info[ci];  | 
1080  | 16.3k  |     if (is_DC_band) { | 
1081  | 3.50k  |       if (cinfo->Ah != 0)       /* DC refinement needs no table */  | 
1082  | 0  |         continue;  | 
1083  | 3.50k  |       tbl = compptr->dc_tbl_no;  | 
1084  | 12.7k  |     } else { | 
1085  | 12.7k  |       tbl = compptr->ac_tbl_no;  | 
1086  | 12.7k  |     }  | 
1087  | 16.3k  |     if (!did[tbl]) { | 
1088  | 15.7k  |       if (is_DC_band)  | 
1089  | 2.93k  |         htblptr = &cinfo->dc_huff_tbl_ptrs[tbl];  | 
1090  | 12.7k  |       else  | 
1091  | 12.7k  |         htblptr = &cinfo->ac_huff_tbl_ptrs[tbl];  | 
1092  | 15.7k  |       if (*htblptr == NULL)  | 
1093  | 0  |         *htblptr = jpeg_alloc_huff_table((j_common_ptr)cinfo);  | 
1094  | 15.7k  |       jpeg_gen_optimal_table(cinfo, *htblptr, entropy->count_ptrs[tbl]);  | 
1095  | 15.7k  |       did[tbl] = TRUE;  | 
1096  | 15.7k  |     }  | 
1097  | 16.3k  |   }  | 
1098  | 15.5k  | }  | 
1099  |  |  | 
1100  |  |  | 
1101  |  | /*  | 
1102  |  |  * Module initialization routine for progressive Huffman entropy encoding.  | 
1103  |  |  */  | 
1104  |  |  | 
1105  |  | GLOBAL(void)  | 
1106  |  | jinit_phuff_encoder(j_compress_ptr cinfo)  | 
1107  | 2.86k  | { | 
1108  | 2.86k  |   phuff_entropy_ptr entropy;  | 
1109  | 2.86k  |   int i;  | 
1110  |  |  | 
1111  | 2.86k  |   entropy = (phuff_entropy_ptr)  | 
1112  | 2.86k  |     (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,  | 
1113  | 2.86k  |                                 sizeof(phuff_entropy_encoder));  | 
1114  | 2.86k  |   cinfo->entropy = (struct jpeg_entropy_encoder *)entropy;  | 
1115  | 2.86k  |   entropy->pub.start_pass = start_pass_phuff;  | 
1116  |  |  | 
1117  |  |   /* Mark tables unallocated */  | 
1118  | 14.3k  |   for (i = 0; i < NUM_HUFF_TBLS; i++) { | 
1119  | 11.4k  |     entropy->derived_tbls[i] = NULL;  | 
1120  | 11.4k  |     entropy->count_ptrs[i] = NULL;  | 
1121  | 11.4k  |   }  | 
1122  | 2.86k  |   entropy->bit_buffer = NULL;   /* needed only in AC refinement scan */  | 
1123  | 2.86k  | }  | 
1124  |  |  | 
1125  |  | #endif /* C_PROGRESSIVE_SUPPORTED */  |