/src/gdal/build/frmts/jpeg/libjpeg12/jcdctmgr12.c

Source (jump to first uncovered line)
/*
 * jcdctmgr.c
 *
 * Copyright (C) 1994-1996, Thomas G. Lane.
 * This file is part of the Independent JPEG Group's software.
 * For conditions of distribution and use, see the accompanying README file.
 *
 * This file contains the forward-DCT management logic.
 * This code selects a particular DCT implementation to be used,
 * and it performs related housekeeping chores including coefficient
 * quantization.
 */

#define JPEG_INTERNALS
#include "jinclude.h"
#include "jpeglib.h"
#include "jdct.h"   /* Private declarations for DCT subsystem */


/* Private subobject for this module */

typedef struct {
  struct jpeg_forward_dct pub;  /* public fields */

  /* Pointer to the DCT routine actually in use */
  forward_DCT_method_ptr do_dct;

  /* The actual post-DCT divisors --- not identical to the quant table
   * entries, because of scaling (especially for an unnormalized DCT).
   * Each table is given in normal array order.
   */
  DCTELEM * divisors[NUM_QUANT_TBLS];

#ifdef DCT_FLOAT_SUPPORTED
  /* Same as above for the floating-point case. */
  float_DCT_method_ptr do_float_dct;
  FAST_FLOAT * float_divisors[NUM_QUANT_TBLS];
#endif
} my_fdct_controller;

typedef my_fdct_controller * my_fdct_ptr;


/*
 * Initialize for a processing pass.
 * Verify that all referenced Q-tables are present, and set up
 * the divisor table for each one.
 * In the current implementation, DCT of all components is done during
 * the first pass, even if only some components will be output in the
 * first scan.  Hence all components should be examined here.
 */

METHODDEF(void)
start_pass_fdctmgr (j_compress_ptr cinfo)
{
  my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
  int ci, qtblno, i;
  jpeg_component_info *compptr;
  JQUANT_TBL * qtbl;
  DCTELEM * dtbl;

  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
       ci++, compptr++) {
    qtblno = compptr->quant_tbl_no;
    /* Make sure specified quantization table is present */
    if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
  cinfo->quant_tbl_ptrs[qtblno] == NULL)
      ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
    qtbl = cinfo->quant_tbl_ptrs[qtblno];
    /* Compute divisors for this quant table */
    /* We may do this more than once for same table, but it's not a big deal */
    switch (cinfo->dct_method) {
#ifdef DCT_ISLOW_SUPPORTED
    case JDCT_ISLOW:
      /* For LL&M IDCT method, divisors are equal to raw quantization
       * coefficients multiplied by 8 (to counteract scaling).
       */
      if (fdct->divisors[qtblno] == NULL) {
  fdct->divisors[qtblno] = (DCTELEM *)
    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
              DCTSIZE2 * SIZEOF(DCTELEM));
      }
      dtbl = fdct->divisors[qtblno];
      for (i = 0; i < DCTSIZE2; i++) {
  dtbl[i] = ((DCTELEM) qtbl->quantval[i]) << 3;
      }
      break;
#endif
#ifdef DCT_IFAST_SUPPORTED
    case JDCT_IFAST:
      {
  /* For AA&N IDCT method, divisors are equal to quantization
   * coefficients scaled by scalefactor[row]*scalefactor[col], where
   *   scalefactor[0] = 1
   *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
   * We apply a further scale factor of 8.
   */
#define CONST_BITS 14
  static const INT16 aanscales[DCTSIZE2] = {
    /* precomputed values scaled up by 14 bits */
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
    21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
    19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
     8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
     4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
  };
  SHIFT_TEMPS

  if (fdct->divisors[qtblno] == NULL) {
    fdct->divisors[qtblno] = (DCTELEM *)
      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
          DCTSIZE2 * SIZEOF(DCTELEM));
  }
  dtbl = fdct->divisors[qtblno];
  for (i = 0; i < DCTSIZE2; i++) {
    dtbl[i] = (DCTELEM)
      DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
          (INT32) aanscales[i]),
        CONST_BITS-3);
  }
      }
      break;
#endif
#ifdef DCT_FLOAT_SUPPORTED
    case JDCT_FLOAT:
      {
  /* For float AA&N IDCT method, divisors are equal to quantization
   * coefficients scaled by scalefactor[row]*scalefactor[col], where
   *   scalefactor[0] = 1
   *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
   * We apply a further scale factor of 8.
   * What's actually stored is 1/divisor so that the inner loop can
   * use a multiplication rather than a division.
   */
  FAST_FLOAT * fdtbl;
  int row, col;
  static const double aanscalefactor[DCTSIZE] = {
    1.0, 1.387039845, 1.306562965, 1.175875602,
    1.0, 0.785694958, 0.541196100, 0.275899379
  };

  if (fdct->float_divisors[qtblno] == NULL) {
    fdct->float_divisors[qtblno] = (FAST_FLOAT *)
      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
          DCTSIZE2 * SIZEOF(FAST_FLOAT));
  }
  fdtbl = fdct->float_divisors[qtblno];
  i = 0;
  for (row = 0; row < DCTSIZE; row++) {
    for (col = 0; col < DCTSIZE; col++) {
      fdtbl[i] = (FAST_FLOAT)
        (1.0 / (((double) qtbl->quantval[i] *
           aanscalefactor[row] * aanscalefactor[col] * 8.0)));
      i++;
    }
  }
      }
      break;
#endif
    default:
      ERREXIT(cinfo, JERR_NOT_COMPILED);
      break;
    }
  }
}


/*
 * Perform forward DCT on one or more blocks of a component.
 *
 * The input samples are taken from the sample_data[] array starting at
 * position start_row/start_col, and moving to the right for any additional
 * blocks. The quantized coefficients are returned in coef_blocks[].
 */

METHODDEF(void)
forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
       JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
       JDIMENSION start_row, JDIMENSION start_col,
       JDIMENSION num_blocks)
/* This version is used for integer DCT implementations. */
{
  /* This routine is heavily used, so it's worth coding it tightly. */
  my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
  forward_DCT_method_ptr do_dct = fdct->do_dct;
  DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no];
  DCTELEM workspace[DCTSIZE2];  /* work area for FDCT subroutine */
  JDIMENSION bi;

  sample_data += start_row; /* fold in the vertical offset once */

  for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
    /* Load data into workspace, applying unsigned->signed conversion */
    { register DCTELEM *workspaceptr;
      register JSAMPROW elemptr;
      register int elemr;

      workspaceptr = workspace;
      for (elemr = 0; elemr < DCTSIZE; elemr++) {
  elemptr = sample_data[elemr] + start_col;
#if DCTSIZE == 8    /* unroll the inner loop */
  *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
  *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
  *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
  *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
  *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
  *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
  *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
  *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
#else
  { register int elemc;
    for (elemc = DCTSIZE; elemc > 0; elemc--) {
      *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
    }
  }
#endif
      }
    }

    /* Perform the DCT */
    (*do_dct) (workspace);

    /* Quantize/descale the coefficients, and store into coef_blocks[] */
    { register DCTELEM temp, qval;
      register int i;
      register JCOEFPTR output_ptr = coef_blocks[bi];

      for (i = 0; i < DCTSIZE2; i++) {
  qval = divisors[i];
  temp = workspace[i];
  /* Divide the coefficient value by qval, ensuring proper rounding.
   * Since C does not specify the direction of rounding for negative
   * quotients, we have to force the dividend positive for portability.
   *
   * In most files, at least half of the output values will be zero
   * (at default quantization settings, more like three-quarters...)
   * so we should ensure that this case is fast.  On many machines,
   * a comparison is enough cheaper than a divide to make a special test
   * a win.  Since both inputs will be nonnegative, we need only test
   * for a < b to discover whether a/b is 0.
   * If your machine's division is fast enough, define FAST_DIVIDE.
   */
#ifdef FAST_DIVIDE
#define DIVIDE_BY(a,b)  a /= b
#else
#define DIVIDE_BY(a,b)  if (a >= b) a /= b; else a = 0
#endif
  if (temp < 0) {
    temp = -temp;
    temp += qval>>1;  /* for rounding */
    DIVIDE_BY(temp, qval);
    temp = -temp;
  } else {
    temp += qval>>1;  /* for rounding */
    DIVIDE_BY(temp, qval);
  }
  output_ptr[i] = (JCOEF) temp;
      }
    }
  }
}


#ifdef DCT_FLOAT_SUPPORTED

METHODDEF(void)
forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
       JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
       JDIMENSION start_row, JDIMENSION start_col,
       JDIMENSION num_blocks)
/* This version is used for floating-point DCT implementations. */
{
  /* This routine is heavily used, so it's worth coding it tightly. */
  my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
  float_DCT_method_ptr do_dct = fdct->do_float_dct;
  FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no];
  FAST_FLOAT workspace[DCTSIZE2]; /* work area for FDCT subroutine */
  JDIMENSION bi;

  sample_data += start_row; /* fold in the vertical offset once */

  for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
    /* Load data into workspace, applying unsigned->signed conversion */
    { register FAST_FLOAT *workspaceptr;
      register JSAMPROW elemptr;
      register int elemr;

      workspaceptr = workspace;
      for (elemr = 0; elemr < DCTSIZE; elemr++) {
  elemptr = sample_data[elemr] + start_col;
#if DCTSIZE == 8    /* unroll the inner loop */
  *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
  *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
  *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
  *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
  *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
  *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
  *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
  *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
#else
  { register int elemc;
    for (elemc = DCTSIZE; elemc > 0; elemc--) {
      *workspaceptr++ = (FAST_FLOAT)
        (GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
    }
  }
#endif
      }
    }

    /* Perform the DCT */
    (*do_dct) (workspace);

    /* Quantize/descale the coefficients, and store into coef_blocks[] */
    { register FAST_FLOAT temp;
      register int i;
      register JCOEFPTR output_ptr = coef_blocks[bi];

      for (i = 0; i < DCTSIZE2; i++) {
  /* Apply the quantization and scaling factor */
  temp = workspace[i] * divisors[i];
  /* Round to nearest integer.
   * Since C does not specify the direction of rounding for negative
   * quotients, we have to force the dividend positive for portability.
   * The maximum coefficient size is +-16K (for 12-bit data), so this
   * code should work for either 16-bit or 32-bit ints.
   */
  output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);
      }
    }
  }
}

#endif /* DCT_FLOAT_SUPPORTED */


/*
 * Initialize FDCT manager.
 */

GLOBAL(void)
jinit_forward_dct (j_compress_ptr cinfo)
{
  my_fdct_ptr fdct;
  int i;

  fdct = (my_fdct_ptr)
    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
        SIZEOF(my_fdct_controller));
  cinfo->fdct = (struct jpeg_forward_dct *) fdct;
  fdct->pub.start_pass = start_pass_fdctmgr;

  switch (cinfo->dct_method) {
#ifdef DCT_ISLOW_SUPPORTED
  case JDCT_ISLOW:
    fdct->pub.forward_DCT = forward_DCT;
    fdct->do_dct = jpeg_fdct_islow;
    break;
#endif
#ifdef DCT_IFAST_SUPPORTED
  case JDCT_IFAST:
    fdct->pub.forward_DCT = forward_DCT;
    fdct->do_dct = jpeg_fdct_ifast;
    break;
#endif
#ifdef DCT_FLOAT_SUPPORTED
  case JDCT_FLOAT:
    fdct->pub.forward_DCT = forward_DCT_float;
    fdct->do_float_dct = jpeg_fdct_float;
    break;
#endif
  default:
    ERREXIT(cinfo, JERR_NOT_COMPILED);
    break;
  }

  /* Mark divisor tables unallocated */
  for (i = 0; i < NUM_QUANT_TBLS; i++) {
    fdct->divisors[i] = NULL;
#ifdef DCT_FLOAT_SUPPORTED
    fdct->float_divisors[i] = NULL;
#endif
  }
}

Coverage Report

Created: 2025-06-13 06:29

Line	Count	Source (jump to first uncovered line)
1		/*
2		* jcdctmgr.c
3		*
4		* Copyright (C) 1994-1996, Thomas G. Lane.
5		* This file is part of the Independent JPEG Group's software.
6		* For conditions of distribution and use, see the accompanying README file.
7		*
8		* This file contains the forward-DCT management logic.
9		* This code selects a particular DCT implementation to be used,
10		* and it performs related housekeeping chores including coefficient
11		* quantization.
12		*/
13
14		#define JPEG_INTERNALS
15		#include "jinclude.h"
16		#include "jpeglib.h"
17		#include "jdct.h" /* Private declarations for DCT subsystem */
18
19
20		/* Private subobject for this module */
21
22		typedef struct {
23		struct jpeg_forward_dct pub; /* public fields */
24
25		/* Pointer to the DCT routine actually in use */
26		forward_DCT_method_ptr do_dct;
27
28		/* The actual post-DCT divisors --- not identical to the quant table
29		* entries, because of scaling (especially for an unnormalized DCT).
30		* Each table is given in normal array order.
31		*/
32		DCTELEM * divisors[NUM_QUANT_TBLS];
33
34		#ifdef DCT_FLOAT_SUPPORTED
35		/* Same as above for the floating-point case. */
36		float_DCT_method_ptr do_float_dct;
37		FAST_FLOAT * float_divisors[NUM_QUANT_TBLS];
38		#endif
39		} my_fdct_controller;
40
41		typedef my_fdct_controller * my_fdct_ptr;
42
43
44		/*
45		* Initialize for a processing pass.
46		* Verify that all referenced Q-tables are present, and set up
47		* the divisor table for each one.
48		* In the current implementation, DCT of all components is done during
49		* the first pass, even if only some components will be output in the
50		* first scan. Hence all components should be examined here.
51		*/
52
53		METHODDEF(void)
54		start_pass_fdctmgr (j_compress_ptr cinfo)
55	0	{
56	0	my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
57	0	int ci, qtblno, i;
58	0	jpeg_component_info *compptr;
59	0	JQUANT_TBL * qtbl;
60	0	DCTELEM * dtbl;
61
62	0	for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
63	0	ci++, compptr++) {
64	0	qtblno = compptr->quant_tbl_no;
65		/* Make sure specified quantization table is present */
66	0	if (qtblno < 0 \|\| qtblno >= NUM_QUANT_TBLS \|\|
67	0	cinfo->quant_tbl_ptrs[qtblno] == NULL)
68	0	ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
69	0	qtbl = cinfo->quant_tbl_ptrs[qtblno];
70		/* Compute divisors for this quant table */
71		/* We may do this more than once for same table, but it's not a big deal */
72	0	switch (cinfo->dct_method) {
73	0	#ifdef DCT_ISLOW_SUPPORTED
74	0	case JDCT_ISLOW:
75		/* For LL&M IDCT method, divisors are equal to raw quantization
76		* coefficients multiplied by 8 (to counteract scaling).
77		*/
78	0	if (fdct->divisors[qtblno] == NULL) {
79	0	fdct->divisors[qtblno] = (DCTELEM *)
80	0	(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
81	0	DCTSIZE2 * SIZEOF(DCTELEM));
82	0	}
83	0	dtbl = fdct->divisors[qtblno];
84	0	for (i = 0; i < DCTSIZE2; i++) {
85	0	dtbl[i] = ((DCTELEM) qtbl->quantval[i]) << 3;
86	0	}
87	0	break;
88	0	#endif
89	0	#ifdef DCT_IFAST_SUPPORTED
90	0	case JDCT_IFAST:
91	0	{
92		/* For AA&N IDCT method, divisors are equal to quantization
93		* coefficients scaled by scalefactor[row]*scalefactor[col], where
94		* scalefactor[0] = 1
95		* scalefactor[k] = cos(kPI/16) sqrt(2) for k=1..7
96		* We apply a further scale factor of 8.
97		*/
98	0	#define CONST_BITS 14
99	0	static const INT16 aanscales[DCTSIZE2] = {
100		/* precomputed values scaled up by 14 bits */
101	0	16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
102	0	22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270,
103	0	21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906,
104	0	19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315,
105	0	16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
106	0	12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552,
107	0	8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446,
108	0	4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247
109	0	};
110	0	SHIFT_TEMPS
111
112	0	if (fdct->divisors[qtblno] == NULL) {
113	0	fdct->divisors[qtblno] = (DCTELEM *)
114	0	(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
115	0	DCTSIZE2 * SIZEOF(DCTELEM));
116	0	}
117	0	dtbl = fdct->divisors[qtblno];
118	0	for (i = 0; i < DCTSIZE2; i++) {
119	0	dtbl[i] = (DCTELEM)
120	0	DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
121	0	(INT32) aanscales[i]),
122	0	CONST_BITS-3);
123	0	}
124	0	}
125	0	break;
126	0	#endif
127	0	#ifdef DCT_FLOAT_SUPPORTED
128	0	case JDCT_FLOAT:
129	0	{
130		/* For float AA&N IDCT method, divisors are equal to quantization
131		* coefficients scaled by scalefactor[row]*scalefactor[col], where
132		* scalefactor[0] = 1
133		* scalefactor[k] = cos(kPI/16) sqrt(2) for k=1..7
134		* We apply a further scale factor of 8.
135		* What's actually stored is 1/divisor so that the inner loop can
136		* use a multiplication rather than a division.
137		*/
138	0	FAST_FLOAT * fdtbl;
139	0	int row, col;
140	0	static const double aanscalefactor[DCTSIZE] = {
141	0	1.0, 1.387039845, 1.306562965, 1.175875602,
142	0	1.0, 0.785694958, 0.541196100, 0.275899379
143	0	};
144
145	0	if (fdct->float_divisors[qtblno] == NULL) {
146	0	fdct->float_divisors[qtblno] = (FAST_FLOAT *)
147	0	(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
148	0	DCTSIZE2 * SIZEOF(FAST_FLOAT));
149	0	}
150	0	fdtbl = fdct->float_divisors[qtblno];
151	0	i = 0;
152	0	for (row = 0; row < DCTSIZE; row++) {
153	0	for (col = 0; col < DCTSIZE; col++) {
154	0	fdtbl[i] = (FAST_FLOAT)
155	0	(1.0 / (((double) qtbl->quantval[i] *
156	0	aanscalefactor[row] * aanscalefactor[col] * 8.0)));
157	0	i++;
158	0	}
159	0	}
160	0	}
161	0	break;
162	0	#endif
163	0	default:
164	0	ERREXIT(cinfo, JERR_NOT_COMPILED);
165	0	break;
166	0	}
167	0	}
168	0	}
169
170
171		/*
172		* Perform forward DCT on one or more blocks of a component.
173		*
174		* The input samples are taken from the sample_data[] array starting at
175		* position start_row/start_col, and moving to the right for any additional
176		* blocks. The quantized coefficients are returned in coef_blocks[].
177		*/
178
179		METHODDEF(void)
180		forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
181		JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
182		JDIMENSION start_row, JDIMENSION start_col,
183		JDIMENSION num_blocks)
184		/* This version is used for integer DCT implementations. */
185	0	{
186		/* This routine is heavily used, so it's worth coding it tightly. */
187	0	my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
188	0	forward_DCT_method_ptr do_dct = fdct->do_dct;
189	0	DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no];
190	0	DCTELEM workspace[DCTSIZE2]; /* work area for FDCT subroutine */
191	0	JDIMENSION bi;
192
193	0	sample_data += start_row; /* fold in the vertical offset once */
194
195	0	for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
196		/* Load data into workspace, applying unsigned->signed conversion */
197	0	{ register DCTELEM *workspaceptr;
198	0	register JSAMPROW elemptr;
199	0	register int elemr;
200
201	0	workspaceptr = workspace;
202	0	for (elemr = 0; elemr < DCTSIZE; elemr++) {
203	0	elemptr = sample_data[elemr] + start_col;
204	0	#if DCTSIZE == 8 /* unroll the inner loop */
205	0	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
206	0	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
207	0	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
208	0	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
209	0	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
210	0	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
211	0	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
212	0	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
213		#else
214		{ register int elemc;
215		for (elemc = DCTSIZE; elemc > 0; elemc--) {
216		workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
217		}
218		}
219		#endif
220	0	}
221	0	}
222
223		/* Perform the DCT */
224	0	(*do_dct) (workspace);
225
226		/* Quantize/descale the coefficients, and store into coef_blocks[] */
227	0	{ register DCTELEM temp, qval;
228	0	register int i;
229	0	register JCOEFPTR output_ptr = coef_blocks[bi];
230
231	0	for (i = 0; i < DCTSIZE2; i++) {
232	0	qval = divisors[i];
233	0	temp = workspace[i];
234		/* Divide the coefficient value by qval, ensuring proper rounding.
235		* Since C does not specify the direction of rounding for negative
236		* quotients, we have to force the dividend positive for portability.
237		*
238		* In most files, at least half of the output values will be zero
239		* (at default quantization settings, more like three-quarters...)
240		* so we should ensure that this case is fast. On many machines,
241		* a comparison is enough cheaper than a divide to make a special test
242		* a win. Since both inputs will be nonnegative, we need only test
243		* for a < b to discover whether a/b is 0.
244		* If your machine's division is fast enough, define FAST_DIVIDE.
245		*/
246		#ifdef FAST_DIVIDE
247		#define DIVIDE_BY(a,b) a /= b
248		#else
249	0	#define DIVIDE_BY(a,b) if (a >= b) a /= b; else a = 0
250	0	#endif
251	0	if (temp < 0) {
252	0	temp = -temp;
253	0	temp += qval>>1; /* for rounding */
254	0	DIVIDE_BY(temp, qval);
255	0	temp = -temp;
256	0	} else {
257	0	temp += qval>>1; /* for rounding */
258	0	DIVIDE_BY(temp, qval);
259	0	}
260	0	output_ptr[i] = (JCOEF) temp;
261	0	}
262	0	}
263	0	}
264	0	}
265
266
267		#ifdef DCT_FLOAT_SUPPORTED
268
269		METHODDEF(void)
270		forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
271		JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
272		JDIMENSION start_row, JDIMENSION start_col,
273		JDIMENSION num_blocks)
274		/* This version is used for floating-point DCT implementations. */
275	0	{
276		/* This routine is heavily used, so it's worth coding it tightly. */
277	0	my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
278	0	float_DCT_method_ptr do_dct = fdct->do_float_dct;
279	0	FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no];
280	0	FAST_FLOAT workspace[DCTSIZE2]; /* work area for FDCT subroutine */
281	0	JDIMENSION bi;
282
283	0	sample_data += start_row; /* fold in the vertical offset once */
284
285	0	for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
286		/* Load data into workspace, applying unsigned->signed conversion */
287	0	{ register FAST_FLOAT *workspaceptr;
288	0	register JSAMPROW elemptr;
289	0	register int elemr;
290
291	0	workspaceptr = workspace;
292	0	for (elemr = 0; elemr < DCTSIZE; elemr++) {
293	0	elemptr = sample_data[elemr] + start_col;
294	0	#if DCTSIZE == 8 /* unroll the inner loop */
295	0	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
296	0	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
297	0	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
298	0	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
299	0	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
300	0	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
301	0	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
302	0	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
303		#else
304		{ register int elemc;
305		for (elemc = DCTSIZE; elemc > 0; elemc--) {
306		*workspaceptr++ = (FAST_FLOAT)
307		(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
308		}
309		}
310		#endif
311	0	}
312	0	}
313
314		/* Perform the DCT */
315	0	(*do_dct) (workspace);
316
317		/* Quantize/descale the coefficients, and store into coef_blocks[] */
318	0	{ register FAST_FLOAT temp;
319	0	register int i;
320	0	register JCOEFPTR output_ptr = coef_blocks[bi];
321
322	0	for (i = 0; i < DCTSIZE2; i++) {
323		/* Apply the quantization and scaling factor */
324	0	temp = workspace[i] * divisors[i];
325		/* Round to nearest integer.
326		* Since C does not specify the direction of rounding for negative
327		* quotients, we have to force the dividend positive for portability.
328		* The maximum coefficient size is +-16K (for 12-bit data), so this
329		* code should work for either 16-bit or 32-bit ints.
330		*/
331	0	output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);
332	0	}
333	0	}
334	0	}
335	0	}
336
337		#endif /* DCT_FLOAT_SUPPORTED */
338
339
340		/*
341		* Initialize FDCT manager.
342		*/
343
344		GLOBAL(void)
345		jinit_forward_dct (j_compress_ptr cinfo)
346	0	{
347	0	my_fdct_ptr fdct;
348	0	int i;
349
350	0	fdct = (my_fdct_ptr)
351	0	(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
352	0	SIZEOF(my_fdct_controller));
353	0	cinfo->fdct = (struct jpeg_forward_dct *) fdct;
354	0	fdct->pub.start_pass = start_pass_fdctmgr;
355
356	0	switch (cinfo->dct_method) {
357	0	#ifdef DCT_ISLOW_SUPPORTED
358	0	case JDCT_ISLOW:
359	0	fdct->pub.forward_DCT = forward_DCT;
360	0	fdct->do_dct = jpeg_fdct_islow;
361	0	break;
362	0	#endif
363	0	#ifdef DCT_IFAST_SUPPORTED
364	0	case JDCT_IFAST:
365	0	fdct->pub.forward_DCT = forward_DCT;
366	0	fdct->do_dct = jpeg_fdct_ifast;
367	0	break;
368	0	#endif
369	0	#ifdef DCT_FLOAT_SUPPORTED
370	0	case JDCT_FLOAT:
371	0	fdct->pub.forward_DCT = forward_DCT_float;
372	0	fdct->do_float_dct = jpeg_fdct_float;
373	0	break;
374	0	#endif
375	0	default:
376	0	ERREXIT(cinfo, JERR_NOT_COMPILED);
377	0	break;
378	0	}
379
380		/* Mark divisor tables unallocated */
381	0	for (i = 0; i < NUM_QUANT_TBLS; i++) {
382	0	fdct->divisors[i] = NULL;
383	0	#ifdef DCT_FLOAT_SUPPORTED
384	0	fdct->float_divisors[i] = NULL;
385	0	#endif
386	0	}
387	0	}