Coverage Report

Created: 2022-10-31 07:00

/src/ghostpdl/base/gxht_thresh.c
Line
Count
Source (jump to first uncovered line)
1
/* Copyright (C) 2001-2021 Artifex Software, Inc.
2
   All Rights Reserved.
3
4
   This software is provided AS-IS with no warranty, either express or
5
   implied.
6
7
   This software is distributed under license and may not be copied,
8
   modified or distributed except as expressly authorized under the terms
9
   of the license contained in the file LICENSE in this distribution.
10
11
   Refer to licensing information at http://www.artifex.com or contact
12
   Artifex Software, Inc.,  1305 Grant Avenue - Suite 200, Novato,
13
   CA 94945, U.S.A., +1(415)492-9861, for further information.
14
*/
15
16
17
/*$Id: gxhts_thresh.c  $ */
18
/* Halftone thresholding code */
19
20
#include <stdlib.h> /* abs() */
21
#include "memory_.h"
22
#include "gx.h"
23
#include "gxgstate.h"
24
#include "gsiparam.h"
25
#include "math_.h"
26
#include "gxfixed.h"  /* needed for gximage.h */
27
#include "gximage.h"
28
#include "gxdevice.h"
29
#include "gxdht.h"
30
#include "gxht_thresh.h"
31
#include "gzht.h"
32
#include "gxdevsop.h"
33
34
/* Enable the following define to perform a little extra work to stop
35
 * spurious valgrind errors. The code should perform perfectly even without
36
 * this enabled, but enabling it makes debugging much easier.
37
 */
38
/* #define PACIFY_VALGRIND */
39
40
/* Compiler specific spelling of "place this object on a 16 byte boundary". */
#ifdef __WIN32__
#define __align16 __declspec(align(16))
#else
#define __align16  __attribute__((aligned(16)))
#endif

/* Integer floor of x: truncate toward zero, then step down by one when x is
 * negative and has a fractional part.  Note that x is evaluated several
 * times, so do not pass an expression with side effects. */
#define fastfloor(x) (((int)(x)) - (((x)<0) && ((x) != (float)(int)(x))))
46
47
#ifdef HAVE_SSE2
48
49
#include <emmintrin.h>
50
51
/* bitreverse[v] is v with the order of its 8 bits mirrored (bit 0 <-> bit 7,
 * bit 1 <-> bit 6, ...).  Used to turn the LSB-first mask produced by
 * _mm_movemask_epi8 into MSB-first halftone bytes. */
static const byte bitreverse[256] = {
    0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0,
    0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0,
    0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8,
    0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8,
    0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4,
    0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4,
    0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC,
    0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC,
    0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2,
    0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2,
    0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA,
    0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA,
    0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6,
    0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6,
    0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE,
    0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE,
    0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1,
    0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1,
    0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9,
    0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9,
    0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5,
    0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5,
    0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED,
    0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD,
    0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3,
    0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3,
    0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB,
    0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB,
    0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7,
    0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7,
    0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF,
    0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF
};
74
#endif
75
76
#if RAW_HT_DUMP
77
/* This is slow thresholding, byte output for debug only */
78
void
79
gx_ht_threshold_row_byte(byte *contone, byte *threshold_strip, int contone_stride,
80
                              byte *halftone, int dithered_stride, int width,
81
                              int num_rows)
82
{
83
    int k, j;
84
    byte *contone_ptr;
85
    byte *thresh_ptr;
86
    byte *halftone_ptr;
87
88
    /* For the moment just do a very slow compare until we get
89
       get this working */
90
    for (j = 0; j < num_rows; j++) {
91
        contone_ptr = contone;
92
        thresh_ptr = threshold_strip + contone_stride * j;
93
        halftone_ptr = halftone + dithered_stride * j;
94
        for (k = 0; k < width; k++) {
95
            if (contone_ptr[k] < thresh_ptr[k]) {
96
                halftone_ptr[k] = 0;
97
            } else {
98
                halftone_ptr[k] = 255;
99
            }
100
        }
101
    }
102
}
103
#endif
104
105
#ifndef HAVE_SSE2
106
/* A simple case for use in the landscape mode. Could probably be coded up
107
   faster */
108
static void
109
threshold_16_bit(byte *contone_ptr, byte *thresh_ptr, byte *ht_data)
110
{
111
    int j;
112
113
    for (j = 2; j > 0; j--) {
114
        byte h = 0;
115
        byte bit_init = 0x80;
116
        do {
117
            if (*contone_ptr++ < *thresh_ptr++) {
118
                h |=  bit_init;
119
            }
120
            bit_init >>= 1;
121
        } while (bit_init != 0);
122
        *ht_data++ = h;
123
    }
124
}
125
#else
126
/* Note this function has strict data alignment needs */
127
static void
128
threshold_16_SSE(byte *contone_ptr, byte *thresh_ptr, byte *ht_data)
129
13.1M
{
130
13.1M
    __m128i input1;
131
13.1M
    __m128i input2;
132
13.1M
    register int result_int;
133
13.1M
    const unsigned int mask1 = 0x80808080;
134
13.1M
    __m128i sign_fix = _mm_set_epi32(mask1, mask1, mask1, mask1);
135
136
    /* Load */
137
13.1M
    input1 = _mm_load_si128((const __m128i *)contone_ptr);
138
13.1M
    input2 = _mm_load_si128((const __m128i *) thresh_ptr);
139
    /* Unsigned subtraction does Unsigned saturation so we
140
       have to use the signed operation */
141
13.1M
    input1 = _mm_xor_si128(input1, sign_fix);
142
13.1M
    input2 = _mm_xor_si128(input2, sign_fix);
143
    /* Subtract the two */
144
13.1M
    input2 = _mm_subs_epi8(input1, input2);
145
    /* Grab the sign mask */
146
13.1M
    result_int = _mm_movemask_epi8(input2);
147
    /* bit wise reversal on 16 bit word */
148
13.1M
    ht_data[0] = bitreverse[(result_int & 0xff)];
149
13.1M
    ht_data[1] = bitreverse[((result_int >> 8) & 0xff)];
150
13.1M
}
151
152
/* Not so fussy on its alignment */
153
static void
154
threshold_16_SSE_unaligned(byte *contone_ptr, byte *thresh_ptr, byte *ht_data)
155
299k
{
156
299k
    __m128i input1;
157
299k
    __m128i input2;
158
299k
    int result_int;
159
299k
    byte *sse_data;
160
299k
    const unsigned int mask1 = 0x80808080;
161
299k
    __m128i sign_fix = _mm_set_epi32(mask1, mask1, mask1, mask1);
162
163
299k
    sse_data = (byte*) &(result_int);
164
    /* Load */
165
299k
    input1 = _mm_loadu_si128((const __m128i *)contone_ptr);
166
299k
    input2 = _mm_loadu_si128((const __m128i *) thresh_ptr);
167
    /* Unsigned subtraction does Unsigned saturation so we
168
       have to use the signed operation */
169
299k
    input1 = _mm_xor_si128(input1, sign_fix);
170
299k
    input2 = _mm_xor_si128(input2, sign_fix);
171
    /* Subtract the two */
172
299k
    input2 = _mm_subs_epi8(input1, input2);
173
    /* Grab the sign mask */
174
299k
    result_int = _mm_movemask_epi8(input2);
175
    /* bit wise reversal on 16 bit word */
176
299k
    ht_data[0] = bitreverse[sse_data[0]];
177
299k
    ht_data[1] = bitreverse[sse_data[1]];
178
299k
}
179
#endif
180
181
/* SSE2 and non-SSE2 implememntation of thresholding a row. Subtractive case
182
   There is some code replication between the two of these (additive and subtractive)
183
   that I need to go back and determine how we can combine them without
184
   any performance loss. */
185
void
186
gx_ht_threshold_row_bit_sub(byte *contone,  byte *threshold_strip,  int contone_stride,
187
                  byte *halftone, int dithered_stride, int width,
188
                  int num_rows, int offset_bits)
189
25.3k
{
190
#ifndef HAVE_SSE2
191
    int k, j;
192
    byte *contone_ptr;
193
    byte *thresh_ptr;
194
    byte *halftone_ptr;
195
    byte bit_init;
196
197
    /* For the moment just do a very slow compare until we get
198
       get this working.  This could use some serious optimization */
199
    width -= offset_bits;
200
    for (j = 0; j < num_rows; j++) {
201
        byte h;
202
        contone_ptr = contone;
203
        thresh_ptr = threshold_strip + contone_stride * j;
204
        halftone_ptr = halftone + dithered_stride * j;
205
        /* First get the left remainder portion.  Put into MSBs of first byte */
206
        bit_init = 0x80;
207
        h = 0;
208
        k = offset_bits;
209
        if (k > 0) {
210
            do {
211
                if (*contone_ptr++ > *thresh_ptr++) {
212
                    h |=  bit_init;
213
                }
214
                bit_init >>= 1;
215
                if (bit_init == 0) {
216
                    bit_init = 0x80;
217
                    *halftone_ptr++ = h;
218
                    h = 0;
219
                }
220
                k--;
221
            } while (k > 0);
222
            bit_init = 0x80;
223
            *halftone_ptr++ = h;
224
            h = 0;
225
            if (offset_bits < 8)
226
                *halftone_ptr++ = 0;
227
        }
228
        /* Now get the rest, which will be 16 bit aligned. */
229
        k = width;
230
        if (k > 0) {
231
            do {
232
                if (*contone_ptr++ > *thresh_ptr++) {
233
                    h |=  bit_init;
234
                }
235
                bit_init >>= 1;
236
                if (bit_init == 0) {
237
                    bit_init = 0x80;
238
                    *halftone_ptr++ = h;
239
                    h = 0;
240
                }
241
                k--;
242
            } while (k > 0);
243
            if (bit_init != 0x80) {
244
                *halftone_ptr++ = h;
245
            }
246
            if ((width & 15) < 8)
247
                *halftone_ptr++ = 0;
248
        }
249
    }
250
#else
251
25.3k
    byte *contone_ptr;
252
25.3k
    byte *thresh_ptr;
253
25.3k
    byte *halftone_ptr;
254
25.3k
    int num_tiles = (width - offset_bits + 15)>>4;
255
25.3k
    int k, j;
256
257
69.9k
    for (j = 0; j < num_rows; j++) {
258
        /* contone and thresh_ptr are 128 bit aligned.  We do need to do this in
259
           two steps to ensure that we pack the bits in an aligned fashion
260
           into halftone_ptr.  */
261
44.5k
        contone_ptr = contone;
262
44.5k
        thresh_ptr = threshold_strip + contone_stride * j;
263
44.5k
        halftone_ptr = halftone + dithered_stride * j;
264
44.5k
        if (offset_bits > 0) {
265
            /* Since we allowed for 16 bits in our left remainder
266
               we can go directly in to the destination.  threshold_16_SSE
267
               requires 128 bit alignment.  contone_ptr and thresh_ptr
268
               are set up so that after we move in by offset_bits elements
269
               then we are 128 bit aligned.  */
270
30.2k
            threshold_16_SSE_unaligned(thresh_ptr, contone_ptr,
271
30.2k
                                       halftone_ptr);
272
30.2k
            halftone_ptr += 2;
273
30.2k
            thresh_ptr += offset_bits;
274
30.2k
            contone_ptr += offset_bits;
275
30.2k
        }
276
        /* Now we should have 128 bit aligned with our input data. Iterate
277
           over sets of 16 going directly into our HT buffer.  Sources and
278
           halftone_ptr buffers should be padded to allow 15 bit overrun */
279
1.32M
        for (k = 0; k < num_tiles; k++) {
280
1.28M
            threshold_16_SSE(thresh_ptr, contone_ptr, halftone_ptr);
281
1.28M
            thresh_ptr += 16;
282
1.28M
            contone_ptr += 16;
283
1.28M
            halftone_ptr += 2;
284
1.28M
        }
285
44.5k
    }
286
25.3k
#endif
287
25.3k
}
288
289
/* SSE2 and non-SSE2 implememntation of thresholding a row. additive case  */
290
void
291
gx_ht_threshold_row_bit(byte *contone,  byte *threshold_strip,  int contone_stride,
292
                  byte *halftone, int dithered_stride, int width,
293
                  int num_rows, int offset_bits)
294
186k
{
295
#ifndef HAVE_SSE2
296
    int k, j;
297
    byte *contone_ptr;
298
    byte *thresh_ptr;
299
    byte *halftone_ptr;
300
    byte bit_init;
301
302
    /* For the moment just do a very slow compare until we get
303
       get this working.  This could use some serious optimization */
304
    width -= offset_bits;
305
    for (j = 0; j < num_rows; j++) {
306
        byte h;
307
        contone_ptr = contone;
308
        thresh_ptr = threshold_strip + contone_stride * j;
309
        halftone_ptr = halftone + dithered_stride * j;
310
        /* First get the left remainder portion.  Put into MSBs of first byte */
311
        bit_init = 0x80;
312
        h = 0;
313
        k = offset_bits;
314
        if (k > 0) {
315
            do {
316
                if (*contone_ptr++ < *thresh_ptr++) {
317
                    h |=  bit_init;
318
                }
319
                bit_init >>= 1;
320
                if (bit_init == 0) {
321
                    bit_init = 0x80;
322
                    *halftone_ptr++ = h;
323
                    h = 0;
324
                }
325
                k--;
326
            } while (k > 0);
327
            bit_init = 0x80;
328
            *halftone_ptr++ = h;
329
            h = 0;
330
            if (offset_bits < 8)
331
                *halftone_ptr++ = 0;
332
        }
333
        /* Now get the rest, which will be 16 bit aligned. */
334
        k = width;
335
        if (k > 0) {
336
            do {
337
                if (*contone_ptr++ < *thresh_ptr++) {
338
                    h |=  bit_init;
339
                }
340
                bit_init >>= 1;
341
                if (bit_init == 0) {
342
                    bit_init = 0x80;
343
                    *halftone_ptr++ = h;
344
                    h = 0;
345
                }
346
                k--;
347
            } while (k > 0);
348
            if (bit_init != 0x80) {
349
                *halftone_ptr++ = h;
350
            }
351
            if ((width & 15) < 8)
352
                *halftone_ptr++ = 0;
353
        }
354
    }
355
#else
356
186k
    byte *contone_ptr;
357
186k
    byte *thresh_ptr;
358
186k
    byte *halftone_ptr;
359
186k
    int num_tiles = (width - offset_bits + 15)>>4;
360
186k
    int k, j;
361
362
471k
    for (j = 0; j < num_rows; j++) {
363
        /* contone and thresh_ptr are 128 bit aligned.  We do need to do this in
364
           two steps to ensure that we pack the bits in an aligned fashion
365
           into halftone_ptr.  */
366
285k
        contone_ptr = contone;
367
285k
        thresh_ptr = threshold_strip + contone_stride * j;
368
285k
        halftone_ptr = halftone + dithered_stride * j;
369
285k
        if (offset_bits > 0) {
370
            /* Since we allowed for 16 bits in our left remainder
371
               we can go directly in to the destination.  threshold_16_SSE
372
               requires 128 bit alignment.  contone_ptr and thresh_ptr
373
               are set up so that after we move in by offset_bits elements
374
               then we are 128 bit aligned.  */
375
269k
            threshold_16_SSE_unaligned(contone_ptr, thresh_ptr,
376
269k
                                       halftone_ptr);
377
269k
            halftone_ptr += 2;
378
269k
            thresh_ptr += offset_bits;
379
269k
            contone_ptr += offset_bits;
380
269k
        }
381
        /* Now we should have 128 bit aligned with our input data. Iterate
382
           over sets of 16 going directly into our HT buffer.  Sources and
383
           halftone_ptr buffers should be padded to allow 15 bit overrun */
384
12.1M
        for (k = 0; k < num_tiles; k++) {
385
11.8M
            threshold_16_SSE(contone_ptr, thresh_ptr, halftone_ptr);
386
11.8M
            thresh_ptr += 16;
387
11.8M
            contone_ptr += 16;
388
11.8M
            halftone_ptr += 2;
389
11.8M
        }
390
285k
    }
391
186k
#endif
392
186k
}
393
394
/* This thresholds a buffer that is LAND_BITS wide by data_length tall.
395
   Subtractive case */
396
void
397
gx_ht_threshold_landscape_sub(byte *contone_align, byte *thresh_align,
398
                    ht_landscape_info_t *ht_landscape, byte *halftone,
399
                    int data_length)
400
0
{
401
0
    __align16 byte contone[LAND_BITS];
402
0
    int position_start, position, curr_position;
403
0
    int *widths = &(ht_landscape->widths[0]);
404
0
    int local_widths[LAND_BITS];
405
0
    int num_contone = ht_landscape->num_contones;
406
0
    int k, j, w, contone_out_posit;
407
0
    byte *contone_ptr, *thresh_ptr, *halftone_ptr;
408
0
#ifdef PACIFY_VALGRIND
409
0
    int extra = 0;
410
0
#endif
411
412
    /* Work through chunks of 16.  */
413
    /* Data may have come in left to right or right to left. */
414
0
    if (ht_landscape->index > 0) {
415
0
        position = position_start = 0;
416
0
    } else {
417
0
        position = position_start = ht_landscape->curr_pos + 1;
418
0
    }
419
0
    thresh_ptr = thresh_align;
420
0
    halftone_ptr = halftone;
421
    /* Copy the widths to a local array, and truncate the last one (which may
422
     * be the first one!) if required. */
423
0
    k = 0;
424
0
    for (j = 0; j < num_contone; j++)
425
0
        k += (local_widths[j] = widths[position_start+j]);
426
0
    if (k > LAND_BITS) {
427
0
        if (ht_landscape->index > 0) {
428
0
            local_widths[num_contone-1] -= k-LAND_BITS;
429
0
        } else {
430
0
            local_widths[0] -= k-LAND_BITS;
431
0
        }
432
0
    }
433
0
#ifdef PACIFY_VALGRIND
434
0
    if (k < LAND_BITS) {
435
0
        extra = LAND_BITS - k;
436
0
    }
437
0
#endif
438
0
    for (k = data_length; k > 0; k--) { /* Loop on rows */
439
0
        contone_ptr = &(contone_align[position]); /* Point us to our row start */
440
0
        curr_position = 0; /* We use this in keeping track of widths */
441
0
        contone_out_posit = 0; /* Our index out */
442
0
        for (j = num_contone; j > 0; j--) {
443
0
            byte c = *contone_ptr;
444
            /* The microsoft compiler, cleverly spots that the following loop
445
             * can be replaced by a memset. Unfortunately, it can't spot that
446
             * the typical length values of the memset are so small that we'd
447
             * be better off doing it the slow way. We therefore introduce a
448
             * sneaky 'volatile' cast below that stops this optimisation. */
449
0
            w = local_widths[curr_position];
450
0
            do {
451
0
                ((volatile byte *)contone)[contone_out_posit] = c;
452
0
                contone_out_posit++;
453
0
            } while (--w);
454
0
#ifdef PACIFY_VALGRIND
455
0
            if (extra)
456
0
                memset(contone+contone_out_posit, 0, extra);
457
0
#endif
458
0
            curr_position++; /* Move us to the next position in our width array */
459
0
            contone_ptr++;   /* Move us to a new location in our contone buffer */
460
0
        }
461
        /* Now we have our left justified and expanded contone data for
462
           LAND_BITS/16 sets of 16 bits. Go ahead and threshold these. */
463
0
        contone_ptr = &contone[0];
464
0
#if LAND_BITS > 16
465
0
        j = LAND_BITS;
466
0
        do {
467
0
#endif
468
0
#ifdef HAVE_SSE2
469
0
            threshold_16_SSE(thresh_ptr, contone_ptr, halftone_ptr);
470
#else
471
            threshold_16_bit(thresh_ptr, contone_ptr, halftone_ptr);
472
#endif
473
0
            thresh_ptr += 16;
474
0
            position += 16;
475
0
            halftone_ptr += 2;
476
0
            contone_ptr += 16;
477
0
#if LAND_BITS > 16
478
0
            j -= 16;
479
0
        } while (j > 0);
480
0
#endif
481
0
    }
482
0
}
483
484
/* This thresholds a buffer that is LAND_BITS wide by data_length tall.
485
   Additive case.  Note I could likely do some code reduction between
486
   the additive and subtractive cases */
487
void
488
gx_ht_threshold_landscape(byte *contone_align, byte *thresh_align,
489
                    ht_landscape_info_t *ht_landscape, byte *halftone,
490
                    int data_length)
491
1
{
492
1
    __align16 byte contone[LAND_BITS];
493
1
    int position_start, position, curr_position;
494
1
    int *widths = &(ht_landscape->widths[0]);
495
1
    int local_widths[LAND_BITS];
496
1
    int num_contone = ht_landscape->num_contones;
497
1
    int k, j, w, contone_out_posit;
498
1
    byte *contone_ptr, *thresh_ptr, *halftone_ptr;
499
1
#ifdef PACIFY_VALGRIND
500
1
    int extra = 0;
501
1
#endif
502
503
    /* Work through chunks of 16.  */
504
    /* Data may have come in left to right or right to left. */
505
1
    if (ht_landscape->index > 0) {
506
1
        position = position_start = 0;
507
1
    } else {
508
0
        position = position_start = ht_landscape->curr_pos + 1;
509
0
    }
510
1
    thresh_ptr = thresh_align;
511
1
    halftone_ptr = halftone;
512
    /* Copy the widths to a local array, and truncate the last one (which may
513
     * be the first one!) if required. */
514
1
    k = 0;
515
2
    for (j = 0; j < num_contone; j++)
516
1
        k += (local_widths[j] = widths[position_start+j]);
517
1
    if (k > LAND_BITS) {
518
0
        if (ht_landscape->index > 0) {
519
0
            local_widths[num_contone-1] -= k-LAND_BITS;
520
0
        } else {
521
0
            local_widths[0] -= k-LAND_BITS;
522
0
        }
523
0
    }
524
1
#ifdef PACIFY_VALGRIND
525
1
    if (k < LAND_BITS) {
526
1
        extra = LAND_BITS - k;
527
1
    }
528
1
#endif
529
4
    for (k = data_length; k > 0; k--) { /* Loop on rows */
530
3
        contone_ptr = &(contone_align[position]); /* Point us to our row start */
531
3
        curr_position = 0; /* We use this in keeping track of widths */
532
3
        contone_out_posit = 0; /* Our index out */
533
6
        for (j = num_contone; j > 0; j--) {
534
3
            byte c = *contone_ptr;
535
            /* The microsoft compiler, cleverly spots that the following loop
536
             * can be replaced by a memset. Unfortunately, it can't spot that
537
             * the typical length values of the memset are so small that we'd
538
             * be better off doing it the slow way. We therefore introduce a
539
             * sneaky 'volatile' cast below that stops this optimisation. */
540
3
            w = local_widths[curr_position];
541
9
            do {
542
9
                ((volatile byte *)contone)[contone_out_posit] = c;
543
9
                contone_out_posit++;
544
9
            } while (--w);
545
3
#ifdef PACIFY_VALGRIND
546
3
            if (extra)
547
3
                memset(contone+contone_out_posit, 0, extra);
548
3
#endif
549
3
            curr_position++; /* Move us to the next position in our width array */
550
3
            contone_ptr++;   /* Move us to a new location in our contone buffer */
551
3
        }
552
        /* Now we have our left justified and expanded contone data for
553
           LAND_BITS/16 sets of 16 bits. Go ahead and threshold these. */
554
3
        contone_ptr = &contone[0];
555
3
#if LAND_BITS > 16
556
3
        j = LAND_BITS;
557
12
        do {
558
12
#endif
559
12
#ifdef HAVE_SSE2
560
12
            threshold_16_SSE(contone_ptr, thresh_ptr, halftone_ptr);
561
#else
562
            threshold_16_bit(contone_ptr, thresh_ptr, halftone_ptr);
563
#endif
564
12
            thresh_ptr += 16;
565
12
            position += 16;
566
12
            halftone_ptr += 2;
567
12
            contone_ptr += 16;
568
12
#if LAND_BITS > 16
569
12
            j -= 16;
570
12
        } while (j > 0);
571
3
#endif
572
3
    }
573
1
}
574
575
int
576
gxht_thresh_image_init(gx_image_enum *penum)
577
2.40k
{
578
2.40k
    int code = 0;
579
2.40k
    fixed ox;
580
2.40k
    int temp;
581
2.40k
    int dev_width, max_height;
582
2.40k
    int spp_out;
583
2.40k
    int k;
584
2.40k
    gx_ht_order *d_order;
585
2.40k
    gx_dda_fixed dda_ht;
586
587
2.40k
    if (gx_device_must_halftone(penum->dev)) {
588
2.40k
        if (penum->pgs != NULL && penum->pgs->dev_ht[HT_OBJTYPE_DEFAULT] != NULL) {
589
2.40k
            gx_device_halftone *pdht = gx_select_dev_ht(penum->pgs);
590
591
5.00k
            for (k = 0; k < pdht->num_comp; k++) {
592
2.60k
                d_order = &(pdht->components[k].corder);
593
2.60k
                code = gx_ht_construct_threshold(d_order, penum->dev,
594
2.60k
                                                 penum->pgs, k);
595
2.60k
                if (code < 0 ) {
596
0
                    return gs_rethrow(code, "threshold creation failed");
597
0
                }
598
2.60k
            }
599
2.40k
        } else {
600
0
            return -1;
601
0
        }
602
2.40k
    }
603
2.40k
    spp_out = penum->dev->color_info.num_components;
604
    /* Precompute values needed for rasterizing. */
605
2.40k
    penum->dxx = float2fixed(penum->matrix.xx + fixed2float(fixed_epsilon) / 2);
606
    /* If the image is landscaped then we want to maintain a buffer
607
       that is sufficiently large so that we can hold a byte
608
       of halftoned data along the column.  This way we avoid doing
609
       multiple writes into the same position over and over.
610
       The size of the buffer we need depends upon the bitdepth of
611
       the output device, the number of device coloranants and the
612
       number of  colorants in the source space.  Note we will
613
       need to eventually  consider  multi-level halftone case
614
       here too.  For now, to make use of the SSE2 stuff, we would
615
       like to have a multiple of 16 bytes of data to process at a time.
616
       So we will collect the columns of data in a buffer that is LAND_BITS
617
       wide.  We will also keep track of the widths of each column.  When
618
       the total width count reaches LAND_BITS, we will create our
619
       threshold array and apply it.  We may have one column that is
620
       buffered between calls in this case.  Also if a call is made
621
       with h=0 we will flush the buffer as we are at the end of the
622
       data.  */
623
2.40k
    if (penum->posture == image_landscape) {
624
1
        int col_length = fixed2int_var_rounded(any_abs(penum->x_extent.y));
625
1
        dda_ht = penum->dda.pixel0.y;
626
1
        if (penum->dxx > 0)
627
1
            dda_translate(dda_ht, -fixed_epsilon);      /* to match rounding in non-fast code */
628
629
1
        ox = dda_current(penum->dda.pixel0.x);
630
1
        temp = gxht_dda_length(&dda_ht, penum->rect.w);
631
1
        if (col_length < temp)
632
0
            col_length = temp;          /* choose max to make sure line_size is large enough */
633
1
        temp = (col_length + LAND_BITS)/LAND_BITS;      /* round up to allow for offset bits */
634
        /* bitmap_raster() expects the width in bits, hence "* 8" */
635
1
        penum->line_size = bitmap_raster((temp * LAND_BITS) * 8);  /* The stride */
636
        /* Now we need at most LAND_BITS of these */
637
1
        penum->line = gs_alloc_bytes(penum->memory,
638
1
                                     LAND_BITS * penum->line_size * spp_out + 16,
639
1
                                     "gxht_thresh");
640
        /* Same with this.  However, we only need one plane here */
641
1
        penum->thresh_buffer = gs_alloc_bytes(penum->memory,
642
1
                                           penum->line_size * LAND_BITS + 16,
643
1
                                           "gxht_thresh");
644
        /* That maps into (LAND_BITS/8) bytes of Halftone data */
645
1
        penum->ht_buffer =
646
1
                        gs_alloc_bytes(penum->memory,
647
1
                           penum->line_size * (LAND_BITS>>3) * spp_out,
648
1
                           "gxht_thresh");
649
1
        penum->ht_plane_height = penum->line_size;
650
1
        penum->ht_stride = penum->line_size;
651
1
        if (penum->line == NULL || penum->thresh_buffer == NULL
652
1
                    || penum->ht_buffer == NULL)
653
0
            return -1;
654
1
        penum->ht_landscape.count = 0;
655
1
        penum->ht_landscape.num_contones = 0;
656
1
        if (penum->y_extent.x < 0) {
657
            /* Going right to left */
658
0
            penum->ht_landscape.curr_pos = LAND_BITS-1;
659
0
            penum->ht_landscape.index = -1;
660
1
        } else {
661
            /* Going left to right */
662
1
            penum->ht_landscape.curr_pos = 0;
663
1
            penum->ht_landscape.index = 1;
664
1
        }
665
1
        if (penum->x_extent.y < 0) {
666
0
            penum->ht_landscape.flipy = true;
667
0
            penum->ht_landscape.y_pos =
668
0
                fixed2int_pixround_perfect(dda_current(penum->dda.pixel0.y) + penum->x_extent.y);
669
1
        } else {
670
1
            penum->ht_landscape.flipy = false;
671
1
            penum->ht_landscape.y_pos =
672
1
                fixed2int_pixround_perfect(dda_current(penum->dda.pixel0.y));
673
1
        }
674
1
        memset(&(penum->ht_landscape.widths[0]), 0, sizeof(int)*LAND_BITS);
675
1
        penum->ht_landscape.offset_set = false;
676
1
        penum->ht_offset_bits = 0; /* Will get set in call to render */
677
1
        if (code >= 0) {
678
1
#if defined(DEBUG) || defined(PACIFY_VALGRIND)
679
1
            memset(penum->line, 0, LAND_BITS * penum->line_size * spp_out + 16);
680
1
            memset(penum->ht_buffer, 0, penum->line_size * (LAND_BITS>>3) * spp_out);
681
1
            memset(penum->thresh_buffer, 0, LAND_BITS * penum->line_size + 16);
682
1
#endif
683
1
        }
684
2.39k
    } else {
685
        /* In the portrait case we allocate a single line buffer
686
           in device width, a threshold buffer of the same size
687
           and possibly wider and the buffer for the halftoned
688
           bits. We have to do a bit of work to enable 16 byte
689
           boundary after an offset to ensure that we can make use
690
           of  the SSE2 operations for thresholding.  We do the
691
           allocations now to avoid doing them with every line */
692
2.39k
        dda_ht = penum->dda.pixel0.x;
693
2.39k
        if (penum->dxx > 0)
694
2.39k
            dda_translate(dda_ht, -fixed_epsilon);      /* to match rounding in non-fast code */
695
        /* Initialize the ht_landscape stuff to zero */
696
2.39k
        memset(&(penum->ht_landscape), 0, sizeof(ht_landscape_info_t));
697
2.39k
        ox = dda_current(dda_ht);
698
2.39k
        dev_width = gxht_dda_length(&dda_ht, penum->rect.w);
699
        /* Get the bit position so that we can do a copy_mono for
700
           the left remainder and then 16 bit aligned copies for the
701
           rest.  The right remainder will be OK as it will land in
702
           the MSBit positions. Note the #define chunk bits16 in
703
           gdevm1.c.  Allow also for a 15 sample over run.
704
        */
705
2.39k
        penum->ht_offset_bits = (-fixed2int_var_rounded(ox)) & (bitmap_raster(1) - 1);
706
2.39k
        if (penum->ht_offset_bits > 0) {
707
1.43k
            penum->ht_stride = bitmap_raster((7 + (dev_width + 4)) + (ARCH_SIZEOF_LONG * 8));
708
1.43k
        } else {
709
962
            penum->ht_stride = bitmap_raster((7 + (dev_width + 2)) + (ARCH_SIZEOF_LONG * 8));
710
962
        }
711
        /* We want to figure out the maximum height that we may
712
           have in taking a single source row and going to device
713
           space */
714
2.39k
        max_height = (int) ceil(fixed2float(any_abs(penum->dst_height)) /
715
2.39k
                                            (float) penum->Height);
716
2.39k
        if (max_height <= 0)
717
0
            return -1;   /* shouldn't happen, but check so we don't div by zero */
718
2.39k
        if (penum->ht_stride * spp_out > max_int / max_height)
719
0
            return -1;         /* overflow */
720
721
2.39k
        penum->ht_buffer =
722
2.39k
                        gs_alloc_bytes(penum->memory,
723
2.39k
                           (size_t)penum->ht_stride * max_height * spp_out,
724
2.39k
                           "gxht_thresh");
725
2.39k
        penum->ht_plane_height = penum->ht_stride * max_height;
726
        /* We want to have 128 bit alignement for our contone and
727
           threshold strips so that we can use SSE operations
728
           in the threshold operation.  Add in a minor buffer and offset
729
           to ensure this.  If gs_alloc_bytes provides at least 16
730
           bit alignment so we may need to move 14 bytes.  However, the
731
           HT process is split in two operations.  One that involves
732
           the HT of a left remainder and the rest which ensures that
733
           we pack in the HT data in the bits with no skew for a fast
734
           copy into the gdevm1 device (16 bit copies).  So, we
735
           need to account for those pixels which occur first and which
736
           are NOT aligned for the contone buffer.  After we offset
737
           by this remainder portion we should be 128 bit aligned.
738
           Also allow a 15 sample over run during the execution.  */
739
2.39k
        temp = (int) ceil((float) ((dev_width + 15.0) + 15.0)/16.0);
740
2.39k
        penum->line_size = bitmap_raster(temp * 16 * 8);  /* The stride */
741
2.39k
        if (penum->line_size > max_int / max_height) {
742
0
            gs_free_object(penum->memory, penum->ht_buffer, "gxht_thresh");
743
0
            penum->ht_buffer = NULL;
744
0
            return -1;         /* thresh_buffer size overflow */
745
0
        }
746
2.39k
        penum->line = gs_alloc_bytes(penum->memory, penum->line_size * spp_out,
747
2.39k
                                     "gxht_thresh");
748
2.39k
        penum->thresh_buffer = gs_alloc_bytes(penum->memory,
749
2.39k
                                              (size_t)penum->line_size * max_height,
750
2.39k
                                              "gxht_thresh");
751
2.39k
        if (penum->line == NULL || penum->thresh_buffer == NULL ||
752
2.39k
            penum->ht_buffer == NULL) {
753
0
            return -1;
754
2.39k
        } else {
755
2.39k
#if defined(DEBUG) || defined(PACIFY_VALGRIND)
756
2.39k
            memset(penum->line, 0, penum->line_size * spp_out);
757
2.39k
            memset(penum->ht_buffer, 0, penum->ht_stride * max_height * spp_out);
758
2.39k
            memset(penum->thresh_buffer, 0, penum->line_size * max_height);
759
2.39k
#endif
760
2.39k
        }
761
2.39k
    }
762
2.40k
    return code;
763
2.40k
}
764
765
static void
766
fill_threshold_buffer(byte *dest_strip, byte *src_strip, int src_width,
767
                       int left_offset, int left_width, int num_tiles,
768
                       int right_width)
769
329k
{
770
329k
    byte *ptr_out_temp = dest_strip;
771
329k
    int ii;
772
773
    /* Left part */
774
329k
    memcpy(dest_strip, src_strip + left_offset, left_width);
775
329k
    ptr_out_temp += left_width;
776
    /* Now the full parts */
777
34.0M
    for (ii = 0; ii < num_tiles; ii++){
778
33.7M
        memcpy(ptr_out_temp, src_strip, src_width);
779
33.7M
        ptr_out_temp += src_width;
780
33.7M
    }
781
    /* Now the remainder */
782
329k
    memcpy(ptr_out_temp, src_strip, right_width);
783
329k
#ifdef PACIFY_VALGRIND
784
329k
    ptr_out_temp += right_width;
785
329k
    ii = (dest_strip-ptr_out_temp) % (LAND_BITS-1);
786
329k
    if (ii > 0)
787
0
        memset(ptr_out_temp, 0, ii);
788
329k
#endif
789
329k
}
790
/* This only moves the data but does not do a reset of the variables.  Used
791
   for case where we have multiple bands of data (e.g. CMYK output) */
792
static void
793
move_landscape_buffer(ht_landscape_info_t *ht_landscape, byte *contone_align,
794
                       int data_length)
795
0
{
796
0
    int k;
797
0
    int position_curr, position_new;
798
799
0
    if (ht_landscape->index < 0) {
800
        /* Moving right to left, move column to far right */
801
0
        position_curr = ht_landscape->curr_pos + 1;
802
0
        position_new = LAND_BITS-1;
803
0
    } else {
804
        /* Moving left to right, move column to far left */
805
0
        position_curr = ht_landscape->curr_pos - 1;
806
0
        position_new = 0;
807
0
    }
808
0
    if (position_curr != position_new) {
809
0
        for (k = 0; k < data_length; k++) {
810
0
                contone_align[position_new] = contone_align[position_curr];
811
0
                position_curr += LAND_BITS;
812
0
                position_new += LAND_BITS;
813
0
        }
814
0
    }
815
0
}
816
817
818
/* If we are in here, we had data left over.  Move it to the proper position
819
   and get ht_landscape_info_t set properly */
820
static void
821
reset_landscape_buffer(ht_landscape_info_t *ht_landscape, byte *contone_align,
822
                       int data_length, int num_used)
823
0
{
824
0
    int delta;
825
0
    int curr_x_pos = ht_landscape->xstart;
826
827
0
    if (ht_landscape->index < 0) {
828
        /* Moving right to left, move column to far right */
829
0
        delta = ht_landscape->count - num_used;
830
0
        memset(&(ht_landscape->widths[0]), 0, sizeof(int)*LAND_BITS);
831
0
        ht_landscape->widths[LAND_BITS-1] = delta;
832
0
        ht_landscape->curr_pos = LAND_BITS-2;
833
0
        ht_landscape->xstart = curr_x_pos - num_used;
834
0
    } else {
835
        /* Moving left to right, move column to far left */
836
0
        delta = ht_landscape->count - num_used;
837
0
        memset(&(ht_landscape->widths[0]), 0, sizeof(int)*LAND_BITS);
838
0
        ht_landscape->widths[0] = delta;
839
0
        ht_landscape->curr_pos = 1;
840
0
        ht_landscape->xstart = curr_x_pos + num_used;
841
0
    }
842
0
    ht_landscape->count = delta;
843
0
    ht_landscape->num_contones = 1;
844
0
}
845
846
/* This performs a thresholding operation on multiple planes of data and
847
   stores the bits into a planar buffer which can then be used for
848
   copy_planes */
849
int
850
gxht_thresh_planes(gx_image_enum *penum, fixed xrun,
851
                   int dest_width, int dest_height,
852
                   byte *thresh_align, gx_device * dev, int offset_contone[],
853
                   int contone_stride)
854
192k
{
855
192k
    int thresh_width, thresh_height, dx;
856
192k
    int left_rem_end, left_width, vdi;
857
192k
    int num_full_tiles, right_tile_width;
858
192k
    int k, jj, dy, j;
859
192k
    byte *thresh_tile;
860
192k
    int position;
861
192k
    bool replicate_tile;
862
192k
    image_posture posture = penum->posture;
863
192k
    const int y_pos = penum->yci;
864
192k
    int width = 0; /* Init to silence compiler warnings */
865
192k
    byte *ptr_out, *row_ptr, *ptr_out_temp;
866
192k
    byte *threshold;
867
192k
    int init_tile, in_row_offset, ii, num_tiles, tile_remainder;
868
192k
    int offset_bits = penum->ht_offset_bits;
869
192k
    byte *halftone;
870
192k
    int dithered_stride = penum->ht_stride;
871
192k
    bool is_planar_dev = dev->is_planar;
872
192k
    gx_color_index dev_white = gx_device_white(dev);
873
192k
    gx_color_index dev_black = gx_device_black(dev);
874
192k
    int spp_out = dev->color_info.num_components;
875
192k
    byte *contone_align = NULL; /* Init to silence compiler warnings */
876
192k
    gx_device_halftone *pdht = gx_select_dev_ht(penum->pgs);
877
878
    /* Go ahead and fill the threshold line buffer with tiled threshold values.
879
       First just grab the row or column that we are going to tile with and
880
       then do memcpy into the buffer */
881
882
    /* Figure out the tile steps.  Left offset, Number of tiles, Right offset. */
883
192k
    switch (posture) {
884
192k
        case image_portrait:
885
192k
            vdi = penum->hci;
886
            /*  Iterate over the vdi and fill up our threshold buffer.  We
887
                 also need to loop across the planes of data */
888
404k
            for (j = 0; j < spp_out; j++) {
889
211k
                bool threshold_inverted = pdht->components[j].corder.threshold_inverted;
890
891
211k
                thresh_width = pdht->components[j].corder.width;
892
211k
                thresh_height = pdht->components[j].corder.full_height;
893
211k
                halftone = penum->ht_buffer + j * vdi * dithered_stride;
894
                /* Compute the tiling positions with dest_width */
895
211k
                dx = (fixed2int_var_rounded(xrun) + penum->pgs->screen_phase[0].x) % thresh_width;
896
                /* Left remainder part */
897
211k
                left_rem_end = min(dx + dest_width, thresh_width);
898
                /* The left width of our tile part */
899
211k
                left_width = left_rem_end - dx;
900
                /* Now the middle part */
901
211k
                num_full_tiles =
902
211k
                    (int)fastfloor((dest_width - left_width)/ (float) thresh_width);
903
                /* Now the right part */
904
211k
                right_tile_width = dest_width -  num_full_tiles * thresh_width -
905
211k
                                   left_width;
906
                /* Get the proper threshold for the colorant count */
907
211k
                threshold = pdht->components[j].corder.threshold;
908
                /* Point to the proper contone data */
909
211k
                contone_align = penum->line + contone_stride * j +
910
211k
                                offset_contone[j];
911
541k
                for (k = 0; k < vdi; k++) {
912
                    /* Get a pointer to our tile row */
913
329k
                    dy = (penum->yci + k -
914
329k
                          penum->pgs->screen_phase[0].y) % thresh_height;
915
329k
                    if (dy < 0)
916
0
                        dy += thresh_height;
917
329k
                    thresh_tile = threshold + thresh_width * dy;
918
                    /* Fill the buffer, can be multiple rows.  Make sure
919
                       to update with stride */
920
329k
                    position = contone_stride * k;
921
                    /* Tile into the 128 bit aligned threshold strip */
922
329k
                    fill_threshold_buffer(&(thresh_align[position]),
923
329k
                                           thresh_tile, thresh_width, dx, left_width,
924
329k
                                           num_full_tiles, right_tile_width);
925
329k
                }
926
                /* Apply the threshold operation */
927
211k
                if (offset_bits > dest_width)
928
3
                    offset_bits = dest_width;
929
930
211k
                if (threshold_inverted ||
931
211k
                    (dev->color_info.polarity == GX_CINFO_POLARITY_SUBTRACTIVE && is_planar_dev)) {
932
25.3k
                    gx_ht_threshold_row_bit_sub(contone_align, thresh_align, contone_stride,
933
25.3k
                                      halftone, dithered_stride, dest_width, vdi,
934
25.3k
                                      offset_bits);
935
186k
                } else {
936
186k
                    gx_ht_threshold_row_bit(contone_align, thresh_align, contone_stride,
937
186k
                          halftone, dithered_stride, dest_width, vdi,
938
186k
                          offset_bits);
939
186k
                }
940
211k
            }
941
            /* FIXME: An improvement here would be to generate the initial
942
             * offset_bits at the correct offset within the byte so that they
943
             * align with the remainder of the line. This would mean not
944
             * always packing them into the first offset_bits (in MSB order)
945
             * of our 16 bit word, but rather into the last offset_bits
946
             * (in MSB order) (except when the entire run is small!).
947
             *
948
             * This would enable us to do just one aligned copy_mono call for
949
             * the entire scanline. */
950
            /* Now do the copy mono or copy plane operation */
951
            /* First the left remainder bits */
952
192k
            if (offset_bits > 0) {
953
175k
                int x_pos = fixed2int_var_rounded(xrun);
954
175k
                if (!is_planar_dev) {
955
170k
                    (*dev_proc(dev, copy_mono)) (dev, penum->ht_buffer, 0, dithered_stride,
956
170k
                                                 gx_no_bitmap_id, x_pos, y_pos,
957
170k
                                                 offset_bits, vdi, dev_white,
958
170k
                                                 dev_black);
959
170k
                } else {
960
4.64k
                    (*dev_proc(dev, copy_planes)) (dev, penum->ht_buffer, 0, dithered_stride,
961
4.64k
                                                 gx_no_bitmap_id, x_pos, y_pos,
962
4.64k
                                                 offset_bits, vdi, vdi);
963
4.64k
                }
964
175k
            }
965
192k
            if ((dest_width - offset_bits) > 0 ) {
966
                /* Now the primary aligned bytes */
967
192k
                int curr_width = dest_width - offset_bits;
968
192k
                int x_pos = fixed2int_var_rounded(xrun) + offset_bits;
969
                /* FIXME: This assumes the allowed offset_bits will always be <= 16 */
970
192k
                int xoffs = offset_bits > 0 ? 16 : 0;
971
972
192k
                if (!is_planar_dev) {
973
186k
                    (*dev_proc(dev, copy_mono)) (dev, penum->ht_buffer, xoffs, dithered_stride,
974
186k
                                                 gx_no_bitmap_id, x_pos, y_pos,
975
186k
                                                 curr_width, vdi, dev_white,
976
186k
                                                 dev_black);
977
186k
                } else {
978
6.33k
                    (*dev_proc(dev, copy_planes)) (dev, penum->ht_buffer, xoffs, dithered_stride,
979
6.33k
                                                 gx_no_bitmap_id, x_pos, y_pos,
980
6.33k
                                                 curr_width, vdi, vdi);
981
6.33k
                }
982
192k
            }
983
984
192k
            break;
985
2
        case image_landscape:
986
            /* Go ahead and paint the chunk if we have LAND_BITS values or a
987
             * partial to get us in sync with the 1 bit devices 16 bit
988
             * positions. */
989
2
            vdi = penum->wci;
990
                /* Now do the haftoning into our buffer.  We basically check
991
                   first if we have enough data or are all done */
992
3
            while ( (penum->ht_landscape.count >= LAND_BITS ||
993
3
                   ((penum->ht_landscape.count >= offset_bits) &&
994
3
                    penum->ht_landscape.offset_set))) {
995
                /* Go ahead and 2D tile in the threshold buffer at this time */
996
                /* Always work the tiling from the upper left corner of our
997
                   LAND_BITS columns */
998
2
                for (j = 0; j < spp_out; j++) {
999
1
                    halftone = penum->ht_buffer +
1000
1
                                   j * penum->ht_plane_height * (LAND_BITS>>3);
1001
1
                    thresh_width = pdht->components[j].corder.width;
1002
1
                    thresh_height =
1003
1
                          pdht->components[j].corder.full_height;
1004
                    /* Get the proper threshold for the colorant count */
1005
1
                    threshold = pdht->components[j].corder.threshold;
1006
                    /* Point to the proper contone data */
1007
1
                    contone_align = penum->line + offset_contone[j] +
1008
1
                                      LAND_BITS * j * contone_stride;
1009
1
                    if (penum->ht_landscape.offset_set) {
1010
1
                        width = offset_bits;
1011
1
                    } else {
1012
0
                        width = LAND_BITS;
1013
0
                    }
1014
1
                    if (penum->y_extent.x < 0) {
1015
0
                        dx = penum->ht_landscape.xstart - width + 1;
1016
1
                    } else {
1017
1
                        dx = penum->ht_landscape.xstart;
1018
1
                    }
1019
1
                    dx = (dx + penum->pgs->screen_phase[0].x) % thresh_width;
1020
1
                    if (dx < 0)
1021
0
                        dx += thresh_width;
1022
1
                    dy = (penum->ht_landscape.y_pos -
1023
1
                              penum->pgs->screen_phase[0].y) % thresh_height;
1024
1
                    if (dy < 0)
1025
0
                        dy += thresh_height;
1026
                    /* Left remainder part */
1027
1
                    left_rem_end = min(dx + LAND_BITS, thresh_width);
1028
1
                    left_width = left_rem_end - dx;
1029
                    /* Now the middle part */
1030
1
                    num_full_tiles = (LAND_BITS - left_width) / thresh_width;
1031
                    /* Now the right part */
1032
1
                    right_tile_width =
1033
1
                        LAND_BITS - num_full_tiles * thresh_width - left_width;
1034
                    /* Now loop over the y stuff */
1035
1
                    ptr_out = thresh_align;
1036
                    /* Do this in three parts.  We do a top part, followed by
1037
                       larger mem copies followed by a bottom partial. After
1038
                       a slower initial fill we are able to do larger faster
1039
                       expansions */
1040
1
                    if (dest_height <= 2 * thresh_height) {
1041
1
                        init_tile = dest_height;
1042
1
                        replicate_tile = false;
1043
1
                    } else {
1044
0
                        init_tile = thresh_height;
1045
0
                        replicate_tile = true;
1046
0
                    }
1047
4
                    for (jj = 0; jj < init_tile; jj++) {
1048
3
                        in_row_offset = (jj + dy) % thresh_height;
1049
3
                        row_ptr = threshold + in_row_offset * thresh_width;
1050
3
                        ptr_out_temp = ptr_out;
1051
                        /* Left part */
1052
3
                        memcpy(ptr_out_temp, row_ptr + dx, left_width);
1053
3
                        ptr_out_temp += left_width;
1054
                        /* Now the full tiles */
1055
30
                        for (ii = 0; ii < num_full_tiles; ii++) {
1056
27
                            memcpy(ptr_out_temp, row_ptr, thresh_width);
1057
27
                            ptr_out_temp += thresh_width;
1058
27
                        }
1059
                        /* Now the remainder */
1060
3
                        memcpy(ptr_out_temp, row_ptr, right_tile_width);
1061
3
                        ptr_out += LAND_BITS;
1062
3
                    }
1063
1
                    if (replicate_tile) {
1064
                        /* Find out how many we need to copy */
1065
0
                        num_tiles =
1066
0
                            (int)fastfloor((float) (dest_height - thresh_height)/ (float) thresh_height);
1067
0
                        tile_remainder = dest_height - (num_tiles + 1) * thresh_height;
1068
0
                        for (jj = 0; jj < num_tiles; jj ++) {
1069
0
                            memcpy(ptr_out, thresh_align, LAND_BITS * thresh_height);
1070
0
                            ptr_out += LAND_BITS * thresh_height;
1071
0
                        }
1072
                        /* Now fill in the remainder */
1073
0
                        memcpy(ptr_out, thresh_align, LAND_BITS * tile_remainder);
1074
0
                    }
1075
                    /* Apply the threshold operation */
1076
1
                    if (dev->color_info.polarity == GX_CINFO_POLARITY_SUBTRACTIVE
1077
1
                        && is_planar_dev) {
1078
0
                        gx_ht_threshold_landscape_sub(contone_align, thresh_align,
1079
0
                                            &(penum->ht_landscape), halftone, dest_height);
1080
1
                    } else {
1081
1
                        gx_ht_threshold_landscape(contone_align, thresh_align,
1082
1
                                            &(penum->ht_landscape), halftone, dest_height);
1083
1
                    }
1084
                    /* We may have a line left over that has to be maintained
1085
                       due to line replication in the resolution conversion. */
1086
1
                    if (width != penum->ht_landscape.count) {
1087
                        /* move the line do not reset the stuff */
1088
0
                        move_landscape_buffer(&(penum->ht_landscape),
1089
0
                                              contone_align, dest_height);
1090
0
                    }
1091
1
                }
1092
                /* Perform the copy mono */
1093
1
                if (penum->ht_landscape.index < 0) {
1094
0
                    if (!is_planar_dev) {
1095
0
                        (*dev_proc(dev, copy_mono))
1096
0
                                       (dev, penum->ht_buffer, 0, LAND_BITS>>3,
1097
0
                                        gx_no_bitmap_id,
1098
0
                                        penum->ht_landscape.xstart - width + 1,
1099
0
                                        penum->ht_landscape.y_pos,
1100
0
                                        width, dest_height,
1101
0
                                        dev_white, dev_black);
1102
0
                    } else {
1103
0
                        (*dev_proc(dev, copy_planes))
1104
0
                                       (dev, penum->ht_buffer, 0, LAND_BITS>>3,
1105
0
                                        gx_no_bitmap_id,
1106
0
                                        penum->ht_landscape.xstart - width + 1,
1107
0
                                        penum->ht_landscape.y_pos,
1108
0
                                        width, dest_height,
1109
0
                                        penum->ht_plane_height);
1110
0
                    }
1111
1
                } else {
1112
1
                    if (!is_planar_dev) {
1113
1
                        (*dev_proc(dev, copy_mono)) (dev, penum->ht_buffer, 0, LAND_BITS>>3,
1114
1
                                                     gx_no_bitmap_id,
1115
1
                                                     penum->ht_landscape.xstart,
1116
1
                                                     penum->ht_landscape.y_pos,
1117
1
                                                     width, dest_height,
1118
1
                                                     dev_white, dev_black);
1119
1
                    } else {
1120
0
                        (*dev_proc(dev, copy_planes)) (dev, penum->ht_buffer, 0, LAND_BITS>>3,
1121
0
                                                     gx_no_bitmap_id,
1122
0
                                                     penum->ht_landscape.xstart,
1123
0
                                                     penum->ht_landscape.y_pos,
1124
0
                                                     width, dest_height,
1125
0
                                                     penum->ht_plane_height);
1126
0
                    }
1127
1
                }
1128
1
                penum->ht_landscape.offset_set = false;
1129
1
                if (width != penum->ht_landscape.count) {
1130
0
                    reset_landscape_buffer(&(penum->ht_landscape),
1131
0
                                           contone_align, dest_height,
1132
0
                                           width);
1133
1
                } else {
1134
                    /* Reset the whole buffer */
1135
1
                    penum->ht_landscape.count = 0;
1136
1
                    if (penum->ht_landscape.index < 0) {
1137
                        /* Going right to left */
1138
0
                        penum->ht_landscape.curr_pos = LAND_BITS-1;
1139
1
                    } else {
1140
                        /* Going left to right */
1141
1
                        penum->ht_landscape.curr_pos = 0;
1142
1
                    }
1143
1
                    penum->ht_landscape.num_contones = 0;
1144
1
                    memset(&(penum->ht_landscape.widths[0]), 0, sizeof(int)*LAND_BITS);
1145
1
                }
1146
1
            }
1147
2
            break;
1148
0
        default:
1149
0
            return gs_rethrow(-1, "Invalid orientation for thresholding");
1150
192k
    }
1151
192k
    return 0;
1152
192k
}
1153
1154
/* Return the number of device pixels (as a non-negative count) between the
   rounded current position of *dda and its rounded position after being
   advanced by src_size.  The caller's dda is left untouched: the advance
   is performed on a local copy. */
int
gxht_dda_length(gx_dda_fixed *dda, int src_size)
{
    gx_dda_fixed advanced = *dda;
    int start_pos, end_pos;

    start_pos = fixed2int_var_rounded(dda_current(*dda));
    dda_advance(advanced, src_size);
    end_pos = fixed2int_var_rounded(dda_current(advanced));

    return abs(end_pos - start_pos);
}