Coverage Report

Created: 2025-06-10 07:27

/src/ghostpdl/base/gxht_thresh.c
Line
Count
Source (jump to first uncovered line)
1
/* Copyright (C) 2001-2023 Artifex Software, Inc.
2
   All Rights Reserved.
3
4
   This software is provided AS-IS with no warranty, either express or
5
   implied.
6
7
   This software is distributed under license and may not be copied,
8
   modified or distributed except as expressly authorized under the terms
9
   of the license contained in the file LICENSE in this distribution.
10
11
   Refer to licensing information at http://www.artifex.com or contact
12
   Artifex Software, Inc.,  39 Mesa Street, Suite 108A, San Francisco,
13
   CA 94129, USA, for further information.
14
*/
15
16
17
/*$Id: gxhts_thresh.c  $ */
18
/* Halftone thresholding code */
19
20
#include <stdlib.h> /* abs() */
21
#include "memory_.h"
22
#include "gx.h"
23
#include "gxgstate.h"
24
#include "gsiparam.h"
25
#include "math_.h"
26
#include "gxfixed.h"  /* needed for gximage.h */
27
#include "gximage.h"
28
#include "gxdevice.h"
29
#include "gxdht.h"
30
#include "gxht_thresh.h"
31
#include "gzht.h"
32
#include "gxdevsop.h"
33
34
/* Enable the following define to perform a little extra work to stop
35
 * spurious valgrind errors. The code should perform perfectly even without
36
 * this enabled, but enabling it makes debugging much easier.
37
 */
38
/* #define PACIFY_VALGRIND */
39
40
/* 16-byte alignment qualifier for local buffers: the declspec form is used
 * when __WIN32__ is defined, the GCC-style attribute otherwise. */
#ifdef __WIN32__
#  define __align16 __declspec(align(16))
#else
#  define __align16 __attribute__((aligned(16)))
#endif
45
0
/* Floor of x as an int: truncate toward zero, then subtract one when x is
 * negative and not already integral.  The argument is evaluated several
 * times, so it must be free of side effects. */
#define fastfloor(x) (((int)(x)) - (((x)<0) && ((x) != (float)(int)(x))))
46
47
#ifdef HAVE_SSE2
48
49
#include <emmintrin.h>
50
51
/* bitreverse[v] is v with the order of its 8 bits reversed.  Laid out 16
 * entries per row, so row r holds the results for inputs 16*r .. 16*r+15. */
static const byte bitreverse[256] = {
    0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0,
    0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8,
    0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4,
    0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC,
    0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2,
    0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA,
    0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6,
    0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE,
    0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1,
    0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9,
    0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5,
    0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD,
    0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3,
    0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB,
    0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7,
    0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF
};
74
#endif
75
76
#if RAW_HT_DUMP
77
/* This is slow thresholding, byte output for debug only */
78
void
79
gx_ht_threshold_row_byte(byte *contone, byte *threshold_strip, int contone_stride,
80
                              byte *halftone, int dithered_stride, int width,
81
                              int num_rows)
82
{
83
    int k, j;
84
    byte *contone_ptr;
85
    byte *thresh_ptr;
86
    byte *halftone_ptr;
87
88
    /* For the moment just do a very slow compare until we get
89
       get this working */
90
    for (j = 0; j < num_rows; j++) {
91
        contone_ptr = contone;
92
        thresh_ptr = threshold_strip + contone_stride * j;
93
        halftone_ptr = halftone + dithered_stride * j;
94
        for (k = 0; k < width; k++) {
95
            if (contone_ptr[k] < thresh_ptr[k]) {
96
                halftone_ptr[k] = 0;
97
            } else {
98
                halftone_ptr[k] = 255;
99
            }
100
        }
101
    }
102
}
103
#endif
104
105
#ifndef HAVE_SSE2
106
/* A simple case for use in the landscape mode. Could probably be coded up
107
   faster */
108
static void
109
threshold_16_bit(byte *contone_ptr, byte *thresh_ptr, byte *ht_data)
110
{
111
    int j;
112
113
    for (j = 2; j > 0; j--) {
114
        byte h = 0;
115
        byte bit_init = 0x80;
116
        do {
117
            if (*contone_ptr++ < *thresh_ptr++) {
118
                h |=  bit_init;
119
            }
120
            bit_init >>= 1;
121
        } while (bit_init != 0);
122
        *ht_data++ = h;
123
    }
124
}
125
#else
126
/* Note this function has strict data alignment needs */
127
static void
128
threshold_16_SSE(byte *contone_ptr, byte *thresh_ptr, byte *ht_data)
129
0
{
130
0
    __m128i input1;
131
0
    __m128i input2;
132
0
    register int result_int;
133
0
    const unsigned int mask1 = 0x80808080;
134
0
    __m128i sign_fix = _mm_set_epi32(mask1, mask1, mask1, mask1);
135
136
    /* Load */
137
0
    input1 = _mm_load_si128((const __m128i *)contone_ptr);
138
0
    input2 = _mm_load_si128((const __m128i *) thresh_ptr);
139
    /* Unsigned subtraction does Unsigned saturation so we
140
       have to use the signed operation */
141
0
    input1 = _mm_xor_si128(input1, sign_fix);
142
0
    input2 = _mm_xor_si128(input2, sign_fix);
143
    /* Subtract the two */
144
0
    input2 = _mm_subs_epi8(input1, input2);
145
    /* Grab the sign mask */
146
0
    result_int = _mm_movemask_epi8(input2);
147
    /* bit wise reversal on 16 bit word */
148
0
    ht_data[0] = bitreverse[(result_int & 0xff)];
149
0
    ht_data[1] = bitreverse[((result_int >> 8) & 0xff)];
150
0
}
151
152
/* Not so fussy on its alignment */
153
static void
154
threshold_16_SSE_unaligned(byte *contone_ptr, byte *thresh_ptr, byte *ht_data)
155
0
{
156
0
    __m128i input1;
157
0
    __m128i input2;
158
0
    int result_int;
159
0
    byte *sse_data;
160
0
    const unsigned int mask1 = 0x80808080;
161
0
    __m128i sign_fix = _mm_set_epi32(mask1, mask1, mask1, mask1);
162
163
0
    sse_data = (byte*) &(result_int);
164
    /* Load */
165
0
    input1 = _mm_loadu_si128((const __m128i *)contone_ptr);
166
0
    input2 = _mm_loadu_si128((const __m128i *) thresh_ptr);
167
    /* Unsigned subtraction does Unsigned saturation so we
168
       have to use the signed operation */
169
0
    input1 = _mm_xor_si128(input1, sign_fix);
170
0
    input2 = _mm_xor_si128(input2, sign_fix);
171
    /* Subtract the two */
172
0
    input2 = _mm_subs_epi8(input1, input2);
173
    /* Grab the sign mask */
174
0
    result_int = _mm_movemask_epi8(input2);
175
    /* bit wise reversal on 16 bit word */
176
0
    ht_data[0] = bitreverse[sse_data[0]];
177
0
    ht_data[1] = bitreverse[sse_data[1]];
178
0
}
179
#endif
180
181
/* SSE2 and non-SSE2 implememntation of thresholding a row. Subtractive case
182
   There is some code replication between the two of these (additive and subtractive)
183
   that I need to go back and determine how we can combine them without
184
   any performance loss. */
185
void
186
gx_ht_threshold_row_bit_sub(byte *contone,  byte *threshold_strip,  int contone_stride,
187
                  byte *halftone, int dithered_stride, int width,
188
                  int num_rows, int offset_bits)
189
0
{
190
#ifndef HAVE_SSE2
191
    int k, j;
192
    byte *contone_ptr;
193
    byte *thresh_ptr;
194
    byte *halftone_ptr;
195
    byte bit_init;
196
197
    /* For the moment just do a very slow compare until we get
198
       get this working.  This could use some serious optimization */
199
    width -= offset_bits;
200
    for (j = 0; j < num_rows; j++) {
201
        byte h;
202
        contone_ptr = contone;
203
        thresh_ptr = threshold_strip + contone_stride * j;
204
        halftone_ptr = halftone + dithered_stride * j;
205
        /* First get the left remainder portion.  Put into MSBs of first byte */
206
        bit_init = 0x80;
207
        h = 0;
208
        k = offset_bits;
209
        if (k > 0) {
210
            do {
211
                if (*contone_ptr++ > *thresh_ptr++) {
212
                    h |=  bit_init;
213
                }
214
                bit_init >>= 1;
215
                if (bit_init == 0) {
216
                    bit_init = 0x80;
217
                    *halftone_ptr++ = h;
218
                    h = 0;
219
                }
220
                k--;
221
            } while (k > 0);
222
            bit_init = 0x80;
223
            *halftone_ptr++ = h;
224
            h = 0;
225
            if (offset_bits < 8)
226
                *halftone_ptr++ = 0;
227
        }
228
        /* Now get the rest, which will be 16 bit aligned. */
229
        k = width;
230
        if (k > 0) {
231
            do {
232
                if (*contone_ptr++ > *thresh_ptr++) {
233
                    h |=  bit_init;
234
                }
235
                bit_init >>= 1;
236
                if (bit_init == 0) {
237
                    bit_init = 0x80;
238
                    *halftone_ptr++ = h;
239
                    h = 0;
240
                }
241
                k--;
242
            } while (k > 0);
243
            if (bit_init != 0x80) {
244
                *halftone_ptr++ = h;
245
            }
246
            if ((width & 15) < 8)
247
                *halftone_ptr++ = 0;
248
        }
249
    }
250
#else
251
0
    byte *contone_ptr;
252
0
    byte *thresh_ptr;
253
0
    byte *halftone_ptr;
254
0
    int num_tiles = (width - offset_bits + 15)>>4;
255
0
    int k, j;
256
257
0
    for (j = 0; j < num_rows; j++) {
258
        /* contone and thresh_ptr are 128 bit aligned.  We do need to do this in
259
           two steps to ensure that we pack the bits in an aligned fashion
260
           into halftone_ptr.  */
261
0
        contone_ptr = contone;
262
0
        thresh_ptr = threshold_strip + contone_stride * j;
263
0
        halftone_ptr = halftone + dithered_stride * j;
264
0
        if (offset_bits > 0) {
265
            /* Since we allowed for 16 bits in our left remainder
266
               we can go directly in to the destination.  threshold_16_SSE
267
               requires 128 bit alignment.  contone_ptr and thresh_ptr
268
               are set up so that after we move in by offset_bits elements
269
               then we are 128 bit aligned.  */
270
0
            threshold_16_SSE_unaligned(thresh_ptr, contone_ptr,
271
0
                                       halftone_ptr);
272
0
            halftone_ptr += 2;
273
0
            thresh_ptr += offset_bits;
274
0
            contone_ptr += offset_bits;
275
0
        }
276
        /* Now we should have 128 bit aligned with our input data. Iterate
277
           over sets of 16 going directly into our HT buffer.  Sources and
278
           halftone_ptr buffers should be padded to allow 15 bit overrun */
279
0
        for (k = 0; k < num_tiles; k++) {
280
0
            threshold_16_SSE(thresh_ptr, contone_ptr, halftone_ptr);
281
0
            thresh_ptr += 16;
282
0
            contone_ptr += 16;
283
0
            halftone_ptr += 2;
284
0
        }
285
0
    }
286
0
#endif
287
0
}
288
289
/* SSE2 and non-SSE2 implememntation of thresholding a row. additive case  */
290
void
291
gx_ht_threshold_row_bit(byte *contone,  byte *threshold_strip,  int contone_stride,
292
                  byte *halftone, int dithered_stride, int width,
293
                  int num_rows, int offset_bits)
294
0
{
295
#ifndef HAVE_SSE2
296
    int k, j;
297
    byte *contone_ptr;
298
    byte *thresh_ptr;
299
    byte *halftone_ptr;
300
    byte bit_init;
301
302
    /* For the moment just do a very slow compare until we get
303
       get this working.  This could use some serious optimization */
304
    width -= offset_bits;
305
    for (j = 0; j < num_rows; j++) {
306
        byte h;
307
        contone_ptr = contone;
308
        thresh_ptr = threshold_strip + contone_stride * j;
309
        halftone_ptr = halftone + dithered_stride * j;
310
        /* First get the left remainder portion.  Put into MSBs of first byte */
311
        bit_init = 0x80;
312
        h = 0;
313
        k = offset_bits;
314
        if (k > 0) {
315
            do {
316
                if (*contone_ptr++ < *thresh_ptr++) {
317
                    h |=  bit_init;
318
                }
319
                bit_init >>= 1;
320
                if (bit_init == 0) {
321
                    bit_init = 0x80;
322
                    *halftone_ptr++ = h;
323
                    h = 0;
324
                }
325
                k--;
326
            } while (k > 0);
327
            bit_init = 0x80;
328
            *halftone_ptr++ = h;
329
            h = 0;
330
            if (offset_bits < 8)
331
                *halftone_ptr++ = 0;
332
        }
333
        /* Now get the rest, which will be 16 bit aligned. */
334
        k = width;
335
        if (k > 0) {
336
            do {
337
                if (*contone_ptr++ < *thresh_ptr++) {
338
                    h |=  bit_init;
339
                }
340
                bit_init >>= 1;
341
                if (bit_init == 0) {
342
                    bit_init = 0x80;
343
                    *halftone_ptr++ = h;
344
                    h = 0;
345
                }
346
                k--;
347
            } while (k > 0);
348
            if (bit_init != 0x80) {
349
                *halftone_ptr++ = h;
350
            }
351
            if ((width & 15) < 8)
352
                *halftone_ptr++ = 0;
353
        }
354
    }
355
#else
356
0
    byte *contone_ptr;
357
0
    byte *thresh_ptr;
358
0
    byte *halftone_ptr;
359
0
    int num_tiles = (width - offset_bits + 15)>>4;
360
0
    int k, j;
361
362
0
    for (j = 0; j < num_rows; j++) {
363
        /* contone and thresh_ptr are 128 bit aligned.  We do need to do this in
364
           two steps to ensure that we pack the bits in an aligned fashion
365
           into halftone_ptr.  */
366
0
        contone_ptr = contone;
367
0
        thresh_ptr = threshold_strip + contone_stride * j;
368
0
        halftone_ptr = halftone + dithered_stride * j;
369
0
        if (offset_bits > 0) {
370
            /* Since we allowed for 16 bits in our left remainder
371
               we can go directly in to the destination.  threshold_16_SSE
372
               requires 128 bit alignment.  contone_ptr and thresh_ptr
373
               are set up so that after we move in by offset_bits elements
374
               then we are 128 bit aligned.  */
375
0
            threshold_16_SSE_unaligned(contone_ptr, thresh_ptr,
376
0
                                       halftone_ptr);
377
0
            halftone_ptr += 2;
378
0
            thresh_ptr += offset_bits;
379
0
            contone_ptr += offset_bits;
380
0
        }
381
        /* Now we should have 128 bit aligned with our input data. Iterate
382
           over sets of 16 going directly into our HT buffer.  Sources and
383
           halftone_ptr buffers should be padded to allow 15 bit overrun */
384
0
        for (k = 0; k < num_tiles; k++) {
385
0
            threshold_16_SSE(contone_ptr, thresh_ptr, halftone_ptr);
386
0
            thresh_ptr += 16;
387
0
            contone_ptr += 16;
388
0
            halftone_ptr += 2;
389
0
        }
390
0
    }
391
0
#endif
392
0
}
393
394
/* This thresholds a buffer that is LAND_BITS wide by data_length tall.
395
   Subtractive case */
396
void
397
gx_ht_threshold_landscape_sub(byte *contone_align, byte *thresh_align,
398
                    ht_landscape_info_t *ht_landscape, byte *halftone,
399
                    int data_length)
400
0
{
401
0
    __align16 byte contone[LAND_BITS];
402
0
    int position_start, position, curr_position;
403
0
    int *widths = &(ht_landscape->widths[0]);
404
0
    int local_widths[LAND_BITS];
405
0
    int num_contone = ht_landscape->num_contones;
406
0
    int k, j, w, contone_out_posit;
407
0
    byte *contone_ptr, *thresh_ptr, *halftone_ptr;
408
0
#ifdef PACIFY_VALGRIND
409
0
    int extra = 0;
410
0
#endif
411
412
    /* Work through chunks of 16.  */
413
    /* Data may have come in left to right or right to left. */
414
0
    if (ht_landscape->index > 0) {
415
0
        position = position_start = 0;
416
0
    } else {
417
0
        position = position_start = ht_landscape->curr_pos + 1;
418
0
    }
419
0
    thresh_ptr = thresh_align;
420
0
    halftone_ptr = halftone;
421
    /* Copy the widths to a local array, and truncate the last one (which may
422
     * be the first one!) if required. */
423
0
    k = 0;
424
0
    for (j = 0; j < num_contone; j++)
425
0
        k += (local_widths[j] = widths[position_start+j]);
426
0
    if (k > LAND_BITS) {
427
0
        if (ht_landscape->index > 0) {
428
0
            local_widths[num_contone-1] -= k-LAND_BITS;
429
0
        } else {
430
0
            local_widths[0] -= k-LAND_BITS;
431
0
        }
432
0
    }
433
0
#ifdef PACIFY_VALGRIND
434
0
    if (k < LAND_BITS) {
435
0
        extra = LAND_BITS - k;
436
0
    }
437
0
#endif
438
0
    for (k = data_length; k > 0; k--) { /* Loop on rows */
439
0
        contone_ptr = &(contone_align[position]); /* Point us to our row start */
440
0
        curr_position = 0; /* We use this in keeping track of widths */
441
0
        contone_out_posit = 0; /* Our index out */
442
0
        for (j = num_contone; j > 0; j--) {
443
0
            byte c = *contone_ptr;
444
            /* The microsoft compiler, cleverly spots that the following loop
445
             * can be replaced by a memset. Unfortunately, it can't spot that
446
             * the typical length values of the memset are so small that we'd
447
             * be better off doing it the slow way. We therefore introduce a
448
             * sneaky 'volatile' cast below that stops this optimisation. */
449
0
            w = local_widths[curr_position];
450
0
            do {
451
0
                ((volatile byte *)contone)[contone_out_posit] = c;
452
0
                contone_out_posit++;
453
0
            } while (--w);
454
0
#ifdef PACIFY_VALGRIND
455
0
            if (extra)
456
0
                memset(contone+contone_out_posit, 0, extra);
457
0
#endif
458
0
            curr_position++; /* Move us to the next position in our width array */
459
0
            contone_ptr++;   /* Move us to a new location in our contone buffer */
460
0
        }
461
        /* Now we have our left justified and expanded contone data for
462
           LAND_BITS/16 sets of 16 bits. Go ahead and threshold these. */
463
0
        contone_ptr = &contone[0];
464
0
#if LAND_BITS > 16
465
0
        j = LAND_BITS;
466
0
        do {
467
0
#endif
468
0
#ifdef HAVE_SSE2
469
0
            threshold_16_SSE(thresh_ptr, contone_ptr, halftone_ptr);
470
#else
471
            threshold_16_bit(thresh_ptr, contone_ptr, halftone_ptr);
472
#endif
473
0
            thresh_ptr += 16;
474
0
            position += 16;
475
0
            halftone_ptr += 2;
476
0
            contone_ptr += 16;
477
0
#if LAND_BITS > 16
478
0
            j -= 16;
479
0
        } while (j > 0);
480
0
#endif
481
0
    }
482
0
}
483
484
/* This thresholds a buffer that is LAND_BITS wide by data_length tall.
485
   Additive case.  Note I could likely do some code reduction between
486
   the additive and subtractive cases */
487
void
488
gx_ht_threshold_landscape(byte *contone_align, byte *thresh_align,
489
                    ht_landscape_info_t *ht_landscape, byte *halftone,
490
                    int data_length)
491
0
{
492
0
    __align16 byte contone[LAND_BITS];
493
0
    int position_start, position, curr_position;
494
0
    int *widths = &(ht_landscape->widths[0]);
495
0
    int local_widths[LAND_BITS];
496
0
    int num_contone = ht_landscape->num_contones;
497
0
    int k, j, w, contone_out_posit;
498
0
    byte *contone_ptr, *thresh_ptr, *halftone_ptr;
499
0
#ifdef PACIFY_VALGRIND
500
0
    int extra = 0;
501
0
#endif
502
503
    /* Work through chunks of 16.  */
504
    /* Data may have come in left to right or right to left. */
505
0
    if (ht_landscape->index > 0) {
506
0
        position = position_start = 0;
507
0
    } else {
508
0
        position = position_start = ht_landscape->curr_pos + 1;
509
0
    }
510
0
    thresh_ptr = thresh_align;
511
0
    halftone_ptr = halftone;
512
    /* Copy the widths to a local array, and truncate the last one (which may
513
     * be the first one!) if required. */
514
0
    k = 0;
515
0
    for (j = 0; j < num_contone; j++)
516
0
        k += (local_widths[j] = widths[position_start+j]);
517
0
    if (k > LAND_BITS) {
518
0
        if (ht_landscape->index > 0) {
519
0
            local_widths[num_contone-1] -= k-LAND_BITS;
520
0
        } else {
521
0
            local_widths[0] -= k-LAND_BITS;
522
0
        }
523
0
    }
524
0
#ifdef PACIFY_VALGRIND
525
0
    if (k < LAND_BITS) {
526
0
        extra = LAND_BITS - k;
527
0
    }
528
0
#endif
529
0
    for (k = data_length; k > 0; k--) { /* Loop on rows */
530
0
        contone_ptr = &(contone_align[position]); /* Point us to our row start */
531
0
        curr_position = 0; /* We use this in keeping track of widths */
532
0
        contone_out_posit = 0; /* Our index out */
533
0
        for (j = num_contone; j > 0; j--) {
534
0
            byte c = *contone_ptr;
535
            /* The microsoft compiler, cleverly spots that the following loop
536
             * can be replaced by a memset. Unfortunately, it can't spot that
537
             * the typical length values of the memset are so small that we'd
538
             * be better off doing it the slow way. We therefore introduce a
539
             * sneaky 'volatile' cast below that stops this optimisation. */
540
0
            w = local_widths[curr_position];
541
0
            do {
542
0
                ((volatile byte *)contone)[contone_out_posit] = c;
543
0
                contone_out_posit++;
544
0
            } while (--w);
545
0
#ifdef PACIFY_VALGRIND
546
0
            if (extra)
547
0
                memset(contone+contone_out_posit, 0, extra);
548
0
#endif
549
0
            curr_position++; /* Move us to the next position in our width array */
550
0
            contone_ptr++;   /* Move us to a new location in our contone buffer */
551
0
        }
552
        /* Now we have our left justified and expanded contone data for
553
           LAND_BITS/16 sets of 16 bits. Go ahead and threshold these. */
554
0
        contone_ptr = &contone[0];
555
0
#if LAND_BITS > 16
556
0
        j = LAND_BITS;
557
0
        do {
558
0
#endif
559
0
#ifdef HAVE_SSE2
560
0
            threshold_16_SSE(contone_ptr, thresh_ptr, halftone_ptr);
561
#else
562
            threshold_16_bit(contone_ptr, thresh_ptr, halftone_ptr);
563
#endif
564
0
            thresh_ptr += 16;
565
0
            position += 16;
566
0
            halftone_ptr += 2;
567
0
            contone_ptr += 16;
568
0
#if LAND_BITS > 16
569
0
            j -= 16;
570
0
        } while (j > 0);
571
0
#endif
572
0
    }
573
0
}
574
575
int
576
gxht_thresh_image_init(gx_image_enum *penum)
577
0
{
578
0
    int code = 0;
579
0
    fixed ox;
580
0
    int temp;
581
0
    int dev_width, max_height;
582
0
    int spp_out;
583
0
    int k;
584
0
    gx_ht_order *d_order;
585
0
    gx_dda_fixed dda_ht;
586
587
0
    if (gx_device_must_halftone(penum->dev)) {
588
0
        if (penum->pgs != NULL && penum->pgs->dev_ht[HT_OBJTYPE_DEFAULT] != NULL) {
589
0
            gx_device_halftone *pdht = gx_select_dev_ht(penum->pgs);
590
591
0
            for (k = 0; k < pdht->num_comp; k++) {
592
0
                d_order = &(pdht->components[k].corder);
593
0
                code = gx_ht_construct_threshold(d_order, penum->dev,
594
0
                                                 penum->pgs, k);
595
0
                if (code < 0 ) {
596
0
                    return gs_rethrow(code, "threshold creation failed");
597
0
                }
598
0
            }
599
0
        } else {
600
0
            return -1;
601
0
        }
602
0
    }
603
0
    spp_out = penum->dev->color_info.num_components;
604
    /* Precompute values needed for rasterizing. */
605
0
    penum->dxx = float2fixed(penum->matrix.xx + fixed2float(fixed_epsilon) / 2);
606
    /* If the image is landscaped then we want to maintain a buffer
607
       that is sufficiently large so that we can hold a byte
608
       of halftoned data along the column.  This way we avoid doing
609
       multiple writes into the same position over and over.
610
       The size of the buffer we need depends upon the bitdepth of
611
       the output device, the number of device coloranants and the
612
       number of  colorants in the source space.  Note we will
613
       need to eventually  consider  multi-level halftone case
614
       here too.  For now, to make use of the SSE2 stuff, we would
615
       like to have a multiple of 16 bytes of data to process at a time.
616
       So we will collect the columns of data in a buffer that is LAND_BITS
617
       wide.  We will also keep track of the widths of each column.  When
618
       the total width count reaches LAND_BITS, we will create our
619
       threshold array and apply it.  We may have one column that is
620
       buffered between calls in this case.  Also if a call is made
621
       with h=0 we will flush the buffer as we are at the end of the
622
       data.  */
623
0
    if (penum->posture == image_landscape) {
624
0
        int col_length = fixed2int_var_rounded(any_abs(penum->x_extent.y));
625
0
        dda_ht = penum->dda.pixel0.y;
626
0
        if (penum->dxx > 0)
627
0
            dda_translate(dda_ht, -fixed_epsilon);      /* to match rounding in non-fast code */
628
629
0
        ox = dda_current(penum->dda.pixel0.x);
630
0
        temp = gxht_dda_length(&dda_ht, penum->rect.w);
631
0
        if (col_length < temp)
632
0
            col_length = temp;          /* choose max to make sure line_size is large enough */
633
0
        temp = (col_length + LAND_BITS)/LAND_BITS;      /* round up to allow for offset bits */
634
        /* bitmap_raster() expects the width in bits, hence "* 8" */
635
0
        penum->line_size = bitmap_raster((temp * LAND_BITS) * 8);  /* The stride */
636
        /* Now we need at most LAND_BITS of these */
637
0
        penum->line = gs_alloc_bytes(penum->memory,
638
0
                                     LAND_BITS * penum->line_size * spp_out + 16,
639
0
                                     "gxht_thresh");
640
        /* Same with this.  However, we only need one plane here */
641
0
        penum->thresh_buffer = gs_alloc_bytes(penum->memory,
642
0
                                           penum->line_size * LAND_BITS + 16,
643
0
                                           "gxht_thresh");
644
        /* That maps into (LAND_BITS/8) bytes of Halftone data */
645
0
        penum->ht_buffer =
646
0
                        gs_alloc_bytes(penum->memory,
647
0
                           penum->line_size * (LAND_BITS>>3) * spp_out,
648
0
                           "gxht_thresh");
649
0
        penum->ht_plane_height = penum->line_size;
650
0
        penum->ht_stride = penum->line_size;
651
0
        if (penum->line == NULL || penum->thresh_buffer == NULL
652
0
                    || penum->ht_buffer == NULL)
653
0
            return -1;
654
0
        penum->ht_landscape.count = 0;
655
0
        penum->ht_landscape.num_contones = 0;
656
0
        if (penum->y_extent.x < 0) {
657
            /* Going right to left */
658
0
            penum->ht_landscape.curr_pos = LAND_BITS-1;
659
0
            penum->ht_landscape.index = -1;
660
0
        } else {
661
            /* Going left to right */
662
0
            penum->ht_landscape.curr_pos = 0;
663
0
            penum->ht_landscape.index = 1;
664
0
        }
665
0
        if (penum->x_extent.y < 0) {
666
0
            penum->ht_landscape.flipy = true;
667
0
            penum->ht_landscape.y_pos =
668
0
                fixed2int_pixround_perfect(dda_current(penum->dda.pixel0.y) + penum->x_extent.y);
669
0
        } else {
670
0
            penum->ht_landscape.flipy = false;
671
0
            penum->ht_landscape.y_pos =
672
0
                fixed2int_pixround_perfect(dda_current(penum->dda.pixel0.y));
673
0
        }
674
0
        memset(&(penum->ht_landscape.widths[0]), 0, sizeof(int)*LAND_BITS);
675
0
        penum->ht_landscape.offset_set = false;
676
0
        penum->ht_offset_bits = 0; /* Will get set in call to render */
677
0
        if (code >= 0) {
678
0
#if defined(DEBUG) || defined(PACIFY_VALGRIND)
679
0
            memset(penum->line, 0, LAND_BITS * penum->line_size * spp_out + 16);
680
0
            memset(penum->ht_buffer, 0, penum->line_size * (LAND_BITS>>3) * spp_out);
681
0
            memset(penum->thresh_buffer, 0, LAND_BITS * penum->line_size + 16);
682
0
#endif
683
0
        }
684
0
    } else {
685
        /* In the portrait case we allocate a single line buffer
686
           in device width, a threshold buffer of the same size
687
           and possibly wider and the buffer for the halftoned
688
           bits. We have to do a bit of work to enable 16 byte
689
           boundary after an offset to ensure that we can make use
690
           of  the SSE2 operations for thresholding.  We do the
691
           allocations now to avoid doing them with every line */
692
0
        dda_ht = penum->dda.pixel0.x;
693
0
        if (penum->dxx > 0)
694
0
            dda_translate(dda_ht, -fixed_epsilon);      /* to match rounding in non-fast code */
695
        /* Initialize the ht_landscape stuff to zero */
696
0
        memset(&(penum->ht_landscape), 0, sizeof(ht_landscape_info_t));
697
0
        ox = dda_current(dda_ht);
698
0
        dev_width = gxht_dda_length(&dda_ht, penum->rect.w);
699
        /* Get the bit position so that we can do a copy_mono for
700
           the left remainder and then 16 bit aligned copies for the
701
           rest.  The right remainder will be OK as it will land in
702
           the MSBit positions. Note the #define chunk bits16 in
703
           gdevm1.c.  Allow also for a 15 sample over run.
704
        */
705
0
        penum->ht_offset_bits = (-fixed2int_var_rounded(ox)) & (bitmap_raster(1) - 1);
706
0
        if (penum->ht_offset_bits > 0) {
707
0
            penum->ht_stride = bitmap_raster((7 + (dev_width + 4)) + (ARCH_SIZEOF_LONG * 8));
708
0
        } else {
709
0
            penum->ht_stride = bitmap_raster((7 + (dev_width + 2)) + (ARCH_SIZEOF_LONG * 8));
710
0
        }
711
        /* We want to figure out the maximum height that we may
712
           have in taking a single source row and going to device
713
           space */
714
0
        max_height = (int) ceil(fixed2float(any_abs(penum->dst_height)) /
715
0
                                            (float) penum->Height);
716
0
        if (max_height <= 0)
717
0
            return -1;   /* shouldn't happen, but check so we don't div by zero */
718
0
        if (penum->ht_stride * spp_out > max_int / max_height)
719
0
            return -1;         /* overflow */
720
721
0
        penum->ht_buffer =
722
0
                        gs_alloc_bytes(penum->memory,
723
0
                           (size_t)penum->ht_stride * max_height * spp_out,
724
0
                           "gxht_thresh");
725
0
        penum->ht_plane_height = penum->ht_stride * max_height;
726
        /* We want to have 128 bit alignement for our contone and
727
           threshold strips so that we can use SSE operations
728
           in the threshold operation.  Add in a minor buffer and offset
729
           to ensure this.  If gs_alloc_bytes provides at least 16
730
           bit alignment so we may need to move 14 bytes.  However, the
731
           HT process is split in two operations.  One that involves
732
           the HT of a left remainder and the rest which ensures that
733
           we pack in the HT data in the bits with no skew for a fast
734
           copy into the gdevm1 device (16 bit copies).  So, we
735
           need to account for those pixels which occur first and which
736
           are NOT aligned for the contone buffer.  After we offset
737
           by this remainder portion we should be 128 bit aligned.
738
           Also allow a 15 sample over run during the execution.  */
739
0
        temp = (int) ceil((float) ((dev_width + 15.0) + 15.0)/16.0);
740
0
        penum->line_size = bitmap_raster(temp * 16 * 8);  /* The stride */
741
0
        if (penum->line_size > max_int / max_height) {
742
0
            gs_free_object(penum->memory, penum->ht_buffer, "gxht_thresh");
743
0
            penum->ht_buffer = NULL;
744
0
            return -1;         /* thresh_buffer size overflow */
745
0
        }
746
0
        penum->line = gs_alloc_bytes(penum->memory, penum->line_size * spp_out,
747
0
                                     "gxht_thresh");
748
0
        penum->thresh_buffer = gs_alloc_bytes(penum->memory,
749
0
                                              (size_t)penum->line_size * max_height,
750
0
                                              "gxht_thresh");
751
0
        if (penum->line == NULL || penum->thresh_buffer == NULL ||
752
0
            penum->ht_buffer == NULL) {
753
0
            return -1;
754
0
        } else {
755
0
#if defined(DEBUG) || defined(PACIFY_VALGRIND)
756
0
            memset(penum->line, 0, penum->line_size * spp_out);
757
0
            memset(penum->ht_buffer, 0, penum->ht_stride * max_height * spp_out);
758
0
            memset(penum->thresh_buffer, 0, penum->line_size * max_height);
759
0
#endif
760
0
        }
761
0
    }
762
0
    return code;
763
0
}
764
765
static void
766
fill_threshold_buffer(byte *dest_strip, byte *src, byte *src_strip, int src_width,
767
                       int left_offset, int left_width, int num_tiles,
768
                       int right_width)
769
0
{
770
0
    byte *ptr_out_temp = dest_strip;
771
0
    int ii;
772
773
    /* Make sure we don't try and read before the start of the threshold array. This can happen
774
     * if we drop to the beginning of the array, AND we have a negative left_offset. If we do
775
     * have a negative left_offset this represents an area we won't actually be using, but we need
776
     * to move along the threshold array until we get to the point where we copy data we will use.
777
     * So lets simply avoid reading before the start of the data. We can leave the destination
778
     * buffer uninitialised because we won't be reading from that area. Bug #706795 but the ASAN
779
     * error occurs on a number of input files in the test suite.
780
     */
781
0
    if (src_strip + left_offset < src) {
782
0
        int under = src - (src_strip + left_offset);
783
0
        left_offset += under;
784
0
        ptr_out_temp += under;
785
0
        left_width -= under;
786
0
        if (left_width < 0)
787
0
            left_width = 0;
788
0
    }
789
    /* Left part */
790
0
    memcpy(ptr_out_temp, src_strip + left_offset, left_width);
791
0
    ptr_out_temp += left_width;
792
    /* Now the full parts */
793
0
    for (ii = 0; ii < num_tiles; ii++){
794
0
        memcpy(ptr_out_temp, src_strip, src_width);
795
0
        ptr_out_temp += src_width;
796
0
    }
797
    /* Now the remainder */
798
0
    memcpy(ptr_out_temp, src_strip, right_width);
799
0
#ifdef PACIFY_VALGRIND
800
0
    ptr_out_temp += right_width;
801
0
    ii = (dest_strip-ptr_out_temp) % (LAND_BITS-1);
802
0
    if (ii > 0)
803
0
        memset(ptr_out_temp, 0, ii);
804
0
#endif
805
0
}
806
/* This only moves the data but does not do a reset of the variables.  Used
807
   for case where we have multiple bands of data (e.g. CMYK output) */
808
static void
809
move_landscape_buffer(ht_landscape_info_t *ht_landscape, byte *contone_align,
810
                       int data_length)
811
0
{
812
0
    int k;
813
0
    int position_curr, position_new;
814
815
0
    if (ht_landscape->index < 0) {
816
        /* Moving right to left, move column to far right */
817
0
        position_curr = ht_landscape->curr_pos + 1;
818
0
        position_new = LAND_BITS-1;
819
0
    } else {
820
        /* Moving left to right, move column to far left */
821
0
        position_curr = ht_landscape->curr_pos - 1;
822
0
        position_new = 0;
823
0
    }
824
0
    if (position_curr != position_new) {
825
0
        for (k = 0; k < data_length; k++) {
826
0
                contone_align[position_new] = contone_align[position_curr];
827
0
                position_curr += LAND_BITS;
828
0
                position_new += LAND_BITS;
829
0
        }
830
0
    }
831
0
}
832
833
834
/* If we are in here, we had data left over.  Move it to the proper position
835
   and get ht_landscape_info_t set properly */
836
static void
837
reset_landscape_buffer(ht_landscape_info_t *ht_landscape, byte *contone_align,
838
                       int data_length, int num_used)
839
0
{
840
0
    int delta;
841
0
    int curr_x_pos = ht_landscape->xstart;
842
843
0
    if (ht_landscape->index < 0) {
844
        /* Moving right to left, move column to far right */
845
0
        delta = ht_landscape->count - num_used;
846
0
        memset(&(ht_landscape->widths[0]), 0, sizeof(int)*LAND_BITS);
847
0
        ht_landscape->widths[LAND_BITS-1] = delta;
848
0
        ht_landscape->curr_pos = LAND_BITS-2;
849
0
        ht_landscape->xstart = curr_x_pos - num_used;
850
0
    } else {
851
        /* Moving left to right, move column to far left */
852
0
        delta = ht_landscape->count - num_used;
853
0
        memset(&(ht_landscape->widths[0]), 0, sizeof(int)*LAND_BITS);
854
0
        ht_landscape->widths[0] = delta;
855
0
        ht_landscape->curr_pos = 1;
856
0
        ht_landscape->xstart = curr_x_pos + num_used;
857
0
    }
858
0
    ht_landscape->count = delta;
859
0
    ht_landscape->num_contones = 1;
860
0
}
861
862
/* This performs a thresholding operation on multiple planes of data and
863
   stores the bits into a planar buffer which can then be used for
864
   copy_planes */
865
int
866
gxht_thresh_planes(gx_image_enum *penum, fixed xrun,
867
                   int dest_width, int dest_height,
868
                   byte *thresh_align, gx_device * dev, int offset_contone[],
869
                   int contone_stride)
870
0
{
871
0
    int thresh_width, thresh_height, dx;
872
0
    int left_rem_end, left_width, vdi;
873
0
    int num_full_tiles, right_tile_width;
874
0
    int k, jj, dy, j;
875
0
    byte *thresh_tile;
876
0
    int position;
877
0
    bool replicate_tile;
878
0
    image_posture posture = penum->posture;
879
0
    const int y_pos = penum->yci;
880
0
    int width = 0; /* Init to silence compiler warnings */
881
0
    byte *ptr_out, *row_ptr, *ptr_out_temp;
882
0
    byte *threshold;
883
0
    int init_tile, in_row_offset, ii, num_tiles, tile_remainder;
884
0
    int offset_bits = penum->ht_offset_bits;
885
0
    byte *halftone;
886
0
    int dithered_stride = penum->ht_stride;
887
0
    bool is_planar_dev = dev->num_planar_planes;
888
0
    gx_color_index dev_white = gx_device_white(dev);
889
0
    gx_color_index dev_black = gx_device_black(dev);
890
0
    int spp_out = dev->color_info.num_components;
891
0
    byte *contone_align = NULL; /* Init to silence compiler warnings */
892
0
    gx_device_halftone *pdht = gx_select_dev_ht(penum->pgs);
893
894
    /* Go ahead and fill the threshold line buffer with tiled threshold values.
895
       First just grab the row or column that we are going to tile with and
896
       then do memcpy into the buffer */
897
898
    /* Figure out the tile steps.  Left offset, Number of tiles, Right offset. */
899
0
    switch (posture) {
900
0
        case image_portrait:
901
0
            vdi = penum->hci;
902
            /*  Iterate over the vdi and fill up our threshold buffer.  We
903
                 also need to loop across the planes of data */
904
0
            for (j = 0; j < spp_out; j++) {
905
0
                bool threshold_inverted = pdht->components[j].corder.threshold_inverted;
906
907
0
                thresh_width = pdht->components[j].corder.width;
908
0
                thresh_height = pdht->components[j].corder.full_height;
909
0
                halftone = penum->ht_buffer + j * vdi * dithered_stride;
910
                /* Compute the tiling positions with dest_width */
911
0
                dx = (fixed2int_var_rounded(xrun) + penum->pgs->screen_phase[0].x) % thresh_width;
912
                /* Left remainder part */
913
0
                left_rem_end = min(dx + dest_width, thresh_width);
914
                /* The left width of our tile part */
915
0
                left_width = left_rem_end - dx;
916
                /* Now the middle part */
917
0
                num_full_tiles =
918
0
                    (int)fastfloor((dest_width - left_width)/ (float) thresh_width);
919
                /* Now the right part */
920
0
                right_tile_width = dest_width -  num_full_tiles * thresh_width -
921
0
                                   left_width;
922
                /* Get the proper threshold for the colorant count */
923
0
                threshold = pdht->components[j].corder.threshold;
924
0
                if (threshold == NULL)
925
0
                    return_error(gs_error_unregistered);
926
                /* Point to the proper contone data */
927
0
                contone_align = penum->line + contone_stride * j +
928
0
                                offset_contone[j];
929
0
                for (k = 0; k < vdi; k++) {
930
                    /* Get a pointer to our tile row */
931
0
                    dy = (penum->yci + k -
932
0
                          penum->pgs->screen_phase[0].y) % thresh_height;
933
0
                    if (dy < 0)
934
0
                        dy += thresh_height;
935
0
                    thresh_tile = threshold + thresh_width * dy;
936
                    /* Fill the buffer, can be multiple rows.  Make sure
937
                       to update with stride */
938
0
                    position = contone_stride * k;
939
                    /* Tile into the 128 bit aligned threshold strip */
940
0
                    fill_threshold_buffer(&(thresh_align[position]), threshold,
941
0
                                           thresh_tile, thresh_width, dx, left_width,
942
0
                                           num_full_tiles, right_tile_width);
943
0
                }
944
                /* Apply the threshold operation */
945
0
                if (offset_bits > dest_width)
946
0
                    offset_bits = dest_width;
947
948
0
                if (threshold_inverted ||
949
0
                    (dev->color_info.polarity == GX_CINFO_POLARITY_SUBTRACTIVE && is_planar_dev)) {
950
0
                    gx_ht_threshold_row_bit_sub(contone_align, thresh_align, contone_stride,
951
0
                                      halftone, dithered_stride, dest_width, vdi,
952
0
                                      offset_bits);
953
0
                } else {
954
0
                    gx_ht_threshold_row_bit(contone_align, thresh_align, contone_stride,
955
0
                          halftone, dithered_stride, dest_width, vdi,
956
0
                          offset_bits);
957
0
                }
958
0
            }
959
            /* FIXME: An improvement here would be to generate the initial
960
             * offset_bits at the correct offset within the byte so that they
961
             * align with the remainder of the line. This would mean not
962
             * always packing them into the first offset_bits (in MSB order)
963
             * of our 16 bit word, but rather into the last offset_bits
964
             * (in MSB order) (except when the entire run is small!).
965
             *
966
             * This would enable us to do just one aligned copy_mono call for
967
             * the entire scanline. */
968
            /* Now do the copy mono or copy plane operation */
969
            /* First the left remainder bits */
970
0
            if (offset_bits > 0) {
971
0
                int x_pos = fixed2int_var_rounded(xrun);
972
0
                if (!is_planar_dev) {
973
0
                    (*dev_proc(dev, copy_mono)) (dev, penum->ht_buffer, 0, dithered_stride,
974
0
                                                 gx_no_bitmap_id, x_pos, y_pos,
975
0
                                                 offset_bits, vdi, dev_white,
976
0
                                                 dev_black);
977
0
                } else {
978
0
                    (*dev_proc(dev, copy_planes)) (dev, penum->ht_buffer, 0, dithered_stride,
979
0
                                                 gx_no_bitmap_id, x_pos, y_pos,
980
0
                                                 offset_bits, vdi, vdi);
981
0
                }
982
0
            }
983
0
            if ((dest_width - offset_bits) > 0 ) {
984
                /* Now the primary aligned bytes */
985
0
                int curr_width = dest_width - offset_bits;
986
0
                int x_pos = fixed2int_var_rounded(xrun) + offset_bits;
987
                /* FIXME: This assumes the allowed offset_bits will always be <= 16 */
988
0
                int xoffs = offset_bits > 0 ? 16 : 0;
989
990
0
                if (!is_planar_dev) {
991
0
                    (*dev_proc(dev, copy_mono)) (dev, penum->ht_buffer, xoffs, dithered_stride,
992
0
                                                 gx_no_bitmap_id, x_pos, y_pos,
993
0
                                                 curr_width, vdi, dev_white,
994
0
                                                 dev_black);
995
0
                } else {
996
0
                    (*dev_proc(dev, copy_planes)) (dev, penum->ht_buffer, xoffs, dithered_stride,
997
0
                                                 gx_no_bitmap_id, x_pos, y_pos,
998
0
                                                 curr_width, vdi, vdi);
999
0
                }
1000
0
            }
1001
1002
0
            break;
1003
0
        case image_landscape:
1004
            /* Go ahead and paint the chunk if we have LAND_BITS values or a
1005
             * partial to get us in sync with the 1 bit devices 16 bit
1006
             * positions. */
1007
0
            vdi = penum->wci;
1008
                /* Now do the haftoning into our buffer.  We basically check
1009
                   first if we have enough data or are all done */
1010
0
            while ( (penum->ht_landscape.count >= LAND_BITS ||
1011
0
                   ((penum->ht_landscape.count >= offset_bits) &&
1012
0
                    penum->ht_landscape.offset_set))) {
1013
                /* Go ahead and 2D tile in the threshold buffer at this time */
1014
                /* Always work the tiling from the upper left corner of our
1015
                   LAND_BITS columns */
1016
0
                for (j = 0; j < spp_out; j++) {
1017
0
                    halftone = penum->ht_buffer +
1018
0
                                   j * penum->ht_plane_height * (LAND_BITS>>3);
1019
0
                    thresh_width = pdht->components[j].corder.width;
1020
0
                    thresh_height =
1021
0
                          pdht->components[j].corder.full_height;
1022
                    /* Get the proper threshold for the colorant count */
1023
0
                    threshold = pdht->components[j].corder.threshold;
1024
0
                    if (threshold == NULL)
1025
0
                        return_error(gs_error_unregistered);
1026
                    /* Point to the proper contone data */
1027
0
                    contone_align = penum->line + offset_contone[j] +
1028
0
                                      LAND_BITS * j * contone_stride;
1029
0
                    if (penum->ht_landscape.offset_set) {
1030
0
                        width = offset_bits;
1031
0
                    } else {
1032
0
                        width = LAND_BITS;
1033
0
                    }
1034
0
                    if (penum->y_extent.x < 0) {
1035
0
                        dx = penum->ht_landscape.xstart - width + 1;
1036
0
                    } else {
1037
0
                        dx = penum->ht_landscape.xstart;
1038
0
                    }
1039
0
                    dx = (dx + penum->pgs->screen_phase[0].x) % thresh_width;
1040
0
                    if (dx < 0)
1041
0
                        dx += thresh_width;
1042
0
                    dy = (penum->ht_landscape.y_pos -
1043
0
                              penum->pgs->screen_phase[0].y) % thresh_height;
1044
0
                    if (dy < 0)
1045
0
                        dy += thresh_height;
1046
                    /* Left remainder part */
1047
0
                    left_rem_end = min(dx + LAND_BITS, thresh_width);
1048
0
                    left_width = left_rem_end - dx;
1049
                    /* Now the middle part */
1050
0
                    num_full_tiles = (LAND_BITS - left_width) / thresh_width;
1051
                    /* Now the right part */
1052
0
                    right_tile_width =
1053
0
                        LAND_BITS - num_full_tiles * thresh_width - left_width;
1054
                    /* Now loop over the y stuff */
1055
0
                    ptr_out = thresh_align;
1056
                    /* Do this in three parts.  We do a top part, followed by
1057
                       larger mem copies followed by a bottom partial. After
1058
                       a slower initial fill we are able to do larger faster
1059
                       expansions */
1060
0
                    if (dest_height <= 2 * thresh_height) {
1061
0
                        init_tile = dest_height;
1062
0
                        replicate_tile = false;
1063
0
                    } else {
1064
0
                        init_tile = thresh_height;
1065
0
                        replicate_tile = true;
1066
0
                    }
1067
0
                    for (jj = 0; jj < init_tile; jj++) {
1068
0
                        in_row_offset = (jj + dy) % thresh_height;
1069
0
                        row_ptr = threshold + in_row_offset * thresh_width;
1070
0
                        ptr_out_temp = ptr_out;
1071
                        /* Left part */
1072
0
                        memcpy(ptr_out_temp, row_ptr + dx, left_width);
1073
0
                        ptr_out_temp += left_width;
1074
                        /* Now the full tiles */
1075
0
                        for (ii = 0; ii < num_full_tiles; ii++) {
1076
0
                            memcpy(ptr_out_temp, row_ptr, thresh_width);
1077
0
                            ptr_out_temp += thresh_width;
1078
0
                        }
1079
                        /* Now the remainder */
1080
0
                        memcpy(ptr_out_temp, row_ptr, right_tile_width);
1081
0
                        ptr_out += LAND_BITS;
1082
0
                    }
1083
0
                    if (replicate_tile) {
1084
                        /* Find out how many we need to copy */
1085
0
                        num_tiles =
1086
0
                            (int)fastfloor((float) (dest_height - thresh_height)/ (float) thresh_height);
1087
0
                        tile_remainder = dest_height - (num_tiles + 1) * thresh_height;
1088
0
                        for (jj = 0; jj < num_tiles; jj ++) {
1089
0
                            memcpy(ptr_out, thresh_align, LAND_BITS * thresh_height);
1090
0
                            ptr_out += LAND_BITS * thresh_height;
1091
0
                        }
1092
                        /* Now fill in the remainder */
1093
0
                        memcpy(ptr_out, thresh_align, LAND_BITS * tile_remainder);
1094
0
                    }
1095
                    /* Apply the threshold operation */
1096
0
                    if (dev->color_info.polarity == GX_CINFO_POLARITY_SUBTRACTIVE
1097
0
                        && is_planar_dev) {
1098
0
                        gx_ht_threshold_landscape_sub(contone_align, thresh_align,
1099
0
                                            &(penum->ht_landscape), halftone, dest_height);
1100
0
                    } else {
1101
0
                        gx_ht_threshold_landscape(contone_align, thresh_align,
1102
0
                                            &(penum->ht_landscape), halftone, dest_height);
1103
0
                    }
1104
                    /* We may have a line left over that has to be maintained
1105
                       due to line replication in the resolution conversion. */
1106
0
                    if (width != penum->ht_landscape.count) {
1107
                        /* move the line do not reset the stuff */
1108
0
                        move_landscape_buffer(&(penum->ht_landscape),
1109
0
                                              contone_align, dest_height);
1110
0
                    }
1111
0
                }
1112
                /* Perform the copy mono */
1113
0
                if (penum->ht_landscape.index < 0) {
1114
0
                    if (!is_planar_dev) {
1115
0
                        (*dev_proc(dev, copy_mono))
1116
0
                                       (dev, penum->ht_buffer, 0, LAND_BITS>>3,
1117
0
                                        gx_no_bitmap_id,
1118
0
                                        penum->ht_landscape.xstart - width + 1,
1119
0
                                        penum->ht_landscape.y_pos,
1120
0
                                        width, dest_height,
1121
0
                                        dev_white, dev_black);
1122
0
                    } else {
1123
0
                        (*dev_proc(dev, copy_planes))
1124
0
                                       (dev, penum->ht_buffer, 0, LAND_BITS>>3,
1125
0
                                        gx_no_bitmap_id,
1126
0
                                        penum->ht_landscape.xstart - width + 1,
1127
0
                                        penum->ht_landscape.y_pos,
1128
0
                                        width, dest_height,
1129
0
                                        penum->ht_plane_height);
1130
0
                    }
1131
0
                } else {
1132
0
                    if (!is_planar_dev) {
1133
0
                        (*dev_proc(dev, copy_mono)) (dev, penum->ht_buffer, 0, LAND_BITS>>3,
1134
0
                                                     gx_no_bitmap_id,
1135
0
                                                     penum->ht_landscape.xstart,
1136
0
                                                     penum->ht_landscape.y_pos,
1137
0
                                                     width, dest_height,
1138
0
                                                     dev_white, dev_black);
1139
0
                    } else {
1140
0
                        (*dev_proc(dev, copy_planes)) (dev, penum->ht_buffer, 0, LAND_BITS>>3,
1141
0
                                                     gx_no_bitmap_id,
1142
0
                                                     penum->ht_landscape.xstart,
1143
0
                                                     penum->ht_landscape.y_pos,
1144
0
                                                     width, dest_height,
1145
0
                                                     penum->ht_plane_height);
1146
0
                    }
1147
0
                }
1148
0
                penum->ht_landscape.offset_set = false;
1149
0
                if (width != penum->ht_landscape.count) {
1150
0
                    reset_landscape_buffer(&(penum->ht_landscape),
1151
0
                                           contone_align, dest_height,
1152
0
                                           width);
1153
0
                } else {
1154
                    /* Reset the whole buffer */
1155
0
                    penum->ht_landscape.count = 0;
1156
0
                    if (penum->ht_landscape.index < 0) {
1157
                        /* Going right to left */
1158
0
                        penum->ht_landscape.curr_pos = LAND_BITS-1;
1159
0
                    } else {
1160
                        /* Going left to right */
1161
0
                        penum->ht_landscape.curr_pos = 0;
1162
0
                    }
1163
0
                    penum->ht_landscape.num_contones = 0;
1164
0
                    memset(&(penum->ht_landscape.widths[0]), 0, sizeof(int)*LAND_BITS);
1165
0
                }
1166
0
            }
1167
0
            break;
1168
0
        default:
1169
0
            return gs_rethrow(-1, "Invalid orientation for thresholding");
1170
0
    }
1171
0
    return 0;
1172
0
}
1173
1174
/* Return the absolute difference between the rounded integer position of
   the dda after advancing it by src_size steps and its rounded current
   position.  The advance is done on a local copy, so the caller's dda is
   left untouched. */
int gxht_dda_length(gx_dda_fixed *dda, int src_size)
{
    gx_dda_fixed advanced = (*dda);
    int start_pos, end_pos;

    dda_advance(advanced, src_size);
    end_pos = fixed2int_var_rounded(dda_current(advanced));
    start_pos = fixed2int_var_rounded(dda_current(*dda));
    return abs(end_pos - start_pos);
}