Coverage Report

Created: 2025-08-28 07:06

/src/ghostpdl/base/gxht_thresh.c
Line
Count
Source (jump to first uncovered line)
1
/* Copyright (C) 2001-2025 Artifex Software, Inc.
2
   All Rights Reserved.
3
4
   This software is provided AS-IS with no warranty, either express or
5
   implied.
6
7
   This software is distributed under license and may not be copied,
8
   modified or distributed except as expressly authorized under the terms
9
   of the license contained in the file LICENSE in this distribution.
10
11
   Refer to licensing information at http://www.artifex.com or contact
12
   Artifex Software, Inc.,  39 Mesa Street, Suite 108A, San Francisco,
13
   CA 94129, USA, for further information.
14
*/
15
16
17
/*$Id: gxhts_thresh.c  $ */
18
/* Halftone thresholding code */
19
20
#include <stdlib.h> /* abs() */
21
#include "memory_.h"
22
#include "gx.h"
23
#include "gxgstate.h"
24
#include "gsiparam.h"
25
#include "math_.h"
26
#include "gxfixed.h"  /* needed for gximage.h */
27
#include "gximage.h"
28
#include "gxdevice.h"
29
#include "gxdht.h"
30
#include "gxht_thresh.h"
31
#include "gzht.h"
32
#include "gxdevsop.h"
33
34
/* Enable the following define to perform a little extra work to stop
35
 * spurious valgrind errors. The code should perform perfectly even without
36
 * this enabled, but enabling it makes debugging much easier.
37
 */
38
/* #define PACIFY_VALGRIND */
39
40
/* __align16: compiler-specific spelling of a 16-byte alignment request for a
 * declaration.  The GCC-style attribute is used unless __WIN32__ is defined,
 * in which case the __declspec form is used instead. */
#ifndef __WIN32__
41
1
#define __align16  __attribute__((aligned(16)))
42
#else
43
#define __align16 __declspec(align(16))
44
#endif
45
2.03M
/* fastfloor(x): (int)(x), minus one when x is negative and not already equal
 * to its truncated value.  The argument is expanded several times, so it
 * should not have side effects. */
#define fastfloor(x) (((int)(x)) - (((x)<0) && ((x) != (float)(int)(x))))
46
47
#ifdef HAVE_SSE2
48
49
#include <emmintrin.h>
50
51
/* Bit-reversal lookup: bitreverse[n] is n with the order of its 8 bits
 * reversed.  Each pair of lines below covers 16 consecutive indices; within
 * such a group the high nibble walks through the reversed low nibble of the
 * index while the low nibble stays fixed. */
static const byte bitreverse[256] = {
    0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0,
    0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0,
    0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8,
    0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8,
    0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4,
    0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4,
    0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC,
    0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC,
    0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2,
    0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2,
    0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA,
    0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA,
    0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6,
    0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6,
    0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE,
    0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE,
    0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1,
    0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1,
    0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9,
    0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9,
    0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5,
    0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5,
    0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED,
    0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD,
    0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3,
    0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3,
    0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB,
    0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB,
    0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7,
    0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7,
    0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF,
    0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF
};
74
#endif
75
76
#if RAW_HT_DUMP
77
/* This is slow thresholding, byte output for debug only */
78
void
79
gx_ht_threshold_row_byte(byte *contone, byte *threshold_strip, int contone_stride,
80
                              byte *halftone, int dithered_stride, int width,
81
                              int num_rows)
82
{
83
    int k, j;
84
    byte *contone_ptr;
85
    byte *thresh_ptr;
86
    byte *halftone_ptr;
87
88
    /* For the moment just do a very slow compare until we get
89
       get this working */
90
    for (j = 0; j < num_rows; j++) {
91
        contone_ptr = contone;
92
        thresh_ptr = threshold_strip + contone_stride * j;
93
        halftone_ptr = halftone + dithered_stride * j;
94
        for (k = 0; k < width; k++) {
95
            if (contone_ptr[k] < thresh_ptr[k]) {
96
                halftone_ptr[k] = 0;
97
            } else {
98
                halftone_ptr[k] = 255;
99
            }
100
        }
101
    }
102
}
103
#endif
104
105
#ifndef HAVE_SSE2
106
/* A simple case for use in the landscape mode. Could probably be coded up
107
   faster */
108
static void
109
threshold_16_bit(byte *contone_ptr, byte *thresh_ptr, byte *ht_data)
110
{
111
    int j;
112
113
    for (j = 2; j > 0; j--) {
114
        byte h = 0;
115
        byte bit_init = 0x80;
116
        do {
117
            if (*contone_ptr++ < *thresh_ptr++) {
118
                h |=  bit_init;
119
            }
120
            bit_init >>= 1;
121
        } while (bit_init != 0);
122
        *ht_data++ = h;
123
    }
124
}
125
#else
126
/* Note this function has strict data alignment needs */
127
static void
128
threshold_16_SSE(byte *contone_ptr, byte *thresh_ptr, byte *ht_data)
129
287M
{
130
287M
    __m128i input1;
131
287M
    __m128i input2;
132
287M
    register int result_int;
133
287M
    const unsigned int mask1 = 0x80808080;
134
287M
    __m128i sign_fix = _mm_set_epi32(mask1, mask1, mask1, mask1);
135
136
    /* Load */
137
287M
    input1 = _mm_load_si128((const __m128i *)contone_ptr);
138
287M
    input2 = _mm_load_si128((const __m128i *) thresh_ptr);
139
    /* Unsigned subtraction does Unsigned saturation so we
140
       have to use the signed operation */
141
287M
    input1 = _mm_xor_si128(input1, sign_fix);
142
287M
    input2 = _mm_xor_si128(input2, sign_fix);
143
    /* Subtract the two */
144
287M
    input2 = _mm_subs_epi8(input1, input2);
145
    /* Grab the sign mask */
146
287M
    result_int = _mm_movemask_epi8(input2);
147
    /* bit wise reversal on 16 bit word */
148
287M
    ht_data[0] = bitreverse[(result_int & 0xff)];
149
287M
    ht_data[1] = bitreverse[((result_int >> 8) & 0xff)];
150
287M
}
151
152
/* Not so fussy on its alignment */
153
static void
154
threshold_16_SSE_unaligned(byte *contone_ptr, byte *thresh_ptr, byte *ht_data)
155
1.78M
{
156
1.78M
    __m128i input1;
157
1.78M
    __m128i input2;
158
1.78M
    int result_int;
159
1.78M
    byte *sse_data;
160
1.78M
    const unsigned int mask1 = 0x80808080;
161
1.78M
    __m128i sign_fix = _mm_set_epi32(mask1, mask1, mask1, mask1);
162
163
1.78M
    sse_data = (byte*) &(result_int);
164
    /* Load */
165
1.78M
    input1 = _mm_loadu_si128((const __m128i *)contone_ptr);
166
1.78M
    input2 = _mm_loadu_si128((const __m128i *) thresh_ptr);
167
    /* Unsigned subtraction does Unsigned saturation so we
168
       have to use the signed operation */
169
1.78M
    input1 = _mm_xor_si128(input1, sign_fix);
170
1.78M
    input2 = _mm_xor_si128(input2, sign_fix);
171
    /* Subtract the two */
172
1.78M
    input2 = _mm_subs_epi8(input1, input2);
173
    /* Grab the sign mask */
174
1.78M
    result_int = _mm_movemask_epi8(input2);
175
    /* bit wise reversal on 16 bit word */
176
1.78M
    ht_data[0] = bitreverse[sse_data[0]];
177
1.78M
    ht_data[1] = bitreverse[sse_data[1]];
178
1.78M
}
179
#endif
180
181
/* SSE2 and non-SSE2 implememntation of thresholding a row. Subtractive case
182
   There is some code replication between the two of these (additive and subtractive)
183
   that I need to go back and determine how we can combine them without
184
   any performance loss. */
185
void
186
gx_ht_threshold_row_bit_sub(byte *contone,  byte *threshold_strip,  int contone_stride,
187
                  byte *halftone, int dithered_stride, int width,
188
                  int num_rows, int offset_bits)
189
107k
{
190
#ifndef HAVE_SSE2
191
    int k, j;
192
    byte *contone_ptr;
193
    byte *thresh_ptr;
194
    byte *halftone_ptr;
195
    byte bit_init;
196
197
    /* For the moment just do a very slow compare until we get
198
       get this working.  This could use some serious optimization */
199
    width -= offset_bits;
200
    for (j = 0; j < num_rows; j++) {
201
        byte h;
202
        contone_ptr = contone;
203
        thresh_ptr = threshold_strip + contone_stride * j;
204
        halftone_ptr = halftone + dithered_stride * j;
205
        /* First get the left remainder portion.  Put into MSBs of first byte */
206
        bit_init = 0x80;
207
        h = 0;
208
        k = offset_bits;
209
        if (k > 0) {
210
            do {
211
                if (*contone_ptr++ > *thresh_ptr++) {
212
                    h |=  bit_init;
213
                }
214
                bit_init >>= 1;
215
                if (bit_init == 0) {
216
                    bit_init = 0x80;
217
                    *halftone_ptr++ = h;
218
                    h = 0;
219
                }
220
                k--;
221
            } while (k > 0);
222
            bit_init = 0x80;
223
            *halftone_ptr++ = h;
224
            h = 0;
225
            if (offset_bits < 8)
226
                *halftone_ptr++ = 0;
227
        }
228
        /* Now get the rest, which will be 16 bit aligned. */
229
        k = width;
230
        if (k > 0) {
231
            do {
232
                if (*contone_ptr++ > *thresh_ptr++) {
233
                    h |=  bit_init;
234
                }
235
                bit_init >>= 1;
236
                if (bit_init == 0) {
237
                    bit_init = 0x80;
238
                    *halftone_ptr++ = h;
239
                    h = 0;
240
                }
241
                k--;
242
            } while (k > 0);
243
            if (bit_init != 0x80) {
244
                *halftone_ptr++ = h;
245
            }
246
            if ((width & 15) < 8)
247
                *halftone_ptr++ = 0;
248
        }
249
    }
250
#else
251
107k
    byte *contone_ptr;
252
107k
    byte *thresh_ptr;
253
107k
    byte *halftone_ptr;
254
107k
    int num_tiles = (width - offset_bits + 15)>>4;
255
107k
    int k, j;
256
257
454k
    for (j = 0; j < num_rows; j++) {
258
        /* contone and thresh_ptr are 128 bit aligned.  We do need to do this in
259
           two steps to ensure that we pack the bits in an aligned fashion
260
           into halftone_ptr.  */
261
347k
        contone_ptr = contone;
262
347k
        thresh_ptr = threshold_strip + contone_stride * j;
263
347k
        halftone_ptr = halftone + dithered_stride * j;
264
347k
        if (offset_bits > 0) {
265
            /* Since we allowed for 16 bits in our left remainder
266
               we can go directly in to the destination.  threshold_16_SSE
267
               requires 128 bit alignment.  contone_ptr and thresh_ptr
268
               are set up so that after we move in by offset_bits elements
269
               then we are 128 bit aligned.  */
270
97.4k
            threshold_16_SSE_unaligned(thresh_ptr, contone_ptr,
271
97.4k
                                       halftone_ptr);
272
97.4k
            halftone_ptr += 2;
273
97.4k
            thresh_ptr += offset_bits;
274
97.4k
            contone_ptr += offset_bits;
275
97.4k
        }
276
        /* Now we should have 128 bit aligned with our input data. Iterate
277
           over sets of 16 going directly into our HT buffer.  Sources and
278
           halftone_ptr buffers should be padded to allow 15 bit overrun */
279
24.6M
        for (k = 0; k < num_tiles; k++) {
280
24.3M
            threshold_16_SSE(thresh_ptr, contone_ptr, halftone_ptr);
281
24.3M
            thresh_ptr += 16;
282
24.3M
            contone_ptr += 16;
283
24.3M
            halftone_ptr += 2;
284
24.3M
        }
285
347k
    }
286
107k
#endif
287
107k
}
288
289
/* SSE2 and non-SSE2 implememntation of thresholding a row. additive case  */
290
void
291
gx_ht_threshold_row_bit(byte *contone,  byte *threshold_strip,  int contone_stride,
292
                  byte *halftone, int dithered_stride, int width,
293
                  int num_rows, int offset_bits)
294
1.92M
{
295
#ifndef HAVE_SSE2
296
    int k, j;
297
    byte *contone_ptr;
298
    byte *thresh_ptr;
299
    byte *halftone_ptr;
300
    byte bit_init;
301
302
    /* For the moment just do a very slow compare until we get
303
       get this working.  This could use some serious optimization */
304
    width -= offset_bits;
305
    for (j = 0; j < num_rows; j++) {
306
        byte h;
307
        contone_ptr = contone;
308
        thresh_ptr = threshold_strip + contone_stride * j;
309
        halftone_ptr = halftone + dithered_stride * j;
310
        /* First get the left remainder portion.  Put into MSBs of first byte */
311
        bit_init = 0x80;
312
        h = 0;
313
        k = offset_bits;
314
        if (k > 0) {
315
            do {
316
                if (*contone_ptr++ < *thresh_ptr++) {
317
                    h |=  bit_init;
318
                }
319
                bit_init >>= 1;
320
                if (bit_init == 0) {
321
                    bit_init = 0x80;
322
                    *halftone_ptr++ = h;
323
                    h = 0;
324
                }
325
                k--;
326
            } while (k > 0);
327
            bit_init = 0x80;
328
            *halftone_ptr++ = h;
329
            h = 0;
330
            if (offset_bits < 8)
331
                *halftone_ptr++ = 0;
332
        }
333
        /* Now get the rest, which will be 16 bit aligned. */
334
        k = width;
335
        if (k > 0) {
336
            do {
337
                if (*contone_ptr++ < *thresh_ptr++) {
338
                    h |=  bit_init;
339
                }
340
                bit_init >>= 1;
341
                if (bit_init == 0) {
342
                    bit_init = 0x80;
343
                    *halftone_ptr++ = h;
344
                    h = 0;
345
                }
346
                k--;
347
            } while (k > 0);
348
            if (bit_init != 0x80) {
349
                *halftone_ptr++ = h;
350
            }
351
            if ((width & 15) < 8)
352
                *halftone_ptr++ = 0;
353
        }
354
    }
355
#else
356
1.92M
    byte *contone_ptr;
357
1.92M
    byte *thresh_ptr;
358
1.92M
    byte *halftone_ptr;
359
1.92M
    int num_tiles = (width - offset_bits + 15)>>4;
360
1.92M
    int k, j;
361
362
4.15M
    for (j = 0; j < num_rows; j++) {
363
        /* contone and thresh_ptr are 128 bit aligned.  We do need to do this in
364
           two steps to ensure that we pack the bits in an aligned fashion
365
           into halftone_ptr.  */
366
2.22M
        contone_ptr = contone;
367
2.22M
        thresh_ptr = threshold_strip + contone_stride * j;
368
2.22M
        halftone_ptr = halftone + dithered_stride * j;
369
2.22M
        if (offset_bits > 0) {
370
            /* Since we allowed for 16 bits in our left remainder
371
               we can go directly in to the destination.  threshold_16_SSE
372
               requires 128 bit alignment.  contone_ptr and thresh_ptr
373
               are set up so that after we move in by offset_bits elements
374
               then we are 128 bit aligned.  */
375
1.68M
            threshold_16_SSE_unaligned(contone_ptr, thresh_ptr,
376
1.68M
                                       halftone_ptr);
377
1.68M
            halftone_ptr += 2;
378
1.68M
            thresh_ptr += offset_bits;
379
1.68M
            contone_ptr += offset_bits;
380
1.68M
        }
381
        /* Now we should have 128 bit aligned with our input data. Iterate
382
           over sets of 16 going directly into our HT buffer.  Sources and
383
           halftone_ptr buffers should be padded to allow 15 bit overrun */
384
265M
        for (k = 0; k < num_tiles; k++) {
385
263M
            threshold_16_SSE(contone_ptr, thresh_ptr, halftone_ptr);
386
263M
            thresh_ptr += 16;
387
263M
            contone_ptr += 16;
388
263M
            halftone_ptr += 2;
389
263M
        }
390
2.22M
    }
391
1.92M
#endif
392
1.92M
}
393
394
/* This thresholds a buffer that is LAND_BITS wide by data_length tall.
395
   Subtractive case */
396
void
397
gx_ht_threshold_landscape_sub(byte *contone_align, byte *thresh_align,
398
                    ht_landscape_info_t *ht_landscape, byte *halftone,
399
                    int data_length)
400
0
{
401
0
    __align16 byte contone[LAND_BITS];
402
0
    int position_start, position, curr_position;
403
0
    int *widths = &(ht_landscape->widths[0]);
404
0
    int local_widths[LAND_BITS];
405
0
    int num_contone = ht_landscape->num_contones;
406
0
    int k, j, w, contone_out_posit;
407
0
    byte *contone_ptr, *thresh_ptr, *halftone_ptr;
408
0
#ifdef PACIFY_VALGRIND
409
0
    int extra = 0;
410
0
#endif
411
412
    /* Work through chunks of 16.  */
413
    /* Data may have come in left to right or right to left. */
414
0
    if (ht_landscape->index > 0) {
415
0
        position = position_start = 0;
416
0
    } else {
417
0
        position = position_start = ht_landscape->curr_pos + 1;
418
0
    }
419
0
    thresh_ptr = thresh_align;
420
0
    halftone_ptr = halftone;
421
    /* Copy the widths to a local array, and truncate the last one (which may
422
     * be the first one!) if required. */
423
0
    k = 0;
424
0
    for (j = 0; j < num_contone; j++)
425
0
        k += (local_widths[j] = widths[position_start+j]);
426
0
    if (k > LAND_BITS) {
427
0
        if (ht_landscape->index > 0) {
428
0
            local_widths[num_contone-1] -= k-LAND_BITS;
429
0
        } else {
430
0
            local_widths[0] -= k-LAND_BITS;
431
0
        }
432
0
    }
433
0
#ifdef PACIFY_VALGRIND
434
0
    if (k < LAND_BITS) {
435
0
        extra = LAND_BITS - k;
436
0
    }
437
0
#endif
438
0
    for (k = data_length; k > 0; k--) { /* Loop on rows */
439
0
        contone_ptr = &(contone_align[position]); /* Point us to our row start */
440
0
        curr_position = 0; /* We use this in keeping track of widths */
441
0
        contone_out_posit = 0; /* Our index out */
442
0
        for (j = num_contone; j > 0; j--) {
443
0
            byte c = *contone_ptr;
444
            /* The microsoft compiler, cleverly spots that the following loop
445
             * can be replaced by a memset. Unfortunately, it can't spot that
446
             * the typical length values of the memset are so small that we'd
447
             * be better off doing it the slow way. We therefore introduce a
448
             * sneaky 'volatile' cast below that stops this optimisation. */
449
0
            w = local_widths[curr_position];
450
0
            do {
451
0
                ((volatile byte *)contone)[contone_out_posit] = c;
452
0
                contone_out_posit++;
453
0
            } while (--w);
454
0
#ifdef PACIFY_VALGRIND
455
0
            if (extra)
456
0
                memset(contone+contone_out_posit, 0, extra);
457
0
#endif
458
0
            curr_position++; /* Move us to the next position in our width array */
459
0
            contone_ptr++;   /* Move us to a new location in our contone buffer */
460
0
        }
461
        /* Now we have our left justified and expanded contone data for
462
           LAND_BITS/16 sets of 16 bits. Go ahead and threshold these. */
463
0
        contone_ptr = &contone[0];
464
0
#if LAND_BITS > 16
465
0
        j = LAND_BITS;
466
0
        do {
467
0
#endif
468
0
#ifdef HAVE_SSE2
469
0
            threshold_16_SSE(thresh_ptr, contone_ptr, halftone_ptr);
470
#else
471
            threshold_16_bit(thresh_ptr, contone_ptr, halftone_ptr);
472
#endif
473
0
            thresh_ptr += 16;
474
0
            position += 16;
475
0
            halftone_ptr += 2;
476
0
            contone_ptr += 16;
477
0
#if LAND_BITS > 16
478
0
            j -= 16;
479
0
        } while (j > 0);
480
0
#endif
481
0
    }
482
0
}
483
484
/* This thresholds a buffer that is LAND_BITS wide by data_length tall.
485
   Additive case.  Note I could likely do some code reduction between
486
   the additive and subtractive cases */
487
void
488
gx_ht_threshold_landscape(byte *contone_align, byte *thresh_align,
489
                    ht_landscape_info_t *ht_landscape, byte *halftone,
490
                    int data_length)
491
1
{
492
1
    __align16 byte contone[LAND_BITS];
493
1
    int position_start, position, curr_position;
494
1
    int *widths = &(ht_landscape->widths[0]);
495
1
    int local_widths[LAND_BITS];
496
1
    int num_contone = ht_landscape->num_contones;
497
1
    int k, j, w, contone_out_posit;
498
1
    byte *contone_ptr, *thresh_ptr, *halftone_ptr;
499
1
#ifdef PACIFY_VALGRIND
500
1
    int extra = 0;
501
1
#endif
502
503
    /* Work through chunks of 16.  */
504
    /* Data may have come in left to right or right to left. */
505
1
    if (ht_landscape->index > 0) {
506
1
        position = position_start = 0;
507
1
    } else {
508
0
        position = position_start = ht_landscape->curr_pos + 1;
509
0
    }
510
1
    thresh_ptr = thresh_align;
511
1
    halftone_ptr = halftone;
512
    /* Copy the widths to a local array, and truncate the last one (which may
513
     * be the first one!) if required. */
514
1
    k = 0;
515
2
    for (j = 0; j < num_contone; j++)
516
1
        k += (local_widths[j] = widths[position_start+j]);
517
1
    if (k > LAND_BITS) {
518
0
        if (ht_landscape->index > 0) {
519
0
            local_widths[num_contone-1] -= k-LAND_BITS;
520
0
        } else {
521
0
            local_widths[0] -= k-LAND_BITS;
522
0
        }
523
0
    }
524
1
#ifdef PACIFY_VALGRIND
525
1
    if (k < LAND_BITS) {
526
1
        extra = LAND_BITS - k;
527
1
    }
528
1
#endif
529
4
    for (k = data_length; k > 0; k--) { /* Loop on rows */
530
3
        contone_ptr = &(contone_align[position]); /* Point us to our row start */
531
3
        curr_position = 0; /* We use this in keeping track of widths */
532
3
        contone_out_posit = 0; /* Our index out */
533
6
        for (j = num_contone; j > 0; j--) {
534
3
            byte c = *contone_ptr;
535
            /* The microsoft compiler, cleverly spots that the following loop
536
             * can be replaced by a memset. Unfortunately, it can't spot that
537
             * the typical length values of the memset are so small that we'd
538
             * be better off doing it the slow way. We therefore introduce a
539
             * sneaky 'volatile' cast below that stops this optimisation. */
540
3
            w = local_widths[curr_position];
541
9
            do {
542
9
                ((volatile byte *)contone)[contone_out_posit] = c;
543
9
                contone_out_posit++;
544
9
            } while (--w);
545
3
#ifdef PACIFY_VALGRIND
546
3
            if (extra)
547
3
                memset(contone+contone_out_posit, 0, extra);
548
3
#endif
549
3
            curr_position++; /* Move us to the next position in our width array */
550
3
            contone_ptr++;   /* Move us to a new location in our contone buffer */
551
3
        }
552
        /* Now we have our left justified and expanded contone data for
553
           LAND_BITS/16 sets of 16 bits. Go ahead and threshold these. */
554
3
        contone_ptr = &contone[0];
555
3
#if LAND_BITS > 16
556
3
        j = LAND_BITS;
557
12
        do {
558
12
#endif
559
12
#ifdef HAVE_SSE2
560
12
            threshold_16_SSE(contone_ptr, thresh_ptr, halftone_ptr);
561
#else
562
            threshold_16_bit(contone_ptr, thresh_ptr, halftone_ptr);
563
#endif
564
12
            thresh_ptr += 16;
565
12
            position += 16;
566
12
            halftone_ptr += 2;
567
12
            contone_ptr += 16;
568
12
#if LAND_BITS > 16
569
12
            j -= 16;
570
12
        } while (j > 0);
571
3
#endif
572
3
    }
573
1
}
574
575
int
576
gxht_thresh_image_init(gx_image_enum *penum)
577
150k
{
578
150k
    int code = 0;
579
150k
    fixed ox;
580
150k
    int temp;
581
150k
    int dev_width, max_height;
582
150k
    int spp_out;
583
150k
    int k;
584
150k
    gx_ht_order *d_order;
585
150k
    gx_dda_fixed dda_ht;
586
587
150k
    if (gx_device_must_halftone(penum->dev)) {
588
150k
        if (penum->pgs != NULL && penum->pgs->dev_ht[HT_OBJTYPE_DEFAULT] != NULL) {
589
150k
            gx_device_halftone *pdht = gx_select_dev_ht(penum->pgs);
590
591
302k
            for (k = 0; k < pdht->num_comp; k++) {
592
151k
                d_order = &(pdht->components[k].corder);
593
151k
                code = gx_ht_construct_threshold(d_order, penum->dev,
594
151k
                                                 penum->pgs, k);
595
151k
                if (code < 0 ) {
596
0
                    return gs_rethrow(code, "threshold creation failed");
597
0
                }
598
151k
            }
599
150k
        } else {
600
0
            return -1;
601
0
        }
602
150k
    }
603
150k
    spp_out = penum->dev->color_info.num_components;
604
    /* Precompute values needed for rasterizing. */
605
150k
    penum->dxx = float2fixed(penum->matrix.xx + fixed2float(fixed_epsilon) / 2);
606
    /* If the image is landscaped then we want to maintain a buffer
607
       that is sufficiently large so that we can hold a byte
608
       of halftoned data along the column.  This way we avoid doing
609
       multiple writes into the same position over and over.
610
       The size of the buffer we need depends upon the bitdepth of
611
       the output device, the number of device coloranants and the
612
       number of  colorants in the source space.  Note we will
613
       need to eventually  consider  multi-level halftone case
614
       here too.  For now, to make use of the SSE2 stuff, we would
615
       like to have a multiple of 16 bytes of data to process at a time.
616
       So we will collect the columns of data in a buffer that is LAND_BITS
617
       wide.  We will also keep track of the widths of each column.  When
618
       the total width count reaches LAND_BITS, we will create our
619
       threshold array and apply it.  We may have one column that is
620
       buffered between calls in this case.  Also if a call is made
621
       with h=0 we will flush the buffer as we are at the end of the
622
       data.  */
623
150k
    if (penum->posture == image_landscape) {
624
3
        int col_length = fixed2int_var_rounded(any_abs(penum->x_extent.y));
625
3
        dda_ht = penum->dda.pixel0.y;
626
3
        if (penum->dxx > 0)
627
3
            dda_translate(dda_ht, -fixed_epsilon);      /* to match rounding in non-fast code */
628
629
3
        ox = dda_current(penum->dda.pixel0.x);
630
3
        temp = gxht_dda_length(&dda_ht, penum->rect.w);
631
3
        if (col_length < temp)
632
0
            col_length = temp;          /* choose max to make sure line_size is large enough */
633
3
        temp = (col_length + LAND_BITS)/LAND_BITS;      /* round up to allow for offset bits */
634
        /* bitmap_raster() expects the width in bits, hence "* 8" */
635
3
        penum->line_size = bitmap_raster((temp * LAND_BITS) * 8);  /* The stride */
636
        /* Now we need at most LAND_BITS of these */
637
3
        penum->line = gs_alloc_bytes(penum->memory,
638
3
                                     LAND_BITS * (size_t)penum->line_size * spp_out + 16,
639
3
                                     "gxht_thresh");
640
        /* Same with this.  However, we only need one plane here */
641
3
        penum->thresh_buffer = gs_alloc_bytes(penum->memory,
642
3
                                           (size_t)penum->line_size * LAND_BITS + 16,
643
3
                                           "gxht_thresh");
644
        /* That maps into (LAND_BITS/8) bytes of Halftone data */
645
3
        penum->ht_buffer =
646
3
                        gs_alloc_bytes(penum->memory,
647
3
                           (size_t)penum->line_size * (LAND_BITS>>3) * spp_out,
648
3
                           "gxht_thresh");
649
3
        penum->ht_plane_height = penum->line_size;
650
3
        penum->ht_stride = penum->line_size;
651
3
        if (penum->line == NULL || penum->thresh_buffer == NULL
652
3
                    || penum->ht_buffer == NULL)
653
0
            return -1;
654
3
        penum->ht_landscape.count = 0;
655
3
        penum->ht_landscape.num_contones = 0;
656
3
        if (penum->y_extent.x < 0) {
657
            /* Going right to left */
658
1
            penum->ht_landscape.curr_pos = LAND_BITS-1;
659
1
            penum->ht_landscape.index = -1;
660
2
        } else {
661
            /* Going left to right */
662
2
            penum->ht_landscape.curr_pos = 0;
663
2
            penum->ht_landscape.index = 1;
664
2
        }
665
3
        if (penum->x_extent.y < 0) {
666
0
            penum->ht_landscape.flipy = true;
667
0
            penum->ht_landscape.y_pos =
668
0
                fixed2int_pixround_perfect(dda_current(penum->dda.pixel0.y) + penum->x_extent.y);
669
3
        } else {
670
3
            penum->ht_landscape.flipy = false;
671
3
            penum->ht_landscape.y_pos =
672
3
                fixed2int_pixround_perfect(dda_current(penum->dda.pixel0.y));
673
3
        }
674
3
        memset(&(penum->ht_landscape.widths[0]), 0, sizeof(int)*LAND_BITS);
675
3
        penum->ht_landscape.offset_set = false;
676
3
        penum->ht_offset_bits = 0; /* Will get set in call to render */
677
3
        if (code >= 0) {
678
3
#if defined(DEBUG) || defined(PACIFY_VALGRIND)
679
3
            memset(penum->line, 0, LAND_BITS * penum->line_size * spp_out + 16);
680
3
            memset(penum->ht_buffer, 0, penum->line_size * (LAND_BITS>>3) * spp_out);
681
3
            memset(penum->thresh_buffer, 0, LAND_BITS * penum->line_size + 16);
682
3
#endif
683
3
        }
684
150k
    } else {
685
        /* In the portrait case we allocate a single line buffer
686
           in device width, a threshold buffer of the same size
687
           and possibly wider and the buffer for the halftoned
688
           bits. We have to do a bit of work to enable 16 byte
689
           boundary after an offset to ensure that we can make use
690
           of  the SSE2 operations for thresholding.  We do the
691
           allocations now to avoid doing them with every line */
692
150k
        dda_ht = penum->dda.pixel0.x;
693
150k
        if (penum->dxx > 0)
694
150k
            dda_translate(dda_ht, -fixed_epsilon);      /* to match rounding in non-fast code */
695
        /* Initialize the ht_landscape stuff to zero */
696
150k
        memset(&(penum->ht_landscape), 0, sizeof(ht_landscape_info_t));
697
150k
        ox = dda_current(dda_ht);
698
150k
        dev_width = gxht_dda_length(&dda_ht, penum->rect.w);
699
        /* Get the bit position so that we can do a copy_mono for
700
           the left remainder and then 16 bit aligned copies for the
701
           rest.  The right remainder will be OK as it will land in
702
           the MSBit positions. Note the #define chunk bits16 in
703
           gdevm1.c.  Allow also for a 15 sample over run.
704
        */
705
150k
        penum->ht_offset_bits = (-fixed2int_var_rounded(ox)) & (bitmap_raster(1) - 1);
706
150k
        if (penum->ht_offset_bits > 0) {
707
108k
            penum->ht_stride = bitmap_raster((7 + (dev_width + 4)) + (ARCH_SIZEOF_LONG * 8));
708
108k
        } else {
709
42.0k
            penum->ht_stride = bitmap_raster((7 + (dev_width + 2)) + (ARCH_SIZEOF_LONG * 8));
710
42.0k
        }
711
        /* We want to figure out the maximum height that we may
712
           have in taking a single source row and going to device
713
           space */
714
150k
        max_height = (int) ceil(fixed2float(any_abs(penum->dst_height)) /
715
150k
                                            (float) penum->Height);
716
150k
        if (max_height <= 0)
717
6
            return -1;    /* shouldn't happen, but check so we don't div by zero */
718
150k
        if (penum->ht_stride * spp_out > max_int / max_height)
719
0
            return -1;         /* overflow */
720
721
150k
        penum->ht_buffer =
722
150k
                        gs_alloc_bytes(penum->memory,
723
150k
                           (size_t)penum->ht_stride * max_height * spp_out,
724
150k
                           "gxht_thresh");
725
150k
        penum->ht_plane_height = penum->ht_stride * max_height;
726
        /* We want to have 128 bit alignement for our contone and
727
           threshold strips so that we can use SSE operations
728
           in the threshold operation.  Add in a minor buffer and offset
729
           to ensure this.  If gs_alloc_bytes provides at least 16
730
           bit alignment so we may need to move 14 bytes.  However, the
731
           HT process is split in two operations.  One that involves
732
           the HT of a left remainder and the rest which ensures that
733
           we pack in the HT data in the bits with no skew for a fast
734
           copy into the gdevm1 device (16 bit copies).  So, we
735
           need to account for those pixels which occur first and which
736
           are NOT aligned for the contone buffer.  After we offset
737
           by this remainder portion we should be 128 bit aligned.
738
           Also allow a 15 sample over run during the execution.  */
739
150k
        temp = (int) ceil((float) ((dev_width + 15.0) + 15.0)/16.0);
740
150k
        penum->line_size = bitmap_raster(temp * 16 * 8);  /* The stride */
741
150k
        if (penum->line_size > max_int / max_height) {
742
0
            gs_free_object(penum->memory, penum->ht_buffer, "gxht_thresh");
743
0
            penum->ht_buffer = NULL;
744
0
            return -1;         /* thresh_buffer size overflow */
745
0
        }
746
150k
        penum->line = gs_alloc_bytes(penum->memory, (size_t)penum->line_size * spp_out,
747
150k
                                     "gxht_thresh");
748
150k
        penum->thresh_buffer = gs_alloc_bytes(penum->memory,
749
150k
                                              (size_t)penum->line_size * max_height,
750
150k
                                              "gxht_thresh");
751
150k
        if (penum->line == NULL || penum->thresh_buffer == NULL ||
752
150k
            penum->ht_buffer == NULL) {
753
0
            return -1;
754
150k
        } else {
755
150k
#if defined(DEBUG) || defined(PACIFY_VALGRIND)
756
150k
            memset(penum->line, 0, penum->line_size * spp_out);
757
150k
            memset(penum->ht_buffer, 0, penum->ht_stride * max_height * spp_out);
758
150k
            memset(penum->thresh_buffer, 0, penum->line_size * max_height);
759
150k
#endif
760
150k
        }
761
150k
    }
762
150k
    return code;
763
150k
}
764
765
/* Tile one row of a threshold array across a destination strip.
 *
 *   dest_strip   start of the destination row being filled.
 *   src          start of the whole threshold array; only used as the lower
 *                bound below which we must not read.
 *   src_strip    start of the threshold row that is being tiled.
 *   src_width    number of bytes in one threshold row.
 *   left_offset  offset into src_strip of the first byte copied; may be
 *                negative (see below).
 *   left_width   number of bytes in the leading partial tile.
 *   num_tiles    number of complete threshold rows copied after that.
 *   right_width  number of bytes in the trailing partial tile.
 *
 * The output is: left_width bytes from src_strip + left_offset, then
 * num_tiles copies of the full row, then the first right_width bytes of
 * the row.
 */
static void
fill_threshold_buffer(byte *dest_strip, byte *src, byte *src_strip, int src_width,
                       int left_offset, int left_width, int num_tiles,
                       int right_width)
{
    byte *ptr_out_temp = dest_strip;
    /* Distance of this row from the start of the threshold array.  Working
     * with this integer avoids ever forming a pointer that lies before the
     * start of the array. */
    const int row_start = (int)(src_strip - src);
    int ii;

    /* Make sure we don't try and read before the start of the threshold array. This can happen
     * if we drop to the beginning of the array, AND we have a negative left_offset. If we do
     * have a negative left_offset this represents an area we won't actually be using, but we need
     * to move along the threshold array until we get to the point where we copy data we will use.
     * So lets simply avoid reading before the start of the data. We can leave the destination
     * buffer uninitialised because we won't be reading from that area. Bug #706795 but the ASAN
     * error occurs on a number of input files in the test suite.
     */
    if (left_offset < -row_start) {
        int under = -row_start - left_offset;

        left_offset += under;
        ptr_out_temp += under;
        left_width -= under;
        if (left_width < 0)
            left_width = 0;
    }
    /* Left part */
    memcpy(ptr_out_temp, src_strip + left_offset, left_width);
    ptr_out_temp += left_width;
    /* Now the full parts */
    for (ii = 0; ii < num_tiles; ii++){
        memcpy(ptr_out_temp, src_strip, src_width);
        ptr_out_temp += src_width;
    }
    /* Now the remainder */
    memcpy(ptr_out_temp, src_strip, right_width);
#ifdef PACIFY_VALGRIND
    /* Zero the bytes between the end of the tiled data and the next
     * LAND_BITS boundary (measured from dest_strip).  The previous
     * expression, (dest_strip - ptr_out_temp) % (LAND_BITS-1), could never
     * be positive, so the padding was never actually cleared.
     * NOTE(review): this relies on the destination row having slack up to
     * that boundary - confirm against the thresh_buffer allocation. */
    ptr_out_temp += right_width;
    ii = (int)((ptr_out_temp - dest_strip) % LAND_BITS);
    if (ii > 0)
        memset(ptr_out_temp, 0, LAND_BITS - ii);
#endif
}
806
/* This only moves the data but does not do a reset of the variables.  Used
807
   for case where we have multiple bands of data (e.g. CMYK output) */
808
static void
809
move_landscape_buffer(ht_landscape_info_t *ht_landscape, byte *contone_align,
810
                       int data_length)
811
0
{
812
0
    int k;
813
0
    int position_curr, position_new;
814
815
0
    if (ht_landscape->index < 0) {
816
        /* Moving right to left, move column to far right */
817
0
        position_curr = ht_landscape->curr_pos + 1;
818
0
        position_new = LAND_BITS-1;
819
0
    } else {
820
        /* Moving left to right, move column to far left */
821
0
        position_curr = ht_landscape->curr_pos - 1;
822
0
        position_new = 0;
823
0
    }
824
0
    if (position_curr != position_new) {
825
0
        for (k = 0; k < data_length; k++) {
826
0
                contone_align[position_new] = contone_align[position_curr];
827
0
                position_curr += LAND_BITS;
828
0
                position_new += LAND_BITS;
829
0
        }
830
0
    }
831
0
}
832
833
834
/* If we are in here, we had data left over.  Move it to the proper position
835
   and get ht_landscape_info_t set properly */
836
static void
837
reset_landscape_buffer(ht_landscape_info_t *ht_landscape, byte *contone_align,
838
                       int data_length, int num_used)
839
0
{
840
0
    int delta;
841
0
    int curr_x_pos = ht_landscape->xstart;
842
843
0
    if (ht_landscape->index < 0) {
844
        /* Moving right to left, move column to far right */
845
0
        delta = ht_landscape->count - num_used;
846
0
        memset(&(ht_landscape->widths[0]), 0, sizeof(int)*LAND_BITS);
847
0
        ht_landscape->widths[LAND_BITS-1] = delta;
848
0
        ht_landscape->curr_pos = LAND_BITS-2;
849
0
        ht_landscape->xstart = curr_x_pos - num_used;
850
0
    } else {
851
        /* Moving left to right, move column to far left */
852
0
        delta = ht_landscape->count - num_used;
853
0
        memset(&(ht_landscape->widths[0]), 0, sizeof(int)*LAND_BITS);
854
0
        ht_landscape->widths[0] = delta;
855
0
        ht_landscape->curr_pos = 1;
856
0
        ht_landscape->xstart = curr_x_pos + num_used;
857
0
    }
858
0
    ht_landscape->count = delta;
859
0
    ht_landscape->num_contones = 1;
860
0
}
861
862
/* This performs a thresholding operation on multiple planes of data and
863
   stores the bits into a planar buffer which can then be used for
864
   copy_planes */
865
int
866
gxht_thresh_planes(gx_image_enum *penum, fixed xrun,
867
                   int dest_width, int dest_height,
868
                   byte *thresh_align, gx_device * dev, int offset_contone[],
869
                   int contone_stride)
870
1.95M
{
871
1.95M
    int thresh_width, thresh_height, dx;
872
1.95M
    int left_rem_end, left_width, vdi;
873
1.95M
    int num_full_tiles, right_tile_width;
874
1.95M
    int k, jj, dy, j;
875
1.95M
    byte *thresh_tile;
876
1.95M
    int position;
877
1.95M
    bool replicate_tile;
878
1.95M
    image_posture posture = penum->posture;
879
1.95M
    const int y_pos = penum->yci;
880
1.95M
    int width = 0; /* Init to silence compiler warnings */
881
1.95M
    byte *ptr_out, *row_ptr, *ptr_out_temp;
882
1.95M
    byte *threshold;
883
1.95M
    int init_tile, in_row_offset, ii, num_tiles, tile_remainder;
884
1.95M
    int offset_bits = penum->ht_offset_bits;
885
1.95M
    byte *halftone;
886
1.95M
    int dithered_stride = penum->ht_stride;
887
1.95M
    bool is_planar_dev = dev->num_planar_planes;
888
1.95M
    gx_color_index dev_white = gx_device_white(dev);
889
1.95M
    gx_color_index dev_black = gx_device_black(dev);
890
1.95M
    int spp_out = dev->color_info.num_components;
891
1.95M
    byte *contone_align = NULL; /* Init to silence compiler warnings */
892
1.95M
    gx_device_halftone *pdht = gx_select_dev_ht(penum->pgs);
893
894
    /* Go ahead and fill the threshold line buffer with tiled threshold values.
895
       First just grab the row or column that we are going to tile with and
896
       then do memcpy into the buffer */
897
898
    /* Figure out the tile steps.  Left offset, Number of tiles, Right offset. */
899
1.95M
    switch (posture) {
900
1.95M
        case image_portrait:
901
1.95M
            vdi = penum->hci;
902
            /*  Iterate over the vdi and fill up our threshold buffer.  We
903
                 also need to loop across the planes of data */
904
3.99M
            for (j = 0; j < spp_out; j++) {
905
2.03M
                bool threshold_inverted = pdht->components[j].corder.threshold_inverted;
906
907
2.03M
                thresh_width = pdht->components[j].corder.width;
908
2.03M
                thresh_height = pdht->components[j].corder.full_height;
909
2.03M
                halftone = penum->ht_buffer + j * vdi * dithered_stride;
910
                /* Compute the tiling positions with dest_width */
911
2.03M
                dx = (fixed2int_var_rounded(xrun) + penum->pgs->screen_phase[0].x) % thresh_width;
912
                /* Left remainder part */
913
2.03M
                left_rem_end = min(dx + dest_width, thresh_width);
914
                /* The left width of our tile part */
915
2.03M
                left_width = left_rem_end - dx;
916
                /* Now the middle part */
917
2.03M
                num_full_tiles =
918
2.03M
                    (int)fastfloor((dest_width - left_width)/ (float) thresh_width);
919
                /* Now the right part */
920
2.03M
                right_tile_width = dest_width -  num_full_tiles * thresh_width -
921
2.03M
                                   left_width;
922
                /* Get the proper threshold for the colorant count */
923
2.03M
                threshold = pdht->components[j].corder.threshold;
924
2.03M
                if (threshold == NULL)
925
0
                    return_error(gs_error_unregistered);
926
                /* Point to the proper contone data */
927
2.03M
                contone_align = penum->line + contone_stride * j +
928
2.03M
                                offset_contone[j];
929
4.60M
                for (k = 0; k < vdi; k++) {
930
                    /* Get a pointer to our tile row */
931
2.56M
                    dy = (penum->yci + k -
932
2.56M
                          penum->pgs->screen_phase[0].y) % thresh_height;
933
2.56M
                    if (dy < 0)
934
29
                        dy += thresh_height;
935
2.56M
                    thresh_tile = threshold + thresh_width * dy;
936
                    /* Fill the buffer, can be multiple rows.  Make sure
937
                       to update with stride */
938
2.56M
                    position = contone_stride * k;
939
                    /* Tile into the 128 bit aligned threshold strip */
940
2.56M
                    fill_threshold_buffer(&(thresh_align[position]), threshold,
941
2.56M
                                           thresh_tile, thresh_width, dx, left_width,
942
2.56M
                                           num_full_tiles, right_tile_width);
943
2.56M
                }
944
                /* Apply the threshold operation */
945
2.03M
                if (offset_bits > dest_width)
946
10.4k
                    offset_bits = dest_width;
947
948
2.03M
                if (threshold_inverted ||
949
2.03M
                    (dev->color_info.polarity == GX_CINFO_POLARITY_SUBTRACTIVE && is_planar_dev)) {
950
107k
                    gx_ht_threshold_row_bit_sub(contone_align, thresh_align, contone_stride,
951
107k
                                      halftone, dithered_stride, dest_width, vdi,
952
107k
                                      offset_bits);
953
1.92M
                } else {
954
1.92M
                    gx_ht_threshold_row_bit(contone_align, thresh_align, contone_stride,
955
1.92M
                          halftone, dithered_stride, dest_width, vdi,
956
1.92M
                          offset_bits);
957
1.92M
                }
958
2.03M
            }
959
            /* FIXME: An improvement here would be to generate the initial
960
             * offset_bits at the correct offset within the byte so that they
961
             * align with the remainder of the line. This would mean not
962
             * always packing them into the first offset_bits (in MSB order)
963
             * of our 16 bit word, but rather into the last offset_bits
964
             * (in MSB order) (except when the entire run is small!).
965
             *
966
             * This would enable us to do just one aligned copy_mono call for
967
             * the entire scanline. */
968
            /* Now do the copy mono or copy plane operation */
969
            /* First the left remainder bits */
970
1.95M
            if (offset_bits > 0) {
971
1.45M
                int x_pos = fixed2int_var_rounded(xrun);
972
1.45M
                if (!is_planar_dev) {
973
1.43M
                    (*dev_proc(dev, copy_mono)) (dev, penum->ht_buffer, 0, dithered_stride,
974
1.43M
                                                 gx_no_bitmap_id, x_pos, y_pos,
975
1.43M
                                                 offset_bits, vdi, dev_white,
976
1.43M
                                                 dev_black);
977
1.43M
                } else {
978
12.6k
                    (*dev_proc(dev, copy_planes)) (dev, penum->ht_buffer, 0, dithered_stride,
979
12.6k
                                                 gx_no_bitmap_id, x_pos, y_pos,
980
12.6k
                                                 offset_bits, vdi, vdi);
981
12.6k
                }
982
1.45M
            }
983
1.95M
            if ((dest_width - offset_bits) > 0 ) {
984
                /* Now the primary aligned bytes */
985
1.94M
                int curr_width = dest_width - offset_bits;
986
1.94M
                int x_pos = fixed2int_var_rounded(xrun) + offset_bits;
987
                /* FIXME: This assumes the allowed offset_bits will always be <= 16 */
988
1.94M
                int xoffs = offset_bits > 0 ? 16 : 0;
989
990
1.94M
                if (!is_planar_dev) {
991
1.91M
                    (*dev_proc(dev, copy_mono)) (dev, penum->ht_buffer, xoffs, dithered_stride,
992
1.91M
                                                 gx_no_bitmap_id, x_pos, y_pos,
993
1.91M
                                                 curr_width, vdi, dev_white,
994
1.91M
                                                 dev_black);
995
1.91M
                } else {
996
26.6k
                    (*dev_proc(dev, copy_planes)) (dev, penum->ht_buffer, xoffs, dithered_stride,
997
26.6k
                                                 gx_no_bitmap_id, x_pos, y_pos,
998
26.6k
                                                 curr_width, vdi, vdi);
999
26.6k
                }
1000
1.94M
            }
1001
1002
1.95M
            break;
1003
2
        case image_landscape:
1004
            /* Go ahead and paint the chunk if we have LAND_BITS values or a
1005
             * partial to get us in sync with the 1 bit devices 16 bit
1006
             * positions. */
1007
2
            vdi = penum->wci;
1008
                /* Now do the haftoning into our buffer.  We basically check
1009
                   first if we have enough data or are all done */
1010
3
            while ( (penum->ht_landscape.count >= LAND_BITS ||
1011
3
                   ((penum->ht_landscape.count >= offset_bits) &&
1012
3
                    penum->ht_landscape.offset_set))) {
1013
                /* Go ahead and 2D tile in the threshold buffer at this time */
1014
                /* Always work the tiling from the upper left corner of our
1015
                   LAND_BITS columns */
1016
2
                for (j = 0; j < spp_out; j++) {
1017
1
                    halftone = penum->ht_buffer +
1018
1
                                   j * penum->ht_plane_height * (LAND_BITS>>3);
1019
1
                    thresh_width = pdht->components[j].corder.width;
1020
1
                    thresh_height =
1021
1
                          pdht->components[j].corder.full_height;
1022
                    /* Get the proper threshold for the colorant count */
1023
1
                    threshold = pdht->components[j].corder.threshold;
1024
1
                    if (threshold == NULL)
1025
0
                        return_error(gs_error_unregistered);
1026
                    /* Point to the proper contone data */
1027
1
                    contone_align = penum->line + offset_contone[j] +
1028
1
                                      LAND_BITS * j * contone_stride;
1029
1
                    if (penum->ht_landscape.offset_set) {
1030
1
                        width = offset_bits;
1031
1
                    } else {
1032
0
                        width = LAND_BITS;
1033
0
                    }
1034
1
                    if (penum->y_extent.x < 0) {
1035
0
                        dx = penum->ht_landscape.xstart - width + 1;
1036
1
                    } else {
1037
1
                        dx = penum->ht_landscape.xstart;
1038
1
                    }
1039
1
                    dx = (dx + penum->pgs->screen_phase[0].x) % thresh_width;
1040
1
                    if (dx < 0)
1041
0
                        dx += thresh_width;
1042
1
                    dy = (penum->ht_landscape.y_pos -
1043
1
                              penum->pgs->screen_phase[0].y) % thresh_height;
1044
1
                    if (dy < 0)
1045
0
                        dy += thresh_height;
1046
                    /* Left remainder part */
1047
1
                    left_rem_end = min(dx + LAND_BITS, thresh_width);
1048
1
                    left_width = left_rem_end - dx;
1049
                    /* Now the middle part */
1050
1
                    num_full_tiles = (LAND_BITS - left_width) / thresh_width;
1051
                    /* Now the right part */
1052
1
                    right_tile_width =
1053
1
                        LAND_BITS - num_full_tiles * thresh_width - left_width;
1054
                    /* Now loop over the y stuff */
1055
1
                    ptr_out = thresh_align;
1056
                    /* Do this in three parts.  We do a top part, followed by
1057
                       larger mem copies followed by a bottom partial. After
1058
                       a slower initial fill we are able to do larger faster
1059
                       expansions */
1060
1
                    if (dest_height <= 2 * thresh_height) {
1061
1
                        init_tile = dest_height;
1062
1
                        replicate_tile = false;
1063
1
                    } else {
1064
0
                        init_tile = thresh_height;
1065
0
                        replicate_tile = true;
1066
0
                    }
1067
4
                    for (jj = 0; jj < init_tile; jj++) {
1068
3
                        in_row_offset = (jj + dy) % thresh_height;
1069
3
                        row_ptr = threshold + in_row_offset * thresh_width;
1070
3
                        ptr_out_temp = ptr_out;
1071
                        /* Left part */
1072
3
                        memcpy(ptr_out_temp, row_ptr + dx, left_width);
1073
3
                        ptr_out_temp += left_width;
1074
                        /* Now the full tiles */
1075
30
                        for (ii = 0; ii < num_full_tiles; ii++) {
1076
27
                            memcpy(ptr_out_temp, row_ptr, thresh_width);
1077
27
                            ptr_out_temp += thresh_width;
1078
27
                        }
1079
                        /* Now the remainder */
1080
3
                        memcpy(ptr_out_temp, row_ptr, right_tile_width);
1081
3
                        ptr_out += LAND_BITS;
1082
3
                    }
1083
1
                    if (replicate_tile) {
1084
                        /* Find out how many we need to copy */
1085
0
                        num_tiles =
1086
0
                            (int)fastfloor((float) (dest_height - thresh_height)/ (float) thresh_height);
1087
0
                        tile_remainder = dest_height - (num_tiles + 1) * thresh_height;
1088
0
                        for (jj = 0; jj < num_tiles; jj ++) {
1089
0
                            memcpy(ptr_out, thresh_align, LAND_BITS * thresh_height);
1090
0
                            ptr_out += LAND_BITS * thresh_height;
1091
0
                        }
1092
                        /* Now fill in the remainder */
1093
0
                        memcpy(ptr_out, thresh_align, LAND_BITS * tile_remainder);
1094
0
                    }
1095
                    /* Apply the threshold operation */
1096
1
                    if (dev->color_info.polarity == GX_CINFO_POLARITY_SUBTRACTIVE
1097
1
                        && is_planar_dev) {
1098
0
                        gx_ht_threshold_landscape_sub(contone_align, thresh_align,
1099
0
                                            &(penum->ht_landscape), halftone, dest_height);
1100
1
                    } else {
1101
1
                        gx_ht_threshold_landscape(contone_align, thresh_align,
1102
1
                                            &(penum->ht_landscape), halftone, dest_height);
1103
1
                    }
1104
                    /* We may have a line left over that has to be maintained
1105
                       due to line replication in the resolution conversion. */
1106
1
                    if (width != penum->ht_landscape.count) {
1107
                        /* move the line do not reset the stuff */
1108
0
                        move_landscape_buffer(&(penum->ht_landscape),
1109
0
                                              contone_align, dest_height);
1110
0
                    }
1111
1
                }
1112
                /* Perform the copy mono */
1113
1
                if (penum->ht_landscape.index < 0) {
1114
0
                    if (!is_planar_dev) {
1115
0
                        (*dev_proc(dev, copy_mono))
1116
0
                                       (dev, penum->ht_buffer, 0, LAND_BITS>>3,
1117
0
                                        gx_no_bitmap_id,
1118
0
                                        penum->ht_landscape.xstart - width + 1,
1119
0
                                        penum->ht_landscape.y_pos,
1120
0
                                        width, dest_height,
1121
0
                                        dev_white, dev_black);
1122
0
                    } else {
1123
0
                        (*dev_proc(dev, copy_planes))
1124
0
                                       (dev, penum->ht_buffer, 0, LAND_BITS>>3,
1125
0
                                        gx_no_bitmap_id,
1126
0
                                        penum->ht_landscape.xstart - width + 1,
1127
0
                                        penum->ht_landscape.y_pos,
1128
0
                                        width, dest_height,
1129
0
                                        penum->ht_plane_height);
1130
0
                    }
1131
1
                } else {
1132
1
                    if (!is_planar_dev) {
1133
1
                        (*dev_proc(dev, copy_mono)) (dev, penum->ht_buffer, 0, LAND_BITS>>3,
1134
1
                                                     gx_no_bitmap_id,
1135
1
                                                     penum->ht_landscape.xstart,
1136
1
                                                     penum->ht_landscape.y_pos,
1137
1
                                                     width, dest_height,
1138
1
                                                     dev_white, dev_black);
1139
1
                    } else {
1140
0
                        (*dev_proc(dev, copy_planes)) (dev, penum->ht_buffer, 0, LAND_BITS>>3,
1141
0
                                                     gx_no_bitmap_id,
1142
0
                                                     penum->ht_landscape.xstart,
1143
0
                                                     penum->ht_landscape.y_pos,
1144
0
                                                     width, dest_height,
1145
0
                                                     penum->ht_plane_height);
1146
0
                    }
1147
1
                }
1148
1
                penum->ht_landscape.offset_set = false;
1149
1
                if (width != penum->ht_landscape.count) {
1150
0
                    reset_landscape_buffer(&(penum->ht_landscape),
1151
0
                                           contone_align, dest_height,
1152
0
                                           width);
1153
1
                } else {
1154
                    /* Reset the whole buffer */
1155
1
                    penum->ht_landscape.count = 0;
1156
1
                    if (penum->ht_landscape.index < 0) {
1157
                        /* Going right to left */
1158
0
                        penum->ht_landscape.curr_pos = LAND_BITS-1;
1159
1
                    } else {
1160
                        /* Going left to right */
1161
1
                        penum->ht_landscape.curr_pos = 0;
1162
1
                    }
1163
1
                    penum->ht_landscape.num_contones = 0;
1164
1
                    memset(&(penum->ht_landscape.widths[0]), 0, sizeof(int)*LAND_BITS);
1165
1
                }
1166
1
            }
1167
2
            break;
1168
2
        default:
1169
0
            return gs_rethrow(-1, "Invalid orientation for thresholding");
1170
1.95M
    }
1171
1.95M
    return 0;
1172
1.95M
}
1173
1174
/* Return the number of whole device pixels spanned when the DDA is stepped
   src_size times from its current state.  The caller's DDA is not
   modified: the stepping is done on a local copy.  The result is the
   absolute difference between the rounded end and start positions. */
int gxht_dda_length(gx_dda_fixed *dda, int src_size)
{
    gx_dda_fixed stepped = (*dda);
    int first_pixel, last_pixel;

    dda_advance(stepped, src_size);
    first_pixel = fixed2int_var_rounded(dda_current(*dda));
    last_pixel = fixed2int_var_rounded(dda_current(stepped));
    return abs(last_pixel - first_pixel);
}