Coverage Report

Created: 2025-06-24 07:01

/src/ghostpdl/base/gxht_thresh.c
Line
Count
Source
1
/* Copyright (C) 2001-2023 Artifex Software, Inc.
2
   All Rights Reserved.
3
4
   This software is provided AS-IS with no warranty, either express or
5
   implied.
6
7
   This software is distributed under license and may not be copied,
8
   modified or distributed except as expressly authorized under the terms
9
   of the license contained in the file LICENSE in this distribution.
10
11
   Refer to licensing information at http://www.artifex.com or contact
12
   Artifex Software, Inc.,  39 Mesa Street, Suite 108A, San Francisco,
13
   CA 94129, USA, for further information.
14
*/
15
16
17
/*$Id: gxht_thresh.c  $ */
18
/* Halftone thresholding code */
19
20
#include <stdlib.h> /* abs() */
21
#include "memory_.h"
22
#include "gx.h"
23
#include "gxgstate.h"
24
#include "gsiparam.h"
25
#include "math_.h"
26
#include "gxfixed.h"  /* needed for gximage.h */
27
#include "gximage.h"
28
#include "gxdevice.h"
29
#include "gxdht.h"
30
#include "gxht_thresh.h"
31
#include "gzht.h"
32
#include "gxdevsop.h"
33
34
/* Enable the following define to perform a little extra work to stop
35
 * spurious valgrind errors. The code should perform perfectly even without
36
 * this enabled, but enabling it makes debugging much easier.
37
 */
38
/* #define PACIFY_VALGRIND */
39
40
#ifndef __WIN32__
41
1
#define __align16  __attribute__((aligned(16)))
42
#else
43
#define __align16 __declspec(align(16))
44
#endif
45
1.95M
#define fastfloor(x) (((int)(x)) - (((x)<0) && ((x) != (float)(int)(x))))
46
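
As an aside for readers of the listing: the fastfloor() macro above truncates toward zero and then subtracts one for negative non-integer inputs, matching floor() for the values the tiling code feeds it. A minimal standalone check (illustrative only, not part of gxht_thresh.c; assumes a hosted C environment):

#include <assert.h>
#include <math.h>

#define fastfloor(x) (((int)(x)) - (((x)<0) && ((x) != (float)(int)(x))))

int main(void)
{
    /* Positive values just truncate; negative non-integers round down. */
    assert(fastfloor(2.7f)  == (int)floor(2.7f));   /*  2 */
    assert(fastfloor(-2.7f) == (int)floor(-2.7f));  /* -3 */
    assert(fastfloor(-3.0f) == (int)floor(-3.0f));  /* -3 */
    return 0;
}
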
47
#ifdef HAVE_SSE2
48
49
#include <emmintrin.h>
50
51
static const byte bitreverse[] =
52
{ 0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0,
53
  0x30, 0xB0, 0x70, 0xF0, 0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8,
54
  0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8, 0x04, 0x84, 0x44, 0xC4,
55
  0x24, 0xA4, 0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4,
56
  0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC,
57
  0x3C, 0xBC, 0x7C, 0xFC, 0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2,
58
  0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2, 0x0A, 0x8A, 0x4A, 0xCA,
59
  0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA,
60
  0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6,
61
  0x36, 0xB6, 0x76, 0xF6, 0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE,
62
  0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE, 0x01, 0x81, 0x41, 0xC1,
63
  0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1,
64
  0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9,
65
  0x39, 0xB9, 0x79, 0xF9, 0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5,
66
  0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5, 0x0D, 0x8D, 0x4D, 0xCD,
67
  0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD,
68
  0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3,
69
  0x33, 0xB3, 0x73, 0xF3, 0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB,
70
  0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB, 0x07, 0x87, 0x47, 0xC7,
71
  0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7,
72
  0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF,
73
  0x3F, 0xBF, 0x7F, 0xFF};
74
#endif
75
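
The bitreverse[] table above is each byte value with its bit order reversed; it is used below to convert the LSB-first pixel mask produced by _mm_movemask_epi8 into the MSB-first bit order of the 1-bit halftone output. A small sketch that regenerates the same table (illustrative only, not part of the source):

#include <assert.h>

typedef unsigned char byte;

static void build_bitreverse(byte table[256])
{
    int v, bit;

    for (v = 0; v < 256; v++) {
        byte r = 0;
        for (bit = 0; bit < 8; bit++)
            if (v & (1 << bit))
                r |= (byte)(0x80 >> bit);
        table[v] = r;
    }
}

int main(void)
{
    byte table[256];

    build_bitreverse(table);
    assert(table[0x01] == 0x80);   /* matches the second entry above */
    assert(table[0x03] == 0xC0);
    return 0;
}
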
76
#if RAW_HT_DUMP
77
/* This is slow thresholding, byte output for debug only */
78
void
79
gx_ht_threshold_row_byte(byte *contone, byte *threshold_strip, int contone_stride,
80
                              byte *halftone, int dithered_stride, int width,
81
                              int num_rows)
82
{
83
    int k, j;
84
    byte *contone_ptr;
85
    byte *thresh_ptr;
86
    byte *halftone_ptr;
87
88
    /* For the moment just do a very slow compare until we
89
       get this working */
90
    for (j = 0; j < num_rows; j++) {
91
        contone_ptr = contone;
92
        thresh_ptr = threshold_strip + contone_stride * j;
93
        halftone_ptr = halftone + dithered_stride * j;
94
        for (k = 0; k < width; k++) {
95
            if (contone_ptr[k] < thresh_ptr[k]) {
96
                halftone_ptr[k] = 0;
97
            } else {
98
                halftone_ptr[k] = 255;
99
            }
100
        }
101
    }
102
}
103
#endif
104
105
#ifndef HAVE_SSE2
106
/* A simple case for use in the landscape mode. Could probably be coded up
107
   faster */
108
static void
109
threshold_16_bit(byte *contone_ptr, byte *thresh_ptr, byte *ht_data)
110
{
111
    int j;
112
113
    for (j = 2; j > 0; j--) {
114
        byte h = 0;
115
        byte bit_init = 0x80;
116
        do {
117
            if (*contone_ptr++ < *thresh_ptr++) {
118
                h |=  bit_init;
119
            }
120
            bit_init >>= 1;
121
        } while (bit_init != 0);
122
        *ht_data++ = h;
123
    }
124
}
125
#else
126
/* Note this function has strict data alignment needs */
127
static void
128
threshold_16_SSE(byte *contone_ptr, byte *thresh_ptr, byte *ht_data)
129
279M
{
130
279M
    __m128i input1;
131
279M
    __m128i input2;
132
279M
    register int result_int;
133
279M
    const unsigned int mask1 = 0x80808080;
134
279M
    __m128i sign_fix = _mm_set_epi32(mask1, mask1, mask1, mask1);
135
136
    /* Load */
137
279M
    input1 = _mm_load_si128((const __m128i *)contone_ptr);
138
279M
    input2 = _mm_load_si128((const __m128i *) thresh_ptr);
139
    /* Unsigned subtraction does unsigned saturation, so we
140
       have to use the signed operation */
141
279M
    input1 = _mm_xor_si128(input1, sign_fix);
142
279M
    input2 = _mm_xor_si128(input2, sign_fix);
143
    /* Subtract the two */
144
279M
    input2 = _mm_subs_epi8(input1, input2);
145
    /* Grab the sign mask */
146
279M
    result_int = _mm_movemask_epi8(input2);
147
    /* bitwise reversal of the 16 bit word */
148
279M
    ht_data[0] = bitreverse[(result_int & 0xff)];
149
279M
    ht_data[1] = bitreverse[((result_int >> 8) & 0xff)];
150
279M
}
151
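
For clarity, a scalar model of what threshold_16_SSE above computes (illustrative only, not part of the source): output bit k is set exactly when contone pixel k is below its threshold, with pixel 0 in the MSB of ht_data[0] and pixel 15 in the LSB of ht_data[1]. The SIMD version reaches the same layout by biasing both inputs with 0x80 (unsigned to signed), doing a saturating signed subtract, taking the sign mask (pixel k into bit k), and bit-reversing each mask byte with bitreverse[].

typedef unsigned char byte;

static void threshold_16_scalar(const byte *contone_ptr,
                                const byte *thresh_ptr, byte *ht_data)
{
    int k;

    ht_data[0] = ht_data[1] = 0;
    for (k = 0; k < 16; k++)
        if (contone_ptr[k] < thresh_ptr[k])
            ht_data[k >> 3] |= (byte)(0x80 >> (k & 7));
}

The subtractive callers further below simply pass the two buffers in the opposite order, which flips the comparison.
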
152
/* Not so fussy about its alignment */
153
static void
154
threshold_16_SSE_unaligned(byte *contone_ptr, byte *thresh_ptr, byte *ht_data)
155
1.65M
{
156
1.65M
    __m128i input1;
157
1.65M
    __m128i input2;
158
1.65M
    int result_int;
159
1.65M
    byte *sse_data;
160
1.65M
    const unsigned int mask1 = 0x80808080;
161
1.65M
    __m128i sign_fix = _mm_set_epi32(mask1, mask1, mask1, mask1);
162
163
1.65M
    sse_data = (byte*) &(result_int);
164
    /* Load */
165
1.65M
    input1 = _mm_loadu_si128((const __m128i *)contone_ptr);
166
1.65M
    input2 = _mm_loadu_si128((const __m128i *) thresh_ptr);
167
    /* Unsigned subtraction does unsigned saturation, so we
168
       have to use the signed operation */
169
1.65M
    input1 = _mm_xor_si128(input1, sign_fix);
170
1.65M
    input2 = _mm_xor_si128(input2, sign_fix);
171
    /* Subtract the two */
172
1.65M
    input2 = _mm_subs_epi8(input1, input2);
173
    /* Grab the sign mask */
174
1.65M
    result_int = _mm_movemask_epi8(input2);
175
    /* bitwise reversal of the 16 bit word */
176
1.65M
    ht_data[0] = bitreverse[sse_data[0]];
177
1.65M
    ht_data[1] = bitreverse[sse_data[1]];
178
1.65M
}
179
#endif
180
181
/* SSE2 and non-SSE2 implementation of thresholding a row. Subtractive case
182
   There is some code replication between the two of these (additive and subtractive)
183
   that I need to go back and determine how we can combine them without
184
   any performance loss. */
185
void
186
gx_ht_threshold_row_bit_sub(byte *contone,  byte *threshold_strip,  int contone_stride,
187
                  byte *halftone, int dithered_stride, int width,
188
                  int num_rows, int offset_bits)
189
83.1k
{
190
#ifndef HAVE_SSE2
191
    int k, j;
192
    byte *contone_ptr;
193
    byte *thresh_ptr;
194
    byte *halftone_ptr;
195
    byte bit_init;
196
197
    /* For the moment just do a very slow compare until we
198
       get this working.  This could use some serious optimization */
199
    width -= offset_bits;
200
    for (j = 0; j < num_rows; j++) {
201
        byte h;
202
        contone_ptr = contone;
203
        thresh_ptr = threshold_strip + contone_stride * j;
204
        halftone_ptr = halftone + dithered_stride * j;
205
        /* First get the left remainder portion.  Put into MSBs of first byte */
206
        bit_init = 0x80;
207
        h = 0;
208
        k = offset_bits;
209
        if (k > 0) {
210
            do {
211
                if (*contone_ptr++ > *thresh_ptr++) {
212
                    h |=  bit_init;
213
                }
214
                bit_init >>= 1;
215
                if (bit_init == 0) {
216
                    bit_init = 0x80;
217
                    *halftone_ptr++ = h;
218
                    h = 0;
219
                }
220
                k--;
221
            } while (k > 0);
222
            bit_init = 0x80;
223
            *halftone_ptr++ = h;
224
            h = 0;
225
            if (offset_bits < 8)
226
                *halftone_ptr++ = 0;
227
        }
228
        /* Now get the rest, which will be 16 bit aligned. */
229
        k = width;
230
        if (k > 0) {
231
            do {
232
                if (*contone_ptr++ > *thresh_ptr++) {
233
                    h |=  bit_init;
234
                }
235
                bit_init >>= 1;
236
                if (bit_init == 0) {
237
                    bit_init = 0x80;
238
                    *halftone_ptr++ = h;
239
                    h = 0;
240
                }
241
                k--;
242
            } while (k > 0);
243
            if (bit_init != 0x80) {
244
                *halftone_ptr++ = h;
245
            }
246
            if ((width & 15) < 8)
247
                *halftone_ptr++ = 0;
248
        }
249
    }
250
#else
251
83.1k
    byte *contone_ptr;
252
83.1k
    byte *thresh_ptr;
253
83.1k
    byte *halftone_ptr;
254
83.1k
    int num_tiles = (width - offset_bits + 15)>>4;
255
83.1k
    int k, j;
256
257
368k
    for (j = 0; j < num_rows; j++) {
258
        /* contone and thresh_ptr are 128 bit aligned.  We do need to do this in
259
           two steps to ensure that we pack the bits in an aligned fashion
260
           into halftone_ptr.  */
261
285k
        contone_ptr = contone;
262
285k
        thresh_ptr = threshold_strip + contone_stride * j;
263
285k
        halftone_ptr = halftone + dithered_stride * j;
264
285k
        if (offset_bits > 0) {
265
            /* Since we allowed for 16 bits in our left remainder
266
               we can go directly into the destination.  threshold_16_SSE
267
               requires 128 bit alignment.  contone_ptr and thresh_ptr
268
               are set up so that after we move in by offset_bits elements
269
               then we are 128 bit aligned.  */
270
89.8k
            threshold_16_SSE_unaligned(thresh_ptr, contone_ptr,
271
89.8k
                                       halftone_ptr);
272
89.8k
            halftone_ptr += 2;
273
89.8k
            thresh_ptr += offset_bits;
274
89.8k
            contone_ptr += offset_bits;
275
89.8k
        }
276
        /* Now we should be 128 bit aligned with our input data. Iterate
277
           over sets of 16 going directly into our HT buffer.  Sources and
278
           halftone_ptr buffers should be padded to allow 15 bit overrun */
279
20.0M
        for (k = 0; k < num_tiles; k++) {
280
19.7M
            threshold_16_SSE(thresh_ptr, contone_ptr, halftone_ptr);
281
19.7M
            thresh_ptr += 16;
282
19.7M
            contone_ptr += 16;
283
19.7M
            halftone_ptr += 2;
284
19.7M
        }
285
285k
    }
286
83.1k
#endif
287
83.1k
}
288
289
/* SSE2 and non-SSE2 implementation of thresholding a row. Additive case  */
290
void
291
gx_ht_threshold_row_bit(byte *contone,  byte *threshold_strip,  int contone_stride,
292
                  byte *halftone, int dithered_stride, int width,
293
                  int num_rows, int offset_bits)
294
1.87M
{
295
#ifndef HAVE_SSE2
296
    int k, j;
297
    byte *contone_ptr;
298
    byte *thresh_ptr;
299
    byte *halftone_ptr;
300
    byte bit_init;
301
302
    /* For the moment just do a very slow compare until we
303
       get this working.  This could use some serious optimization */
304
    width -= offset_bits;
305
    for (j = 0; j < num_rows; j++) {
306
        byte h;
307
        contone_ptr = contone;
308
        thresh_ptr = threshold_strip + contone_stride * j;
309
        halftone_ptr = halftone + dithered_stride * j;
310
        /* First get the left remainder portion.  Put into MSBs of first byte */
311
        bit_init = 0x80;
312
        h = 0;
313
        k = offset_bits;
314
        if (k > 0) {
315
            do {
316
                if (*contone_ptr++ < *thresh_ptr++) {
317
                    h |=  bit_init;
318
                }
319
                bit_init >>= 1;
320
                if (bit_init == 0) {
321
                    bit_init = 0x80;
322
                    *halftone_ptr++ = h;
323
                    h = 0;
324
                }
325
                k--;
326
            } while (k > 0);
327
            bit_init = 0x80;
328
            *halftone_ptr++ = h;
329
            h = 0;
330
            if (offset_bits < 8)
331
                *halftone_ptr++ = 0;
332
        }
333
        /* Now get the rest, which will be 16 bit aligned. */
334
        k = width;
335
        if (k > 0) {
336
            do {
337
                if (*contone_ptr++ < *thresh_ptr++) {
338
                    h |=  bit_init;
339
                }
340
                bit_init >>= 1;
341
                if (bit_init == 0) {
342
                    bit_init = 0x80;
343
                    *halftone_ptr++ = h;
344
                    h = 0;
345
                }
346
                k--;
347
            } while (k > 0);
348
            if (bit_init != 0x80) {
349
                *halftone_ptr++ = h;
350
            }
351
            if ((width & 15) < 8)
352
                *halftone_ptr++ = 0;
353
        }
354
    }
355
#else
356
1.87M
    byte *contone_ptr;
357
1.87M
    byte *thresh_ptr;
358
1.87M
    byte *halftone_ptr;
359
1.87M
    int num_tiles = (width - offset_bits + 15)>>4;
360
1.87M
    int k, j;
361
362
3.99M
    for (j = 0; j < num_rows; j++) {
363
        /* contone and thresh_ptr are 128 bit aligned.  We do need to do this in
364
           two steps to ensure that we pack the bits in an aligned fashion
365
           into halftone_ptr.  */
366
2.12M
        contone_ptr = contone;
367
2.12M
        thresh_ptr = threshold_strip + contone_stride * j;
368
2.12M
        halftone_ptr = halftone + dithered_stride * j;
369
2.12M
        if (offset_bits > 0) {
370
            /* Since we allowed for 16 bits in our left remainder
371
               we can go directly into the destination.  threshold_16_SSE
372
               requires 128 bit alignment.  contone_ptr and thresh_ptr
373
               are set up so that after we move in by offset_bits elements
374
               then we are 128 bit aligned.  */
375
1.56M
            threshold_16_SSE_unaligned(contone_ptr, thresh_ptr,
376
1.56M
                                       halftone_ptr);
377
1.56M
            halftone_ptr += 2;
378
1.56M
            thresh_ptr += offset_bits;
379
1.56M
            contone_ptr += offset_bits;
380
1.56M
        }
381
        /* Now we should be 128 bit aligned with our input data. Iterate
382
           over sets of 16 going directly into our HT buffer.  Sources and
383
           halftone_ptr buffers should be padded to allow 15 bit overrun */
384
261M
        for (k = 0; k < num_tiles; k++) {
385
259M
            threshold_16_SSE(contone_ptr, thresh_ptr, halftone_ptr);
386
259M
            thresh_ptr += 16;
387
259M
            contone_ptr += 16;
388
259M
            halftone_ptr += 2;
389
259M
        }
390
2.12M
    }
391
1.87M
#endif
392
1.87M
}
393
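
The two row functions above share the same structure: when offset_bits is non-zero, one unaligned 16-pixel threshold fills the first two halftone bytes, then the pointers advance by offset_bits so the rest of the row is 16-byte aligned and handled as whole 16-pixel tiles. A small arithmetic sketch with hypothetical numbers (not part of the source) showing how many tiles and output bytes that produces:

#include <stdio.h>

int main(void)
{
    int width = 100, offset_bits = 5;                 /* hypothetical values */
    int num_tiles = (width - offset_bits + 15) >> 4;  /* same formula as above */
    int ht_bytes = (offset_bits > 0 ? 2 : 0) + 2 * num_tiles;

    /* 5 head pixels, 6 aligned tiles, 14 halftone bytes (with padding) */
    printf("head %d, tiles %d, halftone bytes %d\n",
           offset_bits, num_tiles, ht_bytes);
    return 0;
}
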
394
/* This thresholds a buffer that is LAND_BITS wide by data_length tall.
395
   Subtractive case */
396
void
397
gx_ht_threshold_landscape_sub(byte *contone_align, byte *thresh_align,
398
                    ht_landscape_info_t *ht_landscape, byte *halftone,
399
                    int data_length)
400
0
{
401
0
    __align16 byte contone[LAND_BITS];
402
0
    int position_start, position, curr_position;
403
0
    int *widths = &(ht_landscape->widths[0]);
404
0
    int local_widths[LAND_BITS];
405
0
    int num_contone = ht_landscape->num_contones;
406
0
    int k, j, w, contone_out_posit;
407
0
    byte *contone_ptr, *thresh_ptr, *halftone_ptr;
408
0
#ifdef PACIFY_VALGRIND
409
0
    int extra = 0;
410
0
#endif
411
412
    /* Work through chunks of 16.  */
413
    /* Data may have come in left to right or right to left. */
414
0
    if (ht_landscape->index > 0) {
415
0
        position = position_start = 0;
416
0
    } else {
417
0
        position = position_start = ht_landscape->curr_pos + 1;
418
0
    }
419
0
    thresh_ptr = thresh_align;
420
0
    halftone_ptr = halftone;
421
    /* Copy the widths to a local array, and truncate the last one (which may
422
     * be the first one!) if required. */
423
0
    k = 0;
424
0
    for (j = 0; j < num_contone; j++)
425
0
        k += (local_widths[j] = widths[position_start+j]);
426
0
    if (k > LAND_BITS) {
427
0
        if (ht_landscape->index > 0) {
428
0
            local_widths[num_contone-1] -= k-LAND_BITS;
429
0
        } else {
430
0
            local_widths[0] -= k-LAND_BITS;
431
0
        }
432
0
    }
433
0
#ifdef PACIFY_VALGRIND
434
0
    if (k < LAND_BITS) {
435
0
        extra = LAND_BITS - k;
436
0
    }
437
0
#endif
438
0
    for (k = data_length; k > 0; k--) { /* Loop on rows */
439
0
        contone_ptr = &(contone_align[position]); /* Point us to our row start */
440
0
        curr_position = 0; /* We use this in keeping track of widths */
441
0
        contone_out_posit = 0; /* Our index out */
442
0
        for (j = num_contone; j > 0; j--) {
443
0
            byte c = *contone_ptr;
444
            /* The Microsoft compiler cleverly spots that the following loop
445
             * can be replaced by a memset. Unfortunately, it can't spot that
446
             * the typical length values of the memset are so small that we'd
447
             * be better off doing it the slow way. We therefore introduce a
448
             * sneaky 'volatile' cast below that stops this optimisation. */
449
0
            w = local_widths[curr_position];
450
0
            do {
451
0
                ((volatile byte *)contone)[contone_out_posit] = c;
452
0
                contone_out_posit++;
453
0
            } while (--w);
454
0
#ifdef PACIFY_VALGRIND
455
0
            if (extra)
456
0
                memset(contone+contone_out_posit, 0, extra);
457
0
#endif
458
0
            curr_position++; /* Move us to the next position in our width array */
459
0
            contone_ptr++;   /* Move us to a new location in our contone buffer */
460
0
        }
461
        /* Now we have our left justified and expanded contone data for
462
           LAND_BITS/16 sets of 16 bits. Go ahead and threshold these. */
463
0
        contone_ptr = &contone[0];
464
0
#if LAND_BITS > 16
465
0
        j = LAND_BITS;
466
0
        do {
467
0
#endif
468
0
#ifdef HAVE_SSE2
469
0
            threshold_16_SSE(thresh_ptr, contone_ptr, halftone_ptr);
470
#else
471
            threshold_16_bit(thresh_ptr, contone_ptr, halftone_ptr);
472
#endif
473
0
            thresh_ptr += 16;
474
0
            position += 16;
475
0
            halftone_ptr += 2;
476
0
            contone_ptr += 16;
477
0
#if LAND_BITS > 16
478
0
            j -= 16;
479
0
        } while (j > 0);
480
0
#endif
481
0
    }
482
0
}
483
484
/* This thresholds a buffer that is LAND_BITS wide by data_length tall.
485
   Additive case.  Note I could likely do some code reduction between
486
   the additive and subtractive cases */
487
void
488
gx_ht_threshold_landscape(byte *contone_align, byte *thresh_align,
489
                    ht_landscape_info_t *ht_landscape, byte *halftone,
490
                    int data_length)
491
1
{
492
1
    __align16 byte contone[LAND_BITS];
493
1
    int position_start, position, curr_position;
494
1
    int *widths = &(ht_landscape->widths[0]);
495
1
    int local_widths[LAND_BITS];
496
1
    int num_contone = ht_landscape->num_contones;
497
1
    int k, j, w, contone_out_posit;
498
1
    byte *contone_ptr, *thresh_ptr, *halftone_ptr;
499
1
#ifdef PACIFY_VALGRIND
500
1
    int extra = 0;
501
1
#endif
502
503
    /* Work through chunks of 16.  */
504
    /* Data may have come in left to right or right to left. */
505
1
    if (ht_landscape->index > 0) {
506
1
        position = position_start = 0;
507
1
    } else {
508
0
        position = position_start = ht_landscape->curr_pos + 1;
509
0
    }
510
1
    thresh_ptr = thresh_align;
511
1
    halftone_ptr = halftone;
512
    /* Copy the widths to a local array, and truncate the last one (which may
513
     * be the first one!) if required. */
514
1
    k = 0;
515
2
    for (j = 0; j < num_contone; j++)
516
1
        k += (local_widths[j] = widths[position_start+j]);
517
1
    if (k > LAND_BITS) {
518
0
        if (ht_landscape->index > 0) {
519
0
            local_widths[num_contone-1] -= k-LAND_BITS;
520
0
        } else {
521
0
            local_widths[0] -= k-LAND_BITS;
522
0
        }
523
0
    }
524
1
#ifdef PACIFY_VALGRIND
525
1
    if (k < LAND_BITS) {
526
1
        extra = LAND_BITS - k;
527
1
    }
528
1
#endif
529
4
    for (k = data_length; k > 0; k--) { /* Loop on rows */
530
3
        contone_ptr = &(contone_align[position]); /* Point us to our row start */
531
3
        curr_position = 0; /* We use this in keeping track of widths */
532
3
        contone_out_posit = 0; /* Our index out */
533
6
        for (j = num_contone; j > 0; j--) {
534
3
            byte c = *contone_ptr;
535
            /* The Microsoft compiler cleverly spots that the following loop
536
             * can be replaced by a memset. Unfortunately, it can't spot that
537
             * the typical length values of the memset are so small that we'd
538
             * be better off doing it the slow way. We therefore introduce a
539
             * sneaky 'volatile' cast below that stops this optimisation. */
540
3
            w = local_widths[curr_position];
541
9
            do {
542
9
                ((volatile byte *)contone)[contone_out_posit] = c;
543
9
                contone_out_posit++;
544
9
            } while (--w);
545
3
#ifdef PACIFY_VALGRIND
546
3
            if (extra)
547
3
                memset(contone+contone_out_posit, 0, extra);
548
3
#endif
549
3
            curr_position++; /* Move us to the next position in our width array */
550
3
            contone_ptr++;   /* Move us to a new location in our contone buffer */
551
3
        }
552
        /* Now we have our left justified and expanded contone data for
553
           LAND_BITS/16 sets of 16 bits. Go ahead and threshold these. */
554
3
        contone_ptr = &contone[0];
555
3
#if LAND_BITS > 16
556
3
        j = LAND_BITS;
557
12
        do {
558
12
#endif
559
12
#ifdef HAVE_SSE2
560
12
            threshold_16_SSE(contone_ptr, thresh_ptr, halftone_ptr);
561
#else
562
            threshold_16_bit(contone_ptr, thresh_ptr, halftone_ptr);
563
#endif
564
12
            thresh_ptr += 16;
565
12
            position += 16;
566
12
            halftone_ptr += 2;
567
12
            contone_ptr += 16;
568
12
#if LAND_BITS > 16
569
12
            j -= 16;
570
12
        } while (j > 0);
571
3
#endif
572
3
    }
573
1
}
574
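
The two landscape routines above first expand the buffered columns into a flat contone row: each of the num_contones samples is replicated local_widths[j] times, left justified, and capped at LAND_BITS by truncating the last width. A minimal sketch of that expansion step (illustrative only, not part of the source):

typedef unsigned char byte;

static void expand_columns(const byte *src, const int *local_widths,
                           int num_contone, byte *row)
{
    int j, w, out = 0;

    /* row must hold at least the sum of local_widths[0..num_contone-1],
       which the caller keeps <= LAND_BITS. */
    for (j = 0; j < num_contone; j++)
        for (w = local_widths[j]; w > 0; w--)
            row[out++] = src[j];
}
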
575
int
576
gxht_thresh_image_init(gx_image_enum *penum)
577
149k
{
578
149k
    int code = 0;
579
149k
    fixed ox;
580
149k
    int temp;
581
149k
    int dev_width, max_height;
582
149k
    int spp_out;
583
149k
    int k;
584
149k
    gx_ht_order *d_order;
585
149k
    gx_dda_fixed dda_ht;
586
587
149k
    if (gx_device_must_halftone(penum->dev)) {
588
149k
        if (penum->pgs != NULL && penum->pgs->dev_ht[HT_OBJTYPE_DEFAULT] != NULL) {
589
149k
            gx_device_halftone *pdht = gx_select_dev_ht(penum->pgs);
590
591
300k
            for (k = 0; k < pdht->num_comp; k++) {
592
150k
                d_order = &(pdht->components[k].corder);
593
150k
                code = gx_ht_construct_threshold(d_order, penum->dev,
594
150k
                                                 penum->pgs, k);
595
150k
                if (code < 0 ) {
596
0
                    return gs_rethrow(code, "threshold creation failed");
597
0
                }
598
150k
            }
599
149k
        } else {
600
0
            return -1;
601
0
        }
602
149k
    }
603
149k
    spp_out = penum->dev->color_info.num_components;
604
    /* Precompute values needed for rasterizing. */
605
149k
    penum->dxx = float2fixed(penum->matrix.xx + fixed2float(fixed_epsilon) / 2);
606
    /* If the image is landscaped then we want to maintain a buffer
607
       that is sufficiently large so that we can hold a byte
608
       of halftoned data along the column.  This way we avoid doing
609
       multiple writes into the same position over and over.
610
       The size of the buffer we need depends upon the bitdepth of
611
       the output device, the number of device colorants and the
612
       number of colorants in the source space.  Note we will
613
       need to eventually consider the multi-level halftone case
614
       here too.  For now, to make use of the SSE2 stuff, we would
615
       like to have a multiple of 16 bytes of data to process at a time.
616
       So we will collect the columns of data in a buffer that is LAND_BITS
617
       wide.  We will also keep track of the widths of each column.  When
618
       the total width count reaches LAND_BITS, we will create our
619
       threshold array and apply it.  We may have one column that is
620
       buffered between calls in this case.  Also if a call is made
621
       with h=0 we will flush the buffer as we are at the end of the
622
       data.  */
623
149k
    if (penum->posture == image_landscape) {
624
3
        int col_length = fixed2int_var_rounded(any_abs(penum->x_extent.y));
625
3
        dda_ht = penum->dda.pixel0.y;
626
3
        if (penum->dxx > 0)
627
3
            dda_translate(dda_ht, -fixed_epsilon);      /* to match rounding in non-fast code */
628
629
3
        ox = dda_current(penum->dda.pixel0.x);
630
3
        temp = gxht_dda_length(&dda_ht, penum->rect.w);
631
3
        if (col_length < temp)
632
0
            col_length = temp;          /* choose max to make sure line_size is large enough */
633
3
        temp = (col_length + LAND_BITS)/LAND_BITS;      /* round up to allow for offset bits */
634
        /* bitmap_raster() expects the width in bits, hence "* 8" */
635
3
        penum->line_size = bitmap_raster((temp * LAND_BITS) * 8);  /* The stride */
636
        /* Now we need at most LAND_BITS of these */
637
3
        penum->line = gs_alloc_bytes(penum->memory,
638
3
                                     LAND_BITS * penum->line_size * spp_out + 16,
639
3
                                     "gxht_thresh");
640
        /* Same with this.  However, we only need one plane here */
641
3
        penum->thresh_buffer = gs_alloc_bytes(penum->memory,
642
3
                                           penum->line_size * LAND_BITS + 16,
643
3
                                           "gxht_thresh");
644
        /* That maps into (LAND_BITS/8) bytes of Halftone data */
645
3
        penum->ht_buffer =
646
3
                        gs_alloc_bytes(penum->memory,
647
3
                           penum->line_size * (LAND_BITS>>3) * spp_out,
648
3
                           "gxht_thresh");
649
3
        penum->ht_plane_height = penum->line_size;
650
3
        penum->ht_stride = penum->line_size;
651
3
        if (penum->line == NULL || penum->thresh_buffer == NULL
652
3
                    || penum->ht_buffer == NULL)
653
0
            return -1;
654
3
        penum->ht_landscape.count = 0;
655
3
        penum->ht_landscape.num_contones = 0;
656
3
        if (penum->y_extent.x < 0) {
657
            /* Going right to left */
658
1
            penum->ht_landscape.curr_pos = LAND_BITS-1;
659
1
            penum->ht_landscape.index = -1;
660
2
        } else {
661
            /* Going left to right */
662
2
            penum->ht_landscape.curr_pos = 0;
663
2
            penum->ht_landscape.index = 1;
664
2
        }
665
3
        if (penum->x_extent.y < 0) {
666
0
            penum->ht_landscape.flipy = true;
667
0
            penum->ht_landscape.y_pos =
668
0
                fixed2int_pixround_perfect(dda_current(penum->dda.pixel0.y) + penum->x_extent.y);
669
3
        } else {
670
3
            penum->ht_landscape.flipy = false;
671
3
            penum->ht_landscape.y_pos =
672
3
                fixed2int_pixround_perfect(dda_current(penum->dda.pixel0.y));
673
3
        }
674
3
        memset(&(penum->ht_landscape.widths[0]), 0, sizeof(int)*LAND_BITS);
675
3
        penum->ht_landscape.offset_set = false;
676
3
        penum->ht_offset_bits = 0; /* Will get set in call to render */
677
3
        if (code >= 0) {
678
3
#if defined(DEBUG) || defined(PACIFY_VALGRIND)
679
3
            memset(penum->line, 0, LAND_BITS * penum->line_size * spp_out + 16);
680
3
            memset(penum->ht_buffer, 0, penum->line_size * (LAND_BITS>>3) * spp_out);
681
3
            memset(penum->thresh_buffer, 0, LAND_BITS * penum->line_size + 16);
682
3
#endif
683
3
        }
684
149k
    } else {
685
        /* In the portrait case we allocate a single line buffer
686
           in device width, a threshold buffer of the same size
687
           and possibly wider, and the buffer for the halftoned
688
           bits. We have to do a bit of work to ensure a 16 byte
689
           boundary after an offset so that we can make use
690
           of the SSE2 operations for thresholding.  We do the
691
           allocations now to avoid doing them with every line */
692
149k
        dda_ht = penum->dda.pixel0.x;
693
149k
        if (penum->dxx > 0)
694
149k
            dda_translate(dda_ht, -fixed_epsilon);      /* to match rounding in non-fast code */
695
        /* Initialize the ht_landscape stuff to zero */
696
149k
        memset(&(penum->ht_landscape), 0, sizeof(ht_landscape_info_t));
697
149k
        ox = dda_current(dda_ht);
698
149k
        dev_width = gxht_dda_length(&dda_ht, penum->rect.w);
699
        /* Get the bit position so that we can do a copy_mono for
700
           the left remainder and then 16 bit aligned copies for the
701
           rest.  The right remainder will be OK as it will land in
702
           the MSBit positions. Note the #define chunk bits16 in
703
           gdevm1.c.  Allow also for a 15 sample overrun.
704
        */
705
149k
        penum->ht_offset_bits = (-fixed2int_var_rounded(ox)) & (bitmap_raster(1) - 1);
706
149k
        if (penum->ht_offset_bits > 0) {
707
107k
            penum->ht_stride = bitmap_raster((7 + (dev_width + 4)) + (ARCH_SIZEOF_LONG * 8));
708
107k
        } else {
709
42.4k
            penum->ht_stride = bitmap_raster((7 + (dev_width + 2)) + (ARCH_SIZEOF_LONG * 8));
710
42.4k
        }
711
        /* We want to figure out the maximum height that we may
712
           have in taking a single source row and going to device
713
           space */
714
149k
        max_height = (int) ceil(fixed2float(any_abs(penum->dst_height)) /
715
149k
                                            (float) penum->Height);
716
149k
        if (max_height <= 0)
717
6
            return -1;    /* shouldn't happen, but check so we don't div by zero */
718
149k
        if (penum->ht_stride * spp_out > max_int / max_height)
719
0
            return -1;         /* overflow */
720
721
149k
        penum->ht_buffer =
722
149k
                        gs_alloc_bytes(penum->memory,
723
149k
                           (size_t)penum->ht_stride * max_height * spp_out,
724
149k
                           "gxht_thresh");
725
149k
        penum->ht_plane_height = penum->ht_stride * max_height;
726
        /* We want to have 128 bit alignment for our contone and
727
           threshold strips so that we can use SSE operations
728
           in the threshold operation.  Add in a minor buffer and offset
729
           to ensure this.  gs_alloc_bytes provides at least 16
730
           bit alignment, so we may need to move up to 14 bytes.  However, the
731
           HT process is split in two operations: one that involves
732
           the HT of a left remainder, and the rest, which ensures that
733
           we pack the HT data in the bits with no skew for a fast
734
           copy into the gdevm1 device (16 bit copies).  So, we
735
           need to account for those pixels which occur first and which
736
           are NOT aligned for the contone buffer.  After we offset
737
           by this remainder portion we should be 128 bit aligned.
738
           Also allow a 15 sample overrun during the execution.  */
739
149k
        temp = (int) ceil((float) ((dev_width + 15.0) + 15.0)/16.0);
740
149k
        penum->line_size = bitmap_raster(temp * 16 * 8);  /* The stride */
741
149k
        if (penum->line_size > max_int / max_height) {
742
0
            gs_free_object(penum->memory, penum->ht_buffer, "gxht_thresh");
743
0
            penum->ht_buffer = NULL;
744
0
            return -1;         /* thresh_buffer size overflow */
745
0
        }
746
149k
        penum->line = gs_alloc_bytes(penum->memory, penum->line_size * spp_out,
747
149k
                                     "gxht_thresh");
748
149k
        penum->thresh_buffer = gs_alloc_bytes(penum->memory,
749
149k
                                              (size_t)penum->line_size * max_height,
750
149k
                                              "gxht_thresh");
751
149k
        if (penum->line == NULL || penum->thresh_buffer == NULL ||
752
149k
            penum->ht_buffer == NULL) {
753
0
            return -1;
754
149k
        } else {
755
149k
#if defined(DEBUG) || defined(PACIFY_VALGRIND)
756
149k
            memset(penum->line, 0, penum->line_size * spp_out);
757
149k
            memset(penum->ht_buffer, 0, penum->ht_stride * max_height * spp_out);
758
149k
            memset(penum->thresh_buffer, 0, penum->line_size * max_height);
759
149k
#endif
760
149k
        }
761
149k
    }
762
149k
    return code;
763
149k
}
764
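
For the portrait branch above, the buffer sizes come from a few pieces of arithmetic: max_height is the worst-case number of device rows produced by one source row, and the contone/threshold line width is rounded up to whole 16-pixel tiles with room for a 15-sample overrun. A sketch with hypothetical numbers (bitmap_raster() is stood in for by a plain 16-byte round-up; the real macro rounds to the device raster alignment):

#include <math.h>
#include <stddef.h>
#include <stdio.h>

static size_t round_bits_to_16_byte_units(size_t bits)
{
    return ((bits + 127) / 128) * 16;   /* stand-in for bitmap_raster() */
}

int main(void)
{
    int dev_width = 2550, spp_out = 1;                 /* hypothetical */
    double dst_height = 3300.0, src_height = 256.0;
    int max_height = (int)ceil(dst_height / src_height);
    int tiles = (int)ceil((dev_width + 15.0 + 15.0) / 16.0);
    size_t line_size = round_bits_to_16_byte_units((size_t)tiles * 16 * 8);

    printf("max_height %d, line %zu bytes, thresh_buffer %zu bytes\n",
           max_height, (size_t)spp_out * line_size,
           line_size * (size_t)max_height);
    return 0;
}
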
765
static void
766
fill_threshold_buffer(byte *dest_strip, byte *src, byte *src_strip, int src_width,
767
                       int left_offset, int left_width, int num_tiles,
768
                       int right_width)
769
2.41M
{
770
2.41M
    byte *ptr_out_temp = dest_strip;
771
2.41M
    int ii;
772
773
    /* Make sure we don't try and read before the start of the threshold array. This can happen
774
     * if we drop to the beginning of the array, AND we have a negative left_offset. If we do
775
     * have a negative left_offset, this represents an area we won't actually be using, but we need
776
     * to move along the threshold array until we get to the point where we copy data we will use.
777
     * So let's simply avoid reading before the start of the data. We can leave the destination
778
     * buffer uninitialised because we won't be reading from that area. Bug #706795, but the ASAN
779
     * error occurs on a number of input files in the test suite.
780
     */
781
2.41M
    if (src_strip + left_offset < src) {
782
2.07k
        int under = src - (src_strip + left_offset);
783
2.07k
        left_offset += under;
784
2.07k
        ptr_out_temp += under;
785
2.07k
        left_width -= under;
786
2.07k
        if (left_width < 0)
787
0
            left_width = 0;
788
2.07k
    }
789
    /* Left part */
790
2.41M
    memcpy(ptr_out_temp, src_strip + left_offset, left_width);
791
2.41M
    ptr_out_temp += left_width;
792
    /* Now the full parts */
793
732M
    for (ii = 0; ii < num_tiles; ii++){
794
729M
        memcpy(ptr_out_temp, src_strip, src_width);
795
729M
        ptr_out_temp += src_width;
796
729M
    }
797
    /* Now the remainder */
798
2.41M
    memcpy(ptr_out_temp, src_strip, right_width);
799
2.41M
#ifdef PACIFY_VALGRIND
800
2.41M
    ptr_out_temp += right_width;
801
2.41M
    ii = (dest_strip-ptr_out_temp) % (LAND_BITS-1);
802
2.41M
    if (ii > 0)
803
0
        memset(ptr_out_temp, 0, ii);
804
2.41M
#endif
805
2.41M
}
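
The caller in gxht_thresh_planes splits each dest_width run into the three memcpy stages of fill_threshold_buffer above: a partial tile starting at phase dx, some whole tiles, and a right-hand remainder. A small check of that decomposition with hypothetical numbers (not part of the source):

#include <assert.h>

int main(void)
{
    int thresh_width = 16, dest_width = 100, dx = 5;   /* hypothetical */
    int left_rem_end = (dx + dest_width < thresh_width) ? dx + dest_width
                                                        : thresh_width;
    int left_width = left_rem_end - dx;
    int num_full_tiles = (dest_width - left_width) / thresh_width;
    int right_tile_width = dest_width - num_full_tiles * thresh_width
                                      - left_width;

    /* The three pieces always add back up to the full run. */
    assert(left_width + num_full_tiles * thresh_width + right_tile_width
           == dest_width);
    return 0;
}
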
806
/* This only moves the data but does not do a reset of the variables.  Used
807
   for the case where we have multiple bands of data (e.g. CMYK output) */
808
static void
809
move_landscape_buffer(ht_landscape_info_t *ht_landscape, byte *contone_align,
810
                       int data_length)
811
0
{
812
0
    int k;
813
0
    int position_curr, position_new;
814
815
0
    if (ht_landscape->index < 0) {
816
        /* Moving right to left, move column to far right */
817
0
        position_curr = ht_landscape->curr_pos + 1;
818
0
        position_new = LAND_BITS-1;
819
0
    } else {
820
        /* Moving left to right, move column to far left */
821
0
        position_curr = ht_landscape->curr_pos - 1;
822
0
        position_new = 0;
823
0
    }
824
0
    if (position_curr != position_new) {
825
0
        for (k = 0; k < data_length; k++) {
826
0
                contone_align[position_new] = contone_align[position_curr];
827
0
                position_curr += LAND_BITS;
828
0
                position_new += LAND_BITS;
829
0
        }
830
0
    }
831
0
}
832
833
834
/* If we are in here, we had data left over.  Move it to the proper position
835
   and get ht_landscape_info_t set properly */
836
static void
837
reset_landscape_buffer(ht_landscape_info_t *ht_landscape, byte *contone_align,
838
                       int data_length, int num_used)
839
0
{
840
0
    int delta;
841
0
    int curr_x_pos = ht_landscape->xstart;
842
843
0
    if (ht_landscape->index < 0) {
844
        /* Moving right to left, move column to far right */
845
0
        delta = ht_landscape->count - num_used;
846
0
        memset(&(ht_landscape->widths[0]), 0, sizeof(int)*LAND_BITS);
847
0
        ht_landscape->widths[LAND_BITS-1] = delta;
848
0
        ht_landscape->curr_pos = LAND_BITS-2;
849
0
        ht_landscape->xstart = curr_x_pos - num_used;
850
0
    } else {
851
        /* Moving left to right, move column to far left */
852
0
        delta = ht_landscape->count - num_used;
853
0
        memset(&(ht_landscape->widths[0]), 0, sizeof(int)*LAND_BITS);
854
0
        ht_landscape->widths[0] = delta;
855
0
        ht_landscape->curr_pos = 1;
856
0
        ht_landscape->xstart = curr_x_pos + num_used;
857
0
    }
858
0
    ht_landscape->count = delta;
859
0
    ht_landscape->num_contones = 1;
860
0
}
861
862
/* This performs a thresholding operation on multiple planes of data and
863
   stores the bits into a planar buffer which can then be used for
864
   copy_planes */
865
int
866
gxht_thresh_planes(gx_image_enum *penum, fixed xrun,
867
                   int dest_width, int dest_height,
868
                   byte *thresh_align, gx_device * dev, int offset_contone[],
869
                   int contone_stride)
870
1.89M
{
871
1.89M
    int thresh_width, thresh_height, dx;
872
1.89M
    int left_rem_end, left_width, vdi;
873
1.89M
    int num_full_tiles, right_tile_width;
874
1.89M
    int k, jj, dy, j;
875
1.89M
    byte *thresh_tile;
876
1.89M
    int position;
877
1.89M
    bool replicate_tile;
878
1.89M
    image_posture posture = penum->posture;
879
1.89M
    const int y_pos = penum->yci;
880
1.89M
    int width = 0; /* Init to silence compiler warnings */
881
1.89M
    byte *ptr_out, *row_ptr, *ptr_out_temp;
882
1.89M
    byte *threshold;
883
1.89M
    int init_tile, in_row_offset, ii, num_tiles, tile_remainder;
884
1.89M
    int offset_bits = penum->ht_offset_bits;
885
1.89M
    byte *halftone;
886
1.89M
    int dithered_stride = penum->ht_stride;
887
1.89M
    bool is_planar_dev = dev->num_planar_planes;
888
1.89M
    gx_color_index dev_white = gx_device_white(dev);
889
1.89M
    gx_color_index dev_black = gx_device_black(dev);
890
1.89M
    int spp_out = dev->color_info.num_components;
891
1.89M
    byte *contone_align = NULL; /* Init to silence compiler warnings */
892
1.89M
    gx_device_halftone *pdht = gx_select_dev_ht(penum->pgs);
893
894
    /* Go ahead and fill the threshold line buffer with tiled threshold values.
895
       First just grab the row or column that we are going to tile with and
896
       then do memcpy into the buffer */
897
898
    /* Figure out the tile steps.  Left offset, Number of tiles, Right offset. */
899
1.89M
    switch (posture) {
900
1.89M
        case image_portrait:
901
1.89M
            vdi = penum->hci;
902
            /*  Iterate over the vdi and fill up our threshold buffer.  We
903
                 also need to loop across the planes of data */
904
3.84M
            for (j = 0; j < spp_out; j++) {
905
1.95M
                bool threshold_inverted = pdht->components[j].corder.threshold_inverted;
906
907
1.95M
                thresh_width = pdht->components[j].corder.width;
908
1.95M
                thresh_height = pdht->components[j].corder.full_height;
909
1.95M
                halftone = penum->ht_buffer + j * vdi * dithered_stride;
910
                /* Compute the tiling positions with dest_width */
911
1.95M
                dx = (fixed2int_var_rounded(xrun) + penum->pgs->screen_phase[0].x) % thresh_width;
912
                /* Left remainder part */
913
1.95M
                left_rem_end = min(dx + dest_width, thresh_width);
914
                /* The left width of our tile part */
915
1.95M
                left_width = left_rem_end - dx;
916
                /* Now the middle part */
917
1.95M
                num_full_tiles =
918
1.95M
                    (int)fastfloor((dest_width - left_width)/ (float) thresh_width);
919
                /* Now the right part */
920
1.95M
                right_tile_width = dest_width -  num_full_tiles * thresh_width -
921
1.95M
                                   left_width;
922
                /* Get the proper threshold for the colorant count */
923
1.95M
                threshold = pdht->components[j].corder.threshold;
924
1.95M
                if (threshold == NULL)
925
0
                    return_error(gs_error_unregistered);
926
                /* Point to the proper contone data */
927
1.95M
                contone_align = penum->line + contone_stride * j +
928
1.95M
                                offset_contone[j];
929
4.36M
                for (k = 0; k < vdi; k++) {
930
                    /* Get a pointer to our tile row */
931
2.41M
                    dy = (penum->yci + k -
932
2.41M
                          penum->pgs->screen_phase[0].y) % thresh_height;
933
2.41M
                    if (dy < 0)
934
29
                        dy += thresh_height;
935
2.41M
                    thresh_tile = threshold + thresh_width * dy;
936
                    /* Fill the buffer, can be multiple rows.  Make sure
937
                       to update with stride */
938
2.41M
                    position = contone_stride * k;
939
                    /* Tile into the 128 bit aligned threshold strip */
940
2.41M
                    fill_threshold_buffer(&(thresh_align[position]), threshold,
941
2.41M
                                           thresh_tile, thresh_width, dx, left_width,
942
2.41M
                                           num_full_tiles, right_tile_width);
943
2.41M
                }
944
                /* Apply the threshold operation */
945
1.95M
                if (offset_bits > dest_width)
946
10.4k
                    offset_bits = dest_width;
947
948
1.95M
                if (threshold_inverted ||
949
1.95M
                    (dev->color_info.polarity == GX_CINFO_POLARITY_SUBTRACTIVE && is_planar_dev)) {
950
83.1k
                    gx_ht_threshold_row_bit_sub(contone_align, thresh_align, contone_stride,
951
83.1k
                                      halftone, dithered_stride, dest_width, vdi,
952
83.1k
                                      offset_bits);
953
1.87M
                } else {
954
1.87M
                    gx_ht_threshold_row_bit(contone_align, thresh_align, contone_stride,
955
1.87M
                          halftone, dithered_stride, dest_width, vdi,
956
1.87M
                          offset_bits);
957
1.87M
                }
958
1.95M
            }
959
            /* FIXME: An improvement here would be to generate the initial
960
             * offset_bits at the correct offset within the byte so that they
961
             * align with the remainder of the line. This would mean not
962
             * always packing them into the first offset_bits (in MSB order)
963
             * of our 16 bit word, but rather into the last offset_bits
964
             * (in MSB order) (except when the entire run is small!).
965
             *
966
             * This would enable us to do just one aligned copy_mono call for
967
             * the entire scanline. */
968
            /* Now do the copy mono or copy plane operation */
969
            /* First the left remainder bits */
970
1.89M
            if (offset_bits > 0) {
971
1.38M
                int x_pos = fixed2int_var_rounded(xrun);
972
1.38M
                if (!is_planar_dev) {
973
1.37M
                    (*dev_proc(dev, copy_mono)) (dev, penum->ht_buffer, 0, dithered_stride,
974
1.37M
                                                 gx_no_bitmap_id, x_pos, y_pos,
975
1.37M
                                                 offset_bits, vdi, dev_white,
976
1.37M
                                                 dev_black);
977
1.37M
                } else {
978
11.7k
                    (*dev_proc(dev, copy_planes)) (dev, penum->ht_buffer, 0, dithered_stride,
979
11.7k
                                                 gx_no_bitmap_id, x_pos, y_pos,
980
11.7k
                                                 offset_bits, vdi, vdi);
981
11.7k
                }
982
1.38M
            }
983
1.89M
            if ((dest_width - offset_bits) > 0 ) {
984
                /* Now the primary aligned bytes */
985
1.88M
                int curr_width = dest_width - offset_bits;
986
1.88M
                int x_pos = fixed2int_var_rounded(xrun) + offset_bits;
987
                /* FIXME: This assumes the allowed offset_bits will always be <= 16 */
988
1.88M
                int xoffs = offset_bits > 0 ? 16 : 0;
989
990
1.88M
                if (!is_planar_dev) {
991
1.86M
                    (*dev_proc(dev, copy_mono)) (dev, penum->ht_buffer, xoffs, dithered_stride,
992
1.86M
                                                 gx_no_bitmap_id, x_pos, y_pos,
993
1.86M
                                                 curr_width, vdi, dev_white,
994
1.86M
                                                 dev_black);
995
1.86M
                } else {
996
20.6k
                    (*dev_proc(dev, copy_planes)) (dev, penum->ht_buffer, xoffs, dithered_stride,
997
20.6k
                                                 gx_no_bitmap_id, x_pos, y_pos,
998
20.6k
                                                 curr_width, vdi, vdi);
999
20.6k
                }
1000
1.88M
            }
1001
1002
1.89M
            break;
1003
2
        case image_landscape:
1004
            /* Go ahead and paint the chunk if we have LAND_BITS values or a
1005
             * partial to get us in sync with the 1 bit devices' 16 bit
1006
             * positions. */
1007
2
            vdi = penum->wci;
1008
                /* Now do the halftoning into our buffer.  We basically check
1009
                   first if we have enough data or are all done */
1010
3
            while ( (penum->ht_landscape.count >= LAND_BITS ||
1011
3
                   ((penum->ht_landscape.count >= offset_bits) &&
1012
3
                    penum->ht_landscape.offset_set))) {
1013
                /* Go ahead and 2D tile in the threshold buffer at this time */
1014
                /* Always work the tiling from the upper left corner of our
1015
                   LAND_BITS columns */
1016
2
                for (j = 0; j < spp_out; j++) {
1017
1
                    halftone = penum->ht_buffer +
1018
1
                                   j * penum->ht_plane_height * (LAND_BITS>>3);
1019
1
                    thresh_width = pdht->components[j].corder.width;
1020
1
                    thresh_height =
1021
1
                          pdht->components[j].corder.full_height;
1022
                    /* Get the proper threshold for the colorant count */
1023
1
                    threshold = pdht->components[j].corder.threshold;
1024
1
                    if (threshold == NULL)
1025
0
                        return_error(gs_error_unregistered);
1026
                    /* Point to the proper contone data */
1027
1
                    contone_align = penum->line + offset_contone[j] +
1028
1
                                      LAND_BITS * j * contone_stride;
1029
1
                    if (penum->ht_landscape.offset_set) {
1030
1
                        width = offset_bits;
1031
1
                    } else {
1032
0
                        width = LAND_BITS;
1033
0
                    }
1034
1
                    if (penum->y_extent.x < 0) {
1035
0
                        dx = penum->ht_landscape.xstart - width + 1;
1036
1
                    } else {
1037
1
                        dx = penum->ht_landscape.xstart;
1038
1
                    }
1039
1
                    dx = (dx + penum->pgs->screen_phase[0].x) % thresh_width;
1040
1
                    if (dx < 0)
1041
0
                        dx += thresh_width;
1042
1
                    dy = (penum->ht_landscape.y_pos -
1043
1
                              penum->pgs->screen_phase[0].y) % thresh_height;
1044
1
                    if (dy < 0)
1045
0
                        dy += thresh_height;
1046
                    /* Left remainder part */
1047
1
                    left_rem_end = min(dx + LAND_BITS, thresh_width);
1048
1
                    left_width = left_rem_end - dx;
1049
                    /* Now the middle part */
1050
1
                    num_full_tiles = (LAND_BITS - left_width) / thresh_width;
1051
                    /* Now the right part */
1052
1
                    right_tile_width =
1053
1
                        LAND_BITS - num_full_tiles * thresh_width - left_width;
1054
                    /* Now loop over the y stuff */
1055
1
                    ptr_out = thresh_align;
1056
                    /* Do this in three parts.  We do a top part, followed by
1057
                       larger mem copies followed by a bottom partial. After
1058
                       a slower initial fill we are able to do larger faster
1059
                       expansions */
1060
1
                    if (dest_height <= 2 * thresh_height) {
1061
1
                        init_tile = dest_height;
1062
1
                        replicate_tile = false;
1063
1
                    } else {
1064
0
                        init_tile = thresh_height;
1065
0
                        replicate_tile = true;
1066
0
                    }
1067
4
                    for (jj = 0; jj < init_tile; jj++) {
1068
3
                        in_row_offset = (jj + dy) % thresh_height;
1069
3
                        row_ptr = threshold + in_row_offset * thresh_width;
1070
3
                        ptr_out_temp = ptr_out;
1071
                        /* Left part */
1072
3
                        memcpy(ptr_out_temp, row_ptr + dx, left_width);
1073
3
                        ptr_out_temp += left_width;
1074
                        /* Now the full tiles */
1075
30
                        for (ii = 0; ii < num_full_tiles; ii++) {
1076
27
                            memcpy(ptr_out_temp, row_ptr, thresh_width);
1077
27
                            ptr_out_temp += thresh_width;
1078
27
                        }
1079
                        /* Now the remainder */
1080
3
                        memcpy(ptr_out_temp, row_ptr, right_tile_width);
1081
3
                        ptr_out += LAND_BITS;
1082
3
                    }
1083
1
                    if (replicate_tile) {
1084
                        /* Find out how many we need to copy */
1085
0
                        num_tiles =
1086
0
                            (int)fastfloor((float) (dest_height - thresh_height)/ (float) thresh_height);
1087
0
                        tile_remainder = dest_height - (num_tiles + 1) * thresh_height;
1088
0
                        for (jj = 0; jj < num_tiles; jj ++) {
1089
0
                            memcpy(ptr_out, thresh_align, LAND_BITS * thresh_height);
1090
0
                            ptr_out += LAND_BITS * thresh_height;
1091
0
                        }
1092
                        /* Now fill in the remainder */
1093
0
                        memcpy(ptr_out, thresh_align, LAND_BITS * tile_remainder);
1094
0
                    }
1095
                    /* Apply the threshold operation */
1096
1
                    if (dev->color_info.polarity == GX_CINFO_POLARITY_SUBTRACTIVE
1097
1
                        && is_planar_dev) {
1098
0
                        gx_ht_threshold_landscape_sub(contone_align, thresh_align,
1099
0
                                            &(penum->ht_landscape), halftone, dest_height);
1100
1
                    } else {
1101
1
                        gx_ht_threshold_landscape(contone_align, thresh_align,
1102
1
                                            &(penum->ht_landscape), halftone, dest_height);
1103
1
                    }
1104
                    /* We may have a line left over that has to be maintained
1105
                       due to line replication in the resolution conversion. */
1106
1
                    if (width != penum->ht_landscape.count) {
1107
                        /* move the line do not reset the stuff */
1108
0
                        move_landscape_buffer(&(penum->ht_landscape),
1109
0
                                              contone_align, dest_height);
1110
0
                    }
1111
1
                }
1112
                /* Perform the copy mono */
1113
1
                if (penum->ht_landscape.index < 0) {
1114
0
                    if (!is_planar_dev) {
1115
0
                        (*dev_proc(dev, copy_mono))
1116
0
                                       (dev, penum->ht_buffer, 0, LAND_BITS>>3,
1117
0
                                        gx_no_bitmap_id,
1118
0
                                        penum->ht_landscape.xstart - width + 1,
1119
0
                                        penum->ht_landscape.y_pos,
1120
0
                                        width, dest_height,
1121
0
                                        dev_white, dev_black);
1122
0
                    } else {
1123
0
                        (*dev_proc(dev, copy_planes))
1124
0
                                       (dev, penum->ht_buffer, 0, LAND_BITS>>3,
1125
0
                                        gx_no_bitmap_id,
1126
0
                                        penum->ht_landscape.xstart - width + 1,
1127
0
                                        penum->ht_landscape.y_pos,
1128
0
                                        width, dest_height,
1129
0
                                        penum->ht_plane_height);
1130
0
                    }
1131
1
                } else {
1132
1
                    if (!is_planar_dev) {
1133
1
                        (*dev_proc(dev, copy_mono)) (dev, penum->ht_buffer, 0, LAND_BITS>>3,
1134
1
                                                     gx_no_bitmap_id,
1135
1
                                                     penum->ht_landscape.xstart,
1136
1
                                                     penum->ht_landscape.y_pos,
1137
1
                                                     width, dest_height,
1138
1
                                                     dev_white, dev_black);
1139
1
                    } else {
1140
0
                        (*dev_proc(dev, copy_planes)) (dev, penum->ht_buffer, 0, LAND_BITS>>3,
1141
0
                                                     gx_no_bitmap_id,
1142
0
                                                     penum->ht_landscape.xstart,
1143
0
                                                     penum->ht_landscape.y_pos,
1144
0
                                                     width, dest_height,
1145
0
                                                     penum->ht_plane_height);
1146
0
                    }
1147
1
                }
1148
1
                penum->ht_landscape.offset_set = false;
1149
1
                if (width != penum->ht_landscape.count) {
1150
0
                    reset_landscape_buffer(&(penum->ht_landscape),
1151
0
                                           contone_align, dest_height,
1152
0
                                           width);
1153
1
                } else {
1154
                    /* Reset the whole buffer */
1155
1
                    penum->ht_landscape.count = 0;
1156
1
                    if (penum->ht_landscape.index < 0) {
1157
                        /* Going right to left */
1158
0
                        penum->ht_landscape.curr_pos = LAND_BITS-1;
1159
1
                    } else {
1160
                        /* Going left to right */
1161
1
                        penum->ht_landscape.curr_pos = 0;
1162
1
                    }
1163
1
                    penum->ht_landscape.num_contones = 0;
1164
1
                    memset(&(penum->ht_landscape.widths[0]), 0, sizeof(int)*LAND_BITS);
1165
1
                }
1166
1
            }
1167
2
            break;
1168
2
        default:
1169
0
            return gs_rethrow(-1, "Invalid orientation for thresholding");
1170
1.89M
    }
1171
1.89M
    return 0;
1172
1.89M
}
1173
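
A note on the dx/dy screen-phase arithmetic in gxht_thresh_planes above: C's % operator can yield a negative remainder when the left operand is negative, which is why the code adds thresh_width or thresh_height back after the modulo. A tiny illustration (hypothetical values, not part of the source):

#include <assert.h>

int main(void)
{
    int thresh_height = 16;
    int dy = (3 - 7) % thresh_height;   /* -4 under C99 truncating division */

    if (dy < 0)
        dy += thresh_height;            /* wrap into [0, thresh_height) */
    assert(dy == 12);
    return 0;
}
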
1174
int gxht_dda_length(gx_dda_fixed *dda, int src_size)
1175
2.04M
{
1176
2.04M
    gx_dda_fixed d = (*dda);
1177
2.04M
    dda_advance(d, src_size);
1178
2.04M
    return abs(fixed2int_var_rounded(dda_current(d)) - fixed2int_var_rounded(dda_current(*dda)));
1179
2.04M
}