Coverage Report

Created: 2025-04-22 06:20

/src/libspectre/ghostscript/devices/rinkj/evenbetter-rll.c
Line
Count
Source (jump to first uncovered line)
1
/* Copyright (C) 2001-2020 Artifex Software, Inc.
2
   All Rights Reserved.
3
4
   This software is provided AS-IS with no warranty, either express or
5
   implied.
6
7
   This software is distributed under license and may not be copied,
8
   modified or distributed except as expressly authorized under the terms
9
   of the license contained in the file LICENSE in this distribution.
10
11
   Refer to licensing information at http://www.artifex.com or contact
12
   Artifex Software, Inc.,  1305 Grant Avenue - Suite 200, Novato,
13
   CA 94945, U.S.A., +1(415)492-9861, for further information.
14
*/
15
16
17
/* Testbed implementation of Even Better Screening. */
18
19
/*
20
 * Code in this module is covered by US Patents 5,055,942 and
21
 * 5,917,614, and corresponding international patents.
22
 */
23
24
#include <stdio.h>
25
#include <stdlib.h>
26
#include <string.h>
27
#include <math.h>
28
#include "evenbetter-rll.h"
29
30
/* Set this define if compiling with AltiVec optimizations. */
31
#define noUSE_AVEC
32
33
/* Set this define if compiling with SSE optimizations. */
34
#define noUSE_SSE2
35
36
0
#define EVENBETTER_VERSION 133
37
38
0
#define EVEN_SHIFT 16
39
0
#define IMO_SHIFT 14
40
#define EVEN_RB_CAP (1 << (EVEN_SHIFT - 2))
41
42
#define FANCY_COUPLING
43
44
#if defined(USE_AVEC) || defined(USE_SSE2)
45
#define USE_VECTOR
46
#endif
47
48
#ifdef USE_AVEC
49
#include "eb_avec.h"
50
51
#endif
52
53
#ifdef USE_SSE2
54
typedef struct _eb_ctx_sse2 eb_ctx_sse2;
55
typedef struct _eb_srcbuf eb_srcbuf;
56
57
int eb_test_sse2(void);
58
int eb_sse2_core(eb_ctx_sse2 *ctx, unsigned char **out, eb_srcbuf *in,
59
                 int offset);
60
int eb_sse2_rev_rs(eb_ctx_sse2 *ctx, int offset);
61
int eb_sse2_set_daz(void);
62
void eb_sse2_restore_daz(int save_mxcsr);
63
64
/*
 * State for one strip of up to four planes on the SSE2 path.  Every
 * four-element array holds one lane per plane of the strip.
 * NOTE(review): the external eb_sse2_* routines presumably depend on this
 * exact field order and size -- confirm before changing the layout.
 */
struct _eb_ctx_sse2 {
  int xs;                 /* copied from params->source_width */

  /* Line buffers, each allocated as 16 * (xs + 32) bytes.  iir_line is
     filled through a float pointer even though it is declared int *. */
  int *iir_line;
  int *r_line;
  int *a_line;
  int *b_line;

  char *skip_line;        /* one flag per group of 16 output pixels */
  int dummy[2];           /* presumably alignment padding -- TODO confirm */

  /* One table per plane with three floats per source value; slots past
     the last plane of the strip are NULL. */
  float *luts[4];

  float e[4];             /* initialised to 0.0 */
  float e_i_1[4];         /* initialised to 0.0 */
  int r[4];               /* initialised to 0 */
  int a[4];               /* initialised to 1 */
  int b[4];               /* initialised to aspect * aspect */
  int ones[4];            /* constant 1 in every lane */
  int twos[4];            /* constant 2 in every lane */
  int aspect2[4];         /* aspect * aspect in every lane */
  float ehi[4];           /* initialised to 1.1 */
  float elo[4];           /* initialised to -0.1 */
  float ohi[4];           /* levels - 1 */
  float r_mul[4];         /* 1 / (aspect * 2^(6 - even_c1_scale)) */
  float kernel[4];        /* 1/16, 3/16, 5/16, 7/16 */
  unsigned int seed1[4];  /* (lane << 8) + 0x7000 */
  unsigned int seed2[4];  /* (lane << 16) + 0x9000 */
};
89
90
/*
 * Staging buffer handed to the vector core: 16 pixels by 4 planes for each
 * of the three per-pixel inputs, addressed as [pixel * 4 + plane].
 */
struct _eb_srcbuf {
  float im[64];   /* first entry of each lut triple */
  float rb[64];   /* second entry of each lut triple */
  float rs[64];   /* third entry of each lut triple */
  int dummy[3];   /* slack; the line routines may shift the start of the
                     buffer upward to reach a 16-byte boundary */
};
96
97
#endif
98
99
typedef struct _EBPlaneCtx EBPlaneCtx;
100
typedef unsigned int uint32;
101
102
struct _EvenBetterCtx {
103
  int source_width;
104
  int dest_width;
105
  int n_planes;
106
  int levels; /* Number of levels on output, <= 256 */
107
  EBPlaneCtx **plane_ctx;
108
  int aspect;
109
  int *strengths;
110
  int even_elo;
111
  int even_ehi;
112
  int *c_line;
113
114
  int even_c1;
115
  int do_shadows;
116
117
  uint32  seed1;
118
  uint32  seed2;
119
120
  FILE *dump_file;
121
  EbDumpLevel dump_level;
122
123
#ifdef USE_SSE2
124
  eb_ctx_sse2 **sse2_ctx;
125
  int using_vectors;
126
#endif
127
#ifdef USE_AVEC
128
  eb_ctx_avec **avec_ctx;
129
  int using_vectors;
130
#endif
131
};
132
133
/* Per-plane buffers and lookup tables used by the scalar line routines. */
struct _EBPlaneCtx {
  int source_width;
  int dest_width;
  int *rb_line;
  int *iir_line;          /* per-pixel error carried to the next line */
  int *r_line;            /* per-pixel distance state (r) */
  int *a_line;            /* per-pixel distance state (a) */
  int *b_line;            /* per-pixel distance state (b) */
  int *r_line_sh;         /* NOTE(review): the _sh lines look like the */
  int *a_line_sh;         /* shadow counterparts of r/a/b -- confirm   */
  int *b_line_sh;
  int *lut;               /* source value -> image level */
  int *rb_lut;            /* source value -> R bias */
  char *rs_lut;           /* source value -> random shift */
  int *white_count_line;  /* per 16-pixel group, consecutive white lines */
};
149
150
/*
 * Allocate SIZE bytes at an address that is a multiple of the alignment.
 *
 * The block is over-allocated with malloc().  The distance from the raw
 * malloc() pointer to the returned pointer is stored as an int directly
 * below the returned address, which is where eb_free_aligned() reads it.
 *
 * ALIGN is honoured when it is a power of two larger than 16; any other
 * value falls back to 16, the alignment this routine has always aimed for.
 * Returns NULL when SIZE is negative or malloc() fails.
 */
void *
eb_malloc_aligned(int size, int align)
{
  size_t alignment = 16;
  size_t addr;
  size_t aligned;
  char *alloced;
  void *result;
  int pad;

  if (size < 0)
    return NULL;
  if (align > 16 && (align & (align - 1)) == 0)
    alignment = (size_t)align;

  /* Payload, the stored pad, and the worst-case alignment slack. */
  alloced = malloc((size_t)size + alignment + sizeof(int));
  if (alloced == NULL)
    return NULL;

  /* Round up past the slot that holds the pad so there is always room
     for it below the returned pointer. */
  addr = (size_t)alloced;
  aligned = (addr + sizeof(int) + alignment - 1) & ~(alignment - 1);
  pad = (int)(aligned - addr);

  result = alloced + pad;
  ((int *)result)[-1] = pad;
  return result;
}
164
165
/*
 * Release a block obtained from eb_malloc_aligned().
 *
 * The int stored directly below P records how far P lies above the pointer
 * that malloc() returned; stepping back by that amount recovers the pointer
 * to pass to free().  As with free(), a NULL argument is ignored so that
 * cleanup code may call this on members that were never allocated.
 */
void
eb_free_aligned(void *p)
{
  int pad;

  if (p == NULL)
    return;
  pad = ((int *)p)[-1];
  free((char *)p - pad);
}
171
172
static double
173
eb_compute_rbscale(const EvenBetterParams *params)
174
0
{
175
0
  double rbscale = params->rbscale;
176
177
0
  if (rbscale == 0.0)
178
0
    {
179
0
      rbscale = params->aspect == 1 ? 0.95 :
180
0
        params->aspect == 2 ? 1.8 :
181
0
        params->aspect == 4 ? 3.6 : 1;
182
0
    }
183
0
  return rbscale;
184
0
}
185
186
static int
187
eb_compute_randshift(int nl, int rs_base, int do_shadows, int levels)
188
0
{
189
0
  int rs = rs_base;
190
0
  if ((nl > (90 << (EVEN_SHIFT - 10)) &&
191
0
       nl < (129 << (EVEN_SHIFT - 10))) ||
192
0
      (nl > (162 << (EVEN_SHIFT - 10)) &&
193
0
       nl < (180 << (EVEN_SHIFT - 10))))
194
0
    rs--;
195
0
  else if (nl > (321 << (EVEN_SHIFT - 10)) &&
196
0
           nl < (361 << (EVEN_SHIFT - 10)))
197
0
    {
198
0
      rs--;
199
0
      if (nl > (331 << (EVEN_SHIFT - 10)) &&
200
0
          nl < (351 << (EVEN_SHIFT - 10)))
201
0
        rs--;
202
0
    }
203
0
  else if ((do_shadows ||
204
0
            nl == (levels - 1) << EVEN_SHIFT) &&
205
0
           nl > ((levels - 1) << EVEN_SHIFT) -
206
0
           (1 << (EVEN_SHIFT - 2)))
207
0
    {
208
      /* don't add randomness in extreme shadows */
209
0
    }
210
0
  else if ((nl > (3 << (EVEN_SHIFT - 2))))
211
0
    {
212
0
      nl -= (nl + (1 << (EVEN_SHIFT - 2))) & -(1 << (EVEN_SHIFT - 1));
213
0
      if (nl < 0) nl = -nl;
214
0
      if (nl < (1 << (EVEN_SHIFT - 4))) rs--;
215
0
      if (nl < (1 << (EVEN_SHIFT - 5))) rs--;
216
0
      if (nl < (1 << (EVEN_SHIFT - 6))) rs--;
217
0
    }
218
0
  else
219
0
    {
220
0
      if (nl < (3 << (EVEN_SHIFT - 3))) nl += 1 << (EVEN_SHIFT - 2);
221
0
      nl = nl - (1 << (EVEN_SHIFT - 1));
222
0
      if (nl < 0) nl = -nl;
223
0
      if (nl < (1 << (EVEN_SHIFT - 4))) rs--;
224
0
      if (nl < (1 << (EVEN_SHIFT - 5))) rs--;
225
0
      if (nl < (1 << (EVEN_SHIFT - 6))) rs--;
226
0
    }
227
0
  return rs;
228
0
}
229
230
#ifdef USE_SSE2
231
static eb_ctx_sse2 *
232
eb_ctx_sse2_new(const EvenBetterParams *params, int start_plane, int end_plane)
233
{
234
  int xs = params->source_width;
235
  int aspect2 = params->aspect * params->aspect;
236
  eb_ctx_sse2 *ctx;
237
  int i;
238
  double im_scale;
239
  float r_mul = 1.0 / (params->aspect * (1 << (6 - params->even_c1_scale)));
240
  double rbscale = eb_compute_rbscale(params);
241
  int rs_base;
242
243
  ctx = (eb_ctx_sse2 *)eb_malloc_aligned(sizeof(eb_ctx_sse2), 16);
244
  ctx->xs = xs;
245
  for (i = 0; i < 4; i++)
246
    {
247
      ctx->e[i] = 0.0;
248
      ctx->e_i_1[i] = 0.0;
249
      ctx->r[i] = 0;
250
      ctx->a[i] = 1;
251
      ctx->b[i] = aspect2;
252
      ctx->ones[i] = 1;
253
      ctx->twos[i] = 2;
254
      ctx->aspect2[i] = aspect2;
255
      ctx->ohi[i] = params->levels - 1;
256
      ctx->ehi[i] = 1.1;
257
      ctx->elo[i] = -0.1;
258
      ctx->r_mul[i] = r_mul;
259
      ctx->seed1[i] = (i << 8) + 0x7000;
260
      ctx->seed2[i] = (i << 16) + 0x9000;
261
    }
262
  ctx->kernel[0] = 1.0 / 16;
263
  ctx->kernel[1] = 3.0 / 16;
264
  ctx->kernel[2] = 5.0 / 16;
265
  ctx->kernel[3] = 7.0 / 16;
266
267
  im_scale = (params->levels - 1) * 1.0 / (1 << 24);
268
  rs_base = 35 - EVEN_SHIFT - params->rand_scale;
269
270
  for (i = start_plane; i < end_plane; i++)
271
    {
272
      float *lut = (float *)malloc((ET_SRC_MAX + 1) * sizeof(float) * 3);
273
      int j;
274
      ctx->luts[i - start_plane] = lut;
275
276
      for (j = 0; j < ET_SRC_MAX + 1; j++)
277
        {
278
          double g = ((1 << 24) - params->luts[i][j]) * im_scale;
279
          int nl, rs;
280
281
          lut[j * 3] = g;
282
          if (g == 0.0)
283
            lut[j * 3 + 1] = 0.5;
284
          else
285
            lut[j * 3 + 1] = 0.5 - r_mul * rbscale / g;
286
287
          nl = (params->levels - 1 - g) * (1 << EVEN_SHIFT);
288
          rs = eb_compute_randshift(nl, rs_base,
289
                                    params->do_shadows, params->levels);
290
291
          lut[j * 3 + 2] = 1.0 / (1 << EVEN_SHIFT) / (1 << rs);
292
        }
293
    }
294
  for (i = i - start_plane; i < 4; i++)
295
    ctx->luts[i] = NULL;
296
297
  ctx->iir_line = (int *)eb_malloc_aligned(16 * (xs + 32), 16);
298
  ctx->a_line = (int *)eb_malloc_aligned(16 * (xs + 32), 16);
299
  ctx->b_line = (int *)eb_malloc_aligned(16 * (xs + 32), 16);
300
  ctx->r_line = (int *)eb_malloc_aligned(16 * (xs + 32), 16);
301
  for (i = 0; i < (xs + 32) * 4; i++)
302
    {
303
      ((float *)ctx->iir_line)[i] = 0;
304
      ctx->a_line[i] = 1;
305
      ctx->b_line[i] = aspect2;
306
      ctx->r_line[i] = 0;
307
    }
308
309
  ctx->skip_line = (char *)malloc((xs + 15) & -16);
310
311
  return ctx;
312
}
313
314
static void
315
eb_ctx_sse2_free(eb_ctx_sse2 *ctx)
316
{
317
  int i;
318
319
  for (i = 0; i < 4; i++)
320
    free(ctx->luts[i]);
321
  eb_free_aligned(ctx->iir_line);
322
  eb_free_aligned(ctx->a_line);
323
  eb_free_aligned(ctx->b_line);
324
  eb_free_aligned(ctx->r_line);
325
  free(ctx->skip_line);
326
  eb_free_aligned(ctx);
327
}
328
#endif
329
330
#ifdef USE_AVEC
331
static eb_ctx_avec *
332
eb_ctx_avec_new(const EvenBetterParams *params, int start_plane, int end_plane)
333
{
334
  int xs = params->source_width;
335
  int aspect2 = params->aspect * params->aspect;
336
  eb_ctx_avec *ctx;
337
  int i;
338
  double im_scale;
339
  double k;
340
  float imscale1, imscale2, rbmul, rsbase;
341
  float r_mul = 1.0 / (params->aspect * (1 << (6 - params->even_c1_scale)));
342
  double rbscale = eb_compute_rbscale(params);
343
  vector unsigned int zero = vec_splat_u32(0);
344
  const vector float kernel = { 1.0 / 16, 3.0 / 16, 5.0 / 16, 7.0 / 16 };
345
  vector float almostone = { 255.0/256, 255.0/256, 255.0/256, 255.0/256 };
346
  int rs_base;
347
348
  ctx = (eb_ctx_avec *)eb_malloc_aligned(sizeof(eb_ctx_avec), 16);
349
  ctx->xs = xs;
350
351
  ctx->e = (vector float) zero;
352
  ctx->e_i_1 = (vector float) zero;
353
  ctx->r = zero;
354
  ctx->a = zero;
355
  im_scale = (params->levels - 1) * (1.0 / (1 << 24));
356
  rs_base = 35 - EVEN_SHIFT - params->rand_scale;
357
358
  if (params->gamma == 1.0)
359
    k = 0;
360
  else if (params->gamma == 1.8)
361
    k = 0.835;
362
  else if (params->gamma == 2.0)
363
    k = 1.0;
364
  else
365
    /* this shouldn't happen! */
366
    k = 0;
367
368
  for (;;)
369
    {
370
      vector float foff, f0, f1;
371
372
      imscale1 = (1 - k) * (params->levels - 1) * (256.0 / 255.0);
373
      imscale2 = k * (params->levels - 1) * sqrt(256.0 / 255.0);
374
      for (i = 0; i < 4; i++)
375
        {
376
          ((float *)&ctx->imscale1)[i] = imscale1;
377
          ((float *)&ctx->imscale2)[i] = imscale2;
378
        }
379
      f0 = vec_rsqrte(almostone);
380
      f0 = vec_madd(f0, almostone, (vector float)zero);
381
      f1 = vec_madd(f0, ctx->imscale2, (vector float)zero);
382
      foff = vec_madd(almostone, ctx->imscale1, f1);
383
      f1 = vec_nmsub(f0, ctx->imscale2, foff);
384
      f1 = vec_nmsub(almostone, ctx->imscale1, f1);
385
      if (vec_all_eq(f1, (vector float)zero))
386
        {
387
          ctx->foff = foff;
388
          break;
389
        }
390
      k += 1e-5;
391
    }
392
  rbmul = -r_mul * rbscale;
393
  rsbase = 1.0 / (1 << EVEN_SHIFT) / (1 << rs_base);
394
  for (i = 0; i < 4; i++)
395
    {
396
      ((int *)&ctx->b)[i] = aspect2;
397
      ((int *)&ctx->aspect2)[i] = aspect2;
398
      ((int *)&ctx->seed1)[i] = (i << 8) + 0x7000;
399
      ((int *)&ctx->seed2)[i] = (i << 16) + 0x9000;
400
      ((float *)&ctx->ohi)[i] = params->levels - 1;
401
      ((float *)&ctx->ehi)[i] = 1.1;
402
      ((float *)&ctx->elo)[i] = -0.1;
403
      ((float *)&ctx->r_mul)[i] = r_mul;
404
      ((float *)&ctx->rsbase)[i] = rsbase;
405
      ((float *)&ctx->rbmul)[i] = rbmul;
406
    }
407
  ctx->kernel = kernel;
408
409
  rs_base = 35 - EVEN_SHIFT - params->rand_scale;
410
411
  for (i = start_plane; i < end_plane; i++)
412
    {
413
      float *lut = (float *)malloc((ET_SRC_MAX + 1) * sizeof(float) * 3);
414
      int j;
415
      ctx->luts[i - start_plane] = lut;
416
417
      for (j = 0; j < ET_SRC_MAX + 1; j++)
418
        {
419
          double g = ((1 << 24) - params->luts[i][j]) * im_scale;
420
          int nl, rs;
421
422
          lut[j * 3] = g;
423
          if (g == 0.0)
424
            lut[j * 3 + 1] = 0.5;
425
          else
426
            lut[j * 3 + 1] = 0.5 - r_mul * rbscale / g;
427
          nl = (params->levels - 1 - g) * (1 << EVEN_SHIFT);
428
          rs = eb_compute_randshift(nl, rs_base,
429
                                    params->do_shadows, params->levels);
430
431
          lut[j * 3 + 2] = 1.0 / (1 << EVEN_SHIFT) / (1 << rs);
432
        }
433
    }
434
  for (i = i - start_plane; i < 4; i++)
435
    ctx->luts[i] = NULL;
436
437
  ctx->iir_line = (vector float *)eb_malloc_aligned(16 * (xs + 32), 16);
438
  ctx->a_line = (vector unsigned int *)eb_malloc_aligned(16 * (xs + 32), 16);
439
  ctx->b_line = (vector unsigned int *)eb_malloc_aligned(16 * (xs + 32), 16);
440
  ctx->r_line = (vector unsigned int *)eb_malloc_aligned(16 * (xs + 32), 16);
441
  for (i = 0; i < (xs + 32) * 4; i++)
442
    {
443
      ((float *)ctx->iir_line)[i] = 0;
444
      ((int *)ctx->a_line)[i] = 1;
445
      ((int *)ctx->b_line)[i] = aspect2;
446
      ((int *)ctx->r_line)[i] = 0;
447
    }
448
449
  ctx->skip_line = (char *)malloc((xs + 15) & -16);
450
451
  return ctx;
452
}
453
454
static void
455
eb_ctx_avec_free(eb_ctx_avec *ctx)
456
{
457
  int i;
458
459
  for (i = 0; i < 4; i++)
460
    free(ctx->luts[i]);
461
  eb_free_aligned(ctx->iir_line);
462
  eb_free_aligned(ctx->a_line);
463
  eb_free_aligned(ctx->b_line);
464
  eb_free_aligned(ctx->r_line);
465
  free(ctx->skip_line);
466
  eb_free_aligned(ctx);
467
}
468
469
#endif
470
471
#ifdef USE_VECTOR
472
static int
473
even_better_line_vector(EvenBetterCtx *ebc, uchar **dest,
474
                      const ET_Rll *const *src)
475
{
476
  int n_planes = ebc->n_planes;
477
  int xd = ebc->dest_width;
478
  int strip;
479
  eb_srcbuf sb_alloc;
480
  eb_srcbuf *srcbuf;
481
  uchar dummy_a[32];
482
  uchar *dummy_dst = (uchar *)(((int)dummy_a + 15) & -16);
483
#ifdef USE_SSE2
484
  int save_mxcsr = eb_sse2_set_daz();
485
#endif
486
487
  srcbuf = (eb_srcbuf *)(((int)&sb_alloc + 12) & -16);
488
489
  for (strip = 0; strip < n_planes; strip += 4)
490
    {
491
#ifdef USE_AVEC
492
      eb_ctx_avec *ctx = ebc->avec_ctx[strip >> 2];
493
#endif
494
#ifdef USE_SSE2
495
      eb_ctx_sse2 *ctx = ebc->sse2_ctx[strip >> 2];
496
#endif
497
      uchar *destbufs[4];
498
      const ET_Rll *const *sbuf = src + strip;
499
      int count[4];
500
      int src_idx[4];
501
      int plane_idx, last_plane;
502
      float im[4], rb[4], rs[4];
503
      int i;
504
505
      last_plane = n_planes - strip < 4 ? n_planes - strip : 4;
506
      for (plane_idx = 0; plane_idx < last_plane; plane_idx++)
507
        {
508
          count[plane_idx] = 0;
509
          src_idx[plane_idx] = 0;
510
          destbufs[plane_idx] = dest[plane_idx + strip];
511
        }
512
      for (; plane_idx < 4; plane_idx++)
513
        {
514
          int j;
515
516
          for (j = 0; j < 16; j++)
517
            {
518
              ((float *)&srcbuf->im)[j * 4 + plane_idx] = 0.0;
519
              ((float *)&srcbuf->rb)[j * 4 + plane_idx] = 0.0;
520
              ((float *)&srcbuf->rs)[j * 4 + plane_idx] = 0.0;
521
            }
522
        }
523
      for (i = 0; i < xd; i += 16)
524
        {
525
          int jmax = (xd - i) > 16 ? 16 : xd - i;
526
          int skip = 1;
527
          int j;
528
529
          for (plane_idx = 0; plane_idx < last_plane; plane_idx++)
530
            {
531
              if (count[plane_idx] < 16 || im[plane_idx] != 0.0)
532
                {
533
                  skip = 0;
534
                  break;
535
                }
536
            }
537
          ctx->skip_line[i >> 4] = skip;
538
539
          if (skip)
540
            {
541
              /* all white */
542
543
              for (plane_idx = 0; plane_idx < last_plane; plane_idx++)
544
                {
545
                  uchar *dst_ptr = destbufs[plane_idx];
546
                  if (jmax == 16)
547
                    {
548
                      ((uint32 *)dst_ptr)[(i >> 2) + 0] = 0;
549
                      ((uint32 *)dst_ptr)[(i >> 2) + 1] = 0;
550
                      ((uint32 *)dst_ptr)[(i >> 2) + 2] = 0;
551
                      ((uint32 *)dst_ptr)[(i >> 2) + 3] = 0;
552
                    }
553
                  else
554
                    {
555
                      for (j = 0; j < jmax; j++)
556
                        dst_ptr[i + j] = 0;
557
                    }
558
                  count[plane_idx] -= jmax;
559
                }
560
            }
561
          else
562
            {
563
              for (plane_idx = 0; plane_idx < last_plane; plane_idx++)
564
                {
565
                  const float *lut = ctx->luts[plane_idx];
566
                  float imp = im[plane_idx];
567
                  float rbp = rb[plane_idx];
568
                  float rsp = rs[plane_idx];
569
                  for (j = 0; j < jmax; j++)
570
                    {
571
                      if (count[plane_idx] == 0)
572
                        {
573
                          const ET_Rll *src_p = sbuf[plane_idx] +
574
                            src_idx[plane_idx]++;
575
                          ET_SrcPixel src_pixel = src_p->value;
576
                          count[plane_idx] = src_p->length;
577
                          imp = lut[src_pixel * 3];
578
                          rbp = lut[src_pixel * 3 + 1];
579
                          rsp = lut[src_pixel * 3 + 2];
580
                        }
581
                      ((float *)&srcbuf->im)[j * 4 + plane_idx] = imp;
582
                      ((float *)&srcbuf->rb)[j * 4 + plane_idx] = rbp;
583
                      ((float *)&srcbuf->rs)[j * 4 + plane_idx] = rsp;
584
                      count[plane_idx]--;
585
                    }
586
                  im[plane_idx] = imp;
587
                  rb[plane_idx] = rbp;
588
                  rs[plane_idx] = rsp;
589
                }
590
              for (; plane_idx < 4; plane_idx++)
591
                {
592
                  destbufs[plane_idx] = dummy_dst - i;
593
                }
594
#ifdef USE_AVEC
595
              eb_avec_core(ctx, (vector unsigned char **)destbufs, srcbuf, i);
596
#endif
597
#ifdef USE_SSE2
598
              eb_sse2_core(ctx, destbufs, srcbuf, i);
599
#endif
600
            }
601
        }
602
603
      for (i = xd & -16; i >= 0; i -= 16)
604
        {
605
          if (!ctx->skip_line[i >> 4])
606
            {
607
#ifdef USE_AVEC
608
              eb_avec_rev_rs(ctx, i + 15);
609
#endif
610
#ifdef USE_SSE2
611
              eb_sse2_rev_rs(ctx, i + 15);
612
#endif
613
            }
614
        }
615
    }
616
#ifdef USE_SSE2
617
  eb_sse2_restore_daz(save_mxcsr);
618
#endif
619
  return 0;
620
}
621
#endif
622
623
#ifdef USE_AVEC
624
static int
625
even_better_line_fastprep(EvenBetterCtx *ebc, uchar **dest,
626
                          const ET_SrcPixel *const *src)
627
{
628
  int n_planes = ebc->n_planes;
629
  int xd = ebc->dest_width;
630
  int strip;
631
  eb_srcbuf sb_alloc;
632
  eb_srcbuf *srcbuf;
633
  uchar dummy_a[32];
634
  uchar *dummy_dst = (uchar *)(((int)dummy_a + 15) & -16);
635
636
  srcbuf = (eb_srcbuf *)(((int)&sb_alloc + 12) & -16);
637
638
  for (strip = 0; strip < n_planes; strip += 4)
639
    {
640
#ifdef USE_AVEC
641
      eb_ctx_avec *ctx = ebc->avec_ctx[strip >> 2];
642
#endif
643
#ifdef USE_SSE2
644
      eb_ctx_sse2 *ctx = ebc->sse2_ctx[strip >> 2];
645
#endif
646
      uchar *destbufs[4];
647
      const ET_SrcPixel *const *sbuf = src + strip;
648
      int plane_idx, last_plane;
649
      int i;
650
651
      last_plane = n_planes - strip < 4 ? n_planes - strip : 4;
652
      for (plane_idx = 0; plane_idx < last_plane; plane_idx++)
653
        {
654
          destbufs[plane_idx] = dest[plane_idx + strip];
655
        }
656
      for (i = 0; i < xd; i += 16)
657
        {
658
          int noskip;
659
          noskip = eb_avec_prep_srcbuf(ctx, last_plane, srcbuf, sbuf, i);
660
          ctx->skip_line[i >> 4] = noskip;
661
          if (noskip)
662
            {
663
              for (plane_idx = last_plane; plane_idx < 4; plane_idx++)
664
                destbufs[plane_idx] = dummy_dst - i;
665
              eb_avec_core(ctx, (vector unsigned char **)destbufs, srcbuf, i);
666
            }
667
          else
668
            {
669
              /* all white */
670
671
              for (plane_idx = 0; plane_idx < last_plane; plane_idx++)
672
                {
673
                  uchar *dst_ptr = destbufs[plane_idx];
674
                  ((uint32 *)dst_ptr)[(i >> 2) + 0] = 0;
675
                  ((uint32 *)dst_ptr)[(i >> 2) + 1] = 0;
676
                  ((uint32 *)dst_ptr)[(i >> 2) + 2] = 0;
677
                  ((uint32 *)dst_ptr)[(i >> 2) + 3] = 0;
678
                }
679
            }
680
        }
681
682
      for (i = xd & -16; i >= 0; i -= 16)
683
        {
684
          if (ctx->skip_line[i >> 4])
685
            {
686
#ifdef USE_AVEC
687
              eb_avec_rev_rs(ctx, i + 15);
688
#endif
689
#ifdef USE_SSE2
690
              eb_sse2_rev_rs(ctx, i + 15);
691
#endif
692
            }
693
        }
694
    }
695
  return 0;
696
}
697
#endif
698
699
/* Maximum number of planes, but actually we want to dynamically
700
   allocate all scratch buffers that depend on this. */
701
0
#define M 16
702
703
static void
704
even_better_line_hi (EvenBetterCtx *ebc, uchar **dest,
705
                     const ET_Rll *const *src)
706
0
{
707
0
  int a[M], b[M];
708
0
  int e_1_0[M], e_m1_1[M], e_0_1[M], e_1_1[M];
709
0
  int iml[M], rbl[M];
710
0
  int i, j;
711
0
  int im;
712
0
  int *pa, *pb, *piir, *pr;
713
0
  int r[M], rg;
714
0
  int xd;
715
0
  uint32 seed1 = ebc->seed1;
716
0
  uint32 seed2 = ebc->seed2;
717
0
  uint32 sum;
718
0
  int plane_idx;
719
0
  int r_scratch[M];
720
0
  int n_planes = ebc->n_planes;
721
0
  int levels = ebc->levels;
722
#ifdef OLD_QUANT
723
  int dith_mul = levels << 8;
724
#else
725
0
  int dith_mul = (levels - 1) << 8;
726
0
#endif
727
0
  int imo_mul = (1 << (EVEN_SHIFT + IMO_SHIFT)) / (levels - 1);
728
0
  int aspect2 = ebc->aspect * ebc->aspect;
729
0
  int *strengths = ebc->strengths;
730
0
  int even_elo = ebc->even_elo;
731
0
  int even_ehi = ebc->even_ehi;
732
0
  int coupling;
733
0
  int *c_line = ebc->c_line;
734
0
  int even_c1 = ebc->even_c1;
735
0
  int rand_shift;
736
0
  int even_rlimit = 1 << (30 - EVEN_SHIFT + even_c1);
737
0
  int count[M], src_idx[M];
738
0
  int rs[M];
739
740
0
  xd = ebc->dest_width;
741
742
0
  memset(rbl, 0x00, M * sizeof(int));
743
0
  memset(iml, 0x00, M * sizeof(int));
744
0
  memset(rs, 0x00, M * sizeof(int));
745
746
0
  for (plane_idx = 0; plane_idx < n_planes; plane_idx++)
747
0
    {
748
0
      a[plane_idx] = 1;
749
0
      b[plane_idx] = aspect2;
750
0
      r[plane_idx] = 0;
751
0
      e_0_1[plane_idx] = 0;
752
0
      e_1_0[plane_idx] = 0;
753
0
      e_1_1[plane_idx] = 0;
754
0
      count[plane_idx] = 0;
755
0
      src_idx[plane_idx] = 0;
756
0
    }
757
758
0
  coupling = 0;
759
760
0
  for (i = 0; i < xd;)
761
0
    {
762
0
      int work_planes[M];
763
0
      int n_work = 0;
764
0
      int work_idx;
765
0
      int jmax;
766
767
0
      jmax = (xd - i) > 16 ? 16 : xd - i;
768
769
0
      for (plane_idx = 0; plane_idx < n_planes; plane_idx++)
770
0
        {
771
0
          EBPlaneCtx *ctx = ebc->plane_ctx[plane_idx];
772
0
          int *wcl = ctx->white_count_line;
773
0
          if (count[plane_idx] >= 16 && iml[plane_idx] == 0)
774
0
            wcl[i >> 4]++;
775
0
          else
776
0
            wcl[i >> 4] = 0;
777
0
          if (wcl[i >> 4] > 15)
778
0
            {
779
0
              uchar *dst_ptr = dest[plane_idx];
780
0
              if (jmax == 16)
781
0
                {
782
0
                  ((uint32 *)dst_ptr)[(i >> 2) + 0] = 0;
783
0
                  ((uint32 *)dst_ptr)[(i >> 2) + 1] = 0;
784
0
                  ((uint32 *)dst_ptr)[(i >> 2) + 2] = 0;
785
0
                  ((uint32 *)dst_ptr)[(i >> 2) + 3] = 0;
786
0
                }
787
0
              else
788
0
                {
789
0
                  for (j = 0; j < jmax; j++)
790
0
                    dst_ptr[i + j] = 0;
791
0
                }
792
0
              count[plane_idx] -= jmax;
793
0
            }
794
0
          else
795
0
            {
796
0
              work_planes[n_work++] = plane_idx;
797
0
            }
798
0
        }
799
800
0
      if (n_work == 0)
801
0
        {
802
          /* all planes were white */
803
0
          i += jmax;
804
0
          continue;
805
0
        }
806
807
0
      for (j = 0; j < jmax; j++)
808
0
        {
809
0
#ifdef FANCY_COUPLING
810
0
          coupling += c_line[i];
811
#else
812
          coupling = 0;
813
#endif
814
          /* Lookup image data and compute R for all planes. */
815
0
          for (work_idx = 0; work_idx < n_work; work_idx++)
816
0
            {
817
0
              int plane_idx = work_planes[work_idx];
818
0
              EBPlaneCtx *ctx = ebc->plane_ctx[plane_idx];
819
0
              ET_SrcPixel src_pixel;
820
0
              int new_r;
821
822
0
              pr = ctx->r_line;
823
0
              pa = ctx->a_line;
824
0
              pb = ctx->b_line;
825
0
              if (count[plane_idx] == 0)
826
0
                {
827
0
                  const ET_Rll *src_p = src[plane_idx] + src_idx[plane_idx]++;
828
0
                  int *lut = ctx->lut;
829
0
                  int *rblut = ctx->rb_lut;
830
0
                  char *rslut = ctx->rs_lut;
831
832
0
                  count[plane_idx] = src_p->length;
833
0
                  src_pixel = src_p->value;
834
0
                  iml[plane_idx] = lut[src_pixel];
835
0
                  rbl[plane_idx] = rblut[src_pixel];
836
0
                  rs[plane_idx] = rslut[src_pixel];
837
0
                }
838
0
              count[plane_idx]--;
839
840
0
              if (r[plane_idx] + a[plane_idx] < pr[i])
841
0
                {
842
0
                  r[plane_idx] += a[plane_idx];
843
0
                  a[plane_idx] += 2;
844
0
                }
845
0
              else
846
0
                {
847
0
                  a[plane_idx] = pa[i];
848
0
                  b[plane_idx] = pb[i];
849
0
                  r[plane_idx] = pr[i];
850
0
                }
851
0
              if (iml[plane_idx] == 0)
852
0
                {
853
0
                  r_scratch[plane_idx] = 0;
854
0
                }
855
0
              else
856
0
                {
857
0
                  int r_tmp;
858
0
                  const int r_max = 0;
859
0
                  new_r = r[plane_idx];
860
0
                  if (new_r > even_rlimit)
861
0
                    new_r = even_rlimit;
862
                  /* Should we store back with the limit? */
863
864
0
                  rg = new_r << (EVEN_SHIFT - even_c1);
865
0
                  r_tmp = rg - rbl[plane_idx];
866
0
                  if (r_tmp > r_max) r_tmp >>= 3;
867
0
                  r_scratch[plane_idx] = r_tmp;
868
0
                }
869
0
            }
870
871
          /* Dither each plane. */
872
0
          for (work_idx = 0; work_idx < n_work; work_idx++)
873
0
            {
874
0
              int plane_idx = work_planes[work_idx];
875
0
              EBPlaneCtx *ctx = ebc->plane_ctx[plane_idx];
876
0
              uchar *dst_ptr = dest[plane_idx];
877
0
              int new_e_1_0;
878
0
              int coupling_contribution;
879
880
0
              pr = ctx->r_line;
881
0
              pa = ctx->a_line;
882
0
              pb = ctx->b_line;
883
0
              piir = ctx->iir_line;
884
885
0
              im = iml[plane_idx];
886
0
              e_m1_1[plane_idx] = e_0_1[plane_idx];
887
0
              e_0_1[plane_idx] = e_1_1[plane_idx];
888
0
              e_1_1[plane_idx] = i == xd - 1 ? 0 : piir[i + 1];
889
0
              new_e_1_0 = ((e_1_0[plane_idx] * 7 + e_m1_1[plane_idx] * 3 +
890
0
                            e_0_1[plane_idx] * 5 + e_1_1[plane_idx] * 1) >> 4);
891
0
              if (im == 0)
892
0
                {
893
0
                  dst_ptr[i] = 0;
894
0
                }
895
0
              else
896
0
                {
897
0
                  int err;
898
0
                  int imo;
899
900
0
                  err = new_e_1_0;
901
902
0
                  err += r_scratch[plane_idx];
903
904
                  /* Add the two seeds together */
905
0
                  sum = seed1 + seed2;
906
907
                  /* If the add generated a carry, increment
908
                   * the result of the addition.
909
                   */
910
0
                  if (sum < seed1 || sum < seed2) sum++;
911
912
                  /* Seed2 becomes old seed1, seed1 becomes result */
913
0
                  seed2 = seed1;
914
0
                  seed1 = sum;
915
916
0
                  rand_shift = rs[plane_idx];
917
0
                  err -= (sum >> rand_shift) - (0x80000000 >> rand_shift);
918
919
0
                  if (err < even_elo)
920
0
                    err = even_elo;
921
922
0
                  else if (err > even_ehi)
923
0
                    err = even_ehi;
924
925
0
#if 1
926
0
                  err += coupling;
927
0
#endif
928
929
#ifdef OLD_QUANT
930
                  imo = ((err + im) * dith_mul) >> (EVEN_SHIFT + 8);
931
#else
932
0
                  imo = ((err + im) * dith_mul + (1 << (EVEN_SHIFT + 7))) >> (EVEN_SHIFT + 8);
933
0
#endif
934
0
                  if (imo < 0) imo = 0;
935
0
                  else if (imo > levels - 1) imo = levels - 1;
936
0
                  dst_ptr[i] = imo;
937
0
                  coupling_contribution = im - ((imo * imo_mul) >> IMO_SHIFT);
938
0
                  new_e_1_0 += coupling_contribution;
939
0
                  coupling += (coupling_contribution * strengths[plane_idx]) >> 8;
940
0
                }
941
0
              if (dst_ptr[i] != 0)
942
0
                {
943
0
                  a[plane_idx] = 1;
944
0
                  b[plane_idx] = aspect2;
945
0
                  r[plane_idx] = 0;
946
0
                }
947
0
              pa[i] = a[plane_idx];
948
0
              pb[i] = b[plane_idx];
949
0
              pr[i] = r[plane_idx];
950
0
              piir[i] = new_e_1_0;
951
0
              e_1_0[plane_idx] = new_e_1_0;
952
0
            }
953
0
#ifdef FANCY_COUPLING
954
0
          coupling = coupling >> 1;
955
0
          c_line[i] = coupling;
956
0
#endif
957
0
          i++;
958
0
        }
959
0
    }
960
961
  /* Note: this isn't white optimized, but the payoff is probably not
962
     that important. */
963
0
#ifdef FANCY_COUPLING
964
0
  coupling = 0;
965
0
  for (i = xd - 1; i >= 0; i--)
966
0
    {
967
0
      coupling = (coupling + c_line[i]) >> 1;
968
0
      c_line[i] = (coupling - (coupling >> 4));
969
0
    }
970
0
#endif
971
972
  /* Update distances. */
973
0
  for (plane_idx = 0; plane_idx < n_planes; plane_idx++)
974
0
    {
975
0
      EBPlaneCtx *ctx = ebc->plane_ctx[plane_idx];
976
0
      int *wcl = ctx->white_count_line;
977
0
      int av, bv, rv;
978
0
      int jmax;
979
980
0
      pr = ctx->r_line;
981
0
      pa = ctx->a_line;
982
0
      pb = ctx->b_line;
983
984
0
      av = 1;
985
0
      bv = 1;
986
0
      rv = 0;
987
0
      jmax = ((xd - 1) & 15) + 1;
988
0
      for (i = xd - 1; i >= 0;)
989
0
        {
990
0
          if (wcl[i >> 4] < 16)
991
0
            {
992
0
              for (j = 0; j < jmax; j++)
993
0
                {
994
0
                  if (rv + bv + av < pr[i] + pb[i])
995
0
                    {
996
0
                      rv += av;
997
0
                      av += 2;
998
0
                    }
999
0
                  else
1000
0
                    {
1001
0
                      rv = pr[i];
1002
0
                      av = pa[i];
1003
0
                      bv = pb[i];
1004
0
                    }
1005
0
                  if (rv > even_rlimit) rv = even_rlimit;
1006
0
                  pa[i] = av;
1007
0
                  pb[i] = bv + (aspect2 << 1);
1008
0
                  pr[i] = rv + bv;
1009
0
                  i--;
1010
0
                }
1011
0
            }
1012
0
          else
1013
0
            i -= jmax;
1014
0
          jmax = 16;
1015
0
        }
1016
0
    }
1017
1018
0
   ebc->seed1 = seed1;
1019
0
   ebc->seed2 = seed2;
1020
0
}
1021
1022
static void
1023
even_better_line_both (EvenBetterCtx *ebc, uchar **dest,
1024
                       const ET_Rll *const *src)
1025
0
{
1026
#if 0
1027
  int a[M], b[M];
1028
  int a_sh[M], b_sh[M];
1029
  int e_1_0[M], e_m1_1[M], e_0_1[M], e_1_1[M];
1030
  int imraw[M];
1031
  int iml[M];
1032
  int i;
1033
  int im;
1034
  int *lut;
1035
  const ET_SrcPixel *ps;
1036
  int *pa, *pb, *piir, *pr;
1037
  int *pa_sh, *pb_sh, *pr_sh;
1038
  int r[M], rb, rg;
1039
  int r_sh[M];
1040
  int *rblut;
1041
  int xd, xrem, xs;
1042
  uint32 seed1 = ebc->seed1;
1043
  uint32 seed2 = ebc->seed2;
1044
  uint32 sum;
1045
  int plane_idx;
1046
  int r_scratch[M];
1047
  int src_idx;
1048
  int n_planes = ebc->n_planes;
1049
  int levels = ebc->levels;
1050
#ifdef OLD_QUANT
1051
  int dith_mul = levels << 8;
1052
#else
1053
  int dith_mul = (levels - 1) << 8;
1054
#endif
1055
  int imo_mul = (1 << (EVEN_SHIFT + IMO_SHIFT)) / (levels - 1);
1056
  int aspect2 = ebc->aspect * ebc->aspect;
1057
  int *strengths = ebc->strengths;
1058
  int even_elo= ebc->even_elo;
1059
  int even_ehi= ebc->even_ehi;
1060
  int coupling;
1061
  int *c_line = ebc->c_line;
1062
  int even_c1 = ebc->even_c1;
1063
  int rand_shift = ebc->rand_shift;
1064
  int even_rlimit = 1 << (30 - EVEN_SHIFT + even_c1);
1065
1066
  xs = ebc->source_width;
1067
  xd = ebc->dest_width;
1068
  xrem = xd - xs;
1069
1070
  for (plane_idx = 0; plane_idx < n_planes; plane_idx++)
1071
    {
1072
      a[plane_idx] = 1;
1073
      b[plane_idx] = aspect2;
1074
      a_sh[plane_idx] = 1;
1075
      b_sh[plane_idx] = aspect2;
1076
      r[plane_idx] = 0;
1077
      r_sh[plane_idx] = 0;
1078
      e_0_1[plane_idx] = 0;
1079
      e_1_0[plane_idx] = 0;
1080
      e_1_1[plane_idx] = 0;
1081
    }
1082
1083
  coupling = 0;
1084
1085
  src_idx = 0;
1086
  for (i = 0; i < xd; i++)
1087
    {
1088
#ifdef FANCY_COUPLING
1089
      coupling += c_line[i];
1090
#else
1091
      coupling = 0;
1092
#endif
1093
1094
      xrem += xs;
1095
      if (xrem >= xd)
1096
        {
1097
          for (plane_idx = 0; plane_idx < n_planes; plane_idx++)
1098
            {
1099
              ps = src[plane_idx];
1100
              imraw[plane_idx] = ps[src_idx];
1101
            }
1102
          src_idx++;
1103
          xrem -= xd;
1104
        }
1105
1106
      /* Lookup image data and compute R for all planes. */
1107
      for (plane_idx = 0; plane_idx < n_planes; plane_idx++)
1108
        {
1109
          EBPlaneCtx *ctx = ebc->plane_ctx[plane_idx];
1110
          ET_SrcPixel src_pixel;
1111
          int new_r;
1112
1113
          pr = ctx->r_line;
1114
          pa = ctx->a_line;
1115
          pb = ctx->b_line;
1116
          pr_sh = ctx->r_line_sh;
1117
          pa_sh = ctx->a_line_sh;
1118
          pb_sh = ctx->b_line_sh;
1119
          lut = ctx->lut;
1120
          rblut = ctx->rb_lut;
1121
          src_pixel = imraw[plane_idx];
1122
1123
          im = lut[src_pixel];
1124
          iml[plane_idx] = im;
1125
          rb = rblut[src_pixel];
1126
          if (r[plane_idx] + a[plane_idx] < pr[i])
1127
            {
1128
              r[plane_idx] += a[plane_idx];
1129
              a[plane_idx] += 2;
1130
            }
1131
          else
1132
            {
1133
              a[plane_idx] = pa[i];
1134
              b[plane_idx] = pb[i];
1135
              r[plane_idx] = pr[i];
1136
            }
1137
          if (r_sh[plane_idx] + a_sh[plane_idx] < pr_sh[i])
1138
            {
1139
              r_sh[plane_idx] += a_sh[plane_idx];
1140
              a_sh[plane_idx] += 2;
1141
            }
1142
          else
1143
            {
1144
              a_sh[plane_idx] = pa_sh[i];
1145
              b_sh[plane_idx] = pb_sh[i];
1146
              r_sh[plane_idx] = pr_sh[i];
1147
            }
1148
          if (im == 0 || im == (1 << EVEN_SHIFT))
1149
            {
1150
              r_scratch[plane_idx] = 0;
1151
            }
1152
          else
1153
            {
1154
              new_r = r[plane_idx];
1155
              if (new_r > even_rlimit)
1156
                new_r = even_rlimit;
1157
              /* Should we store back with the limit? */
1158
              rg = new_r << (EVEN_SHIFT - even_c1);
1159
1160
              new_r = r_sh[plane_idx];
1161
              if (new_r > even_rlimit)
1162
                new_r = even_rlimit;
1163
              rg -= new_r << (EVEN_SHIFT - even_c1);
1164
              r_scratch[plane_idx] = rg - rb;
1165
            }
1166
        }
1167
1168
      /* Dither each plane. */
1169
      for (plane_idx = 0; plane_idx < n_planes; plane_idx++)
1170
        {
1171
          EBPlaneCtx *ctx = ebc->plane_ctx[plane_idx];
1172
          uchar *dst_ptr = dest[plane_idx];
1173
          int new_e_1_0;
1174
          int coupling_contribution;
1175
1176
          pr = ctx->r_line;
1177
          pa = ctx->a_line;
1178
          pb = ctx->b_line;
1179
          pr_sh = ctx->r_line_sh;
1180
          pa_sh = ctx->a_line_sh;
1181
          pb_sh = ctx->b_line_sh;
1182
          piir = ctx->iir_line;
1183
1184
          im = iml[plane_idx];
1185
          e_m1_1[plane_idx] = e_0_1[plane_idx];
1186
          e_0_1[plane_idx] = e_1_1[plane_idx];
1187
          e_1_1[plane_idx] = i == xd - 1 ? 0 : piir[i + 1];
1188
          new_e_1_0 = ((e_1_0[plane_idx] * 7 + e_m1_1[plane_idx] * 3 +
1189
                        e_0_1[plane_idx] * 5 + e_1_1[plane_idx] * 1) >> 4);
1190
          if (im == 0)
1191
            {
1192
              dst_ptr[i] = 0;
1193
            }
1194
          else
1195
            {
1196
              int err;
1197
              int imo;
1198
1199
              err = new_e_1_0;
1200
1201
              err += r_scratch[plane_idx];
1202
1203
              /* Add the two seeds together */
1204
              sum = seed1 + seed2;
1205
1206
              /* If the add generated a carry, increment
1207
               * the result of the addition.
1208
               */
1209
              if (sum < seed1 || sum < seed2) sum++;
1210
1211
              /* Seed2 becomes old seed1, seed1 becomes result */
1212
              seed2 = seed1;
1213
              seed1 = sum;
1214
1215
              err -= (sum >> rand_shift) - (0x80000000 >> rand_shift);
1216
1217
              if (err < even_elo)
1218
                err = even_elo;
1219
1220
              else if (err > even_ehi)
1221
                err = even_ehi;
1222
1223
#if 1
1224
              err += coupling;
1225
#endif
1226
1227
#ifdef OLD_QUANT
1228
              imo = ((err + im) * dith_mul) >> (EVEN_SHIFT + 8);
1229
#else
1230
              imo = ((err + im) * dith_mul + (1 << (EVEN_SHIFT + 7))) >> (EVEN_SHIFT + 8);
1231
#endif
1232
              if (imo < 0) imo = 0;
1233
              else if (imo > levels - 1) imo = levels - 1;
1234
              dst_ptr[i] = imo;
1235
              coupling_contribution = im - ((imo * imo_mul) >> IMO_SHIFT);
1236
              new_e_1_0 += coupling_contribution;
1237
              coupling += (coupling_contribution * strengths[plane_idx]) >> 8;
1238
            }
1239
          if (dst_ptr[i] != 0)
1240
            {
1241
              a[plane_idx] = 1;
1242
              b[plane_idx] = aspect2;
1243
              r[plane_idx] = 0;
1244
            }
1245
          if (dst_ptr[i] != levels - 1)
1246
            {
1247
              a_sh[plane_idx] = 1;
1248
              b_sh[plane_idx] = aspect2;
1249
              r_sh[plane_idx] = 0;
1250
            }
1251
          pa[i] = a[plane_idx];
1252
          pb[i] = b[plane_idx];
1253
          pr[i] = r[plane_idx];
1254
          pa_sh[i] = a_sh[plane_idx];
1255
          pb_sh[i] = b_sh[plane_idx];
1256
          pr_sh[i] = r_sh[plane_idx];
1257
          piir[i] = new_e_1_0;
1258
          e_1_0[plane_idx] = new_e_1_0;
1259
        }
1260
#ifdef FANCY_COUPLING
1261
      coupling = coupling >> 1;
1262
      c_line[i] = coupling;
1263
#endif
1264
    }
1265
1266
#ifdef FANCY_COUPLING
1267
  coupling = 0;
1268
  for (i = xd - 1; i >= 0; i--)
1269
    {
1270
      if (plane_idx == 0)
1271
        {
1272
          coupling = (coupling + c_line[i]) >> 1;
1273
          c_line[i] = (coupling - (coupling >> 4));
1274
        }
1275
    }
1276
#endif
1277
1278
  /* Update distances. */
1279
  for (plane_idx = 0; plane_idx < n_planes; plane_idx++)
1280
    {
1281
      EBPlaneCtx *ctx = ebc->plane_ctx[plane_idx];
1282
      int av, bv, rv;
1283
      int av_sh, bv_sh, rv_sh;
1284
1285
      pr = ctx->r_line;
1286
      pa = ctx->a_line;
1287
      pb = ctx->b_line;
1288
      pr_sh = ctx->r_line_sh;
1289
      pa_sh = ctx->a_line_sh;
1290
      pb_sh = ctx->b_line_sh;
1291
1292
      av = 1;
1293
      bv = 1;
1294
      rv = 0;
1295
      av_sh = 1;
1296
      bv_sh = 1;
1297
      rv_sh = 0;
1298
      for (i = xd - 1; i >= 0; i--)
1299
        {
1300
          if (rv + bv + av < pr[i] + pb[i])
1301
            {
1302
              rv += av;
1303
              av += 2;
1304
            }
1305
          else
1306
            {
1307
              rv = pr[i];
1308
              av = pa[i];
1309
              bv = pb[i];
1310
            }
1311
          if (rv > even_rlimit) rv = even_rlimit;
1312
          pa[i] = av;
1313
          pb[i] = bv + (aspect2 << 1);
1314
          pr[i] = rv + bv;
1315
1316
          if (rv_sh + bv_sh + av_sh < pr_sh[i] + pb_sh[i])
1317
            {
1318
              rv_sh += av_sh;
1319
              av_sh += 2;
1320
            }
1321
          else
1322
            {
1323
              rv_sh = pr_sh[i];
1324
              av_sh = pa_sh[i];
1325
              bv_sh = pb_sh[i];
1326
            }
1327
          if (rv_sh > even_rlimit) rv_sh = even_rlimit;
1328
          pa_sh[i] = av_sh;
1329
          pb_sh[i] = bv_sh + (aspect2 << 1);
1330
          pr_sh[i] = rv_sh + bv_sh;
1331
        }
1332
    }
1333
1334
   ebc->seed1 = seed1;
1335
   ebc->seed2 = seed2;
1336
#endif
1337
0
}
1338
1339
/**
1340
 * even_better_line_rll: Screen a line using Even ToneFS screening.
1341
 * @ebc: An #EvenBetterCtx context.
1342
 * @dest: Array of destination buffers, 8 bpp pixels each.
1343
 * @src: Array of source buffers, runlength encoded.
1344
 *
1345
 * Screens a single line using Even ToneFS screening.
1346
 **/
1347
void
1348
even_better_line_rll (EvenBetterCtx *ebc, uchar **dest,
1349
                      const ET_Rll *const *src)
1350
0
{
1351
1352
0
  if (ebc->dump_file && ebc->dump_level >= EB_DUMP_INPUT)
1353
0
    {
1354
0
      int i;
1355
1356
      /* Note: we should calculate the actual number of runlength
1357
         codes here. As it is, it will just waste storage a bit. */
1358
0
      for (i = 0; i < ebc->n_planes; i++)
1359
0
        fwrite (src[i], sizeof(ET_Rll), ebc->source_width,
1360
0
                ebc->dump_file);
1361
0
    }
1362
#ifdef USE_VECTOR
1363
  if (ebc->using_vectors)
1364
    even_better_line_vector(ebc, dest, src);
1365
  else
1366
#endif
1367
0
  if (ebc->do_shadows)
1368
0
    even_better_line_both (ebc, dest, src);
1369
0
  else
1370
0
    even_better_line_hi (ebc, dest, src);
1371
0
  if (ebc->dump_file && ebc->dump_level >= EB_DUMP_INPUT)
1372
0
    {
1373
0
      int i;
1374
1375
0
      for (i = 0; i < ebc->n_planes; i++)
1376
0
        fwrite (dest[i], 1, ebc->dest_width,
1377
0
                ebc->dump_file);
1378
0
    }
1379
0
}
1380
1381
/**
1382
 * even_better_compress_rll: Compress a single scan line to RLL format.
1383
 * @dst: Destination buffer.
1384
 * @src: Source buffer.
1385
 * @src_width: Number of source pixels.
 * @dst_width: Number of destination pixels the run lengths add up to.
1386
 *
1387
 * Return value: number of runlength codes.
1388
 **/
1389
static int
1390
even_better_compress_rll (ET_Rll *dst, const ET_SrcPixel *src,
1391
                          int src_width, int dst_width)
1392
0
{
1393
0
  int rll_idx;
1394
0
  int i;
1395
0
  int count;
1396
0
  ET_SrcPixel last_val;
1397
0
  int whole = dst_width / src_width;
1398
0
  int frac = dst_width % src_width;
1399
0
  int rem;
1400
1401
0
  rll_idx = 0;
1402
0
  last_val = src[0];
1403
0
  count = whole;
1404
0
  if (frac == 0)
1405
0
    {
1406
0
      for (i = 1; i < src_width; i++)
1407
0
        {
1408
0
          ET_SrcPixel val = src[i];
1409
1410
0
          if (count > 0xffff - whole || val != last_val)
1411
0
            {
1412
0
              dst[rll_idx].length = count;
1413
0
              dst[rll_idx].value = last_val;
1414
0
              rll_idx++;
1415
0
              last_val = val;
1416
0
              count = 0;
1417
0
            }
1418
0
          count += whole;
1419
0
        }
1420
0
    }
1421
0
  else
1422
0
    {
1423
0
      rem = frac;
1424
0
      for (i = 1; i < src_width; i++)
1425
0
        {
1426
0
          ET_SrcPixel val = src[i];
1427
1428
0
          if (count >= 0xffff - whole || val != last_val)
1429
0
            {
1430
0
              dst[rll_idx].length = count;
1431
0
              dst[rll_idx].value = last_val;
1432
0
              rll_idx++;
1433
0
              last_val = val;
1434
0
              count = 0;
1435
0
            }
1436
0
          count += whole;
1437
0
          rem += frac;
1438
0
          if (rem >= src_width)
1439
0
            {
1440
0
              count++;
1441
0
              rem -= src_width;
1442
0
            }
1443
0
        }
1444
0
    }
1445
0
  dst[rll_idx].length = count;
1446
0
  dst[rll_idx].value = last_val;
1447
0
  rll_idx++;
1448
0
  return rll_idx;
1449
0
}
1450
1451
/**
1452
 * even_better_line: Screen a line using Even ToneFS screening.
1453
 * @ebc: An #EvenBetterCtx context.
1454
 * @dest: Array of destination buffers, 8 bpp pixels each.
1455
 * @src: Array of source buffer, ET_SrcPixel pixels each.
1456
 *
1457
 * Screens a single line using Even ToneFS screening.
1458
 **/
1459
void
1460
even_better_line (EvenBetterCtx *ebc, uchar **dest,
1461
                      const ET_SrcPixel *const *src)
1462
0
{
1463
0
  ET_Rll *rll_buf[M];
1464
0
  int i;
1465
0
  int source_width = ebc->source_width;
1466
0
  int dest_width = ebc->dest_width;
1467
1468
#ifdef USE_AVEC
1469
  if (ebc->using_vectors == 2)
1470
    {
1471
      even_better_line_fastprep (ebc, dest, src);
1472
    }
1473
  else
1474
#endif
1475
0
    {
1476
0
      for (i = 0; i < ebc->n_planes; i++)
1477
0
        {
1478
0
          rll_buf[i] = (ET_Rll *)malloc (source_width * sizeof(ET_Rll));
1479
0
          even_better_compress_rll (rll_buf[i], src[i], source_width, dest_width);
1480
0
        }
1481
0
      even_better_line_rll (ebc, dest, (const ET_Rll * const *)rll_buf);
1482
0
      for (i = 0; i < ebc->n_planes; i++)
1483
0
        free (rll_buf[i]);
1484
0
    }
1485
0
}
1486
1487
/**
1488
 * even_better_plane_free: Free an #EBPlaneCtx context.
1489
 * @ctx: The #EBPlaneCtx context to free.
1490
 *
1491
 * Frees @ctx.
1492
 **/
1493
static void
1494
even_better_plane_free (EBPlaneCtx *ctx)
1495
0
{
1496
0
  free (ctx->rb_line);
1497
0
  free (ctx->iir_line);
1498
0
  free (ctx->r_line);
1499
0
  free (ctx->a_line);
1500
0
  free (ctx->b_line);
1501
0
  free (ctx->lut);
1502
0
  free (ctx->rb_lut);
1503
0
  free (ctx->rs_lut);
1504
0
  free (ctx->white_count_line);
1505
0
  free (ctx);
1506
0
}
1507
1508
/* Integer base-2 logarithm, rounded down.  Any argument of 1 or less
   (including zero and negative values) yields 0. */
static int
even_log2 (int x)
{
  int bits = 0;

  /* Count how many times the value can be halved before reaching 1. */
  while (x > 1)
    {
      x >>= 1;
      bits++;
    }
  return bits;
}
1518
1519
/**
1520
 * even_better_plane_new: Create new Even ToneFS screening context for one plane.
1521
 * @source_width: Width of source buffer.
1522
 * @dest_width: Width of destination buffer, in pixels.
1523
 * @lut: Lookup table for gray values.
1524
 *
1525
 * Creates a new context for Even ToneFS screening.
1526
 *
1527
 * If @dest_width is larger than @source_width, then input lines will
1528
 * be expanded using nearest-neighbor sampling.
1529
 *
1530
 * @lut should be an array of 256 values, one for each possible input
1531
 * gray value. @lut is a lookup table for gray values. Each value
1532
 * ranges from 0 (black) to 2^24 (white).
1533
 *
1534
 * Return value: The new #EBPlaneCtx context.
1535
 **/
1536
static EBPlaneCtx *
1537
even_better_plane_new (const EvenBetterParams *params, EvenBetterCtx *ebc,
1538
                       int plane_idx)
1539
0
{
1540
0
  int source_width = params->source_width;
1541
0
  int dest_width = params->dest_width;
1542
0
  int *lut = params->luts[plane_idx];
1543
0
  EBPlaneCtx *result;
1544
0
  int i;
1545
0
  int *new_lut;
1546
0
  int *rb_lut;
1547
0
  char *rs_lut;
1548
0
  double rbscale = eb_compute_rbscale(params);
1549
0
  int even_c1 = ebc->even_c1;
1550
0
  int even_rlimit = 1 << (30 - EVEN_SHIFT + even_c1);
1551
0
  int do_shadows = params->do_shadows;
1552
0
  int log2_levels;
1553
0
  int rs_base;
1554
1555
0
  result = (EBPlaneCtx *)malloc (sizeof(EBPlaneCtx));
1556
1557
0
  result->source_width = source_width;
1558
0
  result->dest_width = dest_width;
1559
1560
0
  new_lut = (int *)malloc ((ET_SRC_MAX + 1) * sizeof(int));
1561
0
  for (i = 0; i < ET_SRC_MAX + 1; i++)
1562
0
    {
1563
0
      int nli;
1564
1565
0
      if (lut == NULL)
1566
0
        {
1567
0
#if ET_SRC_MAX == 255
1568
0
          nli = (i * 65793 + (i >> 7)) >> (24 - EVEN_SHIFT);
1569
#else
1570
          nli = (i * ((double) (1 << EVEN_SHIFT)) / ET_SRC_MAX) + 0.5;
1571
#endif
1572
0
        }
1573
0
      else
1574
0
        nli = lut[i] >> (24 - EVEN_SHIFT);
1575
0
      new_lut[i] = (1 << EVEN_SHIFT) - nli;
1576
0
    }
1577
1578
0
  rb_lut = (int *)malloc ((ET_SRC_MAX + 1) * sizeof(int));
1579
0
  rs_lut = (char *)malloc ((ET_SRC_MAX + 1) * sizeof(int));
1580
1581
0
  log2_levels = even_log2 (params->levels);
1582
0
  rs_base = 35 - EVEN_SHIFT + log2_levels - params->rand_scale;
1583
1584
0
  for (i = 0; i <= ET_SRC_MAX; i++)
1585
0
    {
1586
0
      double rb;
1587
0
      int nl = new_lut[i] * (params->levels - 1);
1588
0
      int rs;
1589
1590
0
      if (nl == 0)
1591
0
        rb = 0;
1592
0
      else
1593
0
        {
1594
0
          rb = (rbscale * (1 << (2 * EVEN_SHIFT - even_c1))) / nl;
1595
0
          if (rb > even_rlimit << (EVEN_SHIFT - even_c1))
1596
0
            rb = even_rlimit << (EVEN_SHIFT - even_c1);
1597
0
        }
1598
1599
0
      rs = eb_compute_randshift(nl, rs_base, do_shadows, params->levels);
1600
0
      rs_lut[i] = rs;
1601
1602
0
      if (params->do_shadows)
1603
0
        {
1604
0
          nl = ((1 << EVEN_SHIFT) - new_lut[i]) * (params->levels - 1);
1605
1606
0
          if (nl == 0)
1607
0
            rb = 0;
1608
0
          else
1609
0
            {
1610
0
              int rb_sh;
1611
0
              rb_sh = (rbscale * (1 << (2 * EVEN_SHIFT - even_c1))) / nl;
1612
0
              if (rb_sh > even_rlimit << (EVEN_SHIFT - even_c1))
1613
0
                rb_sh = even_rlimit << (EVEN_SHIFT - even_c1);
1614
0
              rb -= rb_sh;
1615
0
            }
1616
0
        }
1617
0
      rb_lut[i] = rb;
1618
1619
0
    }
1620
1621
0
  result->lut = new_lut;
1622
0
  result->rb_lut = rb_lut;
1623
0
  result->rs_lut = rs_lut;
1624
1625
0
  result->rb_line = (int *)calloc (dest_width, sizeof(int));
1626
0
  result->iir_line = (int *)calloc (dest_width, sizeof(int));
1627
0
  result->r_line = (int *)calloc (dest_width, sizeof(int));
1628
0
  result->a_line = (int *)calloc (dest_width, sizeof(int));
1629
0
  result->b_line = (int *)calloc (dest_width, sizeof(int));
1630
0
  result->white_count_line = (int *)calloc ((dest_width + 15) >> 4, sizeof(int));
1631
0
  if (do_shadows)
1632
0
    {
1633
0
      result->r_line_sh = (int *)calloc (dest_width, sizeof(int));
1634
0
      result->a_line_sh = (int *)calloc (dest_width, sizeof(int));
1635
0
      result->b_line_sh = (int *)calloc (dest_width, sizeof(int));
1636
0
    }
1637
0
  else
1638
0
    {
1639
0
      result->r_line_sh = NULL;
1640
0
      result->a_line_sh = NULL;
1641
0
      result->b_line_sh = NULL;
1642
0
    }
1643
0
  for (i = 0; i < dest_width; i++)
1644
0
    {
1645
0
      result->a_line[i] = 1;
1646
0
      result->b_line[i] = 1;
1647
0
      result->iir_line[i] = -((rand () & 0x7fff) << 6) >> (24 - EVEN_SHIFT);
1648
0
      if (do_shadows)
1649
0
        {
1650
0
          result->a_line_sh[i] = 1;
1651
0
          result->b_line_sh[i] = 1;
1652
0
        }
1653
0
    }
1654
1655
0
  return result;
1656
0
}
1657
1658
EvenBetterCtx *
1659
even_better_new (const EvenBetterParams *params)
1660
0
{
1661
0
  EvenBetterCtx *result = (EvenBetterCtx *)malloc (sizeof(EvenBetterCtx));
1662
0
  int n_planes = params->n_planes;
1663
0
  int i;
1664
0
  int log2_levels, log2_aspect;
1665
0
  int using_vectors = 0;
1666
1667
0
  if (params->dump_file)
1668
0
    {
1669
0
      int header[5];
1670
1671
0
      header[0] = 0x70644245;
1672
0
      header[1] = 'M' * 0x1010000 + 'I' * 0x101;
1673
0
      header[2] = EVENBETTER_VERSION;
1674
0
      header[3] = ET_SRC_MAX;
1675
0
      header[4] = sizeof(ET_SrcPixel);
1676
0
      fwrite (header, sizeof(int), sizeof(header) / sizeof(header[0]),
1677
0
              params->dump_file);
1678
0
      if (params->dump_level >= EB_DUMP_PARAMS)
1679
0
        {
1680
1681
0
          fwrite (params, 1, sizeof(EvenBetterParams), params->dump_file);
1682
0
        }
1683
0
      if (params->dump_level >= EB_DUMP_LUTS)
1684
0
        {
1685
0
          int i;
1686
0
          for (i = 0; i < params->n_planes; i++)
1687
0
            fwrite (params->luts[i], sizeof(int), ET_SRC_MAX + 1,
1688
0
                    params->dump_file);
1689
0
        }
1690
0
    }
1691
1692
0
  result->source_width = params->source_width;
1693
0
  result->dest_width = params->dest_width;
1694
0
  result->n_planes = n_planes;
1695
0
  result->levels = params->levels;
1696
1697
0
  result->aspect = params->aspect;
1698
1699
0
  result->even_ehi = 0.6 * (1 << EVEN_SHIFT) / (params->levels - 1);
1700
0
  result->even_elo = -result->even_ehi;
1701
1702
0
  result->strengths = (int *)malloc (sizeof(int) * n_planes);
1703
0
  memcpy (result->strengths, params->strengths,
1704
0
          sizeof(int) * n_planes);
1705
1706
0
  log2_levels = even_log2 (params->levels);
1707
0
  log2_aspect = even_log2 (params->aspect);
1708
0
  result->even_c1 = 6 + log2_aspect + log2_levels - params->even_c1_scale;
1709
0
  result->do_shadows = params->do_shadows;
1710
1711
0
  result->c_line = (int *)calloc (params->dest_width, sizeof(int));
1712
1713
0
  result->seed1 = 0x5324879f;
1714
0
  result->seed2 = 0xb78d0945;
1715
1716
0
  result->dump_file = params->dump_file;
1717
0
  result->dump_level = params->dump_level;
1718
1719
#ifdef USE_SSE2
1720
  using_vectors = eb_test_sse2();
1721
#endif
1722
#ifdef USE_AVEC
1723
  using_vectors = 1; /* todo: Altivec sensing */
1724
1725
  /* select fastprep */
1726
  if (sizeof(ET_SrcPixel) == 1 && using_vectors && params->gamma != 0)
1727
    using_vectors = 2;
1728
1729
#endif
1730
1731
#ifdef USE_VECTOR
1732
  result->using_vectors = using_vectors;
1733
#endif
1734
0
  if (using_vectors)
1735
0
    {
1736
#ifdef USE_SSE2
1737
      result->sse2_ctx = (eb_ctx_sse2 **)malloc(sizeof(eb_ctx_sse2 *) *
1738
                                                ((n_planes + 3) >> 2));
1739
      for (i = 0; i < n_planes; i += 4)
1740
        {
1741
          int end_plane = i + 4 < n_planes ? i + 4 : n_planes;
1742
          result->sse2_ctx[i >> 2] = eb_ctx_sse2_new(params, i, end_plane);
1743
        }
1744
#endif
1745
#ifdef USE_AVEC
1746
      result->avec_ctx = (eb_ctx_avec **)malloc(sizeof(eb_ctx_avec *) *
1747
                                                ((n_planes + 3) >> 2));
1748
      for (i = 0; i < n_planes; i += 4)
1749
        {
1750
          int end_plane = i + 4 < n_planes ? i + 4 : n_planes;
1751
          result->avec_ctx[i >> 2] = eb_ctx_avec_new(params, i, end_plane);
1752
        }
1753
#endif
1754
0
      result->plane_ctx = NULL;
1755
0
    }
1756
0
  else
1757
0
    {
1758
0
      result->plane_ctx = (EBPlaneCtx **)malloc(sizeof(EBPlaneCtx *) * n_planes);
1759
0
      for (i = 0; i < n_planes; i++)
1760
0
        result->plane_ctx[i] = even_better_plane_new (params, result, i);
1761
0
    }
1762
0
  return result;
1763
0
}
1764
1765
/**
1766
 * even_better_free: Free an #EvenBetterCtx context.
1767
 * @ctx: The #EvenBetterCtx context to free.
1768
 *
1769
 * Frees @ctx.
1770
 **/
1771
void
1772
even_better_free (EvenBetterCtx *ctx)
1773
0
{
1774
0
  int i;
1775
0
  int n_planes = ctx->n_planes;
1776
1777
0
  if (ctx->dump_file)
1778
0
    fclose (ctx->dump_file);
1779
1780
#ifdef USE_VECTOR
1781
  if (ctx->using_vectors)
1782
    {
1783
#ifdef USE_SSE2
1784
      for (i = 0; i < n_planes; i += 4)
1785
        eb_ctx_sse2_free(ctx->sse2_ctx[i >> 2]);
1786
      free(ctx->sse2_ctx);
1787
#endif
1788
#ifdef USE_AVEC
1789
      for (i = 0; i < n_planes; i += 4)
1790
        eb_ctx_avec_free(ctx->avec_ctx[i >> 2]);
1791
      free(ctx->avec_ctx);
1792
#endif
1793
    }
1794
  else
1795
#endif
1796
0
    {
1797
0
      for (i = 0; i < n_planes; i++)
1798
0
        even_better_plane_free (ctx->plane_ctx[i]);
1799
0
      free(ctx->plane_ctx);
1800
0
    }
1801
0
  free (ctx->strengths);
1802
0
  free (ctx->c_line);
1803
1804
0
  free (ctx);
1805
0
}