/src/libspectre/ghostscript/devices/rinkj/evenbetter-rll.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* Copyright (C) 2001-2020 Artifex Software, Inc. |
2 | | All Rights Reserved. |
3 | | |
4 | | This software is provided AS-IS with no warranty, either express or |
5 | | implied. |
6 | | |
7 | | This software is distributed under license and may not be copied, |
8 | | modified or distributed except as expressly authorized under the terms |
9 | | of the license contained in the file LICENSE in this distribution. |
10 | | |
11 | | Refer to licensing information at http://www.artifex.com or contact |
12 | | Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, |
13 | | CA 94945, U.S.A., +1(415)492-9861, for further information. |
14 | | */ |
15 | | |
16 | | |
17 | | /* Testbed implementation of Even Better Screening. */ |
18 | | |
19 | | /* |
20 | | * Code in this module is covered by US Patents 5,055,942 and |
21 | | * 5,917,614, and corresponding international patents. |
22 | | */ |
23 | | |
24 | | #include <stdio.h> |
25 | | #include <stdlib.h> |
26 | | #include <string.h> |
27 | | #include <math.h> |
28 | | #include "evenbetter-rll.h" |
29 | | |
30 | | /* Set this define if compiling with AltiVec optimizations. */ |
31 | | #define noUSE_AVEC |
32 | | |
33 | | /* Set this define if compiling with SSE optimizations. */ |
34 | | #define noUSE_SSE2 |
35 | | |
/* Version number of this implementation. */
36 | 0 | #define EVENBETTER_VERSION 133 |
37 | | |
/* Left shift applied when level values are converted to the integer
 * fixed-point scale used by the scalar code (e.g. (levels - 1) << EVEN_SHIFT). */
38 | 0 | #define EVEN_SHIFT 16 |
/* Extra precision bits carried by imo_mul in the scalar line routine;
 * removed again with a right shift after multiplying by the output level. */
39 | 0 | #define IMO_SHIFT 14 |
/* One quarter of a full level step in the EVEN_SHIFT fixed-point scale.
 * NOTE(review): no use of this macro is visible in this part of the file. */
40 | | #define EVEN_RB_CAP (1 << (EVEN_SHIFT - 2)) |
41 | | |
42 | | #define FANCY_COUPLING |
43 | | |
44 | | #if defined(USE_AVEC) || defined(USE_SSE2) |
45 | | #define USE_VECTOR |
46 | | #endif |
47 | | |
48 | | #ifdef USE_AVEC |
49 | | #include "eb_avec.h" |
50 | | |
51 | | #endif |
52 | | |
53 | | #ifdef USE_SSE2 |
/* Forward declarations for the SSE2 code path. Only prototypes appear
 * here; the bodies of these routines are not in this part of the file. */
54 | | typedef struct _eb_ctx_sse2 eb_ctx_sse2; |
55 | | typedef struct _eb_srcbuf eb_srcbuf; |
56 | | |
57 | | int eb_test_sse2(void); |
/* Called once per 16-pixel chunk by even_better_line_vector() with the
 * four destination row pointers, the prepared source buffer and the
 * pixel offset of the chunk. */
58 | | int eb_sse2_core(eb_ctx_sse2 *ctx, unsigned char **out, eb_srcbuf *in, |
59 | | int offset); |
/* Called in a right-to-left pass over the chunks after the forward pass. */
60 | | int eb_sse2_rev_rs(eb_ctx_sse2 *ctx, int offset); |
/* eb_sse2_set_daz() returns a value that is later handed back to
 * eb_sse2_restore_daz(); presumably the saved MXCSR register - confirm. */
61 | | int eb_sse2_set_daz(void); |
62 | | void eb_sse2_restore_daz(int save_mxcsr); |
63 | | |
/*
 * State for one group of up to four planes on the SSE2 path. Instances
 * are created by eb_ctx_sse2_new() with a 16-byte-aligned allocation and
 * passed to the external eb_sse2_* routines.
 * NOTE(review): the field order is presumably relied on by those external
 * routines; do not reorder without checking them.
 */
64 | | struct _eb_ctx_sse2 { |
/* Copy of params->source_width. */
65 | | int xs; |
/* Per-line buffers of (xs + 32) * 4 32-bit entries each, allocated with
 * eb_malloc_aligned(); iir_line is initialised through a float pointer,
 * a_line to 1, b_line to aspect squared and r_line to 0. */
66 | | int *iir_line; |
67 | | int *r_line; |
68 | | int *a_line; |
69 | | int *b_line; |
/* One flag per 16-pixel chunk, written by the line routine. */
70 | | char *skip_line; |
/* NOTE(review): purpose not visible here; presumably padding. */
71 | | int dummy[2]; |
/* One table per plane with three floats per source value; unused slots
 * are NULL. */
72 | | float *luts[4]; |
/* The four-element arrays below hold one lane per plane. Initial values
 * set by eb_ctx_sse2_new(): e and e_i_1 = 0, r = 0, a = 1, b = aspect^2. */
73 | | float e[4]; |
74 | | float e_i_1[4]; |
75 | | int r[4]; |
76 | | int a[4]; |
77 | | int b[4]; |
/* Constant lanes: 1, 2 and aspect^2. */
78 | | int ones[4]; |
79 | | int twos[4]; |
80 | | int aspect2[4]; |
/* Initialised to 1.1, -0.1 and (levels - 1) respectively. */
81 | | float ehi[4]; |
82 | | float elo[4]; |
83 | | float ohi[4]; |
/* 1 / (aspect * (1 << (6 - even_c1_scale))) in every lane. */
84 | | float r_mul[4]; |
/* Weights 1/16, 3/16, 5/16 and 7/16. */
85 | | float kernel[4]; |
/* Per-lane seeds, initialised to (i << 8) + 0x7000 and (i << 16) + 0x9000. */
86 | | unsigned int seed1[4]; |
87 | | unsigned int seed2[4]; |
88 | | }; |
89 | | |
/*
 * Source data for one 16-pixel chunk of up to four planes. Each array is
 * filled by the line routine at index [j * 4 + plane] for pixel j of the
 * chunk, using the three floats stored per source value in the plane LUT.
 */
90 | | struct _eb_srcbuf { |
91 | | float im[64]; |
92 | | float rb[64]; |
93 | | float rs[64]; |
/* Slack bytes: the line routine moves its working pointer up to 12 bytes
 * into a stack instance to reach a 16-byte boundary. */
94 | | int dummy[3]; |
95 | | }; |
96 | | |
97 | | #endif |
98 | | |
99 | | typedef struct _EBPlaneCtx EBPlaneCtx; |
100 | | typedef unsigned int uint32; |
101 | | |
/* Top-level screening context shared by all planes. */
102 | | struct _EvenBetterCtx { |
103 | | int source_width; |
/* Number of output pixels per line processed by the line routines. */
104 | | int dest_width; |
105 | | int n_planes; |
106 | | int levels; /* Number of levels on output, <= 256 */ |
/* One scalar-path context per plane, indexed by plane number. */
107 | | EBPlaneCtx **plane_ctx; |
/* Squared by the line routines to obtain aspect2. */
108 | | int aspect; |
/* Per-plane weight of a plane's quantisation residue in the shared
 * coupling term (applied as value * strengths[plane] >> 8). */
109 | | int *strengths; |
/* Lower and upper clamp applied to the per-pixel error term. */
110 | | int even_elo; |
111 | | int even_ehi; |
/* Per-pixel coupling values carried from one line to the next. */
112 | | int *c_line; |
113 | | |
/* Used as a shift amount when scaling the distance value r. */
114 | | int even_c1; |
115 | | int do_shadows; |
116 | | |
/* Random number generator state; read at the start of a line and written
 * back at its end by the scalar line routine. */
117 | | uint32 seed1; |
118 | | uint32 seed2; |
119 | | |
120 | | FILE *dump_file; |
121 | | EbDumpLevel dump_level; |
122 | | |
123 | | #ifdef USE_SSE2 |
/* One vector context per group of four planes (indexed by strip >> 2). */
124 | | eb_ctx_sse2 **sse2_ctx; |
125 | | int using_vectors; |
126 | | #endif |
127 | | #ifdef USE_AVEC |
/* One vector context per group of four planes (indexed by strip >> 2). */
128 | | eb_ctx_avec **avec_ctx; |
129 | | int using_vectors; |
130 | | #endif |
131 | | }; |
132 | | |
/* Per-plane state for the scalar code path. */
133 | | struct _EBPlaneCtx { |
134 | | int source_width; |
135 | | int dest_width; |
136 | | int *rb_line; |
/* Per-pixel error values; the scalar line routine reads entry i + 1 from
 * the previous line and stores the new value for pixel i. */
137 | | int *iir_line; |
/* Per-pixel distance state (r, a, b) read and rewritten on every line. */
138 | | int *r_line; |
139 | | int *a_line; |
140 | | int *b_line; |
/* NOTE(review): the *_sh lines are not referenced by the scalar line
 * routine visible in this part of the file. */
141 | | int *r_line_sh; |
142 | | int *a_line_sh; |
143 | | int *b_line_sh; |
/* Tables indexed by source pixel value: image value, rb value and
 * random shift amount respectively. */
144 | | int *lut; |
145 | | int *rb_lut; |
146 | | char *rs_lut; |
/* One counter per 16-pixel chunk of consecutive lines on which the chunk
 * was covered by a zero-valued run. */
147 | | int *white_count_line; |
148 | | }; |
149 | | |
/*
 * Allocate `size` bytes starting at an address aligned to at least 16
 * bytes, or to `align` rounded up to a power of two when that is larger.
 *
 * The distance from the start of the underlying malloc() block to the
 * returned pointer is stored as an int immediately before the returned
 * pointer; eb_free_aligned() reads it back to recover the address that
 * has to be passed to free().
 *
 * Returns NULL if size is negative, align exceeds 1 << 30, or the
 * underlying allocation fails. Release the result with eb_free_aligned().
 */
void *
eb_malloc_aligned(int size, int align)
{
    size_t alignment = 16;
    size_t total;
    size_t addr;
    size_t pad;
    char *alloced;
    void *result;

    if (size < 0 || align > (1 << 30))
        return NULL;

    /* Never hand out less than the historical 16-byte alignment. */
    if (align > 16)
    {
        while (alignment < (size_t)align)
            alignment <<= 1;
    }

    /* Room for the payload, the worst-case alignment slack and the
       bookkeeping int that precedes the returned pointer. */
    total = (size_t)size + alignment + sizeof(int);

    alloced = (char *)malloc(total);
    if (alloced == NULL)
        return NULL;

    /* First aligned address that leaves at least sizeof(int) bytes
       between the start of the block and the result. */
    addr = (size_t)alloced + sizeof(int);
    addr = (addr + alignment - 1) & ~(alignment - 1);
    pad = addr - (size_t)alloced;

    result = (void *)(alloced + pad);
    ((int *)result)[-1] = (int)pad;
    return result;
}
164 | | |
/*
 * Release a block obtained from eb_malloc_aligned().
 *
 * The int stored immediately before `p` holds the offset from the start
 * of the underlying malloc() block; it is subtracted to recover the
 * pointer that free() expects. A NULL `p` is ignored, matching free().
 */
void
eb_free_aligned(void *p)
{
    int pad;

    if (p == NULL)
        return;

    pad = ((int *)p)[-1];
    free((char *)p - pad);
}
171 | | |
172 | | static double |
173 | | eb_compute_rbscale(const EvenBetterParams *params) |
174 | 0 | { |
175 | 0 | double rbscale = params->rbscale; |
176 | |
|
177 | 0 | if (rbscale == 0.0) |
178 | 0 | { |
179 | 0 | rbscale = params->aspect == 1 ? 0.95 : |
180 | 0 | params->aspect == 2 ? 1.8 : |
181 | 0 | params->aspect == 4 ? 3.6 : 1; |
182 | 0 | } |
183 | 0 | return rbscale; |
184 | 0 | } |
185 | | |
186 | | static int |
187 | | eb_compute_randshift(int nl, int rs_base, int do_shadows, int levels) |
188 | 0 | { |
189 | 0 | int rs = rs_base; |
190 | 0 | if ((nl > (90 << (EVEN_SHIFT - 10)) && |
191 | 0 | nl < (129 << (EVEN_SHIFT - 10))) || |
192 | 0 | (nl > (162 << (EVEN_SHIFT - 10)) && |
193 | 0 | nl < (180 << (EVEN_SHIFT - 10)))) |
194 | 0 | rs--; |
195 | 0 | else if (nl > (321 << (EVEN_SHIFT - 10)) && |
196 | 0 | nl < (361 << (EVEN_SHIFT - 10))) |
197 | 0 | { |
198 | 0 | rs--; |
199 | 0 | if (nl > (331 << (EVEN_SHIFT - 10)) && |
200 | 0 | nl < (351 << (EVEN_SHIFT - 10))) |
201 | 0 | rs--; |
202 | 0 | } |
203 | 0 | else if ((do_shadows || |
204 | 0 | nl == (levels - 1) << EVEN_SHIFT) && |
205 | 0 | nl > ((levels - 1) << EVEN_SHIFT) - |
206 | 0 | (1 << (EVEN_SHIFT - 2))) |
207 | 0 | { |
208 | | /* don't add randomness in extreme shadows */ |
209 | 0 | } |
210 | 0 | else if ((nl > (3 << (EVEN_SHIFT - 2)))) |
211 | 0 | { |
212 | 0 | nl -= (nl + (1 << (EVEN_SHIFT - 2))) & -(1 << (EVEN_SHIFT - 1)); |
213 | 0 | if (nl < 0) nl = -nl; |
214 | 0 | if (nl < (1 << (EVEN_SHIFT - 4))) rs--; |
215 | 0 | if (nl < (1 << (EVEN_SHIFT - 5))) rs--; |
216 | 0 | if (nl < (1 << (EVEN_SHIFT - 6))) rs--; |
217 | 0 | } |
218 | 0 | else |
219 | 0 | { |
220 | 0 | if (nl < (3 << (EVEN_SHIFT - 3))) nl += 1 << (EVEN_SHIFT - 2); |
221 | 0 | nl = nl - (1 << (EVEN_SHIFT - 1)); |
222 | 0 | if (nl < 0) nl = -nl; |
223 | 0 | if (nl < (1 << (EVEN_SHIFT - 4))) rs--; |
224 | 0 | if (nl < (1 << (EVEN_SHIFT - 5))) rs--; |
225 | 0 | if (nl < (1 << (EVEN_SHIFT - 6))) rs--; |
226 | 0 | } |
227 | 0 | return rs; |
228 | 0 | } |
229 | | |
230 | | #ifdef USE_SSE2 |
231 | | static eb_ctx_sse2 * |
232 | | eb_ctx_sse2_new(const EvenBetterParams *params, int start_plane, int end_plane) |
233 | | { |
234 | | int xs = params->source_width; |
235 | | int aspect2 = params->aspect * params->aspect; |
236 | | eb_ctx_sse2 *ctx; |
237 | | int i; |
238 | | double im_scale; |
239 | | float r_mul = 1.0 / (params->aspect * (1 << (6 - params->even_c1_scale))); |
240 | | double rbscale = eb_compute_rbscale(params); |
241 | | int rs_base; |
242 | | |
243 | | ctx = (eb_ctx_sse2 *)eb_malloc_aligned(sizeof(eb_ctx_sse2), 16); |
244 | | ctx->xs = xs; |
245 | | for (i = 0; i < 4; i++) |
246 | | { |
247 | | ctx->e[i] = 0.0; |
248 | | ctx->e_i_1[i] = 0.0; |
249 | | ctx->r[i] = 0; |
250 | | ctx->a[i] = 1; |
251 | | ctx->b[i] = aspect2; |
252 | | ctx->ones[i] = 1; |
253 | | ctx->twos[i] = 2; |
254 | | ctx->aspect2[i] = aspect2; |
255 | | ctx->ohi[i] = params->levels - 1; |
256 | | ctx->ehi[i] = 1.1; |
257 | | ctx->elo[i] = -0.1; |
258 | | ctx->r_mul[i] = r_mul; |
259 | | ctx->seed1[i] = (i << 8) + 0x7000; |
260 | | ctx->seed2[i] = (i << 16) + 0x9000; |
261 | | } |
262 | | ctx->kernel[0] = 1.0 / 16; |
263 | | ctx->kernel[1] = 3.0 / 16; |
264 | | ctx->kernel[2] = 5.0 / 16; |
265 | | ctx->kernel[3] = 7.0 / 16; |
266 | | |
267 | | im_scale = (params->levels - 1) * 1.0 / (1 << 24); |
268 | | rs_base = 35 - EVEN_SHIFT - params->rand_scale; |
269 | | |
270 | | for (i = start_plane; i < end_plane; i++) |
271 | | { |
272 | | float *lut = (float *)malloc((ET_SRC_MAX + 1) * sizeof(float) * 3); |
273 | | int j; |
274 | | ctx->luts[i - start_plane] = lut; |
275 | | |
276 | | for (j = 0; j < ET_SRC_MAX + 1; j++) |
277 | | { |
278 | | double g = ((1 << 24) - params->luts[i][j]) * im_scale; |
279 | | int nl, rs; |
280 | | |
281 | | lut[j * 3] = g; |
282 | | if (g == 0.0) |
283 | | lut[j * 3 + 1] = 0.5; |
284 | | else |
285 | | lut[j * 3 + 1] = 0.5 - r_mul * rbscale / g; |
286 | | |
287 | | nl = (params->levels - 1 - g) * (1 << EVEN_SHIFT); |
288 | | rs = eb_compute_randshift(nl, rs_base, |
289 | | params->do_shadows, params->levels); |
290 | | |
291 | | lut[j * 3 + 2] = 1.0 / (1 << EVEN_SHIFT) / (1 << rs); |
292 | | } |
293 | | } |
294 | | for (i = i - start_plane; i < 4; i++) |
295 | | ctx->luts[i] = NULL; |
296 | | |
297 | | ctx->iir_line = (int *)eb_malloc_aligned(16 * (xs + 32), 16); |
298 | | ctx->a_line = (int *)eb_malloc_aligned(16 * (xs + 32), 16); |
299 | | ctx->b_line = (int *)eb_malloc_aligned(16 * (xs + 32), 16); |
300 | | ctx->r_line = (int *)eb_malloc_aligned(16 * (xs + 32), 16); |
301 | | for (i = 0; i < (xs + 32) * 4; i++) |
302 | | { |
303 | | ((float *)ctx->iir_line)[i] = 0; |
304 | | ctx->a_line[i] = 1; |
305 | | ctx->b_line[i] = aspect2; |
306 | | ctx->r_line[i] = 0; |
307 | | } |
308 | | |
309 | | ctx->skip_line = (char *)malloc((xs + 15) & -16); |
310 | | |
311 | | return ctx; |
312 | | } |
313 | | |
314 | | static void |
315 | | eb_ctx_sse2_free(eb_ctx_sse2 *ctx) |
316 | | { |
317 | | int i; |
318 | | |
319 | | for (i = 0; i < 4; i++) |
320 | | free(ctx->luts[i]); |
321 | | eb_free_aligned(ctx->iir_line); |
322 | | eb_free_aligned(ctx->a_line); |
323 | | eb_free_aligned(ctx->b_line); |
324 | | eb_free_aligned(ctx->r_line); |
325 | | free(ctx->skip_line); |
326 | | eb_free_aligned(ctx); |
327 | | } |
328 | | #endif |
329 | | |
330 | | #ifdef USE_AVEC |
331 | | static eb_ctx_avec * |
332 | | eb_ctx_avec_new(const EvenBetterParams *params, int start_plane, int end_plane) |
333 | | { |
334 | | int xs = params->source_width; |
335 | | int aspect2 = params->aspect * params->aspect; |
336 | | eb_ctx_avec *ctx; |
337 | | int i; |
338 | | double im_scale; |
339 | | double k; |
340 | | float imscale1, imscale2, rbmul, rsbase; |
341 | | float r_mul = 1.0 / (params->aspect * (1 << (6 - params->even_c1_scale))); |
342 | | double rbscale = eb_compute_rbscale(params); |
343 | | vector unsigned int zero = vec_splat_u32(0); |
344 | | const vector float kernel = { 1.0 / 16, 3.0 / 16, 5.0 / 16, 7.0 / 16 }; |
345 | | vector float almostone = { 255.0/256, 255.0/256, 255.0/256, 255.0/256 }; |
346 | | int rs_base; |
347 | | |
348 | | ctx = (eb_ctx_avec *)eb_malloc_aligned(sizeof(eb_ctx_avec), 16); |
349 | | ctx->xs = xs; |
350 | | |
351 | | ctx->e = (vector float) zero; |
352 | | ctx->e_i_1 = (vector float) zero; |
353 | | ctx->r = zero; |
354 | | ctx->a = zero; |
355 | | im_scale = (params->levels - 1) * (1.0 / (1 << 24)); |
356 | | rs_base = 35 - EVEN_SHIFT - params->rand_scale; |
357 | | |
358 | | if (params->gamma == 1.0) |
359 | | k = 0; |
360 | | else if (params->gamma == 1.8) |
361 | | k = 0.835; |
362 | | else if (params->gamma == 2.0) |
363 | | k = 1.0; |
364 | | else |
365 | | /* this shouldn't happen! */ |
366 | | k = 0; |
367 | | |
368 | | for (;;) |
369 | | { |
370 | | vector float foff, f0, f1; |
371 | | |
372 | | imscale1 = (1 - k) * (params->levels - 1) * (256.0 / 255.0); |
373 | | imscale2 = k * (params->levels - 1) * sqrt(256.0 / 255.0); |
374 | | for (i = 0; i < 4; i++) |
375 | | { |
376 | | ((float *)&ctx->imscale1)[i] = imscale1; |
377 | | ((float *)&ctx->imscale2)[i] = imscale2; |
378 | | } |
379 | | f0 = vec_rsqrte(almostone); |
380 | | f0 = vec_madd(f0, almostone, (vector float)zero); |
381 | | f1 = vec_madd(f0, ctx->imscale2, (vector float)zero); |
382 | | foff = vec_madd(almostone, ctx->imscale1, f1); |
383 | | f1 = vec_nmsub(f0, ctx->imscale2, foff); |
384 | | f1 = vec_nmsub(almostone, ctx->imscale1, f1); |
385 | | if (vec_all_eq(f1, (vector float)zero)) |
386 | | { |
387 | | ctx->foff = foff; |
388 | | break; |
389 | | } |
390 | | k += 1e-5; |
391 | | } |
392 | | rbmul = -r_mul * rbscale; |
393 | | rsbase = 1.0 / (1 << EVEN_SHIFT) / (1 << rs_base); |
394 | | for (i = 0; i < 4; i++) |
395 | | { |
396 | | ((int *)&ctx->b)[i] = aspect2; |
397 | | ((int *)&ctx->aspect2)[i] = aspect2; |
398 | | ((int *)&ctx->seed1)[i] = (i << 8) + 0x7000; |
399 | | ((int *)&ctx->seed2)[i] = (i << 16) + 0x9000; |
400 | | ((float *)&ctx->ohi)[i] = params->levels - 1; |
401 | | ((float *)&ctx->ehi)[i] = 1.1; |
402 | | ((float *)&ctx->elo)[i] = -0.1; |
403 | | ((float *)&ctx->r_mul)[i] = r_mul; |
404 | | ((float *)&ctx->rsbase)[i] = rsbase; |
405 | | ((float *)&ctx->rbmul)[i] = rbmul; |
406 | | } |
407 | | ctx->kernel = kernel; |
408 | | |
409 | | rs_base = 35 - EVEN_SHIFT - params->rand_scale; |
410 | | |
411 | | for (i = start_plane; i < end_plane; i++) |
412 | | { |
413 | | float *lut = (float *)malloc((ET_SRC_MAX + 1) * sizeof(float) * 3); |
414 | | int j; |
415 | | ctx->luts[i - start_plane] = lut; |
416 | | |
417 | | for (j = 0; j < ET_SRC_MAX + 1; j++) |
418 | | { |
419 | | double g = ((1 << 24) - params->luts[i][j]) * im_scale; |
420 | | int nl, rs; |
421 | | |
422 | | lut[j * 3] = g; |
423 | | if (g == 0.0) |
424 | | lut[j * 3 + 1] = 0.5; |
425 | | else |
426 | | lut[j * 3 + 1] = 0.5 - r_mul * rbscale / g; |
427 | | nl = (params->levels - 1 - g) * (1 << EVEN_SHIFT); |
428 | | rs = eb_compute_randshift(nl, rs_base, |
429 | | params->do_shadows, params->levels); |
430 | | |
431 | | lut[j * 3 + 2] = 1.0 / (1 << EVEN_SHIFT) / (1 << rs); |
432 | | } |
433 | | } |
434 | | for (i = i - start_plane; i < 4; i++) |
435 | | ctx->luts[i] = NULL; |
436 | | |
437 | | ctx->iir_line = (vector float *)eb_malloc_aligned(16 * (xs + 32), 16); |
438 | | ctx->a_line = (vector unsigned int *)eb_malloc_aligned(16 * (xs + 32), 16); |
439 | | ctx->b_line = (vector unsigned int *)eb_malloc_aligned(16 * (xs + 32), 16); |
440 | | ctx->r_line = (vector unsigned int *)eb_malloc_aligned(16 * (xs + 32), 16); |
441 | | for (i = 0; i < (xs + 32) * 4; i++) |
442 | | { |
443 | | ((float *)ctx->iir_line)[i] = 0; |
444 | | ((int *)ctx->a_line)[i] = 1; |
445 | | ((int *)ctx->b_line)[i] = aspect2; |
446 | | ((int *)ctx->r_line)[i] = 0; |
447 | | } |
448 | | |
449 | | ctx->skip_line = (char *)malloc((xs + 15) & -16); |
450 | | |
451 | | return ctx; |
452 | | } |
453 | | |
454 | | static void |
455 | | eb_ctx_avec_free(eb_ctx_avec *ctx) |
456 | | { |
457 | | int i; |
458 | | |
459 | | for (i = 0; i < 4; i++) |
460 | | free(ctx->luts[i]); |
461 | | eb_free_aligned(ctx->iir_line); |
462 | | eb_free_aligned(ctx->a_line); |
463 | | eb_free_aligned(ctx->b_line); |
464 | | eb_free_aligned(ctx->r_line); |
465 | | free(ctx->skip_line); |
466 | | eb_free_aligned(ctx); |
467 | | } |
468 | | |
469 | | #endif |
470 | | |
471 | | #ifdef USE_VECTOR |
472 | | static int |
473 | | even_better_line_vector(EvenBetterCtx *ebc, uchar **dest, |
474 | | const ET_Rll *const *src) |
475 | | { |
476 | | int n_planes = ebc->n_planes; |
477 | | int xd = ebc->dest_width; |
478 | | int strip; |
479 | | eb_srcbuf sb_alloc; |
480 | | eb_srcbuf *srcbuf; |
481 | | uchar dummy_a[32]; |
482 | | uchar *dummy_dst = (uchar *)(((int)dummy_a + 15) & -16); |
483 | | #ifdef USE_SSE2 |
484 | | int save_mxcsr = eb_sse2_set_daz(); |
485 | | #endif |
486 | | |
487 | | srcbuf = (eb_srcbuf *)(((int)&sb_alloc + 12) & -16); |
488 | | |
489 | | for (strip = 0; strip < n_planes; strip += 4) |
490 | | { |
491 | | #ifdef USE_AVEC |
492 | | eb_ctx_avec *ctx = ebc->avec_ctx[strip >> 2]; |
493 | | #endif |
494 | | #ifdef USE_SSE2 |
495 | | eb_ctx_sse2 *ctx = ebc->sse2_ctx[strip >> 2]; |
496 | | #endif |
497 | | uchar *destbufs[4]; |
498 | | const ET_Rll *const *sbuf = src + strip; |
499 | | int count[4]; |
500 | | int src_idx[4]; |
501 | | int plane_idx, last_plane; |
502 | | float im[4], rb[4], rs[4]; |
503 | | int i; |
504 | | |
505 | | last_plane = n_planes - strip < 4 ? n_planes - strip : 4; |
506 | | for (plane_idx = 0; plane_idx < last_plane; plane_idx++) |
507 | | { |
508 | | count[plane_idx] = 0; |
509 | | src_idx[plane_idx] = 0; |
510 | | destbufs[plane_idx] = dest[plane_idx + strip]; |
511 | | } |
512 | | for (; plane_idx < 4; plane_idx++) |
513 | | { |
514 | | int j; |
515 | | |
516 | | for (j = 0; j < 16; j++) |
517 | | { |
518 | | ((float *)&srcbuf->im)[j * 4 + plane_idx] = 0.0; |
519 | | ((float *)&srcbuf->rb)[j * 4 + plane_idx] = 0.0; |
520 | | ((float *)&srcbuf->rs)[j * 4 + plane_idx] = 0.0; |
521 | | } |
522 | | } |
523 | | for (i = 0; i < xd; i += 16) |
524 | | { |
525 | | int jmax = (xd - i) > 16 ? 16 : xd - i; |
526 | | int skip = 1; |
527 | | int j; |
528 | | |
529 | | for (plane_idx = 0; plane_idx < last_plane; plane_idx++) |
530 | | { |
531 | | if (count[plane_idx] < 16 || im[plane_idx] != 0.0) |
532 | | { |
533 | | skip = 0; |
534 | | break; |
535 | | } |
536 | | } |
537 | | ctx->skip_line[i >> 4] = skip; |
538 | | |
539 | | if (skip) |
540 | | { |
541 | | /* all white */ |
542 | | |
543 | | for (plane_idx = 0; plane_idx < last_plane; plane_idx++) |
544 | | { |
545 | | uchar *dst_ptr = destbufs[plane_idx]; |
546 | | if (jmax == 16) |
547 | | { |
548 | | ((uint32 *)dst_ptr)[(i >> 2) + 0] = 0; |
549 | | ((uint32 *)dst_ptr)[(i >> 2) + 1] = 0; |
550 | | ((uint32 *)dst_ptr)[(i >> 2) + 2] = 0; |
551 | | ((uint32 *)dst_ptr)[(i >> 2) + 3] = 0; |
552 | | } |
553 | | else |
554 | | { |
555 | | for (j = 0; j < jmax; j++) |
556 | | dst_ptr[i + j] = 0; |
557 | | } |
558 | | count[plane_idx] -= jmax; |
559 | | } |
560 | | } |
561 | | else |
562 | | { |
563 | | for (plane_idx = 0; plane_idx < last_plane; plane_idx++) |
564 | | { |
565 | | const float *lut = ctx->luts[plane_idx]; |
566 | | float imp = im[plane_idx]; |
567 | | float rbp = rb[plane_idx]; |
568 | | float rsp = rs[plane_idx]; |
569 | | for (j = 0; j < jmax; j++) |
570 | | { |
571 | | if (count[plane_idx] == 0) |
572 | | { |
573 | | const ET_Rll *src_p = sbuf[plane_idx] + |
574 | | src_idx[plane_idx]++; |
575 | | ET_SrcPixel src_pixel = src_p->value; |
576 | | count[plane_idx] = src_p->length; |
577 | | imp = lut[src_pixel * 3]; |
578 | | rbp = lut[src_pixel * 3 + 1]; |
579 | | rsp = lut[src_pixel * 3 + 2]; |
580 | | } |
581 | | ((float *)&srcbuf->im)[j * 4 + plane_idx] = imp; |
582 | | ((float *)&srcbuf->rb)[j * 4 + plane_idx] = rbp; |
583 | | ((float *)&srcbuf->rs)[j * 4 + plane_idx] = rsp; |
584 | | count[plane_idx]--; |
585 | | } |
586 | | im[plane_idx] = imp; |
587 | | rb[plane_idx] = rbp; |
588 | | rs[plane_idx] = rsp; |
589 | | } |
590 | | for (; plane_idx < 4; plane_idx++) |
591 | | { |
592 | | destbufs[plane_idx] = dummy_dst - i; |
593 | | } |
594 | | #ifdef USE_AVEC |
595 | | eb_avec_core(ctx, (vector unsigned char **)destbufs, srcbuf, i); |
596 | | #endif |
597 | | #ifdef USE_SSE2 |
598 | | eb_sse2_core(ctx, destbufs, srcbuf, i); |
599 | | #endif |
600 | | } |
601 | | } |
602 | | |
603 | | for (i = xd & -16; i >= 0; i -= 16) |
604 | | { |
605 | | if (!ctx->skip_line[i >> 4]) |
606 | | { |
607 | | #ifdef USE_AVEC |
608 | | eb_avec_rev_rs(ctx, i + 15); |
609 | | #endif |
610 | | #ifdef USE_SSE2 |
611 | | eb_sse2_rev_rs(ctx, i + 15); |
612 | | #endif |
613 | | } |
614 | | } |
615 | | } |
616 | | #ifdef USE_SSE2 |
617 | | eb_sse2_restore_daz(save_mxcsr); |
618 | | #endif |
619 | | return 0; |
620 | | } |
621 | | #endif |
622 | | |
623 | | #ifdef USE_AVEC |
624 | | static int |
625 | | even_better_line_fastprep(EvenBetterCtx *ebc, uchar **dest, |
626 | | const ET_SrcPixel *const *src) |
627 | | { |
628 | | int n_planes = ebc->n_planes; |
629 | | int xd = ebc->dest_width; |
630 | | int strip; |
631 | | eb_srcbuf sb_alloc; |
632 | | eb_srcbuf *srcbuf; |
633 | | uchar dummy_a[32]; |
634 | | uchar *dummy_dst = (uchar *)(((int)dummy_a + 15) & -16); |
635 | | |
636 | | srcbuf = (eb_srcbuf *)(((int)&sb_alloc + 12) & -16); |
637 | | |
638 | | for (strip = 0; strip < n_planes; strip += 4) |
639 | | { |
640 | | #ifdef USE_AVEC |
641 | | eb_ctx_avec *ctx = ebc->avec_ctx[strip >> 2]; |
642 | | #endif |
643 | | #ifdef USE_SSE2 |
644 | | eb_ctx_sse2 *ctx = ebc->sse2_ctx[strip >> 2]; |
645 | | #endif |
646 | | uchar *destbufs[4]; |
647 | | const ET_SrcPixel *const *sbuf = src + strip; |
648 | | int plane_idx, last_plane; |
649 | | int i; |
650 | | |
651 | | last_plane = n_planes - strip < 4 ? n_planes - strip : 4; |
652 | | for (plane_idx = 0; plane_idx < last_plane; plane_idx++) |
653 | | { |
654 | | destbufs[plane_idx] = dest[plane_idx + strip]; |
655 | | } |
656 | | for (i = 0; i < xd; i += 16) |
657 | | { |
658 | | int noskip; |
659 | | noskip = eb_avec_prep_srcbuf(ctx, last_plane, srcbuf, sbuf, i); |
660 | | ctx->skip_line[i >> 4] = noskip; |
661 | | if (noskip) |
662 | | { |
663 | | for (plane_idx = last_plane; plane_idx < 4; plane_idx++) |
664 | | destbufs[plane_idx] = dummy_dst - i; |
665 | | eb_avec_core(ctx, (vector unsigned char **)destbufs, srcbuf, i); |
666 | | } |
667 | | else |
668 | | { |
669 | | /* all white */ |
670 | | |
671 | | for (plane_idx = 0; plane_idx < last_plane; plane_idx++) |
672 | | { |
673 | | uchar *dst_ptr = destbufs[plane_idx]; |
674 | | ((uint32 *)dst_ptr)[(i >> 2) + 0] = 0; |
675 | | ((uint32 *)dst_ptr)[(i >> 2) + 1] = 0; |
676 | | ((uint32 *)dst_ptr)[(i >> 2) + 2] = 0; |
677 | | ((uint32 *)dst_ptr)[(i >> 2) + 3] = 0; |
678 | | } |
679 | | } |
680 | | } |
681 | | |
682 | | for (i = xd & -16; i >= 0; i -= 16) |
683 | | { |
684 | | if (ctx->skip_line[i >> 4]) |
685 | | { |
686 | | #ifdef USE_AVEC |
687 | | eb_avec_rev_rs(ctx, i + 15); |
688 | | #endif |
689 | | #ifdef USE_SSE2 |
690 | | eb_sse2_rev_rs(ctx, i + 15); |
691 | | #endif |
692 | | } |
693 | | } |
694 | | } |
695 | | return 0; |
696 | | } |
697 | | #endif |
698 | | |
699 | | /* Maximum number of planes, but actually we want to dynamically |
700 | | allocate all scratch buffers that depend on this. */ |
701 | 0 | #define M 16 |
702 | | |
/*
 * Scalar implementation: screen one scan line for all planes.
 *
 * ebc  - screening context. The random seeds, the coupling line and the
 *        per-plane r/a/b/iir/white-count lines are read and updated.
 * dest - one output row per plane; dest[p][x] receives the output level.
 * src  - one run-length encoded row per plane (value/length pairs).
 *
 * The line is processed left to right in chunks of up to 16 pixels,
 * followed by two right-to-left passes over the coupling line and the
 * per-plane distance lines.
 *
 * NOTE(review): every per-plane scratch array below has M entries and
 * n_planes is not checked against M in this function - presumably the
 * caller guarantees n_planes <= M; confirm.
 */
703 | | static void |
704 | | even_better_line_hi (EvenBetterCtx *ebc, uchar **dest, |
705 | | const ET_Rll *const *src) |
706 | 0 | { |
707 | 0 | int a[M], b[M]; |
708 | 0 | int e_1_0[M], e_m1_1[M], e_0_1[M], e_1_1[M]; |
709 | 0 | int iml[M], rbl[M]; |
710 | 0 | int i, j; |
711 | 0 | int im; |
712 | 0 | int *pa, *pb, *piir, *pr; |
713 | 0 | int r[M], rg; |
714 | 0 | int xd; |
715 | 0 | uint32 seed1 = ebc->seed1; |
716 | 0 | uint32 seed2 = ebc->seed2; |
717 | 0 | uint32 sum; |
718 | 0 | int plane_idx; |
719 | 0 | int r_scratch[M]; |
720 | 0 | int n_planes = ebc->n_planes; |
721 | 0 | int levels = ebc->levels; |
722 | | #ifdef OLD_QUANT |
723 | | int dith_mul = levels << 8; |
724 | | #else |
725 | 0 | int dith_mul = (levels - 1) << 8; |
726 | 0 | #endif |
/* Divides by (levels - 1), so levels must be at least 2. */
727 | 0 | int imo_mul = (1 << (EVEN_SHIFT + IMO_SHIFT)) / (levels - 1); |
728 | 0 | int aspect2 = ebc->aspect * ebc->aspect; |
729 | 0 | int *strengths = ebc->strengths; |
730 | 0 | int even_elo = ebc->even_elo; |
731 | 0 | int even_ehi = ebc->even_ehi; |
732 | 0 | int coupling; |
733 | 0 | int *c_line = ebc->c_line; |
734 | 0 | int even_c1 = ebc->even_c1; |
735 | 0 | int rand_shift; |
/* Upper bound applied to the distance value r. */
736 | 0 | int even_rlimit = 1 << (30 - EVEN_SHIFT + even_c1); |
/* Per plane: pixels left in the current run and index of the next run. */
737 | 0 | int count[M], src_idx[M]; |
738 | 0 | int rs[M]; |
739 |
|
740 | 0 | xd = ebc->dest_width; |
741 |
|
742 | 0 | memset(rbl, 0x00, M * sizeof(int)); |
743 | 0 | memset(iml, 0x00, M * sizeof(int)); |
744 | 0 | memset(rs, 0x00, M * sizeof(int)); |
745 |
|
746 | 0 | for (plane_idx = 0; plane_idx < n_planes; plane_idx++) |
747 | 0 | { |
748 | 0 | a[plane_idx] = 1; |
749 | 0 | b[plane_idx] = aspect2; |
750 | 0 | r[plane_idx] = 0; |
751 | 0 | e_0_1[plane_idx] = 0; |
752 | 0 | e_1_0[plane_idx] = 0; |
753 | 0 | e_1_1[plane_idx] = 0; |
754 | 0 | count[plane_idx] = 0; |
755 | 0 | src_idx[plane_idx] = 0; |
756 | 0 | } |
757 |
|
758 | 0 | coupling = 0; |
759 |
|
/* Forward pass; i is advanced inside the loop body. */
760 | 0 | for (i = 0; i < xd;) |
761 | 0 | { |
762 | 0 | int work_planes[M]; |
763 | 0 | int n_work = 0; |
764 | 0 | int work_idx; |
765 | 0 | int jmax; |
766 |
|
767 | 0 | jmax = (xd - i) > 16 ? 16 : xd - i; |
768 |
|
/* Decide per plane whether this chunk can be skipped. The chunk counter
 * is incremented while the plane is inside a run with at least 16 pixels
 * left whose image value is 0, and reset otherwise. Once it exceeds 15
 * the chunk is zero-filled and the plane is left out of the work list. */
769 | 0 | for (plane_idx = 0; plane_idx < n_planes; plane_idx++) |
770 | 0 | { |
771 | 0 | EBPlaneCtx *ctx = ebc->plane_ctx[plane_idx]; |
772 | 0 | int *wcl = ctx->white_count_line; |
773 | 0 | if (count[plane_idx] >= 16 && iml[plane_idx] == 0) |
774 | 0 | wcl[i >> 4]++; |
775 | 0 | else |
776 | 0 | wcl[i >> 4] = 0; |
777 | 0 | if (wcl[i >> 4] > 15) |
778 | 0 | { |
779 | 0 | uchar *dst_ptr = dest[plane_idx]; |
/* Full chunk: cleared with four 32-bit stores.
 * NOTE(review): presumably the destination rows are suitably aligned for
 * uint32 access - confirm. */
780 | 0 | if (jmax == 16) |
781 | 0 | { |
782 | 0 | ((uint32 *)dst_ptr)[(i >> 2) + 0] = 0; |
783 | 0 | ((uint32 *)dst_ptr)[(i >> 2) + 1] = 0; |
784 | 0 | ((uint32 *)dst_ptr)[(i >> 2) + 2] = 0; |
785 | 0 | ((uint32 *)dst_ptr)[(i >> 2) + 3] = 0; |
786 | 0 | } |
787 | 0 | else |
788 | 0 | { |
789 | 0 | for (j = 0; j < jmax; j++) |
790 | 0 | dst_ptr[i + j] = 0; |
791 | 0 | } |
792 | 0 | count[plane_idx] -= jmax; |
793 | 0 | } |
794 | 0 | else |
795 | 0 | { |
796 | 0 | work_planes[n_work++] = plane_idx; |
797 | 0 | } |
798 | 0 | } |
799 |
|
800 | 0 | if (n_work == 0) |
801 | 0 | { |
802 | | /* all planes were white */ |
803 | 0 | i += jmax; |
804 | 0 | continue; |
805 | 0 | } |
806 | | |
807 | 0 | for (j = 0; j < jmax; j++) |
808 | 0 | { |
809 | 0 | #ifdef FANCY_COUPLING |
810 | 0 | coupling += c_line[i]; |
811 | | #else |
812 | | coupling = 0; |
813 | | #endif |
814 | | /* Lookup image data and compute R for all planes. */ |
815 | 0 | for (work_idx = 0; work_idx < n_work; work_idx++) |
816 | 0 | { |
/* This declaration shadows the function-level plane_idx. */
817 | 0 | int plane_idx = work_planes[work_idx]; |
818 | 0 | EBPlaneCtx *ctx = ebc->plane_ctx[plane_idx]; |
819 | 0 | ET_SrcPixel src_pixel; |
820 | 0 | int new_r; |
821 |
|
822 | 0 | pr = ctx->r_line; |
823 | 0 | pa = ctx->a_line; |
824 | 0 | pb = ctx->b_line; |
/* Current run exhausted: fetch the next run and look up its values. */
825 | 0 | if (count[plane_idx] == 0) |
826 | 0 | { |
827 | 0 | const ET_Rll *src_p = src[plane_idx] + src_idx[plane_idx]++; |
828 | 0 | int *lut = ctx->lut; |
829 | 0 | int *rblut = ctx->rb_lut; |
830 | 0 | char *rslut = ctx->rs_lut; |
831 |
|
832 | 0 | count[plane_idx] = src_p->length; |
833 | 0 | src_pixel = src_p->value; |
834 | 0 | iml[plane_idx] = lut[src_pixel]; |
835 | 0 | rbl[plane_idx] = rblut[src_pixel]; |
836 | 0 | rs[plane_idx] = rslut[src_pixel]; |
837 | 0 | } |
838 | 0 | count[plane_idx]--; |
839 |
|
/* Keep growing the running value (r += a, a += 2) while it stays below
 * the value stored for this column; otherwise take over the stored
 * r/a/b values. */
840 | 0 | if (r[plane_idx] + a[plane_idx] < pr[i]) |
841 | 0 | { |
842 | 0 | r[plane_idx] += a[plane_idx]; |
843 | 0 | a[plane_idx] += 2; |
844 | 0 | } |
845 | 0 | else |
846 | 0 | { |
847 | 0 | a[plane_idx] = pa[i]; |
848 | 0 | b[plane_idx] = pb[i]; |
849 | 0 | r[plane_idx] = pr[i]; |
850 | 0 | } |
851 | 0 | if (iml[plane_idx] == 0) |
852 | 0 | { |
853 | 0 | r_scratch[plane_idx] = 0; |
854 | 0 | } |
855 | 0 | else |
856 | 0 | { |
857 | 0 | int r_tmp; |
858 | 0 | const int r_max = 0; |
859 | 0 | new_r = r[plane_idx]; |
860 | 0 | if (new_r > even_rlimit) |
861 | 0 | new_r = even_rlimit; |
862 | | /* Should we store back with the limit? */ |
863 |
|
864 | 0 | rg = new_r << (EVEN_SHIFT - even_c1); |
865 | 0 | r_tmp = rg - rbl[plane_idx]; |
/* Positive differences are scaled down by 8. */
866 | 0 | if (r_tmp > r_max) r_tmp >>= 3; |
867 | 0 | r_scratch[plane_idx] = r_tmp; |
868 | 0 | } |
869 | 0 | } |
870 | | |
871 | | /* Dither each plane. */ |
872 | 0 | for (work_idx = 0; work_idx < n_work; work_idx++) |
873 | 0 | { |
874 | 0 | int plane_idx = work_planes[work_idx]; |
875 | 0 | EBPlaneCtx *ctx = ebc->plane_ctx[plane_idx]; |
876 | 0 | uchar *dst_ptr = dest[plane_idx]; |
877 | 0 | int new_e_1_0; |
878 | 0 | int coupling_contribution; |
879 |
|
880 | 0 | pr = ctx->r_line; |
881 | 0 | pa = ctx->a_line; |
882 | 0 | pb = ctx->b_line; |
883 | 0 | piir = ctx->iir_line; |
884 |
|
885 | 0 | im = iml[plane_idx]; |
/* Slide the three-value window over the previous line's error values;
 * past the last pixel the right-hand neighbour is taken as 0. */
886 | 0 | e_m1_1[plane_idx] = e_0_1[plane_idx]; |
887 | 0 | e_0_1[plane_idx] = e_1_1[plane_idx]; |
888 | 0 | e_1_1[plane_idx] = i == xd - 1 ? 0 : piir[i + 1]; |
/* Weighted sum with weights 7, 3, 5, 1 over 16. */
889 | 0 | new_e_1_0 = ((e_1_0[plane_idx] * 7 + e_m1_1[plane_idx] * 3 + |
890 | 0 | e_0_1[plane_idx] * 5 + e_1_1[plane_idx] * 1) >> 4); |
891 | 0 | if (im == 0) |
892 | 0 | { |
893 | 0 | dst_ptr[i] = 0; |
894 | 0 | } |
895 | 0 | else |
896 | 0 | { |
897 | 0 | int err; |
898 | 0 | int imo; |
899 |
|
900 | 0 | err = new_e_1_0; |
901 |
|
902 | 0 | err += r_scratch[plane_idx]; |
903 | | |
904 | | /* Add the two seeds together */ |
905 | 0 | sum = seed1 + seed2; |
906 | | |
907 | | /* If the add generated a carry, increment |
908 | | * the result of the addition. |
909 | | */ |
910 | 0 | if (sum < seed1 || sum < seed2) sum++; |
911 | | |
912 | | /* Seed2 becomes old seed1, seed1 becomes result */ |
913 | 0 | seed2 = seed1; |
914 | 0 | seed1 = sum; |
915 |
|
/* Subtract the random value, shifted down by the per-run shift and
 * re-centred by subtracting the equally shifted 0x80000000. */
916 | 0 | rand_shift = rs[plane_idx]; |
917 | 0 | err -= (sum >> rand_shift) - (0x80000000 >> rand_shift); |
918 |
|
919 | 0 | if (err < even_elo) |
920 | 0 | err = even_elo; |
921 | | |
922 | 0 | else if (err > even_ehi) |
923 | 0 | err = even_ehi; |
924 |
|
925 | 0 | #if 1 |
926 | 0 | err += coupling; |
927 | 0 | #endif |
928 |
|
929 | | #ifdef OLD_QUANT |
930 | | imo = ((err + im) * dith_mul) >> (EVEN_SHIFT + 8); |
931 | | #else |
/* Quantise to an output level with rounding, then clamp to the range
 * 0 .. levels - 1. */
932 | 0 | imo = ((err + im) * dith_mul + (1 << (EVEN_SHIFT + 7))) >> (EVEN_SHIFT + 8); |
933 | 0 | #endif |
934 | 0 | if (imo < 0) imo = 0; |
935 | 0 | else if (imo > levels - 1) imo = levels - 1; |
936 | 0 | dst_ptr[i] = imo; |
/* Difference between the wanted image value and the emitted level; it
 * is added to the stored error and, weighted by strengths[plane] / 256,
 * to the coupling term shared by the planes. */
937 | 0 | coupling_contribution = im - ((imo * imo_mul) >> IMO_SHIFT); |
938 | 0 | new_e_1_0 += coupling_contribution; |
939 | 0 | coupling += (coupling_contribution * strengths[plane_idx]) >> 8; |
940 | 0 | } |
/* A non-zero output restarts the distance state at this pixel. */
941 | 0 | if (dst_ptr[i] != 0) |
942 | 0 | { |
943 | 0 | a[plane_idx] = 1; |
944 | 0 | b[plane_idx] = aspect2; |
945 | 0 | r[plane_idx] = 0; |
946 | 0 | } |
947 | 0 | pa[i] = a[plane_idx]; |
948 | 0 | pb[i] = b[plane_idx]; |
949 | 0 | pr[i] = r[plane_idx]; |
950 | 0 | piir[i] = new_e_1_0; |
951 | 0 | e_1_0[plane_idx] = new_e_1_0; |
952 | 0 | } |
953 | 0 | #ifdef FANCY_COUPLING |
954 | 0 | coupling = coupling >> 1; |
955 | 0 | c_line[i] = coupling; |
956 | 0 | #endif |
957 | 0 | i++; |
958 | 0 | } |
959 | 0 | } |
960 | | |
961 | | /* Note: this isn't white optimized, but the payoff is probably not |
962 | | that important. */ |
963 | 0 | #ifdef FANCY_COUPLING |
/* Right-to-left pass over the coupling line: running average with the
 * values stored by the forward pass, stored back reduced by 1/16. */
964 | 0 | coupling = 0; |
965 | 0 | for (i = xd - 1; i >= 0; i--) |
966 | 0 | { |
967 | 0 | coupling = (coupling + c_line[i]) >> 1; |
968 | 0 | c_line[i] = (coupling - (coupling >> 4)); |
969 | 0 | } |
970 | 0 | #endif |
971 | | |
972 | | /* Update distances. */ |
973 | 0 | for (plane_idx = 0; plane_idx < n_planes; plane_idx++) |
974 | 0 | { |
975 | 0 | EBPlaneCtx *ctx = ebc->plane_ctx[plane_idx]; |
976 | 0 | int *wcl = ctx->white_count_line; |
977 | 0 | int av, bv, rv; |
978 | 0 | int jmax; |
979 |
|
980 | 0 | pr = ctx->r_line; |
981 | 0 | pa = ctx->a_line; |
982 | 0 | pb = ctx->b_line; |
983 |
|
984 | 0 | av = 1; |
985 | 0 | bv = 1; |
986 | 0 | rv = 0; |
/* Size of the last (possibly partial) chunk; later chunks are full. */
987 | 0 | jmax = ((xd - 1) & 15) + 1; |
/* Right-to-left pass; chunks whose white counter has reached 16 are
 * stepped over without touching their entries. */
988 | 0 | for (i = xd - 1; i >= 0;) |
989 | 0 | { |
990 | 0 | if (wcl[i >> 4] < 16) |
991 | 0 | { |
992 | 0 | for (j = 0; j < jmax; j++) |
993 | 0 | { |
994 | 0 | if (rv + bv + av < pr[i] + pb[i]) |
995 | 0 | { |
996 | 0 | rv += av; |
997 | 0 | av += 2; |
998 | 0 | } |
999 | 0 | else |
1000 | 0 | { |
1001 | 0 | rv = pr[i]; |
1002 | 0 | av = pa[i]; |
1003 | 0 | bv = pb[i]; |
1004 | 0 | } |
1005 | 0 | if (rv > even_rlimit) rv = even_rlimit; |
/* Store the state for the next line: r is advanced by b, and b by twice
 * the squared aspect. */
1006 | 0 | pa[i] = av; |
1007 | 0 | pb[i] = bv + (aspect2 << 1); |
1008 | 0 | pr[i] = rv + bv; |
1009 | 0 | i--; |
1010 | 0 | } |
1011 | 0 | } |
1012 | 0 | else |
1013 | 0 | i -= jmax; |
1014 | 0 | jmax = 16; |
1015 | 0 | } |
1016 | 0 | } |
1017 |
|
/* Persist the generator state for the next line. */
1018 | 0 | ebc->seed1 = seed1; |
1019 | 0 | ebc->seed2 = seed2; |
1020 | 0 | } |
1021 | | |
/**
 * even_better_line_both: Screen a line tracking highlights and shadows.
 * @ebc: The #EvenBetterCtx screening context.
 * @dest: Array of destination buffers, one per plane.
 * @src: Array of source buffers, one per plane.
 *
 * NOTE: the whole body below is wrapped in "#if 0", so as compiled this
 * function does nothing: @dest is not written and the seeds stored in
 * @ebc are not advanced.  even_better_line_rll() still calls it whenever
 * ebc->do_shadows is set.
 *
 * The disabled code is a per-pixel variant of the screener that keeps two
 * sets of distance state per plane: a/b/r (reset whenever a non-zero
 * output level is produced) and a_sh/b_sh/r_sh (reset whenever the output
 * is not the top level, levels - 1).  It indexes @src through an
 * ET_SrcPixel pointer, i.e. as one value per source pixel, while the
 * signature declares runlength codes (ET_Rll) -- presumably the reason it
 * was disabled; TODO confirm before re-enabling.
 **/
static void
even_better_line_both (EvenBetterCtx *ebc, uchar **dest,
                       const ET_Rll *const *src)
{
#if 0
  int a[M], b[M];
  int a_sh[M], b_sh[M];
  int e_1_0[M], e_m1_1[M], e_0_1[M], e_1_1[M];
  int imraw[M];
  int iml[M];
  int i;
  int im;
  int *lut;
  const ET_SrcPixel *ps;
  int *pa, *pb, *piir, *pr;
  int *pa_sh, *pb_sh, *pr_sh;
  int r[M], rb, rg;
  int r_sh[M];
  int *rblut;
  int xd, xrem, xs;
  uint32 seed1 = ebc->seed1;
  uint32 seed2 = ebc->seed2;
  uint32 sum;
  int plane_idx;
  int r_scratch[M];
  int src_idx;
  int n_planes = ebc->n_planes;
  int levels = ebc->levels;
#ifdef OLD_QUANT
  int dith_mul = levels << 8;
#else
  int dith_mul = (levels - 1) << 8;
#endif
  int imo_mul = (1 << (EVEN_SHIFT + IMO_SHIFT)) / (levels - 1);
  int aspect2 = ebc->aspect * ebc->aspect;
  int *strengths = ebc->strengths;
  int even_elo= ebc->even_elo;
  int even_ehi= ebc->even_ehi;
  int coupling;
  int *c_line = ebc->c_line;
  int even_c1 = ebc->even_c1;
  int rand_shift = ebc->rand_shift;
  int even_rlimit = 1 << (30 - EVEN_SHIFT + even_c1);

  xs = ebc->source_width;
  xd = ebc->dest_width;
  /* Start one step short so the first destination pixel fetches source
     pixel 0 (xrem reaches xd on the first iteration). */
  xrem = xd - xs;

  /* Reset the running per-plane state for the start of the line. */
  for (plane_idx = 0; plane_idx < n_planes; plane_idx++)
    {
      a[plane_idx] = 1;
      b[plane_idx] = aspect2;
      a_sh[plane_idx] = 1;
      b_sh[plane_idx] = aspect2;
      r[plane_idx] = 0;
      r_sh[plane_idx] = 0;
      e_0_1[plane_idx] = 0;
      e_1_0[plane_idx] = 0;
      e_1_1[plane_idx] = 0;
    }

  coupling = 0;

  src_idx = 0;
  for (i = 0; i < xd; i++)
    {
#ifdef FANCY_COUPLING
      coupling += c_line[i];
#else
      coupling = 0;
#endif

      /* Nearest-neighbour resampling: advance to the next source pixel
         each time the accumulated remainder reaches the dest width. */
      xrem += xs;
      if (xrem >= xd)
        {
          for (plane_idx = 0; plane_idx < n_planes; plane_idx++)
            {
              ps = src[plane_idx];
              imraw[plane_idx] = ps[src_idx];
            }
          src_idx++;
          xrem -= xd;
        }

      /* Lookup image data and compute R for all planes. */
      for (plane_idx = 0; plane_idx < n_planes; plane_idx++)
        {
          EBPlaneCtx *ctx = ebc->plane_ctx[plane_idx];
          ET_SrcPixel src_pixel;
          int new_r;

          pr = ctx->r_line;
          pa = ctx->a_line;
          pb = ctx->b_line;
          pr_sh = ctx->r_line_sh;
          pa_sh = ctx->a_line_sh;
          pb_sh = ctx->b_line_sh;
          lut = ctx->lut;
          rblut = ctx->rb_lut;
          src_pixel = imraw[plane_idx];

          im = lut[src_pixel];
          iml[plane_idx] = im;
          rb = rblut[src_pixel];
          /* Keep the running state if stepping it is still smaller than
             the value stored for this column; otherwise reload from the
             stored line. */
          if (r[plane_idx] + a[plane_idx] < pr[i])
            {
              r[plane_idx] += a[plane_idx];
              a[plane_idx] += 2;
            }
          else
            {
              a[plane_idx] = pa[i];
              b[plane_idx] = pb[i];
              r[plane_idx] = pr[i];
            }
          /* Same update for the shadow (_sh) state. */
          if (r_sh[plane_idx] + a_sh[plane_idx] < pr_sh[i])
            {
              r_sh[plane_idx] += a_sh[plane_idx];
              a_sh[plane_idx] += 2;
            }
          else
            {
              a_sh[plane_idx] = pa_sh[i];
              b_sh[plane_idx] = pb_sh[i];
              r_sh[plane_idx] = pr_sh[i];
            }
          /* No distance term at the two extremes of the tone range. */
          if (im == 0 || im == (1 << EVEN_SHIFT))
            {
              r_scratch[plane_idx] = 0;
            }
          else
            {
              new_r = r[plane_idx];
              if (new_r > even_rlimit)
                new_r = even_rlimit;
              /* Should we store back with the limit? */
              rg = new_r << (EVEN_SHIFT - even_c1);

              new_r = r_sh[plane_idx];
              if (new_r > even_rlimit)
                new_r = even_rlimit;
              rg -= new_r << (EVEN_SHIFT - even_c1);
              r_scratch[plane_idx] = rg - rb;
            }
        }

      /* Dither each plane. */
      for (plane_idx = 0; plane_idx < n_planes; plane_idx++)
        {
          EBPlaneCtx *ctx = ebc->plane_ctx[plane_idx];
          uchar *dst_ptr = dest[plane_idx];
          int new_e_1_0;
          int coupling_contribution;

          pr = ctx->r_line;
          pa = ctx->a_line;
          pb = ctx->b_line;
          pr_sh = ctx->r_line_sh;
          pa_sh = ctx->a_line_sh;
          pb_sh = ctx->b_line_sh;
          piir = ctx->iir_line;

          im = iml[plane_idx];
          /* Slide the three-wide window over the previous line's errors
             and combine with the previous pixel's error using weights
             7, 3, 5, 1 (sixteenths). */
          e_m1_1[plane_idx] = e_0_1[plane_idx];
          e_0_1[plane_idx] = e_1_1[plane_idx];
          e_1_1[plane_idx] = i == xd - 1 ? 0 : piir[i + 1];
          new_e_1_0 = ((e_1_0[plane_idx] * 7 + e_m1_1[plane_idx] * 3 +
                        e_0_1[plane_idx] * 5 + e_1_1[plane_idx] * 1) >> 4);
          if (im == 0)
            {
              dst_ptr[i] = 0;
            }
          else
            {
              int err;
              int imo;

              err = new_e_1_0;

              err += r_scratch[plane_idx];

              /* Add the two seeds together */
              sum = seed1 + seed2;

              /* If the add generated a carry, increment
               * the result of the addition.
               */
              if (sum < seed1 || sum < seed2) sum++;

              /* Seed2 becomes old seed1, seed1 becomes result */
              seed2 = seed1;
              seed1 = sum;

              /* Subtract the random value, re-centred around zero. */
              err -= (sum >> rand_shift) - (0x80000000 >> rand_shift);

              if (err < even_elo)
                err = even_elo;

              else if (err > even_ehi)
                err = even_ehi;

#if 1
              err += coupling;
#endif

#ifdef OLD_QUANT
              imo = ((err + im) * dith_mul) >> (EVEN_SHIFT + 8);
#else
              imo = ((err + im) * dith_mul + (1 << (EVEN_SHIFT + 7))) >> (EVEN_SHIFT + 8);
#endif
              if (imo < 0) imo = 0;
              else if (imo > levels - 1) imo = levels - 1;
              dst_ptr[i] = imo;
              /* Difference between requested tone and the tone actually
                 produced feeds both the error line and the coupling. */
              coupling_contribution = im - ((imo * imo_mul) >> IMO_SHIFT);
              new_e_1_0 += coupling_contribution;
              coupling += (coupling_contribution * strengths[plane_idx]) >> 8;
            }
          /* Any non-zero output restarts the a/b/r state... */
          if (dst_ptr[i] != 0)
            {
              a[plane_idx] = 1;
              b[plane_idx] = aspect2;
              r[plane_idx] = 0;
            }
          /* ...and any output below the top level restarts the _sh state. */
          if (dst_ptr[i] != levels - 1)
            {
              a_sh[plane_idx] = 1;
              b_sh[plane_idx] = aspect2;
              r_sh[plane_idx] = 0;
            }
          pa[i] = a[plane_idx];
          pb[i] = b[plane_idx];
          pr[i] = r[plane_idx];
          pa_sh[i] = a_sh[plane_idx];
          pb_sh[i] = b_sh[plane_idx];
          pr_sh[i] = r_sh[plane_idx];
          piir[i] = new_e_1_0;
          e_1_0[plane_idx] = new_e_1_0;
        }
#ifdef FANCY_COUPLING
      coupling = coupling >> 1;
      c_line[i] = coupling;
#endif
    }

#ifdef FANCY_COUPLING
  coupling = 0;
  for (i = xd - 1; i >= 0; i--)
    {
      /* NOTE(review): plane_idx is left equal to n_planes by the loops
         above, so this test is only true when n_planes is 0 and the
         right-to-left smoothing pass is effectively skipped.  The
         corresponding pass in even_better_line_hi has no such test. */
      if (plane_idx == 0)
        {
          coupling = (coupling + c_line[i]) >> 1;
          c_line[i] = (coupling - (coupling >> 4));
        }
    }
#endif

  /* Update distances. */
  for (plane_idx = 0; plane_idx < n_planes; plane_idx++)
    {
      EBPlaneCtx *ctx = ebc->plane_ctx[plane_idx];
      int av, bv, rv;
      int av_sh, bv_sh, rv_sh;

      pr = ctx->r_line;
      pa = ctx->a_line;
      pb = ctx->b_line;
      pr_sh = ctx->r_line_sh;
      pa_sh = ctx->a_line_sh;
      pb_sh = ctx->b_line_sh;

      av = 1;
      bv = 1;
      rv = 0;
      av_sh = 1;
      bv_sh = 1;
      rv_sh = 0;
      /* Right-to-left pass over the stored lines, for both state sets. */
      for (i = xd - 1; i >= 0; i--)
        {
          if (rv + bv + av < pr[i] + pb[i])
            {
              rv += av;
              av += 2;
            }
          else
            {
              rv = pr[i];
              av = pa[i];
              bv = pb[i];
            }
          if (rv > even_rlimit) rv = even_rlimit;
          pa[i] = av;
          pb[i] = bv + (aspect2 << 1);
          pr[i] = rv + bv;

          if (rv_sh + bv_sh + av_sh < pr_sh[i] + pb_sh[i])
            {
              rv_sh += av_sh;
              av_sh += 2;
            }
          else
            {
              rv_sh = pr_sh[i];
              av_sh = pa_sh[i];
              bv_sh = pb_sh[i];
            }
          if (rv_sh > even_rlimit) rv_sh = even_rlimit;
          pa_sh[i] = av_sh;
          pb_sh[i] = bv_sh + (aspect2 << 1);
          pr_sh[i] = rv_sh + bv_sh;
        }
    }

  /* Persist the random generator state for the next line. */
  ebc->seed1 = seed1;
  ebc->seed2 = seed2;
#endif
}
1338 | | |
1339 | | /** |
1340 | | * even_better_line_rll: Screen a line using Even ToneFS screeing. |
1341 | | * @ctx: An #EBPlaneCtx context. |
1342 | | * @dest: Array of destination buffers, 8 bpp pixels each. |
1343 | | * @src: Array of source buffers, runlength encoded. |
1344 | | * |
1345 | | * Screens a single line using Even ToneFS screening. |
1346 | | **/ |
1347 | | void |
1348 | | even_better_line_rll (EvenBetterCtx *ebc, uchar **dest, |
1349 | | const ET_Rll *const *src) |
1350 | 0 | { |
1351 | |
|
1352 | 0 | if (ebc->dump_file && ebc->dump_level >= EB_DUMP_INPUT) |
1353 | 0 | { |
1354 | 0 | int i; |
1355 | | |
1356 | | /* Note: we should calculate the actual number of runlength |
1357 | | codes here. As it is, it will just waste storage a bit. */ |
1358 | 0 | for (i = 0; i < ebc->n_planes; i++) |
1359 | 0 | fwrite (src[i], sizeof(ET_Rll), ebc->source_width, |
1360 | 0 | ebc->dump_file); |
1361 | 0 | } |
1362 | | #ifdef USE_VECTOR |
1363 | | if (ebc->using_vectors) |
1364 | | even_better_line_vector(ebc, dest, src); |
1365 | | else |
1366 | | #endif |
1367 | 0 | if (ebc->do_shadows) |
1368 | 0 | even_better_line_both (ebc, dest, src); |
1369 | 0 | else |
1370 | 0 | even_better_line_hi (ebc, dest, src); |
1371 | 0 | if (ebc->dump_file && ebc->dump_level >= EB_DUMP_INPUT) |
1372 | 0 | { |
1373 | 0 | int i; |
1374 | |
|
1375 | 0 | for (i = 0; i < ebc->n_planes; i++) |
1376 | 0 | fwrite (dest[i], 1, ebc->dest_width, |
1377 | 0 | ebc->dump_file); |
1378 | 0 | } |
1379 | 0 | } |
1380 | | |
1381 | | /** |
1382 | | * even_better_compress_rll: Compress a single scan line to RLL format. |
1383 | | * @dst: Destination buffer. |
1384 | | * @src: Source buffer. |
1385 | | * @width: Number of source pixels. |
1386 | | * |
1387 | | * Return value: number of runlength codes. |
1388 | | **/ |
1389 | | static int |
1390 | | even_better_compress_rll (ET_Rll *dst, const ET_SrcPixel *src, |
1391 | | int src_width, int dst_width) |
1392 | 0 | { |
1393 | 0 | int rll_idx; |
1394 | 0 | int i; |
1395 | 0 | int count; |
1396 | 0 | ET_SrcPixel last_val; |
1397 | 0 | int whole = dst_width / src_width; |
1398 | 0 | int frac = dst_width % src_width; |
1399 | 0 | int rem; |
1400 | |
|
1401 | 0 | rll_idx = 0; |
1402 | 0 | last_val = src[0]; |
1403 | 0 | count = whole; |
1404 | 0 | if (frac == 0) |
1405 | 0 | { |
1406 | 0 | for (i = 1; i < src_width; i++) |
1407 | 0 | { |
1408 | 0 | ET_SrcPixel val = src[i]; |
1409 | |
|
1410 | 0 | if (count > 0xffff - whole || val != last_val) |
1411 | 0 | { |
1412 | 0 | dst[rll_idx].length = count; |
1413 | 0 | dst[rll_idx].value = last_val; |
1414 | 0 | rll_idx++; |
1415 | 0 | last_val = val; |
1416 | 0 | count = 0; |
1417 | 0 | } |
1418 | 0 | count += whole; |
1419 | 0 | } |
1420 | 0 | } |
1421 | 0 | else |
1422 | 0 | { |
1423 | 0 | rem = frac; |
1424 | 0 | for (i = 1; i < src_width; i++) |
1425 | 0 | { |
1426 | 0 | ET_SrcPixel val = src[i]; |
1427 | |
|
1428 | 0 | if (count >= 0xffff - whole || val != last_val) |
1429 | 0 | { |
1430 | 0 | dst[rll_idx].length = count; |
1431 | 0 | dst[rll_idx].value = last_val; |
1432 | 0 | rll_idx++; |
1433 | 0 | last_val = val; |
1434 | 0 | count = 0; |
1435 | 0 | } |
1436 | 0 | count += whole; |
1437 | 0 | rem += frac; |
1438 | 0 | if (rem >= src_width) |
1439 | 0 | { |
1440 | 0 | count++; |
1441 | 0 | rem -= src_width; |
1442 | 0 | } |
1443 | 0 | } |
1444 | 0 | } |
1445 | 0 | dst[rll_idx].length = count; |
1446 | 0 | dst[rll_idx].value = last_val; |
1447 | 0 | rll_idx++; |
1448 | 0 | return rll_idx; |
1449 | 0 | } |
1450 | | |
1451 | | /** |
1452 | | * even_better_line: Screen a line using Even TonenFS screeing. |
1453 | | * @ctx: An #EBPlaneCtx context. |
1454 | | * @dest: Array of destination buffers, 8 bpp pixels each. |
1455 | | * @src: Array of source buffer, ET_SrcPixel pixels each. |
1456 | | * |
1457 | | * Screens a single line using Even ToneFS screening. |
1458 | | **/ |
1459 | | void |
1460 | | even_better_line (EvenBetterCtx *ebc, uchar **dest, |
1461 | | const ET_SrcPixel *const *src) |
1462 | 0 | { |
1463 | 0 | ET_Rll *rll_buf[M]; |
1464 | 0 | int i; |
1465 | 0 | int source_width = ebc->source_width; |
1466 | 0 | int dest_width = ebc->dest_width; |
1467 | |
|
1468 | | #ifdef USE_AVEC |
1469 | | if (ebc->using_vectors == 2) |
1470 | | { |
1471 | | even_better_line_fastprep (ebc, dest, src); |
1472 | | } |
1473 | | else |
1474 | | #endif |
1475 | 0 | { |
1476 | 0 | for (i = 0; i < ebc->n_planes; i++) |
1477 | 0 | { |
1478 | 0 | rll_buf[i] = (ET_Rll *)malloc (source_width * sizeof(ET_Rll)); |
1479 | 0 | even_better_compress_rll (rll_buf[i], src[i], source_width, dest_width); |
1480 | 0 | } |
1481 | 0 | even_better_line_rll (ebc, dest, (const ET_Rll * const *)rll_buf); |
1482 | 0 | for (i = 0; i < ebc->n_planes; i++) |
1483 | 0 | free (rll_buf[i]); |
1484 | 0 | } |
1485 | 0 | } |
1486 | | |
1487 | | /** |
1488 | | * even_better_plane_free: Free an #EBPlaneCtx context. |
1489 | | * @ctx: The #EBPlaneCtx context to free. |
1490 | | * |
1491 | | * Frees @ctx. |
1492 | | **/ |
1493 | | static void |
1494 | | even_better_plane_free (EBPlaneCtx *ctx) |
1495 | 0 | { |
1496 | 0 | free (ctx->rb_line); |
1497 | 0 | free (ctx->iir_line); |
1498 | 0 | free (ctx->r_line); |
1499 | 0 | free (ctx->a_line); |
1500 | 0 | free (ctx->b_line); |
1501 | 0 | free (ctx->lut); |
1502 | 0 | free (ctx->rb_lut); |
1503 | 0 | free (ctx->rs_lut); |
1504 | 0 | free (ctx->white_count_line); |
1505 | 0 | free (ctx); |
1506 | 0 | } |
1507 | | |
/* Integer base-2 logarithm, rounded down: the number of times x can be
   halved before it drops to 1 or below.  Returns 0 for any x <= 1. */
static int
even_log2 (int x)
{
  int bits = 0;

  while (x > 1)
    {
      x >>= 1;
      bits++;
    }
  return bits;
}
1518 | | |
1519 | | /** |
1520 | | * even_better_new: Create new Even ToneFS screening context. |
1521 | | * @source_width: Width of source buffer. |
1522 | | * @dest_width: Width of destination buffer, in pixels. |
1523 | | * @lut: Lookup table for gray values. |
1524 | | * |
1525 | | * Creates a new context for Even ToneFS screening. |
1526 | | * |
1527 | | * If @dest_width is larger than @source_width, then input lines will |
1528 | | * be expanded using nearest-neighbor sampling. |
1529 | | * |
1530 | | * @lut should be an array of 256 values, one for each possible input |
1531 | | * gray value. @lut is a lookup table for gray values. Each value |
1532 | | * ranges from 0 (black) to 2^24 (white). |
1533 | | * |
1534 | | * Return value: The new #EBPlaneCtx context. |
1535 | | **/ |
1536 | | static EBPlaneCtx * |
1537 | | even_better_plane_new (const EvenBetterParams *params, EvenBetterCtx *ebc, |
1538 | | int plane_idx) |
1539 | 0 | { |
1540 | 0 | int source_width = params->source_width; |
1541 | 0 | int dest_width = params->dest_width; |
1542 | 0 | int *lut = params->luts[plane_idx]; |
1543 | 0 | EBPlaneCtx *result; |
1544 | 0 | int i; |
1545 | 0 | int *new_lut; |
1546 | 0 | int *rb_lut; |
1547 | 0 | char *rs_lut; |
1548 | 0 | double rbscale = eb_compute_rbscale(params); |
1549 | 0 | int even_c1 = ebc->even_c1; |
1550 | 0 | int even_rlimit = 1 << (30 - EVEN_SHIFT + even_c1); |
1551 | 0 | int do_shadows = params->do_shadows; |
1552 | 0 | int log2_levels; |
1553 | 0 | int rs_base; |
1554 | |
|
1555 | 0 | result = (EBPlaneCtx *)malloc (sizeof(EBPlaneCtx)); |
1556 | |
|
1557 | 0 | result->source_width = source_width; |
1558 | 0 | result->dest_width = dest_width; |
1559 | |
|
1560 | 0 | new_lut = (int *)malloc ((ET_SRC_MAX + 1) * sizeof(int)); |
1561 | 0 | for (i = 0; i < ET_SRC_MAX + 1; i++) |
1562 | 0 | { |
1563 | 0 | int nli; |
1564 | |
|
1565 | 0 | if (lut == NULL) |
1566 | 0 | { |
1567 | 0 | #if ET_SRC_MAX == 255 |
1568 | 0 | nli = (i * 65793 + (i >> 7)) >> (24 - EVEN_SHIFT); |
1569 | | #else |
1570 | | nli = (i * ((double) (1 << EVEN_SHIFT)) / ET_SRC_MAX) + 0.5; |
1571 | | #endif |
1572 | 0 | } |
1573 | 0 | else |
1574 | 0 | nli = lut[i] >> (24 - EVEN_SHIFT); |
1575 | 0 | new_lut[i] = (1 << EVEN_SHIFT) - nli; |
1576 | 0 | } |
1577 | |
|
1578 | 0 | rb_lut = (int *)malloc ((ET_SRC_MAX + 1) * sizeof(int)); |
1579 | 0 | rs_lut = (char *)malloc ((ET_SRC_MAX + 1) * sizeof(int)); |
1580 | |
|
1581 | 0 | log2_levels = even_log2 (params->levels); |
1582 | 0 | rs_base = 35 - EVEN_SHIFT + log2_levels - params->rand_scale; |
1583 | |
|
1584 | 0 | for (i = 0; i <= ET_SRC_MAX; i++) |
1585 | 0 | { |
1586 | 0 | double rb; |
1587 | 0 | int nl = new_lut[i] * (params->levels - 1); |
1588 | 0 | int rs; |
1589 | |
|
1590 | 0 | if (nl == 0) |
1591 | 0 | rb = 0; |
1592 | 0 | else |
1593 | 0 | { |
1594 | 0 | rb = (rbscale * (1 << (2 * EVEN_SHIFT - even_c1))) / nl; |
1595 | 0 | if (rb > even_rlimit << (EVEN_SHIFT - even_c1)) |
1596 | 0 | rb = even_rlimit << (EVEN_SHIFT - even_c1); |
1597 | 0 | } |
1598 | |
|
1599 | 0 | rs = eb_compute_randshift(nl, rs_base, do_shadows, params->levels); |
1600 | 0 | rs_lut[i] = rs; |
1601 | |
|
1602 | 0 | if (params->do_shadows) |
1603 | 0 | { |
1604 | 0 | nl = ((1 << EVEN_SHIFT) - new_lut[i]) * (params->levels - 1); |
1605 | |
|
1606 | 0 | if (nl == 0) |
1607 | 0 | rb = 0; |
1608 | 0 | else |
1609 | 0 | { |
1610 | 0 | int rb_sh; |
1611 | 0 | rb_sh = (rbscale * (1 << (2 * EVEN_SHIFT - even_c1))) / nl; |
1612 | 0 | if (rb_sh > even_rlimit << (EVEN_SHIFT - even_c1)) |
1613 | 0 | rb_sh = even_rlimit << (EVEN_SHIFT - even_c1); |
1614 | 0 | rb -= rb_sh; |
1615 | 0 | } |
1616 | 0 | } |
1617 | 0 | rb_lut[i] = rb; |
1618 | |
|
1619 | 0 | } |
1620 | |
|
1621 | 0 | result->lut = new_lut; |
1622 | 0 | result->rb_lut = rb_lut; |
1623 | 0 | result->rs_lut = rs_lut; |
1624 | |
|
1625 | 0 | result->rb_line = (int *)calloc (dest_width, sizeof(int)); |
1626 | 0 | result->iir_line = (int *)calloc (dest_width, sizeof(int)); |
1627 | 0 | result->r_line = (int *)calloc (dest_width, sizeof(int)); |
1628 | 0 | result->a_line = (int *)calloc (dest_width, sizeof(int)); |
1629 | 0 | result->b_line = (int *)calloc (dest_width, sizeof(int)); |
1630 | 0 | result->white_count_line = (int *)calloc ((dest_width + 15) >> 4, sizeof(int)); |
1631 | 0 | if (do_shadows) |
1632 | 0 | { |
1633 | 0 | result->r_line_sh = (int *)calloc (dest_width, sizeof(int)); |
1634 | 0 | result->a_line_sh = (int *)calloc (dest_width, sizeof(int)); |
1635 | 0 | result->b_line_sh = (int *)calloc (dest_width, sizeof(int)); |
1636 | 0 | } |
1637 | 0 | else |
1638 | 0 | { |
1639 | 0 | result->r_line_sh = NULL; |
1640 | 0 | result->a_line_sh = NULL; |
1641 | 0 | result->b_line_sh = NULL; |
1642 | 0 | } |
1643 | 0 | for (i = 0; i < dest_width; i++) |
1644 | 0 | { |
1645 | 0 | result->a_line[i] = 1; |
1646 | 0 | result->b_line[i] = 1; |
1647 | 0 | result->iir_line[i] = -((rand () & 0x7fff) << 6) >> (24 - EVEN_SHIFT); |
1648 | 0 | if (do_shadows) |
1649 | 0 | { |
1650 | 0 | result->a_line_sh[i] = 1; |
1651 | 0 | result->b_line_sh[i] = 1; |
1652 | 0 | } |
1653 | 0 | } |
1654 | |
|
1655 | 0 | return result; |
1656 | 0 | } |
1657 | | |
1658 | | EvenBetterCtx * |
1659 | | even_better_new (const EvenBetterParams *params) |
1660 | 0 | { |
1661 | 0 | EvenBetterCtx *result = (EvenBetterCtx *)malloc (sizeof(EvenBetterCtx)); |
1662 | 0 | int n_planes = params->n_planes; |
1663 | 0 | int i; |
1664 | 0 | int log2_levels, log2_aspect; |
1665 | 0 | int using_vectors = 0; |
1666 | |
|
1667 | 0 | if (params->dump_file) |
1668 | 0 | { |
1669 | 0 | int header[5]; |
1670 | |
|
1671 | 0 | header[0] = 0x70644245; |
1672 | 0 | header[1] = 'M' * 0x1010000 + 'I' * 0x101; |
1673 | 0 | header[2] = EVENBETTER_VERSION; |
1674 | 0 | header[3] = ET_SRC_MAX; |
1675 | 0 | header[4] = sizeof(ET_SrcPixel); |
1676 | 0 | fwrite (header, sizeof(int), sizeof(header) / sizeof(header[0]), |
1677 | 0 | params->dump_file); |
1678 | 0 | if (params->dump_level >= EB_DUMP_PARAMS) |
1679 | 0 | { |
1680 | |
|
1681 | 0 | fwrite (params, 1, sizeof(EvenBetterParams), params->dump_file); |
1682 | 0 | } |
1683 | 0 | if (params->dump_level >= EB_DUMP_LUTS) |
1684 | 0 | { |
1685 | 0 | int i; |
1686 | 0 | for (i = 0; i < params->n_planes; i++) |
1687 | 0 | fwrite (params->luts[i], sizeof(int), ET_SRC_MAX + 1, |
1688 | 0 | params->dump_file); |
1689 | 0 | } |
1690 | 0 | } |
1691 | |
|
1692 | 0 | result->source_width = params->source_width; |
1693 | 0 | result->dest_width = params->dest_width; |
1694 | 0 | result->n_planes = n_planes; |
1695 | 0 | result->levels = params->levels; |
1696 | |
|
1697 | 0 | result->aspect = params->aspect; |
1698 | |
|
1699 | 0 | result->even_ehi = 0.6 * (1 << EVEN_SHIFT) / (params->levels - 1); |
1700 | 0 | result->even_elo = -result->even_ehi; |
1701 | |
|
1702 | 0 | result->strengths = (int *)malloc (sizeof(int) * n_planes); |
1703 | 0 | memcpy (result->strengths, params->strengths, |
1704 | 0 | sizeof(int) * n_planes); |
1705 | |
|
1706 | 0 | log2_levels = even_log2 (params->levels); |
1707 | 0 | log2_aspect = even_log2 (params->aspect); |
1708 | 0 | result->even_c1 = 6 + log2_aspect + log2_levels - params->even_c1_scale; |
1709 | 0 | result->do_shadows = params->do_shadows; |
1710 | |
|
1711 | 0 | result->c_line = (int *)calloc (params->dest_width, sizeof(int)); |
1712 | |
|
1713 | 0 | result->seed1 = 0x5324879f; |
1714 | 0 | result->seed2 = 0xb78d0945; |
1715 | |
|
1716 | 0 | result->dump_file = params->dump_file; |
1717 | 0 | result->dump_level = params->dump_level; |
1718 | |
|
1719 | | #ifdef USE_SSE2 |
1720 | | using_vectors = eb_test_sse2(); |
1721 | | #endif |
1722 | | #ifdef USE_AVEC |
1723 | | using_vectors = 1; /* todo: Altivec sensing */ |
1724 | | |
1725 | | /* select fastprep */ |
1726 | | if (sizeof(ET_SrcPixel) == 1 && using_vectors && params->gamma != 0) |
1727 | | using_vectors = 2; |
1728 | | |
1729 | | #endif |
1730 | |
|
1731 | | #ifdef USE_VECTOR |
1732 | | result->using_vectors = using_vectors; |
1733 | | #endif |
1734 | 0 | if (using_vectors) |
1735 | 0 | { |
1736 | | #ifdef USE_SSE2 |
1737 | | result->sse2_ctx = (eb_ctx_sse2 **)malloc(sizeof(eb_ctx_sse2 *) * |
1738 | | ((n_planes + 3) >> 2)); |
1739 | | for (i = 0; i < n_planes; i += 4) |
1740 | | { |
1741 | | int end_plane = i + 4 < n_planes ? i + 4 : n_planes; |
1742 | | result->sse2_ctx[i >> 2] = eb_ctx_sse2_new(params, i, end_plane); |
1743 | | } |
1744 | | #endif |
1745 | | #ifdef USE_AVEC |
1746 | | result->avec_ctx = (eb_ctx_avec **)malloc(sizeof(eb_ctx_avec *) * |
1747 | | ((n_planes + 3) >> 2)); |
1748 | | for (i = 0; i < n_planes; i += 4) |
1749 | | { |
1750 | | int end_plane = i + 4 < n_planes ? i + 4 : n_planes; |
1751 | | result->avec_ctx[i >> 2] = eb_ctx_avec_new(params, i, end_plane); |
1752 | | } |
1753 | | #endif |
1754 | 0 | result->plane_ctx = NULL; |
1755 | 0 | } |
1756 | 0 | else |
1757 | 0 | { |
1758 | 0 | result->plane_ctx = (EBPlaneCtx **)malloc(sizeof(EBPlaneCtx *) * n_planes); |
1759 | 0 | for (i = 0; i < n_planes; i++) |
1760 | 0 | result->plane_ctx[i] = even_better_plane_new (params, result, i); |
1761 | 0 | } |
1762 | 0 | return result; |
1763 | 0 | } |
1764 | | |
1765 | | /** |
1766 | | * even_better_free: Free an #EvenBetterCtx context. |
1767 | | * @ctx: The #EvenBetterCtx context to free. |
1768 | | * |
1769 | | * Frees @ctx. |
1770 | | **/ |
1771 | | void |
1772 | | even_better_free (EvenBetterCtx *ctx) |
1773 | 0 | { |
1774 | 0 | int i; |
1775 | 0 | int n_planes = ctx->n_planes; |
1776 | |
|
1777 | 0 | if (ctx->dump_file) |
1778 | 0 | fclose (ctx->dump_file); |
1779 | |
|
1780 | | #ifdef USE_VECTOR |
1781 | | if (ctx->using_vectors) |
1782 | | { |
1783 | | #ifdef USE_SSE2 |
1784 | | for (i = 0; i < n_planes; i += 4) |
1785 | | eb_ctx_sse2_free(ctx->sse2_ctx[i >> 2]); |
1786 | | free(ctx->sse2_ctx); |
1787 | | #endif |
1788 | | #ifdef USE_AVEC |
1789 | | for (i = 0; i < n_planes; i += 4) |
1790 | | eb_ctx_avec_free(ctx->avec_ctx[i >> 2]); |
1791 | | free(ctx->avec_ctx); |
1792 | | #endif |
1793 | | } |
1794 | | else |
1795 | | #endif |
1796 | 0 | { |
1797 | 0 | for (i = 0; i < n_planes; i++) |
1798 | 0 | even_better_plane_free (ctx->plane_ctx[i]); |
1799 | 0 | free(ctx->plane_ctx); |
1800 | 0 | } |
1801 | 0 | free (ctx->strengths); |
1802 | 0 | free (ctx->c_line); |
1803 | |
|
1804 | 0 | free (ctx); |
1805 | 0 | } |