/src/mupdf/source/fitz/draw-scale-simple.c
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (C) 2004-2021 Artifex Software, Inc. |
2 | | // |
3 | | // This file is part of MuPDF. |
4 | | // |
5 | | // MuPDF is free software: you can redistribute it and/or modify it under the |
6 | | // terms of the GNU Affero General Public License as published by the Free |
7 | | // Software Foundation, either version 3 of the License, or (at your option) |
8 | | // any later version. |
9 | | // |
10 | | // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY |
11 | | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
12 | | // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more |
13 | | // details. |
14 | | // |
15 | | // You should have received a copy of the GNU Affero General Public License |
16 | | // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> |
17 | | // |
18 | | // Alternative licensing terms are available from the licensor. |
19 | | // For commercial licensing, see <https://www.artifex.com/> or contact |
20 | | // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, |
21 | | // CA 94129, USA, for further information. |
22 | | |
23 | | /* |
24 | | This code does smooth scaling of a pixmap. |
25 | | |
26 | | This function returns a new pixmap representing the area starting at (0,0) |
27 | | given by taking the source pixmap src, scaling it to width w, and height h, |
28 | | and then positioning it at (frac(x),frac(y)). |
29 | | |
30 | | This is a cut-down version of draw_scale.c that only copes with filters |
31 | | that return values strictly in the 0..1 range, and uses bytes for |
32 | | intermediate results rather than ints. |
33 | | */ |
34 | | |
35 | | #include "mupdf/fitz.h" |
36 | | |
37 | | #include "draw-imp.h" |
38 | | #include "pixmap-imp.h" |
39 | | |
40 | | #include <math.h> |
41 | | #include <string.h> |
42 | | #include <assert.h> |
43 | | #include <limits.h> |
44 | | |
45 | | /* Do we special case handling of single pixel high/wide images? The |
46 | | * 'purest' handling is given by not special casing them, but certain |
47 | | * files that use such images 'stack' them to give full images. Not |
48 | | * special casing them results in then being fainter and giving noticeable |
49 | | * rounding errors. |
50 | | */ |
51 | | #define SINGLE_PIXEL_SPECIALS |
52 | | |
53 | | /* |
54 | | Consider a row of source samples, src, of width src_w, positioned at x, |
55 | | scaled to width dst_w. |
56 | | |
57 | | src[i] is centred at: x + (i + 0.5)*dst_w/src_w |
58 | | |
59 | | Therefore the distance between the centre of the jth output pixel and |
60 | | the centre of the ith source sample is: |
61 | | |
62 | | dist[j,i] = j + 0.5 - (x + (i + 0.5)*dst_w/src_w) |
63 | | |
64 | | When scaling up, therefore: |
65 | | |
66 | | dst[j] = SUM(filter(dist[j,i]) * src[i]) |
67 | | (for all ints i) |
68 | | |
69 | | This can be simplified by noticing that filters are only non zero within |
70 | | a given filter width (henceforth called W). So: |
71 | | |
72 | | dst[j] = SUM(filter(dist[j,i]) * src[i]) |
73 | | (for ints i, s.t. (j*src_w/dst_w)-W < i < (j*src_w/dst_w)+W) |
74 | | |
75 | | When scaling down, each filtered source sample is stretched to be wider |
76 | | to avoid aliasing issues. This effectively reduces the distance between |
77 | | centres. |
78 | | |
79 | | dst[j] = SUM(filter(dist[j,i] * F) * F * src[i]) |
80 | | (where F = dst_w/src_w) |
81 | | (for ints i, s.t. (j-W)/F < i < (j+W)/F) |
82 | | |
83 | | */ |
84 | | |
85 | | typedef struct fz_scale_filter |
86 | | { |
87 | | int width; |
88 | | float (*fn)(struct fz_scale_filter *, float); |
89 | | } fz_scale_filter; |
90 | | |
91 | | /* Image scale filters */ |
92 | | |
93 | | static float |
94 | | triangle(fz_scale_filter *filter, float f) |
95 | 0 | { |
96 | 0 | if (f >= 1) |
97 | 0 | return 0; |
98 | 0 | return 1-f; |
99 | 0 | } |
100 | | |
101 | | static float |
102 | | box(fz_scale_filter *filter, float f) |
103 | 0 | { |
104 | 0 | if (f >= 0.5f) |
105 | 0 | return 0; |
106 | 0 | return 1; |
107 | 0 | } |
108 | | |
109 | | static float |
110 | | simple(fz_scale_filter *filter, float x) |
111 | 21.5M | { |
112 | 21.5M | if (x >= 1) |
113 | 32.1k | return 0; |
114 | 21.4M | return 1 + (2*x - 3)*x*x; |
115 | 21.5M | } |
116 | | |
117 | | fz_scale_filter fz_scale_filter_box = { 1, box }; |
118 | | fz_scale_filter fz_scale_filter_triangle = { 1, triangle }; |
119 | | fz_scale_filter fz_scale_filter_simple = { 1, simple }; |
120 | | |
121 | | /* |
122 | | We build ourselves a set of tables to contain the precalculated weights |
123 | | for a given set of scale settings. |
124 | | |
125 | | The first dst_w entries in index are the index into index of the |
126 | | sets of weight for each destination pixel. |
127 | | |
128 | | Each of the sets of weights is a set of values consisting of: |
129 | | the minimum source pixel index used for this destination pixel |
130 | | the number of weights used for this destination pixel |
131 | | the weights themselves |
132 | | |
133 | | So to calculate dst[i] we do the following: |
134 | | |
135 | | weights = &index[index[i]]; |
136 | | min = *weights++; |
137 | | len = *weights++; |
138 | | dst[i] = 0; |
139 | | while (--len > 0) |
140 | | dst[i] += src[min++] * *weights++ |
141 | | |
142 | | in addition, we guarantee that at the end of this process weights will now |
143 | | point to the weights value for dst pixel i+1. |
144 | | |
145 | | In the simplest version of this algorithm, we would scale the whole image |
146 | | horizontally first into a temporary buffer, then scale that temporary |
147 | | buffer again vertically to give us our result. Using such a simple |
148 | | algorithm would mean that could use the same style of weights for both |
149 | | horizontal and vertical scaling. |
150 | | |
151 | | Unfortunately, this would also require a large temporary buffer, |
152 | | particularly in the case where we are scaling up. |
153 | | |
154 | | We therefore modify the algorithm as follows; we scale scanlines from the |
155 | | source image horizontally into a temporary buffer, until we have all the |
156 | | contributors for a given output scanline. We then produce that output |
157 | | scanline from the temporary buffer. In this way we restrict the height |
158 | | of the temporary buffer to a small fraction of the final size. |
159 | | |
160 | | Unfortunately, this means that the pseudo code for recombining a |
161 | | scanline of fully scaled pixels is as follows: |
162 | | |
163 | | weights = &index[index[y]]; |
164 | | min = *weights++; |
165 | | len = *weights++; |
166 | | for (x=0 to dst_w) |
167 | | min2 = min |
168 | | len2 = len |
169 | | weights2 = weights |
170 | | dst[x] = 0; |
171 | | while (--len2 > 0) |
172 | | dst[x] += temp[x][(min2++) % tmp_buf_height] * *weights2++ |
173 | | |
174 | | i.e. it requires a % operation for every source pixel - this is typically |
175 | | expensive. |
176 | | |
177 | | To avoid this, we alter the order in which vertical weights are stored, |
178 | | so that they are ordered in the same order as the temporary buffer lines |
179 | | would appear. This simplifies the algorithm to: |
180 | | |
181 | | weights = &index[index[y]]; |
182 | | min = *weights++; |
183 | | len = *weights++; |
184 | | for (x=0 to dst_w) |
185 | | min2 = 0 |
186 | | len2 = len |
187 | | weights2 = weights |
188 | | dst[x] = 0; |
189 | | while (--len2 > 0) |
190 | | dst[x] += temp[i][min2++] * *weights2++ |
191 | | |
192 | | This means that len may be larger than it needs to be (due to the |
193 | | possible inclusion of a zero weight row or two), but in practise this |
194 | | is only an increase of 1 or 2 at worst. |
195 | | |
196 | | We implement this by generating the weights as normal (but ensuring we |
197 | | leave enough space) and then reordering afterwards. |
198 | | |
199 | | */ |
200 | | |
201 | | /* This structure is accessed from ARM code - bear this in mind before |
202 | | * altering it! */ |
203 | | typedef struct |
204 | | { |
205 | | int flip; /* true if outputting reversed */ |
206 | | int count; /* number of output pixels we have records for in this table */ |
207 | | int max_len; /* Maximum number of weights for any one output pixel */ |
208 | | int n; /* number of components (src->n) */ |
209 | | int new_line; /* True if no weights for the current output pixel */ |
210 | | int patch_l; /* How many output pixels we skip over */ |
211 | | int index[1]; |
212 | | } fz_weights; |
213 | | |
214 | | struct fz_scale_cache |
215 | | { |
216 | | int src_w; |
217 | | float x; |
218 | | float dst_w; |
219 | | fz_scale_filter *filter; |
220 | | int vertical; |
221 | | int dst_w_int; |
222 | | int patch_l; |
223 | | int patch_r; |
224 | | int n; |
225 | | int flip; |
226 | | fz_weights *weights; |
227 | | }; |
228 | | |
229 | | static fz_weights * |
230 | | new_weights(fz_context *ctx, fz_scale_filter *filter, int src_w, float dst_w, int patch_w, int n, int flip, int patch_l) |
231 | 44.9k | { |
232 | 44.9k | int max_len; |
233 | 44.9k | fz_weights *weights; |
234 | | |
235 | 44.9k | if (src_w > dst_w) |
236 | 44.9k | { |
237 | | /* Scaling down, so there will be a maximum of |
238 | | * 2*filterwidth*src_w/dst_w src pixels |
239 | | * contributing to each dst pixel. */ |
240 | 44.9k | max_len = (int)ceilf((2 * filter->width * src_w)/dst_w); |
241 | 44.9k | if (max_len > src_w) |
242 | 6.36k | max_len = src_w; |
243 | 44.9k | } |
244 | 0 | else |
245 | 0 | { |
246 | | /* Scaling up, so there will be a maximum of |
247 | | * 2*filterwidth src pixels contributing to each dst pixel. |
248 | | */ |
249 | 0 | max_len = 2 * filter->width; |
250 | 0 | } |
251 | | /* We need the size of the struct, |
252 | | * plus patch_w*sizeof(int) for the index |
253 | | * plus (2+max_len)*sizeof(int) for the weights |
254 | | * plus room for an extra set of weights for reordering. |
255 | | */ |
256 | 44.9k | weights = fz_malloc(ctx, sizeof(*weights)+(size_t)(max_len+3)*(patch_w+1)*sizeof(int)); |
257 | 44.9k | if (!weights) |
258 | 0 | return NULL; |
259 | 44.9k | weights->count = -1; |
260 | 44.9k | weights->max_len = max_len; |
261 | 44.9k | weights->index[0] = patch_w; |
262 | 44.9k | weights->n = n; |
263 | 44.9k | weights->patch_l = patch_l; |
264 | 44.9k | weights->flip = flip; |
265 | 44.9k | return weights; |
266 | 44.9k | } |
267 | | |
268 | | /* j is destination pixel in the patch_l..patch_l+patch_w range */ |
269 | | static void |
270 | | init_weights(fz_weights *weights, int j) |
271 | 7.23M | { |
272 | 7.23M | int index; |
273 | | |
274 | 7.23M | j -= weights->patch_l; |
275 | 7.23M | assert(weights->count == j-1); |
276 | 7.23M | weights->count++; |
277 | 7.23M | weights->new_line = 1; |
278 | 7.23M | if (j == 0) |
279 | 44.9k | index = weights->index[0]; |
280 | 7.19M | else |
281 | 7.19M | { |
282 | 7.19M | index = weights->index[j-1]; |
283 | 7.19M | index += 2 + weights->index[index+1]; |
284 | 7.19M | } |
285 | 7.23M | weights->index[j] = index; /* row pointer */ |
286 | 7.23M | weights->index[index] = 0; /* min */ |
287 | 7.23M | weights->index[index+1] = 0; /* len */ |
288 | 7.23M | } |
289 | | |
290 | | static void |
291 | | insert_weight(fz_weights *weights, int j, int i, int weight) |
292 | 19.6M | { |
293 | 19.6M | int min, len, index; |
294 | | |
295 | | /* Move j from patch_l...patch_l+patch_w range to 0..patch_w range */ |
296 | 19.6M | j -= weights->patch_l; |
297 | 19.6M | if (weights->new_line) |
298 | 7.23M | { |
299 | | /* New line */ |
300 | 7.23M | weights->new_line = 0; |
301 | 7.23M | index = weights->index[j]; /* row pointer */ |
302 | 7.23M | weights->index[index] = i; /* min */ |
303 | 7.23M | weights->index[index+1] = 0; /* len */ |
304 | 7.23M | } |
305 | 19.6M | index = weights->index[j]; |
306 | 19.6M | min = weights->index[index++]; |
307 | 19.6M | len = weights->index[index++]; |
308 | 19.6M | while (i < min) |
309 | 0 | { |
310 | | /* This only happens in rare cases, but we need to insert |
311 | | * one earlier. In exceedingly rare cases we may need to |
312 | | * insert more than one earlier. */ |
313 | 0 | int k; |
314 | |
|
315 | 0 | for (k = len; k > 0; k--) |
316 | 0 | { |
317 | 0 | weights->index[index+k] = weights->index[index+k-1]; |
318 | 0 | } |
319 | 0 | weights->index[index] = 0; |
320 | 0 | min--; |
321 | 0 | len++; |
322 | 0 | weights->index[index-2] = min; |
323 | 0 | weights->index[index-1] = len; |
324 | 0 | } |
325 | 19.6M | if (i-min >= len) |
326 | 19.6M | { |
327 | | /* The usual case */ |
328 | 19.6M | while (i-min >= ++len) |
329 | 0 | { |
330 | 0 | weights->index[index+len-1] = 0; |
331 | 0 | } |
332 | 19.6M | assert(len-1 == i-min); |
333 | 19.6M | weights->index[index+i-min] = weight; |
334 | 19.6M | weights->index[index-1] = len; |
335 | 19.6M | assert(len <= weights->max_len); |
336 | 19.6M | } |
337 | 0 | else |
338 | 0 | { |
339 | | /* Infrequent case */ |
340 | 0 | weights->index[index+i-min] += weight; |
341 | 0 | } |
342 | 19.6M | } |
343 | | |
344 | | static void |
345 | | add_weight(fz_weights *weights, int j, int i, fz_scale_filter *filter, |
346 | | float x, float F, float G, int src_w, float dst_w) |
347 | 21.5M | { |
348 | 21.5M | float dist = j - x + 0.5f - ((i + 0.5f)*dst_w/src_w); |
349 | 21.5M | float f; |
350 | 21.5M | int weight; |
351 | | |
352 | 21.5M | dist *= G; |
353 | 21.5M | if (dist < 0) |
354 | 10.7M | dist = -dist; |
355 | 21.5M | f = filter->fn(filter, dist)*F; |
356 | 21.5M | weight = (int)(256*f+0.5f); |
357 | | |
358 | | /* Ensure i is in range */ |
359 | 21.5M | if (i < 0 || i >= src_w) |
360 | 763k | return; |
361 | 20.7M | if (weight != 0) |
362 | 19.6M | insert_weight(weights, j, i, weight); |
363 | 20.7M | } |
364 | | |
365 | | static void |
366 | | reorder_weights(fz_weights *weights, int j, int src_w) |
367 | 2.93M | { |
368 | 2.93M | int idx = weights->index[j - weights->patch_l]; |
369 | 2.93M | int min = weights->index[idx++]; |
370 | 2.93M | int len = weights->index[idx++]; |
371 | 2.93M | int max = weights->max_len; |
372 | 2.93M | int tmp = idx+max; |
373 | 2.93M | int i, off; |
374 | | |
375 | | /* Copy into the temporary area */ |
376 | 2.93M | memcpy(&weights->index[tmp], &weights->index[idx], sizeof(int)*len); |
377 | | |
378 | | /* Pad out if required */ |
379 | 2.93M | assert(len <= max); |
380 | 2.93M | assert(min+len <= src_w); |
381 | 2.93M | off = 0; |
382 | 2.93M | if (len < max) |
383 | 1.33M | { |
384 | 1.33M | memset(&weights->index[tmp+len], 0, sizeof(int)*(max-len)); |
385 | 1.33M | len = max; |
386 | 1.33M | if (min + len > src_w) |
387 | 16.1k | { |
388 | 16.1k | off = min + len - src_w; |
389 | 16.1k | min = src_w - len; |
390 | 16.1k | weights->index[idx-2] = min; |
391 | 16.1k | } |
392 | 1.33M | weights->index[idx-1] = len; |
393 | 1.33M | } |
394 | | |
395 | | /* Copy back into the proper places */ |
396 | 12.6M | for (i = 0; i < len; i++) |
397 | 9.68M | { |
398 | 9.68M | weights->index[idx+((min+i+off) % max)] = weights->index[tmp+i]; |
399 | 9.68M | } |
400 | 2.93M | } |
401 | | |
402 | | /* Due to rounding and edge effects, the sums for the weights sometimes don't |
403 | | * add up to 256. This causes visible rendering effects. Therefore, we take |
404 | | * pains to ensure that they 1) never exceed 256, and 2) add up to exactly |
405 | | * 256 for all pixels that are completely covered. See bug #691629. */ |
406 | | static void |
407 | | check_weights(fz_weights *weights, int j, int w, float x, float wf) |
408 | 7.23M | { |
409 | 7.23M | int idx, len; |
410 | 7.23M | int sum = 0; |
411 | 7.23M | int max = -256; |
412 | 7.23M | int maxidx = 0; |
413 | 7.23M | int i; |
414 | | |
415 | 7.23M | idx = weights->index[j - weights->patch_l]; |
416 | 7.23M | idx++; /* min */ |
417 | 7.23M | len = weights->index[idx++]; |
418 | | |
419 | 26.9M | for(i=0; i < len; i++) |
420 | 19.6M | { |
421 | 19.6M | int v = weights->index[idx++]; |
422 | 19.6M | sum += v; |
423 | 19.6M | if (v > max) |
424 | 12.7M | { |
425 | 12.7M | max = v; |
426 | 12.7M | maxidx = idx; |
427 | 12.7M | } |
428 | 19.6M | } |
429 | | /* If we aren't the first or last pixel, OR if the sum is too big |
430 | | * then adjust it. */ |
431 | 7.23M | if (((j != 0) && (j != w-1)) || (sum > 256)) |
432 | 7.15M | weights->index[maxidx-1] += 256-sum; |
433 | | /* Otherwise, if we are the first pixel, and it's fully covered, then |
434 | | * adjust it. */ |
435 | 80.6k | else if ((j == 0) && (x < 0.0001f) && (sum != 256)) |
436 | 39.4k | weights->index[maxidx-1] += 256-sum; |
437 | | /* Finally, if we are the last pixel, and it's fully covered, then |
438 | | * adjust it. */ |
439 | 41.2k | else if ((j == w-1) && (w - wf < 0.0001f) && (sum != 256)) |
440 | 32.4k | weights->index[maxidx-1] += 256-sum; |
441 | 7.23M | } |
442 | | |
443 | | static int |
444 | | window_fix(int l, int *rp, float window, float centre) |
445 | 169 | { |
446 | 169 | int r = *rp; |
447 | 338 | while (centre - l > window) |
448 | 169 | l++; |
449 | 338 | while (r - centre > window) |
450 | 169 | r--; |
451 | 169 | *rp = r; |
452 | 169 | return l; |
453 | 169 | } |
454 | | |
455 | | static fz_weights * |
456 | | make_weights(fz_context *ctx, int src_w, float x, float dst_w, fz_scale_filter *filter, int vertical, int dst_w_int, int patch_l, int patch_r, int n, int flip, fz_scale_cache *cache) |
457 | 74.8k | { |
458 | 74.8k | fz_weights *weights; |
459 | 74.8k | float F, G; |
460 | 74.8k | float window; |
461 | 74.8k | int j; |
462 | | |
463 | 74.8k | if (cache) |
464 | 74.8k | { |
465 | 74.8k | if (cache->src_w == src_w && cache->x == x && cache->dst_w == dst_w && |
466 | 74.8k | cache->filter == filter && cache->vertical == vertical && |
467 | 74.8k | cache->dst_w_int == dst_w_int && |
468 | 74.8k | cache->patch_l == patch_l && cache->patch_r == patch_r && |
469 | 74.8k | cache->n == n && cache->flip == flip) |
470 | 29.8k | { |
471 | 29.8k | return cache->weights; |
472 | 29.8k | } |
473 | 44.9k | cache->src_w = src_w; |
474 | 44.9k | cache->x = x; |
475 | 44.9k | cache->dst_w = dst_w; |
476 | 44.9k | cache->filter = filter; |
477 | 44.9k | cache->vertical = vertical; |
478 | 44.9k | cache->dst_w_int = dst_w_int; |
479 | 44.9k | cache->patch_l = patch_l; |
480 | 44.9k | cache->patch_r = patch_r; |
481 | 44.9k | cache->n = n; |
482 | 44.9k | cache->flip = flip; |
483 | 44.9k | fz_free(ctx, cache->weights); |
484 | 44.9k | cache->weights = NULL; |
485 | 44.9k | } |
486 | | |
487 | 44.9k | if (dst_w < src_w) |
488 | 44.9k | { |
489 | | /* Scaling down */ |
490 | 44.9k | F = dst_w / src_w; |
491 | 44.9k | G = 1; |
492 | 44.9k | } |
493 | 0 | else |
494 | 0 | { |
495 | | /* Scaling up */ |
496 | 0 | F = 1; |
497 | 0 | G = src_w / dst_w; |
498 | 0 | } |
499 | 44.9k | window = filter->width / F; |
500 | 44.9k | weights = new_weights(ctx, filter, src_w, dst_w, patch_r-patch_l, n, flip, patch_l); |
501 | 44.9k | if (!weights) |
502 | 0 | return NULL; |
503 | 7.28M | for (j = patch_l; j < patch_r; j++) |
504 | 7.23M | { |
505 | | /* find the position of the centre of dst[j] in src space */ |
506 | 7.23M | float centre = (j - x + 0.5f)*src_w/dst_w - 0.5f; |
507 | 7.23M | int l, r; |
508 | 7.23M | l = ceilf(centre - window); |
509 | 7.23M | r = floorf(centre + window); |
510 | | |
511 | | /* Now, due to the vagaries of floating point, if centre is large, l |
512 | | * and r can actually end up further than 2*window apart. All we care |
513 | | * about in this case is that we don't crash! We want a cheap correction |
514 | | * that avoids the assert and doesn't cost too much in the normal case. |
515 | | * This should do. */ |
516 | 7.23M | if (r - l > 2 * window) |
517 | 169 | l = window_fix(l, &r, window, centre); |
518 | | |
519 | 7.23M | init_weights(weights, j); |
520 | 28.7M | for (; l <= r; l++) |
521 | 21.5M | { |
522 | 21.5M | add_weight(weights, j, l, filter, x, F, G, src_w, dst_w); |
523 | 21.5M | } |
524 | 7.23M | if (weights->new_line) |
525 | 227 | { |
526 | | /* In very rare cases (bug 706764) we might not actually |
527 | | * have generated any non-zero weights for this destination |
528 | | * pixel. Just use the central pixel. */ |
529 | 227 | int src_x = floorf(centre); |
530 | 227 | if (src_x >= src_w) |
531 | 0 | src_x = src_w-1; |
532 | 227 | if (src_x < 0) |
533 | 2 | src_x = 0; |
534 | 227 | insert_weight(weights, j, src_x, 1); |
535 | 227 | } |
536 | 7.23M | check_weights(weights, j, dst_w_int, x, dst_w); |
537 | 7.23M | if (vertical) |
538 | 2.93M | { |
539 | 2.93M | reorder_weights(weights, j, src_w); |
540 | 2.93M | } |
541 | 7.23M | } |
542 | 44.9k | weights->count++; /* weights->count = dst_w_int now */ |
543 | 44.9k | if (cache) |
544 | 44.9k | { |
545 | 44.9k | cache->weights = weights; |
546 | 44.9k | } |
547 | 44.9k | return weights; |
548 | 44.9k | } |
549 | | |
550 | | static void |
551 | | scale_row_to_temp(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights) |
552 | 0 | { |
553 | 0 | const int *contrib = &weights->index[weights->index[0]]; |
554 | 0 | int len, i, j, n; |
555 | 0 | const unsigned char *min; |
556 | 0 | int tmp[FZ_MAX_COLORS]; |
557 | 0 | int *t = tmp; |
558 | |
|
559 | 0 | n = weights->n; |
560 | 0 | for (j = 0; j < n; j++) |
561 | 0 | tmp[j] = 128; |
562 | 0 | if (weights->flip) |
563 | 0 | { |
564 | 0 | dst += (weights->count-1)*n; |
565 | 0 | for (i=weights->count; i > 0; i--) |
566 | 0 | { |
567 | 0 | min = &src[n * *contrib++]; |
568 | 0 | len = *contrib++; |
569 | 0 | while (len-- > 0) |
570 | 0 | { |
571 | 0 | for (j = n; j > 0; j--) |
572 | 0 | *t++ += *min++ * *contrib; |
573 | 0 | t -= n; |
574 | 0 | contrib++; |
575 | 0 | } |
576 | 0 | for (j = n; j > 0; j--) |
577 | 0 | { |
578 | 0 | *dst++ = (unsigned char)(*t>>8); |
579 | 0 | *t++ = 128; |
580 | 0 | } |
581 | 0 | t -= n; |
582 | 0 | dst -= n*2; |
583 | 0 | } |
584 | 0 | } |
585 | 0 | else |
586 | 0 | { |
587 | 0 | for (i=weights->count; i > 0; i--) |
588 | 0 | { |
589 | 0 | min = &src[n * *contrib++]; |
590 | 0 | len = *contrib++; |
591 | 0 | while (len-- > 0) |
592 | 0 | { |
593 | 0 | for (j = n; j > 0; j--) |
594 | 0 | *t++ += *min++ * *contrib; |
595 | 0 | t -= n; |
596 | 0 | contrib++; |
597 | 0 | } |
598 | 0 | for (j = n; j > 0; j--) |
599 | 0 | { |
600 | 0 | *dst++ = (unsigned char)(*t>>8); |
601 | 0 | *t++ = 128; |
602 | 0 | } |
603 | 0 | t -= n; |
604 | 0 | } |
605 | 0 | } |
606 | 0 | } |
607 | | |
608 | | #ifdef ARCH_ARM |
609 | | |
610 | | static void |
611 | | scale_row_to_temp1(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights) |
612 | | __attribute__((naked)); |
613 | | |
614 | | static void |
615 | | scale_row_to_temp2(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights) |
616 | | __attribute__((naked)); |
617 | | |
618 | | static void |
619 | | scale_row_to_temp3(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights) |
620 | | __attribute__((naked)); |
621 | | |
622 | | static void |
623 | | scale_row_to_temp4(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights) |
624 | | __attribute__((naked)); |
625 | | |
626 | | static void |
627 | | scale_row_from_temp(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int width, int n, int row) |
628 | | __attribute__((naked)); |
629 | | |
630 | | static void |
631 | | scale_row_from_temp_alpha(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int width, int n, int row) |
632 | | __attribute__((naked)); |
633 | | |
634 | | static void |
635 | | scale_row_to_temp1(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights) |
636 | | { |
637 | | asm volatile( |
638 | | ENTER_ARM |
639 | | ".syntax unified\n" |
640 | | "stmfd r13!,{r4-r7,r9,r14} \n" |
641 | | "@ r0 = dst \n" |
642 | | "@ r1 = src \n" |
643 | | "@ r2 = weights \n" |
644 | | "ldr r12,[r2],#4 @ r12= flip \n" |
645 | | "ldr r3, [r2],#20 @ r3 = count r2 = &index\n" |
646 | | "ldr r4, [r2] @ r4 = index[0] \n" |
647 | | "cmp r12,#0 @ if (flip) \n" |
648 | | "beq 5f @ { \n" |
649 | | "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n" |
650 | | "add r0, r0, r3 @ dst += count \n" |
651 | | "1: \n" |
652 | | "ldr r4, [r2], #4 @ r4 = *contrib++ \n" |
653 | | "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n" |
654 | | "mov r5, #128 @ r5 = a = 128 \n" |
655 | | "add r4, r1, r4 @ r4 = min = &src[r4] \n" |
656 | | "subs r9, r9, #1 @ len-- \n" |
657 | | "blt 3f @ while (len >= 0) \n" |
658 | | "2: @ { \n" |
659 | | "ldrgt r6, [r2], #4 @ r6 = *contrib++ \n" |
660 | | "ldrbgt r7, [r4], #1 @ r7 = *min++ \n" |
661 | | "ldr r12,[r2], #4 @ r12 = *contrib++ \n" |
662 | | "ldrb r14,[r4], #1 @ r14 = *min++ \n" |
663 | | "mlagt r5, r6, r7, r5 @ g += r6 * r7 \n" |
664 | | "subs r9, r9, #2 @ r9 = len -= 2 \n" |
665 | | "mla r5, r12,r14,r5 @ g += r14 * r12 \n" |
666 | | "bge 2b @ } \n" |
667 | | "3: \n" |
668 | | "mov r5, r5, lsr #8 @ g >>= 8 \n" |
669 | | "strb r5,[r0, #-1]! @ *--dst=a \n" |
670 | | "subs r3, r3, #1 @ i-- \n" |
671 | | "bgt 1b @ \n" |
672 | | "ldmfd r13!,{r4-r7,r9,PC} @ pop, return to thumb \n" |
673 | | "5:" |
674 | | "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n" |
675 | | "6:" |
676 | | "ldr r4, [r2], #4 @ r4 = *contrib++ \n" |
677 | | "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n" |
678 | | "mov r5, #128 @ r5 = a = 128 \n" |
679 | | "add r4, r1, r4 @ r4 = min = &src[r4] \n" |
680 | | "subs r9, r9, #1 @ len-- \n" |
681 | | "blt 9f @ while (len > 0) \n" |
682 | | "7: @ { \n" |
683 | | "ldrgt r6, [r2], #4 @ r6 = *contrib++ \n" |
684 | | "ldrbgt r7, [r4], #1 @ r7 = *min++ \n" |
685 | | "ldr r12,[r2], #4 @ r12 = *contrib++ \n" |
686 | | "ldrb r14,[r4], #1 @ r14 = *min++ \n" |
687 | | "mlagt r5, r6,r7,r5 @ a += r6 * r7 \n" |
688 | | "subs r9, r9, #2 @ r9 = len -= 2 \n" |
689 | | "mla r5, r12,r14,r5 @ a += r14 * r12 \n" |
690 | | "bge 7b @ } \n" |
691 | | "9: \n" |
692 | | "mov r5, r5, LSR #8 @ a >>= 8 \n" |
693 | | "strb r5, [r0], #1 @ *dst++=a \n" |
694 | | "subs r3, r3, #1 @ i-- \n" |
695 | | "bgt 6b @ \n" |
696 | | "ldmfd r13!,{r4-r7,r9,PC} @ pop, return to thumb \n" |
697 | | ENTER_THUMB |
698 | | ); |
699 | | } |
700 | | |
701 | | static void |
702 | | scale_row_to_temp2(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights) |
703 | | { |
704 | | asm volatile( |
705 | | ENTER_ARM |
706 | | "stmfd r13!,{r4-r6,r9-r11,r14} \n" |
707 | | "@ r0 = dst \n" |
708 | | "@ r1 = src \n" |
709 | | "@ r2 = weights \n" |
710 | | "ldr r12,[r2],#4 @ r12= flip \n" |
711 | | "ldr r3, [r2],#20 @ r3 = count r2 = &index\n" |
712 | | "ldr r4, [r2] @ r4 = index[0] \n" |
713 | | "cmp r12,#0 @ if (flip) \n" |
714 | | "beq 4f @ { \n" |
715 | | "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n" |
716 | | "add r0, r0, r3, LSL #1 @ dst += 2*count \n" |
717 | | "1: \n" |
718 | | "ldr r4, [r2], #4 @ r4 = *contrib++ \n" |
719 | | "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n" |
720 | | "mov r5, #128 @ r5 = g = 128 \n" |
721 | | "mov r6, #128 @ r6 = a = 128 \n" |
722 | | "add r4, r1, r4, LSL #1 @ r4 = min = &src[2*r4] \n" |
723 | | "cmp r9, #0 @ while (len-- > 0) \n" |
724 | | "beq 3f @ { \n" |
725 | | "2: \n" |
726 | | "ldr r14,[r2], #4 @ r14 = *contrib++ \n" |
727 | | "ldrb r11,[r4], #1 @ r11 = *min++ \n" |
728 | | "ldrb r12,[r4], #1 @ r12 = *min++ \n" |
729 | | "subs r9, r9, #1 @ r9 = len-- \n" |
730 | | "mla r5, r14,r11,r5 @ g += r11 * r14 \n" |
731 | | "mla r6, r14,r12,r6 @ a += r12 * r14 \n" |
732 | | "bgt 2b @ } \n" |
733 | | "3: \n" |
734 | | "mov r5, r5, lsr #8 @ g >>= 8 \n" |
735 | | "mov r6, r6, lsr #8 @ a >>= 8 \n" |
736 | | "strb r5, [r0, #-2]! @ *--dst=a \n" |
737 | | "strb r6, [r0, #1] @ *--dst=g \n" |
738 | | "subs r3, r3, #1 @ i-- \n" |
739 | | "bgt 1b @ \n" |
740 | | "ldmfd r13!,{r4-r6,r9-r11,PC} @ pop, return to thumb \n" |
741 | | "4:" |
742 | | "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n" |
743 | | "5:" |
744 | | "ldr r4, [r2], #4 @ r4 = *contrib++ \n" |
745 | | "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n" |
746 | | "mov r5, #128 @ r5 = g = 128 \n" |
747 | | "mov r6, #128 @ r6 = a = 128 \n" |
748 | | "add r4, r1, r4, LSL #1 @ r4 = min = &src[2*r4] \n" |
749 | | "cmp r9, #0 @ while (len-- > 0) \n" |
750 | | "beq 7f @ { \n" |
751 | | "6: \n" |
752 | | "ldr r14,[r2], #4 @ r10 = *contrib++ \n" |
753 | | "ldrb r11,[r4], #1 @ r11 = *min++ \n" |
754 | | "ldrb r12,[r4], #1 @ r12 = *min++ \n" |
755 | | "subs r9, r9, #1 @ r9 = len-- \n" |
756 | | "mla r5, r14,r11,r5 @ g += r11 * r14 \n" |
757 | | "mla r6, r14,r12,r6 @ a += r12 * r14 \n" |
758 | | "bgt 6b @ } \n" |
759 | | "7: \n" |
760 | | "mov r5, r5, lsr #8 @ g >>= 8 \n" |
761 | | "mov r6, r6, lsr #8 @ a >>= 8 \n" |
762 | | "strb r5, [r0], #1 @ *dst++=g \n" |
763 | | "strb r6, [r0], #1 @ *dst++=a \n" |
764 | | "subs r3, r3, #1 @ i-- \n" |
765 | | "bgt 5b @ \n" |
766 | | "ldmfd r13!,{r4-r6,r9-r11,PC} @ pop, return to thumb \n" |
767 | | ENTER_THUMB |
768 | | ); |
769 | | } |
770 | | |
771 | | static void |
772 | | scale_row_to_temp3(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights) |
773 | | { |
774 | | asm volatile( |
775 | | ENTER_ARM |
776 | | "stmfd r13!,{r4-r11,r14} \n" |
777 | | "@ r0 = dst \n" |
778 | | "@ r1 = src \n" |
779 | | "@ r2 = weights \n" |
780 | | "ldr r12,[r2],#4 @ r12= flip \n" |
781 | | "ldr r3, [r2],#20 @ r3 = count r2 = &index\n" |
782 | | "ldr r4, [r2] @ r4 = index[0] \n" |
783 | | "cmp r12,#0 @ if (flip) \n" |
784 | | "beq 4f @ { \n" |
785 | | "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n" |
786 | | "add r0, r0, r3, LSL #1 @ \n" |
787 | | "add r0, r0, r3 @ dst += 3*count \n" |
788 | | "1: \n" |
789 | | "ldr r4, [r2], #4 @ r4 = *contrib++ \n" |
790 | | "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n" |
791 | | "mov r5, #128 @ r5 = r = 128 \n" |
792 | | "mov r6, #128 @ r6 = g = 128 \n" |
793 | | "add r7, r1, r4, LSL #1 @ \n" |
794 | | "add r4, r7, r4 @ r4 = min = &src[3*r4] \n" |
795 | | "mov r7, #128 @ r7 = b = 128 \n" |
796 | | "cmp r9, #0 @ while (len-- > 0) \n" |
797 | | "beq 3f @ { \n" |
798 | | "2: \n" |
799 | | "ldr r14,[r2], #4 @ r14 = *contrib++ \n" |
800 | | "ldrb r8, [r4], #1 @ r8 = *min++ \n" |
801 | | "ldrb r11,[r4], #1 @ r11 = *min++ \n" |
802 | | "ldrb r12,[r4], #1 @ r12 = *min++ \n" |
803 | | "subs r9, r9, #1 @ r9 = len-- \n" |
804 | | "mla r5, r14,r8, r5 @ r += r8 * r14 \n" |
805 | | "mla r6, r14,r11,r6 @ g += r11 * r14 \n" |
806 | | "mla r7, r14,r12,r7 @ b += r12 * r14 \n" |
807 | | "bgt 2b @ } \n" |
808 | | "3: \n" |
809 | | "mov r5, r5, lsr #8 @ r >>= 8 \n" |
810 | | "mov r6, r6, lsr #8 @ g >>= 8 \n" |
811 | | "mov r7, r7, lsr #8 @ b >>= 8 \n" |
812 | | "strb r5, [r0, #-3]! @ *--dst=r \n" |
813 | | "strb r6, [r0, #1] @ *--dst=g \n" |
814 | | "strb r7, [r0, #2] @ *--dst=b \n" |
815 | | "subs r3, r3, #1 @ i-- \n" |
816 | | "bgt 1b @ \n" |
817 | | "ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n" |
818 | | "4:" |
819 | | "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n" |
820 | | "5:" |
821 | | "ldr r4, [r2], #4 @ r4 = *contrib++ \n" |
822 | | "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n" |
823 | | "mov r5, #128 @ r5 = r = 128 \n" |
824 | | "mov r6, #128 @ r6 = g = 128 \n" |
825 | | "add r7, r1, r4, LSL #1 @ r7 = min = &src[2*r4] \n" |
826 | | "add r4, r7, r4 @ r4 = min = &src[3*r4] \n" |
827 | | "mov r7, #128 @ r7 = b = 128 \n" |
828 | | "cmp r9, #0 @ while (len-- > 0) \n" |
829 | | "beq 7f @ { \n" |
830 | | "6: \n" |
831 | | "ldr r14,[r2], #4 @ r10 = *contrib++ \n" |
832 | | "ldrb r8, [r4], #1 @ r8 = *min++ \n" |
833 | | "ldrb r11,[r4], #1 @ r11 = *min++ \n" |
834 | | "ldrb r12,[r4], #1 @ r12 = *min++ \n" |
835 | | "subs r9, r9, #1 @ r9 = len-- \n" |
836 | | "mla r5, r14,r8, r5 @ r += r8 * r14 \n" |
837 | | "mla r6, r14,r11,r6 @ g += r11 * r14 \n" |
838 | | "mla r7, r14,r12,r7 @ b += r12 * r14 \n" |
839 | | "bgt 6b @ } \n" |
840 | | "7: \n" |
841 | | "mov r5, r5, lsr #8 @ r >>= 8 \n" |
842 | | "mov r6, r6, lsr #8 @ g >>= 8 \n" |
843 | | "mov r7, r7, lsr #8 @ b >>= 8 \n" |
844 | | "strb r5, [r0], #1 @ *dst++=r \n" |
845 | | "strb r6, [r0], #1 @ *dst++=g \n" |
846 | | "strb r7, [r0], #1 @ *dst++=b \n" |
847 | | "subs r3, r3, #1 @ i-- \n" |
848 | | "bgt 5b @ \n" |
849 | | "ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n" |
850 | | ENTER_THUMB |
851 | | ); |
852 | | } |
853 | | |
854 | | static void |
855 | | scale_row_to_temp4(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights) |
856 | | { |
857 | | asm volatile( |
858 | | ENTER_ARM |
859 | | "stmfd r13!,{r4-r11,r14} \n" |
860 | | "@ r0 = dst \n" |
861 | | "@ r1 = src \n" |
862 | | "@ r2 = weights \n" |
863 | | "ldr r12,[r2],#4 @ r12= flip \n" |
864 | | "ldr r3, [r2],#20 @ r3 = count r2 = &index\n" |
865 | | "ldr r4, [r2] @ r4 = index[0] \n" |
866 | | "ldr r5,=0x00800080 @ r5 = rounding \n" |
867 | | "ldr r6,=0x00FF00FF @ r7 = 0x00FF00FF \n" |
868 | | "cmp r12,#0 @ if (flip) \n" |
869 | | "beq 4f @ { \n" |
870 | | "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n" |
871 | | "add r0, r0, r3, LSL #2 @ dst += 4*count \n" |
872 | | "1: \n" |
873 | | "ldr r4, [r2], #4 @ r4 = *contrib++ \n" |
874 | | "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n" |
875 | | "mov r7, r5 @ r7 = b = rounding \n" |
876 | | "mov r8, r5 @ r8 = a = rounding \n" |
877 | | "add r4, r1, r4, LSL #2 @ r4 = min = &src[4*r4] \n" |
878 | | "cmp r9, #0 @ while (len-- > 0) \n" |
879 | | "beq 3f @ { \n" |
880 | | "2: \n" |
881 | | "ldr r11,[r4], #4 @ r11 = *min++ \n" |
882 | | "ldr r10,[r2], #4 @ r10 = *contrib++ \n" |
883 | | "subs r9, r9, #1 @ r9 = len-- \n" |
884 | | "and r12,r6, r11 @ r12 = __22__00 \n" |
885 | | "and r11,r6, r11,LSR #8 @ r11 = __33__11 \n" |
886 | | "mla r7, r10,r12,r7 @ b += r14 * r10 \n" |
887 | | "mla r8, r10,r11,r8 @ a += r11 * r10 \n" |
888 | | "bgt 2b @ } \n" |
889 | | "3: \n" |
890 | | "and r7, r6, r7, lsr #8 @ r7 = __22__00 \n" |
891 | | "bic r8, r8, r6 @ r8 = 33__11__ \n" |
892 | | "orr r7, r7, r8 @ r7 = 33221100 \n" |
893 | | "str r7, [r0, #-4]! @ *--dst=r \n" |
894 | | "subs r3, r3, #1 @ i-- \n" |
895 | | "bgt 1b @ \n" |
896 | | "ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n" |
897 | | "4: \n" |
898 | | "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n" |
899 | | "5: \n" |
900 | | "ldr r4, [r2], #4 @ r4 = *contrib++ \n" |
901 | | "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n" |
902 | | "mov r7, r5 @ r7 = b = rounding \n" |
903 | | "mov r8, r5 @ r8 = a = rounding \n" |
904 | | "add r4, r1, r4, LSL #2 @ r4 = min = &src[4*r4] \n" |
905 | | "cmp r9, #0 @ while (len-- > 0) \n" |
906 | | "beq 7f @ { \n" |
907 | | "6: \n" |
908 | | "ldr r11,[r4], #4 @ r11 = *min++ \n" |
909 | | "ldr r10,[r2], #4 @ r10 = *contrib++ \n" |
910 | | "subs r9, r9, #1 @ r9 = len-- \n" |
911 | | "and r12,r6, r11 @ r12 = __22__00 \n" |
912 | | "and r11,r6, r11,LSR #8 @ r11 = __33__11 \n" |
913 | | "mla r7, r10,r12,r7 @ b += r14 * r10 \n" |
914 | | "mla r8, r10,r11,r8 @ a += r11 * r10 \n" |
915 | | "bgt 6b @ } \n" |
916 | | "7: \n" |
917 | | "and r7, r6, r7, lsr #8 @ r7 = __22__00 \n" |
918 | | "bic r8, r8, r6 @ r8 = 33__11__ \n" |
919 | | "orr r7, r7, r8 @ r7 = 33221100 \n" |
920 | | "str r7, [r0], #4 @ *dst++=r \n" |
921 | | "subs r3, r3, #1 @ i-- \n" |
922 | | "bgt 5b @ \n" |
923 | | "ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n" |
924 | | ENTER_THUMB |
925 | | ); |
926 | | } |
927 | | |
928 | | static void |
929 | | scale_row_from_temp(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int width, int n, int row) |
930 | | { |
931 | | asm volatile( |
932 | | ENTER_ARM |
933 | | "stmfd r13!,{r4-r11,r14} \n" |
934 | | "@ r0 = dst \n" |
935 | | "@ r1 = src \n" |
936 | | "@ r2 = &weights->index[0] \n" |
937 | | "@ r3 = width \n" |
938 | | "@ r12= row \n" |
939 | | "ldr r14,[r13,#4*9] @ r14= n \n" |
940 | | "ldr r12,[r13,#4*10] @ r12= row \n" |
941 | | "add r2, r2, #24 @ r2 = weights->index \n" |
942 | | "mul r3, r14, r3 @ r3 = width *= n \n" |
943 | | "ldr r4, [r2, r12, LSL #2] @ r4 = index[row] \n" |
944 | | "add r2, r2, #4 @ r2 = &index[1] \n" |
945 | | "subs r6, r3, #4 @ r6 = x = width-4 \n" |
946 | | "ldr r14,[r2, r4, LSL #2]! @ r2 = contrib = index[index[row]+1]\n" |
947 | | " @ r14= len = *contrib \n" |
948 | | "blt 4f @ while (x >= 0) { \n" |
949 | | #ifndef ARCH_UNALIGNED_OK |
950 | | "tst r3, #3 @ if ((r3 & 3) \n" |
951 | | "tsteq r1, #3 @ || (r1 & 3)) \n" |
952 | | "bne 4f @ can't do fast code \n" |
953 | | #endif |
954 | | "ldr r9, =0x00FF00FF @ r9 = 0x00FF00FF \n" |
955 | | "1: \n" |
956 | | "ldr r7, =0x00800080 @ r5 = val0 = round \n" |
957 | | "stmfd r13!,{r1,r2,r7} @ stash r1,r2,r5 \n" |
958 | | " @ r1 = min = src \n" |
959 | | " @ r2 = contrib2-4 \n" |
960 | | "movs r8, r14 @ r8 = len2 = len \n" |
961 | | "mov r5, r7 @ r7 = val1 = round \n" |
962 | | "ble 3f @ while (len2-- > 0) { \n" |
963 | | "2: \n" |
964 | | "ldr r12,[r1], r3 @ r12 = *min r5 = min += width\n" |
965 | | "ldr r10,[r2, #4]! @ r10 = *contrib2++ \n" |
966 | | "subs r8, r8, #1 @ len2-- \n" |
967 | | "and r11,r9, r12 @ r11= __22__00 \n" |
968 | | "and r12,r9, r12,LSR #8 @ r12= __33__11 \n" |
969 | | "mla r5, r10,r11,r5 @ r5 = val0 += r11 * r10\n" |
970 | | "mla r7, r10,r12,r7 @ r7 = val1 += r12 * r10\n" |
971 | | "bgt 2b @ } \n" |
972 | | "and r5, r9, r5, LSR #8 @ r5 = __22__00 \n" |
973 | | "and r7, r7, r9, LSL #8 @ r7 = 33__11__ \n" |
974 | | "orr r5, r5, r7 @ r5 = 33221100 \n" |
975 | | "3: \n" |
976 | | "ldmfd r13!,{r1,r2,r7} @ restore r1,r2,r7 \n" |
977 | | "subs r6, r6, #4 @ x-- \n" |
978 | | "add r1, r1, #4 @ src++ \n" |
979 | | "str r5, [r0], #4 @ *dst++ = val \n" |
980 | | "bge 1b @ \n" |
981 | | "4: @ } (Less than 4 to go) \n" |
982 | | "adds r6, r6, #4 @ r6 = x += 4 \n" |
983 | | "beq 8f @ if (x == 0) done \n" |
984 | | "5: \n" |
985 | | "mov r5, r1 @ r5 = min = src \n" |
986 | | "mov r7, #128 @ r7 = val = 128 \n" |
987 | | "movs r8, r14 @ r8 = len2 = len \n" |
988 | | "add r9, r2, #4 @ r9 = contrib2 \n" |
989 | | "ble 7f @ while (len2-- > 0) { \n" |
990 | | "6: \n" |
991 | | "ldr r10,[r9], #4 @ r10 = *contrib2++ \n" |
992 | | "ldrb r12,[r5], r3 @ r12 = *min r5 = min += width\n" |
993 | | "subs r8, r8, #1 @ len2-- \n" |
994 | | "@ stall r12 \n" |
995 | | "mla r7, r10,r12,r7 @ val += r12 * r10 \n" |
996 | | "bgt 6b @ } \n" |
997 | | "7: \n" |
998 | | "mov r7, r7, asr #8 @ r7 = val >>= 8 \n" |
999 | | "subs r6, r6, #1 @ x-- \n" |
1000 | | "add r1, r1, #1 @ src++ \n" |
1001 | | "strb r7, [r0], #1 @ *dst++ = val \n" |
1002 | | "bgt 5b @ \n" |
1003 | | "8: \n" |
1004 | | "ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n" |
1005 | | ".ltorg \n" |
1006 | | ENTER_THUMB |
1007 | | ); |
1008 | | } |
1009 | | |
1010 | | static void |
1011 | | scale_row_from_temp_alpha(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int width, int n, int row) |
1012 | | { |
1013 | | asm volatile( |
1014 | | ENTER_ARM |
1015 | | "stmfd r13!,{r4-r11,r14} \n" |
1016 | | "mov r11,#255 @ r11= 255 \n" |
1017 | | "ldr r12,[r13,#4*10] @ r12= row \n" |
1018 | | "@ r0 = dst \n" |
1019 | | "@ r1 = src \n" |
1020 | | "@ r2 = &weights->index[0] \n" |
1021 | | "@ r3 = width \n" |
1022 | | "@ r11= 255 \n" |
1023 | | "@ r12= row \n" |
1024 | | "add r2, r2, #24 @ r2 = weights->index \n" |
1025 | | "ldr r4, [r2, r12, LSL #2] @ r4 = index[row] \n" |
1026 | | "add r2, r2, #4 @ r2 = &index[1] \n" |
1027 | | "mov r6, r3 @ r6 = x = width \n" |
1028 | | "ldr r14,[r2, r4, LSL #2]! @ r2 = contrib = index[index[row]+1]\n" |
1029 | | " @ r14= len = *contrib \n" |
1030 | | "5: \n" |
1031 | | "ldr r4,[r13,#4*9] @ r10= nn = n \n" |
1032 | | "1: \n" |
1033 | | "mov r5, r1 @ r5 = min = src \n" |
1034 | | "mov r7, #128 @ r7 = val = 128 \n" |
1035 | | "movs r8, r14 @ r8 = len2 = len \n" |
1036 | | "add r9, r2, #4 @ r9 = contrib2 \n" |
1037 | | "ble 7f @ while (len2-- > 0) { \n" |
1038 | | "6: \n" |
1039 | | "ldr r10,[r9], #4 @ r10 = *contrib2++ \n" |
1040 | | "ldrb r12,[r5], r3 @ r12 = *min r5 = min += width\n" |
1041 | | "subs r8, r8, #1 @ len2-- \n" |
1042 | | "@ stall r12 \n" |
1043 | | "mla r7, r10,r12,r7 @ val += r12 * r10 \n" |
1044 | | "bgt 6b @ } \n" |
1045 | | "7: \n" |
1046 | | "mov r7, r7, asr #8 @ r7 = val >>= 8 \n" |
1047 | | "subs r4, r4, #1 @ r4 = nn-- \n" |
1048 | | "add r1, r1, #1 @ src++ \n" |
1049 | | "strb r7, [r0], #1 @ *dst++ = val \n" |
1050 | | "bgt 1b @ \n" |
1051 | | "subs r6, r6, #1 @ x-- \n" |
1052 | | "strb r11,[r0], #1 @ *dst++ = 255 \n" |
1053 | | "bgt 5b @ \n" |
1054 | | "ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n" |
1055 | | ".ltorg \n" |
1056 | | ENTER_THUMB |
1057 | | ); |
1058 | | } |
1059 | | #else |
1060 | | |
1061 | | static void |
1062 | | scale_row_to_temp1(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights) |
1063 | 3.54M | { |
1064 | 3.54M | const int *contrib = &weights->index[weights->index[0]]; |
1065 | 3.54M | int len, i; |
1066 | 3.54M | const unsigned char *min; |
1067 | | |
1068 | 3.54M | assert(weights->n == 1); |
1069 | 3.54M | if (weights->flip) |
1070 | 84.9k | { |
1071 | 84.9k | dst += weights->count; |
1072 | 1.98M | for (i=weights->count; i > 0; i--) |
1073 | 1.89M | { |
1074 | 1.89M | int val = 128; |
1075 | 1.89M | min = &src[*contrib++]; |
1076 | 1.89M | len = *contrib++; |
1077 | 6.62M | while (len-- > 0) |
1078 | 4.72M | { |
1079 | 4.72M | val += *min++ * *contrib++; |
1080 | 4.72M | } |
1081 | 1.89M | *--dst = (unsigned char)(val>>8); |
1082 | 1.89M | } |
1083 | 84.9k | } |
1084 | 3.46M | else |
1085 | 3.46M | { |
1086 | 734M | for (i=weights->count; i > 0; i--) |
1087 | 730M | { |
1088 | 730M | int val = 128; |
1089 | 730M | min = &src[*contrib++]; |
1090 | 730M | len = *contrib++; |
1091 | 2.67G | while (len-- > 0) |
1092 | 1.94G | { |
1093 | 1.94G | val += *min++ * *contrib++; |
1094 | 1.94G | } |
1095 | 730M | *dst++ = (unsigned char)(val>>8); |
1096 | 730M | } |
1097 | 3.46M | } |
1098 | 3.54M | } |
1099 | | |
1100 | | static void |
1101 | | scale_row_to_temp2(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights) |
1102 | 1.29k | { |
1103 | 1.29k | const int *contrib = &weights->index[weights->index[0]]; |
1104 | 1.29k | int len, i; |
1105 | 1.29k | const unsigned char *min; |
1106 | | |
1107 | 1.29k | assert(weights->n == 2); |
1108 | 1.29k | if (weights->flip) |
1109 | 0 | { |
1110 | 0 | dst += 2*weights->count; |
1111 | 0 | for (i=weights->count; i > 0; i--) |
1112 | 0 | { |
1113 | 0 | int c1 = 128; |
1114 | 0 | int c2 = 128; |
1115 | 0 | min = &src[2 * *contrib++]; |
1116 | 0 | len = *contrib++; |
1117 | 0 | while (len-- > 0) |
1118 | 0 | { |
1119 | 0 | c1 += *min++ * *contrib; |
1120 | 0 | c2 += *min++ * *contrib++; |
1121 | 0 | } |
1122 | 0 | *--dst = (unsigned char)(c2>>8); |
1123 | 0 | *--dst = (unsigned char)(c1>>8); |
1124 | 0 | } |
1125 | 0 | } |
1126 | 1.29k | else |
1127 | 1.29k | { |
1128 | 1.19M | for (i=weights->count; i > 0; i--) |
1129 | 1.19M | { |
1130 | 1.19M | int c1 = 128; |
1131 | 1.19M | int c2 = 128; |
1132 | 1.19M | min = &src[2 * *contrib++]; |
1133 | 1.19M | len = *contrib++; |
1134 | 4.29M | while (len-- > 0) |
1135 | 3.10M | { |
1136 | 3.10M | c1 += *min++ * *contrib; |
1137 | 3.10M | c2 += *min++ * *contrib++; |
1138 | 3.10M | } |
1139 | 1.19M | *dst++ = (unsigned char)(c1>>8); |
1140 | 1.19M | *dst++ = (unsigned char)(c2>>8); |
1141 | 1.19M | } |
1142 | 1.29k | } |
1143 | 1.29k | } |
1144 | | |
1145 | | static void |
1146 | | scale_row_to_temp3(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights) |
1147 | 1.64M | { |
1148 | 1.64M | const int *contrib = &weights->index[weights->index[0]]; |
1149 | 1.64M | int len, i; |
1150 | 1.64M | const unsigned char *min; |
1151 | | |
1152 | 1.64M | assert(weights->n == 3); |
1153 | 1.64M | if (weights->flip) |
1154 | 4.15k | { |
1155 | 4.15k | dst += 3*weights->count; |
1156 | 746k | for (i=weights->count; i > 0; i--) |
1157 | 742k | { |
1158 | 742k | int c1 = 128; |
1159 | 742k | int c2 = 128; |
1160 | 742k | int c3 = 128; |
1161 | 742k | min = &src[3 * *contrib++]; |
1162 | 742k | len = *contrib++; |
1163 | 3.06M | while (len-- > 0) |
1164 | 2.31M | { |
1165 | 2.31M | int c = *contrib++; |
1166 | 2.31M | c1 += *min++ * c; |
1167 | 2.31M | c2 += *min++ * c; |
1168 | 2.31M | c3 += *min++ * c; |
1169 | 2.31M | } |
1170 | 742k | *--dst = (unsigned char)(c3>>8); |
1171 | 742k | *--dst = (unsigned char)(c2>>8); |
1172 | 742k | *--dst = (unsigned char)(c1>>8); |
1173 | 742k | } |
1174 | 4.15k | } |
1175 | 1.63M | else |
1176 | 1.63M | { |
1177 | 307M | for (i=weights->count; i > 0; i--) |
1178 | 306M | { |
1179 | 306M | int c1 = 128; |
1180 | 306M | int c2 = 128; |
1181 | 306M | int c3 = 128; |
1182 | 306M | min = &src[3 * *contrib++]; |
1183 | 306M | len = *contrib++; |
1184 | 1.12G | while (len-- > 0) |
1185 | 814M | { |
1186 | 814M | int c = *contrib++; |
1187 | 814M | c1 += *min++ * c; |
1188 | 814M | c2 += *min++ * c; |
1189 | 814M | c3 += *min++ * c; |
1190 | 814M | } |
1191 | 306M | *dst++ = (unsigned char)(c1>>8); |
1192 | 306M | *dst++ = (unsigned char)(c2>>8); |
1193 | 306M | *dst++ = (unsigned char)(c3>>8); |
1194 | 306M | } |
1195 | 1.63M | } |
1196 | 1.64M | } |
1197 | | |
1198 | | static void |
1199 | | scale_row_to_temp4(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights) |
1200 | 77.5k | { |
1201 | 77.5k | const int *contrib = &weights->index[weights->index[0]]; |
1202 | 77.5k | int len, i; |
1203 | 77.5k | const unsigned char *min; |
1204 | | |
1205 | 77.5k | assert(weights->n == 4); |
1206 | 77.5k | if (weights->flip) |
1207 | 0 | { |
1208 | 0 | dst += 4*weights->count; |
1209 | 0 | for (i=weights->count; i > 0; i--) |
1210 | 0 | { |
1211 | 0 | int r = 128; |
1212 | 0 | int g = 128; |
1213 | 0 | int b = 128; |
1214 | 0 | int a = 128; |
1215 | 0 | min = &src[4 * *contrib++]; |
1216 | 0 | len = *contrib++; |
1217 | 0 | while (len-- > 0) |
1218 | 0 | { |
1219 | 0 | r += *min++ * *contrib; |
1220 | 0 | g += *min++ * *contrib; |
1221 | 0 | b += *min++ * *contrib; |
1222 | 0 | a += *min++ * *contrib++; |
1223 | 0 | } |
1224 | 0 | *--dst = (unsigned char)(a>>8); |
1225 | 0 | *--dst = (unsigned char)(b>>8); |
1226 | 0 | *--dst = (unsigned char)(g>>8); |
1227 | 0 | *--dst = (unsigned char)(r>>8); |
1228 | 0 | } |
1229 | 0 | } |
1230 | 77.5k | else |
1231 | 77.5k | { |
1232 | 18.2M | for (i=weights->count; i > 0; i--) |
1233 | 18.1M | { |
1234 | 18.1M | int r = 128; |
1235 | 18.1M | int g = 128; |
1236 | 18.1M | int b = 128; |
1237 | 18.1M | int a = 128; |
1238 | 18.1M | min = &src[4 * *contrib++]; |
1239 | 18.1M | len = *contrib++; |
1240 | 67.2M | while (len-- > 0) |
1241 | 49.0M | { |
1242 | 49.0M | r += *min++ * *contrib; |
1243 | 49.0M | g += *min++ * *contrib; |
1244 | 49.0M | b += *min++ * *contrib; |
1245 | 49.0M | a += *min++ * *contrib++; |
1246 | 49.0M | } |
1247 | 18.1M | *dst++ = (unsigned char)(r>>8); |
1248 | 18.1M | *dst++ = (unsigned char)(g>>8); |
1249 | 18.1M | *dst++ = (unsigned char)(b>>8); |
1250 | 18.1M | *dst++ = (unsigned char)(a>>8); |
1251 | 18.1M | } |
1252 | 77.5k | } |
1253 | 77.5k | } |
1254 | | |
1255 | | static void |
1256 | | scale_row_from_temp(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int w, int n, int row) |
1257 | 3.35M | { |
1258 | 3.35M | const int *contrib = &weights->index[weights->index[row]]; |
1259 | 3.35M | int len, x; |
1260 | 3.35M | int width = w * n; |
1261 | | |
1262 | 3.35M | contrib++; /* Skip min */ |
1263 | 3.35M | len = *contrib++; |
1264 | 1.28G | for (x=width; x > 0; x--) |
1265 | 1.27G | { |
1266 | 1.27G | const unsigned char *min = src; |
1267 | 1.27G | int val = 128; |
1268 | 1.27G | int len2 = len; |
1269 | 1.27G | const int *contrib2 = contrib; |
1270 | | |
1271 | 5.21G | while (len2-- > 0) |
1272 | 3.93G | { |
1273 | 3.93G | val += *min * *contrib2++; |
1274 | 3.93G | min += width; |
1275 | 3.93G | } |
1276 | 1.27G | *dst++ = (unsigned char)(val>>8); |
1277 | 1.27G | src++; |
1278 | 1.27G | } |
1279 | 3.35M | } |
1280 | | |
1281 | | static void |
1282 | | scale_row_from_temp_alpha(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int w, int n, int row) |
1283 | 2.24k | { |
1284 | 2.24k | const int *contrib = &weights->index[weights->index[row]]; |
1285 | 2.24k | int len, x; |
1286 | 2.24k | int width = w * n; |
1287 | | |
1288 | 2.24k | contrib++; /* Skip min */ |
1289 | 2.24k | len = *contrib++; |
1290 | 19.4k | for (x=w; x > 0; x--) |
1291 | 17.2k | { |
1292 | 17.2k | int nn; |
1293 | 34.4k | for (nn = n; nn > 0; nn--) |
1294 | 17.2k | { |
1295 | 17.2k | const unsigned char *min = src; |
1296 | 17.2k | int val = 128; |
1297 | 17.2k | int len2 = len; |
1298 | 17.2k | const int *contrib2 = contrib; |
1299 | | |
1300 | 156k | while (len2-- > 0) |
1301 | 139k | { |
1302 | 139k | val += *min * *contrib2++; |
1303 | 139k | min += width; |
1304 | 139k | } |
1305 | 17.2k | *dst++ = (unsigned char)(val>>8); |
1306 | 17.2k | src++; |
1307 | 17.2k | } |
1308 | 17.2k | *dst++ = 255; |
1309 | 17.2k | } |
1310 | 2.24k | } |
1311 | | #endif |
1312 | | |
1313 | | #ifdef SINGLE_PIXEL_SPECIALS |
1314 | | static void |
1315 | | duplicate_single_pixel(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, int n, int forcealpha, int w, int h, int stride) |
1316 | 2 | { |
1317 | 2 | int i; |
1318 | | |
1319 | 4 | for (i = n; i > 0; i--) |
1320 | 2 | *dst++ = *src++; |
1321 | 2 | if (forcealpha) |
1322 | 0 | *dst++ = 255; |
1323 | 2 | n += forcealpha; |
1324 | 3 | for (i = w-1; i > 0; i--) |
1325 | 1 | { |
1326 | 1 | memcpy(dst, dst-n, n); |
1327 | 1 | dst += n; |
1328 | 1 | } |
1329 | 2 | w *= n; |
1330 | 2 | dst -= w; |
1331 | 2 | h--; |
1332 | 2 | while (h--) |
1333 | 0 | { |
1334 | 0 | memcpy(dst+stride, dst, w); |
1335 | 0 | dst += stride; |
1336 | 0 | } |
1337 | 2 | } |
1338 | | |
1339 | | static void |
1340 | | scale_single_row(unsigned char * FZ_RESTRICT dst, int dstride, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int src_w, int h, int forcealpha) |
1341 | 211 | { |
1342 | 211 | const int *contrib = &weights->index[weights->index[0]]; |
1343 | 211 | int min, len, i, j, n, nf; |
1344 | 211 | int tmp[FZ_MAX_COLORS]; |
1345 | | |
1346 | 211 | n = weights->n; |
1347 | 211 | nf = n + forcealpha; |
1348 | | /* Scale a single row */ |
1349 | 422 | for (j = 0; j < nf; j++) |
1350 | 211 | tmp[j] = 128; |
1351 | 211 | if (weights->flip) |
1352 | 0 | { |
1353 | 0 | dst += (weights->count-1)*nf; |
1354 | 0 | for (i=weights->count; i > 0; i--) |
1355 | 0 | { |
1356 | 0 | min = *contrib++; |
1357 | 0 | len = *contrib++; |
1358 | 0 | min *= n; |
1359 | 0 | while (len-- > 0) |
1360 | 0 | { |
1361 | 0 | int c = *contrib++; |
1362 | 0 | for (j = 0; j < n; j++) |
1363 | 0 | tmp[j] += src[min++] * c; |
1364 | 0 | if (forcealpha) |
1365 | 0 | tmp[j] += 255 * c; |
1366 | 0 | } |
1367 | 0 | for (j = 0; j < nf; j++) |
1368 | 0 | { |
1369 | 0 | *dst++ = (unsigned char)(tmp[j]>>8); |
1370 | 0 | tmp[j] = 128; |
1371 | 0 | } |
1372 | 0 | dst -= 2*nf; |
1373 | 0 | } |
1374 | 0 | dst += nf + dstride; |
1375 | 0 | } |
1376 | 211 | else |
1377 | 211 | { |
1378 | 530 | for (i=weights->count; i > 0; i--) |
1379 | 319 | { |
1380 | 319 | min = *contrib++; |
1381 | 319 | len = *contrib++; |
1382 | 319 | min *= n; |
1383 | 7.95k | while (len-- > 0) |
1384 | 7.63k | { |
1385 | 7.63k | int c = *contrib++; |
1386 | 15.2k | for (j = 0; j < n; j++) |
1387 | 7.63k | tmp[j] += src[min++] * c; |
1388 | 7.63k | if (forcealpha) |
1389 | 0 | tmp[j] += 255 * c; |
1390 | 7.63k | } |
1391 | 638 | for (j = 0; j < nf; j++) |
1392 | 319 | { |
1393 | 319 | *dst++ = (unsigned char)(tmp[j]>>8); |
1394 | 319 | tmp[j] = 128; |
1395 | 319 | } |
1396 | 319 | } |
1397 | 211 | dst += dstride - weights->count * nf; |
1398 | 211 | } |
1399 | | /* And then duplicate it h times */ |
1400 | 211 | nf *= weights->count; |
1401 | 215 | while (--h > 0) |
1402 | 4 | { |
1403 | 4 | memcpy(dst, dst-dstride, nf); |
1404 | 4 | dst += dstride; |
1405 | 4 | } |
1406 | 211 | } |
1407 | | |
1408 | | static void |
1409 | | scale_single_col(unsigned char * FZ_RESTRICT dst, int dstride, const unsigned char * FZ_RESTRICT src, int sstride, const fz_weights * FZ_RESTRICT weights, int src_w, int n, int w, int forcealpha) |
1410 | 35 | { |
1411 | 35 | const int *contrib = &weights->index[weights->index[0]]; |
1412 | 35 | int min, len, i, j; |
1413 | 35 | int tmp[FZ_MAX_COLORS]; |
1414 | 35 | int nf = n + forcealpha; |
1415 | | |
1416 | 124 | for (j = 0; j < nf; j++) |
1417 | 89 | tmp[j] = 128; |
1418 | 35 | if (weights->flip) |
1419 | 0 | { |
1420 | 0 | src_w = (src_w-1)*sstride; |
1421 | 0 | for (i=weights->count; i > 0; i--) |
1422 | 0 | { |
1423 | | /* Scale the next pixel in the column */ |
1424 | 0 | min = *contrib++; |
1425 | 0 | len = *contrib++; |
1426 | 0 | min = src_w-min*sstride; |
1427 | 0 | while (len-- > 0) |
1428 | 0 | { |
1429 | 0 | int c = *contrib++; |
1430 | 0 | for (j = 0; j < n; j++) |
1431 | 0 | tmp[j] += src[min+j] * c; |
1432 | 0 | if (forcealpha) |
1433 | 0 | tmp[j] += 255 * c; |
1434 | 0 | min -= sstride; |
1435 | 0 | } |
1436 | 0 | for (j = 0; j < nf; j++) |
1437 | 0 | { |
1438 | 0 | *dst++ = (unsigned char)(tmp[j]>>8); |
1439 | 0 | tmp[j] = 128; |
1440 | 0 | } |
1441 | | /* And then duplicate it across the row */ |
1442 | 0 | for (j = (w-1)*nf; j > 0; j--) |
1443 | 0 | { |
1444 | 0 | *dst = dst[-nf]; |
1445 | 0 | dst++; |
1446 | 0 | } |
1447 | 0 | dst += dstride - w*nf; |
1448 | 0 | } |
1449 | 0 | } |
1450 | 35 | else |
1451 | 35 | { |
1452 | 70 | for (i=weights->count; i > 0; i--) |
1453 | 35 | { |
1454 | | /* Scale the next pixel in the column */ |
1455 | 35 | min = *contrib++; |
1456 | 35 | len = *contrib++; |
1457 | 35 | min *= sstride; |
1458 | 27.0k | while (len-- > 0) |
1459 | 27.0k | { |
1460 | 27.0k | int c = *contrib++; |
1461 | 107k | for (j = 0; j < n; j++) |
1462 | 80.8k | tmp[j] += src[min+j] * c; |
1463 | 27.0k | if (forcealpha) |
1464 | 0 | tmp[j] += 255 * c; |
1465 | 27.0k | min += sstride; |
1466 | 27.0k | } |
1467 | 124 | for (j = 0; j < nf; j++) |
1468 | 89 | { |
1469 | 89 | *dst++ = (unsigned char)(tmp[j]>>8); |
1470 | 89 | tmp[j] = 128; |
1471 | 89 | } |
1472 | | /* And then duplicate it across the row */ |
1473 | 35 | for (j = (w-1)*nf; j > 0; j--) |
1474 | 0 | { |
1475 | 0 | *dst = dst[-nf]; |
1476 | 0 | dst++; |
1477 | 0 | } |
1478 | 35 | dst += dstride - w*nf; |
1479 | 35 | } |
1480 | 35 | } |
1481 | 35 | } |
1482 | | #endif /* SINGLE_PIXEL_SPECIALS */ |
1483 | | |
1484 | | static void |
1485 | | get_alpha_edge_values(const fz_weights * FZ_RESTRICT rows, int * FZ_RESTRICT tp, int * FZ_RESTRICT bp) |
1486 | 1.26k | { |
1487 | 1.26k | const int *contrib = &rows->index[rows->index[0]]; |
1488 | 1.26k | int len, i, t, b; |
1489 | | |
1490 | | /* Calculate the edge alpha values */ |
1491 | 1.26k | contrib++; /* Skip min */ |
1492 | 1.26k | len = *contrib++; |
1493 | 1.26k | t = 0; |
1494 | 31.7k | while (len--) |
1495 | 30.4k | t += *contrib++; |
1496 | 3.22k | for (i=rows->count-2; i > 0; i--) |
1497 | 1.96k | { |
1498 | 1.96k | contrib++; /* Skip min */ |
1499 | 1.96k | len = *contrib++; |
1500 | 1.96k | contrib += len; |
1501 | 1.96k | } |
1502 | 1.26k | b = 0; |
1503 | 1.26k | if (i == 0) |
1504 | 1.23k | { |
1505 | 1.23k | contrib++; |
1506 | 1.23k | len = *contrib++; |
1507 | 38.0k | while (len--) |
1508 | 36.8k | b += *contrib++; |
1509 | 1.23k | } |
1510 | 1.26k | if (rows->flip && i == 0) |
1511 | 69 | { |
1512 | 69 | *tp = b; |
1513 | 69 | *bp = t; |
1514 | 69 | } |
1515 | 1.19k | else |
1516 | 1.19k | { |
1517 | 1.19k | *tp = t; |
1518 | 1.19k | *bp = b; |
1519 | 1.19k | } |
1520 | 1.26k | } |
1521 | | |
1522 | | static void |
1523 | | adjust_alpha_edges(fz_pixmap * FZ_RESTRICT pix, const fz_weights * FZ_RESTRICT rows, const fz_weights * FZ_RESTRICT cols) |
1524 | 630 | { |
1525 | 630 | int t, l, r, b, tl, tr, bl, br, x, y; |
1526 | 630 | unsigned char *dp = pix->samples; |
1527 | 630 | int w = pix->w; |
1528 | 630 | int n = pix->n; |
1529 | 630 | int span = w >= 2 ? (w-1)*n : 0; |
1530 | 630 | int stride = pix->stride; |
1531 | | |
1532 | 630 | get_alpha_edge_values(rows, &t, &b); |
1533 | 630 | get_alpha_edge_values(cols, &l, &r); |
1534 | | |
1535 | 630 | l = (255 * l + 128)>>8; |
1536 | 630 | r = (255 * r + 128)>>8; |
1537 | 630 | tl = (l * t + 128)>>8; |
1538 | 630 | tr = (r * t + 128)>>8; |
1539 | 630 | bl = (l * b + 128)>>8; |
1540 | 630 | br = (r * b + 128)>>8; |
1541 | 630 | t = (255 * t + 128)>>8; |
1542 | 630 | b = (255 * b + 128)>>8; |
1543 | 630 | dp += n-1; |
1544 | 630 | *dp = tl; |
1545 | 630 | dp += n; |
1546 | 1.61k | for (x = w-2; x > 0; x--) |
1547 | 981 | { |
1548 | 981 | *dp = t; |
1549 | 981 | dp += n; |
1550 | 981 | } |
1551 | 630 | if (x == 0) |
1552 | 607 | { |
1553 | 607 | *dp = tr; |
1554 | 607 | dp += n; |
1555 | 607 | } |
1556 | 630 | dp += stride - w*n; |
1557 | 1.61k | for (y = pix->h-2; y > 0; y--) |
1558 | 982 | { |
1559 | 982 | dp[span] = r; |
1560 | 982 | *dp = l; |
1561 | 982 | dp += stride; |
1562 | 982 | } |
1563 | 630 | if (y == 0) |
1564 | 630 | { |
1565 | 630 | *dp = bl; |
1566 | 630 | dp += n; |
1567 | 1.61k | for (x = w-2; x > 0; x--) |
1568 | 981 | { |
1569 | 981 | *dp = b; |
1570 | 981 | dp += n; |
1571 | 981 | } |
1572 | 630 | if (x == 0) |
1573 | 607 | { |
1574 | 607 | *dp = br; |
1575 | 607 | } |
1576 | 630 | } |
1577 | 630 | } |
1578 | | |
1579 | | fz_pixmap * |
1580 | | fz_scale_pixmap(fz_context *ctx, fz_pixmap *src, float x, float y, float w, float h, const fz_irect *clip) |
1581 | 0 | { |
1582 | 0 | return fz_scale_pixmap_cached(ctx, src, x, y, w, h, clip, NULL, NULL); |
1583 | 0 | } |
1584 | | |
1585 | | fz_pixmap * |
1586 | | fz_scale_pixmap_cached(fz_context *ctx, const fz_pixmap *src, float x, float y, float w, float h, const fz_irect *clip, fz_scale_cache *cache_x, fz_scale_cache *cache_y) |
1587 | 37.6k | { |
1588 | 37.6k | fz_scale_filter *filter = &fz_scale_filter_simple; |
1589 | 37.6k | fz_weights *contrib_rows = NULL; |
1590 | 37.6k | fz_weights *contrib_cols = NULL; |
1591 | 37.6k | fz_pixmap *output = NULL; |
1592 | 37.6k | unsigned char *temp = NULL; |
1593 | 37.6k | int max_row, temp_span, temp_rows, row; |
1594 | 37.6k | int dst_w_int, dst_h_int, dst_x_int, dst_y_int; |
1595 | 37.6k | int flip_x, flip_y, forcealpha; |
1596 | 37.6k | fz_rect patch; |
1597 | | |
1598 | 37.6k | fz_var(contrib_cols); |
1599 | 37.6k | fz_var(contrib_rows); |
1600 | | |
1601 | | /* Avoid extreme scales where overflows become problematic. */ |
1602 | 37.6k | if (w > (1<<24) || h > (1<<24) || w < -(1<<24) || h < -(1<<24)) |
1603 | 0 | return NULL; |
1604 | 37.6k | if (x > (1<<24) || y > (1<<24) || x < -(1<<24) || y < -(1<<24)) |
1605 | 0 | return NULL; |
1606 | | |
1607 | | /* Clamp small ranges of w and h */ |
1608 | 37.6k | if (w <= -1) |
1609 | 8.49k | { |
1610 | | /* Large negative range. Don't clamp */ |
1611 | 8.49k | } |
1612 | 29.1k | else if (w < 0) |
1613 | 0 | { |
1614 | 0 | w = -1; |
1615 | 0 | } |
1616 | 29.1k | else if (w < 1) |
1617 | 568 | { |
1618 | 568 | w = 1; |
1619 | 568 | } |
1620 | 37.6k | if (h <= -1) |
1621 | 1.52k | { |
1622 | | /* Large negative range. Don't clamp */ |
1623 | 1.52k | } |
1624 | 36.1k | else if (h < 0) |
1625 | 67 | { |
1626 | 67 | h = -1; |
1627 | 67 | } |
1628 | 36.0k | else if (h < 1) |
1629 | 522 | { |
1630 | 522 | h = 1; |
1631 | 522 | } |
1632 | | |
1633 | | /* If the src has an alpha, we'll make the dst have an alpha automatically. |
1634 | | * We also need to force the dst to have an alpha if x/y/w/h aren't ints. */ |
1635 | 37.6k | forcealpha = !src->alpha && (x != (float)(int)x || y != (float)(int)y || w != (float)(int)w || h != (float)(int)h); |
1636 | | |
1637 | | /* Find the destination bbox, width/height, and sub pixel offset, |
1638 | | * allowing for whether we're flipping or not. */ |
1639 | | /* The (x,y) position given describes where the top left corner |
1640 | | * of the source image should be mapped to (i.e. where (0,0) in image |
1641 | | * space ends up). Also there are differences in the way we scale |
1642 | | * horizontally and vertically. When scaling rows horizontally, we |
1643 | | * always read forwards through the source, and store either forwards |
1644 | | * or in reverse as required. When scaling vertically, we always store |
1645 | | * out forwards, but may feed source rows in in a different order. |
1646 | | * |
1647 | | * Consider the image rectangle 'r' to which the image is mapped, |
1648 | | * and the (possibly) larger rectangle 'R', given by expanding 'r' to |
1649 | | * complete pixels. |
1650 | | * |
1651 | | * x can either be r.xmin-R.xmin or R.xmax-r.xmax depending on whether |
1652 | | * the image is x flipped or not. Whatever happens 0 <= x < 1. |
1653 | | * y is always R.ymax - r.ymax. |
1654 | | */ |
1655 | | /* dst_x_int is calculated to be the left of the scaled image, and |
1656 | | * x (the sub pixel offset) is the distance in from either the left |
1657 | | * or right pixel expanded edge. */ |
1658 | 37.6k | flip_x = (w < 0); |
1659 | 37.6k | if (flip_x) |
1660 | 8.49k | { |
1661 | 8.49k | float tmp; |
1662 | 8.49k | w = -w; |
1663 | 8.49k | dst_x_int = floorf(x-w); |
1664 | 8.49k | tmp = ceilf(x); |
1665 | 8.49k | dst_w_int = (int)tmp; |
1666 | 8.49k | x = tmp - x; |
1667 | 8.49k | dst_w_int -= dst_x_int; |
1668 | 8.49k | } |
1669 | 29.1k | else |
1670 | 29.1k | { |
1671 | 29.1k | dst_x_int = floorf(x); |
1672 | 29.1k | x -= dst_x_int; |
1673 | 29.1k | dst_w_int = (int)ceilf(x + w); |
1674 | 29.1k | } |
1675 | | /* dst_y_int is calculated to be the top of the scaled image, and |
1676 | | * y (the sub pixel offset) is the distance in from either the top |
1677 | | * or bottom pixel expanded edge. |
1678 | | */ |
1679 | 37.6k | flip_y = (h < 0); |
1680 | 37.6k | if (flip_y) |
1681 | 1.59k | { |
1682 | 1.59k | float tmp; |
1683 | 1.59k | h = -h; |
1684 | 1.59k | dst_y_int = floorf(y-h); |
1685 | 1.59k | tmp = ceilf(y); |
1686 | 1.59k | dst_h_int = (int)tmp; |
1687 | 1.59k | y = tmp - y; |
1688 | 1.59k | dst_h_int -= dst_y_int; |
1689 | 1.59k | } |
1690 | 36.0k | else |
1691 | 36.0k | { |
1692 | 36.0k | dst_y_int = floorf(y); |
1693 | 36.0k | y -= dst_y_int; |
1694 | 36.0k | dst_h_int = (int)ceilf(y + h); |
1695 | 36.0k | } |
1696 | | |
1697 | 37.6k | fz_valgrind_pixmap(src); |
1698 | | |
1699 | | /* Step 0: Calculate the patch */ |
1700 | 37.6k | patch.x0 = 0; |
1701 | 37.6k | patch.y0 = 0; |
1702 | 37.6k | patch.x1 = dst_w_int; |
1703 | 37.6k | patch.y1 = dst_h_int; |
1704 | 37.6k | if (clip) |
1705 | 30.3k | { |
1706 | 30.3k | if (flip_x) |
1707 | 8.49k | { |
1708 | 8.49k | if (dst_x_int + dst_w_int > clip->x1) |
1709 | 97 | patch.x0 = dst_x_int + dst_w_int - clip->x1; |
1710 | 8.49k | if (clip->x0 > dst_x_int) |
1711 | 8 | { |
1712 | 8 | patch.x1 = dst_w_int - (clip->x0 - dst_x_int); |
1713 | 8 | dst_x_int = clip->x0; |
1714 | 8 | } |
1715 | 8.49k | } |
1716 | 21.8k | else |
1717 | 21.8k | { |
1718 | 21.8k | if (dst_x_int + dst_w_int > clip->x1) |
1719 | 914 | patch.x1 = clip->x1 - dst_x_int; |
1720 | 21.8k | if (clip->x0 > dst_x_int) |
1721 | 702 | { |
1722 | 702 | patch.x0 = clip->x0 - dst_x_int; |
1723 | 702 | dst_x_int += patch.x0; |
1724 | 702 | } |
1725 | 21.8k | } |
1726 | | |
1727 | 30.3k | if (flip_y) |
1728 | 1.59k | { |
1729 | 1.59k | if (dst_y_int + dst_h_int > clip->y1) |
1730 | 19 | patch.y1 = clip->y1 - dst_y_int; |
1731 | 1.59k | if (clip->y0 > dst_y_int) |
1732 | 32 | { |
1733 | 32 | patch.y0 = clip->y0 - dst_y_int; |
1734 | 32 | dst_y_int = clip->y0; |
1735 | 32 | } |
1736 | 1.59k | } |
1737 | 28.7k | else |
1738 | 28.7k | { |
1739 | 28.7k | if (dst_y_int + dst_h_int > clip->y1) |
1740 | 1.77k | patch.y1 = clip->y1 - dst_y_int; |
1741 | 28.7k | if (clip->y0 > dst_y_int) |
1742 | 882 | { |
1743 | 882 | patch.y0 = clip->y0 - dst_y_int; |
1744 | 882 | dst_y_int += patch.y0; |
1745 | 882 | } |
1746 | 28.7k | } |
1747 | 30.3k | } |
1748 | 37.6k | if (patch.x0 >= patch.x1 || patch.y0 >= patch.y1) |
1749 | 155 | return NULL; |
1750 | | |
1751 | 75.0k | fz_try(ctx) |
1752 | 75.0k | { |
1753 | | /* Step 1: Calculate the weights for columns and rows */ |
1754 | 37.5k | #ifdef SINGLE_PIXEL_SPECIALS |
1755 | 37.5k | if (src->w == 1) |
1756 | 37 | contrib_cols = NULL; |
1757 | 37.4k | else |
1758 | 37.4k | #endif /* SINGLE_PIXEL_SPECIALS */ |
1759 | 37.4k | contrib_cols = Memento_label(make_weights(ctx, src->w, x, w, filter, 0, dst_w_int, patch.x0, patch.x1, src->n, flip_x, cache_x), "contrib_cols"); |
1760 | 37.5k | #ifdef SINGLE_PIXEL_SPECIALS |
1761 | 37.5k | if (src->h == 1) |
1762 | 213 | contrib_rows = NULL; |
1763 | 37.3k | else |
1764 | 37.3k | #endif /* SINGLE_PIXEL_SPECIALS */ |
1765 | 37.3k | contrib_rows = Memento_label(make_weights(ctx, src->h, y, h, filter, 1, dst_h_int, patch.y0, patch.y1, src->n, flip_y, cache_y), "contrib_rows"); |
1766 | | |
1767 | 37.5k | output = fz_new_pixmap(ctx, src->colorspace, patch.x1 - patch.x0, patch.y1 - patch.y0, src->seps, src->alpha || forcealpha); |
1768 | 37.5k | } |
1769 | 75.0k | fz_catch(ctx) |
1770 | 0 | { |
1771 | 0 | if (!cache_x) |
1772 | 0 | fz_free(ctx, contrib_cols); |
1773 | 0 | if (!cache_y) |
1774 | 0 | fz_free(ctx, contrib_rows); |
1775 | 0 | fz_rethrow(ctx); |
1776 | 0 | } |
1777 | 37.5k | output->x = dst_x_int; |
1778 | 37.5k | output->y = dst_y_int; |
1779 | | |
1780 | | /* Step 2: Apply the weights */ |
1781 | 37.5k | #ifdef SINGLE_PIXEL_SPECIALS |
1782 | 37.5k | if (!contrib_rows) |
1783 | 213 | { |
1784 | | /* Only 1 source pixel high. */ |
1785 | 213 | if (!contrib_cols) |
1786 | 2 | { |
1787 | | /* Only 1 pixel in the entire image! */ |
1788 | 2 | duplicate_single_pixel(output->samples, src->samples, src->n, forcealpha, patch.x1-patch.x0, patch.y1-patch.y0, output->stride); |
1789 | 2 | fz_valgrind_pixmap(output); |
1790 | 2 | } |
1791 | 211 | else |
1792 | 211 | { |
1793 | | /* Scale the row once, then copy it. */ |
1794 | 211 | scale_single_row(output->samples, output->stride, src->samples, contrib_cols, src->w, patch.y1-patch.y0, forcealpha); |
1795 | 211 | fz_valgrind_pixmap(output); |
1796 | 211 | } |
1797 | 213 | } |
1798 | 37.3k | else if (!contrib_cols) |
1799 | 35 | { |
1800 | | /* Only 1 source pixel wide. Scale the col and duplicate. */ |
1801 | 35 | scale_single_col(output->samples, output->stride, src->samples, src->stride, contrib_rows, src->h, src->n, patch.x1-patch.x0, forcealpha); |
1802 | 35 | fz_valgrind_pixmap(output); |
1803 | 35 | } |
1804 | 37.2k | else |
1805 | 37.2k | #endif /* SINGLE_PIXEL_SPECIALS */ |
1806 | 37.2k | { |
1807 | 37.2k | void (*row_scale_in)(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights); |
1808 | 37.2k | void (*row_scale_out)(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int w, int n, int row); |
1809 | | |
1810 | 37.2k | temp_span = contrib_cols->count * src->n; |
1811 | 37.2k | temp_rows = contrib_rows->max_len; |
1812 | 37.2k | if (temp_span <= 0 || temp_rows > INT_MAX / temp_span) |
1813 | 0 | goto cleanup; |
1814 | 74.5k | fz_try(ctx) |
1815 | 74.5k | { |
1816 | 37.2k | temp = fz_calloc(ctx, (size_t)temp_span*temp_rows, sizeof(unsigned char)); |
1817 | 37.2k | } |
1818 | 74.5k | fz_catch(ctx) |
1819 | 0 | { |
1820 | 0 | fz_drop_pixmap(ctx, output); |
1821 | 0 | if (!cache_x) |
1822 | 0 | fz_free(ctx, contrib_cols); |
1823 | 0 | if (!cache_y) |
1824 | 0 | fz_free(ctx, contrib_rows); |
1825 | 0 | fz_rethrow(ctx); |
1826 | 0 | } |
1827 | 37.2k | switch (src->n) |
1828 | 37.2k | { |
1829 | 0 | default: |
1830 | 0 | row_scale_in = scale_row_to_temp; |
1831 | 0 | break; |
1832 | 29.0k | case 1: /* Image mask case or Greyscale case */ |
1833 | 29.0k | row_scale_in = scale_row_to_temp1; |
1834 | 29.0k | break; |
1835 | 7 | case 2: /* Greyscale with alpha case */ |
1836 | 7 | row_scale_in = scale_row_to_temp2; |
1837 | 7 | break; |
1838 | 8.23k | case 3: /* RGB case */ |
1839 | 8.23k | row_scale_in = scale_row_to_temp3; |
1840 | 8.23k | break; |
1841 | 36 | case 4: /* RGBA or CMYK case */ |
1842 | 36 | row_scale_in = scale_row_to_temp4; |
1843 | 36 | break; |
1844 | 37.2k | } |
1845 | 37.2k | row_scale_out = forcealpha ? scale_row_from_temp_alpha : scale_row_from_temp; |
1846 | 37.2k | max_row = contrib_rows->index[contrib_rows->index[0]]; |
1847 | 3.39M | for (row = 0; row < contrib_rows->count; row++) |
1848 | 3.36M | { |
1849 | | /* |
1850 | | Which source rows do we need to have scaled into the |
1851 | | temporary buffer in order to be able to do the final |
1852 | | scale? |
1853 | | */ |
1854 | 3.36M | int row_index = contrib_rows->index[row]; |
1855 | 3.36M | int row_min = contrib_rows->index[row_index++]; |
1856 | 3.36M | int row_len = contrib_rows->index[row_index]; |
1857 | 8.63M | while (max_row < row_min+row_len) |
1858 | 5.27M | { |
1859 | | /* Scale another row */ |
1860 | 5.27M | assert(max_row < src->h); |
1861 | 5.27M | (*row_scale_in)(&temp[temp_span*(max_row % temp_rows)], &src->samples[(flip_y ? (src->h-1-max_row): max_row)*src->stride], contrib_cols); |
1862 | 5.27M | max_row++; |
1863 | 5.27M | } |
1864 | | |
1865 | 3.36M | (*row_scale_out)(&output->samples[row*output->stride], temp, contrib_rows, contrib_cols->count, src->n, row); |
1866 | 3.36M | } |
1867 | 37.2k | fz_free(ctx, temp); |
1868 | | |
1869 | 37.2k | if (forcealpha) |
1870 | 630 | adjust_alpha_edges(output, contrib_rows, contrib_cols); |
1871 | | |
1872 | 37.2k | fz_valgrind_pixmap(output); |
1873 | 37.2k | } |
1874 | | |
1875 | 37.5k | cleanup: |
1876 | 37.5k | if (!cache_y) |
1877 | 0 | fz_free(ctx, contrib_rows); |
1878 | 37.5k | if (!cache_x) |
1879 | 0 | fz_free(ctx, contrib_cols); |
1880 | | |
1881 | 37.5k | return output; |
1882 | 37.5k | } |
1883 | | |
1884 | | void |
1885 | | fz_drop_scale_cache(fz_context *ctx, fz_scale_cache *sc) |
1886 | 36.5k | { |
1887 | 36.5k | if (!sc) |
1888 | 0 | return; |
1889 | 36.5k | fz_free(ctx, sc->weights); |
1890 | 36.5k | fz_free(ctx, sc); |
1891 | 36.5k | } |
1892 | | |
1893 | | fz_scale_cache * |
1894 | | fz_new_scale_cache(fz_context *ctx) |
1895 | 36.5k | { |
1896 | 36.5k | return fz_malloc_struct(ctx, fz_scale_cache); |
1897 | 36.5k | } |