Coverage Report

Created: 2024-05-20 06:23

/src/mupdf/source/fitz/draw-scale-simple.c
Line
Count
Source (jump to first uncovered line)
1
// Copyright (C) 2004-2021 Artifex Software, Inc.
2
//
3
// This file is part of MuPDF.
4
//
5
// MuPDF is free software: you can redistribute it and/or modify it under the
6
// terms of the GNU Affero General Public License as published by the Free
7
// Software Foundation, either version 3 of the License, or (at your option)
8
// any later version.
9
//
10
// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13
// details.
14
//
15
// You should have received a copy of the GNU Affero General Public License
16
// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17
//
18
// Alternative licensing terms are available from the licensor.
19
// For commercial licensing, see <https://www.artifex.com/> or contact
20
// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21
// CA 94129, USA, for further information.
22
23
/*
24
This code does smooth scaling of a pixmap.
25
26
This function returns a new pixmap representing the area starting at (0,0)
27
given by taking the source pixmap src, scaling it to width w, and height h,
28
and then positioning it at (frac(x),frac(y)).
29
30
This is a cut-down version of draw_scale.c that only copes with filters
31
that return values strictly in the 0..1 range, and uses bytes for
32
intermediate results rather than ints.
33
*/
34
35
#include "mupdf/fitz.h"
36
37
#include "draw-imp.h"
38
#include "pixmap-imp.h"
39
40
#include <math.h>
41
#include <string.h>
42
#include <assert.h>
43
#include <limits.h>
44
45
/* Do we special case handling of single pixel high/wide images? The
46
 * 'purest' handling is given by not special casing them, but certain
47
 * files that use such images 'stack' them to give full images. Not
48
 * special casing them results in them being fainter and giving noticeable
49
 * rounding errors.
50
 */
51
#define SINGLE_PIXEL_SPECIALS
52
53
/*
54
Consider a row of source samples, src, of width src_w, positioned at x,
55
scaled to width dst_w.
56
57
src[i] is centred at: x + (i + 0.5)*dst_w/src_w
58
59
Therefore the distance between the centre of the jth output pixel and
60
the centre of the ith source sample is:
61
62
dist[j,i] = j + 0.5 - (x + (i + 0.5)*dst_w/src_w)
63
64
When scaling up, therefore:
65
66
dst[j] = SUM(filter(dist[j,i]) * src[i])
67
  (for all ints i)
68
69
This can be simplified by noticing that filters are only non zero within
70
a given filter width (henceforth called W). So:
71
72
dst[j] = SUM(filter(dist[j,i]) * src[i])
73
  (for ints i, s.t. (j*src_w/dst_w)-W < i < (j*src_w/dst_w)+W)
74
75
When scaling down, each filtered source sample is stretched to be wider
76
to avoid aliasing issues. This effectively reduces the distance between
77
centres.
78
79
dst[j] = SUM(filter(dist[j,i] * F) * F * src[i])
80
  (where F = dst_w/src_w)
81
  (for ints i, s.t. (j-W)/F < i < (j+W)/F)
82
83
*/
84
85
typedef struct fz_scale_filter
86
{
87
  int width;
88
  float (*fn)(struct fz_scale_filter *, float);
89
} fz_scale_filter;
90
91
/* Image scale filters */
92
93
static float
94
triangle(fz_scale_filter *filter, float f)
95
0
{
96
0
  if (f >= 1)
97
0
    return 0;
98
0
  return 1-f;
99
0
}
100
101
static float
102
box(fz_scale_filter *filter, float f)
103
0
{
104
0
  if (f >= 0.5f)
105
0
    return 0;
106
0
  return 1;
107
0
}
108
109
static float
110
simple(fz_scale_filter *filter, float x)
111
21.5M
{
112
21.5M
  if (x >= 1)
113
32.1k
    return 0;
114
21.4M
  return 1 + (2*x - 3)*x*x;
115
21.5M
}
116
117
fz_scale_filter fz_scale_filter_box = { 1, box };
118
fz_scale_filter fz_scale_filter_triangle = { 1, triangle };
119
fz_scale_filter fz_scale_filter_simple = { 1, simple };
120
121
/*
122
We build ourselves a set of tables to contain the precalculated weights
123
for a given set of scale settings.
124
125
The first dst_w entries in index are the index into index of the
126
sets of weight for each destination pixel.
127
128
Each of the sets of weights is a set of values consisting of:
129
  the minimum source pixel index used for this destination pixel
130
  the number of weights used for this destination pixel
131
  the weights themselves
132
133
So to calculate dst[i] we do the following:
134
135
  weights = &index[index[i]];
136
  min = *weights++;
137
  len = *weights++;
138
  dst[i] = 0;
139
  while (--len > 0)
140
    dst[i] += src[min++] * *weights++
141
142
in addition, we guarantee that at the end of this process weights will now
143
point to the weights value for dst pixel i+1.
144
145
In the simplest version of this algorithm, we would scale the whole image
146
horizontally first into a temporary buffer, then scale that temporary
147
buffer again vertically to give us our result. Using such a simple
148
algorithm would mean that could use the same style of weights for both
149
horizontal and vertical scaling.
150
151
Unfortunately, this would also require a large temporary buffer,
152
particularly in the case where we are scaling up.
153
154
We therefore modify the algorithm as follows; we scale scanlines from the
155
source image horizontally into a temporary buffer, until we have all the
156
contributors for a given output scanline. We then produce that output
157
scanline from the temporary buffer. In this way we restrict the height
158
of the temporary buffer to a small fraction of the final size.
159
160
Unfortunately, this means that the pseudo code for recombining a
161
scanline of fully scaled pixels is as follows:
162
163
  weights = &index[index[y]];
164
  min = *weights++;
165
  len = *weights++;
166
  for (x=0 to dst_w)
167
    min2 = min
168
    len2 = len
169
    weights2 = weights
170
    dst[x] = 0;
171
    while (--len2 > 0)
172
      dst[x] += temp[x][(min2++) % tmp_buf_height] * *weights2++
173
174
i.e. it requires a % operation for every source pixel - this is typically
175
expensive.
176
177
To avoid this, we alter the order in which vertical weights are stored,
178
so that they are ordered in the same order as the temporary buffer lines
179
would appear. This simplifies the algorithm to:
180
181
  weights = &index[index[y]];
182
  min = *weights++;
183
  len = *weights++;
184
  for (x=0 to dst_w)
185
    min2 = 0
186
    len2 = len
187
    weights2 = weights
188
    dst[x] = 0;
189
    while (--len2 > 0)
190
      dst[x] += temp[i][min2++] * *weights2++
191
192
This means that len may be larger than it needs to be (due to the
193
possible inclusion of a zero weight row or two), but in practise this
194
is only an increase of 1 or 2 at worst.
195
196
We implement this by generating the weights as normal (but ensuring we
197
leave enough space) and then reordering afterwards.
198
199
*/
200
201
/* This structure is accessed from ARM code - bear this in mind before
202
 * altering it! */
203
typedef struct
204
{
205
  int flip; /* true if outputting reversed */
206
  int count;  /* number of output pixels we have records for in this table */
207
  int max_len;  /* Maximum number of weights for any one output pixel */
208
  int n;    /* number of components (src->n) */
209
  int new_line; /* True if no weights for the current output pixel */
210
  int patch_l;  /* How many output pixels we skip over */
211
  int index[1];
212
} fz_weights;
213
214
struct fz_scale_cache
215
{
216
  int src_w;
217
  float x;
218
  float dst_w;
219
  fz_scale_filter *filter;
220
  int vertical;
221
  int dst_w_int;
222
  int patch_l;
223
  int patch_r;
224
  int n;
225
  int flip;
226
  fz_weights *weights;
227
};
228
229
static fz_weights *
230
new_weights(fz_context *ctx, fz_scale_filter *filter, int src_w, float dst_w, int patch_w, int n, int flip, int patch_l)
231
44.9k
{
232
44.9k
  int max_len;
233
44.9k
  fz_weights *weights;
234
235
44.9k
  if (src_w > dst_w)
236
44.9k
  {
237
    /* Scaling down, so there will be a maximum of
238
     * 2*filterwidth*src_w/dst_w src pixels
239
     * contributing to each dst pixel. */
240
44.9k
    max_len = (int)ceilf((2 * filter->width * src_w)/dst_w);
241
44.9k
    if (max_len > src_w)
242
6.36k
      max_len = src_w;
243
44.9k
  }
244
0
  else
245
0
  {
246
    /* Scaling up, so there will be a maximum of
247
     * 2*filterwidth src pixels contributing to each dst pixel.
248
     */
249
0
    max_len = 2 * filter->width;
250
0
  }
251
  /* We need the size of the struct,
252
   * plus patch_w*sizeof(int) for the index
253
   * plus (2+max_len)*sizeof(int) for the weights
254
   * plus room for an extra set of weights for reordering.
255
   */
256
44.9k
  weights = fz_malloc(ctx, sizeof(*weights)+(size_t)(max_len+3)*(patch_w+1)*sizeof(int));
257
44.9k
  if (!weights)
258
0
    return NULL;
259
44.9k
  weights->count = -1;
260
44.9k
  weights->max_len = max_len;
261
44.9k
  weights->index[0] = patch_w;
262
44.9k
  weights->n = n;
263
44.9k
  weights->patch_l = patch_l;
264
44.9k
  weights->flip = flip;
265
44.9k
  return weights;
266
44.9k
}
267
268
/* j is destination pixel in the patch_l..patch_l+patch_w range */
269
static void
270
init_weights(fz_weights *weights, int j)
271
7.23M
{
272
7.23M
  int index;
273
274
7.23M
  j -= weights->patch_l;
275
7.23M
  assert(weights->count == j-1);
276
7.23M
  weights->count++;
277
7.23M
  weights->new_line = 1;
278
7.23M
  if (j == 0)
279
44.9k
    index = weights->index[0];
280
7.19M
  else
281
7.19M
  {
282
7.19M
    index = weights->index[j-1];
283
7.19M
    index += 2 + weights->index[index+1];
284
7.19M
  }
285
7.23M
  weights->index[j] = index; /* row pointer */
286
7.23M
  weights->index[index] = 0; /* min */
287
7.23M
  weights->index[index+1] = 0; /* len */
288
7.23M
}
289
290
static void
291
insert_weight(fz_weights *weights, int j, int i, int weight)
292
19.6M
{
293
19.6M
  int min, len, index;
294
295
  /* Move j from patch_l...patch_l+patch_w range to 0..patch_w range */
296
19.6M
  j -= weights->patch_l;
297
19.6M
  if (weights->new_line)
298
7.23M
  {
299
    /* New line */
300
7.23M
    weights->new_line = 0;
301
7.23M
    index = weights->index[j]; /* row pointer */
302
7.23M
    weights->index[index] = i; /* min */
303
7.23M
    weights->index[index+1] = 0; /* len */
304
7.23M
  }
305
19.6M
  index = weights->index[j];
306
19.6M
  min = weights->index[index++];
307
19.6M
  len = weights->index[index++];
308
19.6M
  while (i < min)
309
0
  {
310
    /* This only happens in rare cases, but we need to insert
311
     * one earlier. In exceedingly rare cases we may need to
312
     * insert more than one earlier. */
313
0
    int k;
314
315
0
    for (k = len; k > 0; k--)
316
0
    {
317
0
      weights->index[index+k] = weights->index[index+k-1];
318
0
    }
319
0
    weights->index[index] = 0;
320
0
    min--;
321
0
    len++;
322
0
    weights->index[index-2] = min;
323
0
    weights->index[index-1] = len;
324
0
  }
325
19.6M
  if (i-min >= len)
326
19.6M
  {
327
    /* The usual case */
328
19.6M
    while (i-min >= ++len)
329
0
    {
330
0
      weights->index[index+len-1] = 0;
331
0
    }
332
19.6M
    assert(len-1 == i-min);
333
19.6M
    weights->index[index+i-min] = weight;
334
19.6M
    weights->index[index-1] = len;
335
19.6M
    assert(len <= weights->max_len);
336
19.6M
  }
337
0
  else
338
0
  {
339
    /* Infrequent case */
340
0
    weights->index[index+i-min] += weight;
341
0
  }
342
19.6M
}
343
344
static void
345
add_weight(fz_weights *weights, int j, int i, fz_scale_filter *filter,
346
  float x, float F, float G, int src_w, float dst_w)
347
21.5M
{
348
21.5M
  float dist = j - x + 0.5f - ((i + 0.5f)*dst_w/src_w);
349
21.5M
  float f;
350
21.5M
  int weight;
351
352
21.5M
  dist *= G;
353
21.5M
  if (dist < 0)
354
10.7M
    dist = -dist;
355
21.5M
  f = filter->fn(filter, dist)*F;
356
21.5M
  weight = (int)(256*f+0.5f);
357
358
  /* Ensure i is in range */
359
21.5M
  if (i < 0 || i >= src_w)
360
763k
    return;
361
20.7M
  if (weight != 0)
362
19.6M
    insert_weight(weights, j, i, weight);
363
20.7M
}
364
365
static void
366
reorder_weights(fz_weights *weights, int j, int src_w)
367
2.93M
{
368
2.93M
  int idx = weights->index[j - weights->patch_l];
369
2.93M
  int min = weights->index[idx++];
370
2.93M
  int len = weights->index[idx++];
371
2.93M
  int max = weights->max_len;
372
2.93M
  int tmp = idx+max;
373
2.93M
  int i, off;
374
375
  /* Copy into the temporary area */
376
2.93M
  memcpy(&weights->index[tmp], &weights->index[idx], sizeof(int)*len);
377
378
  /* Pad out if required */
379
2.93M
  assert(len <= max);
380
2.93M
  assert(min+len <= src_w);
381
2.93M
  off = 0;
382
2.93M
  if (len < max)
383
1.33M
  {
384
1.33M
    memset(&weights->index[tmp+len], 0, sizeof(int)*(max-len));
385
1.33M
    len = max;
386
1.33M
    if (min + len > src_w)
387
16.1k
    {
388
16.1k
      off = min + len - src_w;
389
16.1k
      min = src_w - len;
390
16.1k
      weights->index[idx-2] = min;
391
16.1k
    }
392
1.33M
    weights->index[idx-1] = len;
393
1.33M
  }
394
395
  /* Copy back into the proper places */
396
12.6M
  for (i = 0; i < len; i++)
397
9.68M
  {
398
9.68M
    weights->index[idx+((min+i+off) % max)] = weights->index[tmp+i];
399
9.68M
  }
400
2.93M
}
401
402
/* Due to rounding and edge effects, the sums for the weights sometimes don't
403
 * add up to 256. This causes visible rendering effects. Therefore, we take
404
 * pains to ensure that they 1) never exceed 256, and 2) add up to exactly
405
 * 256 for all pixels that are completely covered. See bug #691629. */
406
static void
407
check_weights(fz_weights *weights, int j, int w, float x, float wf)
408
7.23M
{
409
7.23M
  int idx, len;
410
7.23M
  int sum = 0;
411
7.23M
  int max = -256;
412
7.23M
  int maxidx = 0;
413
7.23M
  int i;
414
415
7.23M
  idx = weights->index[j - weights->patch_l];
416
7.23M
  idx++; /* min */
417
7.23M
  len = weights->index[idx++];
418
419
26.9M
  for(i=0; i < len; i++)
420
19.6M
  {
421
19.6M
    int v = weights->index[idx++];
422
19.6M
    sum += v;
423
19.6M
    if (v > max)
424
12.7M
    {
425
12.7M
      max = v;
426
12.7M
      maxidx = idx;
427
12.7M
    }
428
19.6M
  }
429
  /* If we aren't the first or last pixel, OR if the sum is too big
430
   * then adjust it. */
431
7.23M
  if (((j != 0) && (j != w-1)) || (sum > 256))
432
7.15M
    weights->index[maxidx-1] += 256-sum;
433
  /* Otherwise, if we are the first pixel, and it's fully covered, then
434
   * adjust it. */
435
80.6k
  else if ((j == 0) && (x < 0.0001f) && (sum != 256))
436
39.4k
    weights->index[maxidx-1] += 256-sum;
437
  /* Finally, if we are the last pixel, and it's fully covered, then
438
   * adjust it. */
439
41.2k
  else if ((j == w-1) && (w - wf < 0.0001f) && (sum != 256))
440
32.4k
    weights->index[maxidx-1] += 256-sum;
441
7.23M
}
442
443
static int
444
window_fix(int l, int *rp, float window, float centre)
445
169
{
446
169
  int r = *rp;
447
338
  while (centre - l > window)
448
169
    l++;
449
338
  while (r - centre > window)
450
169
    r--;
451
169
  *rp = r;
452
169
  return l;
453
169
}
454
455
static fz_weights *
456
make_weights(fz_context *ctx, int src_w, float x, float dst_w, fz_scale_filter *filter, int vertical, int dst_w_int, int patch_l, int patch_r, int n, int flip, fz_scale_cache *cache)
457
74.8k
{
458
74.8k
  fz_weights *weights;
459
74.8k
  float F, G;
460
74.8k
  float window;
461
74.8k
  int j;
462
463
74.8k
  if (cache)
464
74.8k
  {
465
74.8k
    if (cache->src_w == src_w && cache->x == x && cache->dst_w == dst_w &&
466
74.8k
      cache->filter == filter && cache->vertical == vertical &&
467
74.8k
      cache->dst_w_int == dst_w_int &&
468
74.8k
      cache->patch_l == patch_l && cache->patch_r == patch_r &&
469
74.8k
      cache->n == n && cache->flip == flip)
470
29.8k
    {
471
29.8k
      return cache->weights;
472
29.8k
    }
473
44.9k
    cache->src_w = src_w;
474
44.9k
    cache->x = x;
475
44.9k
    cache->dst_w = dst_w;
476
44.9k
    cache->filter = filter;
477
44.9k
    cache->vertical = vertical;
478
44.9k
    cache->dst_w_int = dst_w_int;
479
44.9k
    cache->patch_l = patch_l;
480
44.9k
    cache->patch_r = patch_r;
481
44.9k
    cache->n = n;
482
44.9k
    cache->flip = flip;
483
44.9k
    fz_free(ctx, cache->weights);
484
44.9k
    cache->weights = NULL;
485
44.9k
  }
486
487
44.9k
  if (dst_w < src_w)
488
44.9k
  {
489
    /* Scaling down */
490
44.9k
    F = dst_w / src_w;
491
44.9k
    G = 1;
492
44.9k
  }
493
0
  else
494
0
  {
495
    /* Scaling up */
496
0
    F = 1;
497
0
    G = src_w / dst_w;
498
0
  }
499
44.9k
  window = filter->width / F;
500
44.9k
  weights = new_weights(ctx, filter, src_w, dst_w, patch_r-patch_l, n, flip, patch_l);
501
44.9k
  if (!weights)
502
0
    return NULL;
503
7.28M
  for (j = patch_l; j < patch_r; j++)
504
7.23M
  {
505
    /* find the position of the centre of dst[j] in src space */
506
7.23M
    float centre = (j - x + 0.5f)*src_w/dst_w - 0.5f;
507
7.23M
    int l, r;
508
7.23M
    l = ceilf(centre - window);
509
7.23M
    r = floorf(centre + window);
510
511
    /* Now, due to the vagaries of floating point, if centre is large, l
512
     * and r can actually end up further than 2*window apart. All we care
513
     * about in this case is that we don't crash! We want a cheap correction
514
     * that avoids the assert and doesn't cost too much in the normal case.
515
     * This should do. */
516
7.23M
    if (r - l > 2 * window)
517
169
      l = window_fix(l, &r, window, centre);
518
519
7.23M
    init_weights(weights, j);
520
28.7M
    for (; l <= r; l++)
521
21.5M
    {
522
21.5M
      add_weight(weights, j, l, filter, x, F, G, src_w, dst_w);
523
21.5M
    }
524
7.23M
    if (weights->new_line)
525
227
    {
526
      /* In very rare cases (bug 706764) we might not actually
527
       * have generated any non-zero weights for this destination
528
       * pixel. Just use the central pixel. */
529
227
      int src_x = floorf(centre);
530
227
      if (src_x >= src_w)
531
0
        src_x = src_w-1;
532
227
      if (src_x < 0)
533
2
        src_x = 0;
534
227
      insert_weight(weights, j, src_x, 1);
535
227
    }
536
7.23M
    check_weights(weights, j, dst_w_int, x, dst_w);
537
7.23M
    if (vertical)
538
2.93M
    {
539
2.93M
      reorder_weights(weights, j, src_w);
540
2.93M
    }
541
7.23M
  }
542
44.9k
  weights->count++; /* weights->count = dst_w_int now */
543
44.9k
  if (cache)
544
44.9k
  {
545
44.9k
    cache->weights = weights;
546
44.9k
  }
547
44.9k
  return weights;
548
44.9k
}
549
550
static void
551
scale_row_to_temp(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
552
0
{
553
0
  const int *contrib = &weights->index[weights->index[0]];
554
0
  int len, i, j, n;
555
0
  const unsigned char *min;
556
0
  int tmp[FZ_MAX_COLORS];
557
0
  int *t = tmp;
558
559
0
  n = weights->n;
560
0
  for (j = 0; j < n; j++)
561
0
    tmp[j] = 128;
562
0
  if (weights->flip)
563
0
  {
564
0
    dst += (weights->count-1)*n;
565
0
    for (i=weights->count; i > 0; i--)
566
0
    {
567
0
      min = &src[n * *contrib++];
568
0
      len = *contrib++;
569
0
      while (len-- > 0)
570
0
      {
571
0
        for (j = n; j > 0; j--)
572
0
          *t++ += *min++ * *contrib;
573
0
        t -= n;
574
0
        contrib++;
575
0
      }
576
0
      for (j = n; j > 0; j--)
577
0
      {
578
0
        *dst++ = (unsigned char)(*t>>8);
579
0
        *t++ = 128;
580
0
      }
581
0
      t -= n;
582
0
      dst -= n*2;
583
0
    }
584
0
  }
585
0
  else
586
0
  {
587
0
    for (i=weights->count; i > 0; i--)
588
0
    {
589
0
      min = &src[n * *contrib++];
590
0
      len = *contrib++;
591
0
      while (len-- > 0)
592
0
      {
593
0
        for (j = n; j > 0; j--)
594
0
          *t++ += *min++ * *contrib;
595
0
        t -= n;
596
0
        contrib++;
597
0
      }
598
0
      for (j = n; j > 0; j--)
599
0
      {
600
0
        *dst++ = (unsigned char)(*t>>8);
601
0
        *t++ = 128;
602
0
      }
603
0
      t -= n;
604
0
    }
605
0
  }
606
0
}
607
608
#ifdef ARCH_ARM
609
610
static void
611
scale_row_to_temp1(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
612
__attribute__((naked));
613
614
static void
615
scale_row_to_temp2(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
616
__attribute__((naked));
617
618
static void
619
scale_row_to_temp3(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
620
__attribute__((naked));
621
622
static void
623
scale_row_to_temp4(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
624
__attribute__((naked));
625
626
static void
627
scale_row_from_temp(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int width, int n, int row)
628
__attribute__((naked));
629
630
static void
631
scale_row_from_temp_alpha(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int width, int n, int row)
632
__attribute__((naked));
633
634
static void
635
scale_row_to_temp1(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
636
{
637
  asm volatile(
638
  ENTER_ARM
639
    ".syntax unified\n"
640
  "stmfd  r13!,{r4-r7,r9,r14}       \n"
641
  "@ r0 = dst           \n"
642
  "@ r1 = src           \n"
643
  "@ r2 = weights           \n"
644
  "ldr  r12,[r2],#4   @ r12= flip   \n"
645
  "ldr  r3, [r2],#20    @ r3 = count r2 = &index\n"
646
  "ldr  r4, [r2]    @ r4 = index[0]   \n"
647
  "cmp  r12,#0      @ if (flip)   \n"
648
  "beq  5f      @ {     \n"
649
  "add  r2, r2, r4, LSL #2  @ r2 = &index[index[0]] \n"
650
  "add  r0, r0, r3    @ dst += count    \n"
651
  "1:             \n"
652
  "ldr  r4, [r2], #4    @ r4 = *contrib++ \n"
653
  "ldr  r9, [r2], #4    @ r9 = len = *contrib++ \n"
654
  "mov  r5, #128    @ r5 = a = 128    \n"
655
  "add  r4, r1, r4    @ r4 = min = &src[r4] \n"
656
  "subs r9, r9, #1    @ len--     \n"
657
  "blt  3f      @ while (len >= 0)  \n"
658
  "2:       @ {     \n"
659
  "ldrgt  r6, [r2], #4    @ r6 = *contrib++ \n"
660
  "ldrbgt r7, [r4], #1    @ r7 = *min++   \n"
661
  "ldr  r12,[r2], #4    @ r12 = *contrib++  \n"
662
  "ldrb r14,[r4], #1    @ r14 = *min++    \n"
663
  "mlagt  r5, r6, r7, r5    @ g += r6 * r7    \n"
664
  "subs r9, r9, #2    @ r9 = len -= 2   \n"
665
  "mla  r5, r12,r14,r5    @ g += r14 * r12  \n"
666
  "bge  2b      @ }     \n"
667
  "3:             \n"
668
  "mov  r5, r5, lsr #8    @ g >>= 8   \n"
669
  "strb r5,[r0, #-1]!   @ *--dst=a    \n"
670
  "subs r3, r3, #1    @ i--     \n"
671
  "bgt  1b      @       \n"
672
  "ldmfd  r13!,{r4-r7,r9,PC}  @ pop, return to thumb  \n"
673
  "5:"
674
  "add  r2, r2, r4, LSL #2  @ r2 = &index[index[0]] \n"
675
  "6:"
676
  "ldr  r4, [r2], #4    @ r4 = *contrib++ \n"
677
  "ldr  r9, [r2], #4    @ r9 = len = *contrib++ \n"
678
  "mov  r5, #128    @ r5 = a = 128    \n"
679
  "add  r4, r1, r4    @ r4 = min = &src[r4] \n"
680
  "subs r9, r9, #1    @ len--     \n"
681
  "blt  9f      @ while (len > 0) \n"
682
  "7:       @ {     \n"
683
  "ldrgt  r6, [r2], #4    @ r6 = *contrib++ \n"
684
  "ldrbgt r7, [r4], #1    @ r7 = *min++   \n"
685
  "ldr  r12,[r2], #4    @ r12 = *contrib++  \n"
686
  "ldrb r14,[r4], #1    @ r14 = *min++    \n"
687
  "mlagt  r5, r6,r7,r5    @ a += r6 * r7    \n"
688
  "subs r9, r9, #2    @ r9 = len -= 2   \n"
689
  "mla  r5, r12,r14,r5    @ a += r14 * r12  \n"
690
  "bge  7b      @ }     \n"
691
  "9:             \n"
692
  "mov  r5, r5, LSR #8    @ a >>= 8   \n"
693
  "strb r5, [r0], #1    @ *dst++=a    \n"
694
  "subs r3, r3, #1    @ i--     \n"
695
  "bgt  6b      @       \n"
696
  "ldmfd  r13!,{r4-r7,r9,PC}  @ pop, return to thumb  \n"
697
  ENTER_THUMB
698
  );
699
}
700
701
static void
702
scale_row_to_temp2(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
703
{
704
  asm volatile(
705
  ENTER_ARM
706
  "stmfd  r13!,{r4-r6,r9-r11,r14}       \n"
707
  "@ r0 = dst           \n"
708
  "@ r1 = src           \n"
709
  "@ r2 = weights           \n"
710
  "ldr  r12,[r2],#4   @ r12= flip   \n"
711
  "ldr  r3, [r2],#20    @ r3 = count r2 = &index\n"
712
  "ldr  r4, [r2]    @ r4 = index[0]   \n"
713
  "cmp  r12,#0      @ if (flip)   \n"
714
  "beq  4f      @ {     \n"
715
  "add  r2, r2, r4, LSL #2  @ r2 = &index[index[0]] \n"
716
  "add  r0, r0, r3, LSL #1  @ dst += 2*count  \n"
717
  "1:             \n"
718
  "ldr  r4, [r2], #4    @ r4 = *contrib++ \n"
719
  "ldr  r9, [r2], #4    @ r9 = len = *contrib++ \n"
720
  "mov  r5, #128    @ r5 = g = 128    \n"
721
  "mov  r6, #128    @ r6 = a = 128    \n"
722
  "add  r4, r1, r4, LSL #1  @ r4 = min = &src[2*r4] \n"
723
  "cmp  r9, #0      @ while (len-- > 0) \n"
724
  "beq  3f      @ {     \n"
725
  "2:             \n"
726
  "ldr  r14,[r2], #4    @ r14 = *contrib++  \n"
727
  "ldrb r11,[r4], #1    @ r11 = *min++    \n"
728
  "ldrb r12,[r4], #1    @ r12 = *min++    \n"
729
  "subs r9, r9, #1    @ r9 = len--    \n"
730
  "mla  r5, r14,r11,r5    @ g += r11 * r14  \n"
731
  "mla  r6, r14,r12,r6    @ a += r12 * r14  \n"
732
  "bgt  2b      @ }     \n"
733
  "3:             \n"
734
  "mov  r5, r5, lsr #8    @ g >>= 8   \n"
735
  "mov  r6, r6, lsr #8    @ a >>= 8   \n"
736
  "strb r5, [r0, #-2]!    @ *--dst=a    \n"
737
  "strb r6, [r0, #1]    @ *--dst=g    \n"
738
  "subs r3, r3, #1    @ i--     \n"
739
  "bgt  1b      @       \n"
740
  "ldmfd  r13!,{r4-r6,r9-r11,PC}  @ pop, return to thumb  \n"
741
  "4:"
742
  "add  r2, r2, r4, LSL #2  @ r2 = &index[index[0]] \n"
743
  "5:"
744
  "ldr  r4, [r2], #4    @ r4 = *contrib++ \n"
745
  "ldr  r9, [r2], #4    @ r9 = len = *contrib++ \n"
746
  "mov  r5, #128    @ r5 = g = 128    \n"
747
  "mov  r6, #128    @ r6 = a = 128    \n"
748
  "add  r4, r1, r4, LSL #1  @ r4 = min = &src[2*r4] \n"
749
  "cmp  r9, #0      @ while (len-- > 0) \n"
750
  "beq  7f      @ {     \n"
751
  "6:             \n"
752
  "ldr  r14,[r2], #4    @ r10 = *contrib++  \n"
753
  "ldrb r11,[r4], #1    @ r11 = *min++    \n"
754
  "ldrb r12,[r4], #1    @ r12 = *min++    \n"
755
  "subs r9, r9, #1    @ r9 = len--    \n"
756
  "mla  r5, r14,r11,r5    @ g += r11 * r14  \n"
757
  "mla  r6, r14,r12,r6    @ a += r12 * r14  \n"
758
  "bgt  6b      @ }     \n"
759
  "7:             \n"
760
  "mov  r5, r5, lsr #8    @ g >>= 8   \n"
761
  "mov  r6, r6, lsr #8    @ a >>= 8   \n"
762
  "strb r5, [r0], #1    @ *dst++=g    \n"
763
  "strb r6, [r0], #1    @ *dst++=a    \n"
764
  "subs r3, r3, #1    @ i--     \n"
765
  "bgt  5b      @       \n"
766
  "ldmfd  r13!,{r4-r6,r9-r11,PC}  @ pop, return to thumb  \n"
767
  ENTER_THUMB
768
  );
769
}
770
771
static void
772
scale_row_to_temp3(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
773
{
774
  asm volatile(
775
  ENTER_ARM
776
  "stmfd  r13!,{r4-r11,r14}       \n"
777
  "@ r0 = dst           \n"
778
  "@ r1 = src           \n"
779
  "@ r2 = weights           \n"
780
  "ldr  r12,[r2],#4   @ r12= flip   \n"
781
  "ldr  r3, [r2],#20    @ r3 = count r2 = &index\n"
782
  "ldr  r4, [r2]    @ r4 = index[0]   \n"
783
  "cmp  r12,#0      @ if (flip)   \n"
784
  "beq  4f      @ {     \n"
785
  "add  r2, r2, r4, LSL #2  @ r2 = &index[index[0]] \n"
786
  "add  r0, r0, r3, LSL #1  @     \n"
787
  "add  r0, r0, r3    @ dst += 3*count  \n"
788
  "1:             \n"
789
  "ldr  r4, [r2], #4    @ r4 = *contrib++ \n"
790
  "ldr  r9, [r2], #4    @ r9 = len = *contrib++ \n"
791
  "mov  r5, #128    @ r5 = r = 128    \n"
792
  "mov  r6, #128    @ r6 = g = 128    \n"
793
  "add  r7, r1, r4, LSL #1  @     \n"
794
  "add  r4, r7, r4    @ r4 = min = &src[3*r4] \n"
795
  "mov  r7, #128    @ r7 = b = 128    \n"
796
  "cmp  r9, #0      @ while (len-- > 0) \n"
797
  "beq  3f      @ {     \n"
798
  "2:             \n"
799
  "ldr  r14,[r2], #4    @ r14 = *contrib++  \n"
800
  "ldrb r8, [r4], #1    @ r8  = *min++    \n"
801
  "ldrb r11,[r4], #1    @ r11 = *min++    \n"
802
  "ldrb r12,[r4], #1    @ r12 = *min++    \n"
803
  "subs r9, r9, #1    @ r9 = len--    \n"
804
  "mla  r5, r14,r8, r5    @ r += r8  * r14  \n"
805
  "mla  r6, r14,r11,r6    @ g += r11 * r14  \n"
806
  "mla  r7, r14,r12,r7    @ b += r12 * r14  \n"
807
  "bgt  2b      @ }     \n"
808
  "3:             \n"
809
  "mov  r5, r5, lsr #8    @ r >>= 8   \n"
810
  "mov  r6, r6, lsr #8    @ g >>= 8   \n"
811
  "mov  r7, r7, lsr #8    @ b >>= 8   \n"
812
  "strb r5, [r0, #-3]!    @ *--dst=r    \n"
813
  "strb r6, [r0, #1]    @ *--dst=g    \n"
814
  "strb r7, [r0, #2]    @ *--dst=b    \n"
815
  "subs r3, r3, #1    @ i--     \n"
816
  "bgt  1b      @       \n"
817
  "ldmfd  r13!,{r4-r11,PC}  @ pop, return to thumb  \n"
818
  "4:"
819
  "add  r2, r2, r4, LSL #2  @ r2 = &index[index[0]] \n"
820
  "5:"
821
  "ldr  r4, [r2], #4    @ r4 = *contrib++ \n"
822
  "ldr  r9, [r2], #4    @ r9 = len = *contrib++ \n"
823
  "mov  r5, #128    @ r5 = r = 128    \n"
824
  "mov  r6, #128    @ r6 = g = 128    \n"
825
  "add  r7, r1, r4, LSL #1  @ r7 = min = &src[2*r4] \n"
826
  "add  r4, r7, r4    @ r4 = min = &src[3*r4] \n"
827
  "mov  r7, #128    @ r7 = b = 128    \n"
828
  "cmp  r9, #0      @ while (len-- > 0) \n"
829
  "beq  7f      @ {     \n"
830
  "6:             \n"
831
  "ldr  r14,[r2], #4    @ r10 = *contrib++  \n"
832
  "ldrb r8, [r4], #1    @ r8  = *min++    \n"
833
  "ldrb r11,[r4], #1    @ r11 = *min++    \n"
834
  "ldrb r12,[r4], #1    @ r12 = *min++    \n"
835
  "subs r9, r9, #1    @ r9 = len--    \n"
836
  "mla  r5, r14,r8, r5    @ r += r8  * r14  \n"
837
  "mla  r6, r14,r11,r6    @ g += r11 * r14  \n"
838
  "mla  r7, r14,r12,r7    @ b += r12 * r14  \n"
839
  "bgt  6b      @ }     \n"
840
  "7:             \n"
841
  "mov  r5, r5, lsr #8    @ r >>= 8   \n"
842
  "mov  r6, r6, lsr #8    @ g >>= 8   \n"
843
  "mov  r7, r7, lsr #8    @ b >>= 8   \n"
844
  "strb r5, [r0], #1    @ *dst++=r    \n"
845
  "strb r6, [r0], #1    @ *dst++=g    \n"
846
  "strb r7, [r0], #1    @ *dst++=b    \n"
847
  "subs r3, r3, #1    @ i--     \n"
848
  "bgt  5b      @       \n"
849
  "ldmfd  r13!,{r4-r11,PC}  @ pop, return to thumb  \n"
850
  ENTER_THUMB
851
  );
852
}
853
854
static void
scale_row_to_temp4(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
{
	/* ARM assembly version: horizontally scale one row of 4-component
	 * (e.g. RGBA) pixels into the temporary buffer, using the
	 * precalculated weights table.
	 *
	 * Two components are accumulated per 32-bit register
	 * (SIMD-within-a-register, using the 0x00FF00FF masks and the
	 * 0x00800080 paired rounding constant), so only two accumulators
	 * (r7 = components 0/2, r8 = components 1/3) cover all four
	 * channels. Labels 1-3 handle the flipped (store right-to-left)
	 * case; labels 5-7 the normal left-to-right case.
	 *
	 * NOTE(review): the in-string "@" comments are assembler comments
	 * carried over from upstream; a few reference stale register names
	 * (e.g. "b += r14 * r10" where r12 is used) — left untouched as
	 * they are part of the string literals. */
	asm volatile(
	ENTER_ARM
	"stmfd	r13!,{r4-r11,r14}			\n"
	"@ r0 = dst					\n"
	"@ r1 = src					\n"
	"@ r2 = weights					\n"
	"ldr	r12,[r2],#4		@ r12= flip	\n"
	"ldr	r3, [r2],#20		@ r3 = count r2 = &index\n"
	"ldr	r4, [r2]		@ r4 = index[0]	\n"
	"ldr	r5,=0x00800080		@ r5 = rounding	\n"
	"ldr	r6,=0x00FF00FF		@ r7 = 0x00FF00FF \n"
	"cmp	r12,#0			@ if (flip)	\n"
	"beq	4f			@ {		\n"
	"add	r2, r2, r4, LSL #2	@ r2 = &index[index[0]] \n"
	"add	r0, r0, r3, LSL #2	@ dst += 4*count  \n"
	"1:						\n"
	"ldr	r4, [r2], #4		@ r4 = *contrib++ \n"
	"ldr	r9, [r2], #4		@ r9 = len = *contrib++ \n"
	"mov	r7, r5			@ r7 = b = rounding \n"
	"mov	r8, r5			@ r8 = a = rounding \n"
	"add	r4, r1, r4, LSL #2	@ r4 = min = &src[4*r4] \n"
	"cmp	r9, #0			@ while (len-- > 0) \n"
	"beq	3f			@ {		\n"
	"2:						\n"
	"ldr	r11,[r4], #4		@ r11 = *min++	\n"
	"ldr	r10,[r2], #4		@ r10 = *contrib++ \n"
	"subs	r9, r9, #1		@ r9 = len--	\n"
	"and	r12,r6, r11		@ r12 = __22__00 \n"
	"and	r11,r6, r11,LSR #8	@ r11 = __33__11 \n"
	"mla	r7, r10,r12,r7		@ b += r14 * r10 \n"
	"mla	r8, r10,r11,r8		@ a += r11 * r10 \n"
	"bgt	2b			@ }		\n"
	"3:						\n"
	"and	r7, r6, r7, lsr #8	@ r7 = __22__00	\n"
	"bic	r8, r8, r6		@ r8 = 33__11__	\n"
	"orr	r7, r7, r8		@ r7 = 33221100	\n"
	"str	r7, [r0, #-4]!		@ *--dst=r	\n"
	"subs	r3, r3, #1		@ i--		\n"
	"bgt	1b			@		\n"
	"ldmfd	r13!,{r4-r11,PC}	@ pop, return to thumb \n"
	"4:						\n"
	"add	r2, r2, r4, LSL #2	@ r2 = &index[index[0]] \n"
	"5:						\n"
	"ldr	r4, [r2], #4		@ r4 = *contrib++ \n"
	"ldr	r9, [r2], #4		@ r9 = len = *contrib++ \n"
	"mov	r7, r5			@ r7 = b = rounding \n"
	"mov	r8, r5			@ r8 = a = rounding \n"
	"add	r4, r1, r4, LSL #2	@ r4 = min = &src[4*r4] \n"
	"cmp	r9, #0			@ while (len-- > 0) \n"
	"beq	7f			@ {		\n"
	"6:						\n"
	"ldr	r11,[r4], #4		@ r11 = *min++	\n"
	"ldr	r10,[r2], #4		@ r10 = *contrib++ \n"
	"subs	r9, r9, #1		@ r9 = len--	\n"
	"and	r12,r6, r11		@ r12 = __22__00 \n"
	"and	r11,r6, r11,LSR #8	@ r11 = __33__11 \n"
	"mla	r7, r10,r12,r7		@ b += r14 * r10 \n"
	"mla	r8, r10,r11,r8		@ a += r11 * r10 \n"
	"bgt	6b			@ }		\n"
	"7:						\n"
	"and	r7, r6, r7, lsr #8	@ r7 = __22__00	\n"
	"bic	r8, r8, r6		@ r8 = 33__11__	\n"
	"orr	r7, r7, r8		@ r7 = 33221100	\n"
	"str	r7, [r0], #4		@ *dst++=r	\n"
	"subs	r3, r3, #1		@ i--		\n"
	"bgt	5b			@		\n"
	"ldmfd	r13!,{r4-r11,PC}	@ pop, return to thumb \n"
	ENTER_THUMB
	);
}
927
928
static void
scale_row_from_temp(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int width, int n, int row)
{
	/* ARM assembly version: vertically scale one output row from the
	 * temporary buffer of horizontally-scaled rows. For each output
	 * byte it walks a column of the temp buffer (stride = width*n)
	 * applying the weights for 'row'.
	 *
	 * Fast path (labels 1-3): processes 4 bytes at a time using word
	 * loads and the paired-accumulator trick (0x00FF00FF masks,
	 * 0x00800080 rounding). Guarded by the ARCH_UNALIGNED_OK check:
	 * if width or src is not word-aligned it falls through to the
	 * byte-at-a-time tail loop (labels 5-7), which also mops up the
	 * final width%4 bytes. */
	asm volatile(
	ENTER_ARM
	"stmfd	r13!,{r4-r11,r14}			\n"
	"@ r0 = dst					\n"
	"@ r1 = src					\n"
	"@ r2 = &weights->index[0]			\n"
	"@ r3 = width					\n"
	"@ r12= row					\n"
	"ldr	r14,[r13,#4*9]		@ r14= n	\n"
	"ldr	r12,[r13,#4*10]		@ r12= row	\n"
	"add	r2, r2, #24		@ r2 = weights->index \n"
	"mul	r3, r14, r3		@ r3 = width *= n \n"
	"ldr	r4, [r2, r12, LSL #2]	@ r4 = index[row] \n"
	"add	r2, r2, #4		@ r2 = &index[1] \n"
	"subs	r6, r3, #4		@ r6 = x = width-4 \n"
	"ldr	r14,[r2, r4, LSL #2]!	@ r2 = contrib = index[index[row]+1]\n"
	"				@ r14= len = *contrib \n"
	"blt	4f			@ while (x >= 0) { \n"
#ifndef ARCH_UNALIGNED_OK
	"tst	r3, #3			@ if ((r3 & 3)	\n"
	"tsteq	r1, #3			@ || (r1 & 3))	\n"
	"bne	4f			@ can't do fast code \n"
#endif
	"ldr	r9, =0x00FF00FF		@ r9 = 0x00FF00FF \n"
	"1:						\n"
	"ldr	r7, =0x00800080		@ r5 = val0 = round \n"
	"stmfd	r13!,{r1,r2,r7}		@ stash r1,r2,r5 \n"
	"				@ r1 = min = src \n"
	"				@ r2 = contrib2-4 \n"
	"movs	r8, r14			@ r8 = len2 = len \n"
	"mov	r5, r7			@ r7 = val1 = round \n"
	"ble	3f			@ while (len2-- > 0) { \n"
	"2:						\n"
	"ldr	r12,[r1], r3		@ r12 = *min  r5 = min += width\n"
	"ldr	r10,[r2, #4]!		@ r10 = *contrib2++ \n"
	"subs	r8, r8, #1		@ len2--	\n"
	"and	r11,r9, r12		@ r11= __22__00	\n"
	"and	r12,r9, r12,LSR #8	@ r12= __33__11	\n"
	"mla	r5, r10,r11,r5		@ r5 = val0 += r11 * r10\n"
	"mla	r7, r10,r12,r7		@ r7 = val1 += r12 * r10\n"
	"bgt	2b			@ }		\n"
	"and	r5, r9, r5, LSR #8	@ r5 = __22__00	\n"
	"and	r7, r7, r9, LSL #8	@ r7 = 33__11__	\n"
	"orr	r5, r5, r7		@ r5 = 33221100	\n"
	"3:						\n"
	"ldmfd	r13!,{r1,r2,r7}		@ restore r1,r2,r7 \n"
	"subs	r6, r6, #4		@ x--		\n"
	"add	r1, r1, #4		@ src++		\n"
	"str	r5, [r0], #4		@ *dst++ = val	\n"
	"bge	1b			@		\n"
	"4:			@ } (Less than 4 to go) \n"
	"adds	r6, r6, #4		@ r6 = x += 4	\n"
	"beq	8f			@ if (x == 0) done \n"
	"5:						\n"
	"mov	r5, r1			@ r5 = min = src \n"
	"mov	r7, #128		@ r7 = val = 128 \n"
	"movs	r8, r14			@ r8 = len2 = len \n"
	"add	r9, r2, #4		@ r9 = contrib2	\n"
	"ble	7f			@ while (len2-- > 0) { \n"
	"6:						\n"
	"ldr	r10,[r9], #4		@ r10 = *contrib2++ \n"
	"ldrb	r12,[r5], r3		@ r12 = *min  r5 = min += width\n"
	"subs	r8, r8, #1		@ len2--	\n"
	"@ stall r12					\n"
	"mla	r7, r10,r12,r7		@ val += r12 * r10 \n"
	"bgt	6b			@ }		\n"
	"7:						\n"
	"mov	r7, r7, asr #8		@ r7 = val >>= 8 \n"
	"subs	r6, r6, #1		@ x--		\n"
	"add	r1, r1, #1		@ src++		\n"
	"strb	r7, [r0], #1		@ *dst++ = val	\n"
	"bgt	5b			@		\n"
	"8:						\n"
	"ldmfd	r13!,{r4-r11,PC}	@ pop, return to thumb \n"
	".ltorg						\n"
	ENTER_THUMB
	);
}
1009
1010
static void
scale_row_from_temp_alpha(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int width, int n, int row)
{
	/* ARM assembly version: same vertical scaling as
	 * scale_row_from_temp, but additionally writes a forced opaque
	 * alpha byte (255, held in r11) after every n components — used
	 * when the destination gains an alpha channel the source lacked.
	 * Outer loop at label 5 runs per output pixel; inner loop at
	 * label 1 runs per component (nn = n, reloaded from the stack
	 * each pixel); labels 6-7 accumulate one weighted column.
	 *
	 * NOTE(review): unlike the asm scale_row_from_temp above, r3 is
	 * not multiplied by n here before being used both as the column
	 * stride and the pixel counter — presumably the caller passes
	 * compatible values; confirm against the call site. */
	asm volatile(
	ENTER_ARM
	"stmfd	r13!,{r4-r11,r14}			\n"
	"mov	r11,#255		@ r11= 255	\n"
	"ldr	r12,[r13,#4*10]		@ r12= row	\n"
	"@ r0 = dst					\n"
	"@ r1 = src					\n"
	"@ r2 = &weights->index[0]			\n"
	"@ r3 = width					\n"
	"@ r11= 255					\n"
	"@ r12= row					\n"
	"add	r2, r2, #24		@ r2 = weights->index \n"
	"ldr	r4, [r2, r12, LSL #2]	@ r4 = index[row] \n"
	"add	r2, r2, #4		@ r2 = &index[1] \n"
	"mov	r6, r3			@ r6 = x = width \n"
	"ldr	r14,[r2, r4, LSL #2]!	@ r2 = contrib = index[index[row]+1]\n"
	"				@ r14= len = *contrib \n"
	"5:						\n"
	"ldr	r4,[r13,#4*9]		@ r10= nn = n	\n"
	"1:						\n"
	"mov	r5, r1			@ r5 = min = src \n"
	"mov	r7, #128		@ r7 = val = 128 \n"
	"movs	r8, r14			@ r8 = len2 = len \n"
	"add	r9, r2, #4		@ r9 = contrib2	\n"
	"ble	7f			@ while (len2-- > 0) { \n"
	"6:						\n"
	"ldr	r10,[r9], #4		@ r10 = *contrib2++ \n"
	"ldrb	r12,[r5], r3		@ r12 = *min  r5 = min += width\n"
	"subs	r8, r8, #1		@ len2--	\n"
	"@ stall r12					\n"
	"mla	r7, r10,r12,r7		@ val += r12 * r10 \n"
	"bgt	6b			@ }		\n"
	"7:						\n"
	"mov	r7, r7, asr #8		@ r7 = val >>= 8 \n"
	"subs	r4, r4, #1		@ r4 = nn--	\n"
	"add	r1, r1, #1		@ src++		\n"
	"strb	r7, [r0], #1		@ *dst++ = val	\n"
	"bgt	1b			@		\n"
	"subs	r6, r6, #1		@ x--		\n"
	"strb	r11,[r0], #1		@ *dst++ = 255	\n"
	"bgt	5b			@		\n"
	"ldmfd	r13!,{r4-r11,PC}	@ pop, return to thumb \n"
	".ltorg						\n"
	ENTER_THUMB
	);
}
1059
#else
1060
1061
static void
1062
scale_row_to_temp1(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
1063
3.54M
{
1064
3.54M
  const int *contrib = &weights->index[weights->index[0]];
1065
3.54M
  int len, i;
1066
3.54M
  const unsigned char *min;
1067
1068
3.54M
  assert(weights->n == 1);
1069
3.54M
  if (weights->flip)
1070
84.9k
  {
1071
84.9k
    dst += weights->count;
1072
1.98M
    for (i=weights->count; i > 0; i--)
1073
1.89M
    {
1074
1.89M
      int val = 128;
1075
1.89M
      min = &src[*contrib++];
1076
1.89M
      len = *contrib++;
1077
6.62M
      while (len-- > 0)
1078
4.72M
      {
1079
4.72M
        val += *min++ * *contrib++;
1080
4.72M
      }
1081
1.89M
      *--dst = (unsigned char)(val>>8);
1082
1.89M
    }
1083
84.9k
  }
1084
3.46M
  else
1085
3.46M
  {
1086
734M
    for (i=weights->count; i > 0; i--)
1087
730M
    {
1088
730M
      int val = 128;
1089
730M
      min = &src[*contrib++];
1090
730M
      len = *contrib++;
1091
2.67G
      while (len-- > 0)
1092
1.94G
      {
1093
1.94G
        val += *min++ * *contrib++;
1094
1.94G
      }
1095
730M
      *dst++ = (unsigned char)(val>>8);
1096
730M
    }
1097
3.46M
  }
1098
3.54M
}
1099
1100
static void
1101
scale_row_to_temp2(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
1102
1.29k
{
1103
1.29k
  const int *contrib = &weights->index[weights->index[0]];
1104
1.29k
  int len, i;
1105
1.29k
  const unsigned char *min;
1106
1107
1.29k
  assert(weights->n == 2);
1108
1.29k
  if (weights->flip)
1109
0
  {
1110
0
    dst += 2*weights->count;
1111
0
    for (i=weights->count; i > 0; i--)
1112
0
    {
1113
0
      int c1 = 128;
1114
0
      int c2 = 128;
1115
0
      min = &src[2 * *contrib++];
1116
0
      len = *contrib++;
1117
0
      while (len-- > 0)
1118
0
      {
1119
0
        c1 += *min++ * *contrib;
1120
0
        c2 += *min++ * *contrib++;
1121
0
      }
1122
0
      *--dst = (unsigned char)(c2>>8);
1123
0
      *--dst = (unsigned char)(c1>>8);
1124
0
    }
1125
0
  }
1126
1.29k
  else
1127
1.29k
  {
1128
1.19M
    for (i=weights->count; i > 0; i--)
1129
1.19M
    {
1130
1.19M
      int c1 = 128;
1131
1.19M
      int c2 = 128;
1132
1.19M
      min = &src[2 * *contrib++];
1133
1.19M
      len = *contrib++;
1134
4.29M
      while (len-- > 0)
1135
3.10M
      {
1136
3.10M
        c1 += *min++ * *contrib;
1137
3.10M
        c2 += *min++ * *contrib++;
1138
3.10M
      }
1139
1.19M
      *dst++ = (unsigned char)(c1>>8);
1140
1.19M
      *dst++ = (unsigned char)(c2>>8);
1141
1.19M
    }
1142
1.29k
  }
1143
1.29k
}
1144
1145
static void
1146
scale_row_to_temp3(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
1147
1.64M
{
1148
1.64M
  const int *contrib = &weights->index[weights->index[0]];
1149
1.64M
  int len, i;
1150
1.64M
  const unsigned char *min;
1151
1152
1.64M
  assert(weights->n == 3);
1153
1.64M
  if (weights->flip)
1154
4.15k
  {
1155
4.15k
    dst += 3*weights->count;
1156
746k
    for (i=weights->count; i > 0; i--)
1157
742k
    {
1158
742k
      int c1 = 128;
1159
742k
      int c2 = 128;
1160
742k
      int c3 = 128;
1161
742k
      min = &src[3 * *contrib++];
1162
742k
      len = *contrib++;
1163
3.06M
      while (len-- > 0)
1164
2.31M
      {
1165
2.31M
        int c = *contrib++;
1166
2.31M
        c1 += *min++ * c;
1167
2.31M
        c2 += *min++ * c;
1168
2.31M
        c3 += *min++ * c;
1169
2.31M
      }
1170
742k
      *--dst = (unsigned char)(c3>>8);
1171
742k
      *--dst = (unsigned char)(c2>>8);
1172
742k
      *--dst = (unsigned char)(c1>>8);
1173
742k
    }
1174
4.15k
  }
1175
1.63M
  else
1176
1.63M
  {
1177
307M
    for (i=weights->count; i > 0; i--)
1178
306M
    {
1179
306M
      int c1 = 128;
1180
306M
      int c2 = 128;
1181
306M
      int c3 = 128;
1182
306M
      min = &src[3 * *contrib++];
1183
306M
      len = *contrib++;
1184
1.12G
      while (len-- > 0)
1185
814M
      {
1186
814M
        int c = *contrib++;
1187
814M
        c1 += *min++ * c;
1188
814M
        c2 += *min++ * c;
1189
814M
        c3 += *min++ * c;
1190
814M
      }
1191
306M
      *dst++ = (unsigned char)(c1>>8);
1192
306M
      *dst++ = (unsigned char)(c2>>8);
1193
306M
      *dst++ = (unsigned char)(c3>>8);
1194
306M
    }
1195
1.63M
  }
1196
1.64M
}
1197
1198
static void
1199
scale_row_to_temp4(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
1200
77.5k
{
1201
77.5k
  const int *contrib = &weights->index[weights->index[0]];
1202
77.5k
  int len, i;
1203
77.5k
  const unsigned char *min;
1204
1205
77.5k
  assert(weights->n == 4);
1206
77.5k
  if (weights->flip)
1207
0
  {
1208
0
    dst += 4*weights->count;
1209
0
    for (i=weights->count; i > 0; i--)
1210
0
    {
1211
0
      int r = 128;
1212
0
      int g = 128;
1213
0
      int b = 128;
1214
0
      int a = 128;
1215
0
      min = &src[4 * *contrib++];
1216
0
      len = *contrib++;
1217
0
      while (len-- > 0)
1218
0
      {
1219
0
        r += *min++ * *contrib;
1220
0
        g += *min++ * *contrib;
1221
0
        b += *min++ * *contrib;
1222
0
        a += *min++ * *contrib++;
1223
0
      }
1224
0
      *--dst = (unsigned char)(a>>8);
1225
0
      *--dst = (unsigned char)(b>>8);
1226
0
      *--dst = (unsigned char)(g>>8);
1227
0
      *--dst = (unsigned char)(r>>8);
1228
0
    }
1229
0
  }
1230
77.5k
  else
1231
77.5k
  {
1232
18.2M
    for (i=weights->count; i > 0; i--)
1233
18.1M
    {
1234
18.1M
      int r = 128;
1235
18.1M
      int g = 128;
1236
18.1M
      int b = 128;
1237
18.1M
      int a = 128;
1238
18.1M
      min = &src[4 * *contrib++];
1239
18.1M
      len = *contrib++;
1240
67.2M
      while (len-- > 0)
1241
49.0M
      {
1242
49.0M
        r += *min++ * *contrib;
1243
49.0M
        g += *min++ * *contrib;
1244
49.0M
        b += *min++ * *contrib;
1245
49.0M
        a += *min++ * *contrib++;
1246
49.0M
      }
1247
18.1M
      *dst++ = (unsigned char)(r>>8);
1248
18.1M
      *dst++ = (unsigned char)(g>>8);
1249
18.1M
      *dst++ = (unsigned char)(b>>8);
1250
18.1M
      *dst++ = (unsigned char)(a>>8);
1251
18.1M
    }
1252
77.5k
  }
1253
77.5k
}
1254
1255
static void
1256
scale_row_from_temp(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int w, int n, int row)
1257
3.35M
{
1258
3.35M
  const int *contrib = &weights->index[weights->index[row]];
1259
3.35M
  int len, x;
1260
3.35M
  int width = w * n;
1261
1262
3.35M
  contrib++; /* Skip min */
1263
3.35M
  len = *contrib++;
1264
1.28G
  for (x=width; x > 0; x--)
1265
1.27G
  {
1266
1.27G
    const unsigned char *min = src;
1267
1.27G
    int val = 128;
1268
1.27G
    int len2 = len;
1269
1.27G
    const int *contrib2 = contrib;
1270
1271
5.21G
    while (len2-- > 0)
1272
3.93G
    {
1273
3.93G
      val += *min * *contrib2++;
1274
3.93G
      min += width;
1275
3.93G
    }
1276
1.27G
    *dst++ = (unsigned char)(val>>8);
1277
1.27G
    src++;
1278
1.27G
  }
1279
3.35M
}
1280
1281
static void
1282
scale_row_from_temp_alpha(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int w, int n, int row)
1283
2.24k
{
1284
2.24k
  const int *contrib = &weights->index[weights->index[row]];
1285
2.24k
  int len, x;
1286
2.24k
  int width = w * n;
1287
1288
2.24k
  contrib++; /* Skip min */
1289
2.24k
  len = *contrib++;
1290
19.4k
  for (x=w; x > 0; x--)
1291
17.2k
  {
1292
17.2k
    int nn;
1293
34.4k
    for (nn = n; nn > 0; nn--)
1294
17.2k
    {
1295
17.2k
      const unsigned char *min = src;
1296
17.2k
      int val = 128;
1297
17.2k
      int len2 = len;
1298
17.2k
      const int *contrib2 = contrib;
1299
1300
156k
      while (len2-- > 0)
1301
139k
      {
1302
139k
        val += *min * *contrib2++;
1303
139k
        min += width;
1304
139k
      }
1305
17.2k
      *dst++ = (unsigned char)(val>>8);
1306
17.2k
      src++;
1307
17.2k
    }
1308
17.2k
    *dst++ = 255;
1309
17.2k
  }
1310
2.24k
}
1311
#endif
1312
1313
#ifdef SINGLE_PIXEL_SPECIALS
1314
static void
1315
duplicate_single_pixel(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, int n, int forcealpha, int w, int h, int stride)
1316
2
{
1317
2
  int i;
1318
1319
4
  for (i = n; i > 0; i--)
1320
2
    *dst++ = *src++;
1321
2
  if (forcealpha)
1322
0
    *dst++ = 255;
1323
2
  n += forcealpha;
1324
3
  for (i = w-1; i > 0; i--)
1325
1
  {
1326
1
    memcpy(dst, dst-n, n);
1327
1
    dst += n;
1328
1
  }
1329
2
  w *= n;
1330
2
  dst -= w;
1331
2
  h--;
1332
2
  while (h--)
1333
0
  {
1334
0
    memcpy(dst+stride, dst, w);
1335
0
    dst += stride;
1336
0
  }
1337
2
}
1338
1339
static void
scale_single_row(unsigned char * FZ_RESTRICT dst, int dstride, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int src_w, int h, int forcealpha)
{
	/* Degenerate case: the source is a single row. Horizontally scale
	 * that one row into the first destination row (optionally adding a
	 * forced opaque alpha component), then duplicate it down h-1 times.
	 * Accumulators are 8.8 fixed point with 128 as the rounding term. */
	const int *contrib = &weights->index[weights->index[0]];
	int min, len, i, j, n, nf;
	int tmp[FZ_MAX_COLORS];

	n = weights->n;
	nf = n + forcealpha; /* components written per output pixel */
	/* Scale a single row */
	for (j = 0; j < nf; j++)
		tmp[j] = 128;
	if (weights->flip)
	{
		/* Flipped: write output pixels right-to-left. */
		dst += (weights->count-1)*nf;
		for (i=weights->count; i > 0; i--)
		{
			min = *contrib++;
			len = *contrib++;
			min *= n;
			while (len-- > 0)
			{
				int c = *contrib++;
				for (j = 0; j < n; j++)
					tmp[j] += src[min++] * c;
				/* j == n here: the forced-alpha accumulator is
				 * tmp[n], fed a constant 255 source. */
				if (forcealpha)
					tmp[j] += 255 * c;
			}
			for (j = 0; j < nf; j++)
			{
				*dst++ = (unsigned char)(tmp[j]>>8);
				tmp[j] = 128; /* reset for the next pixel */
			}
			/* Step back two pixels: one to undo the writes above,
			 * one to move to the previous output pixel. */
			dst -= 2*nf;
		}
		/* Leave dst at the start of the second output row. */
		dst += nf + dstride;
	}
	else
	{
		for (i=weights->count; i > 0; i--)
		{
			min = *contrib++;
			len = *contrib++;
			min *= n;
			while (len-- > 0)
			{
				int c = *contrib++;
				for (j = 0; j < n; j++)
					tmp[j] += src[min++] * c;
				/* j == n here: forced-alpha accumulator. */
				if (forcealpha)
					tmp[j] += 255 * c;
			}
			for (j = 0; j < nf; j++)
			{
				*dst++ = (unsigned char)(tmp[j]>>8);
				tmp[j] = 128; /* reset for the next pixel */
			}
		}
		/* Leave dst at the start of the second output row. */
		dst += dstride - weights->count * nf;
	}
	/* And then duplicate it h times */
	nf *= weights->count; /* bytes per output row */
	while (--h > 0)
	{
		memcpy(dst, dst-dstride, nf);
		dst += dstride;
	}
}
1407
1408
static void
scale_single_col(unsigned char * FZ_RESTRICT dst, int dstride, const unsigned char * FZ_RESTRICT src, int sstride, const fz_weights * FZ_RESTRICT weights, int src_w, int n, int w, int forcealpha)
{
	/* Degenerate case: the source is a single column. Vertically scale
	 * that column (one output pixel per destination row, optionally
	 * adding a forced opaque alpha component), duplicating each scaled
	 * pixel across the full output row width w. Accumulators are 8.8
	 * fixed point with 128 as the rounding term. */
	const int *contrib = &weights->index[weights->index[0]];
	int min, len, i, j;
	int tmp[FZ_MAX_COLORS];
	int nf = n + forcealpha; /* components written per output pixel */

	for (j = 0; j < nf; j++)
		tmp[j] = 128;
	if (weights->flip)
	{
		/* Flipped: walk the source column bottom-up. */
		src_w = (src_w-1)*sstride; /* offset of the last source row */
		for (i=weights->count; i > 0; i--)
		{
			/* Scale the next pixel in the column */
			min = *contrib++;
			len = *contrib++;
			min = src_w-min*sstride;
			while (len-- > 0)
			{
				int c = *contrib++;
				for (j = 0; j < n; j++)
					tmp[j] += src[min+j] * c;
				/* j == n here: forced-alpha accumulator,
				 * fed a constant 255 source. */
				if (forcealpha)
					tmp[j] += 255 * c;
				min -= sstride;
			}
			for (j = 0; j < nf; j++)
			{
				*dst++ = (unsigned char)(tmp[j]>>8);
				tmp[j] = 128; /* reset for the next pixel */
			}
			/* And then duplicate it across the row */
			for (j = (w-1)*nf; j > 0; j--)
			{
				*dst = dst[-nf];
				dst++;
			}
			dst += dstride - w*nf; /* advance to the next row */
		}
	}
	else
	{
		for (i=weights->count; i > 0; i--)
		{
			/* Scale the next pixel in the column */
			min = *contrib++;
			len = *contrib++;
			min *= sstride;
			while (len-- > 0)
			{
				int c = *contrib++;
				for (j = 0; j < n; j++)
					tmp[j] += src[min+j] * c;
				/* j == n here: forced-alpha accumulator. */
				if (forcealpha)
					tmp[j] += 255 * c;
				min += sstride;
			}
			for (j = 0; j < nf; j++)
			{
				*dst++ = (unsigned char)(tmp[j]>>8);
				tmp[j] = 128; /* reset for the next pixel */
			}
			/* And then duplicate it across the row */
			for (j = (w-1)*nf; j > 0; j--)
			{
				*dst = dst[-nf];
				dst++;
			}
			dst += dstride - w*nf; /* advance to the next row */
		}
	}
}
1482
#endif /* SINGLE_PIXEL_SPECIALS */
1483
1484
static void
get_alpha_edge_values(const fz_weights * FZ_RESTRICT rows, int * FZ_RESTRICT tp, int * FZ_RESTRICT bp)
{
	/* Sum the weights used for the first (*tp) and last (*bp) output
	 * lines of a weights table. These totals (8.8 fixed point) tell
	 * the caller how much of each edge line came from real source
	 * data, so edge alpha can be scaled accordingly in
	 * adjust_alpha_edges. If the table is flipped, the first/last
	 * roles are swapped before returning. */
	const int *contrib = &rows->index[rows->index[0]];
	int len, i, t, b;

	/* Calculate the edge alpha values */
	contrib++; /* Skip min */
	len = *contrib++;
	t = 0;
	while (len--)
		t += *contrib++;
	/* Skip over the middle count-2 entries to reach the last one. */
	for (i=rows->count-2; i > 0; i--)
	{
		contrib++; /* Skip min */
		len = *contrib++;
		contrib += len;
	}
	b = 0;
	/* i == 0 means count >= 2, so a distinct last entry exists;
	 * with count == 1 the loop never ran and i != 0, leaving b = 0. */
	if (i == 0)
	{
		contrib++;
		len = *contrib++;
		while (len--)
			b += *contrib++;
	}
	if (rows->flip && i == 0)
	{
		/* Flipped: the first entry corresponds to the bottom edge. */
		*tp = b;
		*bp = t;
	}
	else
	{
		*tp = t;
		*bp = b;
	}
}
1521
1522
static void
adjust_alpha_edges(fz_pixmap * FZ_RESTRICT pix, const fz_weights * FZ_RESTRICT rows, const fz_weights * FZ_RESTRICT cols)
{
	/* Rewrite the alpha component (last of the n components) of every
	 * border pixel of the scaled pixmap so that edges fade in
	 * proportion to how much of each edge line was covered by real
	 * source data. t/b/l/r are the edge weight sums from
	 * get_alpha_edge_values; corners combine two edges. All products
	 * are 8.8 fixed point with +128 rounding. */
	int t, l, r, b, tl, tr, bl, br, x, y;
	unsigned char *dp = pix->samples;
	int w = pix->w;
	int n = pix->n;
	int span = w >= 2 ? (w-1)*n : 0; /* offset from left to right edge pixel */
	int stride = pix->stride;

	get_alpha_edge_values(rows, &t, &b);
	get_alpha_edge_values(cols, &l, &r);

	/* Scale the edge sums to 0..255 alphas; corners are products. */
	l = (255 * l + 128)>>8;
	r = (255 * r + 128)>>8;
	tl = (l * t + 128)>>8;
	tr = (r * t + 128)>>8;
	bl = (l * b + 128)>>8;
	br = (r * b + 128)>>8;
	t = (255 * t + 128)>>8;
	b = (255 * b + 128)>>8;
	/* Top row: top-left corner, then top edge, then top-right. */
	dp += n-1; /* point at the alpha byte of the first pixel */
	*dp = tl;
	dp += n;
	for (x = w-2; x > 0; x--)
	{
		*dp = t;
		dp += n;
	}
	if (x == 0) /* only when w >= 2 is there a distinct right pixel */
	{
		*dp = tr;
		dp += n;
	}
	dp += stride - w*n; /* move to the alpha byte of the next row */
	/* Middle rows: left and right edge pixels only. */
	for (y = pix->h-2; y > 0; y--)
	{
		dp[span] = r;
		*dp = l;
		dp += stride;
	}
	/* Bottom row, if distinct from the top (h >= 2). */
	if (y == 0)
	{
		*dp = bl;
		dp += n;
		for (x = w-2; x > 0; x--)
		{
			*dp = b;
			dp += n;
		}
		if (x == 0)
		{
			*dp = br;
		}
	}
}
1578
1579
fz_pixmap *
fz_scale_pixmap(fz_context *ctx, fz_pixmap *src, float x, float y, float w, float h, const fz_irect *clip)
{
	/* Convenience wrapper around fz_scale_pixmap_cached with no
	 * weight caches (both cache arguments NULL). */
	return fz_scale_pixmap_cached(ctx, src, x, y, w, h, clip, NULL, NULL);
}
1584
1585
fz_pixmap *
1586
fz_scale_pixmap_cached(fz_context *ctx, const fz_pixmap *src, float x, float y, float w, float h, const fz_irect *clip, fz_scale_cache *cache_x, fz_scale_cache *cache_y)
1587
37.6k
{
1588
37.6k
  fz_scale_filter *filter = &fz_scale_filter_simple;
1589
37.6k
  fz_weights *contrib_rows = NULL;
1590
37.6k
  fz_weights *contrib_cols = NULL;
1591
37.6k
  fz_pixmap *output = NULL;
1592
37.6k
  unsigned char *temp = NULL;
1593
37.6k
  int max_row, temp_span, temp_rows, row;
1594
37.6k
  int dst_w_int, dst_h_int, dst_x_int, dst_y_int;
1595
37.6k
  int flip_x, flip_y, forcealpha;
1596
37.6k
  fz_rect patch;
1597
1598
37.6k
  fz_var(contrib_cols);
1599
37.6k
  fz_var(contrib_rows);
1600
1601
  /* Avoid extreme scales where overflows become problematic. */
1602
37.6k
  if (w > (1<<24) || h > (1<<24) || w < -(1<<24) || h < -(1<<24))
1603
0
    return NULL;
1604
37.6k
  if (x > (1<<24) || y > (1<<24) || x < -(1<<24) || y < -(1<<24))
1605
0
    return NULL;
1606
1607
  /* Clamp small ranges of w and h */
1608
37.6k
  if (w <= -1)
1609
8.49k
  {
1610
    /* Large negative range. Don't clamp */
1611
8.49k
  }
1612
29.1k
  else if (w < 0)
1613
0
  {
1614
0
    w = -1;
1615
0
  }
1616
29.1k
  else if (w < 1)
1617
568
  {
1618
568
    w = 1;
1619
568
  }
1620
37.6k
  if (h <= -1)
1621
1.52k
  {
1622
    /* Large negative range. Don't clamp */
1623
1.52k
  }
1624
36.1k
  else if (h < 0)
1625
67
  {
1626
67
    h = -1;
1627
67
  }
1628
36.0k
  else if (h < 1)
1629
522
  {
1630
522
    h = 1;
1631
522
  }
1632
1633
  /* If the src has an alpha, we'll make the dst have an alpha automatically.
1634
   * We also need to force the dst to have an alpha if x/y/w/h aren't ints. */
1635
37.6k
  forcealpha = !src->alpha && (x != (float)(int)x || y != (float)(int)y || w != (float)(int)w || h != (float)(int)h);
1636
1637
  /* Find the destination bbox, width/height, and sub pixel offset,
1638
   * allowing for whether we're flipping or not. */
1639
  /* The (x,y) position given describes where the top left corner
1640
   * of the source image should be mapped to (i.e. where (0,0) in image
1641
   * space ends up). Also there are differences in the way we scale
1642
   * horizontally and vertically. When scaling rows horizontally, we
1643
   * always read forwards through the source, and store either forwards
1644
   * or in reverse as required. When scaling vertically, we always store
1645
   * out forwards, but may feed source rows in in a different order.
1646
   *
1647
   * Consider the image rectangle 'r' to which the image is mapped,
1648
   * and the (possibly) larger rectangle 'R', given by expanding 'r' to
1649
   * complete pixels.
1650
   *
1651
   * x can either be r.xmin-R.xmin or R.xmax-r.xmax depending on whether
1652
   * the image is x flipped or not. Whatever happens 0 <= x < 1.
1653
   * y is always R.ymax - r.ymax.
1654
   */
1655
  /* dst_x_int is calculated to be the left of the scaled image, and
1656
   * x (the sub pixel offset) is the distance in from either the left
1657
   * or right pixel expanded edge. */
1658
37.6k
  flip_x = (w < 0);
1659
37.6k
  if (flip_x)
1660
8.49k
  {
1661
8.49k
    float tmp;
1662
8.49k
    w = -w;
1663
8.49k
    dst_x_int = floorf(x-w);
1664
8.49k
    tmp = ceilf(x);
1665
8.49k
    dst_w_int = (int)tmp;
1666
8.49k
    x = tmp - x;
1667
8.49k
    dst_w_int -= dst_x_int;
1668
8.49k
  }
1669
29.1k
  else
1670
29.1k
  {
1671
29.1k
    dst_x_int = floorf(x);
1672
29.1k
    x -= dst_x_int;
1673
29.1k
    dst_w_int = (int)ceilf(x + w);
1674
29.1k
  }
1675
  /* dst_y_int is calculated to be the top of the scaled image, and
1676
   * y (the sub pixel offset) is the distance in from either the top
1677
   * or bottom pixel expanded edge.
1678
   */
1679
37.6k
  flip_y = (h < 0);
1680
37.6k
  if (flip_y)
1681
1.59k
  {
1682
1.59k
    float tmp;
1683
1.59k
    h = -h;
1684
1.59k
    dst_y_int = floorf(y-h);
1685
1.59k
    tmp = ceilf(y);
1686
1.59k
    dst_h_int = (int)tmp;
1687
1.59k
    y = tmp - y;
1688
1.59k
    dst_h_int -= dst_y_int;
1689
1.59k
  }
1690
36.0k
  else
1691
36.0k
  {
1692
36.0k
    dst_y_int = floorf(y);
1693
36.0k
    y -= dst_y_int;
1694
36.0k
    dst_h_int = (int)ceilf(y + h);
1695
36.0k
  }
1696
1697
37.6k
  fz_valgrind_pixmap(src);
1698
1699
  /* Step 0: Calculate the patch */
1700
37.6k
  patch.x0 = 0;
1701
37.6k
  patch.y0 = 0;
1702
37.6k
  patch.x1 = dst_w_int;
1703
37.6k
  patch.y1 = dst_h_int;
1704
37.6k
  if (clip)
1705
30.3k
  {
1706
30.3k
    if (flip_x)
1707
8.49k
    {
1708
8.49k
      if (dst_x_int + dst_w_int > clip->x1)
1709
97
        patch.x0 = dst_x_int + dst_w_int - clip->x1;
1710
8.49k
      if (clip->x0 > dst_x_int)
1711
8
      {
1712
8
        patch.x1 = dst_w_int - (clip->x0 - dst_x_int);
1713
8
        dst_x_int = clip->x0;
1714
8
      }
1715
8.49k
    }
1716
21.8k
    else
1717
21.8k
    {
1718
21.8k
      if (dst_x_int + dst_w_int > clip->x1)
1719
914
        patch.x1 = clip->x1 - dst_x_int;
1720
21.8k
      if (clip->x0 > dst_x_int)
1721
702
      {
1722
702
        patch.x0 = clip->x0 - dst_x_int;
1723
702
        dst_x_int += patch.x0;
1724
702
      }
1725
21.8k
    }
1726
1727
30.3k
    if (flip_y)
1728
1.59k
    {
1729
1.59k
      if (dst_y_int + dst_h_int > clip->y1)
1730
19
        patch.y1 = clip->y1 - dst_y_int;
1731
1.59k
      if (clip->y0 > dst_y_int)
1732
32
      {
1733
32
        patch.y0 = clip->y0 - dst_y_int;
1734
32
        dst_y_int = clip->y0;
1735
32
      }
1736
1.59k
    }
1737
28.7k
    else
1738
28.7k
    {
1739
28.7k
      if (dst_y_int + dst_h_int > clip->y1)
1740
1.77k
        patch.y1 = clip->y1 - dst_y_int;
1741
28.7k
      if (clip->y0 > dst_y_int)
1742
882
      {
1743
882
        patch.y0 = clip->y0 - dst_y_int;
1744
882
        dst_y_int += patch.y0;
1745
882
      }
1746
28.7k
    }
1747
30.3k
  }
1748
37.6k
  if (patch.x0 >= patch.x1 || patch.y0 >= patch.y1)
1749
155
    return NULL;
1750
1751
75.0k
  fz_try(ctx)
1752
75.0k
  {
1753
    /* Step 1: Calculate the weights for columns and rows */
1754
37.5k
#ifdef SINGLE_PIXEL_SPECIALS
1755
37.5k
    if (src->w == 1)
1756
37
      contrib_cols = NULL;
1757
37.4k
    else
1758
37.4k
#endif /* SINGLE_PIXEL_SPECIALS */
1759
37.4k
      contrib_cols = Memento_label(make_weights(ctx, src->w, x, w, filter, 0, dst_w_int, patch.x0, patch.x1, src->n, flip_x, cache_x), "contrib_cols");
1760
37.5k
#ifdef SINGLE_PIXEL_SPECIALS
1761
37.5k
    if (src->h == 1)
1762
213
      contrib_rows = NULL;
1763
37.3k
    else
1764
37.3k
#endif /* SINGLE_PIXEL_SPECIALS */
1765
37.3k
      contrib_rows = Memento_label(make_weights(ctx, src->h, y, h, filter, 1, dst_h_int, patch.y0, patch.y1, src->n, flip_y, cache_y), "contrib_rows");
1766
1767
37.5k
    output = fz_new_pixmap(ctx, src->colorspace, patch.x1 - patch.x0, patch.y1 - patch.y0, src->seps, src->alpha || forcealpha);
1768
37.5k
  }
1769
75.0k
  fz_catch(ctx)
1770
0
  {
1771
0
    if (!cache_x)
1772
0
      fz_free(ctx, contrib_cols);
1773
0
    if (!cache_y)
1774
0
      fz_free(ctx, contrib_rows);
1775
0
    fz_rethrow(ctx);
1776
0
  }
1777
37.5k
  output->x = dst_x_int;
1778
37.5k
  output->y = dst_y_int;
1779
1780
  /* Step 2: Apply the weights */
1781
37.5k
#ifdef SINGLE_PIXEL_SPECIALS
1782
37.5k
  if (!contrib_rows)
1783
213
  {
1784
    /* Only 1 source pixel high. */
1785
213
    if (!contrib_cols)
1786
2
    {
1787
      /* Only 1 pixel in the entire image! */
1788
2
      duplicate_single_pixel(output->samples, src->samples, src->n, forcealpha, patch.x1-patch.x0, patch.y1-patch.y0, output->stride);
1789
2
      fz_valgrind_pixmap(output);
1790
2
    }
1791
211
    else
1792
211
    {
1793
      /* Scale the row once, then copy it. */
1794
211
      scale_single_row(output->samples, output->stride, src->samples, contrib_cols, src->w, patch.y1-patch.y0, forcealpha);
1795
211
      fz_valgrind_pixmap(output);
1796
211
    }
1797
213
  }
1798
37.3k
  else if (!contrib_cols)
1799
35
  {
1800
    /* Only 1 source pixel wide. Scale the col and duplicate. */
1801
35
    scale_single_col(output->samples, output->stride, src->samples, src->stride, contrib_rows, src->h, src->n, patch.x1-patch.x0, forcealpha);
1802
35
    fz_valgrind_pixmap(output);
1803
35
  }
1804
37.2k
  else
1805
37.2k
#endif /* SINGLE_PIXEL_SPECIALS */
1806
37.2k
  {
1807
37.2k
    void (*row_scale_in)(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights);
1808
37.2k
    void (*row_scale_out)(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int w, int n, int row);
1809
1810
37.2k
    temp_span = contrib_cols->count * src->n;
1811
37.2k
    temp_rows = contrib_rows->max_len;
1812
37.2k
    if (temp_span <= 0 || temp_rows > INT_MAX / temp_span)
1813
0
      goto cleanup;
1814
74.5k
    fz_try(ctx)
1815
74.5k
    {
1816
37.2k
      temp = fz_calloc(ctx, (size_t)temp_span*temp_rows, sizeof(unsigned char));
1817
37.2k
    }
1818
74.5k
    fz_catch(ctx)
1819
0
    {
1820
0
      fz_drop_pixmap(ctx, output);
1821
0
      if (!cache_x)
1822
0
        fz_free(ctx, contrib_cols);
1823
0
      if (!cache_y)
1824
0
        fz_free(ctx, contrib_rows);
1825
0
      fz_rethrow(ctx);
1826
0
    }
1827
37.2k
    switch (src->n)
1828
37.2k
    {
1829
0
    default:
1830
0
      row_scale_in = scale_row_to_temp;
1831
0
      break;
1832
29.0k
    case 1: /* Image mask case or Greyscale case */
1833
29.0k
      row_scale_in = scale_row_to_temp1;
1834
29.0k
      break;
1835
7
    case 2: /* Greyscale with alpha case */
1836
7
      row_scale_in = scale_row_to_temp2;
1837
7
      break;
1838
8.23k
    case 3: /* RGB case */
1839
8.23k
      row_scale_in = scale_row_to_temp3;
1840
8.23k
      break;
1841
36
    case 4: /* RGBA or CMYK case */
1842
36
      row_scale_in = scale_row_to_temp4;
1843
36
      break;
1844
37.2k
    }
1845
37.2k
    row_scale_out = forcealpha ? scale_row_from_temp_alpha : scale_row_from_temp;
1846
37.2k
    max_row = contrib_rows->index[contrib_rows->index[0]];
1847
3.39M
    for (row = 0; row < contrib_rows->count; row++)
1848
3.36M
    {
1849
      /*
1850
      Which source rows do we need to have scaled into the
1851
      temporary buffer in order to be able to do the final
1852
      scale?
1853
      */
1854
3.36M
      int row_index = contrib_rows->index[row];
1855
3.36M
      int row_min = contrib_rows->index[row_index++];
1856
3.36M
      int row_len = contrib_rows->index[row_index];
1857
8.63M
      while (max_row < row_min+row_len)
1858
5.27M
      {
1859
        /* Scale another row */
1860
5.27M
        assert(max_row < src->h);
1861
5.27M
        (*row_scale_in)(&temp[temp_span*(max_row % temp_rows)], &src->samples[(flip_y ? (src->h-1-max_row): max_row)*src->stride], contrib_cols);
1862
5.27M
        max_row++;
1863
5.27M
      }
1864
1865
3.36M
      (*row_scale_out)(&output->samples[row*output->stride], temp, contrib_rows, contrib_cols->count, src->n, row);
1866
3.36M
    }
1867
37.2k
    fz_free(ctx, temp);
1868
1869
37.2k
    if (forcealpha)
1870
630
      adjust_alpha_edges(output, contrib_rows, contrib_cols);
1871
1872
37.2k
    fz_valgrind_pixmap(output);
1873
37.2k
  }
1874
1875
37.5k
cleanup:
1876
37.5k
  if (!cache_y)
1877
0
    fz_free(ctx, contrib_rows);
1878
37.5k
  if (!cache_x)
1879
0
    fz_free(ctx, contrib_cols);
1880
1881
37.5k
  return output;
1882
37.5k
}
1883
1884
void
1885
fz_drop_scale_cache(fz_context *ctx, fz_scale_cache *sc)
1886
36.5k
{
1887
36.5k
  if (!sc)
1888
0
    return;
1889
36.5k
  fz_free(ctx, sc->weights);
1890
36.5k
  fz_free(ctx, sc);
1891
36.5k
}
1892
1893
fz_scale_cache *
1894
fz_new_scale_cache(fz_context *ctx)
1895
36.5k
{
1896
36.5k
  return fz_malloc_struct(ctx, fz_scale_cache);
1897
36.5k
}