Coverage Report

Created: 2026-05-16 07:03

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/mupdf/source/fitz/draw-scale-simple.c
Line
Count
Source
1
// Copyright (C) 2004-2025 Artifex Software, Inc.
2
//
3
// This file is part of MuPDF.
4
//
5
// MuPDF is free software: you can redistribute it and/or modify it under the
6
// terms of the GNU Affero General Public License as published by the Free
7
// Software Foundation, either version 3 of the License, or (at your option)
8
// any later version.
9
//
10
// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13
// details.
14
//
15
// You should have received a copy of the GNU Affero General Public License
16
// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17
//
18
// Alternative licensing terms are available from the licensor.
19
// For commercial licensing, see <https://www.artifex.com/> or contact
20
// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21
// CA 94129, USA, for further information.
22
23
/*
24
This code does smooth scaling of a pixmap.
25
26
This function returns a new pixmap representing the area starting at (0,0)
27
given by taking the source pixmap src, scaling it to width w, and height h,
28
and then positioning it at (frac(x),frac(y)).
29
30
This is a cut-down version of draw_scale.c that only copes with filters
31
that return values strictly in the 0..1 range, and uses bytes for
32
intermediate results rather than ints.
33
*/
34
35
#include "mupdf/fitz.h"
36
37
#include "draw-imp.h"
38
#include "pixmap-imp.h"
39
40
#include <math.h>
41
#include <string.h>
42
#include <assert.h>
43
#include <limits.h>
44
45
/* Do we special case handling of single pixel high/wide images? The
46
 * 'purest' handling is given by not special casing them, but certain
47
 * files that use such images 'stack' them to give full images. Not
48
 * special casing them results in them being fainter and giving noticeable
49
 * rounding errors.
50
 */
51
#define SINGLE_PIXEL_SPECIALS
52
53
/*
54
Consider a row of source samples, src, of width src_w, positioned at x,
55
scaled to width dst_w.
56
57
src[i] is centred at: x + (i + 0.5)*dst_w/src_w
58
59
Therefore the distance between the centre of the jth output pixel and
60
the centre of the ith source sample is:
61
62
dist[j,i] = j + 0.5 - (x + (i + 0.5)*dst_w/src_w)
63
64
When scaling up, therefore:
65
66
dst[j] = SUM(filter(dist[j,i]) * src[i])
67
  (for all ints i)
68
69
This can be simplified by noticing that filters are only non zero within
70
a given filter width (henceforth called W). So:
71
72
dst[j] = SUM(filter(dist[j,i]) * src[i])
73
  (for ints i, s.t. (j*src_w/dst_w)-W < i < (j*src_w/dst_w)+W)
74
75
When scaling down, each filtered source sample is stretched to be wider
76
to avoid aliasing issues. This effectively reduces the distance between
77
centres.
78
79
dst[j] = SUM(filter(dist[j,i] * F) * F * src[i])
80
  (where F = dst_w/src_w)
81
  (for ints i, s.t. (j-W)/F < i < (j+W)/F)
82
83
*/
84
85
/* Description of one scaling filter kernel. */
typedef struct fz_scale_filter
86
{
87
  /* Filter width (the "W" of the discussion above); make_weights divides
   * it by the scale factor to obtain the source window for each
   * destination pixel. */
  int width;
88
  /* Kernel profile. In this file it is called from add_weight with the
   * filter itself and a distance that has already been made
   * non-negative. */
  float (*fn)(struct fz_scale_filter *, float);
89
} fz_scale_filter;
90
91
/* Image scale filters */
92
93
/* Linear (tent) kernel: 1-f for f < 1, otherwise 0. The filter argument is
 * not used. No handling of negative f is done here; add_weight takes the
 * absolute value before calling a kernel. */
static float
94
triangle(fz_scale_filter *filter, float f)
95
0
{
96
0
  if (f >= 1)
97
0
    return 0;
98
0
  return 1-f;
99
0
}
100
101
/* Box kernel: 1 for f < 0.5, otherwise 0. The filter argument is not used. */
static float
102
box(fz_scale_filter *filter, float f)
103
0
{
104
0
  if (f >= 0.5f)
105
0
    return 0;
106
0
  return 1;
107
0
}
108
109
/* Cubic kernel: for x < 1 returns 1 + (2x - 3)x^2 (i.e. 1 - 3x^2 + 2x^3),
 * which is 1 at x == 0 and falls to 0 at x == 1; returns 0 for x >= 1.
 * The filter argument is not used. */
static float
110
simple(fz_scale_filter *filter, float x)
111
718k
{
112
718k
  if (x >= 1)
113
13.8k
    return 0;
114
704k
  return 1 + (2*x - 3)*x*x;
115
718k
}
116
117
/* The available filters. Each pairs a width of 1 with one of the kernel
 * functions defined above. */
fz_scale_filter fz_scale_filter_box = { 1, box };
118
fz_scale_filter fz_scale_filter_triangle = { 1, triangle };
119
fz_scale_filter fz_scale_filter_simple = { 1, simple };
120
121
/*
122
We build ourselves a set of tables to contain the precalculated weights
123
for a given set of scale settings.
124
125
The first dst_w entries in index are the offsets into index of the
126
set of weights for each destination pixel.
127
128
Each of the sets of weights is a set of values consisting of:
129
  the minimum source pixel index used for this destination pixel
130
  the number of weights used for this destination pixel
131
  the weights themselves
132
133
So to calculate dst[i] we do the following:
134
135
  weights = &index[index[i]];
136
  min = *weights++;
137
  len = *weights++;
138
  dst[i] = 0;
139
  while (len-- > 0)
140
    dst[i] += src[min++] * *weights++
141
142
in addition, we guarantee that at the end of this process weights will now
143
point to the weights value for dst pixel i+1.
144
145
In the simplest version of this algorithm, we would scale the whole image
146
horizontally first into a temporary buffer, then scale that temporary
147
buffer again vertically to give us our result. Using such a simple
148
algorithm would mean that we could use the same style of weights for both
149
horizontal and vertical scaling.
150
151
Unfortunately, this would also require a large temporary buffer,
152
particularly in the case where we are scaling up.
153
154
We therefore modify the algorithm as follows; we scale scanlines from the
155
source image horizontally into a temporary buffer, until we have all the
156
contributors for a given output scanline. We then produce that output
157
scanline from the temporary buffer. In this way we restrict the height
158
of the temporary buffer to a small fraction of the final size.
159
160
Unfortunately, this means that the pseudo code for recombining a
161
scanline of fully scaled pixels is as follows:
162
163
  weights = &index[index[y]];
164
  min = *weights++;
165
  len = *weights++;
166
  for (x=0 to dst_w)
167
    min2 = min
168
    len2 = len
169
    weights2 = weights
170
    dst[x] = 0;
171
    while (len2-- > 0)
172
      dst[x] += temp[x][(min2++) % tmp_buf_height] * *weights2++
173
174
i.e. it requires a % operation for every source pixel - this is typically
175
expensive.
176
177
To avoid this, we alter the order in which vertical weights are stored,
178
so that they are ordered in the same order as the temporary buffer lines
179
would appear. This simplifies the algorithm to:
180
181
  weights = &index[index[y]];
182
  min = *weights++;
183
  len = *weights++;
184
  for (x=0 to dst_w)
185
    min2 = 0
186
    len2 = len
187
    weights2 = weights
188
    dst[x] = 0;
189
    while (len2-- > 0)
190
      dst[x] += temp[x][min2++] * *weights2++
191
192
This means that len may be larger than it needs to be (due to the
193
possible inclusion of a zero weight row or two), but in practice this
194
is only an increase of 1 or 2 at worst.
195
196
We implement this by generating the weights as normal (but ensuring we
197
leave enough space) and then reordering afterwards.
198
199
*/
200
201
/* This structure is accessed from ARM code - bear this in mind before
202
 * altering it! */
203
/* Precalculated weight table for one scaling direction.
 *
 * index[] holds two things: first one "row pointer" per output pixel (an
 * offset back into index[]), then, at each such offset, a record of the
 * form { min, len, weight[0..len-1] }.
 *
 * The ARM routines in this file load flip from byte offset 0, count from
 * byte offset 4, and then step a further 20 bytes to reach index[], so the
 * order and number of the fields before index[] must not change (this
 * assumes a 4 byte int). */
typedef struct
204
{
205
  int flip; /* true if outputting reversed */
206
  int count;  /* number of output pixels we have records for in this table */
207
  int max_len;  /* Maximum number of weights for any one output pixel */
208
  int n;    /* number of components (src->n) */
209
  int new_line; /* True if no weights for the current output pixel */
210
  int patch_l;  /* How many output pixels we skip over */
211
  int index[FZ_FLEXIBLE_ARRAY];
212
} fz_weights;
213
214
/* Single entry cache for make_weights. Every field except weights mirrors
 * one make_weights argument and acts as the lookup key; weights is the
 * table that was built for that key. make_weights frees the previous table
 * when the key changes. */
struct fz_scale_cache
215
{
216
  int src_w;
217
  float x;
218
  float dst_w;
219
  fz_scale_filter *filter;
220
  int vertical;
221
  int dst_w_int;
222
  int patch_l;
223
  int patch_r;
224
  int n;
225
  int flip;
226
  fz_weights *weights;
227
};
228
229
/* Allocate an empty weights table for patch_w output pixels.
 *
 * max_len is the largest number of source pixels that can contribute to a
 * single output pixel: 2*filter->width*src_w/dst_w (rounded up, clamped to
 * src_w) when scaling down, 2*filter->width otherwise.
 *
 * On return count is -1 (init_weights increments it once per output pixel),
 * index[0] is patch_w (the offset at which init_weights places the first
 * record, i.e. just after the patch_w row pointers), and max_len, n,
 * patch_l and flip are stored. new_line is not set here; init_weights sets
 * it before it is read.
 *
 * Returns NULL if the allocator returns NULL.
 * NOTE(review): fz_malloc_flexible is defined elsewhere; confirm whether it
 * can actually return NULL or reports failure some other way. */
static fz_weights *
230
new_weights(fz_context *ctx, fz_scale_filter *filter, int src_w, float dst_w, int patch_w, int n, int flip, int patch_l)
231
14.9k
{
232
14.9k
  int max_len;
233
14.9k
  fz_weights *weights;
234
235
14.9k
  if (src_w > dst_w)
236
14.9k
  {
237
    /* Scaling down, so there will be a maximum of
238
     * 2*filterwidth*src_w/dst_w src pixels
239
     * contributing to each dst pixel. */
240
14.9k
    max_len = (int)ceilf((2 * filter->width * src_w)/dst_w);
241
14.9k
    if (max_len > src_w)
242
2
      max_len = src_w;
243
14.9k
  }
244
0
  else
245
0
  {
246
    /* Scaling up, so there will be a maximum of
247
     * 2*filterwidth src pixels contributing to each dst pixel.
248
     */
249
0
    max_len = 2 * filter->width;
250
0
  }
251
  /* We need the size of the struct,
252
   * plus patch_w*sizeof(int) for the index
253
   * plus (2+max_len)*sizeof(int) for the weights
254
   * plus room for an extra set of weights for reordering.
255
   */
256
14.9k
  weights = fz_malloc_flexible(ctx, fz_weights, index, (max_len+3) * (patch_w+1));
257
14.9k
  if (!weights)
258
0
    return NULL;
259
14.9k
  weights->count = -1;
260
14.9k
  weights->max_len = max_len;
261
14.9k
  weights->index[0] = patch_w;
262
14.9k
  weights->n = n;
263
14.9k
  weights->patch_l = patch_l;
264
14.9k
  weights->flip = flip;
265
14.9k
  return weights;
266
14.9k
}
267
268
/* j is destination pixel in the patch_l..patch_l+patch_w range */
269
/* Start an empty record for destination pixel j.
 *
 * Pixels must be started in increasing order with none skipped (asserted
 * through count). The new record is placed directly after the previous
 * pixel's record (its offset + 2 + its len), or at the offset stored in
 * index[0] for the first pixel. The row pointer for j is then written and
 * the record's min and len are zeroed. new_line is set so that the next
 * insert_weight call knows the record is still empty. */
static void
270
init_weights(fz_weights *weights, int j)
271
188k
{
272
188k
  int index;
273
274
188k
  j -= weights->patch_l;
275
188k
  assert(weights->count == j-1);
276
188k
  weights->count++;
277
188k
  weights->new_line = 1;
278
188k
  if (j == 0)
279
14.9k
    index = weights->index[0];
280
173k
  else
281
173k
  {
282
173k
    index = weights->index[j-1];
283
173k
    index += 2 + weights->index[index+1];
284
173k
  }
285
188k
  weights->index[j] = index; /* row pointer */
286
188k
  weights->index[index] = 0; /* min */
287
188k
  weights->index[index+1] = 0; /* len */
288
188k
}
289
290
/* Record that source pixel i contributes `weight` to destination pixel j.
 *
 * j is given in the patch_l..patch_l+patch_w range and is rebased here.
 * The record for j must already have been started by init_weights.
 *
 * - First weight for the pixel: min becomes i and len restarts at 0.
 * - i below the current min: the stored weights are shifted up one slot at
 *   a time, a zero is put in front and min is decremented, until min == i.
 * - i at or beyond the current end: the record is extended, zero filling
 *   any gap, and the weight is stored in the new last slot.
 * - i inside the current range: the weight is added to the existing slot.
 */
static void
291
insert_weight(fz_weights *weights, int j, int i, int weight)
292
665k
{
293
665k
  int min, len, index;
294
295
  /* Move j from patch_l...patch_l+patch_w range to 0..patch_w range */
296
665k
  j -= weights->patch_l;
297
665k
  if (weights->new_line)
298
188k
  {
299
    /* New line */
300
188k
    weights->new_line = 0;
301
188k
    index = weights->index[j]; /* row pointer */
302
188k
    weights->index[index] = i; /* min */
303
188k
    weights->index[index+1] = 0; /* len */
304
188k
  }
305
665k
  index = weights->index[j];
306
665k
  min = weights->index[index++];
307
665k
  len = weights->index[index++];
  /* From here on index refers to the first weight slot, so index-2 is the
   * min slot and index-1 is the len slot. */
308
665k
  while (i < min)
309
0
  {
310
    /* This only happens in rare cases, but we need to insert
311
     * one earlier. In exceedingly rare cases we may need to
312
     * insert more than one earlier. */
313
0
    int k;
314
315
0
    for (k = len; k > 0; k--)
316
0
    {
317
0
      weights->index[index+k] = weights->index[index+k-1];
318
0
    }
319
0
    weights->index[index] = 0;
320
0
    min--;
321
0
    len++;
322
0
    weights->index[index-2] = min;
323
0
    weights->index[index-1] = len;
324
0
  }
325
665k
  if (i-min >= len)
326
665k
  {
327
    /* The usual case */
328
665k
    while (i-min >= ++len)
329
0
    {
330
0
      weights->index[index+len-1] = 0;
331
0
    }
332
665k
    assert(len-1 == i-min);
333
665k
    weights->index[index+i-min] = weight;
334
665k
    weights->index[index-1] = len;
335
665k
    assert(len <= weights->max_len);
336
665k
  }
337
0
  else
338
0
  {
339
    /* Infrequent case */
340
0
    weights->index[index+i-min] += weight;
341
0
  }
342
665k
}
343
344
/* Evaluate the filter for the (destination j, source i) pair and store the
 * resulting weight.
 *
 * The distance between the two pixel centres is measured in destination
 * space, scaled by G and made non-negative before being passed to the
 * kernel. The kernel value is scaled by F and converted to a fixed point
 * weight where 256 represents 1.0 (rounded to nearest).
 *
 * Nothing is stored if i lies outside 0..src_w-1 or if the weight rounds
 * to zero. */
static void
345
add_weight(fz_weights *weights, int j, int i, fz_scale_filter *filter,
346
  float x, float F, float G, int src_w, float dst_w)
347
718k
{
348
718k
  float dist = j - x + 0.5f - ((i + 0.5f)*dst_w/src_w);
349
718k
  float f;
350
718k
  int weight;
351
352
718k
  dist *= G;
353
718k
  if (dist < 0)
354
356k
    dist = -dist;
355
718k
  f = filter->fn(filter, dist)*F;
356
718k
  weight = (int)(256*f+0.5f);
357
358
  /* Ensure i is in range */
359
718k
  if (i < 0 || i >= src_w)
360
24.8k
    return;
361
693k
  if (weight != 0)
362
665k
    insert_weight(weights, j, i, weight);
363
693k
}
364
365
/* Rewrite the record for destination pixel j so that it always holds
 * max_len weights, and so that the weight belonging to source row r is
 * stored in slot (r % max_len).
 *
 * The existing weights are first copied to a scratch area max_len slots
 * beyond the record's weights. If fewer than max_len weights are present
 * the scratch copy is zero padded and len becomes max_len. If the padded
 * range would run past src_w, min is pulled back to src_w - len and `off`
 * remembers the shift, so each weight is still written to the slot for its
 * original source row.
 *
 * NOTE(review): when scaling up, new_weights sets max_len to
 * 2*filter->width without clamping it to src_w, so min = src_w - len could
 * go negative here if src_w < max_len - confirm callers exclude that. */
static void
366
reorder_weights(fz_weights *weights, int j, int src_w)
367
116k
{
368
116k
  int idx = weights->index[j - weights->patch_l];
369
116k
  int min = weights->index[idx++];
370
116k
  int len = weights->index[idx++];
371
116k
  int max = weights->max_len;
372
116k
  int tmp = idx+max;
373
116k
  int i, off;
374
375
  /* Copy into the temporary area */
376
116k
  memcpy(&weights->index[tmp], &weights->index[idx], sizeof(int)*len);
377
378
  /* Pad out if required */
379
116k
  assert(len <= max);
380
116k
  assert(min+len <= src_w);
381
116k
  off = 0;
382
116k
  if (len < max)
383
98.0k
  {
384
98.0k
    memset(&weights->index[tmp+len], 0, sizeof(int)*(max-len));
385
98.0k
    len = max;
386
98.0k
    if (min + len > src_w)
387
4.71k
    {
388
4.71k
      off = min + len - src_w;
389
4.71k
      min = src_w - len;
390
4.71k
      weights->index[idx-2] = min;
391
4.71k
    }
392
98.0k
    weights->index[idx-1] = len;
393
98.0k
  }
394
395
  /* Copy back into the proper places */
396
654k
  for (i = 0; i < len; i++)
397
537k
  {
398
537k
    weights->index[idx+((min+i+off) % max)] = weights->index[tmp+i];
399
537k
  }
400
116k
}
401
402
/* Due to rounding and edge effects, the sums for the weights sometimes don't
403
 * add up to 256. This causes visible rendering effects. Therefore, we take
404
 * pains to ensure that they 1) never exceed 256, and 2) add up to exactly
405
 * 256 for all pixels that are completely covered. See bug #691629. */
406
/* Normalise the weights of destination pixel j.
 *
 * j is the destination pixel index (it is compared against 0 and w-1, and
 * rebased by patch_l only for the table lookup); w is the integer
 * destination width, x the offset and wf the fractional destination width.
 *
 * The weights are summed and the largest one located; any correction
 * (256 - sum) is applied to that largest weight. maxidx is captured after
 * idx has been advanced, hence the maxidx-1 when writing.
 *
 * NOTE(review): if a record had len == 0, maxidx would stay 0 and the
 * adjustment would write index[-1]. make_weights inserts a fallback weight
 * before calling this, so len is at least 1 on that path. */
static void
407
check_weights(fz_weights *weights, int j, int w, float x, float wf)
408
188k
{
409
188k
  int idx, len;
410
188k
  int sum = 0;
411
188k
  int max = -256;
412
188k
  int maxidx = 0;
413
188k
  int i;
414
415
188k
  idx = weights->index[j - weights->patch_l];
416
188k
  idx++; /* min */
417
188k
  len = weights->index[idx++];
418
419
854k
  for(i=0; i < len; i++)
420
665k
  {
421
665k
    int v = weights->index[idx++];
422
665k
    sum += v;
423
665k
    if (v > max)
424
402k
    {
425
402k
      max = v;
426
402k
      maxidx = idx;
427
402k
    }
428
665k
  }
429
  /* If we aren't the first or last pixel, OR if the sum is too big
430
   * then adjust it. */
431
188k
  if (((j != 0) && (j != w-1)) || (sum > 256))
432
164k
    weights->index[maxidx-1] += 256-sum;
433
  /* Otherwise, if we are the first pixel, and it's fully covered, then
434
   * adjust it. */
435
24.5k
  else if ((j == 0) && (x < 0.0001f) && (sum != 256))
436
14.2k
    weights->index[maxidx-1] += 256-sum;
437
  /* Finally, if we are the last pixel, and it's fully covered, then
438
   * adjust it. */
439
10.2k
  else if ((j == w-1) && (w - wf < 0.0001f) && (sum != 256))
440
10.0k
    weights->index[maxidx-1] += 256-sum;
441
188k
}
442
443
/* Tighten the source range [l, *rp] so that neither end lies more than
 * `window` away from `centre`: l is stepped up and r stepped down one at a
 * time until both are within range. The adjusted right end is written back
 * through rp and the adjusted left end is returned. */
static int
444
window_fix(int l, int *rp, float window, float centre)
445
0
{
446
0
  int r = *rp;
447
0
  while (centre - l > window)
448
0
    l++;
449
0
  while (r - centre > window)
450
0
    r--;
451
0
  *rp = r;
452
0
  return l;
453
0
}
454
455
static fz_weights *
456
make_weights(fz_context *ctx, int src_w, float x, float dst_w, fz_scale_filter *filter, int vertical, int dst_w_int, int patch_l, int patch_r, int n, int flip, fz_scale_cache *cache)
457
15.0k
{
458
15.0k
  fz_weights *weights;
459
15.0k
  float F, G;
460
15.0k
  float window;
461
15.0k
  int j;
462
463
15.0k
  if (cache)
464
15.0k
  {
465
15.0k
    if (cache->src_w == src_w && cache->x == x && cache->dst_w == dst_w &&
466
10.6k
      cache->filter == filter && cache->vertical == vertical &&
467
10.6k
      cache->dst_w_int == dst_w_int &&
468
10.6k
      cache->patch_l == patch_l && cache->patch_r == patch_r &&
469
10.2k
      cache->n == n && cache->flip == flip)
470
84
    {
471
84
      return cache->weights;
472
84
    }
473
14.9k
    cache->src_w = src_w;
474
14.9k
    cache->x = x;
475
14.9k
    cache->dst_w = dst_w;
476
14.9k
    cache->filter = filter;
477
14.9k
    cache->vertical = vertical;
478
14.9k
    cache->dst_w_int = dst_w_int;
479
14.9k
    cache->patch_l = patch_l;
480
14.9k
    cache->patch_r = patch_r;
481
14.9k
    cache->n = n;
482
14.9k
    cache->flip = flip;
483
14.9k
    fz_free(ctx, cache->weights);
484
14.9k
    cache->weights = NULL;
485
14.9k
  }
486
487
14.9k
  if (dst_w < src_w)
488
14.9k
  {
489
    /* Scaling down */
490
14.9k
    F = dst_w / src_w;
491
14.9k
    G = 1;
492
14.9k
  }
493
0
  else
494
0
  {
495
    /* Scaling up */
496
0
    F = 1;
497
0
    G = src_w / dst_w;
498
0
  }
499
14.9k
  window = filter->width / F;
500
14.9k
  weights = new_weights(ctx, filter, src_w, dst_w, patch_r-patch_l, n, flip, patch_l);
501
14.9k
  if (!weights)
502
0
    return NULL;
503
203k
  for (j = patch_l; j < patch_r; j++)
504
188k
  {
505
    /* find the position of the centre of dst[j] in src space */
506
188k
    float centre = (j - x + 0.5f)*src_w/dst_w - 0.5f;
507
188k
    int l, r;
508
188k
    l = ceilf(centre - window);
509
188k
    r = floorf(centre + window);
510
511
    /* Now, due to the vagaries of floating point, if centre is large, l
512
     * and r can actually end up further than 2*window apart. All we care
513
     * about in this case is that we don't crash! We want a cheap correction
514
     * that avoids the assert and doesn't cost too much in the normal case.
515
     * This should do. */
516
188k
    if (r - l > 2 * window)
517
0
      l = window_fix(l, &r, window, centre);
518
519
188k
    init_weights(weights, j);
520
907k
    for (; l <= r; l++)
521
718k
    {
522
718k
      add_weight(weights, j, l, filter, x, F, G, src_w, dst_w);
523
718k
    }
524
188k
    if (weights->new_line)
525
0
    {
526
      /* In very rare cases (bug 706764) we might not actually
527
       * have generated any non-zero weights for this destination
528
       * pixel. Just use the central pixel. */
529
0
      int src_x = floorf(centre);
530
0
      if (src_x >= src_w)
531
0
        src_x = src_w-1;
532
0
      if (src_x < 0)
533
0
        src_x = 0;
534
0
      insert_weight(weights, j, src_x, 1);
535
0
    }
536
188k
    check_weights(weights, j, dst_w_int, x, dst_w);
537
188k
    if (vertical)
538
116k
    {
539
116k
      reorder_weights(weights, j, src_w);
540
116k
    }
541
188k
  }
542
14.9k
  weights->count++; /* weights->count = dst_w_int now */
543
14.9k
  if (cache)
544
14.9k
  {
545
14.9k
    cache->weights = weights;
546
14.9k
  }
547
14.9k
  return weights;
548
14.9k
}
549
550
/* Horizontally scale one row of src into dst using the given weights, for
 * any number of components (weights->n).
 *
 * contrib walks the records that start at index[index[0]]. For each of the
 * weights->count output pixels it reads min (source pixel index) and len,
 * then accumulates len source pixels, n components each, into tmp[]. Every
 * accumulator starts at 128 so that the final >>8 rounds rather than
 * truncates, and is reset to 128 after the pixel has been written.
 *
 * When weights->flip is set the output is written from the last pixel
 * backwards: dst starts at pixel count-1 and, after the n bytes of a pixel
 * have been stored, is moved back by 2*n to the start of the previous
 * pixel. */
static void
551
scale_row_to_temp(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
552
0
{
553
0
  const int *contrib = &weights->index[weights->index[0]];
554
0
  int len, i, j, n;
555
0
  const unsigned char *min;
556
0
  int tmp[FZ_MAX_COLORS];
557
0
  int *t = tmp;
558
559
0
  n = weights->n;
560
0
  for (j = 0; j < n; j++)
561
0
    tmp[j] = 128;
562
0
  if (weights->flip)
563
0
  {
564
0
    dst += (weights->count-1)*n;
565
0
    for (i=weights->count; i > 0; i--)
566
0
    {
567
0
      min = &src[n * *contrib++];
568
0
      len = *contrib++;
569
0
      while (len-- > 0)
570
0
      {
571
0
        for (j = n; j > 0; j--)
572
0
          *t++ += *min++ * *contrib;
573
0
        t -= n;
574
0
        contrib++;
575
0
      }
576
0
      for (j = n; j > 0; j--)
577
0
      {
578
0
        *dst++ = (unsigned char)(*t>>8);
579
0
        *t++ = 128;
580
0
      }
581
0
      t -= n;
582
0
      dst -= n*2;
583
0
    }
584
0
  }
585
0
  else
586
0
  {
587
0
    for (i=weights->count; i > 0; i--)
588
0
    {
589
0
      min = &src[n * *contrib++];
590
0
      len = *contrib++;
591
0
      while (len-- > 0)
592
0
      {
593
0
        for (j = n; j > 0; j--)
594
0
          *t++ += *min++ * *contrib;
595
0
        t -= n;
596
0
        contrib++;
597
0
      }
598
0
      for (j = n; j > 0; j--)
599
0
      {
600
0
        *dst++ = (unsigned char)(*t>>8);
601
0
        *t++ = 128;
602
0
      }
603
0
      t -= n;
604
0
    }
605
0
  }
606
0
}
607
608
#ifdef ARCH_ARM
609
610
/* Prototypes for the ARM assembly row scalers; this section is only
 * compiled when ARCH_ARM is defined. Each routine is declared with
 * __attribute__((naked)), and the definitions that follow consist of a
 * single asm statement that saves and restores registers itself. */
static void
611
scale_row_to_temp1(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
612
__attribute__((naked));
613
614
static void
615
scale_row_to_temp2(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
616
__attribute__((naked));
617
618
static void
619
scale_row_to_temp3(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
620
__attribute__((naked));
621
622
static void
623
scale_row_to_temp4(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
624
__attribute__((naked));
625
626
static void
627
scale_row_from_temp(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int width, int n, int row)
628
__attribute__((naked));
629
630
static void
631
scale_row_from_temp_alpha(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int width, int n, int row)
632
__attribute__((naked));
633
634
/* ARM assembly row scaler for one byte per pixel (the source address is
 * src + min with no multiplication).
 *
 * Arguments arrive in r0 = dst, r1 = src, r2 = weights. flip is loaded from
 * offset 0 of *weights and count from offset 4; r2 is then advanced by a
 * further 20 bytes to reach index[], so this code depends on the field
 * layout of fz_weights.
 *
 * Each output pixel starts from an accumulator of 128 and the result is
 * shifted right by 8. The inner loop handles up to two weights per pass:
 * the first load/multiply-accumulate of the pair is conditional on "gt",
 * the second is unconditional, and the remaining count drops by 2.
 *
 * In the flip path dst is first advanced by count and bytes are stored with
 * pre-decrement; otherwise bytes are stored with post-increment.
 *
 * The function is declared naked above, so the asm pushes r4-r7, r9 and r14
 * on entry and returns by popping into PC. */
static void
635
scale_row_to_temp1(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
636
{
637
  asm volatile(
638
  ENTER_ARM
639
    ".syntax unified\n"
640
  "stmfd  r13!,{r4-r7,r9,r14}       \n"
641
  "@ r0 = dst           \n"
642
  "@ r1 = src           \n"
643
  "@ r2 = weights           \n"
644
  "ldr  r12,[r2],#4   @ r12= flip   \n"
645
  "ldr  r3, [r2],#20    @ r3 = count r2 = &index\n"
646
  "ldr  r4, [r2]    @ r4 = index[0]   \n"
647
  "cmp  r12,#0      @ if (flip)   \n"
648
  "beq  5f      @ {     \n"
649
  "add  r2, r2, r4, LSL #2  @ r2 = &index[index[0]] \n"
650
  "add  r0, r0, r3    @ dst += count    \n"
651
  "1:             \n"
652
  "ldr  r4, [r2], #4    @ r4 = *contrib++ \n"
653
  "ldr  r9, [r2], #4    @ r9 = len = *contrib++ \n"
654
  "mov  r5, #128    @ r5 = a = 128    \n"
655
  "add  r4, r1, r4    @ r4 = min = &src[r4] \n"
656
  "subs r9, r9, #1    @ len--     \n"
657
  "blt  3f      @ while (len >= 0)  \n"
658
  "2:       @ {     \n"
659
  "ldrgt  r6, [r2], #4    @ r6 = *contrib++ \n"
660
  "ldrbgt r7, [r4], #1    @ r7 = *min++   \n"
661
  "ldr  r12,[r2], #4    @ r12 = *contrib++  \n"
662
  "ldrb r14,[r4], #1    @ r14 = *min++    \n"
663
  "mlagt  r5, r6, r7, r5    @ g += r6 * r7    \n"
664
  "subs r9, r9, #2    @ r9 = len -= 2   \n"
665
  "mla  r5, r12,r14,r5    @ g += r14 * r12  \n"
666
  "bge  2b      @ }     \n"
667
  "3:             \n"
668
  "mov  r5, r5, lsr #8    @ g >>= 8   \n"
669
  "strb r5,[r0, #-1]!   @ *--dst=a    \n"
670
  "subs r3, r3, #1    @ i--     \n"
671
  "bgt  1b      @       \n"
672
  "ldmfd  r13!,{r4-r7,r9,PC}  @ pop, return to thumb  \n"
673
  "5:"
674
  "add  r2, r2, r4, LSL #2  @ r2 = &index[index[0]] \n"
675
  "6:"
676
  "ldr  r4, [r2], #4    @ r4 = *contrib++ \n"
677
  "ldr  r9, [r2], #4    @ r9 = len = *contrib++ \n"
678
  "mov  r5, #128    @ r5 = a = 128    \n"
679
  "add  r4, r1, r4    @ r4 = min = &src[r4] \n"
680
  "subs r9, r9, #1    @ len--     \n"
681
  "blt  9f      @ while (len > 0) \n"
682
  "7:       @ {     \n"
683
  "ldrgt  r6, [r2], #4    @ r6 = *contrib++ \n"
684
  "ldrbgt r7, [r4], #1    @ r7 = *min++   \n"
685
  "ldr  r12,[r2], #4    @ r12 = *contrib++  \n"
686
  "ldrb r14,[r4], #1    @ r14 = *min++    \n"
687
  "mlagt  r5, r6,r7,r5    @ a += r6 * r7    \n"
688
  "subs r9, r9, #2    @ r9 = len -= 2   \n"
689
  "mla  r5, r12,r14,r5    @ a += r14 * r12  \n"
690
  "bge  7b      @ }     \n"
691
  "9:             \n"
692
  "mov  r5, r5, LSR #8    @ a >>= 8   \n"
693
  "strb r5, [r0], #1    @ *dst++=a    \n"
694
  "subs r3, r3, #1    @ i--     \n"
695
  "bgt  6b      @       \n"
696
  "ldmfd  r13!,{r4-r7,r9,PC}  @ pop, return to thumb  \n"
697
  ENTER_THUMB
698
  );
699
}
700
701
/* ARM assembly row scaler for two bytes per pixel (the source address is
 * src + 2*min).
 *
 * Arguments arrive in r0 = dst, r1 = src, r2 = weights; flip, count and
 * index[] are fetched from *weights at byte offsets 0, 4 and 24, so this
 * code depends on the field layout of fz_weights.
 *
 * Two accumulators (r5 and r6) start at 128, gather one weight times two
 * source bytes per inner-loop pass, and are shifted right by 8 before being
 * stored. In the flip path dst starts at dst + 2*count and each pixel is
 * written after stepping back two bytes; otherwise dst advances.
 *
 * NOTE(review): some of the "@" annotations inside the asm text do not
 * match the instruction beside them: the non-flip inner loop loads the
 * weight into r14 although it is annotated as r10, and in the flip path
 * the two stores are annotated "*--dst=a" then "*--dst=g" while r5 and r6
 * are labelled g and a respectively where they are initialised. */
static void
702
scale_row_to_temp2(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
703
{
704
  asm volatile(
705
  ENTER_ARM
706
  "stmfd  r13!,{r4-r6,r9-r11,r14}       \n"
707
  "@ r0 = dst           \n"
708
  "@ r1 = src           \n"
709
  "@ r2 = weights           \n"
710
  "ldr  r12,[r2],#4   @ r12= flip   \n"
711
  "ldr  r3, [r2],#20    @ r3 = count r2 = &index\n"
712
  "ldr  r4, [r2]    @ r4 = index[0]   \n"
713
  "cmp  r12,#0      @ if (flip)   \n"
714
  "beq  4f      @ {     \n"
715
  "add  r2, r2, r4, LSL #2  @ r2 = &index[index[0]] \n"
716
  "add  r0, r0, r3, LSL #1  @ dst += 2*count  \n"
717
  "1:             \n"
718
  "ldr  r4, [r2], #4    @ r4 = *contrib++ \n"
719
  "ldr  r9, [r2], #4    @ r9 = len = *contrib++ \n"
720
  "mov  r5, #128    @ r5 = g = 128    \n"
721
  "mov  r6, #128    @ r6 = a = 128    \n"
722
  "add  r4, r1, r4, LSL #1  @ r4 = min = &src[2*r4] \n"
723
  "cmp  r9, #0      @ while (len-- > 0) \n"
724
  "beq  3f      @ {     \n"
725
  "2:             \n"
726
  "ldr  r14,[r2], #4    @ r14 = *contrib++  \n"
727
  "ldrb r11,[r4], #1    @ r11 = *min++    \n"
728
  "ldrb r12,[r4], #1    @ r12 = *min++    \n"
729
  "subs r9, r9, #1    @ r9 = len--    \n"
730
  "mla  r5, r14,r11,r5    @ g += r11 * r14  \n"
731
  "mla  r6, r14,r12,r6    @ a += r12 * r14  \n"
732
  "bgt  2b      @ }     \n"
733
  "3:             \n"
734
  "mov  r5, r5, lsr #8    @ g >>= 8   \n"
735
  "mov  r6, r6, lsr #8    @ a >>= 8   \n"
736
  "strb r5, [r0, #-2]!    @ *--dst=a    \n"
737
  "strb r6, [r0, #1]    @ *--dst=g    \n"
738
  "subs r3, r3, #1    @ i--     \n"
739
  "bgt  1b      @       \n"
740
  "ldmfd  r13!,{r4-r6,r9-r11,PC}  @ pop, return to thumb  \n"
741
  "4:"
742
  "add  r2, r2, r4, LSL #2  @ r2 = &index[index[0]] \n"
743
  "5:"
744
  "ldr  r4, [r2], #4    @ r4 = *contrib++ \n"
745
  "ldr  r9, [r2], #4    @ r9 = len = *contrib++ \n"
746
  "mov  r5, #128    @ r5 = g = 128    \n"
747
  "mov  r6, #128    @ r6 = a = 128    \n"
748
  "add  r4, r1, r4, LSL #1  @ r4 = min = &src[2*r4] \n"
749
  "cmp  r9, #0      @ while (len-- > 0) \n"
750
  "beq  7f      @ {     \n"
751
  "6:             \n"
752
  "ldr  r14,[r2], #4    @ r10 = *contrib++  \n"
753
  "ldrb r11,[r4], #1    @ r11 = *min++    \n"
754
  "ldrb r12,[r4], #1    @ r12 = *min++    \n"
755
  "subs r9, r9, #1    @ r9 = len--    \n"
756
  "mla  r5, r14,r11,r5    @ g += r11 * r14  \n"
757
  "mla  r6, r14,r12,r6    @ a += r12 * r14  \n"
758
  "bgt  6b      @ }     \n"
759
  "7:             \n"
760
  "mov  r5, r5, lsr #8    @ g >>= 8   \n"
761
  "mov  r6, r6, lsr #8    @ a >>= 8   \n"
762
  "strb r5, [r0], #1    @ *dst++=g    \n"
763
  "strb r6, [r0], #1    @ *dst++=a    \n"
764
  "subs r3, r3, #1    @ i--     \n"
765
  "bgt  5b      @       \n"
766
  "ldmfd  r13!,{r4-r6,r9-r11,PC}  @ pop, return to thumb  \n"
767
  ENTER_THUMB
768
  );
769
}
770
771
/* ARM assembly row scaler for three bytes per pixel (the source address is
 * formed as src + 2*min + min).
 *
 * Arguments arrive in r0 = dst, r1 = src, r2 = weights; flip, count and
 * index[] are fetched from *weights at byte offsets 0, 4 and 24, so this
 * code depends on the field layout of fz_weights.
 *
 * Three accumulators (r5, r6, r7) start at 128, gather one weight times
 * three source bytes per inner-loop pass, and are shifted right by 8 before
 * being stored. r7 doubles as scratch for the address calculation and is
 * only set to 128 afterwards. In the flip path dst starts at dst + 3*count
 * and each pixel is written after stepping back three bytes; otherwise dst
 * advances.
 *
 * NOTE(review): in the non-flip inner loop the weight is loaded into r14
 * although the "@" annotation beside it says r10. */
static void
772
scale_row_to_temp3(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
773
{
774
  asm volatile(
775
  ENTER_ARM
776
  "stmfd  r13!,{r4-r11,r14}       \n"
777
  "@ r0 = dst           \n"
778
  "@ r1 = src           \n"
779
  "@ r2 = weights           \n"
780
  "ldr  r12,[r2],#4   @ r12= flip   \n"
781
  "ldr  r3, [r2],#20    @ r3 = count r2 = &index\n"
782
  "ldr  r4, [r2]    @ r4 = index[0]   \n"
783
  "cmp  r12,#0      @ if (flip)   \n"
784
  "beq  4f      @ {     \n"
785
  "add  r2, r2, r4, LSL #2  @ r2 = &index[index[0]] \n"
786
  "add  r0, r0, r3, LSL #1  @     \n"
787
  "add  r0, r0, r3    @ dst += 3*count  \n"
788
  "1:             \n"
789
  "ldr  r4, [r2], #4    @ r4 = *contrib++ \n"
790
  "ldr  r9, [r2], #4    @ r9 = len = *contrib++ \n"
791
  "mov  r5, #128    @ r5 = r = 128    \n"
792
  "mov  r6, #128    @ r6 = g = 128    \n"
793
  "add  r7, r1, r4, LSL #1  @     \n"
794
  "add  r4, r7, r4    @ r4 = min = &src[3*r4] \n"
795
  "mov  r7, #128    @ r7 = b = 128    \n"
796
  "cmp  r9, #0      @ while (len-- > 0) \n"
797
  "beq  3f      @ {     \n"
798
  "2:             \n"
799
  "ldr  r14,[r2], #4    @ r14 = *contrib++  \n"
800
  "ldrb r8, [r4], #1    @ r8  = *min++    \n"
801
  "ldrb r11,[r4], #1    @ r11 = *min++    \n"
802
  "ldrb r12,[r4], #1    @ r12 = *min++    \n"
803
  "subs r9, r9, #1    @ r9 = len--    \n"
804
  "mla  r5, r14,r8, r5    @ r += r8  * r14  \n"
805
  "mla  r6, r14,r11,r6    @ g += r11 * r14  \n"
806
  "mla  r7, r14,r12,r7    @ b += r12 * r14  \n"
807
  "bgt  2b      @ }     \n"
808
  "3:             \n"
809
  "mov  r5, r5, lsr #8    @ r >>= 8   \n"
810
  "mov  r6, r6, lsr #8    @ g >>= 8   \n"
811
  "mov  r7, r7, lsr #8    @ b >>= 8   \n"
812
  "strb r5, [r0, #-3]!    @ *--dst=r    \n"
813
  "strb r6, [r0, #1]    @ *--dst=g    \n"
814
  "strb r7, [r0, #2]    @ *--dst=b    \n"
815
  "subs r3, r3, #1    @ i--     \n"
816
  "bgt  1b      @       \n"
817
  "ldmfd  r13!,{r4-r11,PC}  @ pop, return to thumb  \n"
818
  "4:"
819
  "add  r2, r2, r4, LSL #2  @ r2 = &index[index[0]] \n"
820
  "5:"
821
  "ldr  r4, [r2], #4    @ r4 = *contrib++ \n"
822
  "ldr  r9, [r2], #4    @ r9 = len = *contrib++ \n"
823
  "mov  r5, #128    @ r5 = r = 128    \n"
824
  "mov  r6, #128    @ r6 = g = 128    \n"
825
  "add  r7, r1, r4, LSL #1  @ r7 = min = &src[2*r4] \n"
826
  "add  r4, r7, r4    @ r4 = min = &src[3*r4] \n"
827
  "mov  r7, #128    @ r7 = b = 128    \n"
828
  "cmp  r9, #0      @ while (len-- > 0) \n"
829
  "beq  7f      @ {     \n"
830
  "6:             \n"
831
  "ldr  r14,[r2], #4    @ r10 = *contrib++  \n"
832
  "ldrb r8, [r4], #1    @ r8  = *min++    \n"
833
  "ldrb r11,[r4], #1    @ r11 = *min++    \n"
834
  "ldrb r12,[r4], #1    @ r12 = *min++    \n"
835
  "subs r9, r9, #1    @ r9 = len--    \n"
836
  "mla  r5, r14,r8, r5    @ r += r8  * r14  \n"
837
  "mla  r6, r14,r11,r6    @ g += r11 * r14  \n"
838
  "mla  r7, r14,r12,r7    @ b += r12 * r14  \n"
839
  "bgt  6b      @ }     \n"
840
  "7:             \n"
841
  "mov  r5, r5, lsr #8    @ r >>= 8   \n"
842
  "mov  r6, r6, lsr #8    @ g >>= 8   \n"
843
  "mov  r7, r7, lsr #8    @ b >>= 8   \n"
844
  "strb r5, [r0], #1    @ *dst++=r    \n"
845
  "strb r6, [r0], #1    @ *dst++=g    \n"
846
  "strb r7, [r0], #1    @ *dst++=b    \n"
847
  "subs r3, r3, #1    @ i--     \n"
848
  "bgt  5b      @       \n"
849
  "ldmfd  r13!,{r4-r11,PC}  @ pop, return to thumb  \n"
850
  ENTER_THUMB
851
  );
852
}
853
854
/*
 * ARM assembly version of the horizontal scaling pass for pixels with
 * 4 components.
 *
 * Register use on entry (see the '@' notes in the assembly):
 * r0 = dst, r1 = src, r2 = weights.
 *
 * The code reads 'flip' from offset 0 of the weights structure and
 * 'count' from offset 4, then steps r2 on by a further 20 bytes and
 * treats the result as the index array. In other words the assembly
 * hard codes index[] as living 24 bytes into fz_weights.
 * NOTE(review): that offset must match the fz_weights declaration,
 * which is not visible here -- confirm if the structure changes.
 *
 * For each of 'count' output pixels a (first source pixel, length)
 * pair is read from the contribution list, followed by 'length'
 * weights. Every source pixel is fetched as a single 32 bit word and
 * split using the 0x00FF00FF mask into two registers that each hold
 * two 8 bit components in 16 bit lanes, so each mla accumulates two
 * components at once. The accumulators start at 0x00800080 (128 in
 * each lane, for rounding) and the result is taken from bits 8-15 of
 * each lane before being repacked into one word.
 *
 * If flip is non zero the output pixels are stored backwards from
 * dst + 4*count using a pre-decrementing store, otherwise they are
 * stored forwards from dst.
 *
 * The assembly pushes r4-r11 and r14 on entry and returns by popping
 * them with the saved r14 loaded into PC.
 *
 * NOTE(review): pixels are read and written with word loads/stores
 * (ldr/str); presumably src and dst are suitably aligned or unaligned
 * access is acceptable on the targets this is built for -- confirm.
 * NOTE(review): a few of the inline '@' notes name the wrong register
 * (e.g. the 0x00FF00FF mask is loaded into r6, and the first mla
 * multiplies by r12, not r14).
 */
static void
855
scale_row_to_temp4(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
856
{
857
  asm volatile(
858
  ENTER_ARM
859
  "stmfd  r13!,{r4-r11,r14}       \n"
860
  "@ r0 = dst           \n"
861
  "@ r1 = src           \n"
862
  "@ r2 = weights           \n"
863
  "ldr  r12,[r2],#4   @ r12= flip   \n"
864
  "ldr  r3, [r2],#20    @ r3 = count r2 = &index\n"
865
  "ldr  r4, [r2]    @ r4 = index[0]   \n"
866
  "ldr  r5,=0x00800080    @ r5 = rounding   \n"
867
  "ldr  r6,=0x00FF00FF    @ r7 = 0x00FF00FF \n"
868
  "cmp  r12,#0      @ if (flip)   \n"
869
  "beq  4f      @ {     \n"
870
  /* Flipped case: start one pixel past the end of the output and store backwards. */
  "add  r2, r2, r4, LSL #2  @ r2 = &index[index[0]] \n"
871
  "add  r0, r0, r3, LSL #2  @ dst += 4*count  \n"
872
  "1:             \n"
873
  "ldr  r4, [r2], #4    @ r4 = *contrib++ \n"
874
  "ldr  r9, [r2], #4    @ r9 = len = *contrib++ \n"
875
  "mov  r7, r5      @ r7 = b = rounding \n"
876
  "mov  r8, r5      @ r8 = a = rounding \n"
877
  "add  r4, r1, r4, LSL #2  @ r4 = min = &src[4*r4] \n"
878
  "cmp  r9, #0      @ while (len-- > 0) \n"
879
  "beq  3f      @ {     \n"
880
  "2:             \n"
881
  "ldr  r11,[r4], #4    @ r11 = *min++    \n"
882
  "ldr  r10,[r2], #4    @ r10 = *contrib++  \n"
883
  "subs r9, r9, #1    @ r9 = len--    \n"
884
  "and  r12,r6, r11   @ r12 = __22__00  \n"
885
  "and  r11,r6, r11,LSR #8  @ r11 = __33__11  \n"
886
  "mla  r7, r10,r12,r7    @ b += r14 * r10  \n"
887
  "mla  r8, r10,r11,r8    @ a += r11 * r10  \n"
888
  "bgt  2b      @ }     \n"
889
  "3:             \n"
890
  /* Take bits 8-15 of each 16 bit lane and repack the 4 components into one word. */
  "and  r7, r6, r7, lsr #8  @ r7 = __22__00   \n"
891
  "bic  r8, r8, r6    @ r8 = 33__11__   \n"
892
  "orr  r7, r7, r8    @ r7 = 33221100   \n"
893
  "str  r7, [r0, #-4]!    @ *--dst=r    \n"
894
  "subs r3, r3, #1    @ i--     \n"
895
  "bgt  1b      @       \n"
896
  "ldmfd  r13!,{r4-r11,PC}  @ pop, return to thumb  \n"
897
  /* Unflipped case: same calculation, storing forwards from dst. */
  "4:             \n"
898
  "add  r2, r2, r4, LSL #2  @ r2 = &index[index[0]] \n"
899
  "5:             \n"
900
  "ldr  r4, [r2], #4    @ r4 = *contrib++ \n"
901
  "ldr  r9, [r2], #4    @ r9 = len = *contrib++ \n"
902
  "mov  r7, r5      @ r7 = b = rounding \n"
903
  "mov  r8, r5      @ r8 = a = rounding \n"
904
  "add  r4, r1, r4, LSL #2  @ r4 = min = &src[4*r4] \n"
905
  "cmp  r9, #0      @ while (len-- > 0) \n"
906
  "beq  7f      @ {     \n"
907
  "6:             \n"
908
  "ldr  r11,[r4], #4    @ r11 = *min++    \n"
909
  "ldr  r10,[r2], #4    @ r10 = *contrib++  \n"
910
  "subs r9, r9, #1    @ r9 = len--    \n"
911
  "and  r12,r6, r11   @ r12 = __22__00  \n"
912
  "and  r11,r6, r11,LSR #8  @ r11 = __33__11  \n"
913
  "mla  r7, r10,r12,r7    @ b += r14 * r10  \n"
914
  "mla  r8, r10,r11,r8    @ a += r11 * r10  \n"
915
  "bgt  6b      @ }     \n"
916
  "7:             \n"
917
  "and  r7, r6, r7, lsr #8  @ r7 = __22__00   \n"
918
  "bic  r8, r8, r6    @ r8 = 33__11__   \n"
919
  "orr  r7, r7, r8    @ r7 = 33221100   \n"
920
  "str  r7, [r0], #4    @ *dst++=r    \n"
921
  "subs r3, r3, #1    @ i--     \n"
922
  "bgt  5b      @       \n"
923
  "ldmfd  r13!,{r4-r11,PC}  @ pop, return to thumb  \n"
924
  ENTER_THUMB
925
  );
926
}
927
928
/*
 * ARM assembly version of the vertical scaling pass: combine rows of
 * the temporary buffer into one output row.
 *
 * Register use on entry (see the '@' notes in the assembly):
 * r0 = dst, r1 = src, r2 = weights, r3 = width. After the 9 register
 * push, n is read from [r13, #4*9] and row from [r13, #4*10].
 *
 * r2 is advanced by 24 bytes to reach the index array.
 * NOTE(review): that offset is hard coded and must match the
 * fz_weights declaration, which is not visible here.
 *
 * width is multiplied by n so that it counts bytes; it is used both
 * as the number of output bytes and as the distance between successive
 * rows in src. The contribution entry for 'row' is found via
 * index[index[row]]; its 'min' value is skipped and its length is
 * kept in r14.
 *
 * Two loops are used:
 *  - A fast loop producing 4 output bytes per iteration, using the
 *    0x00FF00FF mask to hold two components per register in 16 bit
 *    lanes, with 0x00800080 as the rounding start value. Unless
 *    ARCH_UNALIGNED_OK is defined, this loop is only used when both
 *    the byte width and src are multiples of 4.
 *  - A byte at a time loop for whatever remains (or for everything
 *    if the fast loop cannot be used).
 *
 * NOTE(review): in the fast loop, if len <= 0 the 'ble 3f' branch
 * skips the and/and/orr repacking, so r5 still holds the rounding
 * constant when it is stored. The byte loop would store 0 in the
 * same situation. Presumably len is never <= 0 in practice -- confirm.
 * NOTE(review): the fast loop stores with 'str' but only src and the
 * width are tested for alignment, not dst -- confirm this is safe.
 */
static void
929
scale_row_from_temp(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int width, int n, int row)
930
{
931
  asm volatile(
932
  ENTER_ARM
933
  "stmfd  r13!,{r4-r11,r14}       \n"
934
  "@ r0 = dst           \n"
935
  "@ r1 = src           \n"
936
  "@ r2 = &weights->index[0]        \n"
937
  "@ r3 = width           \n"
938
  "@ r12= row           \n"
939
  "ldr  r14,[r13,#4*9]    @ r14= n    \n"
940
  "ldr  r12,[r13,#4*10]   @ r12= row    \n"
941
  "add  r2, r2, #24   @ r2 = weights->index \n"
942
  "mul    r3, r14, r3   @ r3 = width *= n       \n"
943
  "ldr  r4, [r2, r12, LSL #2] @ r4 = index[row] \n"
944
  "add  r2, r2, #4    @ r2 = &index[1]  \n"
945
  "subs r6, r3, #4    @ r6 = x = width-4  \n"
946
  "ldr  r14,[r2, r4, LSL #2]! @ r2 = contrib = index[index[row]+1]\n"
947
  "       @ r14= len = *contrib \n"
948
  "blt  4f      @ while (x >= 0) {  \n"
949
#ifndef ARCH_UNALIGNED_OK
950
  /* Fall back to the byte loop unless both the byte width and src are multiples of 4. */
  "tst  r3, #3      @ if ((r3 & 3)    \n"
951
  "tsteq  r1, #3      @ || (r1 & 3))  \n"
952
  "bne  4f      @ can't do fast code  \n"
953
#endif
954
  "ldr  r9, =0x00FF00FF   @ r9 = 0x00FF00FF \n"
955
  /* Fast loop: 4 output bytes per iteration. */
  "1:             \n"
956
  "ldr  r7, =0x00800080   @ r5 = val0 = round \n"
957
  "stmfd  r13!,{r1,r2,r7}   @ stash r1,r2,r5  \n"
958
  "       @ r1 = min = src  \n"
959
  "       @ r2 = contrib2-4 \n"
960
  "movs r8, r14     @ r8 = len2 = len \n"
961
  "mov  r5, r7      @ r7 = val1 = round \n"
962
  "ble  3f      @ while (len2-- > 0) {  \n"
963
  "2:             \n"
964
  "ldr  r12,[r1], r3    @ r12 = *min  r5 = min += width\n"
965
  "ldr  r10,[r2, #4]!   @ r10 = *contrib2++ \n"
966
  "subs r8, r8, #1    @ len2--    \n"
967
  "and  r11,r9, r12   @ r11= __22__00   \n"
968
  "and  r12,r9, r12,LSR #8  @ r12= __33__11   \n"
969
  "mla  r5, r10,r11,r5    @ r5 = val0 += r11 * r10\n"
970
  "mla  r7, r10,r12,r7    @ r7 = val1 += r12 * r10\n"
971
  "bgt  2b      @ }     \n"
972
  "and  r5, r9, r5, LSR #8  @ r5 = __22__00   \n"
973
  "and  r7, r7, r9, LSL #8  @ r7 = 33__11__   \n"
974
  "orr  r5, r5, r7    @ r5 = 33221100   \n"
975
  "3:             \n"
976
  "ldmfd  r13!,{r1,r2,r7}   @ restore r1,r2,r7  \n"
977
  "subs r6, r6, #4    @ x--     \n"
978
  "add  r1, r1, #4    @ src++     \n"
979
  "str  r5, [r0], #4    @ *dst++ = val    \n"
980
  "bge  1b      @       \n"
981
  /* Byte loop: handles the remaining (or all) output bytes one at a time. */
  "4:       @ } (Less than 4 to go) \n"
982
  "adds r6, r6, #4    @ r6 = x += 4   \n"
983
  "beq  8f      @ if (x == 0) done  \n"
984
  "5:             \n"
985
  "mov  r5, r1      @ r5 = min = src  \n"
986
  "mov  r7, #128    @ r7 = val = 128  \n"
987
  "movs r8, r14     @ r8 = len2 = len \n"
988
  "add  r9, r2, #4    @ r9 = contrib2   \n"
989
  "ble  7f      @ while (len2-- > 0) {  \n"
990
  "6:             \n"
991
  "ldr  r10,[r9], #4    @ r10 = *contrib2++ \n"
992
  "ldrb r12,[r5], r3    @ r12 = *min  r5 = min += width\n"
993
  "subs r8, r8, #1    @ len2--    \n"
994
  "@ stall r12            \n"
995
  "mla  r7, r10,r12,r7    @ val += r12 * r10  \n"
996
  "bgt  6b      @ }     \n"
997
  "7:             \n"
998
  "mov  r7, r7, asr #8    @ r7 = val >>= 8  \n"
999
  "subs r6, r6, #1    @ x--     \n"
1000
  "add  r1, r1, #1    @ src++     \n"
1001
  "strb r7, [r0], #1    @ *dst++ = val    \n"
1002
  "bgt  5b      @       \n"
1003
  "8:             \n"
1004
  "ldmfd  r13!,{r4-r11,PC}  @ pop, return to thumb  \n"
1005
  ".ltorg             \n"
1006
  ENTER_THUMB
1007
  );
1008
}
1009
1010
static void
1011
scale_row_from_temp_alpha(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int width, int n, int row)
1012
{
1013
  asm volatile(
1014
  ENTER_ARM
1015
  "stmfd  r13!,{r4-r11,r14}       \n"
1016
  "mov  r11,#255    @ r11= 255    \n"
1017
  "ldr  r12,[r13,#4*10]   @ r12= row    \n"
1018
  "@ r0 = dst           \n"
1019
  "@ r1 = src           \n"
1020
  "@ r2 = &weights->index[0]        \n"
1021
  "@ r3 = width           \n"
1022
  "@ r11= 255           \n"
1023
  "@ r12= row           \n"
1024
  "add  r2, r2, #24   @ r2 = weights->index \n"
1025
  "ldr  r4, [r2, r12, LSL #2] @ r4 = index[row] \n"
1026
  "add  r2, r2, #4    @ r2 = &index[1]  \n"
1027
  "mov  r6, r3      @ r6 = x = width  \n"
1028
  "ldr  r14,[r2, r4, LSL #2]! @ r2 = contrib = index[index[row]+1]\n"
1029
  "       @ r14= len = *contrib \n"
1030
  "5:             \n"
1031
  "ldr  r4,[r13,#4*9]   @ r10= nn = n   \n"
1032
  "1:             \n"
1033
  "mov  r5, r1      @ r5 = min = src  \n"
1034
  "mov  r7, #128    @ r7 = val = 128  \n"
1035
  "movs r8, r14     @ r8 = len2 = len \n"
1036
  "add  r9, r2, #4    @ r9 = contrib2   \n"
1037
  "ble  7f      @ while (len2-- > 0) {  \n"
1038
  "6:             \n"
1039
  "ldr  r10,[r9], #4    @ r10 = *contrib2++ \n"
1040
  "ldrb r12,[r5], r3    @ r12 = *min  r5 = min += width\n"
1041
  "subs r8, r8, #1    @ len2--    \n"
1042
  "@ stall r12            \n"
1043
  "mla  r7, r10,r12,r7    @ val += r12 * r10  \n"
1044
  "bgt  6b      @ }     \n"
1045
  "7:             \n"
1046
  "mov  r7, r7, asr #8    @ r7 = val >>= 8  \n"
1047
  "subs r4, r4, #1    @ r4 = nn--   \n"
1048
  "add  r1, r1, #1    @ src++     \n"
1049
  "strb r7, [r0], #1    @ *dst++ = val    \n"
1050
  "bgt  1b      @       \n"
1051
  "subs r6, r6, #1    @ x--     \n"
1052
  "strb r11,[r0], #1    @ *dst++ = 255    \n"
1053
  "bgt  5b      @       \n"
1054
  "ldmfd  r13!,{r4-r11,PC}  @ pop, return to thumb  \n"
1055
  ".ltorg             \n"
1056
  ENTER_THUMB
1057
  );
1058
}
1059
#else
1060
1061
static void
1062
scale_row_to_temp1(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
1063
114k
{
1064
114k
  const int *contrib = &weights->index[weights->index[0]];
1065
114k
  int len, i;
1066
114k
  const unsigned char *min;
1067
1068
114k
  assert(weights->n == 1);
1069
114k
  if (weights->flip)
1070
0
  {
1071
0
    dst += weights->count;
1072
0
    for (i=weights->count; i > 0; i--)
1073
0
    {
1074
0
      int val = 128;
1075
0
      min = &src[*contrib++];
1076
0
      len = *contrib++;
1077
0
      while (len-- > 0)
1078
0
      {
1079
0
        val += *min++ * *contrib++;
1080
0
      }
1081
0
      *--dst = (unsigned char)(val>>8);
1082
0
    }
1083
0
  }
1084
114k
  else
1085
114k
  {
1086
2.57M
    for (i=weights->count; i > 0; i--)
1087
2.45M
    {
1088
2.45M
      int val = 128;
1089
2.45M
      min = &src[*contrib++];
1090
2.45M
      len = *contrib++;
1091
10.4M
      while (len-- > 0)
1092
8.02M
      {
1093
8.02M
        val += *min++ * *contrib++;
1094
8.02M
      }
1095
2.45M
      *dst++ = (unsigned char)(val>>8);
1096
2.45M
    }
1097
114k
  }
1098
114k
}
1099
1100
static void
1101
scale_row_to_temp2(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
1102
0
{
1103
0
  const int *contrib = &weights->index[weights->index[0]];
1104
0
  int len, i;
1105
0
  const unsigned char *min;
1106
1107
0
  assert(weights->n == 2);
1108
0
  if (weights->flip)
1109
0
  {
1110
0
    dst += 2*weights->count;
1111
0
    for (i=weights->count; i > 0; i--)
1112
0
    {
1113
0
      int c1 = 128;
1114
0
      int c2 = 128;
1115
0
      min = &src[2 * *contrib++];
1116
0
      len = *contrib++;
1117
0
      while (len-- > 0)
1118
0
      {
1119
0
        c1 += *min++ * *contrib;
1120
0
        c2 += *min++ * *contrib++;
1121
0
      }
1122
0
      *--dst = (unsigned char)(c2>>8);
1123
0
      *--dst = (unsigned char)(c1>>8);
1124
0
    }
1125
0
  }
1126
0
  else
1127
0
  {
1128
0
    for (i=weights->count; i > 0; i--)
1129
0
    {
1130
0
      int c1 = 128;
1131
0
      int c2 = 128;
1132
0
      min = &src[2 * *contrib++];
1133
0
      len = *contrib++;
1134
0
      while (len-- > 0)
1135
0
      {
1136
0
        c1 += *min++ * *contrib;
1137
0
        c2 += *min++ * *contrib++;
1138
0
      }
1139
0
      *dst++ = (unsigned char)(c1>>8);
1140
0
      *dst++ = (unsigned char)(c2>>8);
1141
0
    }
1142
0
  }
1143
0
}
1144
1145
static void
1146
scale_row_to_temp3(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
1147
122k
{
1148
122k
  const int *contrib = &weights->index[weights->index[0]];
1149
122k
  int len, i;
1150
122k
  const unsigned char *min;
1151
1152
122k
  assert(weights->n == 3);
1153
122k
  if (weights->flip)
1154
0
  {
1155
0
    dst += 3*weights->count;
1156
0
    for (i=weights->count; i > 0; i--)
1157
0
    {
1158
0
      int c1 = 128;
1159
0
      int c2 = 128;
1160
0
      int c3 = 128;
1161
0
      min = &src[3 * *contrib++];
1162
0
      len = *contrib++;
1163
0
      while (len-- > 0)
1164
0
      {
1165
0
        int c = *contrib++;
1166
0
        c1 += *min++ * c;
1167
0
        c2 += *min++ * c;
1168
0
        c3 += *min++ * c;
1169
0
      }
1170
0
      *--dst = (unsigned char)(c3>>8);
1171
0
      *--dst = (unsigned char)(c2>>8);
1172
0
      *--dst = (unsigned char)(c1>>8);
1173
0
    }
1174
0
  }
1175
122k
  else
1176
122k
  {
1177
3.56M
    for (i=weights->count; i > 0; i--)
1178
3.44M
    {
1179
3.44M
      int c1 = 128;
1180
3.44M
      int c2 = 128;
1181
3.44M
      int c3 = 128;
1182
3.44M
      min = &src[3 * *contrib++];
1183
3.44M
      len = *contrib++;
1184
14.3M
      while (len-- > 0)
1185
10.9M
      {
1186
10.9M
        int c = *contrib++;
1187
10.9M
        c1 += *min++ * c;
1188
10.9M
        c2 += *min++ * c;
1189
10.9M
        c3 += *min++ * c;
1190
10.9M
      }
1191
3.44M
      *dst++ = (unsigned char)(c1>>8);
1192
3.44M
      *dst++ = (unsigned char)(c2>>8);
1193
3.44M
      *dst++ = (unsigned char)(c3>>8);
1194
3.44M
    }
1195
122k
  }
1196
122k
}
1197
1198
static void
1199
scale_row_to_temp4(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
1200
0
{
1201
0
  const int *contrib = &weights->index[weights->index[0]];
1202
0
  int len, i;
1203
0
  const unsigned char *min;
1204
1205
0
  assert(weights->n == 4);
1206
0
  if (weights->flip)
1207
0
  {
1208
0
    dst += 4*weights->count;
1209
0
    for (i=weights->count; i > 0; i--)
1210
0
    {
1211
0
      int r = 128;
1212
0
      int g = 128;
1213
0
      int b = 128;
1214
0
      int a = 128;
1215
0
      min = &src[4 * *contrib++];
1216
0
      len = *contrib++;
1217
0
      while (len-- > 0)
1218
0
      {
1219
0
        r += *min++ * *contrib;
1220
0
        g += *min++ * *contrib;
1221
0
        b += *min++ * *contrib;
1222
0
        a += *min++ * *contrib++;
1223
0
      }
1224
0
      *--dst = (unsigned char)(a>>8);
1225
0
      *--dst = (unsigned char)(b>>8);
1226
0
      *--dst = (unsigned char)(g>>8);
1227
0
      *--dst = (unsigned char)(r>>8);
1228
0
    }
1229
0
  }
1230
0
  else
1231
0
  {
1232
0
    for (i=weights->count; i > 0; i--)
1233
0
    {
1234
0
      int r = 128;
1235
0
      int g = 128;
1236
0
      int b = 128;
1237
0
      int a = 128;
1238
0
      min = &src[4 * *contrib++];
1239
0
      len = *contrib++;
1240
0
      while (len-- > 0)
1241
0
      {
1242
0
        r += *min++ * *contrib;
1243
0
        g += *min++ * *contrib;
1244
0
        b += *min++ * *contrib;
1245
0
        a += *min++ * *contrib++;
1246
0
      }
1247
0
      *dst++ = (unsigned char)(r>>8);
1248
0
      *dst++ = (unsigned char)(g>>8);
1249
0
      *dst++ = (unsigned char)(b>>8);
1250
0
      *dst++ = (unsigned char)(a>>8);
1251
0
    }
1252
0
  }
1253
0
}
1254
1255
static void
1256
scale_row_from_temp(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int w, int n, int row)
1257
116k
{
1258
116k
  const int *contrib = &weights->index[weights->index[row]];
1259
116k
  int len, x;
1260
116k
  int width = w * n;
1261
1262
116k
  contrib++; /* Skip min */
1263
116k
  len = *contrib++;
1264
7.92M
  for (x=width; x > 0; x--)
1265
7.80M
  {
1266
7.80M
    const unsigned char *min = src;
1267
7.80M
    int val = 128;
1268
7.80M
    int len2 = len;
1269
7.80M
    const int *contrib2 = contrib;
1270
1271
36.9M
    while (len2-- > 0)
1272
29.1M
    {
1273
29.1M
      val += *min * *contrib2++;
1274
29.1M
      min += width;
1275
29.1M
    }
1276
7.80M
    *dst++ = (unsigned char)(val>>8);
1277
7.80M
    src++;
1278
7.80M
  }
1279
116k
}
1280
1281
static void
1282
scale_row_from_temp_alpha(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int w, int n, int row)
1283
0
{
1284
0
  const int *contrib = &weights->index[weights->index[row]];
1285
0
  int len, x;
1286
0
  int width = w * n;
1287
1288
0
  contrib++; /* Skip min */
1289
0
  len = *contrib++;
1290
0
  for (x=w; x > 0; x--)
1291
0
  {
1292
0
    int nn;
1293
0
    for (nn = n; nn > 0; nn--)
1294
0
    {
1295
0
      const unsigned char *min = src;
1296
0
      int val = 128;
1297
0
      int len2 = len;
1298
0
      const int *contrib2 = contrib;
1299
1300
0
      while (len2-- > 0)
1301
0
      {
1302
0
        val += *min * *contrib2++;
1303
0
        min += width;
1304
0
      }
1305
0
      *dst++ = (unsigned char)(val>>8);
1306
0
      src++;
1307
0
    }
1308
0
    *dst++ = 255;
1309
0
  }
1310
0
}
1311
#endif
1312
1313
#ifdef SINGLE_PIXEL_SPECIALS
1314
static void
1315
duplicate_single_pixel(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, int n, int forcealpha, int w, int h, int stride)
1316
0
{
1317
0
  int i;
1318
1319
0
  for (i = n; i > 0; i--)
1320
0
    *dst++ = *src++;
1321
0
  if (forcealpha)
1322
0
    *dst++ = 255;
1323
0
  n += forcealpha;
1324
0
  for (i = w-1; i > 0; i--)
1325
0
  {
1326
0
    memcpy(dst, dst-n, n);
1327
0
    dst += n;
1328
0
  }
1329
0
  w *= n;
1330
0
  dst -= w;
1331
0
  h--;
1332
0
  while (h--)
1333
0
  {
1334
0
    memcpy(dst+stride, dst, w);
1335
0
    dst += stride;
1336
0
  }
1337
0
}
1338
1339
static void
1340
scale_single_row(unsigned char * FZ_RESTRICT dst, int dstride, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int src_w, int h, int forcealpha)
1341
0
{
1342
0
  const int *contrib = &weights->index[weights->index[0]];
1343
0
  int min, len, i, j, n, nf;
1344
0
  int tmp[FZ_MAX_COLORS];
1345
1346
0
  n = weights->n;
1347
0
  nf = n + forcealpha;
1348
  /* Scale a single row */
1349
0
  for (j = 0; j < nf; j++)
1350
0
    tmp[j] = 128;
1351
0
  if (weights->flip)
1352
0
  {
1353
0
    dst += (weights->count-1)*nf;
1354
0
    for (i=weights->count; i > 0; i--)
1355
0
    {
1356
0
      min = *contrib++;
1357
0
      len = *contrib++;
1358
0
      min *= n;
1359
0
      while (len-- > 0)
1360
0
      {
1361
0
        int c = *contrib++;
1362
0
        for (j = 0; j < n; j++)
1363
0
          tmp[j] += src[min++] * c;
1364
0
        if (forcealpha)
1365
0
          tmp[j] += 255 * c;
1366
0
      }
1367
0
      for (j = 0; j < nf; j++)
1368
0
      {
1369
0
        *dst++ = (unsigned char)(tmp[j]>>8);
1370
0
        tmp[j] = 128;
1371
0
      }
1372
0
      dst -= 2*nf;
1373
0
    }
1374
0
    dst += nf + dstride;
1375
0
  }
1376
0
  else
1377
0
  {
1378
0
    for (i=weights->count; i > 0; i--)
1379
0
    {
1380
0
      min = *contrib++;
1381
0
      len = *contrib++;
1382
0
      min *= n;
1383
0
      while (len-- > 0)
1384
0
      {
1385
0
        int c = *contrib++;
1386
0
        for (j = 0; j < n; j++)
1387
0
          tmp[j] += src[min++] * c;
1388
0
        if (forcealpha)
1389
0
          tmp[j] += 255 * c;
1390
0
      }
1391
0
      for (j = 0; j < nf; j++)
1392
0
      {
1393
0
        *dst++ = (unsigned char)(tmp[j]>>8);
1394
0
        tmp[j] = 128;
1395
0
      }
1396
0
    }
1397
0
    dst += dstride - weights->count * nf;
1398
0
  }
1399
  /* And then duplicate it h times */
1400
0
  nf *= weights->count;
1401
0
  while (--h > 0)
1402
0
  {
1403
0
    memcpy(dst, dst-dstride, nf);
1404
0
    dst += dstride;
1405
0
  }
1406
0
}
1407
1408
static void
1409
scale_single_col(unsigned char * FZ_RESTRICT dst, int dstride, const unsigned char * FZ_RESTRICT src, int sstride, const fz_weights * FZ_RESTRICT weights, int src_w, int n, int w, int forcealpha)
1410
0
{
1411
0
  const int *contrib = &weights->index[weights->index[0]];
1412
0
  int min, len, i, j;
1413
0
  int tmp[FZ_MAX_COLORS];
1414
0
  int nf = n + forcealpha;
1415
1416
0
  for (j = 0; j < nf; j++)
1417
0
    tmp[j] = 128;
1418
0
  if (weights->flip)
1419
0
  {
1420
0
    src_w = (src_w-1)*sstride;
1421
0
    for (i=weights->count; i > 0; i--)
1422
0
    {
1423
      /* Scale the next pixel in the column */
1424
0
      min = *contrib++;
1425
0
      len = *contrib++;
1426
0
      min = src_w-min*sstride;
1427
0
      while (len-- > 0)
1428
0
      {
1429
0
        int c = *contrib++;
1430
0
        for (j = 0; j < n; j++)
1431
0
          tmp[j] += src[min+j] * c;
1432
0
        if (forcealpha)
1433
0
          tmp[j] += 255 * c;
1434
0
        min -= sstride;
1435
0
      }
1436
0
      for (j = 0; j < nf; j++)
1437
0
      {
1438
0
        *dst++ = (unsigned char)(tmp[j]>>8);
1439
0
        tmp[j] = 128;
1440
0
      }
1441
      /* And then duplicate it across the row */
1442
0
      for (j = (w-1)*nf; j > 0; j--)
1443
0
      {
1444
0
        *dst = dst[-nf];
1445
0
        dst++;
1446
0
      }
1447
0
      dst += dstride - w*nf;
1448
0
    }
1449
0
  }
1450
0
  else
1451
0
  {
1452
0
    for (i=weights->count; i > 0; i--)
1453
0
    {
1454
      /* Scale the next pixel in the column */
1455
0
      min = *contrib++;
1456
0
      len = *contrib++;
1457
0
      min *= sstride;
1458
0
      while (len-- > 0)
1459
0
      {
1460
0
        int c = *contrib++;
1461
0
        for (j = 0; j < n; j++)
1462
0
          tmp[j] += src[min+j] * c;
1463
0
        if (forcealpha)
1464
0
          tmp[j] += 255 * c;
1465
0
        min += sstride;
1466
0
      }
1467
0
      for (j = 0; j < nf; j++)
1468
0
      {
1469
0
        *dst++ = (unsigned char)(tmp[j]>>8);
1470
0
        tmp[j] = 128;
1471
0
      }
1472
      /* And then duplicate it across the row */
1473
0
      for (j = (w-1)*nf; j > 0; j--)
1474
0
      {
1475
0
        *dst = dst[-nf];
1476
0
        dst++;
1477
0
      }
1478
0
      dst += dstride - w*nf;
1479
0
    }
1480
0
  }
1481
0
}
1482
#endif /* SINGLE_PIXEL_SPECIALS */
1483
1484
static void
1485
get_alpha_edge_values(const fz_weights * FZ_RESTRICT rows, int * FZ_RESTRICT tp, int * FZ_RESTRICT bp)
1486
0
{
1487
0
  const int *contrib = &rows->index[rows->index[0]];
1488
0
  int len, i, t, b;
1489
1490
  /* Calculate the edge alpha values */
1491
0
  contrib++; /* Skip min */
1492
0
  len = *contrib++;
1493
0
  t = 0;
1494
0
  while (len--)
1495
0
    t += *contrib++;
1496
0
  for (i=rows->count-2; i > 0; i--)
1497
0
  {
1498
0
    contrib++; /* Skip min */
1499
0
    len = *contrib++;
1500
0
    contrib += len;
1501
0
  }
1502
0
  b = 0;
1503
0
  if (i == 0)
1504
0
  {
1505
0
    contrib++;
1506
0
    len = *contrib++;
1507
0
    while (len--)
1508
0
      b += *contrib++;
1509
0
  }
1510
0
  if (rows->flip && i == 0)
1511
0
  {
1512
0
    *tp = b;
1513
0
    *bp = t;
1514
0
  }
1515
0
  else
1516
0
  {
1517
0
    *tp = t;
1518
0
    *bp = b;
1519
0
  }
1520
0
}
1521
1522
static void
1523
adjust_alpha_edges(fz_pixmap * FZ_RESTRICT pix, const fz_weights * FZ_RESTRICT rows, const fz_weights * FZ_RESTRICT cols)
1524
0
{
1525
0
  int t, l, r, b, tl, tr, bl, br, x, y;
1526
0
  unsigned char *dp = pix->samples;
1527
0
  int w = pix->w;
1528
0
  int n = pix->n;
1529
0
  int span = w >= 2 ? (w-1)*n : 0;
1530
0
  int stride = pix->stride;
1531
1532
0
  get_alpha_edge_values(rows, &t, &b);
1533
0
  get_alpha_edge_values(cols, &l, &r);
1534
1535
0
  l = (255 * l + 128)>>8;
1536
0
  r = (255 * r + 128)>>8;
1537
0
  tl = (l * t + 128)>>8;
1538
0
  tr = (r * t + 128)>>8;
1539
0
  bl = (l * b + 128)>>8;
1540
0
  br = (r * b + 128)>>8;
1541
0
  t = (255 * t + 128)>>8;
1542
0
  b = (255 * b + 128)>>8;
1543
0
  dp += n-1;
1544
0
  *dp = tl;
1545
0
  dp += n;
1546
0
  for (x = w-2; x > 0; x--)
1547
0
  {
1548
0
    *dp = t;
1549
0
    dp += n;
1550
0
  }
1551
0
  if (x == 0)
1552
0
  {
1553
0
    *dp = tr;
1554
0
    dp += n;
1555
0
  }
1556
0
  dp += stride - w*n;
1557
0
  for (y = pix->h-2; y > 0; y--)
1558
0
  {
1559
0
    dp[span] = r;
1560
0
    *dp = l;
1561
0
    dp += stride;
1562
0
  }
1563
0
  if (y == 0)
1564
0
  {
1565
0
    *dp = bl;
1566
0
    dp += n;
1567
0
    for (x = w-2; x > 0; x--)
1568
0
    {
1569
0
      *dp = b;
1570
0
      dp += n;
1571
0
    }
1572
0
    if (x == 0)
1573
0
    {
1574
0
      *dp = br;
1575
0
    }
1576
0
  }
1577
0
}
1578
1579
fz_pixmap *
1580
fz_scale_pixmap(fz_context *ctx, fz_pixmap *src, float x, float y, float w, float h, const fz_irect *clip)
1581
0
{
1582
0
  return fz_scale_pixmap_cached(ctx, src, x, y, w, h, clip, NULL, NULL);
1583
0
}
1584
1585
fz_pixmap *
1586
fz_scale_pixmap_cached(fz_context *ctx, const fz_pixmap *src, float x, float y, float w, float h, const fz_irect *clip, fz_scale_cache *cache_x, fz_scale_cache *cache_y)
1587
7.53k
{
1588
7.53k
  fz_scale_filter *filter = &fz_scale_filter_simple;
1589
7.53k
  fz_weights *contrib_rows = NULL;
1590
7.53k
  fz_weights *contrib_cols = NULL;
1591
7.53k
  fz_pixmap *output = NULL;
1592
7.53k
  unsigned char *temp = NULL;
1593
7.53k
  int max_row, temp_span, temp_rows, row;
1594
7.53k
  int dst_w_int, dst_h_int, dst_x_int, dst_y_int;
1595
7.53k
  int flip_x, flip_y, forcealpha;
1596
7.53k
  fz_rect patch;
1597
1598
7.53k
  fz_var(contrib_cols);
1599
7.53k
  fz_var(contrib_rows);
1600
1601
  /* Avoid extreme scales where overflows become problematic. */
1602
7.53k
  if (w > (1<<24) || h > (1<<24) || w < -(1<<24) || h < -(1<<24))
1603
0
    return NULL;
1604
7.53k
  if (x > (1<<24) || y > (1<<24) || x < -(1<<24) || y < -(1<<24))
1605
0
    return NULL;
1606
1607
  /* Clamp small ranges of w and h */
1608
7.53k
  if (w <= -1)
1609
0
  {
1610
    /* Large negative range. Don't clamp */
1611
0
  }
1612
7.53k
  else if (w < 0)
1613
0
  {
1614
0
    w = -1;
1615
0
  }
1616
7.53k
  else if (w < 1)
1617
0
  {
1618
0
    w = 1;
1619
0
  }
1620
7.53k
  if (h <= -1)
1621
0
  {
1622
    /* Large negative range. Don't clamp */
1623
0
  }
1624
7.53k
  else if (h < 0)
1625
0
  {
1626
0
    h = -1;
1627
0
  }
1628
7.53k
  else if (h < 1)
1629
0
  {
1630
0
    h = 1;
1631
0
  }
1632
1633
  /* If the src has an alpha, we'll make the dst have an alpha automatically.
1634
   * We also need to force the dst to have an alpha if x/y/w/h aren't ints. */
1635
7.53k
  forcealpha = !src->alpha && (x != (float)(int)x || y != (float)(int)y || w != (float)(int)w || h != (float)(int)h);
1636
1637
  /* Find the destination bbox, width/height, and sub pixel offset,
1638
   * allowing for whether we're flipping or not. */
1639
  /* The (x,y) position given describes where the top left corner
1640
   * of the source image should be mapped to (i.e. where (0,0) in image
1641
   * space ends up). Also there are differences in the way we scale
1642
   * horizontally and vertically. When scaling rows horizontally, we
1643
   * always read forwards through the source, and store either forwards
1644
   * or in reverse as required. When scaling vertically, we always store
1645
   * out forwards, but may feed source rows in in a different order.
1646
   *
1647
   * Consider the image rectangle 'r' to which the image is mapped,
1648
   * and the (possibly) larger rectangle 'R', given by expanding 'r' to
1649
   * complete pixels.
1650
   *
1651
   * x can either be r.xmin-R.xmin or R.xmax-r.xmax depending on whether
1652
   * the image is x flipped or not. Whatever happens 0 <= x < 1.
1653
   * y is always R.ymax - r.ymax.
1654
   */
1655
  /* dst_x_int is calculated to be the left of the scaled image, and
1656
   * x (the sub pixel offset) is the distance in from either the left
1657
   * or right pixel expanded edge. */
1658
7.53k
  flip_x = (w < 0);
1659
7.53k
  if (flip_x)
1660
0
  {
1661
0
    float tmp;
1662
0
    w = -w;
1663
0
    dst_x_int = floorf(x-w);
1664
0
    tmp = ceilf(x);
1665
0
    dst_w_int = (int)tmp;
1666
0
    x = tmp - x;
1667
0
    dst_w_int -= dst_x_int;
1668
0
  }
1669
7.53k
  else
1670
7.53k
  {
1671
7.53k
    dst_x_int = floorf(x);
1672
7.53k
    x -= dst_x_int;
1673
7.53k
    dst_w_int = (int)ceilf(x + w);
1674
7.53k
  }
1675
  /* dst_y_int is calculated to be the top of the scaled image, and
1676
   * y (the sub pixel offset) is the distance in from either the top
1677
   * or bottom pixel expanded edge.
1678
   */
1679
7.53k
  flip_y = (h < 0);
1680
7.53k
  if (flip_y)
1681
0
  {
1682
0
    float tmp;
1683
0
    h = -h;
1684
0
    dst_y_int = floorf(y-h);
1685
0
    tmp = ceilf(y);
1686
0
    dst_h_int = (int)tmp;
1687
0
    y = tmp - y;
1688
0
    dst_h_int -= dst_y_int;
1689
0
  }
1690
7.53k
  else
1691
7.53k
  {
1692
7.53k
    dst_y_int = floorf(y);
1693
7.53k
    y -= dst_y_int;
1694
7.53k
    dst_h_int = (int)ceilf(y + h);
1695
7.53k
  }
1696
1697
7.53k
  fz_valgrind_pixmap(src);
1698
1699
  /* Step 0: Calculate the patch */
1700
7.53k
  patch.x0 = 0;
1701
7.53k
  patch.y0 = 0;
1702
7.53k
  patch.x1 = dst_w_int;
1703
7.53k
  patch.y1 = dst_h_int;
1704
7.53k
  if (clip)
1705
7.53k
  {
1706
7.53k
    if (flip_x)
1707
0
    {
1708
0
      if (dst_x_int + dst_w_int > clip->x1)
1709
0
        patch.x0 = dst_x_int + dst_w_int - clip->x1;
1710
0
      if (clip->x0 > dst_x_int)
1711
0
      {
1712
0
        patch.x1 = dst_w_int - (clip->x0 - dst_x_int);
1713
0
        dst_x_int = clip->x0;
1714
0
      }
1715
0
    }
1716
7.53k
    else
1717
7.53k
    {
1718
7.53k
      if (dst_x_int + dst_w_int > clip->x1)
1719
1.95k
        patch.x1 = clip->x1 - dst_x_int;
1720
7.53k
      if (clip->x0 > dst_x_int)
1721
2
      {
1722
2
        patch.x0 = clip->x0 - dst_x_int;
1723
2
        dst_x_int += patch.x0;
1724
2
      }
1725
7.53k
    }
1726
1727
7.53k
    if (flip_y)
1728
0
    {
1729
0
      if (dst_y_int + dst_h_int > clip->y1)
1730
0
        patch.y1 = clip->y1 - dst_y_int;
1731
0
      if (clip->y0 > dst_y_int)
1732
0
      {
1733
0
        patch.y0 = clip->y0 - dst_y_int;
1734
0
        dst_y_int = clip->y0;
1735
0
      }
1736
0
    }
1737
7.53k
    else
1738
7.53k
    {
1739
7.53k
      if (dst_y_int + dst_h_int > clip->y1)
1740
2.85k
        patch.y1 = clip->y1 - dst_y_int;
1741
7.53k
      if (clip->y0 > dst_y_int)
1742
634
      {
1743
634
        patch.y0 = clip->y0 - dst_y_int;
1744
634
        dst_y_int += patch.y0;
1745
634
      }
1746
7.53k
    }
1747
7.53k
  }
1748
7.53k
  if (patch.x0 >= patch.x1 || patch.y0 >= patch.y1)
1749
0
    return NULL;
1750
1751
15.0k
  fz_try(ctx)
1752
15.0k
  {
1753
    /* Step 1: Calculate the weights for columns and rows */
1754
7.53k
#ifdef SINGLE_PIXEL_SPECIALS
1755
7.53k
    if (src->w == 1)
1756
0
      contrib_cols = NULL;
1757
7.53k
    else
1758
7.53k
#endif /* SINGLE_PIXEL_SPECIALS */
1759
7.53k
      contrib_cols = Memento_label(make_weights(ctx, src->w, x, w, filter, 0, dst_w_int, patch.x0, patch.x1, src->n, flip_x, cache_x), "contrib_cols");
1760
7.53k
#ifdef SINGLE_PIXEL_SPECIALS
1761
7.53k
    if (src->h == 1)
1762
0
      contrib_rows = NULL;
1763
7.53k
    else
1764
7.53k
#endif /* SINGLE_PIXEL_SPECIALS */
1765
7.53k
      contrib_rows = Memento_label(make_weights(ctx, src->h, y, h, filter, 1, dst_h_int, patch.y0, patch.y1, src->n, flip_y, cache_y), "contrib_rows");
1766
1767
7.53k
    output = fz_new_pixmap(ctx, src->colorspace, patch.x1 - patch.x0, patch.y1 - patch.y0, src->seps, src->alpha || forcealpha);
1768
7.53k
  }
1769
15.0k
  fz_catch(ctx)
1770
0
  {
1771
0
    if (!cache_x)
1772
0
      fz_free(ctx, contrib_cols);
1773
0
    if (!cache_y)
1774
0
      fz_free(ctx, contrib_rows);
1775
0
    fz_rethrow(ctx);
1776
0
  }
1777
7.53k
  output->x = dst_x_int;
1778
7.53k
  output->y = dst_y_int;
1779
1780
  /* Step 2: Apply the weights */
1781
7.53k
#ifdef SINGLE_PIXEL_SPECIALS
1782
7.53k
  if (!contrib_rows)
1783
0
  {
1784
    /* Only 1 source pixel high. */
1785
0
    if (!contrib_cols)
1786
0
    {
1787
      /* Only 1 pixel in the entire image! */
1788
0
      duplicate_single_pixel(output->samples, src->samples, src->n, forcealpha, patch.x1-patch.x0, patch.y1-patch.y0, output->stride);
1789
0
      fz_valgrind_pixmap(output);
1790
0
    }
1791
0
    else
1792
0
    {
1793
      /* Scale the row once, then copy it. */
1794
0
      scale_single_row(output->samples, output->stride, src->samples, contrib_cols, src->w, patch.y1-patch.y0, forcealpha);
1795
0
      fz_valgrind_pixmap(output);
1796
0
    }
1797
0
  }
1798
7.53k
  else if (!contrib_cols)
1799
0
  {
1800
    /* Only 1 source pixel wide. Scale the col and duplicate. */
1801
0
    scale_single_col(output->samples, output->stride, src->samples, src->stride, contrib_rows, src->h, src->n, patch.x1-patch.x0, forcealpha);
1802
0
    fz_valgrind_pixmap(output);
1803
0
  }
1804
7.53k
  else
1805
7.53k
#endif /* SINGLE_PIXEL_SPECIALS */
1806
7.53k
  {
1807
7.53k
    void (*row_scale_in)(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights);
1808
7.53k
    void (*row_scale_out)(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int w, int n, int row);
1809
1810
7.53k
    temp_span = contrib_cols->count * src->n;
1811
7.53k
    temp_rows = contrib_rows->max_len;
1812
7.53k
    if (temp_span <= 0 || temp_rows > INT_MAX / temp_span)
1813
0
      goto cleanup;
1814
15.0k
    fz_try(ctx)
1815
15.0k
    {
1816
7.53k
      temp = fz_calloc(ctx, (size_t)temp_span*temp_rows, sizeof(unsigned char));
1817
7.53k
    }
1818
15.0k
    fz_catch(ctx)
1819
0
    {
1820
0
      fz_drop_pixmap(ctx, output);
1821
0
      if (!cache_x)
1822
0
        fz_free(ctx, contrib_cols);
1823
0
      if (!cache_y)
1824
0
        fz_free(ctx, contrib_rows);
1825
0
      fz_rethrow(ctx);
1826
0
    }
1827
7.53k
    switch (src->n)
1828
7.53k
    {
1829
0
    default:
1830
0
      row_scale_in = scale_row_to_temp;
1831
0
      break;
1832
3.74k
    case 1: /* Image mask case or Greyscale case */
1833
3.74k
      row_scale_in = scale_row_to_temp1;
1834
3.74k
      break;
1835
0
    case 2: /* Greyscale with alpha case */
1836
0
      row_scale_in = scale_row_to_temp2;
1837
0
      break;
1838
3.78k
    case 3: /* RGB case */
1839
3.78k
      row_scale_in = scale_row_to_temp3;
1840
3.78k
      break;
1841
0
    case 4: /* RGBA or CMYK case */
1842
0
      row_scale_in = scale_row_to_temp4;
1843
0
      break;
1844
7.53k
    }
1845
7.53k
    row_scale_out = forcealpha ? scale_row_from_temp_alpha : scale_row_from_temp;
1846
7.53k
    max_row = contrib_rows->index[contrib_rows->index[0]];
1847
124k
    for (row = 0; row < contrib_rows->count; row++)
1848
116k
    {
1849
      /*
1850
      Which source rows do we need to have scaled into the
1851
      temporary buffer in order to be able to do the final
1852
      scale?
1853
      */
1854
116k
      int row_index = contrib_rows->index[row];
1855
116k
      int row_min = contrib_rows->index[row_index++];
1856
116k
      int row_len = contrib_rows->index[row_index];
1857
353k
      while (max_row < row_min+row_len)
1858
236k
      {
1859
        /* Scale another row */
1860
236k
        assert(max_row < src->h);
1861
236k
        (*row_scale_in)(&temp[temp_span*(max_row % temp_rows)], &src->samples[(flip_y ? (src->h-1-max_row): max_row)*src->stride], contrib_cols);
1862
236k
        max_row++;
1863
236k
      }
1864
1865
116k
      (*row_scale_out)(&output->samples[row*output->stride], temp, contrib_rows, contrib_cols->count, src->n, row);
1866
116k
    }
1867
7.53k
    fz_free(ctx, temp);
1868
1869
7.53k
    if (forcealpha)
1870
0
      adjust_alpha_edges(output, contrib_rows, contrib_cols);
1871
1872
7.53k
    fz_valgrind_pixmap(output);
1873
7.53k
  }
1874
1875
7.53k
cleanup:
1876
7.53k
  if (!cache_y)
1877
0
    fz_free(ctx, contrib_rows);
1878
7.53k
  if (!cache_x)
1879
0
    fz_free(ctx, contrib_cols);
1880
1881
7.53k
  return output;
1882
7.53k
}
1883
1884
void
1885
fz_drop_scale_cache(fz_context *ctx, fz_scale_cache *sc)
1886
814
{
1887
814
  if (!sc)
1888
0
    return;
1889
814
  fz_free(ctx, sc->weights);
1890
814
  fz_free(ctx, sc);
1891
814
}
1892
1893
fz_scale_cache *
1894
fz_new_scale_cache(fz_context *ctx)
1895
814
{
1896
814
  return fz_malloc_struct(ctx, fz_scale_cache);
1897
814
}