Coverage Report

Created: 2025-09-04 06:50

/src/mupdf/source/fitz/draw-scale-simple.c
Line
Count
Source (jump to first uncovered line)
1
// Copyright (C) 2004-2025 Artifex Software, Inc.
2
//
3
// This file is part of MuPDF.
4
//
5
// MuPDF is free software: you can redistribute it and/or modify it under the
6
// terms of the GNU Affero General Public License as published by the Free
7
// Software Foundation, either version 3 of the License, or (at your option)
8
// any later version.
9
//
10
// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13
// details.
14
//
15
// You should have received a copy of the GNU Affero General Public License
16
// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17
//
18
// Alternative licensing terms are available from the licensor.
19
// For commercial licensing, see <https://www.artifex.com/> or contact
20
// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21
// CA 94129, USA, for further information.
22
23
/*
24
This code does smooth scaling of a pixmap.
25
26
This function returns a new pixmap representing the area starting at (0,0)
27
given by taking the source pixmap src, scaling it to width w, and height h,
28
and then positioning it at (frac(x),frac(y)).
29
30
This is a cut-down version of draw_scale.c that only copes with filters
31
that return values strictly in the 0..1 range, and uses bytes for
32
intermediate results rather than ints.
33
*/
34
35
#include "mupdf/fitz.h"
36
37
#include "draw-imp.h"
38
#include "pixmap-imp.h"
39
40
#include <math.h>
41
#include <string.h>
42
#include <assert.h>
43
#include <limits.h>
44
45
/* Do we special case handling of single pixel high/wide images? The
46
 * 'purest' handling is given by not special casing them, but certain
47
 * files that use such images 'stack' them to give full images. Not
48
 * special casing them results in them being fainter and giving noticeable
49
 * rounding errors.
50
 */
51
#define SINGLE_PIXEL_SPECIALS
52
53
/*
54
Consider a row of source samples, src, of width src_w, positioned at x,
55
scaled to width dst_w.
56
57
src[i] is centred at: x + (i + 0.5)*dst_w/src_w
58
59
Therefore the distance between the centre of the jth output pixel and
60
the centre of the ith source sample is:
61
62
dist[j,i] = j + 0.5 - (x + (i + 0.5)*dst_w/src_w)
63
64
When scaling up, therefore:
65
66
dst[j] = SUM(filter(dist[j,i]) * src[i])
67
  (for all ints i)
68
69
This can be simplified by noticing that filters are only non zero within
70
a given filter width (henceforth called W). So:
71
72
dst[j] = SUM(filter(dist[j,i]) * src[i])
73
  (for ints i, s.t. (j*src_w/dst_w)-W < i < (j*src_w/dst_w)+W)
74
75
When scaling down, each filtered source sample is stretched to be wider
76
to avoid aliasing issues. This effectively reduces the distance between
77
centres.
78
79
dst[j] = SUM(filter(dist[j,i] * F) * F * src[i])
80
  (where F = dst_w/src_w)
81
  (for ints i, s.t. (j-W)/F < i < (j+W)/F)
82
83
*/
84
85
typedef struct fz_scale_filter
86
{
87
  int width;
88
  float (*fn)(struct fz_scale_filter *, float);
89
} fz_scale_filter;
90
91
/* Image scale filters */
92
93
static float
94
triangle(fz_scale_filter *filter, float f)
95
0
{
96
0
  if (f >= 1)
97
0
    return 0;
98
0
  return 1-f;
99
0
}
100
101
static float
102
box(fz_scale_filter *filter, float f)
103
0
{
104
0
  if (f >= 0.5f)
105
0
    return 0;
106
0
  return 1;
107
0
}
108
109
static float
110
simple(fz_scale_filter *filter, float x)
111
16.1k
{
112
16.1k
  if (x >= 1)
113
0
    return 0;
114
16.1k
  return 1 + (2*x - 3)*x*x;
115
16.1k
}
116
117
/* The predefined filters; all have a support width of 1. */
fz_scale_filter fz_scale_filter_box = { 1, box };
fz_scale_filter fz_scale_filter_triangle = { 1, triangle };
fz_scale_filter fz_scale_filter_simple = { 1, simple };
120
121
/*
122
We build ourselves a set of tables to contain the precalculated weights
123
for a given set of scale settings.
124
125
The first dst_w entries in index are the index into index of the
126
sets of weight for each destination pixel.
127
128
Each of the sets of weights is a set of values consisting of:
129
  the minimum source pixel index used for this destination pixel
130
  the number of weights used for this destination pixel
131
  the weights themselves
132
133
So to calculate dst[i] we do the following:
134
135
  weights = &index[index[i]];
136
  min = *weights++;
137
  len = *weights++;
138
  dst[i] = 0;
139
  while (--len > 0)
140
    dst[i] += src[min++] * *weights++
141
142
in addition, we guarantee that at the end of this process weights will now
143
point to the weights value for dst pixel i+1.
144
145
In the simplest version of this algorithm, we would scale the whole image
146
horizontally first into a temporary buffer, then scale that temporary
147
buffer again vertically to give us our result. Using such a simple
148
algorithm would mean that could use the same style of weights for both
149
horizontal and vertical scaling.
150
151
Unfortunately, this would also require a large temporary buffer,
152
particularly in the case where we are scaling up.
153
154
We therefore modify the algorithm as follows; we scale scanlines from the
155
source image horizontally into a temporary buffer, until we have all the
156
contributors for a given output scanline. We then produce that output
157
scanline from the temporary buffer. In this way we restrict the height
158
of the temporary buffer to a small fraction of the final size.
159
160
Unfortunately, this means that the pseudo code for recombining a
161
scanline of fully scaled pixels is as follows:
162
163
  weights = &index[index[y]];
164
  min = *weights++;
165
  len = *weights++;
166
  for (x=0 to dst_w)
167
    min2 = min
168
    len2 = len
169
    weights2 = weights
170
    dst[x] = 0;
171
    while (--len2 > 0)
172
      dst[x] += temp[x][(min2++) % tmp_buf_height] * *weights2++
173
174
i.e. it requires a % operation for every source pixel - this is typically
175
expensive.
176
177
To avoid this, we alter the order in which vertical weights are stored,
178
so that they are ordered in the same order as the temporary buffer lines
179
would appear. This simplifies the algorithm to:
180
181
  weights = &index[index[y]];
182
  min = *weights++;
183
  len = *weights++;
184
  for (x=0 to dst_w)
185
    min2 = 0
186
    len2 = len
187
    weights2 = weights
188
    dst[x] = 0;
189
    while (--len2 > 0)
190
      dst[x] += temp[i][min2++] * *weights2++
191
192
This means that len may be larger than it needs to be (due to the
193
possible inclusion of a zero weight row or two), but in practise this
194
is only an increase of 1 or 2 at worst.
195
196
We implement this by generating the weights as normal (but ensuring we
197
leave enough space) and then reordering afterwards.
198
199
*/
200
201
/* This structure is accessed from ARM code - bear this in mind before
 * altering it! */
typedef struct
{
	int flip;	/* true if outputting reversed */
	int count;	/* number of output pixels we have records for in this table */
	int max_len;	/* Maximum number of weights for any one output pixel */
	int n;		/* number of components (src->n) */
	int new_line;	/* True if no weights for the current output pixel */
	int patch_l;	/* How many output pixels we skip over */
	/* Packed table: first the per-output-pixel row pointers, then for
	 * each output pixel a (min, len, weights[len]) run. See the long
	 * comment above for how this is built and read. */
	int index[FZ_FLEXIBLE_ARRAY];
} fz_weights;
213
214
/* Cache for one weights table. The first ten fields record the
 * make_weights arguments that produced 'weights'; a later call with
 * identical arguments reuses the cached table instead of rebuilding it. */
struct fz_scale_cache
{
	int src_w;
	float x;
	float dst_w;
	fz_scale_filter *filter;
	int vertical;
	int dst_w_int;
	int patch_l;
	int patch_r;
	int n;
	int flip;
	fz_weights *weights;	/* cached table; freed/replaced by make_weights */
};
228
229
static fz_weights *
230
new_weights(fz_context *ctx, fz_scale_filter *filter, int src_w, float dst_w, int patch_w, int n, int flip, int patch_l)
231
2
{
232
2
  int max_len;
233
2
  fz_weights *weights;
234
235
2
  if (src_w > dst_w)
236
2
  {
237
    /* Scaling down, so there will be a maximum of
238
     * 2*filterwidth*src_w/dst_w src pixels
239
     * contributing to each dst pixel. */
240
2
    max_len = (int)ceilf((2 * filter->width * src_w)/dst_w);
241
2
    if (max_len > src_w)
242
0
      max_len = src_w;
243
2
  }
244
0
  else
245
0
  {
246
    /* Scaling up, so there will be a maximum of
247
     * 2*filterwidth src pixels contributing to each dst pixel.
248
     */
249
0
    max_len = 2 * filter->width;
250
0
  }
251
  /* We need the size of the struct,
252
   * plus patch_w*sizeof(int) for the index
253
   * plus (2+max_len)*sizeof(int) for the weights
254
   * plus room for an extra set of weights for reordering.
255
   */
256
2
  weights = fz_malloc_flexible(ctx, fz_weights, index, (max_len+3) * (patch_w+1));
257
2
  if (!weights)
258
0
    return NULL;
259
2
  weights->count = -1;
260
2
  weights->max_len = max_len;
261
2
  weights->index[0] = patch_w;
262
2
  weights->n = n;
263
2
  weights->patch_l = patch_l;
264
2
  weights->flip = flip;
265
2
  return weights;
266
2
}
267
268
/* j is destination pixel in the patch_l..patch_l+patch_w range */
269
static void
270
init_weights(fz_weights *weights, int j)
271
6.04k
{
272
6.04k
  int index;
273
274
6.04k
  j -= weights->patch_l;
275
6.04k
  assert(weights->count == j-1);
276
6.04k
  weights->count++;
277
6.04k
  weights->new_line = 1;
278
6.04k
  if (j == 0)
279
2
    index = weights->index[0];
280
6.04k
  else
281
6.04k
  {
282
6.04k
    index = weights->index[j-1];
283
6.04k
    index += 2 + weights->index[index+1];
284
6.04k
  }
285
6.04k
  weights->index[j] = index; /* row pointer */
286
6.04k
  weights->index[index] = 0; /* min */
287
6.04k
  weights->index[index+1] = 0; /* len */
288
6.04k
}
289
290
/* Record that source sample i contributes 'weight' (in 1/256ths) to
 * destination pixel j. Weights for each destination pixel are stored as a
 * contiguous (min, len, weights[len]) run; any gap between the current run
 * and i is filled with zero weights so the run stays contiguous. */
static void
insert_weight(fz_weights *weights, int j, int i, int weight)
{
	int min, len, index;

	/* Move j from patch_l...patch_l+patch_w range to 0..patch_w range */
	j -= weights->patch_l;
	if (weights->new_line)
	{
		/* New line */
		weights->new_line = 0;
		index = weights->index[j]; /* row pointer */
		weights->index[index] = i; /* min */
		weights->index[index+1] = 0; /* len */
	}
	index = weights->index[j];
	min = weights->index[index++];
	len = weights->index[index++];
	/* After the two increments, index points at the first weight;
	 * index-2 is min and index-1 is len. */
	while (i < min)
	{
		/* This only happens in rare cases, but we need to insert
		 * one earlier. In exceedingly rare cases we may need to
		 * insert more than one earlier. */
		int k;

		/* Shift the existing weights up one and open a zero slot
		 * at the front of the run. */
		for (k = len; k > 0; k--)
		{
			weights->index[index+k] = weights->index[index+k-1];
		}
		weights->index[index] = 0;
		min--;
		len++;
		weights->index[index-2] = min;
		weights->index[index-1] = len;
	}
	if (i-min >= len)
	{
		/* The usual case */
		/* Extend the run with zero weights until slot i-min exists. */
		while (i-min >= ++len)
		{
			weights->index[index+len-1] = 0;
		}
		assert(len-1 == i-min);
		weights->index[index+i-min] = weight;
		weights->index[index-1] = len;
		assert(len <= weights->max_len);
	}
	else
	{
		/* Infrequent case */
		/* Slot already exists; accumulate into it. */
		weights->index[index+i-min] += weight;
	}
}
343
344
static void
345
add_weight(fz_weights *weights, int j, int i, fz_scale_filter *filter,
346
  float x, float F, float G, int src_w, float dst_w)
347
16.1k
{
348
16.1k
  float dist = j - x + 0.5f - ((i + 0.5f)*dst_w/src_w);
349
16.1k
  float f;
350
16.1k
  int weight;
351
352
16.1k
  dist *= G;
353
16.1k
  if (dist < 0)
354
8.06k
    dist = -dist;
355
16.1k
  f = filter->fn(filter, dist)*F;
356
16.1k
  weight = (int)(256*f+0.5f);
357
358
  /* Ensure i is in range */
359
16.1k
  if (i < 0 || i >= src_w)
360
4
    return;
361
16.1k
  if (weight != 0)
362
16.1k
    insert_weight(weights, j, i, weight);
363
16.1k
}
364
365
/* Reorder the (vertical) weights for destination row j so they are stored
 * in the order the temporary buffer lines will appear, rather than in
 * source-row order. This avoids a '%' operation per source pixel at scale
 * time (see the long comment above fz_weights). The run is padded with
 * zero weights up to max_len so every row has the same length. */
static void
reorder_weights(fz_weights *weights, int j, int src_w)
{
	int idx = weights->index[j - weights->patch_l];
	int min = weights->index[idx++];
	int len = weights->index[idx++];
	int max = weights->max_len;
	int tmp = idx+max;	/* scratch run reserved by new_weights */
	int i, off;

	/* Copy into the temporary area */
	memcpy(&weights->index[tmp], &weights->index[idx], sizeof(int)*len);

	/* Pad out if required */
	assert(len <= max);
	assert(min+len <= src_w);
	off = 0;
	if (len < max)
	{
		memset(&weights->index[tmp+len], 0, sizeof(int)*(max-len));
		len = max;
		if (min + len > src_w)
		{
			/* Padding ran off the end of the source; pull min
			 * back and remember the offset so the weights still
			 * land in the right rotated slots. */
			off = min + len - src_w;
			min = src_w - len;
			weights->index[idx-2] = min;
		}
		weights->index[idx-1] = len;
	}

	/* Copy back into the proper places */
	for (i = 0; i < len; i++)
	{
		weights->index[idx+((min+i+off) % max)] = weights->index[tmp+i];
	}
}
401
402
/* Due to rounding and edge effects, the sums for the weights sometimes don't
 * add up to 256. This causes visible rendering effects. Therefore, we take
 * pains to ensure that they 1) never exceed 256, and 2) add up to exactly
 * 256 for all pixels that are completely covered. See bug #691629.
 *
 * j = destination pixel, w = integer destination width, x = fractional
 * position, wf = floating point destination width. Any correction is
 * applied to the largest weight in the run. */
static void
check_weights(fz_weights *weights, int j, int w, float x, float wf)
{
	int idx, len;
	int sum = 0;
	int max = -256;
	int maxidx = 0;
	int i;

	idx = weights->index[j - weights->patch_l];
	idx++; /* min */
	len = weights->index[idx++];

	for(i=0; i < len; i++)
	{
		int v = weights->index[idx++];
		sum += v;
		if (v > max)
		{
			max = v;
			/* idx has already advanced past the element, so
			 * maxidx-1 below addresses the max weight itself. */
			maxidx = idx;
		}
	}
	/* If we aren't the first or last pixel, OR if the sum is too big
	 * then adjust it. */
	if (((j != 0) && (j != w-1)) || (sum > 256))
		weights->index[maxidx-1] += 256-sum;
	/* Otherwise, if we are the first pixel, and it's fully covered, then
	 * adjust it. */
	else if ((j == 0) && (x < 0.0001f) && (sum != 256))
		weights->index[maxidx-1] += 256-sum;
	/* Finally, if we are the last pixel, and it's fully covered, then
	 * adjust it. */
	else if ((j == w-1) && (w - wf < 0.0001f) && (sum != 256))
		weights->index[maxidx-1] += 256-sum;
}
442
443
/* Pull both ends of the [l, *rp] range in until each lies within 'window'
 * of 'centre'. Only needed when floating point rounding has pushed the
 * range ends too far apart; returns the corrected l and updates *rp. */
static int
window_fix(int l, int *rp, float window, float centre)
{
	int r = *rp;

	for (; centre - l > window; l++)
		;
	for (; r - centre > window; r--)
		;
	*rp = r;
	return l;
}
454
455
/* Build (or fetch from 'cache') the weights table describing how src_w
 * source samples combine into destination pixels when scaled to dst_w and
 * offset by the fractional position x. Only destination pixels
 * patch_l..patch_r-1 are generated. If 'vertical', the weights are
 * reordered for temporary-buffer order. Returns NULL on allocation
 * failure. */
static fz_weights *
make_weights(fz_context *ctx, int src_w, float x, float dst_w, fz_scale_filter *filter, int vertical, int dst_w_int, int patch_l, int patch_r, int n, int flip, fz_scale_cache *cache)
{
	fz_weights *weights;
	float F, G;
	float window;
	int j;

	if (cache)
	{
		/* Reuse the cached table if every parameter matches;
		 * otherwise record the new key and drop the old table
		 * before rebuilding. */
		if (cache->src_w == src_w && cache->x == x && cache->dst_w == dst_w &&
			cache->filter == filter && cache->vertical == vertical &&
			cache->dst_w_int == dst_w_int &&
			cache->patch_l == patch_l && cache->patch_r == patch_r &&
			cache->n == n && cache->flip == flip)
		{
			return cache->weights;
		}
		cache->src_w = src_w;
		cache->x = x;
		cache->dst_w = dst_w;
		cache->filter = filter;
		cache->vertical = vertical;
		cache->dst_w_int = dst_w_int;
		cache->patch_l = patch_l;
		cache->patch_r = patch_r;
		cache->n = n;
		cache->flip = flip;
		fz_free(ctx, cache->weights);
		cache->weights = NULL;
	}

	if (dst_w < src_w)
	{
		/* Scaling down */
		F = dst_w / src_w;
		G = 1;
	}
	else
	{
		/* Scaling up */
		F = 1;
		G = src_w / dst_w;
	}
	/* Half-width of the filter support, measured in source samples. */
	window = filter->width / F;
	weights = new_weights(ctx, filter, src_w, dst_w, patch_r-patch_l, n, flip, patch_l);
	if (!weights)
		return NULL;
	for (j = patch_l; j < patch_r; j++)
	{
		/* find the position of the centre of dst[j] in src space */
		float centre = (j - x + 0.5f)*src_w/dst_w - 0.5f;
		int l, r;
		l = ceilf(centre - window);
		r = floorf(centre + window);

		/* Now, due to the vagaries of floating point, if centre is large, l
		 * and r can actually end up further than 2*window apart. All we care
		 * about in this case is that we don't crash! We want a cheap correction
		 * that avoids the assert and doesn't cost too much in the normal case.
		 * This should do. */
		if (r - l > 2 * window)
			l = window_fix(l, &r, window, centre);

		init_weights(weights, j);
		for (; l <= r; l++)
		{
			add_weight(weights, j, l, filter, x, F, G, src_w, dst_w);
		}
		if (weights->new_line)
		{
			/* In very rare cases (bug 706764) we might not actually
			 * have generated any non-zero weights for this destination
			 * pixel. Just use the central pixel. */
			int src_x = floorf(centre);
			if (src_x >= src_w)
				src_x = src_w-1;
			if (src_x < 0)
				src_x = 0;
			insert_weight(weights, j, src_x, 1);
		}
		check_weights(weights, j, dst_w_int, x, dst_w);
		if (vertical)
		{
			reorder_weights(weights, j, src_w);
		}
	}
	weights->count++; /* weights->count = dst_w_int now */
	if (cache)
	{
		cache->weights = weights;
	}
	return weights;
}
549
550
/* Horizontally scale one source scanline into a temporary row using the
 * precalculated weights. Each output component accumulates
 * sum(src * weight), starting from a rounding bias of 128 and shifting
 * down by 8 at the end (weights sum to 256 - see check_weights). If
 * weights->flip is set, output pixels are written in reverse order. */
static void
scale_row_to_temp(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
{
	const int *contrib = &weights->index[weights->index[0]];
	int len, i, j, n;
	const unsigned char *min;
	int tmp[FZ_MAX_COLORS];	/* per-component accumulators */
	int *t = tmp;

	n = weights->n;
	/* Pre-seed the accumulators with the rounding bias. */
	for (j = 0; j < n; j++)
		tmp[j] = 128;
	if (weights->flip)
	{
		dst += (weights->count-1)*n;
		for (i=weights->count; i > 0; i--)
		{
			/* Each run is (min, len, weights[len]). */
			min = &src[n * *contrib++];
			len = *contrib++;
			while (len-- > 0)
			{
				for (j = n; j > 0; j--)
					*t++ += *min++ * *contrib;
				t -= n;
				contrib++;
			}
			/* Emit the pixel and reset the accumulators. */
			for (j = n; j > 0; j--)
			{
				*dst++ = (unsigned char)(*t>>8);
				*t++ = 128;
			}
			t -= n;
			dst -= n*2;
		}
	}
	else
	{
		for (i=weights->count; i > 0; i--)
		{
			min = &src[n * *contrib++];
			len = *contrib++;
			while (len-- > 0)
			{
				for (j = n; j > 0; j--)
					*t++ += *min++ * *contrib;
				t -= n;
				contrib++;
			}
			for (j = n; j > 0; j--)
			{
				*dst++ = (unsigned char)(*t>>8);
				*t++ = 128;
			}
			t -= n;
		}
	}
}
607
608
#ifdef ARCH_ARM

/* Hand-written ARM implementations of the row scaling helpers. These are
 * 'naked' functions: their bodies are pure inline assembly with manual
 * prologue/epilogue, so they must contain no C code. */

static void
scale_row_to_temp1(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
__attribute__((naked));

static void
scale_row_to_temp2(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
__attribute__((naked));

static void
scale_row_to_temp3(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
__attribute__((naked));

static void
scale_row_to_temp4(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
__attribute__((naked));

static void
scale_row_from_temp(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int width, int n, int row)
__attribute__((naked));

static void
scale_row_from_temp_alpha(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int width, int n, int row)
__attribute__((naked));
633
634
/* ARM asm version of scale_row_to_temp for one component per pixel.
 * Register comments mirror the C version: each run is (min, len,
 * weights[len]); the accumulator starts at 128 and is shifted down by 8.
 * The flipped and unflipped loops are unrolled two weights at a time. */
static void
scale_row_to_temp1(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
{
	asm volatile(
	ENTER_ARM
		".syntax unified\n"
	"stmfd	r13!,{r4-r7,r9,r14}				\n"
	"@ r0 = dst						\n"
	"@ r1 = src						\n"
	"@ r2 = weights						\n"
	"ldr	r12,[r2],#4		@ r12= flip		\n"
	"ldr	r3, [r2],#20		@ r3 = count r2 = &index\n"
	"ldr	r4, [r2]		@ r4 = index[0]		\n"
	"cmp	r12,#0			@ if (flip)		\n"
	"beq	5f			@ {			\n"
	"add	r2, r2, r4, LSL #2	@ r2 = &index[index[0]]	\n"
	"add	r0, r0, r3		@ dst += count		\n"
	"1:							\n"
	"ldr	r4, [r2], #4		@ r4 = *contrib++	\n"
	"ldr	r9, [r2], #4		@ r9 = len = *contrib++	\n"
	"mov	r5, #128		@ r5 = a = 128		\n"
	"add	r4, r1, r4		@ r4 = min = &src[r4]	\n"
	"subs	r9, r9, #1		@ len--			\n"
	"blt	3f			@ while (len >= 0)	\n"
	"2:				@ {			\n"
	"ldrgt	r6, [r2], #4		@ r6 = *contrib++	\n"
	"ldrbgt	r7, [r4], #1		@ r7 = *min++		\n"
	"ldr	r12,[r2], #4		@ r12 = *contrib++	\n"
	"ldrb	r14,[r4], #1		@ r14 = *min++		\n"
	"mlagt	r5, r6, r7, r5		@ g += r6 * r7		\n"
	"subs	r9, r9, #2		@ r9 = len -= 2		\n"
	"mla	r5, r12,r14,r5		@ g += r14 * r12	\n"
	"bge	2b			@ }			\n"
	"3:							\n"
	"mov	r5, r5, lsr #8		@ g >>= 8		\n"
	"strb	r5,[r0, #-1]!		@ *--dst=a		\n"
	"subs	r3, r3, #1		@ i--			\n"
	"bgt	1b			@			\n"
	"ldmfd	r13!,{r4-r7,r9,PC}	@ pop, return to thumb	\n"
	"5:"
	"add	r2, r2, r4, LSL #2	@ r2 = &index[index[0]]	\n"
	"6:"
	"ldr	r4, [r2], #4		@ r4 = *contrib++	\n"
	"ldr	r9, [r2], #4		@ r9 = len = *contrib++	\n"
	"mov	r5, #128		@ r5 = a = 128		\n"
	"add	r4, r1, r4		@ r4 = min = &src[r4]	\n"
	"subs	r9, r9, #1		@ len--			\n"
	"blt	9f			@ while (len > 0)	\n"
	"7:				@ {			\n"
	"ldrgt	r6, [r2], #4		@ r6 = *contrib++	\n"
	"ldrbgt	r7, [r4], #1		@ r7 = *min++		\n"
	"ldr	r12,[r2], #4		@ r12 = *contrib++	\n"
	"ldrb	r14,[r4], #1		@ r14 = *min++		\n"
	"mlagt	r5, r6,r7,r5		@ a += r6 * r7		\n"
	"subs	r9, r9, #2		@ r9 = len -= 2		\n"
	"mla	r5, r12,r14,r5		@ a += r14 * r12	\n"
	"bge	7b			@ }			\n"
	"9:							\n"
	"mov	r5, r5, LSR #8		@ a >>= 8		\n"
	"strb	r5, [r0], #1		@ *dst++=a		\n"
	"subs	r3, r3, #1		@ i--			\n"
	"bgt	6b			@			\n"
	"ldmfd	r13!,{r4-r7,r9,PC}	@ pop, return to thumb	\n"
	ENTER_THUMB
	);
}
700
701
/* ARM asm version of scale_row_to_temp for two components per pixel
 * (e.g. gray + alpha): two accumulators (g, a) run in parallel, each
 * seeded with 128 and shifted down by 8 before being stored. */
static void
scale_row_to_temp2(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
{
	asm volatile(
	ENTER_ARM
	"stmfd	r13!,{r4-r6,r9-r11,r14}				\n"
	"@ r0 = dst						\n"
	"@ r1 = src						\n"
	"@ r2 = weights						\n"
	"ldr	r12,[r2],#4		@ r12= flip		\n"
	"ldr	r3, [r2],#20		@ r3 = count r2 = &index\n"
	"ldr	r4, [r2]		@ r4 = index[0]		\n"
	"cmp	r12,#0			@ if (flip)		\n"
	"beq	4f			@ {			\n"
	"add	r2, r2, r4, LSL #2	@ r2 = &index[index[0]]	\n"
	"add	r0, r0, r3, LSL #1	@ dst += 2*count	\n"
	"1:							\n"
	"ldr	r4, [r2], #4		@ r4 = *contrib++	\n"
	"ldr	r9, [r2], #4		@ r9 = len = *contrib++	\n"
	"mov	r5, #128		@ r5 = g = 128		\n"
	"mov	r6, #128		@ r6 = a = 128		\n"
	"add	r4, r1, r4, LSL #1	@ r4 = min = &src[2*r4]	\n"
	"cmp	r9, #0			@ while (len-- > 0)	\n"
	"beq	3f			@ {			\n"
	"2:							\n"
	"ldr	r14,[r2], #4		@ r14 = *contrib++	\n"
	"ldrb	r11,[r4], #1		@ r11 = *min++		\n"
	"ldrb	r12,[r4], #1		@ r12 = *min++		\n"
	"subs	r9, r9, #1		@ r9 = len--		\n"
	"mla	r5, r14,r11,r5		@ g += r11 * r14	\n"
	"mla	r6, r14,r12,r6		@ a += r12 * r14	\n"
	"bgt	2b			@ }			\n"
	"3:							\n"
	"mov	r5, r5, lsr #8		@ g >>= 8		\n"
	"mov	r6, r6, lsr #8		@ a >>= 8		\n"
	"strb	r5, [r0, #-2]!		@ *--dst=a		\n"
	"strb	r6, [r0, #1]		@ *--dst=g		\n"
	"subs	r3, r3, #1		@ i--			\n"
	"bgt	1b			@			\n"
	"ldmfd	r13!,{r4-r6,r9-r11,PC}	@ pop, return to thumb	\n"
	"4:"
	"add	r2, r2, r4, LSL #2	@ r2 = &index[index[0]]	\n"
	"5:"
	"ldr	r4, [r2], #4		@ r4 = *contrib++	\n"
	"ldr	r9, [r2], #4		@ r9 = len = *contrib++	\n"
	"mov	r5, #128		@ r5 = g = 128		\n"
	"mov	r6, #128		@ r6 = a = 128		\n"
	"add	r4, r1, r4, LSL #1	@ r4 = min = &src[2*r4]	\n"
	"cmp	r9, #0			@ while (len-- > 0)	\n"
	"beq	7f			@ {			\n"
	"6:							\n"
	"ldr	r14,[r2], #4		@ r10 = *contrib++	\n"
	"ldrb	r11,[r4], #1		@ r11 = *min++		\n"
	"ldrb	r12,[r4], #1		@ r12 = *min++		\n"
	"subs	r9, r9, #1		@ r9 = len--		\n"
	"mla	r5, r14,r11,r5		@ g += r11 * r14	\n"
	"mla	r6, r14,r12,r6		@ a += r12 * r14	\n"
	"bgt	6b			@ }			\n"
	"7:							\n"
	"mov	r5, r5, lsr #8		@ g >>= 8		\n"
	"mov	r6, r6, lsr #8		@ a >>= 8		\n"
	"strb	r5, [r0], #1		@ *dst++=g		\n"
	"strb	r6, [r0], #1		@ *dst++=a		\n"
	"subs	r3, r3, #1		@ i--			\n"
	"bgt	5b			@			\n"
	"ldmfd	r13!,{r4-r6,r9-r11,PC}	@ pop, return to thumb	\n"
	ENTER_THUMB
	);
}
770
771
/* ARM asm version of scale_row_to_temp for three components per pixel
 * (e.g. RGB): three accumulators (r, g, b) run in parallel, each seeded
 * with 128 and shifted down by 8 before being stored. */
static void
scale_row_to_temp3(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
{
	asm volatile(
	ENTER_ARM
	"stmfd	r13!,{r4-r11,r14}				\n"
	"@ r0 = dst						\n"
	"@ r1 = src						\n"
	"@ r2 = weights						\n"
	"ldr	r12,[r2],#4		@ r12= flip		\n"
	"ldr	r3, [r2],#20		@ r3 = count r2 = &index\n"
	"ldr	r4, [r2]		@ r4 = index[0]		\n"
	"cmp	r12,#0			@ if (flip)		\n"
	"beq	4f			@ {			\n"
	"add	r2, r2, r4, LSL #2	@ r2 = &index[index[0]]	\n"
	"add	r0, r0, r3, LSL #1	@			\n"
	"add	r0, r0, r3		@ dst += 3*count	\n"
	"1:							\n"
	"ldr	r4, [r2], #4		@ r4 = *contrib++	\n"
	"ldr	r9, [r2], #4		@ r9 = len = *contrib++	\n"
	"mov	r5, #128		@ r5 = r = 128		\n"
	"mov	r6, #128		@ r6 = g = 128		\n"
	"add	r7, r1, r4, LSL #1	@			\n"
	"add	r4, r7, r4		@ r4 = min = &src[3*r4]	\n"
	"mov	r7, #128		@ r7 = b = 128		\n"
	"cmp	r9, #0			@ while (len-- > 0)	\n"
	"beq	3f			@ {			\n"
	"2:							\n"
	"ldr	r14,[r2], #4		@ r14 = *contrib++	\n"
	"ldrb	r8, [r4], #1		@ r8  = *min++		\n"
	"ldrb	r11,[r4], #1		@ r11 = *min++		\n"
	"ldrb	r12,[r4], #1		@ r12 = *min++		\n"
	"subs	r9, r9, #1		@ r9 = len--		\n"
	"mla	r5, r14,r8, r5		@ r += r8  * r14	\n"
	"mla	r6, r14,r11,r6		@ g += r11 * r14	\n"
	"mla	r7, r14,r12,r7		@ b += r12 * r14	\n"
	"bgt	2b			@ }			\n"
	"3:							\n"
	"mov	r5, r5, lsr #8		@ r >>= 8		\n"
	"mov	r6, r6, lsr #8		@ g >>= 8		\n"
	"mov	r7, r7, lsr #8		@ b >>= 8		\n"
	"strb	r5, [r0, #-3]!		@ *--dst=r		\n"
	"strb	r6, [r0, #1]		@ *--dst=g		\n"
	"strb	r7, [r0, #2]		@ *--dst=b		\n"
	"subs	r3, r3, #1		@ i--			\n"
	"bgt	1b			@			\n"
	"ldmfd	r13!,{r4-r11,PC}	@ pop, return to thumb	\n"
	"4:"
	"add	r2, r2, r4, LSL #2	@ r2 = &index[index[0]]	\n"
	"5:"
	"ldr	r4, [r2], #4		@ r4 = *contrib++	\n"
	"ldr	r9, [r2], #4		@ r9 = len = *contrib++	\n"
	"mov	r5, #128		@ r5 = r = 128		\n"
	"mov	r6, #128		@ r6 = g = 128		\n"
	"add	r7, r1, r4, LSL #1	@ r7 = min = &src[2*r4]	\n"
	"add	r4, r7, r4		@ r4 = min = &src[3*r4]	\n"
	"mov	r7, #128		@ r7 = b = 128		\n"
	"cmp	r9, #0			@ while (len-- > 0)	\n"
	"beq	7f			@ {			\n"
	"6:							\n"
	"ldr	r14,[r2], #4		@ r10 = *contrib++	\n"
	"ldrb	r8, [r4], #1		@ r8  = *min++		\n"
	"ldrb	r11,[r4], #1		@ r11 = *min++		\n"
	"ldrb	r12,[r4], #1		@ r12 = *min++		\n"
	"subs	r9, r9, #1		@ r9 = len--		\n"
	"mla	r5, r14,r8, r5		@ r += r8  * r14	\n"
	"mla	r6, r14,r11,r6		@ g += r11 * r14	\n"
	"mla	r7, r14,r12,r7		@ b += r12 * r14	\n"
	"bgt	6b			@ }			\n"
	"7:							\n"
	"mov	r5, r5, lsr #8		@ r >>= 8		\n"
	"mov	r6, r6, lsr #8		@ g >>= 8		\n"
	"mov	r7, r7, lsr #8		@ b >>= 8		\n"
	"strb	r5, [r0], #1		@ *dst++=r		\n"
	"strb	r6, [r0], #1		@ *dst++=g		\n"
	"strb	r7, [r0], #1		@ *dst++=b		\n"
	"subs	r3, r3, #1		@ i--			\n"
	"bgt	5b			@			\n"
	"ldmfd	r13!,{r4-r11,PC}	@ pop, return to thumb	\n"
	ENTER_THUMB
	);
}
853
854
/* ARM asm version of scale_row_to_temp for four components per pixel
 * (e.g. RGBA). Each pixel is processed as one 32-bit word: the
 * 0x00FF00FF mask splits it into two 16-bit-spaced component pairs so
 * that two components can be multiply-accumulated per mla, with
 * 0x00800080 providing the per-component rounding bias. */
static void
scale_row_to_temp4(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
{
	asm volatile(
	ENTER_ARM
	"stmfd	r13!,{r4-r11,r14}				\n"
	"@ r0 = dst						\n"
	"@ r1 = src						\n"
	"@ r2 = weights						\n"
	"ldr	r12,[r2],#4		@ r12= flip		\n"
	"ldr	r3, [r2],#20		@ r3 = count r2 = &index\n"
	"ldr	r4, [r2]		@ r4 = index[0]		\n"
	"ldr	r5,=0x00800080		@ r5 = rounding		\n"
	"ldr	r6,=0x00FF00FF		@ r7 = 0x00FF00FF	\n"
	"cmp	r12,#0			@ if (flip)		\n"
	"beq	4f			@ {			\n"
	"add	r2, r2, r4, LSL #2	@ r2 = &index[index[0]]	\n"
	"add	r0, r0, r3, LSL #2	@ dst += 4*count	\n"
	"1:							\n"
	"ldr	r4, [r2], #4		@ r4 = *contrib++	\n"
	"ldr	r9, [r2], #4		@ r9 = len = *contrib++	\n"
	"mov	r7, r5			@ r7 = b = rounding	\n"
	"mov	r8, r5			@ r8 = a = rounding	\n"
	"add	r4, r1, r4, LSL #2	@ r4 = min = &src[4*r4]	\n"
	"cmp	r9, #0			@ while (len-- > 0)	\n"
	"beq	3f			@ {			\n"
	"2:							\n"
	"ldr	r11,[r4], #4		@ r11 = *min++		\n"
	"ldr	r10,[r2], #4		@ r10 = *contrib++	\n"
	"subs	r9, r9, #1		@ r9 = len--		\n"
	"and	r12,r6, r11		@ r12 = __22__00	\n"
	"and	r11,r6, r11,LSR #8	@ r11 = __33__11	\n"
	"mla	r7, r10,r12,r7		@ b += r14 * r10	\n"
	"mla	r8, r10,r11,r8		@ a += r11 * r10	\n"
	"bgt	2b			@ }			\n"
	"3:							\n"
	"and	r7, r6, r7, lsr #8	@ r7 = __22__00		\n"
	"bic	r8, r8, r6		@ r8 = 33__11__		\n"
	"orr	r7, r7, r8		@ r7 = 33221100		\n"
	"str	r7, [r0, #-4]!		@ *--dst=r		\n"
	"subs	r3, r3, #1		@ i--			\n"
	"bgt	1b			@			\n"
	"ldmfd	r13!,{r4-r11,PC}	@ pop, return to thumb	\n"
	"4:							\n"
	"add	r2, r2, r4, LSL #2	@ r2 = &index[index[0]]	\n"
	"5:							\n"
	"ldr	r4, [r2], #4		@ r4 = *contrib++	\n"
	"ldr	r9, [r2], #4		@ r9 = len = *contrib++	\n"
	"mov	r7, r5			@ r7 = b = rounding	\n"
	"mov	r8, r5			@ r8 = a = rounding	\n"
	"add	r4, r1, r4, LSL #2	@ r4 = min = &src[4*r4]	\n"
	"cmp	r9, #0			@ while (len-- > 0)	\n"
	"beq	7f			@ {			\n"
	"6:							\n"
	"ldr	r11,[r4], #4		@ r11 = *min++		\n"
	"ldr	r10,[r2], #4		@ r10 = *contrib++	\n"
	"subs	r9, r9, #1		@ r9 = len--		\n"
	"and	r12,r6, r11		@ r12 = __22__00	\n"
	"and	r11,r6, r11,LSR #8	@ r11 = __33__11	\n"
	"mla	r7, r10,r12,r7		@ b += r14 * r10	\n"
	"mla	r8, r10,r11,r8		@ a += r11 * r10	\n"
	"bgt	6b			@ }			\n"
	"7:							\n"
	"and	r7, r6, r7, lsr #8	@ r7 = __22__00		\n"
	"bic	r8, r8, r6		@ r8 = 33__11__		\n"
	"orr	r7, r7, r8		@ r7 = 33221100		\n"
	"str	r7, [r0], #4		@ *dst++=r		\n"
	"subs	r3, r3, #1		@ i--			\n"
	"bgt	5b			@			\n"
	"ldmfd	r13!,{r4-r11,PC}	@ pop, return to thumb	\n"
	ENTER_THUMB
	);
}
927
928
/* ARM-assembly version: scale one destination row out of the temporary
 * column-scaled buffer. Walks each output byte down a column of 'src'
 * (stride = width = w*n bytes), accumulating *min * *contrib in 8.8
 * fixed point with a 128 rounding bias, then stores val>>8.
 * The fast path (labels 1-3) handles 4 bytes per iteration using the
 * 0x00FF00FF even/odd byte-lane split; the tail (labels 5-7) finishes
 * byte at a time. Falls back entirely to the byte loop when src/width
 * are not 4-aligned and unaligned access is not allowed. */
static void
scale_row_from_temp(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int width, int n, int row)
{
	asm volatile(
	ENTER_ARM
	"stmfd	r13!,{r4-r11,r14}				\n"
	"@ r0 = dst						\n"
	"@ r1 = src						\n"
	"@ r2 = &weights->index[0]				\n"
	"@ r3 = width						\n"
	"@ r12= row						\n"
	"ldr	r14,[r13,#4*9]		@ r14= n		\n"
	"ldr	r12,[r13,#4*10]		@ r12= row		\n"
	"add	r2, r2, #24		@ r2 = weights->index	\n"
	"mul	r3, r14, r3		@ r3 = width *= n	\n"
	"ldr	r4, [r2, r12, LSL #2]	@ r4 = index[row]	\n"
	"add	r2, r2, #4		@ r2 = &index[1]	\n"
	"subs	r6, r3, #4		@ r6 = x = width-4	\n"
	"ldr	r14,[r2, r4, LSL #2]!	@ r2 = contrib = index[index[row]+1]\n"
	"				@ r14= len = *contrib	\n"
	"blt	4f			@ while (x >= 0) {	\n"
#ifndef ARCH_UNALIGNED_OK
	"tst	r3, #3			@ if ((r3 & 3)		\n"
	"tsteq	r1, #3			@ || (r1 & 3))		\n"
	"bne	4f			@ can't do fast code	\n"
#endif
	"ldr	r9, =0x00FF00FF		@ r9 = 0x00FF00FF	\n"
	"1:						\n"
	"ldr	r7, =0x00800080		@ r5 = val0 = round	\n"
	"stmfd	r13!,{r1,r2,r7}		@ stash r1,r2,r5	\n"
	"				@ r1 = min = src	\n"
	"				@ r2 = contrib2-4	\n"
	"movs	r8, r14			@ r8 = len2 = len	\n"
	"mov	r5, r7			@ r7 = val1 = round	\n"
	"ble	3f			@ while (len2-- > 0) {	\n"
	"2:						\n"
	"ldr	r12,[r1], r3		@ r12 = *min	r5 = min += width\n"
	"ldr	r10,[r2, #4]!		@ r10 = *contrib2++	\n"
	"subs	r8, r8, #1		@ len2--		\n"
	"and	r11,r9, r12		@ r11= __22__00		\n"
	"and	r12,r9, r12,LSR #8	@ r12= __33__11		\n"
	"mla	r5, r10,r11,r5		@ r5 = val0 += r11 * r10\n"
	"mla	r7, r10,r12,r7		@ r7 = val1 += r12 * r10\n"
	"bgt	2b			@ }			\n"
	"and	r5, r9, r5, LSR #8	@ r5 = __22__00		\n"
	"and	r7, r7, r9, LSL #8	@ r7 = 33__11__		\n"
	"orr	r5, r5, r7		@ r5 = 33221100		\n"
	"3:						\n"
	"ldmfd	r13!,{r1,r2,r7}		@ restore r1,r2,r7	\n"
	"subs	r6, r6, #4		@ x--			\n"
	"add	r1, r1, #4		@ src++			\n"
	"str	r5, [r0], #4		@ *dst++ = val		\n"
	"bge	1b			@			\n"
	"4:		@ } (Less than 4 to go)			\n"
	"adds	r6, r6, #4		@ r6 = x += 4		\n"
	"beq	8f			@ if (x == 0) done	\n"
	"5:						\n"
	"mov	r5, r1			@ r5 = min = src	\n"
	"mov	r7, #128		@ r7 = val = 128	\n"
	"movs	r8, r14			@ r8 = len2 = len	\n"
	"add	r9, r2, #4		@ r9 = contrib2		\n"
	"ble	7f			@ while (len2-- > 0) {	\n"
	"6:						\n"
	"ldr	r10,[r9], #4		@ r10 = *contrib2++	\n"
	"ldrb	r12,[r5], r3		@ r12 = *min	r5 = min += width\n"
	"subs	r8, r8, #1		@ len2--		\n"
	"@ stall r12					\n"
	"mla	r7, r10,r12,r7		@ val += r12 * r10	\n"
	"bgt	6b			@ }			\n"
	"7:						\n"
	"mov	r7, r7, asr #8		@ r7 = val >>= 8	\n"
	"subs	r6, r6, #1		@ x--			\n"
	"add	r1, r1, #1		@ src++			\n"
	"strb	r7, [r0], #1		@ *dst++ = val		\n"
	"bgt	5b			@			\n"
	"8:						\n"
	"ldmfd	r13!,{r4-r11,PC}	@ pop, return to thumb	\n"
	".ltorg						\n"
	ENTER_THUMB
	);
}
1009
1010
/* ARM-assembly version: as scale_row_from_temp, but additionally writes
 * a synthesized fully-opaque alpha byte (255) after each n-component
 * destination pixel (used when forcealpha promotes the output pixmap
 * to having an alpha channel). Byte-at-a-time only; no word fast path. */
static void
scale_row_from_temp_alpha(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int width, int n, int row)
{
	asm volatile(
	ENTER_ARM
	"stmfd	r13!,{r4-r11,r14}				\n"
	"mov	r11,#255		@ r11= 255		\n"
	"ldr	r12,[r13,#4*10]		@ r12= row		\n"
	"@ r0 = dst						\n"
	"@ r1 = src						\n"
	"@ r2 = &weights->index[0]				\n"
	"@ r3 = width						\n"
	"@ r11= 255						\n"
	"@ r12= row						\n"
	"add	r2, r2, #24		@ r2 = weights->index	\n"
	"ldr	r4, [r2, r12, LSL #2]	@ r4 = index[row]	\n"
	"add	r2, r2, #4		@ r2 = &index[1]	\n"
	"mov	r6, r3			@ r6 = x = width	\n"
	"ldr	r14,[r2, r4, LSL #2]!	@ r2 = contrib = index[index[row]+1]\n"
	"				@ r14= len = *contrib	\n"
	"5:						\n"
	"ldr	r4,[r13,#4*9]		@ r10= nn = n		\n"
	"1:						\n"
	"mov	r5, r1			@ r5 = min = src	\n"
	"mov	r7, #128		@ r7 = val = 128	\n"
	"movs	r8, r14			@ r8 = len2 = len	\n"
	"add	r9, r2, #4		@ r9 = contrib2		\n"
	"ble	7f			@ while (len2-- > 0) {	\n"
	"6:						\n"
	"ldr	r10,[r9], #4		@ r10 = *contrib2++	\n"
	"ldrb	r12,[r5], r3		@ r12 = *min	r5 = min += width\n"
	"subs	r8, r8, #1		@ len2--		\n"
	"@ stall r12					\n"
	"mla	r7, r10,r12,r7		@ val += r12 * r10	\n"
	"bgt	6b			@ }			\n"
	"7:						\n"
	"mov	r7, r7, asr #8		@ r7 = val >>= 8	\n"
	"subs	r4, r4, #1		@ r4 = nn--		\n"
	"add	r1, r1, #1		@ src++			\n"
	"strb	r7, [r0], #1		@ *dst++ = val		\n"
	"bgt	1b			@			\n"
	"subs	r6, r6, #1		@ x--			\n"
	"strb	r11,[r0], #1		@ *dst++ = 255		\n"
	"bgt	5b			@			\n"
	"ldmfd	r13!,{r4-r11,PC}	@ pop, return to thumb	\n"
	".ltorg						\n"
	ENTER_THUMB
	);
}
1059
#else
1060
1061
static void
1062
scale_row_to_temp1(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
1063
0
{
1064
0
  const int *contrib = &weights->index[weights->index[0]];
1065
0
  int len, i;
1066
0
  const unsigned char *min;
1067
1068
0
  assert(weights->n == 1);
1069
0
  if (weights->flip)
1070
0
  {
1071
0
    dst += weights->count;
1072
0
    for (i=weights->count; i > 0; i--)
1073
0
    {
1074
0
      int val = 128;
1075
0
      min = &src[*contrib++];
1076
0
      len = *contrib++;
1077
0
      while (len-- > 0)
1078
0
      {
1079
0
        val += *min++ * *contrib++;
1080
0
      }
1081
0
      *--dst = (unsigned char)(val>>8);
1082
0
    }
1083
0
  }
1084
0
  else
1085
0
  {
1086
0
    for (i=weights->count; i > 0; i--)
1087
0
    {
1088
0
      int val = 128;
1089
0
      min = &src[*contrib++];
1090
0
      len = *contrib++;
1091
0
      while (len-- > 0)
1092
0
      {
1093
0
        val += *min++ * *contrib++;
1094
0
      }
1095
0
      *dst++ = (unsigned char)(val>>8);
1096
0
    }
1097
0
  }
1098
0
}
1099
1100
static void
1101
scale_row_to_temp2(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
1102
0
{
1103
0
  const int *contrib = &weights->index[weights->index[0]];
1104
0
  int len, i;
1105
0
  const unsigned char *min;
1106
1107
0
  assert(weights->n == 2);
1108
0
  if (weights->flip)
1109
0
  {
1110
0
    dst += 2*weights->count;
1111
0
    for (i=weights->count; i > 0; i--)
1112
0
    {
1113
0
      int c1 = 128;
1114
0
      int c2 = 128;
1115
0
      min = &src[2 * *contrib++];
1116
0
      len = *contrib++;
1117
0
      while (len-- > 0)
1118
0
      {
1119
0
        c1 += *min++ * *contrib;
1120
0
        c2 += *min++ * *contrib++;
1121
0
      }
1122
0
      *--dst = (unsigned char)(c2>>8);
1123
0
      *--dst = (unsigned char)(c1>>8);
1124
0
    }
1125
0
  }
1126
0
  else
1127
0
  {
1128
0
    for (i=weights->count; i > 0; i--)
1129
0
    {
1130
0
      int c1 = 128;
1131
0
      int c2 = 128;
1132
0
      min = &src[2 * *contrib++];
1133
0
      len = *contrib++;
1134
0
      while (len-- > 0)
1135
0
      {
1136
0
        c1 += *min++ * *contrib;
1137
0
        c2 += *min++ * *contrib++;
1138
0
      }
1139
0
      *dst++ = (unsigned char)(c1>>8);
1140
0
      *dst++ = (unsigned char)(c2>>8);
1141
0
    }
1142
0
  }
1143
0
}
1144
1145
static void
1146
scale_row_to_temp3(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
1147
3.45k
{
1148
3.45k
  const int *contrib = &weights->index[weights->index[0]];
1149
3.45k
  int len, i;
1150
3.45k
  const unsigned char *min;
1151
1152
3.45k
  assert(weights->n == 3);
1153
3.45k
  if (weights->flip)
1154
0
  {
1155
0
    dst += 3*weights->count;
1156
0
    for (i=weights->count; i > 0; i--)
1157
0
    {
1158
0
      int c1 = 128;
1159
0
      int c2 = 128;
1160
0
      int c3 = 128;
1161
0
      min = &src[3 * *contrib++];
1162
0
      len = *contrib++;
1163
0
      while (len-- > 0)
1164
0
      {
1165
0
        int c = *contrib++;
1166
0
        c1 += *min++ * c;
1167
0
        c2 += *min++ * c;
1168
0
        c3 += *min++ * c;
1169
0
      }
1170
0
      *--dst = (unsigned char)(c3>>8);
1171
0
      *--dst = (unsigned char)(c2>>8);
1172
0
      *--dst = (unsigned char)(c1>>8);
1173
0
    }
1174
0
  }
1175
3.45k
  else
1176
3.45k
  {
1177
11.9M
    for (i=weights->count; i > 0; i--)
1178
11.9M
    {
1179
11.9M
      int c1 = 128;
1180
11.9M
      int c2 = 128;
1181
11.9M
      int c3 = 128;
1182
11.9M
      min = &src[3 * *contrib++];
1183
11.9M
      len = *contrib++;
1184
43.7M
      while (len-- > 0)
1185
31.8M
      {
1186
31.8M
        int c = *contrib++;
1187
31.8M
        c1 += *min++ * c;
1188
31.8M
        c2 += *min++ * c;
1189
31.8M
        c3 += *min++ * c;
1190
31.8M
      }
1191
11.9M
      *dst++ = (unsigned char)(c1>>8);
1192
11.9M
      *dst++ = (unsigned char)(c2>>8);
1193
11.9M
      *dst++ = (unsigned char)(c3>>8);
1194
11.9M
    }
1195
3.45k
  }
1196
3.45k
}
1197
1198
static void
1199
scale_row_to_temp4(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
1200
0
{
1201
0
  const int *contrib = &weights->index[weights->index[0]];
1202
0
  int len, i;
1203
0
  const unsigned char *min;
1204
1205
0
  assert(weights->n == 4);
1206
0
  if (weights->flip)
1207
0
  {
1208
0
    dst += 4*weights->count;
1209
0
    for (i=weights->count; i > 0; i--)
1210
0
    {
1211
0
      int r = 128;
1212
0
      int g = 128;
1213
0
      int b = 128;
1214
0
      int a = 128;
1215
0
      min = &src[4 * *contrib++];
1216
0
      len = *contrib++;
1217
0
      while (len-- > 0)
1218
0
      {
1219
0
        r += *min++ * *contrib;
1220
0
        g += *min++ * *contrib;
1221
0
        b += *min++ * *contrib;
1222
0
        a += *min++ * *contrib++;
1223
0
      }
1224
0
      *--dst = (unsigned char)(a>>8);
1225
0
      *--dst = (unsigned char)(b>>8);
1226
0
      *--dst = (unsigned char)(g>>8);
1227
0
      *--dst = (unsigned char)(r>>8);
1228
0
    }
1229
0
  }
1230
0
  else
1231
0
  {
1232
0
    for (i=weights->count; i > 0; i--)
1233
0
    {
1234
0
      int r = 128;
1235
0
      int g = 128;
1236
0
      int b = 128;
1237
0
      int a = 128;
1238
0
      min = &src[4 * *contrib++];
1239
0
      len = *contrib++;
1240
0
      while (len-- > 0)
1241
0
      {
1242
0
        r += *min++ * *contrib;
1243
0
        g += *min++ * *contrib;
1244
0
        b += *min++ * *contrib;
1245
0
        a += *min++ * *contrib++;
1246
0
      }
1247
0
      *dst++ = (unsigned char)(r>>8);
1248
0
      *dst++ = (unsigned char)(g>>8);
1249
0
      *dst++ = (unsigned char)(b>>8);
1250
0
      *dst++ = (unsigned char)(a>>8);
1251
0
    }
1252
0
  }
1253
0
}
1254
1255
static void
1256
scale_row_from_temp(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int w, int n, int row)
1257
2.59k
{
1258
2.59k
  const int *contrib = &weights->index[weights->index[row]];
1259
2.59k
  int len, x;
1260
2.59k
  int width = w * n;
1261
1262
2.59k
  contrib++; /* Skip min */
1263
2.59k
  len = *contrib++;
1264
26.8M
  for (x=width; x > 0; x--)
1265
26.8M
  {
1266
26.8M
    const unsigned char *min = src;
1267
26.8M
    int val = 128;
1268
26.8M
    int len2 = len;
1269
26.8M
    const int *contrib2 = contrib;
1270
1271
107M
    while (len2-- > 0)
1272
80.6M
    {
1273
80.6M
      val += *min * *contrib2++;
1274
80.6M
      min += width;
1275
80.6M
    }
1276
26.8M
    *dst++ = (unsigned char)(val>>8);
1277
26.8M
    src++;
1278
26.8M
  }
1279
2.59k
}
1280
1281
static void
1282
scale_row_from_temp_alpha(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int w, int n, int row)
1283
0
{
1284
0
  const int *contrib = &weights->index[weights->index[row]];
1285
0
  int len, x;
1286
0
  int width = w * n;
1287
1288
0
  contrib++; /* Skip min */
1289
0
  len = *contrib++;
1290
0
  for (x=w; x > 0; x--)
1291
0
  {
1292
0
    int nn;
1293
0
    for (nn = n; nn > 0; nn--)
1294
0
    {
1295
0
      const unsigned char *min = src;
1296
0
      int val = 128;
1297
0
      int len2 = len;
1298
0
      const int *contrib2 = contrib;
1299
1300
0
      while (len2-- > 0)
1301
0
      {
1302
0
        val += *min * *contrib2++;
1303
0
        min += width;
1304
0
      }
1305
0
      *dst++ = (unsigned char)(val>>8);
1306
0
      src++;
1307
0
    }
1308
0
    *dst++ = 255;
1309
0
  }
1310
0
}
1311
#endif
1312
1313
#ifdef SINGLE_PIXEL_SPECIALS
1314
static void
1315
duplicate_single_pixel(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, int n, int forcealpha, int w, int h, int stride)
1316
0
{
1317
0
  int i;
1318
1319
0
  for (i = n; i > 0; i--)
1320
0
    *dst++ = *src++;
1321
0
  if (forcealpha)
1322
0
    *dst++ = 255;
1323
0
  n += forcealpha;
1324
0
  for (i = w-1; i > 0; i--)
1325
0
  {
1326
0
    memcpy(dst, dst-n, n);
1327
0
    dst += n;
1328
0
  }
1329
0
  w *= n;
1330
0
  dst -= w;
1331
0
  h--;
1332
0
  while (h--)
1333
0
  {
1334
0
    memcpy(dst+stride, dst, w);
1335
0
    dst += stride;
1336
0
  }
1337
0
}
1338
1339
/* Special case: the source is a single pixel high. Scale that one row
 * horizontally into the first destination row, then duplicate it down
 * all h destination rows. forcealpha appends a synthesized opaque
 * alpha component to each output pixel. */
static void
scale_single_row(unsigned char * FZ_RESTRICT dst, int dstride, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int src_w, int h, int forcealpha)
{
	const int *contrib = &weights->index[weights->index[0]];
	int min, len, i, j, n, nf;
	int tmp[FZ_MAX_COLORS];

	n = weights->n;
	nf = n + forcealpha; /* components written per destination pixel */
	/* Scale a single row */
	for (j = 0; j < nf; j++)
		tmp[j] = 128; /* 0.5 in 8.8 fixed point: rounding bias */
	if (weights->flip)
	{
		/* Write the scaled pixels right-to-left. */
		dst += (weights->count-1)*nf;
		for (i=weights->count; i > 0; i--)
		{
			min = *contrib++; /* first contributing source pixel */
			len = *contrib++; /* number of contributions */
			min *= n;
			while (len-- > 0)
			{
				int c = *contrib++;
				for (j = 0; j < n; j++)
					tmp[j] += src[min++] * c;
				if (forcealpha)
					tmp[j] += 255 * c; /* alpha weighted as if source were opaque */
			}
			for (j = 0; j < nf; j++)
			{
				*dst++ = (unsigned char)(tmp[j]>>8);
				tmp[j] = 128; /* reset accumulator for the next pixel */
			}
			dst -= 2*nf; /* we advanced one pixel; step back two to move left */
		}
		dst += nf + dstride; /* -> start of the second destination row */
	}
	else
	{
		for (i=weights->count; i > 0; i--)
		{
			min = *contrib++;
			len = *contrib++;
			min *= n;
			while (len-- > 0)
			{
				int c = *contrib++;
				for (j = 0; j < n; j++)
					tmp[j] += src[min++] * c;
				if (forcealpha)
					tmp[j] += 255 * c;
			}
			for (j = 0; j < nf; j++)
			{
				*dst++ = (unsigned char)(tmp[j]>>8);
				tmp[j] = 128;
			}
		}
		dst += dstride - weights->count * nf; /* -> start of the second destination row */
	}
	/* And then duplicate it h times */
	nf *= weights->count; /* nf is now the byte width of a whole destination row */
	while (--h > 0)
	{
		memcpy(dst, dst-dstride, nf);
		dst += dstride;
	}
}
1407
1408
/* Special case: the source is a single pixel wide. Scale that one
 * column vertically, and duplicate each resulting pixel across the
 * whole destination row. forcealpha appends a synthesized opaque
 * alpha component. When weights->flip is set, the column is read
 * bottom-to-top (note: here 'flip' applies to the vertical weights). */
static void
scale_single_col(unsigned char * FZ_RESTRICT dst, int dstride, const unsigned char * FZ_RESTRICT src, int sstride, const fz_weights * FZ_RESTRICT weights, int src_w, int n, int w, int forcealpha)
{
	const int *contrib = &weights->index[weights->index[0]];
	int min, len, i, j;
	int tmp[FZ_MAX_COLORS];
	int nf = n + forcealpha; /* components written per destination pixel */

	for (j = 0; j < nf; j++)
		tmp[j] = 128; /* 0.5 in 8.8 fixed point: rounding bias */
	if (weights->flip)
	{
		src_w = (src_w-1)*sstride; /* byte offset of the last source row */
		for (i=weights->count; i > 0; i--)
		{
			/* Scale the next pixel in the column */
			min = *contrib++; /* first contributing source row */
			len = *contrib++; /* number of contributions */
			min = src_w-min*sstride; /* walk upwards from the bottom */
			while (len-- > 0)
			{
				int c = *contrib++;
				for (j = 0; j < n; j++)
					tmp[j] += src[min+j] * c;
				if (forcealpha)
					tmp[j] += 255 * c; /* alpha weighted as if source were opaque */
				min -= sstride;
			}
			for (j = 0; j < nf; j++)
			{
				*dst++ = (unsigned char)(tmp[j]>>8);
				tmp[j] = 128; /* reset for the next output pixel */
			}
			/* And then duplicate it across the row */
			for (j = (w-1)*nf; j > 0; j--)
			{
				*dst = dst[-nf];
				dst++;
			}
			dst += dstride - w*nf; /* -> start of the next destination row */
		}
	}
	else
	{
		for (i=weights->count; i > 0; i--)
		{
			/* Scale the next pixel in the column */
			min = *contrib++;
			len = *contrib++;
			min *= sstride;
			while (len-- > 0)
			{
				int c = *contrib++;
				for (j = 0; j < n; j++)
					tmp[j] += src[min+j] * c;
				if (forcealpha)
					tmp[j] += 255 * c;
				min += sstride;
			}
			for (j = 0; j < nf; j++)
			{
				*dst++ = (unsigned char)(tmp[j]>>8);
				tmp[j] = 128;
			}
			/* And then duplicate it across the row */
			for (j = (w-1)*nf; j > 0; j--)
			{
				*dst = dst[-nf];
				dst++;
			}
			dst += dstride - w*nf;
		}
	}
}
1482
#endif /* SINGLE_PIXEL_SPECIALS */
1483
1484
/* Sum the weights that contribute to the first and last destination
 * lines of 'rows', returning them via *tp (top) and *bp (bottom).
 * These totals (<= 256) measure how much of each edge line is actually
 * covered by source data; adjust_alpha_edges uses them to fade the
 * border alpha. If rows->flip is set the two are swapped, since the
 * first weight entry then corresponds to the far edge. */
static void
get_alpha_edge_values(const fz_weights * FZ_RESTRICT rows, int * FZ_RESTRICT tp, int * FZ_RESTRICT bp)
{
	const int *contrib = &rows->index[rows->index[0]];
	int len, i, t, b;

	/* Calculate the edge alpha values */
	contrib++; /* Skip min */
	len = *contrib++;
	t = 0;
	while (len--)
		t += *contrib++; /* total weight on the first line */
	/* Skip over the interior lines' weight records. */
	for (i=rows->count-2; i > 0; i--)
	{
		contrib++; /* Skip min */
		len = *contrib++;
		contrib += len;
	}
	b = 0;
	/* NOTE(review): i is deliberately read after the loop. i == 0 means
	 * there is a distinct last line (count >= 2); with count == 1 the
	 * loop leaves i == -1 and b stays 0. */
	if (i == 0)
	{
		contrib++; /* Skip min */
		len = *contrib++;
		while (len--)
			b += *contrib++; /* total weight on the last line */
	}
	if (rows->flip && i == 0)
	{
		*tp = b;
		*bp = t;
	}
	else
	{
		*tp = t;
		*bp = b;
	}
}
1521
1522
/* Rewrite the alpha component of every border pixel of 'pix' so that
 * partially-covered edge pixels fade out correctly: edges get the
 * one-axis coverage value, corners the product of both axes. Assumes
 * the alpha byte is the last of the n components. Post-loop tests of
 * x/y handle the w==1 / h==1 degenerate shapes. */
static void
adjust_alpha_edges(fz_pixmap * FZ_RESTRICT pix, const fz_weights * FZ_RESTRICT rows, const fz_weights * FZ_RESTRICT cols)
{
	int t, l, r, b, tl, tr, bl, br, x, y;
	unsigned char *dp = pix->samples;
	int w = pix->w;
	int n = pix->n;
	int span = w >= 2 ? (w-1)*n : 0; /* offset from left edge alpha to right edge alpha */
	int stride = pix->stride;

	get_alpha_edge_values(rows, &t, &b); /* top/bottom coverage (8.8 fixed point) */
	get_alpha_edge_values(cols, &l, &r); /* left/right coverage */

	/* Scale each coverage value to a 0..255 alpha; corners combine both axes. */
	l = (255 * l + 128)>>8;
	r = (255 * r + 128)>>8;
	tl = (l * t + 128)>>8;
	tr = (r * t + 128)>>8;
	bl = (l * b + 128)>>8;
	br = (r * b + 128)>>8;
	t = (255 * t + 128)>>8;
	b = (255 * b + 128)>>8;
	dp += n-1; /* point at the alpha byte of the top-left pixel */
	*dp = tl;
	dp += n;
	/* Top edge (between the corners). */
	for (x = w-2; x > 0; x--)
	{
		*dp = t;
		dp += n;
	}
	if (x == 0) /* only when w >= 2 is there a distinct top-right corner */
	{
		*dp = tr;
		dp += n;
	}
	dp += stride - w*n; /* -> alpha of the leftmost pixel, second row */
	/* Left and right edges. */
	for (y = pix->h-2; y > 0; y--)
	{
		dp[span] = r;
		*dp = l;
		dp += stride;
	}
	if (y == 0) /* only when h >= 2 is there a distinct bottom row */
	{
		*dp = bl;
		dp += n;
		for (x = w-2; x > 0; x--)
		{
			*dp = b;
			dp += n;
		}
		if (x == 0)
		{
			*dp = br;
		}
	}
}
1578
1579
/* Convenience wrapper around fz_scale_pixmap_cached with no weight
 * caches: weights are computed, used and freed within the call. */
fz_pixmap *
fz_scale_pixmap(fz_context *ctx, fz_pixmap *src, float x, float y, float w, float h, const fz_irect *clip)
{
	return fz_scale_pixmap_cached(ctx, src, x, y, w, h, clip, NULL, NULL);
}
1584
1585
/* Scale 'src' to w x h placed at (x,y), optionally clipped, returning a
 * new pixmap (or NULL when the result is empty or the scale is too
 * extreme). Negative w/h flip the image. cache_x/cache_y, when
 * non-NULL, own the column/row weights across calls; otherwise the
 * weights are freed here. */
fz_pixmap *
fz_scale_pixmap_cached(fz_context *ctx, const fz_pixmap *src, float x, float y, float w, float h, const fz_irect *clip, fz_scale_cache *cache_x, fz_scale_cache *cache_y)
{
	fz_scale_filter *filter = &fz_scale_filter_simple;
	fz_weights *contrib_rows = NULL;
	fz_weights *contrib_cols = NULL;
	fz_pixmap *output = NULL;
	unsigned char *temp = NULL;
	int max_row, temp_span, temp_rows, row;
	int dst_w_int, dst_h_int, dst_x_int, dst_y_int;
	int flip_x, flip_y, forcealpha;
	fz_rect patch;

	fz_var(contrib_cols);
	fz_var(contrib_rows);

	/* Avoid extreme scales where overflows become problematic. */
	if (w > (1<<24) || h > (1<<24) || w < -(1<<24) || h < -(1<<24))
		return NULL;
	if (x > (1<<24) || y > (1<<24) || x < -(1<<24) || y < -(1<<24))
		return NULL;

	/* Clamp small ranges of w and h */
	if (w <= -1)
	{
		/* Large negative range. Don't clamp */
	}
	else if (w < 0)
	{
		w = -1;
	}
	else if (w < 1)
	{
		w = 1;
	}
	if (h <= -1)
	{
		/* Large negative range. Don't clamp */
	}
	else if (h < 0)
	{
		h = -1;
	}
	else if (h < 1)
	{
		h = 1;
	}

	/* If the src has an alpha, we'll make the dst have an alpha automatically.
	 * We also need to force the dst to have an alpha if x/y/w/h aren't ints. */
	forcealpha = !src->alpha && (x != (float)(int)x || y != (float)(int)y || w != (float)(int)w || h != (float)(int)h);

	/* Find the destination bbox, width/height, and sub pixel offset,
	 * allowing for whether we're flipping or not. */
	/* The (x,y) position given describes where the top left corner
	 * of the source image should be mapped to (i.e. where (0,0) in image
	 * space ends up). Also there are differences in the way we scale
	 * horizontally and vertically. When scaling rows horizontally, we
	 * always read forwards through the source, and store either forwards
	 * or in reverse as required. When scaling vertically, we always store
	 * out forwards, but may feed source rows in in a different order.
	 *
	 * Consider the image rectangle 'r' to which the image is mapped,
	 * and the (possibly) larger rectangle 'R', given by expanding 'r' to
	 * complete pixels.
	 *
	 * x can either be r.xmin-R.xmin or R.xmax-r.xmax depending on whether
	 * the image is x flipped or not. Whatever happens 0 <= x < 1.
	 * y is always R.ymax - r.ymax.
	 */
	/* dst_x_int is calculated to be the left of the scaled image, and
	 * x (the sub pixel offset) is the distance in from either the left
	 * or right pixel expanded edge. */
	flip_x = (w < 0);
	if (flip_x)
	{
		float tmp;
		w = -w;
		dst_x_int = floorf(x-w);
		tmp = ceilf(x);
		dst_w_int = (int)tmp;
		x = tmp - x;
		dst_w_int -= dst_x_int;
	}
	else
	{
		dst_x_int = floorf(x);
		x -= dst_x_int;
		dst_w_int = (int)ceilf(x + w);
	}
	/* dst_y_int is calculated to be the top of the scaled image, and
	 * y (the sub pixel offset) is the distance in from either the top
	 * or bottom pixel expanded edge.
	 */
	flip_y = (h < 0);
	if (flip_y)
	{
		float tmp;
		h = -h;
		dst_y_int = floorf(y-h);
		tmp = ceilf(y);
		dst_h_int = (int)tmp;
		y = tmp - y;
		dst_h_int -= dst_y_int;
	}
	else
	{
		dst_y_int = floorf(y);
		y -= dst_y_int;
		dst_h_int = (int)ceilf(y + h);
	}

	fz_valgrind_pixmap(src);

	/* Step 0: Calculate the patch */
	patch.x0 = 0;
	patch.y0 = 0;
	patch.x1 = dst_w_int;
	patch.y1 = dst_h_int;
	if (clip)
	{
		/* Intersect the destination with the clip box, adjusting both
		 * the patch (in destination-local coords) and dst_x/y_int. */
		if (flip_x)
		{
			if (dst_x_int + dst_w_int > clip->x1)
				patch.x0 = dst_x_int + dst_w_int - clip->x1;
			if (clip->x0 > dst_x_int)
			{
				patch.x1 = dst_w_int - (clip->x0 - dst_x_int);
				dst_x_int = clip->x0;
			}
		}
		else
		{
			if (dst_x_int + dst_w_int > clip->x1)
				patch.x1 = clip->x1 - dst_x_int;
			if (clip->x0 > dst_x_int)
			{
				patch.x0 = clip->x0 - dst_x_int;
				dst_x_int += patch.x0;
			}
		}

		if (flip_y)
		{
			if (dst_y_int + dst_h_int > clip->y1)
				patch.y1 = clip->y1 - dst_y_int;
			if (clip->y0 > dst_y_int)
			{
				patch.y0 = clip->y0 - dst_y_int;
				dst_y_int = clip->y0;
			}
		}
		else
		{
			if (dst_y_int + dst_h_int > clip->y1)
				patch.y1 = clip->y1 - dst_y_int;
			if (clip->y0 > dst_y_int)
			{
				patch.y0 = clip->y0 - dst_y_int;
				dst_y_int += patch.y0;
			}
		}
	}
	if (patch.x0 >= patch.x1 || patch.y0 >= patch.y1)
		return NULL; /* fully clipped away */

	fz_try(ctx)
	{
		/* Step 1: Calculate the weights for columns and rows */
#ifdef SINGLE_PIXEL_SPECIALS
		if (src->w == 1)
			contrib_cols = NULL; /* NULL weights => single-pixel special case */
		else
#endif /* SINGLE_PIXEL_SPECIALS */
			contrib_cols = Memento_label(make_weights(ctx, src->w, x, w, filter, 0, dst_w_int, patch.x0, patch.x1, src->n, flip_x, cache_x), "contrib_cols");
#ifdef SINGLE_PIXEL_SPECIALS
		if (src->h == 1)
			contrib_rows = NULL;
		else
#endif /* SINGLE_PIXEL_SPECIALS */
			contrib_rows = Memento_label(make_weights(ctx, src->h, y, h, filter, 1, dst_h_int, patch.y0, patch.y1, src->n, flip_y, cache_y), "contrib_rows");

		output = fz_new_pixmap(ctx, src->colorspace, patch.x1 - patch.x0, patch.y1 - patch.y0, src->seps, src->alpha || forcealpha);
	}
	fz_catch(ctx)
	{
		/* Weights are only freed here when not owned by a cache. */
		if (!cache_x)
			fz_free(ctx, contrib_cols);
		if (!cache_y)
			fz_free(ctx, contrib_rows);
		fz_rethrow(ctx);
	}
	output->x = dst_x_int;
	output->y = dst_y_int;

	/* Step 2: Apply the weights */
#ifdef SINGLE_PIXEL_SPECIALS
	if (!contrib_rows)
	{
		/* Only 1 source pixel high. */
		if (!contrib_cols)
		{
			/* Only 1 pixel in the entire image! */
			duplicate_single_pixel(output->samples, src->samples, src->n, forcealpha, patch.x1-patch.x0, patch.y1-patch.y0, output->stride);
			fz_valgrind_pixmap(output);
		}
		else
		{
			/* Scale the row once, then copy it. */
			scale_single_row(output->samples, output->stride, src->samples, contrib_cols, src->w, patch.y1-patch.y0, forcealpha);
			fz_valgrind_pixmap(output);
		}
	}
	else if (!contrib_cols)
	{
		/* Only 1 source pixel wide. Scale the col and duplicate. */
		scale_single_col(output->samples, output->stride, src->samples, src->stride, contrib_rows, src->h, src->n, patch.x1-patch.x0, forcealpha);
		fz_valgrind_pixmap(output);
	}
	else
#endif /* SINGLE_PIXEL_SPECIALS */
	{
		void (*row_scale_in)(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights);
		void (*row_scale_out)(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int w, int n, int row);

		/* The temp buffer is a ring of the most recent 'max_len'
		 * horizontally-scaled source rows. */
		temp_span = contrib_cols->count * src->n;
		temp_rows = contrib_rows->max_len;
		if (temp_span <= 0 || temp_rows > INT_MAX / temp_span)
			goto cleanup; /* would overflow the allocation size */
		fz_try(ctx)
		{
			temp = fz_calloc(ctx, (size_t)temp_span*temp_rows, sizeof(unsigned char));
		}
		fz_catch(ctx)
		{
			fz_drop_pixmap(ctx, output);
			if (!cache_x)
				fz_free(ctx, contrib_cols);
			if (!cache_y)
				fz_free(ctx, contrib_rows);
			fz_rethrow(ctx);
		}
		switch (src->n)
		{
		default:
			row_scale_in = scale_row_to_temp;
			break;
		case 1: /* Image mask case or Greyscale case */
			row_scale_in = scale_row_to_temp1;
			break;
		case 2: /* Greyscale with alpha case */
			row_scale_in = scale_row_to_temp2;
			break;
		case 3: /* RGB case */
			row_scale_in = scale_row_to_temp3;
			break;
		case 4: /* RGBA or CMYK case */
			row_scale_in = scale_row_to_temp4;
			break;
		}
		row_scale_out = forcealpha ? scale_row_from_temp_alpha : scale_row_from_temp;
		max_row = contrib_rows->index[contrib_rows->index[0]];
		for (row = 0; row < contrib_rows->count; row++)
		{
			/*
			Which source rows do we need to have scaled into the
			temporary buffer in order to be able to do the final
			scale?
			*/
			int row_index = contrib_rows->index[row];
			int row_min = contrib_rows->index[row_index++];
			int row_len = contrib_rows->index[row_index];
			while (max_row < row_min+row_len)
			{
				/* Scale another row */
				assert(max_row < src->h);
				(*row_scale_in)(&temp[temp_span*(max_row % temp_rows)], &src->samples[(flip_y ? (src->h-1-max_row): max_row)*src->stride], contrib_cols);
				max_row++;
			}

			(*row_scale_out)(&output->samples[row*output->stride], temp, contrib_rows, contrib_cols->count, src->n, row);
		}
		fz_free(ctx, temp);

		if (forcealpha)
			adjust_alpha_edges(output, contrib_rows, contrib_cols);

		fz_valgrind_pixmap(output);
	}

cleanup:
	if (!cache_y)
		fz_free(ctx, contrib_rows);
	if (!cache_x)
		fz_free(ctx, contrib_cols);

	return output;
}
1883
1884
void
1885
fz_drop_scale_cache(fz_context *ctx, fz_scale_cache *sc)
1886
42
{
1887
42
  if (!sc)
1888
0
    return;
1889
42
  fz_free(ctx, sc->weights);
1890
42
  fz_free(ctx, sc);
1891
42
}
1892
1893
/* Allocate an empty (zeroed) scale cache; weights are filled in lazily
 * by later fz_scale_pixmap_cached calls. Free with fz_drop_scale_cache. */
fz_scale_cache *
fz_new_scale_cache(fz_context *ctx)
{
	return fz_malloc_struct(ctx, fz_scale_cache);
}