Coverage Report

Created: 2026-06-10 06:30

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libavif/ext/libyuv/source/scale.cc
Line
Count
Source
1
/*
2
 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5
 *  that can be found in the LICENSE file in the root of the source
6
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS. All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
9
 */
10
11
#include "libyuv/scale.h"
12
13
#include <assert.h>
14
#include <string.h>
15
16
#include "libyuv/cpu_id.h"
17
#include "libyuv/planar_functions.h"  // For CopyPlane
18
#include "libyuv/row.h"
19
#include "libyuv/scale_row.h"
20
#include "libyuv/scale_uv.h"  // For UVScale
21
22
#ifdef __cplusplus
23
namespace libyuv {
24
extern "C" {
25
#endif
26
27
52.3k
// Returns the magnitude of v (v itself when non-negative, -v otherwise).
static __inline int Abs(int v) {
  if (v < 0) {
    return -v;
  }
  return v;
}
30
31
0
// Shifts (|v| + a) right by s and restores the sign of v, so negative values
// are rounded the same way as positive ones. Every argument and the whole
// expansion are parenthesized so the macro is safe inside larger expressions
// and with expression arguments. Arguments are evaluated more than once.
#define SUBSAMPLE(v, a, s) \
  (((v) < 0) ? (-((-(v) + (a)) >> (s))) : (((v) + (a)) >> (s)))
32
1.63k
// Halves |dx|, adds s, and restores the sign of dx. Every argument and the
// whole expansion are parenthesized so the macro is safe inside larger
// expressions and with expression arguments. dx is evaluated more than once.
#define CENTERSTART(dx, s) \
  (((dx) < 0) ? -((-(dx) >> 1) + (s)) : (((dx) >> 1) + (s)))
33
34
// Scale plane, 1/2
35
// This is an optimized version for scaling down a plane to 1/2 of
36
// its original size.
37
38
static void ScalePlaneDown2(int src_width,
39
                            int src_height,
40
                            int dst_width,
41
                            int dst_height,
42
                            int src_stride,
43
                            int dst_stride,
44
                            const uint8_t* src_ptr,
45
                            uint8_t* dst_ptr,
46
54
                            enum FilterMode filtering) {
47
54
  int y;
48
54
  void (*ScaleRowDown2)(const uint8_t* src_ptr, ptrdiff_t src_stride,
49
54
                        uint8_t* dst_ptr, int dst_width) =
50
54
      filtering == kFilterNone
51
54
          ? ScaleRowDown2_C
52
54
          : (filtering == kFilterLinear ? ScaleRowDown2Linear_C
53
54
                                        : ScaleRowDown2Box_C);
54
54
  int row_stride = src_stride * 2;
55
54
  (void)src_width;
56
54
  (void)src_height;
57
54
  if (!filtering) {
58
0
    src_ptr += src_stride;  // Point to odd rows.
59
0
    src_stride = 0;
60
0
  }
61
62
#if defined(HAS_SCALEROWDOWN2_NEON)
63
  if (TestCpuFlag(kCpuHasNEON)) {
64
    ScaleRowDown2 =
65
        filtering == kFilterNone
66
            ? ScaleRowDown2_Any_NEON
67
            : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_NEON
68
                                          : ScaleRowDown2Box_Any_NEON);
69
    if (IS_ALIGNED(dst_width, 16)) {
70
      ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_NEON
71
                                               : (filtering == kFilterLinear
72
                                                      ? ScaleRowDown2Linear_NEON
73
                                                      : ScaleRowDown2Box_NEON);
74
    }
75
  }
76
#endif
77
#if defined(HAS_SCALEROWDOWN2_SME)
78
  if (TestCpuFlag(kCpuHasSME)) {
79
    ScaleRowDown2 = filtering == kFilterNone     ? ScaleRowDown2_SME
80
                    : filtering == kFilterLinear ? ScaleRowDown2Linear_SME
81
                                                 : ScaleRowDown2Box_SME;
82
  }
83
#endif
84
54
#if defined(HAS_SCALEROWDOWN2_SSSE3)
85
54
  if (TestCpuFlag(kCpuHasSSSE3)) {
86
54
    ScaleRowDown2 =
87
54
        filtering == kFilterNone
88
54
            ? ScaleRowDown2_Any_SSSE3
89
54
            : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_SSSE3
90
54
                                          : ScaleRowDown2Box_Any_SSSE3);
91
54
    if (IS_ALIGNED(dst_width, 16)) {
92
0
      ScaleRowDown2 =
93
0
          filtering == kFilterNone
94
0
              ? ScaleRowDown2_SSSE3
95
0
              : (filtering == kFilterLinear ? ScaleRowDown2Linear_SSSE3
96
0
                                            : ScaleRowDown2Box_SSSE3);
97
0
    }
98
54
  }
99
54
#endif
100
54
#if defined(HAS_SCALEROWDOWN2_AVX2)
101
54
  if (TestCpuFlag(kCpuHasAVX2)) {
102
54
    ScaleRowDown2 =
103
54
        filtering == kFilterNone
104
54
            ? ScaleRowDown2_Any_AVX2
105
54
            : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_AVX2
106
54
                                          : ScaleRowDown2Box_Any_AVX2);
107
54
    if (IS_ALIGNED(dst_width, 32)) {
108
0
      ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_AVX2
109
0
                                               : (filtering == kFilterLinear
110
0
                                                      ? ScaleRowDown2Linear_AVX2
111
0
                                                      : ScaleRowDown2Box_AVX2);
112
0
    }
113
54
  }
114
54
#endif
115
#if defined(HAS_SCALEROWDOWN2_LSX)
116
  if (TestCpuFlag(kCpuHasLSX)) {
117
    ScaleRowDown2 =
118
        filtering == kFilterNone
119
            ? ScaleRowDown2_Any_LSX
120
            : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_LSX
121
                                          : ScaleRowDown2Box_Any_LSX);
122
    if (IS_ALIGNED(dst_width, 32)) {
123
      ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_LSX
124
                                               : (filtering == kFilterLinear
125
                                                      ? ScaleRowDown2Linear_LSX
126
                                                      : ScaleRowDown2Box_LSX);
127
    }
128
  }
129
#endif
130
#if defined(HAS_SCALEROWDOWN2_RVV)
131
  if (TestCpuFlag(kCpuHasRVV)) {
132
    ScaleRowDown2 = filtering == kFilterNone
133
                        ? ScaleRowDown2_RVV
134
                        : (filtering == kFilterLinear ? ScaleRowDown2Linear_RVV
135
                                                      : ScaleRowDown2Box_RVV);
136
  }
137
#endif
138
139
54
  if (filtering == kFilterLinear) {
140
0
    src_stride = 0;
141
0
  }
142
  // TODO(fbarchard): Loop through source height to allow odd height.
143
497
  for (y = 0; y < dst_height; ++y) {
144
443
    ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
145
443
    src_ptr += row_stride;
146
443
    dst_ptr += dst_stride;
147
443
  }
148
54
}
149
150
static void ScalePlaneDown2_16(int src_width,
151
                               int src_height,
152
                               int dst_width,
153
                               int dst_height,
154
                               int src_stride,
155
                               int dst_stride,
156
                               const uint16_t* src_ptr,
157
                               uint16_t* dst_ptr,
158
66
                               enum FilterMode filtering) {
159
66
  int y;
160
66
  void (*ScaleRowDown2)(const uint16_t* src_ptr, ptrdiff_t src_stride,
161
66
                        uint16_t* dst_ptr, int dst_width) =
162
66
      filtering == kFilterNone
163
66
          ? ScaleRowDown2_16_C
164
66
          : (filtering == kFilterLinear ? ScaleRowDown2Linear_16_C
165
66
                                        : ScaleRowDown2Box_16_C);
166
66
  int row_stride = src_stride * 2;
167
66
  (void)src_width;
168
66
  (void)src_height;
169
66
  if (!filtering) {
170
0
    src_ptr += src_stride;  // Point to odd rows.
171
0
    src_stride = 0;
172
0
  }
173
174
#if defined(HAS_SCALEROWDOWN2_16_NEON)
175
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 16)) {
176
    ScaleRowDown2 = filtering == kFilterNone     ? ScaleRowDown2_16_NEON
177
                    : filtering == kFilterLinear ? ScaleRowDown2Linear_16_NEON
178
                                                 : ScaleRowDown2Box_16_NEON;
179
  }
180
#endif
181
#if defined(HAS_SCALEROWDOWN2_16_SME)
182
  if (TestCpuFlag(kCpuHasSME)) {
183
    ScaleRowDown2 = filtering == kFilterNone     ? ScaleRowDown2_16_SME
184
                    : filtering == kFilterLinear ? ScaleRowDown2Linear_16_SME
185
                                                 : ScaleRowDown2Box_16_SME;
186
  }
187
#endif
188
#if defined(HAS_SCALEROWDOWN2_16_SSE2)
189
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) {
190
    ScaleRowDown2 =
191
        filtering == kFilterNone
192
            ? ScaleRowDown2_16_SSE2
193
            : (filtering == kFilterLinear ? ScaleRowDown2Linear_16_SSE2
194
                                          : ScaleRowDown2Box_16_SSE2);
195
  }
196
#endif
197
198
66
  if (filtering == kFilterLinear) {
199
0
    src_stride = 0;
200
0
  }
201
  // TODO(fbarchard): Loop through source height to allow odd height.
202
953
  for (y = 0; y < dst_height; ++y) {
203
887
    ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
204
887
    src_ptr += row_stride;
205
887
    dst_ptr += dst_stride;
206
887
  }
207
66
}
208
209
void ScalePlaneDown2_16To8(int src_width,
210
                           int src_height,
211
                           int dst_width,
212
                           int dst_height,
213
                           int src_stride,
214
                           int dst_stride,
215
                           const uint16_t* src_ptr,
216
                           uint8_t* dst_ptr,
217
                           int scale,
218
0
                           enum FilterMode filtering) {
219
0
  int y;
220
0
  void (*ScaleRowDown2)(const uint16_t* src_ptr, ptrdiff_t src_stride,
221
0
                        uint8_t* dst_ptr, int dst_width, int scale) =
222
0
      (src_width & 1)
223
0
          ? (filtering == kFilterNone
224
0
                 ? ScaleRowDown2_16To8_Odd_C
225
0
                 : (filtering == kFilterLinear ? ScaleRowDown2Linear_16To8_Odd_C
226
0
                                               : ScaleRowDown2Box_16To8_Odd_C))
227
0
          : (filtering == kFilterNone
228
0
                 ? ScaleRowDown2_16To8_C
229
0
                 : (filtering == kFilterLinear ? ScaleRowDown2Linear_16To8_C
230
0
                                               : ScaleRowDown2Box_16To8_C));
231
0
  int row_stride = src_stride * 2;
232
0
  (void)dst_height;
233
0
  if (!filtering) {
234
0
    src_ptr += src_stride;  // Point to odd rows.
235
0
    src_stride = 0;
236
0
  }
237
238
0
  if (filtering == kFilterLinear) {
239
0
    src_stride = 0;
240
0
  }
241
0
  for (y = 0; y < src_height / 2; ++y) {
242
0
    ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width, scale);
243
0
    src_ptr += row_stride;
244
0
    dst_ptr += dst_stride;
245
0
  }
246
0
  if (src_height & 1) {
247
0
    if (!filtering) {
248
0
      src_ptr -= src_stride;  // Point to last row.
249
0
    }
250
0
    ScaleRowDown2(src_ptr, 0, dst_ptr, dst_width, scale);
251
0
  }
252
0
}
253
254
// Scale plane, 1/4
255
// This is an optimized version for scaling down a plane to 1/4 of
256
// its original size.
257
258
static void ScalePlaneDown4(int src_width,
259
                            int src_height,
260
                            int dst_width,
261
                            int dst_height,
262
                            int src_stride,
263
                            int dst_stride,
264
                            const uint8_t* src_ptr,
265
                            uint8_t* dst_ptr,
266
38
                            enum FilterMode filtering) {
267
38
  int y;
268
38
  void (*ScaleRowDown4)(const uint8_t* src_ptr, ptrdiff_t src_stride,
269
38
                        uint8_t* dst_ptr, int dst_width) =
270
38
      filtering ? ScaleRowDown4Box_C : ScaleRowDown4_C;
271
38
  int row_stride = src_stride * 4;
272
38
  (void)src_width;
273
38
  (void)src_height;
274
38
  if (!filtering) {
275
0
    src_ptr += src_stride * 2;  // Point to row 2.
276
0
    src_stride = 0;
277
0
  }
278
#if defined(HAS_SCALEROWDOWN4_NEON)
279
  if (TestCpuFlag(kCpuHasNEON)) {
280
    ScaleRowDown4 =
281
        filtering ? ScaleRowDown4Box_Any_NEON : ScaleRowDown4_Any_NEON;
282
    if (IS_ALIGNED(dst_width, 16)) {
283
      ScaleRowDown4 = filtering ? ScaleRowDown4Box_NEON : ScaleRowDown4_NEON;
284
    }
285
  }
286
#endif
287
38
#if defined(HAS_SCALEROWDOWN4_SSSE3)
288
38
  if (TestCpuFlag(kCpuHasSSSE3)) {
289
38
    ScaleRowDown4 =
290
38
        filtering ? ScaleRowDown4Box_Any_SSSE3 : ScaleRowDown4_Any_SSSE3;
291
38
    if (IS_ALIGNED(dst_width, 8)) {
292
0
      ScaleRowDown4 = filtering ? ScaleRowDown4Box_SSSE3 : ScaleRowDown4_SSSE3;
293
0
    }
294
38
  }
295
38
#endif
296
38
#if defined(HAS_SCALEROWDOWN4_AVX2)
297
38
  if (TestCpuFlag(kCpuHasAVX2)) {
298
38
    ScaleRowDown4 =
299
38
        filtering ? ScaleRowDown4Box_Any_AVX2 : ScaleRowDown4_Any_AVX2;
300
38
    if (IS_ALIGNED(dst_width, 16)) {
301
0
      ScaleRowDown4 = filtering ? ScaleRowDown4Box_AVX2 : ScaleRowDown4_AVX2;
302
0
    }
303
38
  }
304
38
#endif
305
#if defined(HAS_SCALEROWDOWN4_LSX)
306
  if (TestCpuFlag(kCpuHasLSX)) {
307
    ScaleRowDown4 =
308
        filtering ? ScaleRowDown4Box_Any_LSX : ScaleRowDown4_Any_LSX;
309
    if (IS_ALIGNED(dst_width, 16)) {
310
      ScaleRowDown4 = filtering ? ScaleRowDown4Box_LSX : ScaleRowDown4_LSX;
311
    }
312
  }
313
#endif
314
#if defined(HAS_SCALEROWDOWN4_RVV)
315
  if (TestCpuFlag(kCpuHasRVV)) {
316
    ScaleRowDown4 = filtering ? ScaleRowDown4Box_RVV : ScaleRowDown4_RVV;
317
  }
318
#endif
319
320
38
  if (filtering == kFilterLinear) {
321
0
    src_stride = 0;
322
0
  }
323
464
  for (y = 0; y < dst_height; ++y) {
324
426
    ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width);
325
426
    src_ptr += row_stride;
326
426
    dst_ptr += dst_stride;
327
426
  }
328
38
}
329
330
static void ScalePlaneDown4_16(int src_width,
331
                               int src_height,
332
                               int dst_width,
333
                               int dst_height,
334
                               int src_stride,
335
                               int dst_stride,
336
                               const uint16_t* src_ptr,
337
                               uint16_t* dst_ptr,
338
38
                               enum FilterMode filtering) {
339
38
  int y;
340
38
  void (*ScaleRowDown4)(const uint16_t* src_ptr, ptrdiff_t src_stride,
341
38
                        uint16_t* dst_ptr, int dst_width) =
342
38
      filtering ? ScaleRowDown4Box_16_C : ScaleRowDown4_16_C;
343
38
  int row_stride = src_stride * 4;
344
38
  (void)src_width;
345
38
  (void)src_height;
346
38
  if (!filtering) {
347
0
    src_ptr += src_stride * 2;  // Point to row 2.
348
0
    src_stride = 0;
349
0
  }
350
#if defined(HAS_SCALEROWDOWN4_16_NEON)
351
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8)) {
352
    ScaleRowDown4 =
353
        filtering ? ScaleRowDown4Box_16_NEON : ScaleRowDown4_16_NEON;
354
  }
355
#endif
356
#if defined(HAS_SCALEROWDOWN4_16_SSE2)
357
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
358
    ScaleRowDown4 =
359
        filtering ? ScaleRowDown4Box_16_SSE2 : ScaleRowDown4_16_SSE2;
360
  }
361
#endif
362
363
38
  if (filtering == kFilterLinear) {
364
0
    src_stride = 0;
365
0
  }
366
464
  for (y = 0; y < dst_height; ++y) {
367
426
    ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width);
368
426
    src_ptr += row_stride;
369
426
    dst_ptr += dst_stride;
370
426
  }
371
38
}
372
373
// Scale plane down, 3/4
374
static void ScalePlaneDown34(int src_width,
375
                             int src_height,
376
                             int dst_width,
377
                             int dst_height,
378
                             int src_stride,
379
                             int dst_stride,
380
                             const uint8_t* src_ptr,
381
                             uint8_t* dst_ptr,
382
2
                             enum FilterMode filtering) {
383
2
  int y;
384
2
  void (*ScaleRowDown34_0)(const uint8_t* src_ptr, ptrdiff_t src_stride,
385
2
                           uint8_t* dst_ptr, int dst_width);
386
2
  void (*ScaleRowDown34_1)(const uint8_t* src_ptr, ptrdiff_t src_stride,
387
2
                           uint8_t* dst_ptr, int dst_width);
388
2
  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
389
2
  (void)src_width;
390
2
  (void)src_height;
391
2
  assert(dst_width % 3 == 0);
392
2
  if (!filtering) {
393
0
    ScaleRowDown34_0 = ScaleRowDown34_C;
394
0
    ScaleRowDown34_1 = ScaleRowDown34_C;
395
2
  } else {
396
2
    ScaleRowDown34_0 = ScaleRowDown34_0_Box_C;
397
2
    ScaleRowDown34_1 = ScaleRowDown34_1_Box_C;
398
2
  }
399
#if defined(HAS_SCALEROWDOWN34_NEON)
400
  if (TestCpuFlag(kCpuHasNEON)) {
401
#if defined(__aarch64__)
402
    if (dst_width % 48 == 0) {
403
#else
404
    if (dst_width % 24 == 0) {
405
#endif
406
      if (!filtering) {
407
        ScaleRowDown34_0 = ScaleRowDown34_NEON;
408
        ScaleRowDown34_1 = ScaleRowDown34_NEON;
409
      } else {
410
        ScaleRowDown34_0 = ScaleRowDown34_0_Box_NEON;
411
        ScaleRowDown34_1 = ScaleRowDown34_1_Box_NEON;
412
      }
413
    } else {
414
      if (!filtering) {
415
        ScaleRowDown34_0 = ScaleRowDown34_Any_NEON;
416
        ScaleRowDown34_1 = ScaleRowDown34_Any_NEON;
417
      } else {
418
        ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_NEON;
419
        ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_NEON;
420
      }
421
    }
422
  }
423
#endif
424
#if defined(HAS_SCALEROWDOWN34_LSX)
425
  if (TestCpuFlag(kCpuHasLSX)) {
426
    if (dst_width % 48 == 0) {
427
      if (!filtering) {
428
        ScaleRowDown34_0 = ScaleRowDown34_LSX;
429
        ScaleRowDown34_1 = ScaleRowDown34_LSX;
430
      } else {
431
        ScaleRowDown34_0 = ScaleRowDown34_0_Box_LSX;
432
        ScaleRowDown34_1 = ScaleRowDown34_1_Box_LSX;
433
      }
434
    } else {
435
      if (!filtering) {
436
        ScaleRowDown34_0 = ScaleRowDown34_Any_LSX;
437
        ScaleRowDown34_1 = ScaleRowDown34_Any_LSX;
438
      } else {
439
        ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_LSX;
440
        ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_LSX;
441
      }
442
    }
443
  }
444
#endif
445
2
#if defined(HAS_SCALEROWDOWN34_SSSE3)
446
2
  if (TestCpuFlag(kCpuHasSSSE3)) {
447
2
    if (dst_width % 24 == 0) {
448
0
      if (!filtering) {
449
0
        ScaleRowDown34_0 = ScaleRowDown34_SSSE3;
450
0
        ScaleRowDown34_1 = ScaleRowDown34_SSSE3;
451
0
      } else {
452
0
        ScaleRowDown34_0 = ScaleRowDown34_0_Box_SSSE3;
453
0
        ScaleRowDown34_1 = ScaleRowDown34_1_Box_SSSE3;
454
0
      }
455
2
    } else {
456
2
      if (!filtering) {
457
0
        ScaleRowDown34_0 = ScaleRowDown34_Any_SSSE3;
458
0
        ScaleRowDown34_1 = ScaleRowDown34_Any_SSSE3;
459
2
      } else {
460
2
        ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_SSSE3;
461
2
        ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_SSSE3;
462
2
      }
463
2
    }
464
2
  }
465
2
#endif
466
#if defined(HAS_SCALEROWDOWN34_RVV)
467
  if (TestCpuFlag(kCpuHasRVV)) {
468
    if (!filtering) {
469
      ScaleRowDown34_0 = ScaleRowDown34_RVV;
470
      ScaleRowDown34_1 = ScaleRowDown34_RVV;
471
    } else {
472
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_RVV;
473
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_RVV;
474
    }
475
  }
476
#endif
477
478
4
  for (y = 0; y < dst_height - 2; y += 3) {
479
2
    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
480
2
    src_ptr += src_stride;
481
2
    dst_ptr += dst_stride;
482
2
    ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width);
483
2
    src_ptr += src_stride;
484
2
    dst_ptr += dst_stride;
485
2
    ScaleRowDown34_0(src_ptr + src_stride, -filter_stride, dst_ptr, dst_width);
486
2
    src_ptr += src_stride * 2;
487
2
    dst_ptr += dst_stride;
488
2
  }
489
490
  // Remainder 1 or 2 rows with last row vertically unfiltered
491
2
  if ((dst_height % 3) == 2) {
492
0
    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
493
0
    src_ptr += src_stride;
494
0
    dst_ptr += dst_stride;
495
0
    ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width);
496
2
  } else if ((dst_height % 3) == 1) {
497
0
    ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width);
498
0
  }
499
2
}
500
501
static void ScalePlaneDown34_16(int src_width,
502
                                int src_height,
503
                                int dst_width,
504
                                int dst_height,
505
                                int src_stride,
506
                                int dst_stride,
507
                                const uint16_t* src_ptr,
508
                                uint16_t* dst_ptr,
509
0
                                enum FilterMode filtering) {
510
0
  int y;
511
0
  void (*ScaleRowDown34_0)(const uint16_t* src_ptr, ptrdiff_t src_stride,
512
0
                           uint16_t* dst_ptr, int dst_width);
513
0
  void (*ScaleRowDown34_1)(const uint16_t* src_ptr, ptrdiff_t src_stride,
514
0
                           uint16_t* dst_ptr, int dst_width);
515
0
  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
516
0
  (void)src_width;
517
0
  (void)src_height;
518
0
  assert(dst_width % 3 == 0);
519
0
  if (!filtering) {
520
0
    ScaleRowDown34_0 = ScaleRowDown34_16_C;
521
0
    ScaleRowDown34_1 = ScaleRowDown34_16_C;
522
0
  } else {
523
0
    ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_C;
524
0
    ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_C;
525
0
  }
526
#if defined(HAS_SCALEROWDOWN34_16_NEON)
527
  if (TestCpuFlag(kCpuHasNEON) && (dst_width % 24 == 0)) {
528
    if (!filtering) {
529
      ScaleRowDown34_0 = ScaleRowDown34_16_NEON;
530
      ScaleRowDown34_1 = ScaleRowDown34_16_NEON;
531
    } else {
532
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_NEON;
533
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_NEON;
534
    }
535
  }
536
#endif
537
#if defined(HAS_SCALEROWDOWN34_16_SSSE3)
538
  if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) {
539
    if (!filtering) {
540
      ScaleRowDown34_0 = ScaleRowDown34_16_SSSE3;
541
      ScaleRowDown34_1 = ScaleRowDown34_16_SSSE3;
542
    } else {
543
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_SSSE3;
544
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_SSSE3;
545
    }
546
  }
547
#endif
548
549
0
  for (y = 0; y < dst_height - 2; y += 3) {
550
0
    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
551
0
    src_ptr += src_stride;
552
0
    dst_ptr += dst_stride;
553
0
    ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width);
554
0
    src_ptr += src_stride;
555
0
    dst_ptr += dst_stride;
556
0
    ScaleRowDown34_0(src_ptr + src_stride, -filter_stride, dst_ptr, dst_width);
557
0
    src_ptr += src_stride * 2;
558
0
    dst_ptr += dst_stride;
559
0
  }
560
561
  // Remainder 1 or 2 rows with last row vertically unfiltered
562
0
  if ((dst_height % 3) == 2) {
563
0
    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
564
0
    src_ptr += src_stride;
565
0
    dst_ptr += dst_stride;
566
0
    ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width);
567
0
  } else if ((dst_height % 3) == 1) {
568
0
    ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width);
569
0
  }
570
0
}
571
572
// Scale plane, 3/8
573
// This is an optimized version for scaling down a plane to 3/8
574
// of its original size.
575
//
576
// Uses box filters arranged like this
577
// aaabbbcc -> abc
578
// aaabbbcc    def
579
// aaabbbcc    ghi
580
// dddeeeff
581
// dddeeeff
582
// dddeeeff
583
// ggghhhii
584
// ggghhhii
585
// Boxes are 3x3, 2x3, 3x2 and 2x2
586
587
static void ScalePlaneDown38(int src_width,
588
                             int src_height,
589
                             int dst_width,
590
                             int dst_height,
591
                             int src_stride,
592
                             int dst_stride,
593
                             const uint8_t* src_ptr,
594
                             uint8_t* dst_ptr,
595
12
                             enum FilterMode filtering) {
596
12
  int y;
597
12
  void (*ScaleRowDown38_3)(const uint8_t* src_ptr, ptrdiff_t src_stride,
598
12
                           uint8_t* dst_ptr, int dst_width);
599
12
  void (*ScaleRowDown38_2)(const uint8_t* src_ptr, ptrdiff_t src_stride,
600
12
                           uint8_t* dst_ptr, int dst_width);
601
12
  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
602
12
  assert(dst_width % 3 == 0);
603
12
  (void)src_width;
604
12
  (void)src_height;
605
12
  if (!filtering) {
606
0
    ScaleRowDown38_3 = ScaleRowDown38_C;
607
0
    ScaleRowDown38_2 = ScaleRowDown38_C;
608
12
  } else {
609
12
    ScaleRowDown38_3 = ScaleRowDown38_3_Box_C;
610
12
    ScaleRowDown38_2 = ScaleRowDown38_2_Box_C;
611
12
  }
612
613
#if defined(HAS_SCALEROWDOWN38_NEON)
614
  if (TestCpuFlag(kCpuHasNEON)) {
615
    if (!filtering) {
616
      ScaleRowDown38_3 = ScaleRowDown38_Any_NEON;
617
      ScaleRowDown38_2 = ScaleRowDown38_Any_NEON;
618
    } else {
619
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_NEON;
620
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_NEON;
621
    }
622
    if (dst_width % 12 == 0) {
623
      if (!filtering) {
624
        ScaleRowDown38_3 = ScaleRowDown38_NEON;
625
        ScaleRowDown38_2 = ScaleRowDown38_NEON;
626
      } else {
627
        ScaleRowDown38_3 = ScaleRowDown38_3_Box_NEON;
628
        ScaleRowDown38_2 = ScaleRowDown38_2_Box_NEON;
629
      }
630
    }
631
  }
632
#endif
633
12
#if defined(HAS_SCALEROWDOWN38_SSSE3)
634
12
  if (TestCpuFlag(kCpuHasSSSE3)) {
635
12
    if (!filtering) {
636
0
      ScaleRowDown38_3 = ScaleRowDown38_Any_SSSE3;
637
0
      ScaleRowDown38_2 = ScaleRowDown38_Any_SSSE3;
638
12
    } else {
639
12
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_SSSE3;
640
12
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_SSSE3;
641
12
    }
642
12
    if (dst_width % 12 == 0 && !filtering) {
643
0
      ScaleRowDown38_3 = ScaleRowDown38_SSSE3;
644
0
      ScaleRowDown38_2 = ScaleRowDown38_SSSE3;
645
0
    }
646
12
    if (dst_width % 6 == 0 && filtering) {
647
12
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_SSSE3;
648
12
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_SSSE3;
649
12
    }
650
12
  }
651
12
#endif
652
#if defined(HAS_SCALEROWDOWN38_LSX)
653
  if (TestCpuFlag(kCpuHasLSX)) {
654
    if (!filtering) {
655
      ScaleRowDown38_3 = ScaleRowDown38_Any_LSX;
656
      ScaleRowDown38_2 = ScaleRowDown38_Any_LSX;
657
    } else {
658
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_LSX;
659
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_LSX;
660
    }
661
    if (dst_width % 12 == 0) {
662
      if (!filtering) {
663
        ScaleRowDown38_3 = ScaleRowDown38_LSX;
664
        ScaleRowDown38_2 = ScaleRowDown38_LSX;
665
      } else {
666
        ScaleRowDown38_3 = ScaleRowDown38_3_Box_LSX;
667
        ScaleRowDown38_2 = ScaleRowDown38_2_Box_LSX;
668
      }
669
    }
670
  }
671
#endif
672
#if defined(HAS_SCALEROWDOWN38_RVV)
673
  if (TestCpuFlag(kCpuHasRVV)) {
674
    if (!filtering) {
675
      ScaleRowDown38_3 = ScaleRowDown38_RVV;
676
      ScaleRowDown38_2 = ScaleRowDown38_RVV;
677
    } else {
678
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_RVV;
679
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_RVV;
680
    }
681
  }
682
#endif
683
684
36
  for (y = 0; y < dst_height - 2; y += 3) {
685
24
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
686
24
    src_ptr += src_stride * 3;
687
24
    dst_ptr += dst_stride;
688
24
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
689
24
    src_ptr += src_stride * 3;
690
24
    dst_ptr += dst_stride;
691
24
    ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width);
692
24
    src_ptr += src_stride * 2;
693
24
    dst_ptr += dst_stride;
694
24
  }
695
696
  // Remainder 1 or 2 rows with last row vertically unfiltered
697
12
  if ((dst_height % 3) == 2) {
698
0
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
699
0
    src_ptr += src_stride * 3;
700
0
    dst_ptr += dst_stride;
701
0
    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
702
12
  } else if ((dst_height % 3) == 1) {
703
0
    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
704
0
  }
705
12
}
706
707
static void ScalePlaneDown38_16(int src_width,
708
                                int src_height,
709
                                int dst_width,
710
                                int dst_height,
711
                                int src_stride,
712
                                int dst_stride,
713
                                const uint16_t* src_ptr,
714
                                uint16_t* dst_ptr,
715
21
                                enum FilterMode filtering) {
716
21
  int y;
717
21
  void (*ScaleRowDown38_3)(const uint16_t* src_ptr, ptrdiff_t src_stride,
718
21
                           uint16_t* dst_ptr, int dst_width);
719
21
  void (*ScaleRowDown38_2)(const uint16_t* src_ptr, ptrdiff_t src_stride,
720
21
                           uint16_t* dst_ptr, int dst_width);
721
21
  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
722
21
  (void)src_width;
723
21
  (void)src_height;
724
21
  assert(dst_width % 3 == 0);
725
21
  if (!filtering) {
726
0
    ScaleRowDown38_3 = ScaleRowDown38_16_C;
727
0
    ScaleRowDown38_2 = ScaleRowDown38_16_C;
728
21
  } else {
729
21
    ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_C;
730
21
    ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_C;
731
21
  }
732
#if defined(HAS_SCALEROWDOWN38_16_NEON)
733
  if (TestCpuFlag(kCpuHasNEON) && (dst_width % 12 == 0)) {
734
    if (!filtering) {
735
      ScaleRowDown38_3 = ScaleRowDown38_16_NEON;
736
      ScaleRowDown38_2 = ScaleRowDown38_16_NEON;
737
    } else {
738
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_NEON;
739
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_NEON;
740
    }
741
  }
742
#endif
743
#if defined(HAS_SCALEROWDOWN38_16_SSSE3)
744
  if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) {
745
    if (!filtering) {
746
      ScaleRowDown38_3 = ScaleRowDown38_16_SSSE3;
747
      ScaleRowDown38_2 = ScaleRowDown38_16_SSSE3;
748
    } else {
749
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_SSSE3;
750
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_SSSE3;
751
    }
752
  }
753
#endif
754
755
57
  for (y = 0; y < dst_height - 2; y += 3) {
756
36
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
757
36
    src_ptr += src_stride * 3;
758
36
    dst_ptr += dst_stride;
759
36
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
760
36
    src_ptr += src_stride * 3;
761
36
    dst_ptr += dst_stride;
762
36
    ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width);
763
36
    src_ptr += src_stride * 2;
764
36
    dst_ptr += dst_stride;
765
36
  }
766
767
  // Remainder 1 or 2 rows with last row vertically unfiltered
768
21
  if ((dst_height % 3) == 2) {
769
0
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
770
0
    src_ptr += src_stride * 3;
771
0
    dst_ptr += dst_stride;
772
0
    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
773
21
  } else if ((dst_height % 3) == 1) {
774
0
    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
775
0
  }
776
21
}
777
778
9.01M
// Clamps an integer to a minimum of 1. The argument is evaluated twice.
#define MIN1(x) ((x) >= 1 ? (x) : 1)
779
780
7.41M
// Returns the sum of the first iboxwidth 16-bit values at src_ptr.
// iboxwidth must be positive (asserted).
static __inline uint32_t SumPixels(int iboxwidth, const uint16_t* src_ptr) {
  uint32_t total = 0u;
  int remaining;
  assert(iboxwidth > 0);
  for (remaining = iboxwidth; remaining > 0; --remaining) {
    total += *src_ptr++;
  }
  return total;
}
789
790
4.23M
// Returns the sum of the first iboxwidth 32-bit values at src_ptr, with
// unsigned 32-bit wrap-around. iboxwidth must be positive (asserted).
static __inline uint32_t SumPixels_16(int iboxwidth, const uint32_t* src_ptr) {
  uint32_t total = 0u;
  int remaining;
  assert(iboxwidth > 0);
  for (remaining = iboxwidth; remaining > 0; --remaining) {
    total += *src_ptr++;
  }
  return total;
}
799
800
// Column pass of the box filter for a fractional horizontal step.
// |x| and |dx| are 16.16 fixed point. Each output pixel sums the source
// columns between consecutive integer positions of x and scales the sum by a
// precomputed 65536 / (boxwidth * boxheight) reciprocal. The box width is
// either dx >> 16 or one more, hence the two table entries.
static void ScaleAddCols2_C(int dst_width,
                            int boxheight,
                            int x,
                            int dx,
                            const uint16_t* src_ptr,
                            uint8_t* dst_ptr) {
  const int minboxwidth = dx >> 16;
  const int scaletbl[2] = {
      65536 / (MIN1(minboxwidth) * boxheight),
      65536 / (MIN1(minboxwidth + 1) * boxheight),
  };
  int col;
  for (col = 0; col < dst_width; ++col) {
    const int ix = x >> 16;
    int boxwidth;
    int scaletbl_index;
    x += dx;
    boxwidth = MIN1((x >> 16) - ix);
    scaletbl_index = boxwidth - minboxwidth;
    assert((scaletbl_index == 0) || (scaletbl_index == 1));
    dst_ptr[col] = (uint8_t)(SumPixels(boxwidth, src_ptr + ix) *
                                 scaletbl[scaletbl_index] >>
                             16);
  }
}
823
824
// 16-bit variant of the fractional-step column pass: reads uint32_t column
// sums and writes uint16_t pixels. |x| and |dx| are 16.16 fixed point; the
// box width per output pixel is either dx >> 16 or one more.
static void ScaleAddCols2_16_C(int dst_width,
                               int boxheight,
                               int x,
                               int dx,
                               const uint32_t* src_ptr,
                               uint16_t* dst_ptr) {
  const int minboxwidth = dx >> 16;
  const int scaletbl[2] = {
      65536 / (MIN1(minboxwidth) * boxheight),
      65536 / (MIN1(minboxwidth + 1) * boxheight),
  };
  int col;
  for (col = 0; col < dst_width; ++col) {
    const int ix = x >> 16;
    int boxwidth;
    int scaletbl_index;
    x += dx;
    boxwidth = MIN1((x >> 16) - ix);
    scaletbl_index = boxwidth - minboxwidth;
    assert((scaletbl_index == 0) || (scaletbl_index == 1));
    dst_ptr[col] =
        SumPixels_16(boxwidth, src_ptr + ix) * scaletbl[scaletbl_index] >> 16;
  }
}
846
847
// Column pass of the box filter when no horizontal summing is needed: each
// output pixel is one column sum scaled by 65536 / boxheight. |x| is 16.16
// fixed point and only its integer part is used as the starting column;
// |dx| is unused.
static void ScaleAddCols0_C(int dst_width,
                            int boxheight,
                            int x,
                            int dx,
                            const uint16_t* src_ptr,
                            uint8_t* dst_ptr) {
  const int scaleval = 65536 / boxheight;
  const uint16_t* in = src_ptr + (x >> 16);
  int remaining;
  (void)dx;
  for (remaining = dst_width; remaining > 0; --remaining) {
    *dst_ptr++ = (uint8_t)(*in++ * scaleval >> 16);
  }
}
861
862
// Column pass of the box filter for a whole-number horizontal step: every
// output pixel averages a box of the same width (dx >> 16, at least 1).
// |x| arrives as 16.16 fixed point and is reduced to an integer column index.
static void ScaleAddCols1_C(int dst_width,
                            int boxheight,
                            int x,
                            int dx,
                            const uint16_t* src_ptr,
                            uint8_t* dst_ptr) {
  const int boxwidth = MIN1(dx >> 16);
  const int scaleval = 65536 / (boxwidth * boxheight);
  int column = x >> 16;
  int remaining;
  for (remaining = dst_width; remaining > 0; --remaining) {
    *dst_ptr++ =
        (uint8_t)(SumPixels(boxwidth, src_ptr + column) * scaleval >> 16);
    column += boxwidth;
  }
}
877
878
// 16-bit column pass of the box filter for a whole-number horizontal step:
// every output pixel averages a box of the same width (dx >> 16, at least 1)
// taken from the uint32_t column sums in |src_ptr|.
//
// |x| is the starting source position in 16.16 fixed point, as passed by
// ScalePlaneBox_16. It is converted to an integer column index before use,
// matching ScaleAddCols1_C and ScaleAddCols2_16_C; previously the raw
// fixed-point value was used as the index, which was only correct for x == 0.
static void ScaleAddCols1_16_C(int dst_width,
                               int boxheight,
                               int x,
                               int dx,
                               const uint32_t* src_ptr,
                               uint16_t* dst_ptr) {
  int boxwidth = MIN1(dx >> 16);
  int scaleval = 65536 / (boxwidth * boxheight);
  int i;
  x >>= 16;  // 16.16 fixed point -> integer column index.
  for (i = 0; i < dst_width; ++i) {
    *dst_ptr++ = SumPixels_16(boxwidth, src_ptr + x) * scaleval >> 16;
    x += boxwidth;
  }
}
892
893
// Scale plane down to any dimensions, with interpolation.
894
// (boxfilter).
895
//
896
// Same method as SimpleScale, which is fixed point, outputting
897
// one pixel of destination using fixed point (16.16) to step
898
// through source, sampling a box of pixel with simple
899
// averaging.
900
static int ScalePlaneBox(int src_width,
901
                         int src_height,
902
                         int dst_width,
903
                         int dst_height,
904
                         int src_stride,
905
                         int dst_stride,
906
                         const uint8_t* src_ptr,
907
1.22k
                         uint8_t* dst_ptr) {
908
1.22k
  int j, k;
909
  // Initial source x/y coordinate and step values as 16.16 fixed point.
910
1.22k
  int x = 0;
911
1.22k
  int y = 0;
912
1.22k
  int dx = 0;
913
1.22k
  int dy = 0;
914
1.22k
  const int max_y = (src_height << 16);
915
1.22k
  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox, &x, &y,
916
1.22k
             &dx, &dy);
917
1.22k
  src_width = Abs(src_width);
918
1.22k
  {
919
    // Allocate a row buffer of uint16_t.
920
1.22k
    align_buffer_64(row16, src_width * 2);
921
1.22k
    if (!row16)
922
0
      return 1;
923
1.22k
    void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
924
1.22k
                         const uint16_t* src_ptr, uint8_t* dst_ptr) =
925
1.22k
        (dx & 0xffff) ? ScaleAddCols2_C
926
1.22k
                      : ((dx != 0x10000) ? ScaleAddCols1_C : ScaleAddCols0_C);
927
1.22k
    void (*ScaleAddRow)(const uint8_t* src_ptr, uint16_t* dst_ptr,
928
1.22k
                        int src_width) = ScaleAddRow_C;
929
1.22k
#if defined(HAS_SCALEADDROW_SSE2)
930
1.22k
    if (TestCpuFlag(kCpuHasSSE2)) {
931
1.22k
      ScaleAddRow = ScaleAddRow_Any_SSE2;
932
1.22k
      if (IS_ALIGNED(src_width, 16)) {
933
247
        ScaleAddRow = ScaleAddRow_SSE2;
934
247
      }
935
1.22k
    }
936
1.22k
#endif
937
1.22k
#if defined(HAS_SCALEADDROW_AVX2)
938
1.22k
    if (TestCpuFlag(kCpuHasAVX2)) {
939
1.22k
      ScaleAddRow = ScaleAddRow_Any_AVX2;
940
1.22k
      if (IS_ALIGNED(src_width, 32)) {
941
208
        ScaleAddRow = ScaleAddRow_AVX2;
942
208
      }
943
1.22k
    }
944
1.22k
#endif
945
#if defined(HAS_SCALEADDROW_NEON)
946
    if (TestCpuFlag(kCpuHasNEON)) {
947
      ScaleAddRow = ScaleAddRow_Any_NEON;
948
      if (IS_ALIGNED(src_width, 16)) {
949
        ScaleAddRow = ScaleAddRow_NEON;
950
      }
951
    }
952
#endif
953
#if defined(HAS_SCALEADDROW_LSX)
954
    if (TestCpuFlag(kCpuHasLSX)) {
955
      ScaleAddRow = ScaleAddRow_Any_LSX;
956
      if (IS_ALIGNED(src_width, 16)) {
957
        ScaleAddRow = ScaleAddRow_LSX;
958
      }
959
    }
960
#endif
961
#if defined(HAS_SCALEADDROW_RVV)
962
    if (TestCpuFlag(kCpuHasRVV)) {
963
      ScaleAddRow = ScaleAddRow_RVV;
964
    }
965
#endif
966
967
110k
    for (j = 0; j < dst_height; ++j) {
968
109k
      int boxheight;
969
109k
      int iy = y >> 16;
970
109k
      const uint8_t* src = src_ptr + iy * (int64_t)src_stride;
971
109k
      y += dy;
972
109k
      if (y > max_y) {
973
0
        y = max_y;
974
0
      }
975
109k
      boxheight = MIN1((y >> 16) - iy);
976
109k
      memset(row16, 0, src_width * 2);
977
1.40M
      for (k = 0; k < boxheight; ++k) {
978
1.29M
        ScaleAddRow(src, (uint16_t*)(row16), src_width);
979
1.29M
        src += src_stride;
980
1.29M
      }
981
109k
      ScaleAddCols(dst_width, boxheight, x, dx, (uint16_t*)(row16), dst_ptr);
982
109k
      dst_ptr += dst_stride;
983
109k
    }
984
1.22k
    free_aligned_buffer_64(row16);
985
1.22k
  }
986
0
  return 0;
987
1.22k
}
988
989
static int ScalePlaneBox_16(int src_width,
990
                            int src_height,
991
                            int dst_width,
992
                            int dst_height,
993
                            int src_stride,
994
                            int dst_stride,
995
                            const uint16_t* src_ptr,
996
1.34k
                            uint16_t* dst_ptr) {
997
1.34k
  int j, k;
998
  // Initial source x/y coordinate and step values as 16.16 fixed point.
999
1.34k
  int x = 0;
1000
1.34k
  int y = 0;
1001
1.34k
  int dx = 0;
1002
1.34k
  int dy = 0;
1003
1.34k
  const int max_y = (src_height << 16);
1004
1.34k
  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox, &x, &y,
1005
1.34k
             &dx, &dy);
1006
1.34k
  src_width = Abs(src_width);
1007
1.34k
  {
1008
    // Allocate a row buffer of uint32_t.
1009
1.34k
    align_buffer_64(row32, src_width * 4);
1010
1.34k
    if (!row32)
1011
0
      return 1;
1012
1.34k
    void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
1013
1.34k
                         const uint32_t* src_ptr, uint16_t* dst_ptr) =
1014
1.34k
        (dx & 0xffff) ? ScaleAddCols2_16_C : ScaleAddCols1_16_C;
1015
1.34k
    void (*ScaleAddRow)(const uint16_t* src_ptr, uint32_t* dst_ptr,
1016
1.34k
                        int src_width) = ScaleAddRow_16_C;
1017
1018
#if defined(HAS_SCALEADDROW_16_SSE2)
1019
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(src_width, 16)) {
1020
      ScaleAddRow = ScaleAddRow_16_SSE2;
1021
    }
1022
#endif
1023
1024
92.4k
    for (j = 0; j < dst_height; ++j) {
1025
91.1k
      int boxheight;
1026
91.1k
      int iy = y >> 16;
1027
91.1k
      const uint16_t* src = src_ptr + iy * (int64_t)src_stride;
1028
91.1k
      y += dy;
1029
91.1k
      if (y > max_y) {
1030
0
        y = max_y;
1031
0
      }
1032
91.1k
      boxheight = MIN1((y >> 16) - iy);
1033
91.1k
      memset(row32, 0, src_width * 4);
1034
1.22M
      for (k = 0; k < boxheight; ++k) {
1035
1.13M
        ScaleAddRow(src, (uint32_t*)(row32), src_width);
1036
1.13M
        src += src_stride;
1037
1.13M
      }
1038
91.1k
      ScaleAddCols(dst_width, boxheight, x, dx, (uint32_t*)(row32), dst_ptr);
1039
91.1k
      dst_ptr += dst_stride;
1040
91.1k
    }
1041
1.34k
    free_aligned_buffer_64(row32);
1042
1.34k
  }
1043
0
  return 0;
1044
1.34k
}
1045
1046
// Scale plane down with bilinear interpolation.
1047
static int ScalePlaneBilinearDown(int src_width,
1048
                                  int src_height,
1049
                                  int dst_width,
1050
                                  int dst_height,
1051
                                  int src_stride,
1052
                                  int dst_stride,
1053
                                  const uint8_t* src_ptr,
1054
                                  uint8_t* dst_ptr,
1055
4.05k
                                  enum FilterMode filtering) {
1056
  // Initial source x/y coordinate and step values as 16.16 fixed point.
1057
4.05k
  int x = 0;
1058
4.05k
  int y = 0;
1059
4.05k
  int dx = 0;
1060
4.05k
  int dy = 0;
1061
  // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
1062
  // Allocate a row buffer.
1063
4.05k
  align_buffer_64(row, src_width);
1064
4.05k
  if (!row)
1065
0
    return 1;
1066
1067
4.05k
  const int max_y = (src_height - 1) << 16;
1068
4.05k
  int j;
1069
4.05k
  void (*ScaleFilterCols)(uint8_t* dst_ptr, const uint8_t* src_ptr,
1070
4.05k
                          int dst_width, int x, int dx) =
1071
4.05k
      (src_width >= 32768) ? ScaleFilterCols64_C : ScaleFilterCols_C;
1072
4.05k
  void (*InterpolateRow)(uint8_t* dst_ptr, const uint8_t* src_ptr,
1073
4.05k
                         ptrdiff_t src_stride, int dst_width,
1074
4.05k
                         int source_y_fraction) = InterpolateRow_C;
1075
4.05k
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
1076
4.05k
             &dx, &dy);
1077
4.05k
  src_width = Abs(src_width);
1078
1079
4.05k
#if defined(HAS_INTERPOLATEROW_SSSE3)
1080
4.05k
  if (TestCpuFlag(kCpuHasSSSE3)) {
1081
4.05k
    InterpolateRow = InterpolateRow_Any_SSSE3;
1082
4.05k
    if (IS_ALIGNED(src_width, 16)) {
1083
734
      InterpolateRow = InterpolateRow_SSSE3;
1084
734
    }
1085
4.05k
  }
1086
4.05k
#endif
1087
4.05k
#if defined(HAS_INTERPOLATEROW_AVX2)
1088
4.05k
  if (TestCpuFlag(kCpuHasAVX2)) {
1089
4.05k
    InterpolateRow = InterpolateRow_Any_AVX2;
1090
4.05k
    if (IS_ALIGNED(src_width, 32)) {
1091
391
      InterpolateRow = InterpolateRow_AVX2;
1092
391
    }
1093
4.05k
  }
1094
4.05k
#endif
1095
#if defined(HAS_INTERPOLATEROW_NEON)
1096
  if (TestCpuFlag(kCpuHasNEON)) {
1097
    InterpolateRow = InterpolateRow_Any_NEON;
1098
    if (IS_ALIGNED(src_width, 16)) {
1099
      InterpolateRow = InterpolateRow_NEON;
1100
    }
1101
  }
1102
#endif
1103
#if defined(HAS_INTERPOLATEROW_SME)
1104
  if (TestCpuFlag(kCpuHasSME)) {
1105
    InterpolateRow = InterpolateRow_SME;
1106
  }
1107
#endif
1108
#if defined(HAS_INTERPOLATEROW_LSX)
1109
  if (TestCpuFlag(kCpuHasLSX)) {
1110
    InterpolateRow = InterpolateRow_Any_LSX;
1111
    if (IS_ALIGNED(src_width, 32)) {
1112
      InterpolateRow = InterpolateRow_LSX;
1113
    }
1114
  }
1115
#endif
1116
#if defined(HAS_INTERPOLATEROW_RVV)
1117
  if (TestCpuFlag(kCpuHasRVV)) {
1118
    InterpolateRow = InterpolateRow_RVV;
1119
  }
1120
#endif
1121
1122
4.05k
#if defined(HAS_SCALEFILTERCOLS_SSSE3)
1123
4.05k
  if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
1124
4.05k
    ScaleFilterCols = ScaleFilterCols_SSSE3;
1125
4.05k
  }
1126
4.05k
#endif
1127
#if defined(HAS_SCALEFILTERCOLS_NEON)
1128
  if (TestCpuFlag(kCpuHasNEON) && src_width < 32768) {
1129
    ScaleFilterCols = ScaleFilterCols_Any_NEON;
1130
    if (IS_ALIGNED(dst_width, 8)) {
1131
      ScaleFilterCols = ScaleFilterCols_NEON;
1132
    }
1133
  }
1134
#endif
1135
#if defined(HAS_SCALEFILTERCOLS_LSX)
1136
  if (TestCpuFlag(kCpuHasLSX) && src_width < 32768) {
1137
    ScaleFilterCols = ScaleFilterCols_Any_LSX;
1138
    if (IS_ALIGNED(dst_width, 16)) {
1139
      ScaleFilterCols = ScaleFilterCols_LSX;
1140
    }
1141
  }
1142
#endif
1143
4.05k
  if (y > max_y) {
1144
177
    y = max_y;
1145
177
  }
1146
1147
244k
  for (j = 0; j < dst_height; ++j) {
1148
240k
    int yi = y >> 16;
1149
240k
    const uint8_t* src = src_ptr + yi * (int64_t)src_stride;
1150
240k
    if (filtering == kFilterLinear) {
1151
134k
      ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
1152
134k
    } else {
1153
105k
      int yf = (y >> 8) & 255;
1154
105k
      InterpolateRow(row, src, src_stride, src_width, yf);
1155
105k
      ScaleFilterCols(dst_ptr, row, dst_width, x, dx);
1156
105k
    }
1157
240k
    dst_ptr += dst_stride;
1158
240k
    y += dy;
1159
240k
    if (y > max_y) {
1160
5.70k
      y = max_y;
1161
5.70k
    }
1162
240k
  }
1163
4.05k
  free_aligned_buffer_64(row);
1164
4.05k
  return 0;
1165
4.05k
}
1166
1167
static int ScalePlaneBilinearDown_16(int src_width,
1168
                                     int src_height,
1169
                                     int dst_width,
1170
                                     int dst_height,
1171
                                     int src_stride,
1172
                                     int dst_stride,
1173
                                     const uint16_t* src_ptr,
1174
                                     uint16_t* dst_ptr,
1175
5.49k
                                     enum FilterMode filtering) {
1176
  // Initial source x/y coordinate and step values as 16.16 fixed point.
1177
5.49k
  int x = 0;
1178
5.49k
  int y = 0;
1179
5.49k
  int dx = 0;
1180
5.49k
  int dy = 0;
1181
  // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
1182
  // Allocate a row buffer.
1183
5.49k
  align_buffer_64(row, src_width * 2);
1184
5.49k
  if (!row)
1185
0
    return 1;
1186
1187
5.49k
  const int max_y = (src_height - 1) << 16;
1188
5.49k
  int j;
1189
5.49k
  void (*ScaleFilterCols)(uint16_t* dst_ptr, const uint16_t* src_ptr,
1190
5.49k
                          int dst_width, int x, int dx) =
1191
5.49k
      (src_width >= 32768) ? ScaleFilterCols64_16_C : ScaleFilterCols_16_C;
1192
5.49k
  void (*InterpolateRow)(uint16_t* dst_ptr, const uint16_t* src_ptr,
1193
5.49k
                         ptrdiff_t src_stride, int dst_width,
1194
5.49k
                         int source_y_fraction) = InterpolateRow_16_C;
1195
5.49k
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
1196
5.49k
             &dx, &dy);
1197
5.49k
  src_width = Abs(src_width);
1198
1199
#if defined(HAS_INTERPOLATEROW_16_SSE2)
1200
  if (TestCpuFlag(kCpuHasSSE2)) {
1201
    InterpolateRow = InterpolateRow_16_Any_SSE2;
1202
    if (IS_ALIGNED(src_width, 16)) {
1203
      InterpolateRow = InterpolateRow_16_SSE2;
1204
    }
1205
  }
1206
#endif
1207
#if defined(HAS_INTERPOLATEROW_16_SSSE3)
1208
  if (TestCpuFlag(kCpuHasSSSE3)) {
1209
    InterpolateRow = InterpolateRow_16_Any_SSSE3;
1210
    if (IS_ALIGNED(src_width, 16)) {
1211
      InterpolateRow = InterpolateRow_16_SSSE3;
1212
    }
1213
  }
1214
#endif
1215
#if defined(HAS_INTERPOLATEROW_16_AVX2)
1216
  if (TestCpuFlag(kCpuHasAVX2)) {
1217
    InterpolateRow = InterpolateRow_16_Any_AVX2;
1218
    if (IS_ALIGNED(src_width, 32)) {
1219
      InterpolateRow = InterpolateRow_16_AVX2;
1220
    }
1221
  }
1222
#endif
1223
#if defined(HAS_INTERPOLATEROW_16_NEON)
1224
  if (TestCpuFlag(kCpuHasNEON)) {
1225
    InterpolateRow = InterpolateRow_16_Any_NEON;
1226
    if (IS_ALIGNED(src_width, 16)) {
1227
      InterpolateRow = InterpolateRow_16_NEON;
1228
    }
1229
  }
1230
#endif
1231
#if defined(HAS_INTERPOLATEROW_16_SME)
1232
  if (TestCpuFlag(kCpuHasSME)) {
1233
    InterpolateRow = InterpolateRow_16_SME;
1234
  }
1235
#endif
1236
1237
#if defined(HAS_SCALEFILTERCOLS_16_SSSE3)
1238
  if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
1239
    ScaleFilterCols = ScaleFilterCols_16_SSSE3;
1240
  }
1241
#endif
1242
5.49k
  if (y > max_y) {
1243
58
    y = max_y;
1244
58
  }
1245
1246
451k
  for (j = 0; j < dst_height; ++j) {
1247
445k
    int yi = y >> 16;
1248
445k
    const uint16_t* src = src_ptr + yi * (int64_t)src_stride;
1249
445k
    if (filtering == kFilterLinear) {
1250
201k
      ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
1251
244k
    } else {
1252
244k
      int yf = (y >> 8) & 255;
1253
244k
      InterpolateRow((uint16_t*)row, src, src_stride, src_width, yf);
1254
244k
      ScaleFilterCols(dst_ptr, (uint16_t*)row, dst_width, x, dx);
1255
244k
    }
1256
445k
    dst_ptr += dst_stride;
1257
445k
    y += dy;
1258
445k
    if (y > max_y) {
1259
6.76k
      y = max_y;
1260
6.76k
    }
1261
445k
  }
1262
5.49k
  free_aligned_buffer_64(row);
1263
5.49k
  return 0;
1264
5.49k
}
1265
1266
// Scale up down with bilinear interpolation.
1267
static int ScalePlaneBilinearUp(int src_width,
1268
                                int src_height,
1269
                                int dst_width,
1270
                                int dst_height,
1271
                                int src_stride,
1272
                                int dst_stride,
1273
                                const uint8_t* src_ptr,
1274
                                uint8_t* dst_ptr,
1275
5.58k
                                enum FilterMode filtering) {
1276
5.58k
  int j;
1277
  // Initial source x/y coordinate and step values as 16.16 fixed point.
1278
5.58k
  int x = 0;
1279
5.58k
  int y = 0;
1280
5.58k
  int dx = 0;
1281
5.58k
  int dy = 0;
1282
5.58k
  const int max_y = (src_height - 1) << 16;
1283
5.58k
  void (*InterpolateRow)(uint8_t* dst_ptr, const uint8_t* src_ptr,
1284
5.58k
                         ptrdiff_t src_stride, int dst_width,
1285
5.58k
                         int source_y_fraction) = InterpolateRow_C;
1286
5.58k
  void (*ScaleFilterCols)(uint8_t* dst_ptr, const uint8_t* src_ptr,
1287
5.58k
                          int dst_width, int x, int dx) =
1288
5.58k
      filtering ? ScaleFilterCols_C : ScaleCols_C;
1289
5.58k
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
1290
5.58k
             &dx, &dy);
1291
5.58k
  src_width = Abs(src_width);
1292
1293
5.58k
#if defined(HAS_INTERPOLATEROW_SSSE3)
1294
5.58k
  if (TestCpuFlag(kCpuHasSSSE3)) {
1295
5.58k
    InterpolateRow = InterpolateRow_Any_SSSE3;
1296
5.58k
    if (IS_ALIGNED(dst_width, 16)) {
1297
2.71k
      InterpolateRow = InterpolateRow_SSSE3;
1298
2.71k
    }
1299
5.58k
  }
1300
5.58k
#endif
1301
5.58k
#if defined(HAS_INTERPOLATEROW_AVX2)
1302
5.58k
  if (TestCpuFlag(kCpuHasAVX2)) {
1303
5.58k
    InterpolateRow = InterpolateRow_Any_AVX2;
1304
5.58k
    if (IS_ALIGNED(dst_width, 32)) {
1305
2.15k
      InterpolateRow = InterpolateRow_AVX2;
1306
2.15k
    }
1307
5.58k
  }
1308
5.58k
#endif
1309
#if defined(HAS_INTERPOLATEROW_NEON)
1310
  if (TestCpuFlag(kCpuHasNEON)) {
1311
    InterpolateRow = InterpolateRow_Any_NEON;
1312
    if (IS_ALIGNED(dst_width, 16)) {
1313
      InterpolateRow = InterpolateRow_NEON;
1314
    }
1315
  }
1316
#endif
1317
#if defined(HAS_INTERPOLATEROW_SME)
1318
  if (TestCpuFlag(kCpuHasSME)) {
1319
    InterpolateRow = InterpolateRow_SME;
1320
  }
1321
#endif
1322
#if defined(HAS_INTERPOLATEROW_RVV)
1323
  if (TestCpuFlag(kCpuHasRVV)) {
1324
    InterpolateRow = InterpolateRow_RVV;
1325
  }
1326
#endif
1327
1328
5.58k
  if (filtering && src_width >= 32768) {
1329
0
    ScaleFilterCols = ScaleFilterCols64_C;
1330
0
  }
1331
5.58k
#if defined(HAS_SCALEFILTERCOLS_SSSE3)
1332
5.58k
  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
1333
5.58k
    ScaleFilterCols = ScaleFilterCols_SSSE3;
1334
5.58k
  }
1335
5.58k
#endif
1336
#if defined(HAS_SCALEFILTERCOLS_NEON)
1337
  if (filtering && TestCpuFlag(kCpuHasNEON) && src_width < 32768) {
1338
    ScaleFilterCols = ScaleFilterCols_Any_NEON;
1339
    if (IS_ALIGNED(dst_width, 8)) {
1340
      ScaleFilterCols = ScaleFilterCols_NEON;
1341
    }
1342
  }
1343
#endif
1344
#if defined(HAS_SCALEFILTERCOLS_LSX)
1345
  if (filtering && TestCpuFlag(kCpuHasLSX) && src_width < 32768) {
1346
    ScaleFilterCols = ScaleFilterCols_Any_LSX;
1347
    if (IS_ALIGNED(dst_width, 16)) {
1348
      ScaleFilterCols = ScaleFilterCols_LSX;
1349
    }
1350
  }
1351
#endif
1352
5.58k
  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
1353
0
    ScaleFilterCols = ScaleColsUp2_C;
1354
#if defined(HAS_SCALECOLS_SSE2)
1355
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
1356
      ScaleFilterCols = ScaleColsUp2_SSE2;
1357
    }
1358
#endif
1359
0
  }
1360
1361
5.58k
  if (y > max_y) {
1362
1.35k
    y = max_y;
1363
1.35k
  }
1364
5.58k
  {
1365
5.58k
    int yi = y >> 16;
1366
5.58k
    const uint8_t* src = src_ptr + yi * (int64_t)src_stride;
1367
1368
    // Allocate 2 row buffers.
1369
5.58k
    const int row_size = (dst_width + 31) & ~31;
1370
5.58k
    align_buffer_64(row, row_size * 2);
1371
5.58k
    if (!row)
1372
0
      return 1;
1373
1374
5.58k
    uint8_t* rowptr = row;
1375
5.58k
    int rowstride = row_size;
1376
5.58k
    int lasty = yi;
1377
1378
5.58k
    ScaleFilterCols(rowptr, src, dst_width, x, dx);
1379
5.58k
    if (src_height > 1) {
1380
4.23k
      src += src_stride;
1381
4.23k
    }
1382
5.58k
    ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx);
1383
5.58k
    if (src_height > 2) {
1384
3.93k
      src += src_stride;
1385
3.93k
    }
1386
1387
2.59M
    for (j = 0; j < dst_height; ++j) {
1388
2.59M
      yi = y >> 16;
1389
2.59M
      if (yi != lasty) {
1390
122k
        if (y > max_y) {
1391
0
          y = max_y;
1392
0
          yi = y >> 16;
1393
0
          src = src_ptr + yi * (int64_t)src_stride;
1394
0
        }
1395
122k
        if (yi != lasty) {
1396
122k
          ScaleFilterCols(rowptr, src, dst_width, x, dx);
1397
122k
          rowptr += rowstride;
1398
122k
          rowstride = -rowstride;
1399
122k
          lasty = yi;
1400
122k
          if ((y + 65536) < max_y) {
1401
118k
            src += src_stride;
1402
118k
          }
1403
122k
        }
1404
122k
      }
1405
2.59M
      if (filtering == kFilterLinear) {
1406
533k
        InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0);
1407
2.05M
      } else {
1408
2.05M
        int yf = (y >> 8) & 255;
1409
2.05M
        InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf);
1410
2.05M
      }
1411
2.59M
      dst_ptr += dst_stride;
1412
2.59M
      y += dy;
1413
2.59M
    }
1414
5.58k
    free_aligned_buffer_64(row);
1415
5.58k
  }
1416
0
  return 0;
1417
5.58k
}
1418
1419
// Scale plane, horizontally up by 2 times.
1420
// Uses linear filter horizontally, nearest vertically.
1421
// This is an optimized version for scaling up a plane to 2 times of
1422
// its original width, using linear interpolation.
1423
// This is used to scale U and V planes of I422 to I444.
1424
static void ScalePlaneUp2_Linear(int src_width,
1425
                                 int src_height,
1426
                                 int dst_width,
1427
                                 int dst_height,
1428
                                 int src_stride,
1429
                                 int dst_stride,
1430
                                 const uint8_t* src_ptr,
1431
232
                                 uint8_t* dst_ptr) {
1432
232
  void (*ScaleRowUp)(const uint8_t* src_ptr, uint8_t* dst_ptr, int dst_width) =
1433
232
      ScaleRowUp2_Linear_Any_C;
1434
232
  int i;
1435
232
  int y;
1436
232
  int dy;
1437
1438
232
  (void)src_width;
1439
  // This function can only scale up by 2 times horizontally.
1440
232
  assert(src_width == ((dst_width + 1) / 2));
1441
1442
232
#ifdef HAS_SCALEROWUP2_LINEAR_SSE2
1443
232
  if (TestCpuFlag(kCpuHasSSE2)) {
1444
232
    ScaleRowUp = ScaleRowUp2_Linear_Any_SSE2;
1445
232
  }
1446
232
#endif
1447
1448
232
#ifdef HAS_SCALEROWUP2_LINEAR_SSSE3
1449
232
  if (TestCpuFlag(kCpuHasSSSE3)) {
1450
232
    ScaleRowUp = ScaleRowUp2_Linear_Any_SSSE3;
1451
232
  }
1452
232
#endif
1453
1454
232
#ifdef HAS_SCALEROWUP2_LINEAR_AVX2
1455
232
  if (TestCpuFlag(kCpuHasAVX2)) {
1456
232
    ScaleRowUp = ScaleRowUp2_Linear_Any_AVX2;
1457
232
  }
1458
232
#endif
1459
1460
#ifdef HAS_SCALEROWUP2_LINEAR_NEON
1461
  if (TestCpuFlag(kCpuHasNEON)) {
1462
    ScaleRowUp = ScaleRowUp2_Linear_Any_NEON;
1463
  }
1464
#endif
1465
#ifdef HAS_SCALEROWUP2_LINEAR_RVV
1466
  if (TestCpuFlag(kCpuHasRVV)) {
1467
    ScaleRowUp = ScaleRowUp2_Linear_RVV;
1468
  }
1469
#endif
1470
1471
232
  if (dst_height == 1) {
1472
28
    ScaleRowUp(src_ptr + ((src_height - 1) / 2) * (int64_t)src_stride, dst_ptr,
1473
28
               dst_width);
1474
204
  } else {
1475
204
    dy = FixedDiv(src_height - 1, dst_height - 1);
1476
204
    y = (1 << 15) - 1;
1477
173k
    for (i = 0; i < dst_height; ++i) {
1478
173k
      ScaleRowUp(src_ptr + (y >> 16) * (int64_t)src_stride, dst_ptr, dst_width);
1479
173k
      dst_ptr += dst_stride;
1480
173k
      y += dy;
1481
173k
    }
1482
204
  }
1483
232
}
1484
1485
// Scale plane, up by 2 times.
1486
// This is an optimized version for scaling up a plane to 2 times of
1487
// its original size, using bilinear interpolation.
1488
// This is used to scale U and V planes of I420 to I444.
1489
static void ScalePlaneUp2_Bilinear(int src_width,
1490
                                   int src_height,
1491
                                   int dst_width,
1492
                                   int dst_height,
1493
                                   int src_stride,
1494
                                   int dst_stride,
1495
                                   const uint8_t* src_ptr,
1496
159
                                   uint8_t* dst_ptr) {
1497
159
  void (*Scale2RowUp)(const uint8_t* src_ptr, ptrdiff_t src_stride,
1498
159
                      uint8_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) =
1499
159
      ScaleRowUp2_Bilinear_Any_C;
1500
159
  int x;
1501
1502
159
  (void)src_width;
1503
  // This function can only scale up by 2 times.
1504
159
  assert(src_width == ((dst_width + 1) / 2));
1505
159
  assert(src_height == ((dst_height + 1) / 2));
1506
1507
159
#ifdef HAS_SCALEROWUP2_BILINEAR_SSE2
1508
159
  if (TestCpuFlag(kCpuHasSSE2)) {
1509
159
    Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSE2;
1510
159
  }
1511
159
#endif
1512
1513
159
#ifdef HAS_SCALEROWUP2_BILINEAR_SSSE3
1514
159
  if (TestCpuFlag(kCpuHasSSSE3)) {
1515
159
    Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSSE3;
1516
159
  }
1517
159
#endif
1518
1519
159
#ifdef HAS_SCALEROWUP2_BILINEAR_AVX2
1520
159
  if (TestCpuFlag(kCpuHasAVX2)) {
1521
159
    Scale2RowUp = ScaleRowUp2_Bilinear_Any_AVX2;
1522
159
  }
1523
159
#endif
1524
1525
#ifdef HAS_SCALEROWUP2_BILINEAR_NEON
1526
  if (TestCpuFlag(kCpuHasNEON)) {
1527
    Scale2RowUp = ScaleRowUp2_Bilinear_Any_NEON;
1528
  }
1529
#endif
1530
#ifdef HAS_SCALEROWUP2_BILINEAR_RVV
1531
  if (TestCpuFlag(kCpuHasRVV)) {
1532
    Scale2RowUp = ScaleRowUp2_Bilinear_RVV;
1533
  }
1534
#endif
1535
1536
159
  Scale2RowUp(src_ptr, 0, dst_ptr, 0, dst_width);
1537
159
  dst_ptr += dst_stride;
1538
4.14k
  for (x = 0; x < src_height - 1; ++x) {
1539
3.98k
    Scale2RowUp(src_ptr, src_stride, dst_ptr, dst_stride, dst_width);
1540
3.98k
    src_ptr += src_stride;
1541
    // TODO(fbarchard): Test performance of writing one row of destination at a
1542
    // time.
1543
3.98k
    dst_ptr += 2 * dst_stride;
1544
3.98k
  }
1545
159
  if (!(dst_height & 1)) {
1546
103
    Scale2RowUp(src_ptr, 0, dst_ptr, 0, dst_width);
1547
103
  }
1548
159
}
1549
1550
// Scale at most 14 bit plane, horizontally up by 2 times.
1551
// This is an optimized version for scaling up a plane to 2 times of
1552
// its original width, using linear interpolation.
1553
// stride is in count of uint16_t.
1554
// This is used to scale U and V planes of I210 to I410 and I212 to I412.
1555
static void ScalePlaneUp2_12_Linear(int src_width,
1556
                                    int src_height,
1557
                                    int dst_width,
1558
                                    int dst_height,
1559
                                    int src_stride,
1560
                                    int dst_stride,
1561
                                    const uint16_t* src_ptr,
1562
228
                                    uint16_t* dst_ptr) {
1563
228
  void (*ScaleRowUp)(const uint16_t* src_ptr, uint16_t* dst_ptr,
1564
228
                     int dst_width) = ScaleRowUp2_Linear_16_Any_C;
1565
228
  int i;
1566
228
  int y;
1567
228
  int dy;
1568
1569
228
  (void)src_width;
1570
  // This function can only scale up by 2 times horizontally.
1571
228
  assert(src_width == ((dst_width + 1) / 2));
1572
1573
228
#ifdef HAS_SCALEROWUP2_LINEAR_12_SSSE3
1574
228
  if (TestCpuFlag(kCpuHasSSSE3)) {
1575
228
    ScaleRowUp = ScaleRowUp2_Linear_12_Any_SSSE3;
1576
228
  }
1577
228
#endif
1578
1579
228
#ifdef HAS_SCALEROWUP2_LINEAR_12_AVX2
1580
228
  if (TestCpuFlag(kCpuHasAVX2)) {
1581
228
    ScaleRowUp = ScaleRowUp2_Linear_12_Any_AVX2;
1582
228
  }
1583
228
#endif
1584
1585
#ifdef HAS_SCALEROWUP2_LINEAR_12_NEON
1586
  if (TestCpuFlag(kCpuHasNEON)) {
1587
    ScaleRowUp = ScaleRowUp2_Linear_12_Any_NEON;
1588
  }
1589
#endif
1590
1591
228
  if (dst_height == 1) {
1592
16
    ScaleRowUp(src_ptr + ((src_height - 1) / 2) * (int64_t)src_stride, dst_ptr,
1593
16
               dst_width);
1594
212
  } else {
1595
212
    dy = FixedDiv(src_height - 1, dst_height - 1);
1596
212
    y = (1 << 15) - 1;
1597
401k
    for (i = 0; i < dst_height; ++i) {
1598
401k
      ScaleRowUp(src_ptr + (y >> 16) * (int64_t)src_stride, dst_ptr, dst_width);
1599
401k
      dst_ptr += dst_stride;
1600
401k
      y += dy;
1601
401k
    }
1602
212
  }
1603
228
}
1604
1605
// Scale at most 12 bit plane, up by 2 times.
1606
// This is an optimized version for scaling up a plane to 2 times of
1607
// its original size, using bilinear interpolation.
1608
// stride is in count of uint16_t.
1609
// This is used to scale U and V planes of I010 to I410 and I012 to I412.
1610
static void ScalePlaneUp2_12_Bilinear(int src_width,
1611
                                      int src_height,
1612
                                      int dst_width,
1613
                                      int dst_height,
1614
                                      int src_stride,
1615
                                      int dst_stride,
1616
                                      const uint16_t* src_ptr,
1617
173
                                      uint16_t* dst_ptr) {
1618
173
  void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride,
1619
173
                      uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) =
1620
173
      ScaleRowUp2_Bilinear_16_Any_C;
1621
173
  int x;
1622
1623
173
  (void)src_width;
1624
  // This function can only scale up by 2 times.
1625
173
  assert(src_width == ((dst_width + 1) / 2));
1626
173
  assert(src_height == ((dst_height + 1) / 2));
1627
1628
173
#ifdef HAS_SCALEROWUP2_BILINEAR_12_SSSE3
1629
173
  if (TestCpuFlag(kCpuHasSSSE3)) {
1630
173
    Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_SSSE3;
1631
173
  }
1632
173
#endif
1633
1634
173
#ifdef HAS_SCALEROWUP2_BILINEAR_12_AVX2
1635
173
  if (TestCpuFlag(kCpuHasAVX2)) {
1636
173
    Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_AVX2;
1637
173
  }
1638
173
#endif
1639
1640
#ifdef HAS_SCALEROWUP2_BILINEAR_12_NEON
1641
  if (TestCpuFlag(kCpuHasNEON)) {
1642
    Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_NEON;
1643
  }
1644
#endif
1645
1646
173
  Scale2RowUp(src_ptr, 0, dst_ptr, 0, dst_width);
1647
173
  dst_ptr += dst_stride;
1648
7.54k
  for (x = 0; x < src_height - 1; ++x) {
1649
7.37k
    Scale2RowUp(src_ptr, src_stride, dst_ptr, dst_stride, dst_width);
1650
7.37k
    src_ptr += src_stride;
1651
7.37k
    dst_ptr += 2 * dst_stride;
1652
7.37k
  }
1653
173
  if (!(dst_height & 1)) {
1654
117
    Scale2RowUp(src_ptr, 0, dst_ptr, 0, dst_width);
1655
117
  }
1656
173
}
1657
1658
static void ScalePlaneUp2_16_Linear(int src_width,
1659
                                    int src_height,
1660
                                    int dst_width,
1661
                                    int dst_height,
1662
                                    int src_stride,
1663
                                    int dst_stride,
1664
                                    const uint16_t* src_ptr,
1665
0
                                    uint16_t* dst_ptr) {
1666
0
  void (*ScaleRowUp)(const uint16_t* src_ptr, uint16_t* dst_ptr,
1667
0
                     int dst_width) = ScaleRowUp2_Linear_16_Any_C;
1668
0
  int i;
1669
0
  int y;
1670
0
  int dy;
1671
1672
0
  (void)src_width;
1673
  // This function can only scale up by 2 times horizontally.
1674
0
  assert(src_width == ((dst_width + 1) / 2));
1675
1676
0
#ifdef HAS_SCALEROWUP2_LINEAR_16_SSE2
1677
0
  if (TestCpuFlag(kCpuHasSSE2)) {
1678
0
    ScaleRowUp = ScaleRowUp2_Linear_16_Any_SSE2;
1679
0
  }
1680
0
#endif
1681
1682
0
#ifdef HAS_SCALEROWUP2_LINEAR_16_AVX2
1683
0
  if (TestCpuFlag(kCpuHasAVX2)) {
1684
0
    ScaleRowUp = ScaleRowUp2_Linear_16_Any_AVX2;
1685
0
  }
1686
0
#endif
1687
1688
#ifdef HAS_SCALEROWUP2_LINEAR_16_NEON
1689
  if (TestCpuFlag(kCpuHasNEON)) {
1690
    ScaleRowUp = ScaleRowUp2_Linear_16_Any_NEON;
1691
  }
1692
#endif
1693
1694
0
  if (dst_height == 1) {
1695
0
    ScaleRowUp(src_ptr + ((src_height - 1) / 2) * (int64_t)src_stride, dst_ptr,
1696
0
               dst_width);
1697
0
  } else {
1698
0
    dy = FixedDiv(src_height - 1, dst_height - 1);
1699
0
    y = (1 << 15) - 1;
1700
0
    for (i = 0; i < dst_height; ++i) {
1701
0
      ScaleRowUp(src_ptr + (y >> 16) * (int64_t)src_stride, dst_ptr, dst_width);
1702
0
      dst_ptr += dst_stride;
1703
0
      y += dy;
1704
0
    }
1705
0
  }
1706
0
}
1707
1708
static void ScalePlaneUp2_16_Bilinear(int src_width,
1709
                                      int src_height,
1710
                                      int dst_width,
1711
                                      int dst_height,
1712
                                      int src_stride,
1713
                                      int dst_stride,
1714
                                      const uint16_t* src_ptr,
1715
0
                                      uint16_t* dst_ptr) {
1716
0
  void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride,
1717
0
                      uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) =
1718
0
      ScaleRowUp2_Bilinear_16_Any_C;
1719
0
  int x;
1720
1721
0
  (void)src_width;
1722
  // This function can only scale up by 2 times.
1723
0
  assert(src_width == ((dst_width + 1) / 2));
1724
0
  assert(src_height == ((dst_height + 1) / 2));
1725
1726
0
#ifdef HAS_SCALEROWUP2_BILINEAR_16_SSE2
1727
0
  if (TestCpuFlag(kCpuHasSSE2)) {
1728
0
    Scale2RowUp = ScaleRowUp2_Bilinear_16_Any_SSE2;
1729
0
  }
1730
0
#endif
1731
1732
0
#ifdef HAS_SCALEROWUP2_BILINEAR_16_AVX2
1733
0
  if (TestCpuFlag(kCpuHasAVX2)) {
1734
0
    Scale2RowUp = ScaleRowUp2_Bilinear_16_Any_AVX2;
1735
0
  }
1736
0
#endif
1737
1738
#ifdef HAS_SCALEROWUP2_BILINEAR_16_NEON
1739
  if (TestCpuFlag(kCpuHasNEON)) {
1740
    Scale2RowUp = ScaleRowUp2_Bilinear_16_Any_NEON;
1741
  }
1742
#endif
1743
1744
0
  Scale2RowUp(src_ptr, 0, dst_ptr, 0, dst_width);
1745
0
  dst_ptr += dst_stride;
1746
0
  for (x = 0; x < src_height - 1; ++x) {
1747
0
    Scale2RowUp(src_ptr, src_stride, dst_ptr, dst_stride, dst_width);
1748
0
    src_ptr += src_stride;
1749
0
    dst_ptr += 2 * dst_stride;
1750
0
  }
1751
0
  if (!(dst_height & 1)) {
1752
0
    Scale2RowUp(src_ptr, 0, dst_ptr, 0, dst_width);
1753
0
  }
1754
0
}
1755
1756
static int ScalePlaneBilinearUp_16(int src_width,
1757
                                   int src_height,
1758
                                   int dst_width,
1759
                                   int dst_height,
1760
                                   int src_stride,
1761
                                   int dst_stride,
1762
                                   const uint16_t* src_ptr,
1763
                                   uint16_t* dst_ptr,
1764
5.81k
                                   enum FilterMode filtering) {
1765
5.81k
  int j;
1766
  // Initial source x/y coordinate and step values as 16.16 fixed point.
1767
5.81k
  int x = 0;
1768
5.81k
  int y = 0;
1769
5.81k
  int dx = 0;
1770
5.81k
  int dy = 0;
1771
5.81k
  const int max_y = (src_height - 1) << 16;
1772
5.81k
  void (*InterpolateRow)(uint16_t* dst_ptr, const uint16_t* src_ptr,
1773
5.81k
                         ptrdiff_t src_stride, int dst_width,
1774
5.81k
                         int source_y_fraction) = InterpolateRow_16_C;
1775
5.81k
  void (*ScaleFilterCols)(uint16_t* dst_ptr, const uint16_t* src_ptr,
1776
5.81k
                          int dst_width, int x, int dx) =
1777
5.81k
      filtering ? ScaleFilterCols_16_C : ScaleCols_16_C;
1778
5.81k
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
1779
5.81k
             &dx, &dy);
1780
5.81k
  src_width = Abs(src_width);
1781
1782
#if defined(HAS_INTERPOLATEROW_16_SSE2)
1783
  if (TestCpuFlag(kCpuHasSSE2)) {
1784
    InterpolateRow = InterpolateRow_16_Any_SSE2;
1785
    if (IS_ALIGNED(dst_width, 16)) {
1786
      InterpolateRow = InterpolateRow_16_SSE2;
1787
    }
1788
  }
1789
#endif
1790
#if defined(HAS_INTERPOLATEROW_16_SSSE3)
1791
  if (TestCpuFlag(kCpuHasSSSE3)) {
1792
    InterpolateRow = InterpolateRow_16_Any_SSSE3;
1793
    if (IS_ALIGNED(dst_width, 16)) {
1794
      InterpolateRow = InterpolateRow_16_SSSE3;
1795
    }
1796
  }
1797
#endif
1798
#if defined(HAS_INTERPOLATEROW_16_AVX2)
1799
  if (TestCpuFlag(kCpuHasAVX2)) {
1800
    InterpolateRow = InterpolateRow_16_Any_AVX2;
1801
    if (IS_ALIGNED(dst_width, 32)) {
1802
      InterpolateRow = InterpolateRow_16_AVX2;
1803
    }
1804
  }
1805
#endif
1806
#if defined(HAS_INTERPOLATEROW_16_NEON)
1807
  if (TestCpuFlag(kCpuHasNEON)) {
1808
    InterpolateRow = InterpolateRow_16_Any_NEON;
1809
    if (IS_ALIGNED(dst_width, 16)) {
1810
      InterpolateRow = InterpolateRow_16_NEON;
1811
    }
1812
  }
1813
#endif
1814
#if defined(HAS_INTERPOLATEROW_16_SME)
1815
  if (TestCpuFlag(kCpuHasSME)) {
1816
    InterpolateRow = InterpolateRow_16_SME;
1817
  }
1818
#endif
1819
1820
5.81k
  if (filtering && src_width >= 32768) {
1821
0
    ScaleFilterCols = ScaleFilterCols64_16_C;
1822
0
  }
1823
#if defined(HAS_SCALEFILTERCOLS_16_SSSE3)
1824
  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
1825
    ScaleFilterCols = ScaleFilterCols_16_SSSE3;
1826
  }
1827
#endif
1828
5.81k
  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
1829
0
    ScaleFilterCols = ScaleColsUp2_16_C;
1830
#if defined(HAS_SCALECOLS_16_SSE2)
1831
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
1832
      ScaleFilterCols = ScaleColsUp2_16_SSE2;
1833
    }
1834
#endif
1835
0
  }
1836
5.81k
  if (y > max_y) {
1837
1.04k
    y = max_y;
1838
1.04k
  }
1839
5.81k
  {
1840
5.81k
    int yi = y >> 16;
1841
5.81k
    const uint16_t* src = src_ptr + yi * (int64_t)src_stride;
1842
1843
    // Allocate 2 row buffers.
1844
5.81k
    const int row_size = (dst_width + 31) & ~31;
1845
5.81k
    align_buffer_64(row, row_size * 4);
1846
5.81k
    int rowstride = row_size;
1847
5.81k
    int lasty = yi;
1848
5.81k
    uint16_t* rowptr = (uint16_t*)row;
1849
5.81k
    if (!row)
1850
0
      return 1;
1851
1852
5.81k
    ScaleFilterCols(rowptr, src, dst_width, x, dx);
1853
5.81k
    if (src_height > 1) {
1854
4.77k
      src += src_stride;
1855
4.77k
    }
1856
5.81k
    ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx);
1857
5.81k
    if (src_height > 2) {
1858
3.80k
      src += src_stride;
1859
3.80k
    }
1860
1861
5.33M
    for (j = 0; j < dst_height; ++j) {
1862
5.32M
      yi = y >> 16;
1863
5.32M
      if (yi != lasty) {
1864
173k
        if (y > max_y) {
1865
0
          y = max_y;
1866
0
          yi = y >> 16;
1867
0
          src = src_ptr + yi * (int64_t)src_stride;
1868
0
        }
1869
173k
        if (yi != lasty) {
1870
173k
          ScaleFilterCols(rowptr, src, dst_width, x, dx);
1871
173k
          rowptr += rowstride;
1872
173k
          rowstride = -rowstride;
1873
173k
          lasty = yi;
1874
173k
          if ((y + 65536) < max_y) {
1875
169k
            src += src_stride;
1876
169k
          }
1877
173k
        }
1878
173k
      }
1879
5.32M
      if (filtering == kFilterLinear) {
1880
360k
        InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0);
1881
4.96M
      } else {
1882
4.96M
        int yf = (y >> 8) & 255;
1883
4.96M
        InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf);
1884
4.96M
      }
1885
5.32M
      dst_ptr += dst_stride;
1886
5.32M
      y += dy;
1887
5.32M
    }
1888
5.81k
    free_aligned_buffer_64(row);
1889
5.81k
  }
1890
0
  return 0;
1891
5.81k
}
1892
1893
// Scale Plane to/from any dimensions, without interpolation.
1894
// Fixed point math is used for performance: The upper 16 bits
1895
// of x and dx is the integer part of the source position and
1896
// the lower 16 bits are the fixed decimal part.
1897
1898
static void ScalePlaneSimple(int src_width,
1899
                             int src_height,
1900
                             int dst_width,
1901
                             int dst_height,
1902
                             int src_stride,
1903
                             int dst_stride,
1904
                             const uint8_t* src_ptr,
1905
1.22k
                             uint8_t* dst_ptr) {
1906
1.22k
  int i;
1907
1.22k
  void (*ScaleCols)(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width,
1908
1.22k
                    int x, int dx) = ScaleCols_C;
1909
  // Initial source x/y coordinate and step values as 16.16 fixed point.
1910
1.22k
  int x = 0;
1911
1.22k
  int y = 0;
1912
1.22k
  int dx = 0;
1913
1.22k
  int dy = 0;
1914
1.22k
  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone, &x, &y,
1915
1.22k
             &dx, &dy);
1916
1.22k
  src_width = Abs(src_width);
1917
1918
1.22k
  if (src_width * 2 == dst_width && x < 0x8000) {
1919
95
    ScaleCols = ScaleColsUp2_C;
1920
#if defined(HAS_SCALECOLS_SSE2)
1921
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
1922
      ScaleCols = ScaleColsUp2_SSE2;
1923
    }
1924
#endif
1925
95
  }
1926
1927
1.03M
  for (i = 0; i < dst_height; ++i) {
1928
1.03M
    ScaleCols(dst_ptr, src_ptr + (y >> 16) * (int64_t)src_stride, dst_width, x,
1929
1.03M
              dx);
1930
1.03M
    dst_ptr += dst_stride;
1931
1.03M
    y += dy;
1932
1.03M
  }
1933
1.22k
}
1934
1935
static void ScalePlaneSimple_16(int src_width,
1936
                                int src_height,
1937
                                int dst_width,
1938
                                int dst_height,
1939
                                int src_stride,
1940
                                int dst_stride,
1941
                                const uint16_t* src_ptr,
1942
1.11k
                                uint16_t* dst_ptr) {
1943
1.11k
  int i;
1944
1.11k
  void (*ScaleCols)(uint16_t* dst_ptr, const uint16_t* src_ptr, int dst_width,
1945
1.11k
                    int x, int dx) = ScaleCols_16_C;
1946
  // Initial source x/y coordinate and step values as 16.16 fixed point.
1947
1.11k
  int x = 0;
1948
1.11k
  int y = 0;
1949
1.11k
  int dx = 0;
1950
1.11k
  int dy = 0;
1951
1.11k
  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone, &x, &y,
1952
1.11k
             &dx, &dy);
1953
1.11k
  src_width = Abs(src_width);
1954
1955
1.11k
  if (src_width * 2 == dst_width && x < 0x8000) {
1956
78
    ScaleCols = ScaleColsUp2_16_C;
1957
#if defined(HAS_SCALECOLS_16_SSE2)
1958
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
1959
      ScaleCols = ScaleColsUp2_16_SSE2;
1960
    }
1961
#endif
1962
78
  }
1963
1964
1.11M
  for (i = 0; i < dst_height; ++i) {
1965
1.11M
    ScaleCols(dst_ptr, src_ptr + (y >> 16) * (int64_t)src_stride, dst_width, x,
1966
1.11M
              dx);
1967
1.11M
    dst_ptr += dst_stride;
1968
1.11M
    y += dy;
1969
1.11M
  }
1970
1.11k
}
1971
1972
// Scale a plane.
1973
// This function dispatches to a specialized scaler based on scale factor.
1974
LIBYUV_API
1975
int ScalePlane(const uint8_t* src,
1976
               int src_stride,
1977
               int src_width,
1978
               int src_height,
1979
               uint8_t* dst,
1980
               int dst_stride,
1981
               int dst_width,
1982
               int dst_height,
1983
15.2k
               enum FilterMode filtering) {
1984
  // Simplify filtering when possible.
1985
15.2k
  filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height,
1986
15.2k
                                filtering);
1987
1988
  // Negative height means invert the image.
1989
15.2k
  if (src_height < 0) {
1990
0
    src_height = -src_height;
1991
0
    src = src + (src_height - 1) * (int64_t)src_stride;
1992
0
    src_stride = -src_stride;
1993
0
  }
1994
  // Use specialized scales to improve performance for common resolutions.
1995
  // For example, all the 1/2 scalings will use ScalePlaneDown2()
1996
15.2k
  if (dst_width == src_width && dst_height == src_height) {
1997
    // Straight copy.
1998
228
    CopyPlane(src, src_stride, dst, dst_stride, dst_width, dst_height);
1999
228
    return 0;
2000
228
  }
2001
15.0k
  if (dst_width == src_width && filtering != kFilterBox) {
2002
2.48k
    int dy = 0;
2003
2.48k
    int y = 0;
2004
    // When scaling down, use the center 2 rows to filter.
2005
    // When scaling up, last row of destination uses the last 2 source rows.
2006
2.48k
    if (dst_height <= src_height) {
2007
892
      dy = FixedDiv(src_height, dst_height);
2008
892
      y = CENTERSTART(dy, -32768);  // Subtract 0.5 (32768) to center filter.
2009
1.58k
    } else if (src_height > 1 && dst_height > 1) {
2010
1.42k
      dy = FixedDiv1(src_height, dst_height);
2011
1.42k
    }
2012
    // Arbitrary scale vertically, but unscaled horizontally.
2013
2.48k
    ScalePlaneVertical(src_height, dst_width, dst_height, src_stride,
2014
2.48k
                       dst_stride, src, dst, 0, y, dy, /*bpp=*/1, filtering);
2015
2.48k
    return 0;
2016
2.48k
  }
2017
12.5k
  if (dst_width <= Abs(src_width) && dst_height <= src_height) {
2018
    // Scale down.
2019
2.66k
    if (4 * dst_width == 3 * src_width && 4 * dst_height == 3 * src_height) {
2020
      // optimized, 3/4
2021
2
      ScalePlaneDown34(src_width, src_height, dst_width, dst_height, src_stride,
2022
2
                       dst_stride, src, dst, filtering);
2023
2
      return 0;
2024
2
    }
2025
2.66k
    if (2 * dst_width == src_width && 2 * dst_height == src_height) {
2026
      // optimized, 1/2
2027
54
      ScalePlaneDown2(src_width, src_height, dst_width, dst_height, src_stride,
2028
54
                      dst_stride, src, dst, filtering);
2029
54
      return 0;
2030
54
    }
2031
    // 3/8 rounded up for odd sized chroma height.
2032
2.61k
    if (8 * dst_width == 3 * src_width && 8 * dst_height == 3 * src_height) {
2033
      // optimized, 3/8
2034
12
      ScalePlaneDown38(src_width, src_height, dst_width, dst_height, src_stride,
2035
12
                       dst_stride, src, dst, filtering);
2036
12
      return 0;
2037
12
    }
2038
2.59k
    if (4 * dst_width == src_width && 4 * dst_height == src_height &&
2039
38
        (filtering == kFilterBox || filtering == kFilterNone)) {
2040
      // optimized, 1/4
2041
38
      ScalePlaneDown4(src_width, src_height, dst_width, dst_height, src_stride,
2042
38
                      dst_stride, src, dst, filtering);
2043
38
      return 0;
2044
38
    }
2045
2.59k
  }
2046
12.4k
  if (filtering == kFilterBox && dst_height * 2 < src_height) {
2047
1.22k
    return ScalePlaneBox(src_width, src_height, dst_width, dst_height,
2048
1.22k
                         src_stride, dst_stride, src, dst);
2049
1.22k
  }
2050
11.2k
  if ((dst_width + 1) / 2 == src_width && filtering == kFilterLinear) {
2051
232
    ScalePlaneUp2_Linear(src_width, src_height, dst_width, dst_height,
2052
232
                         src_stride, dst_stride, src, dst);
2053
232
    return 0;
2054
232
  }
2055
11.0k
  if ((dst_height + 1) / 2 == src_height && (dst_width + 1) / 2 == src_width &&
2056
197
      (filtering == kFilterBilinear || filtering == kFilterBox)) {
2057
159
    ScalePlaneUp2_Bilinear(src_width, src_height, dst_width, dst_height,
2058
159
                           src_stride, dst_stride, src, dst);
2059
159
    return 0;
2060
159
  }
2061
10.8k
  if (filtering && dst_height > src_height) {
2062
5.58k
    return ScalePlaneBilinearUp(src_width, src_height, dst_width, dst_height,
2063
5.58k
                                src_stride, dst_stride, src, dst, filtering);
2064
5.58k
  }
2065
5.27k
  if (filtering) {
2066
4.05k
    return ScalePlaneBilinearDown(src_width, src_height, dst_width, dst_height,
2067
4.05k
                                  src_stride, dst_stride, src, dst, filtering);
2068
4.05k
  }
2069
1.22k
  ScalePlaneSimple(src_width, src_height, dst_width, dst_height, src_stride,
2070
1.22k
                   dst_stride, src, dst);
2071
1.22k
  return 0;
2072
5.27k
}
2073
2074
LIBYUV_API
2075
int ScalePlane_16(const uint16_t* src,
2076
                  int src_stride,
2077
                  int src_width,
2078
                  int src_height,
2079
                  uint16_t* dst,
2080
                  int dst_stride,
2081
                  int dst_width,
2082
                  int dst_height,
2083
16.2k
                  enum FilterMode filtering) {
2084
  // Simplify filtering when possible.
2085
16.2k
  filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height,
2086
16.2k
                                filtering);
2087
2088
  // Negative height means invert the image.
2089
16.2k
  if (src_height < 0) {
2090
0
    src_height = -src_height;
2091
0
    src = src + (src_height - 1) * (int64_t)src_stride;
2092
0
    src_stride = -src_stride;
2093
0
  }
2094
  // Use specialized scales to improve performance for common resolutions.
2095
  // For example, all the 1/2 scalings will use ScalePlaneDown2()
2096
16.2k
  if (dst_width == src_width && dst_height == src_height) {
2097
    // Straight copy.
2098
218
    CopyPlane_16(src, src_stride, dst, dst_stride, dst_width, dst_height);
2099
218
    return 0;
2100
218
  }
2101
16.0k
  if (dst_width == src_width && filtering != kFilterBox) {
2102
2.17k
    int dy = 0;
2103
2.17k
    int y = 0;
2104
    // When scaling down, use the center 2 rows to filter.
2105
    // When scaling up, last row of destination uses the last 2 source rows.
2106
2.17k
    if (dst_height <= src_height) {
2107
740
      dy = FixedDiv(src_height, dst_height);
2108
740
      y = CENTERSTART(dy, -32768);  // Subtract 0.5 (32768) to center filter.
2109
      // When scaling up, ensure the last row of destination uses the last
2110
      // source. Avoid divide by zero for dst_height but will do no scaling
2111
      // later.
2112
1.43k
    } else if (src_height > 1 && dst_height > 1) {
2113
1.33k
      dy = FixedDiv1(src_height, dst_height);
2114
1.33k
    }
2115
    // Arbitrary scale vertically, but unscaled horizontally.
2116
2.17k
    ScalePlaneVertical_16(src_height, dst_width, dst_height, src_stride,
2117
2.17k
                          dst_stride, src, dst, 0, y, dy, /*bpp=*/1, filtering);
2118
2.17k
    return 0;
2119
2.17k
  }
2120
13.8k
  if (dst_width <= Abs(src_width) && dst_height <= src_height) {
2121
    // Scale down.
2122
3.25k
    if (4 * dst_width == 3 * src_width && 4 * dst_height == 3 * src_height) {
2123
      // optimized, 3/4
2124
0
      ScalePlaneDown34_16(src_width, src_height, dst_width, dst_height,
2125
0
                          src_stride, dst_stride, src, dst, filtering);
2126
0
      return 0;
2127
0
    }
2128
3.25k
    if (2 * dst_width == src_width && 2 * dst_height == src_height) {
2129
      // optimized, 1/2
2130
66
      ScalePlaneDown2_16(src_width, src_height, dst_width, dst_height,
2131
66
                         src_stride, dst_stride, src, dst, filtering);
2132
66
      return 0;
2133
66
    }
2134
    // 3/8 rounded up for odd sized chroma height.
2135
3.19k
    if (8 * dst_width == 3 * src_width && 8 * dst_height == 3 * src_height) {
2136
      // optimized, 3/8
2137
21
      ScalePlaneDown38_16(src_width, src_height, dst_width, dst_height,
2138
21
                          src_stride, dst_stride, src, dst, filtering);
2139
21
      return 0;
2140
21
    }
2141
3.16k
    if (4 * dst_width == src_width && 4 * dst_height == src_height &&
2142
38
        (filtering == kFilterBox || filtering == kFilterNone)) {
2143
      // optimized, 1/4
2144
38
      ScalePlaneDown4_16(src_width, src_height, dst_width, dst_height,
2145
38
                         src_stride, dst_stride, src, dst, filtering);
2146
38
      return 0;
2147
38
    }
2148
3.16k
  }
2149
13.7k
  if (filtering == kFilterBox && dst_height * 2 < src_height) {
2150
1.34k
    return ScalePlaneBox_16(src_width, src_height, dst_width, dst_height,
2151
1.34k
                            src_stride, dst_stride, src, dst);
2152
1.34k
  }
2153
12.4k
  if ((dst_width + 1) / 2 == src_width && filtering == kFilterLinear) {
2154
0
    ScalePlaneUp2_16_Linear(src_width, src_height, dst_width, dst_height,
2155
0
                            src_stride, dst_stride, src, dst);
2156
0
    return 0;
2157
0
  }
2158
12.4k
  if ((dst_height + 1) / 2 == src_height && (dst_width + 1) / 2 == src_width &&
2159
26
      (filtering == kFilterBilinear || filtering == kFilterBox)) {
2160
0
    ScalePlaneUp2_16_Bilinear(src_width, src_height, dst_width, dst_height,
2161
0
                              src_stride, dst_stride, src, dst);
2162
0
    return 0;
2163
0
  }
2164
12.4k
  if (filtering && dst_height > src_height) {
2165
5.81k
    return ScalePlaneBilinearUp_16(src_width, src_height, dst_width, dst_height,
2166
5.81k
                                   src_stride, dst_stride, src, dst, filtering);
2167
5.81k
  }
2168
6.60k
  if (filtering) {
2169
5.49k
    return ScalePlaneBilinearDown_16(src_width, src_height, dst_width,
2170
5.49k
                                     dst_height, src_stride, dst_stride, src,
2171
5.49k
                                     dst, filtering);
2172
5.49k
  }
2173
1.11k
  ScalePlaneSimple_16(src_width, src_height, dst_width, dst_height, src_stride,
2174
1.11k
                      dst_stride, src, dst);
2175
1.11k
  return 0;
2176
6.60k
}
2177
2178
LIBYUV_API
2179
int ScalePlane_12(const uint16_t* src,
2180
                  int src_stride,
2181
                  int src_width,
2182
                  int src_height,
2183
                  uint16_t* dst,
2184
                  int dst_stride,
2185
                  int dst_width,
2186
                  int dst_height,
2187
16.6k
                  enum FilterMode filtering) {
2188
  // Simplify filtering when possible.
2189
16.6k
  filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height,
2190
16.6k
                                filtering);
2191
2192
  // Negative height means invert the image.
2193
16.6k
  if (src_height < 0) {
2194
0
    src_height = -src_height;
2195
0
    src = src + (src_height - 1) * (int64_t)src_stride;
2196
0
    src_stride = -src_stride;
2197
0
  }
2198
2199
16.6k
  if ((dst_width + 1) / 2 == src_width && filtering == kFilterLinear) {
2200
228
    ScalePlaneUp2_12_Linear(src_width, src_height, dst_width, dst_height,
2201
228
                            src_stride, dst_stride, src, dst);
2202
228
    return 0;
2203
228
  }
2204
16.4k
  if ((dst_height + 1) / 2 == src_height && (dst_width + 1) / 2 == src_width &&
2205
219
      (filtering == kFilterBilinear || filtering == kFilterBox)) {
2206
173
    ScalePlaneUp2_12_Bilinear(src_width, src_height, dst_width, dst_height,
2207
173
                              src_stride, dst_stride, src, dst);
2208
173
    return 0;
2209
173
  }
2210
2211
16.2k
  return ScalePlane_16(src, src_stride, src_width, src_height, dst, dst_stride,
2212
16.2k
                       dst_width, dst_height, filtering);
2213
16.4k
}
2214
2215
// Scale an I420 image.
2216
// This function in turn calls a scaling function for each plane.
2217
2218
LIBYUV_API
2219
int I420Scale(const uint8_t* src_y,
2220
              int src_stride_y,
2221
              const uint8_t* src_u,
2222
              int src_stride_u,
2223
              const uint8_t* src_v,
2224
              int src_stride_v,
2225
              int src_width,
2226
              int src_height,
2227
              uint8_t* dst_y,
2228
              int dst_stride_y,
2229
              uint8_t* dst_u,
2230
              int dst_stride_u,
2231
              uint8_t* dst_v,
2232
              int dst_stride_v,
2233
              int dst_width,
2234
              int dst_height,
2235
0
              enum FilterMode filtering) {
2236
0
  int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
2237
0
  int src_halfheight = SUBSAMPLE(src_height, 1, 1);
2238
0
  int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
2239
0
  int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
2240
0
  int r;
2241
2242
0
  if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
2243
0
      src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
2244
0
      dst_width <= 0 || dst_height <= 0) {
2245
0
    return -1;
2246
0
  }
2247
2248
0
  r = ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y,
2249
0
                 dst_stride_y, dst_width, dst_height, filtering);
2250
0
  if (r != 0) {
2251
0
    return r;
2252
0
  }
2253
0
  r = ScalePlane(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u,
2254
0
                 dst_stride_u, dst_halfwidth, dst_halfheight, filtering);
2255
0
  if (r != 0) {
2256
0
    return r;
2257
0
  }
2258
0
  r = ScalePlane(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v,
2259
0
                 dst_stride_v, dst_halfwidth, dst_halfheight, filtering);
2260
0
  return r;
2261
0
}
2262
2263
LIBYUV_API
2264
int I420Scale_16(const uint16_t* src_y,
2265
                 int src_stride_y,
2266
                 const uint16_t* src_u,
2267
                 int src_stride_u,
2268
                 const uint16_t* src_v,
2269
                 int src_stride_v,
2270
                 int src_width,
2271
                 int src_height,
2272
                 uint16_t* dst_y,
2273
                 int dst_stride_y,
2274
                 uint16_t* dst_u,
2275
                 int dst_stride_u,
2276
                 uint16_t* dst_v,
2277
                 int dst_stride_v,
2278
                 int dst_width,
2279
                 int dst_height,
2280
0
                 enum FilterMode filtering) {
2281
0
  int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
2282
0
  int src_halfheight = SUBSAMPLE(src_height, 1, 1);
2283
0
  int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
2284
0
  int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
2285
0
  int r;
2286
2287
0
  if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
2288
0
      src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
2289
0
      dst_width <= 0 || dst_height <= 0) {
2290
0
    return -1;
2291
0
  }
2292
2293
0
  r = ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y,
2294
0
                    dst_stride_y, dst_width, dst_height, filtering);
2295
0
  if (r != 0) {
2296
0
    return r;
2297
0
  }
2298
0
  r = ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u,
2299
0
                    dst_stride_u, dst_halfwidth, dst_halfheight, filtering);
2300
0
  if (r != 0) {
2301
0
    return r;
2302
0
  }
2303
0
  r = ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v,
2304
0
                    dst_stride_v, dst_halfwidth, dst_halfheight, filtering);
2305
0
  return r;
2306
0
}
2307
2308
LIBYUV_API
2309
int I420Scale_12(const uint16_t* src_y,
2310
                 int src_stride_y,
2311
                 const uint16_t* src_u,
2312
                 int src_stride_u,
2313
                 const uint16_t* src_v,
2314
                 int src_stride_v,
2315
                 int src_width,
2316
                 int src_height,
2317
                 uint16_t* dst_y,
2318
                 int dst_stride_y,
2319
                 uint16_t* dst_u,
2320
                 int dst_stride_u,
2321
                 uint16_t* dst_v,
2322
                 int dst_stride_v,
2323
                 int dst_width,
2324
                 int dst_height,
2325
0
                 enum FilterMode filtering) {
2326
0
  int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
2327
0
  int src_halfheight = SUBSAMPLE(src_height, 1, 1);
2328
0
  int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
2329
0
  int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
2330
0
  int r;
2331
2332
0
  if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
2333
0
      src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
2334
0
      dst_width <= 0 || dst_height <= 0) {
2335
0
    return -1;
2336
0
  }
2337
2338
0
  r = ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y,
2339
0
                    dst_stride_y, dst_width, dst_height, filtering);
2340
0
  if (r != 0) {
2341
0
    return r;
2342
0
  }
2343
0
  r = ScalePlane_12(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u,
2344
0
                    dst_stride_u, dst_halfwidth, dst_halfheight, filtering);
2345
0
  if (r != 0) {
2346
0
    return r;
2347
0
  }
2348
0
  r = ScalePlane_12(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v,
2349
0
                    dst_stride_v, dst_halfwidth, dst_halfheight, filtering);
2350
0
  return r;
2351
0
}
2352
2353
// Scale an I444 image.
2354
// This function in turn calls a scaling function for each plane.
2355
2356
LIBYUV_API
2357
int I444Scale(const uint8_t* src_y,
2358
              int src_stride_y,
2359
              const uint8_t* src_u,
2360
              int src_stride_u,
2361
              const uint8_t* src_v,
2362
              int src_stride_v,
2363
              int src_width,
2364
              int src_height,
2365
              uint8_t* dst_y,
2366
              int dst_stride_y,
2367
              uint8_t* dst_u,
2368
              int dst_stride_u,
2369
              uint8_t* dst_v,
2370
              int dst_stride_v,
2371
              int dst_width,
2372
              int dst_height,
2373
0
              enum FilterMode filtering) {
2374
0
  int r;
2375
2376
0
  if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
2377
0
      src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
2378
0
      dst_width <= 0 || dst_height <= 0) {
2379
0
    return -1;
2380
0
  }
2381
2382
0
  r = ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y,
2383
0
                 dst_stride_y, dst_width, dst_height, filtering);
2384
0
  if (r != 0) {
2385
0
    return r;
2386
0
  }
2387
0
  r = ScalePlane(src_u, src_stride_u, src_width, src_height, dst_u,
2388
0
                 dst_stride_u, dst_width, dst_height, filtering);
2389
0
  if (r != 0) {
2390
0
    return r;
2391
0
  }
2392
0
  r = ScalePlane(src_v, src_stride_v, src_width, src_height, dst_v,
2393
0
                 dst_stride_v, dst_width, dst_height, filtering);
2394
0
  return r;
2395
0
}
2396
2397
// Scales a three-plane image of uint16_t samples in which Y, U and V all
// have the same dimensions. Each plane is scaled by ScalePlane_16.
// Returns -1 if an argument is rejected, otherwise 0 or the first non-zero
// status returned by ScalePlane_16.
LIBYUV_API
int I444Scale_16(const uint16_t* src_y,
                 int src_stride_y,
                 const uint16_t* src_u,
                 int src_stride_u,
                 const uint16_t* src_v,
                 int src_stride_v,
                 int src_width,
                 int src_height,
                 uint16_t* dst_y,
                 int dst_stride_y,
                 uint16_t* dst_u,
                 int dst_stride_u,
                 uint16_t* dst_v,
                 int dst_stride_v,
                 int dst_width,
                 int dst_height,
                 enum FilterMode filtering) {
  int status;

  // Every plane pointer is required.
  if (!src_y || !src_u || !src_v || !dst_y || !dst_u || !dst_v) {
    return -1;
  }
  // Source width must be in 1..32768; source height must be non-zero and at
  // most 32768. Destination dimensions must be positive.
  if (src_width <= 0 || src_width > 32768 || src_height == 0 ||
      src_height > 32768 || dst_width <= 0 || dst_height <= 0) {
    return -1;
  }

  // Scale Y, then U, then V; stop at the first plane that reports an error.
  status = ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y,
                         dst_stride_y, dst_width, dst_height, filtering);
  if (status == 0) {
    status = ScalePlane_16(src_u, src_stride_u, src_width, src_height, dst_u,
                           dst_stride_u, dst_width, dst_height, filtering);
  }
  if (status == 0) {
    status = ScalePlane_16(src_v, src_stride_v, src_width, src_height, dst_v,
                           dst_stride_v, dst_width, dst_height, filtering);
  }
  return status;
}
2437
2438
// Scales a three-plane image of uint16_t samples in which Y, U and V all
// have the same dimensions. Each plane is scaled by ScalePlane_12.
// Returns -1 if an argument is rejected, otherwise 0 or the first non-zero
// status returned by ScalePlane_12.
LIBYUV_API
int I444Scale_12(const uint16_t* src_y,
                 int src_stride_y,
                 const uint16_t* src_u,
                 int src_stride_u,
                 const uint16_t* src_v,
                 int src_stride_v,
                 int src_width,
                 int src_height,
                 uint16_t* dst_y,
                 int dst_stride_y,
                 uint16_t* dst_u,
                 int dst_stride_u,
                 uint16_t* dst_v,
                 int dst_stride_v,
                 int dst_width,
                 int dst_height,
                 enum FilterMode filtering) {
  int status;

  // Every plane pointer is required.
  if (!src_y || !src_u || !src_v || !dst_y || !dst_u || !dst_v) {
    return -1;
  }
  // Source width must be in 1..32768; source height must be non-zero and at
  // most 32768. Destination dimensions must be positive.
  if (src_width <= 0 || src_width > 32768 || src_height == 0 ||
      src_height > 32768 || dst_width <= 0 || dst_height <= 0) {
    return -1;
  }

  // Scale Y, then U, then V; stop at the first plane that reports an error.
  status = ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y,
                         dst_stride_y, dst_width, dst_height, filtering);
  if (status == 0) {
    status = ScalePlane_12(src_u, src_stride_u, src_width, src_height, dst_u,
                           dst_stride_u, dst_width, dst_height, filtering);
  }
  if (status == 0) {
    status = ScalePlane_12(src_v, src_stride_v, src_width, src_height, dst_v,
                           dst_stride_v, dst_width, dst_height, filtering);
  }
  return status;
}
2478
2479
// Scale an I422 image.
2480
// This function in turn calls a scaling function for each plane.
2481
2482
LIBYUV_API
2483
int I422Scale(const uint8_t* src_y,
2484
              int src_stride_y,
2485
              const uint8_t* src_u,
2486
              int src_stride_u,
2487
              const uint8_t* src_v,
2488
              int src_stride_v,
2489
              int src_width,
2490
              int src_height,
2491
              uint8_t* dst_y,
2492
              int dst_stride_y,
2493
              uint8_t* dst_u,
2494
              int dst_stride_u,
2495
              uint8_t* dst_v,
2496
              int dst_stride_v,
2497
              int dst_width,
2498
              int dst_height,
2499
0
              enum FilterMode filtering) {
2500
0
  int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
2501
0
  int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
2502
0
  int r;
2503
2504
0
  if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
2505
0
      src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
2506
0
      dst_width <= 0 || dst_height <= 0) {
2507
0
    return -1;
2508
0
  }
2509
2510
0
  r = ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y,
2511
0
                 dst_stride_y, dst_width, dst_height, filtering);
2512
0
  if (r != 0) {
2513
0
    return r;
2514
0
  }
2515
0
  r = ScalePlane(src_u, src_stride_u, src_halfwidth, src_height, dst_u,
2516
0
                 dst_stride_u, dst_halfwidth, dst_height, filtering);
2517
0
  if (r != 0) {
2518
0
    return r;
2519
0
  }
2520
0
  r = ScalePlane(src_v, src_stride_v, src_halfwidth, src_height, dst_v,
2521
0
                 dst_stride_v, dst_halfwidth, dst_height, filtering);
2522
0
  return r;
2523
0
}
2524
2525
// Scales a three-plane image of uint16_t samples whose U and V planes are
// half the width of Y (rounded up by SUBSAMPLE) and the same height.
// Each plane is scaled on its own by ScalePlane_16.
// Returns -1 if an argument is rejected, otherwise 0 or the first non-zero
// status returned by ScalePlane_16.
LIBYUV_API
int I422Scale_16(const uint16_t* src_y,
                 int src_stride_y,
                 const uint16_t* src_u,
                 int src_stride_u,
                 const uint16_t* src_v,
                 int src_stride_v,
                 int src_width,
                 int src_height,
                 uint16_t* dst_y,
                 int dst_stride_y,
                 uint16_t* dst_u,
                 int dst_stride_u,
                 uint16_t* dst_v,
                 int dst_stride_v,
                 int dst_width,
                 int dst_height,
                 enum FilterMode filtering) {
  // Chroma plane widths; chroma heights equal the luma heights.
  const int src_chroma_width = SUBSAMPLE(src_width, 1, 1);
  const int dst_chroma_width = SUBSAMPLE(dst_width, 1, 1);
  int status;

  // Every plane pointer is required.
  if (!src_y || !src_u || !src_v || !dst_y || !dst_u || !dst_v) {
    return -1;
  }
  // Source width must be in 1..32768; source height must be non-zero and at
  // most 32768. Destination dimensions must be positive.
  if (src_width <= 0 || src_width > 32768 || src_height == 0 ||
      src_height > 32768 || dst_width <= 0 || dst_height <= 0) {
    return -1;
  }

  // Scale Y, then U, then V; stop at the first plane that reports an error.
  status = ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y,
                         dst_stride_y, dst_width, dst_height, filtering);
  if (status == 0) {
    status = ScalePlane_16(src_u, src_stride_u, src_chroma_width, src_height,
                           dst_u, dst_stride_u, dst_chroma_width, dst_height,
                           filtering);
  }
  if (status == 0) {
    status = ScalePlane_16(src_v, src_stride_v, src_chroma_width, src_height,
                           dst_v, dst_stride_v, dst_chroma_width, dst_height,
                           filtering);
  }
  return status;
}
2567
2568
// Scales a three-plane image of uint16_t samples whose U and V planes are
// half the width of Y (rounded up by SUBSAMPLE) and the same height.
// Each plane is scaled on its own by ScalePlane_12.
// Returns -1 if an argument is rejected, otherwise 0 or the first non-zero
// status returned by ScalePlane_12.
LIBYUV_API
int I422Scale_12(const uint16_t* src_y,
                 int src_stride_y,
                 const uint16_t* src_u,
                 int src_stride_u,
                 const uint16_t* src_v,
                 int src_stride_v,
                 int src_width,
                 int src_height,
                 uint16_t* dst_y,
                 int dst_stride_y,
                 uint16_t* dst_u,
                 int dst_stride_u,
                 uint16_t* dst_v,
                 int dst_stride_v,
                 int dst_width,
                 int dst_height,
                 enum FilterMode filtering) {
  // Chroma plane widths; chroma heights equal the luma heights.
  const int src_chroma_width = SUBSAMPLE(src_width, 1, 1);
  const int dst_chroma_width = SUBSAMPLE(dst_width, 1, 1);
  int status;

  // Every plane pointer is required.
  if (!src_y || !src_u || !src_v || !dst_y || !dst_u || !dst_v) {
    return -1;
  }
  // Source width must be in 1..32768; source height must be non-zero and at
  // most 32768. Destination dimensions must be positive.
  if (src_width <= 0 || src_width > 32768 || src_height == 0 ||
      src_height > 32768 || dst_width <= 0 || dst_height <= 0) {
    return -1;
  }

  // Scale Y, then U, then V; stop at the first plane that reports an error.
  status = ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y,
                         dst_stride_y, dst_width, dst_height, filtering);
  if (status == 0) {
    status = ScalePlane_12(src_u, src_stride_u, src_chroma_width, src_height,
                           dst_u, dst_stride_u, dst_chroma_width, dst_height,
                           filtering);
  }
  if (status == 0) {
    status = ScalePlane_12(src_v, src_stride_v, src_chroma_width, src_height,
                           dst_v, dst_stride_v, dst_chroma_width, dst_height,
                           filtering);
  }
  return status;
}
2610
2611
// Scale an NV12 image.
2612
// This function in turn calls a scaling function for each plane.
2613
2614
LIBYUV_API
2615
int NV12Scale(const uint8_t* src_y,
2616
              int src_stride_y,
2617
              const uint8_t* src_uv,
2618
              int src_stride_uv,
2619
              int src_width,
2620
              int src_height,
2621
              uint8_t* dst_y,
2622
              int dst_stride_y,
2623
              uint8_t* dst_uv,
2624
              int dst_stride_uv,
2625
              int dst_width,
2626
              int dst_height,
2627
0
              enum FilterMode filtering) {
2628
0
  int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
2629
0
  int src_halfheight = SUBSAMPLE(src_height, 1, 1);
2630
0
  int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
2631
0
  int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
2632
0
  int r;
2633
2634
0
  if (!src_y || !src_uv || src_width <= 0 || src_height == 0 ||
2635
0
      src_width > 32768 || src_height > 32768 || !dst_y || !dst_uv ||
2636
0
      dst_width <= 0 || dst_height <= 0) {
2637
0
    return -1;
2638
0
  }
2639
2640
0
  r = ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y,
2641
0
                 dst_stride_y, dst_width, dst_height, filtering);
2642
0
  if (r != 0) {
2643
0
    return r;
2644
0
  }
2645
0
  r = UVScale(src_uv, src_stride_uv, src_halfwidth, src_halfheight, dst_uv,
2646
0
              dst_stride_uv, dst_halfwidth, dst_halfheight, filtering);
2647
0
  return r;
2648
0
}
2649
2650
// Scales a two-plane image whose src_uv plane has the same width and height
// as the Y plane. Y is scaled by ScalePlane and the UV plane by UVScale.
// Returns -1 if an argument is rejected, otherwise 0 or the first non-zero
// status returned by ScalePlane or UVScale.
LIBYUV_API
int NV24Scale(const uint8_t* src_y,
              int src_stride_y,
              const uint8_t* src_uv,
              int src_stride_uv,
              int src_width,
              int src_height,
              uint8_t* dst_y,
              int dst_stride_y,
              uint8_t* dst_uv,
              int dst_stride_uv,
              int dst_width,
              int dst_height,
              enum FilterMode filtering) {
  int status;

  // Every plane pointer is required.
  if (!src_y || !src_uv || !dst_y || !dst_uv) {
    return -1;
  }
  // Source width must be in 1..32768; source height must be non-zero and at
  // most 32768. Destination dimensions must be positive.
  if (src_width <= 0 || src_width > 32768 || src_height == 0 ||
      src_height > 32768 || dst_width <= 0 || dst_height <= 0) {
    return -1;
  }

  // Scale Y first; the UV plane is only processed if Y succeeded.
  status = ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y,
                      dst_stride_y, dst_width, dst_height, filtering);
  if (status == 0) {
    status = UVScale(src_uv, src_stride_uv, src_width, src_height, dst_uv,
                     dst_stride_uv, dst_width, dst_height, filtering);
  }
  return status;
}
2681
2682
// Deprecated api
2683
LIBYUV_API
2684
int Scale(const uint8_t* src_y,
2685
          const uint8_t* src_u,
2686
          const uint8_t* src_v,
2687
          int src_stride_y,
2688
          int src_stride_u,
2689
          int src_stride_v,
2690
          int src_width,
2691
          int src_height,
2692
          uint8_t* dst_y,
2693
          uint8_t* dst_u,
2694
          uint8_t* dst_v,
2695
          int dst_stride_y,
2696
          int dst_stride_u,
2697
          int dst_stride_v,
2698
          int dst_width,
2699
          int dst_height,
2700
0
          LIBYUV_BOOL interpolate) {
2701
0
  return I420Scale(src_y, src_stride_y, src_u, src_stride_u, src_v,
2702
0
                   src_stride_v, src_width, src_height, dst_y, dst_stride_y,
2703
0
                   dst_u, dst_stride_u, dst_v, dst_stride_v, dst_width,
2704
0
                   dst_height, interpolate ? kFilterBox : kFilterNone);
2705
0
}
2706
2707
#ifdef __cplusplus
2708
}  // extern "C"
2709
}  // namespace libyuv
2710
#endif