Coverage Report

Created: 2025-07-12 06:45

/src/libavif/ext/libyuv/source/scale.cc
Line
Count
Source (jump to first uncovered line)
1
/*
2
 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5
 *  that can be found in the LICENSE file in the root of the source
6
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS. All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
9
 */
10
11
#include "libyuv/scale.h"
12
13
#include <assert.h>
14
#include <string.h>
15
16
#include "libyuv/cpu_id.h"
17
#include "libyuv/planar_functions.h"  // For CopyPlane
18
#include "libyuv/row.h"
19
#include "libyuv/scale_row.h"
20
#include "libyuv/scale_uv.h"  // For UVScale
21
22
#ifdef __cplusplus
23
namespace libyuv {
24
extern "C" {
25
#endif
26
27
47.0k
// Returns v unchanged when it is non-negative, otherwise its negation.
static __inline int Abs(int v) {
  if (v < 0) {
    return -v;
  }
  return v;
}
30
31
0
// Computes (v + a) >> s for non-negative v; for negative v the same
// computation is applied to -v and the result is negated. Every argument and
// the whole expansion are parenthesized so the macro can be used with
// expression arguments and inside larger expressions. v is evaluated more
// than once, so avoid arguments with side effects.
#define SUBSAMPLE(v, a, s) \
  (((v) < 0) ? (-((-(v) + (a)) >> (s))) : (((v) + (a)) >> (s)))
32
1.25k
// Computes (dx >> 1) + s for non-negative dx; for negative dx the same
// computation is applied to -dx and the result is negated. Every argument and
// the whole expansion are parenthesized so the macro can be used with
// expression arguments and inside larger expressions. dx is evaluated more
// than once, so avoid arguments with side effects.
#define CENTERSTART(dx, s) \
  (((dx) < 0) ? -((-(dx) >> 1) + (s)) : (((dx) >> 1) + (s)))
33
34
// Scale plane, 1/2
35
// This is an optimized version for scaling down a plane to 1/2 of
36
// its original size.
37
38
static void ScalePlaneDown2(int src_width,
39
                            int src_height,
40
                            int dst_width,
41
                            int dst_height,
42
                            int src_stride,
43
                            int dst_stride,
44
                            const uint8_t* src_ptr,
45
                            uint8_t* dst_ptr,
46
65
                            enum FilterMode filtering) {
47
65
  int y;
48
65
  void (*ScaleRowDown2)(const uint8_t* src_ptr, ptrdiff_t src_stride,
49
65
                        uint8_t* dst_ptr, int dst_width) =
50
65
      filtering == kFilterNone
51
65
          ? ScaleRowDown2_C
52
65
          : (filtering == kFilterLinear ? ScaleRowDown2Linear_C
53
65
                                        : ScaleRowDown2Box_C);
54
65
  int row_stride = src_stride * 2;
55
65
  (void)src_width;
56
65
  (void)src_height;
57
65
  if (!filtering) {
58
0
    src_ptr += src_stride;  // Point to odd rows.
59
0
    src_stride = 0;
60
0
  }
61
62
#if defined(HAS_SCALEROWDOWN2_NEON)
63
  if (TestCpuFlag(kCpuHasNEON)) {
64
    ScaleRowDown2 =
65
        filtering == kFilterNone
66
            ? ScaleRowDown2_Any_NEON
67
            : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_NEON
68
                                          : ScaleRowDown2Box_Any_NEON);
69
    if (IS_ALIGNED(dst_width, 16)) {
70
      ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_NEON
71
                                               : (filtering == kFilterLinear
72
                                                      ? ScaleRowDown2Linear_NEON
73
                                                      : ScaleRowDown2Box_NEON);
74
    }
75
  }
76
#endif
77
#if defined(HAS_SCALEROWDOWN2_SME)
78
  if (TestCpuFlag(kCpuHasSME)) {
79
    ScaleRowDown2 = filtering == kFilterNone     ? ScaleRowDown2_SME
80
                    : filtering == kFilterLinear ? ScaleRowDown2Linear_SME
81
                                                 : ScaleRowDown2Box_SME;
82
  }
83
#endif
84
65
#if defined(HAS_SCALEROWDOWN2_SSSE3)
85
65
  if (TestCpuFlag(kCpuHasSSSE3)) {
86
65
    ScaleRowDown2 =
87
65
        filtering == kFilterNone
88
65
            ? ScaleRowDown2_Any_SSSE3
89
65
            : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_SSSE3
90
65
                                          : ScaleRowDown2Box_Any_SSSE3);
91
65
    if (IS_ALIGNED(dst_width, 16)) {
92
0
      ScaleRowDown2 =
93
0
          filtering == kFilterNone
94
0
              ? ScaleRowDown2_SSSE3
95
0
              : (filtering == kFilterLinear ? ScaleRowDown2Linear_SSSE3
96
0
                                            : ScaleRowDown2Box_SSSE3);
97
0
    }
98
65
  }
99
65
#endif
100
65
#if defined(HAS_SCALEROWDOWN2_AVX2)
101
65
  if (TestCpuFlag(kCpuHasAVX2)) {
102
65
    ScaleRowDown2 =
103
65
        filtering == kFilterNone
104
65
            ? ScaleRowDown2_Any_AVX2
105
65
            : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_AVX2
106
65
                                          : ScaleRowDown2Box_Any_AVX2);
107
65
    if (IS_ALIGNED(dst_width, 32)) {
108
0
      ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_AVX2
109
0
                                               : (filtering == kFilterLinear
110
0
                                                      ? ScaleRowDown2Linear_AVX2
111
0
                                                      : ScaleRowDown2Box_AVX2);
112
0
    }
113
65
  }
114
65
#endif
115
#if defined(HAS_SCALEROWDOWN2_MSA)
116
  if (TestCpuFlag(kCpuHasMSA)) {
117
    ScaleRowDown2 =
118
        filtering == kFilterNone
119
            ? ScaleRowDown2_Any_MSA
120
            : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_MSA
121
                                          : ScaleRowDown2Box_Any_MSA);
122
    if (IS_ALIGNED(dst_width, 32)) {
123
      ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_MSA
124
                                               : (filtering == kFilterLinear
125
                                                      ? ScaleRowDown2Linear_MSA
126
                                                      : ScaleRowDown2Box_MSA);
127
    }
128
  }
129
#endif
130
#if defined(HAS_SCALEROWDOWN2_LSX)
131
  if (TestCpuFlag(kCpuHasLSX)) {
132
    ScaleRowDown2 =
133
        filtering == kFilterNone
134
            ? ScaleRowDown2_Any_LSX
135
            : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_LSX
136
                                          : ScaleRowDown2Box_Any_LSX);
137
    if (IS_ALIGNED(dst_width, 32)) {
138
      ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_LSX
139
                                               : (filtering == kFilterLinear
140
                                                      ? ScaleRowDown2Linear_LSX
141
                                                      : ScaleRowDown2Box_LSX);
142
    }
143
  }
144
#endif
145
#if defined(HAS_SCALEROWDOWN2_RVV)
146
  if (TestCpuFlag(kCpuHasRVV)) {
147
    ScaleRowDown2 = filtering == kFilterNone
148
                        ? ScaleRowDown2_RVV
149
                        : (filtering == kFilterLinear ? ScaleRowDown2Linear_RVV
150
                                                      : ScaleRowDown2Box_RVV);
151
  }
152
#endif
153
154
65
  if (filtering == kFilterLinear) {
155
0
    src_stride = 0;
156
0
  }
157
  // TODO(fbarchard): Loop through source height to allow odd height.
158
1.70k
  for (y = 0; y < dst_height; ++y) {
159
1.64k
    ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
160
1.64k
    src_ptr += row_stride;
161
1.64k
    dst_ptr += dst_stride;
162
1.64k
  }
163
65
}
164
165
static void ScalePlaneDown2_16(int src_width,
166
                               int src_height,
167
                               int dst_width,
168
                               int dst_height,
169
                               int src_stride,
170
                               int dst_stride,
171
                               const uint16_t* src_ptr,
172
                               uint16_t* dst_ptr,
173
74
                               enum FilterMode filtering) {
174
74
  int y;
175
74
  void (*ScaleRowDown2)(const uint16_t* src_ptr, ptrdiff_t src_stride,
176
74
                        uint16_t* dst_ptr, int dst_width) =
177
74
      filtering == kFilterNone
178
74
          ? ScaleRowDown2_16_C
179
74
          : (filtering == kFilterLinear ? ScaleRowDown2Linear_16_C
180
74
                                        : ScaleRowDown2Box_16_C);
181
74
  int row_stride = src_stride * 2;
182
74
  (void)src_width;
183
74
  (void)src_height;
184
74
  if (!filtering) {
185
0
    src_ptr += src_stride;  // Point to odd rows.
186
0
    src_stride = 0;
187
0
  }
188
189
#if defined(HAS_SCALEROWDOWN2_16_NEON)
190
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 16)) {
191
    ScaleRowDown2 = filtering == kFilterNone     ? ScaleRowDown2_16_NEON
192
                    : filtering == kFilterLinear ? ScaleRowDown2Linear_16_NEON
193
                                                 : ScaleRowDown2Box_16_NEON;
194
  }
195
#endif
196
#if defined(HAS_SCALEROWDOWN2_16_SME)
197
  if (TestCpuFlag(kCpuHasSME)) {
198
    ScaleRowDown2 = filtering == kFilterNone     ? ScaleRowDown2_16_SME
199
                    : filtering == kFilterLinear ? ScaleRowDown2Linear_16_SME
200
                                                 : ScaleRowDown2Box_16_SME;
201
  }
202
#endif
203
#if defined(HAS_SCALEROWDOWN2_16_SSE2)
204
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) {
205
    ScaleRowDown2 =
206
        filtering == kFilterNone
207
            ? ScaleRowDown2_16_SSE2
208
            : (filtering == kFilterLinear ? ScaleRowDown2Linear_16_SSE2
209
                                          : ScaleRowDown2Box_16_SSE2);
210
  }
211
#endif
212
213
74
  if (filtering == kFilterLinear) {
214
0
    src_stride = 0;
215
0
  }
216
  // TODO(fbarchard): Loop through source height to allow odd height.
217
2.76k
  for (y = 0; y < dst_height; ++y) {
218
2.68k
    ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
219
2.68k
    src_ptr += row_stride;
220
2.68k
    dst_ptr += dst_stride;
221
2.68k
  }
222
74
}
223
224
void ScalePlaneDown2_16To8(int src_width,
225
                           int src_height,
226
                           int dst_width,
227
                           int dst_height,
228
                           int src_stride,
229
                           int dst_stride,
230
                           const uint16_t* src_ptr,
231
                           uint8_t* dst_ptr,
232
                           int scale,
233
0
                           enum FilterMode filtering) {
234
0
  int y;
235
0
  void (*ScaleRowDown2)(const uint16_t* src_ptr, ptrdiff_t src_stride,
236
0
                        uint8_t* dst_ptr, int dst_width, int scale) =
237
0
      (src_width & 1)
238
0
          ? (filtering == kFilterNone
239
0
                 ? ScaleRowDown2_16To8_Odd_C
240
0
                 : (filtering == kFilterLinear ? ScaleRowDown2Linear_16To8_Odd_C
241
0
                                               : ScaleRowDown2Box_16To8_Odd_C))
242
0
          : (filtering == kFilterNone
243
0
                 ? ScaleRowDown2_16To8_C
244
0
                 : (filtering == kFilterLinear ? ScaleRowDown2Linear_16To8_C
245
0
                                               : ScaleRowDown2Box_16To8_C));
246
0
  int row_stride = src_stride * 2;
247
0
  (void)dst_height;
248
0
  if (!filtering) {
249
0
    src_ptr += src_stride;  // Point to odd rows.
250
0
    src_stride = 0;
251
0
  }
252
253
0
  if (filtering == kFilterLinear) {
254
0
    src_stride = 0;
255
0
  }
256
0
  for (y = 0; y < src_height / 2; ++y) {
257
0
    ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width, scale);
258
0
    src_ptr += row_stride;
259
0
    dst_ptr += dst_stride;
260
0
  }
261
0
  if (src_height & 1) {
262
0
    if (!filtering) {
263
0
      src_ptr -= src_stride;  // Point to last row.
264
0
    }
265
0
    ScaleRowDown2(src_ptr, 0, dst_ptr, dst_width, scale);
266
0
  }
267
0
}
268
269
// Scale plane, 1/4
270
// This is an optimized version for scaling down a plane to 1/4 of
271
// its original size.
272
273
static void ScalePlaneDown4(int src_width,
274
                            int src_height,
275
                            int dst_width,
276
                            int dst_height,
277
                            int src_stride,
278
                            int dst_stride,
279
                            const uint8_t* src_ptr,
280
                            uint8_t* dst_ptr,
281
44
                            enum FilterMode filtering) {
282
44
  int y;
283
44
  void (*ScaleRowDown4)(const uint8_t* src_ptr, ptrdiff_t src_stride,
284
44
                        uint8_t* dst_ptr, int dst_width) =
285
44
      filtering ? ScaleRowDown4Box_C : ScaleRowDown4_C;
286
44
  int row_stride = src_stride * 4;
287
44
  (void)src_width;
288
44
  (void)src_height;
289
44
  if (!filtering) {
290
0
    src_ptr += src_stride * 2;  // Point to row 2.
291
0
    src_stride = 0;
292
0
  }
293
#if defined(HAS_SCALEROWDOWN4_NEON)
294
  if (TestCpuFlag(kCpuHasNEON)) {
295
    ScaleRowDown4 =
296
        filtering ? ScaleRowDown4Box_Any_NEON : ScaleRowDown4_Any_NEON;
297
    if (IS_ALIGNED(dst_width, 16)) {
298
      ScaleRowDown4 = filtering ? ScaleRowDown4Box_NEON : ScaleRowDown4_NEON;
299
    }
300
  }
301
#endif
302
44
#if defined(HAS_SCALEROWDOWN4_SSSE3)
303
44
  if (TestCpuFlag(kCpuHasSSSE3)) {
304
44
    ScaleRowDown4 =
305
44
        filtering ? ScaleRowDown4Box_Any_SSSE3 : ScaleRowDown4_Any_SSSE3;
306
44
    if (IS_ALIGNED(dst_width, 8)) {
307
0
      ScaleRowDown4 = filtering ? ScaleRowDown4Box_SSSE3 : ScaleRowDown4_SSSE3;
308
0
    }
309
44
  }
310
44
#endif
311
44
#if defined(HAS_SCALEROWDOWN4_AVX2)
312
44
  if (TestCpuFlag(kCpuHasAVX2)) {
313
44
    ScaleRowDown4 =
314
44
        filtering ? ScaleRowDown4Box_Any_AVX2 : ScaleRowDown4_Any_AVX2;
315
44
    if (IS_ALIGNED(dst_width, 16)) {
316
0
      ScaleRowDown4 = filtering ? ScaleRowDown4Box_AVX2 : ScaleRowDown4_AVX2;
317
0
    }
318
44
  }
319
44
#endif
320
#if defined(HAS_SCALEROWDOWN4_MSA)
321
  if (TestCpuFlag(kCpuHasMSA)) {
322
    ScaleRowDown4 =
323
        filtering ? ScaleRowDown4Box_Any_MSA : ScaleRowDown4_Any_MSA;
324
    if (IS_ALIGNED(dst_width, 16)) {
325
      ScaleRowDown4 = filtering ? ScaleRowDown4Box_MSA : ScaleRowDown4_MSA;
326
    }
327
  }
328
#endif
329
#if defined(HAS_SCALEROWDOWN4_LSX)
330
  if (TestCpuFlag(kCpuHasLSX)) {
331
    ScaleRowDown4 =
332
        filtering ? ScaleRowDown4Box_Any_LSX : ScaleRowDown4_Any_LSX;
333
    if (IS_ALIGNED(dst_width, 16)) {
334
      ScaleRowDown4 = filtering ? ScaleRowDown4Box_LSX : ScaleRowDown4_LSX;
335
    }
336
  }
337
#endif
338
#if defined(HAS_SCALEROWDOWN4_RVV)
339
  if (TestCpuFlag(kCpuHasRVV)) {
340
    ScaleRowDown4 = filtering ? ScaleRowDown4Box_RVV : ScaleRowDown4_RVV;
341
  }
342
#endif
343
344
44
  if (filtering == kFilterLinear) {
345
0
    src_stride = 0;
346
0
  }
347
498
  for (y = 0; y < dst_height; ++y) {
348
454
    ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width);
349
454
    src_ptr += row_stride;
350
454
    dst_ptr += dst_stride;
351
454
  }
352
44
}
353
354
static void ScalePlaneDown4_16(int src_width,
355
                               int src_height,
356
                               int dst_width,
357
                               int dst_height,
358
                               int src_stride,
359
                               int dst_stride,
360
                               const uint16_t* src_ptr,
361
                               uint16_t* dst_ptr,
362
38
                               enum FilterMode filtering) {
363
38
  int y;
364
38
  void (*ScaleRowDown4)(const uint16_t* src_ptr, ptrdiff_t src_stride,
365
38
                        uint16_t* dst_ptr, int dst_width) =
366
38
      filtering ? ScaleRowDown4Box_16_C : ScaleRowDown4_16_C;
367
38
  int row_stride = src_stride * 4;
368
38
  (void)src_width;
369
38
  (void)src_height;
370
38
  if (!filtering) {
371
0
    src_ptr += src_stride * 2;  // Point to row 2.
372
0
    src_stride = 0;
373
0
  }
374
#if defined(HAS_SCALEROWDOWN4_16_NEON)
375
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8)) {
376
    ScaleRowDown4 =
377
        filtering ? ScaleRowDown4Box_16_NEON : ScaleRowDown4_16_NEON;
378
  }
379
#endif
380
#if defined(HAS_SCALEROWDOWN4_16_SSE2)
381
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
382
    ScaleRowDown4 =
383
        filtering ? ScaleRowDown4Box_16_SSE2 : ScaleRowDown4_16_SSE2;
384
  }
385
#endif
386
387
38
  if (filtering == kFilterLinear) {
388
0
    src_stride = 0;
389
0
  }
390
304
  for (y = 0; y < dst_height; ++y) {
391
266
    ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width);
392
266
    src_ptr += row_stride;
393
266
    dst_ptr += dst_stride;
394
266
  }
395
38
}
396
397
// Scale plane down, 3/4
398
static void ScalePlaneDown34(int src_width,
399
                             int src_height,
400
                             int dst_width,
401
                             int dst_height,
402
                             int src_stride,
403
                             int dst_stride,
404
                             const uint8_t* src_ptr,
405
                             uint8_t* dst_ptr,
406
27
                             enum FilterMode filtering) {
407
27
  int y;
408
27
  void (*ScaleRowDown34_0)(const uint8_t* src_ptr, ptrdiff_t src_stride,
409
27
                           uint8_t* dst_ptr, int dst_width);
410
27
  void (*ScaleRowDown34_1)(const uint8_t* src_ptr, ptrdiff_t src_stride,
411
27
                           uint8_t* dst_ptr, int dst_width);
412
27
  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
413
27
  (void)src_width;
414
27
  (void)src_height;
415
27
  assert(dst_width % 3 == 0);
416
27
  if (!filtering) {
417
0
    ScaleRowDown34_0 = ScaleRowDown34_C;
418
0
    ScaleRowDown34_1 = ScaleRowDown34_C;
419
27
  } else {
420
27
    ScaleRowDown34_0 = ScaleRowDown34_0_Box_C;
421
27
    ScaleRowDown34_1 = ScaleRowDown34_1_Box_C;
422
27
  }
423
#if defined(HAS_SCALEROWDOWN34_NEON)
424
  if (TestCpuFlag(kCpuHasNEON)) {
425
#if defined(__aarch64__)
426
    if (dst_width % 48 == 0) {
427
#else
428
    if (dst_width % 24 == 0) {
429
#endif
430
      if (!filtering) {
431
        ScaleRowDown34_0 = ScaleRowDown34_NEON;
432
        ScaleRowDown34_1 = ScaleRowDown34_NEON;
433
      } else {
434
        ScaleRowDown34_0 = ScaleRowDown34_0_Box_NEON;
435
        ScaleRowDown34_1 = ScaleRowDown34_1_Box_NEON;
436
      }
437
    } else {
438
      if (!filtering) {
439
        ScaleRowDown34_0 = ScaleRowDown34_Any_NEON;
440
        ScaleRowDown34_1 = ScaleRowDown34_Any_NEON;
441
      } else {
442
        ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_NEON;
443
        ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_NEON;
444
      }
445
    }
446
  }
447
#endif
448
#if defined(HAS_SCALEROWDOWN34_MSA)
449
  if (TestCpuFlag(kCpuHasMSA)) {
450
    if (dst_width % 48 == 0) {
451
      if (!filtering) {
452
        ScaleRowDown34_0 = ScaleRowDown34_MSA;
453
        ScaleRowDown34_1 = ScaleRowDown34_MSA;
454
      } else {
455
        ScaleRowDown34_0 = ScaleRowDown34_0_Box_MSA;
456
        ScaleRowDown34_1 = ScaleRowDown34_1_Box_MSA;
457
      }
458
    } else {
459
      if (!filtering) {
460
        ScaleRowDown34_0 = ScaleRowDown34_Any_MSA;
461
        ScaleRowDown34_1 = ScaleRowDown34_Any_MSA;
462
      } else {
463
        ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_MSA;
464
        ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_MSA;
465
      }
466
    }
467
  }
468
#endif
469
#if defined(HAS_SCALEROWDOWN34_LSX)
470
  if (TestCpuFlag(kCpuHasLSX)) {
471
    if (dst_width % 48 == 0) {
472
      if (!filtering) {
473
        ScaleRowDown34_0 = ScaleRowDown34_LSX;
474
        ScaleRowDown34_1 = ScaleRowDown34_LSX;
475
      } else {
476
        ScaleRowDown34_0 = ScaleRowDown34_0_Box_LSX;
477
        ScaleRowDown34_1 = ScaleRowDown34_1_Box_LSX;
478
      }
479
    } else {
480
      if (!filtering) {
481
        ScaleRowDown34_0 = ScaleRowDown34_Any_LSX;
482
        ScaleRowDown34_1 = ScaleRowDown34_Any_LSX;
483
      } else {
484
        ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_LSX;
485
        ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_LSX;
486
      }
487
    }
488
  }
489
#endif
490
27
#if defined(HAS_SCALEROWDOWN34_SSSE3)
491
27
  if (TestCpuFlag(kCpuHasSSSE3)) {
492
27
    if (dst_width % 24 == 0) {
493
0
      if (!filtering) {
494
0
        ScaleRowDown34_0 = ScaleRowDown34_SSSE3;
495
0
        ScaleRowDown34_1 = ScaleRowDown34_SSSE3;
496
0
      } else {
497
0
        ScaleRowDown34_0 = ScaleRowDown34_0_Box_SSSE3;
498
0
        ScaleRowDown34_1 = ScaleRowDown34_1_Box_SSSE3;
499
0
      }
500
27
    } else {
501
27
      if (!filtering) {
502
0
        ScaleRowDown34_0 = ScaleRowDown34_Any_SSSE3;
503
0
        ScaleRowDown34_1 = ScaleRowDown34_Any_SSSE3;
504
27
      } else {
505
27
        ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_SSSE3;
506
27
        ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_SSSE3;
507
27
      }
508
27
    }
509
27
  }
510
27
#endif
511
#if defined(HAS_SCALEROWDOWN34_RVV)
512
  if (TestCpuFlag(kCpuHasRVV)) {
513
    if (!filtering) {
514
      ScaleRowDown34_0 = ScaleRowDown34_RVV;
515
      ScaleRowDown34_1 = ScaleRowDown34_RVV;
516
    } else {
517
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_RVV;
518
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_RVV;
519
    }
520
  }
521
#endif
522
523
618
  for (y = 0; y < dst_height - 2; y += 3) {
524
591
    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
525
591
    src_ptr += src_stride;
526
591
    dst_ptr += dst_stride;
527
591
    ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width);
528
591
    src_ptr += src_stride;
529
591
    dst_ptr += dst_stride;
530
591
    ScaleRowDown34_0(src_ptr + src_stride, -filter_stride, dst_ptr, dst_width);
531
591
    src_ptr += src_stride * 2;
532
591
    dst_ptr += dst_stride;
533
591
  }
534
535
  // Remainder 1 or 2 rows with last row vertically unfiltered
536
27
  if ((dst_height % 3) == 2) {
537
0
    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
538
0
    src_ptr += src_stride;
539
0
    dst_ptr += dst_stride;
540
0
    ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width);
541
27
  } else if ((dst_height % 3) == 1) {
542
0
    ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width);
543
0
  }
544
27
}
545
546
static void ScalePlaneDown34_16(int src_width,
547
                                int src_height,
548
                                int dst_width,
549
                                int dst_height,
550
                                int src_stride,
551
                                int dst_stride,
552
                                const uint16_t* src_ptr,
553
                                uint16_t* dst_ptr,
554
30
                                enum FilterMode filtering) {
555
30
  int y;
556
30
  void (*ScaleRowDown34_0)(const uint16_t* src_ptr, ptrdiff_t src_stride,
557
30
                           uint16_t* dst_ptr, int dst_width);
558
30
  void (*ScaleRowDown34_1)(const uint16_t* src_ptr, ptrdiff_t src_stride,
559
30
                           uint16_t* dst_ptr, int dst_width);
560
30
  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
561
30
  (void)src_width;
562
30
  (void)src_height;
563
30
  assert(dst_width % 3 == 0);
564
30
  if (!filtering) {
565
0
    ScaleRowDown34_0 = ScaleRowDown34_16_C;
566
0
    ScaleRowDown34_1 = ScaleRowDown34_16_C;
567
30
  } else {
568
30
    ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_C;
569
30
    ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_C;
570
30
  }
571
#if defined(HAS_SCALEROWDOWN34_16_NEON)
572
  if (TestCpuFlag(kCpuHasNEON) && (dst_width % 24 == 0)) {
573
    if (!filtering) {
574
      ScaleRowDown34_0 = ScaleRowDown34_16_NEON;
575
      ScaleRowDown34_1 = ScaleRowDown34_16_NEON;
576
    } else {
577
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_NEON;
578
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_NEON;
579
    }
580
  }
581
#endif
582
#if defined(HAS_SCALEROWDOWN34_16_SSSE3)
583
  if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) {
584
    if (!filtering) {
585
      ScaleRowDown34_0 = ScaleRowDown34_16_SSSE3;
586
      ScaleRowDown34_1 = ScaleRowDown34_16_SSSE3;
587
    } else {
588
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_SSSE3;
589
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_SSSE3;
590
    }
591
  }
592
#endif
593
594
627
  for (y = 0; y < dst_height - 2; y += 3) {
595
597
    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
596
597
    src_ptr += src_stride;
597
597
    dst_ptr += dst_stride;
598
597
    ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width);
599
597
    src_ptr += src_stride;
600
597
    dst_ptr += dst_stride;
601
597
    ScaleRowDown34_0(src_ptr + src_stride, -filter_stride, dst_ptr, dst_width);
602
597
    src_ptr += src_stride * 2;
603
597
    dst_ptr += dst_stride;
604
597
  }
605
606
  // Remainder 1 or 2 rows with last row vertically unfiltered
607
30
  if ((dst_height % 3) == 2) {
608
0
    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
609
0
    src_ptr += src_stride;
610
0
    dst_ptr += dst_stride;
611
0
    ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width);
612
30
  } else if ((dst_height % 3) == 1) {
613
0
    ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width);
614
0
  }
615
30
}
616
617
// Scale plane, 3/8
618
// This is an optimized version for scaling down a plane to 3/8
619
// of its original size.
620
//
621
// Uses box filters arranged like this
622
// aaabbbcc -> abc
623
// aaabbbcc    def
624
// aaabbbcc    ghi
625
// dddeeeff
626
// dddeeeff
627
// dddeeeff
628
// ggghhhii
629
// ggghhhii
630
// Boxes are 3x3, 2x3, 3x2 and 2x2
631
632
static void ScalePlaneDown38(int src_width,
633
                             int src_height,
634
                             int dst_width,
635
                             int dst_height,
636
                             int src_stride,
637
                             int dst_stride,
638
                             const uint8_t* src_ptr,
639
                             uint8_t* dst_ptr,
640
22
                             enum FilterMode filtering) {
641
22
  int y;
642
22
  void (*ScaleRowDown38_3)(const uint8_t* src_ptr, ptrdiff_t src_stride,
643
22
                           uint8_t* dst_ptr, int dst_width);
644
22
  void (*ScaleRowDown38_2)(const uint8_t* src_ptr, ptrdiff_t src_stride,
645
22
                           uint8_t* dst_ptr, int dst_width);
646
22
  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
647
22
  assert(dst_width % 3 == 0);
648
22
  (void)src_width;
649
22
  (void)src_height;
650
22
  if (!filtering) {
651
0
    ScaleRowDown38_3 = ScaleRowDown38_C;
652
0
    ScaleRowDown38_2 = ScaleRowDown38_C;
653
22
  } else {
654
22
    ScaleRowDown38_3 = ScaleRowDown38_3_Box_C;
655
22
    ScaleRowDown38_2 = ScaleRowDown38_2_Box_C;
656
22
  }
657
658
#if defined(HAS_SCALEROWDOWN38_NEON)
659
  if (TestCpuFlag(kCpuHasNEON)) {
660
    if (!filtering) {
661
      ScaleRowDown38_3 = ScaleRowDown38_Any_NEON;
662
      ScaleRowDown38_2 = ScaleRowDown38_Any_NEON;
663
    } else {
664
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_NEON;
665
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_NEON;
666
    }
667
    if (dst_width % 12 == 0) {
668
      if (!filtering) {
669
        ScaleRowDown38_3 = ScaleRowDown38_NEON;
670
        ScaleRowDown38_2 = ScaleRowDown38_NEON;
671
      } else {
672
        ScaleRowDown38_3 = ScaleRowDown38_3_Box_NEON;
673
        ScaleRowDown38_2 = ScaleRowDown38_2_Box_NEON;
674
      }
675
    }
676
  }
677
#endif
678
22
#if defined(HAS_SCALEROWDOWN38_SSSE3)
679
22
  if (TestCpuFlag(kCpuHasSSSE3)) {
680
22
    if (!filtering) {
681
0
      ScaleRowDown38_3 = ScaleRowDown38_Any_SSSE3;
682
0
      ScaleRowDown38_2 = ScaleRowDown38_Any_SSSE3;
683
22
    } else {
684
22
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_SSSE3;
685
22
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_SSSE3;
686
22
    }
687
22
    if (dst_width % 12 == 0 && !filtering) {
688
0
      ScaleRowDown38_3 = ScaleRowDown38_SSSE3;
689
0
      ScaleRowDown38_2 = ScaleRowDown38_SSSE3;
690
0
    }
691
22
    if (dst_width % 6 == 0 && filtering) {
692
0
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_SSSE3;
693
0
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_SSSE3;
694
0
    }
695
22
  }
696
22
#endif
697
#if defined(HAS_SCALEROWDOWN38_MSA)
698
  if (TestCpuFlag(kCpuHasMSA)) {
699
    if (!filtering) {
700
      ScaleRowDown38_3 = ScaleRowDown38_Any_MSA;
701
      ScaleRowDown38_2 = ScaleRowDown38_Any_MSA;
702
    } else {
703
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_MSA;
704
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_MSA;
705
    }
706
    if (dst_width % 12 == 0) {
707
      if (!filtering) {
708
        ScaleRowDown38_3 = ScaleRowDown38_MSA;
709
        ScaleRowDown38_2 = ScaleRowDown38_MSA;
710
      } else {
711
        ScaleRowDown38_3 = ScaleRowDown38_3_Box_MSA;
712
        ScaleRowDown38_2 = ScaleRowDown38_2_Box_MSA;
713
      }
714
    }
715
  }
716
#endif
717
#if defined(HAS_SCALEROWDOWN38_LSX)
718
  if (TestCpuFlag(kCpuHasLSX)) {
719
    if (!filtering) {
720
      ScaleRowDown38_3 = ScaleRowDown38_Any_LSX;
721
      ScaleRowDown38_2 = ScaleRowDown38_Any_LSX;
722
    } else {
723
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_LSX;
724
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_LSX;
725
    }
726
    if (dst_width % 12 == 0) {
727
      if (!filtering) {
728
        ScaleRowDown38_3 = ScaleRowDown38_LSX;
729
        ScaleRowDown38_2 = ScaleRowDown38_LSX;
730
      } else {
731
        ScaleRowDown38_3 = ScaleRowDown38_3_Box_LSX;
732
        ScaleRowDown38_2 = ScaleRowDown38_2_Box_LSX;
733
      }
734
    }
735
  }
736
#endif
737
#if defined(HAS_SCALEROWDOWN38_RVV)
738
  if (TestCpuFlag(kCpuHasRVV)) {
739
    if (!filtering) {
740
      ScaleRowDown38_3 = ScaleRowDown38_RVV;
741
      ScaleRowDown38_2 = ScaleRowDown38_RVV;
742
    } else {
743
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_RVV;
744
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_RVV;
745
    }
746
  }
747
#endif
748
749
257
  for (y = 0; y < dst_height - 2; y += 3) {
750
235
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
751
235
    src_ptr += src_stride * 3;
752
235
    dst_ptr += dst_stride;
753
235
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
754
235
    src_ptr += src_stride * 3;
755
235
    dst_ptr += dst_stride;
756
235
    ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width);
757
235
    src_ptr += src_stride * 2;
758
235
    dst_ptr += dst_stride;
759
235
  }
760
761
  // Remainder 1 or 2 rows with last row vertically unfiltered
762
22
  if ((dst_height % 3) == 2) {
763
0
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
764
0
    src_ptr += src_stride * 3;
765
0
    dst_ptr += dst_stride;
766
0
    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
767
22
  } else if ((dst_height % 3) == 1) {
768
0
    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
769
0
  }
770
22
}
771
772
static void ScalePlaneDown38_16(int src_width,
773
                                int src_height,
774
                                int dst_width,
775
                                int dst_height,
776
                                int src_stride,
777
                                int dst_stride,
778
                                const uint16_t* src_ptr,
779
                                uint16_t* dst_ptr,
780
24
                                enum FilterMode filtering) {
781
24
  int y;
782
24
  void (*ScaleRowDown38_3)(const uint16_t* src_ptr, ptrdiff_t src_stride,
783
24
                           uint16_t* dst_ptr, int dst_width);
784
24
  void (*ScaleRowDown38_2)(const uint16_t* src_ptr, ptrdiff_t src_stride,
785
24
                           uint16_t* dst_ptr, int dst_width);
786
24
  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
787
24
  (void)src_width;
788
24
  (void)src_height;
789
24
  assert(dst_width % 3 == 0);
790
24
  if (!filtering) {
791
0
    ScaleRowDown38_3 = ScaleRowDown38_16_C;
792
0
    ScaleRowDown38_2 = ScaleRowDown38_16_C;
793
24
  } else {
794
24
    ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_C;
795
24
    ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_C;
796
24
  }
797
#if defined(HAS_SCALEROWDOWN38_16_NEON)
798
  if (TestCpuFlag(kCpuHasNEON) && (dst_width % 12 == 0)) {
799
    if (!filtering) {
800
      ScaleRowDown38_3 = ScaleRowDown38_16_NEON;
801
      ScaleRowDown38_2 = ScaleRowDown38_16_NEON;
802
    } else {
803
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_NEON;
804
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_NEON;
805
    }
806
  }
807
#endif
808
#if defined(HAS_SCALEROWDOWN38_16_SSSE3)
809
  if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) {
810
    if (!filtering) {
811
      ScaleRowDown38_3 = ScaleRowDown38_16_SSSE3;
812
      ScaleRowDown38_2 = ScaleRowDown38_16_SSSE3;
813
    } else {
814
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_SSSE3;
815
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_SSSE3;
816
    }
817
  }
818
#endif
819
820
303
  for (y = 0; y < dst_height - 2; y += 3) {
821
279
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
822
279
    src_ptr += src_stride * 3;
823
279
    dst_ptr += dst_stride;
824
279
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
825
279
    src_ptr += src_stride * 3;
826
279
    dst_ptr += dst_stride;
827
279
    ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width);
828
279
    src_ptr += src_stride * 2;
829
279
    dst_ptr += dst_stride;
830
279
  }
831
832
  // Remainder 1 or 2 rows with last row vertically unfiltered
833
24
  if ((dst_height % 3) == 2) {
834
0
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
835
0
    src_ptr += src_stride * 3;
836
0
    dst_ptr += dst_stride;
837
0
    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
838
24
  } else if ((dst_height % 3) == 1) {
839
0
    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
840
0
  }
841
24
}
842
843
4.26M
// Clamps x to a lower bound of 1. The argument appears twice in the expansion,
// so do not pass an expression with side effects.
#define MIN1(x) ((x) < 1 ? 1 : (x))
844
845
4.69M
// Returns the sum of the first iboxwidth uint16_t values at src_ptr.
// iboxwidth must be positive (asserted).
static __inline uint32_t SumPixels(int iboxwidth, const uint16_t* src_ptr) {
  uint32_t total = 0u;
  int remaining;
  assert(iboxwidth > 0);
  for (remaining = iboxwidth; remaining > 0; --remaining) {
    total += *src_ptr++;
  }
  return total;
}
854
855
1.73M
// Returns the sum (modulo 2^32) of the first iboxwidth uint32_t values at
// src_ptr. iboxwidth must be positive (asserted).
static __inline uint32_t SumPixels_16(int iboxwidth, const uint32_t* src_ptr) {
  uint32_t total = 0u;
  int remaining;
  assert(iboxwidth > 0);
  for (remaining = iboxwidth; remaining > 0; --remaining) {
    total += *src_ptr++;
  }
  return total;
}
864
865
// Averages boxes of accumulated column sums into 8-bit output pixels when the
// horizontal step dx has a fractional part, so box widths alternate between
// (dx >> 16) and (dx >> 16) + 1.
// x and dx are 16.16 fixed point; src_ptr holds per-column sums over boxheight
// rows.
static void ScaleAddCols2_C(int dst_width,
                            int boxheight,
                            int x,
                            int dx,
                            const uint16_t* src_ptr,
                            uint8_t* dst_ptr) {
  const int narrow_width = dx >> 16;
  // Reciprocal (scaled by 65536) of the pixel count for each of the two
  // possible box widths.
  const int reciprocal[2] = {65536 / (MIN1(narrow_width) * boxheight),
                             65536 / (MIN1(narrow_width + 1) * boxheight)};
  int col;
  for (col = 0; col < dst_width; ++col) {
    const int left = x >> 16;
    int width;
    int which;
    x += dx;
    width = MIN1((x >> 16) - left);
    which = width - narrow_width;
    assert((which == 0) || (which == 1));
    dst_ptr[col] =
        (uint8_t)((SumPixels(width, src_ptr + left) * reciprocal[which]) >> 16);
  }
}
888
889
// 16-bit output counterpart of ScaleAddCols2_C: averages boxes of uint32_t
// column sums when dx has a fractional part, so box widths alternate between
// (dx >> 16) and (dx >> 16) + 1. x and dx are 16.16 fixed point.
static void ScaleAddCols2_16_C(int dst_width,
                               int boxheight,
                               int x,
                               int dx,
                               const uint32_t* src_ptr,
                               uint16_t* dst_ptr) {
  const int narrow_width = dx >> 16;
  // Reciprocal (scaled by 65536) of the pixel count for each of the two
  // possible box widths.
  const int reciprocal[2] = {65536 / (MIN1(narrow_width) * boxheight),
                             65536 / (MIN1(narrow_width + 1) * boxheight)};
  int col;
  for (col = 0; col < dst_width; ++col) {
    const int left = x >> 16;
    int width;
    int which;
    x += dx;
    width = MIN1((x >> 16) - left);
    which = width - narrow_width;
    assert((which == 0) || (which == 1));
    dst_ptr[col] = (uint16_t)(
        (SumPixels_16(width, src_ptr + left) * reciprocal[which]) >> 16);
  }
}
911
912
// Column pass for a horizontal step of exactly one source pixel: each output
// pixel is one accumulated column sum divided by boxheight.
// x is 16.16 fixed point; dx is unused.
static void ScaleAddCols0_C(int dst_width,
                            int boxheight,
                            int x,
                            int dx,
                            const uint16_t* src_ptr,
                            uint8_t* dst_ptr) {
  const int scaleval = 65536 / boxheight;
  const uint16_t* const first = src_ptr + (x >> 16);
  int col;
  (void)dx;
  for (col = 0; col < dst_width; ++col) {
    dst_ptr[col] = (uint8_t)((first[col] * scaleval) >> 16);
  }
}
926
927
// Column pass for an integer horizontal step: every output pixel averages a
// box of (dx >> 16) accumulated column sums (at least one) over boxheight rows.
// x and dx are 16.16 fixed point.
static void ScaleAddCols1_C(int dst_width,
                            int boxheight,
                            int x,
                            int dx,
                            const uint16_t* src_ptr,
                            uint8_t* dst_ptr) {
  const int boxwidth = MIN1(dx >> 16);
  const int scaleval = 65536 / (boxwidth * boxheight);
  int left = x >> 16;  // Integer column where the current box starts.
  int col;
  for (col = 0; col < dst_width; ++col) {
    dst_ptr[col] =
        (uint8_t)((SumPixels(boxwidth, src_ptr + left) * scaleval) >> 16);
    left += boxwidth;
  }
}
942
943
// 16-bit output counterpart of ScaleAddCols1_C: every output pixel averages a
// box of (dx >> 16) accumulated uint32_t column sums (at least one) over
// boxheight rows. x and dx are 16.16 fixed point.
static void ScaleAddCols1_16_C(int dst_width,
                               int boxheight,
                               int x,
                               int dx,
                               const uint32_t* src_ptr,
                               uint16_t* dst_ptr) {
  int boxwidth = MIN1(dx >> 16);
  int scaleval = 65536 / (boxwidth * boxheight);
  int i;
  // Convert the 16.16 start position to an integer column, as ScaleAddCols1_C
  // and ScaleAddCols2_16_C do. Without this a non-zero x would be used
  // directly as an element index into src_ptr.
  x >>= 16;
  for (i = 0; i < dst_width; ++i) {
    *dst_ptr++ =
        (uint16_t)(SumPixels_16(boxwidth, src_ptr + x) * scaleval >> 16);
    x += boxwidth;
  }
}
957
958
// Scale plane down to any dimensions, with interpolation.
959
// (boxfilter).
960
//
961
// Same method as SimpleScale, which is fixed point, outputting
962
// one pixel of destination using fixed point (16.16) to step
963
// through source, sampling a box of pixel with simple
964
// averaging.
965
static int ScalePlaneBox(int src_width,
966
                         int src_height,
967
                         int dst_width,
968
                         int dst_height,
969
                         int src_stride,
970
                         int dst_stride,
971
                         const uint8_t* src_ptr,
972
1.18k
                         uint8_t* dst_ptr) {
973
1.18k
  int j, k;
974
  // Initial source x/y coordinate and step values as 16.16 fixed point.
975
1.18k
  int x = 0;
976
1.18k
  int y = 0;
977
1.18k
  int dx = 0;
978
1.18k
  int dy = 0;
979
1.18k
  const int max_y = (src_height << 16);
980
1.18k
  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox, &x, &y,
981
1.18k
             &dx, &dy);
982
1.18k
  src_width = Abs(src_width);
983
1.18k
  {
984
    // Allocate a row buffer of uint16_t.
985
1.18k
    align_buffer_64(row16, src_width * 2);
986
1.18k
    if (!row16)
987
0
      return 1;
988
1.18k
    void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
989
1.18k
                         const uint16_t* src_ptr, uint8_t* dst_ptr) =
990
1.18k
        (dx & 0xffff) ? ScaleAddCols2_C
991
1.18k
                      : ((dx != 0x10000) ? ScaleAddCols1_C : ScaleAddCols0_C);
992
1.18k
    void (*ScaleAddRow)(const uint8_t* src_ptr, uint16_t* dst_ptr,
993
1.18k
                        int src_width) = ScaleAddRow_C;
994
1.18k
#if defined(HAS_SCALEADDROW_SSE2)
995
1.18k
    if (TestCpuFlag(kCpuHasSSE2)) {
996
1.18k
      ScaleAddRow = ScaleAddRow_Any_SSE2;
997
1.18k
      if (IS_ALIGNED(src_width, 16)) {
998
158
        ScaleAddRow = ScaleAddRow_SSE2;
999
158
      }
1000
1.18k
    }
1001
1.18k
#endif
1002
1.18k
#if defined(HAS_SCALEADDROW_AVX2)
1003
1.18k
    if (TestCpuFlag(kCpuHasAVX2)) {
1004
1.18k
      ScaleAddRow = ScaleAddRow_Any_AVX2;
1005
1.18k
      if (IS_ALIGNED(src_width, 32)) {
1006
103
        ScaleAddRow = ScaleAddRow_AVX2;
1007
103
      }
1008
1.18k
    }
1009
1.18k
#endif
1010
#if defined(HAS_SCALEADDROW_NEON)
1011
    if (TestCpuFlag(kCpuHasNEON)) {
1012
      ScaleAddRow = ScaleAddRow_Any_NEON;
1013
      if (IS_ALIGNED(src_width, 16)) {
1014
        ScaleAddRow = ScaleAddRow_NEON;
1015
      }
1016
    }
1017
#endif
1018
#if defined(HAS_SCALEADDROW_MSA)
1019
    if (TestCpuFlag(kCpuHasMSA)) {
1020
      ScaleAddRow = ScaleAddRow_Any_MSA;
1021
      if (IS_ALIGNED(src_width, 16)) {
1022
        ScaleAddRow = ScaleAddRow_MSA;
1023
      }
1024
    }
1025
#endif
1026
#if defined(HAS_SCALEADDROW_LSX)
1027
    if (TestCpuFlag(kCpuHasLSX)) {
1028
      ScaleAddRow = ScaleAddRow_Any_LSX;
1029
      if (IS_ALIGNED(src_width, 16)) {
1030
        ScaleAddRow = ScaleAddRow_LSX;
1031
      }
1032
    }
1033
#endif
1034
#if defined(HAS_SCALEADDROW_RVV)
1035
    if (TestCpuFlag(kCpuHasRVV)) {
1036
      ScaleAddRow = ScaleAddRow_RVV;
1037
    }
1038
#endif
1039
1040
91.1k
    for (j = 0; j < dst_height; ++j) {
1041
89.9k
      int boxheight;
1042
89.9k
      int iy = y >> 16;
1043
89.9k
      const uint8_t* src = src_ptr + iy * (int64_t)src_stride;
1044
89.9k
      y += dy;
1045
89.9k
      if (y > max_y) {
1046
0
        y = max_y;
1047
0
      }
1048
89.9k
      boxheight = MIN1((y >> 16) - iy);
1049
89.9k
      memset(row16, 0, src_width * 2);
1050
901k
      for (k = 0; k < boxheight; ++k) {
1051
811k
        ScaleAddRow(src, (uint16_t*)(row16), src_width);
1052
811k
        src += src_stride;
1053
811k
      }
1054
89.9k
      ScaleAddCols(dst_width, boxheight, x, dx, (uint16_t*)(row16), dst_ptr);
1055
89.9k
      dst_ptr += dst_stride;
1056
89.9k
    }
1057
1.18k
    free_aligned_buffer_64(row16);
1058
1.18k
  }
1059
0
  return 0;
1060
1.18k
}
1061
1062
static int ScalePlaneBox_16(int src_width,
1063
                            int src_height,
1064
                            int dst_width,
1065
                            int dst_height,
1066
                            int src_stride,
1067
                            int dst_stride,
1068
                            const uint16_t* src_ptr,
1069
1.25k
                            uint16_t* dst_ptr) {
1070
1.25k
  int j, k;
1071
  // Initial source x/y coordinate and step values as 16.16 fixed point.
1072
1.25k
  int x = 0;
1073
1.25k
  int y = 0;
1074
1.25k
  int dx = 0;
1075
1.25k
  int dy = 0;
1076
1.25k
  const int max_y = (src_height << 16);
1077
1.25k
  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox, &x, &y,
1078
1.25k
             &dx, &dy);
1079
1.25k
  src_width = Abs(src_width);
1080
1.25k
  {
1081
    // Allocate a row buffer of uint32_t.
1082
1.25k
    align_buffer_64(row32, src_width * 4);
1083
1.25k
    if (!row32)
1084
0
      return 1;
1085
1.25k
    void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
1086
1.25k
                         const uint32_t* src_ptr, uint16_t* dst_ptr) =
1087
1.25k
        (dx & 0xffff) ? ScaleAddCols2_16_C : ScaleAddCols1_16_C;
1088
1.25k
    void (*ScaleAddRow)(const uint16_t* src_ptr, uint32_t* dst_ptr,
1089
1.25k
                        int src_width) = ScaleAddRow_16_C;
1090
1091
#if defined(HAS_SCALEADDROW_16_SSE2)
1092
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(src_width, 16)) {
1093
      ScaleAddRow = ScaleAddRow_16_SSE2;
1094
    }
1095
#endif
1096
1097
78.7k
    for (j = 0; j < dst_height; ++j) {
1098
77.4k
      int boxheight;
1099
77.4k
      int iy = y >> 16;
1100
77.4k
      const uint16_t* src = src_ptr + iy * (int64_t)src_stride;
1101
77.4k
      y += dy;
1102
77.4k
      if (y > max_y) {
1103
0
        y = max_y;
1104
0
      }
1105
77.4k
      boxheight = MIN1((y >> 16) - iy);
1106
77.4k
      memset(row32, 0, src_width * 4);
1107
783k
      for (k = 0; k < boxheight; ++k) {
1108
705k
        ScaleAddRow(src, (uint32_t*)(row32), src_width);
1109
705k
        src += src_stride;
1110
705k
      }
1111
77.4k
      ScaleAddCols(dst_width, boxheight, x, dx, (uint32_t*)(row32), dst_ptr);
1112
77.4k
      dst_ptr += dst_stride;
1113
77.4k
    }
1114
1.25k
    free_aligned_buffer_64(row32);
1115
1.25k
  }
1116
0
  return 0;
1117
1.25k
}
1118
1119
// Scale plane down with bilinear interpolation.
1120
static int ScalePlaneBilinearDown(int src_width,
1121
                                  int src_height,
1122
                                  int dst_width,
1123
                                  int dst_height,
1124
                                  int src_stride,
1125
                                  int dst_stride,
1126
                                  const uint8_t* src_ptr,
1127
                                  uint8_t* dst_ptr,
1128
3.75k
                                  enum FilterMode filtering) {
1129
  // Initial source x/y coordinate and step values as 16.16 fixed point.
1130
3.75k
  int x = 0;
1131
3.75k
  int y = 0;
1132
3.75k
  int dx = 0;
1133
3.75k
  int dy = 0;
1134
  // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
1135
  // Allocate a row buffer.
1136
3.75k
  align_buffer_64(row, src_width);
1137
3.75k
  if (!row)
1138
0
    return 1;
1139
1140
3.75k
  const int max_y = (src_height - 1) << 16;
1141
3.75k
  int j;
1142
3.75k
  void (*ScaleFilterCols)(uint8_t* dst_ptr, const uint8_t* src_ptr,
1143
3.75k
                          int dst_width, int x, int dx) =
1144
3.75k
      (src_width >= 32768) ? ScaleFilterCols64_C : ScaleFilterCols_C;
1145
3.75k
  void (*InterpolateRow)(uint8_t* dst_ptr, const uint8_t* src_ptr,
1146
3.75k
                         ptrdiff_t src_stride, int dst_width,
1147
3.75k
                         int source_y_fraction) = InterpolateRow_C;
1148
3.75k
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
1149
3.75k
             &dx, &dy);
1150
3.75k
  src_width = Abs(src_width);
1151
1152
3.75k
#if defined(HAS_INTERPOLATEROW_SSSE3)
1153
3.75k
  if (TestCpuFlag(kCpuHasSSSE3)) {
1154
3.75k
    InterpolateRow = InterpolateRow_Any_SSSE3;
1155
3.75k
    if (IS_ALIGNED(src_width, 16)) {
1156
923
      InterpolateRow = InterpolateRow_SSSE3;
1157
923
    }
1158
3.75k
  }
1159
3.75k
#endif
1160
3.75k
#if defined(HAS_INTERPOLATEROW_AVX2)
1161
3.75k
  if (TestCpuFlag(kCpuHasAVX2)) {
1162
3.75k
    InterpolateRow = InterpolateRow_Any_AVX2;
1163
3.75k
    if (IS_ALIGNED(src_width, 32)) {
1164
603
      InterpolateRow = InterpolateRow_AVX2;
1165
603
    }
1166
3.75k
  }
1167
3.75k
#endif
1168
#if defined(HAS_INTERPOLATEROW_NEON)
1169
  if (TestCpuFlag(kCpuHasNEON)) {
1170
    InterpolateRow = InterpolateRow_Any_NEON;
1171
    if (IS_ALIGNED(src_width, 16)) {
1172
      InterpolateRow = InterpolateRow_NEON;
1173
    }
1174
  }
1175
#endif
1176
#if defined(HAS_INTERPOLATEROW_SME)
1177
  if (TestCpuFlag(kCpuHasSME)) {
1178
    InterpolateRow = InterpolateRow_SME;
1179
  }
1180
#endif
1181
#if defined(HAS_INTERPOLATEROW_MSA)
1182
  if (TestCpuFlag(kCpuHasMSA)) {
1183
    InterpolateRow = InterpolateRow_Any_MSA;
1184
    if (IS_ALIGNED(src_width, 32)) {
1185
      InterpolateRow = InterpolateRow_MSA;
1186
    }
1187
  }
1188
#endif
1189
#if defined(HAS_INTERPOLATEROW_LSX)
1190
  if (TestCpuFlag(kCpuHasLSX)) {
1191
    InterpolateRow = InterpolateRow_Any_LSX;
1192
    if (IS_ALIGNED(src_width, 32)) {
1193
      InterpolateRow = InterpolateRow_LSX;
1194
    }
1195
  }
1196
#endif
1197
#if defined(HAS_INTERPOLATEROW_RVV)
1198
  if (TestCpuFlag(kCpuHasRVV)) {
1199
    InterpolateRow = InterpolateRow_RVV;
1200
  }
1201
#endif
1202
1203
3.75k
#if defined(HAS_SCALEFILTERCOLS_SSSE3)
1204
3.75k
  if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
1205
3.75k
    ScaleFilterCols = ScaleFilterCols_SSSE3;
1206
3.75k
  }
1207
3.75k
#endif
1208
#if defined(HAS_SCALEFILTERCOLS_NEON)
1209
  if (TestCpuFlag(kCpuHasNEON) && src_width < 32768) {
1210
    ScaleFilterCols = ScaleFilterCols_Any_NEON;
1211
    if (IS_ALIGNED(dst_width, 8)) {
1212
      ScaleFilterCols = ScaleFilterCols_NEON;
1213
    }
1214
  }
1215
#endif
1216
#if defined(HAS_SCALEFILTERCOLS_MSA)
1217
  if (TestCpuFlag(kCpuHasMSA) && src_width < 32768) {
1218
    ScaleFilterCols = ScaleFilterCols_Any_MSA;
1219
    if (IS_ALIGNED(dst_width, 16)) {
1220
      ScaleFilterCols = ScaleFilterCols_MSA;
1221
    }
1222
  }
1223
#endif
1224
#if defined(HAS_SCALEFILTERCOLS_LSX)
1225
  if (TestCpuFlag(kCpuHasLSX) && src_width < 32768) {
1226
    ScaleFilterCols = ScaleFilterCols_Any_LSX;
1227
    if (IS_ALIGNED(dst_width, 16)) {
1228
      ScaleFilterCols = ScaleFilterCols_LSX;
1229
    }
1230
  }
1231
#endif
1232
3.75k
  if (y > max_y) {
1233
43
    y = max_y;
1234
43
  }
1235
1236
264k
  for (j = 0; j < dst_height; ++j) {
1237
260k
    int yi = y >> 16;
1238
260k
    const uint8_t* src = src_ptr + yi * (int64_t)src_stride;
1239
260k
    if (filtering == kFilterLinear) {
1240
78.3k
      ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
1241
182k
    } else {
1242
182k
      int yf = (y >> 8) & 255;
1243
182k
      InterpolateRow(row, src, src_stride, src_width, yf);
1244
182k
      ScaleFilterCols(dst_ptr, row, dst_width, x, dx);
1245
182k
    }
1246
260k
    dst_ptr += dst_stride;
1247
260k
    y += dy;
1248
260k
    if (y > max_y) {
1249
4.89k
      y = max_y;
1250
4.89k
    }
1251
260k
  }
1252
3.75k
  free_aligned_buffer_64(row);
1253
3.75k
  return 0;
1254
3.75k
}
1255
1256
static int ScalePlaneBilinearDown_16(int src_width,
1257
                                     int src_height,
1258
                                     int dst_width,
1259
                                     int dst_height,
1260
                                     int src_stride,
1261
                                     int dst_stride,
1262
                                     const uint16_t* src_ptr,
1263
                                     uint16_t* dst_ptr,
1264
5.89k
                                     enum FilterMode filtering) {
1265
  // Initial source x/y coordinate and step values as 16.16 fixed point.
1266
5.89k
  int x = 0;
1267
5.89k
  int y = 0;
1268
5.89k
  int dx = 0;
1269
5.89k
  int dy = 0;
1270
  // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
1271
  // Allocate a row buffer.
1272
5.89k
  align_buffer_64(row, src_width * 2);
1273
5.89k
  if (!row)
1274
0
    return 1;
1275
1276
5.89k
  const int max_y = (src_height - 1) << 16;
1277
5.89k
  int j;
1278
5.89k
  void (*ScaleFilterCols)(uint16_t* dst_ptr, const uint16_t* src_ptr,
1279
5.89k
                          int dst_width, int x, int dx) =
1280
5.89k
      (src_width >= 32768) ? ScaleFilterCols64_16_C : ScaleFilterCols_16_C;
1281
5.89k
  void (*InterpolateRow)(uint16_t* dst_ptr, const uint16_t* src_ptr,
1282
5.89k
                         ptrdiff_t src_stride, int dst_width,
1283
5.89k
                         int source_y_fraction) = InterpolateRow_16_C;
1284
5.89k
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
1285
5.89k
             &dx, &dy);
1286
5.89k
  src_width = Abs(src_width);
1287
1288
#if defined(HAS_INTERPOLATEROW_16_SSE2)
1289
  if (TestCpuFlag(kCpuHasSSE2)) {
1290
    InterpolateRow = InterpolateRow_16_Any_SSE2;
1291
    if (IS_ALIGNED(src_width, 16)) {
1292
      InterpolateRow = InterpolateRow_16_SSE2;
1293
    }
1294
  }
1295
#endif
1296
#if defined(HAS_INTERPOLATEROW_16_SSSE3)
1297
  if (TestCpuFlag(kCpuHasSSSE3)) {
1298
    InterpolateRow = InterpolateRow_16_Any_SSSE3;
1299
    if (IS_ALIGNED(src_width, 16)) {
1300
      InterpolateRow = InterpolateRow_16_SSSE3;
1301
    }
1302
  }
1303
#endif
1304
#if defined(HAS_INTERPOLATEROW_16_AVX2)
1305
  if (TestCpuFlag(kCpuHasAVX2)) {
1306
    InterpolateRow = InterpolateRow_16_Any_AVX2;
1307
    if (IS_ALIGNED(src_width, 32)) {
1308
      InterpolateRow = InterpolateRow_16_AVX2;
1309
    }
1310
  }
1311
#endif
1312
#if defined(HAS_INTERPOLATEROW_16_NEON)
1313
  if (TestCpuFlag(kCpuHasNEON)) {
1314
    InterpolateRow = InterpolateRow_16_Any_NEON;
1315
    if (IS_ALIGNED(src_width, 16)) {
1316
      InterpolateRow = InterpolateRow_16_NEON;
1317
    }
1318
  }
1319
#endif
1320
#if defined(HAS_INTERPOLATEROW_16_SME)
1321
  if (TestCpuFlag(kCpuHasSME)) {
1322
    InterpolateRow = InterpolateRow_16_SME;
1323
  }
1324
#endif
1325
1326
#if defined(HAS_SCALEFILTERCOLS_16_SSSE3)
1327
  if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
1328
    ScaleFilterCols = ScaleFilterCols_16_SSSE3;
1329
  }
1330
#endif
1331
5.89k
  if (y > max_y) {
1332
107
    y = max_y;
1333
107
  }
1334
1335
380k
  for (j = 0; j < dst_height; ++j) {
1336
374k
    int yi = y >> 16;
1337
374k
    const uint16_t* src = src_ptr + yi * (int64_t)src_stride;
1338
374k
    if (filtering == kFilterLinear) {
1339
231k
      ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
1340
231k
    } else {
1341
143k
      int yf = (y >> 8) & 255;
1342
143k
      InterpolateRow((uint16_t*)row, src, src_stride, src_width, yf);
1343
143k
      ScaleFilterCols(dst_ptr, (uint16_t*)row, dst_width, x, dx);
1344
143k
    }
1345
374k
    dst_ptr += dst_stride;
1346
374k
    y += dy;
1347
374k
    if (y > max_y) {
1348
8.27k
      y = max_y;
1349
8.27k
    }
1350
374k
  }
1351
5.89k
  free_aligned_buffer_64(row);
1352
5.89k
  return 0;
1353
5.89k
}
1354
1355
// Scale up down with bilinear interpolation.
1356
static int ScalePlaneBilinearUp(int src_width,
1357
                                int src_height,
1358
                                int dst_width,
1359
                                int dst_height,
1360
                                int src_stride,
1361
                                int dst_stride,
1362
                                const uint8_t* src_ptr,
1363
                                uint8_t* dst_ptr,
1364
4.75k
                                enum FilterMode filtering) {
1365
4.75k
  int j;
1366
  // Initial source x/y coordinate and step values as 16.16 fixed point.
1367
4.75k
  int x = 0;
1368
4.75k
  int y = 0;
1369
4.75k
  int dx = 0;
1370
4.75k
  int dy = 0;
1371
4.75k
  const int max_y = (src_height - 1) << 16;
1372
4.75k
  void (*InterpolateRow)(uint8_t* dst_ptr, const uint8_t* src_ptr,
1373
4.75k
                         ptrdiff_t src_stride, int dst_width,
1374
4.75k
                         int source_y_fraction) = InterpolateRow_C;
1375
4.75k
  void (*ScaleFilterCols)(uint8_t* dst_ptr, const uint8_t* src_ptr,
1376
4.75k
                          int dst_width, int x, int dx) =
1377
4.75k
      filtering ? ScaleFilterCols_C : ScaleCols_C;
1378
4.75k
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
1379
4.75k
             &dx, &dy);
1380
4.75k
  src_width = Abs(src_width);
1381
1382
4.75k
#if defined(HAS_INTERPOLATEROW_SSSE3)
1383
4.75k
  if (TestCpuFlag(kCpuHasSSSE3)) {
1384
4.75k
    InterpolateRow = InterpolateRow_Any_SSSE3;
1385
4.75k
    if (IS_ALIGNED(dst_width, 16)) {
1386
2.63k
      InterpolateRow = InterpolateRow_SSSE3;
1387
2.63k
    }
1388
4.75k
  }
1389
4.75k
#endif
1390
4.75k
#if defined(HAS_INTERPOLATEROW_AVX2)
1391
4.75k
  if (TestCpuFlag(kCpuHasAVX2)) {
1392
4.75k
    InterpolateRow = InterpolateRow_Any_AVX2;
1393
4.75k
    if (IS_ALIGNED(dst_width, 32)) {
1394
2.29k
      InterpolateRow = InterpolateRow_AVX2;
1395
2.29k
    }
1396
4.75k
  }
1397
4.75k
#endif
1398
#if defined(HAS_INTERPOLATEROW_NEON)
1399
  if (TestCpuFlag(kCpuHasNEON)) {
1400
    InterpolateRow = InterpolateRow_Any_NEON;
1401
    if (IS_ALIGNED(dst_width, 16)) {
1402
      InterpolateRow = InterpolateRow_NEON;
1403
    }
1404
  }
1405
#endif
1406
#if defined(HAS_INTERPOLATEROW_SME)
1407
  if (TestCpuFlag(kCpuHasSME)) {
1408
    InterpolateRow = InterpolateRow_SME;
1409
  }
1410
#endif
1411
#if defined(HAS_INTERPOLATEROW_RVV)
1412
  if (TestCpuFlag(kCpuHasRVV)) {
1413
    InterpolateRow = InterpolateRow_RVV;
1414
  }
1415
#endif
1416
1417
4.75k
  if (filtering && src_width >= 32768) {
1418
0
    ScaleFilterCols = ScaleFilterCols64_C;
1419
0
  }
1420
4.75k
#if defined(HAS_SCALEFILTERCOLS_SSSE3)
1421
4.75k
  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
1422
4.75k
    ScaleFilterCols = ScaleFilterCols_SSSE3;
1423
4.75k
  }
1424
4.75k
#endif
1425
#if defined(HAS_SCALEFILTERCOLS_NEON)
1426
  if (filtering && TestCpuFlag(kCpuHasNEON) && src_width < 32768) {
1427
    ScaleFilterCols = ScaleFilterCols_Any_NEON;
1428
    if (IS_ALIGNED(dst_width, 8)) {
1429
      ScaleFilterCols = ScaleFilterCols_NEON;
1430
    }
1431
  }
1432
#endif
1433
#if defined(HAS_SCALEFILTERCOLS_MSA)
1434
  if (filtering && TestCpuFlag(kCpuHasMSA) && src_width < 32768) {
1435
    ScaleFilterCols = ScaleFilterCols_Any_MSA;
1436
    if (IS_ALIGNED(dst_width, 16)) {
1437
      ScaleFilterCols = ScaleFilterCols_MSA;
1438
    }
1439
  }
1440
#endif
1441
#if defined(HAS_SCALEFILTERCOLS_LSX)
1442
  if (filtering && TestCpuFlag(kCpuHasLSX) && src_width < 32768) {
1443
    ScaleFilterCols = ScaleFilterCols_Any_LSX;
1444
    if (IS_ALIGNED(dst_width, 16)) {
1445
      ScaleFilterCols = ScaleFilterCols_LSX;
1446
    }
1447
  }
1448
#endif
1449
4.75k
  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
1450
0
    ScaleFilterCols = ScaleColsUp2_C;
1451
#if defined(HAS_SCALECOLS_SSE2)
1452
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
1453
      ScaleFilterCols = ScaleColsUp2_SSE2;
1454
    }
1455
#endif
1456
0
  }
1457
1458
4.75k
  if (y > max_y) {
1459
631
    y = max_y;
1460
631
  }
1461
4.75k
  {
1462
4.75k
    int yi = y >> 16;
1463
4.75k
    const uint8_t* src = src_ptr + yi * (int64_t)src_stride;
1464
1465
    // Allocate 2 row buffers.
1466
4.75k
    const int row_size = (dst_width + 31) & ~31;
1467
4.75k
    align_buffer_64(row, row_size * 2);
1468
4.75k
    if (!row)
1469
0
      return 1;
1470
1471
4.75k
    uint8_t* rowptr = row;
1472
4.75k
    int rowstride = row_size;
1473
4.75k
    int lasty = yi;
1474
1475
4.75k
    ScaleFilterCols(rowptr, src, dst_width, x, dx);
1476
4.75k
    if (src_height > 1) {
1477
4.11k
      src += src_stride;
1478
4.11k
    }
1479
4.75k
    ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx);
1480
4.75k
    if (src_height > 2) {
1481
3.65k
      src += src_stride;
1482
3.65k
    }
1483
1484
2.17M
    for (j = 0; j < dst_height; ++j) {
1485
2.17M
      yi = y >> 16;
1486
2.17M
      if (yi != lasty) {
1487
300k
        if (y > max_y) {
1488
0
          y = max_y;
1489
0
          yi = y >> 16;
1490
0
          src = src_ptr + yi * (int64_t)src_stride;
1491
0
        }
1492
300k
        if (yi != lasty) {
1493
300k
          ScaleFilterCols(rowptr, src, dst_width, x, dx);
1494
300k
          rowptr += rowstride;
1495
300k
          rowstride = -rowstride;
1496
300k
          lasty = yi;
1497
300k
          if ((y + 65536) < max_y) {
1498
296k
            src += src_stride;
1499
296k
          }
1500
300k
        }
1501
300k
      }
1502
2.17M
      if (filtering == kFilterLinear) {
1503
92.8k
        InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0);
1504
2.07M
      } else {
1505
2.07M
        int yf = (y >> 8) & 255;
1506
2.07M
        InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf);
1507
2.07M
      }
1508
2.17M
      dst_ptr += dst_stride;
1509
2.17M
      y += dy;
1510
2.17M
    }
1511
4.75k
    free_aligned_buffer_64(row);
1512
4.75k
  }
1513
0
  return 0;
1514
4.75k
}
1515
1516
// Scale plane, horizontally up by 2 times.
1517
// Uses linear filter horizontally, nearest vertically.
1518
// This is an optimized version for scaling up a plane to 2 times of
1519
// its original width, using linear interpolation.
1520
// This is used to scale U and V planes of I422 to I444.
1521
static void ScalePlaneUp2_Linear(int src_width,
1522
                                 int src_height,
1523
                                 int dst_width,
1524
                                 int dst_height,
1525
                                 int src_stride,
1526
                                 int dst_stride,
1527
                                 const uint8_t* src_ptr,
1528
225
                                 uint8_t* dst_ptr) {
1529
225
  void (*ScaleRowUp)(const uint8_t* src_ptr, uint8_t* dst_ptr, int dst_width) =
1530
225
      ScaleRowUp2_Linear_Any_C;
1531
225
  int i;
1532
225
  int y;
1533
225
  int dy;
1534
1535
225
  (void)src_width;
1536
  // This function can only scale up by 2 times horizontally.
1537
225
  assert(src_width == ((dst_width + 1) / 2));
1538
1539
225
#ifdef HAS_SCALEROWUP2_LINEAR_SSE2
1540
225
  if (TestCpuFlag(kCpuHasSSE2)) {
1541
225
    ScaleRowUp = ScaleRowUp2_Linear_Any_SSE2;
1542
225
  }
1543
225
#endif
1544
1545
225
#ifdef HAS_SCALEROWUP2_LINEAR_SSSE3
1546
225
  if (TestCpuFlag(kCpuHasSSSE3)) {
1547
225
    ScaleRowUp = ScaleRowUp2_Linear_Any_SSSE3;
1548
225
  }
1549
225
#endif
1550
1551
225
#ifdef HAS_SCALEROWUP2_LINEAR_AVX2
1552
225
  if (TestCpuFlag(kCpuHasAVX2)) {
1553
225
    ScaleRowUp = ScaleRowUp2_Linear_Any_AVX2;
1554
225
  }
1555
225
#endif
1556
1557
#ifdef HAS_SCALEROWUP2_LINEAR_NEON
1558
  if (TestCpuFlag(kCpuHasNEON)) {
1559
    ScaleRowUp = ScaleRowUp2_Linear_Any_NEON;
1560
  }
1561
#endif
1562
#ifdef HAS_SCALEROWUP2_LINEAR_RVV
1563
  if (TestCpuFlag(kCpuHasRVV)) {
1564
    ScaleRowUp = ScaleRowUp2_Linear_RVV;
1565
  }
1566
#endif
1567
1568
225
  if (dst_height == 1) {
1569
37
    ScaleRowUp(src_ptr + ((src_height - 1) / 2) * (int64_t)src_stride, dst_ptr,
1570
37
               dst_width);
1571
188
  } else {
1572
188
    dy = FixedDiv(src_height - 1, dst_height - 1);
1573
188
    y = (1 << 15) - 1;
1574
122k
    for (i = 0; i < dst_height; ++i) {
1575
122k
      ScaleRowUp(src_ptr + (y >> 16) * (int64_t)src_stride, dst_ptr, dst_width);
1576
122k
      dst_ptr += dst_stride;
1577
122k
      y += dy;
1578
122k
    }
1579
188
  }
1580
225
}
1581
1582
// Scale plane, up by 2 times.
1583
// This is an optimized version for scaling up a plane to 2 times of
1584
// its original size, using bilinear interpolation.
1585
// This is used to scale U and V planes of I420 to I444.
1586
static void ScalePlaneUp2_Bilinear(int src_width,
1587
                                   int src_height,
1588
                                   int dst_width,
1589
                                   int dst_height,
1590
                                   int src_stride,
1591
                                   int dst_stride,
1592
                                   const uint8_t* src_ptr,
1593
277
                                   uint8_t* dst_ptr) {
1594
277
  void (*Scale2RowUp)(const uint8_t* src_ptr, ptrdiff_t src_stride,
1595
277
                      uint8_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) =
1596
277
      ScaleRowUp2_Bilinear_Any_C;
1597
277
  int x;
1598
1599
277
  (void)src_width;
1600
  // This function can only scale up by 2 times.
1601
277
  assert(src_width == ((dst_width + 1) / 2));
1602
277
  assert(src_height == ((dst_height + 1) / 2));
1603
1604
277
#ifdef HAS_SCALEROWUP2_BILINEAR_SSE2
1605
277
  if (TestCpuFlag(kCpuHasSSE2)) {
1606
277
    Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSE2;
1607
277
  }
1608
277
#endif
1609
1610
277
#ifdef HAS_SCALEROWUP2_BILINEAR_SSSE3
1611
277
  if (TestCpuFlag(kCpuHasSSSE3)) {
1612
277
    Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSSE3;
1613
277
  }
1614
277
#endif
1615
1616
277
#ifdef HAS_SCALEROWUP2_BILINEAR_AVX2
1617
277
  if (TestCpuFlag(kCpuHasAVX2)) {
1618
277
    Scale2RowUp = ScaleRowUp2_Bilinear_Any_AVX2;
1619
277
  }
1620
277
#endif
1621
1622
#ifdef HAS_SCALEROWUP2_BILINEAR_NEON
1623
  if (TestCpuFlag(kCpuHasNEON)) {
1624
    Scale2RowUp = ScaleRowUp2_Bilinear_Any_NEON;
1625
  }
1626
#endif
1627
#ifdef HAS_SCALEROWUP2_BILINEAR_RVV
1628
  if (TestCpuFlag(kCpuHasRVV)) {
1629
    Scale2RowUp = ScaleRowUp2_Bilinear_RVV;
1630
  }
1631
#endif
1632
1633
277
  Scale2RowUp(src_ptr, 0, dst_ptr, 0, dst_width);
1634
277
  dst_ptr += dst_stride;
1635
15.7k
  for (x = 0; x < src_height - 1; ++x) {
1636
15.4k
    Scale2RowUp(src_ptr, src_stride, dst_ptr, dst_stride, dst_width);
1637
15.4k
    src_ptr += src_stride;
1638
    // TODO(fbarchard): Test performance of writing one row of destination at a
1639
    // time.
1640
15.4k
    dst_ptr += 2 * dst_stride;
1641
15.4k
  }
1642
277
  if (!(dst_height & 1)) {
1643
146
    Scale2RowUp(src_ptr, 0, dst_ptr, 0, dst_width);
1644
146
  }
1645
277
}
1646
1647
// Scale at most 14 bit plane, horizontally up by 2 times.
1648
// This is an optimized version for scaling up a plane to 2 times of
1649
// its original width, using linear interpolation.
1650
// stride is in count of uint16_t.
1651
// This is used to scale U and V planes of I210 to I410 and I212 to I412.
1652
static void ScalePlaneUp2_12_Linear(int src_width,
1653
                                    int src_height,
1654
                                    int dst_width,
1655
                                    int dst_height,
1656
                                    int src_stride,
1657
                                    int dst_stride,
1658
                                    const uint16_t* src_ptr,
1659
272
                                    uint16_t* dst_ptr) {
1660
272
  void (*ScaleRowUp)(const uint16_t* src_ptr, uint16_t* dst_ptr,
1661
272
                     int dst_width) = ScaleRowUp2_Linear_16_Any_C;
1662
272
  int i;
1663
272
  int y;
1664
272
  int dy;
1665
1666
272
  (void)src_width;
1667
  // This function can only scale up by 2 times horizontally.
1668
272
  assert(src_width == ((dst_width + 1) / 2));
1669
1670
272
#ifdef HAS_SCALEROWUP2_LINEAR_12_SSSE3
1671
272
  if (TestCpuFlag(kCpuHasSSSE3)) {
1672
272
    ScaleRowUp = ScaleRowUp2_Linear_12_Any_SSSE3;
1673
272
  }
1674
272
#endif
1675
1676
272
#ifdef HAS_SCALEROWUP2_LINEAR_12_AVX2
1677
272
  if (TestCpuFlag(kCpuHasAVX2)) {
1678
272
    ScaleRowUp = ScaleRowUp2_Linear_12_Any_AVX2;
1679
272
  }
1680
272
#endif
1681
1682
#ifdef HAS_SCALEROWUP2_LINEAR_12_NEON
1683
  if (TestCpuFlag(kCpuHasNEON)) {
1684
    ScaleRowUp = ScaleRowUp2_Linear_12_Any_NEON;
1685
  }
1686
#endif
1687
1688
272
  if (dst_height == 1) {
1689
19
    ScaleRowUp(src_ptr + ((src_height - 1) / 2) * (int64_t)src_stride, dst_ptr,
1690
19
               dst_width);
1691
253
  } else {
1692
253
    dy = FixedDiv(src_height - 1, dst_height - 1);
1693
253
    y = (1 << 15) - 1;
1694
198k
    for (i = 0; i < dst_height; ++i) {
1695
198k
      ScaleRowUp(src_ptr + (y >> 16) * (int64_t)src_stride, dst_ptr, dst_width);
1696
198k
      dst_ptr += dst_stride;
1697
198k
      y += dy;
1698
198k
    }
1699
253
  }
1700
272
}
1701
1702
// Scale at most 12 bit plane, up by 2 times.
1703
// This is an optimized version for scaling up a plane to 2 times of
1704
// its original size, using bilinear interpolation.
1705
// stride is in count of uint16_t.
1706
// This is used to scale U and V planes of I010 to I410 and I012 to I412.
1707
static void ScalePlaneUp2_12_Bilinear(int src_width,
1708
                                      int src_height,
1709
                                      int dst_width,
1710
                                      int dst_height,
1711
                                      int src_stride,
1712
                                      int dst_stride,
1713
                                      const uint16_t* src_ptr,
1714
357
                                      uint16_t* dst_ptr) {
1715
357
  void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride,
1716
357
                      uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) =
1717
357
      ScaleRowUp2_Bilinear_16_Any_C;
1718
357
  int x;
1719
1720
357
  (void)src_width;
1721
  // This function can only scale up by 2 times.
1722
357
  assert(src_width == ((dst_width + 1) / 2));
1723
357
  assert(src_height == ((dst_height + 1) / 2));
1724
1725
357
#ifdef HAS_SCALEROWUP2_BILINEAR_12_SSSE3
1726
357
  if (TestCpuFlag(kCpuHasSSSE3)) {
1727
357
    Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_SSSE3;
1728
357
  }
1729
357
#endif
1730
1731
357
#ifdef HAS_SCALEROWUP2_BILINEAR_12_AVX2
1732
357
  if (TestCpuFlag(kCpuHasAVX2)) {
1733
357
    Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_AVX2;
1734
357
  }
1735
357
#endif
1736
1737
#ifdef HAS_SCALEROWUP2_BILINEAR_12_NEON
1738
  if (TestCpuFlag(kCpuHasNEON)) {
1739
    Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_NEON;
1740
  }
1741
#endif
1742
1743
357
  Scale2RowUp(src_ptr, 0, dst_ptr, 0, dst_width);
1744
357
  dst_ptr += dst_stride;
1745
16.1k
  for (x = 0; x < src_height - 1; ++x) {
1746
15.8k
    Scale2RowUp(src_ptr, src_stride, dst_ptr, dst_stride, dst_width);
1747
15.8k
    src_ptr += src_stride;
1748
15.8k
    dst_ptr += 2 * dst_stride;
1749
15.8k
  }
1750
357
  if (!(dst_height & 1)) {
1751
115
    Scale2RowUp(src_ptr, 0, dst_ptr, 0, dst_width);
1752
115
  }
1753
357
}
1754
1755
static void ScalePlaneUp2_16_Linear(int src_width,
1756
                                    int src_height,
1757
                                    int dst_width,
1758
                                    int dst_height,
1759
                                    int src_stride,
1760
                                    int dst_stride,
1761
                                    const uint16_t* src_ptr,
1762
0
                                    uint16_t* dst_ptr) {
1763
0
  void (*ScaleRowUp)(const uint16_t* src_ptr, uint16_t* dst_ptr,
1764
0
                     int dst_width) = ScaleRowUp2_Linear_16_Any_C;
1765
0
  int i;
1766
0
  int y;
1767
0
  int dy;
1768
1769
0
  (void)src_width;
1770
  // This function can only scale up by 2 times horizontally.
1771
0
  assert(src_width == ((dst_width + 1) / 2));
1772
1773
0
#ifdef HAS_SCALEROWUP2_LINEAR_16_SSE2
1774
0
  if (TestCpuFlag(kCpuHasSSE2)) {
1775
0
    ScaleRowUp = ScaleRowUp2_Linear_16_Any_SSE2;
1776
0
  }
1777
0
#endif
1778
1779
0
#ifdef HAS_SCALEROWUP2_LINEAR_16_AVX2
1780
0
  if (TestCpuFlag(kCpuHasAVX2)) {
1781
0
    ScaleRowUp = ScaleRowUp2_Linear_16_Any_AVX2;
1782
0
  }
1783
0
#endif
1784
1785
#ifdef HAS_SCALEROWUP2_LINEAR_16_NEON
1786
  if (TestCpuFlag(kCpuHasNEON)) {
1787
    ScaleRowUp = ScaleRowUp2_Linear_16_Any_NEON;
1788
  }
1789
#endif
1790
1791
0
  if (dst_height == 1) {
1792
0
    ScaleRowUp(src_ptr + ((src_height - 1) / 2) * (int64_t)src_stride, dst_ptr,
1793
0
               dst_width);
1794
0
  } else {
1795
0
    dy = FixedDiv(src_height - 1, dst_height - 1);
1796
0
    y = (1 << 15) - 1;
1797
0
    for (i = 0; i < dst_height; ++i) {
1798
0
      ScaleRowUp(src_ptr + (y >> 16) * (int64_t)src_stride, dst_ptr, dst_width);
1799
0
      dst_ptr += dst_stride;
1800
0
      y += dy;
1801
0
    }
1802
0
  }
1803
0
}
1804
1805
static void ScalePlaneUp2_16_Bilinear(int src_width,
1806
                                      int src_height,
1807
                                      int dst_width,
1808
                                      int dst_height,
1809
                                      int src_stride,
1810
                                      int dst_stride,
1811
                                      const uint16_t* src_ptr,
1812
0
                                      uint16_t* dst_ptr) {
1813
0
  void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride,
1814
0
                      uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) =
1815
0
      ScaleRowUp2_Bilinear_16_Any_C;
1816
0
  int x;
1817
1818
0
  (void)src_width;
1819
  // This function can only scale up by 2 times.
1820
0
  assert(src_width == ((dst_width + 1) / 2));
1821
0
  assert(src_height == ((dst_height + 1) / 2));
1822
1823
0
#ifdef HAS_SCALEROWUP2_BILINEAR_16_SSE2
1824
0
  if (TestCpuFlag(kCpuHasSSE2)) {
1825
0
    Scale2RowUp = ScaleRowUp2_Bilinear_16_Any_SSE2;
1826
0
  }
1827
0
#endif
1828
1829
0
#ifdef HAS_SCALEROWUP2_BILINEAR_16_AVX2
1830
0
  if (TestCpuFlag(kCpuHasAVX2)) {
1831
0
    Scale2RowUp = ScaleRowUp2_Bilinear_16_Any_AVX2;
1832
0
  }
1833
0
#endif
1834
1835
#ifdef HAS_SCALEROWUP2_BILINEAR_16_NEON
1836
  if (TestCpuFlag(kCpuHasNEON)) {
1837
    Scale2RowUp = ScaleRowUp2_Bilinear_16_Any_NEON;
1838
  }
1839
#endif
1840
1841
0
  Scale2RowUp(src_ptr, 0, dst_ptr, 0, dst_width);
1842
0
  dst_ptr += dst_stride;
1843
0
  for (x = 0; x < src_height - 1; ++x) {
1844
0
    Scale2RowUp(src_ptr, src_stride, dst_ptr, dst_stride, dst_width);
1845
0
    src_ptr += src_stride;
1846
0
    dst_ptr += 2 * dst_stride;
1847
0
  }
1848
0
  if (!(dst_height & 1)) {
1849
0
    Scale2RowUp(src_ptr, 0, dst_ptr, 0, dst_width);
1850
0
  }
1851
0
}
1852
1853
static int ScalePlaneBilinearUp_16(int src_width,
1854
                                   int src_height,
1855
                                   int dst_width,
1856
                                   int dst_height,
1857
                                   int src_stride,
1858
                                   int dst_stride,
1859
                                   const uint16_t* src_ptr,
1860
                                   uint16_t* dst_ptr,
1861
4.43k
                                   enum FilterMode filtering) {
1862
4.43k
  int j;
1863
  // Initial source x/y coordinate and step values as 16.16 fixed point.
1864
4.43k
  int x = 0;
1865
4.43k
  int y = 0;
1866
4.43k
  int dx = 0;
1867
4.43k
  int dy = 0;
1868
4.43k
  const int max_y = (src_height - 1) << 16;
1869
4.43k
  void (*InterpolateRow)(uint16_t* dst_ptr, const uint16_t* src_ptr,
1870
4.43k
                         ptrdiff_t src_stride, int dst_width,
1871
4.43k
                         int source_y_fraction) = InterpolateRow_16_C;
1872
4.43k
  void (*ScaleFilterCols)(uint16_t* dst_ptr, const uint16_t* src_ptr,
1873
4.43k
                          int dst_width, int x, int dx) =
1874
4.43k
      filtering ? ScaleFilterCols_16_C : ScaleCols_16_C;
1875
4.43k
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
1876
4.43k
             &dx, &dy);
1877
4.43k
  src_width = Abs(src_width);
1878
1879
#if defined(HAS_INTERPOLATEROW_16_SSE2)
1880
  if (TestCpuFlag(kCpuHasSSE2)) {
1881
    InterpolateRow = InterpolateRow_16_Any_SSE2;
1882
    if (IS_ALIGNED(dst_width, 16)) {
1883
      InterpolateRow = InterpolateRow_16_SSE2;
1884
    }
1885
  }
1886
#endif
1887
#if defined(HAS_INTERPOLATEROW_16_SSSE3)
1888
  if (TestCpuFlag(kCpuHasSSSE3)) {
1889
    InterpolateRow = InterpolateRow_16_Any_SSSE3;
1890
    if (IS_ALIGNED(dst_width, 16)) {
1891
      InterpolateRow = InterpolateRow_16_SSSE3;
1892
    }
1893
  }
1894
#endif
1895
#if defined(HAS_INTERPOLATEROW_16_AVX2)
1896
  if (TestCpuFlag(kCpuHasAVX2)) {
1897
    InterpolateRow = InterpolateRow_16_Any_AVX2;
1898
    if (IS_ALIGNED(dst_width, 32)) {
1899
      InterpolateRow = InterpolateRow_16_AVX2;
1900
    }
1901
  }
1902
#endif
1903
#if defined(HAS_INTERPOLATEROW_16_NEON)
1904
  if (TestCpuFlag(kCpuHasNEON)) {
1905
    InterpolateRow = InterpolateRow_16_Any_NEON;
1906
    if (IS_ALIGNED(dst_width, 16)) {
1907
      InterpolateRow = InterpolateRow_16_NEON;
1908
    }
1909
  }
1910
#endif
1911
#if defined(HAS_INTERPOLATEROW_16_SME)
1912
  if (TestCpuFlag(kCpuHasSME)) {
1913
    InterpolateRow = InterpolateRow_16_SME;
1914
  }
1915
#endif
1916
1917
4.43k
  if (filtering && src_width >= 32768) {
1918
0
    ScaleFilterCols = ScaleFilterCols64_16_C;
1919
0
  }
1920
#if defined(HAS_SCALEFILTERCOLS_16_SSSE3)
1921
  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
1922
    ScaleFilterCols = ScaleFilterCols_16_SSSE3;
1923
  }
1924
#endif
1925
4.43k
  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
1926
0
    ScaleFilterCols = ScaleColsUp2_16_C;
1927
#if defined(HAS_SCALECOLS_16_SSE2)
1928
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
1929
      ScaleFilterCols = ScaleColsUp2_16_SSE2;
1930
    }
1931
#endif
1932
0
  }
1933
4.43k
  if (y > max_y) {
1934
651
    y = max_y;
1935
651
  }
1936
4.43k
  {
1937
4.43k
    int yi = y >> 16;
1938
4.43k
    const uint16_t* src = src_ptr + yi * (int64_t)src_stride;
1939
1940
    // Allocate 2 row buffers.
1941
4.43k
    const int row_size = (dst_width + 31) & ~31;
1942
4.43k
    align_buffer_64(row, row_size * 4);
1943
4.43k
    int rowstride = row_size;
1944
4.43k
    int lasty = yi;
1945
4.43k
    uint16_t* rowptr = (uint16_t*)row;
1946
4.43k
    if (!row)
1947
0
      return 1;
1948
1949
4.43k
    ScaleFilterCols(rowptr, src, dst_width, x, dx);
1950
4.43k
    if (src_height > 1) {
1951
3.78k
      src += src_stride;
1952
3.78k
    }
1953
4.43k
    ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx);
1954
4.43k
    if (src_height > 2) {
1955
2.95k
      src += src_stride;
1956
2.95k
    }
1957
1958
2.45M
    for (j = 0; j < dst_height; ++j) {
1959
2.44M
      yi = y >> 16;
1960
2.44M
      if (yi != lasty) {
1961
193k
        if (y > max_y) {
1962
0
          y = max_y;
1963
0
          yi = y >> 16;
1964
0
          src = src_ptr + yi * (int64_t)src_stride;
1965
0
        }
1966
193k
        if (yi != lasty) {
1967
193k
          ScaleFilterCols(rowptr, src, dst_width, x, dx);
1968
193k
          rowptr += rowstride;
1969
193k
          rowstride = -rowstride;
1970
193k
          lasty = yi;
1971
193k
          if ((y + 65536) < max_y) {
1972
190k
            src += src_stride;
1973
190k
          }
1974
193k
        }
1975
193k
      }
1976
2.44M
      if (filtering == kFilterLinear) {
1977
249k
        InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0);
1978
2.19M
      } else {
1979
2.19M
        int yf = (y >> 8) & 255;
1980
2.19M
        InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf);
1981
2.19M
      }
1982
2.44M
      dst_ptr += dst_stride;
1983
2.44M
      y += dy;
1984
2.44M
    }
1985
4.43k
    free_aligned_buffer_64(row);
1986
4.43k
  }
1987
0
  return 0;
1988
4.43k
}
1989
1990
// Scale Plane to/from any dimensions, without interpolation.
1991
// Fixed point math is used for performance: The upper 16 bits
1992
// of x and dx is the integer part of the source position and
1993
// the lower 16 bits are the fixed decimal part.
1994
1995
static void ScalePlaneSimple(int src_width,
1996
                             int src_height,
1997
                             int dst_width,
1998
                             int dst_height,
1999
                             int src_stride,
2000
                             int dst_stride,
2001
                             const uint8_t* src_ptr,
2002
988
                             uint8_t* dst_ptr) {
2003
988
  int i;
2004
988
  void (*ScaleCols)(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width,
2005
988
                    int x, int dx) = ScaleCols_C;
2006
  // Initial source x/y coordinate and step values as 16.16 fixed point.
2007
988
  int x = 0;
2008
988
  int y = 0;
2009
988
  int dx = 0;
2010
988
  int dy = 0;
2011
988
  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone, &x, &y,
2012
988
             &dx, &dy);
2013
988
  src_width = Abs(src_width);
2014
2015
988
  if (src_width * 2 == dst_width && x < 0x8000) {
2016
62
    ScaleCols = ScaleColsUp2_C;
2017
#if defined(HAS_SCALECOLS_SSE2)
2018
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
2019
      ScaleCols = ScaleColsUp2_SSE2;
2020
    }
2021
#endif
2022
62
  }
2023
2024
648k
  for (i = 0; i < dst_height; ++i) {
2025
647k
    ScaleCols(dst_ptr, src_ptr + (y >> 16) * (int64_t)src_stride, dst_width, x,
2026
647k
              dx);
2027
647k
    dst_ptr += dst_stride;
2028
647k
    y += dy;
2029
647k
  }
2030
988
}
2031
2032
static void ScalePlaneSimple_16(int src_width,
2033
                                int src_height,
2034
                                int dst_width,
2035
                                int dst_height,
2036
                                int src_stride,
2037
                                int dst_stride,
2038
                                const uint16_t* src_ptr,
2039
844
                                uint16_t* dst_ptr) {
2040
844
  int i;
2041
844
  void (*ScaleCols)(uint16_t* dst_ptr, const uint16_t* src_ptr, int dst_width,
2042
844
                    int x, int dx) = ScaleCols_16_C;
2043
  // Initial source x/y coordinate and step values as 16.16 fixed point.
2044
844
  int x = 0;
2045
844
  int y = 0;
2046
844
  int dx = 0;
2047
844
  int dy = 0;
2048
844
  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone, &x, &y,
2049
844
             &dx, &dy);
2050
844
  src_width = Abs(src_width);
2051
2052
844
  if (src_width * 2 == dst_width && x < 0x8000) {
2053
66
    ScaleCols = ScaleColsUp2_16_C;
2054
#if defined(HAS_SCALECOLS_16_SSE2)
2055
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
2056
      ScaleCols = ScaleColsUp2_16_SSE2;
2057
    }
2058
#endif
2059
66
  }
2060
2061
537k
  for (i = 0; i < dst_height; ++i) {
2062
536k
    ScaleCols(dst_ptr, src_ptr + (y >> 16) * (int64_t)src_stride, dst_width, x,
2063
536k
              dx);
2064
536k
    dst_ptr += dst_stride;
2065
536k
    y += dy;
2066
536k
  }
2067
844
}
2068
2069
// Scale a plane.
2070
// This function dispatches to a specialized scaler based on scale factor.
2071
LIBYUV_API
2072
int ScalePlane(const uint8_t* src,
2073
               int src_stride,
2074
               int src_width,
2075
               int src_height,
2076
               uint8_t* dst,
2077
               int dst_stride,
2078
               int dst_width,
2079
               int dst_height,
2080
16.6k
               enum FilterMode filtering) {
2081
  // Simplify filtering when possible.
2082
16.6k
  filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height,
2083
16.6k
                                filtering);
2084
2085
  // Negative height means invert the image.
2086
16.6k
  if (src_height < 0) {
2087
0
    src_height = -src_height;
2088
0
    src = src + (src_height - 1) * (int64_t)src_stride;
2089
0
    src_stride = -src_stride;
2090
0
  }
2091
  // Use specialized scales to improve performance for common resolutions.
2092
  // For example, all the 1/2 scalings will use ScalePlaneDown2()
2093
16.6k
  if (dst_width == src_width && dst_height == src_height) {
2094
    // Straight copy.
2095
92
    CopyPlane(src, src_stride, dst, dst_stride, dst_width, dst_height);
2096
92
    return 0;
2097
92
  }
2098
16.5k
  if (dst_width == src_width && filtering != kFilterBox) {
2099
5.25k
    int dy = 0;
2100
5.25k
    int y = 0;
2101
    // When scaling down, use the center 2 rows to filter.
2102
    // When scaling up, last row of destination uses the last 2 source rows.
2103
5.25k
    if (dst_height <= src_height) {
2104
634
      dy = FixedDiv(src_height, dst_height);
2105
634
      y = CENTERSTART(dy, -32768);  // Subtract 0.5 (32768) to center filter.
2106
4.61k
    } else if (src_height > 1 && dst_height > 1) {
2107
4.47k
      dy = FixedDiv1(src_height, dst_height);
2108
4.47k
    }
2109
    // Arbitrary scale vertically, but unscaled horizontally.
2110
5.25k
    ScalePlaneVertical(src_height, dst_width, dst_height, src_stride,
2111
5.25k
                       dst_stride, src, dst, 0, y, dy, /*bpp=*/1, filtering);
2112
5.25k
    return 0;
2113
5.25k
  }
2114
11.3k
  if (dst_width <= Abs(src_width) && dst_height <= src_height) {
2115
    // Scale down.
2116
2.55k
    if (4 * dst_width == 3 * src_width && 4 * dst_height == 3 * src_height) {
2117
      // optimized, 3/4
2118
27
      ScalePlaneDown34(src_width, src_height, dst_width, dst_height, src_stride,
2119
27
                       dst_stride, src, dst, filtering);
2120
27
      return 0;
2121
27
    }
2122
2.53k
    if (2 * dst_width == src_width && 2 * dst_height == src_height) {
2123
      // optimized, 1/2
2124
65
      ScalePlaneDown2(src_width, src_height, dst_width, dst_height, src_stride,
2125
65
                      dst_stride, src, dst, filtering);
2126
65
      return 0;
2127
65
    }
2128
    // 3/8 rounded up for odd sized chroma height.
2129
2.46k
    if (8 * dst_width == 3 * src_width && 8 * dst_height == 3 * src_height) {
2130
      // optimized, 3/8
2131
22
      ScalePlaneDown38(src_width, src_height, dst_width, dst_height, src_stride,
2132
22
                       dst_stride, src, dst, filtering);
2133
22
      return 0;
2134
22
    }
2135
2.44k
    if (4 * dst_width == src_width && 4 * dst_height == src_height &&
2136
2.44k
        (filtering == kFilterBox || filtering == kFilterNone)) {
2137
      // optimized, 1/4
2138
44
      ScalePlaneDown4(src_width, src_height, dst_width, dst_height, src_stride,
2139
44
                      dst_stride, src, dst, filtering);
2140
44
      return 0;
2141
44
    }
2142
2.44k
  }
2143
11.1k
  if (filtering == kFilterBox && dst_height * 2 < src_height) {
2144
1.18k
    return ScalePlaneBox(src_width, src_height, dst_width, dst_height,
2145
1.18k
                         src_stride, dst_stride, src, dst);
2146
1.18k
  }
2147
9.99k
  if ((dst_width + 1) / 2 == src_width && filtering == kFilterLinear) {
2148
225
    ScalePlaneUp2_Linear(src_width, src_height, dst_width, dst_height,
2149
225
                         src_stride, dst_stride, src, dst);
2150
225
    return 0;
2151
225
  }
2152
9.77k
  if ((dst_height + 1) / 2 == src_height && (dst_width + 1) / 2 == src_width &&
2153
9.77k
      (filtering == kFilterBilinear || filtering == kFilterBox)) {
2154
277
    ScalePlaneUp2_Bilinear(src_width, src_height, dst_width, dst_height,
2155
277
                           src_stride, dst_stride, src, dst);
2156
277
    return 0;
2157
277
  }
2158
9.49k
  if (filtering && dst_height > src_height) {
2159
4.75k
    return ScalePlaneBilinearUp(src_width, src_height, dst_width, dst_height,
2160
4.75k
                                src_stride, dst_stride, src, dst, filtering);
2161
4.75k
  }
2162
4.74k
  if (filtering) {
2163
3.75k
    return ScalePlaneBilinearDown(src_width, src_height, dst_width, dst_height,
2164
3.75k
                                  src_stride, dst_stride, src, dst, filtering);
2165
3.75k
  }
2166
988
  ScalePlaneSimple(src_width, src_height, dst_width, dst_height, src_stride,
2167
988
                   dst_stride, src, dst);
2168
988
  return 0;
2169
4.74k
}
2170
2171
// Scale a single 16 bit plane.
// Picks the most specialised scaler that matches the scale factor and the
// (possibly simplified) filter mode, falling back to the general bilinear or
// point-sampling scalers. Strides are counted in uint16_t elements.
// A negative src_height flips the image vertically.
// Returns 0 on success, or the non-zero result of the scaler that failed.
LIBYUV_API
int ScalePlane_16(const uint16_t* src,
                  int src_stride,
                  int src_width,
                  int src_height,
                  uint16_t* dst,
                  int dst_stride,
                  int dst_width,
                  int dst_height,
                  enum FilterMode filtering) {
  // Simplify filtering when possible.
  filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height,
                                filtering);

  // Negative height means invert the image: start at the last row and walk
  // upwards with a negated stride.
  if (src_height < 0) {
    src_height = -src_height;
    src = src + (src_height - 1) * (int64_t)src_stride;
    src_stride = -src_stride;
  }

  // Use specialized scales to improve performance for common resolutions.
  // For example, all the 1/2 scalings will use ScalePlaneDown2_16()
  if (dst_width == src_width) {
    if (dst_height == src_height) {
      // Same size: straight copy.
      CopyPlane_16(src, src_stride, dst, dst_stride, dst_width, dst_height);
      return 0;
    }
    if (filtering != kFilterBox) {
      // Arbitrary scale vertically, but unscaled horizontally.
      int dy = 0;
      int y = 0;
      if (dst_height <= src_height) {
        // When scaling down, use the center 2 rows to filter.
        dy = FixedDiv(src_height, dst_height);
        y = CENTERSTART(dy, -32768);  // Subtract 0.5 (32768) to center filter.
      } else if (src_height > 1 && dst_height > 1) {
        // When scaling up, ensure the last row of destination uses the last
        // source row. Heights of 1 skip the division and leave dy at 0.
        dy = FixedDiv1(src_height, dst_height);
      }
      ScalePlaneVertical_16(src_height, dst_width, dst_height, src_stride,
                            dst_stride, src, dst, 0, y, dy, /*bpp=*/1,
                            filtering);
      return 0;
    }
  }

  if (dst_width <= Abs(src_width) && dst_height <= src_height) {
    // Scale down by one of the exact ratios that have a dedicated scaler.
    if (4 * dst_width == 3 * src_width && 4 * dst_height == 3 * src_height) {
      // optimized, 3/4
      ScalePlaneDown34_16(src_width, src_height, dst_width, dst_height,
                          src_stride, dst_stride, src, dst, filtering);
      return 0;
    } else if (2 * dst_width == src_width && 2 * dst_height == src_height) {
      // optimized, 1/2
      ScalePlaneDown2_16(src_width, src_height, dst_width, dst_height,
                         src_stride, dst_stride, src, dst, filtering);
      return 0;
    } else if (8 * dst_width == 3 * src_width &&
               8 * dst_height == 3 * src_height) {
      // optimized, 3/8 (rounded up for odd sized chroma height)
      ScalePlaneDown38_16(src_width, src_height, dst_width, dst_height,
                          src_stride, dst_stride, src, dst, filtering);
      return 0;
    } else if (4 * dst_width == src_width && 4 * dst_height == src_height &&
               (filtering == kFilterBox || filtering == kFilterNone)) {
      // optimized, 1/4
      ScalePlaneDown4_16(src_width, src_height, dst_width, dst_height,
                         src_stride, dst_stride, src, dst, filtering);
      return 0;
    }
  }

  // Box filter when shrinking the height by more than 2x.
  if (filtering == kFilterBox && dst_height * 2 < src_height) {
    return ScalePlaneBox_16(src_width, src_height, dst_width, dst_height,
                            src_stride, dst_stride, src, dst);
  }

  // Exact 2x width with linear filtering.
  if ((dst_width + 1) / 2 == src_width && filtering == kFilterLinear) {
    ScalePlaneUp2_16_Linear(src_width, src_height, dst_width, dst_height,
                            src_stride, dst_stride, src, dst);
    return 0;
  }

  // Exact 2x in both directions with bilinear (or box) filtering.
  if ((dst_height + 1) / 2 == src_height && (dst_width + 1) / 2 == src_width &&
      (filtering == kFilterBilinear || filtering == kFilterBox)) {
    ScalePlaneUp2_16_Bilinear(src_width, src_height, dst_width, dst_height,
                              src_stride, dst_stride, src, dst);
    return 0;
  }

  // General fallbacks.
  if (!filtering) {
    ScalePlaneSimple_16(src_width, src_height, dst_width, dst_height,
                        src_stride, dst_stride, src, dst);
    return 0;
  }
  if (dst_height > src_height) {
    return ScalePlaneBilinearUp_16(src_width, src_height, dst_width, dst_height,
                                   src_stride, dst_stride, src, dst, filtering);
  }
  return ScalePlaneBilinearDown_16(src_width, src_height, dst_width,
                                   dst_height, src_stride, dst_stride, src,
                                   dst, filtering);
}
2274
2275
// Scale a single plane of at most 12 bit samples stored in uint16_t.
// Handles the exact 2x upscale cases with the 12 bit kernels and hands
// everything else to ScalePlane_16(). Strides are counted in uint16_t
// elements. A negative src_height flips the image vertically.
// Returns 0 on success, or the result of ScalePlane_16().
LIBYUV_API
int ScalePlane_12(const uint16_t* src,
                  int src_stride,
                  int src_width,
                  int src_height,
                  uint16_t* dst,
                  int dst_stride,
                  int dst_width,
                  int dst_height,
                  enum FilterMode filtering) {
  // Simplify filtering when possible.
  filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height,
                                filtering);

  // Negative height means invert the image: start at the last row and walk
  // upwards with a negated stride.
  if (src_height < 0) {
    src_height = -src_height;
    src = src + (src_height - 1) * (int64_t)src_stride;
    src_stride = -src_stride;
  }

  if ((dst_width + 1) / 2 == src_width && filtering == kFilterLinear) {
    // Exact 2x width with linear filtering.
    ScalePlaneUp2_12_Linear(src_width, src_height, dst_width, dst_height,
                            src_stride, dst_stride, src, dst);
    return 0;
  } else if ((dst_height + 1) / 2 == src_height &&
             (dst_width + 1) / 2 == src_width &&
             (filtering == kFilterBilinear || filtering == kFilterBox)) {
    // Exact 2x in both directions with bilinear (or box) filtering.
    ScalePlaneUp2_12_Bilinear(src_width, src_height, dst_width, dst_height,
                              src_stride, dst_stride, src, dst);
    return 0;
  }

  // No 12 bit fast path applies; src_height is already positive here.
  return ScalePlane_16(src, src_stride, src_width, src_height, dst, dst_stride,
                       dst_width, dst_height, filtering);
}
2311
2312
// Scale an I420 image.
2313
// This function in turn calls a scaling function for each plane.
2314
2315
LIBYUV_API
2316
int I420Scale(const uint8_t* src_y,
2317
              int src_stride_y,
2318
              const uint8_t* src_u,
2319
              int src_stride_u,
2320
              const uint8_t* src_v,
2321
              int src_stride_v,
2322
              int src_width,
2323
              int src_height,
2324
              uint8_t* dst_y,
2325
              int dst_stride_y,
2326
              uint8_t* dst_u,
2327
              int dst_stride_u,
2328
              uint8_t* dst_v,
2329
              int dst_stride_v,
2330
              int dst_width,
2331
              int dst_height,
2332
0
              enum FilterMode filtering) {
2333
0
  int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
2334
0
  int src_halfheight = SUBSAMPLE(src_height, 1, 1);
2335
0
  int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
2336
0
  int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
2337
0
  int r;
2338
2339
0
  if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
2340
0
      src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
2341
0
      dst_width <= 0 || dst_height <= 0) {
2342
0
    return -1;
2343
0
  }
2344
2345
0
  r = ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y,
2346
0
                 dst_stride_y, dst_width, dst_height, filtering);
2347
0
  if (r != 0) {
2348
0
    return r;
2349
0
  }
2350
0
  r = ScalePlane(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u,
2351
0
                 dst_stride_u, dst_halfwidth, dst_halfheight, filtering);
2352
0
  if (r != 0) {
2353
0
    return r;
2354
0
  }
2355
0
  r = ScalePlane(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v,
2356
0
                 dst_stride_v, dst_halfwidth, dst_halfheight, filtering);
2357
0
  return r;
2358
0
}
2359
2360
// Scale a 16 bit I420 image.
// Validates the arguments, then scales the Y plane at full size and the U and
// V planes at half size (rounded up) by calling ScalePlane_16() for each.
// Returns -1 on invalid arguments, otherwise the result of the first
// ScalePlane_16() call that fails, or 0 when all three succeed.
LIBYUV_API
int I420Scale_16(const uint16_t* src_y,
                 int src_stride_y,
                 const uint16_t* src_u,
                 int src_stride_u,
                 const uint16_t* src_v,
                 int src_stride_v,
                 int src_width,
                 int src_height,
                 uint16_t* dst_y,
                 int dst_stride_y,
                 uint16_t* dst_u,
                 int dst_stride_u,
                 uint16_t* dst_v,
                 int dst_stride_v,
                 int dst_width,
                 int dst_height,
                 enum FilterMode filtering) {
  // Chroma plane sizes: half the luma size, rounded away from zero.
  int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
  int src_halfheight = SUBSAMPLE(src_height, 1, 1);
  int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
  int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
  int r;

  // All six plane pointers are required.
  if (!src_y || !src_u || !src_v || !dst_y || !dst_u || !dst_v) {
    return -1;
  }
  // Source width must be in 1..32768. Source height may be negative (the
  // plane scaler treats that as a vertical flip) but not 0 or above 32768.
  // NOTE(review): no lower bound is applied to a negative src_height --
  // confirm whether magnitudes above 32768 should also be rejected.
  if (src_width <= 0 || src_width > 32768 || src_height == 0 ||
      src_height > 32768) {
    return -1;
  }
  // Destination size must be positive.
  if (dst_width <= 0 || dst_height <= 0) {
    return -1;
  }

  // Scale Y, then U, then V; stop at the first failure.
  r = ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y,
                    dst_stride_y, dst_width, dst_height, filtering);
  if (r == 0) {
    r = ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u,
                      dst_stride_u, dst_halfwidth, dst_halfheight, filtering);
  }
  if (r == 0) {
    r = ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v,
                      dst_stride_v, dst_halfwidth, dst_halfheight, filtering);
  }
  return r;
}
2404
2405
LIBYUV_API
2406
int I420Scale_12(const uint16_t* src_y,
2407
                 int src_stride_y,
2408
                 const uint16_t* src_u,
2409
                 int src_stride_u,
2410
                 const uint16_t* src_v,
2411
                 int src_stride_v,
2412
                 int src_width,
2413
                 int src_height,
2414
                 uint16_t* dst_y,
2415
                 int dst_stride_y,
2416
                 uint16_t* dst_u,
2417
                 int dst_stride_u,
2418
                 uint16_t* dst_v,
2419
                 int dst_stride_v,
2420
                 int dst_width,
2421
                 int dst_height,
2422
0
                 enum FilterMode filtering) {
2423
0
  int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
2424
0
  int src_halfheight = SUBSAMPLE(src_height, 1, 1);
2425
0
  int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
2426
0
  int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
2427
0
  int r;
2428
2429
0
  if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
2430
0
      src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
2431
0
      dst_width <= 0 || dst_height <= 0) {
2432
0
    return -1;
2433
0
  }
2434
2435
0
  r = ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y,
2436
0
                    dst_stride_y, dst_width, dst_height, filtering);
2437
0
  if (r != 0) {
2438
0
    return r;
2439
0
  }
2440
0
  r = ScalePlane_12(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u,
2441
0
                    dst_stride_u, dst_halfwidth, dst_halfheight, filtering);
2442
0
  if (r != 0) {
2443
0
    return r;
2444
0
  }
2445
0
  r = ScalePlane_12(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v,
2446
0
                    dst_stride_v, dst_halfwidth, dst_halfheight, filtering);
2447
0
  return r;
2448
0
}
2449
2450
// Scale an I444 image.
2451
// This function in turn calls a scaling function for each plane.
2452
2453
LIBYUV_API
2454
int I444Scale(const uint8_t* src_y,
2455
              int src_stride_y,
2456
              const uint8_t* src_u,
2457
              int src_stride_u,
2458
              const uint8_t* src_v,
2459
              int src_stride_v,
2460
              int src_width,
2461
              int src_height,
2462
              uint8_t* dst_y,
2463
              int dst_stride_y,
2464
              uint8_t* dst_u,
2465
              int dst_stride_u,
2466
              uint8_t* dst_v,
2467
              int dst_stride_v,
2468
              int dst_width,
2469
              int dst_height,
2470
0
              enum FilterMode filtering) {
2471
0
  int r;
2472
2473
0
  if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
2474
0
      src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
2475
0
      dst_width <= 0 || dst_height <= 0) {
2476
0
    return -1;
2477
0
  }
2478
2479
0
  r = ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y,
2480
0
                 dst_stride_y, dst_width, dst_height, filtering);
2481
0
  if (r != 0) {
2482
0
    return r;
2483
0
  }
2484
0
  r = ScalePlane(src_u, src_stride_u, src_width, src_height, dst_u,
2485
0
                 dst_stride_u, dst_width, dst_height, filtering);
2486
0
  if (r != 0) {
2487
0
    return r;
2488
0
  }
2489
0
  r = ScalePlane(src_v, src_stride_v, src_width, src_height, dst_v,
2490
0
                 dst_stride_v, dst_width, dst_height, filtering);
2491
0
  return r;
2492
0
}
2493
2494
LIBYUV_API
2495
int I444Scale_16(const uint16_t* src_y,
2496
                 int src_stride_y,
2497
                 const uint16_t* src_u,
2498
                 int src_stride_u,
2499
                 const uint16_t* src_v,
2500
                 int src_stride_v,
2501
                 int src_width,
2502
                 int src_height,
2503
                 uint16_t* dst_y,
2504
                 int dst_stride_y,
2505
                 uint16_t* dst_u,
2506
                 int dst_stride_u,
2507
                 uint16_t* dst_v,
2508
                 int dst_stride_v,
2509
                 int dst_width,
2510
                 int dst_height,
2511
0
                 enum FilterMode filtering) {
2512
0
  int r;
2513
2514
0
  if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
2515
0
      src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
2516
0
      dst_width <= 0 || dst_height <= 0) {
2517
0
    return -1;
2518
0
  }
2519
2520
0
  r = ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y,
2521
0
                    dst_stride_y, dst_width, dst_height, filtering);
2522
0
  if (r != 0) {
2523
0
    return r;
2524
0
  }
2525
0
  r = ScalePlane_16(src_u, src_stride_u, src_width, src_height, dst_u,
2526
0
                    dst_stride_u, dst_width, dst_height, filtering);
2527
0
  if (r != 0) {
2528
0
    return r;
2529
0
  }
2530
0
  r = ScalePlane_16(src_v, src_stride_v, src_width, src_height, dst_v,
2531
0
                    dst_stride_v, dst_width, dst_height, filtering);
2532
0
  return r;
2533
0
}
2534
2535
LIBYUV_API
2536
int I444Scale_12(const uint16_t* src_y,
2537
                 int src_stride_y,
2538
                 const uint16_t* src_u,
2539
                 int src_stride_u,
2540
                 const uint16_t* src_v,
2541
                 int src_stride_v,
2542
                 int src_width,
2543
                 int src_height,
2544
                 uint16_t* dst_y,
2545
                 int dst_stride_y,
2546
                 uint16_t* dst_u,
2547
                 int dst_stride_u,
2548
                 uint16_t* dst_v,
2549
                 int dst_stride_v,
2550
                 int dst_width,
2551
                 int dst_height,
2552
0
                 enum FilterMode filtering) {
2553
0
  int r;
2554
2555
0
  if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
2556
0
      src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
2557
0
      dst_width <= 0 || dst_height <= 0) {
2558
0
    return -1;
2559
0
  }
2560
2561
0
  r = ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y,
2562
0
                    dst_stride_y, dst_width, dst_height, filtering);
2563
0
  if (r != 0) {
2564
0
    return r;
2565
0
  }
2566
0
  r = ScalePlane_12(src_u, src_stride_u, src_width, src_height, dst_u,
2567
0
                    dst_stride_u, dst_width, dst_height, filtering);
2568
0
  if (r != 0) {
2569
0
    return r;
2570
0
  }
2571
0
  r = ScalePlane_12(src_v, src_stride_v, src_width, src_height, dst_v,
2572
0
                    dst_stride_v, dst_width, dst_height, filtering);
2573
0
  return r;
2574
0
}
2575
2576
// Scale an I422 image.
2577
// This function in turn calls a scaling function for each plane.
2578
2579
LIBYUV_API
2580
int I422Scale(const uint8_t* src_y,
2581
              int src_stride_y,
2582
              const uint8_t* src_u,
2583
              int src_stride_u,
2584
              const uint8_t* src_v,
2585
              int src_stride_v,
2586
              int src_width,
2587
              int src_height,
2588
              uint8_t* dst_y,
2589
              int dst_stride_y,
2590
              uint8_t* dst_u,
2591
              int dst_stride_u,
2592
              uint8_t* dst_v,
2593
              int dst_stride_v,
2594
              int dst_width,
2595
              int dst_height,
2596
0
              enum FilterMode filtering) {
2597
0
  int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
2598
0
  int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
2599
0
  int r;
2600
2601
0
  if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
2602
0
      src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
2603
0
      dst_width <= 0 || dst_height <= 0) {
2604
0
    return -1;
2605
0
  }
2606
2607
0
  r = ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y,
2608
0
                 dst_stride_y, dst_width, dst_height, filtering);
2609
0
  if (r != 0) {
2610
0
    return r;
2611
0
  }
2612
0
  r = ScalePlane(src_u, src_stride_u, src_halfwidth, src_height, dst_u,
2613
0
                 dst_stride_u, dst_halfwidth, dst_height, filtering);
2614
0
  if (r != 0) {
2615
0
    return r;
2616
0
  }
2617
0
  r = ScalePlane(src_v, src_stride_v, src_halfwidth, src_height, dst_v,
2618
0
                 dst_stride_v, dst_halfwidth, dst_height, filtering);
2619
0
  return r;
2620
0
}
2621
2622
LIBYUV_API
2623
int I422Scale_16(const uint16_t* src_y,
2624
                 int src_stride_y,
2625
                 const uint16_t* src_u,
2626
                 int src_stride_u,
2627
                 const uint16_t* src_v,
2628
                 int src_stride_v,
2629
                 int src_width,
2630
                 int src_height,
2631
                 uint16_t* dst_y,
2632
                 int dst_stride_y,
2633
                 uint16_t* dst_u,
2634
                 int dst_stride_u,
2635
                 uint16_t* dst_v,
2636
                 int dst_stride_v,
2637
                 int dst_width,
2638
                 int dst_height,
2639
0
                 enum FilterMode filtering) {
2640
0
  int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
2641
0
  int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
2642
0
  int r;
2643
2644
0
  if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
2645
0
      src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
2646
0
      dst_width <= 0 || dst_height <= 0) {
2647
0
    return -1;
2648
0
  }
2649
2650
0
  r = ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y,
2651
0
                    dst_stride_y, dst_width, dst_height, filtering);
2652
0
  if (r != 0) {
2653
0
    return r;
2654
0
  }
2655
0
  r = ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_height, dst_u,
2656
0
                    dst_stride_u, dst_halfwidth, dst_height, filtering);
2657
0
  if (r != 0) {
2658
0
    return r;
2659
0
  }
2660
0
  r = ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_height, dst_v,
2661
0
                    dst_stride_v, dst_halfwidth, dst_height, filtering);
2662
0
  return r;
2663
0
}
2664
2665
LIBYUV_API
2666
int I422Scale_12(const uint16_t* src_y,
2667
                 int src_stride_y,
2668
                 const uint16_t* src_u,
2669
                 int src_stride_u,
2670
                 const uint16_t* src_v,
2671
                 int src_stride_v,
2672
                 int src_width,
2673
                 int src_height,
2674
                 uint16_t* dst_y,
2675
                 int dst_stride_y,
2676
                 uint16_t* dst_u,
2677
                 int dst_stride_u,
2678
                 uint16_t* dst_v,
2679
                 int dst_stride_v,
2680
                 int dst_width,
2681
                 int dst_height,
2682
0
                 enum FilterMode filtering) {
2683
0
  int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
2684
0
  int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
2685
0
  int r;
2686
2687
0
  if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
2688
0
      src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
2689
0
      dst_width <= 0 || dst_height <= 0) {
2690
0
    return -1;
2691
0
  }
2692
2693
0
  r = ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y,
2694
0
                    dst_stride_y, dst_width, dst_height, filtering);
2695
0
  if (r != 0) {
2696
0
    return r;
2697
0
  }
2698
0
  r = ScalePlane_12(src_u, src_stride_u, src_halfwidth, src_height, dst_u,
2699
0
                    dst_stride_u, dst_halfwidth, dst_height, filtering);
2700
0
  if (r != 0) {
2701
0
    return r;
2702
0
  }
2703
0
  r = ScalePlane_12(src_v, src_stride_v, src_halfwidth, src_height, dst_v,
2704
0
                    dst_stride_v, dst_halfwidth, dst_height, filtering);
2705
0
  return r;
2706
0
}
2707
2708
// Scale an NV12 image.
2709
// This function in turn calls a scaling function for each plane.
2710
2711
LIBYUV_API
2712
int NV12Scale(const uint8_t* src_y,
2713
              int src_stride_y,
2714
              const uint8_t* src_uv,
2715
              int src_stride_uv,
2716
              int src_width,
2717
              int src_height,
2718
              uint8_t* dst_y,
2719
              int dst_stride_y,
2720
              uint8_t* dst_uv,
2721
              int dst_stride_uv,
2722
              int dst_width,
2723
              int dst_height,
2724
0
              enum FilterMode filtering) {
2725
0
  int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
2726
0
  int src_halfheight = SUBSAMPLE(src_height, 1, 1);
2727
0
  int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
2728
0
  int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
2729
0
  int r;
2730
2731
0
  if (!src_y || !src_uv || src_width <= 0 || src_height == 0 ||
2732
0
      src_width > 32768 || src_height > 32768 || !dst_y || !dst_uv ||
2733
0
      dst_width <= 0 || dst_height <= 0) {
2734
0
    return -1;
2735
0
  }
2736
2737
0
  r = ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y,
2738
0
                 dst_stride_y, dst_width, dst_height, filtering);
2739
0
  if (r != 0) {
2740
0
    return r;
2741
0
  }
2742
0
  r = UVScale(src_uv, src_stride_uv, src_halfwidth, src_halfheight, dst_uv,
2743
0
              dst_stride_uv, dst_halfwidth, dst_halfheight, filtering);
2744
0
  return r;
2745
0
}
2746
2747
LIBYUV_API
2748
int NV24Scale(const uint8_t* src_y,
2749
              int src_stride_y,
2750
              const uint8_t* src_uv,
2751
              int src_stride_uv,
2752
              int src_width,
2753
              int src_height,
2754
              uint8_t* dst_y,
2755
              int dst_stride_y,
2756
              uint8_t* dst_uv,
2757
              int dst_stride_uv,
2758
              int dst_width,
2759
              int dst_height,
2760
0
              enum FilterMode filtering) {
2761
0
  int r;
2762
2763
0
  if (!src_y || !src_uv || src_width <= 0 || src_height == 0 ||
2764
0
      src_width > 32768 || src_height > 32768 || !dst_y || !dst_uv ||
2765
0
      dst_width <= 0 || dst_height <= 0) {
2766
0
    return -1;
2767
0
  }
2768
2769
0
  r = ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y,
2770
0
                 dst_stride_y, dst_width, dst_height, filtering);
2771
0
  if (r != 0) {
2772
0
    return r;
2773
0
  }
2774
0
  r = UVScale(src_uv, src_stride_uv, src_width, src_height, dst_uv,
2775
0
              dst_stride_uv, dst_width, dst_height, filtering);
2776
0
  return r;
2777
0
}
2778
2779
// Deprecated api
2780
LIBYUV_API
2781
int Scale(const uint8_t* src_y,
2782
          const uint8_t* src_u,
2783
          const uint8_t* src_v,
2784
          int src_stride_y,
2785
          int src_stride_u,
2786
          int src_stride_v,
2787
          int src_width,
2788
          int src_height,
2789
          uint8_t* dst_y,
2790
          uint8_t* dst_u,
2791
          uint8_t* dst_v,
2792
          int dst_stride_y,
2793
          int dst_stride_u,
2794
          int dst_stride_v,
2795
          int dst_width,
2796
          int dst_height,
2797
0
          LIBYUV_BOOL interpolate) {
2798
0
  return I420Scale(src_y, src_stride_y, src_u, src_stride_u, src_v,
2799
0
                   src_stride_v, src_width, src_height, dst_y, dst_stride_y,
2800
0
                   dst_u, dst_stride_u, dst_v, dst_stride_v, dst_width,
2801
0
                   dst_height, interpolate ? kFilterBox : kFilterNone);
2802
0
}
2803
2804
#ifdef __cplusplus
2805
}  // extern "C"
2806
}  // namespace libyuv
2807
#endif