Coverage Report

Created: 2025-10-10 07:09

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libavif/ext/libyuv/source/scale.cc
Line
Count
Source
1
/*
2
 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5
 *  that can be found in the LICENSE file in the root of the source
6
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS. All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
9
 */
10
11
#include "libyuv/scale.h"
12
13
#include <assert.h>
14
#include <string.h>
15
16
#include "libyuv/cpu_id.h"
17
#include "libyuv/planar_functions.h"  // For CopyPlane
18
#include "libyuv/row.h"
19
#include "libyuv/scale_row.h"
20
#include "libyuv/scale_uv.h"  // For UVScale
21
22
#ifdef __cplusplus
23
namespace libyuv {
24
extern "C" {
25
#endif
26
27
62.5k
// Returns the magnitude of v.
static __inline int Abs(int v) {
  if (v < 0) {
    return -v;
  }
  return v;
}
30
31
0
// SUBSAMPLE: shifts (|v| + a) right by s and restores the sign of v.
// CENTERSTART: halves |dx|, adds s, and restores the sign of dx.
// Every argument and the whole expansion are parenthesized so the macros can
// be embedded in larger expressions and receive compound arguments safely.
// Note: arguments are still evaluated more than once.
#define SUBSAMPLE(v, a, s) \
  (((v) < 0) ? (-((-(v) + (a)) >> (s))) : (((v) + (a)) >> (s)))
#define CENTERSTART(dx, s) \
  (((dx) < 0) ? -((-(dx) >> 1) + (s)) : (((dx) >> 1) + (s)))
33
34
// Scale plane, 1/2
35
// This is an optimized version for scaling down a plane to 1/2 of
36
// its original size.
37
38
static void ScalePlaneDown2(int src_width,
39
                            int src_height,
40
                            int dst_width,
41
                            int dst_height,
42
                            int src_stride,
43
                            int dst_stride,
44
                            const uint8_t* src_ptr,
45
                            uint8_t* dst_ptr,
46
89
                            enum FilterMode filtering) {
47
89
  int y;
48
89
  void (*ScaleRowDown2)(const uint8_t* src_ptr, ptrdiff_t src_stride,
49
89
                        uint8_t* dst_ptr, int dst_width) =
50
89
      filtering == kFilterNone
51
89
          ? ScaleRowDown2_C
52
89
          : (filtering == kFilterLinear ? ScaleRowDown2Linear_C
53
89
                                        : ScaleRowDown2Box_C);
54
89
  int row_stride = src_stride * 2;
55
89
  (void)src_width;
56
89
  (void)src_height;
57
89
  if (!filtering) {
58
0
    src_ptr += src_stride;  // Point to odd rows.
59
0
    src_stride = 0;
60
0
  }
61
62
#if defined(HAS_SCALEROWDOWN2_NEON)
63
  if (TestCpuFlag(kCpuHasNEON)) {
64
    ScaleRowDown2 =
65
        filtering == kFilterNone
66
            ? ScaleRowDown2_Any_NEON
67
            : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_NEON
68
                                          : ScaleRowDown2Box_Any_NEON);
69
    if (IS_ALIGNED(dst_width, 16)) {
70
      ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_NEON
71
                                               : (filtering == kFilterLinear
72
                                                      ? ScaleRowDown2Linear_NEON
73
                                                      : ScaleRowDown2Box_NEON);
74
    }
75
  }
76
#endif
77
#if defined(HAS_SCALEROWDOWN2_SME)
78
  if (TestCpuFlag(kCpuHasSME)) {
79
    ScaleRowDown2 = filtering == kFilterNone     ? ScaleRowDown2_SME
80
                    : filtering == kFilterLinear ? ScaleRowDown2Linear_SME
81
                                                 : ScaleRowDown2Box_SME;
82
  }
83
#endif
84
89
#if defined(HAS_SCALEROWDOWN2_SSSE3)
85
89
  if (TestCpuFlag(kCpuHasSSSE3)) {
86
89
    ScaleRowDown2 =
87
89
        filtering == kFilterNone
88
89
            ? ScaleRowDown2_Any_SSSE3
89
89
            : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_SSSE3
90
89
                                          : ScaleRowDown2Box_Any_SSSE3);
91
89
    if (IS_ALIGNED(dst_width, 16)) {
92
20
      ScaleRowDown2 =
93
20
          filtering == kFilterNone
94
20
              ? ScaleRowDown2_SSSE3
95
20
              : (filtering == kFilterLinear ? ScaleRowDown2Linear_SSSE3
96
20
                                            : ScaleRowDown2Box_SSSE3);
97
20
    }
98
89
  }
99
89
#endif
100
89
#if defined(HAS_SCALEROWDOWN2_AVX2)
101
89
  if (TestCpuFlag(kCpuHasAVX2)) {
102
89
    ScaleRowDown2 =
103
89
        filtering == kFilterNone
104
89
            ? ScaleRowDown2_Any_AVX2
105
89
            : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_AVX2
106
89
                                          : ScaleRowDown2Box_Any_AVX2);
107
89
    if (IS_ALIGNED(dst_width, 32)) {
108
16
      ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_AVX2
109
16
                                               : (filtering == kFilterLinear
110
16
                                                      ? ScaleRowDown2Linear_AVX2
111
16
                                                      : ScaleRowDown2Box_AVX2);
112
16
    }
113
89
  }
114
89
#endif
115
#if defined(HAS_SCALEROWDOWN2_MSA)
116
  if (TestCpuFlag(kCpuHasMSA)) {
117
    ScaleRowDown2 =
118
        filtering == kFilterNone
119
            ? ScaleRowDown2_Any_MSA
120
            : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_MSA
121
                                          : ScaleRowDown2Box_Any_MSA);
122
    if (IS_ALIGNED(dst_width, 32)) {
123
      ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_MSA
124
                                               : (filtering == kFilterLinear
125
                                                      ? ScaleRowDown2Linear_MSA
126
                                                      : ScaleRowDown2Box_MSA);
127
    }
128
  }
129
#endif
130
#if defined(HAS_SCALEROWDOWN2_LSX)
131
  if (TestCpuFlag(kCpuHasLSX)) {
132
    ScaleRowDown2 =
133
        filtering == kFilterNone
134
            ? ScaleRowDown2_Any_LSX
135
            : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_LSX
136
                                          : ScaleRowDown2Box_Any_LSX);
137
    if (IS_ALIGNED(dst_width, 32)) {
138
      ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_LSX
139
                                               : (filtering == kFilterLinear
140
                                                      ? ScaleRowDown2Linear_LSX
141
                                                      : ScaleRowDown2Box_LSX);
142
    }
143
  }
144
#endif
145
#if defined(HAS_SCALEROWDOWN2_RVV)
146
  if (TestCpuFlag(kCpuHasRVV)) {
147
    ScaleRowDown2 = filtering == kFilterNone
148
                        ? ScaleRowDown2_RVV
149
                        : (filtering == kFilterLinear ? ScaleRowDown2Linear_RVV
150
                                                      : ScaleRowDown2Box_RVV);
151
  }
152
#endif
153
154
89
  if (filtering == kFilterLinear) {
155
0
    src_stride = 0;
156
0
  }
157
  // TODO(fbarchard): Loop through source height to allow odd height.
158
1.26k
  for (y = 0; y < dst_height; ++y) {
159
1.17k
    ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
160
1.17k
    src_ptr += row_stride;
161
1.17k
    dst_ptr += dst_stride;
162
1.17k
  }
163
89
}
164
165
static void ScalePlaneDown2_16(int src_width,
166
                               int src_height,
167
                               int dst_width,
168
                               int dst_height,
169
                               int src_stride,
170
                               int dst_stride,
171
                               const uint16_t* src_ptr,
172
                               uint16_t* dst_ptr,
173
87
                               enum FilterMode filtering) {
174
87
  int y;
175
87
  void (*ScaleRowDown2)(const uint16_t* src_ptr, ptrdiff_t src_stride,
176
87
                        uint16_t* dst_ptr, int dst_width) =
177
87
      filtering == kFilterNone
178
87
          ? ScaleRowDown2_16_C
179
87
          : (filtering == kFilterLinear ? ScaleRowDown2Linear_16_C
180
87
                                        : ScaleRowDown2Box_16_C);
181
87
  int row_stride = src_stride * 2;
182
87
  (void)src_width;
183
87
  (void)src_height;
184
87
  if (!filtering) {
185
0
    src_ptr += src_stride;  // Point to odd rows.
186
0
    src_stride = 0;
187
0
  }
188
189
#if defined(HAS_SCALEROWDOWN2_16_NEON)
190
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 16)) {
191
    ScaleRowDown2 = filtering == kFilterNone     ? ScaleRowDown2_16_NEON
192
                    : filtering == kFilterLinear ? ScaleRowDown2Linear_16_NEON
193
                                                 : ScaleRowDown2Box_16_NEON;
194
  }
195
#endif
196
#if defined(HAS_SCALEROWDOWN2_16_SME)
197
  if (TestCpuFlag(kCpuHasSME)) {
198
    ScaleRowDown2 = filtering == kFilterNone     ? ScaleRowDown2_16_SME
199
                    : filtering == kFilterLinear ? ScaleRowDown2Linear_16_SME
200
                                                 : ScaleRowDown2Box_16_SME;
201
  }
202
#endif
203
#if defined(HAS_SCALEROWDOWN2_16_SSE2)
204
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) {
205
    ScaleRowDown2 =
206
        filtering == kFilterNone
207
            ? ScaleRowDown2_16_SSE2
208
            : (filtering == kFilterLinear ? ScaleRowDown2Linear_16_SSE2
209
                                          : ScaleRowDown2Box_16_SSE2);
210
  }
211
#endif
212
213
87
  if (filtering == kFilterLinear) {
214
0
    src_stride = 0;
215
0
  }
216
  // TODO(fbarchard): Loop through source height to allow odd height.
217
1.44k
  for (y = 0; y < dst_height; ++y) {
218
1.35k
    ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
219
1.35k
    src_ptr += row_stride;
220
1.35k
    dst_ptr += dst_stride;
221
1.35k
  }
222
87
}
223
224
void ScalePlaneDown2_16To8(int src_width,
225
                           int src_height,
226
                           int dst_width,
227
                           int dst_height,
228
                           int src_stride,
229
                           int dst_stride,
230
                           const uint16_t* src_ptr,
231
                           uint8_t* dst_ptr,
232
                           int scale,
233
0
                           enum FilterMode filtering) {
234
0
  int y;
235
0
  void (*ScaleRowDown2)(const uint16_t* src_ptr, ptrdiff_t src_stride,
236
0
                        uint8_t* dst_ptr, int dst_width, int scale) =
237
0
      (src_width & 1)
238
0
          ? (filtering == kFilterNone
239
0
                 ? ScaleRowDown2_16To8_Odd_C
240
0
                 : (filtering == kFilterLinear ? ScaleRowDown2Linear_16To8_Odd_C
241
0
                                               : ScaleRowDown2Box_16To8_Odd_C))
242
0
          : (filtering == kFilterNone
243
0
                 ? ScaleRowDown2_16To8_C
244
0
                 : (filtering == kFilterLinear ? ScaleRowDown2Linear_16To8_C
245
0
                                               : ScaleRowDown2Box_16To8_C));
246
0
  int row_stride = src_stride * 2;
247
0
  (void)dst_height;
248
0
  if (!filtering) {
249
0
    src_ptr += src_stride;  // Point to odd rows.
250
0
    src_stride = 0;
251
0
  }
252
253
0
  if (filtering == kFilterLinear) {
254
0
    src_stride = 0;
255
0
  }
256
0
  for (y = 0; y < src_height / 2; ++y) {
257
0
    ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width, scale);
258
0
    src_ptr += row_stride;
259
0
    dst_ptr += dst_stride;
260
0
  }
261
0
  if (src_height & 1) {
262
0
    if (!filtering) {
263
0
      src_ptr -= src_stride;  // Point to last row.
264
0
    }
265
0
    ScaleRowDown2(src_ptr, 0, dst_ptr, dst_width, scale);
266
0
  }
267
0
}
268
269
// Scale plane, 1/4
270
// This is an optimized version for scaling down a plane to 1/4 of
271
// its original size.
272
273
static void ScalePlaneDown4(int src_width,
274
                            int src_height,
275
                            int dst_width,
276
                            int dst_height,
277
                            int src_stride,
278
                            int dst_stride,
279
                            const uint8_t* src_ptr,
280
                            uint8_t* dst_ptr,
281
79
                            enum FilterMode filtering) {
282
79
  int y;
283
79
  void (*ScaleRowDown4)(const uint8_t* src_ptr, ptrdiff_t src_stride,
284
79
                        uint8_t* dst_ptr, int dst_width) =
285
79
      filtering ? ScaleRowDown4Box_C : ScaleRowDown4_C;
286
79
  int row_stride = src_stride * 4;
287
79
  (void)src_width;
288
79
  (void)src_height;
289
79
  if (!filtering) {
290
0
    src_ptr += src_stride * 2;  // Point to row 2.
291
0
    src_stride = 0;
292
0
  }
293
#if defined(HAS_SCALEROWDOWN4_NEON)
294
  if (TestCpuFlag(kCpuHasNEON)) {
295
    ScaleRowDown4 =
296
        filtering ? ScaleRowDown4Box_Any_NEON : ScaleRowDown4_Any_NEON;
297
    if (IS_ALIGNED(dst_width, 16)) {
298
      ScaleRowDown4 = filtering ? ScaleRowDown4Box_NEON : ScaleRowDown4_NEON;
299
    }
300
  }
301
#endif
302
79
#if defined(HAS_SCALEROWDOWN4_SSSE3)
303
79
  if (TestCpuFlag(kCpuHasSSSE3)) {
304
79
    ScaleRowDown4 =
305
79
        filtering ? ScaleRowDown4Box_Any_SSSE3 : ScaleRowDown4_Any_SSSE3;
306
79
    if (IS_ALIGNED(dst_width, 8)) {
307
20
      ScaleRowDown4 = filtering ? ScaleRowDown4Box_SSSE3 : ScaleRowDown4_SSSE3;
308
20
    }
309
79
  }
310
79
#endif
311
79
#if defined(HAS_SCALEROWDOWN4_AVX2)
312
79
  if (TestCpuFlag(kCpuHasAVX2)) {
313
79
    ScaleRowDown4 =
314
79
        filtering ? ScaleRowDown4Box_Any_AVX2 : ScaleRowDown4_Any_AVX2;
315
79
    if (IS_ALIGNED(dst_width, 16)) {
316
8
      ScaleRowDown4 = filtering ? ScaleRowDown4Box_AVX2 : ScaleRowDown4_AVX2;
317
8
    }
318
79
  }
319
79
#endif
320
#if defined(HAS_SCALEROWDOWN4_MSA)
321
  if (TestCpuFlag(kCpuHasMSA)) {
322
    ScaleRowDown4 =
323
        filtering ? ScaleRowDown4Box_Any_MSA : ScaleRowDown4_Any_MSA;
324
    if (IS_ALIGNED(dst_width, 16)) {
325
      ScaleRowDown4 = filtering ? ScaleRowDown4Box_MSA : ScaleRowDown4_MSA;
326
    }
327
  }
328
#endif
329
#if defined(HAS_SCALEROWDOWN4_LSX)
330
  if (TestCpuFlag(kCpuHasLSX)) {
331
    ScaleRowDown4 =
332
        filtering ? ScaleRowDown4Box_Any_LSX : ScaleRowDown4_Any_LSX;
333
    if (IS_ALIGNED(dst_width, 16)) {
334
      ScaleRowDown4 = filtering ? ScaleRowDown4Box_LSX : ScaleRowDown4_LSX;
335
    }
336
  }
337
#endif
338
#if defined(HAS_SCALEROWDOWN4_RVV)
339
  if (TestCpuFlag(kCpuHasRVV)) {
340
    ScaleRowDown4 = filtering ? ScaleRowDown4Box_RVV : ScaleRowDown4_RVV;
341
  }
342
#endif
343
344
79
  if (filtering == kFilterLinear) {
345
0
    src_stride = 0;
346
0
  }
347
665
  for (y = 0; y < dst_height; ++y) {
348
586
    ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width);
349
586
    src_ptr += row_stride;
350
586
    dst_ptr += dst_stride;
351
586
  }
352
79
}
353
354
static void ScalePlaneDown4_16(int src_width,
355
                               int src_height,
356
                               int dst_width,
357
                               int dst_height,
358
                               int src_stride,
359
                               int dst_stride,
360
                               const uint16_t* src_ptr,
361
                               uint16_t* dst_ptr,
362
85
                               enum FilterMode filtering) {
363
85
  int y;
364
85
  void (*ScaleRowDown4)(const uint16_t* src_ptr, ptrdiff_t src_stride,
365
85
                        uint16_t* dst_ptr, int dst_width) =
366
85
      filtering ? ScaleRowDown4Box_16_C : ScaleRowDown4_16_C;
367
85
  int row_stride = src_stride * 4;
368
85
  (void)src_width;
369
85
  (void)src_height;
370
85
  if (!filtering) {
371
0
    src_ptr += src_stride * 2;  // Point to row 2.
372
0
    src_stride = 0;
373
0
  }
374
#if defined(HAS_SCALEROWDOWN4_16_NEON)
375
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8)) {
376
    ScaleRowDown4 =
377
        filtering ? ScaleRowDown4Box_16_NEON : ScaleRowDown4_16_NEON;
378
  }
379
#endif
380
#if defined(HAS_SCALEROWDOWN4_16_SSE2)
381
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
382
    ScaleRowDown4 =
383
        filtering ? ScaleRowDown4Box_16_SSE2 : ScaleRowDown4_16_SSE2;
384
  }
385
#endif
386
387
85
  if (filtering == kFilterLinear) {
388
0
    src_stride = 0;
389
0
  }
390
674
  for (y = 0; y < dst_height; ++y) {
391
589
    ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width);
392
589
    src_ptr += row_stride;
393
589
    dst_ptr += dst_stride;
394
589
  }
395
85
}
396
397
// Scale plane down, 3/4
398
static void ScalePlaneDown34(int src_width,
399
                             int src_height,
400
                             int dst_width,
401
                             int dst_height,
402
                             int src_stride,
403
                             int dst_stride,
404
                             const uint8_t* src_ptr,
405
                             uint8_t* dst_ptr,
406
55
                             enum FilterMode filtering) {
407
55
  int y;
408
55
  void (*ScaleRowDown34_0)(const uint8_t* src_ptr, ptrdiff_t src_stride,
409
55
                           uint8_t* dst_ptr, int dst_width);
410
55
  void (*ScaleRowDown34_1)(const uint8_t* src_ptr, ptrdiff_t src_stride,
411
55
                           uint8_t* dst_ptr, int dst_width);
412
55
  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
413
55
  (void)src_width;
414
55
  (void)src_height;
415
55
  assert(dst_width % 3 == 0);
416
55
  if (!filtering) {
417
0
    ScaleRowDown34_0 = ScaleRowDown34_C;
418
0
    ScaleRowDown34_1 = ScaleRowDown34_C;
419
55
  } else {
420
55
    ScaleRowDown34_0 = ScaleRowDown34_0_Box_C;
421
55
    ScaleRowDown34_1 = ScaleRowDown34_1_Box_C;
422
55
  }
423
#if defined(HAS_SCALEROWDOWN34_NEON)
424
  if (TestCpuFlag(kCpuHasNEON)) {
425
#if defined(__aarch64__)
426
    if (dst_width % 48 == 0) {
427
#else
428
    if (dst_width % 24 == 0) {
429
#endif
430
      if (!filtering) {
431
        ScaleRowDown34_0 = ScaleRowDown34_NEON;
432
        ScaleRowDown34_1 = ScaleRowDown34_NEON;
433
      } else {
434
        ScaleRowDown34_0 = ScaleRowDown34_0_Box_NEON;
435
        ScaleRowDown34_1 = ScaleRowDown34_1_Box_NEON;
436
      }
437
    } else {
438
      if (!filtering) {
439
        ScaleRowDown34_0 = ScaleRowDown34_Any_NEON;
440
        ScaleRowDown34_1 = ScaleRowDown34_Any_NEON;
441
      } else {
442
        ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_NEON;
443
        ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_NEON;
444
      }
445
    }
446
  }
447
#endif
448
#if defined(HAS_SCALEROWDOWN34_MSA)
449
  if (TestCpuFlag(kCpuHasMSA)) {
450
    if (dst_width % 48 == 0) {
451
      if (!filtering) {
452
        ScaleRowDown34_0 = ScaleRowDown34_MSA;
453
        ScaleRowDown34_1 = ScaleRowDown34_MSA;
454
      } else {
455
        ScaleRowDown34_0 = ScaleRowDown34_0_Box_MSA;
456
        ScaleRowDown34_1 = ScaleRowDown34_1_Box_MSA;
457
      }
458
    } else {
459
      if (!filtering) {
460
        ScaleRowDown34_0 = ScaleRowDown34_Any_MSA;
461
        ScaleRowDown34_1 = ScaleRowDown34_Any_MSA;
462
      } else {
463
        ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_MSA;
464
        ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_MSA;
465
      }
466
    }
467
  }
468
#endif
469
#if defined(HAS_SCALEROWDOWN34_LSX)
470
  if (TestCpuFlag(kCpuHasLSX)) {
471
    if (dst_width % 48 == 0) {
472
      if (!filtering) {
473
        ScaleRowDown34_0 = ScaleRowDown34_LSX;
474
        ScaleRowDown34_1 = ScaleRowDown34_LSX;
475
      } else {
476
        ScaleRowDown34_0 = ScaleRowDown34_0_Box_LSX;
477
        ScaleRowDown34_1 = ScaleRowDown34_1_Box_LSX;
478
      }
479
    } else {
480
      if (!filtering) {
481
        ScaleRowDown34_0 = ScaleRowDown34_Any_LSX;
482
        ScaleRowDown34_1 = ScaleRowDown34_Any_LSX;
483
      } else {
484
        ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_LSX;
485
        ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_LSX;
486
      }
487
    }
488
  }
489
#endif
490
55
#if defined(HAS_SCALEROWDOWN34_SSSE3)
491
55
  if (TestCpuFlag(kCpuHasSSSE3)) {
492
55
    if (dst_width % 24 == 0) {
493
21
      if (!filtering) {
494
0
        ScaleRowDown34_0 = ScaleRowDown34_SSSE3;
495
0
        ScaleRowDown34_1 = ScaleRowDown34_SSSE3;
496
21
      } else {
497
21
        ScaleRowDown34_0 = ScaleRowDown34_0_Box_SSSE3;
498
21
        ScaleRowDown34_1 = ScaleRowDown34_1_Box_SSSE3;
499
21
      }
500
34
    } else {
501
34
      if (!filtering) {
502
0
        ScaleRowDown34_0 = ScaleRowDown34_Any_SSSE3;
503
0
        ScaleRowDown34_1 = ScaleRowDown34_Any_SSSE3;
504
34
      } else {
505
34
        ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_SSSE3;
506
34
        ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_SSSE3;
507
34
      }
508
34
    }
509
55
  }
510
55
#endif
511
#if defined(HAS_SCALEROWDOWN34_RVV)
512
  if (TestCpuFlag(kCpuHasRVV)) {
513
    if (!filtering) {
514
      ScaleRowDown34_0 = ScaleRowDown34_RVV;
515
      ScaleRowDown34_1 = ScaleRowDown34_RVV;
516
    } else {
517
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_RVV;
518
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_RVV;
519
    }
520
  }
521
#endif
522
523
854
  for (y = 0; y < dst_height - 2; y += 3) {
524
799
    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
525
799
    src_ptr += src_stride;
526
799
    dst_ptr += dst_stride;
527
799
    ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width);
528
799
    src_ptr += src_stride;
529
799
    dst_ptr += dst_stride;
530
799
    ScaleRowDown34_0(src_ptr + src_stride, -filter_stride, dst_ptr, dst_width);
531
799
    src_ptr += src_stride * 2;
532
799
    dst_ptr += dst_stride;
533
799
  }
534
535
  // Remainder 1 or 2 rows with last row vertically unfiltered
536
55
  if ((dst_height % 3) == 2) {
537
0
    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
538
0
    src_ptr += src_stride;
539
0
    dst_ptr += dst_stride;
540
0
    ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width);
541
55
  } else if ((dst_height % 3) == 1) {
542
0
    ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width);
543
0
  }
544
55
}
545
546
static void ScalePlaneDown34_16(int src_width,
547
                                int src_height,
548
                                int dst_width,
549
                                int dst_height,
550
                                int src_stride,
551
                                int dst_stride,
552
                                const uint16_t* src_ptr,
553
                                uint16_t* dst_ptr,
554
43
                                enum FilterMode filtering) {
555
43
  int y;
556
43
  void (*ScaleRowDown34_0)(const uint16_t* src_ptr, ptrdiff_t src_stride,
557
43
                           uint16_t* dst_ptr, int dst_width);
558
43
  void (*ScaleRowDown34_1)(const uint16_t* src_ptr, ptrdiff_t src_stride,
559
43
                           uint16_t* dst_ptr, int dst_width);
560
43
  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
561
43
  (void)src_width;
562
43
  (void)src_height;
563
43
  assert(dst_width % 3 == 0);
564
43
  if (!filtering) {
565
0
    ScaleRowDown34_0 = ScaleRowDown34_16_C;
566
0
    ScaleRowDown34_1 = ScaleRowDown34_16_C;
567
43
  } else {
568
43
    ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_C;
569
43
    ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_C;
570
43
  }
571
#if defined(HAS_SCALEROWDOWN34_16_NEON)
572
  if (TestCpuFlag(kCpuHasNEON) && (dst_width % 24 == 0)) {
573
    if (!filtering) {
574
      ScaleRowDown34_0 = ScaleRowDown34_16_NEON;
575
      ScaleRowDown34_1 = ScaleRowDown34_16_NEON;
576
    } else {
577
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_NEON;
578
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_NEON;
579
    }
580
  }
581
#endif
582
#if defined(HAS_SCALEROWDOWN34_16_SSSE3)
583
  if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) {
584
    if (!filtering) {
585
      ScaleRowDown34_0 = ScaleRowDown34_16_SSSE3;
586
      ScaleRowDown34_1 = ScaleRowDown34_16_SSSE3;
587
    } else {
588
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_SSSE3;
589
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_SSSE3;
590
    }
591
  }
592
#endif
593
594
709
  for (y = 0; y < dst_height - 2; y += 3) {
595
666
    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
596
666
    src_ptr += src_stride;
597
666
    dst_ptr += dst_stride;
598
666
    ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width);
599
666
    src_ptr += src_stride;
600
666
    dst_ptr += dst_stride;
601
666
    ScaleRowDown34_0(src_ptr + src_stride, -filter_stride, dst_ptr, dst_width);
602
666
    src_ptr += src_stride * 2;
603
666
    dst_ptr += dst_stride;
604
666
  }
605
606
  // Remainder 1 or 2 rows with last row vertically unfiltered
607
43
  if ((dst_height % 3) == 2) {
608
0
    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
609
0
    src_ptr += src_stride;
610
0
    dst_ptr += dst_stride;
611
0
    ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width);
612
43
  } else if ((dst_height % 3) == 1) {
613
0
    ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width);
614
0
  }
615
43
}
616
617
// Scale plane, 3/8
618
// This is an optimized version for scaling down a plane to 3/8
619
// of its original size.
620
//
621
// Uses box filters arranged like this
622
// aaabbbcc -> abc
623
// aaabbbcc    def
624
// aaabbbcc    ghi
625
// dddeeeff
626
// dddeeeff
627
// dddeeeff
628
// ggghhhii
629
// ggghhhii
630
// Boxes are 3x3, 2x3, 3x2 and 2x2
631
632
static void ScalePlaneDown38(int src_width,
633
                             int src_height,
634
                             int dst_width,
635
                             int dst_height,
636
                             int src_stride,
637
                             int dst_stride,
638
                             const uint8_t* src_ptr,
639
                             uint8_t* dst_ptr,
640
33
                             enum FilterMode filtering) {
641
33
  int y;
642
33
  void (*ScaleRowDown38_3)(const uint8_t* src_ptr, ptrdiff_t src_stride,
643
33
                           uint8_t* dst_ptr, int dst_width);
644
33
  void (*ScaleRowDown38_2)(const uint8_t* src_ptr, ptrdiff_t src_stride,
645
33
                           uint8_t* dst_ptr, int dst_width);
646
33
  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
647
33
  assert(dst_width % 3 == 0);
648
33
  (void)src_width;
649
33
  (void)src_height;
650
33
  if (!filtering) {
651
0
    ScaleRowDown38_3 = ScaleRowDown38_C;
652
0
    ScaleRowDown38_2 = ScaleRowDown38_C;
653
33
  } else {
654
33
    ScaleRowDown38_3 = ScaleRowDown38_3_Box_C;
655
33
    ScaleRowDown38_2 = ScaleRowDown38_2_Box_C;
656
33
  }
657
658
#if defined(HAS_SCALEROWDOWN38_NEON)
659
  if (TestCpuFlag(kCpuHasNEON)) {
660
    if (!filtering) {
661
      ScaleRowDown38_3 = ScaleRowDown38_Any_NEON;
662
      ScaleRowDown38_2 = ScaleRowDown38_Any_NEON;
663
    } else {
664
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_NEON;
665
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_NEON;
666
    }
667
    if (dst_width % 12 == 0) {
668
      if (!filtering) {
669
        ScaleRowDown38_3 = ScaleRowDown38_NEON;
670
        ScaleRowDown38_2 = ScaleRowDown38_NEON;
671
      } else {
672
        ScaleRowDown38_3 = ScaleRowDown38_3_Box_NEON;
673
        ScaleRowDown38_2 = ScaleRowDown38_2_Box_NEON;
674
      }
675
    }
676
  }
677
#endif
678
33
#if defined(HAS_SCALEROWDOWN38_SSSE3)
679
33
  if (TestCpuFlag(kCpuHasSSSE3)) {
680
33
    if (!filtering) {
681
0
      ScaleRowDown38_3 = ScaleRowDown38_Any_SSSE3;
682
0
      ScaleRowDown38_2 = ScaleRowDown38_Any_SSSE3;
683
33
    } else {
684
33
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_SSSE3;
685
33
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_SSSE3;
686
33
    }
687
33
    if (dst_width % 12 == 0 && !filtering) {
688
0
      ScaleRowDown38_3 = ScaleRowDown38_SSSE3;
689
0
      ScaleRowDown38_2 = ScaleRowDown38_SSSE3;
690
0
    }
691
33
    if (dst_width % 6 == 0 && filtering) {
692
0
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_SSSE3;
693
0
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_SSSE3;
694
0
    }
695
33
  }
696
33
#endif
697
#if defined(HAS_SCALEROWDOWN38_MSA)
698
  if (TestCpuFlag(kCpuHasMSA)) {
699
    if (!filtering) {
700
      ScaleRowDown38_3 = ScaleRowDown38_Any_MSA;
701
      ScaleRowDown38_2 = ScaleRowDown38_Any_MSA;
702
    } else {
703
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_MSA;
704
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_MSA;
705
    }
706
    if (dst_width % 12 == 0) {
707
      if (!filtering) {
708
        ScaleRowDown38_3 = ScaleRowDown38_MSA;
709
        ScaleRowDown38_2 = ScaleRowDown38_MSA;
710
      } else {
711
        ScaleRowDown38_3 = ScaleRowDown38_3_Box_MSA;
712
        ScaleRowDown38_2 = ScaleRowDown38_2_Box_MSA;
713
      }
714
    }
715
  }
716
#endif
717
#if defined(HAS_SCALEROWDOWN38_LSX)
718
  if (TestCpuFlag(kCpuHasLSX)) {
719
    if (!filtering) {
720
      ScaleRowDown38_3 = ScaleRowDown38_Any_LSX;
721
      ScaleRowDown38_2 = ScaleRowDown38_Any_LSX;
722
    } else {
723
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_LSX;
724
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_LSX;
725
    }
726
    if (dst_width % 12 == 0) {
727
      if (!filtering) {
728
        ScaleRowDown38_3 = ScaleRowDown38_LSX;
729
        ScaleRowDown38_2 = ScaleRowDown38_LSX;
730
      } else {
731
        ScaleRowDown38_3 = ScaleRowDown38_3_Box_LSX;
732
        ScaleRowDown38_2 = ScaleRowDown38_2_Box_LSX;
733
      }
734
    }
735
  }
736
#endif
737
#if defined(HAS_SCALEROWDOWN38_RVV)
738
  if (TestCpuFlag(kCpuHasRVV)) {
739
    if (!filtering) {
740
      ScaleRowDown38_3 = ScaleRowDown38_RVV;
741
      ScaleRowDown38_2 = ScaleRowDown38_RVV;
742
    } else {
743
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_RVV;
744
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_RVV;
745
    }
746
  }
747
#endif
748
749
796
  for (y = 0; y < dst_height - 2; y += 3) {
750
763
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
751
763
    src_ptr += src_stride * 3;
752
763
    dst_ptr += dst_stride;
753
763
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
754
763
    src_ptr += src_stride * 3;
755
763
    dst_ptr += dst_stride;
756
763
    ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width);
757
763
    src_ptr += src_stride * 2;
758
763
    dst_ptr += dst_stride;
759
763
  }
760
761
  // Remainder 1 or 2 rows with last row vertically unfiltered
762
33
  if ((dst_height % 3) == 2) {
763
0
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
764
0
    src_ptr += src_stride * 3;
765
0
    dst_ptr += dst_stride;
766
0
    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
767
33
  } else if ((dst_height % 3) == 1) {
768
0
    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
769
0
  }
770
33
}
771
772
static void ScalePlaneDown38_16(int src_width,
773
                                int src_height,
774
                                int dst_width,
775
                                int dst_height,
776
                                int src_stride,
777
                                int dst_stride,
778
                                const uint16_t* src_ptr,
779
                                uint16_t* dst_ptr,
780
32
                                enum FilterMode filtering) {
781
32
  int y;
782
32
  void (*ScaleRowDown38_3)(const uint16_t* src_ptr, ptrdiff_t src_stride,
783
32
                           uint16_t* dst_ptr, int dst_width);
784
32
  void (*ScaleRowDown38_2)(const uint16_t* src_ptr, ptrdiff_t src_stride,
785
32
                           uint16_t* dst_ptr, int dst_width);
786
32
  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
787
32
  (void)src_width;
788
32
  (void)src_height;
789
32
  assert(dst_width % 3 == 0);
790
32
  if (!filtering) {
791
0
    ScaleRowDown38_3 = ScaleRowDown38_16_C;
792
0
    ScaleRowDown38_2 = ScaleRowDown38_16_C;
793
32
  } else {
794
32
    ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_C;
795
32
    ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_C;
796
32
  }
797
#if defined(HAS_SCALEROWDOWN38_16_NEON)
798
  if (TestCpuFlag(kCpuHasNEON) && (dst_width % 12 == 0)) {
799
    if (!filtering) {
800
      ScaleRowDown38_3 = ScaleRowDown38_16_NEON;
801
      ScaleRowDown38_2 = ScaleRowDown38_16_NEON;
802
    } else {
803
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_NEON;
804
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_NEON;
805
    }
806
  }
807
#endif
808
#if defined(HAS_SCALEROWDOWN38_16_SSSE3)
809
  if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) {
810
    if (!filtering) {
811
      ScaleRowDown38_3 = ScaleRowDown38_16_SSSE3;
812
      ScaleRowDown38_2 = ScaleRowDown38_16_SSSE3;
813
    } else {
814
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_SSSE3;
815
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_SSSE3;
816
    }
817
  }
818
#endif
819
820
893
  for (y = 0; y < dst_height - 2; y += 3) {
821
861
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
822
861
    src_ptr += src_stride * 3;
823
861
    dst_ptr += dst_stride;
824
861
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
825
861
    src_ptr += src_stride * 3;
826
861
    dst_ptr += dst_stride;
827
861
    ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width);
828
861
    src_ptr += src_stride * 2;
829
861
    dst_ptr += dst_stride;
830
861
  }
831
832
  // Remainder 1 or 2 rows with last row vertically unfiltered
833
32
  if ((dst_height % 3) == 2) {
834
0
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
835
0
    src_ptr += src_stride * 3;
836
0
    dst_ptr += dst_stride;
837
0
    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
838
32
  } else if ((dst_height % 3) == 1) {
839
0
    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
840
0
  }
841
32
}
842
843
5.83M
// Clamps x to a minimum of 1. The argument is evaluated twice, so it must be
// free of side effects.
#define MIN1(x) ((x) < 1 ? 1 : (x))
844
845
4.66M
// Returns the sum of the first iboxwidth uint16_t values at src_ptr.
// iboxwidth is asserted to be positive.
static __inline uint32_t SumPixels(int iboxwidth, const uint16_t* src_ptr) {
  uint32_t total = 0u;
  int remaining;
  assert(iboxwidth > 0);
  for (remaining = iboxwidth; remaining > 0; --remaining) {
    total += *src_ptr++;
  }
  return total;
}
854
855
4.94M
// Returns the sum (modulo 2^32) of the first iboxwidth uint32_t values at
// src_ptr. iboxwidth is asserted to be positive.
static __inline uint32_t SumPixels_16(int iboxwidth, const uint32_t* src_ptr) {
  uint32_t total = 0u;
  int remaining;
  assert(iboxwidth > 0);
  for (remaining = iboxwidth; remaining > 0; --remaining) {
    total += *src_ptr++;
  }
  return total;
}
864
865
// Horizontal box reduction to 8-bit output for a step with a fractional part.
// Each output pixel is the sum of a box of src_ptr values multiplied by a
// 16-bit reciprocal of (box width * boxheight). Box widths take one of two
// adjacent values, hence the two-entry reciprocal table.
//   dst_width  number of output pixels written to dst_ptr
//   boxheight  vertical box size used in the divisor
//   x, dx      start position and step in 16.16 fixed point
static void ScaleAddCols2_C(int dst_width,
                            int boxheight,
                            int x,
                            int dx,
                            const uint16_t* src_ptr,
                            uint8_t* dst_ptr) {
  const int minboxwidth = dx >> 16;
  int scaletbl[2];
  int out;
  scaletbl[0] = 65536 / (MIN1(minboxwidth) * boxheight);
  scaletbl[1] = 65536 / (MIN1(minboxwidth + 1) * boxheight);
  for (out = 0; out < dst_width; ++out) {
    const int left = x >> 16;
    int boxwidth;
    int which;
    uint32_t box_sum;
    x += dx;
    // Box spans from this pixel's integer position up to the next one's.
    boxwidth = MIN1((x >> 16) - left);
    which = boxwidth - minboxwidth;
    assert((which == 0) || (which == 1));
    box_sum = SumPixels(boxwidth, src_ptr + left);
    *dst_ptr++ = (uint8_t)(box_sum * scaletbl[which] >> 16);
  }
}
888
889
// 16-bit-output counterpart of the two-width horizontal box reduction.
// Each output pixel is the sum of a box of uint32_t src_ptr values multiplied
// by a 16-bit reciprocal of (box width * boxheight), then truncated to
// uint16_t.
//   dst_width  number of output pixels written to dst_ptr
//   boxheight  vertical box size used in the divisor
//   x, dx      start position and step in 16.16 fixed point
static void ScaleAddCols2_16_C(int dst_width,
                               int boxheight,
                               int x,
                               int dx,
                               const uint32_t* src_ptr,
                               uint16_t* dst_ptr) {
  const int minboxwidth = dx >> 16;
  int scaletbl[2];
  int out;
  scaletbl[0] = 65536 / (MIN1(minboxwidth) * boxheight);
  scaletbl[1] = 65536 / (MIN1(minboxwidth + 1) * boxheight);
  for (out = 0; out < dst_width; ++out) {
    const int left = x >> 16;
    int boxwidth;
    int which;
    uint32_t box_sum;
    x += dx;
    // Box spans from this pixel's integer position up to the next one's.
    boxwidth = MIN1((x >> 16) - left);
    which = boxwidth - minboxwidth;
    assert((which == 0) || (which == 1));
    box_sum = SumPixels_16(boxwidth, src_ptr + left);
    *dst_ptr++ = (uint16_t)(box_sum * scaletbl[which] >> 16);
  }
}
911
912
// Column pass for a horizontal step of exactly 1.0: no horizontal summing,
// each src_ptr value is only divided by boxheight (via a 16-bit reciprocal)
// and truncated to 8 bits.
//   x   start position in 16.16 fixed point; its integer part offsets src_ptr
//   dx  unused
static void ScaleAddCols0_C(int dst_width,
                            int boxheight,
                            int x,
                            int dx,
                            const uint16_t* src_ptr,
                            uint8_t* dst_ptr) {
  const int scaleval = 65536 / boxheight;
  const uint16_t* in = src_ptr + (x >> 16);
  int col = 0;
  (void)dx;
  while (col < dst_width) {
    dst_ptr[col] = (uint8_t)(in[col] * scaleval >> 16);
    ++col;
  }
}
926
927
// Horizontal box reduction to 8-bit output for a whole-number step: every
// output pixel averages a box of the same width, and consecutive boxes are
// adjacent in src_ptr.
//   x   start position in 16.16 fixed point; only its integer part is used
//   dx  step in 16.16 fixed point; its integer part (at least 1) is the box
//       width
static void ScaleAddCols1_C(int dst_width,
                            int boxheight,
                            int x,
                            int dx,
                            const uint16_t* src_ptr,
                            uint8_t* dst_ptr) {
  const int boxwidth = MIN1(dx >> 16);
  // 16-bit reciprocal of the box area.
  const int scaleval = 65536 / (boxwidth * boxheight);
  int offset = x >> 16;
  int out;
  for (out = 0; out < dst_width; ++out) {
    const uint32_t box_sum = SumPixels(boxwidth, src_ptr + offset);
    *dst_ptr++ = (uint8_t)(box_sum * scaleval >> 16);
    offset += boxwidth;
  }
}
942
943
// Horizontal box reduction to 16-bit output for a whole-number step: every
// output pixel averages a box of the same width, and consecutive boxes are
// adjacent in src_ptr.
//   dst_width  number of output pixels written to dst_ptr
//   boxheight  vertical box size used in the divisor
//   x          start position in 16.16 fixed point; only its integer part is
//              used
//   dx         step in 16.16 fixed point; its integer part (at least 1) is
//              the box width
static void ScaleAddCols1_16_C(int dst_width,
                               int boxheight,
                               int x,
                               int dx,
                               const uint32_t* src_ptr,
                               uint16_t* dst_ptr) {
  int boxwidth = MIN1(dx >> 16);
  // 16-bit reciprocal of the box area.
  int scaleval = 65536 / (boxwidth * boxheight);
  int i;
  // x arrives in 16.16 fixed point (as in ScaleAddCols1_C and
  // ScaleAddCols2_16_C); convert it to an element index before using it to
  // address src_ptr. A start of 0 is unaffected.
  x >>= 16;
  for (i = 0; i < dst_width; ++i) {
    *dst_ptr++ = SumPixels_16(boxwidth, src_ptr + x) * scaleval >> 16;
    x += boxwidth;
  }
}
957
958
// Scale plane down to any dimensions, with interpolation.
959
// (boxfilter).
960
//
961
// Same method as SimpleScale, which is fixed point, outputting
962
// one pixel of destination using fixed point (16.16) to step
963
// through source, sampling a box of pixels with simple
964
// averaging.
965
static int ScalePlaneBox(int src_width,
966
                         int src_height,
967
                         int dst_width,
968
                         int dst_height,
969
                         int src_stride,
970
                         int dst_stride,
971
                         const uint8_t* src_ptr,
972
1.62k
                         uint8_t* dst_ptr) {
973
1.62k
  int j, k;
974
  // Initial source x/y coordinate and step values as 16.16 fixed point.
975
1.62k
  int x = 0;
976
1.62k
  int y = 0;
977
1.62k
  int dx = 0;
978
1.62k
  int dy = 0;
979
1.62k
  const int max_y = (src_height << 16);
980
1.62k
  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox, &x, &y,
981
1.62k
             &dx, &dy);
982
1.62k
  src_width = Abs(src_width);
983
1.62k
  {
984
    // Allocate a row buffer of uint16_t.
985
1.62k
    align_buffer_64(row16, src_width * 2);
986
1.62k
    if (!row16)
987
0
      return 1;
988
1.62k
    void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
989
1.62k
                         const uint16_t* src_ptr, uint8_t* dst_ptr) =
990
1.62k
        (dx & 0xffff) ? ScaleAddCols2_C
991
1.62k
                      : ((dx != 0x10000) ? ScaleAddCols1_C : ScaleAddCols0_C);
992
1.62k
    void (*ScaleAddRow)(const uint8_t* src_ptr, uint16_t* dst_ptr,
993
1.62k
                        int src_width) = ScaleAddRow_C;
994
1.62k
#if defined(HAS_SCALEADDROW_SSE2)
995
1.62k
    if (TestCpuFlag(kCpuHasSSE2)) {
996
1.62k
      ScaleAddRow = ScaleAddRow_Any_SSE2;
997
1.62k
      if (IS_ALIGNED(src_width, 16)) {
998
238
        ScaleAddRow = ScaleAddRow_SSE2;
999
238
      }
1000
1.62k
    }
1001
1.62k
#endif
1002
1.62k
#if defined(HAS_SCALEADDROW_AVX2)
1003
1.62k
    if (TestCpuFlag(kCpuHasAVX2)) {
1004
1.62k
      ScaleAddRow = ScaleAddRow_Any_AVX2;
1005
1.62k
      if (IS_ALIGNED(src_width, 32)) {
1006
139
        ScaleAddRow = ScaleAddRow_AVX2;
1007
139
      }
1008
1.62k
    }
1009
1.62k
#endif
1010
#if defined(HAS_SCALEADDROW_NEON)
1011
    if (TestCpuFlag(kCpuHasNEON)) {
1012
      ScaleAddRow = ScaleAddRow_Any_NEON;
1013
      if (IS_ALIGNED(src_width, 16)) {
1014
        ScaleAddRow = ScaleAddRow_NEON;
1015
      }
1016
    }
1017
#endif
1018
#if defined(HAS_SCALEADDROW_MSA)
1019
    if (TestCpuFlag(kCpuHasMSA)) {
1020
      ScaleAddRow = ScaleAddRow_Any_MSA;
1021
      if (IS_ALIGNED(src_width, 16)) {
1022
        ScaleAddRow = ScaleAddRow_MSA;
1023
      }
1024
    }
1025
#endif
1026
#if defined(HAS_SCALEADDROW_LSX)
1027
    if (TestCpuFlag(kCpuHasLSX)) {
1028
      ScaleAddRow = ScaleAddRow_Any_LSX;
1029
      if (IS_ALIGNED(src_width, 16)) {
1030
        ScaleAddRow = ScaleAddRow_LSX;
1031
      }
1032
    }
1033
#endif
1034
#if defined(HAS_SCALEADDROW_RVV)
1035
    if (TestCpuFlag(kCpuHasRVV)) {
1036
      ScaleAddRow = ScaleAddRow_RVV;
1037
    }
1038
#endif
1039
1040
94.9k
    for (j = 0; j < dst_height; ++j) {
1041
93.3k
      int boxheight;
1042
93.3k
      int iy = y >> 16;
1043
93.3k
      const uint8_t* src = src_ptr + iy * (int64_t)src_stride;
1044
93.3k
      y += dy;
1045
93.3k
      if (y > max_y) {
1046
0
        y = max_y;
1047
0
      }
1048
93.3k
      boxheight = MIN1((y >> 16) - iy);
1049
93.3k
      memset(row16, 0, src_width * 2);
1050
817k
      for (k = 0; k < boxheight; ++k) {
1051
724k
        ScaleAddRow(src, (uint16_t*)(row16), src_width);
1052
724k
        src += src_stride;
1053
724k
      }
1054
93.3k
      ScaleAddCols(dst_width, boxheight, x, dx, (uint16_t*)(row16), dst_ptr);
1055
93.3k
      dst_ptr += dst_stride;
1056
93.3k
    }
1057
1.62k
    free_aligned_buffer_64(row16);
1058
1.62k
  }
1059
0
  return 0;
1060
1.62k
}
1061
1062
static int ScalePlaneBox_16(int src_width,
1063
                            int src_height,
1064
                            int dst_width,
1065
                            int dst_height,
1066
                            int src_stride,
1067
                            int dst_stride,
1068
                            const uint16_t* src_ptr,
1069
1.53k
                            uint16_t* dst_ptr) {
1070
1.53k
  int j, k;
1071
  // Initial source x/y coordinate and step values as 16.16 fixed point.
1072
1.53k
  int x = 0;
1073
1.53k
  int y = 0;
1074
1.53k
  int dx = 0;
1075
1.53k
  int dy = 0;
1076
1.53k
  const int max_y = (src_height << 16);
1077
1.53k
  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox, &x, &y,
1078
1.53k
             &dx, &dy);
1079
1.53k
  src_width = Abs(src_width);
1080
1.53k
  {
1081
    // Allocate a row buffer of uint32_t.
1082
1.53k
    align_buffer_64(row32, src_width * 4);
1083
1.53k
    if (!row32)
1084
0
      return 1;
1085
1.53k
    void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
1086
1.53k
                         const uint32_t* src_ptr, uint16_t* dst_ptr) =
1087
1.53k
        (dx & 0xffff) ? ScaleAddCols2_16_C : ScaleAddCols1_16_C;
1088
1.53k
    void (*ScaleAddRow)(const uint16_t* src_ptr, uint32_t* dst_ptr,
1089
1.53k
                        int src_width) = ScaleAddRow_16_C;
1090
1091
#if defined(HAS_SCALEADDROW_16_SSE2)
1092
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(src_width, 16)) {
1093
      ScaleAddRow = ScaleAddRow_16_SSE2;
1094
    }
1095
#endif
1096
1097
93.5k
    for (j = 0; j < dst_height; ++j) {
1098
91.9k
      int boxheight;
1099
91.9k
      int iy = y >> 16;
1100
91.9k
      const uint16_t* src = src_ptr + iy * (int64_t)src_stride;
1101
91.9k
      y += dy;
1102
91.9k
      if (y > max_y) {
1103
0
        y = max_y;
1104
0
      }
1105
91.9k
      boxheight = MIN1((y >> 16) - iy);
1106
91.9k
      memset(row32, 0, src_width * 4);
1107
828k
      for (k = 0; k < boxheight; ++k) {
1108
736k
        ScaleAddRow(src, (uint32_t*)(row32), src_width);
1109
736k
        src += src_stride;
1110
736k
      }
1111
91.9k
      ScaleAddCols(dst_width, boxheight, x, dx, (uint32_t*)(row32), dst_ptr);
1112
91.9k
      dst_ptr += dst_stride;
1113
91.9k
    }
1114
1.53k
    free_aligned_buffer_64(row32);
1115
1.53k
  }
1116
0
  return 0;
1117
1.53k
}
1118
1119
// Scale plane down with bilinear interpolation.
1120
static int ScalePlaneBilinearDown(int src_width,
1121
                                  int src_height,
1122
                                  int dst_width,
1123
                                  int dst_height,
1124
                                  int src_stride,
1125
                                  int dst_stride,
1126
                                  const uint8_t* src_ptr,
1127
                                  uint8_t* dst_ptr,
1128
5.25k
                                  enum FilterMode filtering) {
1129
  // Initial source x/y coordinate and step values as 16.16 fixed point.
1130
5.25k
  int x = 0;
1131
5.25k
  int y = 0;
1132
5.25k
  int dx = 0;
1133
5.25k
  int dy = 0;
1134
  // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
1135
  // Allocate a row buffer.
1136
5.25k
  align_buffer_64(row, src_width);
1137
5.25k
  if (!row)
1138
0
    return 1;
1139
1140
5.25k
  const int max_y = (src_height - 1) << 16;
1141
5.25k
  int j;
1142
5.25k
  void (*ScaleFilterCols)(uint8_t* dst_ptr, const uint8_t* src_ptr,
1143
5.25k
                          int dst_width, int x, int dx) =
1144
5.25k
      (src_width >= 32768) ? ScaleFilterCols64_C : ScaleFilterCols_C;
1145
5.25k
  void (*InterpolateRow)(uint8_t* dst_ptr, const uint8_t* src_ptr,
1146
5.25k
                         ptrdiff_t src_stride, int dst_width,
1147
5.25k
                         int source_y_fraction) = InterpolateRow_C;
1148
5.25k
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
1149
5.25k
             &dx, &dy);
1150
5.25k
  src_width = Abs(src_width);
1151
1152
5.25k
#if defined(HAS_INTERPOLATEROW_SSSE3)
1153
5.25k
  if (TestCpuFlag(kCpuHasSSSE3)) {
1154
5.25k
    InterpolateRow = InterpolateRow_Any_SSSE3;
1155
5.25k
    if (IS_ALIGNED(src_width, 16)) {
1156
1.28k
      InterpolateRow = InterpolateRow_SSSE3;
1157
1.28k
    }
1158
5.25k
  }
1159
5.25k
#endif
1160
5.25k
#if defined(HAS_INTERPOLATEROW_AVX2)
1161
5.25k
  if (TestCpuFlag(kCpuHasAVX2)) {
1162
5.25k
    InterpolateRow = InterpolateRow_Any_AVX2;
1163
5.25k
    if (IS_ALIGNED(src_width, 32)) {
1164
912
      InterpolateRow = InterpolateRow_AVX2;
1165
912
    }
1166
5.25k
  }
1167
5.25k
#endif
1168
#if defined(HAS_INTERPOLATEROW_NEON)
1169
  if (TestCpuFlag(kCpuHasNEON)) {
1170
    InterpolateRow = InterpolateRow_Any_NEON;
1171
    if (IS_ALIGNED(src_width, 16)) {
1172
      InterpolateRow = InterpolateRow_NEON;
1173
    }
1174
  }
1175
#endif
1176
#if defined(HAS_INTERPOLATEROW_SME)
1177
  if (TestCpuFlag(kCpuHasSME)) {
1178
    InterpolateRow = InterpolateRow_SME;
1179
  }
1180
#endif
1181
#if defined(HAS_INTERPOLATEROW_MSA)
1182
  if (TestCpuFlag(kCpuHasMSA)) {
1183
    InterpolateRow = InterpolateRow_Any_MSA;
1184
    if (IS_ALIGNED(src_width, 32)) {
1185
      InterpolateRow = InterpolateRow_MSA;
1186
    }
1187
  }
1188
#endif
1189
#if defined(HAS_INTERPOLATEROW_LSX)
1190
  if (TestCpuFlag(kCpuHasLSX)) {
1191
    InterpolateRow = InterpolateRow_Any_LSX;
1192
    if (IS_ALIGNED(src_width, 32)) {
1193
      InterpolateRow = InterpolateRow_LSX;
1194
    }
1195
  }
1196
#endif
1197
#if defined(HAS_INTERPOLATEROW_RVV)
1198
  if (TestCpuFlag(kCpuHasRVV)) {
1199
    InterpolateRow = InterpolateRow_RVV;
1200
  }
1201
#endif
1202
1203
5.25k
#if defined(HAS_SCALEFILTERCOLS_SSSE3)
1204
5.25k
  if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
1205
5.25k
    ScaleFilterCols = ScaleFilterCols_SSSE3;
1206
5.25k
  }
1207
5.25k
#endif
1208
#if defined(HAS_SCALEFILTERCOLS_NEON)
1209
  if (TestCpuFlag(kCpuHasNEON) && src_width < 32768) {
1210
    ScaleFilterCols = ScaleFilterCols_Any_NEON;
1211
    if (IS_ALIGNED(dst_width, 8)) {
1212
      ScaleFilterCols = ScaleFilterCols_NEON;
1213
    }
1214
  }
1215
#endif
1216
#if defined(HAS_SCALEFILTERCOLS_MSA)
1217
  if (TestCpuFlag(kCpuHasMSA) && src_width < 32768) {
1218
    ScaleFilterCols = ScaleFilterCols_Any_MSA;
1219
    if (IS_ALIGNED(dst_width, 16)) {
1220
      ScaleFilterCols = ScaleFilterCols_MSA;
1221
    }
1222
  }
1223
#endif
1224
#if defined(HAS_SCALEFILTERCOLS_LSX)
1225
  if (TestCpuFlag(kCpuHasLSX) && src_width < 32768) {
1226
    ScaleFilterCols = ScaleFilterCols_Any_LSX;
1227
    if (IS_ALIGNED(dst_width, 16)) {
1228
      ScaleFilterCols = ScaleFilterCols_LSX;
1229
    }
1230
  }
1231
#endif
1232
5.25k
  if (y > max_y) {
1233
74
    y = max_y;
1234
74
  }
1235
1236
388k
  for (j = 0; j < dst_height; ++j) {
1237
383k
    int yi = y >> 16;
1238
383k
    const uint8_t* src = src_ptr + yi * (int64_t)src_stride;
1239
383k
    if (filtering == kFilterLinear) {
1240
88.5k
      ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
1241
295k
    } else {
1242
295k
      int yf = (y >> 8) & 255;
1243
295k
      InterpolateRow(row, src, src_stride, src_width, yf);
1244
295k
      ScaleFilterCols(dst_ptr, row, dst_width, x, dx);
1245
295k
    }
1246
383k
    dst_ptr += dst_stride;
1247
383k
    y += dy;
1248
383k
    if (y > max_y) {
1249
6.49k
      y = max_y;
1250
6.49k
    }
1251
383k
  }
1252
5.25k
  free_aligned_buffer_64(row);
1253
5.25k
  return 0;
1254
5.25k
}
1255
1256
static int ScalePlaneBilinearDown_16(int src_width,
1257
                                     int src_height,
1258
                                     int dst_width,
1259
                                     int dst_height,
1260
                                     int src_stride,
1261
                                     int dst_stride,
1262
                                     const uint16_t* src_ptr,
1263
                                     uint16_t* dst_ptr,
1264
6.87k
                                     enum FilterMode filtering) {
1265
  // Initial source x/y coordinate and step values as 16.16 fixed point.
1266
6.87k
  int x = 0;
1267
6.87k
  int y = 0;
1268
6.87k
  int dx = 0;
1269
6.87k
  int dy = 0;
1270
  // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
1271
  // Allocate a row buffer.
1272
6.87k
  align_buffer_64(row, src_width * 2);
1273
6.87k
  if (!row)
1274
0
    return 1;
1275
1276
6.87k
  const int max_y = (src_height - 1) << 16;
1277
6.87k
  int j;
1278
6.87k
  void (*ScaleFilterCols)(uint16_t* dst_ptr, const uint16_t* src_ptr,
1279
6.87k
                          int dst_width, int x, int dx) =
1280
6.87k
      (src_width >= 32768) ? ScaleFilterCols64_16_C : ScaleFilterCols_16_C;
1281
6.87k
  void (*InterpolateRow)(uint16_t* dst_ptr, const uint16_t* src_ptr,
1282
6.87k
                         ptrdiff_t src_stride, int dst_width,
1283
6.87k
                         int source_y_fraction) = InterpolateRow_16_C;
1284
6.87k
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
1285
6.87k
             &dx, &dy);
1286
6.87k
  src_width = Abs(src_width);
1287
1288
#if defined(HAS_INTERPOLATEROW_16_SSE2)
1289
  if (TestCpuFlag(kCpuHasSSE2)) {
1290
    InterpolateRow = InterpolateRow_16_Any_SSE2;
1291
    if (IS_ALIGNED(src_width, 16)) {
1292
      InterpolateRow = InterpolateRow_16_SSE2;
1293
    }
1294
  }
1295
#endif
1296
#if defined(HAS_INTERPOLATEROW_16_SSSE3)
1297
  if (TestCpuFlag(kCpuHasSSSE3)) {
1298
    InterpolateRow = InterpolateRow_16_Any_SSSE3;
1299
    if (IS_ALIGNED(src_width, 16)) {
1300
      InterpolateRow = InterpolateRow_16_SSSE3;
1301
    }
1302
  }
1303
#endif
1304
#if defined(HAS_INTERPOLATEROW_16_AVX2)
1305
  if (TestCpuFlag(kCpuHasAVX2)) {
1306
    InterpolateRow = InterpolateRow_16_Any_AVX2;
1307
    if (IS_ALIGNED(src_width, 32)) {
1308
      InterpolateRow = InterpolateRow_16_AVX2;
1309
    }
1310
  }
1311
#endif
1312
#if defined(HAS_INTERPOLATEROW_16_NEON)
1313
  if (TestCpuFlag(kCpuHasNEON)) {
1314
    InterpolateRow = InterpolateRow_16_Any_NEON;
1315
    if (IS_ALIGNED(src_width, 16)) {
1316
      InterpolateRow = InterpolateRow_16_NEON;
1317
    }
1318
  }
1319
#endif
1320
#if defined(HAS_INTERPOLATEROW_16_SME)
1321
  if (TestCpuFlag(kCpuHasSME)) {
1322
    InterpolateRow = InterpolateRow_16_SME;
1323
  }
1324
#endif
1325
1326
#if defined(HAS_SCALEFILTERCOLS_16_SSSE3)
1327
  if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
1328
    ScaleFilterCols = ScaleFilterCols_16_SSSE3;
1329
  }
1330
#endif
1331
6.87k
  if (y > max_y) {
1332
59
    y = max_y;
1333
59
  }
1334
1335
477k
  for (j = 0; j < dst_height; ++j) {
1336
471k
    int yi = y >> 16;
1337
471k
    const uint16_t* src = src_ptr + yi * (int64_t)src_stride;
1338
471k
    if (filtering == kFilterLinear) {
1339
255k
      ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
1340
255k
    } else {
1341
215k
      int yf = (y >> 8) & 255;
1342
215k
      InterpolateRow((uint16_t*)row, src, src_stride, src_width, yf);
1343
215k
      ScaleFilterCols(dst_ptr, (uint16_t*)row, dst_width, x, dx);
1344
215k
    }
1345
471k
    dst_ptr += dst_stride;
1346
471k
    y += dy;
1347
471k
    if (y > max_y) {
1348
9.54k
      y = max_y;
1349
9.54k
    }
1350
471k
  }
1351
6.87k
  free_aligned_buffer_64(row);
1352
6.87k
  return 0;
1353
6.87k
}
1354
1355
// Scale plane up with bilinear interpolation.
1356
static int ScalePlaneBilinearUp(int src_width,
1357
                                int src_height,
1358
                                int dst_width,
1359
                                int dst_height,
1360
                                int src_stride,
1361
                                int dst_stride,
1362
                                const uint8_t* src_ptr,
1363
                                uint8_t* dst_ptr,
1364
6.42k
                                enum FilterMode filtering) {
1365
6.42k
  int j;
1366
  // Initial source x/y coordinate and step values as 16.16 fixed point.
1367
6.42k
  int x = 0;
1368
6.42k
  int y = 0;
1369
6.42k
  int dx = 0;
1370
6.42k
  int dy = 0;
1371
6.42k
  const int max_y = (src_height - 1) << 16;
1372
6.42k
  void (*InterpolateRow)(uint8_t* dst_ptr, const uint8_t* src_ptr,
1373
6.42k
                         ptrdiff_t src_stride, int dst_width,
1374
6.42k
                         int source_y_fraction) = InterpolateRow_C;
1375
6.42k
  void (*ScaleFilterCols)(uint8_t* dst_ptr, const uint8_t* src_ptr,
1376
6.42k
                          int dst_width, int x, int dx) =
1377
6.42k
      filtering ? ScaleFilterCols_C : ScaleCols_C;
1378
6.42k
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
1379
6.42k
             &dx, &dy);
1380
6.42k
  src_width = Abs(src_width);
1381
1382
6.42k
#if defined(HAS_INTERPOLATEROW_SSSE3)
1383
6.42k
  if (TestCpuFlag(kCpuHasSSSE3)) {
1384
6.42k
    InterpolateRow = InterpolateRow_Any_SSSE3;
1385
6.42k
    if (IS_ALIGNED(dst_width, 16)) {
1386
2.82k
      InterpolateRow = InterpolateRow_SSSE3;
1387
2.82k
    }
1388
6.42k
  }
1389
6.42k
#endif
1390
6.42k
#if defined(HAS_INTERPOLATEROW_AVX2)
1391
6.42k
  if (TestCpuFlag(kCpuHasAVX2)) {
1392
6.42k
    InterpolateRow = InterpolateRow_Any_AVX2;
1393
6.42k
    if (IS_ALIGNED(dst_width, 32)) {
1394
2.43k
      InterpolateRow = InterpolateRow_AVX2;
1395
2.43k
    }
1396
6.42k
  }
1397
6.42k
#endif
1398
#if defined(HAS_INTERPOLATEROW_NEON)
1399
  if (TestCpuFlag(kCpuHasNEON)) {
1400
    InterpolateRow = InterpolateRow_Any_NEON;
1401
    if (IS_ALIGNED(dst_width, 16)) {
1402
      InterpolateRow = InterpolateRow_NEON;
1403
    }
1404
  }
1405
#endif
1406
#if defined(HAS_INTERPOLATEROW_SME)
1407
  if (TestCpuFlag(kCpuHasSME)) {
1408
    InterpolateRow = InterpolateRow_SME;
1409
  }
1410
#endif
1411
#if defined(HAS_INTERPOLATEROW_RVV)
1412
  if (TestCpuFlag(kCpuHasRVV)) {
1413
    InterpolateRow = InterpolateRow_RVV;
1414
  }
1415
#endif
1416
1417
6.42k
  if (filtering && src_width >= 32768) {
1418
0
    ScaleFilterCols = ScaleFilterCols64_C;
1419
0
  }
1420
6.42k
#if defined(HAS_SCALEFILTERCOLS_SSSE3)
1421
6.42k
  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
1422
6.42k
    ScaleFilterCols = ScaleFilterCols_SSSE3;
1423
6.42k
  }
1424
6.42k
#endif
1425
#if defined(HAS_SCALEFILTERCOLS_NEON)
1426
  if (filtering && TestCpuFlag(kCpuHasNEON) && src_width < 32768) {
1427
    ScaleFilterCols = ScaleFilterCols_Any_NEON;
1428
    if (IS_ALIGNED(dst_width, 8)) {
1429
      ScaleFilterCols = ScaleFilterCols_NEON;
1430
    }
1431
  }
1432
#endif
1433
#if defined(HAS_SCALEFILTERCOLS_MSA)
1434
  if (filtering && TestCpuFlag(kCpuHasMSA) && src_width < 32768) {
1435
    ScaleFilterCols = ScaleFilterCols_Any_MSA;
1436
    if (IS_ALIGNED(dst_width, 16)) {
1437
      ScaleFilterCols = ScaleFilterCols_MSA;
1438
    }
1439
  }
1440
#endif
1441
#if defined(HAS_SCALEFILTERCOLS_LSX)
1442
  if (filtering && TestCpuFlag(kCpuHasLSX) && src_width < 32768) {
1443
    ScaleFilterCols = ScaleFilterCols_Any_LSX;
1444
    if (IS_ALIGNED(dst_width, 16)) {
1445
      ScaleFilterCols = ScaleFilterCols_LSX;
1446
    }
1447
  }
1448
#endif
1449
6.42k
  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
1450
0
    ScaleFilterCols = ScaleColsUp2_C;
1451
#if defined(HAS_SCALECOLS_SSE2)
1452
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
1453
      ScaleFilterCols = ScaleColsUp2_SSE2;
1454
    }
1455
#endif
1456
0
  }
1457
1458
6.42k
  if (y > max_y) {
1459
960
    y = max_y;
1460
960
  }
1461
6.42k
  {
1462
6.42k
    int yi = y >> 16;
1463
6.42k
    const uint8_t* src = src_ptr + yi * (int64_t)src_stride;
1464
1465
    // Allocate 2 row buffers.
1466
6.42k
    const int row_size = (dst_width + 31) & ~31;
1467
6.42k
    align_buffer_64(row, row_size * 2);
1468
6.42k
    if (!row)
1469
0
      return 1;
1470
1471
6.42k
    uint8_t* rowptr = row;
1472
6.42k
    int rowstride = row_size;
1473
6.42k
    int lasty = yi;
1474
1475
6.42k
    ScaleFilterCols(rowptr, src, dst_width, x, dx);
1476
6.42k
    if (src_height > 1) {
1477
5.46k
      src += src_stride;
1478
5.46k
    }
1479
6.42k
    ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx);
1480
6.42k
    if (src_height > 2) {
1481
4.80k
      src += src_stride;
1482
4.80k
    }
1483
1484
3.56M
    for (j = 0; j < dst_height; ++j) {
1485
3.55M
      yi = y >> 16;
1486
3.55M
      if (yi != lasty) {
1487
385k
        if (y > max_y) {
1488
0
          y = max_y;
1489
0
          yi = y >> 16;
1490
0
          src = src_ptr + yi * (int64_t)src_stride;
1491
0
        }
1492
385k
        if (yi != lasty) {
1493
385k
          ScaleFilterCols(rowptr, src, dst_width, x, dx);
1494
385k
          rowptr += rowstride;
1495
385k
          rowstride = -rowstride;
1496
385k
          lasty = yi;
1497
385k
          if ((y + 65536) < max_y) {
1498
380k
            src += src_stride;
1499
380k
          }
1500
385k
        }
1501
385k
      }
1502
3.55M
      if (filtering == kFilterLinear) {
1503
244k
        InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0);
1504
3.31M
      } else {
1505
3.31M
        int yf = (y >> 8) & 255;
1506
3.31M
        InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf);
1507
3.31M
      }
1508
3.55M
      dst_ptr += dst_stride;
1509
3.55M
      y += dy;
1510
3.55M
    }
1511
6.42k
    free_aligned_buffer_64(row);
1512
6.42k
  }
1513
0
  return 0;
1514
6.42k
}
1515
1516
// Scale plane, horizontally up by 2 times.
1517
// Uses linear filter horizontally, nearest vertically.
1518
// This is an optimized version for scaling up a plane to 2 times of
1519
// its original width, using linear interpolation.
1520
// This is used to scale U and V planes of I422 to I444.
1521
static void ScalePlaneUp2_Linear(int src_width,
1522
                                 int src_height,
1523
                                 int dst_width,
1524
                                 int dst_height,
1525
                                 int src_stride,
1526
                                 int dst_stride,
1527
                                 const uint8_t* src_ptr,
1528
325
                                 uint8_t* dst_ptr) {
1529
325
  void (*ScaleRowUp)(const uint8_t* src_ptr, uint8_t* dst_ptr, int dst_width) =
1530
325
      ScaleRowUp2_Linear_Any_C;
1531
325
  int i;
1532
325
  int y;
1533
325
  int dy;
1534
1535
325
  (void)src_width;
1536
  // This function can only scale up by 2 times horizontally.
1537
325
  assert(src_width == ((dst_width + 1) / 2));
1538
1539
325
#ifdef HAS_SCALEROWUP2_LINEAR_SSE2
1540
325
  if (TestCpuFlag(kCpuHasSSE2)) {
1541
325
    ScaleRowUp = ScaleRowUp2_Linear_Any_SSE2;
1542
325
  }
1543
325
#endif
1544
1545
325
#ifdef HAS_SCALEROWUP2_LINEAR_SSSE3
1546
325
  if (TestCpuFlag(kCpuHasSSSE3)) {
1547
325
    ScaleRowUp = ScaleRowUp2_Linear_Any_SSSE3;
1548
325
  }
1549
325
#endif
1550
1551
325
#ifdef HAS_SCALEROWUP2_LINEAR_AVX2
1552
325
  if (TestCpuFlag(kCpuHasAVX2)) {
1553
325
    ScaleRowUp = ScaleRowUp2_Linear_Any_AVX2;
1554
325
  }
1555
325
#endif
1556
1557
#ifdef HAS_SCALEROWUP2_LINEAR_NEON
1558
  if (TestCpuFlag(kCpuHasNEON)) {
1559
    ScaleRowUp = ScaleRowUp2_Linear_Any_NEON;
1560
  }
1561
#endif
1562
#ifdef HAS_SCALEROWUP2_LINEAR_RVV
1563
  if (TestCpuFlag(kCpuHasRVV)) {
1564
    ScaleRowUp = ScaleRowUp2_Linear_RVV;
1565
  }
1566
#endif
1567
1568
325
  if (dst_height == 1) {
1569
41
    ScaleRowUp(src_ptr + ((src_height - 1) / 2) * (int64_t)src_stride, dst_ptr,
1570
41
               dst_width);
1571
284
  } else {
1572
284
    dy = FixedDiv(src_height - 1, dst_height - 1);
1573
284
    y = (1 << 15) - 1;
1574
427k
    for (i = 0; i < dst_height; ++i) {
1575
427k
      ScaleRowUp(src_ptr + (y >> 16) * (int64_t)src_stride, dst_ptr, dst_width);
1576
427k
      dst_ptr += dst_stride;
1577
427k
      y += dy;
1578
427k
    }
1579
284
  }
1580
325
}
1581
1582
// Scale plane, up by 2 times.
1583
// This is an optimized version for scaling up a plane to 2 times of
1584
// its original size, using bilinear interpolation.
1585
// This is used to scale U and V planes of I420 to I444.
1586
static void ScalePlaneUp2_Bilinear(int src_width,
1587
                                   int src_height,
1588
                                   int dst_width,
1589
                                   int dst_height,
1590
                                   int src_stride,
1591
                                   int dst_stride,
1592
                                   const uint8_t* src_ptr,
1593
334
                                   uint8_t* dst_ptr) {
1594
334
  void (*Scale2RowUp)(const uint8_t* src_ptr, ptrdiff_t src_stride,
1595
334
                      uint8_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) =
1596
334
      ScaleRowUp2_Bilinear_Any_C;
1597
334
  int x;
1598
1599
334
  (void)src_width;
1600
  // This function can only scale up by 2 times.
1601
334
  assert(src_width == ((dst_width + 1) / 2));
1602
334
  assert(src_height == ((dst_height + 1) / 2));
1603
1604
334
#ifdef HAS_SCALEROWUP2_BILINEAR_SSE2
1605
334
  if (TestCpuFlag(kCpuHasSSE2)) {
1606
334
    Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSE2;
1607
334
  }
1608
334
#endif
1609
1610
334
#ifdef HAS_SCALEROWUP2_BILINEAR_SSSE3
1611
334
  if (TestCpuFlag(kCpuHasSSSE3)) {
1612
334
    Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSSE3;
1613
334
  }
1614
334
#endif
1615
1616
334
#ifdef HAS_SCALEROWUP2_BILINEAR_AVX2
1617
334
  if (TestCpuFlag(kCpuHasAVX2)) {
1618
334
    Scale2RowUp = ScaleRowUp2_Bilinear_Any_AVX2;
1619
334
  }
1620
334
#endif
1621
1622
#ifdef HAS_SCALEROWUP2_BILINEAR_NEON
1623
  if (TestCpuFlag(kCpuHasNEON)) {
1624
    Scale2RowUp = ScaleRowUp2_Bilinear_Any_NEON;
1625
  }
1626
#endif
1627
#ifdef HAS_SCALEROWUP2_BILINEAR_RVV
1628
  if (TestCpuFlag(kCpuHasRVV)) {
1629
    Scale2RowUp = ScaleRowUp2_Bilinear_RVV;
1630
  }
1631
#endif
1632
1633
334
  Scale2RowUp(src_ptr, 0, dst_ptr, 0, dst_width);
1634
334
  dst_ptr += dst_stride;
1635
16.7k
  for (x = 0; x < src_height - 1; ++x) {
1636
16.3k
    Scale2RowUp(src_ptr, src_stride, dst_ptr, dst_stride, dst_width);
1637
16.3k
    src_ptr += src_stride;
1638
    // TODO(fbarchard): Test performance of writing one row of destination at a
1639
    // time.
1640
16.3k
    dst_ptr += 2 * dst_stride;
1641
16.3k
  }
1642
334
  if (!(dst_height & 1)) {
1643
168
    Scale2RowUp(src_ptr, 0, dst_ptr, 0, dst_width);
1644
168
  }
1645
334
}
1646
1647
// Scale at most 14 bit plane, horizontally up by 2 times.
1648
// This is an optimized version for scaling up a plane to 2 times of
1649
// its original width, using linear interpolation.
1650
// stride is in count of uint16_t.
1651
// This is used to scale U and V planes of I210 to I410 and I212 to I412.
1652
static void ScalePlaneUp2_12_Linear(int src_width,
1653
                                    int src_height,
1654
                                    int dst_width,
1655
                                    int dst_height,
1656
                                    int src_stride,
1657
                                    int dst_stride,
1658
                                    const uint16_t* src_ptr,
1659
422
                                    uint16_t* dst_ptr) {
1660
422
  void (*ScaleRowUp)(const uint16_t* src_ptr, uint16_t* dst_ptr,
1661
422
                     int dst_width) = ScaleRowUp2_Linear_16_Any_C;
1662
422
  int i;
1663
422
  int y;
1664
422
  int dy;
1665
1666
422
  (void)src_width;
1667
  // This function can only scale up by 2 times horizontally.
1668
422
  assert(src_width == ((dst_width + 1) / 2));
1669
1670
422
#ifdef HAS_SCALEROWUP2_LINEAR_12_SSSE3
1671
422
  if (TestCpuFlag(kCpuHasSSSE3)) {
1672
422
    ScaleRowUp = ScaleRowUp2_Linear_12_Any_SSSE3;
1673
422
  }
1674
422
#endif
1675
1676
422
#ifdef HAS_SCALEROWUP2_LINEAR_12_AVX2
1677
422
  if (TestCpuFlag(kCpuHasAVX2)) {
1678
422
    ScaleRowUp = ScaleRowUp2_Linear_12_Any_AVX2;
1679
422
  }
1680
422
#endif
1681
1682
#ifdef HAS_SCALEROWUP2_LINEAR_12_NEON
1683
  if (TestCpuFlag(kCpuHasNEON)) {
1684
    ScaleRowUp = ScaleRowUp2_Linear_12_Any_NEON;
1685
  }
1686
#endif
1687
1688
422
  if (dst_height == 1) {
1689
39
    ScaleRowUp(src_ptr + ((src_height - 1) / 2) * (int64_t)src_stride, dst_ptr,
1690
39
               dst_width);
1691
383
  } else {
1692
383
    dy = FixedDiv(src_height - 1, dst_height - 1);
1693
383
    y = (1 << 15) - 1;
1694
381k
    for (i = 0; i < dst_height; ++i) {
1695
381k
      ScaleRowUp(src_ptr + (y >> 16) * (int64_t)src_stride, dst_ptr, dst_width);
1696
381k
      dst_ptr += dst_stride;
1697
381k
      y += dy;
1698
381k
    }
1699
383
  }
1700
422
}
1701
1702
// Scale at most 12 bit plane, up by 2 times.
1703
// This is an optimized version for scaling up a plane to 2 times of
1704
// its original size, using bilinear interpolation.
1705
// stride is in count of uint16_t.
1706
// This is used to scale U and V planes of I010 to I410 and I012 to I412.
1707
static void ScalePlaneUp2_12_Bilinear(int src_width,
1708
                                      int src_height,
1709
                                      int dst_width,
1710
                                      int dst_height,
1711
                                      int src_stride,
1712
                                      int dst_stride,
1713
                                      const uint16_t* src_ptr,
1714
427
                                      uint16_t* dst_ptr) {
1715
427
  void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride,
1716
427
                      uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) =
1717
427
      ScaleRowUp2_Bilinear_16_Any_C;
1718
427
  int x;
1719
1720
427
  (void)src_width;
1721
  // This function can only scale up by 2 times.
1722
427
  assert(src_width == ((dst_width + 1) / 2));
1723
427
  assert(src_height == ((dst_height + 1) / 2));
1724
1725
427
#ifdef HAS_SCALEROWUP2_BILINEAR_12_SSSE3
1726
427
  if (TestCpuFlag(kCpuHasSSSE3)) {
1727
427
    Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_SSSE3;
1728
427
  }
1729
427
#endif
1730
1731
427
#ifdef HAS_SCALEROWUP2_BILINEAR_12_AVX2
1732
427
  if (TestCpuFlag(kCpuHasAVX2)) {
1733
427
    Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_AVX2;
1734
427
  }
1735
427
#endif
1736
1737
#ifdef HAS_SCALEROWUP2_BILINEAR_12_NEON
1738
  if (TestCpuFlag(kCpuHasNEON)) {
1739
    Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_NEON;
1740
  }
1741
#endif
1742
1743
427
  Scale2RowUp(src_ptr, 0, dst_ptr, 0, dst_width);
1744
427
  dst_ptr += dst_stride;
1745
17.0k
  for (x = 0; x < src_height - 1; ++x) {
1746
16.6k
    Scale2RowUp(src_ptr, src_stride, dst_ptr, dst_stride, dst_width);
1747
16.6k
    src_ptr += src_stride;
1748
16.6k
    dst_ptr += 2 * dst_stride;
1749
16.6k
  }
1750
427
  if (!(dst_height & 1)) {
1751
150
    Scale2RowUp(src_ptr, 0, dst_ptr, 0, dst_width);
1752
150
  }
1753
427
}
1754
1755
static void ScalePlaneUp2_16_Linear(int src_width,
1756
                                    int src_height,
1757
                                    int dst_width,
1758
                                    int dst_height,
1759
                                    int src_stride,
1760
                                    int dst_stride,
1761
                                    const uint16_t* src_ptr,
1762
0
                                    uint16_t* dst_ptr) {
1763
0
  void (*ScaleRowUp)(const uint16_t* src_ptr, uint16_t* dst_ptr,
1764
0
                     int dst_width) = ScaleRowUp2_Linear_16_Any_C;
1765
0
  int i;
1766
0
  int y;
1767
0
  int dy;
1768
1769
0
  (void)src_width;
1770
  // This function can only scale up by 2 times horizontally.
1771
0
  assert(src_width == ((dst_width + 1) / 2));
1772
1773
0
#ifdef HAS_SCALEROWUP2_LINEAR_16_SSE2
1774
0
  if (TestCpuFlag(kCpuHasSSE2)) {
1775
0
    ScaleRowUp = ScaleRowUp2_Linear_16_Any_SSE2;
1776
0
  }
1777
0
#endif
1778
1779
0
#ifdef HAS_SCALEROWUP2_LINEAR_16_AVX2
1780
0
  if (TestCpuFlag(kCpuHasAVX2)) {
1781
0
    ScaleRowUp = ScaleRowUp2_Linear_16_Any_AVX2;
1782
0
  }
1783
0
#endif
1784
1785
#ifdef HAS_SCALEROWUP2_LINEAR_16_NEON
1786
  if (TestCpuFlag(kCpuHasNEON)) {
1787
    ScaleRowUp = ScaleRowUp2_Linear_16_Any_NEON;
1788
  }
1789
#endif
1790
1791
0
  if (dst_height == 1) {
1792
0
    ScaleRowUp(src_ptr + ((src_height - 1) / 2) * (int64_t)src_stride, dst_ptr,
1793
0
               dst_width);
1794
0
  } else {
1795
0
    dy = FixedDiv(src_height - 1, dst_height - 1);
1796
0
    y = (1 << 15) - 1;
1797
0
    for (i = 0; i < dst_height; ++i) {
1798
0
      ScaleRowUp(src_ptr + (y >> 16) * (int64_t)src_stride, dst_ptr, dst_width);
1799
0
      dst_ptr += dst_stride;
1800
0
      y += dy;
1801
0
    }
1802
0
  }
1803
0
}
1804
1805
static void ScalePlaneUp2_16_Bilinear(int src_width,
1806
                                      int src_height,
1807
                                      int dst_width,
1808
                                      int dst_height,
1809
                                      int src_stride,
1810
                                      int dst_stride,
1811
                                      const uint16_t* src_ptr,
1812
0
                                      uint16_t* dst_ptr) {
1813
0
  void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride,
1814
0
                      uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) =
1815
0
      ScaleRowUp2_Bilinear_16_Any_C;
1816
0
  int x;
1817
1818
0
  (void)src_width;
1819
  // This function can only scale up by 2 times.
1820
0
  assert(src_width == ((dst_width + 1) / 2));
1821
0
  assert(src_height == ((dst_height + 1) / 2));
1822
1823
0
#ifdef HAS_SCALEROWUP2_BILINEAR_16_SSE2
1824
0
  if (TestCpuFlag(kCpuHasSSE2)) {
1825
0
    Scale2RowUp = ScaleRowUp2_Bilinear_16_Any_SSE2;
1826
0
  }
1827
0
#endif
1828
1829
0
#ifdef HAS_SCALEROWUP2_BILINEAR_16_AVX2
1830
0
  if (TestCpuFlag(kCpuHasAVX2)) {
1831
0
    Scale2RowUp = ScaleRowUp2_Bilinear_16_Any_AVX2;
1832
0
  }
1833
0
#endif
1834
1835
#ifdef HAS_SCALEROWUP2_BILINEAR_16_NEON
1836
  if (TestCpuFlag(kCpuHasNEON)) {
1837
    Scale2RowUp = ScaleRowUp2_Bilinear_16_Any_NEON;
1838
  }
1839
#endif
1840
1841
0
  Scale2RowUp(src_ptr, 0, dst_ptr, 0, dst_width);
1842
0
  dst_ptr += dst_stride;
1843
0
  for (x = 0; x < src_height - 1; ++x) {
1844
0
    Scale2RowUp(src_ptr, src_stride, dst_ptr, dst_stride, dst_width);
1845
0
    src_ptr += src_stride;
1846
0
    dst_ptr += 2 * dst_stride;
1847
0
  }
1848
0
  if (!(dst_height & 1)) {
1849
0
    Scale2RowUp(src_ptr, 0, dst_ptr, 0, dst_width);
1850
0
  }
1851
0
}
1852
1853
static int ScalePlaneBilinearUp_16(int src_width,
1854
                                   int src_height,
1855
                                   int dst_width,
1856
                                   int dst_height,
1857
                                   int src_stride,
1858
                                   int dst_stride,
1859
                                   const uint16_t* src_ptr,
1860
                                   uint16_t* dst_ptr,
1861
6.33k
                                   enum FilterMode filtering) {
1862
6.33k
  int j;
1863
  // Initial source x/y coordinate and step values as 16.16 fixed point.
1864
6.33k
  int x = 0;
1865
6.33k
  int y = 0;
1866
6.33k
  int dx = 0;
1867
6.33k
  int dy = 0;
1868
6.33k
  const int max_y = (src_height - 1) << 16;
1869
6.33k
  void (*InterpolateRow)(uint16_t* dst_ptr, const uint16_t* src_ptr,
1870
6.33k
                         ptrdiff_t src_stride, int dst_width,
1871
6.33k
                         int source_y_fraction) = InterpolateRow_16_C;
1872
6.33k
  void (*ScaleFilterCols)(uint16_t* dst_ptr, const uint16_t* src_ptr,
1873
6.33k
                          int dst_width, int x, int dx) =
1874
6.33k
      filtering ? ScaleFilterCols_16_C : ScaleCols_16_C;
1875
6.33k
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
1876
6.33k
             &dx, &dy);
1877
6.33k
  src_width = Abs(src_width);
1878
1879
#if defined(HAS_INTERPOLATEROW_16_SSE2)
1880
  if (TestCpuFlag(kCpuHasSSE2)) {
1881
    InterpolateRow = InterpolateRow_16_Any_SSE2;
1882
    if (IS_ALIGNED(dst_width, 16)) {
1883
      InterpolateRow = InterpolateRow_16_SSE2;
1884
    }
1885
  }
1886
#endif
1887
#if defined(HAS_INTERPOLATEROW_16_SSSE3)
1888
  if (TestCpuFlag(kCpuHasSSSE3)) {
1889
    InterpolateRow = InterpolateRow_16_Any_SSSE3;
1890
    if (IS_ALIGNED(dst_width, 16)) {
1891
      InterpolateRow = InterpolateRow_16_SSSE3;
1892
    }
1893
  }
1894
#endif
1895
#if defined(HAS_INTERPOLATEROW_16_AVX2)
1896
  if (TestCpuFlag(kCpuHasAVX2)) {
1897
    InterpolateRow = InterpolateRow_16_Any_AVX2;
1898
    if (IS_ALIGNED(dst_width, 32)) {
1899
      InterpolateRow = InterpolateRow_16_AVX2;
1900
    }
1901
  }
1902
#endif
1903
#if defined(HAS_INTERPOLATEROW_16_NEON)
1904
  if (TestCpuFlag(kCpuHasNEON)) {
1905
    InterpolateRow = InterpolateRow_16_Any_NEON;
1906
    if (IS_ALIGNED(dst_width, 16)) {
1907
      InterpolateRow = InterpolateRow_16_NEON;
1908
    }
1909
  }
1910
#endif
1911
#if defined(HAS_INTERPOLATEROW_16_SME)
1912
  if (TestCpuFlag(kCpuHasSME)) {
1913
    InterpolateRow = InterpolateRow_16_SME;
1914
  }
1915
#endif
1916
1917
6.33k
  if (filtering && src_width >= 32768) {
1918
0
    ScaleFilterCols = ScaleFilterCols64_16_C;
1919
0
  }
1920
#if defined(HAS_SCALEFILTERCOLS_16_SSSE3)
1921
  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
1922
    ScaleFilterCols = ScaleFilterCols_16_SSSE3;
1923
  }
1924
#endif
1925
6.33k
  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
1926
0
    ScaleFilterCols = ScaleColsUp2_16_C;
1927
#if defined(HAS_SCALECOLS_16_SSE2)
1928
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
1929
      ScaleFilterCols = ScaleColsUp2_16_SSE2;
1930
    }
1931
#endif
1932
0
  }
1933
6.33k
  if (y > max_y) {
1934
1.02k
    y = max_y;
1935
1.02k
  }
1936
6.33k
  {
1937
6.33k
    int yi = y >> 16;
1938
6.33k
    const uint16_t* src = src_ptr + yi * (int64_t)src_stride;
1939
1940
    // Allocate 2 row buffers.
1941
6.33k
    const int row_size = (dst_width + 31) & ~31;
1942
6.33k
    align_buffer_64(row, row_size * 4);
1943
6.33k
    int rowstride = row_size;
1944
6.33k
    int lasty = yi;
1945
6.33k
    uint16_t* rowptr = (uint16_t*)row;
1946
6.33k
    if (!row)
1947
0
      return 1;
1948
1949
6.33k
    ScaleFilterCols(rowptr, src, dst_width, x, dx);
1950
6.33k
    if (src_height > 1) {
1951
5.31k
      src += src_stride;
1952
5.31k
    }
1953
6.33k
    ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx);
1954
6.33k
    if (src_height > 2) {
1955
4.26k
      src += src_stride;
1956
4.26k
    }
1957
1958
4.10M
    for (j = 0; j < dst_height; ++j) {
1959
4.09M
      yi = y >> 16;
1960
4.09M
      if (yi != lasty) {
1961
264k
        if (y > max_y) {
1962
0
          y = max_y;
1963
0
          yi = y >> 16;
1964
0
          src = src_ptr + yi * (int64_t)src_stride;
1965
0
        }
1966
264k
        if (yi != lasty) {
1967
264k
          ScaleFilterCols(rowptr, src, dst_width, x, dx);
1968
264k
          rowptr += rowstride;
1969
264k
          rowstride = -rowstride;
1970
264k
          lasty = yi;
1971
264k
          if ((y + 65536) < max_y) {
1972
259k
            src += src_stride;
1973
259k
          }
1974
264k
        }
1975
264k
      }
1976
4.09M
      if (filtering == kFilterLinear) {
1977
398k
        InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0);
1978
3.69M
      } else {
1979
3.69M
        int yf = (y >> 8) & 255;
1980
3.69M
        InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf);
1981
3.69M
      }
1982
4.09M
      dst_ptr += dst_stride;
1983
4.09M
      y += dy;
1984
4.09M
    }
1985
6.33k
    free_aligned_buffer_64(row);
1986
6.33k
  }
1987
0
  return 0;
1988
6.33k
}
1989
1990
// Scale Plane to/from any dimensions, without interpolation.
1991
// Fixed point math is used for performance: The upper 16 bits
1992
// of x and dx is the integer part of the source position and
1993
// the lower 16 bits are the fixed decimal part.
1994
1995
static void ScalePlaneSimple(int src_width,
1996
                             int src_height,
1997
                             int dst_width,
1998
                             int dst_height,
1999
                             int src_stride,
2000
                             int dst_stride,
2001
                             const uint8_t* src_ptr,
2002
1.11k
                             uint8_t* dst_ptr) {
2003
1.11k
  int i;
2004
1.11k
  void (*ScaleCols)(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width,
2005
1.11k
                    int x, int dx) = ScaleCols_C;
2006
  // Initial source x/y coordinate and step values as 16.16 fixed point.
2007
1.11k
  int x = 0;
2008
1.11k
  int y = 0;
2009
1.11k
  int dx = 0;
2010
1.11k
  int dy = 0;
2011
1.11k
  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone, &x, &y,
2012
1.11k
             &dx, &dy);
2013
1.11k
  src_width = Abs(src_width);
2014
2015
1.11k
  if (src_width * 2 == dst_width && x < 0x8000) {
2016
72
    ScaleCols = ScaleColsUp2_C;
2017
#if defined(HAS_SCALECOLS_SSE2)
2018
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
2019
      ScaleCols = ScaleColsUp2_SSE2;
2020
    }
2021
#endif
2022
72
  }
2023
2024
405k
  for (i = 0; i < dst_height; ++i) {
2025
404k
    ScaleCols(dst_ptr, src_ptr + (y >> 16) * (int64_t)src_stride, dst_width, x,
2026
404k
              dx);
2027
404k
    dst_ptr += dst_stride;
2028
404k
    y += dy;
2029
404k
  }
2030
1.11k
}
2031
2032
static void ScalePlaneSimple_16(int src_width,
2033
                                int src_height,
2034
                                int dst_width,
2035
                                int dst_height,
2036
                                int src_stride,
2037
                                int dst_stride,
2038
                                const uint16_t* src_ptr,
2039
1.54k
                                uint16_t* dst_ptr) {
2040
1.54k
  int i;
2041
1.54k
  void (*ScaleCols)(uint16_t* dst_ptr, const uint16_t* src_ptr, int dst_width,
2042
1.54k
                    int x, int dx) = ScaleCols_16_C;
2043
  // Initial source x/y coordinate and step values as 16.16 fixed point.
2044
1.54k
  int x = 0;
2045
1.54k
  int y = 0;
2046
1.54k
  int dx = 0;
2047
1.54k
  int dy = 0;
2048
1.54k
  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone, &x, &y,
2049
1.54k
             &dx, &dy);
2050
1.54k
  src_width = Abs(src_width);
2051
2052
1.54k
  if (src_width * 2 == dst_width && x < 0x8000) {
2053
106
    ScaleCols = ScaleColsUp2_16_C;
2054
#if defined(HAS_SCALECOLS_16_SSE2)
2055
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
2056
      ScaleCols = ScaleColsUp2_16_SSE2;
2057
    }
2058
#endif
2059
106
  }
2060
2061
329k
  for (i = 0; i < dst_height; ++i) {
2062
327k
    ScaleCols(dst_ptr, src_ptr + (y >> 16) * (int64_t)src_stride, dst_width, x,
2063
327k
              dx);
2064
327k
    dst_ptr += dst_stride;
2065
327k
    y += dy;
2066
327k
  }
2067
1.54k
}
2068
2069
// Scale a plane.
2070
// This function dispatches to a specialized scaler based on scale factor.
2071
LIBYUV_API
2072
int ScalePlane(const uint8_t* src,
2073
               int src_stride,
2074
               int src_width,
2075
               int src_height,
2076
               uint8_t* dst,
2077
               int dst_stride,
2078
               int dst_width,
2079
               int dst_height,
2080
22.8k
               enum FilterMode filtering) {
2081
  // Simplify filtering when possible.
2082
22.8k
  filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height,
2083
22.8k
                                filtering);
2084
2085
  // Negative height means invert the image.
2086
22.8k
  if (src_height < 0) {
2087
0
    src_height = -src_height;
2088
0
    src = src + (src_height - 1) * (int64_t)src_stride;
2089
0
    src_stride = -src_stride;
2090
0
  }
2091
  // Use specialized scales to improve performance for common resolutions.
2092
  // For example, all the 1/2 scalings will use ScalePlaneDown2()
2093
22.8k
  if (dst_width == src_width && dst_height == src_height) {
2094
    // Straight copy.
2095
130
    CopyPlane(src, src_stride, dst, dst_stride, dst_width, dst_height);
2096
130
    return 0;
2097
130
  }
2098
22.7k
  if (dst_width == src_width && filtering != kFilterBox) {
2099
7.39k
    int dy = 0;
2100
7.39k
    int y = 0;
2101
    // When scaling down, use the center 2 rows to filter.
2102
    // When scaling up, last row of destination uses the last 2 source rows.
2103
7.39k
    if (dst_height <= src_height) {
2104
734
      dy = FixedDiv(src_height, dst_height);
2105
734
      y = CENTERSTART(dy, -32768);  // Subtract 0.5 (32768) to center filter.
2106
6.66k
    } else if (src_height > 1 && dst_height > 1) {
2107
6.55k
      dy = FixedDiv1(src_height, dst_height);
2108
6.55k
    }
2109
    // Arbitrary scale vertically, but unscaled horizontally.
2110
7.39k
    ScalePlaneVertical(src_height, dst_width, dst_height, src_stride,
2111
7.39k
                       dst_stride, src, dst, 0, y, dy, /*bpp=*/1, filtering);
2112
7.39k
    return 0;
2113
7.39k
  }
2114
15.3k
  if (dst_width <= Abs(src_width) && dst_height <= src_height) {
2115
    // Scale down.
2116
3.43k
    if (4 * dst_width == 3 * src_width && 4 * dst_height == 3 * src_height) {
2117
      // optimized, 3/4
2118
55
      ScalePlaneDown34(src_width, src_height, dst_width, dst_height, src_stride,
2119
55
                       dst_stride, src, dst, filtering);
2120
55
      return 0;
2121
55
    }
2122
3.37k
    if (2 * dst_width == src_width && 2 * dst_height == src_height) {
2123
      // optimized, 1/2
2124
89
      ScalePlaneDown2(src_width, src_height, dst_width, dst_height, src_stride,
2125
89
                      dst_stride, src, dst, filtering);
2126
89
      return 0;
2127
89
    }
2128
    // 3/8 rounded up for odd sized chroma height.
2129
3.28k
    if (8 * dst_width == 3 * src_width && 8 * dst_height == 3 * src_height) {
2130
      // optimized, 3/8
2131
33
      ScalePlaneDown38(src_width, src_height, dst_width, dst_height, src_stride,
2132
33
                       dst_stride, src, dst, filtering);
2133
33
      return 0;
2134
33
    }
2135
3.25k
    if (4 * dst_width == src_width && 4 * dst_height == src_height &&
2136
79
        (filtering == kFilterBox || filtering == kFilterNone)) {
2137
      // optimized, 1/4
2138
79
      ScalePlaneDown4(src_width, src_height, dst_width, dst_height, src_stride,
2139
79
                      dst_stride, src, dst, filtering);
2140
79
      return 0;
2141
79
    }
2142
3.25k
  }
2143
15.0k
  if (filtering == kFilterBox && dst_height * 2 < src_height) {
2144
1.62k
    return ScalePlaneBox(src_width, src_height, dst_width, dst_height,
2145
1.62k
                         src_stride, dst_stride, src, dst);
2146
1.62k
  }
2147
13.4k
  if ((dst_width + 1) / 2 == src_width && filtering == kFilterLinear) {
2148
325
    ScalePlaneUp2_Linear(src_width, src_height, dst_width, dst_height,
2149
325
                         src_stride, dst_stride, src, dst);
2150
325
    return 0;
2151
325
  }
2152
13.1k
  if ((dst_height + 1) / 2 == src_height && (dst_width + 1) / 2 == src_width &&
2153
363
      (filtering == kFilterBilinear || filtering == kFilterBox)) {
2154
334
    ScalePlaneUp2_Bilinear(src_width, src_height, dst_width, dst_height,
2155
334
                           src_stride, dst_stride, src, dst);
2156
334
    return 0;
2157
334
  }
2158
12.7k
  if (filtering && dst_height > src_height) {
2159
6.42k
    return ScalePlaneBilinearUp(src_width, src_height, dst_width, dst_height,
2160
6.42k
                                src_stride, dst_stride, src, dst, filtering);
2161
6.42k
  }
2162
6.36k
  if (filtering) {
2163
5.25k
    return ScalePlaneBilinearDown(src_width, src_height, dst_width, dst_height,
2164
5.25k
                                  src_stride, dst_stride, src, dst, filtering);
2165
5.25k
  }
2166
1.11k
  ScalePlaneSimple(src_width, src_height, dst_width, dst_height, src_stride,
2167
1.11k
                   dst_stride, src, dst);
2168
1.11k
  return 0;
2169
6.36k
}
2170
2171
// Scale a 16-bit plane.
// Dispatches to a specialized scaler chosen from the scale factor and the
// (possibly reduced) filter mode. A negative src_height flips the source
// vertically. Strides are added to uint16_t pointers, so they count
// elements, not bytes. Returns 0 from the paths handled directly here; the
// box and bilinear paths return whatever their helper returns.
LIBYUV_API
int ScalePlane_16(const uint16_t* src,
                  int src_stride,
                  int src_width,
                  int src_height,
                  uint16_t* dst,
                  int dst_stride,
                  int dst_width,
                  int dst_height,
                  enum FilterMode filtering) {
  // Simplify filtering when possible.
  filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height,
                                filtering);

  // Negative height means invert the image: start at the last source row
  // and walk backwards through it.
  if (src_height < 0) {
    src_height = -src_height;
    src = src + (src_height - 1) * (int64_t)src_stride;
    src_stride = -src_stride;
  }

  // Use specialized scales to improve performance for common resolutions.
  // For example, all the 1/2 scalings will use ScalePlaneDown2()
  if (dst_width == src_width) {
    if (dst_height == src_height) {
      // Same size: straight copy.
      CopyPlane_16(src, src_stride, dst, dst_stride, dst_width, dst_height);
      return 0;
    }
    if (filtering != kFilterBox) {
      // Arbitrary scale vertically, but unscaled horizontally.
      int y_step = 0;
      int y_start = 0;
      if (dst_height <= src_height) {
        // When scaling down, use the center 2 rows to filter.
        y_step = FixedDiv(src_height, dst_height);
        // Subtract 0.5 (32768) to center filter.
        y_start = CENTERSTART(y_step, -32768);
      } else if (src_height > 1 && dst_height > 1) {
        // When scaling up, ensure the last row of destination uses the last
        // source. A height of 1 on either side skips the division, leaving
        // the step at 0.
        y_step = FixedDiv1(src_height, dst_height);
      }
      ScalePlaneVertical_16(src_height, dst_width, dst_height, src_stride,
                            dst_stride, src, dst, 0, y_start, y_step,
                            /*bpp=*/1, filtering);
      return 0;
    }
  }

  if (dst_width <= Abs(src_width) && dst_height <= src_height) {
    // Scale down: try the fixed-ratio fast paths first.
    if (3 * src_width == 4 * dst_width && 3 * src_height == 4 * dst_height) {
      // optimized, 3/4
      ScalePlaneDown34_16(src_width, src_height, dst_width, dst_height,
                          src_stride, dst_stride, src, dst, filtering);
      return 0;
    }
    if (src_width == 2 * dst_width && src_height == 2 * dst_height) {
      // optimized, 1/2
      ScalePlaneDown2_16(src_width, src_height, dst_width, dst_height,
                         src_stride, dst_stride, src, dst, filtering);
      return 0;
    }
    // 3/8 rounded up for odd sized chroma height.
    if (3 * src_width == 8 * dst_width && 3 * src_height == 8 * dst_height) {
      // optimized, 3/8
      ScalePlaneDown38_16(src_width, src_height, dst_width, dst_height,
                          src_stride, dst_stride, src, dst, filtering);
      return 0;
    }
    if (src_width == 4 * dst_width && src_height == 4 * dst_height &&
        (filtering == kFilterBox || filtering == kFilterNone)) {
      // optimized, 1/4
      ScalePlaneDown4_16(src_width, src_height, dst_width, dst_height,
                         src_stride, dst_stride, src, dst, filtering);
      return 0;
    }
  }

  // Box filter when shrinking the height to less than half.
  if (filtering == kFilterBox && dst_height * 2 < src_height) {
    return ScalePlaneBox_16(src_width, src_height, dst_width, dst_height,
                            src_stride, dst_stride, src, dst);
  }

  // Exact 2x width with linear filtering.
  if ((dst_width + 1) / 2 == src_width && filtering == kFilterLinear) {
    ScalePlaneUp2_16_Linear(src_width, src_height, dst_width, dst_height,
                            src_stride, dst_stride, src, dst);
    return 0;
  }

  // Exact 2x in both dimensions with bilinear (or box) filtering.
  if ((dst_height + 1) / 2 == src_height && (dst_width + 1) / 2 == src_width &&
      (filtering == kFilterBilinear || filtering == kFilterBox)) {
    ScalePlaneUp2_16_Bilinear(src_width, src_height, dst_width, dst_height,
                              src_stride, dst_stride, src, dst);
    return 0;
  }

  // General filtered scaling: the "up" variant when the height grows,
  // otherwise the "down" variant.
  if (filtering) {
    return (dst_height > src_height)
               ? ScalePlaneBilinearUp_16(src_width, src_height, dst_width,
                                         dst_height, src_stride, dst_stride,
                                         src, dst, filtering)
               : ScalePlaneBilinearDown_16(src_width, src_height, dst_width,
                                           dst_height, src_stride, dst_stride,
                                           src, dst, filtering);
  }

  // No filtering: point sampling.
  ScalePlaneSimple_16(src_width, src_height, dst_width, dst_height, src_stride,
                      dst_stride, src, dst);
  return 0;
}
2274
2275
// Scale a plane through the 12-bit entry point.
// The exact-2x upscales are routed to the 12-bit row kernels; every other
// case is forwarded to ScalePlane_16 and returns its result. A negative
// src_height flips the source vertically.
LIBYUV_API
int ScalePlane_12(const uint16_t* src,
                  int src_stride,
                  int src_width,
                  int src_height,
                  uint16_t* dst,
                  int dst_stride,
                  int dst_width,
                  int dst_height,
                  enum FilterMode filtering) {
  // Simplify filtering when possible.
  filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height,
                                filtering);

  // Negative height means invert the image: start at the last source row
  // and walk backwards through it.
  if (src_height < 0) {
    src_height = -src_height;
    src = src + (src_height - 1) * (int64_t)src_stride;
    src_stride = -src_stride;
  }

  // Both fast paths require the width to be exactly doubled.
  if ((dst_width + 1) / 2 == src_width) {
    if (filtering == kFilterLinear) {
      ScalePlaneUp2_12_Linear(src_width, src_height, dst_width, dst_height,
                              src_stride, dst_stride, src, dst);
      return 0;
    }
    if ((dst_height + 1) / 2 == src_height &&
        (filtering == kFilterBilinear || filtering == kFilterBox)) {
      ScalePlaneUp2_12_Bilinear(src_width, src_height, dst_width, dst_height,
                                src_stride, dst_stride, src, dst);
      return 0;
    }
  }

  // Everything else uses the generic 16-bit dispatcher. src_height is
  // already non-negative here, so it will not flip the source a second time.
  return ScalePlane_16(src, src_stride, src_width, src_height, dst, dst_stride,
                       dst_width, dst_height, filtering);
}
2311
2312
// Scale an I420 image.
2313
// This function in turn calls a scaling function for each plane.
2314
2315
LIBYUV_API
2316
int I420Scale(const uint8_t* src_y,
2317
              int src_stride_y,
2318
              const uint8_t* src_u,
2319
              int src_stride_u,
2320
              const uint8_t* src_v,
2321
              int src_stride_v,
2322
              int src_width,
2323
              int src_height,
2324
              uint8_t* dst_y,
2325
              int dst_stride_y,
2326
              uint8_t* dst_u,
2327
              int dst_stride_u,
2328
              uint8_t* dst_v,
2329
              int dst_stride_v,
2330
              int dst_width,
2331
              int dst_height,
2332
0
              enum FilterMode filtering) {
2333
0
  int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
2334
0
  int src_halfheight = SUBSAMPLE(src_height, 1, 1);
2335
0
  int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
2336
0
  int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
2337
0
  int r;
2338
2339
0
  if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
2340
0
      src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
2341
0
      dst_width <= 0 || dst_height <= 0) {
2342
0
    return -1;
2343
0
  }
2344
2345
0
  r = ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y,
2346
0
                 dst_stride_y, dst_width, dst_height, filtering);
2347
0
  if (r != 0) {
2348
0
    return r;
2349
0
  }
2350
0
  r = ScalePlane(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u,
2351
0
                 dst_stride_u, dst_halfwidth, dst_halfheight, filtering);
2352
0
  if (r != 0) {
2353
0
    return r;
2354
0
  }
2355
0
  r = ScalePlane(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v,
2356
0
                 dst_stride_v, dst_halfwidth, dst_halfheight, filtering);
2357
0
  return r;
2358
0
}
2359
2360
// Scale a 16-bit I420 image.
// Calls ScalePlane_16 once per plane: Y at the full dimensions, then U and
// V at half the width and height (halved with SUBSAMPLE, which rounds the
// magnitude up). Returns -1 when an argument is rejected, otherwise the
// first non-zero ScalePlane_16 result, or 0 if all three planes succeed.
LIBYUV_API
int I420Scale_16(const uint16_t* src_y,
                 int src_stride_y,
                 const uint16_t* src_u,
                 int src_stride_u,
                 const uint16_t* src_v,
                 int src_stride_v,
                 int src_width,
                 int src_height,
                 uint16_t* dst_y,
                 int dst_stride_y,
                 uint16_t* dst_u,
                 int dst_stride_u,
                 uint16_t* dst_v,
                 int dst_stride_v,
                 int dst_width,
                 int dst_height,
                 enum FilterMode filtering) {
  // Chroma plane dimensions.
  int src_uv_width = SUBSAMPLE(src_width, 1, 1);
  int src_uv_height = SUBSAMPLE(src_height, 1, 1);
  int dst_uv_width = SUBSAMPLE(dst_width, 1, 1);
  int dst_uv_height = SUBSAMPLE(dst_height, 1, 1);
  int status;

  // All six plane pointers are required.
  if (!src_y || !src_u || !src_v || !dst_y || !dst_u || !dst_v) {
    return -1;
  }
  // Source width must be in 1..32768. Source height must be non-zero and at
  // most 32768; negative heights pass this check.
  if (src_width <= 0 || src_width > 32768 || src_height == 0 ||
      src_height > 32768) {
    return -1;
  }
  // Destination dimensions must be positive.
  if (dst_width <= 0 || dst_height <= 0) {
    return -1;
  }

  // Scale Y, then U, then V; stop at the first failure.
  status = ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y,
                         dst_stride_y, dst_width, dst_height, filtering);
  if (status == 0) {
    status = ScalePlane_16(src_u, src_stride_u, src_uv_width, src_uv_height,
                           dst_u, dst_stride_u, dst_uv_width, dst_uv_height,
                           filtering);
  }
  if (status == 0) {
    status = ScalePlane_16(src_v, src_stride_v, src_uv_width, src_uv_height,
                           dst_v, dst_stride_v, dst_uv_width, dst_uv_height,
                           filtering);
  }
  return status;
}
2404
2405
LIBYUV_API
2406
int I420Scale_12(const uint16_t* src_y,
2407
                 int src_stride_y,
2408
                 const uint16_t* src_u,
2409
                 int src_stride_u,
2410
                 const uint16_t* src_v,
2411
                 int src_stride_v,
2412
                 int src_width,
2413
                 int src_height,
2414
                 uint16_t* dst_y,
2415
                 int dst_stride_y,
2416
                 uint16_t* dst_u,
2417
                 int dst_stride_u,
2418
                 uint16_t* dst_v,
2419
                 int dst_stride_v,
2420
                 int dst_width,
2421
                 int dst_height,
2422
0
                 enum FilterMode filtering) {
2423
0
  int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
2424
0
  int src_halfheight = SUBSAMPLE(src_height, 1, 1);
2425
0
  int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
2426
0
  int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
2427
0
  int r;
2428
2429
0
  if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
2430
0
      src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
2431
0
      dst_width <= 0 || dst_height <= 0) {
2432
0
    return -1;
2433
0
  }
2434
2435
0
  r = ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y,
2436
0
                    dst_stride_y, dst_width, dst_height, filtering);
2437
0
  if (r != 0) {
2438
0
    return r;
2439
0
  }
2440
0
  r = ScalePlane_12(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u,
2441
0
                    dst_stride_u, dst_halfwidth, dst_halfheight, filtering);
2442
0
  if (r != 0) {
2443
0
    return r;
2444
0
  }
2445
0
  r = ScalePlane_12(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v,
2446
0
                    dst_stride_v, dst_halfwidth, dst_halfheight, filtering);
2447
0
  return r;
2448
0
}
2449
2450
// Scale an I444 image.
2451
// This function in turn calls a scaling function for each plane.
2452
2453
LIBYUV_API
2454
int I444Scale(const uint8_t* src_y,
2455
              int src_stride_y,
2456
              const uint8_t* src_u,
2457
              int src_stride_u,
2458
              const uint8_t* src_v,
2459
              int src_stride_v,
2460
              int src_width,
2461
              int src_height,
2462
              uint8_t* dst_y,
2463
              int dst_stride_y,
2464
              uint8_t* dst_u,
2465
              int dst_stride_u,
2466
              uint8_t* dst_v,
2467
              int dst_stride_v,
2468
              int dst_width,
2469
              int dst_height,
2470
0
              enum FilterMode filtering) {
2471
0
  int r;
2472
2473
0
  if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
2474
0
      src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
2475
0
      dst_width <= 0 || dst_height <= 0) {
2476
0
    return -1;
2477
0
  }
2478
2479
0
  r = ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y,
2480
0
                 dst_stride_y, dst_width, dst_height, filtering);
2481
0
  if (r != 0) {
2482
0
    return r;
2483
0
  }
2484
0
  r = ScalePlane(src_u, src_stride_u, src_width, src_height, dst_u,
2485
0
                 dst_stride_u, dst_width, dst_height, filtering);
2486
0
  if (r != 0) {
2487
0
    return r;
2488
0
  }
2489
0
  r = ScalePlane(src_v, src_stride_v, src_width, src_height, dst_v,
2490
0
                 dst_stride_v, dst_width, dst_height, filtering);
2491
0
  return r;
2492
0
}
2493
2494
LIBYUV_API
2495
int I444Scale_16(const uint16_t* src_y,
2496
                 int src_stride_y,
2497
                 const uint16_t* src_u,
2498
                 int src_stride_u,
2499
                 const uint16_t* src_v,
2500
                 int src_stride_v,
2501
                 int src_width,
2502
                 int src_height,
2503
                 uint16_t* dst_y,
2504
                 int dst_stride_y,
2505
                 uint16_t* dst_u,
2506
                 int dst_stride_u,
2507
                 uint16_t* dst_v,
2508
                 int dst_stride_v,
2509
                 int dst_width,
2510
                 int dst_height,
2511
0
                 enum FilterMode filtering) {
2512
0
  int r;
2513
2514
0
  if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
2515
0
      src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
2516
0
      dst_width <= 0 || dst_height <= 0) {
2517
0
    return -1;
2518
0
  }
2519
2520
0
  r = ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y,
2521
0
                    dst_stride_y, dst_width, dst_height, filtering);
2522
0
  if (r != 0) {
2523
0
    return r;
2524
0
  }
2525
0
  r = ScalePlane_16(src_u, src_stride_u, src_width, src_height, dst_u,
2526
0
                    dst_stride_u, dst_width, dst_height, filtering);
2527
0
  if (r != 0) {
2528
0
    return r;
2529
0
  }
2530
0
  r = ScalePlane_16(src_v, src_stride_v, src_width, src_height, dst_v,
2531
0
                    dst_stride_v, dst_width, dst_height, filtering);
2532
0
  return r;
2533
0
}
2534
2535
LIBYUV_API
2536
int I444Scale_12(const uint16_t* src_y,
2537
                 int src_stride_y,
2538
                 const uint16_t* src_u,
2539
                 int src_stride_u,
2540
                 const uint16_t* src_v,
2541
                 int src_stride_v,
2542
                 int src_width,
2543
                 int src_height,
2544
                 uint16_t* dst_y,
2545
                 int dst_stride_y,
2546
                 uint16_t* dst_u,
2547
                 int dst_stride_u,
2548
                 uint16_t* dst_v,
2549
                 int dst_stride_v,
2550
                 int dst_width,
2551
                 int dst_height,
2552
0
                 enum FilterMode filtering) {
2553
0
  int r;
2554
2555
0
  if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
2556
0
      src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
2557
0
      dst_width <= 0 || dst_height <= 0) {
2558
0
    return -1;
2559
0
  }
2560
2561
0
  r = ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y,
2562
0
                    dst_stride_y, dst_width, dst_height, filtering);
2563
0
  if (r != 0) {
2564
0
    return r;
2565
0
  }
2566
0
  r = ScalePlane_12(src_u, src_stride_u, src_width, src_height, dst_u,
2567
0
                    dst_stride_u, dst_width, dst_height, filtering);
2568
0
  if (r != 0) {
2569
0
    return r;
2570
0
  }
2571
0
  r = ScalePlane_12(src_v, src_stride_v, src_width, src_height, dst_v,
2572
0
                    dst_stride_v, dst_width, dst_height, filtering);
2573
0
  return r;
2574
0
}
2575
2576
// Scale an I422 image.
2577
// This function in turn calls a scaling function for each plane.
2578
2579
LIBYUV_API
2580
int I422Scale(const uint8_t* src_y,
2581
              int src_stride_y,
2582
              const uint8_t* src_u,
2583
              int src_stride_u,
2584
              const uint8_t* src_v,
2585
              int src_stride_v,
2586
              int src_width,
2587
              int src_height,
2588
              uint8_t* dst_y,
2589
              int dst_stride_y,
2590
              uint8_t* dst_u,
2591
              int dst_stride_u,
2592
              uint8_t* dst_v,
2593
              int dst_stride_v,
2594
              int dst_width,
2595
              int dst_height,
2596
0
              enum FilterMode filtering) {
2597
0
  int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
2598
0
  int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
2599
0
  int r;
2600
2601
0
  if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
2602
0
      src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
2603
0
      dst_width <= 0 || dst_height <= 0) {
2604
0
    return -1;
2605
0
  }
2606
2607
0
  r = ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y,
2608
0
                 dst_stride_y, dst_width, dst_height, filtering);
2609
0
  if (r != 0) {
2610
0
    return r;
2611
0
  }
2612
0
  r = ScalePlane(src_u, src_stride_u, src_halfwidth, src_height, dst_u,
2613
0
                 dst_stride_u, dst_halfwidth, dst_height, filtering);
2614
0
  if (r != 0) {
2615
0
    return r;
2616
0
  }
2617
0
  r = ScalePlane(src_v, src_stride_v, src_halfwidth, src_height, dst_v,
2618
0
                 dst_stride_v, dst_halfwidth, dst_height, filtering);
2619
0
  return r;
2620
0
}
2621
2622
LIBYUV_API
2623
int I422Scale_16(const uint16_t* src_y,
2624
                 int src_stride_y,
2625
                 const uint16_t* src_u,
2626
                 int src_stride_u,
2627
                 const uint16_t* src_v,
2628
                 int src_stride_v,
2629
                 int src_width,
2630
                 int src_height,
2631
                 uint16_t* dst_y,
2632
                 int dst_stride_y,
2633
                 uint16_t* dst_u,
2634
                 int dst_stride_u,
2635
                 uint16_t* dst_v,
2636
                 int dst_stride_v,
2637
                 int dst_width,
2638
                 int dst_height,
2639
0
                 enum FilterMode filtering) {
2640
0
  int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
2641
0
  int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
2642
0
  int r;
2643
2644
0
  if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
2645
0
      src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
2646
0
      dst_width <= 0 || dst_height <= 0) {
2647
0
    return -1;
2648
0
  }
2649
2650
0
  r = ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y,
2651
0
                    dst_stride_y, dst_width, dst_height, filtering);
2652
0
  if (r != 0) {
2653
0
    return r;
2654
0
  }
2655
0
  r = ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_height, dst_u,
2656
0
                    dst_stride_u, dst_halfwidth, dst_height, filtering);
2657
0
  if (r != 0) {
2658
0
    return r;
2659
0
  }
2660
0
  r = ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_height, dst_v,
2661
0
                    dst_stride_v, dst_halfwidth, dst_height, filtering);
2662
0
  return r;
2663
0
}
2664
2665
LIBYUV_API
2666
int I422Scale_12(const uint16_t* src_y,
2667
                 int src_stride_y,
2668
                 const uint16_t* src_u,
2669
                 int src_stride_u,
2670
                 const uint16_t* src_v,
2671
                 int src_stride_v,
2672
                 int src_width,
2673
                 int src_height,
2674
                 uint16_t* dst_y,
2675
                 int dst_stride_y,
2676
                 uint16_t* dst_u,
2677
                 int dst_stride_u,
2678
                 uint16_t* dst_v,
2679
                 int dst_stride_v,
2680
                 int dst_width,
2681
                 int dst_height,
2682
0
                 enum FilterMode filtering) {
2683
0
  int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
2684
0
  int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
2685
0
  int r;
2686
2687
0
  if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
2688
0
      src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
2689
0
      dst_width <= 0 || dst_height <= 0) {
2690
0
    return -1;
2691
0
  }
2692
2693
0
  r = ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y,
2694
0
                    dst_stride_y, dst_width, dst_height, filtering);
2695
0
  if (r != 0) {
2696
0
    return r;
2697
0
  }
2698
0
  r = ScalePlane_12(src_u, src_stride_u, src_halfwidth, src_height, dst_u,
2699
0
                    dst_stride_u, dst_halfwidth, dst_height, filtering);
2700
0
  if (r != 0) {
2701
0
    return r;
2702
0
  }
2703
0
  r = ScalePlane_12(src_v, src_stride_v, src_halfwidth, src_height, dst_v,
2704
0
                    dst_stride_v, dst_halfwidth, dst_height, filtering);
2705
0
  return r;
2706
0
}
2707
2708
// Scale an NV12 image.
2709
// This function in turn calls a scaling function for each plane.
2710
2711
LIBYUV_API
2712
int NV12Scale(const uint8_t* src_y,
2713
              int src_stride_y,
2714
              const uint8_t* src_uv,
2715
              int src_stride_uv,
2716
              int src_width,
2717
              int src_height,
2718
              uint8_t* dst_y,
2719
              int dst_stride_y,
2720
              uint8_t* dst_uv,
2721
              int dst_stride_uv,
2722
              int dst_width,
2723
              int dst_height,
2724
0
              enum FilterMode filtering) {
2725
0
  int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
2726
0
  int src_halfheight = SUBSAMPLE(src_height, 1, 1);
2727
0
  int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
2728
0
  int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
2729
0
  int r;
2730
2731
0
  if (!src_y || !src_uv || src_width <= 0 || src_height == 0 ||
2732
0
      src_width > 32768 || src_height > 32768 || !dst_y || !dst_uv ||
2733
0
      dst_width <= 0 || dst_height <= 0) {
2734
0
    return -1;
2735
0
  }
2736
2737
0
  r = ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y,
2738
0
                 dst_stride_y, dst_width, dst_height, filtering);
2739
0
  if (r != 0) {
2740
0
    return r;
2741
0
  }
2742
0
  r = UVScale(src_uv, src_stride_uv, src_halfwidth, src_halfheight, dst_uv,
2743
0
              dst_stride_uv, dst_halfwidth, dst_halfheight, filtering);
2744
0
  return r;
2745
0
}
2746
2747
LIBYUV_API
2748
int NV24Scale(const uint8_t* src_y,
2749
              int src_stride_y,
2750
              const uint8_t* src_uv,
2751
              int src_stride_uv,
2752
              int src_width,
2753
              int src_height,
2754
              uint8_t* dst_y,
2755
              int dst_stride_y,
2756
              uint8_t* dst_uv,
2757
              int dst_stride_uv,
2758
              int dst_width,
2759
              int dst_height,
2760
0
              enum FilterMode filtering) {
2761
0
  int r;
2762
2763
0
  if (!src_y || !src_uv || src_width <= 0 || src_height == 0 ||
2764
0
      src_width > 32768 || src_height > 32768 || !dst_y || !dst_uv ||
2765
0
      dst_width <= 0 || dst_height <= 0) {
2766
0
    return -1;
2767
0
  }
2768
2769
0
  r = ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y,
2770
0
                 dst_stride_y, dst_width, dst_height, filtering);
2771
0
  if (r != 0) {
2772
0
    return r;
2773
0
  }
2774
0
  r = UVScale(src_uv, src_stride_uv, src_width, src_height, dst_uv,
2775
0
              dst_stride_uv, dst_width, dst_height, filtering);
2776
0
  return r;
2777
0
}
2778
2779
// Deprecated api
2780
LIBYUV_API
2781
int Scale(const uint8_t* src_y,
2782
          const uint8_t* src_u,
2783
          const uint8_t* src_v,
2784
          int src_stride_y,
2785
          int src_stride_u,
2786
          int src_stride_v,
2787
          int src_width,
2788
          int src_height,
2789
          uint8_t* dst_y,
2790
          uint8_t* dst_u,
2791
          uint8_t* dst_v,
2792
          int dst_stride_y,
2793
          int dst_stride_u,
2794
          int dst_stride_v,
2795
          int dst_width,
2796
          int dst_height,
2797
0
          LIBYUV_BOOL interpolate) {
2798
0
  return I420Scale(src_y, src_stride_y, src_u, src_stride_u, src_v,
2799
0
                   src_stride_v, src_width, src_height, dst_y, dst_stride_y,
2800
0
                   dst_u, dst_stride_u, dst_v, dst_stride_v, dst_width,
2801
0
                   dst_height, interpolate ? kFilterBox : kFilterNone);
2802
0
}
2803
2804
#ifdef __cplusplus
2805
}  // extern "C"
2806
}  // namespace libyuv
2807
#endif