Coverage Report

Created: 2025-07-09 07:14

/src/libavif/ext/libyuv/source/scale_common.cc
Line
Count
Source (jump to first uncovered line)
1
/*
2
 *  Copyright 2013 The LibYuv Project Authors. All rights reserved.
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5
 *  that can be found in the LICENSE file in the root of the source
6
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS. All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
9
 */
10
11
#include "libyuv/scale.h"
12
13
#include <assert.h>
14
#include <string.h>
15
16
#include "libyuv/cpu_id.h"
17
#include "libyuv/planar_functions.h"  // For CopyARGB
18
#include "libyuv/row.h"
19
#include "libyuv/scale_row.h"
20
21
#ifdef __cplusplus
22
namespace libyuv {
23
extern "C" {
24
#endif
25
26
#ifdef __cplusplus
27
0
#define STATIC_CAST(type, expr) static_cast<type>(expr)
28
#else
29
#define STATIC_CAST(type, expr) (type)(expr)
30
#endif
31
32
// TODO(fbarchard): make clamp255 preserve negative values.
33
0
static __inline int32_t clamp255(int32_t v) {
34
0
  return (-(v >= 255) | v) & 255;
35
0
}
36
37
// Use scale to convert lsb formats to msb, depending how many bits there are:
38
// 32768 = 9 bits
39
// 16384 = 10 bits
40
// 4096 = 12 bits
41
// 256 = 16 bits
42
// TODO(fbarchard): change scale to bits
43
#define C16TO8(v, scale) clamp255(((v) * (scale)) >> 16)
44
45
40.3k
static __inline int Abs(int v) {
46
40.3k
  return v >= 0 ? v : -v;
47
40.3k
}
48
49
// CPU agnostic row functions
50
void ScaleRowDown2_C(const uint8_t* src_ptr,
51
                     ptrdiff_t src_stride,
52
                     uint8_t* dst,
53
0
                     int dst_width) {
54
0
  int x;
55
0
  (void)src_stride;
56
0
  for (x = 0; x < dst_width - 1; x += 2) {
57
0
    dst[0] = src_ptr[1];
58
0
    dst[1] = src_ptr[3];
59
0
    dst += 2;
60
0
    src_ptr += 4;
61
0
  }
62
0
  if (dst_width & 1) {
63
0
    dst[0] = src_ptr[1];
64
0
  }
65
0
}
66
67
void ScaleRowDown2_16_C(const uint16_t* src_ptr,
68
                        ptrdiff_t src_stride,
69
                        uint16_t* dst,
70
0
                        int dst_width) {
71
0
  int x;
72
0
  (void)src_stride;
73
0
  for (x = 0; x < dst_width - 1; x += 2) {
74
0
    dst[0] = src_ptr[1];
75
0
    dst[1] = src_ptr[3];
76
0
    dst += 2;
77
0
    src_ptr += 4;
78
0
  }
79
0
  if (dst_width & 1) {
80
0
    dst[0] = src_ptr[1];
81
0
  }
82
0
}
83
84
void ScaleRowDown2_16To8_C(const uint16_t* src_ptr,
85
                           ptrdiff_t src_stride,
86
                           uint8_t* dst,
87
                           int dst_width,
88
0
                           int scale) {
89
0
  int x;
90
0
  (void)src_stride;
91
0
  assert(scale >= 256);
92
0
  assert(scale <= 32768);
93
0
  for (x = 0; x < dst_width - 1; x += 2) {
94
0
    dst[0] = STATIC_CAST(uint8_t, C16TO8(src_ptr[1], scale));
95
0
    dst[1] = STATIC_CAST(uint8_t, C16TO8(src_ptr[3], scale));
96
0
    dst += 2;
97
0
    src_ptr += 4;
98
0
  }
99
0
  if (dst_width & 1) {
100
0
    dst[0] = STATIC_CAST(uint8_t, C16TO8(src_ptr[1], scale));
101
0
  }
102
0
}
103
104
void ScaleRowDown2_16To8_Odd_C(const uint16_t* src_ptr,
105
                               ptrdiff_t src_stride,
106
                               uint8_t* dst,
107
                               int dst_width,
108
0
                               int scale) {
109
0
  int x;
110
0
  (void)src_stride;
111
0
  assert(scale >= 256);
112
0
  assert(scale <= 32768);
113
0
  dst_width -= 1;
114
0
  for (x = 0; x < dst_width - 1; x += 2) {
115
0
    dst[0] = STATIC_CAST(uint8_t, C16TO8(src_ptr[1], scale));
116
0
    dst[1] = STATIC_CAST(uint8_t, C16TO8(src_ptr[3], scale));
117
0
    dst += 2;
118
0
    src_ptr += 4;
119
0
  }
120
0
  if (dst_width & 1) {
121
0
    dst[0] = STATIC_CAST(uint8_t, C16TO8(src_ptr[1], scale));
122
0
    dst += 1;
123
0
    src_ptr += 2;
124
0
  }
125
0
  dst[0] = STATIC_CAST(uint8_t, C16TO8(src_ptr[0], scale));
126
0
}
127
128
void ScaleRowDown2Linear_C(const uint8_t* src_ptr,
129
                           ptrdiff_t src_stride,
130
                           uint8_t* dst,
131
0
                           int dst_width) {
132
0
  const uint8_t* s = src_ptr;
133
0
  int x;
134
0
  (void)src_stride;
135
0
  for (x = 0; x < dst_width - 1; x += 2) {
136
0
    dst[0] = (s[0] + s[1] + 1) >> 1;
137
0
    dst[1] = (s[2] + s[3] + 1) >> 1;
138
0
    dst += 2;
139
0
    s += 4;
140
0
  }
141
0
  if (dst_width & 1) {
142
0
    dst[0] = (s[0] + s[1] + 1) >> 1;
143
0
  }
144
0
}
145
146
void ScaleRowDown2Linear_16_C(const uint16_t* src_ptr,
147
                              ptrdiff_t src_stride,
148
                              uint16_t* dst,
149
0
                              int dst_width) {
150
0
  const uint16_t* s = src_ptr;
151
0
  int x;
152
0
  (void)src_stride;
153
0
  for (x = 0; x < dst_width - 1; x += 2) {
154
0
    dst[0] = (s[0] + s[1] + 1) >> 1;
155
0
    dst[1] = (s[2] + s[3] + 1) >> 1;
156
0
    dst += 2;
157
0
    s += 4;
158
0
  }
159
0
  if (dst_width & 1) {
160
0
    dst[0] = (s[0] + s[1] + 1) >> 1;
161
0
  }
162
0
}
163
164
void ScaleRowDown2Linear_16To8_C(const uint16_t* src_ptr,
165
                                 ptrdiff_t src_stride,
166
                                 uint8_t* dst,
167
                                 int dst_width,
168
0
                                 int scale) {
169
0
  const uint16_t* s = src_ptr;
170
0
  int x;
171
0
  (void)src_stride;
172
0
  assert(scale >= 256);
173
0
  assert(scale <= 32768);
174
0
  for (x = 0; x < dst_width - 1; x += 2) {
175
0
    dst[0] = STATIC_CAST(uint8_t, C16TO8((s[0] + s[1] + 1) >> 1, scale));
176
0
    dst[1] = STATIC_CAST(uint8_t, C16TO8((s[2] + s[3] + 1) >> 1, scale));
177
0
    dst += 2;
178
0
    s += 4;
179
0
  }
180
0
  if (dst_width & 1) {
181
0
    dst[0] = STATIC_CAST(uint8_t, C16TO8((s[0] + s[1] + 1) >> 1, scale));
182
0
  }
183
0
}
184
185
void ScaleRowDown2Linear_16To8_Odd_C(const uint16_t* src_ptr,
186
                                     ptrdiff_t src_stride,
187
                                     uint8_t* dst,
188
                                     int dst_width,
189
0
                                     int scale) {
190
0
  const uint16_t* s = src_ptr;
191
0
  int x;
192
0
  (void)src_stride;
193
0
  assert(scale >= 256);
194
0
  assert(scale <= 32768);
195
0
  dst_width -= 1;
196
0
  for (x = 0; x < dst_width - 1; x += 2) {
197
0
    dst[0] = STATIC_CAST(uint8_t, C16TO8((s[0] + s[1] + 1) >> 1, scale));
198
0
    dst[1] = STATIC_CAST(uint8_t, C16TO8((s[2] + s[3] + 1) >> 1, scale));
199
0
    dst += 2;
200
0
    s += 4;
201
0
  }
202
0
  if (dst_width & 1) {
203
0
    dst[0] = STATIC_CAST(uint8_t, C16TO8((s[0] + s[1] + 1) >> 1, scale));
204
0
    dst += 1;
205
0
    s += 2;
206
0
  }
207
0
  dst[0] = STATIC_CAST(uint8_t, C16TO8(s[0], scale));
208
0
}
209
210
void ScaleRowDown2Box_C(const uint8_t* src_ptr,
211
                        ptrdiff_t src_stride,
212
                        uint8_t* dst,
213
1.61k
                        int dst_width) {
214
1.61k
  const uint8_t* s = src_ptr;
215
1.61k
  const uint8_t* t = src_ptr + src_stride;
216
1.61k
  int x;
217
16.3k
  for (x = 0; x < dst_width - 1; x += 2) {
218
14.6k
    dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
219
14.6k
    dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
220
14.6k
    dst += 2;
221
14.6k
    s += 4;
222
14.6k
    t += 4;
223
14.6k
  }
224
1.61k
  if (dst_width & 1) {
225
349
    dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
226
349
  }
227
1.61k
}
228
229
void ScaleRowDown2Box_Odd_C(const uint8_t* src_ptr,
230
                            ptrdiff_t src_stride,
231
                            uint8_t* dst,
232
0
                            int dst_width) {
233
0
  const uint8_t* s = src_ptr;
234
0
  const uint8_t* t = src_ptr + src_stride;
235
0
  int x;
236
0
  dst_width -= 1;
237
0
  for (x = 0; x < dst_width - 1; x += 2) {
238
0
    dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
239
0
    dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
240
0
    dst += 2;
241
0
    s += 4;
242
0
    t += 4;
243
0
  }
244
0
  if (dst_width & 1) {
245
0
    dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
246
0
    dst += 1;
247
0
    s += 2;
248
0
    t += 2;
249
0
  }
250
0
  dst[0] = (s[0] + t[0] + 1) >> 1;
251
0
}
252
253
void ScaleRowDown2Box_16_C(const uint16_t* src_ptr,
254
                           ptrdiff_t src_stride,
255
                           uint16_t* dst,
256
2.28k
                           int dst_width) {
257
2.28k
  const uint16_t* s = src_ptr;
258
2.28k
  const uint16_t* t = src_ptr + src_stride;
259
2.28k
  int x;
260
30.0k
  for (x = 0; x < dst_width - 1; x += 2) {
261
27.7k
    dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
262
27.7k
    dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
263
27.7k
    dst += 2;
264
27.7k
    s += 4;
265
27.7k
    t += 4;
266
27.7k
  }
267
2.28k
  if (dst_width & 1) {
268
211
    dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
269
211
  }
270
2.28k
}
271
272
void ScaleRowDown2Box_16To8_C(const uint16_t* src_ptr,
273
                              ptrdiff_t src_stride,
274
                              uint8_t* dst,
275
                              int dst_width,
276
0
                              int scale) {
277
0
  const uint16_t* s = src_ptr;
278
0
  const uint16_t* t = src_ptr + src_stride;
279
0
  int x;
280
0
  assert(scale >= 256);
281
0
  assert(scale <= 32768);
282
0
  for (x = 0; x < dst_width - 1; x += 2) {
283
0
    dst[0] = STATIC_CAST(uint8_t,
284
0
                         C16TO8((s[0] + s[1] + t[0] + t[1] + 2) >> 2, scale));
285
0
    dst[1] = STATIC_CAST(uint8_t,
286
0
                         C16TO8((s[2] + s[3] + t[2] + t[3] + 2) >> 2, scale));
287
0
    dst += 2;
288
0
    s += 4;
289
0
    t += 4;
290
0
  }
291
0
  if (dst_width & 1) {
292
0
    dst[0] = STATIC_CAST(uint8_t,
293
0
                         C16TO8((s[0] + s[1] + t[0] + t[1] + 2) >> 2, scale));
294
0
  }
295
0
}
296
297
void ScaleRowDown2Box_16To8_Odd_C(const uint16_t* src_ptr,
298
                                  ptrdiff_t src_stride,
299
                                  uint8_t* dst,
300
                                  int dst_width,
301
0
                                  int scale) {
302
0
  const uint16_t* s = src_ptr;
303
0
  const uint16_t* t = src_ptr + src_stride;
304
0
  int x;
305
0
  assert(scale >= 256);
306
0
  assert(scale <= 32768);
307
0
  dst_width -= 1;
308
0
  for (x = 0; x < dst_width - 1; x += 2) {
309
0
    dst[0] = STATIC_CAST(uint8_t,
310
0
                         C16TO8((s[0] + s[1] + t[0] + t[1] + 2) >> 2, scale));
311
0
    dst[1] = STATIC_CAST(uint8_t,
312
0
                         C16TO8((s[2] + s[3] + t[2] + t[3] + 2) >> 2, scale));
313
0
    dst += 2;
314
0
    s += 4;
315
0
    t += 4;
316
0
  }
317
0
  if (dst_width & 1) {
318
0
    dst[0] = STATIC_CAST(uint8_t,
319
0
                         C16TO8((s[0] + s[1] + t[0] + t[1] + 2) >> 2, scale));
320
0
    dst += 1;
321
0
    s += 2;
322
0
    t += 2;
323
0
  }
324
0
  dst[0] = STATIC_CAST(uint8_t, C16TO8((s[0] + t[0] + 1) >> 1, scale));
325
0
}
326
327
void ScaleRowDown4_C(const uint8_t* src_ptr,
328
                     ptrdiff_t src_stride,
329
                     uint8_t* dst,
330
0
                     int dst_width) {
331
0
  int x;
332
0
  (void)src_stride;
333
0
  for (x = 0; x < dst_width - 1; x += 2) {
334
0
    dst[0] = src_ptr[2];
335
0
    dst[1] = src_ptr[6];
336
0
    dst += 2;
337
0
    src_ptr += 8;
338
0
  }
339
0
  if (dst_width & 1) {
340
0
    dst[0] = src_ptr[2];
341
0
  }
342
0
}
343
344
void ScaleRowDown4_16_C(const uint16_t* src_ptr,
345
                        ptrdiff_t src_stride,
346
                        uint16_t* dst,
347
0
                        int dst_width) {
348
0
  int x;
349
0
  (void)src_stride;
350
0
  for (x = 0; x < dst_width - 1; x += 2) {
351
0
    dst[0] = src_ptr[2];
352
0
    dst[1] = src_ptr[6];
353
0
    dst += 2;
354
0
    src_ptr += 8;
355
0
  }
356
0
  if (dst_width & 1) {
357
0
    dst[0] = src_ptr[2];
358
0
  }
359
0
}
360
361
void ScaleRowDown4Box_C(const uint8_t* src_ptr,
362
                        ptrdiff_t src_stride,
363
                        uint8_t* dst,
364
454
                        int dst_width) {
365
454
  intptr_t stride = src_stride;
366
454
  int x;
367
2.26k
  for (x = 0; x < dst_width - 1; x += 2) {
368
1.81k
    dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
369
1.81k
              src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] +
370
1.81k
              src_ptr[stride + 3] + src_ptr[stride * 2 + 0] +
371
1.81k
              src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2] +
372
1.81k
              src_ptr[stride * 2 + 3] + src_ptr[stride * 3 + 0] +
373
1.81k
              src_ptr[stride * 3 + 1] + src_ptr[stride * 3 + 2] +
374
1.81k
              src_ptr[stride * 3 + 3] + 8) >>
375
1.81k
             4;
376
1.81k
    dst[1] = (src_ptr[4] + src_ptr[5] + src_ptr[6] + src_ptr[7] +
377
1.81k
              src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride + 6] +
378
1.81k
              src_ptr[stride + 7] + src_ptr[stride * 2 + 4] +
379
1.81k
              src_ptr[stride * 2 + 5] + src_ptr[stride * 2 + 6] +
380
1.81k
              src_ptr[stride * 2 + 7] + src_ptr[stride * 3 + 4] +
381
1.81k
              src_ptr[stride * 3 + 5] + src_ptr[stride * 3 + 6] +
382
1.81k
              src_ptr[stride * 3 + 7] + 8) >>
383
1.81k
             4;
384
1.81k
    dst += 2;
385
1.81k
    src_ptr += 8;
386
1.81k
  }
387
454
  if (dst_width & 1) {
388
205
    dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
389
205
              src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] +
390
205
              src_ptr[stride + 3] + src_ptr[stride * 2 + 0] +
391
205
              src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2] +
392
205
              src_ptr[stride * 2 + 3] + src_ptr[stride * 3 + 0] +
393
205
              src_ptr[stride * 3 + 1] + src_ptr[stride * 3 + 2] +
394
205
              src_ptr[stride * 3 + 3] + 8) >>
395
205
             4;
396
205
  }
397
454
}
398
399
void ScaleRowDown4Box_16_C(const uint16_t* src_ptr,
400
                           ptrdiff_t src_stride,
401
                           uint16_t* dst,
402
268
                           int dst_width) {
403
268
  intptr_t stride = src_stride;
404
268
  int x;
405
10.4k
  for (x = 0; x < dst_width - 1; x += 2) {
406
10.2k
    dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
407
10.2k
              src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] +
408
10.2k
              src_ptr[stride + 3] + src_ptr[stride * 2 + 0] +
409
10.2k
              src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2] +
410
10.2k
              src_ptr[stride * 2 + 3] + src_ptr[stride * 3 + 0] +
411
10.2k
              src_ptr[stride * 3 + 1] + src_ptr[stride * 3 + 2] +
412
10.2k
              src_ptr[stride * 3 + 3] + 8) >>
413
10.2k
             4;
414
10.2k
    dst[1] = (src_ptr[4] + src_ptr[5] + src_ptr[6] + src_ptr[7] +
415
10.2k
              src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride + 6] +
416
10.2k
              src_ptr[stride + 7] + src_ptr[stride * 2 + 4] +
417
10.2k
              src_ptr[stride * 2 + 5] + src_ptr[stride * 2 + 6] +
418
10.2k
              src_ptr[stride * 2 + 7] + src_ptr[stride * 3 + 4] +
419
10.2k
              src_ptr[stride * 3 + 5] + src_ptr[stride * 3 + 6] +
420
10.2k
              src_ptr[stride * 3 + 7] + 8) >>
421
10.2k
             4;
422
10.2k
    dst += 2;
423
10.2k
    src_ptr += 8;
424
10.2k
  }
425
268
  if (dst_width & 1) {
426
18
    dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
427
18
              src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] +
428
18
              src_ptr[stride + 3] + src_ptr[stride * 2 + 0] +
429
18
              src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2] +
430
18
              src_ptr[stride * 2 + 3] + src_ptr[stride * 3 + 0] +
431
18
              src_ptr[stride * 3 + 1] + src_ptr[stride * 3 + 2] +
432
18
              src_ptr[stride * 3 + 3] + 8) >>
433
18
             4;
434
18
  }
435
268
}
436
437
void ScaleRowDown34_C(const uint8_t* src_ptr,
438
                      ptrdiff_t src_stride,
439
                      uint8_t* dst,
440
0
                      int dst_width) {
441
0
  int x;
442
0
  (void)src_stride;
443
0
  assert((dst_width % 3 == 0) && (dst_width > 0));
444
0
  for (x = 0; x < dst_width; x += 3) {
445
0
    dst[0] = src_ptr[0];
446
0
    dst[1] = src_ptr[1];
447
0
    dst[2] = src_ptr[3];
448
0
    dst += 3;
449
0
    src_ptr += 4;
450
0
  }
451
0
}
452
453
void ScaleRowDown34_16_C(const uint16_t* src_ptr,
454
                         ptrdiff_t src_stride,
455
                         uint16_t* dst,
456
0
                         int dst_width) {
457
0
  int x;
458
0
  (void)src_stride;
459
0
  assert((dst_width % 3 == 0) && (dst_width > 0));
460
0
  for (x = 0; x < dst_width; x += 3) {
461
0
    dst[0] = src_ptr[0];
462
0
    dst[1] = src_ptr[1];
463
0
    dst[2] = src_ptr[3];
464
0
    dst += 3;
465
0
    src_ptr += 4;
466
0
  }
467
0
}
468
469
// Filter rows 0 and 1 together, 3 : 1
470
void ScaleRowDown34_0_Box_C(const uint8_t* src_ptr,
471
                            ptrdiff_t src_stride,
472
                            uint8_t* d,
473
1.18k
                            int dst_width) {
474
1.18k
  const uint8_t* s = src_ptr;
475
1.18k
  const uint8_t* t = src_ptr + src_stride;
476
1.18k
  int x;
477
1.18k
  assert((dst_width % 3 == 0) && (dst_width > 0));
478
3.49k
  for (x = 0; x < dst_width; x += 3) {
479
2.31k
    uint8_t a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
480
2.31k
    uint8_t a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
481
2.31k
    uint8_t a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
482
2.31k
    uint8_t b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
483
2.31k
    uint8_t b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
484
2.31k
    uint8_t b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
485
2.31k
    d[0] = (a0 * 3 + b0 + 2) >> 2;
486
2.31k
    d[1] = (a1 * 3 + b1 + 2) >> 2;
487
2.31k
    d[2] = (a2 * 3 + b2 + 2) >> 2;
488
2.31k
    d += 3;
489
2.31k
    s += 4;
490
2.31k
    t += 4;
491
2.31k
  }
492
1.18k
}
493
494
void ScaleRowDown34_0_Box_16_C(const uint16_t* src_ptr,
495
                               ptrdiff_t src_stride,
496
                               uint16_t* d,
497
1.18k
                               int dst_width) {
498
1.18k
  const uint16_t* s = src_ptr;
499
1.18k
  const uint16_t* t = src_ptr + src_stride;
500
1.18k
  int x;
501
1.18k
  assert((dst_width % 3 == 0) && (dst_width > 0));
502
3.49k
  for (x = 0; x < dst_width; x += 3) {
503
2.31k
    uint16_t a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
504
2.31k
    uint16_t a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
505
2.31k
    uint16_t a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
506
2.31k
    uint16_t b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
507
2.31k
    uint16_t b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
508
2.31k
    uint16_t b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
509
2.31k
    d[0] = (a0 * 3 + b0 + 2) >> 2;
510
2.31k
    d[1] = (a1 * 3 + b1 + 2) >> 2;
511
2.31k
    d[2] = (a2 * 3 + b2 + 2) >> 2;
512
2.31k
    d += 3;
513
2.31k
    s += 4;
514
2.31k
    t += 4;
515
2.31k
  }
516
1.18k
}
517
518
// Filter rows 1 and 2 together, 1 : 1
519
void ScaleRowDown34_1_Box_C(const uint8_t* src_ptr,
520
                            ptrdiff_t src_stride,
521
                            uint8_t* d,
522
591
                            int dst_width) {
523
591
  const uint8_t* s = src_ptr;
524
591
  const uint8_t* t = src_ptr + src_stride;
525
591
  int x;
526
591
  assert((dst_width % 3 == 0) && (dst_width > 0));
527
1.74k
  for (x = 0; x < dst_width; x += 3) {
528
1.15k
    uint8_t a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
529
1.15k
    uint8_t a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
530
1.15k
    uint8_t a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
531
1.15k
    uint8_t b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
532
1.15k
    uint8_t b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
533
1.15k
    uint8_t b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
534
1.15k
    d[0] = (a0 + b0 + 1) >> 1;
535
1.15k
    d[1] = (a1 + b1 + 1) >> 1;
536
1.15k
    d[2] = (a2 + b2 + 1) >> 1;
537
1.15k
    d += 3;
538
1.15k
    s += 4;
539
1.15k
    t += 4;
540
1.15k
  }
541
591
}
542
543
void ScaleRowDown34_1_Box_16_C(const uint16_t* src_ptr,
544
                               ptrdiff_t src_stride,
545
                               uint16_t* d,
546
591
                               int dst_width) {
547
591
  const uint16_t* s = src_ptr;
548
591
  const uint16_t* t = src_ptr + src_stride;
549
591
  int x;
550
591
  assert((dst_width % 3 == 0) && (dst_width > 0));
551
1.74k
  for (x = 0; x < dst_width; x += 3) {
552
1.15k
    uint16_t a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
553
1.15k
    uint16_t a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
554
1.15k
    uint16_t a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
555
1.15k
    uint16_t b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
556
1.15k
    uint16_t b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
557
1.15k
    uint16_t b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
558
1.15k
    d[0] = (a0 + b0 + 1) >> 1;
559
1.15k
    d[1] = (a1 + b1 + 1) >> 1;
560
1.15k
    d[2] = (a2 + b2 + 1) >> 1;
561
1.15k
    d += 3;
562
1.15k
    s += 4;
563
1.15k
    t += 4;
564
1.15k
  }
565
591
}
566
567
// Sample position: (O is src sample position, X is dst sample position)
568
//
569
//      v dst_ptr at here           v stop at here
570
//  X O X   X O X   X O X   X O X   X O X
571
//    ^ src_ptr at here
572
void ScaleRowUp2_Linear_C(const uint8_t* src_ptr,
573
                          uint8_t* dst_ptr,
574
128k
                          int dst_width) {
575
128k
  int src_width = dst_width >> 1;
576
128k
  int x;
577
128k
  assert((dst_width % 2 == 0) && (dst_width >= 0));
578
1.01M
  for (x = 0; x < src_width; ++x) {
579
883k
    dst_ptr[2 * x + 0] = (src_ptr[x + 0] * 3 + src_ptr[x + 1] * 1 + 2) >> 2;
580
883k
    dst_ptr[2 * x + 1] = (src_ptr[x + 0] * 1 + src_ptr[x + 1] * 3 + 2) >> 2;
581
883k
  }
582
128k
}
583
584
// Sample position: (O is src sample position, X is dst sample position)
585
//
586
//    src_ptr at here
587
//  X v X   X   X   X   X   X   X   X   X
588
//    O       O       O       O       O
589
//  X   X   X   X   X   X   X   X   X   X
590
//      ^ dst_ptr at here           ^ stop at here
591
//  X   X   X   X   X   X   X   X   X   X
592
//    O       O       O       O       O
593
//  X   X   X   X   X   X   X   X   X   X
594
void ScaleRowUp2_Bilinear_C(const uint8_t* src_ptr,
595
                            ptrdiff_t src_stride,
596
                            uint8_t* dst_ptr,
597
                            ptrdiff_t dst_stride,
598
18.8k
                            int dst_width) {
599
18.8k
  const uint8_t* s = src_ptr;
600
18.8k
  const uint8_t* t = src_ptr + src_stride;
601
18.8k
  uint8_t* d = dst_ptr;
602
18.8k
  uint8_t* e = dst_ptr + dst_stride;
603
18.8k
  int src_width = dst_width >> 1;
604
18.8k
  int x;
605
18.8k
  assert((dst_width % 2 == 0) && (dst_width >= 0));
606
264k
  for (x = 0; x < src_width; ++x) {
607
246k
    d[2 * x + 0] =
608
246k
        (s[x + 0] * 9 + s[x + 1] * 3 + t[x + 0] * 3 + t[x + 1] * 1 + 8) >> 4;
609
246k
    d[2 * x + 1] =
610
246k
        (s[x + 0] * 3 + s[x + 1] * 9 + t[x + 0] * 1 + t[x + 1] * 3 + 8) >> 4;
611
246k
    e[2 * x + 0] =
612
246k
        (s[x + 0] * 3 + s[x + 1] * 1 + t[x + 0] * 9 + t[x + 1] * 3 + 8) >> 4;
613
246k
    e[2 * x + 1] =
614
246k
        (s[x + 0] * 1 + s[x + 1] * 3 + t[x + 0] * 3 + t[x + 1] * 9 + 8) >> 4;
615
246k
  }
616
18.8k
}
617
618
// Only suitable for at most 14 bit range.
619
void ScaleRowUp2_Linear_16_C(const uint16_t* src_ptr,
620
                             uint16_t* dst_ptr,
621
200k
                             int dst_width) {
622
200k
  int src_width = dst_width >> 1;
623
200k
  int x;
624
200k
  assert((dst_width % 2 == 0) && (dst_width >= 0));
625
1.03M
  for (x = 0; x < src_width; ++x) {
626
838k
    dst_ptr[2 * x + 0] = (src_ptr[x + 0] * 3 + src_ptr[x + 1] * 1 + 2) >> 2;
627
838k
    dst_ptr[2 * x + 1] = (src_ptr[x + 0] * 1 + src_ptr[x + 1] * 3 + 2) >> 2;
628
838k
  }
629
200k
}
630
631
// Only suitable for at most 12bit range.
632
void ScaleRowUp2_Bilinear_16_C(const uint16_t* src_ptr,
633
                               ptrdiff_t src_stride,
634
                               uint16_t* dst_ptr,
635
                               ptrdiff_t dst_stride,
636
17.2k
                               int dst_width) {
637
17.2k
  const uint16_t* s = src_ptr;
638
17.2k
  const uint16_t* t = src_ptr + src_stride;
639
17.2k
  uint16_t* d = dst_ptr;
640
17.2k
  uint16_t* e = dst_ptr + dst_stride;
641
17.2k
  int src_width = dst_width >> 1;
642
17.2k
  int x;
643
17.2k
  assert((dst_width % 2 == 0) && (dst_width >= 0));
644
126k
  for (x = 0; x < src_width; ++x) {
645
109k
    d[2 * x + 0] =
646
109k
        (s[x + 0] * 9 + s[x + 1] * 3 + t[x + 0] * 3 + t[x + 1] * 1 + 8) >> 4;
647
109k
    d[2 * x + 1] =
648
109k
        (s[x + 0] * 3 + s[x + 1] * 9 + t[x + 0] * 1 + t[x + 1] * 3 + 8) >> 4;
649
109k
    e[2 * x + 0] =
650
109k
        (s[x + 0] * 3 + s[x + 1] * 1 + t[x + 0] * 9 + t[x + 1] * 3 + 8) >> 4;
651
109k
    e[2 * x + 1] =
652
109k
        (s[x + 0] * 1 + s[x + 1] * 3 + t[x + 0] * 3 + t[x + 1] * 9 + 8) >> 4;
653
109k
  }
654
17.2k
}
655
656
// Scales a single row of pixels using point sampling.
657
void ScaleCols_C(uint8_t* dst_ptr,
658
                 const uint8_t* src_ptr,
659
                 int dst_width,
660
                 int x,
661
520k
                 int dx) {
662
520k
  int j;
663
166M
  for (j = 0; j < dst_width - 1; j += 2) {
664
166M
    dst_ptr[0] = src_ptr[x >> 16];
665
166M
    x += dx;
666
166M
    dst_ptr[1] = src_ptr[x >> 16];
667
166M
    x += dx;
668
166M
    dst_ptr += 2;
669
166M
  }
670
520k
  if (dst_width & 1) {
671
341k
    dst_ptr[0] = src_ptr[x >> 16];
672
341k
  }
673
520k
}
674
675
void ScaleCols_16_C(uint16_t* dst_ptr,
676
                    const uint16_t* src_ptr,
677
                    int dst_width,
678
                    int x,
679
528k
                    int dx) {
680
528k
  int j;
681
275M
  for (j = 0; j < dst_width - 1; j += 2) {
682
274M
    dst_ptr[0] = src_ptr[x >> 16];
683
274M
    x += dx;
684
274M
    dst_ptr[1] = src_ptr[x >> 16];
685
274M
    x += dx;
686
274M
    dst_ptr += 2;
687
274M
  }
688
528k
  if (dst_width & 1) {
689
388k
    dst_ptr[0] = src_ptr[x >> 16];
690
388k
  }
691
528k
}
692
693
// Scales a single row of pixels up by 2x using point sampling.
694
void ScaleColsUp2_C(uint8_t* dst_ptr,
695
                    const uint8_t* src_ptr,
696
                    int dst_width,
697
                    int x,
698
51.9k
                    int dx) {
699
51.9k
  int j;
700
51.9k
  (void)x;
701
51.9k
  (void)dx;
702
103k
  for (j = 0; j < dst_width - 1; j += 2) {
703
51.9k
    dst_ptr[1] = dst_ptr[0] = src_ptr[0];
704
51.9k
    src_ptr += 1;
705
51.9k
    dst_ptr += 2;
706
51.9k
  }
707
51.9k
  if (dst_width & 1) {
708
0
    dst_ptr[0] = src_ptr[0];
709
0
  }
710
51.9k
}
711
712
void ScaleColsUp2_16_C(uint16_t* dst_ptr,
713
                       const uint16_t* src_ptr,
714
                       int dst_width,
715
                       int x,
716
7.16k
                       int dx) {
717
7.16k
  int j;
718
7.16k
  (void)x;
719
7.16k
  (void)dx;
720
14.3k
  for (j = 0; j < dst_width - 1; j += 2) {
721
7.16k
    dst_ptr[1] = dst_ptr[0] = src_ptr[0];
722
7.16k
    src_ptr += 1;
723
7.16k
    dst_ptr += 2;
724
7.16k
  }
725
7.16k
  if (dst_width & 1) {
726
0
    dst_ptr[0] = src_ptr[0];
727
0
  }
728
7.16k
}
729
730
// (1-f)a + fb can be replaced with a + f(b-a)
731
#if defined(__arm__) || defined(__aarch64__)
732
#define BLENDER(a, b, f) \
733
  (uint8_t)((int)(a) + ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16))
734
#else
735
// Intel uses 7 bit math with rounding.
736
#define BLENDER(a, b, f) \
737
0
  (uint8_t)((int)(a) + (((int)((f) >> 9) * ((int)(b) - (int)(a)) + 0x40) >> 7))
738
#endif
739
740
void ScaleFilterCols_C(uint8_t* dst_ptr,
741
                       const uint8_t* src_ptr,
742
                       int dst_width,
743
                       int x,
744
0
                       int dx) {
745
0
  int j;
746
0
  for (j = 0; j < dst_width - 1; j += 2) {
747
0
    int xi = x >> 16;
748
0
    int a = src_ptr[xi];
749
0
    int b = src_ptr[xi + 1];
750
0
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
751
0
    x += dx;
752
0
    xi = x >> 16;
753
0
    a = src_ptr[xi];
754
0
    b = src_ptr[xi + 1];
755
0
    dst_ptr[1] = BLENDER(a, b, x & 0xffff);
756
0
    x += dx;
757
0
    dst_ptr += 2;
758
0
  }
759
0
  if (dst_width & 1) {
760
0
    int xi = x >> 16;
761
0
    int a = src_ptr[xi];
762
0
    int b = src_ptr[xi + 1];
763
0
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
764
0
  }
765
0
}
766
767
void ScaleFilterCols64_C(uint8_t* dst_ptr,
768
                         const uint8_t* src_ptr,
769
                         int dst_width,
770
                         int x32,
771
0
                         int dx) {
772
0
  int64_t x = (int64_t)(x32);
773
0
  int j;
774
0
  for (j = 0; j < dst_width - 1; j += 2) {
775
0
    int64_t xi = x >> 16;
776
0
    int a = src_ptr[xi];
777
0
    int b = src_ptr[xi + 1];
778
0
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
779
0
    x += dx;
780
0
    xi = x >> 16;
781
0
    a = src_ptr[xi];
782
0
    b = src_ptr[xi + 1];
783
0
    dst_ptr[1] = BLENDER(a, b, x & 0xffff);
784
0
    x += dx;
785
0
    dst_ptr += 2;
786
0
  }
787
0
  if (dst_width & 1) {
788
0
    int64_t xi = x >> 16;
789
0
    int a = src_ptr[xi];
790
0
    int b = src_ptr[xi + 1];
791
0
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
792
0
  }
793
0
}
794
#undef BLENDER
795
796
// Same as 8 bit arm blender but return is cast to uint16_t
797
#define BLENDER(a, b, f) \
798
70.7M
  (uint16_t)(            \
799
70.7M
      (int)(a) +         \
800
70.7M
      (int)((((int64_t)((f)) * ((int64_t)(b) - (int)(a))) + 0x8000) >> 16))
801
802
void ScaleFilterCols_16_C(uint16_t* dst_ptr,
803
                          const uint16_t* src_ptr,
804
                          int dst_width,
805
                          int x,
806
548k
                          int dx) {
807
548k
  int j;
808
35.8M
  for (j = 0; j < dst_width - 1; j += 2) {
809
35.2M
    int xi = x >> 16;
810
35.2M
    int a = src_ptr[xi];
811
35.2M
    int b = src_ptr[xi + 1];
812
35.2M
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
813
35.2M
    x += dx;
814
35.2M
    xi = x >> 16;
815
35.2M
    a = src_ptr[xi];
816
35.2M
    b = src_ptr[xi + 1];
817
35.2M
    dst_ptr[1] = BLENDER(a, b, x & 0xffff);
818
35.2M
    x += dx;
819
35.2M
    dst_ptr += 2;
820
35.2M
  }
821
548k
  if (dst_width & 1) {
822
153k
    int xi = x >> 16;
823
153k
    int a = src_ptr[xi];
824
153k
    int b = src_ptr[xi + 1];
825
153k
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
826
153k
  }
827
548k
}
828
829
void ScaleFilterCols64_16_C(uint16_t* dst_ptr,
830
                            const uint16_t* src_ptr,
831
                            int dst_width,
832
                            int x32,
833
0
                            int dx) {
834
0
  int64_t x = (int64_t)(x32);
835
0
  int j;
836
0
  for (j = 0; j < dst_width - 1; j += 2) {
837
0
    int64_t xi = x >> 16;
838
0
    int a = src_ptr[xi];
839
0
    int b = src_ptr[xi + 1];
840
0
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
841
0
    x += dx;
842
0
    xi = x >> 16;
843
0
    a = src_ptr[xi];
844
0
    b = src_ptr[xi + 1];
845
0
    dst_ptr[1] = BLENDER(a, b, x & 0xffff);
846
0
    x += dx;
847
0
    dst_ptr += 2;
848
0
  }
849
0
  if (dst_width & 1) {
850
0
    int64_t xi = x >> 16;
851
0
    int a = src_ptr[xi];
852
0
    int b = src_ptr[xi + 1];
853
0
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
854
0
  }
855
0
}
856
#undef BLENDER
857
858
void ScaleRowDown38_C(const uint8_t* src_ptr,
859
                      ptrdiff_t src_stride,
860
                      uint8_t* dst,
861
0
                      int dst_width) {
862
0
  int x;
863
0
  (void)src_stride;
864
0
  assert(dst_width % 3 == 0);
865
0
  for (x = 0; x < dst_width; x += 3) {
866
0
    dst[0] = src_ptr[0];
867
0
    dst[1] = src_ptr[3];
868
0
    dst[2] = src_ptr[6];
869
0
    dst += 3;
870
0
    src_ptr += 8;
871
0
  }
872
0
}
873
874
void ScaleRowDown38_16_C(const uint16_t* src_ptr,
875
                         ptrdiff_t src_stride,
876
                         uint16_t* dst,
877
0
                         int dst_width) {
878
0
  int x;
879
0
  (void)src_stride;
880
0
  assert(dst_width % 3 == 0);
881
0
  for (x = 0; x < dst_width; x += 3) {
882
0
    dst[0] = src_ptr[0];
883
0
    dst[1] = src_ptr[3];
884
0
    dst[2] = src_ptr[6];
885
0
    dst += 3;
886
0
    src_ptr += 8;
887
0
  }
888
0
}
889
890
// 8x3 -> 3x1
891
void ScaleRowDown38_3_Box_C(const uint8_t* src_ptr,
892
                            ptrdiff_t src_stride,
893
                            uint8_t* dst_ptr,
894
470
                            int dst_width) {
895
470
  intptr_t stride = src_stride;
896
470
  int i;
897
470
  assert((dst_width % 3 == 0) && (dst_width > 0));
898
940
  for (i = 0; i < dst_width; i += 3) {
899
470
    dst_ptr[0] =
900
470
        (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] +
901
470
         src_ptr[stride + 1] + src_ptr[stride + 2] + src_ptr[stride * 2 + 0] +
902
470
         src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) *
903
470
            (65536 / 9) >>
904
470
        16;
905
470
    dst_ptr[1] =
906
470
        (src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] +
907
470
         src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride * 2 + 3] +
908
470
         src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) *
909
470
            (65536 / 9) >>
910
470
        16;
911
470
    dst_ptr[2] =
912
470
        (src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7] +
913
470
         src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) *
914
470
            (65536 / 6) >>
915
470
        16;
916
470
    src_ptr += 8;
917
470
    dst_ptr += 3;
918
470
  }
919
470
}
920
921
void ScaleRowDown38_3_Box_16_C(const uint16_t* src_ptr,
922
                               ptrdiff_t src_stride,
923
                               uint16_t* dst_ptr,
924
420
                               int dst_width) {
925
420
  intptr_t stride = src_stride;
926
420
  int i;
927
420
  assert((dst_width % 3 == 0) && (dst_width > 0));
928
840
  for (i = 0; i < dst_width; i += 3) {
929
420
    dst_ptr[0] =
930
420
        (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] +
931
420
         src_ptr[stride + 1] + src_ptr[stride + 2] + src_ptr[stride * 2 + 0] +
932
420
         src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) *
933
420
            (65536u / 9u) >>
934
420
        16;
935
420
    dst_ptr[1] =
936
420
        (src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] +
937
420
         src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride * 2 + 3] +
938
420
         src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) *
939
420
            (65536u / 9u) >>
940
420
        16;
941
420
    dst_ptr[2] =
942
420
        (src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7] +
943
420
         src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) *
944
420
            (65536u / 6u) >>
945
420
        16;
946
420
    src_ptr += 8;
947
420
    dst_ptr += 3;
948
420
  }
949
420
}
950
951
// 8x2 -> 3x1
952
void ScaleRowDown38_2_Box_C(const uint8_t* src_ptr,
953
                            ptrdiff_t src_stride,
954
                            uint8_t* dst_ptr,
955
235
                            int dst_width) {
956
235
  intptr_t stride = src_stride;
957
235
  int i;
958
235
  assert((dst_width % 3 == 0) && (dst_width > 0));
959
470
  for (i = 0; i < dst_width; i += 3) {
960
235
    dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] +
961
235
                  src_ptr[stride + 1] + src_ptr[stride + 2]) *
962
235
                     (65536 / 6) >>
963
235
                 16;
964
235
    dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] +
965
235
                  src_ptr[stride + 4] + src_ptr[stride + 5]) *
966
235
                     (65536 / 6) >>
967
235
                 16;
968
235
    dst_ptr[2] =
969
235
        (src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7]) *
970
235
            (65536 / 4) >>
971
235
        16;
972
235
    src_ptr += 8;
973
235
    dst_ptr += 3;
974
235
  }
975
235
}
976
977
void ScaleRowDown38_2_Box_16_C(const uint16_t* src_ptr,
978
                               ptrdiff_t src_stride,
979
                               uint16_t* dst_ptr,
980
210
                               int dst_width) {
981
210
  intptr_t stride = src_stride;
982
210
  int i;
983
210
  assert((dst_width % 3 == 0) && (dst_width > 0));
984
420
  for (i = 0; i < dst_width; i += 3) {
985
210
    dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] +
986
210
                  src_ptr[stride + 1] + src_ptr[stride + 2]) *
987
210
                     (65536u / 6u) >>
988
210
                 16;
989
210
    dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] +
990
210
                  src_ptr[stride + 4] + src_ptr[stride + 5]) *
991
210
                     (65536u / 6u) >>
992
210
                 16;
993
210
    dst_ptr[2] =
994
210
        (src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7]) *
995
210
            (65536u / 4u) >>
996
210
        16;
997
210
    src_ptr += 8;
998
210
    dst_ptr += 3;
999
210
  }
1000
210
}
1001
1002
693k
void ScaleAddRow_C(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) {
1003
693k
  int x;
1004
693k
  assert(src_width > 0);
1005
7.04M
  for (x = 0; x < src_width - 1; x += 2) {
1006
6.34M
    dst_ptr[0] += src_ptr[0];
1007
6.34M
    dst_ptr[1] += src_ptr[1];
1008
6.34M
    src_ptr += 2;
1009
6.34M
    dst_ptr += 2;
1010
6.34M
  }
1011
693k
  if (src_width & 1) {
1012
71.4k
    dst_ptr[0] += src_ptr[0];
1013
71.4k
  }
1014
693k
}
1015
1016
void ScaleAddRow_16_C(const uint16_t* src_ptr,
1017
                      uint32_t* dst_ptr,
1018
620k
                      int src_width) {
1019
620k
  int x;
1020
620k
  assert(src_width > 0);
1021
44.8M
  for (x = 0; x < src_width - 1; x += 2) {
1022
44.2M
    dst_ptr[0] += src_ptr[0];
1023
44.2M
    dst_ptr[1] += src_ptr[1];
1024
44.2M
    src_ptr += 2;
1025
44.2M
    dst_ptr += 2;
1026
44.2M
  }
1027
620k
  if (src_width & 1) {
1028
253k
    dst_ptr[0] += src_ptr[0];
1029
253k
  }
1030
620k
}
1031
1032
// ARGB scale row functions
1033
1034
void ScaleARGBRowDown2_C(const uint8_t* src_argb,
1035
                         ptrdiff_t src_stride,
1036
                         uint8_t* dst_argb,
1037
0
                         int dst_width) {
1038
0
  const uint32_t* src = (const uint32_t*)(src_argb);
1039
0
  uint32_t* dst = (uint32_t*)(dst_argb);
1040
0
  int x;
1041
0
  (void)src_stride;
1042
0
  for (x = 0; x < dst_width - 1; x += 2) {
1043
0
    dst[0] = src[1];
1044
0
    dst[1] = src[3];
1045
0
    src += 4;
1046
0
    dst += 2;
1047
0
  }
1048
0
  if (dst_width & 1) {
1049
0
    dst[0] = src[1];
1050
0
  }
1051
0
}
1052
1053
void ScaleARGBRowDown2Linear_C(const uint8_t* src_argb,
1054
                               ptrdiff_t src_stride,
1055
                               uint8_t* dst_argb,
1056
0
                               int dst_width) {
1057
0
  int x;
1058
0
  (void)src_stride;
1059
0
  for (x = 0; x < dst_width; ++x) {
1060
0
    dst_argb[0] = (src_argb[0] + src_argb[4] + 1) >> 1;
1061
0
    dst_argb[1] = (src_argb[1] + src_argb[5] + 1) >> 1;
1062
0
    dst_argb[2] = (src_argb[2] + src_argb[6] + 1) >> 1;
1063
0
    dst_argb[3] = (src_argb[3] + src_argb[7] + 1) >> 1;
1064
0
    src_argb += 8;
1065
0
    dst_argb += 4;
1066
0
  }
1067
0
}
1068
1069
void ScaleARGBRowDown2Box_C(const uint8_t* src_argb,
1070
                            ptrdiff_t src_stride,
1071
                            uint8_t* dst_argb,
1072
0
                            int dst_width) {
1073
0
  int x;
1074
0
  for (x = 0; x < dst_width; ++x) {
1075
0
    dst_argb[0] = (src_argb[0] + src_argb[4] + src_argb[src_stride] +
1076
0
                   src_argb[src_stride + 4] + 2) >>
1077
0
                  2;
1078
0
    dst_argb[1] = (src_argb[1] + src_argb[5] + src_argb[src_stride + 1] +
1079
0
                   src_argb[src_stride + 5] + 2) >>
1080
0
                  2;
1081
0
    dst_argb[2] = (src_argb[2] + src_argb[6] + src_argb[src_stride + 2] +
1082
0
                   src_argb[src_stride + 6] + 2) >>
1083
0
                  2;
1084
0
    dst_argb[3] = (src_argb[3] + src_argb[7] + src_argb[src_stride + 3] +
1085
0
                   src_argb[src_stride + 7] + 2) >>
1086
0
                  2;
1087
0
    src_argb += 8;
1088
0
    dst_argb += 4;
1089
0
  }
1090
0
}
1091
1092
void ScaleARGBRowDownEven_C(const uint8_t* src_argb,
1093
                            ptrdiff_t src_stride,
1094
                            int src_stepx,
1095
                            uint8_t* dst_argb,
1096
0
                            int dst_width) {
1097
0
  const uint32_t* src = (const uint32_t*)(src_argb);
1098
0
  uint32_t* dst = (uint32_t*)(dst_argb);
1099
0
  (void)src_stride;
1100
0
  int x;
1101
0
  for (x = 0; x < dst_width - 1; x += 2) {
1102
0
    dst[0] = src[0];
1103
0
    dst[1] = src[src_stepx];
1104
0
    src += src_stepx * 2;
1105
0
    dst += 2;
1106
0
  }
1107
0
  if (dst_width & 1) {
1108
0
    dst[0] = src[0];
1109
0
  }
1110
0
}
1111
1112
void ScaleARGBRowDownEvenBox_C(const uint8_t* src_argb,
1113
                               ptrdiff_t src_stride,
1114
                               int src_stepx,
1115
                               uint8_t* dst_argb,
1116
0
                               int dst_width) {
1117
0
  int x;
1118
0
  for (x = 0; x < dst_width; ++x) {
1119
0
    dst_argb[0] = (src_argb[0] + src_argb[4] + src_argb[src_stride] +
1120
0
                   src_argb[src_stride + 4] + 2) >>
1121
0
                  2;
1122
0
    dst_argb[1] = (src_argb[1] + src_argb[5] + src_argb[src_stride + 1] +
1123
0
                   src_argb[src_stride + 5] + 2) >>
1124
0
                  2;
1125
0
    dst_argb[2] = (src_argb[2] + src_argb[6] + src_argb[src_stride + 2] +
1126
0
                   src_argb[src_stride + 6] + 2) >>
1127
0
                  2;
1128
0
    dst_argb[3] = (src_argb[3] + src_argb[7] + src_argb[src_stride + 3] +
1129
0
                   src_argb[src_stride + 7] + 2) >>
1130
0
                  2;
1131
0
    src_argb += src_stepx * 4;
1132
0
    dst_argb += 4;
1133
0
  }
1134
0
}
1135
1136
// Scales a single row of pixels using point sampling.
1137
void ScaleARGBCols_C(uint8_t* dst_argb,
1138
                     const uint8_t* src_argb,
1139
                     int dst_width,
1140
                     int x,
1141
0
                     int dx) {
1142
0
  const uint32_t* src = (const uint32_t*)(src_argb);
1143
0
  uint32_t* dst = (uint32_t*)(dst_argb);
1144
0
  int j;
1145
0
  for (j = 0; j < dst_width - 1; j += 2) {
1146
0
    dst[0] = src[x >> 16];
1147
0
    x += dx;
1148
0
    dst[1] = src[x >> 16];
1149
0
    x += dx;
1150
0
    dst += 2;
1151
0
  }
1152
0
  if (dst_width & 1) {
1153
0
    dst[0] = src[x >> 16];
1154
0
  }
1155
0
}
1156
1157
void ScaleARGBCols64_C(uint8_t* dst_argb,
1158
                       const uint8_t* src_argb,
1159
                       int dst_width,
1160
                       int x32,
1161
0
                       int dx) {
1162
0
  int64_t x = (int64_t)(x32);
1163
0
  const uint32_t* src = (const uint32_t*)(src_argb);
1164
0
  uint32_t* dst = (uint32_t*)(dst_argb);
1165
0
  int j;
1166
0
  for (j = 0; j < dst_width - 1; j += 2) {
1167
0
    dst[0] = src[x >> 16];
1168
0
    x += dx;
1169
0
    dst[1] = src[x >> 16];
1170
0
    x += dx;
1171
0
    dst += 2;
1172
0
  }
1173
0
  if (dst_width & 1) {
1174
0
    dst[0] = src[x >> 16];
1175
0
  }
1176
0
}
1177
1178
// Scales a single row of pixels up by 2x using point sampling.
1179
void ScaleARGBColsUp2_C(uint8_t* dst_argb,
1180
                        const uint8_t* src_argb,
1181
                        int dst_width,
1182
                        int x,
1183
0
                        int dx) {
1184
0
  const uint32_t* src = (const uint32_t*)(src_argb);
1185
0
  uint32_t* dst = (uint32_t*)(dst_argb);
1186
0
  int j;
1187
0
  (void)x;
1188
0
  (void)dx;
1189
0
  for (j = 0; j < dst_width - 1; j += 2) {
1190
0
    dst[1] = dst[0] = src[0];
1191
0
    src += 1;
1192
0
    dst += 2;
1193
0
  }
1194
0
  if (dst_width & 1) {
1195
0
    dst[0] = src[0];
1196
0
  }
1197
0
}
1198
1199
// TODO(fbarchard): Replace 0x7f ^ f with 128-f.  bug=607.
1200
// Mimics SSSE3 blender
1201
0
#define BLENDER1(a, b, f) ((a) * (0x7f ^ f) + (b)*f) >> 7
1202
#define BLENDERC(a, b, f, s) \
1203
0
  (uint32_t)(BLENDER1(((a) >> s) & 255, ((b) >> s) & 255, f) << s)
1204
#define BLENDER(a, b, f)                                                 \
1205
0
  BLENDERC(a, b, f, 24) | BLENDERC(a, b, f, 16) | BLENDERC(a, b, f, 8) | \
1206
0
      BLENDERC(a, b, f, 0)
1207
1208
void ScaleARGBFilterCols_C(uint8_t* dst_argb,
1209
                           const uint8_t* src_argb,
1210
                           int dst_width,
1211
                           int x,
1212
0
                           int dx) {
1213
0
  const uint32_t* src = (const uint32_t*)(src_argb);
1214
0
  uint32_t* dst = (uint32_t*)(dst_argb);
1215
0
  int j;
1216
0
  for (j = 0; j < dst_width - 1; j += 2) {
1217
0
    int xi = x >> 16;
1218
0
    int xf = (x >> 9) & 0x7f;
1219
0
    uint32_t a = src[xi];
1220
0
    uint32_t b = src[xi + 1];
1221
0
    dst[0] = BLENDER(a, b, xf);
1222
0
    x += dx;
1223
0
    xi = x >> 16;
1224
0
    xf = (x >> 9) & 0x7f;
1225
0
    a = src[xi];
1226
0
    b = src[xi + 1];
1227
0
    dst[1] = BLENDER(a, b, xf);
1228
0
    x += dx;
1229
0
    dst += 2;
1230
0
  }
1231
0
  if (dst_width & 1) {
1232
0
    int xi = x >> 16;
1233
0
    int xf = (x >> 9) & 0x7f;
1234
0
    uint32_t a = src[xi];
1235
0
    uint32_t b = src[xi + 1];
1236
0
    dst[0] = BLENDER(a, b, xf);
1237
0
  }
1238
0
}
1239
1240
void ScaleARGBFilterCols64_C(uint8_t* dst_argb,
1241
                             const uint8_t* src_argb,
1242
                             int dst_width,
1243
                             int x32,
1244
0
                             int dx) {
1245
0
  int64_t x = (int64_t)(x32);
1246
0
  const uint32_t* src = (const uint32_t*)(src_argb);
1247
0
  uint32_t* dst = (uint32_t*)(dst_argb);
1248
0
  int j;
1249
0
  for (j = 0; j < dst_width - 1; j += 2) {
1250
0
    int64_t xi = x >> 16;
1251
0
    int xf = (x >> 9) & 0x7f;
1252
0
    uint32_t a = src[xi];
1253
0
    uint32_t b = src[xi + 1];
1254
0
    dst[0] = BLENDER(a, b, xf);
1255
0
    x += dx;
1256
0
    xi = x >> 16;
1257
0
    xf = (x >> 9) & 0x7f;
1258
0
    a = src[xi];
1259
0
    b = src[xi + 1];
1260
0
    dst[1] = BLENDER(a, b, xf);
1261
0
    x += dx;
1262
0
    dst += 2;
1263
0
  }
1264
0
  if (dst_width & 1) {
1265
0
    int64_t xi = x >> 16;
1266
0
    int xf = (x >> 9) & 0x7f;
1267
0
    uint32_t a = src[xi];
1268
0
    uint32_t b = src[xi + 1];
1269
0
    dst[0] = BLENDER(a, b, xf);
1270
0
  }
1271
0
}
1272
#undef BLENDER1
1273
#undef BLENDERC
1274
#undef BLENDER
1275
1276
// UV scale row functions
1277
// same as ARGB but 2 channels
1278
1279
void ScaleUVRowDown2_C(const uint8_t* src_uv,
1280
                       ptrdiff_t src_stride,
1281
                       uint8_t* dst_uv,
1282
0
                       int dst_width) {
1283
0
  int x;
1284
0
  (void)src_stride;
1285
0
  for (x = 0; x < dst_width; ++x) {
1286
0
    dst_uv[0] = src_uv[2];  // Store the 2nd UV
1287
0
    dst_uv[1] = src_uv[3];
1288
0
    src_uv += 4;
1289
0
    dst_uv += 2;
1290
0
  }
1291
0
}
1292
1293
void ScaleUVRowDown2Linear_C(const uint8_t* src_uv,
1294
                             ptrdiff_t src_stride,
1295
                             uint8_t* dst_uv,
1296
0
                             int dst_width) {
1297
0
  int x;
1298
0
  (void)src_stride;
1299
0
  for (x = 0; x < dst_width; ++x) {
1300
0
    dst_uv[0] = (src_uv[0] + src_uv[2] + 1) >> 1;
1301
0
    dst_uv[1] = (src_uv[1] + src_uv[3] + 1) >> 1;
1302
0
    src_uv += 4;
1303
0
    dst_uv += 2;
1304
0
  }
1305
0
}
1306
1307
void ScaleUVRowDown2Box_C(const uint8_t* src_uv,
1308
                          ptrdiff_t src_stride,
1309
                          uint8_t* dst_uv,
1310
0
                          int dst_width) {
1311
0
  int x;
1312
0
  for (x = 0; x < dst_width; ++x) {
1313
0
    dst_uv[0] = (src_uv[0] + src_uv[2] + src_uv[src_stride] +
1314
0
                 src_uv[src_stride + 2] + 2) >>
1315
0
                2;
1316
0
    dst_uv[1] = (src_uv[1] + src_uv[3] + src_uv[src_stride + 1] +
1317
0
                 src_uv[src_stride + 3] + 2) >>
1318
0
                2;
1319
0
    src_uv += 4;
1320
0
    dst_uv += 2;
1321
0
  }
1322
0
}
1323
1324
void ScaleUVRowDownEven_C(const uint8_t* src_uv,
1325
                          ptrdiff_t src_stride,
1326
                          int src_stepx,
1327
                          uint8_t* dst_uv,
1328
0
                          int dst_width) {
1329
0
  const uint16_t* src = (const uint16_t*)(src_uv);
1330
0
  uint16_t* dst = (uint16_t*)(dst_uv);
1331
0
  (void)src_stride;
1332
0
  int x;
1333
0
  for (x = 0; x < dst_width - 1; x += 2) {
1334
0
    dst[0] = src[0];
1335
0
    dst[1] = src[src_stepx];
1336
0
    src += src_stepx * 2;
1337
0
    dst += 2;
1338
0
  }
1339
0
  if (dst_width & 1) {
1340
0
    dst[0] = src[0];
1341
0
  }
1342
0
}
1343
1344
void ScaleUVRowDownEvenBox_C(const uint8_t* src_uv,
1345
                             ptrdiff_t src_stride,
1346
                             int src_stepx,
1347
                             uint8_t* dst_uv,
1348
0
                             int dst_width) {
1349
0
  int x;
1350
0
  for (x = 0; x < dst_width; ++x) {
1351
0
    dst_uv[0] = (src_uv[0] + src_uv[2] + src_uv[src_stride] +
1352
0
                 src_uv[src_stride + 2] + 2) >>
1353
0
                2;
1354
0
    dst_uv[1] = (src_uv[1] + src_uv[3] + src_uv[src_stride + 1] +
1355
0
                 src_uv[src_stride + 3] + 2) >>
1356
0
                2;
1357
0
    src_uv += src_stepx * 2;
1358
0
    dst_uv += 2;
1359
0
  }
1360
0
}
1361
1362
void ScaleUVRowUp2_Linear_C(const uint8_t* src_ptr,
1363
                            uint8_t* dst_ptr,
1364
0
                            int dst_width) {
1365
0
  int src_width = dst_width >> 1;
1366
0
  int x;
1367
0
  assert((dst_width % 2 == 0) && (dst_width >= 0));
1368
0
  for (x = 0; x < src_width; ++x) {
1369
0
    dst_ptr[4 * x + 0] =
1370
0
        (src_ptr[2 * x + 0] * 3 + src_ptr[2 * x + 2] * 1 + 2) >> 2;
1371
0
    dst_ptr[4 * x + 1] =
1372
0
        (src_ptr[2 * x + 1] * 3 + src_ptr[2 * x + 3] * 1 + 2) >> 2;
1373
0
    dst_ptr[4 * x + 2] =
1374
0
        (src_ptr[2 * x + 0] * 1 + src_ptr[2 * x + 2] * 3 + 2) >> 2;
1375
0
    dst_ptr[4 * x + 3] =
1376
0
        (src_ptr[2 * x + 1] * 1 + src_ptr[2 * x + 3] * 3 + 2) >> 2;
1377
0
  }
1378
0
}
1379
1380
void ScaleUVRowUp2_Bilinear_C(const uint8_t* src_ptr,
1381
                              ptrdiff_t src_stride,
1382
                              uint8_t* dst_ptr,
1383
                              ptrdiff_t dst_stride,
1384
0
                              int dst_width) {
1385
0
  const uint8_t* s = src_ptr;
1386
0
  const uint8_t* t = src_ptr + src_stride;
1387
0
  uint8_t* d = dst_ptr;
1388
0
  uint8_t* e = dst_ptr + dst_stride;
1389
0
  int src_width = dst_width >> 1;
1390
0
  int x;
1391
0
  assert((dst_width % 2 == 0) && (dst_width >= 0));
1392
0
  for (x = 0; x < src_width; ++x) {
1393
0
    d[4 * x + 0] = (s[2 * x + 0] * 9 + s[2 * x + 2] * 3 + t[2 * x + 0] * 3 +
1394
0
                    t[2 * x + 2] * 1 + 8) >>
1395
0
                   4;
1396
0
    d[4 * x + 1] = (s[2 * x + 1] * 9 + s[2 * x + 3] * 3 + t[2 * x + 1] * 3 +
1397
0
                    t[2 * x + 3] * 1 + 8) >>
1398
0
                   4;
1399
0
    d[4 * x + 2] = (s[2 * x + 0] * 3 + s[2 * x + 2] * 9 + t[2 * x + 0] * 1 +
1400
0
                    t[2 * x + 2] * 3 + 8) >>
1401
0
                   4;
1402
0
    d[4 * x + 3] = (s[2 * x + 1] * 3 + s[2 * x + 3] * 9 + t[2 * x + 1] * 1 +
1403
0
                    t[2 * x + 3] * 3 + 8) >>
1404
0
                   4;
1405
0
    e[4 * x + 0] = (s[2 * x + 0] * 3 + s[2 * x + 2] * 1 + t[2 * x + 0] * 9 +
1406
0
                    t[2 * x + 2] * 3 + 8) >>
1407
0
                   4;
1408
0
    e[4 * x + 1] = (s[2 * x + 1] * 3 + s[2 * x + 3] * 1 + t[2 * x + 1] * 9 +
1409
0
                    t[2 * x + 3] * 3 + 8) >>
1410
0
                   4;
1411
0
    e[4 * x + 2] = (s[2 * x + 0] * 1 + s[2 * x + 2] * 3 + t[2 * x + 0] * 3 +
1412
0
                    t[2 * x + 2] * 9 + 8) >>
1413
0
                   4;
1414
0
    e[4 * x + 3] = (s[2 * x + 1] * 1 + s[2 * x + 3] * 3 + t[2 * x + 1] * 3 +
1415
0
                    t[2 * x + 3] * 9 + 8) >>
1416
0
                   4;
1417
0
  }
1418
0
}
1419
1420
void ScaleUVRowUp2_Linear_16_C(const uint16_t* src_ptr,
1421
                               uint16_t* dst_ptr,
1422
0
                               int dst_width) {
1423
0
  int src_width = dst_width >> 1;
1424
0
  int x;
1425
0
  assert((dst_width % 2 == 0) && (dst_width >= 0));
1426
0
  for (x = 0; x < src_width; ++x) {
1427
0
    dst_ptr[4 * x + 0] =
1428
0
        (src_ptr[2 * x + 0] * 3 + src_ptr[2 * x + 2] * 1 + 2) >> 2;
1429
0
    dst_ptr[4 * x + 1] =
1430
0
        (src_ptr[2 * x + 1] * 3 + src_ptr[2 * x + 3] * 1 + 2) >> 2;
1431
0
    dst_ptr[4 * x + 2] =
1432
0
        (src_ptr[2 * x + 0] * 1 + src_ptr[2 * x + 2] * 3 + 2) >> 2;
1433
0
    dst_ptr[4 * x + 3] =
1434
0
        (src_ptr[2 * x + 1] * 1 + src_ptr[2 * x + 3] * 3 + 2) >> 2;
1435
0
  }
1436
0
}
1437
1438
void ScaleUVRowUp2_Bilinear_16_C(const uint16_t* src_ptr,
1439
                                 ptrdiff_t src_stride,
1440
                                 uint16_t* dst_ptr,
1441
                                 ptrdiff_t dst_stride,
1442
0
                                 int dst_width) {
1443
0
  const uint16_t* s = src_ptr;
1444
0
  const uint16_t* t = src_ptr + src_stride;
1445
0
  uint16_t* d = dst_ptr;
1446
0
  uint16_t* e = dst_ptr + dst_stride;
1447
0
  int src_width = dst_width >> 1;
1448
0
  int x;
1449
0
  assert((dst_width % 2 == 0) && (dst_width >= 0));
1450
0
  for (x = 0; x < src_width; ++x) {
1451
0
    d[4 * x + 0] = (s[2 * x + 0] * 9 + s[2 * x + 2] * 3 + t[2 * x + 0] * 3 +
1452
0
                    t[2 * x + 2] * 1 + 8) >>
1453
0
                   4;
1454
0
    d[4 * x + 1] = (s[2 * x + 1] * 9 + s[2 * x + 3] * 3 + t[2 * x + 1] * 3 +
1455
0
                    t[2 * x + 3] * 1 + 8) >>
1456
0
                   4;
1457
0
    d[4 * x + 2] = (s[2 * x + 0] * 3 + s[2 * x + 2] * 9 + t[2 * x + 0] * 1 +
1458
0
                    t[2 * x + 2] * 3 + 8) >>
1459
0
                   4;
1460
0
    d[4 * x + 3] = (s[2 * x + 1] * 3 + s[2 * x + 3] * 9 + t[2 * x + 1] * 1 +
1461
0
                    t[2 * x + 3] * 3 + 8) >>
1462
0
                   4;
1463
0
    e[4 * x + 0] = (s[2 * x + 0] * 3 + s[2 * x + 2] * 1 + t[2 * x + 0] * 9 +
1464
0
                    t[2 * x + 2] * 3 + 8) >>
1465
0
                   4;
1466
0
    e[4 * x + 1] = (s[2 * x + 1] * 3 + s[2 * x + 3] * 1 + t[2 * x + 1] * 9 +
1467
0
                    t[2 * x + 3] * 3 + 8) >>
1468
0
                   4;
1469
0
    e[4 * x + 2] = (s[2 * x + 0] * 1 + s[2 * x + 2] * 3 + t[2 * x + 0] * 3 +
1470
0
                    t[2 * x + 2] * 9 + 8) >>
1471
0
                   4;
1472
0
    e[4 * x + 3] = (s[2 * x + 1] * 1 + s[2 * x + 3] * 3 + t[2 * x + 1] * 3 +
1473
0
                    t[2 * x + 3] * 9 + 8) >>
1474
0
                   4;
1475
0
  }
1476
0
}
1477
1478
// Scales a single row of pixels using point sampling.
1479
void ScaleUVCols_C(uint8_t* dst_uv,
1480
                   const uint8_t* src_uv,
1481
                   int dst_width,
1482
                   int x,
1483
0
                   int dx) {
1484
0
  const uint16_t* src = (const uint16_t*)(src_uv);
1485
0
  uint16_t* dst = (uint16_t*)(dst_uv);
1486
0
  int j;
1487
0
  for (j = 0; j < dst_width - 1; j += 2) {
1488
0
    dst[0] = src[x >> 16];
1489
0
    x += dx;
1490
0
    dst[1] = src[x >> 16];
1491
0
    x += dx;
1492
0
    dst += 2;
1493
0
  }
1494
0
  if (dst_width & 1) {
1495
0
    dst[0] = src[x >> 16];
1496
0
  }
1497
0
}
1498
1499
void ScaleUVCols64_C(uint8_t* dst_uv,
1500
                     const uint8_t* src_uv,
1501
                     int dst_width,
1502
                     int x32,
1503
0
                     int dx) {
1504
0
  int64_t x = (int64_t)(x32);
1505
0
  const uint16_t* src = (const uint16_t*)(src_uv);
1506
0
  uint16_t* dst = (uint16_t*)(dst_uv);
1507
0
  int j;
1508
0
  for (j = 0; j < dst_width - 1; j += 2) {
1509
0
    dst[0] = src[x >> 16];
1510
0
    x += dx;
1511
0
    dst[1] = src[x >> 16];
1512
0
    x += dx;
1513
0
    dst += 2;
1514
0
  }
1515
0
  if (dst_width & 1) {
1516
0
    dst[0] = src[x >> 16];
1517
0
  }
1518
0
}
1519
1520
// Scales a single row of pixels up by 2x using point sampling.
1521
void ScaleUVColsUp2_C(uint8_t* dst_uv,
1522
                      const uint8_t* src_uv,
1523
                      int dst_width,
1524
                      int x,
1525
0
                      int dx) {
1526
0
  const uint16_t* src = (const uint16_t*)(src_uv);
1527
0
  uint16_t* dst = (uint16_t*)(dst_uv);
1528
0
  int j;
1529
0
  (void)x;
1530
0
  (void)dx;
1531
0
  for (j = 0; j < dst_width - 1; j += 2) {
1532
0
    dst[1] = dst[0] = src[0];
1533
0
    src += 1;
1534
0
    dst += 2;
1535
0
  }
1536
0
  if (dst_width & 1) {
1537
0
    dst[0] = src[0];
1538
0
  }
1539
0
}
1540
1541
// TODO(fbarchard): Replace 0x7f ^ f with 128-f.  bug=607.
1542
// Mimics SSSE3 blender
1543
0
#define BLENDER1(a, b, f) ((a) * (0x7f ^ f) + (b)*f) >> 7
1544
#define BLENDERC(a, b, f, s) \
1545
0
  (uint16_t)(BLENDER1(((a) >> s) & 255, ((b) >> s) & 255, f) << s)
1546
0
#define BLENDER(a, b, f) BLENDERC(a, b, f, 8) | BLENDERC(a, b, f, 0)
1547
1548
void ScaleUVFilterCols_C(uint8_t* dst_uv,
1549
                         const uint8_t* src_uv,
1550
                         int dst_width,
1551
                         int x,
1552
0
                         int dx) {
1553
0
  const uint16_t* src = (const uint16_t*)(src_uv);
1554
0
  uint16_t* dst = (uint16_t*)(dst_uv);
1555
0
  int j;
1556
0
  for (j = 0; j < dst_width - 1; j += 2) {
1557
0
    int xi = x >> 16;
1558
0
    int xf = (x >> 9) & 0x7f;
1559
0
    uint16_t a = src[xi];
1560
0
    uint16_t b = src[xi + 1];
1561
0
    dst[0] = BLENDER(a, b, xf);
1562
0
    x += dx;
1563
0
    xi = x >> 16;
1564
0
    xf = (x >> 9) & 0x7f;
1565
0
    a = src[xi];
1566
0
    b = src[xi + 1];
1567
0
    dst[1] = BLENDER(a, b, xf);
1568
0
    x += dx;
1569
0
    dst += 2;
1570
0
  }
1571
0
  if (dst_width & 1) {
1572
0
    int xi = x >> 16;
1573
0
    int xf = (x >> 9) & 0x7f;
1574
0
    uint16_t a = src[xi];
1575
0
    uint16_t b = src[xi + 1];
1576
0
    dst[0] = BLENDER(a, b, xf);
1577
0
  }
1578
0
}
1579
1580
void ScaleUVFilterCols64_C(uint8_t* dst_uv,
1581
                           const uint8_t* src_uv,
1582
                           int dst_width,
1583
                           int x32,
1584
0
                           int dx) {
1585
0
  int64_t x = (int64_t)(x32);
1586
0
  const uint16_t* src = (const uint16_t*)(src_uv);
1587
0
  uint16_t* dst = (uint16_t*)(dst_uv);
1588
0
  int j;
1589
0
  for (j = 0; j < dst_width - 1; j += 2) {
1590
0
    int64_t xi = x >> 16;
1591
0
    int xf = (x >> 9) & 0x7f;
1592
0
    uint16_t a = src[xi];
1593
0
    uint16_t b = src[xi + 1];
1594
0
    dst[0] = BLENDER(a, b, xf);
1595
0
    x += dx;
1596
0
    xi = x >> 16;
1597
0
    xf = (x >> 9) & 0x7f;
1598
0
    a = src[xi];
1599
0
    b = src[xi + 1];
1600
0
    dst[1] = BLENDER(a, b, xf);
1601
0
    x += dx;
1602
0
    dst += 2;
1603
0
  }
1604
0
  if (dst_width & 1) {
1605
0
    int64_t xi = x >> 16;
1606
0
    int xf = (x >> 9) & 0x7f;
1607
0
    uint16_t a = src[xi];
1608
0
    uint16_t b = src[xi + 1];
1609
0
    dst[0] = BLENDER(a, b, xf);
1610
0
  }
1611
0
}
1612
#undef BLENDER1
1613
#undef BLENDERC
1614
#undef BLENDER
1615
1616
// Scale plane vertically with bilinear interpolation.
1617
void ScalePlaneVertical(int src_height,
1618
                        int dst_width,
1619
                        int dst_height,
1620
                        int src_stride,
1621
                        int dst_stride,
1622
                        const uint8_t* src_argb,
1623
                        uint8_t* dst_argb,
1624
                        int x,
1625
                        int y,
1626
                        int dy,
1627
                        int bpp,  // bytes per pixel. 4 for ARGB.
1628
5.16k
                        enum FilterMode filtering) {
1629
  // TODO(fbarchard): Allow higher bpp.
1630
5.16k
  int dst_width_bytes = dst_width * bpp;
1631
5.16k
  void (*InterpolateRow)(uint8_t* dst_argb, const uint8_t* src_argb,
1632
5.16k
                         ptrdiff_t src_stride, int dst_width,
1633
5.16k
                         int source_y_fraction) = InterpolateRow_C;
1634
5.16k
  const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
1635
5.16k
  int j;
1636
5.16k
  assert(bpp >= 1 && bpp <= 4);
1637
5.16k
  assert(src_height != 0);
1638
5.16k
  assert(dst_width > 0);
1639
5.16k
  assert(dst_height > 0);
1640
5.16k
  src_argb += (x >> 16) * bpp;
1641
5.16k
#if defined(HAS_INTERPOLATEROW_SSSE3)
1642
5.16k
  if (TestCpuFlag(kCpuHasSSSE3)) {
1643
5.16k
    InterpolateRow = InterpolateRow_Any_SSSE3;
1644
5.16k
    if (IS_ALIGNED(dst_width_bytes, 16)) {
1645
422
      InterpolateRow = InterpolateRow_SSSE3;
1646
422
    }
1647
5.16k
  }
1648
5.16k
#endif
1649
5.16k
#if defined(HAS_INTERPOLATEROW_AVX2)
1650
5.16k
  if (TestCpuFlag(kCpuHasAVX2)) {
1651
5.16k
    InterpolateRow = InterpolateRow_Any_AVX2;
1652
5.16k
    if (IS_ALIGNED(dst_width_bytes, 32)) {
1653
292
      InterpolateRow = InterpolateRow_AVX2;
1654
292
    }
1655
5.16k
  }
1656
5.16k
#endif
1657
#if defined(HAS_INTERPOLATEROW_NEON)
1658
  if (TestCpuFlag(kCpuHasNEON)) {
1659
    InterpolateRow = InterpolateRow_Any_NEON;
1660
    if (IS_ALIGNED(dst_width_bytes, 16)) {
1661
      InterpolateRow = InterpolateRow_NEON;
1662
    }
1663
  }
1664
#endif
1665
#if defined(HAS_INTERPOLATEROW_SME)
1666
  if (TestCpuFlag(kCpuHasSME)) {
1667
    InterpolateRow = InterpolateRow_SME;
1668
  }
1669
#endif
1670
#if defined(HAS_INTERPOLATEROW_MSA)
1671
  if (TestCpuFlag(kCpuHasMSA)) {
1672
    InterpolateRow = InterpolateRow_Any_MSA;
1673
    if (IS_ALIGNED(dst_width_bytes, 32)) {
1674
      InterpolateRow = InterpolateRow_MSA;
1675
    }
1676
  }
1677
#endif
1678
#if defined(HAS_INTERPOLATEROW_LSX)
1679
  if (TestCpuFlag(kCpuHasLSX)) {
1680
    InterpolateRow = InterpolateRow_Any_LSX;
1681
    if (IS_ALIGNED(dst_width_bytes, 32)) {
1682
      InterpolateRow = InterpolateRow_LSX;
1683
    }
1684
  }
1685
#endif
1686
#if defined(HAS_INTERPOLATEROW_RVV)
1687
  if (TestCpuFlag(kCpuHasRVV)) {
1688
    InterpolateRow = InterpolateRow_RVV;
1689
  }
1690
#endif
1691
1692
733k
  for (j = 0; j < dst_height; ++j) {
1693
728k
    int yi;
1694
728k
    int yf;
1695
728k
    if (y > max_y) {
1696
0
      y = max_y;
1697
0
    }
1698
728k
    yi = y >> 16;
1699
728k
    yf = filtering ? ((y >> 8) & 255) : 0;
1700
728k
    InterpolateRow(dst_argb, src_argb + yi * src_stride, src_stride,
1701
728k
                   dst_width_bytes, yf);
1702
728k
    dst_argb += dst_stride;
1703
728k
    y += dy;
1704
728k
  }
1705
5.16k
}
1706
1707
void ScalePlaneVertical_16(int src_height,
1708
                           int dst_width,
1709
                           int dst_height,
1710
                           int src_stride,
1711
                           int dst_stride,
1712
                           const uint16_t* src_argb,
1713
                           uint16_t* dst_argb,
1714
                           int x,
1715
                           int y,
1716
                           int dy,
1717
                           int wpp, /* words per pixel. normally 1 */
1718
1.58k
                           enum FilterMode filtering) {
1719
  // TODO(fbarchard): Allow higher wpp.
1720
1.58k
  int dst_width_words = dst_width * wpp;
1721
1.58k
  void (*InterpolateRow)(uint16_t* dst_argb, const uint16_t* src_argb,
1722
1.58k
                         ptrdiff_t src_stride, int dst_width,
1723
1.58k
                         int source_y_fraction) = InterpolateRow_16_C;
1724
1.58k
  const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
1725
1.58k
  int j;
1726
1.58k
  assert(wpp >= 1 && wpp <= 2);
1727
1.58k
  assert(src_height != 0);
1728
1.58k
  assert(dst_width > 0);
1729
1.58k
  assert(dst_height > 0);
1730
1.58k
  src_argb += (x >> 16) * wpp;
1731
#if defined(HAS_INTERPOLATEROW_16_SSE2)
1732
  if (TestCpuFlag(kCpuHasSSE2)) {
1733
    InterpolateRow = InterpolateRow_16_Any_SSE2;
1734
    if (IS_ALIGNED(dst_width_words, 16)) {
1735
      InterpolateRow = InterpolateRow_16_SSE2;
1736
    }
1737
  }
1738
#endif
1739
#if defined(HAS_INTERPOLATEROW_16_SSSE3)
1740
  if (TestCpuFlag(kCpuHasSSSE3)) {
1741
    InterpolateRow = InterpolateRow_16_Any_SSSE3;
1742
    if (IS_ALIGNED(dst_width_words, 16)) {
1743
      InterpolateRow = InterpolateRow_16_SSSE3;
1744
    }
1745
  }
1746
#endif
1747
#if defined(HAS_INTERPOLATEROW_16_AVX2)
1748
  if (TestCpuFlag(kCpuHasAVX2)) {
1749
    InterpolateRow = InterpolateRow_16_Any_AVX2;
1750
    if (IS_ALIGNED(dst_width_words, 32)) {
1751
      InterpolateRow = InterpolateRow_16_AVX2;
1752
    }
1753
  }
1754
#endif
1755
#if defined(HAS_INTERPOLATEROW_16_NEON)
1756
  if (TestCpuFlag(kCpuHasNEON)) {
1757
    InterpolateRow = InterpolateRow_16_Any_NEON;
1758
    if (IS_ALIGNED(dst_width_words, 8)) {
1759
      InterpolateRow = InterpolateRow_16_NEON;
1760
    }
1761
  }
1762
#endif
1763
#if defined(HAS_INTERPOLATEROW_16_SME)
1764
  if (TestCpuFlag(kCpuHasSME)) {
1765
    InterpolateRow = InterpolateRow_16_SME;
1766
  }
1767
#endif
1768
658k
  for (j = 0; j < dst_height; ++j) {
1769
657k
    int yi;
1770
657k
    int yf;
1771
657k
    if (y > max_y) {
1772
0
      y = max_y;
1773
0
    }
1774
657k
    yi = y >> 16;
1775
657k
    yf = filtering ? ((y >> 8) & 255) : 0;
1776
657k
    InterpolateRow(dst_argb, src_argb + yi * src_stride, src_stride,
1777
657k
                   dst_width_words, yf);
1778
657k
    dst_argb += dst_stride;
1779
657k
    y += dy;
1780
657k
  }
1781
1.58k
}
1782
1783
// Use scale to convert lsb formats to msb, depending how many bits there are:
1784
// 32768 = 9 bits
1785
// 16384 = 10 bits
1786
// 4096 = 12 bits
1787
// 256 = 16 bits
1788
// TODO(fbarchard): change scale to bits
1789
void ScalePlaneVertical_16To8(int src_height,
1790
                              int dst_width,
1791
                              int dst_height,
1792
                              int src_stride,
1793
                              int dst_stride,
1794
                              const uint16_t* src_argb,
1795
                              uint8_t* dst_argb,
1796
                              int x,
1797
                              int y,
1798
                              int dy,
1799
                              int wpp, /* words per pixel. normally 1 */
1800
                              int scale,
1801
0
                              enum FilterMode filtering) {
1802
  // TODO(fbarchard): Allow higher wpp.
1803
0
  int dst_width_words = dst_width * wpp;
1804
  // TODO(https://crbug.com/libyuv/931): Add NEON 32 bit and AVX2 versions.
1805
0
  void (*InterpolateRow_16To8)(uint8_t* dst_argb, const uint16_t* src_argb,
1806
0
                               ptrdiff_t src_stride, int scale, int dst_width,
1807
0
                               int source_y_fraction) = InterpolateRow_16To8_C;
1808
0
  const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
1809
0
  int j;
1810
0
  assert(wpp >= 1 && wpp <= 2);
1811
0
  assert(src_height != 0);
1812
0
  assert(dst_width > 0);
1813
0
  assert(dst_height > 0);
1814
0
  src_argb += (x >> 16) * wpp;
1815
1816
#if defined(HAS_INTERPOLATEROW_16TO8_NEON)
1817
  if (TestCpuFlag(kCpuHasNEON)) {
1818
    InterpolateRow_16To8 = InterpolateRow_16To8_Any_NEON;
1819
    if (IS_ALIGNED(dst_width, 8)) {
1820
      InterpolateRow_16To8 = InterpolateRow_16To8_NEON;
1821
    }
1822
  }
1823
#endif
1824
#if defined(HAS_INTERPOLATEROW_16TO8_SME)
1825
  if (TestCpuFlag(kCpuHasSME)) {
1826
    InterpolateRow_16To8 = InterpolateRow_16To8_SME;
1827
  }
1828
#endif
1829
0
#if defined(HAS_INTERPOLATEROW_16TO8_AVX2)
1830
0
  if (TestCpuFlag(kCpuHasAVX2)) {
1831
0
    InterpolateRow_16To8 = InterpolateRow_16To8_Any_AVX2;
1832
0
    if (IS_ALIGNED(dst_width, 32)) {
1833
0
      InterpolateRow_16To8 = InterpolateRow_16To8_AVX2;
1834
0
    }
1835
0
  }
1836
0
#endif
1837
0
  for (j = 0; j < dst_height; ++j) {
1838
0
    int yi;
1839
0
    int yf;
1840
0
    if (y > max_y) {
1841
0
      y = max_y;
1842
0
    }
1843
0
    yi = y >> 16;
1844
0
    yf = filtering ? ((y >> 8) & 255) : 0;
1845
0
    InterpolateRow_16To8(dst_argb, src_argb + yi * src_stride, src_stride,
1846
0
                         scale, dst_width_words, yf);
1847
0
    dst_argb += dst_stride;
1848
0
    y += dy;
1849
0
  }
1850
0
}
1851
1852
// Simplify the filtering based on scale factors.
1853
enum FilterMode ScaleFilterReduce(int src_width,
1854
                                  int src_height,
1855
                                  int dst_width,
1856
                                  int dst_height,
1857
44.6k
                                  enum FilterMode filtering) {
1858
44.6k
  if (src_width < 0) {
1859
0
    src_width = -src_width;
1860
0
  }
1861
44.6k
  if (src_height < 0) {
1862
0
    src_height = -src_height;
1863
0
  }
1864
44.6k
  if (filtering == kFilterBox) {
1865
    // If scaling either axis to 0.5 or larger, switch from Box to Bilinear.
1866
31.9k
    if (dst_width * 2 >= src_width || dst_height * 2 >= src_height) {
1867
28.4k
      filtering = kFilterBilinear;
1868
28.4k
    }
1869
31.9k
  }
1870
44.6k
  if (filtering == kFilterBilinear) {
1871
36.6k
    if (src_height == 1) {
1872
2.52k
      filtering = kFilterLinear;
1873
2.52k
    }
1874
    // TODO(fbarchard): Detect any odd scale factor and reduce to Linear.
1875
36.6k
    if (dst_height == src_height || dst_height * 3 == src_height) {
1876
4.54k
      filtering = kFilterLinear;
1877
4.54k
    }
1878
    // TODO(fbarchard): Remove 1 pixel wide filter restriction, which is to
1879
    // avoid reading 2 pixels horizontally that causes memory exception.
1880
36.6k
    if (src_width == 1) {
1881
1.90k
      filtering = kFilterNone;
1882
1.90k
    }
1883
36.6k
  }
1884
44.6k
  if (filtering == kFilterLinear) {
1885
9.35k
    if (src_width == 1) {
1886
0
      filtering = kFilterNone;
1887
0
    }
1888
    // TODO(fbarchard): Detect any odd scale factor and reduce to None.
1889
9.35k
    if (dst_width == src_width || dst_width * 3 == src_width) {
1890
744
      filtering = kFilterNone;
1891
744
    }
1892
9.35k
  }
1893
44.6k
  return filtering;
1894
44.6k
}
1895
1896
// Divide num by div and return as 16.16 fixed point result.
1897
0
int FixedDiv_C(int num, int div) {
1898
0
  return (int)(((int64_t)(num) << 16) / div);
1899
0
}
1900
1901
// Divide num - 1 by div - 1 and return as 16.16 fixed point result.
1902
0
int FixedDiv1_C(int num, int div) {
1903
0
  return (int)((((int64_t)(num) << 16) - 0x00010001) / (div - 1));
1904
0
}
1905
1906
16.2k
#define CENTERSTART(dx, s) (dx < 0) ? -((-dx >> 1) + s) : ((dx >> 1) + s)
1907
1908
// Compute slope values for stepping.
1909
void ScaleSlope(int src_width,
1910
                int src_height,
1911
                int dst_width,
1912
                int dst_height,
1913
                enum FilterMode filtering,
1914
                int* x,
1915
                int* y,
1916
                int* dx,
1917
22.2k
                int* dy) {
1918
22.2k
  assert(x != NULL);
1919
22.2k
  assert(y != NULL);
1920
22.2k
  assert(dx != NULL);
1921
22.2k
  assert(dy != NULL);
1922
22.2k
  assert(src_width != 0);
1923
22.2k
  assert(src_height != 0);
1924
22.2k
  assert(dst_width > 0);
1925
22.2k
  assert(dst_height > 0);
1926
  // Check for 1 pixel and avoid FixedDiv overflow.
1927
22.2k
  if (dst_width == 1 && src_width >= 32768) {
1928
0
    dst_width = src_width;
1929
0
  }
1930
22.2k
  if (dst_height == 1 && src_height >= 32768) {
1931
0
    dst_height = src_height;
1932
0
  }
1933
22.2k
  if (filtering == kFilterBox) {
1934
    // Scale step for point sampling duplicates all pixels equally.
1935
2.24k
    *dx = FixedDiv(Abs(src_width), dst_width);
1936
2.24k
    *dy = FixedDiv(src_height, dst_height);
1937
2.24k
    *x = 0;
1938
2.24k
    *y = 0;
1939
19.9k
  } else if (filtering == kFilterBilinear) {
1940
    // Scale step for bilinear sampling renders last pixel once for upsample.
1941
13.2k
    if (dst_width <= Abs(src_width)) {
1942
4.20k
      *dx = FixedDiv(Abs(src_width), dst_width);
1943
4.20k
      *x = CENTERSTART(*dx, -32768);  // Subtract 0.5 (32768) to center filter.
1944
9.01k
    } else if (src_width > 1 && dst_width > 1) {
1945
9.01k
      *dx = FixedDiv1(Abs(src_width), dst_width);
1946
9.01k
      *x = 0;
1947
9.01k
    }
1948
13.2k
    if (dst_height <= src_height) {
1949
5.49k
      *dy = FixedDiv(src_height, dst_height);
1950
5.49k
      *y = CENTERSTART(*dy, -32768);  // Subtract 0.5 (32768) to center filter.
1951
7.71k
    } else if (src_height > 1 && dst_height > 1) {
1952
7.71k
      *dy = FixedDiv1(src_height, dst_height);
1953
7.71k
      *y = 0;
1954
7.71k
    }
1955
13.2k
  } else if (filtering == kFilterLinear) {
1956
    // Scale step for bilinear sampling renders last pixel once for upsample.
1957
4.95k
    if (dst_width <= Abs(src_width)) {
1958
2.97k
      *dx = FixedDiv(Abs(src_width), dst_width);
1959
2.97k
      *x = CENTERSTART(*dx, -32768);  // Subtract 0.5 (32768) to center filter.
1960
2.97k
    } else if (src_width > 1 && dst_width > 1) {
1961
1.98k
      *dx = FixedDiv1(Abs(src_width), dst_width);
1962
1.98k
      *x = 0;
1963
1.98k
    }
1964
4.95k
    *dy = FixedDiv(src_height, dst_height);
1965
4.95k
    *y = *dy >> 1;
1966
4.95k
  } else {
1967
    // Scale step for point sampling duplicates all pixels equally.
1968
1.79k
    *dx = FixedDiv(Abs(src_width), dst_width);
1969
1.79k
    *dy = FixedDiv(src_height, dst_height);
1970
1.79k
    *x = CENTERSTART(*dx, 0);
1971
1.79k
    *y = CENTERSTART(*dy, 0);
1972
1.79k
  }
1973
  // Negative src_width means horizontally mirror.
1974
22.2k
  if (src_width < 0) {
1975
0
    *x += (dst_width - 1) * *dx;
1976
0
    *dx = -*dx;
1977
    // src_width = -src_width;   // Caller must do this.
1978
0
  }
1979
22.2k
}
1980
#undef CENTERSTART
1981
1982
#ifdef __cplusplus
1983
}  // extern "C"
1984
}  // namespace libyuv
1985
#endif