Coverage Report

Created: 2026-01-18 06:45

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libavif/ext/libyuv/source/scale_common.cc
Line
Count
Source
1
/*
2
 *  Copyright 2013 The LibYuv Project Authors. All rights reserved.
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5
 *  that can be found in the LICENSE file in the root of the source
6
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS. All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
9
 */
10
11
#include "libyuv/scale.h"
12
13
#include <assert.h>
14
#include <string.h>
15
16
#include "libyuv/cpu_id.h"
17
#include "libyuv/planar_functions.h"  // For CopyARGB
18
#include "libyuv/row.h"
19
#include "libyuv/scale_row.h"
20
21
#ifdef __cplusplus
22
namespace libyuv {
23
extern "C" {
24
#endif
25
26
#ifdef __cplusplus
27
0
#define STATIC_CAST(type, expr) static_cast<type>(expr)
28
#else
29
#define STATIC_CAST(type, expr) (type)(expr)
30
#endif
31
32
// TODO(fbarchard): make clamp255 preserve negative values.
33
0
static __inline int32_t clamp255(int32_t v) {
34
0
  return (-(v >= 255) | v) & 255;
35
0
}
36
37
// Use scale to convert lsb formats to msb, depending how many bits there are:
38
// 32768 = 9 bits
39
// 16384 = 10 bits
40
// 4096 = 12 bits
41
// 256 = 16 bits
42
// TODO(fbarchard): change scale to bits
43
#define C16TO8(v, scale) clamp255(((v) * (scale)) >> 16)
44
45
56.2k
static __inline int Abs(int v) {
46
56.2k
  return v >= 0 ? v : -v;
47
56.2k
}
48
49
// CPU agnostic row functions
50
void ScaleRowDown2_C(const uint8_t* src_ptr,
51
                     ptrdiff_t src_stride,
52
                     uint8_t* dst,
53
0
                     int dst_width) {
54
0
  int x;
55
0
  (void)src_stride;
56
0
  for (x = 0; x < dst_width - 1; x += 2) {
57
0
    dst[0] = src_ptr[1];
58
0
    dst[1] = src_ptr[3];
59
0
    dst += 2;
60
0
    src_ptr += 4;
61
0
  }
62
0
  if (dst_width & 1) {
63
0
    dst[0] = src_ptr[1];
64
0
  }
65
0
}
66
67
void ScaleRowDown2_16_C(const uint16_t* src_ptr,
68
                        ptrdiff_t src_stride,
69
                        uint16_t* dst,
70
0
                        int dst_width) {
71
0
  int x;
72
0
  (void)src_stride;
73
0
  for (x = 0; x < dst_width - 1; x += 2) {
74
0
    dst[0] = src_ptr[1];
75
0
    dst[1] = src_ptr[3];
76
0
    dst += 2;
77
0
    src_ptr += 4;
78
0
  }
79
0
  if (dst_width & 1) {
80
0
    dst[0] = src_ptr[1];
81
0
  }
82
0
}
83
84
void ScaleRowDown2_16To8_C(const uint16_t* src_ptr,
85
                           ptrdiff_t src_stride,
86
                           uint8_t* dst,
87
                           int dst_width,
88
0
                           int scale) {
89
0
  int x;
90
0
  (void)src_stride;
91
0
  assert(scale >= 256);
92
0
  assert(scale <= 32768);
93
0
  for (x = 0; x < dst_width - 1; x += 2) {
94
0
    dst[0] = STATIC_CAST(uint8_t, C16TO8(src_ptr[1], scale));
95
0
    dst[1] = STATIC_CAST(uint8_t, C16TO8(src_ptr[3], scale));
96
0
    dst += 2;
97
0
    src_ptr += 4;
98
0
  }
99
0
  if (dst_width & 1) {
100
0
    dst[0] = STATIC_CAST(uint8_t, C16TO8(src_ptr[1], scale));
101
0
  }
102
0
}
103
104
void ScaleRowDown2_16To8_Odd_C(const uint16_t* src_ptr,
105
                               ptrdiff_t src_stride,
106
                               uint8_t* dst,
107
                               int dst_width,
108
0
                               int scale) {
109
0
  int x;
110
0
  (void)src_stride;
111
0
  assert(scale >= 256);
112
0
  assert(scale <= 32768);
113
0
  dst_width -= 1;
114
0
  for (x = 0; x < dst_width - 1; x += 2) {
115
0
    dst[0] = STATIC_CAST(uint8_t, C16TO8(src_ptr[1], scale));
116
0
    dst[1] = STATIC_CAST(uint8_t, C16TO8(src_ptr[3], scale));
117
0
    dst += 2;
118
0
    src_ptr += 4;
119
0
  }
120
0
  if (dst_width & 1) {
121
0
    dst[0] = STATIC_CAST(uint8_t, C16TO8(src_ptr[1], scale));
122
0
    dst += 1;
123
0
    src_ptr += 2;
124
0
  }
125
0
  dst[0] = STATIC_CAST(uint8_t, C16TO8(src_ptr[0], scale));
126
0
}
127
128
void ScaleRowDown2Linear_C(const uint8_t* src_ptr,
129
                           ptrdiff_t src_stride,
130
                           uint8_t* dst,
131
0
                           int dst_width) {
132
0
  const uint8_t* s = src_ptr;
133
0
  int x;
134
0
  (void)src_stride;
135
0
  for (x = 0; x < dst_width - 1; x += 2) {
136
0
    dst[0] = (s[0] + s[1] + 1) >> 1;
137
0
    dst[1] = (s[2] + s[3] + 1) >> 1;
138
0
    dst += 2;
139
0
    s += 4;
140
0
  }
141
0
  if (dst_width & 1) {
142
0
    dst[0] = (s[0] + s[1] + 1) >> 1;
143
0
  }
144
0
}
145
146
void ScaleRowDown2Linear_16_C(const uint16_t* src_ptr,
147
                              ptrdiff_t src_stride,
148
                              uint16_t* dst,
149
0
                              int dst_width) {
150
0
  const uint16_t* s = src_ptr;
151
0
  int x;
152
0
  (void)src_stride;
153
0
  for (x = 0; x < dst_width - 1; x += 2) {
154
0
    dst[0] = (s[0] + s[1] + 1) >> 1;
155
0
    dst[1] = (s[2] + s[3] + 1) >> 1;
156
0
    dst += 2;
157
0
    s += 4;
158
0
  }
159
0
  if (dst_width & 1) {
160
0
    dst[0] = (s[0] + s[1] + 1) >> 1;
161
0
  }
162
0
}
163
164
void ScaleRowDown2Linear_16To8_C(const uint16_t* src_ptr,
165
                                 ptrdiff_t src_stride,
166
                                 uint8_t* dst,
167
                                 int dst_width,
168
0
                                 int scale) {
169
0
  const uint16_t* s = src_ptr;
170
0
  int x;
171
0
  (void)src_stride;
172
0
  assert(scale >= 256);
173
0
  assert(scale <= 32768);
174
0
  for (x = 0; x < dst_width - 1; x += 2) {
175
0
    dst[0] = STATIC_CAST(uint8_t, C16TO8((s[0] + s[1] + 1) >> 1, scale));
176
0
    dst[1] = STATIC_CAST(uint8_t, C16TO8((s[2] + s[3] + 1) >> 1, scale));
177
0
    dst += 2;
178
0
    s += 4;
179
0
  }
180
0
  if (dst_width & 1) {
181
0
    dst[0] = STATIC_CAST(uint8_t, C16TO8((s[0] + s[1] + 1) >> 1, scale));
182
0
  }
183
0
}
184
185
void ScaleRowDown2Linear_16To8_Odd_C(const uint16_t* src_ptr,
186
                                     ptrdiff_t src_stride,
187
                                     uint8_t* dst,
188
                                     int dst_width,
189
0
                                     int scale) {
190
0
  const uint16_t* s = src_ptr;
191
0
  int x;
192
0
  (void)src_stride;
193
0
  assert(scale >= 256);
194
0
  assert(scale <= 32768);
195
0
  dst_width -= 1;
196
0
  for (x = 0; x < dst_width - 1; x += 2) {
197
0
    dst[0] = STATIC_CAST(uint8_t, C16TO8((s[0] + s[1] + 1) >> 1, scale));
198
0
    dst[1] = STATIC_CAST(uint8_t, C16TO8((s[2] + s[3] + 1) >> 1, scale));
199
0
    dst += 2;
200
0
    s += 4;
201
0
  }
202
0
  if (dst_width & 1) {
203
0
    dst[0] = STATIC_CAST(uint8_t, C16TO8((s[0] + s[1] + 1) >> 1, scale));
204
0
    dst += 1;
205
0
    s += 2;
206
0
  }
207
0
  dst[0] = STATIC_CAST(uint8_t, C16TO8(s[0], scale));
208
0
}
209
210
void ScaleRowDown2Box_C(const uint8_t* src_ptr,
211
                        ptrdiff_t src_stride,
212
                        uint8_t* dst,
213
1.13k
                        int dst_width) {
214
1.13k
  const uint8_t* s = src_ptr;
215
1.13k
  const uint8_t* t = src_ptr + src_stride;
216
1.13k
  int x;
217
9.57k
  for (x = 0; x < dst_width - 1; x += 2) {
218
8.44k
    dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
219
8.44k
    dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
220
8.44k
    dst += 2;
221
8.44k
    s += 4;
222
8.44k
    t += 4;
223
8.44k
  }
224
1.13k
  if (dst_width & 1) {
225
260
    dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
226
260
  }
227
1.13k
}
228
229
void ScaleRowDown2Box_Odd_C(const uint8_t* src_ptr,
230
                            ptrdiff_t src_stride,
231
                            uint8_t* dst,
232
0
                            int dst_width) {
233
0
  const uint8_t* s = src_ptr;
234
0
  const uint8_t* t = src_ptr + src_stride;
235
0
  int x;
236
0
  dst_width -= 1;
237
0
  for (x = 0; x < dst_width - 1; x += 2) {
238
0
    dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
239
0
    dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
240
0
    dst += 2;
241
0
    s += 4;
242
0
    t += 4;
243
0
  }
244
0
  if (dst_width & 1) {
245
0
    dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
246
0
    dst += 1;
247
0
    s += 2;
248
0
    t += 2;
249
0
  }
250
0
  dst[0] = (s[0] + t[0] + 1) >> 1;
251
0
}
252
253
void ScaleRowDown2Box_16_C(const uint16_t* src_ptr,
254
                           ptrdiff_t src_stride,
255
                           uint16_t* dst,
256
1.28k
                           int dst_width) {
257
1.28k
  const uint16_t* s = src_ptr;
258
1.28k
  const uint16_t* t = src_ptr + src_stride;
259
1.28k
  int x;
260
22.2k
  for (x = 0; x < dst_width - 1; x += 2) {
261
21.0k
    dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
262
21.0k
    dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
263
21.0k
    dst += 2;
264
21.0k
    s += 4;
265
21.0k
    t += 4;
266
21.0k
  }
267
1.28k
  if (dst_width & 1) {
268
254
    dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
269
254
  }
270
1.28k
}
271
272
void ScaleRowDown2Box_16To8_C(const uint16_t* src_ptr,
273
                              ptrdiff_t src_stride,
274
                              uint8_t* dst,
275
                              int dst_width,
276
0
                              int scale) {
277
0
  const uint16_t* s = src_ptr;
278
0
  const uint16_t* t = src_ptr + src_stride;
279
0
  int x;
280
0
  assert(scale >= 256);
281
0
  assert(scale <= 32768);
282
0
  for (x = 0; x < dst_width - 1; x += 2) {
283
0
    dst[0] = STATIC_CAST(uint8_t,
284
0
                         C16TO8((s[0] + s[1] + t[0] + t[1] + 2) >> 2, scale));
285
0
    dst[1] = STATIC_CAST(uint8_t,
286
0
                         C16TO8((s[2] + s[3] + t[2] + t[3] + 2) >> 2, scale));
287
0
    dst += 2;
288
0
    s += 4;
289
0
    t += 4;
290
0
  }
291
0
  if (dst_width & 1) {
292
0
    dst[0] = STATIC_CAST(uint8_t,
293
0
                         C16TO8((s[0] + s[1] + t[0] + t[1] + 2) >> 2, scale));
294
0
  }
295
0
}
296
297
void ScaleRowDown2Box_16To8_Odd_C(const uint16_t* src_ptr,
298
                                  ptrdiff_t src_stride,
299
                                  uint8_t* dst,
300
                                  int dst_width,
301
0
                                  int scale) {
302
0
  const uint16_t* s = src_ptr;
303
0
  const uint16_t* t = src_ptr + src_stride;
304
0
  int x;
305
0
  assert(scale >= 256);
306
0
  assert(scale <= 32768);
307
0
  dst_width -= 1;
308
0
  for (x = 0; x < dst_width - 1; x += 2) {
309
0
    dst[0] = STATIC_CAST(uint8_t,
310
0
                         C16TO8((s[0] + s[1] + t[0] + t[1] + 2) >> 2, scale));
311
0
    dst[1] = STATIC_CAST(uint8_t,
312
0
                         C16TO8((s[2] + s[3] + t[2] + t[3] + 2) >> 2, scale));
313
0
    dst += 2;
314
0
    s += 4;
315
0
    t += 4;
316
0
  }
317
0
  if (dst_width & 1) {
318
0
    dst[0] = STATIC_CAST(uint8_t,
319
0
                         C16TO8((s[0] + s[1] + t[0] + t[1] + 2) >> 2, scale));
320
0
    dst += 1;
321
0
    s += 2;
322
0
    t += 2;
323
0
  }
324
0
  dst[0] = STATIC_CAST(uint8_t, C16TO8((s[0] + t[0] + 1) >> 1, scale));
325
0
}
326
327
void ScaleRowDown4_C(const uint8_t* src_ptr,
328
                     ptrdiff_t src_stride,
329
                     uint8_t* dst,
330
0
                     int dst_width) {
331
0
  int x;
332
0
  (void)src_stride;
333
0
  for (x = 0; x < dst_width - 1; x += 2) {
334
0
    dst[0] = src_ptr[2];
335
0
    dst[1] = src_ptr[6];
336
0
    dst += 2;
337
0
    src_ptr += 8;
338
0
  }
339
0
  if (dst_width & 1) {
340
0
    dst[0] = src_ptr[2];
341
0
  }
342
0
}
343
344
void ScaleRowDown4_16_C(const uint16_t* src_ptr,
345
                        ptrdiff_t src_stride,
346
                        uint16_t* dst,
347
0
                        int dst_width) {
348
0
  int x;
349
0
  (void)src_stride;
350
0
  for (x = 0; x < dst_width - 1; x += 2) {
351
0
    dst[0] = src_ptr[2];
352
0
    dst[1] = src_ptr[6];
353
0
    dst += 2;
354
0
    src_ptr += 8;
355
0
  }
356
0
  if (dst_width & 1) {
357
0
    dst[0] = src_ptr[2];
358
0
  }
359
0
}
360
361
void ScaleRowDown4Box_C(const uint8_t* src_ptr,
362
                        ptrdiff_t src_stride,
363
                        uint8_t* dst,
364
716
                        int dst_width) {
365
716
  intptr_t stride = src_stride;
366
716
  int x;
367
2.38k
  for (x = 0; x < dst_width - 1; x += 2) {
368
1.66k
    dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
369
1.66k
              src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] +
370
1.66k
              src_ptr[stride + 3] + src_ptr[stride * 2 + 0] +
371
1.66k
              src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2] +
372
1.66k
              src_ptr[stride * 2 + 3] + src_ptr[stride * 3 + 0] +
373
1.66k
              src_ptr[stride * 3 + 1] + src_ptr[stride * 3 + 2] +
374
1.66k
              src_ptr[stride * 3 + 3] + 8) >>
375
1.66k
             4;
376
1.66k
    dst[1] = (src_ptr[4] + src_ptr[5] + src_ptr[6] + src_ptr[7] +
377
1.66k
              src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride + 6] +
378
1.66k
              src_ptr[stride + 7] + src_ptr[stride * 2 + 4] +
379
1.66k
              src_ptr[stride * 2 + 5] + src_ptr[stride * 2 + 6] +
380
1.66k
              src_ptr[stride * 2 + 7] + src_ptr[stride * 3 + 4] +
381
1.66k
              src_ptr[stride * 3 + 5] + src_ptr[stride * 3 + 6] +
382
1.66k
              src_ptr[stride * 3 + 7] + 8) >>
383
1.66k
             4;
384
1.66k
    dst += 2;
385
1.66k
    src_ptr += 8;
386
1.66k
  }
387
716
  if (dst_width & 1) {
388
364
    dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
389
364
              src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] +
390
364
              src_ptr[stride + 3] + src_ptr[stride * 2 + 0] +
391
364
              src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2] +
392
364
              src_ptr[stride * 2 + 3] + src_ptr[stride * 3 + 0] +
393
364
              src_ptr[stride * 3 + 1] + src_ptr[stride * 3 + 2] +
394
364
              src_ptr[stride * 3 + 3] + 8) >>
395
364
             4;
396
364
  }
397
716
}
398
399
void ScaleRowDown4Box_16_C(const uint16_t* src_ptr,
400
                           ptrdiff_t src_stride,
401
                           uint16_t* dst,
402
599
                           int dst_width) {
403
599
  intptr_t stride = src_stride;
404
599
  int x;
405
1.27k
  for (x = 0; x < dst_width - 1; x += 2) {
406
672
    dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
407
672
              src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] +
408
672
              src_ptr[stride + 3] + src_ptr[stride * 2 + 0] +
409
672
              src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2] +
410
672
              src_ptr[stride * 2 + 3] + src_ptr[stride * 3 + 0] +
411
672
              src_ptr[stride * 3 + 1] + src_ptr[stride * 3 + 2] +
412
672
              src_ptr[stride * 3 + 3] + 8) >>
413
672
             4;
414
672
    dst[1] = (src_ptr[4] + src_ptr[5] + src_ptr[6] + src_ptr[7] +
415
672
              src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride + 6] +
416
672
              src_ptr[stride + 7] + src_ptr[stride * 2 + 4] +
417
672
              src_ptr[stride * 2 + 5] + src_ptr[stride * 2 + 6] +
418
672
              src_ptr[stride * 2 + 7] + src_ptr[stride * 3 + 4] +
419
672
              src_ptr[stride * 3 + 5] + src_ptr[stride * 3 + 6] +
420
672
              src_ptr[stride * 3 + 7] + 8) >>
421
672
             4;
422
672
    dst += 2;
423
672
    src_ptr += 8;
424
672
  }
425
599
  if (dst_width & 1) {
426
325
    dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
427
325
              src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] +
428
325
              src_ptr[stride + 3] + src_ptr[stride * 2 + 0] +
429
325
              src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2] +
430
325
              src_ptr[stride * 2 + 3] + src_ptr[stride * 3 + 0] +
431
325
              src_ptr[stride * 3 + 1] + src_ptr[stride * 3 + 2] +
432
325
              src_ptr[stride * 3 + 3] + 8) >>
433
325
             4;
434
325
  }
435
599
}
436
437
void ScaleRowDown34_C(const uint8_t* src_ptr,
438
                      ptrdiff_t src_stride,
439
                      uint8_t* dst,
440
0
                      int dst_width) {
441
0
  int x;
442
0
  (void)src_stride;
443
0
  assert((dst_width % 3 == 0) && (dst_width > 0));
444
0
  for (x = 0; x < dst_width; x += 3) {
445
0
    dst[0] = src_ptr[0];
446
0
    dst[1] = src_ptr[1];
447
0
    dst[2] = src_ptr[3];
448
0
    dst += 3;
449
0
    src_ptr += 4;
450
0
  }
451
0
}
452
453
void ScaleRowDown34_16_C(const uint16_t* src_ptr,
454
                         ptrdiff_t src_stride,
455
                         uint16_t* dst,
456
0
                         int dst_width) {
457
0
  int x;
458
0
  (void)src_stride;
459
0
  assert((dst_width % 3 == 0) && (dst_width > 0));
460
0
  for (x = 0; x < dst_width; x += 3) {
461
0
    dst[0] = src_ptr[0];
462
0
    dst[1] = src_ptr[1];
463
0
    dst[2] = src_ptr[3];
464
0
    dst += 3;
465
0
    src_ptr += 4;
466
0
  }
467
0
}
468
469
// Filter rows 0 and 1 together, 3 : 1
470
void ScaleRowDown34_0_Box_C(const uint8_t* src_ptr,
471
                            ptrdiff_t src_stride,
472
                            uint8_t* d,
473
1.26k
                            int dst_width) {
474
1.26k
  const uint8_t* s = src_ptr;
475
1.26k
  const uint8_t* t = src_ptr + src_stride;
476
1.26k
  int x;
477
1.26k
  assert((dst_width % 3 == 0) && (dst_width > 0));
478
3.86k
  for (x = 0; x < dst_width; x += 3) {
479
2.60k
    uint8_t a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
480
2.60k
    uint8_t a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
481
2.60k
    uint8_t a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
482
2.60k
    uint8_t b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
483
2.60k
    uint8_t b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
484
2.60k
    uint8_t b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
485
2.60k
    d[0] = (a0 * 3 + b0 + 2) >> 2;
486
2.60k
    d[1] = (a1 * 3 + b1 + 2) >> 2;
487
2.60k
    d[2] = (a2 * 3 + b2 + 2) >> 2;
488
2.60k
    d += 3;
489
2.60k
    s += 4;
490
2.60k
    t += 4;
491
2.60k
  }
492
1.26k
}
493
494
void ScaleRowDown34_0_Box_16_C(const uint16_t* src_ptr,
495
                               ptrdiff_t src_stride,
496
                               uint16_t* d,
497
1.44k
                               int dst_width) {
498
1.44k
  const uint16_t* s = src_ptr;
499
1.44k
  const uint16_t* t = src_ptr + src_stride;
500
1.44k
  int x;
501
1.44k
  assert((dst_width % 3 == 0) && (dst_width > 0));
502
8.93k
  for (x = 0; x < dst_width; x += 3) {
503
7.48k
    uint16_t a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
504
7.48k
    uint16_t a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
505
7.48k
    uint16_t a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
506
7.48k
    uint16_t b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
507
7.48k
    uint16_t b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
508
7.48k
    uint16_t b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
509
7.48k
    d[0] = (a0 * 3 + b0 + 2) >> 2;
510
7.48k
    d[1] = (a1 * 3 + b1 + 2) >> 2;
511
7.48k
    d[2] = (a2 * 3 + b2 + 2) >> 2;
512
7.48k
    d += 3;
513
7.48k
    s += 4;
514
7.48k
    t += 4;
515
7.48k
  }
516
1.44k
}
517
518
// Filter rows 1 and 2 together, 1 : 1
519
void ScaleRowDown34_1_Box_C(const uint8_t* src_ptr,
520
                            ptrdiff_t src_stride,
521
                            uint8_t* d,
522
630
                            int dst_width) {
523
630
  const uint8_t* s = src_ptr;
524
630
  const uint8_t* t = src_ptr + src_stride;
525
630
  int x;
526
630
  assert((dst_width % 3 == 0) && (dst_width > 0));
527
1.93k
  for (x = 0; x < dst_width; x += 3) {
528
1.30k
    uint8_t a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
529
1.30k
    uint8_t a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
530
1.30k
    uint8_t a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
531
1.30k
    uint8_t b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
532
1.30k
    uint8_t b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
533
1.30k
    uint8_t b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
534
1.30k
    d[0] = (a0 + b0 + 1) >> 1;
535
1.30k
    d[1] = (a1 + b1 + 1) >> 1;
536
1.30k
    d[2] = (a2 + b2 + 1) >> 1;
537
1.30k
    d += 3;
538
1.30k
    s += 4;
539
1.30k
    t += 4;
540
1.30k
  }
541
630
}
542
543
void ScaleRowDown34_1_Box_16_C(const uint16_t* src_ptr,
544
                               ptrdiff_t src_stride,
545
                               uint16_t* d,
546
723
                               int dst_width) {
547
723
  const uint16_t* s = src_ptr;
548
723
  const uint16_t* t = src_ptr + src_stride;
549
723
  int x;
550
723
  assert((dst_width % 3 == 0) && (dst_width > 0));
551
4.46k
  for (x = 0; x < dst_width; x += 3) {
552
3.74k
    uint16_t a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
553
3.74k
    uint16_t a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
554
3.74k
    uint16_t a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
555
3.74k
    uint16_t b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
556
3.74k
    uint16_t b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
557
3.74k
    uint16_t b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
558
3.74k
    d[0] = (a0 + b0 + 1) >> 1;
559
3.74k
    d[1] = (a1 + b1 + 1) >> 1;
560
3.74k
    d[2] = (a2 + b2 + 1) >> 1;
561
3.74k
    d += 3;
562
3.74k
    s += 4;
563
3.74k
    t += 4;
564
3.74k
  }
565
723
}
566
567
// Sample position: (O is src sample position, X is dst sample position)
568
//
569
//      v dst_ptr at here           v stop at here
570
//  X O X   X O X   X O X   X O X   X O X
571
//    ^ src_ptr at here
572
void ScaleRowUp2_Linear_C(const uint8_t* src_ptr,
573
                          uint8_t* dst_ptr,
574
453k
                          int dst_width) {
575
453k
  int src_width = dst_width >> 1;
576
453k
  int x;
577
453k
  assert((dst_width % 2 == 0) && (dst_width >= 0));
578
3.74M
  for (x = 0; x < src_width; ++x) {
579
3.29M
    dst_ptr[2 * x + 0] = (src_ptr[x + 0] * 3 + src_ptr[x + 1] * 1 + 2) >> 2;
580
3.29M
    dst_ptr[2 * x + 1] = (src_ptr[x + 0] * 1 + src_ptr[x + 1] * 3 + 2) >> 2;
581
3.29M
  }
582
453k
}
583
584
// Sample position: (O is src sample position, X is dst sample position)
585
//
586
//    src_ptr at here
587
//  X v X   X   X   X   X   X   X   X   X
588
//    O       O       O       O       O
589
//  X   X   X   X   X   X   X   X   X   X
590
//      ^ dst_ptr at here           ^ stop at here
591
//  X   X   X   X   X   X   X   X   X   X
592
//    O       O       O       O       O
593
//  X   X   X   X   X   X   X   X   X   X
594
void ScaleRowUp2_Bilinear_C(const uint8_t* src_ptr,
595
                            ptrdiff_t src_stride,
596
                            uint8_t* dst_ptr,
597
                            ptrdiff_t dst_stride,
598
20.1k
                            int dst_width) {
599
20.1k
  const uint8_t* s = src_ptr;
600
20.1k
  const uint8_t* t = src_ptr + src_stride;
601
20.1k
  uint8_t* d = dst_ptr;
602
20.1k
  uint8_t* e = dst_ptr + dst_stride;
603
20.1k
  int src_width = dst_width >> 1;
604
20.1k
  int x;
605
20.1k
  assert((dst_width % 2 == 0) && (dst_width >= 0));
606
269k
  for (x = 0; x < src_width; ++x) {
607
249k
    d[2 * x + 0] =
608
249k
        (s[x + 0] * 9 + s[x + 1] * 3 + t[x + 0] * 3 + t[x + 1] * 1 + 8) >> 4;
609
249k
    d[2 * x + 1] =
610
249k
        (s[x + 0] * 3 + s[x + 1] * 9 + t[x + 0] * 1 + t[x + 1] * 3 + 8) >> 4;
611
249k
    e[2 * x + 0] =
612
249k
        (s[x + 0] * 3 + s[x + 1] * 1 + t[x + 0] * 9 + t[x + 1] * 3 + 8) >> 4;
613
249k
    e[2 * x + 1] =
614
249k
        (s[x + 0] * 1 + s[x + 1] * 3 + t[x + 0] * 3 + t[x + 1] * 9 + 8) >> 4;
615
249k
  }
616
20.1k
}
617
618
// Only suitable for at most 14 bit range.
619
void ScaleRowUp2_Linear_16_C(const uint16_t* src_ptr,
620
                             uint16_t* dst_ptr,
621
390k
                             int dst_width) {
622
390k
  int src_width = dst_width >> 1;
623
390k
  int x;
624
390k
  assert((dst_width % 2 == 0) && (dst_width >= 0));
625
2.54M
  for (x = 0; x < src_width; ++x) {
626
2.15M
    dst_ptr[2 * x + 0] = (src_ptr[x + 0] * 3 + src_ptr[x + 1] * 1 + 2) >> 2;
627
2.15M
    dst_ptr[2 * x + 1] = (src_ptr[x + 0] * 1 + src_ptr[x + 1] * 3 + 2) >> 2;
628
2.15M
  }
629
390k
}
630
631
// Only suitable for at most 12bit range.
632
void ScaleRowUp2_Bilinear_16_C(const uint16_t* src_ptr,
633
                               ptrdiff_t src_stride,
634
                               uint16_t* dst_ptr,
635
                               ptrdiff_t dst_stride,
636
17.5k
                               int dst_width) {
637
17.5k
  const uint16_t* s = src_ptr;
638
17.5k
  const uint16_t* t = src_ptr + src_stride;
639
17.5k
  uint16_t* d = dst_ptr;
640
17.5k
  uint16_t* e = dst_ptr + dst_stride;
641
17.5k
  int src_width = dst_width >> 1;
642
17.5k
  int x;
643
17.5k
  assert((dst_width % 2 == 0) && (dst_width >= 0));
644
127k
  for (x = 0; x < src_width; ++x) {
645
109k
    d[2 * x + 0] =
646
109k
        (s[x + 0] * 9 + s[x + 1] * 3 + t[x + 0] * 3 + t[x + 1] * 1 + 8) >> 4;
647
109k
    d[2 * x + 1] =
648
109k
        (s[x + 0] * 3 + s[x + 1] * 9 + t[x + 0] * 1 + t[x + 1] * 3 + 8) >> 4;
649
109k
    e[2 * x + 0] =
650
109k
        (s[x + 0] * 3 + s[x + 1] * 1 + t[x + 0] * 9 + t[x + 1] * 3 + 8) >> 4;
651
109k
    e[2 * x + 1] =
652
109k
        (s[x + 0] * 1 + s[x + 1] * 3 + t[x + 0] * 3 + t[x + 1] * 9 + 8) >> 4;
653
109k
  }
654
17.5k
}
655
656
// Scales a single row of pixels using point sampling.
657
void ScaleCols_C(uint8_t* dst_ptr,
658
                 const uint8_t* src_ptr,
659
                 int dst_width,
660
                 int x,
661
269k
                 int dx) {
662
269k
  int j;
663
101M
  for (j = 0; j < dst_width - 1; j += 2) {
664
101M
    dst_ptr[0] = src_ptr[x >> 16];
665
101M
    x += dx;
666
101M
    dst_ptr[1] = src_ptr[x >> 16];
667
101M
    x += dx;
668
101M
    dst_ptr += 2;
669
101M
  }
670
269k
  if (dst_width & 1) {
671
166k
    dst_ptr[0] = src_ptr[x >> 16];
672
166k
  }
673
269k
}
674
675
void ScaleCols_16_C(uint16_t* dst_ptr,
676
                    const uint16_t* src_ptr,
677
                    int dst_width,
678
                    int x,
679
453k
                    int dx) {
680
453k
  int j;
681
182M
  for (j = 0; j < dst_width - 1; j += 2) {
682
181M
    dst_ptr[0] = src_ptr[x >> 16];
683
181M
    x += dx;
684
181M
    dst_ptr[1] = src_ptr[x >> 16];
685
181M
    x += dx;
686
181M
    dst_ptr += 2;
687
181M
  }
688
453k
  if (dst_width & 1) {
689
254k
    dst_ptr[0] = src_ptr[x >> 16];
690
254k
  }
691
453k
}
692
693
// Scales a single row of pixels up by 2x using point sampling.
694
void ScaleColsUp2_C(uint8_t* dst_ptr,
695
                    const uint8_t* src_ptr,
696
                    int dst_width,
697
                    int x,
698
64.7k
                    int dx) {
699
64.7k
  int j;
700
64.7k
  (void)x;
701
64.7k
  (void)dx;
702
129k
  for (j = 0; j < dst_width - 1; j += 2) {
703
64.7k
    dst_ptr[1] = dst_ptr[0] = src_ptr[0];
704
64.7k
    src_ptr += 1;
705
64.7k
    dst_ptr += 2;
706
64.7k
  }
707
64.7k
  if (dst_width & 1) {
708
0
    dst_ptr[0] = src_ptr[0];
709
0
  }
710
64.7k
}
711
712
void ScaleColsUp2_16_C(uint16_t* dst_ptr,
713
                       const uint16_t* src_ptr,
714
                       int dst_width,
715
                       int x,
716
38.2k
                       int dx) {
717
38.2k
  int j;
718
38.2k
  (void)x;
719
38.2k
  (void)dx;
720
76.4k
  for (j = 0; j < dst_width - 1; j += 2) {
721
38.2k
    dst_ptr[1] = dst_ptr[0] = src_ptr[0];
722
38.2k
    src_ptr += 1;
723
38.2k
    dst_ptr += 2;
724
38.2k
  }
725
38.2k
  if (dst_width & 1) {
726
0
    dst_ptr[0] = src_ptr[0];
727
0
  }
728
38.2k
}
729
730
// (1-f)a + fb can be replaced with a + f(b-a)
731
#if defined(__arm__) || defined(__aarch64__)
732
#define BLENDER(a, b, f) \
733
  (uint8_t)((int)(a) + ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16))
734
#else
735
// Intel uses 7 bit math with rounding.
736
#define BLENDER(a, b, f) \
737
0
  (uint8_t)((int)(a) + (((int)((f) >> 9) * ((int)(b) - (int)(a)) + 0x40) >> 7))
738
#endif
739
740
void ScaleFilterCols_C(uint8_t* dst_ptr,
741
                       const uint8_t* src_ptr,
742
                       int dst_width,
743
                       int x,
744
0
                       int dx) {
745
0
  int j;
746
0
  for (j = 0; j < dst_width - 1; j += 2) {
747
0
    int xi = x >> 16;
748
0
    int a = src_ptr[xi];
749
0
    int b = src_ptr[xi + 1];
750
0
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
751
0
    x += dx;
752
0
    xi = x >> 16;
753
0
    a = src_ptr[xi];
754
0
    b = src_ptr[xi + 1];
755
0
    dst_ptr[1] = BLENDER(a, b, x & 0xffff);
756
0
    x += dx;
757
0
    dst_ptr += 2;
758
0
  }
759
0
  if (dst_width & 1) {
760
0
    int xi = x >> 16;
761
0
    int a = src_ptr[xi];
762
0
    int b = src_ptr[xi + 1];
763
0
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
764
0
  }
765
0
}
766
767
void ScaleFilterCols64_C(uint8_t* dst_ptr,
768
                         const uint8_t* src_ptr,
769
                         int dst_width,
770
                         int x32,
771
0
                         int dx) {
772
0
  int64_t x = (int64_t)(x32);
773
0
  int j;
774
0
  for (j = 0; j < dst_width - 1; j += 2) {
775
0
    int64_t xi = x >> 16;
776
0
    int a = src_ptr[xi];
777
0
    int b = src_ptr[xi + 1];
778
0
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
779
0
    x += dx;
780
0
    xi = x >> 16;
781
0
    a = src_ptr[xi];
782
0
    b = src_ptr[xi + 1];
783
0
    dst_ptr[1] = BLENDER(a, b, x & 0xffff);
784
0
    x += dx;
785
0
    dst_ptr += 2;
786
0
  }
787
0
  if (dst_width & 1) {
788
0
    int64_t xi = x >> 16;
789
0
    int a = src_ptr[xi];
790
0
    int b = src_ptr[xi + 1];
791
0
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
792
0
  }
793
0
}
794
#undef BLENDER
795
796
// Same as 8 bit arm blender but return is cast to uint16_t
797
#define BLENDER(a, b, f) \
798
392M
  (uint16_t)(            \
799
392M
      (int)(a) +         \
800
392M
      (int)((((int64_t)((f)) * ((int64_t)(b) - (int)(a))) + 0x8000) >> 16))
801
802
void ScaleFilterCols_16_C(uint16_t* dst_ptr,
803
                          const uint16_t* src_ptr,
804
                          int dst_width,
805
                          int x,
806
738k
                          int dx) {
807
738k
  int j;
808
196M
  for (j = 0; j < dst_width - 1; j += 2) {
809
196M
    int xi = x >> 16;
810
196M
    int a = src_ptr[xi];
811
196M
    int b = src_ptr[xi + 1];
812
196M
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
813
196M
    x += dx;
814
196M
    xi = x >> 16;
815
196M
    a = src_ptr[xi];
816
196M
    b = src_ptr[xi + 1];
817
196M
    dst_ptr[1] = BLENDER(a, b, x & 0xffff);
818
196M
    x += dx;
819
196M
    dst_ptr += 2;
820
196M
  }
821
738k
  if (dst_width & 1) {
822
256k
    int xi = x >> 16;
823
256k
    int a = src_ptr[xi];
824
256k
    int b = src_ptr[xi + 1];
825
256k
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
826
256k
  }
827
738k
}
828
829
void ScaleFilterCols64_16_C(uint16_t* dst_ptr,
830
                            const uint16_t* src_ptr,
831
                            int dst_width,
832
                            int x32,
833
0
                            int dx) {
834
0
  int64_t x = (int64_t)(x32);
835
0
  int j;
836
0
  for (j = 0; j < dst_width - 1; j += 2) {
837
0
    int64_t xi = x >> 16;
838
0
    int a = src_ptr[xi];
839
0
    int b = src_ptr[xi + 1];
840
0
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
841
0
    x += dx;
842
0
    xi = x >> 16;
843
0
    a = src_ptr[xi];
844
0
    b = src_ptr[xi + 1];
845
0
    dst_ptr[1] = BLENDER(a, b, x & 0xffff);
846
0
    x += dx;
847
0
    dst_ptr += 2;
848
0
  }
849
0
  if (dst_width & 1) {
850
0
    int64_t xi = x >> 16;
851
0
    int a = src_ptr[xi];
852
0
    int b = src_ptr[xi + 1];
853
0
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
854
0
  }
855
0
}
856
#undef BLENDER
857
858
void ScaleRowDown38_C(const uint8_t* src_ptr,
859
                      ptrdiff_t src_stride,
860
                      uint8_t* dst,
861
0
                      int dst_width) {
862
0
  int x;
863
0
  (void)src_stride;
864
0
  assert(dst_width % 3 == 0);
865
0
  for (x = 0; x < dst_width; x += 3) {
866
0
    dst[0] = src_ptr[0];
867
0
    dst[1] = src_ptr[3];
868
0
    dst[2] = src_ptr[6];
869
0
    dst += 3;
870
0
    src_ptr += 8;
871
0
  }
872
0
}
873
874
void ScaleRowDown38_16_C(const uint16_t* src_ptr,
875
                         ptrdiff_t src_stride,
876
                         uint16_t* dst,
877
0
                         int dst_width) {
878
0
  int x;
879
0
  (void)src_stride;
880
0
  assert(dst_width % 3 == 0);
881
0
  for (x = 0; x < dst_width; x += 3) {
882
0
    dst[0] = src_ptr[0];
883
0
    dst[1] = src_ptr[3];
884
0
    dst[2] = src_ptr[6];
885
0
    dst += 3;
886
0
    src_ptr += 8;
887
0
  }
888
0
}
889
890
// 8x3 -> 3x1
891
void ScaleRowDown38_3_Box_C(const uint8_t* src_ptr,
892
                            ptrdiff_t src_stride,
893
                            uint8_t* dst_ptr,
894
1.37k
                            int dst_width) {
895
1.37k
  intptr_t stride = src_stride;
896
1.37k
  int i;
897
1.37k
  assert((dst_width % 3 == 0) && (dst_width > 0));
898
2.74k
  for (i = 0; i < dst_width; i += 3) {
899
1.37k
    dst_ptr[0] =
900
1.37k
        (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] +
901
1.37k
         src_ptr[stride + 1] + src_ptr[stride + 2] + src_ptr[stride * 2 + 0] +
902
1.37k
         src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) *
903
1.37k
            (65536 / 9) >>
904
1.37k
        16;
905
1.37k
    dst_ptr[1] =
906
1.37k
        (src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] +
907
1.37k
         src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride * 2 + 3] +
908
1.37k
         src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) *
909
1.37k
            (65536 / 9) >>
910
1.37k
        16;
911
1.37k
    dst_ptr[2] =
912
1.37k
        (src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7] +
913
1.37k
         src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) *
914
1.37k
            (65536 / 6) >>
915
1.37k
        16;
916
1.37k
    src_ptr += 8;
917
1.37k
    dst_ptr += 3;
918
1.37k
  }
919
1.37k
}
920
921
void ScaleRowDown38_3_Box_16_C(const uint16_t* src_ptr,
922
                               ptrdiff_t src_stride,
923
                               uint16_t* dst_ptr,
924
1.45k
                               int dst_width) {
925
1.45k
  intptr_t stride = src_stride;
926
1.45k
  int i;
927
1.45k
  assert((dst_width % 3 == 0) && (dst_width > 0));
928
3.07k
  for (i = 0; i < dst_width; i += 3) {
929
1.62k
    dst_ptr[0] =
930
1.62k
        (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] +
931
1.62k
         src_ptr[stride + 1] + src_ptr[stride + 2] + src_ptr[stride * 2 + 0] +
932
1.62k
         src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) *
933
1.62k
            (65536u / 9u) >>
934
1.62k
        16;
935
1.62k
    dst_ptr[1] =
936
1.62k
        (src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] +
937
1.62k
         src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride * 2 + 3] +
938
1.62k
         src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) *
939
1.62k
            (65536u / 9u) >>
940
1.62k
        16;
941
1.62k
    dst_ptr[2] =
942
1.62k
        (src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7] +
943
1.62k
         src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) *
944
1.62k
            (65536u / 6u) >>
945
1.62k
        16;
946
1.62k
    src_ptr += 8;
947
1.62k
    dst_ptr += 3;
948
1.62k
  }
949
1.45k
}
950
951
// 8x2 -> 3x1
952
void ScaleRowDown38_2_Box_C(const uint8_t* src_ptr,
953
                            ptrdiff_t src_stride,
954
                            uint8_t* dst_ptr,
955
686
                            int dst_width) {
956
686
  intptr_t stride = src_stride;
957
686
  int i;
958
686
  assert((dst_width % 3 == 0) && (dst_width > 0));
959
1.37k
  for (i = 0; i < dst_width; i += 3) {
960
686
    dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] +
961
686
                  src_ptr[stride + 1] + src_ptr[stride + 2]) *
962
686
                     (65536 / 6) >>
963
686
                 16;
964
686
    dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] +
965
686
                  src_ptr[stride + 4] + src_ptr[stride + 5]) *
966
686
                     (65536 / 6) >>
967
686
                 16;
968
686
    dst_ptr[2] =
969
686
        (src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7]) *
970
686
            (65536 / 4) >>
971
686
        16;
972
686
    src_ptr += 8;
973
686
    dst_ptr += 3;
974
686
  }
975
686
}
976
977
void ScaleRowDown38_2_Box_16_C(const uint16_t* src_ptr,
978
                               ptrdiff_t src_stride,
979
                               uint16_t* dst_ptr,
980
726
                               int dst_width) {
981
726
  intptr_t stride = src_stride;
982
726
  int i;
983
726
  assert((dst_width % 3 == 0) && (dst_width > 0));
984
1.53k
  for (i = 0; i < dst_width; i += 3) {
985
810
    dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] +
986
810
                  src_ptr[stride + 1] + src_ptr[stride + 2]) *
987
810
                     (65536u / 6u) >>
988
810
                 16;
989
810
    dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] +
990
810
                  src_ptr[stride + 4] + src_ptr[stride + 5]) *
991
810
                     (65536u / 6u) >>
992
810
                 16;
993
810
    dst_ptr[2] =
994
810
        (src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7]) *
995
810
            (65536u / 4u) >>
996
810
        16;
997
810
    src_ptr += 8;
998
810
    dst_ptr += 3;
999
810
  }
1000
726
}
1001
1002
822k
void ScaleAddRow_C(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) {
1003
822k
  int x;
1004
822k
  assert(src_width > 0);
1005
7.21M
  for (x = 0; x < src_width - 1; x += 2) {
1006
6.39M
    dst_ptr[0] += src_ptr[0];
1007
6.39M
    dst_ptr[1] += src_ptr[1];
1008
6.39M
    src_ptr += 2;
1009
6.39M
    dst_ptr += 2;
1010
6.39M
  }
1011
822k
  if (src_width & 1) {
1012
448k
    dst_ptr[0] += src_ptr[0];
1013
448k
  }
1014
822k
}
1015
1016
void ScaleAddRow_16_C(const uint16_t* src_ptr,
1017
                      uint32_t* dst_ptr,
1018
896k
                      int src_width) {
1019
896k
  int x;
1020
896k
  assert(src_width > 0);
1021
87.1M
  for (x = 0; x < src_width - 1; x += 2) {
1022
86.2M
    dst_ptr[0] += src_ptr[0];
1023
86.2M
    dst_ptr[1] += src_ptr[1];
1024
86.2M
    src_ptr += 2;
1025
86.2M
    dst_ptr += 2;
1026
86.2M
  }
1027
896k
  if (src_width & 1) {
1028
468k
    dst_ptr[0] += src_ptr[0];
1029
468k
  }
1030
896k
}
1031
1032
// ARGB scale row functions
1033
1034
void ScaleARGBRowDown2_C(const uint8_t* src_argb,
1035
                         ptrdiff_t src_stride,
1036
                         uint8_t* dst_argb,
1037
0
                         int dst_width) {
1038
0
  const uint32_t* src = (const uint32_t*)(src_argb);
1039
0
  uint32_t* dst = (uint32_t*)(dst_argb);
1040
0
  int x;
1041
0
  (void)src_stride;
1042
0
  for (x = 0; x < dst_width - 1; x += 2) {
1043
0
    dst[0] = src[1];
1044
0
    dst[1] = src[3];
1045
0
    src += 4;
1046
0
    dst += 2;
1047
0
  }
1048
0
  if (dst_width & 1) {
1049
0
    dst[0] = src[1];
1050
0
  }
1051
0
}
1052
1053
void ScaleARGBRowDown2Linear_C(const uint8_t* src_argb,
1054
                               ptrdiff_t src_stride,
1055
                               uint8_t* dst_argb,
1056
0
                               int dst_width) {
1057
0
  int x;
1058
0
  (void)src_stride;
1059
0
  for (x = 0; x < dst_width; ++x) {
1060
0
    dst_argb[0] = (src_argb[0] + src_argb[4] + 1) >> 1;
1061
0
    dst_argb[1] = (src_argb[1] + src_argb[5] + 1) >> 1;
1062
0
    dst_argb[2] = (src_argb[2] + src_argb[6] + 1) >> 1;
1063
0
    dst_argb[3] = (src_argb[3] + src_argb[7] + 1) >> 1;
1064
0
    src_argb += 8;
1065
0
    dst_argb += 4;
1066
0
  }
1067
0
}
1068
1069
void ScaleARGBRowDown2Box_C(const uint8_t* src_argb,
1070
                            ptrdiff_t src_stride,
1071
                            uint8_t* dst_argb,
1072
0
                            int dst_width) {
1073
0
  int x;
1074
0
  for (x = 0; x < dst_width; ++x) {
1075
0
    dst_argb[0] = (src_argb[0] + src_argb[4] + src_argb[src_stride] +
1076
0
                   src_argb[src_stride + 4] + 2) >>
1077
0
                  2;
1078
0
    dst_argb[1] = (src_argb[1] + src_argb[5] + src_argb[src_stride + 1] +
1079
0
                   src_argb[src_stride + 5] + 2) >>
1080
0
                  2;
1081
0
    dst_argb[2] = (src_argb[2] + src_argb[6] + src_argb[src_stride + 2] +
1082
0
                   src_argb[src_stride + 6] + 2) >>
1083
0
                  2;
1084
0
    dst_argb[3] = (src_argb[3] + src_argb[7] + src_argb[src_stride + 3] +
1085
0
                   src_argb[src_stride + 7] + 2) >>
1086
0
                  2;
1087
0
    src_argb += 8;
1088
0
    dst_argb += 4;
1089
0
  }
1090
0
}
1091
1092
void ScaleARGBRowDownEven_C(const uint8_t* src_argb,
1093
                            ptrdiff_t src_stride,
1094
                            int src_stepx,
1095
                            uint8_t* dst_argb,
1096
0
                            int dst_width) {
1097
0
  const uint32_t* src = (const uint32_t*)(src_argb);
1098
0
  uint32_t* dst = (uint32_t*)(dst_argb);
1099
0
  (void)src_stride;
1100
0
  int x;
1101
0
  for (x = 0; x < dst_width - 1; x += 2) {
1102
0
    dst[0] = src[0];
1103
0
    dst[1] = src[src_stepx];
1104
0
    src += src_stepx * 2;
1105
0
    dst += 2;
1106
0
  }
1107
0
  if (dst_width & 1) {
1108
0
    dst[0] = src[0];
1109
0
  }
1110
0
}
1111
1112
void ScaleARGBRowDownEvenBox_C(const uint8_t* src_argb,
1113
                               ptrdiff_t src_stride,
1114
                               int src_stepx,
1115
                               uint8_t* dst_argb,
1116
0
                               int dst_width) {
1117
0
  int x;
1118
0
  for (x = 0; x < dst_width; ++x) {
1119
0
    dst_argb[0] = (src_argb[0] + src_argb[4] + src_argb[src_stride] +
1120
0
                   src_argb[src_stride + 4] + 2) >>
1121
0
                  2;
1122
0
    dst_argb[1] = (src_argb[1] + src_argb[5] + src_argb[src_stride + 1] +
1123
0
                   src_argb[src_stride + 5] + 2) >>
1124
0
                  2;
1125
0
    dst_argb[2] = (src_argb[2] + src_argb[6] + src_argb[src_stride + 2] +
1126
0
                   src_argb[src_stride + 6] + 2) >>
1127
0
                  2;
1128
0
    dst_argb[3] = (src_argb[3] + src_argb[7] + src_argb[src_stride + 3] +
1129
0
                   src_argb[src_stride + 7] + 2) >>
1130
0
                  2;
1131
0
    src_argb += src_stepx * 4;
1132
0
    dst_argb += 4;
1133
0
  }
1134
0
}
1135
1136
// Scales a single row of pixels using point sampling.
1137
void ScaleARGBCols_C(uint8_t* dst_argb,
1138
                     const uint8_t* src_argb,
1139
                     int dst_width,
1140
                     int x,
1141
0
                     int dx) {
1142
0
  const uint32_t* src = (const uint32_t*)(src_argb);
1143
0
  uint32_t* dst = (uint32_t*)(dst_argb);
1144
0
  int j;
1145
0
  for (j = 0; j < dst_width - 1; j += 2) {
1146
0
    dst[0] = src[x >> 16];
1147
0
    x += dx;
1148
0
    dst[1] = src[x >> 16];
1149
0
    x += dx;
1150
0
    dst += 2;
1151
0
  }
1152
0
  if (dst_width & 1) {
1153
0
    dst[0] = src[x >> 16];
1154
0
  }
1155
0
}
1156
1157
void ScaleARGBCols64_C(uint8_t* dst_argb,
1158
                       const uint8_t* src_argb,
1159
                       int dst_width,
1160
                       int x32,
1161
0
                       int dx) {
1162
0
  int64_t x = (int64_t)(x32);
1163
0
  const uint32_t* src = (const uint32_t*)(src_argb);
1164
0
  uint32_t* dst = (uint32_t*)(dst_argb);
1165
0
  int j;
1166
0
  for (j = 0; j < dst_width - 1; j += 2) {
1167
0
    dst[0] = src[x >> 16];
1168
0
    x += dx;
1169
0
    dst[1] = src[x >> 16];
1170
0
    x += dx;
1171
0
    dst += 2;
1172
0
  }
1173
0
  if (dst_width & 1) {
1174
0
    dst[0] = src[x >> 16];
1175
0
  }
1176
0
}
1177
1178
// Scales a single row of pixels up by 2x using point sampling.
1179
void ScaleARGBColsUp2_C(uint8_t* dst_argb,
1180
                        const uint8_t* src_argb,
1181
                        int dst_width,
1182
                        int x,
1183
0
                        int dx) {
1184
0
  const uint32_t* src = (const uint32_t*)(src_argb);
1185
0
  uint32_t* dst = (uint32_t*)(dst_argb);
1186
0
  int j;
1187
0
  (void)x;
1188
0
  (void)dx;
1189
0
  for (j = 0; j < dst_width - 1; j += 2) {
1190
0
    dst[1] = dst[0] = src[0];
1191
0
    src += 1;
1192
0
    dst += 2;
1193
0
  }
1194
0
  if (dst_width & 1) {
1195
0
    dst[0] = src[0];
1196
0
  }
1197
0
}
1198
1199
// TODO(fbarchard): Replace 0x7f ^ f with 128-f.  bug=607.
1200
// Mimics SSSE3 blender
1201
0
#define BLENDER1(a, b, f) ((a) * (0x7f ^ f) + (b)*f) >> 7
1202
#define BLENDERC(a, b, f, s) \
1203
0
  (uint32_t)(BLENDER1(((a) >> s) & 255, ((b) >> s) & 255, f) << s)
1204
#define BLENDER(a, b, f)                                                 \
1205
0
  BLENDERC(a, b, f, 24) | BLENDERC(a, b, f, 16) | BLENDERC(a, b, f, 8) | \
1206
0
      BLENDERC(a, b, f, 0)
1207
1208
void ScaleARGBFilterCols_C(uint8_t* dst_argb,
1209
                           const uint8_t* src_argb,
1210
                           int dst_width,
1211
                           int x,
1212
0
                           int dx) {
1213
0
  const uint32_t* src = (const uint32_t*)(src_argb);
1214
0
  uint32_t* dst = (uint32_t*)(dst_argb);
1215
0
  int j;
1216
0
  for (j = 0; j < dst_width - 1; j += 2) {
1217
0
    int xi = x >> 16;
1218
0
    int xf = (x >> 9) & 0x7f;
1219
0
    uint32_t a = src[xi];
1220
0
    uint32_t b = src[xi + 1];
1221
0
    dst[0] = BLENDER(a, b, xf);
1222
0
    x += dx;
1223
0
    xi = x >> 16;
1224
0
    xf = (x >> 9) & 0x7f;
1225
0
    a = src[xi];
1226
0
    b = src[xi + 1];
1227
0
    dst[1] = BLENDER(a, b, xf);
1228
0
    x += dx;
1229
0
    dst += 2;
1230
0
  }
1231
0
  if (dst_width & 1) {
1232
0
    int xi = x >> 16;
1233
0
    int xf = (x >> 9) & 0x7f;
1234
0
    uint32_t a = src[xi];
1235
0
    uint32_t b = src[xi + 1];
1236
0
    dst[0] = BLENDER(a, b, xf);
1237
0
  }
1238
0
}
1239
1240
void ScaleARGBFilterCols64_C(uint8_t* dst_argb,
1241
                             const uint8_t* src_argb,
1242
                             int dst_width,
1243
                             int x32,
1244
0
                             int dx) {
1245
0
  int64_t x = (int64_t)(x32);
1246
0
  const uint32_t* src = (const uint32_t*)(src_argb);
1247
0
  uint32_t* dst = (uint32_t*)(dst_argb);
1248
0
  int j;
1249
0
  for (j = 0; j < dst_width - 1; j += 2) {
1250
0
    int64_t xi = x >> 16;
1251
0
    int xf = (x >> 9) & 0x7f;
1252
0
    uint32_t a = src[xi];
1253
0
    uint32_t b = src[xi + 1];
1254
0
    dst[0] = BLENDER(a, b, xf);
1255
0
    x += dx;
1256
0
    xi = x >> 16;
1257
0
    xf = (x >> 9) & 0x7f;
1258
0
    a = src[xi];
1259
0
    b = src[xi + 1];
1260
0
    dst[1] = BLENDER(a, b, xf);
1261
0
    x += dx;
1262
0
    dst += 2;
1263
0
  }
1264
0
  if (dst_width & 1) {
1265
0
    int64_t xi = x >> 16;
1266
0
    int xf = (x >> 9) & 0x7f;
1267
0
    uint32_t a = src[xi];
1268
0
    uint32_t b = src[xi + 1];
1269
0
    dst[0] = BLENDER(a, b, xf);
1270
0
  }
1271
0
}
1272
#undef BLENDER1
1273
#undef BLENDERC
1274
#undef BLENDER
1275
1276
// UV scale row functions
1277
// same as ARGB but 2 channels
1278
1279
void ScaleUVRowDown2_C(const uint8_t* src_uv,
1280
                       ptrdiff_t src_stride,
1281
                       uint8_t* dst_uv,
1282
0
                       int dst_width) {
1283
0
  int x;
1284
0
  (void)src_stride;
1285
0
  for (x = 0; x < dst_width; ++x) {
1286
0
    dst_uv[0] = src_uv[2];  // Store the 2nd UV
1287
0
    dst_uv[1] = src_uv[3];
1288
0
    src_uv += 4;
1289
0
    dst_uv += 2;
1290
0
  }
1291
0
}
1292
1293
void ScaleUVRowDown2Linear_C(const uint8_t* src_uv,
1294
                             ptrdiff_t src_stride,
1295
                             uint8_t* dst_uv,
1296
0
                             int dst_width) {
1297
0
  int x;
1298
0
  (void)src_stride;
1299
0
  for (x = 0; x < dst_width; ++x) {
1300
0
    dst_uv[0] = (src_uv[0] + src_uv[2] + 1) >> 1;
1301
0
    dst_uv[1] = (src_uv[1] + src_uv[3] + 1) >> 1;
1302
0
    src_uv += 4;
1303
0
    dst_uv += 2;
1304
0
  }
1305
0
}
1306
1307
void ScaleUVRowDown2Box_C(const uint8_t* src_uv,
1308
                          ptrdiff_t src_stride,
1309
                          uint8_t* dst_uv,
1310
0
                          int dst_width) {
1311
0
  int x;
1312
0
  for (x = 0; x < dst_width; ++x) {
1313
0
    dst_uv[0] = (src_uv[0] + src_uv[2] + src_uv[src_stride] +
1314
0
                 src_uv[src_stride + 2] + 2) >>
1315
0
                2;
1316
0
    dst_uv[1] = (src_uv[1] + src_uv[3] + src_uv[src_stride + 1] +
1317
0
                 src_uv[src_stride + 3] + 2) >>
1318
0
                2;
1319
0
    src_uv += 4;
1320
0
    dst_uv += 2;
1321
0
  }
1322
0
}
1323
1324
void ScaleUVRowDownEven_C(const uint8_t* src_uv,
1325
                          ptrdiff_t src_stride,
1326
                          int src_stepx,
1327
                          uint8_t* dst_uv,
1328
0
                          int dst_width) {
1329
0
  const uint16_t* src = (const uint16_t*)(src_uv);
1330
0
  uint16_t* dst = (uint16_t*)(dst_uv);
1331
0
  (void)src_stride;
1332
0
  int x;
1333
0
  for (x = 0; x < dst_width - 1; x += 2) {
1334
0
    dst[0] = src[0];
1335
0
    dst[1] = src[src_stepx];
1336
0
    src += src_stepx * 2;
1337
0
    dst += 2;
1338
0
  }
1339
0
  if (dst_width & 1) {
1340
0
    dst[0] = src[0];
1341
0
  }
1342
0
}
1343
1344
void ScaleUVRowDownEvenBox_C(const uint8_t* src_uv,
1345
                             ptrdiff_t src_stride,
1346
                             int src_stepx,
1347
                             uint8_t* dst_uv,
1348
0
                             int dst_width) {
1349
0
  int x;
1350
0
  for (x = 0; x < dst_width; ++x) {
1351
0
    dst_uv[0] = (src_uv[0] + src_uv[2] + src_uv[src_stride] +
1352
0
                 src_uv[src_stride + 2] + 2) >>
1353
0
                2;
1354
0
    dst_uv[1] = (src_uv[1] + src_uv[3] + src_uv[src_stride + 1] +
1355
0
                 src_uv[src_stride + 3] + 2) >>
1356
0
                2;
1357
0
    src_uv += src_stepx * 2;
1358
0
    dst_uv += 2;
1359
0
  }
1360
0
}
1361
1362
void ScaleUVRowUp2_Linear_C(const uint8_t* src_ptr,
1363
                            uint8_t* dst_ptr,
1364
0
                            int dst_width) {
1365
0
  int src_width = dst_width >> 1;
1366
0
  int x;
1367
0
  assert((dst_width % 2 == 0) && (dst_width >= 0));
1368
0
  for (x = 0; x < src_width; ++x) {
1369
0
    dst_ptr[4 * x + 0] =
1370
0
        (src_ptr[2 * x + 0] * 3 + src_ptr[2 * x + 2] * 1 + 2) >> 2;
1371
0
    dst_ptr[4 * x + 1] =
1372
0
        (src_ptr[2 * x + 1] * 3 + src_ptr[2 * x + 3] * 1 + 2) >> 2;
1373
0
    dst_ptr[4 * x + 2] =
1374
0
        (src_ptr[2 * x + 0] * 1 + src_ptr[2 * x + 2] * 3 + 2) >> 2;
1375
0
    dst_ptr[4 * x + 3] =
1376
0
        (src_ptr[2 * x + 1] * 1 + src_ptr[2 * x + 3] * 3 + 2) >> 2;
1377
0
  }
1378
0
}
1379
1380
void ScaleUVRowUp2_Bilinear_C(const uint8_t* src_ptr,
1381
                              ptrdiff_t src_stride,
1382
                              uint8_t* dst_ptr,
1383
                              ptrdiff_t dst_stride,
1384
0
                              int dst_width) {
1385
0
  const uint8_t* s = src_ptr;
1386
0
  const uint8_t* t = src_ptr + src_stride;
1387
0
  uint8_t* d = dst_ptr;
1388
0
  uint8_t* e = dst_ptr + dst_stride;
1389
0
  int src_width = dst_width >> 1;
1390
0
  int x;
1391
0
  assert((dst_width % 2 == 0) && (dst_width >= 0));
1392
0
  for (x = 0; x < src_width; ++x) {
1393
0
    d[4 * x + 0] = (s[2 * x + 0] * 9 + s[2 * x + 2] * 3 + t[2 * x + 0] * 3 +
1394
0
                    t[2 * x + 2] * 1 + 8) >>
1395
0
                   4;
1396
0
    d[4 * x + 1] = (s[2 * x + 1] * 9 + s[2 * x + 3] * 3 + t[2 * x + 1] * 3 +
1397
0
                    t[2 * x + 3] * 1 + 8) >>
1398
0
                   4;
1399
0
    d[4 * x + 2] = (s[2 * x + 0] * 3 + s[2 * x + 2] * 9 + t[2 * x + 0] * 1 +
1400
0
                    t[2 * x + 2] * 3 + 8) >>
1401
0
                   4;
1402
0
    d[4 * x + 3] = (s[2 * x + 1] * 3 + s[2 * x + 3] * 9 + t[2 * x + 1] * 1 +
1403
0
                    t[2 * x + 3] * 3 + 8) >>
1404
0
                   4;
1405
0
    e[4 * x + 0] = (s[2 * x + 0] * 3 + s[2 * x + 2] * 1 + t[2 * x + 0] * 9 +
1406
0
                    t[2 * x + 2] * 3 + 8) >>
1407
0
                   4;
1408
0
    e[4 * x + 1] = (s[2 * x + 1] * 3 + s[2 * x + 3] * 1 + t[2 * x + 1] * 9 +
1409
0
                    t[2 * x + 3] * 3 + 8) >>
1410
0
                   4;
1411
0
    e[4 * x + 2] = (s[2 * x + 0] * 1 + s[2 * x + 2] * 3 + t[2 * x + 0] * 3 +
1412
0
                    t[2 * x + 2] * 9 + 8) >>
1413
0
                   4;
1414
0
    e[4 * x + 3] = (s[2 * x + 1] * 1 + s[2 * x + 3] * 3 + t[2 * x + 1] * 3 +
1415
0
                    t[2 * x + 3] * 9 + 8) >>
1416
0
                   4;
1417
0
  }
1418
0
}
1419
1420
void ScaleUVRowUp2_Linear_16_C(const uint16_t* src_ptr,
1421
                               uint16_t* dst_ptr,
1422
0
                               int dst_width) {
1423
0
  int src_width = dst_width >> 1;
1424
0
  int x;
1425
0
  assert((dst_width % 2 == 0) && (dst_width >= 0));
1426
0
  for (x = 0; x < src_width; ++x) {
1427
0
    dst_ptr[4 * x + 0] =
1428
0
        (src_ptr[2 * x + 0] * 3 + src_ptr[2 * x + 2] * 1 + 2) >> 2;
1429
0
    dst_ptr[4 * x + 1] =
1430
0
        (src_ptr[2 * x + 1] * 3 + src_ptr[2 * x + 3] * 1 + 2) >> 2;
1431
0
    dst_ptr[4 * x + 2] =
1432
0
        (src_ptr[2 * x + 0] * 1 + src_ptr[2 * x + 2] * 3 + 2) >> 2;
1433
0
    dst_ptr[4 * x + 3] =
1434
0
        (src_ptr[2 * x + 1] * 1 + src_ptr[2 * x + 3] * 3 + 2) >> 2;
1435
0
  }
1436
0
}
1437
1438
void ScaleUVRowUp2_Bilinear_16_C(const uint16_t* src_ptr,
1439
                                 ptrdiff_t src_stride,
1440
                                 uint16_t* dst_ptr,
1441
                                 ptrdiff_t dst_stride,
1442
0
                                 int dst_width) {
1443
0
  const uint16_t* s = src_ptr;
1444
0
  const uint16_t* t = src_ptr + src_stride;
1445
0
  uint16_t* d = dst_ptr;
1446
0
  uint16_t* e = dst_ptr + dst_stride;
1447
0
  int src_width = dst_width >> 1;
1448
0
  int x;
1449
0
  assert((dst_width % 2 == 0) && (dst_width >= 0));
1450
0
  for (x = 0; x < src_width; ++x) {
1451
0
    d[4 * x + 0] = (s[2 * x + 0] * 9 + s[2 * x + 2] * 3 + t[2 * x + 0] * 3 +
1452
0
                    t[2 * x + 2] * 1 + 8) >>
1453
0
                   4;
1454
0
    d[4 * x + 1] = (s[2 * x + 1] * 9 + s[2 * x + 3] * 3 + t[2 * x + 1] * 3 +
1455
0
                    t[2 * x + 3] * 1 + 8) >>
1456
0
                   4;
1457
0
    d[4 * x + 2] = (s[2 * x + 0] * 3 + s[2 * x + 2] * 9 + t[2 * x + 0] * 1 +
1458
0
                    t[2 * x + 2] * 3 + 8) >>
1459
0
                   4;
1460
0
    d[4 * x + 3] = (s[2 * x + 1] * 3 + s[2 * x + 3] * 9 + t[2 * x + 1] * 1 +
1461
0
                    t[2 * x + 3] * 3 + 8) >>
1462
0
                   4;
1463
0
    e[4 * x + 0] = (s[2 * x + 0] * 3 + s[2 * x + 2] * 1 + t[2 * x + 0] * 9 +
1464
0
                    t[2 * x + 2] * 3 + 8) >>
1465
0
                   4;
1466
0
    e[4 * x + 1] = (s[2 * x + 1] * 3 + s[2 * x + 3] * 1 + t[2 * x + 1] * 9 +
1467
0
                    t[2 * x + 3] * 3 + 8) >>
1468
0
                   4;
1469
0
    e[4 * x + 2] = (s[2 * x + 0] * 1 + s[2 * x + 2] * 3 + t[2 * x + 0] * 3 +
1470
0
                    t[2 * x + 2] * 9 + 8) >>
1471
0
                   4;
1472
0
    e[4 * x + 3] = (s[2 * x + 1] * 1 + s[2 * x + 3] * 3 + t[2 * x + 1] * 3 +
1473
0
                    t[2 * x + 3] * 9 + 8) >>
1474
0
                   4;
1475
0
  }
1476
0
}
1477
1478
// Scales a single row of pixels using point sampling.
1479
void ScaleUVCols_C(uint8_t* dst_uv,
1480
                   const uint8_t* src_uv,
1481
                   int dst_width,
1482
                   int x,
1483
0
                   int dx) {
1484
0
  const uint16_t* src = (const uint16_t*)(src_uv);
1485
0
  uint16_t* dst = (uint16_t*)(dst_uv);
1486
0
  int j;
1487
0
  for (j = 0; j < dst_width - 1; j += 2) {
1488
0
    dst[0] = src[x >> 16];
1489
0
    x += dx;
1490
0
    dst[1] = src[x >> 16];
1491
0
    x += dx;
1492
0
    dst += 2;
1493
0
  }
1494
0
  if (dst_width & 1) {
1495
0
    dst[0] = src[x >> 16];
1496
0
  }
1497
0
}
1498
1499
void ScaleUVCols64_C(uint8_t* dst_uv,
1500
                     const uint8_t* src_uv,
1501
                     int dst_width,
1502
                     int x32,
1503
0
                     int dx) {
1504
0
  int64_t x = (int64_t)(x32);
1505
0
  const uint16_t* src = (const uint16_t*)(src_uv);
1506
0
  uint16_t* dst = (uint16_t*)(dst_uv);
1507
0
  int j;
1508
0
  for (j = 0; j < dst_width - 1; j += 2) {
1509
0
    dst[0] = src[x >> 16];
1510
0
    x += dx;
1511
0
    dst[1] = src[x >> 16];
1512
0
    x += dx;
1513
0
    dst += 2;
1514
0
  }
1515
0
  if (dst_width & 1) {
1516
0
    dst[0] = src[x >> 16];
1517
0
  }
1518
0
}
1519
1520
// Scales a single row of pixels up by 2x using point sampling.
1521
void ScaleUVColsUp2_C(uint8_t* dst_uv,
1522
                      const uint8_t* src_uv,
1523
                      int dst_width,
1524
                      int x,
1525
0
                      int dx) {
1526
0
  const uint16_t* src = (const uint16_t*)(src_uv);
1527
0
  uint16_t* dst = (uint16_t*)(dst_uv);
1528
0
  int j;
1529
0
  (void)x;
1530
0
  (void)dx;
1531
0
  for (j = 0; j < dst_width - 1; j += 2) {
1532
0
    dst[1] = dst[0] = src[0];
1533
0
    src += 1;
1534
0
    dst += 2;
1535
0
  }
1536
0
  if (dst_width & 1) {
1537
0
    dst[0] = src[0];
1538
0
  }
1539
0
}
1540
1541
// TODO(fbarchard): Replace 0x7f ^ f with 128-f.  bug=607.
1542
// Mimics SSSE3 blender
1543
0
#define BLENDER1(a, b, f) ((a) * (0x7f ^ f) + (b)*f) >> 7
1544
#define BLENDERC(a, b, f, s) \
1545
0
  (uint16_t)(BLENDER1(((a) >> s) & 255, ((b) >> s) & 255, f) << s)
1546
0
#define BLENDER(a, b, f) BLENDERC(a, b, f, 8) | BLENDERC(a, b, f, 0)
1547
1548
void ScaleUVFilterCols_C(uint8_t* dst_uv,
1549
                         const uint8_t* src_uv,
1550
                         int dst_width,
1551
                         int x,
1552
0
                         int dx) {
1553
0
  const uint16_t* src = (const uint16_t*)(src_uv);
1554
0
  uint16_t* dst = (uint16_t*)(dst_uv);
1555
0
  int j;
1556
0
  for (j = 0; j < dst_width - 1; j += 2) {
1557
0
    int xi = x >> 16;
1558
0
    int xf = (x >> 9) & 0x7f;
1559
0
    uint16_t a = src[xi];
1560
0
    uint16_t b = src[xi + 1];
1561
0
    dst[0] = BLENDER(a, b, xf);
1562
0
    x += dx;
1563
0
    xi = x >> 16;
1564
0
    xf = (x >> 9) & 0x7f;
1565
0
    a = src[xi];
1566
0
    b = src[xi + 1];
1567
0
    dst[1] = BLENDER(a, b, xf);
1568
0
    x += dx;
1569
0
    dst += 2;
1570
0
  }
1571
0
  if (dst_width & 1) {
1572
0
    int xi = x >> 16;
1573
0
    int xf = (x >> 9) & 0x7f;
1574
0
    uint16_t a = src[xi];
1575
0
    uint16_t b = src[xi + 1];
1576
0
    dst[0] = BLENDER(a, b, xf);
1577
0
  }
1578
0
}
1579
1580
void ScaleUVFilterCols64_C(uint8_t* dst_uv,
1581
                           const uint8_t* src_uv,
1582
                           int dst_width,
1583
                           int x32,
1584
0
                           int dx) {
1585
0
  int64_t x = (int64_t)(x32);
1586
0
  const uint16_t* src = (const uint16_t*)(src_uv);
1587
0
  uint16_t* dst = (uint16_t*)(dst_uv);
1588
0
  int j;
1589
0
  for (j = 0; j < dst_width - 1; j += 2) {
1590
0
    int64_t xi = x >> 16;
1591
0
    int xf = (x >> 9) & 0x7f;
1592
0
    uint16_t a = src[xi];
1593
0
    uint16_t b = src[xi + 1];
1594
0
    dst[0] = BLENDER(a, b, xf);
1595
0
    x += dx;
1596
0
    xi = x >> 16;
1597
0
    xf = (x >> 9) & 0x7f;
1598
0
    a = src[xi];
1599
0
    b = src[xi + 1];
1600
0
    dst[1] = BLENDER(a, b, xf);
1601
0
    x += dx;
1602
0
    dst += 2;
1603
0
  }
1604
0
  if (dst_width & 1) {
1605
0
    int64_t xi = x >> 16;
1606
0
    int xf = (x >> 9) & 0x7f;
1607
0
    uint16_t a = src[xi];
1608
0
    uint16_t b = src[xi + 1];
1609
0
    dst[0] = BLENDER(a, b, xf);
1610
0
  }
1611
0
}
1612
#undef BLENDER1
1613
#undef BLENDERC
1614
#undef BLENDER
1615
1616
// Scale plane vertically with bilinear interpolation.
1617
void ScalePlaneVertical(int src_height,
1618
                        int dst_width,
1619
                        int dst_height,
1620
                        int src_stride,
1621
                        int dst_stride,
1622
                        const uint8_t* src_argb,
1623
                        uint8_t* dst_argb,
1624
                        int x,
1625
                        int y,
1626
                        int dy,
1627
                        int bpp,  // bytes per pixel. 4 for ARGB.
1628
8.33k
                        enum FilterMode filtering) {
1629
  // TODO(fbarchard): Allow higher bpp.
1630
8.33k
  int dst_width_bytes = dst_width * bpp;
1631
8.33k
  void (*InterpolateRow)(uint8_t* dst_argb, const uint8_t* src_argb,
1632
8.33k
                         ptrdiff_t src_stride, int dst_width,
1633
8.33k
                         int source_y_fraction) = InterpolateRow_C;
1634
8.33k
  const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
1635
8.33k
  int j;
1636
8.33k
  assert(bpp >= 1 && bpp <= 4);
1637
8.33k
  assert(src_height != 0);
1638
8.33k
  assert(dst_width > 0);
1639
8.33k
  assert(dst_height > 0);
1640
8.33k
  src_argb += (x >> 16) * bpp;
1641
8.33k
#if defined(HAS_INTERPOLATEROW_SSSE3)
1642
8.33k
  if (TestCpuFlag(kCpuHasSSSE3)) {
1643
8.33k
    InterpolateRow = InterpolateRow_Any_SSSE3;
1644
8.33k
    if (IS_ALIGNED(dst_width_bytes, 16)) {
1645
708
      InterpolateRow = InterpolateRow_SSSE3;
1646
708
    }
1647
8.33k
  }
1648
8.33k
#endif
1649
8.33k
#if defined(HAS_INTERPOLATEROW_AVX2)
1650
8.33k
  if (TestCpuFlag(kCpuHasAVX2)) {
1651
8.33k
    InterpolateRow = InterpolateRow_Any_AVX2;
1652
8.33k
    if (IS_ALIGNED(dst_width_bytes, 32)) {
1653
534
      InterpolateRow = InterpolateRow_AVX2;
1654
534
    }
1655
8.33k
  }
1656
8.33k
#endif
1657
#if defined(HAS_INTERPOLATEROW_NEON)
1658
  if (TestCpuFlag(kCpuHasNEON)) {
1659
    InterpolateRow = InterpolateRow_Any_NEON;
1660
    if (IS_ALIGNED(dst_width_bytes, 16)) {
1661
      InterpolateRow = InterpolateRow_NEON;
1662
    }
1663
  }
1664
#endif
1665
#if defined(HAS_INTERPOLATEROW_SME)
1666
  if (TestCpuFlag(kCpuHasSME)) {
1667
    InterpolateRow = InterpolateRow_SME;
1668
  }
1669
#endif
1670
#if defined(HAS_INTERPOLATEROW_LSX)
1671
  if (TestCpuFlag(kCpuHasLSX)) {
1672
    InterpolateRow = InterpolateRow_Any_LSX;
1673
    if (IS_ALIGNED(dst_width_bytes, 32)) {
1674
      InterpolateRow = InterpolateRow_LSX;
1675
    }
1676
  }
1677
#endif
1678
#if defined(HAS_INTERPOLATEROW_RVV)
1679
  if (TestCpuFlag(kCpuHasRVV)) {
1680
    InterpolateRow = InterpolateRow_RVV;
1681
  }
1682
#endif
1683
1684
828k
  for (j = 0; j < dst_height; ++j) {
1685
819k
    int yi;
1686
819k
    int yf;
1687
819k
    if (y > max_y) {
1688
0
      y = max_y;
1689
0
    }
1690
819k
    yi = y >> 16;
1691
819k
    yf = filtering ? ((y >> 8) & 255) : 0;
1692
819k
    InterpolateRow(dst_argb, src_argb + yi * src_stride, src_stride,
1693
819k
                   dst_width_bytes, yf);
1694
819k
    dst_argb += dst_stride;
1695
819k
    y += dy;
1696
819k
  }
1697
8.33k
}
1698
1699
void ScalePlaneVertical_16(int src_height,
1700
                           int dst_width,
1701
                           int dst_height,
1702
                           int src_stride,
1703
                           int dst_stride,
1704
                           const uint16_t* src_argb,
1705
                           uint16_t* dst_argb,
1706
                           int x,
1707
                           int y,
1708
                           int dy,
1709
                           int wpp, /* words per pixel. normally 1 */
1710
2.44k
                           enum FilterMode filtering) {
1711
  // TODO(fbarchard): Allow higher wpp.
1712
2.44k
  int dst_width_words = dst_width * wpp;
1713
2.44k
  void (*InterpolateRow)(uint16_t* dst_argb, const uint16_t* src_argb,
1714
2.44k
                         ptrdiff_t src_stride, int dst_width,
1715
2.44k
                         int source_y_fraction) = InterpolateRow_16_C;
1716
2.44k
  const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
1717
2.44k
  int j;
1718
2.44k
  assert(wpp >= 1 && wpp <= 2);
1719
2.44k
  assert(src_height != 0);
1720
2.44k
  assert(dst_width > 0);
1721
2.44k
  assert(dst_height > 0);
1722
2.44k
  src_argb += (x >> 16) * wpp;
1723
#if defined(HAS_INTERPOLATEROW_16_SSE2)
1724
  if (TestCpuFlag(kCpuHasSSE2)) {
1725
    InterpolateRow = InterpolateRow_16_Any_SSE2;
1726
    if (IS_ALIGNED(dst_width_words, 16)) {
1727
      InterpolateRow = InterpolateRow_16_SSE2;
1728
    }
1729
  }
1730
#endif
1731
#if defined(HAS_INTERPOLATEROW_16_SSSE3)
1732
  if (TestCpuFlag(kCpuHasSSSE3)) {
1733
    InterpolateRow = InterpolateRow_16_Any_SSSE3;
1734
    if (IS_ALIGNED(dst_width_words, 16)) {
1735
      InterpolateRow = InterpolateRow_16_SSSE3;
1736
    }
1737
  }
1738
#endif
1739
#if defined(HAS_INTERPOLATEROW_16_AVX2)
1740
  if (TestCpuFlag(kCpuHasAVX2)) {
1741
    InterpolateRow = InterpolateRow_16_Any_AVX2;
1742
    if (IS_ALIGNED(dst_width_words, 32)) {
1743
      InterpolateRow = InterpolateRow_16_AVX2;
1744
    }
1745
  }
1746
#endif
1747
#if defined(HAS_INTERPOLATEROW_16_NEON)
1748
  if (TestCpuFlag(kCpuHasNEON)) {
1749
    InterpolateRow = InterpolateRow_16_Any_NEON;
1750
    if (IS_ALIGNED(dst_width_words, 8)) {
1751
      InterpolateRow = InterpolateRow_16_NEON;
1752
    }
1753
  }
1754
#endif
1755
#if defined(HAS_INTERPOLATEROW_16_SME)
1756
  if (TestCpuFlag(kCpuHasSME)) {
1757
    InterpolateRow = InterpolateRow_16_SME;
1758
  }
1759
#endif
1760
1.35M
  for (j = 0; j < dst_height; ++j) {
1761
1.34M
    int yi;
1762
1.34M
    int yf;
1763
1.34M
    if (y > max_y) {
1764
0
      y = max_y;
1765
0
    }
1766
1.34M
    yi = y >> 16;
1767
1.34M
    yf = filtering ? ((y >> 8) & 255) : 0;
1768
1.34M
    InterpolateRow(dst_argb, src_argb + yi * src_stride, src_stride,
1769
1.34M
                   dst_width_words, yf);
1770
1.34M
    dst_argb += dst_stride;
1771
1.34M
    y += dy;
1772
1.34M
  }
1773
2.44k
}
1774
1775
// Use scale to convert lsb formats to msb, depending how many bits there are:
1776
// 32768 = 9 bits
1777
// 16384 = 10 bits
1778
// 4096 = 12 bits
1779
// 256 = 16 bits
1780
// TODO(fbarchard): change scale to bits
1781
void ScalePlaneVertical_16To8(int src_height,
1782
                              int dst_width,
1783
                              int dst_height,
1784
                              int src_stride,
1785
                              int dst_stride,
1786
                              const uint16_t* src_argb,
1787
                              uint8_t* dst_argb,
1788
                              int x,
1789
                              int y,
1790
                              int dy,
1791
                              int wpp, /* words per pixel. normally 1 */
1792
                              int scale,
1793
0
                              enum FilterMode filtering) {
1794
  // TODO(fbarchard): Allow higher wpp.
1795
0
  int dst_width_words = dst_width * wpp;
1796
  // TODO(https://crbug.com/libyuv/931): Add NEON 32 bit and AVX2 versions.
1797
0
  void (*InterpolateRow_16To8)(uint8_t* dst_argb, const uint16_t* src_argb,
1798
0
                               ptrdiff_t src_stride, int scale, int dst_width,
1799
0
                               int source_y_fraction) = InterpolateRow_16To8_C;
1800
0
  const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
1801
0
  int j;
1802
0
  assert(wpp >= 1 && wpp <= 2);
1803
0
  assert(src_height != 0);
1804
0
  assert(dst_width > 0);
1805
0
  assert(dst_height > 0);
1806
0
  src_argb += (x >> 16) * wpp;
1807
1808
#if defined(HAS_INTERPOLATEROW_16TO8_NEON)
1809
  if (TestCpuFlag(kCpuHasNEON)) {
1810
    InterpolateRow_16To8 = InterpolateRow_16To8_Any_NEON;
1811
    if (IS_ALIGNED(dst_width, 8)) {
1812
      InterpolateRow_16To8 = InterpolateRow_16To8_NEON;
1813
    }
1814
  }
1815
#endif
1816
#if defined(HAS_INTERPOLATEROW_16TO8_SME)
1817
  if (TestCpuFlag(kCpuHasSME)) {
1818
    InterpolateRow_16To8 = InterpolateRow_16To8_SME;
1819
  }
1820
#endif
1821
0
#if defined(HAS_INTERPOLATEROW_16TO8_AVX2)
1822
0
  if (TestCpuFlag(kCpuHasAVX2)) {
1823
0
    InterpolateRow_16To8 = InterpolateRow_16To8_Any_AVX2;
1824
0
    if (IS_ALIGNED(dst_width, 32)) {
1825
0
      InterpolateRow_16To8 = InterpolateRow_16To8_AVX2;
1826
0
    }
1827
0
  }
1828
0
#endif
1829
0
  for (j = 0; j < dst_height; ++j) {
1830
0
    int yi;
1831
0
    int yf;
1832
0
    if (y > max_y) {
1833
0
      y = max_y;
1834
0
    }
1835
0
    yi = y >> 16;
1836
0
    yf = filtering ? ((y >> 8) & 255) : 0;
1837
0
    InterpolateRow_16To8(dst_argb, src_argb + yi * src_stride, src_stride,
1838
0
                         scale, dst_width_words, yf);
1839
0
    dst_argb += dst_stride;
1840
0
    y += dy;
1841
0
  }
1842
0
}
1843
1844
// Simplify the filtering based on scale factors.
1845
enum FilterMode ScaleFilterReduce(int src_width,
1846
                                  int src_height,
1847
                                  int dst_width,
1848
                                  int dst_height,
1849
64.5k
                                  enum FilterMode filtering) {
1850
64.5k
  if (src_width < 0) {
1851
0
    src_width = -src_width;
1852
0
  }
1853
64.5k
  if (src_height < 0) {
1854
0
    src_height = -src_height;
1855
0
  }
1856
64.5k
  if (filtering == kFilterBox) {
1857
    // If scaling either axis to 0.5 or larger, switch from Box to Bilinear.
1858
46.8k
    if (dst_width * 2 >= src_width || dst_height * 2 >= src_height) {
1859
40.5k
      filtering = kFilterBilinear;
1860
40.5k
    }
1861
46.8k
  }
1862
64.5k
  if (filtering == kFilterBilinear) {
1863
52.8k
    if (src_height == 1) {
1864
4.46k
      filtering = kFilterLinear;
1865
4.46k
    }
1866
    // TODO(fbarchard): Detect any odd scale factor and reduce to Linear.
1867
52.8k
    if (dst_height == src_height || dst_height * 3 == src_height) {
1868
4.63k
      filtering = kFilterLinear;
1869
4.63k
    }
1870
    // TODO(fbarchard): Remove 1 pixel wide filter restriction, which is to
1871
    // avoid reading 2 pixels horizontally that causes memory exception.
1872
52.8k
    if (src_width == 1) {
1873
2.85k
      filtering = kFilterNone;
1874
2.85k
    }
1875
52.8k
  }
1876
64.5k
  if (filtering == kFilterLinear) {
1877
11.1k
    if (src_width == 1) {
1878
0
      filtering = kFilterNone;
1879
0
    }
1880
    // TODO(fbarchard): Detect any odd scale factor and reduce to None.
1881
11.1k
    if (dst_width == src_width || dst_width * 3 == src_width) {
1882
926
      filtering = kFilterNone;
1883
926
    }
1884
11.1k
  }
1885
64.5k
  return filtering;
1886
64.5k
}
1887
1888
// Divide num by div and return as 16.16 fixed point result.
1889
0
int FixedDiv_C(int num, int div) {
1890
0
  return (int)(((int64_t)(num) << 16) / div);
1891
0
}
1892
1893
// Divide num - 1 by div - 1 and return as 16.16 fixed point result.
1894
0
int FixedDiv1_C(int num, int div) {
1895
0
  return (int)((((int64_t)(num) << 16) - 0x00010001) / (div - 1));
1896
0
}
1897
1898
22.8k
#define CENTERSTART(dx, s) (dx < 0) ? -((-dx >> 1) + s) : ((dx >> 1) + s)
1899
1900
// Compute slope values for stepping.
1901
void ScaleSlope(int src_width,
1902
                int src_height,
1903
                int dst_width,
1904
                int dst_height,
1905
                enum FilterMode filtering,
1906
                int* x,
1907
                int* y,
1908
                int* dx,
1909
31.3k
                int* dy) {
1910
31.3k
  assert(x != NULL);
1911
31.3k
  assert(y != NULL);
1912
31.3k
  assert(dx != NULL);
1913
31.3k
  assert(dy != NULL);
1914
31.3k
  assert(src_width != 0);
1915
31.3k
  assert(src_height != 0);
1916
31.3k
  assert(dst_width > 0);
1917
31.3k
  assert(dst_height > 0);
1918
  // Check for 1 pixel and avoid FixedDiv overflow.
1919
31.3k
  if (dst_width == 1 && src_width >= 32768) {
1920
0
    dst_width = src_width;
1921
0
  }
1922
31.3k
  if (dst_height == 1 && src_height >= 32768) {
1923
0
    dst_height = src_height;
1924
0
  }
1925
31.3k
  if (filtering == kFilterBox) {
1926
    // Scale step for point sampling duplicates all pixels equally.
1927
3.84k
    *dx = FixedDiv(Abs(src_width), dst_width);
1928
3.84k
    *dy = FixedDiv(src_height, dst_height);
1929
3.84k
    *x = 0;
1930
3.84k
    *y = 0;
1931
27.5k
  } else if (filtering == kFilterBilinear) {
1932
    // Scale step for bilinear sampling renders last pixel once for upsample.
1933
18.8k
    if (dst_width <= Abs(src_width)) {
1934
6.29k
      *dx = FixedDiv(Abs(src_width), dst_width);
1935
6.29k
      *x = CENTERSTART(*dx, -32768);  // Subtract 0.5 (32768) to center filter.
1936
12.5k
    } else if (src_width > 1 && dst_width > 1) {
1937
12.5k
      *dx = FixedDiv1(Abs(src_width), dst_width);
1938
12.5k
      *x = 0;
1939
12.5k
    }
1940
18.8k
    if (dst_height <= src_height) {
1941
7.83k
      *dy = FixedDiv(src_height, dst_height);
1942
7.83k
      *y = CENTERSTART(*dy, -32768);  // Subtract 0.5 (32768) to center filter.
1943
10.9k
    } else if (src_height > 1 && dst_height > 1) {
1944
10.9k
      *dy = FixedDiv1(src_height, dst_height);
1945
10.9k
      *y = 0;
1946
10.9k
    }
1947
18.8k
  } else if (filtering == kFilterLinear) {
1948
    // Scale step for bilinear sampling renders last pixel once for upsample.
1949
6.02k
    if (dst_width <= Abs(src_width)) {
1950
3.36k
      *dx = FixedDiv(Abs(src_width), dst_width);
1951
3.36k
      *x = CENTERSTART(*dx, -32768);  // Subtract 0.5 (32768) to center filter.
1952
3.36k
    } else if (src_width > 1 && dst_width > 1) {
1953
2.66k
      *dx = FixedDiv1(Abs(src_width), dst_width);
1954
2.66k
      *x = 0;
1955
2.66k
    }
1956
6.02k
    *dy = FixedDiv(src_height, dst_height);
1957
6.02k
    *y = *dy >> 1;
1958
6.02k
  } else {
1959
    // Scale step for point sampling duplicates all pixels equally.
1960
2.70k
    *dx = FixedDiv(Abs(src_width), dst_width);
1961
2.70k
    *dy = FixedDiv(src_height, dst_height);
1962
2.70k
    *x = CENTERSTART(*dx, 0);
1963
2.70k
    *y = CENTERSTART(*dy, 0);
1964
2.70k
  }
1965
  // Negative src_width means horizontally mirror.
1966
31.3k
  if (src_width < 0) {
1967
0
    *x += (dst_width - 1) * *dx;
1968
0
    *dx = -*dx;
1969
    // src_width = -src_width;   // Caller must do this.
1970
0
  }
1971
31.3k
}
1972
#undef CENTERSTART
1973
1974
#ifdef __cplusplus
1975
}  // extern "C"
1976
}  // namespace libyuv
1977
#endif