Coverage Report

Created: 2025-11-09 06:20

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libavif/ext/libyuv/source/scale_common.cc
Line
Count
Source
1
/*
2
 *  Copyright 2013 The LibYuv Project Authors. All rights reserved.
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5
 *  that can be found in the LICENSE file in the root of the source
6
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS. All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
9
 */
10
11
#include "libyuv/scale.h"
12
13
#include <assert.h>
14
#include <string.h>
15
16
#include "libyuv/cpu_id.h"
17
#include "libyuv/planar_functions.h"  // For CopyARGB
18
#include "libyuv/row.h"
19
#include "libyuv/scale_row.h"
20
21
#ifdef __cplusplus
22
namespace libyuv {
23
extern "C" {
24
#endif
25
26
#ifdef __cplusplus
27
0
#define STATIC_CAST(type, expr) static_cast<type>(expr)
28
#else
29
#define STATIC_CAST(type, expr) (type)(expr)
30
#endif
31
32
// TODO(fbarchard): make clamp255 preserve negative values.
33
0
static __inline int32_t clamp255(int32_t v) {
34
0
  return (-(v >= 255) | v) & 255;
35
0
}
36
37
// Use scale to convert lsb formats to msb, depending how many bits there are:
38
// 32768 = 9 bits
39
// 16384 = 10 bits
40
// 4096 = 12 bits
41
// 256 = 16 bits
42
// TODO(fbarchard): change scale to bits
43
#define C16TO8(v, scale) clamp255(((v) * (scale)) >> 16)
44
45
53.3k
static __inline int Abs(int v) {
46
53.3k
  return v >= 0 ? v : -v;
47
53.3k
}
48
49
// CPU agnostic row functions
50
void ScaleRowDown2_C(const uint8_t* src_ptr,
51
                     ptrdiff_t src_stride,
52
                     uint8_t* dst,
53
0
                     int dst_width) {
54
0
  int x;
55
0
  (void)src_stride;
56
0
  for (x = 0; x < dst_width - 1; x += 2) {
57
0
    dst[0] = src_ptr[1];
58
0
    dst[1] = src_ptr[3];
59
0
    dst += 2;
60
0
    src_ptr += 4;
61
0
  }
62
0
  if (dst_width & 1) {
63
0
    dst[0] = src_ptr[1];
64
0
  }
65
0
}
66
67
void ScaleRowDown2_16_C(const uint16_t* src_ptr,
68
                        ptrdiff_t src_stride,
69
                        uint16_t* dst,
70
0
                        int dst_width) {
71
0
  int x;
72
0
  (void)src_stride;
73
0
  for (x = 0; x < dst_width - 1; x += 2) {
74
0
    dst[0] = src_ptr[1];
75
0
    dst[1] = src_ptr[3];
76
0
    dst += 2;
77
0
    src_ptr += 4;
78
0
  }
79
0
  if (dst_width & 1) {
80
0
    dst[0] = src_ptr[1];
81
0
  }
82
0
}
83
84
void ScaleRowDown2_16To8_C(const uint16_t* src_ptr,
85
                           ptrdiff_t src_stride,
86
                           uint8_t* dst,
87
                           int dst_width,
88
0
                           int scale) {
89
0
  int x;
90
0
  (void)src_stride;
91
0
  assert(scale >= 256);
92
0
  assert(scale <= 32768);
93
0
  for (x = 0; x < dst_width - 1; x += 2) {
94
0
    dst[0] = STATIC_CAST(uint8_t, C16TO8(src_ptr[1], scale));
95
0
    dst[1] = STATIC_CAST(uint8_t, C16TO8(src_ptr[3], scale));
96
0
    dst += 2;
97
0
    src_ptr += 4;
98
0
  }
99
0
  if (dst_width & 1) {
100
0
    dst[0] = STATIC_CAST(uint8_t, C16TO8(src_ptr[1], scale));
101
0
  }
102
0
}
103
104
void ScaleRowDown2_16To8_Odd_C(const uint16_t* src_ptr,
105
                               ptrdiff_t src_stride,
106
                               uint8_t* dst,
107
                               int dst_width,
108
0
                               int scale) {
109
0
  int x;
110
0
  (void)src_stride;
111
0
  assert(scale >= 256);
112
0
  assert(scale <= 32768);
113
0
  dst_width -= 1;
114
0
  for (x = 0; x < dst_width - 1; x += 2) {
115
0
    dst[0] = STATIC_CAST(uint8_t, C16TO8(src_ptr[1], scale));
116
0
    dst[1] = STATIC_CAST(uint8_t, C16TO8(src_ptr[3], scale));
117
0
    dst += 2;
118
0
    src_ptr += 4;
119
0
  }
120
0
  if (dst_width & 1) {
121
0
    dst[0] = STATIC_CAST(uint8_t, C16TO8(src_ptr[1], scale));
122
0
    dst += 1;
123
0
    src_ptr += 2;
124
0
  }
125
0
  dst[0] = STATIC_CAST(uint8_t, C16TO8(src_ptr[0], scale));
126
0
}
127
128
void ScaleRowDown2Linear_C(const uint8_t* src_ptr,
129
                           ptrdiff_t src_stride,
130
                           uint8_t* dst,
131
0
                           int dst_width) {
132
0
  const uint8_t* s = src_ptr;
133
0
  int x;
134
0
  (void)src_stride;
135
0
  for (x = 0; x < dst_width - 1; x += 2) {
136
0
    dst[0] = (s[0] + s[1] + 1) >> 1;
137
0
    dst[1] = (s[2] + s[3] + 1) >> 1;
138
0
    dst += 2;
139
0
    s += 4;
140
0
  }
141
0
  if (dst_width & 1) {
142
0
    dst[0] = (s[0] + s[1] + 1) >> 1;
143
0
  }
144
0
}
145
146
void ScaleRowDown2Linear_16_C(const uint16_t* src_ptr,
147
                              ptrdiff_t src_stride,
148
                              uint16_t* dst,
149
0
                              int dst_width) {
150
0
  const uint16_t* s = src_ptr;
151
0
  int x;
152
0
  (void)src_stride;
153
0
  for (x = 0; x < dst_width - 1; x += 2) {
154
0
    dst[0] = (s[0] + s[1] + 1) >> 1;
155
0
    dst[1] = (s[2] + s[3] + 1) >> 1;
156
0
    dst += 2;
157
0
    s += 4;
158
0
  }
159
0
  if (dst_width & 1) {
160
0
    dst[0] = (s[0] + s[1] + 1) >> 1;
161
0
  }
162
0
}
163
164
void ScaleRowDown2Linear_16To8_C(const uint16_t* src_ptr,
165
                                 ptrdiff_t src_stride,
166
                                 uint8_t* dst,
167
                                 int dst_width,
168
0
                                 int scale) {
169
0
  const uint16_t* s = src_ptr;
170
0
  int x;
171
0
  (void)src_stride;
172
0
  assert(scale >= 256);
173
0
  assert(scale <= 32768);
174
0
  for (x = 0; x < dst_width - 1; x += 2) {
175
0
    dst[0] = STATIC_CAST(uint8_t, C16TO8((s[0] + s[1] + 1) >> 1, scale));
176
0
    dst[1] = STATIC_CAST(uint8_t, C16TO8((s[2] + s[3] + 1) >> 1, scale));
177
0
    dst += 2;
178
0
    s += 4;
179
0
  }
180
0
  if (dst_width & 1) {
181
0
    dst[0] = STATIC_CAST(uint8_t, C16TO8((s[0] + s[1] + 1) >> 1, scale));
182
0
  }
183
0
}
184
185
void ScaleRowDown2Linear_16To8_Odd_C(const uint16_t* src_ptr,
186
                                     ptrdiff_t src_stride,
187
                                     uint8_t* dst,
188
                                     int dst_width,
189
0
                                     int scale) {
190
0
  const uint16_t* s = src_ptr;
191
0
  int x;
192
0
  (void)src_stride;
193
0
  assert(scale >= 256);
194
0
  assert(scale <= 32768);
195
0
  dst_width -= 1;
196
0
  for (x = 0; x < dst_width - 1; x += 2) {
197
0
    dst[0] = STATIC_CAST(uint8_t, C16TO8((s[0] + s[1] + 1) >> 1, scale));
198
0
    dst[1] = STATIC_CAST(uint8_t, C16TO8((s[2] + s[3] + 1) >> 1, scale));
199
0
    dst += 2;
200
0
    s += 4;
201
0
  }
202
0
  if (dst_width & 1) {
203
0
    dst[0] = STATIC_CAST(uint8_t, C16TO8((s[0] + s[1] + 1) >> 1, scale));
204
0
    dst += 1;
205
0
    s += 2;
206
0
  }
207
0
  dst[0] = STATIC_CAST(uint8_t, C16TO8(s[0], scale));
208
0
}
209
210
void ScaleRowDown2Box_C(const uint8_t* src_ptr,
211
                        ptrdiff_t src_stride,
212
                        uint8_t* dst,
213
1.12k
                        int dst_width) {
214
1.12k
  const uint8_t* s = src_ptr;
215
1.12k
  const uint8_t* t = src_ptr + src_stride;
216
1.12k
  int x;
217
9.52k
  for (x = 0; x < dst_width - 1; x += 2) {
218
8.39k
    dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
219
8.39k
    dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
220
8.39k
    dst += 2;
221
8.39k
    s += 4;
222
8.39k
    t += 4;
223
8.39k
  }
224
1.12k
  if (dst_width & 1) {
225
268
    dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
226
268
  }
227
1.12k
}
228
229
void ScaleRowDown2Box_Odd_C(const uint8_t* src_ptr,
230
                            ptrdiff_t src_stride,
231
                            uint8_t* dst,
232
0
                            int dst_width) {
233
0
  const uint8_t* s = src_ptr;
234
0
  const uint8_t* t = src_ptr + src_stride;
235
0
  int x;
236
0
  dst_width -= 1;
237
0
  for (x = 0; x < dst_width - 1; x += 2) {
238
0
    dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
239
0
    dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
240
0
    dst += 2;
241
0
    s += 4;
242
0
    t += 4;
243
0
  }
244
0
  if (dst_width & 1) {
245
0
    dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
246
0
    dst += 1;
247
0
    s += 2;
248
0
    t += 2;
249
0
  }
250
0
  dst[0] = (s[0] + t[0] + 1) >> 1;
251
0
}
252
253
void ScaleRowDown2Box_16_C(const uint16_t* src_ptr,
254
                           ptrdiff_t src_stride,
255
                           uint16_t* dst,
256
1.31k
                           int dst_width) {
257
1.31k
  const uint16_t* s = src_ptr;
258
1.31k
  const uint16_t* t = src_ptr + src_stride;
259
1.31k
  int x;
260
23.3k
  for (x = 0; x < dst_width - 1; x += 2) {
261
22.0k
    dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
262
22.0k
    dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
263
22.0k
    dst += 2;
264
22.0k
    s += 4;
265
22.0k
    t += 4;
266
22.0k
  }
267
1.31k
  if (dst_width & 1) {
268
254
    dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
269
254
  }
270
1.31k
}
271
272
void ScaleRowDown2Box_16To8_C(const uint16_t* src_ptr,
273
                              ptrdiff_t src_stride,
274
                              uint8_t* dst,
275
                              int dst_width,
276
0
                              int scale) {
277
0
  const uint16_t* s = src_ptr;
278
0
  const uint16_t* t = src_ptr + src_stride;
279
0
  int x;
280
0
  assert(scale >= 256);
281
0
  assert(scale <= 32768);
282
0
  for (x = 0; x < dst_width - 1; x += 2) {
283
0
    dst[0] = STATIC_CAST(uint8_t,
284
0
                         C16TO8((s[0] + s[1] + t[0] + t[1] + 2) >> 2, scale));
285
0
    dst[1] = STATIC_CAST(uint8_t,
286
0
                         C16TO8((s[2] + s[3] + t[2] + t[3] + 2) >> 2, scale));
287
0
    dst += 2;
288
0
    s += 4;
289
0
    t += 4;
290
0
  }
291
0
  if (dst_width & 1) {
292
0
    dst[0] = STATIC_CAST(uint8_t,
293
0
                         C16TO8((s[0] + s[1] + t[0] + t[1] + 2) >> 2, scale));
294
0
  }
295
0
}
296
297
void ScaleRowDown2Box_16To8_Odd_C(const uint16_t* src_ptr,
298
                                  ptrdiff_t src_stride,
299
                                  uint8_t* dst,
300
                                  int dst_width,
301
0
                                  int scale) {
302
0
  const uint16_t* s = src_ptr;
303
0
  const uint16_t* t = src_ptr + src_stride;
304
0
  int x;
305
0
  assert(scale >= 256);
306
0
  assert(scale <= 32768);
307
0
  dst_width -= 1;
308
0
  for (x = 0; x < dst_width - 1; x += 2) {
309
0
    dst[0] = STATIC_CAST(uint8_t,
310
0
                         C16TO8((s[0] + s[1] + t[0] + t[1] + 2) >> 2, scale));
311
0
    dst[1] = STATIC_CAST(uint8_t,
312
0
                         C16TO8((s[2] + s[3] + t[2] + t[3] + 2) >> 2, scale));
313
0
    dst += 2;
314
0
    s += 4;
315
0
    t += 4;
316
0
  }
317
0
  if (dst_width & 1) {
318
0
    dst[0] = STATIC_CAST(uint8_t,
319
0
                         C16TO8((s[0] + s[1] + t[0] + t[1] + 2) >> 2, scale));
320
0
    dst += 1;
321
0
    s += 2;
322
0
    t += 2;
323
0
  }
324
0
  dst[0] = STATIC_CAST(uint8_t, C16TO8((s[0] + t[0] + 1) >> 1, scale));
325
0
}
326
327
void ScaleRowDown4_C(const uint8_t* src_ptr,
328
                     ptrdiff_t src_stride,
329
                     uint8_t* dst,
330
0
                     int dst_width) {
331
0
  int x;
332
0
  (void)src_stride;
333
0
  for (x = 0; x < dst_width - 1; x += 2) {
334
0
    dst[0] = src_ptr[2];
335
0
    dst[1] = src_ptr[6];
336
0
    dst += 2;
337
0
    src_ptr += 8;
338
0
  }
339
0
  if (dst_width & 1) {
340
0
    dst[0] = src_ptr[2];
341
0
  }
342
0
}
343
344
void ScaleRowDown4_16_C(const uint16_t* src_ptr,
345
                        ptrdiff_t src_stride,
346
                        uint16_t* dst,
347
0
                        int dst_width) {
348
0
  int x;
349
0
  (void)src_stride;
350
0
  for (x = 0; x < dst_width - 1; x += 2) {
351
0
    dst[0] = src_ptr[2];
352
0
    dst[1] = src_ptr[6];
353
0
    dst += 2;
354
0
    src_ptr += 8;
355
0
  }
356
0
  if (dst_width & 1) {
357
0
    dst[0] = src_ptr[2];
358
0
  }
359
0
}
360
361
void ScaleRowDown4Box_C(const uint8_t* src_ptr,
362
                        ptrdiff_t src_stride,
363
                        uint8_t* dst,
364
616
                        int dst_width) {
365
616
  intptr_t stride = src_stride;
366
616
  int x;
367
1.91k
  for (x = 0; x < dst_width - 1; x += 2) {
368
1.29k
    dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
369
1.29k
              src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] +
370
1.29k
              src_ptr[stride + 3] + src_ptr[stride * 2 + 0] +
371
1.29k
              src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2] +
372
1.29k
              src_ptr[stride * 2 + 3] + src_ptr[stride * 3 + 0] +
373
1.29k
              src_ptr[stride * 3 + 1] + src_ptr[stride * 3 + 2] +
374
1.29k
              src_ptr[stride * 3 + 3] + 8) >>
375
1.29k
             4;
376
1.29k
    dst[1] = (src_ptr[4] + src_ptr[5] + src_ptr[6] + src_ptr[7] +
377
1.29k
              src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride + 6] +
378
1.29k
              src_ptr[stride + 7] + src_ptr[stride * 2 + 4] +
379
1.29k
              src_ptr[stride * 2 + 5] + src_ptr[stride * 2 + 6] +
380
1.29k
              src_ptr[stride * 2 + 7] + src_ptr[stride * 3 + 4] +
381
1.29k
              src_ptr[stride * 3 + 5] + src_ptr[stride * 3 + 6] +
382
1.29k
              src_ptr[stride * 3 + 7] + 8) >>
383
1.29k
             4;
384
1.29k
    dst += 2;
385
1.29k
    src_ptr += 8;
386
1.29k
  }
387
616
  if (dst_width & 1) {
388
266
    dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
389
266
              src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] +
390
266
              src_ptr[stride + 3] + src_ptr[stride * 2 + 0] +
391
266
              src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2] +
392
266
              src_ptr[stride * 2 + 3] + src_ptr[stride * 3 + 0] +
393
266
              src_ptr[stride * 3 + 1] + src_ptr[stride * 3 + 2] +
394
266
              src_ptr[stride * 3 + 3] + 8) >>
395
266
             4;
396
266
  }
397
616
}
398
399
void ScaleRowDown4Box_16_C(const uint16_t* src_ptr,
400
                           ptrdiff_t src_stride,
401
                           uint16_t* dst,
402
633
                           int dst_width) {
403
633
  intptr_t stride = src_stride;
404
633
  int x;
405
21.8k
  for (x = 0; x < dst_width - 1; x += 2) {
406
21.1k
    dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
407
21.1k
              src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] +
408
21.1k
              src_ptr[stride + 3] + src_ptr[stride * 2 + 0] +
409
21.1k
              src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2] +
410
21.1k
              src_ptr[stride * 2 + 3] + src_ptr[stride * 3 + 0] +
411
21.1k
              src_ptr[stride * 3 + 1] + src_ptr[stride * 3 + 2] +
412
21.1k
              src_ptr[stride * 3 + 3] + 8) >>
413
21.1k
             4;
414
21.1k
    dst[1] = (src_ptr[4] + src_ptr[5] + src_ptr[6] + src_ptr[7] +
415
21.1k
              src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride + 6] +
416
21.1k
              src_ptr[stride + 7] + src_ptr[stride * 2 + 4] +
417
21.1k
              src_ptr[stride * 2 + 5] + src_ptr[stride * 2 + 6] +
418
21.1k
              src_ptr[stride * 2 + 7] + src_ptr[stride * 3 + 4] +
419
21.1k
              src_ptr[stride * 3 + 5] + src_ptr[stride * 3 + 6] +
420
21.1k
              src_ptr[stride * 3 + 7] + 8) >>
421
21.1k
             4;
422
21.1k
    dst += 2;
423
21.1k
    src_ptr += 8;
424
21.1k
  }
425
633
  if (dst_width & 1) {
426
278
    dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
427
278
              src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] +
428
278
              src_ptr[stride + 3] + src_ptr[stride * 2 + 0] +
429
278
              src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2] +
430
278
              src_ptr[stride * 2 + 3] + src_ptr[stride * 3 + 0] +
431
278
              src_ptr[stride * 3 + 1] + src_ptr[stride * 3 + 2] +
432
278
              src_ptr[stride * 3 + 3] + 8) >>
433
278
             4;
434
278
  }
435
633
}
436
437
void ScaleRowDown34_C(const uint8_t* src_ptr,
438
                      ptrdiff_t src_stride,
439
                      uint8_t* dst,
440
0
                      int dst_width) {
441
0
  int x;
442
0
  (void)src_stride;
443
0
  assert((dst_width % 3 == 0) && (dst_width > 0));
444
0
  for (x = 0; x < dst_width; x += 3) {
445
0
    dst[0] = src_ptr[0];
446
0
    dst[1] = src_ptr[1];
447
0
    dst[2] = src_ptr[3];
448
0
    dst += 3;
449
0
    src_ptr += 4;
450
0
  }
451
0
}
452
453
void ScaleRowDown34_16_C(const uint16_t* src_ptr,
454
                         ptrdiff_t src_stride,
455
                         uint16_t* dst,
456
0
                         int dst_width) {
457
0
  int x;
458
0
  (void)src_stride;
459
0
  assert((dst_width % 3 == 0) && (dst_width > 0));
460
0
  for (x = 0; x < dst_width; x += 3) {
461
0
    dst[0] = src_ptr[0];
462
0
    dst[1] = src_ptr[1];
463
0
    dst[2] = src_ptr[3];
464
0
    dst += 3;
465
0
    src_ptr += 4;
466
0
  }
467
0
}
468
469
// Filter rows 0 and 1 together, 3 : 1
470
void ScaleRowDown34_0_Box_C(const uint8_t* src_ptr,
471
                            ptrdiff_t src_stride,
472
                            uint8_t* d,
473
1.21k
                            int dst_width) {
474
1.21k
  const uint8_t* s = src_ptr;
475
1.21k
  const uint8_t* t = src_ptr + src_stride;
476
1.21k
  int x;
477
1.21k
  assert((dst_width % 3 == 0) && (dst_width > 0));
478
3.62k
  for (x = 0; x < dst_width; x += 3) {
479
2.40k
    uint8_t a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
480
2.40k
    uint8_t a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
481
2.40k
    uint8_t a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
482
2.40k
    uint8_t b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
483
2.40k
    uint8_t b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
484
2.40k
    uint8_t b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
485
2.40k
    d[0] = (a0 * 3 + b0 + 2) >> 2;
486
2.40k
    d[1] = (a1 * 3 + b1 + 2) >> 2;
487
2.40k
    d[2] = (a2 * 3 + b2 + 2) >> 2;
488
2.40k
    d += 3;
489
2.40k
    s += 4;
490
2.40k
    t += 4;
491
2.40k
  }
492
1.21k
}
493
494
void ScaleRowDown34_0_Box_16_C(const uint16_t* src_ptr,
495
                               ptrdiff_t src_stride,
496
                               uint16_t* d,
497
1.43k
                               int dst_width) {
498
1.43k
  const uint16_t* s = src_ptr;
499
1.43k
  const uint16_t* t = src_ptr + src_stride;
500
1.43k
  int x;
501
1.43k
  assert((dst_width % 3 == 0) && (dst_width > 0));
502
8.70k
  for (x = 0; x < dst_width; x += 3) {
503
7.27k
    uint16_t a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
504
7.27k
    uint16_t a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
505
7.27k
    uint16_t a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
506
7.27k
    uint16_t b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
507
7.27k
    uint16_t b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
508
7.27k
    uint16_t b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
509
7.27k
    d[0] = (a0 * 3 + b0 + 2) >> 2;
510
7.27k
    d[1] = (a1 * 3 + b1 + 2) >> 2;
511
7.27k
    d[2] = (a2 * 3 + b2 + 2) >> 2;
512
7.27k
    d += 3;
513
7.27k
    s += 4;
514
7.27k
    t += 4;
515
7.27k
  }
516
1.43k
}
517
518
// Filter rows 1 and 2 together, 1 : 1
519
void ScaleRowDown34_1_Box_C(const uint8_t* src_ptr,
520
                            ptrdiff_t src_stride,
521
                            uint8_t* d,
522
606
                            int dst_width) {
523
606
  const uint8_t* s = src_ptr;
524
606
  const uint8_t* t = src_ptr + src_stride;
525
606
  int x;
526
606
  assert((dst_width % 3 == 0) && (dst_width > 0));
527
1.81k
  for (x = 0; x < dst_width; x += 3) {
528
1.20k
    uint8_t a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
529
1.20k
    uint8_t a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
530
1.20k
    uint8_t a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
531
1.20k
    uint8_t b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
532
1.20k
    uint8_t b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
533
1.20k
    uint8_t b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
534
1.20k
    d[0] = (a0 + b0 + 1) >> 1;
535
1.20k
    d[1] = (a1 + b1 + 1) >> 1;
536
1.20k
    d[2] = (a2 + b2 + 1) >> 1;
537
1.20k
    d += 3;
538
1.20k
    s += 4;
539
1.20k
    t += 4;
540
1.20k
  }
541
606
}
542
543
void ScaleRowDown34_1_Box_16_C(const uint16_t* src_ptr,
544
                               ptrdiff_t src_stride,
545
                               uint16_t* d,
546
715
                               int dst_width) {
547
715
  const uint16_t* s = src_ptr;
548
715
  const uint16_t* t = src_ptr + src_stride;
549
715
  int x;
550
715
  assert((dst_width % 3 == 0) && (dst_width > 0));
551
4.35k
  for (x = 0; x < dst_width; x += 3) {
552
3.63k
    uint16_t a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
553
3.63k
    uint16_t a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
554
3.63k
    uint16_t a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
555
3.63k
    uint16_t b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
556
3.63k
    uint16_t b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
557
3.63k
    uint16_t b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
558
3.63k
    d[0] = (a0 + b0 + 1) >> 1;
559
3.63k
    d[1] = (a1 + b1 + 1) >> 1;
560
3.63k
    d[2] = (a2 + b2 + 1) >> 1;
561
3.63k
    d += 3;
562
3.63k
    s += 4;
563
3.63k
    t += 4;
564
3.63k
  }
565
715
}
566
567
// Sample position: (O is src sample position, X is dst sample position)
568
//
569
//      v dst_ptr at here           v stop at here
570
//  X O X   X O X   X O X   X O X   X O X
571
//    ^ src_ptr at here
572
void ScaleRowUp2_Linear_C(const uint8_t* src_ptr,
573
                          uint8_t* dst_ptr,
574
442k
                          int dst_width) {
575
442k
  int src_width = dst_width >> 1;
576
442k
  int x;
577
442k
  assert((dst_width % 2 == 0) && (dst_width >= 0));
578
3.59M
  for (x = 0; x < src_width; ++x) {
579
3.14M
    dst_ptr[2 * x + 0] = (src_ptr[x + 0] * 3 + src_ptr[x + 1] * 1 + 2) >> 2;
580
3.14M
    dst_ptr[2 * x + 1] = (src_ptr[x + 0] * 1 + src_ptr[x + 1] * 3 + 2) >> 2;
581
3.14M
  }
582
442k
}
583
584
// Sample position: (O is src sample position, X is dst sample position)
585
//
586
//    src_ptr at here
587
//  X v X   X   X   X   X   X   X   X   X
588
//    O       O       O       O       O
589
//  X   X   X   X   X   X   X   X   X   X
590
//      ^ dst_ptr at here           ^ stop at here
591
//  X   X   X   X   X   X   X   X   X   X
592
//    O       O       O       O       O
593
//  X   X   X   X   X   X   X   X   X   X
594
void ScaleRowUp2_Bilinear_C(const uint8_t* src_ptr,
595
                            ptrdiff_t src_stride,
596
                            uint8_t* dst_ptr,
597
                            ptrdiff_t dst_stride,
598
19.5k
                            int dst_width) {
599
19.5k
  const uint8_t* s = src_ptr;
600
19.5k
  const uint8_t* t = src_ptr + src_stride;
601
19.5k
  uint8_t* d = dst_ptr;
602
19.5k
  uint8_t* e = dst_ptr + dst_stride;
603
19.5k
  int src_width = dst_width >> 1;
604
19.5k
  int x;
605
19.5k
  assert((dst_width % 2 == 0) && (dst_width >= 0));
606
270k
  for (x = 0; x < src_width; ++x) {
607
250k
    d[2 * x + 0] =
608
250k
        (s[x + 0] * 9 + s[x + 1] * 3 + t[x + 0] * 3 + t[x + 1] * 1 + 8) >> 4;
609
250k
    d[2 * x + 1] =
610
250k
        (s[x + 0] * 3 + s[x + 1] * 9 + t[x + 0] * 1 + t[x + 1] * 3 + 8) >> 4;
611
250k
    e[2 * x + 0] =
612
250k
        (s[x + 0] * 3 + s[x + 1] * 1 + t[x + 0] * 9 + t[x + 1] * 3 + 8) >> 4;
613
250k
    e[2 * x + 1] =
614
250k
        (s[x + 0] * 1 + s[x + 1] * 3 + t[x + 0] * 3 + t[x + 1] * 9 + 8) >> 4;
615
250k
  }
616
19.5k
}
617
618
// Only suitable for at most 14 bit range.
619
void ScaleRowUp2_Linear_16_C(const uint16_t* src_ptr,
620
                             uint16_t* dst_ptr,
621
344k
                             int dst_width) {
622
344k
  int src_width = dst_width >> 1;
623
344k
  int x;
624
344k
  assert((dst_width % 2 == 0) && (dst_width >= 0));
625
2.73M
  for (x = 0; x < src_width; ++x) {
626
2.38M
    dst_ptr[2 * x + 0] = (src_ptr[x + 0] * 3 + src_ptr[x + 1] * 1 + 2) >> 2;
627
2.38M
    dst_ptr[2 * x + 1] = (src_ptr[x + 0] * 1 + src_ptr[x + 1] * 3 + 2) >> 2;
628
2.38M
  }
629
344k
}
630
631
// Only suitable for at most 12bit range.
632
void ScaleRowUp2_Bilinear_16_C(const uint16_t* src_ptr,
633
                               ptrdiff_t src_stride,
634
                               uint16_t* dst_ptr,
635
                               ptrdiff_t dst_stride,
636
18.6k
                               int dst_width) {
637
18.6k
  const uint16_t* s = src_ptr;
638
18.6k
  const uint16_t* t = src_ptr + src_stride;
639
18.6k
  uint16_t* d = dst_ptr;
640
18.6k
  uint16_t* e = dst_ptr + dst_stride;
641
18.6k
  int src_width = dst_width >> 1;
642
18.6k
  int x;
643
18.6k
  assert((dst_width % 2 == 0) && (dst_width >= 0));
644
133k
  for (x = 0; x < src_width; ++x) {
645
114k
    d[2 * x + 0] =
646
114k
        (s[x + 0] * 9 + s[x + 1] * 3 + t[x + 0] * 3 + t[x + 1] * 1 + 8) >> 4;
647
114k
    d[2 * x + 1] =
648
114k
        (s[x + 0] * 3 + s[x + 1] * 9 + t[x + 0] * 1 + t[x + 1] * 3 + 8) >> 4;
649
114k
    e[2 * x + 0] =
650
114k
        (s[x + 0] * 3 + s[x + 1] * 1 + t[x + 0] * 9 + t[x + 1] * 3 + 8) >> 4;
651
114k
    e[2 * x + 1] =
652
114k
        (s[x + 0] * 1 + s[x + 1] * 3 + t[x + 0] * 3 + t[x + 1] * 9 + 8) >> 4;
653
114k
  }
654
18.6k
}
655
656
// Scales a single row of pixels using point sampling.
657
void ScaleCols_C(uint8_t* dst_ptr,
658
                 const uint8_t* src_ptr,
659
                 int dst_width,
660
                 int x,
661
324k
                 int dx) {
662
324k
  int j;
663
147M
  for (j = 0; j < dst_width - 1; j += 2) {
664
147M
    dst_ptr[0] = src_ptr[x >> 16];
665
147M
    x += dx;
666
147M
    dst_ptr[1] = src_ptr[x >> 16];
667
147M
    x += dx;
668
147M
    dst_ptr += 2;
669
147M
  }
670
324k
  if (dst_width & 1) {
671
118k
    dst_ptr[0] = src_ptr[x >> 16];
672
118k
  }
673
324k
}
674
675
void ScaleCols_16_C(uint16_t* dst_ptr,
676
                    const uint16_t* src_ptr,
677
                    int dst_width,
678
                    int x,
679
458k
                    int dx) {
680
458k
  int j;
681
190M
  for (j = 0; j < dst_width - 1; j += 2) {
682
189M
    dst_ptr[0] = src_ptr[x >> 16];
683
189M
    x += dx;
684
189M
    dst_ptr[1] = src_ptr[x >> 16];
685
189M
    x += dx;
686
189M
    dst_ptr += 2;
687
189M
  }
688
458k
  if (dst_width & 1) {
689
195k
    dst_ptr[0] = src_ptr[x >> 16];
690
195k
  }
691
458k
}
692
693
// Scales a single row of pixels up by 2x using point sampling.
694
void ScaleColsUp2_C(uint8_t* dst_ptr,
695
                    const uint8_t* src_ptr,
696
                    int dst_width,
697
                    int x,
698
36.2k
                    int dx) {
699
36.2k
  int j;
700
36.2k
  (void)x;
701
36.2k
  (void)dx;
702
72.4k
  for (j = 0; j < dst_width - 1; j += 2) {
703
36.2k
    dst_ptr[1] = dst_ptr[0] = src_ptr[0];
704
36.2k
    src_ptr += 1;
705
36.2k
    dst_ptr += 2;
706
36.2k
  }
707
36.2k
  if (dst_width & 1) {
708
0
    dst_ptr[0] = src_ptr[0];
709
0
  }
710
36.2k
}
711
712
void ScaleColsUp2_16_C(uint16_t* dst_ptr,
713
                       const uint16_t* src_ptr,
714
                       int dst_width,
715
                       int x,
716
36.9k
                       int dx) {
717
36.9k
  int j;
718
36.9k
  (void)x;
719
36.9k
  (void)dx;
720
73.9k
  for (j = 0; j < dst_width - 1; j += 2) {
721
36.9k
    dst_ptr[1] = dst_ptr[0] = src_ptr[0];
722
36.9k
    src_ptr += 1;
723
36.9k
    dst_ptr += 2;
724
36.9k
  }
725
36.9k
  if (dst_width & 1) {
726
0
    dst_ptr[0] = src_ptr[0];
727
0
  }
728
36.9k
}
729
730
// (1-f)a + fb can be replaced with a + f(b-a)
731
#if defined(__arm__) || defined(__aarch64__)
732
#define BLENDER(a, b, f) \
733
  (uint8_t)((int)(a) + ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16))
734
#else
735
// Intel uses 7 bit math with rounding.
736
#define BLENDER(a, b, f) \
737
0
  (uint8_t)((int)(a) + (((int)((f) >> 9) * ((int)(b) - (int)(a)) + 0x40) >> 7))
738
#endif
739
740
void ScaleFilterCols_C(uint8_t* dst_ptr,
741
                       const uint8_t* src_ptr,
742
                       int dst_width,
743
                       int x,
744
0
                       int dx) {
745
0
  int j;
746
0
  for (j = 0; j < dst_width - 1; j += 2) {
747
0
    int xi = x >> 16;
748
0
    int a = src_ptr[xi];
749
0
    int b = src_ptr[xi + 1];
750
0
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
751
0
    x += dx;
752
0
    xi = x >> 16;
753
0
    a = src_ptr[xi];
754
0
    b = src_ptr[xi + 1];
755
0
    dst_ptr[1] = BLENDER(a, b, x & 0xffff);
756
0
    x += dx;
757
0
    dst_ptr += 2;
758
0
  }
759
0
  if (dst_width & 1) {
760
0
    int xi = x >> 16;
761
0
    int a = src_ptr[xi];
762
0
    int b = src_ptr[xi + 1];
763
0
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
764
0
  }
765
0
}
766
767
void ScaleFilterCols64_C(uint8_t* dst_ptr,
768
                         const uint8_t* src_ptr,
769
                         int dst_width,
770
                         int x32,
771
0
                         int dx) {
772
0
  int64_t x = (int64_t)(x32);
773
0
  int j;
774
0
  for (j = 0; j < dst_width - 1; j += 2) {
775
0
    int64_t xi = x >> 16;
776
0
    int a = src_ptr[xi];
777
0
    int b = src_ptr[xi + 1];
778
0
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
779
0
    x += dx;
780
0
    xi = x >> 16;
781
0
    a = src_ptr[xi];
782
0
    b = src_ptr[xi + 1];
783
0
    dst_ptr[1] = BLENDER(a, b, x & 0xffff);
784
0
    x += dx;
785
0
    dst_ptr += 2;
786
0
  }
787
0
  if (dst_width & 1) {
788
0
    int64_t xi = x >> 16;
789
0
    int a = src_ptr[xi];
790
0
    int b = src_ptr[xi + 1];
791
0
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
792
0
  }
793
0
}
794
#undef BLENDER
795
796
// Same as 8 bit arm blender but return is cast to uint16_t
797
#define BLENDER(a, b, f) \
798
238M
  (uint16_t)(            \
799
238M
      (int)(a) +         \
800
238M
      (int)((((int64_t)((f)) * ((int64_t)(b) - (int)(a))) + 0x8000) >> 16))
801
802
void ScaleFilterCols_16_C(uint16_t* dst_ptr,
803
                          const uint16_t* src_ptr,
804
                          int dst_width,
805
                          int x,
806
658k
                          int dx) {
807
658k
  int j;
808
119M
  for (j = 0; j < dst_width - 1; j += 2) {
809
119M
    int xi = x >> 16;
810
119M
    int a = src_ptr[xi];
811
119M
    int b = src_ptr[xi + 1];
812
119M
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
813
119M
    x += dx;
814
119M
    xi = x >> 16;
815
119M
    a = src_ptr[xi];
816
119M
    b = src_ptr[xi + 1];
817
119M
    dst_ptr[1] = BLENDER(a, b, x & 0xffff);
818
119M
    x += dx;
819
119M
    dst_ptr += 2;
820
119M
  }
821
658k
  if (dst_width & 1) {
822
223k
    int xi = x >> 16;
823
223k
    int a = src_ptr[xi];
824
223k
    int b = src_ptr[xi + 1];
825
223k
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
826
223k
  }
827
658k
}
828
829
void ScaleFilterCols64_16_C(uint16_t* dst_ptr,
830
                            const uint16_t* src_ptr,
831
                            int dst_width,
832
                            int x32,
833
0
                            int dx) {
834
0
  int64_t x = (int64_t)(x32);
835
0
  int j;
836
0
  for (j = 0; j < dst_width - 1; j += 2) {
837
0
    int64_t xi = x >> 16;
838
0
    int a = src_ptr[xi];
839
0
    int b = src_ptr[xi + 1];
840
0
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
841
0
    x += dx;
842
0
    xi = x >> 16;
843
0
    a = src_ptr[xi];
844
0
    b = src_ptr[xi + 1];
845
0
    dst_ptr[1] = BLENDER(a, b, x & 0xffff);
846
0
    x += dx;
847
0
    dst_ptr += 2;
848
0
  }
849
0
  if (dst_width & 1) {
850
0
    int64_t xi = x >> 16;
851
0
    int a = src_ptr[xi];
852
0
    int b = src_ptr[xi + 1];
853
0
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
854
0
  }
855
0
}
856
#undef BLENDER
857
858
void ScaleRowDown38_C(const uint8_t* src_ptr,
859
                      ptrdiff_t src_stride,
860
                      uint8_t* dst,
861
0
                      int dst_width) {
862
0
  int x;
863
0
  (void)src_stride;
864
0
  assert(dst_width % 3 == 0);
865
0
  for (x = 0; x < dst_width; x += 3) {
866
0
    dst[0] = src_ptr[0];
867
0
    dst[1] = src_ptr[3];
868
0
    dst[2] = src_ptr[6];
869
0
    dst += 3;
870
0
    src_ptr += 8;
871
0
  }
872
0
}
873
874
void ScaleRowDown38_16_C(const uint16_t* src_ptr,
875
                         ptrdiff_t src_stride,
876
                         uint16_t* dst,
877
0
                         int dst_width) {
878
0
  int x;
879
0
  (void)src_stride;
880
0
  assert(dst_width % 3 == 0);
881
0
  for (x = 0; x < dst_width; x += 3) {
882
0
    dst[0] = src_ptr[0];
883
0
    dst[1] = src_ptr[3];
884
0
    dst[2] = src_ptr[6];
885
0
    dst += 3;
886
0
    src_ptr += 8;
887
0
  }
888
0
}
889
890
// 8x3 -> 3x1
891
void ScaleRowDown38_3_Box_C(const uint8_t* src_ptr,
892
                            ptrdiff_t src_stride,
893
                            uint8_t* dst_ptr,
894
1.10k
                            int dst_width) {
895
1.10k
  intptr_t stride = src_stride;
896
1.10k
  int i;
897
1.10k
  assert((dst_width % 3 == 0) && (dst_width > 0));
898
2.20k
  for (i = 0; i < dst_width; i += 3) {
899
1.10k
    dst_ptr[0] =
900
1.10k
        (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] +
901
1.10k
         src_ptr[stride + 1] + src_ptr[stride + 2] + src_ptr[stride * 2 + 0] +
902
1.10k
         src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) *
903
1.10k
            (65536 / 9) >>
904
1.10k
        16;
905
1.10k
    dst_ptr[1] =
906
1.10k
        (src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] +
907
1.10k
         src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride * 2 + 3] +
908
1.10k
         src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) *
909
1.10k
            (65536 / 9) >>
910
1.10k
        16;
911
1.10k
    dst_ptr[2] =
912
1.10k
        (src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7] +
913
1.10k
         src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) *
914
1.10k
            (65536 / 6) >>
915
1.10k
        16;
916
1.10k
    src_ptr += 8;
917
1.10k
    dst_ptr += 3;
918
1.10k
  }
919
1.10k
}
920
921
void ScaleRowDown38_3_Box_16_C(const uint16_t* src_ptr,
922
                               ptrdiff_t src_stride,
923
                               uint16_t* dst_ptr,
924
1.72k
                               int dst_width) {
925
1.72k
  intptr_t stride = src_stride;
926
1.72k
  int i;
927
1.72k
  assert((dst_width % 3 == 0) && (dst_width > 0));
928
3.60k
  for (i = 0; i < dst_width; i += 3) {
929
1.88k
    dst_ptr[0] =
930
1.88k
        (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] +
931
1.88k
         src_ptr[stride + 1] + src_ptr[stride + 2] + src_ptr[stride * 2 + 0] +
932
1.88k
         src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) *
933
1.88k
            (65536u / 9u) >>
934
1.88k
        16;
935
1.88k
    dst_ptr[1] =
936
1.88k
        (src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] +
937
1.88k
         src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride * 2 + 3] +
938
1.88k
         src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) *
939
1.88k
            (65536u / 9u) >>
940
1.88k
        16;
941
1.88k
    dst_ptr[2] =
942
1.88k
        (src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7] +
943
1.88k
         src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) *
944
1.88k
            (65536u / 6u) >>
945
1.88k
        16;
946
1.88k
    src_ptr += 8;
947
1.88k
    dst_ptr += 3;
948
1.88k
  }
949
1.72k
}
950
951
// 8x2 -> 3x1
952
void ScaleRowDown38_2_Box_C(const uint8_t* src_ptr,
953
                            ptrdiff_t src_stride,
954
                            uint8_t* dst_ptr,
955
550
                            int dst_width) {
956
550
  intptr_t stride = src_stride;
957
550
  int i;
958
550
  assert((dst_width % 3 == 0) && (dst_width > 0));
959
1.10k
  for (i = 0; i < dst_width; i += 3) {
960
550
    dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] +
961
550
                  src_ptr[stride + 1] + src_ptr[stride + 2]) *
962
550
                     (65536 / 6) >>
963
550
                 16;
964
550
    dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] +
965
550
                  src_ptr[stride + 4] + src_ptr[stride + 5]) *
966
550
                     (65536 / 6) >>
967
550
                 16;
968
550
    dst_ptr[2] =
969
550
        (src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7]) *
970
550
            (65536 / 4) >>
971
550
        16;
972
550
    src_ptr += 8;
973
550
    dst_ptr += 3;
974
550
  }
975
550
}
976
977
void ScaleRowDown38_2_Box_16_C(const uint16_t* src_ptr,
978
                               ptrdiff_t src_stride,
979
                               uint16_t* dst_ptr,
980
861
                               int dst_width) {
981
861
  intptr_t stride = src_stride;
982
861
  int i;
983
861
  assert((dst_width % 3 == 0) && (dst_width > 0));
984
1.80k
  for (i = 0; i < dst_width; i += 3) {
985
941
    dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] +
986
941
                  src_ptr[stride + 1] + src_ptr[stride + 2]) *
987
941
                     (65536u / 6u) >>
988
941
                 16;
989
941
    dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] +
990
941
                  src_ptr[stride + 4] + src_ptr[stride + 5]) *
991
941
                     (65536u / 6u) >>
992
941
                 16;
993
941
    dst_ptr[2] =
994
941
        (src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7]) *
995
941
            (65536u / 4u) >>
996
941
        16;
997
941
    src_ptr += 8;
998
941
    dst_ptr += 3;
999
941
  }
1000
861
}
1001
1002
722k
void ScaleAddRow_C(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) {
1003
722k
  int x;
1004
722k
  assert(src_width > 0);
1005
6.83M
  for (x = 0; x < src_width - 1; x += 2) {
1006
6.11M
    dst_ptr[0] += src_ptr[0];
1007
6.11M
    dst_ptr[1] += src_ptr[1];
1008
6.11M
    src_ptr += 2;
1009
6.11M
    dst_ptr += 2;
1010
6.11M
  }
1011
722k
  if (src_width & 1) {
1012
301k
    dst_ptr[0] += src_ptr[0];
1013
301k
  }
1014
722k
}
1015
1016
void ScaleAddRow_16_C(const uint16_t* src_ptr,
1017
                      uint32_t* dst_ptr,
1018
699k
                      int src_width) {
1019
699k
  int x;
1020
699k
  assert(src_width > 0);
1021
69.8M
  for (x = 0; x < src_width - 1; x += 2) {
1022
69.1M
    dst_ptr[0] += src_ptr[0];
1023
69.1M
    dst_ptr[1] += src_ptr[1];
1024
69.1M
    src_ptr += 2;
1025
69.1M
    dst_ptr += 2;
1026
69.1M
  }
1027
699k
  if (src_width & 1) {
1028
268k
    dst_ptr[0] += src_ptr[0];
1029
268k
  }
1030
699k
}
1031
1032
// ARGB scale row functions
1033
1034
void ScaleARGBRowDown2_C(const uint8_t* src_argb,
1035
                         ptrdiff_t src_stride,
1036
                         uint8_t* dst_argb,
1037
0
                         int dst_width) {
1038
0
  const uint32_t* src = (const uint32_t*)(src_argb);
1039
0
  uint32_t* dst = (uint32_t*)(dst_argb);
1040
0
  int x;
1041
0
  (void)src_stride;
1042
0
  for (x = 0; x < dst_width - 1; x += 2) {
1043
0
    dst[0] = src[1];
1044
0
    dst[1] = src[3];
1045
0
    src += 4;
1046
0
    dst += 2;
1047
0
  }
1048
0
  if (dst_width & 1) {
1049
0
    dst[0] = src[1];
1050
0
  }
1051
0
}
1052
1053
void ScaleARGBRowDown2Linear_C(const uint8_t* src_argb,
1054
                               ptrdiff_t src_stride,
1055
                               uint8_t* dst_argb,
1056
0
                               int dst_width) {
1057
0
  int x;
1058
0
  (void)src_stride;
1059
0
  for (x = 0; x < dst_width; ++x) {
1060
0
    dst_argb[0] = (src_argb[0] + src_argb[4] + 1) >> 1;
1061
0
    dst_argb[1] = (src_argb[1] + src_argb[5] + 1) >> 1;
1062
0
    dst_argb[2] = (src_argb[2] + src_argb[6] + 1) >> 1;
1063
0
    dst_argb[3] = (src_argb[3] + src_argb[7] + 1) >> 1;
1064
0
    src_argb += 8;
1065
0
    dst_argb += 4;
1066
0
  }
1067
0
}
1068
1069
void ScaleARGBRowDown2Box_C(const uint8_t* src_argb,
1070
                            ptrdiff_t src_stride,
1071
                            uint8_t* dst_argb,
1072
0
                            int dst_width) {
1073
0
  int x;
1074
0
  for (x = 0; x < dst_width; ++x) {
1075
0
    dst_argb[0] = (src_argb[0] + src_argb[4] + src_argb[src_stride] +
1076
0
                   src_argb[src_stride + 4] + 2) >>
1077
0
                  2;
1078
0
    dst_argb[1] = (src_argb[1] + src_argb[5] + src_argb[src_stride + 1] +
1079
0
                   src_argb[src_stride + 5] + 2) >>
1080
0
                  2;
1081
0
    dst_argb[2] = (src_argb[2] + src_argb[6] + src_argb[src_stride + 2] +
1082
0
                   src_argb[src_stride + 6] + 2) >>
1083
0
                  2;
1084
0
    dst_argb[3] = (src_argb[3] + src_argb[7] + src_argb[src_stride + 3] +
1085
0
                   src_argb[src_stride + 7] + 2) >>
1086
0
                  2;
1087
0
    src_argb += 8;
1088
0
    dst_argb += 4;
1089
0
  }
1090
0
}
1091
1092
void ScaleARGBRowDownEven_C(const uint8_t* src_argb,
1093
                            ptrdiff_t src_stride,
1094
                            int src_stepx,
1095
                            uint8_t* dst_argb,
1096
0
                            int dst_width) {
1097
0
  const uint32_t* src = (const uint32_t*)(src_argb);
1098
0
  uint32_t* dst = (uint32_t*)(dst_argb);
1099
0
  (void)src_stride;
1100
0
  int x;
1101
0
  for (x = 0; x < dst_width - 1; x += 2) {
1102
0
    dst[0] = src[0];
1103
0
    dst[1] = src[src_stepx];
1104
0
    src += src_stepx * 2;
1105
0
    dst += 2;
1106
0
  }
1107
0
  if (dst_width & 1) {
1108
0
    dst[0] = src[0];
1109
0
  }
1110
0
}
1111
1112
void ScaleARGBRowDownEvenBox_C(const uint8_t* src_argb,
1113
                               ptrdiff_t src_stride,
1114
                               int src_stepx,
1115
                               uint8_t* dst_argb,
1116
0
                               int dst_width) {
1117
0
  int x;
1118
0
  for (x = 0; x < dst_width; ++x) {
1119
0
    dst_argb[0] = (src_argb[0] + src_argb[4] + src_argb[src_stride] +
1120
0
                   src_argb[src_stride + 4] + 2) >>
1121
0
                  2;
1122
0
    dst_argb[1] = (src_argb[1] + src_argb[5] + src_argb[src_stride + 1] +
1123
0
                   src_argb[src_stride + 5] + 2) >>
1124
0
                  2;
1125
0
    dst_argb[2] = (src_argb[2] + src_argb[6] + src_argb[src_stride + 2] +
1126
0
                   src_argb[src_stride + 6] + 2) >>
1127
0
                  2;
1128
0
    dst_argb[3] = (src_argb[3] + src_argb[7] + src_argb[src_stride + 3] +
1129
0
                   src_argb[src_stride + 7] + 2) >>
1130
0
                  2;
1131
0
    src_argb += src_stepx * 4;
1132
0
    dst_argb += 4;
1133
0
  }
1134
0
}
1135
1136
// Scales a single row of pixels using point sampling.
1137
void ScaleARGBCols_C(uint8_t* dst_argb,
1138
                     const uint8_t* src_argb,
1139
                     int dst_width,
1140
                     int x,
1141
0
                     int dx) {
1142
0
  const uint32_t* src = (const uint32_t*)(src_argb);
1143
0
  uint32_t* dst = (uint32_t*)(dst_argb);
1144
0
  int j;
1145
0
  for (j = 0; j < dst_width - 1; j += 2) {
1146
0
    dst[0] = src[x >> 16];
1147
0
    x += dx;
1148
0
    dst[1] = src[x >> 16];
1149
0
    x += dx;
1150
0
    dst += 2;
1151
0
  }
1152
0
  if (dst_width & 1) {
1153
0
    dst[0] = src[x >> 16];
1154
0
  }
1155
0
}
1156
1157
void ScaleARGBCols64_C(uint8_t* dst_argb,
1158
                       const uint8_t* src_argb,
1159
                       int dst_width,
1160
                       int x32,
1161
0
                       int dx) {
1162
0
  int64_t x = (int64_t)(x32);
1163
0
  const uint32_t* src = (const uint32_t*)(src_argb);
1164
0
  uint32_t* dst = (uint32_t*)(dst_argb);
1165
0
  int j;
1166
0
  for (j = 0; j < dst_width - 1; j += 2) {
1167
0
    dst[0] = src[x >> 16];
1168
0
    x += dx;
1169
0
    dst[1] = src[x >> 16];
1170
0
    x += dx;
1171
0
    dst += 2;
1172
0
  }
1173
0
  if (dst_width & 1) {
1174
0
    dst[0] = src[x >> 16];
1175
0
  }
1176
0
}
1177
1178
// Scales a single row of pixels up by 2x using point sampling.
1179
void ScaleARGBColsUp2_C(uint8_t* dst_argb,
1180
                        const uint8_t* src_argb,
1181
                        int dst_width,
1182
                        int x,
1183
0
                        int dx) {
1184
0
  const uint32_t* src = (const uint32_t*)(src_argb);
1185
0
  uint32_t* dst = (uint32_t*)(dst_argb);
1186
0
  int j;
1187
0
  (void)x;
1188
0
  (void)dx;
1189
0
  for (j = 0; j < dst_width - 1; j += 2) {
1190
0
    dst[1] = dst[0] = src[0];
1191
0
    src += 1;
1192
0
    dst += 2;
1193
0
  }
1194
0
  if (dst_width & 1) {
1195
0
    dst[0] = src[0];
1196
0
  }
1197
0
}
1198
1199
// TODO(fbarchard): Replace 0x7f ^ f with 128-f.  bug=607.
1200
// Mimics SSSE3 blender
1201
0
#define BLENDER1(a, b, f) ((a) * (0x7f ^ f) + (b)*f) >> 7
1202
#define BLENDERC(a, b, f, s) \
1203
0
  (uint32_t)(BLENDER1(((a) >> s) & 255, ((b) >> s) & 255, f) << s)
1204
#define BLENDER(a, b, f)                                                 \
1205
0
  BLENDERC(a, b, f, 24) | BLENDERC(a, b, f, 16) | BLENDERC(a, b, f, 8) | \
1206
0
      BLENDERC(a, b, f, 0)
1207
1208
void ScaleARGBFilterCols_C(uint8_t* dst_argb,
1209
                           const uint8_t* src_argb,
1210
                           int dst_width,
1211
                           int x,
1212
0
                           int dx) {
1213
0
  const uint32_t* src = (const uint32_t*)(src_argb);
1214
0
  uint32_t* dst = (uint32_t*)(dst_argb);
1215
0
  int j;
1216
0
  for (j = 0; j < dst_width - 1; j += 2) {
1217
0
    int xi = x >> 16;
1218
0
    int xf = (x >> 9) & 0x7f;
1219
0
    uint32_t a = src[xi];
1220
0
    uint32_t b = src[xi + 1];
1221
0
    dst[0] = BLENDER(a, b, xf);
1222
0
    x += dx;
1223
0
    xi = x >> 16;
1224
0
    xf = (x >> 9) & 0x7f;
1225
0
    a = src[xi];
1226
0
    b = src[xi + 1];
1227
0
    dst[1] = BLENDER(a, b, xf);
1228
0
    x += dx;
1229
0
    dst += 2;
1230
0
  }
1231
0
  if (dst_width & 1) {
1232
0
    int xi = x >> 16;
1233
0
    int xf = (x >> 9) & 0x7f;
1234
0
    uint32_t a = src[xi];
1235
0
    uint32_t b = src[xi + 1];
1236
0
    dst[0] = BLENDER(a, b, xf);
1237
0
  }
1238
0
}
1239
1240
void ScaleARGBFilterCols64_C(uint8_t* dst_argb,
1241
                             const uint8_t* src_argb,
1242
                             int dst_width,
1243
                             int x32,
1244
0
                             int dx) {
1245
0
  int64_t x = (int64_t)(x32);
1246
0
  const uint32_t* src = (const uint32_t*)(src_argb);
1247
0
  uint32_t* dst = (uint32_t*)(dst_argb);
1248
0
  int j;
1249
0
  for (j = 0; j < dst_width - 1; j += 2) {
1250
0
    int64_t xi = x >> 16;
1251
0
    int xf = (x >> 9) & 0x7f;
1252
0
    uint32_t a = src[xi];
1253
0
    uint32_t b = src[xi + 1];
1254
0
    dst[0] = BLENDER(a, b, xf);
1255
0
    x += dx;
1256
0
    xi = x >> 16;
1257
0
    xf = (x >> 9) & 0x7f;
1258
0
    a = src[xi];
1259
0
    b = src[xi + 1];
1260
0
    dst[1] = BLENDER(a, b, xf);
1261
0
    x += dx;
1262
0
    dst += 2;
1263
0
  }
1264
0
  if (dst_width & 1) {
1265
0
    int64_t xi = x >> 16;
1266
0
    int xf = (x >> 9) & 0x7f;
1267
0
    uint32_t a = src[xi];
1268
0
    uint32_t b = src[xi + 1];
1269
0
    dst[0] = BLENDER(a, b, xf);
1270
0
  }
1271
0
}
1272
#undef BLENDER1
1273
#undef BLENDERC
1274
#undef BLENDER
1275
1276
// UV scale row functions
1277
// same as ARGB but 2 channels
1278
1279
void ScaleUVRowDown2_C(const uint8_t* src_uv,
1280
                       ptrdiff_t src_stride,
1281
                       uint8_t* dst_uv,
1282
0
                       int dst_width) {
1283
0
  int x;
1284
0
  (void)src_stride;
1285
0
  for (x = 0; x < dst_width; ++x) {
1286
0
    dst_uv[0] = src_uv[2];  // Store the 2nd UV
1287
0
    dst_uv[1] = src_uv[3];
1288
0
    src_uv += 4;
1289
0
    dst_uv += 2;
1290
0
  }
1291
0
}
1292
1293
void ScaleUVRowDown2Linear_C(const uint8_t* src_uv,
1294
                             ptrdiff_t src_stride,
1295
                             uint8_t* dst_uv,
1296
0
                             int dst_width) {
1297
0
  int x;
1298
0
  (void)src_stride;
1299
0
  for (x = 0; x < dst_width; ++x) {
1300
0
    dst_uv[0] = (src_uv[0] + src_uv[2] + 1) >> 1;
1301
0
    dst_uv[1] = (src_uv[1] + src_uv[3] + 1) >> 1;
1302
0
    src_uv += 4;
1303
0
    dst_uv += 2;
1304
0
  }
1305
0
}
1306
1307
void ScaleUVRowDown2Box_C(const uint8_t* src_uv,
1308
                          ptrdiff_t src_stride,
1309
                          uint8_t* dst_uv,
1310
0
                          int dst_width) {
1311
0
  int x;
1312
0
  for (x = 0; x < dst_width; ++x) {
1313
0
    dst_uv[0] = (src_uv[0] + src_uv[2] + src_uv[src_stride] +
1314
0
                 src_uv[src_stride + 2] + 2) >>
1315
0
                2;
1316
0
    dst_uv[1] = (src_uv[1] + src_uv[3] + src_uv[src_stride + 1] +
1317
0
                 src_uv[src_stride + 3] + 2) >>
1318
0
                2;
1319
0
    src_uv += 4;
1320
0
    dst_uv += 2;
1321
0
  }
1322
0
}
1323
1324
void ScaleUVRowDownEven_C(const uint8_t* src_uv,
1325
                          ptrdiff_t src_stride,
1326
                          int src_stepx,
1327
                          uint8_t* dst_uv,
1328
0
                          int dst_width) {
1329
0
  const uint16_t* src = (const uint16_t*)(src_uv);
1330
0
  uint16_t* dst = (uint16_t*)(dst_uv);
1331
0
  (void)src_stride;
1332
0
  int x;
1333
0
  for (x = 0; x < dst_width - 1; x += 2) {
1334
0
    dst[0] = src[0];
1335
0
    dst[1] = src[src_stepx];
1336
0
    src += src_stepx * 2;
1337
0
    dst += 2;
1338
0
  }
1339
0
  if (dst_width & 1) {
1340
0
    dst[0] = src[0];
1341
0
  }
1342
0
}
1343
1344
void ScaleUVRowDownEvenBox_C(const uint8_t* src_uv,
1345
                             ptrdiff_t src_stride,
1346
                             int src_stepx,
1347
                             uint8_t* dst_uv,
1348
0
                             int dst_width) {
1349
0
  int x;
1350
0
  for (x = 0; x < dst_width; ++x) {
1351
0
    dst_uv[0] = (src_uv[0] + src_uv[2] + src_uv[src_stride] +
1352
0
                 src_uv[src_stride + 2] + 2) >>
1353
0
                2;
1354
0
    dst_uv[1] = (src_uv[1] + src_uv[3] + src_uv[src_stride + 1] +
1355
0
                 src_uv[src_stride + 3] + 2) >>
1356
0
                2;
1357
0
    src_uv += src_stepx * 2;
1358
0
    dst_uv += 2;
1359
0
  }
1360
0
}
1361
1362
void ScaleUVRowUp2_Linear_C(const uint8_t* src_ptr,
1363
                            uint8_t* dst_ptr,
1364
0
                            int dst_width) {
1365
0
  int src_width = dst_width >> 1;
1366
0
  int x;
1367
0
  assert((dst_width % 2 == 0) && (dst_width >= 0));
1368
0
  for (x = 0; x < src_width; ++x) {
1369
0
    dst_ptr[4 * x + 0] =
1370
0
        (src_ptr[2 * x + 0] * 3 + src_ptr[2 * x + 2] * 1 + 2) >> 2;
1371
0
    dst_ptr[4 * x + 1] =
1372
0
        (src_ptr[2 * x + 1] * 3 + src_ptr[2 * x + 3] * 1 + 2) >> 2;
1373
0
    dst_ptr[4 * x + 2] =
1374
0
        (src_ptr[2 * x + 0] * 1 + src_ptr[2 * x + 2] * 3 + 2) >> 2;
1375
0
    dst_ptr[4 * x + 3] =
1376
0
        (src_ptr[2 * x + 1] * 1 + src_ptr[2 * x + 3] * 3 + 2) >> 2;
1377
0
  }
1378
0
}
1379
1380
void ScaleUVRowUp2_Bilinear_C(const uint8_t* src_ptr,
1381
                              ptrdiff_t src_stride,
1382
                              uint8_t* dst_ptr,
1383
                              ptrdiff_t dst_stride,
1384
0
                              int dst_width) {
1385
0
  const uint8_t* s = src_ptr;
1386
0
  const uint8_t* t = src_ptr + src_stride;
1387
0
  uint8_t* d = dst_ptr;
1388
0
  uint8_t* e = dst_ptr + dst_stride;
1389
0
  int src_width = dst_width >> 1;
1390
0
  int x;
1391
0
  assert((dst_width % 2 == 0) && (dst_width >= 0));
1392
0
  for (x = 0; x < src_width; ++x) {
1393
0
    d[4 * x + 0] = (s[2 * x + 0] * 9 + s[2 * x + 2] * 3 + t[2 * x + 0] * 3 +
1394
0
                    t[2 * x + 2] * 1 + 8) >>
1395
0
                   4;
1396
0
    d[4 * x + 1] = (s[2 * x + 1] * 9 + s[2 * x + 3] * 3 + t[2 * x + 1] * 3 +
1397
0
                    t[2 * x + 3] * 1 + 8) >>
1398
0
                   4;
1399
0
    d[4 * x + 2] = (s[2 * x + 0] * 3 + s[2 * x + 2] * 9 + t[2 * x + 0] * 1 +
1400
0
                    t[2 * x + 2] * 3 + 8) >>
1401
0
                   4;
1402
0
    d[4 * x + 3] = (s[2 * x + 1] * 3 + s[2 * x + 3] * 9 + t[2 * x + 1] * 1 +
1403
0
                    t[2 * x + 3] * 3 + 8) >>
1404
0
                   4;
1405
0
    e[4 * x + 0] = (s[2 * x + 0] * 3 + s[2 * x + 2] * 1 + t[2 * x + 0] * 9 +
1406
0
                    t[2 * x + 2] * 3 + 8) >>
1407
0
                   4;
1408
0
    e[4 * x + 1] = (s[2 * x + 1] * 3 + s[2 * x + 3] * 1 + t[2 * x + 1] * 9 +
1409
0
                    t[2 * x + 3] * 3 + 8) >>
1410
0
                   4;
1411
0
    e[4 * x + 2] = (s[2 * x + 0] * 1 + s[2 * x + 2] * 3 + t[2 * x + 0] * 3 +
1412
0
                    t[2 * x + 2] * 9 + 8) >>
1413
0
                   4;
1414
0
    e[4 * x + 3] = (s[2 * x + 1] * 1 + s[2 * x + 3] * 3 + t[2 * x + 1] * 3 +
1415
0
                    t[2 * x + 3] * 9 + 8) >>
1416
0
                   4;
1417
0
  }
1418
0
}
1419
1420
void ScaleUVRowUp2_Linear_16_C(const uint16_t* src_ptr,
1421
                               uint16_t* dst_ptr,
1422
0
                               int dst_width) {
1423
0
  int src_width = dst_width >> 1;
1424
0
  int x;
1425
0
  assert((dst_width % 2 == 0) && (dst_width >= 0));
1426
0
  for (x = 0; x < src_width; ++x) {
1427
0
    dst_ptr[4 * x + 0] =
1428
0
        (src_ptr[2 * x + 0] * 3 + src_ptr[2 * x + 2] * 1 + 2) >> 2;
1429
0
    dst_ptr[4 * x + 1] =
1430
0
        (src_ptr[2 * x + 1] * 3 + src_ptr[2 * x + 3] * 1 + 2) >> 2;
1431
0
    dst_ptr[4 * x + 2] =
1432
0
        (src_ptr[2 * x + 0] * 1 + src_ptr[2 * x + 2] * 3 + 2) >> 2;
1433
0
    dst_ptr[4 * x + 3] =
1434
0
        (src_ptr[2 * x + 1] * 1 + src_ptr[2 * x + 3] * 3 + 2) >> 2;
1435
0
  }
1436
0
}
1437
1438
void ScaleUVRowUp2_Bilinear_16_C(const uint16_t* src_ptr,
1439
                                 ptrdiff_t src_stride,
1440
                                 uint16_t* dst_ptr,
1441
                                 ptrdiff_t dst_stride,
1442
0
                                 int dst_width) {
1443
0
  const uint16_t* s = src_ptr;
1444
0
  const uint16_t* t = src_ptr + src_stride;
1445
0
  uint16_t* d = dst_ptr;
1446
0
  uint16_t* e = dst_ptr + dst_stride;
1447
0
  int src_width = dst_width >> 1;
1448
0
  int x;
1449
0
  assert((dst_width % 2 == 0) && (dst_width >= 0));
1450
0
  for (x = 0; x < src_width; ++x) {
1451
0
    d[4 * x + 0] = (s[2 * x + 0] * 9 + s[2 * x + 2] * 3 + t[2 * x + 0] * 3 +
1452
0
                    t[2 * x + 2] * 1 + 8) >>
1453
0
                   4;
1454
0
    d[4 * x + 1] = (s[2 * x + 1] * 9 + s[2 * x + 3] * 3 + t[2 * x + 1] * 3 +
1455
0
                    t[2 * x + 3] * 1 + 8) >>
1456
0
                   4;
1457
0
    d[4 * x + 2] = (s[2 * x + 0] * 3 + s[2 * x + 2] * 9 + t[2 * x + 0] * 1 +
1458
0
                    t[2 * x + 2] * 3 + 8) >>
1459
0
                   4;
1460
0
    d[4 * x + 3] = (s[2 * x + 1] * 3 + s[2 * x + 3] * 9 + t[2 * x + 1] * 1 +
1461
0
                    t[2 * x + 3] * 3 + 8) >>
1462
0
                   4;
1463
0
    e[4 * x + 0] = (s[2 * x + 0] * 3 + s[2 * x + 2] * 1 + t[2 * x + 0] * 9 +
1464
0
                    t[2 * x + 2] * 3 + 8) >>
1465
0
                   4;
1466
0
    e[4 * x + 1] = (s[2 * x + 1] * 3 + s[2 * x + 3] * 1 + t[2 * x + 1] * 9 +
1467
0
                    t[2 * x + 3] * 3 + 8) >>
1468
0
                   4;
1469
0
    e[4 * x + 2] = (s[2 * x + 0] * 1 + s[2 * x + 2] * 3 + t[2 * x + 0] * 3 +
1470
0
                    t[2 * x + 2] * 9 + 8) >>
1471
0
                   4;
1472
0
    e[4 * x + 3] = (s[2 * x + 1] * 1 + s[2 * x + 3] * 3 + t[2 * x + 1] * 3 +
1473
0
                    t[2 * x + 3] * 9 + 8) >>
1474
0
                   4;
1475
0
  }
1476
0
}
1477
1478
// Scales a single row of pixels using point sampling.
1479
void ScaleUVCols_C(uint8_t* dst_uv,
1480
                   const uint8_t* src_uv,
1481
                   int dst_width,
1482
                   int x,
1483
0
                   int dx) {
1484
0
  const uint16_t* src = (const uint16_t*)(src_uv);
1485
0
  uint16_t* dst = (uint16_t*)(dst_uv);
1486
0
  int j;
1487
0
  for (j = 0; j < dst_width - 1; j += 2) {
1488
0
    dst[0] = src[x >> 16];
1489
0
    x += dx;
1490
0
    dst[1] = src[x >> 16];
1491
0
    x += dx;
1492
0
    dst += 2;
1493
0
  }
1494
0
  if (dst_width & 1) {
1495
0
    dst[0] = src[x >> 16];
1496
0
  }
1497
0
}
1498
1499
void ScaleUVCols64_C(uint8_t* dst_uv,
1500
                     const uint8_t* src_uv,
1501
                     int dst_width,
1502
                     int x32,
1503
0
                     int dx) {
1504
0
  int64_t x = (int64_t)(x32);
1505
0
  const uint16_t* src = (const uint16_t*)(src_uv);
1506
0
  uint16_t* dst = (uint16_t*)(dst_uv);
1507
0
  int j;
1508
0
  for (j = 0; j < dst_width - 1; j += 2) {
1509
0
    dst[0] = src[x >> 16];
1510
0
    x += dx;
1511
0
    dst[1] = src[x >> 16];
1512
0
    x += dx;
1513
0
    dst += 2;
1514
0
  }
1515
0
  if (dst_width & 1) {
1516
0
    dst[0] = src[x >> 16];
1517
0
  }
1518
0
}
1519
1520
// Scales a single row of pixels up by 2x using point sampling.
1521
void ScaleUVColsUp2_C(uint8_t* dst_uv,
1522
                      const uint8_t* src_uv,
1523
                      int dst_width,
1524
                      int x,
1525
0
                      int dx) {
1526
0
  const uint16_t* src = (const uint16_t*)(src_uv);
1527
0
  uint16_t* dst = (uint16_t*)(dst_uv);
1528
0
  int j;
1529
0
  (void)x;
1530
0
  (void)dx;
1531
0
  for (j = 0; j < dst_width - 1; j += 2) {
1532
0
    dst[1] = dst[0] = src[0];
1533
0
    src += 1;
1534
0
    dst += 2;
1535
0
  }
1536
0
  if (dst_width & 1) {
1537
0
    dst[0] = src[0];
1538
0
  }
1539
0
}
1540
1541
// TODO(fbarchard): Replace 0x7f ^ f with 128-f.  bug=607.
1542
// Mimics SSSE3 blender
1543
0
#define BLENDER1(a, b, f) ((a) * (0x7f ^ f) + (b)*f) >> 7
1544
#define BLENDERC(a, b, f, s) \
1545
0
  (uint16_t)(BLENDER1(((a) >> s) & 255, ((b) >> s) & 255, f) << s)
1546
0
#define BLENDER(a, b, f) BLENDERC(a, b, f, 8) | BLENDERC(a, b, f, 0)
1547
1548
void ScaleUVFilterCols_C(uint8_t* dst_uv,
1549
                         const uint8_t* src_uv,
1550
                         int dst_width,
1551
                         int x,
1552
0
                         int dx) {
1553
0
  const uint16_t* src = (const uint16_t*)(src_uv);
1554
0
  uint16_t* dst = (uint16_t*)(dst_uv);
1555
0
  int j;
1556
0
  for (j = 0; j < dst_width - 1; j += 2) {
1557
0
    int xi = x >> 16;
1558
0
    int xf = (x >> 9) & 0x7f;
1559
0
    uint16_t a = src[xi];
1560
0
    uint16_t b = src[xi + 1];
1561
0
    dst[0] = BLENDER(a, b, xf);
1562
0
    x += dx;
1563
0
    xi = x >> 16;
1564
0
    xf = (x >> 9) & 0x7f;
1565
0
    a = src[xi];
1566
0
    b = src[xi + 1];
1567
0
    dst[1] = BLENDER(a, b, xf);
1568
0
    x += dx;
1569
0
    dst += 2;
1570
0
  }
1571
0
  if (dst_width & 1) {
1572
0
    int xi = x >> 16;
1573
0
    int xf = (x >> 9) & 0x7f;
1574
0
    uint16_t a = src[xi];
1575
0
    uint16_t b = src[xi + 1];
1576
0
    dst[0] = BLENDER(a, b, xf);
1577
0
  }
1578
0
}
1579
1580
void ScaleUVFilterCols64_C(uint8_t* dst_uv,
1581
                           const uint8_t* src_uv,
1582
                           int dst_width,
1583
                           int x32,
1584
0
                           int dx) {
1585
0
  int64_t x = (int64_t)(x32);
1586
0
  const uint16_t* src = (const uint16_t*)(src_uv);
1587
0
  uint16_t* dst = (uint16_t*)(dst_uv);
1588
0
  int j;
1589
0
  for (j = 0; j < dst_width - 1; j += 2) {
1590
0
    int64_t xi = x >> 16;
1591
0
    int xf = (x >> 9) & 0x7f;
1592
0
    uint16_t a = src[xi];
1593
0
    uint16_t b = src[xi + 1];
1594
0
    dst[0] = BLENDER(a, b, xf);
1595
0
    x += dx;
1596
0
    xi = x >> 16;
1597
0
    xf = (x >> 9) & 0x7f;
1598
0
    a = src[xi];
1599
0
    b = src[xi + 1];
1600
0
    dst[1] = BLENDER(a, b, xf);
1601
0
    x += dx;
1602
0
    dst += 2;
1603
0
  }
1604
0
  if (dst_width & 1) {
1605
0
    int64_t xi = x >> 16;
1606
0
    int xf = (x >> 9) & 0x7f;
1607
0
    uint16_t a = src[xi];
1608
0
    uint16_t b = src[xi + 1];
1609
0
    dst[0] = BLENDER(a, b, xf);
1610
0
  }
1611
0
}
1612
#undef BLENDER1
1613
#undef BLENDERC
1614
#undef BLENDER
1615
1616
// Scale plane vertically with bilinear interpolation.
1617
void ScalePlaneVertical(int src_height,
1618
                        int dst_width,
1619
                        int dst_height,
1620
                        int src_stride,
1621
                        int dst_stride,
1622
                        const uint8_t* src_argb,
1623
                        uint8_t* dst_argb,
1624
                        int x,
1625
                        int y,
1626
                        int dy,
1627
                        int bpp,  // bytes per pixel. 4 for ARGB.
1628
7.84k
                        enum FilterMode filtering) {
1629
  // TODO(fbarchard): Allow higher bpp.
1630
7.84k
  int dst_width_bytes = dst_width * bpp;
1631
7.84k
  void (*InterpolateRow)(uint8_t* dst_argb, const uint8_t* src_argb,
1632
7.84k
                         ptrdiff_t src_stride, int dst_width,
1633
7.84k
                         int source_y_fraction) = InterpolateRow_C;
1634
7.84k
  const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
1635
7.84k
  int j;
1636
7.84k
  assert(bpp >= 1 && bpp <= 4);
1637
7.84k
  assert(src_height != 0);
1638
7.84k
  assert(dst_width > 0);
1639
7.84k
  assert(dst_height > 0);
1640
7.84k
  src_argb += (x >> 16) * bpp;
1641
7.84k
#if defined(HAS_INTERPOLATEROW_SSSE3)
1642
7.84k
  if (TestCpuFlag(kCpuHasSSSE3)) {
1643
7.84k
    InterpolateRow = InterpolateRow_Any_SSSE3;
1644
7.84k
    if (IS_ALIGNED(dst_width_bytes, 16)) {
1645
468
      InterpolateRow = InterpolateRow_SSSE3;
1646
468
    }
1647
7.84k
  }
1648
7.84k
#endif
1649
7.84k
#if defined(HAS_INTERPOLATEROW_AVX2)
1650
7.84k
  if (TestCpuFlag(kCpuHasAVX2)) {
1651
7.84k
    InterpolateRow = InterpolateRow_Any_AVX2;
1652
7.84k
    if (IS_ALIGNED(dst_width_bytes, 32)) {
1653
294
      InterpolateRow = InterpolateRow_AVX2;
1654
294
    }
1655
7.84k
  }
1656
7.84k
#endif
1657
#if defined(HAS_INTERPOLATEROW_NEON)
1658
  if (TestCpuFlag(kCpuHasNEON)) {
1659
    InterpolateRow = InterpolateRow_Any_NEON;
1660
    if (IS_ALIGNED(dst_width_bytes, 16)) {
1661
      InterpolateRow = InterpolateRow_NEON;
1662
    }
1663
  }
1664
#endif
1665
#if defined(HAS_INTERPOLATEROW_SME)
1666
  if (TestCpuFlag(kCpuHasSME)) {
1667
    InterpolateRow = InterpolateRow_SME;
1668
  }
1669
#endif
1670
#if defined(HAS_INTERPOLATEROW_MSA)
1671
  if (TestCpuFlag(kCpuHasMSA)) {
1672
    InterpolateRow = InterpolateRow_Any_MSA;
1673
    if (IS_ALIGNED(dst_width_bytes, 32)) {
1674
      InterpolateRow = InterpolateRow_MSA;
1675
    }
1676
  }
1677
#endif
1678
#if defined(HAS_INTERPOLATEROW_LSX)
1679
  if (TestCpuFlag(kCpuHasLSX)) {
1680
    InterpolateRow = InterpolateRow_Any_LSX;
1681
    if (IS_ALIGNED(dst_width_bytes, 32)) {
1682
      InterpolateRow = InterpolateRow_LSX;
1683
    }
1684
  }
1685
#endif
1686
#if defined(HAS_INTERPOLATEROW_RVV)
1687
  if (TestCpuFlag(kCpuHasRVV)) {
1688
    InterpolateRow = InterpolateRow_RVV;
1689
  }
1690
#endif
1691
1692
732k
  for (j = 0; j < dst_height; ++j) {
1693
724k
    int yi;
1694
724k
    int yf;
1695
724k
    if (y > max_y) {
1696
0
      y = max_y;
1697
0
    }
1698
724k
    yi = y >> 16;
1699
724k
    yf = filtering ? ((y >> 8) & 255) : 0;
1700
724k
    InterpolateRow(dst_argb, src_argb + yi * src_stride, src_stride,
1701
724k
                   dst_width_bytes, yf);
1702
724k
    dst_argb += dst_stride;
1703
724k
    y += dy;
1704
724k
  }
1705
7.84k
}
1706
1707
void ScalePlaneVertical_16(int src_height,
1708
                           int dst_width,
1709
                           int dst_height,
1710
                           int src_stride,
1711
                           int dst_stride,
1712
                           const uint16_t* src_argb,
1713
                           uint16_t* dst_argb,
1714
                           int x,
1715
                           int y,
1716
                           int dy,
1717
                           int wpp, /* words per pixel. normally 1 */
1718
2.21k
                           enum FilterMode filtering) {
1719
  // TODO(fbarchard): Allow higher wpp.
1720
2.21k
  int dst_width_words = dst_width * wpp;
1721
2.21k
  void (*InterpolateRow)(uint16_t* dst_argb, const uint16_t* src_argb,
1722
2.21k
                         ptrdiff_t src_stride, int dst_width,
1723
2.21k
                         int source_y_fraction) = InterpolateRow_16_C;
1724
2.21k
  const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
1725
2.21k
  int j;
1726
2.21k
  assert(wpp >= 1 && wpp <= 2);
1727
2.21k
  assert(src_height != 0);
1728
2.21k
  assert(dst_width > 0);
1729
2.21k
  assert(dst_height > 0);
1730
2.21k
  src_argb += (x >> 16) * wpp;
1731
#if defined(HAS_INTERPOLATEROW_16_SSE2)
1732
  if (TestCpuFlag(kCpuHasSSE2)) {
1733
    InterpolateRow = InterpolateRow_16_Any_SSE2;
1734
    if (IS_ALIGNED(dst_width_words, 16)) {
1735
      InterpolateRow = InterpolateRow_16_SSE2;
1736
    }
1737
  }
1738
#endif
1739
#if defined(HAS_INTERPOLATEROW_16_SSSE3)
1740
  if (TestCpuFlag(kCpuHasSSSE3)) {
1741
    InterpolateRow = InterpolateRow_16_Any_SSSE3;
1742
    if (IS_ALIGNED(dst_width_words, 16)) {
1743
      InterpolateRow = InterpolateRow_16_SSSE3;
1744
    }
1745
  }
1746
#endif
1747
#if defined(HAS_INTERPOLATEROW_16_AVX2)
1748
  if (TestCpuFlag(kCpuHasAVX2)) {
1749
    InterpolateRow = InterpolateRow_16_Any_AVX2;
1750
    if (IS_ALIGNED(dst_width_words, 32)) {
1751
      InterpolateRow = InterpolateRow_16_AVX2;
1752
    }
1753
  }
1754
#endif
1755
#if defined(HAS_INTERPOLATEROW_16_NEON)
1756
  if (TestCpuFlag(kCpuHasNEON)) {
1757
    InterpolateRow = InterpolateRow_16_Any_NEON;
1758
    if (IS_ALIGNED(dst_width_words, 8)) {
1759
      InterpolateRow = InterpolateRow_16_NEON;
1760
    }
1761
  }
1762
#endif
1763
#if defined(HAS_INTERPOLATEROW_16_SME)
1764
  if (TestCpuFlag(kCpuHasSME)) {
1765
    InterpolateRow = InterpolateRow_16_SME;
1766
  }
1767
#endif
1768
623k
  for (j = 0; j < dst_height; ++j) {
1769
621k
    int yi;
1770
621k
    int yf;
1771
621k
    if (y > max_y) {
1772
0
      y = max_y;
1773
0
    }
1774
621k
    yi = y >> 16;
1775
621k
    yf = filtering ? ((y >> 8) & 255) : 0;
1776
621k
    InterpolateRow(dst_argb, src_argb + yi * src_stride, src_stride,
1777
621k
                   dst_width_words, yf);
1778
621k
    dst_argb += dst_stride;
1779
621k
    y += dy;
1780
621k
  }
1781
2.21k
}
1782
1783
// Use scale to convert lsb formats to msb, depending how many bits there are:
1784
// 32768 = 9 bits
1785
// 16384 = 10 bits
1786
// 4096 = 12 bits
1787
// 256 = 16 bits
1788
// TODO(fbarchard): change scale to bits
1789
void ScalePlaneVertical_16To8(int src_height,
1790
                              int dst_width,
1791
                              int dst_height,
1792
                              int src_stride,
1793
                              int dst_stride,
1794
                              const uint16_t* src_argb,
1795
                              uint8_t* dst_argb,
1796
                              int x,
1797
                              int y,
1798
                              int dy,
1799
                              int wpp, /* words per pixel. normally 1 */
1800
                              int scale,
1801
0
                              enum FilterMode filtering) {
1802
  // TODO(fbarchard): Allow higher wpp.
1803
0
  int dst_width_words = dst_width * wpp;
1804
  // TODO(https://crbug.com/libyuv/931): Add NEON 32 bit and AVX2 versions.
1805
0
  void (*InterpolateRow_16To8)(uint8_t* dst_argb, const uint16_t* src_argb,
1806
0
                               ptrdiff_t src_stride, int scale, int dst_width,
1807
0
                               int source_y_fraction) = InterpolateRow_16To8_C;
1808
0
  const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
1809
0
  int j;
1810
0
  assert(wpp >= 1 && wpp <= 2);
1811
0
  assert(src_height != 0);
1812
0
  assert(dst_width > 0);
1813
0
  assert(dst_height > 0);
1814
0
  src_argb += (x >> 16) * wpp;
1815
1816
#if defined(HAS_INTERPOLATEROW_16TO8_NEON)
1817
  if (TestCpuFlag(kCpuHasNEON)) {
1818
    InterpolateRow_16To8 = InterpolateRow_16To8_Any_NEON;
1819
    if (IS_ALIGNED(dst_width, 8)) {
1820
      InterpolateRow_16To8 = InterpolateRow_16To8_NEON;
1821
    }
1822
  }
1823
#endif
1824
#if defined(HAS_INTERPOLATEROW_16TO8_SME)
1825
  if (TestCpuFlag(kCpuHasSME)) {
1826
    InterpolateRow_16To8 = InterpolateRow_16To8_SME;
1827
  }
1828
#endif
1829
0
#if defined(HAS_INTERPOLATEROW_16TO8_AVX2)
1830
0
  if (TestCpuFlag(kCpuHasAVX2)) {
1831
0
    InterpolateRow_16To8 = InterpolateRow_16To8_Any_AVX2;
1832
0
    if (IS_ALIGNED(dst_width, 32)) {
1833
0
      InterpolateRow_16To8 = InterpolateRow_16To8_AVX2;
1834
0
    }
1835
0
  }
1836
0
#endif
1837
0
  for (j = 0; j < dst_height; ++j) {
1838
0
    int yi;
1839
0
    int yf;
1840
0
    if (y > max_y) {
1841
0
      y = max_y;
1842
0
    }
1843
0
    yi = y >> 16;
1844
0
    yf = filtering ? ((y >> 8) & 255) : 0;
1845
0
    InterpolateRow_16To8(dst_argb, src_argb + yi * src_stride, src_stride,
1846
0
                         scale, dst_width_words, yf);
1847
0
    dst_argb += dst_stride;
1848
0
    y += dy;
1849
0
  }
1850
0
}
1851
1852
// Simplify the filtering based on scale factors.
1853
enum FilterMode ScaleFilterReduce(int src_width,
1854
                                  int src_height,
1855
                                  int dst_width,
1856
                                  int dst_height,
1857
60.4k
                                  enum FilterMode filtering) {
1858
60.4k
  if (src_width < 0) {
1859
0
    src_width = -src_width;
1860
0
  }
1861
60.4k
  if (src_height < 0) {
1862
0
    src_height = -src_height;
1863
0
  }
1864
60.4k
  if (filtering == kFilterBox) {
1865
    // If scaling either axis to 0.5 or larger, switch from Box to Bilinear.
1866
43.5k
    if (dst_width * 2 >= src_width || dst_height * 2 >= src_height) {
1867
38.8k
      filtering = kFilterBilinear;
1868
38.8k
    }
1869
43.5k
  }
1870
60.4k
  if (filtering == kFilterBilinear) {
1871
50.0k
    if (src_height == 1) {
1872
3.90k
      filtering = kFilterLinear;
1873
3.90k
    }
1874
    // TODO(fbarchard): Detect any odd scale factor and reduce to Linear.
1875
50.0k
    if (dst_height == src_height || dst_height * 3 == src_height) {
1876
4.85k
      filtering = kFilterLinear;
1877
4.85k
    }
1878
    // TODO(fbarchard): Remove 1 pixel wide filter restriction, which is to
1879
    // avoid reading 2 pixels horizontally that causes memory exception.
1880
50.0k
    if (src_width == 1) {
1881
2.76k
      filtering = kFilterNone;
1882
2.76k
    }
1883
50.0k
  }
1884
60.4k
  if (filtering == kFilterLinear) {
1885
11.1k
    if (src_width == 1) {
1886
0
      filtering = kFilterNone;
1887
0
    }
1888
    // TODO(fbarchard): Detect any odd scale factor and reduce to None.
1889
11.1k
    if (dst_width == src_width || dst_width * 3 == src_width) {
1890
865
      filtering = kFilterNone;
1891
865
    }
1892
11.1k
  }
1893
60.4k
  return filtering;
1894
60.4k
}
1895
1896
// Divide num by div and return as 16.16 fixed point result.
1897
0
int FixedDiv_C(int num, int div) {
1898
0
  return (int)(((int64_t)(num) << 16) / div);
1899
0
}
1900
1901
// Divide num - 1 by div - 1 and return as 16.16 fixed point result.
1902
0
int FixedDiv1_C(int num, int div) {
1903
0
  return (int)((((int64_t)(num) << 16) - 0x00010001) / (div - 1));
1904
0
}
1905
1906
21.3k
#define CENTERSTART(dx, s) (dx < 0) ? -((-dx >> 1) + s) : ((dx >> 1) + s)
1907
1908
// Compute slope values for stepping.
1909
void ScaleSlope(int src_width,
1910
                int src_height,
1911
                int dst_width,
1912
                int dst_height,
1913
                enum FilterMode filtering,
1914
                int* x,
1915
                int* y,
1916
                int* dx,
1917
29.4k
                int* dy) {
1918
29.4k
  assert(x != NULL);
1919
29.4k
  assert(y != NULL);
1920
29.4k
  assert(dx != NULL);
1921
29.4k
  assert(dy != NULL);
1922
29.4k
  assert(src_width != 0);
1923
29.4k
  assert(src_height != 0);
1924
29.4k
  assert(dst_width > 0);
1925
29.4k
  assert(dst_height > 0);
1926
  // Check for 1 pixel and avoid FixedDiv overflow.
1927
29.4k
  if (dst_width == 1 && src_width >= 32768) {
1928
0
    dst_width = src_width;
1929
0
  }
1930
29.4k
  if (dst_height == 1 && src_height >= 32768) {
1931
0
    dst_height = src_height;
1932
0
  }
1933
29.4k
  if (filtering == kFilterBox) {
1934
    // Scale step for point sampling duplicates all pixels equally.
1935
2.98k
    *dx = FixedDiv(Abs(src_width), dst_width);
1936
2.98k
    *dy = FixedDiv(src_height, dst_height);
1937
2.98k
    *x = 0;
1938
2.98k
    *y = 0;
1939
26.4k
  } else if (filtering == kFilterBilinear) {
1940
    // Scale step for bilinear sampling renders last pixel once for upsample.
1941
17.9k
    if (dst_width <= Abs(src_width)) {
1942
5.62k
      *dx = FixedDiv(Abs(src_width), dst_width);
1943
5.62k
      *x = CENTERSTART(*dx, -32768);  // Subtract 0.5 (32768) to center filter.
1944
12.3k
    } else if (src_width > 1 && dst_width > 1) {
1945
12.3k
      *dx = FixedDiv1(Abs(src_width), dst_width);
1946
12.3k
      *x = 0;
1947
12.3k
    }
1948
17.9k
    if (dst_height <= src_height) {
1949
6.98k
      *dy = FixedDiv(src_height, dst_height);
1950
6.98k
      *y = CENTERSTART(*dy, -32768);  // Subtract 0.5 (32768) to center filter.
1951
10.9k
    } else if (src_height > 1 && dst_height > 1) {
1952
10.9k
      *dy = FixedDiv1(src_height, dst_height);
1953
10.9k
      *y = 0;
1954
10.9k
    }
1955
17.9k
  } else if (filtering == kFilterLinear) {
1956
    // Scale step for bilinear sampling renders last pixel once for upsample.
1957
5.95k
    if (dst_width <= Abs(src_width)) {
1958
3.57k
      *dx = FixedDiv(Abs(src_width), dst_width);
1959
3.57k
      *x = CENTERSTART(*dx, -32768);  // Subtract 0.5 (32768) to center filter.
1960
3.57k
    } else if (src_width > 1 && dst_width > 1) {
1961
2.38k
      *dx = FixedDiv1(Abs(src_width), dst_width);
1962
2.38k
      *x = 0;
1963
2.38k
    }
1964
5.95k
    *dy = FixedDiv(src_height, dst_height);
1965
5.95k
    *y = *dy >> 1;
1966
5.95k
  } else {
1967
    // Scale step for point sampling duplicates all pixels equally.
1968
2.60k
    *dx = FixedDiv(Abs(src_width), dst_width);
1969
2.60k
    *dy = FixedDiv(src_height, dst_height);
1970
2.60k
    *x = CENTERSTART(*dx, 0);
1971
2.60k
    *y = CENTERSTART(*dy, 0);
1972
2.60k
  }
1973
  // Negative src_width means horizontally mirror.
1974
29.4k
  if (src_width < 0) {
1975
0
    *x += (dst_width - 1) * *dx;
1976
0
    *dx = -*dx;
1977
    // src_width = -src_width;   // Caller must do this.
1978
0
  }
1979
29.4k
}
1980
#undef CENTERSTART
1981
1982
#ifdef __cplusplus
1983
}  // extern "C"
1984
}  // namespace libyuv
1985
#endif