Coverage Report

Created: 2025-12-03 07:28

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/aom/av1/common/warped_motion.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#include <stdio.h>
13
#include <stdlib.h>
14
#include <memory.h>
15
#include <math.h>
16
#include <assert.h>
17
18
#include "config/av1_rtcd.h"
19
20
#include "av1/common/warped_motion.h"
21
#include "av1/common/scale.h"
22
23
// For warping, we really use a 6-tap filter, but we do blocks of 8 pixels
24
// at a time. The zoom/rotation/shear in the model are applied to the
25
// "fractional" position of each pixel, which therefore varies within
26
// [-1, 2) * WARPEDPIXEL_PREC_SHIFTS.
27
// We need an extra 2 taps to fit this in, for a total of 8 taps.
28
/* clang-format off */
29
const int16_t av1_warped_filter[WARPEDPIXEL_PREC_SHIFTS * 3 + 1][8] = {
30
#if WARPEDPIXEL_PREC_BITS == 6
31
  // [-1, 0)
32
  { 0,   0, 127,   1,   0, 0, 0, 0 }, { 0, - 1, 127,   2,   0, 0, 0, 0 },
33
  { 1, - 3, 127,   4, - 1, 0, 0, 0 }, { 1, - 4, 126,   6, - 2, 1, 0, 0 },
34
  { 1, - 5, 126,   8, - 3, 1, 0, 0 }, { 1, - 6, 125,  11, - 4, 1, 0, 0 },
35
  { 1, - 7, 124,  13, - 4, 1, 0, 0 }, { 2, - 8, 123,  15, - 5, 1, 0, 0 },
36
  { 2, - 9, 122,  18, - 6, 1, 0, 0 }, { 2, -10, 121,  20, - 6, 1, 0, 0 },
37
  { 2, -11, 120,  22, - 7, 2, 0, 0 }, { 2, -12, 119,  25, - 8, 2, 0, 0 },
38
  { 3, -13, 117,  27, - 8, 2, 0, 0 }, { 3, -13, 116,  29, - 9, 2, 0, 0 },
39
  { 3, -14, 114,  32, -10, 3, 0, 0 }, { 3, -15, 113,  35, -10, 2, 0, 0 },
40
  { 3, -15, 111,  37, -11, 3, 0, 0 }, { 3, -16, 109,  40, -11, 3, 0, 0 },
41
  { 3, -16, 108,  42, -12, 3, 0, 0 }, { 4, -17, 106,  45, -13, 3, 0, 0 },
42
  { 4, -17, 104,  47, -13, 3, 0, 0 }, { 4, -17, 102,  50, -14, 3, 0, 0 },
43
  { 4, -17, 100,  52, -14, 3, 0, 0 }, { 4, -18,  98,  55, -15, 4, 0, 0 },
44
  { 4, -18,  96,  58, -15, 3, 0, 0 }, { 4, -18,  94,  60, -16, 4, 0, 0 },
45
  { 4, -18,  91,  63, -16, 4, 0, 0 }, { 4, -18,  89,  65, -16, 4, 0, 0 },
46
  { 4, -18,  87,  68, -17, 4, 0, 0 }, { 4, -18,  85,  70, -17, 4, 0, 0 },
47
  { 4, -18,  82,  73, -17, 4, 0, 0 }, { 4, -18,  80,  75, -17, 4, 0, 0 },
48
  { 4, -18,  78,  78, -18, 4, 0, 0 }, { 4, -17,  75,  80, -18, 4, 0, 0 },
49
  { 4, -17,  73,  82, -18, 4, 0, 0 }, { 4, -17,  70,  85, -18, 4, 0, 0 },
50
  { 4, -17,  68,  87, -18, 4, 0, 0 }, { 4, -16,  65,  89, -18, 4, 0, 0 },
51
  { 4, -16,  63,  91, -18, 4, 0, 0 }, { 4, -16,  60,  94, -18, 4, 0, 0 },
52
  { 3, -15,  58,  96, -18, 4, 0, 0 }, { 4, -15,  55,  98, -18, 4, 0, 0 },
53
  { 3, -14,  52, 100, -17, 4, 0, 0 }, { 3, -14,  50, 102, -17, 4, 0, 0 },
54
  { 3, -13,  47, 104, -17, 4, 0, 0 }, { 3, -13,  45, 106, -17, 4, 0, 0 },
55
  { 3, -12,  42, 108, -16, 3, 0, 0 }, { 3, -11,  40, 109, -16, 3, 0, 0 },
56
  { 3, -11,  37, 111, -15, 3, 0, 0 }, { 2, -10,  35, 113, -15, 3, 0, 0 },
57
  { 3, -10,  32, 114, -14, 3, 0, 0 }, { 2, - 9,  29, 116, -13, 3, 0, 0 },
58
  { 2, - 8,  27, 117, -13, 3, 0, 0 }, { 2, - 8,  25, 119, -12, 2, 0, 0 },
59
  { 2, - 7,  22, 120, -11, 2, 0, 0 }, { 1, - 6,  20, 121, -10, 2, 0, 0 },
60
  { 1, - 6,  18, 122, - 9, 2, 0, 0 }, { 1, - 5,  15, 123, - 8, 2, 0, 0 },
61
  { 1, - 4,  13, 124, - 7, 1, 0, 0 }, { 1, - 4,  11, 125, - 6, 1, 0, 0 },
62
  { 1, - 3,   8, 126, - 5, 1, 0, 0 }, { 1, - 2,   6, 126, - 4, 1, 0, 0 },
63
  { 0, - 1,   4, 127, - 3, 1, 0, 0 }, { 0,   0,   2, 127, - 1, 0, 0, 0 },
64
65
  // [0, 1)
66
  { 0,  0,   0, 127,   1,   0,  0,  0}, { 0,  0,  -1, 127,   2,   0,  0,  0},
67
  { 0,  1,  -3, 127,   4,  -2,  1,  0}, { 0,  1,  -5, 127,   6,  -2,  1,  0},
68
  { 0,  2,  -6, 126,   8,  -3,  1,  0}, {-1,  2,  -7, 126,  11,  -4,  2, -1},
69
  {-1,  3,  -8, 125,  13,  -5,  2, -1}, {-1,  3, -10, 124,  16,  -6,  3, -1},
70
  {-1,  4, -11, 123,  18,  -7,  3, -1}, {-1,  4, -12, 122,  20,  -7,  3, -1},
71
  {-1,  4, -13, 121,  23,  -8,  3, -1}, {-2,  5, -14, 120,  25,  -9,  4, -1},
72
  {-1,  5, -15, 119,  27, -10,  4, -1}, {-1,  5, -16, 118,  30, -11,  4, -1},
73
  {-2,  6, -17, 116,  33, -12,  5, -1}, {-2,  6, -17, 114,  35, -12,  5, -1},
74
  {-2,  6, -18, 113,  38, -13,  5, -1}, {-2,  7, -19, 111,  41, -14,  6, -2},
75
  {-2,  7, -19, 110,  43, -15,  6, -2}, {-2,  7, -20, 108,  46, -15,  6, -2},
76
  {-2,  7, -20, 106,  49, -16,  6, -2}, {-2,  7, -21, 104,  51, -16,  7, -2},
77
  {-2,  7, -21, 102,  54, -17,  7, -2}, {-2,  8, -21, 100,  56, -18,  7, -2},
78
  {-2,  8, -22,  98,  59, -18,  7, -2}, {-2,  8, -22,  96,  62, -19,  7, -2},
79
  {-2,  8, -22,  94,  64, -19,  7, -2}, {-2,  8, -22,  91,  67, -20,  8, -2},
80
  {-2,  8, -22,  89,  69, -20,  8, -2}, {-2,  8, -22,  87,  72, -21,  8, -2},
81
  {-2,  8, -21,  84,  74, -21,  8, -2}, {-2,  8, -22,  82,  77, -21,  8, -2},
82
  {-2,  8, -21,  79,  79, -21,  8, -2}, {-2,  8, -21,  77,  82, -22,  8, -2},
83
  {-2,  8, -21,  74,  84, -21,  8, -2}, {-2,  8, -21,  72,  87, -22,  8, -2},
84
  {-2,  8, -20,  69,  89, -22,  8, -2}, {-2,  8, -20,  67,  91, -22,  8, -2},
85
  {-2,  7, -19,  64,  94, -22,  8, -2}, {-2,  7, -19,  62,  96, -22,  8, -2},
86
  {-2,  7, -18,  59,  98, -22,  8, -2}, {-2,  7, -18,  56, 100, -21,  8, -2},
87
  {-2,  7, -17,  54, 102, -21,  7, -2}, {-2,  7, -16,  51, 104, -21,  7, -2},
88
  {-2,  6, -16,  49, 106, -20,  7, -2}, {-2,  6, -15,  46, 108, -20,  7, -2},
89
  {-2,  6, -15,  43, 110, -19,  7, -2}, {-2,  6, -14,  41, 111, -19,  7, -2},
90
  {-1,  5, -13,  38, 113, -18,  6, -2}, {-1,  5, -12,  35, 114, -17,  6, -2},
91
  {-1,  5, -12,  33, 116, -17,  6, -2}, {-1,  4, -11,  30, 118, -16,  5, -1},
92
  {-1,  4, -10,  27, 119, -15,  5, -1}, {-1,  4,  -9,  25, 120, -14,  5, -2},
93
  {-1,  3,  -8,  23, 121, -13,  4, -1}, {-1,  3,  -7,  20, 122, -12,  4, -1},
94
  {-1,  3,  -7,  18, 123, -11,  4, -1}, {-1,  3,  -6,  16, 124, -10,  3, -1},
95
  {-1,  2,  -5,  13, 125,  -8,  3, -1}, {-1,  2,  -4,  11, 126,  -7,  2, -1},
96
  { 0,  1,  -3,   8, 126,  -6,  2,  0}, { 0,  1,  -2,   6, 127,  -5,  1,  0},
97
  { 0,  1,  -2,   4, 127,  -3,  1,  0}, { 0,  0,   0,   2, 127,  -1,  0,  0},
98
99
  // [1, 2)
100
  { 0, 0, 0,   1, 127,   0,   0, 0 }, { 0, 0, 0, - 1, 127,   2,   0, 0 },
101
  { 0, 0, 1, - 3, 127,   4, - 1, 0 }, { 0, 0, 1, - 4, 126,   6, - 2, 1 },
102
  { 0, 0, 1, - 5, 126,   8, - 3, 1 }, { 0, 0, 1, - 6, 125,  11, - 4, 1 },
103
  { 0, 0, 1, - 7, 124,  13, - 4, 1 }, { 0, 0, 2, - 8, 123,  15, - 5, 1 },
104
  { 0, 0, 2, - 9, 122,  18, - 6, 1 }, { 0, 0, 2, -10, 121,  20, - 6, 1 },
105
  { 0, 0, 2, -11, 120,  22, - 7, 2 }, { 0, 0, 2, -12, 119,  25, - 8, 2 },
106
  { 0, 0, 3, -13, 117,  27, - 8, 2 }, { 0, 0, 3, -13, 116,  29, - 9, 2 },
107
  { 0, 0, 3, -14, 114,  32, -10, 3 }, { 0, 0, 3, -15, 113,  35, -10, 2 },
108
  { 0, 0, 3, -15, 111,  37, -11, 3 }, { 0, 0, 3, -16, 109,  40, -11, 3 },
109
  { 0, 0, 3, -16, 108,  42, -12, 3 }, { 0, 0, 4, -17, 106,  45, -13, 3 },
110
  { 0, 0, 4, -17, 104,  47, -13, 3 }, { 0, 0, 4, -17, 102,  50, -14, 3 },
111
  { 0, 0, 4, -17, 100,  52, -14, 3 }, { 0, 0, 4, -18,  98,  55, -15, 4 },
112
  { 0, 0, 4, -18,  96,  58, -15, 3 }, { 0, 0, 4, -18,  94,  60, -16, 4 },
113
  { 0, 0, 4, -18,  91,  63, -16, 4 }, { 0, 0, 4, -18,  89,  65, -16, 4 },
114
  { 0, 0, 4, -18,  87,  68, -17, 4 }, { 0, 0, 4, -18,  85,  70, -17, 4 },
115
  { 0, 0, 4, -18,  82,  73, -17, 4 }, { 0, 0, 4, -18,  80,  75, -17, 4 },
116
  { 0, 0, 4, -18,  78,  78, -18, 4 }, { 0, 0, 4, -17,  75,  80, -18, 4 },
117
  { 0, 0, 4, -17,  73,  82, -18, 4 }, { 0, 0, 4, -17,  70,  85, -18, 4 },
118
  { 0, 0, 4, -17,  68,  87, -18, 4 }, { 0, 0, 4, -16,  65,  89, -18, 4 },
119
  { 0, 0, 4, -16,  63,  91, -18, 4 }, { 0, 0, 4, -16,  60,  94, -18, 4 },
120
  { 0, 0, 3, -15,  58,  96, -18, 4 }, { 0, 0, 4, -15,  55,  98, -18, 4 },
121
  { 0, 0, 3, -14,  52, 100, -17, 4 }, { 0, 0, 3, -14,  50, 102, -17, 4 },
122
  { 0, 0, 3, -13,  47, 104, -17, 4 }, { 0, 0, 3, -13,  45, 106, -17, 4 },
123
  { 0, 0, 3, -12,  42, 108, -16, 3 }, { 0, 0, 3, -11,  40, 109, -16, 3 },
124
  { 0, 0, 3, -11,  37, 111, -15, 3 }, { 0, 0, 2, -10,  35, 113, -15, 3 },
125
  { 0, 0, 3, -10,  32, 114, -14, 3 }, { 0, 0, 2, - 9,  29, 116, -13, 3 },
126
  { 0, 0, 2, - 8,  27, 117, -13, 3 }, { 0, 0, 2, - 8,  25, 119, -12, 2 },
127
  { 0, 0, 2, - 7,  22, 120, -11, 2 }, { 0, 0, 1, - 6,  20, 121, -10, 2 },
128
  { 0, 0, 1, - 6,  18, 122, - 9, 2 }, { 0, 0, 1, - 5,  15, 123, - 8, 2 },
129
  { 0, 0, 1, - 4,  13, 124, - 7, 1 }, { 0, 0, 1, - 4,  11, 125, - 6, 1 },
130
  { 0, 0, 1, - 3,   8, 126, - 5, 1 }, { 0, 0, 1, - 2,   6, 126, - 4, 1 },
131
  { 0, 0, 0, - 1,   4, 127, - 3, 1 }, { 0, 0, 0,   0,   2, 127, - 1, 0 },
132
  // dummy (replicate row index 191)
133
  { 0, 0, 0,   0,   2, 127, - 1, 0 },
134
135
#elif WARPEDPIXEL_PREC_BITS == 5
136
  // [-1, 0)
137
  {0,   0, 127,   1,   0, 0, 0, 0}, {1,  -3, 127,   4,  -1, 0, 0, 0},
138
  {1,  -5, 126,   8,  -3, 1, 0, 0}, {1,  -7, 124,  13,  -4, 1, 0, 0},
139
  {2,  -9, 122,  18,  -6, 1, 0, 0}, {2, -11, 120,  22,  -7, 2, 0, 0},
140
  {3, -13, 117,  27,  -8, 2, 0, 0}, {3, -14, 114,  32, -10, 3, 0, 0},
141
  {3, -15, 111,  37, -11, 3, 0, 0}, {3, -16, 108,  42, -12, 3, 0, 0},
142
  {4, -17, 104,  47, -13, 3, 0, 0}, {4, -17, 100,  52, -14, 3, 0, 0},
143
  {4, -18,  96,  58, -15, 3, 0, 0}, {4, -18,  91,  63, -16, 4, 0, 0},
144
  {4, -18,  87,  68, -17, 4, 0, 0}, {4, -18,  82,  73, -17, 4, 0, 0},
145
  {4, -18,  78,  78, -18, 4, 0, 0}, {4, -17,  73,  82, -18, 4, 0, 0},
146
  {4, -17,  68,  87, -18, 4, 0, 0}, {4, -16,  63,  91, -18, 4, 0, 0},
147
  {3, -15,  58,  96, -18, 4, 0, 0}, {3, -14,  52, 100, -17, 4, 0, 0},
148
  {3, -13,  47, 104, -17, 4, 0, 0}, {3, -12,  42, 108, -16, 3, 0, 0},
149
  {3, -11,  37, 111, -15, 3, 0, 0}, {3, -10,  32, 114, -14, 3, 0, 0},
150
  {2,  -8,  27, 117, -13, 3, 0, 0}, {2,  -7,  22, 120, -11, 2, 0, 0},
151
  {1,  -6,  18, 122,  -9, 2, 0, 0}, {1,  -4,  13, 124,  -7, 1, 0, 0},
152
  {1,  -3,   8, 126,  -5, 1, 0, 0}, {0,  -1,   4, 127,  -3, 1, 0, 0},
153
  // [0, 1)
154
  { 0,  0,   0, 127,   1,   0,   0,  0}, { 0,  1,  -3, 127,   4,  -2,   1,  0},
155
  { 0,  2,  -6, 126,   8,  -3,   1,  0}, {-1,  3,  -8, 125,  13,  -5,   2, -1},
156
  {-1,  4, -11, 123,  18,  -7,   3, -1}, {-1,  4, -13, 121,  23,  -8,   3, -1},
157
  {-1,  5, -15, 119,  27, -10,   4, -1}, {-2,  6, -17, 116,  33, -12,   5, -1},
158
  {-2,  6, -18, 113,  38, -13,   5, -1}, {-2,  7, -19, 110,  43, -15,   6, -2},
159
  {-2,  7, -20, 106,  49, -16,   6, -2}, {-2,  7, -21, 102,  54, -17,   7, -2},
160
  {-2,  8, -22,  98,  59, -18,   7, -2}, {-2,  8, -22,  94,  64, -19,   7, -2},
161
  {-2,  8, -22,  89,  69, -20,   8, -2}, {-2,  8, -21,  84,  74, -21,   8, -2},
162
  {-2,  8, -21,  79,  79, -21,   8, -2}, {-2,  8, -21,  74,  84, -21,   8, -2},
163
  {-2,  8, -20,  69,  89, -22,   8, -2}, {-2,  7, -19,  64,  94, -22,   8, -2},
164
  {-2,  7, -18,  59,  98, -22,   8, -2}, {-2,  7, -17,  54, 102, -21,   7, -2},
165
  {-2,  6, -16,  49, 106, -20,   7, -2}, {-2,  6, -15,  43, 110, -19,   7, -2},
166
  {-1,  5, -13,  38, 113, -18,   6, -2}, {-1,  5, -12,  33, 116, -17,   6, -2},
167
  {-1,  4, -10,  27, 119, -15,   5, -1}, {-1,  3,  -8,  23, 121, -13,   4, -1},
168
  {-1,  3,  -7,  18, 123, -11,   4, -1}, {-1,  2,  -5,  13, 125,  -8,   3, -1},
169
  { 0,  1,  -3,   8, 126,  -6,   2,  0}, { 0,  1,  -2,   4, 127,  -3,   1,  0},
170
  // [1, 2)
171
  {0, 0, 0,   1, 127,   0,   0, 0}, {0, 0, 1,  -3, 127,   4,  -1, 0},
172
  {0, 0, 1,  -5, 126,   8,  -3, 1}, {0, 0, 1,  -7, 124,  13,  -4, 1},
173
  {0, 0, 2,  -9, 122,  18,  -6, 1}, {0, 0, 2, -11, 120,  22,  -7, 2},
174
  {0, 0, 3, -13, 117,  27,  -8, 2}, {0, 0, 3, -14, 114,  32, -10, 3},
175
  {0, 0, 3, -15, 111,  37, -11, 3}, {0, 0, 3, -16, 108,  42, -12, 3},
176
  {0, 0, 4, -17, 104,  47, -13, 3}, {0, 0, 4, -17, 100,  52, -14, 3},
177
  {0, 0, 4, -18,  96,  58, -15, 3}, {0, 0, 4, -18,  91,  63, -16, 4},
178
  {0, 0, 4, -18,  87,  68, -17, 4}, {0, 0, 4, -18,  82,  73, -17, 4},
179
  {0, 0, 4, -18,  78,  78, -18, 4}, {0, 0, 4, -17,  73,  82, -18, 4},
180
  {0, 0, 4, -17,  68,  87, -18, 4}, {0, 0, 4, -16,  63,  91, -18, 4},
181
  {0, 0, 3, -15,  58,  96, -18, 4}, {0, 0, 3, -14,  52, 100, -17, 4},
182
  {0, 0, 3, -13,  47, 104, -17, 4}, {0, 0, 3, -12,  42, 108, -16, 3},
183
  {0, 0, 3, -11,  37, 111, -15, 3}, {0, 0, 3, -10,  32, 114, -14, 3},
184
  {0, 0, 2,  -8,  27, 117, -13, 3}, {0, 0, 2,  -7,  22, 120, -11, 2},
185
  {0, 0, 1,  -6,  18, 122,  -9, 2}, {0, 0, 1,  -4,  13, 124,  -7, 1},
186
  {0, 0, 1,  -3,   8, 126,  -5, 1}, {0, 0, 0,  -1,   4, 127,  -3, 1},
187
  // dummy (replicate row index 95)
188
  {0, 0, 0,  -1,   4, 127,  -3, 1},
189
190
#endif  // WARPEDPIXEL_PREC_BITS == 6
191
};
192
193
/* clang-format on */
194
195
92.9k
#define DIV_LUT_PREC_BITS 14
196
93.0k
#define DIV_LUT_BITS 8
197
#define DIV_LUT_NUM (1 << DIV_LUT_BITS)
198
199
static const uint16_t div_lut[DIV_LUT_NUM + 1] = {
200
  16384, 16320, 16257, 16194, 16132, 16070, 16009, 15948, 15888, 15828, 15768,
201
  15709, 15650, 15592, 15534, 15477, 15420, 15364, 15308, 15252, 15197, 15142,
202
  15087, 15033, 14980, 14926, 14873, 14821, 14769, 14717, 14665, 14614, 14564,
203
  14513, 14463, 14413, 14364, 14315, 14266, 14218, 14170, 14122, 14075, 14028,
204
  13981, 13935, 13888, 13843, 13797, 13752, 13707, 13662, 13618, 13574, 13530,
205
  13487, 13443, 13400, 13358, 13315, 13273, 13231, 13190, 13148, 13107, 13066,
206
  13026, 12985, 12945, 12906, 12866, 12827, 12788, 12749, 12710, 12672, 12633,
207
  12596, 12558, 12520, 12483, 12446, 12409, 12373, 12336, 12300, 12264, 12228,
208
  12193, 12157, 12122, 12087, 12053, 12018, 11984, 11950, 11916, 11882, 11848,
209
  11815, 11782, 11749, 11716, 11683, 11651, 11619, 11586, 11555, 11523, 11491,
210
  11460, 11429, 11398, 11367, 11336, 11305, 11275, 11245, 11215, 11185, 11155,
211
  11125, 11096, 11067, 11038, 11009, 10980, 10951, 10923, 10894, 10866, 10838,
212
  10810, 10782, 10755, 10727, 10700, 10673, 10645, 10618, 10592, 10565, 10538,
213
  10512, 10486, 10460, 10434, 10408, 10382, 10356, 10331, 10305, 10280, 10255,
214
  10230, 10205, 10180, 10156, 10131, 10107, 10082, 10058, 10034, 10010, 9986,
215
  9963,  9939,  9916,  9892,  9869,  9846,  9823,  9800,  9777,  9754,  9732,
216
  9709,  9687,  9664,  9642,  9620,  9598,  9576,  9554,  9533,  9511,  9489,
217
  9468,  9447,  9425,  9404,  9383,  9362,  9341,  9321,  9300,  9279,  9259,
218
  9239,  9218,  9198,  9178,  9158,  9138,  9118,  9098,  9079,  9059,  9039,
219
  9020,  9001,  8981,  8962,  8943,  8924,  8905,  8886,  8867,  8849,  8830,
220
  8812,  8793,  8775,  8756,  8738,  8720,  8702,  8684,  8666,  8648,  8630,
221
  8613,  8595,  8577,  8560,  8542,  8525,  8508,  8490,  8473,  8456,  8439,
222
  8422,  8405,  8389,  8372,  8355,  8339,  8322,  8306,  8289,  8273,  8257,
223
  8240,  8224,  8208,  8192,
224
};
225
226
// Decomposes a divisor D such that 1/D = y/2^shift, where y is returned
227
// at precision of DIV_LUT_PREC_BITS along with the shift.
228
1.08k
static int16_t resolve_divisor_64(uint64_t D, int16_t *shift) {
229
1.08k
  int64_t f;
230
1.08k
  *shift = (int16_t)((D >> 32) ? get_msb((unsigned int)(D >> 32)) + 32
231
1.08k
                               : get_msb((unsigned int)D));
232
  // e is obtained from D after resetting the most significant 1 bit.
233
1.08k
  const int64_t e = D - ((uint64_t)1 << *shift);
234
  // Get the most significant DIV_LUT_BITS (8) bits of e into f
235
1.08k
  if (*shift > DIV_LUT_BITS)
236
1.08k
    f = ROUND_POWER_OF_TWO_64(e, *shift - DIV_LUT_BITS);
237
0
  else
238
0
    f = e << (DIV_LUT_BITS - *shift);
239
1.08k
  assert(f <= DIV_LUT_NUM);
240
1.08k
  *shift += DIV_LUT_PREC_BITS;
241
  // Use f as lookup into the precomputed table of multipliers
242
1.08k
  return div_lut[f];
243
1.08k
}
244
245
91.8k
static int16_t resolve_divisor_32(uint32_t D, int16_t *shift) {
246
91.8k
  int32_t f;
247
91.8k
  *shift = get_msb(D);
248
  // e is obtained from D after resetting the most significant 1 bit.
249
91.8k
  const int32_t e = D - ((uint32_t)1 << *shift);
250
  // Get the most significant DIV_LUT_BITS (8) bits of e into f
251
91.8k
  if (*shift > DIV_LUT_BITS)
252
91.8k
    f = ROUND_POWER_OF_TWO(e, *shift - DIV_LUT_BITS);
253
77
  else
254
77
    f = e << (DIV_LUT_BITS - *shift);
255
91.8k
  assert(f <= DIV_LUT_NUM);
256
91.8k
  *shift += DIV_LUT_PREC_BITS;
257
  // Use f as lookup into the precomputed table of multipliers
258
91.8k
  return div_lut[f];
259
91.8k
}
260
261
91.9k
static int is_affine_valid(const WarpedMotionParams *const wm) {
262
91.9k
  const int32_t *mat = wm->wmmat;
263
91.9k
  return (mat[2] > 0);
264
91.9k
}
265
266
static int is_affine_shear_allowed(int16_t alpha, int16_t beta, int16_t gamma,
267
91.8k
                                   int16_t delta) {
268
91.8k
  if ((4 * abs(alpha) + 7 * abs(beta) >= (1 << WARPEDMODEL_PREC_BITS)) ||
269
90.8k
      (4 * abs(gamma) + 4 * abs(delta) >= (1 << WARPEDMODEL_PREC_BITS)))
270
1.27k
    return 0;
271
90.6k
  else
272
90.6k
    return 1;
273
91.8k
}
274
275
// Returns 1 on success or 0 on an invalid affine set
276
91.9k
int av1_get_shear_params(WarpedMotionParams *wm) {
277
91.9k
  const int32_t *mat = wm->wmmat;
278
91.9k
  if (!is_affine_valid(wm)) return 0;
279
91.8k
  wm->alpha =
280
91.8k
      clamp(mat[2] - (1 << WARPEDMODEL_PREC_BITS), INT16_MIN, INT16_MAX);
281
91.8k
  wm->beta = clamp(mat[3], INT16_MIN, INT16_MAX);
282
91.8k
  int16_t shift;
283
91.8k
  int16_t y = resolve_divisor_32(abs(mat[2]), &shift) * (mat[2] < 0 ? -1 : 1);
284
91.8k
  int64_t v = ((int64_t)mat[4] * (1 << WARPEDMODEL_PREC_BITS)) * y;
285
91.8k
  wm->gamma =
286
91.8k
      clamp((int)ROUND_POWER_OF_TWO_SIGNED_64(v, shift), INT16_MIN, INT16_MAX);
287
91.8k
  v = ((int64_t)mat[3] * mat[4]) * y;
288
91.8k
  wm->delta = clamp(mat[5] - (int)ROUND_POWER_OF_TWO_SIGNED_64(v, shift) -
289
91.8k
                        (1 << WARPEDMODEL_PREC_BITS),
290
91.8k
                    INT16_MIN, INT16_MAX);
291
292
91.8k
  wm->alpha = ROUND_POWER_OF_TWO_SIGNED(wm->alpha, WARP_PARAM_REDUCE_BITS) *
293
91.8k
              (1 << WARP_PARAM_REDUCE_BITS);
294
91.8k
  wm->beta = ROUND_POWER_OF_TWO_SIGNED(wm->beta, WARP_PARAM_REDUCE_BITS) *
295
91.8k
             (1 << WARP_PARAM_REDUCE_BITS);
296
91.8k
  wm->gamma = ROUND_POWER_OF_TWO_SIGNED(wm->gamma, WARP_PARAM_REDUCE_BITS) *
297
91.8k
              (1 << WARP_PARAM_REDUCE_BITS);
298
91.8k
  wm->delta = ROUND_POWER_OF_TWO_SIGNED(wm->delta, WARP_PARAM_REDUCE_BITS) *
299
91.8k
              (1 << WARP_PARAM_REDUCE_BITS);
300
301
91.8k
  if (!is_affine_shear_allowed(wm->alpha, wm->beta, wm->gamma, wm->delta))
302
1.27k
    return 0;
303
304
90.6k
  return 1;
305
91.8k
}
306
307
#if CONFIG_AV1_HIGHBITDEPTH
308
0
static INLINE int highbd_error_measure(int err, int bd) {
309
0
  const int b = bd - 8;
310
0
  const int bmask = (1 << b) - 1;
311
0
  const int v = (1 << b);
312
0
  err = abs(err);
313
0
  const int e1 = err >> b;
314
0
  const int e2 = err & bmask;
315
0
  return error_measure_lut[255 + e1] * (v - e2) +
316
0
         error_measure_lut[256 + e1] * e2;
317
0
}
318
319
/* Note: For an explanation of the warp algorithm, and some notes on bit widths
320
    for hardware implementations, see the comments above av1_warp_affine_c
321
*/
322
void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref,
323
                              int width, int height, int stride, uint16_t *pred,
324
                              int p_col, int p_row, int p_width, int p_height,
325
                              int p_stride, int subsampling_x,
326
                              int subsampling_y, int bd,
327
                              ConvolveParams *conv_params, int16_t alpha,
328
7.35k
                              int16_t beta, int16_t gamma, int16_t delta) {
329
7.35k
  int32_t tmp[15 * 8];
330
7.35k
  const int reduce_bits_horiz =
331
7.35k
      conv_params->round_0 +
332
7.35k
      AOMMAX(bd + FILTER_BITS - conv_params->round_0 - 14, 0);
333
7.35k
  const int reduce_bits_vert = conv_params->is_compound
334
7.35k
                                   ? conv_params->round_1
335
7.35k
                                   : 2 * FILTER_BITS - reduce_bits_horiz;
336
7.35k
  const int max_bits_horiz = bd + FILTER_BITS + 1 - reduce_bits_horiz;
337
7.35k
  const int offset_bits_horiz = bd + FILTER_BITS - 1;
338
7.35k
  const int offset_bits_vert = bd + 2 * FILTER_BITS - reduce_bits_horiz;
339
7.35k
  const int round_bits =
340
7.35k
      2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
341
7.35k
  const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
342
7.35k
  (void)max_bits_horiz;
343
7.35k
  assert(IMPLIES(conv_params->is_compound, conv_params->dst != NULL));
344
345
15.5k
  for (int i = p_row; i < p_row + p_height; i += 8) {
346
21.7k
    for (int j = p_col; j < p_col + p_width; j += 8) {
347
      // Calculate the center of this 8x8 block,
348
      // project to luma coordinates (if in a subsampled chroma plane),
349
      // apply the affine transformation,
350
      // then convert back to the original coordinates (if necessary)
351
13.5k
      const int32_t src_x = (j + 4) << subsampling_x;
352
13.5k
      const int32_t src_y = (i + 4) << subsampling_y;
353
13.5k
      const int64_t dst_x =
354
13.5k
          (int64_t)mat[2] * src_x + (int64_t)mat[3] * src_y + (int64_t)mat[0];
355
13.5k
      const int64_t dst_y =
356
13.5k
          (int64_t)mat[4] * src_x + (int64_t)mat[5] * src_y + (int64_t)mat[1];
357
13.5k
      const int64_t x4 = dst_x >> subsampling_x;
358
13.5k
      const int64_t y4 = dst_y >> subsampling_y;
359
360
13.5k
      const int32_t ix4 = (int32_t)(x4 >> WARPEDMODEL_PREC_BITS);
361
13.5k
      int32_t sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
362
13.5k
      const int32_t iy4 = (int32_t)(y4 >> WARPEDMODEL_PREC_BITS);
363
13.5k
      int32_t sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
364
365
13.5k
      sx4 += alpha * (-4) + beta * (-4);
366
13.5k
      sy4 += gamma * (-4) + delta * (-4);
367
368
13.5k
      sx4 &= ~((1 << WARP_PARAM_REDUCE_BITS) - 1);
369
13.5k
      sy4 &= ~((1 << WARP_PARAM_REDUCE_BITS) - 1);
370
371
      // Horizontal filter
372
216k
      for (int k = -7; k < 8; ++k) {
373
203k
        const int iy = clamp(iy4 + k, 0, height - 1);
374
375
203k
        int sx = sx4 + beta * (k + 4);
376
1.82M
        for (int l = -4; l < 4; ++l) {
377
1.62M
          int ix = ix4 + l - 3;
378
1.62M
          const int offs = ROUND_POWER_OF_TWO(sx, WARPEDDIFF_PREC_BITS) +
379
1.62M
                           WARPEDPIXEL_PREC_SHIFTS;
380
1.62M
          assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3);
381
1.62M
          const int16_t *coeffs = av1_warped_filter[offs];
382
383
1.62M
          int32_t sum = 1 << offset_bits_horiz;
384
14.6M
          for (int m = 0; m < 8; ++m) {
385
12.9M
            const int sample_x = clamp(ix + m, 0, width - 1);
386
12.9M
            sum += ref[iy * stride + sample_x] * coeffs[m];
387
12.9M
          }
388
1.62M
          sum = ROUND_POWER_OF_TWO(sum, reduce_bits_horiz);
389
1.62M
          assert(0 <= sum && sum < (1 << max_bits_horiz));
390
1.62M
          tmp[(k + 7) * 8 + (l + 4)] = sum;
391
1.62M
          sx += alpha;
392
1.62M
        }
393
203k
      }
394
395
      // Vertical filter
396
121k
      for (int k = -4; k < AOMMIN(4, p_row + p_height - i - 4); ++k) {
397
108k
        int sy = sy4 + delta * (k + 4);
398
974k
        for (int l = -4; l < AOMMIN(4, p_col + p_width - j - 4); ++l) {
399
866k
          const int offs = ROUND_POWER_OF_TWO(sy, WARPEDDIFF_PREC_BITS) +
400
866k
                           WARPEDPIXEL_PREC_SHIFTS;
401
866k
          assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3);
402
866k
          const int16_t *coeffs = av1_warped_filter[offs];
403
404
866k
          int32_t sum = 1 << offset_bits_vert;
405
7.79M
          for (int m = 0; m < 8; ++m) {
406
6.93M
            sum += tmp[(k + m + 4) * 8 + (l + 4)] * coeffs[m];
407
6.93M
          }
408
409
866k
          if (conv_params->is_compound) {
410
303k
            CONV_BUF_TYPE *p =
411
303k
                &conv_params
412
303k
                     ->dst[(i - p_row + k + 4) * conv_params->dst_stride +
413
303k
                           (j - p_col + l + 4)];
414
303k
            sum = ROUND_POWER_OF_TWO(sum, reduce_bits_vert);
415
303k
            if (conv_params->do_average) {
416
118k
              uint16_t *dst16 =
417
118k
                  &pred[(i - p_row + k + 4) * p_stride + (j - p_col + l + 4)];
418
118k
              int32_t tmp32 = *p;
419
118k
              if (conv_params->use_dist_wtd_comp_avg) {
420
11.1k
                tmp32 = tmp32 * conv_params->fwd_offset +
421
11.1k
                        sum * conv_params->bck_offset;
422
11.1k
                tmp32 = tmp32 >> DIST_PRECISION_BITS;
423
107k
              } else {
424
107k
                tmp32 += sum;
425
107k
                tmp32 = tmp32 >> 1;
426
107k
              }
427
118k
              tmp32 = tmp32 - (1 << (offset_bits - conv_params->round_1)) -
428
118k
                      (1 << (offset_bits - conv_params->round_1 - 1));
429
118k
              *dst16 =
430
118k
                  clip_pixel_highbd(ROUND_POWER_OF_TWO(tmp32, round_bits), bd);
431
184k
            } else {
432
184k
              *p = sum;
433
184k
            }
434
563k
          } else {
435
563k
            uint16_t *p =
436
563k
                &pred[(i - p_row + k + 4) * p_stride + (j - p_col + l + 4)];
437
563k
            sum = ROUND_POWER_OF_TWO(sum, reduce_bits_vert);
438
563k
            assert(0 <= sum && sum < (1 << (bd + 2)));
439
563k
            *p = clip_pixel_highbd(sum - (1 << (bd - 1)) - (1 << bd), bd);
440
563k
          }
441
866k
          sy += gamma;
442
866k
        }
443
108k
      }
444
13.5k
    }
445
8.16k
  }
446
7.35k
}
447
448
void highbd_warp_plane(WarpedMotionParams *wm, const uint16_t *const ref,
449
                       int width, int height, int stride, uint16_t *const pred,
450
                       int p_col, int p_row, int p_width, int p_height,
451
                       int p_stride, int subsampling_x, int subsampling_y,
452
7.35k
                       int bd, ConvolveParams *conv_params) {
453
7.35k
  assert(wm->wmtype <= AFFINE);
454
7.35k
  if (wm->wmtype == ROTZOOM) {
455
4.28k
    wm->wmmat[5] = wm->wmmat[2];
456
4.28k
    wm->wmmat[4] = -wm->wmmat[3];
457
4.28k
  }
458
7.35k
  const int32_t *const mat = wm->wmmat;
459
7.35k
  const int16_t alpha = wm->alpha;
460
7.35k
  const int16_t beta = wm->beta;
461
7.35k
  const int16_t gamma = wm->gamma;
462
7.35k
  const int16_t delta = wm->delta;
463
464
7.35k
  av1_highbd_warp_affine(mat, ref, width, height, stride, pred, p_col, p_row,
465
7.35k
                         p_width, p_height, p_stride, subsampling_x,
466
7.35k
                         subsampling_y, bd, conv_params, alpha, beta, gamma,
467
7.35k
                         delta);
468
7.35k
}
469
470
int64_t av1_calc_highbd_frame_error(const uint16_t *const ref, int stride,
471
                                    const uint16_t *const dst, int p_width,
472
0
                                    int p_height, int p_stride, int bd) {
473
0
  int64_t sum_error = 0;
474
0
  for (int i = 0; i < p_height; ++i) {
475
0
    for (int j = 0; j < p_width; ++j) {
476
0
      sum_error +=
477
0
          highbd_error_measure(dst[j + i * p_stride] - ref[j + i * stride], bd);
478
0
    }
479
0
  }
480
0
  return sum_error;
481
0
}
482
483
static int64_t highbd_segmented_frame_error(
484
    const uint16_t *const ref, int stride, const uint16_t *const dst,
485
    int p_width, int p_height, int p_stride, int bd, uint8_t *segment_map,
486
0
    int segment_map_stride) {
487
0
  int patch_w, patch_h;
488
0
  const int error_bsize_w = AOMMIN(p_width, WARP_ERROR_BLOCK);
489
0
  const int error_bsize_h = AOMMIN(p_height, WARP_ERROR_BLOCK);
490
0
  int64_t sum_error = 0;
491
0
  for (int i = 0; i < p_height; i += WARP_ERROR_BLOCK) {
492
0
    for (int j = 0; j < p_width; j += WARP_ERROR_BLOCK) {
493
0
      int seg_x = j >> WARP_ERROR_BLOCK_LOG;
494
0
      int seg_y = i >> WARP_ERROR_BLOCK_LOG;
495
      // Only compute the error if this block contains inliers from the motion
496
      // model
497
0
      if (!segment_map[seg_y * segment_map_stride + seg_x]) continue;
498
499
      // avoid computing error into the frame padding
500
0
      patch_w = AOMMIN(error_bsize_w, p_width - j);
501
0
      patch_h = AOMMIN(error_bsize_h, p_height - i);
502
0
      sum_error += av1_calc_highbd_frame_error(ref + j + i * stride, stride,
503
0
                                               dst + j + i * p_stride, patch_w,
504
0
                                               patch_h, p_stride, bd);
505
0
    }
506
0
  }
507
0
  return sum_error;
508
0
}
509
#endif  // CONFIG_AV1_HIGHBITDEPTH
510
511
/* The warp filter for ROTZOOM and AFFINE models works as follows:
512
   * Split the input into 8x8 blocks
513
   * For each block, project the point (4, 4) within the block, to get the
514
     overall block position. Split into integer and fractional coordinates,
515
     maintaining full WARPEDMODEL precision
516
   * Filter horizontally: Generate 15 rows of 8 pixels each. Each pixel gets a
517
     variable horizontal offset. This means that, while the rows of the
518
     intermediate buffer align with the rows of the *reference* image, the
519
     columns align with the columns of the *destination* image.
520
   * Filter vertically: Generate the output block (up to 8x8 pixels, but if the
521
     destination is too small we crop the output at this stage). Each pixel has
522
     a variable vertical offset, so that the resulting rows are aligned with
523
     the rows of the destination image.
524
525
   To accomplish these alignments, we factor the warp matrix as a
526
   product of two shear / asymmetric zoom matrices:
527
   / a b \  = /   1       0    \ * / 1+alpha  beta \
528
   \ c d /    \ gamma  1+delta /   \    0      1   /
529
   where a, b, c, d are wmmat[2], wmmat[3], wmmat[4], wmmat[5] respectively.
530
   The horizontal shear (with alpha and beta) is applied first,
531
   then the vertical shear (with gamma and delta) is applied second.
532
533
   The only limitation is that, to fit this in a fixed 8-tap filter size,
534
   the fractional pixel offsets must be at most +-1. Since the horizontal filter
535
   generates 15 rows of 8 columns, and the initial point we project is at (4, 4)
536
   within the block, the parameters must satisfy
537
   4 * |alpha| + 7 * |beta| <= 1   and   4 * |gamma| + 4 * |delta| <= 1
538
   for this filter to be applicable.
539
540
   Note: This function assumes that the caller has done all of the relevant
541
   checks, ie. that we have a ROTZOOM or AFFINE model, that wm[4] and wm[5]
542
   are set appropriately (if using a ROTZOOM model), and that alpha, beta,
543
   gamma, delta are all in range.
544
545
   TODO(rachelbarker): Maybe support scaled references?
546
*/
547
/* A note on hardware implementation:
548
    The warp filter is intended to be implementable using the same hardware as
549
    the high-precision convolve filters from the loop-restoration and
550
    convolve-round experiments.
551
552
    For a single filter stage, considering all of the coefficient sets for the
553
    warp filter and the regular convolution filter, an input in the range
554
    [0, 2^k - 1] is mapped into the range [-56 * (2^k - 1), 184 * (2^k - 1)]
555
    before rounding.
556
557
    Allowing for some changes to the filter coefficient sets, call the range
558
    [-64 * 2^k, 192 * 2^k]. Then, if we initialize the accumulator to 64 * 2^k,
559
    we can replace this by the range [0, 256 * 2^k], which can be stored in an
560
    unsigned value with 8 + k bits.
561
562
    This allows the derivation of the appropriate bit widths and offsets for
563
    the various intermediate values: If
564
565
    F := FILTER_BITS = 7 (or else the above ranges need adjusting)
566
         So a *single* filter stage maps a k-bit input to a (k + F + 1)-bit
567
         intermediate value.
568
    H := ROUND0_BITS
569
    V := VERSHEAR_REDUCE_PREC_BITS
570
    (and note that we must have H + V = 2*F for the output to have the same
571
     scale as the input)
572
573
    then we end up with the following offsets and ranges:
574
    Horizontal filter: Apply an offset of 1 << (bd + F - 1), sum fits into a
575
                       uint{bd + F + 1}
576
    After rounding: The values stored in 'tmp' fit into a uint{bd + F + 1 - H}.
577
    Vertical filter: Apply an offset of 1 << (bd + 2*F - H), sum fits into a
578
                     uint{bd + 2*F + 2 - H}
579
    After rounding: The final value, before undoing the offset, fits into a
580
                    uint{bd + 2}.
581
582
    Then we need to undo the offsets before clamping to a pixel. Note that,
583
    if we do this at the end, the amount to subtract is actually independent
584
    of H and V:
585
586
    offset to subtract = (1 << ((bd + F - 1) - H + F - V)) +
587
                         (1 << ((bd + 2*F - H) - V))
588
                      == (1 << (bd - 1)) + (1 << bd)
589
590
    This allows us to entirely avoid clamping in both the warp filter and
591
    the convolve-round experiment. As of the time of writing, the Wiener filter
592
    from loop-restoration can encode a central coefficient up to 216, which
593
    leads to a maximum value of about 282 * 2^k after applying the offset.
594
    So in that case we still need to clamp.
595
*/
596
// Apply the affine warp described by 'mat' (with pre-computed shear
// parameters alpha/beta/gamma/delta) to the 8-bit plane 'ref', producing the
// p_width x p_height prediction block located at (p_col, p_row). Output goes
// to 'pred', or to conv_params->dst when conv_params->is_compound is set.
// The block is processed in 8x8 tiles: an 8-tap horizontal filter pass into
// 'tmp', then an 8-tap vertical pass out of 'tmp'. See the bit-width
// analysis in the comment above for the offset/rounding scheme.
void av1_warp_affine_c(const int32_t *mat, const uint8_t *ref, int width,
                       int height, int stride, uint8_t *pred, int p_col,
                       int p_row, int p_width, int p_height, int p_stride,
                       int subsampling_x, int subsampling_y,
                       ConvolveParams *conv_params, int16_t alpha, int16_t beta,
                       int16_t gamma, int16_t delta) {
  // Horizontal-pass output: 15 rows (8 output rows plus 7 rows of vertical
  // filter support) of 8 columns each.
  int32_t tmp[15 * 8];
  const int bd = 8;  // This path is 8-bit only.
  const int reduce_bits_horiz = conv_params->round_0;
  // For the non-compound path the vertical rounding is chosen so that
  // H + V == 2 * FILTER_BITS, restoring the input scale (see comment above).
  const int reduce_bits_vert = conv_params->is_compound
                                   ? conv_params->round_1
                                   : 2 * FILTER_BITS - reduce_bits_horiz;
  const int max_bits_horiz = bd + FILTER_BITS + 1 - reduce_bits_horiz;
  const int offset_bits_horiz = bd + FILTER_BITS - 1;
  const int offset_bits_vert = bd + 2 * FILTER_BITS - reduce_bits_horiz;
  const int round_bits =
      2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
  const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
  // max_bits_horiz is only referenced from an assert; silence unused-variable
  // warnings in NDEBUG builds.
  (void)max_bits_horiz;
  assert(IMPLIES(conv_params->is_compound, conv_params->dst != NULL));
  assert(IMPLIES(conv_params->do_average, conv_params->is_compound));

  for (int i = p_row; i < p_row + p_height; i += 8) {
    for (int j = p_col; j < p_col + p_width; j += 8) {
      // Calculate the center of this 8x8 block,
      // project to luma coordinates (if in a subsampled chroma plane),
      // apply the affine transformation,
      // then convert back to the original coordinates (if necessary)
      const int32_t src_x = (j + 4) << subsampling_x;
      const int32_t src_y = (i + 4) << subsampling_y;
      const int64_t dst_x =
          (int64_t)mat[2] * src_x + (int64_t)mat[3] * src_y + (int64_t)mat[0];
      const int64_t dst_y =
          (int64_t)mat[4] * src_x + (int64_t)mat[5] * src_y + (int64_t)mat[1];
      const int64_t x4 = dst_x >> subsampling_x;
      const int64_t y4 = dst_y >> subsampling_y;

      // Split the warped center into integer and fractional
      // (WARPEDMODEL_PREC_BITS-precision) parts.
      int32_t ix4 = (int32_t)(x4 >> WARPEDMODEL_PREC_BITS);
      int32_t sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
      int32_t iy4 = (int32_t)(y4 >> WARPEDMODEL_PREC_BITS);
      int32_t sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);

      // Move the fractional position from the tile center to its top-left
      // sample (offset (-4, -4) in the shear parameterization).
      sx4 += alpha * (-4) + beta * (-4);
      sy4 += gamma * (-4) + delta * (-4);

      // Drop the low WARP_PARAM_REDUCE_BITS of the fractional positions.
      sx4 &= ~((1 << WARP_PARAM_REDUCE_BITS) - 1);
      sy4 &= ~((1 << WARP_PARAM_REDUCE_BITS) - 1);

      // Horizontal filter
      for (int k = -7; k < 8; ++k) {
        // Clamp to top/bottom edge of the frame
        const int iy = clamp(iy4 + k, 0, height - 1);

        int sx = sx4 + beta * (k + 4);

        for (int l = -4; l < 4; ++l) {
          int ix = ix4 + l - 3;
          // At this point, sx = sx4 + alpha * l + beta * k
          const int offs = ROUND_POWER_OF_TWO(sx, WARPEDDIFF_PREC_BITS) +
                           WARPEDPIXEL_PREC_SHIFTS;
          assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3);
          const int16_t *coeffs = av1_warped_filter[offs];

          // Offset keeps the accumulator non-negative (see comment above).
          int32_t sum = 1 << offset_bits_horiz;
          for (int m = 0; m < 8; ++m) {
            // Clamp to left/right edge of the frame
            const int sample_x = clamp(ix + m, 0, width - 1);

            sum += ref[iy * stride + sample_x] * coeffs[m];
          }
          sum = ROUND_POWER_OF_TWO(sum, reduce_bits_horiz);
          assert(0 <= sum && sum < (1 << max_bits_horiz));
          tmp[(k + 7) * 8 + (l + 4)] = sum;
          sx += alpha;
        }
      }

      // Vertical filter
      for (int k = -4; k < AOMMIN(4, p_row + p_height - i - 4); ++k) {
        int sy = sy4 + delta * (k + 4);
        for (int l = -4; l < AOMMIN(4, p_col + p_width - j - 4); ++l) {
          // At this point, sy = sy4 + gamma * l + delta * k
          const int offs = ROUND_POWER_OF_TWO(sy, WARPEDDIFF_PREC_BITS) +
                           WARPEDPIXEL_PREC_SHIFTS;
          assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3);
          const int16_t *coeffs = av1_warped_filter[offs];

          int32_t sum = 1 << offset_bits_vert;
          for (int m = 0; m < 8; ++m) {
            sum += tmp[(k + m + 4) * 8 + (l + 4)] * coeffs[m];
          }

          if (conv_params->is_compound) {
            // Compound: write the intermediate (still offset) value to the
            // CONV_BUF_TYPE buffer, or average with it when do_average is set.
            CONV_BUF_TYPE *p =
                &conv_params
                     ->dst[(i - p_row + k + 4) * conv_params->dst_stride +
                           (j - p_col + l + 4)];
            sum = ROUND_POWER_OF_TWO(sum, reduce_bits_vert);
            if (conv_params->do_average) {
              uint8_t *dst8 =
                  &pred[(i - p_row + k + 4) * p_stride + (j - p_col + l + 4)];
              int32_t tmp32 = *p;
              if (conv_params->use_dist_wtd_comp_avg) {
                // Distance-weighted average of the two predictions.
                tmp32 = tmp32 * conv_params->fwd_offset +
                        sum * conv_params->bck_offset;
                tmp32 = tmp32 >> DIST_PRECISION_BITS;
              } else {
                // Plain average.
                tmp32 += sum;
                tmp32 = tmp32 >> 1;
              }
              // Undo the horizontal and vertical offsets before clamping.
              tmp32 = tmp32 - (1 << (offset_bits - conv_params->round_1)) -
                      (1 << (offset_bits - conv_params->round_1 - 1));
              *dst8 = clip_pixel(ROUND_POWER_OF_TWO(tmp32, round_bits));
            } else {
              *p = sum;
            }
          } else {
            // Single prediction: round, undo the combined offset
            // ((1 << (bd - 1)) + (1 << bd), see comment above), and clamp.
            uint8_t *p =
                &pred[(i - p_row + k + 4) * p_stride + (j - p_col + l + 4)];
            sum = ROUND_POWER_OF_TWO(sum, reduce_bits_vert);
            assert(0 <= sum && sum < (1 << (bd + 2)));
            *p = clip_pixel(sum - (1 << (bd - 1)) - (1 << bd));
          }
          sy += gamma;
        }
      }
    }
  }
}
725
726
// Warp one 8-bit plane with the model in 'wm'. For a ROTZOOM model only
// wmmat[2] and wmmat[3] are transmitted, so the remaining 2x2 entries are
// derived here before invoking the affine warp kernel.
void warp_plane(WarpedMotionParams *wm, const uint8_t *const ref, int width,
                int height, int stride, uint8_t *pred, int p_col, int p_row,
                int p_width, int p_height, int p_stride, int subsampling_x,
                int subsampling_y, ConvolveParams *conv_params) {
  assert(wm->wmtype <= AFFINE);
  if (wm->wmtype == ROTZOOM) {
    // Complete the rotation/zoom matrix: [m2 m3; -m3 m2].
    wm->wmmat[5] = wm->wmmat[2];
    wm->wmmat[4] = -wm->wmmat[3];
  }
  av1_warp_affine(wm->wmmat, ref, width, height, stride, pred, p_col, p_row,
                  p_width, p_height, p_stride, subsampling_x, subsampling_y,
                  conv_params, wm->alpha, wm->beta, wm->gamma, wm->delta);
}
744
745
// Accumulate the per-pixel error (via error_measure) between the p_width x
// p_height regions of 'dst' and 'ref'. Returns the total as a 64-bit sum.
int64_t av1_calc_frame_error_c(const uint8_t *const ref, int stride,
                               const uint8_t *const dst, int p_width,
                               int p_height, int p_stride) {
  int64_t total = 0;
  for (int row = 0; row < p_height; ++row) {
    // Walk each row through a pointer instead of re-deriving the index.
    const uint8_t *ref_row = ref + row * stride;
    const uint8_t *dst_row = dst + row * p_stride;
    for (int col = 0; col < p_width; ++col) {
      total += (int64_t)error_measure(dst_row[col] - ref_row[col]);
    }
  }
  return total;
}
757
758
static int64_t segmented_frame_error(const uint8_t *const ref, int stride,
759
                                     const uint8_t *const dst, int p_width,
760
                                     int p_height, int p_stride,
761
                                     uint8_t *segment_map,
762
0
                                     int segment_map_stride) {
763
0
  int patch_w, patch_h;
764
0
  const int error_bsize_w = AOMMIN(p_width, WARP_ERROR_BLOCK);
765
0
  const int error_bsize_h = AOMMIN(p_height, WARP_ERROR_BLOCK);
766
0
  int64_t sum_error = 0;
767
0
  for (int i = 0; i < p_height; i += WARP_ERROR_BLOCK) {
768
0
    for (int j = 0; j < p_width; j += WARP_ERROR_BLOCK) {
769
0
      int seg_x = j >> WARP_ERROR_BLOCK_LOG;
770
0
      int seg_y = i >> WARP_ERROR_BLOCK_LOG;
771
      // Only compute the error if this block contains inliers from the motion
772
      // model
773
0
      if (!segment_map[seg_y * segment_map_stride + seg_x]) continue;
774
775
      // avoid computing error into the frame padding
776
0
      patch_w = AOMMIN(error_bsize_w, p_width - j);
777
0
      patch_h = AOMMIN(error_bsize_h, p_height - i);
778
0
      sum_error += av1_calc_frame_error(ref + j + i * stride, stride,
779
0
                                        dst + j + i * p_stride, patch_w,
780
0
                                        patch_h, p_stride);
781
0
    }
782
0
  }
783
0
  return sum_error;
784
0
}
785
786
// Dispatch frame-error computation to the high-bitdepth or 8-bit
// implementation. use_hbd/bd are unused when high-bitdepth support is
// compiled out.
int64_t av1_frame_error(int use_hbd, int bd, const uint8_t *ref, int stride,
                        uint8_t *dst, int p_width, int p_height, int p_stride) {
  (void)use_hbd;
  (void)bd;
#if CONFIG_AV1_HIGHBITDEPTH
  if (use_hbd) {
    return av1_calc_highbd_frame_error(CONVERT_TO_SHORTPTR(ref), stride,
                                       CONVERT_TO_SHORTPTR(dst), p_width,
                                       p_height, p_stride, bd);
  }
#endif
  return av1_calc_frame_error(ref, stride, dst, p_width, p_height, p_stride);
}
799
800
// Dispatch segmented frame-error computation to the high-bitdepth or 8-bit
// implementation. use_hbd/bd are unused when high-bitdepth support is
// compiled out.
int64_t av1_segmented_frame_error(int use_hbd, int bd, const uint8_t *ref,
                                  int stride, uint8_t *dst, int p_width,
                                  int p_height, int p_stride,
                                  uint8_t *segment_map,
                                  int segment_map_stride) {
  (void)use_hbd;
  (void)bd;
#if CONFIG_AV1_HIGHBITDEPTH
  if (use_hbd) {
    return highbd_segmented_frame_error(
        CONVERT_TO_SHORTPTR(ref), stride, CONVERT_TO_SHORTPTR(dst), p_width,
        p_height, p_stride, bd, segment_map, segment_map_stride);
  }
#endif
  return segmented_frame_error(ref, stride, dst, p_width, p_height, p_stride,
                               segment_map, segment_map_stride);
}
817
818
// Public entry point for warping a plane: routes to the high-bitdepth path
// when available and requested, otherwise to the 8-bit path.
void av1_warp_plane(WarpedMotionParams *wm, int use_hbd, int bd,
                    const uint8_t *ref, int width, int height, int stride,
                    uint8_t *pred, int p_col, int p_row, int p_width,
                    int p_height, int p_stride, int subsampling_x,
                    int subsampling_y, ConvolveParams *conv_params) {
#if CONFIG_AV1_HIGHBITDEPTH
  if (use_hbd) {
    highbd_warp_plane(wm, CONVERT_TO_SHORTPTR(ref), width, height, stride,
                      CONVERT_TO_SHORTPTR(pred), p_col, p_row, p_width,
                      p_height, p_stride, subsampling_x, subsampling_y, bd,
                      conv_params);
    return;
  }
#else
  (void)use_hbd;
  (void)bd;
#endif
  warp_plane(wm, ref, width, height, stride, pred, p_col, p_row, p_width,
             p_height, p_stride, subsampling_x, subsampling_y, conv_params);
}
839
840
4.04k
#define LS_MV_MAX 256  // max mv in 1/8-pel
// Use LS_STEP = 8 so that 2 fewer bits are needed for A, Bx, By.
#define LS_STEP 8

// Assuming LS_MV_MAX is < MAX_SB_SIZE * 8,
// the precision needed is:
//   (MAX_SB_SIZE_LOG2 + 3) [for sx * sx magnitude] +
//   (MAX_SB_SIZE_LOG2 + 4) [for sx * dx magnitude] +
//   1 [for sign] +
//   LEAST_SQUARES_SAMPLES_MAX_BITS
//        [for adding up to LEAST_SQUARES_SAMPLES_MAX samples]
// The value is 23
#define LS_MAT_RANGE_BITS \
  ((MAX_SB_SIZE_LOG2 + 4) * 2 + LEAST_SQUARES_SAMPLES_MAX_BITS)

// Bit-depth reduction from the full-range
#define LS_MAT_DOWN_BITS 2

// bits range of A, Bx and By after downshifting
#define LS_MAT_BITS (LS_MAT_RANGE_BITS - LS_MAT_DOWN_BITS)
#define LS_MAT_MIN (-(1 << (LS_MAT_BITS - 1)))
#define LS_MAT_MAX ((1 << (LS_MAT_BITS - 1)) - 1)

// Accumulator terms for the least-squares normal equations. Algebraically,
// LS_PRODUCT1(a, b) is (2a + LS_STEP) * (2b + LS_STEP), while LS_SQUARE and
// LS_PRODUCT2 add an extra LS_STEP * LS_STEP (a rounding bias) before the
// shift. With LS_STEP = 8, the least-significant 2 bits of every element of
// A, Bx, By are 0, so the results can be shifted down by 2 extra bits (on
// top of LS_MAT_DOWN_BITS) without losing information.
#define LS_SQUARE(a)                                          \
  (((a) * (a)*4 + (a)*4 * LS_STEP + LS_STEP * LS_STEP * 2) >> \
   (2 + LS_MAT_DOWN_BITS))
#define LS_PRODUCT1(a, b)                                           \
  (((a) * (b)*4 + ((a) + (b)) * 2 * LS_STEP + LS_STEP * LS_STEP) >> \
   (2 + LS_MAT_DOWN_BITS))
#define LS_PRODUCT2(a, b)                                               \
  (((a) * (b)*4 + ((a) + (b)) * 2 * LS_STEP + LS_STEP * LS_STEP * 2) >> \
   (2 + LS_MAT_DOWN_BITS))

// When set, the divisions in find_affine_int() use a reduced-precision
// (16-bit) multiplier; the default (0) uses full 64-bit products.
#define USE_LIMITED_PREC_MULT 0
876
877
#if USE_LIMITED_PREC_MULT
878
879
#define MUL_PREC_BITS 16
// Reduce a 64-bit unsigned value D to a MUL_PREC_BITS-bit multiplier 'mult'
// and a shift, such that D is approximately mult << *shift (rounded).
// Returns 0 with *shift = 0 when D == 0.
static uint16_t resolve_multiplier_64(uint64_t D, int16_t *shift) {
  int msb = 0;
  uint16_t mult = 0;
  *shift = 0;
  if (D != 0) {
    // Index of the most significant set bit of D.
    msb = (int16_t)((D >> 32) ? get_msb((unsigned int)(D >> 32)) + 32
                              : get_msb((unsigned int)D));
    if (msb >= MUL_PREC_BITS) {
      // Keep the top MUL_PREC_BITS bits, with rounding.
      mult = (uint16_t)ROUND_POWER_OF_TWO_64(D, msb + 1 - MUL_PREC_BITS);
      *shift = msb + 1 - MUL_PREC_BITS;
    } else {
      // D already fits in MUL_PREC_BITS bits; use it as-is.
      mult = (uint16_t)D;
      *shift = 0;
    }
  }
  return mult;
}
897
898
static int32_t get_mult_shift_ndiag(int64_t Px, int16_t iDet, int shift) {
899
  int32_t ret;
900
  int16_t mshift;
901
  uint16_t Mul = resolve_multiplier_64(llabs(Px), &mshift);
902
  int32_t v = (int32_t)Mul * (int32_t)iDet * (Px < 0 ? -1 : 1);
903
  shift -= mshift;
904
  if (shift > 0) {
905
    return (int32_t)clamp(ROUND_POWER_OF_TWO_SIGNED(v, shift),
906
                          -WARPEDMODEL_NONDIAGAFFINE_CLAMP + 1,
907
                          WARPEDMODEL_NONDIAGAFFINE_CLAMP - 1);
908
  } else {
909
    return (int32_t)clamp(v * (1 << (-shift)),
910
                          -WARPEDMODEL_NONDIAGAFFINE_CLAMP + 1,
911
                          WARPEDMODEL_NONDIAGAFFINE_CLAMP - 1);
912
  }
913
  return ret;
914
}
915
916
static int32_t get_mult_shift_diag(int64_t Px, int16_t iDet, int shift) {
917
  int16_t mshift;
918
  uint16_t Mul = resolve_multiplier_64(llabs(Px), &mshift);
919
  int32_t v = (int32_t)Mul * (int32_t)iDet * (Px < 0 ? -1 : 1);
920
  shift -= mshift;
921
  if (shift > 0) {
922
    return (int32_t)clamp(
923
        ROUND_POWER_OF_TWO_SIGNED(v, shift),
924
        (1 << WARPEDMODEL_PREC_BITS) - WARPEDMODEL_NONDIAGAFFINE_CLAMP + 1,
925
        (1 << WARPEDMODEL_PREC_BITS) + WARPEDMODEL_NONDIAGAFFINE_CLAMP - 1);
926
  } else {
927
    return (int32_t)clamp(
928
        v * (1 << (-shift)),
929
        (1 << WARPEDMODEL_PREC_BITS) - WARPEDMODEL_NONDIAGAFFINE_CLAMP + 1,
930
        (1 << WARPEDMODEL_PREC_BITS) + WARPEDMODEL_NONDIAGAFFINE_CLAMP - 1);
931
  }
932
}
933
934
#else
935
936
2.17k
static int32_t get_mult_shift_ndiag(int64_t Px, int16_t iDet, int shift) {
937
2.17k
  int64_t v = Px * (int64_t)iDet;
938
2.17k
  return (int32_t)clamp64(ROUND_POWER_OF_TWO_SIGNED_64(v, shift),
939
2.17k
                          -WARPEDMODEL_NONDIAGAFFINE_CLAMP + 1,
940
2.17k
                          WARPEDMODEL_NONDIAGAFFINE_CLAMP - 1);
941
2.17k
}
942
943
2.17k
static int32_t get_mult_shift_diag(int64_t Px, int16_t iDet, int shift) {
944
2.17k
  int64_t v = Px * (int64_t)iDet;
945
2.17k
  return (int32_t)clamp64(
946
2.17k
      ROUND_POWER_OF_TWO_SIGNED_64(v, shift),
947
2.17k
      (1 << WARPEDMODEL_PREC_BITS) - WARPEDMODEL_NONDIAGAFFINE_CLAMP + 1,
948
2.17k
      (1 << WARPEDMODEL_PREC_BITS) + WARPEDMODEL_NONDIAGAFFINE_CLAMP - 1);
949
2.17k
}
950
#endif  // USE_LIMITED_PREC_MULT
951
952
// Fit an affine model to 'np' sample pairs via integer least squares.
// pts1 holds sampled positions in the current frame, pts2 the matching
// positions in the reference; (mvx, mvy) is the block's transmitted MV.
// On success, writes wmmat[0..5] of 'wm' and returns 0; returns 1 if the
// normal-equation matrix is singular (Det == 0).
static int find_affine_int(int np, const int *pts1, const int *pts2,
                           BLOCK_SIZE bsize, int mvy, int mvx,
                           WarpedMotionParams *wm, int mi_row, int mi_col) {
  // Normal-equation accumulators: A = P'P (symmetric, only the upper
  // triangle is stored), Bx = P'q, By = P'r — see the derivation below.
  int32_t A[2][2] = { { 0, 0 }, { 0, 0 } };
  int32_t Bx[2] = { 0, 0 };
  int32_t By[2] = { 0, 0 };

  const int bw = block_size_wide[bsize];
  const int bh = block_size_high[bsize];
  // Block-center offsets: rsux/rsuy in full pels, sux/suy in 1/8-pel
  // (matching the sample/MV precision used below).
  const int rsuy = bh / 2 - 1;
  const int rsux = bw / 2 - 1;
  const int suy = rsuy * 8;
  const int sux = rsux * 8;
  // Destination center = source center displaced by the transmitted MV.
  const int duy = suy + mvy;
  const int dux = sux + mvx;

  // Assume the center pixel of the block has exactly the same motion vector
  // as transmitted for the block. First shift the origin of the source
  // points to the block center, and the origin of the destination points to
  // the block center added to the motion vector transmitted.
  // Let (xi, yi) denote the source points and (xi', yi') denote destination
  // points after origin shifting, for i = 0, 1, 2, .... n-1.
  // Then if  P = [x0, y0,
  //               x1, y1
  //               x2, y2,
  //                ....
  //              ]
  //          q = [x0', x1', x2', ... ]'
  //          r = [y0', y1', y2', ... ]'
  // the least squares problems that need to be solved are:
  //          [h1, h2]' = inv(P'P)P'q and
  //          [h3, h4]' = inv(P'P)P'r
  // where the affine transformation is given by:
  //          x' = h1.x + h2.y
  //          y' = h3.x + h4.y
  //
  // The loop below computes: A = P'P, Bx = P'q, By = P'r
  // We need to just compute inv(A).Bx and inv(A).By for the solutions.
  // Contribution from neighbor block
  for (int i = 0; i < np; i++) {
    const int dx = pts2[i * 2] - dux;
    const int dy = pts2[i * 2 + 1] - duy;
    const int sx = pts1[i * 2] - sux;
    const int sy = pts1[i * 2 + 1] - suy;
    // (TODO)yunqing: This comparison wouldn't be necessary if the sample
    // selection is done in find_samples(). Also, global offset can be removed
    // while collecting samples.
    if (abs(sx - dx) < LS_MV_MAX && abs(sy - dy) < LS_MV_MAX) {
      A[0][0] += LS_SQUARE(sx);
      A[0][1] += LS_PRODUCT1(sx, sy);
      A[1][1] += LS_SQUARE(sy);
      Bx[0] += LS_PRODUCT2(sx, dx);
      Bx[1] += LS_PRODUCT1(sy, dx);
      By[0] += LS_PRODUCT1(sx, dy);
      By[1] += LS_PRODUCT2(sy, dy);
    }
  }

  // Just for debugging, and can be removed later.
  assert(A[0][0] >= LS_MAT_MIN && A[0][0] <= LS_MAT_MAX);
  assert(A[0][1] >= LS_MAT_MIN && A[0][1] <= LS_MAT_MAX);
  assert(A[1][1] >= LS_MAT_MIN && A[1][1] <= LS_MAT_MAX);
  assert(Bx[0] >= LS_MAT_MIN && Bx[0] <= LS_MAT_MAX);
  assert(Bx[1] >= LS_MAT_MIN && Bx[1] <= LS_MAT_MAX);
  assert(By[0] >= LS_MAT_MIN && By[0] <= LS_MAT_MAX);
  assert(By[1] >= LS_MAT_MIN && By[1] <= LS_MAT_MAX);

  // Compute Determinant of A
  const int64_t Det = (int64_t)A[0][0] * A[1][1] - (int64_t)A[0][1] * A[0][1];
  if (Det == 0) return 1;

  // Replace division by Det with multiplication by iDet and a right shift.
  int16_t shift;
  int16_t iDet = resolve_divisor_64(llabs(Det), &shift) * (Det < 0 ? -1 : 1);
  shift -= WARPEDMODEL_PREC_BITS;
  if (shift < 0) {
    iDet <<= (-shift);
    shift = 0;
  }

  int64_t Px[2], Py[2];
  // These divided by the Det, are the least squares solutions
  Px[0] = (int64_t)A[1][1] * Bx[0] - (int64_t)A[0][1] * Bx[1];
  Px[1] = -(int64_t)A[0][1] * Bx[0] + (int64_t)A[0][0] * Bx[1];
  Py[0] = (int64_t)A[1][1] * By[0] - (int64_t)A[0][1] * By[1];
  Py[1] = -(int64_t)A[0][1] * By[0] + (int64_t)A[0][0] * By[1];

  wm->wmmat[2] = get_mult_shift_diag(Px[0], iDet, shift);
  wm->wmmat[3] = get_mult_shift_ndiag(Px[1], iDet, shift);
  wm->wmmat[4] = get_mult_shift_ndiag(Py[0], iDet, shift);
  wm->wmmat[5] = get_mult_shift_diag(Py[1], iDet, shift);

  // Block-center position in full-pel frame coordinates.
  const int isuy = (mi_row * MI_SIZE + rsuy);
  const int isux = (mi_col * MI_SIZE + rsux);
  // Note: In the vx, vy expressions below, the max value of each of the
  // 2nd and 3rd terms are (2^16 - 1) * (2^13 - 1). That leaves enough room
  // for the first term so that the overall sum in the worst case fits
  // within 32 bits overall.
  const int32_t vx = mvx * (1 << (WARPEDMODEL_PREC_BITS - 3)) -
                     (isux * (wm->wmmat[2] - (1 << WARPEDMODEL_PREC_BITS)) +
                      isuy * wm->wmmat[3]);
  const int32_t vy = mvy * (1 << (WARPEDMODEL_PREC_BITS - 3)) -
                     (isux * wm->wmmat[4] +
                      isuy * (wm->wmmat[5] - (1 << WARPEDMODEL_PREC_BITS)));
  // Translation terms, clamped to the representable range.
  wm->wmmat[0] =
      clamp(vx, -WARPEDMODEL_TRANS_CLAMP, WARPEDMODEL_TRANS_CLAMP - 1);
  wm->wmmat[1] =
      clamp(vy, -WARPEDMODEL_TRANS_CLAMP, WARPEDMODEL_TRANS_CLAMP - 1);
  return 0;
}
1061
1062
int av1_find_projection(int np, const int *pts1, const int *pts2,
1063
                        BLOCK_SIZE bsize, int mvy, int mvx,
1064
1.10k
                        WarpedMotionParams *wm_params, int mi_row, int mi_col) {
1065
1.10k
  assert(wm_params->wmtype == AFFINE);
1066
1067
1.10k
  if (find_affine_int(np, pts1, pts2, bsize, mvy, mvx, wm_params, mi_row,
1068
1.10k
                      mi_col))
1069
17
    return 1;
1070
1071
  // check compatibility with the fast warp filter
1072
1.08k
  if (!av1_get_shear_params(wm_params)) return 1;
1073
1074
539
  return 0;
1075
1.08k
}