/src/libvpx/vp9/encoder/vp9_resize.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2014 The WebM project authors. All Rights Reserved. |
3 | | * |
4 | | * Use of this source code is governed by a BSD-style license |
5 | | * that can be found in the LICENSE file in the root of the source |
6 | | * tree. An additional intellectual property rights grant can be found |
7 | | * in the file PATENTS. All contributing project authors may |
8 | | * be found in the AUTHORS file in the root of the source tree. |
9 | | */ |
10 | | |
11 | | #include <assert.h> |
12 | | #include <limits.h> |
13 | | #include <math.h> |
14 | | #include <stdio.h> |
15 | | #include <stdlib.h> |
16 | | #include <string.h> |
17 | | |
18 | | #include "./vpx_config.h" |
19 | | #if CONFIG_VP9_HIGHBITDEPTH |
20 | | #include "vpx_dsp/vpx_dsp_common.h" |
21 | | #endif // CONFIG_VP9_HIGHBITDEPTH |
22 | | #include "vpx_ports/mem.h" |
23 | | #include "vp9/common/vp9_common.h" |
24 | | #include "vp9/encoder/vp9_resize.h" |
25 | | |
// Right-shift applied to a filter sum (after rounding) to get back to pixel
// range.
#define FILTER_BITS 7

// Number of taps in each interpolation kernel.
#define INTERP_TAPS 8
// Bits of sub-pixel phase; each kernel table has 1 << SUBPEL_BITS entries.
#define SUBPEL_BITS 5
// Mask that extracts the sub-pixel phase from a shifted source position.
#define SUBPEL_MASK ((1 << SUBPEL_BITS) - 1)
// Fractional bits of the fixed-point source position used by the
// interpolators.
#define INTERP_PRECISION_BITS 32

// One interpolation kernel: INTERP_TAPS signed 16-bit coefficients.
typedef int16_t interp_kernel[INTERP_TAPS];
34 | | |
35 | | // Filters for interpolation (0.5-band) - note this also filters integer pels. |
36 | | static const interp_kernel filteredinterp_filters500[(1 << SUBPEL_BITS)] = { |
37 | | { -3, 0, 35, 64, 35, 0, -3, 0 }, { -3, -1, 34, 64, 36, 1, -3, 0 }, |
38 | | { -3, -1, 32, 64, 38, 1, -3, 0 }, { -2, -2, 31, 63, 39, 2, -3, 0 }, |
39 | | { -2, -2, 29, 63, 41, 2, -3, 0 }, { -2, -2, 28, 63, 42, 3, -4, 0 }, |
40 | | { -2, -3, 27, 63, 43, 4, -4, 0 }, { -2, -3, 25, 62, 45, 5, -4, 0 }, |
41 | | { -2, -3, 24, 62, 46, 5, -4, 0 }, { -2, -3, 23, 61, 47, 6, -4, 0 }, |
42 | | { -2, -3, 21, 60, 49, 7, -4, 0 }, { -1, -4, 20, 60, 50, 8, -4, -1 }, |
43 | | { -1, -4, 19, 59, 51, 9, -4, -1 }, { -1, -4, 17, 58, 52, 10, -4, 0 }, |
44 | | { -1, -4, 16, 57, 53, 12, -4, -1 }, { -1, -4, 15, 56, 54, 13, -4, -1 }, |
45 | | { -1, -4, 14, 55, 55, 14, -4, -1 }, { -1, -4, 13, 54, 56, 15, -4, -1 }, |
46 | | { -1, -4, 12, 53, 57, 16, -4, -1 }, { 0, -4, 10, 52, 58, 17, -4, -1 }, |
47 | | { -1, -4, 9, 51, 59, 19, -4, -1 }, { -1, -4, 8, 50, 60, 20, -4, -1 }, |
48 | | { 0, -4, 7, 49, 60, 21, -3, -2 }, { 0, -4, 6, 47, 61, 23, -3, -2 }, |
49 | | { 0, -4, 5, 46, 62, 24, -3, -2 }, { 0, -4, 5, 45, 62, 25, -3, -2 }, |
50 | | { 0, -4, 4, 43, 63, 27, -3, -2 }, { 0, -4, 3, 42, 63, 28, -2, -2 }, |
51 | | { 0, -3, 2, 41, 63, 29, -2, -2 }, { 0, -3, 2, 39, 63, 31, -2, -2 }, |
52 | | { 0, -3, 1, 38, 64, 32, -1, -3 }, { 0, -3, 1, 36, 64, 34, -1, -3 } |
53 | | }; |
54 | | |
55 | | // Filters for interpolation (0.625-band) - note this also filters integer pels. |
56 | | static const interp_kernel filteredinterp_filters625[(1 << SUBPEL_BITS)] = { |
57 | | { -1, -8, 33, 80, 33, -8, -1, 0 }, { -1, -8, 30, 80, 35, -8, -1, 1 }, |
58 | | { -1, -8, 28, 80, 37, -7, -2, 1 }, { 0, -8, 26, 79, 39, -7, -2, 1 }, |
59 | | { 0, -8, 24, 79, 41, -7, -2, 1 }, { 0, -8, 22, 78, 43, -6, -2, 1 }, |
60 | | { 0, -8, 20, 78, 45, -5, -3, 1 }, { 0, -8, 18, 77, 48, -5, -3, 1 }, |
61 | | { 0, -8, 16, 76, 50, -4, -3, 1 }, { 0, -8, 15, 75, 52, -3, -4, 1 }, |
62 | | { 0, -7, 13, 74, 54, -3, -4, 1 }, { 0, -7, 11, 73, 56, -2, -4, 1 }, |
63 | | { 0, -7, 10, 71, 58, -1, -4, 1 }, { 1, -7, 8, 70, 60, 0, -5, 1 }, |
64 | | { 1, -6, 6, 68, 62, 1, -5, 1 }, { 1, -6, 5, 67, 63, 2, -5, 1 }, |
65 | | { 1, -6, 4, 65, 65, 4, -6, 1 }, { 1, -5, 2, 63, 67, 5, -6, 1 }, |
66 | | { 1, -5, 1, 62, 68, 6, -6, 1 }, { 1, -5, 0, 60, 70, 8, -7, 1 }, |
67 | | { 1, -4, -1, 58, 71, 10, -7, 0 }, { 1, -4, -2, 56, 73, 11, -7, 0 }, |
68 | | { 1, -4, -3, 54, 74, 13, -7, 0 }, { 1, -4, -3, 52, 75, 15, -8, 0 }, |
69 | | { 1, -3, -4, 50, 76, 16, -8, 0 }, { 1, -3, -5, 48, 77, 18, -8, 0 }, |
70 | | { 1, -3, -5, 45, 78, 20, -8, 0 }, { 1, -2, -6, 43, 78, 22, -8, 0 }, |
71 | | { 1, -2, -7, 41, 79, 24, -8, 0 }, { 1, -2, -7, 39, 79, 26, -8, 0 }, |
72 | | { 1, -2, -7, 37, 80, 28, -8, -1 }, { 1, -1, -8, 35, 80, 30, -8, -1 }, |
73 | | }; |
74 | | |
75 | | // Filters for interpolation (0.75-band) - note this also filters integer pels. |
76 | | static const interp_kernel filteredinterp_filters750[(1 << SUBPEL_BITS)] = { |
77 | | { 2, -11, 25, 96, 25, -11, 2, 0 }, { 2, -11, 22, 96, 28, -11, 2, 0 }, |
78 | | { 2, -10, 19, 95, 31, -11, 2, 0 }, { 2, -10, 17, 95, 34, -12, 2, 0 }, |
79 | | { 2, -9, 14, 94, 37, -12, 2, 0 }, { 2, -8, 12, 93, 40, -12, 1, 0 }, |
80 | | { 2, -8, 9, 92, 43, -12, 1, 1 }, { 2, -7, 7, 91, 46, -12, 1, 0 }, |
81 | | { 2, -7, 5, 90, 49, -12, 1, 0 }, { 2, -6, 3, 88, 52, -12, 0, 1 }, |
82 | | { 2, -5, 1, 86, 55, -12, 0, 1 }, { 2, -5, -1, 84, 58, -11, 0, 1 }, |
83 | | { 2, -4, -2, 82, 61, -11, -1, 1 }, { 2, -4, -4, 80, 64, -10, -1, 1 }, |
84 | | { 1, -3, -5, 77, 67, -9, -1, 1 }, { 1, -3, -6, 75, 70, -8, -2, 1 }, |
85 | | { 1, -2, -7, 72, 72, -7, -2, 1 }, { 1, -2, -8, 70, 75, -6, -3, 1 }, |
86 | | { 1, -1, -9, 67, 77, -5, -3, 1 }, { 1, -1, -10, 64, 80, -4, -4, 2 }, |
87 | | { 1, -1, -11, 61, 82, -2, -4, 2 }, { 1, 0, -11, 58, 84, -1, -5, 2 }, |
88 | | { 1, 0, -12, 55, 86, 1, -5, 2 }, { 1, 0, -12, 52, 88, 3, -6, 2 }, |
89 | | { 0, 1, -12, 49, 90, 5, -7, 2 }, { 0, 1, -12, 46, 91, 7, -7, 2 }, |
90 | | { 1, 1, -12, 43, 92, 9, -8, 2 }, { 0, 1, -12, 40, 93, 12, -8, 2 }, |
91 | | { 0, 2, -12, 37, 94, 14, -9, 2 }, { 0, 2, -12, 34, 95, 17, -10, 2 }, |
92 | | { 0, 2, -11, 31, 95, 19, -10, 2 }, { 0, 2, -11, 28, 96, 22, -11, 2 } |
93 | | }; |
94 | | |
95 | | // Filters for interpolation (0.875-band) - note this also filters integer pels. |
96 | | static const interp_kernel filteredinterp_filters875[(1 << SUBPEL_BITS)] = { |
97 | | { 3, -8, 13, 112, 13, -8, 3, 0 }, { 3, -7, 10, 112, 17, -9, 3, -1 }, |
98 | | { 2, -6, 7, 111, 21, -9, 3, -1 }, { 2, -5, 4, 111, 24, -10, 3, -1 }, |
99 | | { 2, -4, 1, 110, 28, -11, 3, -1 }, { 1, -3, -1, 108, 32, -12, 4, -1 }, |
100 | | { 1, -2, -3, 106, 36, -13, 4, -1 }, { 1, -1, -6, 105, 40, -14, 4, -1 }, |
101 | | { 1, -1, -7, 102, 44, -14, 4, -1 }, { 1, 0, -9, 100, 48, -15, 4, -1 }, |
102 | | { 1, 1, -11, 97, 53, -16, 4, -1 }, { 0, 1, -12, 95, 57, -16, 4, -1 }, |
103 | | { 0, 2, -13, 91, 61, -16, 4, -1 }, { 0, 2, -14, 88, 65, -16, 4, -1 }, |
104 | | { 0, 3, -15, 84, 69, -17, 4, 0 }, { 0, 3, -16, 81, 73, -16, 3, 0 }, |
105 | | { 0, 3, -16, 77, 77, -16, 3, 0 }, { 0, 3, -16, 73, 81, -16, 3, 0 }, |
106 | | { 0, 4, -17, 69, 84, -15, 3, 0 }, { -1, 4, -16, 65, 88, -14, 2, 0 }, |
107 | | { -1, 4, -16, 61, 91, -13, 2, 0 }, { -1, 4, -16, 57, 95, -12, 1, 0 }, |
108 | | { -1, 4, -16, 53, 97, -11, 1, 1 }, { -1, 4, -15, 48, 100, -9, 0, 1 }, |
109 | | { -1, 4, -14, 44, 102, -7, -1, 1 }, { -1, 4, -14, 40, 105, -6, -1, 1 }, |
110 | | { -1, 4, -13, 36, 106, -3, -2, 1 }, { -1, 4, -12, 32, 108, -1, -3, 1 }, |
111 | | { -1, 3, -11, 28, 110, 1, -4, 2 }, { -1, 3, -10, 24, 111, 4, -5, 2 }, |
112 | | { -1, 3, -9, 21, 111, 7, -6, 2 }, { -1, 3, -9, 17, 112, 10, -7, 3 } |
113 | | }; |
114 | | |
115 | | // Filters for interpolation (full-band) - no filtering for integer pixels |
116 | | static const interp_kernel filteredinterp_filters1000[(1 << SUBPEL_BITS)] = { |
117 | | { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 1, -3, 128, 3, -1, 0, 0 }, |
118 | | { -1, 2, -6, 127, 7, -2, 1, 0 }, { -1, 3, -9, 126, 12, -4, 1, 0 }, |
119 | | { -1, 4, -12, 125, 16, -5, 1, 0 }, { -1, 4, -14, 123, 20, -6, 2, 0 }, |
120 | | { -1, 5, -15, 120, 25, -8, 2, 0 }, { -1, 5, -17, 118, 30, -9, 3, -1 }, |
121 | | { -1, 6, -18, 114, 35, -10, 3, -1 }, { -1, 6, -19, 111, 41, -12, 3, -1 }, |
122 | | { -1, 6, -20, 107, 46, -13, 4, -1 }, { -1, 6, -21, 103, 52, -14, 4, -1 }, |
123 | | { -1, 6, -21, 99, 57, -16, 5, -1 }, { -1, 6, -21, 94, 63, -17, 5, -1 }, |
124 | | { -1, 6, -20, 89, 68, -18, 5, -1 }, { -1, 6, -20, 84, 73, -19, 6, -1 }, |
125 | | { -1, 6, -20, 79, 79, -20, 6, -1 }, { -1, 6, -19, 73, 84, -20, 6, -1 }, |
126 | | { -1, 5, -18, 68, 89, -20, 6, -1 }, { -1, 5, -17, 63, 94, -21, 6, -1 }, |
127 | | { -1, 5, -16, 57, 99, -21, 6, -1 }, { -1, 4, -14, 52, 103, -21, 6, -1 }, |
128 | | { -1, 4, -13, 46, 107, -20, 6, -1 }, { -1, 3, -12, 41, 111, -19, 6, -1 }, |
129 | | { -1, 3, -10, 35, 114, -18, 6, -1 }, { -1, 3, -9, 30, 118, -17, 5, -1 }, |
130 | | { 0, 2, -8, 25, 120, -15, 5, -1 }, { 0, 2, -6, 20, 123, -14, 4, -1 }, |
131 | | { 0, 1, -5, 16, 125, -12, 4, -1 }, { 0, 1, -4, 12, 126, -9, 3, -1 }, |
132 | | { 0, 1, -2, 7, 127, -6, 2, -1 }, { 0, 0, -1, 3, 128, -3, 1, 0 } |
133 | | }; |
134 | | |
// Filters for factor of 2 downsampling. Only one half of each symmetric
// filter is stored; the other half is mirrored by the down2_* routines.
// Even-length filter: full length is twice the stored half.
static const int16_t vp9_down2_symeven_half_filter[] = { 56, 12, -3, -1 };
// Odd-length filter: element 0 is the centre tap, so the full length is
// twice the stored half minus one.
static const int16_t vp9_down2_symodd_half_filter[] = { 64, 35, 0, -3 };
138 | | |
139 | 0 | static const interp_kernel *choose_interp_filter(int inlength, int outlength) { |
140 | 0 | int outlength16 = outlength * 16; |
141 | 0 | if (outlength16 >= inlength * 16) |
142 | 0 | return filteredinterp_filters1000; |
143 | 0 | else if (outlength16 >= inlength * 13) |
144 | 0 | return filteredinterp_filters875; |
145 | 0 | else if (outlength16 >= inlength * 11) |
146 | 0 | return filteredinterp_filters750; |
147 | 0 | else if (outlength16 >= inlength * 9) |
148 | 0 | return filteredinterp_filters625; |
149 | 0 | else |
150 | 0 | return filteredinterp_filters500; |
151 | 0 | } |
152 | | |
153 | | static void interpolate(const uint8_t *const input, int inlength, |
154 | 0 | uint8_t *output, int outlength) { |
155 | 0 | const int64_t delta = |
156 | 0 | (((uint64_t)inlength << 32) + outlength / 2) / outlength; |
157 | 0 | const int64_t offset = |
158 | 0 | inlength > outlength |
159 | 0 | ? (((int64_t)(inlength - outlength) << 31) + outlength / 2) / |
160 | 0 | outlength |
161 | 0 | : -(((int64_t)(outlength - inlength) << 31) + outlength / 2) / |
162 | 0 | outlength; |
163 | 0 | uint8_t *optr = output; |
164 | 0 | int x, x1, x2, sum, k, int_pel, sub_pel; |
165 | 0 | int64_t y; |
166 | |
|
167 | 0 | const interp_kernel *interp_filters = |
168 | 0 | choose_interp_filter(inlength, outlength); |
169 | |
|
170 | 0 | x = 0; |
171 | 0 | y = offset; |
172 | 0 | while ((y >> INTERP_PRECISION_BITS) < (INTERP_TAPS / 2 - 1)) { |
173 | 0 | x++; |
174 | 0 | y += delta; |
175 | 0 | } |
176 | 0 | x1 = x; |
177 | 0 | x = outlength - 1; |
178 | 0 | y = delta * x + offset; |
179 | 0 | while ((y >> INTERP_PRECISION_BITS) + (int64_t)(INTERP_TAPS / 2) >= |
180 | 0 | inlength) { |
181 | 0 | x--; |
182 | 0 | y -= delta; |
183 | 0 | } |
184 | 0 | x2 = x; |
185 | 0 | if (x1 > x2) { |
186 | 0 | for (x = 0, y = offset; x < outlength; ++x, y += delta) { |
187 | 0 | const int16_t *filter; |
188 | 0 | int_pel = y >> INTERP_PRECISION_BITS; |
189 | 0 | sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK; |
190 | 0 | filter = interp_filters[sub_pel]; |
191 | 0 | sum = 0; |
192 | 0 | for (k = 0; k < INTERP_TAPS; ++k) { |
193 | 0 | const int pk = int_pel - INTERP_TAPS / 2 + 1 + k; |
194 | 0 | sum += filter[k] * |
195 | 0 | input[(pk < 0 ? 0 : (pk >= inlength ? inlength - 1 : pk))]; |
196 | 0 | } |
197 | 0 | *optr++ = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); |
198 | 0 | } |
199 | 0 | } else { |
200 | | // Initial part. |
201 | 0 | for (x = 0, y = offset; x < x1; ++x, y += delta) { |
202 | 0 | const int16_t *filter; |
203 | 0 | int_pel = y >> INTERP_PRECISION_BITS; |
204 | 0 | sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK; |
205 | 0 | filter = interp_filters[sub_pel]; |
206 | 0 | sum = 0; |
207 | 0 | for (k = 0; k < INTERP_TAPS; ++k) |
208 | 0 | sum += filter[k] * input[(int_pel - INTERP_TAPS / 2 + 1 + k < 0 |
209 | 0 | ? 0 |
210 | 0 | : int_pel - INTERP_TAPS / 2 + 1 + k)]; |
211 | 0 | *optr++ = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); |
212 | 0 | } |
213 | | // Middle part. |
214 | 0 | for (; x <= x2; ++x, y += delta) { |
215 | 0 | const int16_t *filter; |
216 | 0 | int_pel = y >> INTERP_PRECISION_BITS; |
217 | 0 | sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK; |
218 | 0 | filter = interp_filters[sub_pel]; |
219 | 0 | sum = 0; |
220 | 0 | for (k = 0; k < INTERP_TAPS; ++k) |
221 | 0 | sum += filter[k] * input[int_pel - INTERP_TAPS / 2 + 1 + k]; |
222 | 0 | *optr++ = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); |
223 | 0 | } |
224 | | // End part. |
225 | 0 | for (; x < outlength; ++x, y += delta) { |
226 | 0 | const int16_t *filter; |
227 | 0 | int_pel = y >> INTERP_PRECISION_BITS; |
228 | 0 | sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK; |
229 | 0 | filter = interp_filters[sub_pel]; |
230 | 0 | sum = 0; |
231 | 0 | for (k = 0; k < INTERP_TAPS; ++k) |
232 | 0 | sum += filter[k] * input[(int_pel - INTERP_TAPS / 2 + 1 + k >= inlength |
233 | 0 | ? inlength - 1 |
234 | 0 | : int_pel - INTERP_TAPS / 2 + 1 + k)]; |
235 | 0 | *optr++ = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); |
236 | 0 | } |
237 | 0 | } |
238 | 0 | } |
239 | | |
240 | | static void down2_symeven(const uint8_t *const input, int length, |
241 | 0 | uint8_t *output) { |
242 | | // Actual filter len = 2 * filter_len_half. |
243 | 0 | const int16_t *filter = vp9_down2_symeven_half_filter; |
244 | 0 | const int filter_len_half = sizeof(vp9_down2_symeven_half_filter) / 2; |
245 | 0 | int i, j; |
246 | 0 | uint8_t *optr = output; |
247 | 0 | int l1 = filter_len_half; |
248 | 0 | int l2 = (length - filter_len_half); |
249 | 0 | l1 += (l1 & 1); |
250 | 0 | l2 += (l2 & 1); |
251 | 0 | if (l1 > l2) { |
252 | | // Short input length. |
253 | 0 | for (i = 0; i < length; i += 2) { |
254 | 0 | int sum = (1 << (FILTER_BITS - 1)); |
255 | 0 | for (j = 0; j < filter_len_half; ++j) { |
256 | 0 | sum += (input[(i - j < 0 ? 0 : i - j)] + |
257 | 0 | input[(i + 1 + j >= length ? length - 1 : i + 1 + j)]) * |
258 | 0 | filter[j]; |
259 | 0 | } |
260 | 0 | sum >>= FILTER_BITS; |
261 | 0 | *optr++ = clip_pixel(sum); |
262 | 0 | } |
263 | 0 | } else { |
264 | | // Initial part. |
265 | 0 | for (i = 0; i < l1; i += 2) { |
266 | 0 | int sum = (1 << (FILTER_BITS - 1)); |
267 | 0 | for (j = 0; j < filter_len_half; ++j) { |
268 | 0 | sum += (input[(i - j < 0 ? 0 : i - j)] + input[i + 1 + j]) * filter[j]; |
269 | 0 | } |
270 | 0 | sum >>= FILTER_BITS; |
271 | 0 | *optr++ = clip_pixel(sum); |
272 | 0 | } |
273 | | // Middle part. |
274 | 0 | for (; i < l2; i += 2) { |
275 | 0 | int sum = (1 << (FILTER_BITS - 1)); |
276 | 0 | for (j = 0; j < filter_len_half; ++j) { |
277 | 0 | sum += (input[i - j] + input[i + 1 + j]) * filter[j]; |
278 | 0 | } |
279 | 0 | sum >>= FILTER_BITS; |
280 | 0 | *optr++ = clip_pixel(sum); |
281 | 0 | } |
282 | | // End part. |
283 | 0 | for (; i < length; i += 2) { |
284 | 0 | int sum = (1 << (FILTER_BITS - 1)); |
285 | 0 | for (j = 0; j < filter_len_half; ++j) { |
286 | 0 | sum += (input[i - j] + |
287 | 0 | input[(i + 1 + j >= length ? length - 1 : i + 1 + j)]) * |
288 | 0 | filter[j]; |
289 | 0 | } |
290 | 0 | sum >>= FILTER_BITS; |
291 | 0 | *optr++ = clip_pixel(sum); |
292 | 0 | } |
293 | 0 | } |
294 | 0 | } |
295 | | |
296 | | static void down2_symodd(const uint8_t *const input, int length, |
297 | 0 | uint8_t *output) { |
298 | | // Actual filter len = 2 * filter_len_half - 1. |
299 | 0 | const int16_t *filter = vp9_down2_symodd_half_filter; |
300 | 0 | const int filter_len_half = sizeof(vp9_down2_symodd_half_filter) / 2; |
301 | 0 | int i, j; |
302 | 0 | uint8_t *optr = output; |
303 | 0 | int l1 = filter_len_half - 1; |
304 | 0 | int l2 = (length - filter_len_half + 1); |
305 | 0 | l1 += (l1 & 1); |
306 | 0 | l2 += (l2 & 1); |
307 | 0 | if (l1 > l2) { |
308 | | // Short input length. |
309 | 0 | for (i = 0; i < length; i += 2) { |
310 | 0 | int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0]; |
311 | 0 | for (j = 1; j < filter_len_half; ++j) { |
312 | 0 | sum += (input[(i - j < 0 ? 0 : i - j)] + |
313 | 0 | input[(i + j >= length ? length - 1 : i + j)]) * |
314 | 0 | filter[j]; |
315 | 0 | } |
316 | 0 | sum >>= FILTER_BITS; |
317 | 0 | *optr++ = clip_pixel(sum); |
318 | 0 | } |
319 | 0 | } else { |
320 | | // Initial part. |
321 | 0 | for (i = 0; i < l1; i += 2) { |
322 | 0 | int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0]; |
323 | 0 | for (j = 1; j < filter_len_half; ++j) { |
324 | 0 | sum += (input[(i - j < 0 ? 0 : i - j)] + input[i + j]) * filter[j]; |
325 | 0 | } |
326 | 0 | sum >>= FILTER_BITS; |
327 | 0 | *optr++ = clip_pixel(sum); |
328 | 0 | } |
329 | | // Middle part. |
330 | 0 | for (; i < l2; i += 2) { |
331 | 0 | int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0]; |
332 | 0 | for (j = 1; j < filter_len_half; ++j) { |
333 | 0 | sum += (input[i - j] + input[i + j]) * filter[j]; |
334 | 0 | } |
335 | 0 | sum >>= FILTER_BITS; |
336 | 0 | *optr++ = clip_pixel(sum); |
337 | 0 | } |
338 | | // End part. |
339 | 0 | for (; i < length; i += 2) { |
340 | 0 | int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0]; |
341 | 0 | for (j = 1; j < filter_len_half; ++j) { |
342 | 0 | sum += (input[i - j] + input[(i + j >= length ? length - 1 : i + j)]) * |
343 | 0 | filter[j]; |
344 | 0 | } |
345 | 0 | sum >>= FILTER_BITS; |
346 | 0 | *optr++ = clip_pixel(sum); |
347 | 0 | } |
348 | 0 | } |
349 | 0 | } |
350 | | |
// Returns the length left after halving `length` (rounding up) `steps`
// times. A non-positive `steps` leaves `length` unchanged.
static int get_down2_length(int length, int steps) {
  while (steps-- > 0) {
    length = (length + 1) >> 1;
  }
  return length;
}
356 | | |
// Returns how many times in_length can be halved (rounding up) while the
// halved length stays at least out_length.
static int get_down2_steps(int in_length, int out_length) {
  int steps = 0;

  for (;;) {
    const int halved = get_down2_length(in_length, 1);
    if (halved < out_length) break;
    ++steps;
    in_length = halved;
    // Special case: a length of 1 halves to 1 again, so without this exit the
    // loop would never terminate.
    if (in_length == 1) break;
  }
  return steps;
}
372 | | |
// Resizes one line of `length` samples to `olength` samples.
//
// Equal lengths are copied. Otherwise the line is halved as many times as
// get_down2_steps() allows, and interpolate() covers whatever ratio remains.
// `otmp` is scratch space for the intermediate halved lines: even-numbered
// steps write at its start, odd-numbered steps write at an offset of
// get_down2_length(length, 1) samples. It is only dereferenced when at least
// one halving step is taken.
static void resize_multistep(const uint8_t *const input, int length,
                             uint8_t *output, int olength, uint8_t *otmp) {
  const uint8_t *src = input;
  uint8_t *dst = NULL;
  uint8_t *otmp_odd;
  int cur_len = length;
  int steps, s;

  if (length == olength) {
    memcpy(output, input, sizeof(output[0]) * length);
    return;
  }

  steps = get_down2_steps(length, olength);
  if (steps <= 0) {
    interpolate(input, length, output, olength);
    return;
  }

  assert(otmp != NULL);
  otmp_odd = otmp + get_down2_length(length, 1);
  for (s = 0; s < steps; ++s) {
    const int next_len = get_down2_length(cur_len, 1);

    // The last step writes straight to the output when it lands exactly on
    // the target length; otherwise alternate between the two scratch areas.
    if (s == steps - 1 && next_len == olength) {
      dst = output;
    } else if (s & 1) {
      dst = otmp_odd;
    } else {
      dst = otmp;
    }

    if (cur_len & 1) {
      down2_symodd(src, cur_len, dst);
    } else {
      down2_symeven(src, cur_len, dst);
    }
    src = dst;
    cur_len = next_len;
  }

  if (cur_len != olength) {
    interpolate(dst, cur_len, output, olength);
  }
}
410 | | |
// Copies `len` samples, spaced `stride` apart starting at `img`, into the
// contiguous array `arr`.
static void fill_col_to_arr(uint8_t *img, int stride, int len, uint8_t *arr) {
  const uint8_t *src = img;
  uint8_t *dst = arr;
  int remaining = len;

  while (remaining-- > 0) {
    *dst++ = *src;
    src += stride;
  }
}
419 | | |
// Scatters the `len` contiguous samples of `arr` into positions spaced
// `stride` apart starting at `img`.
static void fill_arr_to_col(uint8_t *img, int stride, int len, uint8_t *arr) {
  uint8_t *dst = img;
  const uint8_t *src = arr;
  int remaining = len;

  while (remaining-- > 0) {
    *dst = *src++;
    dst += stride;
  }
}
428 | | |
// Resizes an 8-bit plane of `width` x `height` samples (row stride
// `in_stride`) into `output`, a plane of `width2` x `height2` samples (row
// stride `out_stride`).
//
// The resize is separable: every input row is first resized to width2
// samples into an intermediate buffer, then every column of that buffer is
// resized to height2 samples and written to the output.
//
// All four dimensions must be positive (asserted). If a scratch allocation
// fails the function returns without writing to `output`; the void interface
// gives no way to report this to the caller.
void vp9_resize_plane(const uint8_t *const input, int height, int width,
                      int in_stride, uint8_t *output, int height2, int width2,
                      int out_stride) {
  int i;
  uint8_t *intbuf = NULL;
  uint8_t *tmpbuf = NULL;
  uint8_t *arrbuf = NULL;
  uint8_t *arrbuf2 = NULL;

  // Validate the dimensions before they are used to size any allocation.
  assert(width > 0);
  assert(height > 0);
  assert(width2 > 0);
  assert(height2 > 0);

  // `height` rows of `width2` samples. Passing the two factors separately
  // lets calloc() detect an overflowing product instead of computing
  // width2 * height in int.
  intbuf = (uint8_t *)calloc((size_t)height, (size_t)width2 * sizeof(*intbuf));
  // Scratch for resize_multistep(); sized for the longest line resized.
  tmpbuf = (uint8_t *)calloc((size_t)(width < height ? height : width),
                             sizeof(*tmpbuf));
  arrbuf = (uint8_t *)calloc((size_t)height, sizeof(*arrbuf));
  arrbuf2 = (uint8_t *)calloc((size_t)height2, sizeof(*arrbuf2));
  if (intbuf == NULL || tmpbuf == NULL || arrbuf == NULL || arrbuf2 == NULL)
    goto Error;

  // Horizontal pass: resize each input row to width2 samples.
  for (i = 0; i < height; ++i) {
    resize_multistep(input + (int64_t)in_stride * i, width,
                     intbuf + (size_t)width2 * i, width2, tmpbuf);
  }

  // Vertical pass: gather each intermediate column, resize it to height2
  // samples, and scatter it into the output plane.
  for (i = 0; i < width2; ++i) {
    fill_col_to_arr(intbuf + i, width2, height, arrbuf);
    resize_multistep(arrbuf, height, arrbuf2, height2, tmpbuf);
    fill_arr_to_col(output + i, out_stride, height2, arrbuf2);
  }

Error:
  free(intbuf);
  free(tmpbuf);
  free(arrbuf);
  free(arrbuf2);
}
459 | | |
460 | | #if CONFIG_VP9_HIGHBITDEPTH |
461 | | static void highbd_interpolate(const uint16_t *const input, int inlength, |
462 | 0 | uint16_t *output, int outlength, int bd) { |
463 | 0 | const int64_t delta = |
464 | 0 | (((uint64_t)inlength << 32) + outlength / 2) / outlength; |
465 | 0 | const int64_t offset = |
466 | 0 | inlength > outlength |
467 | 0 | ? (((int64_t)(inlength - outlength) << 31) + outlength / 2) / |
468 | 0 | outlength |
469 | 0 | : -(((int64_t)(outlength - inlength) << 31) + outlength / 2) / |
470 | 0 | outlength; |
471 | 0 | uint16_t *optr = output; |
472 | 0 | int x, x1, x2, sum, k, int_pel, sub_pel; |
473 | 0 | int64_t y; |
474 | |
|
475 | 0 | const interp_kernel *interp_filters = |
476 | 0 | choose_interp_filter(inlength, outlength); |
477 | |
|
478 | 0 | x = 0; |
479 | 0 | y = offset; |
480 | 0 | while ((y >> INTERP_PRECISION_BITS) < (INTERP_TAPS / 2 - 1)) { |
481 | 0 | x++; |
482 | 0 | y += delta; |
483 | 0 | } |
484 | 0 | x1 = x; |
485 | 0 | x = outlength - 1; |
486 | 0 | y = delta * x + offset; |
487 | 0 | while ((y >> INTERP_PRECISION_BITS) + (int64_t)(INTERP_TAPS / 2) >= |
488 | 0 | inlength) { |
489 | 0 | x--; |
490 | 0 | y -= delta; |
491 | 0 | } |
492 | 0 | x2 = x; |
493 | 0 | if (x1 > x2) { |
494 | 0 | for (x = 0, y = offset; x < outlength; ++x, y += delta) { |
495 | 0 | const int16_t *filter; |
496 | 0 | int_pel = y >> INTERP_PRECISION_BITS; |
497 | 0 | sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK; |
498 | 0 | filter = interp_filters[sub_pel]; |
499 | 0 | sum = 0; |
500 | 0 | for (k = 0; k < INTERP_TAPS; ++k) { |
501 | 0 | const int pk = int_pel - INTERP_TAPS / 2 + 1 + k; |
502 | 0 | sum += filter[k] * |
503 | 0 | input[(pk < 0 ? 0 : (pk >= inlength ? inlength - 1 : pk))]; |
504 | 0 | } |
505 | 0 | *optr++ = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd); |
506 | 0 | } |
507 | 0 | } else { |
508 | | // Initial part. |
509 | 0 | for (x = 0, y = offset; x < x1; ++x, y += delta) { |
510 | 0 | const int16_t *filter; |
511 | 0 | int_pel = y >> INTERP_PRECISION_BITS; |
512 | 0 | sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK; |
513 | 0 | filter = interp_filters[sub_pel]; |
514 | 0 | sum = 0; |
515 | 0 | for (k = 0; k < INTERP_TAPS; ++k) { |
516 | 0 | assert(int_pel - INTERP_TAPS / 2 + 1 + k < inlength); |
517 | 0 | sum += filter[k] * input[(int_pel - INTERP_TAPS / 2 + 1 + k < 0 |
518 | 0 | ? 0 |
519 | 0 | : int_pel - INTERP_TAPS / 2 + 1 + k)]; |
520 | 0 | } |
521 | 0 | *optr++ = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd); |
522 | 0 | } |
523 | | // Middle part. |
524 | 0 | for (; x <= x2; ++x, y += delta) { |
525 | 0 | const int16_t *filter; |
526 | 0 | int_pel = y >> INTERP_PRECISION_BITS; |
527 | 0 | sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK; |
528 | 0 | filter = interp_filters[sub_pel]; |
529 | 0 | sum = 0; |
530 | 0 | for (k = 0; k < INTERP_TAPS; ++k) |
531 | 0 | sum += filter[k] * input[int_pel - INTERP_TAPS / 2 + 1 + k]; |
532 | 0 | *optr++ = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd); |
533 | 0 | } |
534 | | // End part. |
535 | 0 | for (; x < outlength; ++x, y += delta) { |
536 | 0 | const int16_t *filter; |
537 | 0 | int_pel = y >> INTERP_PRECISION_BITS; |
538 | 0 | sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK; |
539 | 0 | filter = interp_filters[sub_pel]; |
540 | 0 | sum = 0; |
541 | 0 | for (k = 0; k < INTERP_TAPS; ++k) |
542 | 0 | sum += filter[k] * input[(int_pel - INTERP_TAPS / 2 + 1 + k >= inlength |
543 | 0 | ? inlength - 1 |
544 | 0 | : int_pel - INTERP_TAPS / 2 + 1 + k)]; |
545 | 0 | *optr++ = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd); |
546 | 0 | } |
547 | 0 | } |
548 | 0 | } |
549 | | |
550 | | static void highbd_down2_symeven(const uint16_t *const input, int length, |
551 | 0 | uint16_t *output, int bd) { |
552 | | // Actual filter len = 2 * filter_len_half. |
553 | 0 | static const int16_t *filter = vp9_down2_symeven_half_filter; |
554 | 0 | const int filter_len_half = sizeof(vp9_down2_symeven_half_filter) / 2; |
555 | 0 | int i, j; |
556 | 0 | uint16_t *optr = output; |
557 | 0 | int l1 = filter_len_half; |
558 | 0 | int l2 = (length - filter_len_half); |
559 | 0 | l1 += (l1 & 1); |
560 | 0 | l2 += (l2 & 1); |
561 | 0 | if (l1 > l2) { |
562 | | // Short input length. |
563 | 0 | for (i = 0; i < length; i += 2) { |
564 | 0 | int sum = (1 << (FILTER_BITS - 1)); |
565 | 0 | for (j = 0; j < filter_len_half; ++j) { |
566 | 0 | sum += (input[(i - j < 0 ? 0 : i - j)] + |
567 | 0 | input[(i + 1 + j >= length ? length - 1 : i + 1 + j)]) * |
568 | 0 | filter[j]; |
569 | 0 | } |
570 | 0 | sum >>= FILTER_BITS; |
571 | 0 | *optr++ = clip_pixel_highbd(sum, bd); |
572 | 0 | } |
573 | 0 | } else { |
574 | | // Initial part. |
575 | 0 | for (i = 0; i < l1; i += 2) { |
576 | 0 | int sum = (1 << (FILTER_BITS - 1)); |
577 | 0 | for (j = 0; j < filter_len_half; ++j) { |
578 | 0 | sum += (input[(i - j < 0 ? 0 : i - j)] + input[i + 1 + j]) * filter[j]; |
579 | 0 | } |
580 | 0 | sum >>= FILTER_BITS; |
581 | 0 | *optr++ = clip_pixel_highbd(sum, bd); |
582 | 0 | } |
583 | | // Middle part. |
584 | 0 | for (; i < l2; i += 2) { |
585 | 0 | int sum = (1 << (FILTER_BITS - 1)); |
586 | 0 | for (j = 0; j < filter_len_half; ++j) { |
587 | 0 | sum += (input[i - j] + input[i + 1 + j]) * filter[j]; |
588 | 0 | } |
589 | 0 | sum >>= FILTER_BITS; |
590 | 0 | *optr++ = clip_pixel_highbd(sum, bd); |
591 | 0 | } |
592 | | // End part. |
593 | 0 | for (; i < length; i += 2) { |
594 | 0 | int sum = (1 << (FILTER_BITS - 1)); |
595 | 0 | for (j = 0; j < filter_len_half; ++j) { |
596 | 0 | sum += (input[i - j] + |
597 | 0 | input[(i + 1 + j >= length ? length - 1 : i + 1 + j)]) * |
598 | 0 | filter[j]; |
599 | 0 | } |
600 | 0 | sum >>= FILTER_BITS; |
601 | 0 | *optr++ = clip_pixel_highbd(sum, bd); |
602 | 0 | } |
603 | 0 | } |
604 | 0 | } |
605 | | |
606 | | static void highbd_down2_symodd(const uint16_t *const input, int length, |
607 | 0 | uint16_t *output, int bd) { |
608 | | // Actual filter len = 2 * filter_len_half - 1. |
609 | 0 | static const int16_t *filter = vp9_down2_symodd_half_filter; |
610 | 0 | const int filter_len_half = sizeof(vp9_down2_symodd_half_filter) / 2; |
611 | 0 | int i, j; |
612 | 0 | uint16_t *optr = output; |
613 | 0 | int l1 = filter_len_half - 1; |
614 | 0 | int l2 = (length - filter_len_half + 1); |
615 | 0 | l1 += (l1 & 1); |
616 | 0 | l2 += (l2 & 1); |
617 | 0 | if (l1 > l2) { |
618 | | // Short input length. |
619 | 0 | for (i = 0; i < length; i += 2) { |
620 | 0 | int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0]; |
621 | 0 | for (j = 1; j < filter_len_half; ++j) { |
622 | 0 | sum += (input[(i - j < 0 ? 0 : i - j)] + |
623 | 0 | input[(i + j >= length ? length - 1 : i + j)]) * |
624 | 0 | filter[j]; |
625 | 0 | } |
626 | 0 | sum >>= FILTER_BITS; |
627 | 0 | *optr++ = clip_pixel_highbd(sum, bd); |
628 | 0 | } |
629 | 0 | } else { |
630 | | // Initial part. |
631 | 0 | for (i = 0; i < l1; i += 2) { |
632 | 0 | int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0]; |
633 | 0 | for (j = 1; j < filter_len_half; ++j) { |
634 | 0 | sum += (input[(i - j < 0 ? 0 : i - j)] + input[i + j]) * filter[j]; |
635 | 0 | } |
636 | 0 | sum >>= FILTER_BITS; |
637 | 0 | *optr++ = clip_pixel_highbd(sum, bd); |
638 | 0 | } |
639 | | // Middle part. |
640 | 0 | for (; i < l2; i += 2) { |
641 | 0 | int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0]; |
642 | 0 | for (j = 1; j < filter_len_half; ++j) { |
643 | 0 | sum += (input[i - j] + input[i + j]) * filter[j]; |
644 | 0 | } |
645 | 0 | sum >>= FILTER_BITS; |
646 | 0 | *optr++ = clip_pixel_highbd(sum, bd); |
647 | 0 | } |
648 | | // End part. |
649 | 0 | for (; i < length; i += 2) { |
650 | 0 | int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0]; |
651 | 0 | for (j = 1; j < filter_len_half; ++j) { |
652 | 0 | sum += (input[i - j] + input[(i + j >= length ? length - 1 : i + j)]) * |
653 | 0 | filter[j]; |
654 | 0 | } |
655 | 0 | sum >>= FILTER_BITS; |
656 | 0 | *optr++ = clip_pixel_highbd(sum, bd); |
657 | 0 | } |
658 | 0 | } |
659 | 0 | } |
660 | | |
// High-bitdepth counterpart of resize_multistep(): resizes one line of
// `length` 16-bit samples to `olength` samples for bit depth `bd`.
//
// Equal lengths are copied. Otherwise the line is halved as many times as
// get_down2_steps() allows, and highbd_interpolate() covers whatever ratio
// remains. `otmp` is scratch space for the intermediate halved lines:
// even-numbered steps write at its start, odd-numbered steps write at an
// offset of get_down2_length(length, 1) samples. It is only dereferenced
// when at least one halving step is taken.
static void highbd_resize_multistep(const uint16_t *const input, int length,
                                    uint16_t *output, int olength,
                                    uint16_t *otmp, int bd) {
  const uint16_t *src = input;
  uint16_t *dst = NULL;
  uint16_t *otmp_odd;
  int cur_len = length;
  int steps, s;

  if (length == olength) {
    memcpy(output, input, sizeof(output[0]) * length);
    return;
  }

  steps = get_down2_steps(length, olength);
  if (steps <= 0) {
    highbd_interpolate(input, length, output, olength, bd);
    return;
  }

  assert(otmp != NULL);
  otmp_odd = otmp + get_down2_length(length, 1);
  for (s = 0; s < steps; ++s) {
    const int next_len = get_down2_length(cur_len, 1);

    // The last step writes straight to the output when it lands exactly on
    // the target length; otherwise alternate between the two scratch areas.
    if (s == steps - 1 && next_len == olength) {
      dst = output;
    } else if (s & 1) {
      dst = otmp_odd;
    } else {
      dst = otmp;
    }

    if (cur_len & 1) {
      highbd_down2_symodd(src, cur_len, dst, bd);
    } else {
      highbd_down2_symeven(src, cur_len, dst, bd);
    }
    src = dst;
    cur_len = next_len;
  }

  if (cur_len != olength) {
    highbd_interpolate(dst, cur_len, output, olength, bd);
  }
}
699 | | |
// Copies `len` 16-bit samples, spaced `stride` elements apart starting at
// `img`, into the contiguous array `arr`.
static void highbd_fill_col_to_arr(uint16_t *img, int stride, int len,
                                   uint16_t *arr) {
  const uint16_t *src = img;
  uint16_t *dst = arr;
  int remaining = len;

  while (remaining-- > 0) {
    *dst++ = *src;
    src += stride;
  }
}
709 | | |
// Scatters the `len` contiguous 16-bit samples of `arr` into positions
// spaced `stride` elements apart starting at `img`.
static void highbd_fill_arr_to_col(uint16_t *img, int stride, int len,
                                   uint16_t *arr) {
  uint16_t *dst = img;
  const uint16_t *src = arr;
  int remaining = len;

  while (remaining-- > 0) {
    *dst = *src++;
    dst += stride;
  }
}
719 | | |
// High-bitdepth counterpart of vp9_resize_plane(): resizes a plane of
// `width` x `height` samples into a plane of `width2` x `height2` samples
// for bit depth `bd`. `input` and `output` are passed as uint8_t pointers
// and turned into 16-bit sample pointers with CONVERT_TO_SHORTPTR().
//
// The resize is separable: every input row is first resized to width2
// samples into an intermediate buffer, then every column of that buffer is
// resized to height2 samples and written to the output.
//
// All four dimensions must be positive (asserted). If a scratch allocation
// fails the function returns without writing to `output`; the void interface
// gives no way to report this to the caller.
void vp9_highbd_resize_plane(const uint8_t *const input, int height, int width,
                             int in_stride, uint8_t *output, int height2,
                             int width2, int out_stride, int bd) {
  int i;
  uint16_t *intbuf = NULL;
  uint16_t *tmpbuf = NULL;
  uint16_t *arrbuf = NULL;
  uint16_t *arrbuf2 = NULL;

  // Validate the dimensions before they are used to size any allocation.
  assert(width > 0);
  assert(height > 0);
  assert(width2 > 0);
  assert(height2 > 0);

  // `height` rows of `width2` samples. calloc() rejects an overflowing
  // rows * row-bytes product, which the previous malloc() size expression
  // could not detect; it also matches the 8-bit path.
  intbuf =
      (uint16_t *)calloc((size_t)height, (size_t)width2 * sizeof(*intbuf));
  // Scratch for highbd_resize_multistep(); sized for the longest line
  // resized.
  tmpbuf = (uint16_t *)calloc((size_t)(width < height ? height : width),
                              sizeof(*tmpbuf));
  arrbuf = (uint16_t *)calloc((size_t)height, sizeof(*arrbuf));
  arrbuf2 = (uint16_t *)calloc((size_t)height2, sizeof(*arrbuf2));
  if (intbuf == NULL || tmpbuf == NULL || arrbuf == NULL || arrbuf2 == NULL)
    goto Error;

  // Horizontal pass: resize each input row to width2 samples.
  for (i = 0; i < height; ++i) {
    highbd_resize_multistep(CONVERT_TO_SHORTPTR(input + in_stride * i), width,
                            intbuf + (size_t)width2 * i, width2, tmpbuf, bd);
  }

  // Vertical pass: gather each intermediate column, resize it to height2
  // samples, and scatter it into the output plane.
  for (i = 0; i < width2; ++i) {
    highbd_fill_col_to_arr(intbuf + i, width2, height, arrbuf);
    highbd_resize_multistep(arrbuf, height, arrbuf2, height2, tmpbuf, bd);
    highbd_fill_arr_to_col(CONVERT_TO_SHORTPTR(output + i), out_stride, height2,
                           arrbuf2);
  }

Error:
  free(intbuf);
  free(tmpbuf);
  free(arrbuf);
  free(arrbuf2);
}
752 | | #endif // CONFIG_VP9_HIGHBITDEPTH |