/src/dav1d/src/cdef_apply_tmpl.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright © 2018, VideoLAN and dav1d authors |
3 | | * Copyright © 2018, Two Orioles, LLC |
4 | | * All rights reserved. |
5 | | * |
6 | | * Redistribution and use in source and binary forms, with or without |
7 | | * modification, are permitted provided that the following conditions are met: |
8 | | * |
9 | | * 1. Redistributions of source code must retain the above copyright notice, this |
10 | | * list of conditions and the following disclaimer. |
11 | | * |
12 | | * 2. Redistributions in binary form must reproduce the above copyright notice, |
13 | | * this list of conditions and the following disclaimer in the documentation |
14 | | * and/or other materials provided with the distribution. |
15 | | * |
16 | | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND |
17 | | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
18 | | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
19 | | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR |
20 | | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
21 | | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
22 | | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
23 | | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
24 | | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
25 | | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
26 | | */ |
27 | | |
28 | | #include "config.h" |
29 | | |
30 | | #include <string.h> |
31 | | |
32 | | #include "common/intops.h" |
33 | | |
34 | | #include "src/cdef_apply.h" |
35 | | |
/*
 * Plane selector for backup2x8(). The values are single bits so they can be
 * OR-ed together and tested with '&'; dav1d_cdef_brow() also builds a flag
 * arithmetically as (!!y_lvl + (!!uv_lvl << 1)), so bit 0 must stay luma and
 * bit 1 must stay chroma.
 */
enum Backup2x8Flags {
    BACKUP_2X8_Y  = 0x1, // back up the luma plane (plane 0)
    BACKUP_2X8_UV = 0x2, // back up both chroma planes (planes 1 and 2)
};
40 | | |
41 | | static void backup2lines(pixel *const dst[3], /*const*/ pixel *const src[3], |
42 | | const ptrdiff_t stride[2], |
43 | | const enum Dav1dPixelLayout layout) |
44 | 6.28M | { |
45 | 6.28M | const ptrdiff_t y_stride = PXSTRIDE(stride[0]); |
46 | 6.28M | if (y_stride < 0) |
47 | 0 | pixel_copy(dst[0] + y_stride, src[0] + 7 * y_stride, -2 * y_stride); |
48 | 6.28M | else |
49 | 6.28M | pixel_copy(dst[0], src[0] + 6 * y_stride, 2 * y_stride); |
50 | | |
51 | 6.28M | if (layout != DAV1D_PIXEL_LAYOUT_I400) { |
52 | 1.59M | const ptrdiff_t uv_stride = PXSTRIDE(stride[1]); |
53 | 1.59M | if (uv_stride < 0) { |
54 | 0 | const int uv_off = layout == DAV1D_PIXEL_LAYOUT_I420 ? 3 : 7; |
55 | 0 | pixel_copy(dst[1] + uv_stride, src[1] + uv_off * uv_stride, -2 * uv_stride); |
56 | 0 | pixel_copy(dst[2] + uv_stride, src[2] + uv_off * uv_stride, -2 * uv_stride); |
57 | 1.59M | } else { |
58 | 1.59M | const int uv_off = layout == DAV1D_PIXEL_LAYOUT_I420 ? 2 : 6; |
59 | 1.59M | pixel_copy(dst[1], src[1] + uv_off * uv_stride, 2 * uv_stride); |
60 | 1.59M | pixel_copy(dst[2], src[2] + uv_off * uv_stride, 2 * uv_stride); |
61 | 1.59M | } |
62 | 1.59M | } |
63 | 6.28M | } |
64 | | |
65 | | static void backup2x8(pixel dst[3][8][2], |
66 | | /*const*/ pixel *const src[3], |
67 | | const ptrdiff_t src_stride[2], int x_off, |
68 | | const enum Dav1dPixelLayout layout, |
69 | | const enum Backup2x8Flags flag) |
70 | 7.54M | { |
71 | 7.54M | ptrdiff_t y_off = 0; |
72 | 7.54M | if (flag & BACKUP_2X8_Y) { |
73 | 62.7M | for (int y = 0; y < 8; y++, y_off += PXSTRIDE(src_stride[0])) |
74 | 55.6M | pixel_copy(dst[0][y], &src[0][y_off + x_off - 2], 2); |
75 | 7.09M | } |
76 | | |
77 | 7.54M | if (layout == DAV1D_PIXEL_LAYOUT_I400 || !(flag & BACKUP_2X8_UV)) |
78 | 1.39M | return; |
79 | | |
80 | 6.14M | const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420; |
81 | 6.14M | const int ss_hor = layout != DAV1D_PIXEL_LAYOUT_I444; |
82 | | |
83 | 6.14M | x_off >>= ss_hor; |
84 | 6.14M | y_off = 0; |
85 | 31.5M | for (int y = 0; y < (8 >> ss_ver); y++, y_off += PXSTRIDE(src_stride[1])) { |
86 | 25.4M | pixel_copy(dst[1][y], &src[1][y_off + x_off - 2], 2); |
87 | 25.4M | pixel_copy(dst[2][y], &src[2][y_off + x_off - 2], 2); |
88 | 25.4M | } |
89 | 6.14M | } |
90 | | |
/*
 * Scale a primary filter strength by the variance of the block.
 *
 * Returns 0 when var is 0. Otherwise the result is
 *     (strength * (4 + i) + 8) >> 4
 * where i is 0 if (var >> 6) is 0, else ulog2(var >> 6) capped at 12.
 * The multiplier therefore ranges over 4..16 sixteenths, with rounding.
 */
static int adjust_strength(const int strength, const unsigned var) {
    if (var == 0)
        return 0;

    const unsigned scaled_var = var >> 6;
    int log_term = 0;
    if (scaled_var)
        log_term = imin(ulog2(scaled_var), 12);

    return (strength * (log_term + 4) + 8) >> 4;
}
96 | | |
97 | | void bytefn(dav1d_cdef_brow)(Dav1dTaskContext *const tc, |
98 | | pixel *const p[3], |
99 | | const Av1Filter *const lflvl, |
100 | | const int by_start, const int by_end, |
101 | | const int sbrow_start, const int sby) |
102 | 987k | { |
103 | 987k | Dav1dFrameContext *const f = (Dav1dFrameContext *)tc->f; |
104 | 987k | const int bitdepth_min_8 = BITDEPTH == 8 ? 0 : f->cur.p.bpc - 8; |
105 | 987k | const Dav1dDSPContext *const dsp = f->dsp; |
106 | 987k | enum CdefEdgeFlags edges = CDEF_HAVE_BOTTOM | (by_start > 0 ? CDEF_HAVE_TOP : 0); |
107 | 987k | pixel *ptrs[3] = { p[0], p[1], p[2] }; |
108 | 987k | const int sbsz = 16; |
109 | 987k | const int sb64w = f->sb128w << 1; |
110 | 987k | const int damping = f->frame_hdr->cdef.damping + bitdepth_min_8; |
111 | 987k | const enum Dav1dPixelLayout layout = f->cur.p.layout; |
112 | 987k | const int uv_idx = DAV1D_PIXEL_LAYOUT_I444 - layout; |
113 | 987k | const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420; |
114 | 987k | const int ss_hor = layout != DAV1D_PIXEL_LAYOUT_I444; |
115 | 987k | static const uint8_t uv_dirs[2][8] = { { 0, 1, 2, 3, 4, 5, 6, 7 }, |
116 | 987k | { 7, 0, 2, 4, 5, 6, 6, 6 } }; |
117 | 987k | const uint8_t *uv_dir = uv_dirs[layout == DAV1D_PIXEL_LAYOUT_I422]; |
118 | 987k | const int have_tt = f->c->n_tc > 1; |
119 | 987k | const int sb128 = f->seq_hdr->sb128; |
120 | 987k | const int resize = f->frame_hdr->width[0] != f->frame_hdr->width[1]; |
121 | 987k | const ptrdiff_t y_stride = PXSTRIDE(f->cur.stride[0]); |
122 | 987k | const ptrdiff_t uv_stride = PXSTRIDE(f->cur.stride[1]); |
123 | | |
124 | 7.70M | for (int bit = 0, by = by_start; by < by_end; by += 2, edges |= CDEF_HAVE_TOP) { |
125 | 6.73M | const int tf = tc->top_pre_cdef_toggle; |
126 | 6.73M | const int by_idx = (by & 30) >> 1; |
127 | 6.73M | if (by + 2 >= f->bh) edges &= ~CDEF_HAVE_BOTTOM; |
128 | | |
129 | 6.73M | if ((!have_tt || sbrow_start || by + 2 < by_end) && |
130 | 6.29M | edges & CDEF_HAVE_BOTTOM) |
131 | 6.29M | { |
132 | | // backup pre-filter data for next iteration |
133 | 6.29M | pixel *const cdef_top_bak[3] = { |
134 | 6.29M | f->lf.cdef_line[!tf][0] + have_tt * sby * 4 * y_stride, |
135 | 6.29M | f->lf.cdef_line[!tf][1] + have_tt * sby * 8 * uv_stride, |
136 | 6.29M | f->lf.cdef_line[!tf][2] + have_tt * sby * 8 * uv_stride |
137 | 6.29M | }; |
138 | 6.29M | backup2lines(cdef_top_bak, ptrs, f->cur.stride, layout); |
139 | 6.29M | } |
140 | | |
141 | 6.73M | ALIGN_STK_16(pixel, lr_bak, 2 /* idx */, [3 /* plane */][8 /* y */][2 /* x */]); |
142 | 6.73M | pixel *iptrs[3] = { ptrs[0], ptrs[1], ptrs[2] }; |
143 | 6.73M | edges &= ~CDEF_HAVE_LEFT; |
144 | 6.73M | edges |= CDEF_HAVE_RIGHT; |
145 | 6.73M | enum Backup2x8Flags prev_flag = 0; |
146 | 22.2M | for (int sbx = 0; sbx < sb64w; sbx++, edges |= CDEF_HAVE_LEFT) { |
147 | 15.5M | const int sb128x = sbx >> 1; |
148 | 15.5M | const int sb64_idx = ((by & sbsz) >> 3) + (sbx & 1); |
149 | 15.5M | const int cdef_idx = lflvl[sb128x].cdef_idx[sb64_idx]; |
150 | 15.5M | if (cdef_idx == -1 || |
151 | 2.76M | (!f->frame_hdr->cdef.y_strength[cdef_idx] && |
152 | 1.46M | !f->frame_hdr->cdef.uv_strength[cdef_idx])) |
153 | 14.1M | { |
154 | 14.1M | prev_flag = 0; |
155 | 14.1M | goto next_sb; |
156 | 14.1M | } |
157 | | |
158 | | // Create a complete 32-bit mask for the sb row ahead of time. |
159 | 1.38M | const uint16_t (*noskip_row)[2] = &lflvl[sb128x].noskip_mask[by_idx]; |
160 | 1.38M | const unsigned noskip_mask = (unsigned) noskip_row[0][1] << 16 | |
161 | 1.38M | noskip_row[0][0]; |
162 | | |
163 | 1.38M | const int y_lvl = f->frame_hdr->cdef.y_strength[cdef_idx]; |
164 | 1.38M | const int uv_lvl = f->frame_hdr->cdef.uv_strength[cdef_idx]; |
165 | 1.38M | const enum Backup2x8Flags flag = !!y_lvl + (!!uv_lvl << 1); |
166 | | |
167 | 1.38M | const int y_pri_lvl = (y_lvl >> 2) << bitdepth_min_8; |
168 | 1.38M | int y_sec_lvl = y_lvl & 3; |
169 | 1.38M | y_sec_lvl += y_sec_lvl == 3; |
170 | 1.38M | y_sec_lvl <<= bitdepth_min_8; |
171 | | |
172 | 1.38M | const int uv_pri_lvl = (uv_lvl >> 2) << bitdepth_min_8; |
173 | 1.38M | int uv_sec_lvl = uv_lvl & 3; |
174 | 1.38M | uv_sec_lvl += uv_sec_lvl == 3; |
175 | 1.38M | uv_sec_lvl <<= bitdepth_min_8; |
176 | | |
177 | 1.38M | pixel *bptrs[3] = { iptrs[0], iptrs[1], iptrs[2] }; |
178 | 10.6M | for (int bx = sbx * sbsz; bx < imin((sbx + 1) * sbsz, f->bw); |
179 | 9.26M | bx += 2, edges |= CDEF_HAVE_LEFT) |
180 | 9.19M | { |
181 | 9.19M | if (bx + 2 >= f->bw) edges &= ~CDEF_HAVE_RIGHT; |
182 | | |
183 | | // check if this 8x8 block had any coded coefficients; if not, |
184 | | // go to the next block |
185 | 9.19M | const uint32_t bx_mask = 3U << (bx & 30); |
186 | 9.19M | if (!(noskip_mask & bx_mask)) { |
187 | 1.68M | prev_flag = 0; |
188 | 1.68M | goto next_b; |
189 | 1.68M | } |
190 | 7.50M | const enum Backup2x8Flags do_left = (prev_flag ^ flag) & flag; |
191 | 7.50M | prev_flag = flag; |
192 | 7.50M | if (do_left && edges & CDEF_HAVE_LEFT) { |
193 | | // we didn't backup the prefilter data because it wasn't |
194 | | // there, so do it here instead |
195 | 271k | backup2x8(lr_bak[bit], bptrs, f->cur.stride, 0, layout, do_left); |
196 | 271k | } |
197 | 7.50M | if (edges & CDEF_HAVE_RIGHT) { |
198 | | // backup pre-filter data for next iteration |
199 | 7.30M | backup2x8(lr_bak[!bit], bptrs, f->cur.stride, 8, layout, flag); |
200 | 7.30M | } |
201 | | |
202 | 7.50M | int dir; |
203 | 7.50M | unsigned variance; |
204 | 7.50M | if (y_pri_lvl || uv_pri_lvl) |
205 | 6.76M | dir = dsp->cdef.dir(bptrs[0], f->cur.stride[0], |
206 | 6.76M | &variance HIGHBD_CALL_SUFFIX); |
207 | | |
208 | 7.50M | const pixel *top, *bot; |
209 | 7.50M | ptrdiff_t offset; |
210 | | |
211 | 7.50M | if (!have_tt) goto st_y; |
212 | 6.60M | if (sbrow_start && by == by_start) { |
213 | 430k | if (resize) { |
214 | 38.1k | offset = (sby - 1) * 4 * y_stride + bx * 4; |
215 | 38.1k | top = &f->lf.cdef_lpf_line[0][offset]; |
216 | 391k | } else { |
217 | 391k | offset = (sby * (4 << sb128) - 4) * y_stride + bx * 4; |
218 | 391k | top = &f->lf.lr_lpf_line[0][offset]; |
219 | 391k | } |
220 | 430k | bot = bptrs[0] + 8 * y_stride; |
221 | 6.17M | } else if (!sbrow_start && by + 2 >= by_end) { |
222 | 527k | top = &f->lf.cdef_line[tf][0][sby * 4 * y_stride + bx * 4]; |
223 | 527k | if (resize) { |
224 | 58.9k | offset = (sby * 4 + 2) * y_stride + bx * 4; |
225 | 58.9k | bot = &f->lf.cdef_lpf_line[0][offset]; |
226 | 468k | } else { |
227 | 468k | const int line = sby * (4 << sb128) + 4 * sb128 + 2; |
228 | 468k | offset = line * y_stride + bx * 4; |
229 | 468k | bot = &f->lf.lr_lpf_line[0][offset]; |
230 | 468k | } |
231 | 5.65M | } else { |
232 | 6.45M | st_y:; |
233 | 6.45M | offset = sby * 4 * y_stride; |
234 | 6.45M | top = &f->lf.cdef_line[tf][0][have_tt * offset + bx * 4]; |
235 | 6.45M | bot = bptrs[0] + 8 * y_stride; |
236 | 6.45M | } |
237 | 7.41M | if (y_pri_lvl) { |
238 | 6.10M | const int adj_y_pri_lvl = adjust_strength(y_pri_lvl, variance); |
239 | 6.10M | if (adj_y_pri_lvl || y_sec_lvl) |
240 | 5.07M | dsp->cdef.fb[0](bptrs[0], f->cur.stride[0], lr_bak[bit][0], |
241 | 5.07M | top, bot, adj_y_pri_lvl, y_sec_lvl, |
242 | 5.07M | dir, damping, edges HIGHBD_CALL_SUFFIX); |
243 | 6.10M | } else if (y_sec_lvl) |
244 | 741k | dsp->cdef.fb[0](bptrs[0], f->cur.stride[0], lr_bak[bit][0], |
245 | 741k | top, bot, 0, y_sec_lvl, 0, damping, |
246 | 741k | edges HIGHBD_CALL_SUFFIX); |
247 | | |
248 | 7.41M | if (!uv_lvl) goto skip_uv; |
249 | 7.41M | assert(layout != DAV1D_PIXEL_LAYOUT_I400); |
250 | | |
251 | 5.95M | const int uvdir = uv_pri_lvl ? uv_dir[dir] : 0; |
252 | 17.9M | for (int pl = 1; pl <= 2; pl++) { |
253 | 11.8M | if (!have_tt) goto st_uv; |
254 | 10.6M | if (sbrow_start && by == by_start) { |
255 | 656k | if (resize) { |
256 | 46.8k | offset = (sby - 1) * 4 * uv_stride + (bx * 4 >> ss_hor); |
257 | 46.8k | top = &f->lf.cdef_lpf_line[pl][offset]; |
258 | 609k | } else { |
259 | 609k | const int line = sby * (4 << sb128) - 4; |
260 | 609k | offset = line * uv_stride + (bx * 4 >> ss_hor); |
261 | 609k | top = &f->lf.lr_lpf_line[pl][offset]; |
262 | 609k | } |
263 | 656k | bot = bptrs[pl] + (8 >> ss_ver) * uv_stride; |
264 | 10.0M | } else if (!sbrow_start && by + 2 >= by_end) { |
265 | 764k | const ptrdiff_t top_offset = sby * 8 * uv_stride + |
266 | 764k | (bx * 4 >> ss_hor); |
267 | 764k | top = &f->lf.cdef_line[tf][pl][top_offset]; |
268 | 764k | if (resize) { |
269 | 56.5k | offset = (sby * 4 + 2) * uv_stride + (bx * 4 >> ss_hor); |
270 | 56.5k | bot = &f->lf.cdef_lpf_line[pl][offset]; |
271 | 708k | } else { |
272 | 708k | const int line = sby * (4 << sb128) + 4 * sb128 + 2; |
273 | 708k | offset = line * uv_stride + (bx * 4 >> ss_hor); |
274 | 708k | bot = &f->lf.lr_lpf_line[pl][offset]; |
275 | 708k | } |
276 | 9.26M | } else { |
277 | 10.5M | st_uv:; |
278 | 10.5M | const ptrdiff_t offset = sby * 8 * uv_stride; |
279 | 10.5M | top = &f->lf.cdef_line[tf][pl][have_tt * offset + (bx * 4 >> ss_hor)]; |
280 | 10.5M | bot = bptrs[pl] + (8 >> ss_ver) * uv_stride; |
281 | 10.5M | } |
282 | 11.9M | dsp->cdef.fb[uv_idx](bptrs[pl], f->cur.stride[1], |
283 | 11.9M | lr_bak[bit][pl], top, bot, |
284 | 11.9M | uv_pri_lvl, uv_sec_lvl, uvdir, |
285 | 11.9M | damping - 1, edges HIGHBD_CALL_SUFFIX); |
286 | 11.9M | } |
287 | | |
288 | 7.61M | skip_uv: |
289 | 7.61M | bit ^= 1; |
290 | | |
291 | 9.26M | next_b: |
292 | 9.26M | bptrs[0] += 8; |
293 | 9.26M | bptrs[1] += 8 >> ss_hor; |
294 | 9.26M | bptrs[2] += 8 >> ss_hor; |
295 | 9.26M | } |
296 | | |
297 | 15.4M | next_sb: |
298 | 15.4M | iptrs[0] += sbsz * 4; |
299 | 15.4M | iptrs[1] += sbsz * 4 >> ss_hor; |
300 | 15.4M | iptrs[2] += sbsz * 4 >> ss_hor; |
301 | 15.4M | } |
302 | | |
303 | 6.72M | ptrs[0] += 8 * PXSTRIDE(f->cur.stride[0]); |
304 | 6.72M | ptrs[1] += 8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver; |
305 | 6.72M | ptrs[2] += 8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver; |
306 | 6.72M | tc->top_pre_cdef_toggle ^= 1; |
307 | 6.72M | } |
308 | 987k | } Line | Count | Source | 102 | 219k | { | 103 | 219k | Dav1dFrameContext *const f = (Dav1dFrameContext *)tc->f; | 104 | 219k | const int bitdepth_min_8 = BITDEPTH == 8 ? 0 : f->cur.p.bpc - 8; | 105 | 219k | const Dav1dDSPContext *const dsp = f->dsp; | 106 | 219k | enum CdefEdgeFlags edges = CDEF_HAVE_BOTTOM | (by_start > 0 ? CDEF_HAVE_TOP : 0); | 107 | 219k | pixel *ptrs[3] = { p[0], p[1], p[2] }; | 108 | 219k | const int sbsz = 16; | 109 | 219k | const int sb64w = f->sb128w << 1; | 110 | 219k | const int damping = f->frame_hdr->cdef.damping + bitdepth_min_8; | 111 | 219k | const enum Dav1dPixelLayout layout = f->cur.p.layout; | 112 | 219k | const int uv_idx = DAV1D_PIXEL_LAYOUT_I444 - layout; | 113 | 219k | const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420; | 114 | 219k | const int ss_hor = layout != DAV1D_PIXEL_LAYOUT_I444; | 115 | 219k | static const uint8_t uv_dirs[2][8] = { { 0, 1, 2, 3, 4, 5, 6, 7 }, | 116 | 219k | { 7, 0, 2, 4, 5, 6, 6, 6 } }; | 117 | 219k | const uint8_t *uv_dir = uv_dirs[layout == DAV1D_PIXEL_LAYOUT_I422]; | 118 | 219k | const int have_tt = f->c->n_tc > 1; | 119 | 219k | const int sb128 = f->seq_hdr->sb128; | 120 | 219k | const int resize = f->frame_hdr->width[0] != f->frame_hdr->width[1]; | 121 | 219k | const ptrdiff_t y_stride = PXSTRIDE(f->cur.stride[0]); | 122 | 219k | const ptrdiff_t uv_stride = PXSTRIDE(f->cur.stride[1]); | 123 | | | 124 | 1.29M | for (int bit = 0, by = by_start; by < by_end; by += 2, edges |= CDEF_HAVE_TOP) { | 125 | 1.07M | const int tf = tc->top_pre_cdef_toggle; | 126 | 1.07M | const int by_idx = (by & 30) >> 1; | 127 | 1.07M | if (by + 2 >= f->bh) edges &= ~CDEF_HAVE_BOTTOM; | 128 | | | 129 | 1.07M | if ((!have_tt || sbrow_start || by + 2 < by_end) && | 130 | 987k | edges & CDEF_HAVE_BOTTOM) | 131 | 984k | { | 132 | | // backup pre-filter data for next iteration | 133 | 984k | pixel *const cdef_top_bak[3] = { | 134 | 984k | f->lf.cdef_line[!tf][0] + have_tt * sby * 4 * y_stride, | 135 | 984k | 
f->lf.cdef_line[!tf][1] + have_tt * sby * 8 * uv_stride, | 136 | 984k | f->lf.cdef_line[!tf][2] + have_tt * sby * 8 * uv_stride | 137 | 984k | }; | 138 | 984k | backup2lines(cdef_top_bak, ptrs, f->cur.stride, layout); | 139 | 984k | } | 140 | | | 141 | 1.07M | ALIGN_STK_16(pixel, lr_bak, 2 /* idx */, [3 /* plane */][8 /* y */][2 /* x */]); | 142 | 1.07M | pixel *iptrs[3] = { ptrs[0], ptrs[1], ptrs[2] }; | 143 | 1.07M | edges &= ~CDEF_HAVE_LEFT; | 144 | 1.07M | edges |= CDEF_HAVE_RIGHT; | 145 | 1.07M | enum Backup2x8Flags prev_flag = 0; | 146 | 4.49M | for (int sbx = 0; sbx < sb64w; sbx++, edges |= CDEF_HAVE_LEFT) { | 147 | 3.41M | const int sb128x = sbx >> 1; | 148 | 3.41M | const int sb64_idx = ((by & sbsz) >> 3) + (sbx & 1); | 149 | 3.41M | const int cdef_idx = lflvl[sb128x].cdef_idx[sb64_idx]; | 150 | 3.41M | if (cdef_idx == -1 || | 151 | 1.09M | (!f->frame_hdr->cdef.y_strength[cdef_idx] && | 152 | 354k | !f->frame_hdr->cdef.uv_strength[cdef_idx])) | 153 | 2.62M | { | 154 | 2.62M | prev_flag = 0; | 155 | 2.62M | goto next_sb; | 156 | 2.62M | } | 157 | | | 158 | | // Create a complete 32-bit mask for the sb row ahead of time. 
| 159 | 794k | const uint16_t (*noskip_row)[2] = &lflvl[sb128x].noskip_mask[by_idx]; | 160 | 794k | const unsigned noskip_mask = (unsigned) noskip_row[0][1] << 16 | | 161 | 794k | noskip_row[0][0]; | 162 | | | 163 | 794k | const int y_lvl = f->frame_hdr->cdef.y_strength[cdef_idx]; | 164 | 794k | const int uv_lvl = f->frame_hdr->cdef.uv_strength[cdef_idx]; | 165 | 794k | const enum Backup2x8Flags flag = !!y_lvl + (!!uv_lvl << 1); | 166 | | | 167 | 794k | const int y_pri_lvl = (y_lvl >> 2) << bitdepth_min_8; | 168 | 794k | int y_sec_lvl = y_lvl & 3; | 169 | 794k | y_sec_lvl += y_sec_lvl == 3; | 170 | 794k | y_sec_lvl <<= bitdepth_min_8; | 171 | | | 172 | 794k | const int uv_pri_lvl = (uv_lvl >> 2) << bitdepth_min_8; | 173 | 794k | int uv_sec_lvl = uv_lvl & 3; | 174 | 794k | uv_sec_lvl += uv_sec_lvl == 3; | 175 | 794k | uv_sec_lvl <<= bitdepth_min_8; | 176 | | | 177 | 794k | pixel *bptrs[3] = { iptrs[0], iptrs[1], iptrs[2] }; | 178 | 6.71M | for (int bx = sbx * sbsz; bx < imin((sbx + 1) * sbsz, f->bw); | 179 | 5.92M | bx += 2, edges |= CDEF_HAVE_LEFT) | 180 | 5.86M | { | 181 | 5.86M | if (bx + 2 >= f->bw) edges &= ~CDEF_HAVE_RIGHT; | 182 | | | 183 | | // check if this 8x8 block had any coded coefficients; if not, | 184 | | // go to the next block | 185 | 5.86M | const uint32_t bx_mask = 3U << (bx & 30); | 186 | 5.86M | if (!(noskip_mask & bx_mask)) { | 187 | 1.28M | prev_flag = 0; | 188 | 1.28M | goto next_b; | 189 | 1.28M | } | 190 | 4.57M | const enum Backup2x8Flags do_left = (prev_flag ^ flag) & flag; | 191 | 4.57M | prev_flag = flag; | 192 | 4.57M | if (do_left && edges & CDEF_HAVE_LEFT) { | 193 | | // we didn't backup the prefilter data because it wasn't | 194 | | // there, so do it here instead | 195 | 216k | backup2x8(lr_bak[bit], bptrs, f->cur.stride, 0, layout, do_left); | 196 | 216k | } | 197 | 4.57M | if (edges & CDEF_HAVE_RIGHT) { | 198 | | // backup pre-filter data for next iteration | 199 | 4.52M | backup2x8(lr_bak[!bit], bptrs, f->cur.stride, 8, layout, 
flag); | 200 | 4.52M | } | 201 | | | 202 | 4.57M | int dir; | 203 | 4.57M | unsigned variance; | 204 | 4.57M | if (y_pri_lvl || uv_pri_lvl) | 205 | 4.17M | dir = dsp->cdef.dir(bptrs[0], f->cur.stride[0], | 206 | 4.17M | &variance HIGHBD_CALL_SUFFIX); | 207 | | | 208 | 4.57M | const pixel *top, *bot; | 209 | 4.57M | ptrdiff_t offset; | 210 | | | 211 | 4.57M | if (!have_tt) goto st_y; | 212 | 3.94M | if (sbrow_start && by == by_start) { | 213 | 256k | if (resize) { | 214 | 7.71k | offset = (sby - 1) * 4 * y_stride + bx * 4; | 215 | 7.71k | top = &f->lf.cdef_lpf_line[0][offset]; | 216 | 249k | } else { | 217 | 249k | offset = (sby * (4 << sb128) - 4) * y_stride + bx * 4; | 218 | 249k | top = &f->lf.lr_lpf_line[0][offset]; | 219 | 249k | } | 220 | 256k | bot = bptrs[0] + 8 * y_stride; | 221 | 3.68M | } else if (!sbrow_start && by + 2 >= by_end) { | 222 | 299k | top = &f->lf.cdef_line[tf][0][sby * 4 * y_stride + bx * 4]; | 223 | 299k | if (resize) { | 224 | 9.15k | offset = (sby * 4 + 2) * y_stride + bx * 4; | 225 | 9.15k | bot = &f->lf.cdef_lpf_line[0][offset]; | 226 | 290k | } else { | 227 | 290k | const int line = sby * (4 << sb128) + 4 * sb128 + 2; | 228 | 290k | offset = line * y_stride + bx * 4; | 229 | 290k | bot = &f->lf.lr_lpf_line[0][offset]; | 230 | 290k | } | 231 | 3.38M | } else { | 232 | 4.02M | st_y:; | 233 | 4.02M | offset = sby * 4 * y_stride; | 234 | 4.02M | top = &f->lf.cdef_line[tf][0][have_tt * offset + bx * 4]; | 235 | 4.02M | bot = bptrs[0] + 8 * y_stride; | 236 | 4.02M | } | 237 | 4.58M | if (y_pri_lvl) { | 238 | 3.68M | const int adj_y_pri_lvl = adjust_strength(y_pri_lvl, variance); | 239 | 3.68M | if (adj_y_pri_lvl || y_sec_lvl) | 240 | 3.20M | dsp->cdef.fb[0](bptrs[0], f->cur.stride[0], lr_bak[bit][0], | 241 | 3.20M | top, bot, adj_y_pri_lvl, y_sec_lvl, | 242 | 3.20M | dir, damping, edges HIGHBD_CALL_SUFFIX); | 243 | 3.68M | } else if (y_sec_lvl) | 244 | 595k | dsp->cdef.fb[0](bptrs[0], f->cur.stride[0], lr_bak[bit][0], | 245 | 595k | top, 
bot, 0, y_sec_lvl, 0, damping, | 246 | 595k | edges HIGHBD_CALL_SUFFIX); | 247 | | | 248 | 4.58M | if (!uv_lvl) goto skip_uv; | 249 | 4.58M | assert(layout != DAV1D_PIXEL_LAYOUT_I400); | 250 | | | 251 | 4.08M | const int uvdir = uv_pri_lvl ? uv_dir[dir] : 0; | 252 | 12.2M | for (int pl = 1; pl <= 2; pl++) { | 253 | 8.16M | if (!have_tt) goto st_uv; | 254 | 7.16M | if (sbrow_start && by == by_start) { | 255 | 442k | if (resize) { | 256 | 14.6k | offset = (sby - 1) * 4 * uv_stride + (bx * 4 >> ss_hor); | 257 | 14.6k | top = &f->lf.cdef_lpf_line[pl][offset]; | 258 | 427k | } else { | 259 | 427k | const int line = sby * (4 << sb128) - 4; | 260 | 427k | offset = line * uv_stride + (bx * 4 >> ss_hor); | 261 | 427k | top = &f->lf.lr_lpf_line[pl][offset]; | 262 | 427k | } | 263 | 442k | bot = bptrs[pl] + (8 >> ss_ver) * uv_stride; | 264 | 6.71M | } else if (!sbrow_start && by + 2 >= by_end) { | 265 | 510k | const ptrdiff_t top_offset = sby * 8 * uv_stride + | 266 | 510k | (bx * 4 >> ss_hor); | 267 | 510k | top = &f->lf.cdef_line[tf][pl][top_offset]; | 268 | 510k | if (resize) { | 269 | 16.5k | offset = (sby * 4 + 2) * uv_stride + (bx * 4 >> ss_hor); | 270 | 16.5k | bot = &f->lf.cdef_lpf_line[pl][offset]; | 271 | 493k | } else { | 272 | 493k | const int line = sby * (4 << sb128) + 4 * sb128 + 2; | 273 | 493k | offset = line * uv_stride + (bx * 4 >> ss_hor); | 274 | 493k | bot = &f->lf.lr_lpf_line[pl][offset]; | 275 | 493k | } | 276 | 6.20M | } else { | 277 | 7.22M | st_uv:; | 278 | 7.22M | const ptrdiff_t offset = sby * 8 * uv_stride; | 279 | 7.22M | top = &f->lf.cdef_line[tf][pl][have_tt * offset + (bx * 4 >> ss_hor)]; | 280 | 7.22M | bot = bptrs[pl] + (8 >> ss_ver) * uv_stride; | 281 | 7.22M | } | 282 | 8.18M | dsp->cdef.fb[uv_idx](bptrs[pl], f->cur.stride[1], | 283 | 8.18M | lr_bak[bit][pl], top, bot, | 284 | 8.18M | uv_pri_lvl, uv_sec_lvl, uvdir, | 285 | 8.18M | damping - 1, edges HIGHBD_CALL_SUFFIX); | 286 | 8.18M | } | 287 | | | 288 | 4.65M | skip_uv: | 289 | 4.65M | 
bit ^= 1; | 290 | | | 291 | 5.92M | next_b: | 292 | 5.92M | bptrs[0] += 8; | 293 | 5.92M | bptrs[1] += 8 >> ss_hor; | 294 | 5.92M | bptrs[2] += 8 >> ss_hor; | 295 | 5.92M | } | 296 | | | 297 | 3.41M | next_sb: | 298 | 3.41M | iptrs[0] += sbsz * 4; | 299 | 3.41M | iptrs[1] += sbsz * 4 >> ss_hor; | 300 | 3.41M | iptrs[2] += sbsz * 4 >> ss_hor; | 301 | 3.41M | } | 302 | | | 303 | 1.07M | ptrs[0] += 8 * PXSTRIDE(f->cur.stride[0]); | 304 | 1.07M | ptrs[1] += 8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver; | 305 | 1.07M | ptrs[2] += 8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver; | 306 | 1.07M | tc->top_pre_cdef_toggle ^= 1; | 307 | 1.07M | } | 308 | 219k | } |
Line | Count | Source | 102 | 768k | { | 103 | 768k | Dav1dFrameContext *const f = (Dav1dFrameContext *)tc->f; | 104 | 768k | const int bitdepth_min_8 = BITDEPTH == 8 ? 0 : f->cur.p.bpc - 8; | 105 | 768k | const Dav1dDSPContext *const dsp = f->dsp; | 106 | 768k | enum CdefEdgeFlags edges = CDEF_HAVE_BOTTOM | (by_start > 0 ? CDEF_HAVE_TOP : 0); | 107 | 768k | pixel *ptrs[3] = { p[0], p[1], p[2] }; | 108 | 768k | const int sbsz = 16; | 109 | 768k | const int sb64w = f->sb128w << 1; | 110 | 768k | const int damping = f->frame_hdr->cdef.damping + bitdepth_min_8; | 111 | 768k | const enum Dav1dPixelLayout layout = f->cur.p.layout; | 112 | 768k | const int uv_idx = DAV1D_PIXEL_LAYOUT_I444 - layout; | 113 | 768k | const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420; | 114 | 768k | const int ss_hor = layout != DAV1D_PIXEL_LAYOUT_I444; | 115 | 768k | static const uint8_t uv_dirs[2][8] = { { 0, 1, 2, 3, 4, 5, 6, 7 }, | 116 | 768k | { 7, 0, 2, 4, 5, 6, 6, 6 } }; | 117 | 768k | const uint8_t *uv_dir = uv_dirs[layout == DAV1D_PIXEL_LAYOUT_I422]; | 118 | 768k | const int have_tt = f->c->n_tc > 1; | 119 | 768k | const int sb128 = f->seq_hdr->sb128; | 120 | 768k | const int resize = f->frame_hdr->width[0] != f->frame_hdr->width[1]; | 121 | 768k | const ptrdiff_t y_stride = PXSTRIDE(f->cur.stride[0]); | 122 | 768k | const ptrdiff_t uv_stride = PXSTRIDE(f->cur.stride[1]); | 123 | | | 124 | 6.41M | for (int bit = 0, by = by_start; by < by_end; by += 2, edges |= CDEF_HAVE_TOP) { | 125 | 5.66M | const int tf = tc->top_pre_cdef_toggle; | 126 | 5.66M | const int by_idx = (by & 30) >> 1; | 127 | 5.66M | if (by + 2 >= f->bh) edges &= ~CDEF_HAVE_BOTTOM; | 128 | | | 129 | 5.66M | if ((!have_tt || sbrow_start || by + 2 < by_end) && | 130 | 5.30M | edges & CDEF_HAVE_BOTTOM) | 131 | 5.30M | { | 132 | | // backup pre-filter data for next iteration | 133 | 5.30M | pixel *const cdef_top_bak[3] = { | 134 | 5.30M | f->lf.cdef_line[!tf][0] + have_tt * sby * 4 * y_stride, | 135 | 5.30M | 
f->lf.cdef_line[!tf][1] + have_tt * sby * 8 * uv_stride, | 136 | 5.30M | f->lf.cdef_line[!tf][2] + have_tt * sby * 8 * uv_stride | 137 | 5.30M | }; | 138 | 5.30M | backup2lines(cdef_top_bak, ptrs, f->cur.stride, layout); | 139 | 5.30M | } | 140 | | | 141 | 5.66M | ALIGN_STK_16(pixel, lr_bak, 2 /* idx */, [3 /* plane */][8 /* y */][2 /* x */]); | 142 | 5.66M | pixel *iptrs[3] = { ptrs[0], ptrs[1], ptrs[2] }; | 143 | 5.66M | edges &= ~CDEF_HAVE_LEFT; | 144 | 5.66M | edges |= CDEF_HAVE_RIGHT; | 145 | 5.66M | enum Backup2x8Flags prev_flag = 0; | 146 | 17.7M | for (int sbx = 0; sbx < sb64w; sbx++, edges |= CDEF_HAVE_LEFT) { | 147 | 12.1M | const int sb128x = sbx >> 1; | 148 | 12.1M | const int sb64_idx = ((by & sbsz) >> 3) + (sbx & 1); | 149 | 12.1M | const int cdef_idx = lflvl[sb128x].cdef_idx[sb64_idx]; | 150 | 12.1M | if (cdef_idx == -1 || | 151 | 1.66M | (!f->frame_hdr->cdef.y_strength[cdef_idx] && | 152 | 1.10M | !f->frame_hdr->cdef.uv_strength[cdef_idx])) | 153 | 11.5M | { | 154 | 11.5M | prev_flag = 0; | 155 | 11.5M | goto next_sb; | 156 | 11.5M | } | 157 | | | 158 | | // Create a complete 32-bit mask for the sb row ahead of time. 
| 159 | 589k | const uint16_t (*noskip_row)[2] = &lflvl[sb128x].noskip_mask[by_idx]; | 160 | 589k | const unsigned noskip_mask = (unsigned) noskip_row[0][1] << 16 | | 161 | 589k | noskip_row[0][0]; | 162 | | | 163 | 589k | const int y_lvl = f->frame_hdr->cdef.y_strength[cdef_idx]; | 164 | 589k | const int uv_lvl = f->frame_hdr->cdef.uv_strength[cdef_idx]; | 165 | 589k | const enum Backup2x8Flags flag = !!y_lvl + (!!uv_lvl << 1); | 166 | | | 167 | 589k | const int y_pri_lvl = (y_lvl >> 2) << bitdepth_min_8; | 168 | 589k | int y_sec_lvl = y_lvl & 3; | 169 | 589k | y_sec_lvl += y_sec_lvl == 3; | 170 | 589k | y_sec_lvl <<= bitdepth_min_8; | 171 | | | 172 | 589k | const int uv_pri_lvl = (uv_lvl >> 2) << bitdepth_min_8; | 173 | 589k | int uv_sec_lvl = uv_lvl & 3; | 174 | 589k | uv_sec_lvl += uv_sec_lvl == 3; | 175 | 589k | uv_sec_lvl <<= bitdepth_min_8; | 176 | | | 177 | 589k | pixel *bptrs[3] = { iptrs[0], iptrs[1], iptrs[2] }; | 178 | 3.93M | for (int bx = sbx * sbsz; bx < imin((sbx + 1) * sbsz, f->bw); | 179 | 3.34M | bx += 2, edges |= CDEF_HAVE_LEFT) | 180 | 3.32M | { | 181 | 3.32M | if (bx + 2 >= f->bw) edges &= ~CDEF_HAVE_RIGHT; | 182 | | | 183 | | // check if this 8x8 block had any coded coefficients; if not, | 184 | | // go to the next block | 185 | 3.32M | const uint32_t bx_mask = 3U << (bx & 30); | 186 | 3.32M | if (!(noskip_mask & bx_mask)) { | 187 | 392k | prev_flag = 0; | 188 | 392k | goto next_b; | 189 | 392k | } | 190 | 2.93M | const enum Backup2x8Flags do_left = (prev_flag ^ flag) & flag; | 191 | 2.93M | prev_flag = flag; | 192 | 2.93M | if (do_left && edges & CDEF_HAVE_LEFT) { | 193 | | // we didn't backup the prefilter data because it wasn't | 194 | | // there, so do it here instead | 195 | 55.1k | backup2x8(lr_bak[bit], bptrs, f->cur.stride, 0, layout, do_left); | 196 | 55.1k | } | 197 | 2.93M | if (edges & CDEF_HAVE_RIGHT) { | 198 | | // backup pre-filter data for next iteration | 199 | 2.78M | backup2x8(lr_bak[!bit], bptrs, f->cur.stride, 8, layout, 
flag); | 200 | 2.78M | } | 201 | | | 202 | 2.93M | int dir; | 203 | 2.93M | unsigned variance; | 204 | 2.93M | if (y_pri_lvl || uv_pri_lvl) | 205 | 2.59M | dir = dsp->cdef.dir(bptrs[0], f->cur.stride[0], | 206 | 2.59M | &variance HIGHBD_CALL_SUFFIX); | 207 | | | 208 | 2.93M | const pixel *top, *bot; | 209 | 2.93M | ptrdiff_t offset; | 210 | | | 211 | 2.93M | if (!have_tt) goto st_y; | 212 | 2.66M | if (sbrow_start && by == by_start) { | 213 | 173k | if (resize) { | 214 | 30.4k | offset = (sby - 1) * 4 * y_stride + bx * 4; | 215 | 30.4k | top = &f->lf.cdef_lpf_line[0][offset]; | 216 | 142k | } else { | 217 | 142k | offset = (sby * (4 << sb128) - 4) * y_stride + bx * 4; | 218 | 142k | top = &f->lf.lr_lpf_line[0][offset]; | 219 | 142k | } | 220 | 173k | bot = bptrs[0] + 8 * y_stride; | 221 | 2.49M | } else if (!sbrow_start && by + 2 >= by_end) { | 222 | 228k | top = &f->lf.cdef_line[tf][0][sby * 4 * y_stride + bx * 4]; | 223 | 228k | if (resize) { | 224 | 49.8k | offset = (sby * 4 + 2) * y_stride + bx * 4; | 225 | 49.8k | bot = &f->lf.cdef_lpf_line[0][offset]; | 226 | 178k | } else { | 227 | 178k | const int line = sby * (4 << sb128) + 4 * sb128 + 2; | 228 | 178k | offset = line * y_stride + bx * 4; | 229 | 178k | bot = &f->lf.lr_lpf_line[0][offset]; | 230 | 178k | } | 231 | 2.26M | } else { | 232 | 2.42M | st_y:; | 233 | 2.42M | offset = sby * 4 * y_stride; | 234 | 2.42M | top = &f->lf.cdef_line[tf][0][have_tt * offset + bx * 4]; | 235 | 2.42M | bot = bptrs[0] + 8 * y_stride; | 236 | 2.42M | } | 237 | 2.83M | if (y_pri_lvl) { | 238 | 2.41M | const int adj_y_pri_lvl = adjust_strength(y_pri_lvl, variance); | 239 | 2.41M | if (adj_y_pri_lvl || y_sec_lvl) | 240 | 1.87M | dsp->cdef.fb[0](bptrs[0], f->cur.stride[0], lr_bak[bit][0], | 241 | 1.87M | top, bot, adj_y_pri_lvl, y_sec_lvl, | 242 | 1.87M | dir, damping, edges HIGHBD_CALL_SUFFIX); | 243 | 2.41M | } else if (y_sec_lvl) | 244 | 145k | dsp->cdef.fb[0](bptrs[0], f->cur.stride[0], lr_bak[bit][0], | 245 | 145k | top, 
bot, 0, y_sec_lvl, 0, damping, | 246 | 145k | edges HIGHBD_CALL_SUFFIX); | 247 | | | 248 | 2.83M | if (!uv_lvl) goto skip_uv; | 249 | 2.83M | assert(layout != DAV1D_PIXEL_LAYOUT_I400); | 250 | | | 251 | 1.86M | const int uvdir = uv_pri_lvl ? uv_dir[dir] : 0; | 252 | 5.63M | for (int pl = 1; pl <= 2; pl++) { | 253 | 3.73M | if (!have_tt) goto st_uv; | 254 | 3.52M | if (sbrow_start && by == by_start) { | 255 | 213k | if (resize) { | 256 | 32.1k | offset = (sby - 1) * 4 * uv_stride + (bx * 4 >> ss_hor); | 257 | 32.1k | top = &f->lf.cdef_lpf_line[pl][offset]; | 258 | 181k | } else { | 259 | 181k | const int line = sby * (4 << sb128) - 4; | 260 | 181k | offset = line * uv_stride + (bx * 4 >> ss_hor); | 261 | 181k | top = &f->lf.lr_lpf_line[pl][offset]; | 262 | 181k | } | 263 | 213k | bot = bptrs[pl] + (8 >> ss_ver) * uv_stride; | 264 | 3.33M | } else if (!sbrow_start && by + 2 >= by_end) { | 265 | 254k | const ptrdiff_t top_offset = sby * 8 * uv_stride + | 266 | 254k | (bx * 4 >> ss_hor); | 267 | 254k | top = &f->lf.cdef_line[tf][pl][top_offset]; | 268 | 254k | if (resize) { | 269 | 39.9k | offset = (sby * 4 + 2) * uv_stride + (bx * 4 >> ss_hor); | 270 | 39.9k | bot = &f->lf.cdef_lpf_line[pl][offset]; | 271 | 214k | } else { | 272 | 214k | const int line = sby * (4 << sb128) + 4 * sb128 + 2; | 273 | 214k | offset = line * uv_stride + (bx * 4 >> ss_hor); | 274 | 214k | bot = &f->lf.lr_lpf_line[pl][offset]; | 275 | 214k | } | 276 | 3.06M | } else { | 277 | 3.30M | st_uv:; | 278 | 3.30M | const ptrdiff_t offset = sby * 8 * uv_stride; | 279 | 3.30M | top = &f->lf.cdef_line[tf][pl][have_tt * offset + (bx * 4 >> ss_hor)]; | 280 | 3.30M | bot = bptrs[pl] + (8 >> ss_ver) * uv_stride; | 281 | 3.30M | } | 282 | 3.76M | dsp->cdef.fb[uv_idx](bptrs[pl], f->cur.stride[1], | 283 | 3.76M | lr_bak[bit][pl], top, bot, | 284 | 3.76M | uv_pri_lvl, uv_sec_lvl, uvdir, | 285 | 3.76M | damping - 1, edges HIGHBD_CALL_SUFFIX); | 286 | 3.76M | } | 287 | | | 288 | 2.95M | skip_uv: | 289 | 2.95M | 
bit ^= 1; | 290 | | | 291 | 3.34M | next_b: | 292 | 3.34M | bptrs[0] += 8; | 293 | 3.34M | bptrs[1] += 8 >> ss_hor; | 294 | 3.34M | bptrs[2] += 8 >> ss_hor; | 295 | 3.34M | } | 296 | | | 297 | 12.0M | next_sb: | 298 | 12.0M | iptrs[0] += sbsz * 4; | 299 | 12.0M | iptrs[1] += sbsz * 4 >> ss_hor; | 300 | 12.0M | iptrs[2] += sbsz * 4 >> ss_hor; | 301 | 12.0M | } | 302 | | | 303 | 5.64M | ptrs[0] += 8 * PXSTRIDE(f->cur.stride[0]); | 304 | 5.64M | ptrs[1] += 8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver; | 305 | 5.64M | ptrs[2] += 8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver; | 306 | 5.64M | tc->top_pre_cdef_toggle ^= 1; | 307 | 5.64M | } | 308 | 768k | } |
|