Coverage Report

Created: 2026-05-16 07:49

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/ffmpeg/libavcodec/h264_direct.c
Line
Count
Source
1
/*
2
 * H.26L/H.264/AVC/JVT/14496-10/... direct mb/block decoding
3
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4
 *
5
 * This file is part of FFmpeg.
6
 *
7
 * FFmpeg is free software; you can redistribute it and/or
8
 * modify it under the terms of the GNU Lesser General Public
9
 * License as published by the Free Software Foundation; either
10
 * version 2.1 of the License, or (at your option) any later version.
11
 *
12
 * FFmpeg is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
 * Lesser General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU Lesser General Public
18
 * License along with FFmpeg; if not, write to the Free Software
19
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
 */
21
22
/**
23
 * @file
24
 * H.264 / AVC / MPEG-4 part10 direct mb/block decoding.
25
 * @author Michael Niedermayer <michaelni@gmx.at>
26
 */
27
28
#include "avcodec.h"
29
#include "h264dec.h"
30
#include "h264_ps.h"
31
#include "mpegutils.h"
32
#include "rectangle.h"
33
#include "threadframe.h"
34
35
#include <assert.h>
36
37
/**
 * Compute the distance scale factor for list-0 reference @p i.
 *
 * The result is stored by ff_h264_direct_dist_scale_factor() and later used
 * by pred_temp_direct_motion() to scale co-located motion vectors.
 *
 * @param sl   slice context; sl->ref_list[0][i] supplies poc0 and, through
 *             its parent picture, the long_ref flag
 * @param poc  POC of the current picture or field, as passed by the caller
 * @param poc1 POC of the first list-1 reference, as passed by the caller
 * @param i    index into sl->ref_list[0]
 * @return 256 when the int8-clipped (poc1 - poc0) is zero or the reference's
 *         parent has long_ref set; otherwise (tb * tx + 32) >> 6 passed
 *         through av_clip_intp2(..., 10)
 */
static int get_scale_factor(const H264SliceContext *sl,
38
                            int poc, int poc1, int i)
39
1.09M
{
40
1.09M
    int poc0 = sl->ref_list[0][i].poc;
    /* The differences are formed in 64 bits so the subtraction itself cannot
     * overflow int; a value that does not fit in int is only reported below,
     * the int8-clipped value is used either way. */
41
1.09M
    int64_t pocdiff = poc1 - (int64_t)poc0;
42
1.09M
    int td = av_clip_int8(pocdiff);
43
44
1.09M
    if (pocdiff != (int)pocdiff)
45
1.79k
        avpriv_request_sample(sl->h264->avctx, "pocdiff overflow");
46
47
1.09M
    if (td == 0 || sl->ref_list[0][i].parent->long_ref) {
48
702k
        return 256;
49
702k
    } else {
50
388k
        int64_t pocdiff0 = poc - (int64_t)poc0;
51
388k
        int tb = av_clip_int8(pocdiff0);
        /* td is non-zero here (checked above), so the division is safe. */
52
388k
        int tx = (16384 + (FFABS(td) >> 1)) / td;
53
54
388k
        if (pocdiff0 != (int)pocdiff0)
55
766
            av_log(sl->h264->avctx, AV_LOG_DEBUG, "pocdiff0 overflow\n");
56
57
388k
        return av_clip_intp2((tb * tx + 32) >> 6, 10);
58
388k
    }
59
1.09M
}
60
61
/**
 * Fill the per-slice distance scale factor tables.
 *
 * sl->dist_scale_factor[i] is set for every i < sl->ref_count[0] using the
 * current POC (the field POC of the current parity for field pictures) and
 * the POC of sl->ref_list[1][0].
 *
 * When FRAME_MBAFF(h) is set, sl->dist_scale_factor_field[field][] is filled
 * as well for both fields, using the per-field POCs of the current picture
 * and of the list-1 reference's parent, and the list-0 entries starting at
 * index 16.
 *
 * @param h  decoder context (read only)
 * @param sl slice context whose scale factor tables are written
 */
void ff_h264_direct_dist_scale_factor(const H264Context *const h,
62
                                      H264SliceContext *sl)
63
314k
{
64
314k
    const int poc  = FIELD_PICTURE(h) ? h->cur_pic_ptr->field_poc[h->picture_structure == PICT_BOTTOM_FIELD]
65
314k
                                      : h->cur_pic_ptr->poc;
66
314k
    const int poc1 = sl->ref_list[1][0].poc;
67
314k
    int i, field;
68
69
314k
    if (FRAME_MBAFF(h))
70
112k
        for (field = 0; field < 2; field++) {
            /* These deliberately shadow the outer poc/poc1 with the
             * per-field values for the duration of this iteration. */
71
75.3k
            const int poc  = h->cur_pic_ptr->field_poc[field];
72
75.3k
            const int poc1 = sl->ref_list[1][0].parent->field_poc[field];
            /* Entry i + 16 of list 0 is stored at index i ^ field, i.e. each
             * pair of entries is swapped for field 1. */
73
421k
            for (i = 0; i < 2 * sl->ref_count[0]; i++)
74
345k
                sl->dist_scale_factor_field[field][i ^ field] =
75
345k
                    get_scale_factor(sl, poc, poc1, i + 16);
76
75.3k
        }
77
78
1.05M
    for (i = 0; i < sl->ref_count[0]; i++)
79
744k
        sl->dist_scale_factor[i] = get_scale_factor(sl, poc, poc1, i);
80
314k
}
81
82
/**
 * Build map[list]: a table translating reference indices stored in the
 * co-located picture (the parent of sl->ref_list[1][0]) into indices of the
 * current slice's list 0.
 *
 * Each identifier in ref1->ref_poc[colfield][list][] is compared with
 * 4 * frame_num + (reference & 3) of the candidate list-0 entries; the first
 * match wins. Entries with no match keep the 0 written by the memset.
 * NOTE: despite the "poc" naming, the identifiers compared here have the
 * 4 * frame_num + (reference & 3) form that ff_h264_direct_ref_list_init()
 * stores in ref_poc.
 *
 * @param h        decoder context (picture_structure is read)
 * @param sl       slice context providing ref_list and ref_count
 * @param map      output table; only map[list] is written
 * @param list     which list of the co-located picture to translate (0 or 1)
 * @param field    field parity used when forming the mapped index
 * @param colfield first index into ref1->ref_count / ref1->ref_poc
 * @param mbafi    non-zero to scan list-0 entries 16 .. 16 + 2 * ref_count[0]
 *                 instead of 0 .. ref_count[0]
 */
static void fill_colmap(const H264Context *h, H264SliceContext *sl,
83
                        int map[2][16 + 32], int list,
84
                        int field, int colfield, int mbafi)
85
780k
{
86
780k
    const H264Picture *const ref1 = sl->ref_list[1][0].parent;
87
780k
    int j, old_ref, rfield;
88
780k
    int start  = mbafi ? 16                       : 0;
89
780k
    int end    = mbafi ? 16 + 2 * sl->ref_count[0] : sl->ref_count[0];
90
780k
    int interl = mbafi || h->picture_structure != PICT_FRAME;
91
92
    /* bogus; fills in for missing frames */
93
780k
    memset(map[list], 0, sizeof(map[list]));
94
95
2.34M
    for (rfield = 0; rfield < 2; rfield++) {
96
2.28M
        for (old_ref = 0; old_ref < ref1->ref_count[colfield][list]; old_ref++) {
97
723k
            int poc = ref1->ref_poc[colfield][list][old_ref];
98
            /* Normalise the low two bits: forced to 3 in the non-interlaced
             * case; in the interlaced case a value of 3 is replaced by
             * rfield + 1 so that each pass of the rfield loop looks for one
             * specific value. */
99
723k
            if (!interl)
100
675k
                poc |= 3;
101
            // FIXME: store all MBAFF references so this is not needed
102
47.9k
            else if (interl && (poc & 3) == 3)
103
31.7k
                poc = (poc & ~3) + rfield + 1;
104
105
1.55M
            for (j = start; j < end; j++) {
106
1.14M
                if (4 * sl->ref_list[0][j].parent->frame_num +
107
1.14M
                    (sl->ref_list[0][j].reference & 3) == poc) {
108
312k
                    int cur_ref = mbafi ? (j - 16) ^ field : j;
                    /* When the co-located picture is MBAFF, the per-field
                     * slots starting at index 16 are filled too. */
109
312k
                    if (ref1->mbaff)
110
28.3k
                        map[list][2 * old_ref + (rfield ^ field) + 16] = cur_ref;
111
312k
                    if (rfield == field || !interl)
112
296k
                        map[list][old_ref] = cur_ref;
113
312k
                    break;
114
312k
                }
115
1.14M
            }
116
723k
        }
117
1.56M
    }
118
780k
}
119
120
/**
 * Per-slice initialisation of the state used by direct prediction.
 *
 * Steps, in order:
 *  - unless h->setup_finished is set, record the slice's reference counts and
 *    reference identifiers (4 * frame_num + (reference & 3)) in the current
 *    picture, duplicate them into slot 1 for frame pictures, and set (first
 *    slice) or assert (later slices) cur->mbaff;
 *  - reset sl->col_fieldoff to 0 and return early unless the slice has two
 *    lists and a non-empty list 1;
 *  - for frame pictures choose sl->col_parity: 1 if both field POCs of the
 *    list-1 reference are INT_MAX (an error is logged), otherwise 1 when
 *    field 1's POC is at least as close to the current POC as field 0's;
 *  - for field pictures whose parity does not overlap the list-1 reference
 *    and whose list-1 parent is not MBAFF, set sl->col_fieldoff to
 *    2 * reference - 3;
 *  - for B slices not using direct_spatial_mv_pred, build the
 *    map_col_to_list0 tables (and the per-field ones when FRAME_MBAFF(h)).
 *
 * @param h  decoder context
 * @param sl slice context to initialise
 */
void ff_h264_direct_ref_list_init(const H264Context *const h, H264SliceContext *sl)
121
2.34M
{
122
2.34M
    H264Ref *const ref1 = &sl->ref_list[1][0];
123
2.34M
    H264Picture *const cur = h->cur_pic_ptr;
124
2.34M
    int list, field;
    /* Slot index into ref_count / ref_poc: 0 when bit 0 of the structure /
     * reference value is set, 1 otherwise. */
125
2.34M
    int sidx     = (h->picture_structure & 1) ^ 1;
126
2.34M
    int ref1sidx = (ref1->reference      & 1) ^ 1;
127
128
    /* Updates to cur_pic are not safe once ff_thread_finish_setup() has been
129
     * called (other threads may already be reading these fields). */
130
2.34M
    if (!h->setup_finished) {
131
4.46M
        for (list = 0; list < sl->list_count; list++) {
132
2.11M
            cur->ref_count[sidx][list] = sl->ref_count[list];
133
10.0M
            for (int j = 0; j < sl->ref_count[list]; j++)
134
7.93M
                cur->ref_poc[sidx][list][j] = 4 * sl->ref_list[list][j].parent->frame_num +
135
7.93M
                                                 (sl->ref_list[list][j].reference & 3);
136
2.11M
        }
137
138
2.34M
        if (h->picture_structure == PICT_FRAME) {
139
1.57M
            memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
140
1.57M
            memcpy(cur->ref_poc[1],   cur->ref_poc[0],   sizeof(cur->ref_poc[0]));
141
1.57M
        }
142
143
2.34M
        if (h->current_slice == 0) {
144
2.30M
            cur->mbaff = FRAME_MBAFF(h);
145
2.30M
        } else {
146
44.7k
            av_assert0(cur->mbaff == FRAME_MBAFF(h));
147
44.7k
        }
148
2.34M
    }
149
150
2.34M
    sl->col_fieldoff = 0;
151
152
2.34M
    if (sl->list_count != 2 || !sl->ref_count[1])
153
1.71M
        return;
154
155
634k
    if (h->picture_structure == PICT_FRAME) {
156
485k
        int cur_poc  = h->cur_pic_ptr->poc;
157
485k
        const int *col_poc = sl->ref_list[1][0].parent->field_poc;
158
485k
        if (col_poc[0] == INT_MAX && col_poc[1] == INT_MAX) {
159
293k
            av_log(h->avctx, AV_LOG_ERROR, "co located POCs unavailable\n");
160
293k
            sl->col_parity = 1;
161
293k
        } else
            /* The distances are computed in 64 bits so the subtraction cannot
             * overflow int. */
162
192k
            sl->col_parity = (FFABS(col_poc[0] - (int64_t)cur_poc) >=
163
192k
                              FFABS(col_poc[1] - (int64_t)cur_poc));
164
485k
        ref1sidx =
165
485k
        sidx     = sl->col_parity;
166
    // FL -> FL & differ parity
167
485k
    } else if (!(h->picture_structure & sl->ref_list[1][0].reference) &&
168
66.0k
               !sl->ref_list[1][0].parent->mbaff) {
169
12.5k
        sl->col_fieldoff = 2 * sl->ref_list[1][0].reference - 3;
170
12.5k
    }
171
172
634k
    if (sl->slice_type_nos != AV_PICTURE_TYPE_B || sl->direct_spatial_mv_pred)
173
319k
        return;
174
175
944k
    for (list = 0; list < 2; list++) {
176
629k
        fill_colmap(h, sl, sl->map_col_to_list0, list, sidx, ref1sidx, 0);
177
629k
        if (FRAME_MBAFF(h))
178
225k
            for (field = 0; field < 2; field++)
179
150k
                fill_colmap(h, sl, sl->map_col_to_list0_field[field], list, field,
180
150k
                            field, 1);
181
629k
    }
182
314k
}
183
184
/**
 * Wait until the picture behind @p ref has reported progress up to the pixel
 * row that corresponds to macroblock row @p mb_y.
 *
 * Returns immediately when HAVE_THREADS is 0 or FF_THREAD_FRAME is not set in
 * h->avctx->active_thread_type. Otherwise ff_thread_await_progress() is
 * called with the row 16 * mb_y (halved when the reference is a field
 * picture), capped at the last row of the reference, and with a field
 * argument that is non-zero only when the reference is a field picture and
 * ref->reference - 1 is non-zero.
 *
 * @param h    decoder context
 * @param ref  reference whose parent picture is waited on
 * @param mb_y macroblock row that is about to be read
 */
static void await_reference_mb_row(const H264Context *const h, H264Ref *ref,
185
                                   int mb_y)
186
22.1M
{
187
22.1M
    int ref_field         = ref->reference - 1;
188
22.1M
    int ref_field_picture = ref->parent->field_picture;
189
22.1M
    int ref_height        = 16 * h->mb_height >> ref_field_picture;
190
191
22.1M
    if (!HAVE_THREADS || !(h->avctx->active_thread_type & FF_THREAD_FRAME))
192
22.1M
        return;
193
194
    /* FIXME: It can be safe to access mb stuff
195
     * even if pixels aren't deblocked yet. */
196
197
0
    ff_thread_await_progress(&ref->parent->tf,
198
0
                             FFMIN(16 * mb_y >> ref_field_picture,
199
0
                                   ref_height - 1),
200
0
                             ref_field_picture && ref_field);
201
0
}
202
203
/**
 * Fill sl->ref_cache, sl->mv_cache, sl->sub_mb_type and *mb_type for a direct
 * macroblock using spatial prediction.
 *
 * Outline:
 *  1. For each list, the reference index is the unsigned minimum of the
 *     left_ref, top_ref and refc entries of sl->ref_cache (refc falls back to
 *     the scan8[0] - 8 - 1 entry when it is PART_NOT_AVAILABLE) and the
 *     vector is predicted from the matching neighbours. A list with no
 *     usable neighbour gets ref -1, a zero vector and has its list bits
 *     cleared; if that happens for both lists, both refs become 0 and both
 *     lists are used.
 *  2. If the macroblock is not 8x8 and both predicted vectors are zero, the
 *     caches are filled for a 16x16 direct macroblock and the function
 *     returns without looking at the co-located picture.
 *  3. Otherwise the co-located macroblock(s) in the parent picture of
 *     sl->ref_list[1][0] are located (accounting for frame/field differences
 *     between the current and the co-located macroblock) and the partition
 *     type is derived from them.
 *  4. Per partition, where the co-located block is not intra, the parent is
 *     not long_ref, the co-located reference index is 0 and the co-located
 *     vector components are at most 1 in magnitude, the vector of any list
 *     whose reference index is 0 is replaced by zero. Some branches
 *     additionally require h->x264_build > 33U when the list-1 co-located
 *     data is used.
 *
 * @param h       decoder context
 * @param sl      slice context whose caches and sub_mb_type are written
 * @param mb_type in/out: type flags of the current macroblock
 */
static void pred_spatial_direct_motion(const H264Context *const h, H264SliceContext *sl,
204
                                       int *mb_type)
205
8.66M
{
206
8.66M
    int b8_stride = 2;
207
8.66M
    int b4_stride = h->b_stride;
208
8.66M
    int mb_xy = sl->mb_xy, mb_y = sl->mb_y;
209
8.66M
    int mb_type_col[2];
210
8.66M
    const int16_t (*l1mv0)[2], (*l1mv1)[2];
211
8.66M
    const int8_t *l1ref0, *l1ref1;
212
8.66M
    const int is_b8x8 = IS_8X8(*mb_type);
213
8.66M
    unsigned int sub_mb_type = MB_TYPE_L0L1;
214
8.66M
    int i8, i4;
215
8.66M
    int ref[2];
216
8.66M
    int mv[2];
217
8.66M
    int list;
218
219
8.66M
    assert(sl->ref_list[1][0].reference & 3);
220
221
8.66M
    await_reference_mb_row(h, &sl->ref_list[1][0],
222
8.66M
                           sl->mb_y + !!IS_INTERLACED(*mb_type));
223
224
8.66M
#define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16 | MB_TYPE_INTRA4x4 | \
225
8.34M
                                MB_TYPE_INTRA16x16 | MB_TYPE_INTRA_PCM)
226
227
    /* ref = min(neighbors) */
228
26.0M
    for (list = 0; list < 2; list++) {
229
17.3M
        int left_ref     = sl->ref_cache[list][scan8[0] - 1];
230
17.3M
        int top_ref      = sl->ref_cache[list][scan8[0] - 8];
231
17.3M
        int refc         = sl->ref_cache[list][scan8[0] - 8 + 4];
232
17.3M
        const int16_t *C = sl->mv_cache[list][scan8[0]  - 8 + 4];
233
17.3M
        if (refc == PART_NOT_AVAILABLE) {
234
10.4M
            refc = sl->ref_cache[list][scan8[0] - 8 - 1];
235
10.4M
            C    = sl->mv_cache[list][scan8[0]  - 8 - 1];
236
10.4M
        }
        /* The comparison is done on unsigned values so that negative
         * entries compare as very large and lose against any index >= 0. */
237
17.3M
        ref[list] = FFMIN3((unsigned)left_ref,
238
17.3M
                           (unsigned)top_ref,
239
17.3M
                           (unsigned)refc);
240
17.3M
        if (ref[list] >= 0) {
241
            /* This is just pred_motion() but with the cases removed that
242
             * cannot happen for direct blocks. */
243
14.1M
            const int16_t *const A = sl->mv_cache[list][scan8[0] - 1];
244
14.1M
            const int16_t *const B = sl->mv_cache[list][scan8[0] - 8];
245
246
14.1M
            int match_count = (left_ref == ref[list]) +
247
14.1M
                              (top_ref  == ref[list]) +
248
14.1M
                              (refc     == ref[list]);
249
250
14.1M
            if (match_count > 1) { // most common
251
8.19M
                mv[list] = pack16to32(mid_pred(A[0], B[0], C[0]),
252
8.19M
                                      mid_pred(A[1], B[1], C[1]));
253
8.19M
            } else {
254
5.99M
                assert(match_count == 1);
255
5.99M
                if (left_ref == ref[list])
256
4.18M
                    mv[list] = AV_RN32A(A);
257
1.81M
                else if (top_ref == ref[list])
258
1.56M
                    mv[list] = AV_RN32A(B);
259
243k
                else
260
243k
                    mv[list] = AV_RN32A(C);
261
5.99M
            }
262
14.1M
            av_assert2(ref[list] < (sl->ref_count[list] << !!FRAME_MBAFF(h)));
263
14.1M
        } else {
            /* No usable neighbour for this list: drop the list from the
             * macroblock and sub-macroblock types. */
264
3.14M
            int mask = ~(MB_TYPE_L0 << (2 * list));
265
3.14M
            mv[list]  = 0;
266
3.14M
            ref[list] = -1;
267
3.14M
            if (!is_b8x8)
268
3.00M
                *mb_type &= mask;
269
3.14M
            sub_mb_type &= mask;
270
3.14M
        }
271
17.3M
    }
272
8.66M
    if (ref[0] < 0 && ref[1] < 0) {
273
267k
        ref[0] = ref[1] = 0;
274
267k
        if (!is_b8x8)
275
219k
            *mb_type |= MB_TYPE_L0L1;
276
267k
        sub_mb_type |= MB_TYPE_L0L1;
277
267k
    }
278
    /* Shortcut: a non-8x8 macroblock whose two predicted vectors are both
     * zero becomes a 16x16 direct macroblock with zero vectors. */
279
8.66M
    if (!(is_b8x8 | mv[0] | mv[1])) {
280
5.80M
        fill_rectangle(&sl->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
281
5.80M
        fill_rectangle(&sl->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
282
5.80M
        fill_rectangle(&sl->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
283
5.80M
        fill_rectangle(&sl->mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
284
5.80M
        *mb_type = (*mb_type & ~(MB_TYPE_8x8 | MB_TYPE_16x8 | MB_TYPE_8x16 |
285
5.80M
                                 MB_TYPE_P1L0 | MB_TYPE_P1L1)) |
286
5.80M
                   MB_TYPE_16x16 | MB_TYPE_DIRECT2;
287
5.80M
        return;
288
5.80M
    }
289
290
2.86M
    if (IS_INTERLACED(sl->ref_list[1][0].parent->mb_type[mb_xy])) { // AFL/AFR/FR/FL -> AFL/FL
291
575k
        if (!IS_INTERLACED(*mb_type)) {                    //     AFR/FR    -> AFL/FL
292
181k
            mb_y  = (sl->mb_y & ~1) + sl->col_parity;
293
181k
            mb_xy = sl->mb_x +
294
181k
                    ((sl->mb_y & ~1) + sl->col_parity) * h->mb_stride;
            /* b8_stride == 0 doubles as the marker for this case; it is
             * tested again after the co-located pointers are set up. */
295
181k
            b8_stride = 0;
296
394k
        } else {
297
394k
            mb_y  += sl->col_fieldoff;
298
394k
            mb_xy += h->mb_stride * sl->col_fieldoff; // non-zero for FL -> FL & differ parity
299
394k
        }
        /* Jumps into the else-branch below, which handles a single
         * co-located macroblock. */
300
575k
        goto single_col;
301
2.28M
    } else {                                             // AFL/AFR/FR/FL -> AFR/FR
302
2.28M
        if (IS_INTERLACED(*mb_type)) {                   // AFL       /FL -> AFR/FR
303
570k
            mb_y           =  sl->mb_y & ~1;
304
570k
            mb_xy          = (sl->mb_y & ~1) * h->mb_stride + sl->mb_x;
305
570k
            mb_type_col[0] = sl->ref_list[1][0].parent->mb_type[mb_xy];
306
570k
            mb_type_col[1] = sl->ref_list[1][0].parent->mb_type[mb_xy + h->mb_stride];
307
570k
            b8_stride      = 2 + 4 * h->mb_stride;
308
570k
            b4_stride     *= 6;
309
570k
            if (IS_INTERLACED(mb_type_col[0]) !=
310
570k
                IS_INTERLACED(mb_type_col[1])) {
311
18.7k
                mb_type_col[0] &= ~MB_TYPE_INTERLACED;
312
18.7k
                mb_type_col[1] &= ~MB_TYPE_INTERLACED;
313
18.7k
            }
314
315
570k
            sub_mb_type |= MB_TYPE_16x16 | MB_TYPE_DIRECT2; /* B_SUB_8x8 */
316
570k
            if ((mb_type_col[0] & MB_TYPE_16x16_OR_INTRA) &&
317
114k
                (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA) &&
318
108k
                !is_b8x8) {
319
103k
                *mb_type |= MB_TYPE_16x8 | MB_TYPE_DIRECT2;  /* B_16x8 */
320
467k
            } else {
321
467k
                *mb_type |= MB_TYPE_8x8;
322
467k
            }
323
1.71M
        } else {                                         //     AFR/FR    -> AFR/FR
324
2.29M
single_col:
325
2.29M
            mb_type_col[0] =
326
2.29M
            mb_type_col[1] = sl->ref_list[1][0].parent->mb_type[mb_xy];
327
328
2.29M
            sub_mb_type |= MB_TYPE_16x16 | MB_TYPE_DIRECT2; /* B_SUB_8x8 */
329
2.29M
            if (!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)) {
330
1.15M
                *mb_type |= MB_TYPE_16x16 | MB_TYPE_DIRECT2; /* B_16x16 */
331
1.15M
            } else if (!is_b8x8 &&
332
1.03M
                       (mb_type_col[0] & (MB_TYPE_16x8 | MB_TYPE_8x16))) {
333
70.0k
                *mb_type |= MB_TYPE_DIRECT2 |
334
70.0k
                            (mb_type_col[0] & (MB_TYPE_16x8 | MB_TYPE_8x16));
335
1.06M
            } else {
336
1.06M
                if (!h->ps.sps->direct_8x8_inference_flag) {
337
                    /* FIXME: Save sub mb types from previous frames (or derive
338
                     * from MVs) so we know exactly what block size to use. */
339
25.2k
                    sub_mb_type += (MB_TYPE_8x8 - MB_TYPE_16x16); /* B_SUB_4x4 */
340
25.2k
                }
341
1.06M
                *mb_type |= MB_TYPE_8x8;
342
1.06M
            }
343
2.29M
        }
344
2.28M
    }
345
346
2.86M
    await_reference_mb_row(h, &sl->ref_list[1][0], mb_y);
347
    /* Pointers to the co-located motion vectors and reference indices of
     * both lists, starting at macroblock mb_xy. */
348
2.86M
    l1mv0  = (void*)&sl->ref_list[1][0].parent->motion_val[0][h->mb2b_xy[mb_xy]];
349
2.86M
    l1mv1  = (void*)&sl->ref_list[1][0].parent->motion_val[1][h->mb2b_xy[mb_xy]];
350
2.86M
    l1ref0 = &sl->ref_list[1][0].parent->ref_index[0][4 * mb_xy];
351
2.86M
    l1ref1 = &sl->ref_list[1][0].parent->ref_index[1][4 * mb_xy];
352
2.86M
    if (!b8_stride) {
353
181k
        if (sl->mb_y & 1) {
354
67.1k
            l1ref0 += 2;
355
67.1k
            l1ref1 += 2;
356
67.1k
            l1mv0  += 2 * b4_stride;
357
67.1k
            l1mv1  += 2 * b4_stride;
358
67.1k
        }
359
181k
    }
360
361
2.86M
    if (IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])) {
        /* n counts the 8x8 blocks whose vectors were taken through the
         * "co-located is (almost) static" path. */
362
696k
        int n = 0;
363
3.48M
        for (i8 = 0; i8 < 4; i8++) {
364
2.78M
            int x8  = i8 & 1;
365
2.78M
            int y8  = i8 >> 1;
366
2.78M
            int xy8 = x8     + y8 * b8_stride;
367
2.78M
            int xy4 = x8 * 3 + y8 * b4_stride;
368
2.78M
            int a, b;
369
370
2.78M
            if (is_b8x8 && !IS_DIRECT(sl->sub_mb_type[i8]))
371
162k
                continue;
372
2.62M
            sl->sub_mb_type[i8] = sub_mb_type;
373
374
2.62M
            fill_rectangle(&sl->ref_cache[0][scan8[i8 * 4]], 2, 2, 8,
375
2.62M
                           (uint8_t)ref[0], 1);
376
2.62M
            fill_rectangle(&sl->ref_cache[1][scan8[i8 * 4]], 2, 2, 8,
377
2.62M
                           (uint8_t)ref[1], 1);
378
2.62M
            if (!IS_INTRA(mb_type_col[y8]) && !sl->ref_list[1][0].parent->long_ref &&
379
1.22M
                ((l1ref0[xy8] == 0 &&
380
1.10M
                  FFABS(l1mv0[xy4][0]) <= 1 &&
381
1.08M
                  FFABS(l1mv0[xy4][1]) <= 1) ||
382
146k
                 (l1ref0[xy8] < 0 &&
383
60.4k
                  l1ref1[xy8] == 0 &&
384
49.0k
                  FFABS(l1mv1[xy4][0]) <= 1 &&
385
1.11M
                  FFABS(l1mv1[xy4][1]) <= 1))) {
386
1.11M
                a =
387
1.11M
                b = 0;
388
1.11M
                if (ref[0] > 0)
389
575k
                    a = mv[0];
390
1.11M
                if (ref[1] > 0)
391
255k
                    b = mv[1];
392
1.11M
                n++;
393
1.51M
            } else {
394
1.51M
                a = mv[0];
395
1.51M
                b = mv[1];
396
1.51M
            }
397
2.62M
            fill_rectangle(&sl->mv_cache[0][scan8[i8 * 4]], 2, 2, 8, a, 4);
398
2.62M
            fill_rectangle(&sl->mv_cache[1][scan8[i8 * 4]], 2, 2, 8, b, 4);
399
2.62M
        }
        /* n is 0 or 4 here means all four blocks were treated alike, so the
         * macroblock is turned back into a 16x16 direct one. */
400
696k
        if (!is_b8x8 && !(n & 3))
401
625k
            *mb_type = (*mb_type & ~(MB_TYPE_8x8 | MB_TYPE_16x8 | MB_TYPE_8x16 |
402
625k
                                     MB_TYPE_P1L0 | MB_TYPE_P1L1)) |
403
625k
                       MB_TYPE_16x16 | MB_TYPE_DIRECT2;
404
2.16M
    } else if (IS_16X16(*mb_type)) {
405
1.04M
        int a, b;
406
407
1.04M
        fill_rectangle(&sl->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
408
1.04M
        fill_rectangle(&sl->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
409
1.04M
        if (!IS_INTRA(mb_type_col[0]) && !sl->ref_list[1][0].parent->long_ref &&
410
325k
            ((l1ref0[0] == 0 &&
411
200k
              FFABS(l1mv0[0][0]) <= 1 &&
412
120k
              FFABS(l1mv0[0][1]) <= 1) ||
413
223k
             (l1ref0[0] < 0 && !l1ref1[0] &&
414
54.1k
              FFABS(l1mv1[0][0]) <= 1 &&
415
24.4k
              FFABS(l1mv1[0][1]) <= 1 &&
416
118k
              h->x264_build > 33U))) {
417
118k
            a = b = 0;
418
118k
            if (ref[0] > 0)
419
42.7k
                a = mv[0];
420
118k
            if (ref[1] > 0)
421
7.46k
                b = mv[1];
422
930k
        } else {
423
930k
            a = mv[0];
424
930k
            b = mv[1];
425
930k
        }
426
1.04M
        fill_rectangle(&sl->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
427
1.04M
        fill_rectangle(&sl->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
428
1.11M
    } else {
        /* n accumulates, in units of 4x4 blocks, how many blocks took the
         * zeroing path (an 8x8 block counts as 4). */
429
1.11M
        int n = 0;
430
5.58M
        for (i8 = 0; i8 < 4; i8++) {
431
4.46M
            const int x8 = i8 & 1;
432
4.46M
            const int y8 = i8 >> 1;
433
434
4.46M
            if (is_b8x8 && !IS_DIRECT(sl->sub_mb_type[i8]))
435
224k
                continue;
436
4.24M
            sl->sub_mb_type[i8] = sub_mb_type;
437
438
4.24M
            fill_rectangle(&sl->mv_cache[0][scan8[i8 * 4]], 2, 2, 8, mv[0], 4);
439
4.24M
            fill_rectangle(&sl->mv_cache[1][scan8[i8 * 4]], 2, 2, 8, mv[1], 4);
440
4.24M
            fill_rectangle(&sl->ref_cache[0][scan8[i8 * 4]], 2, 2, 8,
441
4.24M
                           (uint8_t)ref[0], 1);
442
4.24M
            fill_rectangle(&sl->ref_cache[1][scan8[i8 * 4]], 2, 2, 8,
443
4.24M
                           (uint8_t)ref[1], 1);
444
445
4.24M
            assert(b8_stride == 2);
446
            /* col_zero_flag */
447
4.24M
            if (!IS_INTRA(mb_type_col[0]) && !sl->ref_list[1][0].parent->long_ref &&
448
2.02M
                (l1ref0[i8] == 0 ||
449
71.1k
                 (l1ref0[i8] < 0 &&
450
38.5k
                  l1ref1[i8] == 0 &&
451
1.98M
                  h->x264_build > 33U))) {
452
1.98M
                const int16_t (*l1mv)[2] = l1ref0[i8] == 0 ? l1mv0 : l1mv1;
453
1.98M
                if (IS_SUB_8X8(sub_mb_type)) {
454
1.92M
                    const int16_t *mv_col = l1mv[x8 * 3 + y8 * 3 * b4_stride];
455
1.92M
                    if (FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1) {
456
1.78M
                        if (ref[0] == 0)
457
584k
                            fill_rectangle(&sl->mv_cache[0][scan8[i8 * 4]], 2, 2,
458
584k
                                           8, 0, 4);
459
1.78M
                        if (ref[1] == 0)
460
1.22M
                            fill_rectangle(&sl->mv_cache[1][scan8[i8 * 4]], 2, 2,
461
1.22M
                                           8, 0, 4);
462
1.78M
                        n += 4;
463
1.78M
                    }
464
1.92M
                } else {
465
65.6k
                    int m = 0;
466
328k
                    for (i4 = 0; i4 < 4; i4++) {
467
262k
                        const int16_t *mv_col = l1mv[x8 * 2 + (i4 & 1) +
468
262k
                                                     (y8 * 2 + (i4 >> 1)) * b4_stride];
469
262k
                        if (FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1) {
470
248k
                            if (ref[0] == 0)
471
248k
                                AV_ZERO32(sl->mv_cache[0][scan8[i8 * 4 + i4]]);
472
248k
                            if (ref[1] == 0)
473
248k
                                AV_ZERO32(sl->mv_cache[1][scan8[i8 * 4 + i4]]);
474
248k
                            m++;
475
248k
                        }
476
262k
                    }
477
65.6k
                    if (!(m & 3))
478
65.0k
                        sl->sub_mb_type[i8] += MB_TYPE_16x16 - MB_TYPE_8x8;
479
65.6k
                    n += m;
480
65.6k
                }
481
1.98M
            }
482
4.24M
        }
483
1.11M
        if (!is_b8x8 && !(n & 15))
484
1.00M
            *mb_type = (*mb_type & ~(MB_TYPE_8x8 | MB_TYPE_16x8 | MB_TYPE_8x16 |
485
1.00M
                                     MB_TYPE_P1L0 | MB_TYPE_P1L1)) |
486
1.00M
                       MB_TYPE_16x16 | MB_TYPE_DIRECT2;
487
1.11M
    }
488
2.86M
}
489
490
static void pred_temp_direct_motion(const H264Context *const h, H264SliceContext *sl,
491
                                    int *mb_type)
492
5.32M
{
493
5.32M
    int b8_stride = 2;
494
5.32M
    int b4_stride = h->b_stride;
495
5.32M
    int mb_xy = sl->mb_xy, mb_y = sl->mb_y;
496
5.32M
    int mb_type_col[2];
497
5.32M
    const int16_t (*l1mv0)[2], (*l1mv1)[2];
498
5.32M
    const int8_t *l1ref0, *l1ref1;
499
5.32M
    const int is_b8x8 = IS_8X8(*mb_type);
500
5.32M
    unsigned int sub_mb_type;
501
5.32M
    int i8, i4;
502
503
5.32M
    assert(sl->ref_list[1][0].reference & 3);
504
505
5.32M
    await_reference_mb_row(h, &sl->ref_list[1][0],
506
5.32M
                           sl->mb_y + !!IS_INTERLACED(*mb_type));
507
508
5.32M
    if (IS_INTERLACED(sl->ref_list[1][0].parent->mb_type[mb_xy])) { // AFL/AFR/FR/FL -> AFL/FL
509
529k
        if (!IS_INTERLACED(*mb_type)) {                    //     AFR/FR    -> AFL/FL
510
61.6k
            mb_y  = (sl->mb_y & ~1) + sl->col_parity;
511
61.6k
            mb_xy = sl->mb_x +
512
61.6k
                    ((sl->mb_y & ~1) + sl->col_parity) * h->mb_stride;
513
61.6k
            b8_stride = 0;
514
467k
        } else {
515
467k
            mb_y  += sl->col_fieldoff;
516
467k
            mb_xy += h->mb_stride * sl->col_fieldoff; // non-zero for FL -> FL & differ parity
517
467k
        }
518
529k
        goto single_col;
519
4.79M
    } else {                                        // AFL/AFR/FR/FL -> AFR/FR
520
4.79M
        if (IS_INTERLACED(*mb_type)) {              // AFL       /FL -> AFR/FR
521
1.44M
            mb_y           = sl->mb_y & ~1;
522
1.44M
            mb_xy          = sl->mb_x + (sl->mb_y & ~1) * h->mb_stride;
523
1.44M
            mb_type_col[0] = sl->ref_list[1][0].parent->mb_type[mb_xy];
524
1.44M
            mb_type_col[1] = sl->ref_list[1][0].parent->mb_type[mb_xy + h->mb_stride];
525
1.44M
            b8_stride      = 2 + 4 * h->mb_stride;
526
1.44M
            b4_stride     *= 6;
527
1.44M
            if (IS_INTERLACED(mb_type_col[0]) !=
528
1.44M
                IS_INTERLACED(mb_type_col[1])) {
529
20.8k
                mb_type_col[0] &= ~MB_TYPE_INTERLACED;
530
20.8k
                mb_type_col[1] &= ~MB_TYPE_INTERLACED;
531
20.8k
            }
532
533
1.44M
            sub_mb_type = MB_TYPE_16x16 | MB_TYPE_P0L0 | MB_TYPE_P0L1 |
534
1.44M
                          MB_TYPE_DIRECT2;                  /* B_SUB_8x8 */
535
536
1.44M
            if ((mb_type_col[0] & MB_TYPE_16x16_OR_INTRA) &&
537
213k
                (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA) &&
538
194k
                !is_b8x8) {
539
193k
                *mb_type |= MB_TYPE_16x8 | MB_TYPE_L0L1 |
540
193k
                            MB_TYPE_DIRECT2;                /* B_16x8 */
541
1.24M
            } else {
542
1.24M
                *mb_type |= MB_TYPE_8x8 | MB_TYPE_L0L1;
543
1.24M
            }
544
3.35M
        } else {                                    //     AFR/FR    -> AFR/FR
545
3.88M
single_col:
546
3.88M
            mb_type_col[0]     =
547
3.88M
                mb_type_col[1] = sl->ref_list[1][0].parent->mb_type[mb_xy];
548
549
3.88M
            sub_mb_type = MB_TYPE_16x16 | MB_TYPE_P0L0 | MB_TYPE_P0L1 |
550
3.88M
                          MB_TYPE_DIRECT2;                  /* B_SUB_8x8 */
551
3.88M
            if (!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)) {
552
1.06M
                *mb_type |= MB_TYPE_16x16 | MB_TYPE_P0L0 | MB_TYPE_P0L1 |
553
1.06M
                            MB_TYPE_DIRECT2;                /* B_16x16 */
554
2.82M
            } else if (!is_b8x8 &&
555
2.75M
                       (mb_type_col[0] & (MB_TYPE_16x8 | MB_TYPE_8x16))) {
556
287k
                *mb_type |= MB_TYPE_L0L1 | MB_TYPE_DIRECT2 |
557
287k
                            (mb_type_col[0] & (MB_TYPE_16x8 | MB_TYPE_8x16));
558
2.53M
            } else {
559
2.53M
                if (!h->ps.sps->direct_8x8_inference_flag) {
560
                    /* FIXME: save sub mb types from previous frames (or derive
561
                     * from MVs) so we know exactly what block size to use */
562
375k
                    sub_mb_type = MB_TYPE_8x8 | MB_TYPE_P0L0 | MB_TYPE_P0L1 |
563
375k
                                  MB_TYPE_DIRECT2;          /* B_SUB_4x4 */
564
375k
                }
565
2.53M
                *mb_type |= MB_TYPE_8x8 | MB_TYPE_L0L1;
566
2.53M
            }
567
3.88M
        }
568
4.79M
    }
569
570
5.32M
    await_reference_mb_row(h, &sl->ref_list[1][0], mb_y);
571
572
5.32M
    l1mv0  = (void*)&sl->ref_list[1][0].parent->motion_val[0][h->mb2b_xy[mb_xy]];
573
5.32M
    l1mv1  = (void*)&sl->ref_list[1][0].parent->motion_val[1][h->mb2b_xy[mb_xy]];
574
5.32M
    l1ref0 = &sl->ref_list[1][0].parent->ref_index[0][4 * mb_xy];
575
5.32M
    l1ref1 = &sl->ref_list[1][0].parent->ref_index[1][4 * mb_xy];
576
5.32M
    if (!b8_stride) {
577
61.6k
        if (sl->mb_y & 1) {
578
28.2k
            l1ref0 += 2;
579
28.2k
            l1ref1 += 2;
580
28.2k
            l1mv0  += 2 * b4_stride;
581
28.2k
            l1mv1  += 2 * b4_stride;
582
28.2k
        }
583
61.6k
    }
584
585
5.32M
    {
586
5.32M
        const int *map_col_to_list0[2] = { sl->map_col_to_list0[0],
587
5.32M
                                           sl->map_col_to_list0[1] };
588
5.32M
        const int *dist_scale_factor = sl->dist_scale_factor;
589
5.32M
        int ref_offset;
590
591
5.32M
        if (FRAME_MBAFF(h) && IS_INTERLACED(*mb_type)) {
592
454k
            map_col_to_list0[0] = sl->map_col_to_list0_field[sl->mb_y & 1][0];
593
454k
            map_col_to_list0[1] = sl->map_col_to_list0_field[sl->mb_y & 1][1];
594
454k
            dist_scale_factor   = sl->dist_scale_factor_field[sl->mb_y & 1];
595
454k
        }
596
5.32M
        ref_offset = (sl->ref_list[1][0].parent->mbaff << 4) & (mb_type_col[0] >> 3);
597
598
5.32M
        if (IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])) {
599
1.51M
            int y_shift = 2 * !IS_INTERLACED(*mb_type);
600
1.51M
            assert(h->ps.sps->direct_8x8_inference_flag);
601
602
7.58M
            for (i8 = 0; i8 < 4; i8++) {
603
6.06M
                const int x8 = i8 & 1;
604
6.06M
                const int y8 = i8 >> 1;
605
6.06M
                int ref0, scale;
606
6.06M
                const int16_t (*l1mv)[2] = l1mv0;
607
608
6.06M
                if (is_b8x8 && !IS_DIRECT(sl->sub_mb_type[i8]))
609
29.4k
                    continue;
610
6.03M
                sl->sub_mb_type[i8] = sub_mb_type;
611
612
6.03M
                fill_rectangle(&sl->ref_cache[1][scan8[i8 * 4]], 2, 2, 8, 0, 1);
613
6.03M
                if (IS_INTRA(mb_type_col[y8])) {
614
115k
                    fill_rectangle(&sl->ref_cache[0][scan8[i8 * 4]], 2, 2, 8, 0, 1);
615
115k
                    fill_rectangle(&sl->mv_cache[0][scan8[i8 * 4]], 2, 2, 8, 0, 4);
616
115k
                    fill_rectangle(&sl->mv_cache[1][scan8[i8 * 4]], 2, 2, 8, 0, 4);
617
115k
                    continue;
618
115k
                }
619
620
5.92M
                ref0 = l1ref0[x8 + y8 * b8_stride];
621
5.92M
                if (ref0 >= 0)
622
5.80M
                    ref0 = map_col_to_list0[0][ref0 + ref_offset];
623
119k
                else {
624
119k
                    ref0 = map_col_to_list0[1][l1ref1[x8 + y8 * b8_stride] +
625
119k
                                               ref_offset];
626
119k
                    l1mv = l1mv1;
627
119k
                }
628
5.92M
                scale = dist_scale_factor[ref0];
629
5.92M
                fill_rectangle(&sl->ref_cache[0][scan8[i8 * 4]], 2, 2, 8,
630
5.92M
                               ref0, 1);
631
632
5.92M
                {
633
5.92M
                    const int16_t *mv_col = l1mv[x8 * 3 + y8 * b4_stride];
634
5.92M
                    int my_col            = (mv_col[1] * (1 << y_shift)) / 2;
635
5.92M
                    int mx                = (scale * mv_col[0] + 128) >> 8;
636
5.92M
                    int my                = (scale * my_col    + 128) >> 8;
637
5.92M
                    fill_rectangle(&sl->mv_cache[0][scan8[i8 * 4]], 2, 2, 8,
638
5.92M
                                   pack16to32(mx, my), 4);
639
5.92M
                    fill_rectangle(&sl->mv_cache[1][scan8[i8 * 4]], 2, 2, 8,
640
5.92M
                                   pack16to32(mx - mv_col[0], my - my_col), 4);
641
5.92M
                }
642
5.92M
            }
643
1.51M
            return;
644
1.51M
        }
645
646
        /* one-to-one mv scaling */
647
648
3.81M
        if (IS_16X16(*mb_type)) {
649
1.02M
            int ref, mv0, mv1;
650
651
1.02M
            fill_rectangle(&sl->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
652
1.02M
            if (IS_INTRA(mb_type_col[0])) {
653
58.4k
                ref = mv0 = mv1 = 0;
654
965k
            } else {
655
965k
                const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
656
965k
                                                : map_col_to_list0[1][l1ref1[0] + ref_offset];
657
965k
                const int scale = dist_scale_factor[ref0];
658
965k
                const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
659
965k
                int mv_l0[2];
660
965k
                mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
661
965k
                mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
662
965k
                ref      = ref0;
663
965k
                mv0      = pack16to32(mv_l0[0], mv_l0[1]);
664
965k
                mv1      = pack16to32(mv_l0[0] - mv_col[0], mv_l0[1] - mv_col[1]);
665
965k
            }
666
1.02M
            fill_rectangle(&sl->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
667
1.02M
            fill_rectangle(&sl->mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
668
1.02M
            fill_rectangle(&sl->mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
669
2.78M
        } else {
670
13.9M
            for (i8 = 0; i8 < 4; i8++) {
671
11.1M
                const int x8 = i8 & 1;
672
11.1M
                const int y8 = i8 >> 1;
673
11.1M
                int ref0, scale;
674
11.1M
                const int16_t (*l1mv)[2] = l1mv0;
675
676
11.1M
                if (is_b8x8 && !IS_DIRECT(sl->sub_mb_type[i8]))
677
165k
                    continue;
678
10.9M
                sl->sub_mb_type[i8] = sub_mb_type;
679
10.9M
                fill_rectangle(&sl->ref_cache[1][scan8[i8 * 4]], 2, 2, 8, 0, 1);
680
10.9M
                if (IS_INTRA(mb_type_col[0])) {
681
2.29k
                    fill_rectangle(&sl->ref_cache[0][scan8[i8 * 4]], 2, 2, 8, 0, 1);
682
2.29k
                    fill_rectangle(&sl->mv_cache[0][scan8[i8 * 4]], 2, 2, 8, 0, 4);
683
2.29k
                    fill_rectangle(&sl->mv_cache[1][scan8[i8 * 4]], 2, 2, 8, 0, 4);
684
2.29k
                    continue;
685
2.29k
                }
686
687
10.9M
                assert(b8_stride == 2);
688
10.9M
                ref0 = l1ref0[i8];
689
10.9M
                if (ref0 >= 0)
690
10.8M
                    ref0 = map_col_to_list0[0][ref0 + ref_offset];
691
108k
                else {
692
108k
                    ref0 = map_col_to_list0[1][l1ref1[i8] + ref_offset];
693
108k
                    l1mv = l1mv1;
694
108k
                }
695
10.9M
                scale = dist_scale_factor[ref0];
696
697
10.9M
                fill_rectangle(&sl->ref_cache[0][scan8[i8 * 4]], 2, 2, 8,
698
10.9M
                               ref0, 1);
699
10.9M
                if (IS_SUB_8X8(sub_mb_type)) {
700
9.52M
                    const int16_t *mv_col = l1mv[x8 * 3 + y8 * 3 * b4_stride];
701
9.52M
                    int mx                = (scale * mv_col[0] + 128) >> 8;
702
9.52M
                    int my                = (scale * mv_col[1] + 128) >> 8;
703
9.52M
                    fill_rectangle(&sl->mv_cache[0][scan8[i8 * 4]], 2, 2, 8,
704
9.52M
                                   pack16to32(mx, my), 4);
705
9.52M
                    fill_rectangle(&sl->mv_cache[1][scan8[i8 * 4]], 2, 2, 8,
706
9.52M
                                   pack16to32(mx - mv_col[0], my - mv_col[1]), 4);
707
9.52M
                } else {
708
7.23M
                    for (i4 = 0; i4 < 4; i4++) {
709
5.78M
                        const int16_t *mv_col = l1mv[x8 * 2 + (i4 & 1) +
710
5.78M
                                                     (y8 * 2 + (i4 >> 1)) * b4_stride];
711
5.78M
                        int16_t *mv_l0 = sl->mv_cache[0][scan8[i8 * 4 + i4]];
712
5.78M
                        mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
713
5.78M
                        mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
714
5.78M
                        AV_WN32A(sl->mv_cache[1][scan8[i8 * 4 + i4]],
715
5.78M
                                 pack16to32(mv_l0[0] - mv_col[0],
716
5.78M
                                            mv_l0[1] - mv_col[1]));
717
5.78M
                    }
718
1.44M
                }
719
10.9M
            }
720
2.78M
        }
721
3.81M
    }
722
3.81M
}
723
724
/**
 * Entry point for direct-mode motion prediction.
 *
 * Chooses one of the two predictors according to the slice's
 * direct_spatial_mv_pred flag: when the flag is zero the temporal
 * predictor pred_temp_direct_motion() is called, otherwise the spatial
 * predictor pred_spatial_direct_motion() is called. The arguments are
 * forwarded unchanged and nothing is returned.
 *
 * @param h       decoder context, forwarded to the selected predictor
 * @param sl      slice context; its direct_spatial_mv_pred field selects
 *                the predictor and it is forwarded as well
 * @param mb_type pointer to the macroblock type, forwarded to the
 *                selected predictor
 */
void ff_h264_pred_direct_motion(const H264Context *const h, H264SliceContext *sl,
                                int *mb_type)
{
    /* Temporal direct prediction is used whenever the spatial flag is clear. */
    if (!sl->direct_spatial_mv_pred) {
        pred_temp_direct_motion(h, sl, mb_type);
        return;
    }

    pred_spatial_direct_motion(h, sl, mb_type);
}