obu.c:clz:
  186|  5.50k|static inline int clz(const unsigned int mask) {
  187|  5.50k|    return __builtin_clz(mask);
  188|  5.50k|}
decode.c:ctz:
  182|   133k|static inline int ctz(const unsigned int mask) {
  183|   133k|    return __builtin_ctz(mask);
  184|   133k|}
decode.c:clz:
  186|  6.75M|static inline int clz(const unsigned int mask) {
  187|  6.75M|    return __builtin_clz(mask);
  188|  6.75M|}
getbits.c:clz:
  186|  86.6k|static inline int clz(const unsigned int mask) {
  187|  86.6k|    return __builtin_clz(mask);
  188|  86.6k|}
lf_mask.c:clz:
  186|  1.78M|static inline int clz(const unsigned int mask) {
  187|  1.78M|    return __builtin_clz(mask);
  188|  1.78M|}
warpmv.c:clz:
  186|  94.8k|static inline int clz(const unsigned int mask) {
  187|  94.8k|    return __builtin_clz(mask);
  188|  94.8k|}
warpmv.c:clzll:
  190|  89.2k|static inline int clzll(const unsigned long long mask) {
  191|  89.2k|    return __builtin_clzll(mask);
  192|  89.2k|}
looprestoration_tmpl.c:clz:
  186|   510k|static inline int clz(const unsigned int mask) {
  187|   510k|    return __builtin_clz(mask);
  188|   510k|}
recon_tmpl.c:clz:
  186|  16.1M|static inline int clz(const unsigned int mask) {
  187|  16.1M|    return __builtin_clz(mask);
  188|  16.1M|}
cdef_apply_tmpl.c:clz:
  186|   317k|static inline int clz(const unsigned int mask) {
  187|   317k|    return __builtin_clz(mask);
  188|   317k|}
ipred_prepare_tmpl.c:clz:
  186|  6.73M|static inline int clz(const unsigned int mask) {
  187|  6.73M|    return __builtin_clz(mask);
  188|  6.73M|}

fg_apply_tmpl.c:PXSTRIDE:
   79|  35.2k|static inline ptrdiff_t PXSTRIDE(const ptrdiff_t x) {
   80|  35.2k|    assert(!(x & 1));
  ------------------
  |  Branch (80:5): [True: 35.2k, False: 0]
  ------------------
   81|  35.2k|    return x >> 1;
   82|  35.2k|}
itx_tmpl.c:PXSTRIDE:
   79|  4.69M|static inline ptrdiff_t PXSTRIDE(const ptrdiff_t x) {
   80|  4.69M|    assert(!(x & 1));
  ------------------
  |  Branch (80:5): [True: 4.69M, False: 0]
  ------------------
   81|  4.69M|    return x >> 1;
   82|  4.69M|}
looprestoration_tmpl.c:PXSTRIDE:
   79|  1.35M|static inline ptrdiff_t PXSTRIDE(const ptrdiff_t x) {
   80|  1.35M|    assert(!(x & 1));
  ------------------
  |  Branch (80:5): [True: 1.35M, False: 0]
  ------------------
   81|  1.35M|    return x >> 1;
   82|  1.35M|}
recon_tmpl.c:PXSTRIDE:
   79|  13.2M|static inline ptrdiff_t PXSTRIDE(const ptrdiff_t x) {
   80|  13.2M|    assert(!(x & 1));
  ------------------
  |  Branch (80:5): [True: 13.2M, False: 0]
  ------------------
   81|  13.2M|    return x >> 1;
   82|  13.2M|}
cdef_apply_tmpl.c:PXSTRIDE:
   79|  3.36M|static inline ptrdiff_t PXSTRIDE(const ptrdiff_t x) {
   80|  3.36M|    assert(!(x & 1));
  ------------------
  |  Branch (80:5): [True: 3.36M, False: 0]
  ------------------
   81|  3.36M|    return x >> 1;
   82|  3.36M|}
ipred_prepare_tmpl.c:PXSTRIDE:
   79|  74.0M|static inline ptrdiff_t PXSTRIDE(const ptrdiff_t x) {
   80|  74.0M|    assert(!(x & 1));
  ------------------
  |  Branch (80:5): [True: 74.0M, False: 0]
  ------------------
   81|  74.0M|    return x >> 1;
   82|  74.0M|}
ipred_prepare_tmpl.c:pixel_set:
   66|  1.09M|static inline void pixel_set(pixel *const dst, const int val, const int num) {
   67|  19.4M|    for (int n = 0; n < num; n++)
  ------------------
  |  Branch (67:21): [True: 18.3M, False: 1.09M]
  ------------------
   68|  18.3M|        dst[n] = val;
   69|  1.09M|}
lf_apply_tmpl.c:PXSTRIDE:
   79|  2.42M|static inline ptrdiff_t PXSTRIDE(const ptrdiff_t x) {
   80|  2.42M|    assert(!(x & 1));
  ------------------
  |  Branch (80:5): [True: 2.42M, False: 0]
  ------------------
   81|  2.42M|    return x >> 1;
   82|  2.42M|}
lr_apply_tmpl.c:PXSTRIDE:
   79|   521k|static inline ptrdiff_t PXSTRIDE(const ptrdiff_t x) {
   80|   521k|    assert(!(x & 1));
  ------------------
  |  Branch (80:5): [True: 521k, False: 0]
  ------------------
   81|   521k|    return x >> 1;
   82|   521k|}

lib.c:umin:
   47|  9.51k|static inline unsigned umin(const unsigned a, const unsigned b) {
   48|  9.51k|    return a < b ? a : b;
  ------------------
  |  Branch (48:12): [True: 0, False: 9.51k]
  ------------------
   49|  9.51k|}
obu.c:ulog2:
   67|  5.50k|static inline int ulog2(const unsigned v) {
   68|  5.50k|    return 31 ^ clz(v);
   69|  5.50k|}
obu.c:imin:
   39|   271k|static inline int imin(const int a, const int b) {
   40|   271k|    return a < b ? a : b;
  ------------------
  |  Branch (40:12): [True: 212k, False: 58.5k]
  ------------------
   41|   271k|}
obu.c:imax:
   35|   209k|static inline int imax(const int a, const int b) {
   36|   209k|    return a > b ? a : b;
  ------------------
  |  Branch (36:12): [True: 20.1k, False: 189k]
  ------------------
   37|   209k|}
obu.c:iclip_u8:
   55|  81.3k|static inline int iclip_u8(const int v) {
   56|  81.3k|    return iclip(v, 0, 255);
   57|  81.3k|}
obu.c:iclip:
   51|  81.3k|static inline int iclip(const int v, const int min, const int max) {
   52|  81.3k|    return v < min ? min : v > max ? max : v;
  ------------------
  |  Branch (52:12): [True: 2.16k, False: 79.1k]
  |  Branch (52:28): [True: 926, False: 78.2k]
  ------------------
   53|  81.3k|}
refmvs.c:imin:
   39|  23.2M|static inline int imin(const int a, const int b) {
   40|  23.2M|    return a < b ? a : b;
  ------------------
  |  Branch (40:12): [True: 10.9M, False: 12.2M]
  ------------------
   41|  23.2M|}
refmvs.c:apply_sign:
   59|   433k|static inline int apply_sign(const int v, const int s) {
   60|   433k|    return s < 0 ? -v : v;
  ------------------
  |  Branch (60:12): [True: 149k, False: 284k]
  ------------------
   61|   433k|}
refmvs.c:imax:
   35|  11.6M|static inline int imax(const int a, const int b) {
   36|  11.6M|    return a > b ? a : b;
  ------------------
  |  Branch (36:12): [True: 1.99M, False: 9.66M]
  ------------------
   37|  11.6M|}
refmvs.c:iclip:
   51|  11.2M|static inline int iclip(const int v, const int min, const int max) {
   52|  11.2M|    return v < min ? min : v > max ? max : v;
  ------------------
  |  Branch (52:12): [True: 195k, False: 11.0M]
  |  Branch (52:28): [True: 238k, False: 10.8M]
  ------------------
   53|  11.2M|}
wedge.c:imax:
   35|    256|static inline int imax(const int a, const int b) {
   36|    256|    return a > b ? a : b;
  ------------------
  |  Branch (36:12): [True: 128, False: 128]
  ------------------
   37|    256|}
wedge.c:imin:
   39|  2.48k|static inline int imin(const int a, const int b) {
   40|  2.48k|    return a < b ? a : b;
  ------------------
  |  Branch (40:12): [True: 1.41k, False: 1.06k]
  ------------------
   41|  2.48k|}
fg_apply_tmpl.c:imin:
   39|  14.8k|static inline int imin(const int a, const int b) {
   40|  14.8k|    return a < b ? a : b;
  ------------------
  |  Branch (40:12): [True: 5.37k, False: 9.50k]
  ------------------
   41|  14.8k|}
cdf.c:imin:
   39|   132k|static inline int imin(const int a, const int b) {
   40|   132k|    return a < b ? a : b;
  ------------------
  |  Branch (40:12): [True: 33.1k, False: 99.3k]
  ------------------
   41|   132k|}
decode.c:iclip:
   51|  1.09M|static inline int iclip(const int v, const int min, const int max) {
   52|  1.09M|    return v < min ? min : v > max ? max : v;
  ------------------
  |  Branch (52:12): [True: 68.1k, False: 1.02M]
  |  Branch (52:28): [True: 25.8k, False: 999k]
  ------------------
   53|  1.09M|}
decode.c:apply_sign:
   59|   123k|static inline int apply_sign(const int v, const int s) {
   60|   123k|    return s < 0 ? -v : v;
  ------------------
  |  Branch (60:12): [True: 52.3k, False: 70.7k]
  ------------------
   61|   123k|}
decode.c:ulog2:
   67|  6.75M|static inline int ulog2(const unsigned v) {
   68|  6.75M|    return 31 ^ clz(v);
   69|  6.75M|}
decode.c:imax:
   35|  6.67M|static inline int imax(const int a, const int b) {
   36|  6.67M|    return a > b ? a : b;
  ------------------
  |  Branch (36:12): [True: 2.50M, False: 4.16M]
  ------------------
   37|  6.67M|}
decode.c:imin:
   39|  16.3M|static inline int imin(const int a, const int b) {
   40|  16.3M|    return a < b ? a : b;
  ------------------
  |  Branch (40:12): [True: 9.89M, False: 6.42M]
  ------------------
   41|  16.3M|}
decode.c:iclip_u8:
   55|   741k|static inline int iclip_u8(const int v) {
   56|   741k|    return iclip(v, 0, 255);
   57|   741k|}
getbits.c:ulog2:
   67|  86.6k|static inline int ulog2(const unsigned v) {
   68|  86.6k|    return 31 ^ clz(v);
   69|  86.6k|}
getbits.c:inv_recenter:
   75|  40.6k|static inline unsigned inv_recenter(const unsigned r, const unsigned v) {
   76|  40.6k|    if (v > (r << 1))
  ------------------
  |  Branch (76:9): [True: 2.09k, False: 38.5k]
  ------------------
   77|  2.09k|        return v;
   78|  38.5k|    else if ((v & 1) == 0)
  ------------------
  |  Branch (78:14): [True: 25.1k, False: 13.3k]
  ------------------
   79|  25.1k|        return (v >> 1) + r;
   80|  13.3k|    else
   81|  13.3k|        return r - ((v + 1) >> 1);
   82|  40.6k|}
lf_mask.c:imin:
   39|  25.4M|static inline int imin(const int a, const int b) {
   40|  25.4M|    return a < b ? a : b;
  ------------------
  |  Branch (40:12): [True: 2.61M, False: 22.8M]
  ------------------
   41|  25.4M|}
lf_mask.c:ulog2:
   67|  1.78M|static inline int ulog2(const unsigned v) {
   68|  1.78M|    return 31 ^ clz(v);
   69|  1.78M|}
lf_mask.c:imax:
   35|   968k|static inline int imax(const int a, const int b) {
   36|   968k|    return a > b ? a : b;
  ------------------
  |  Branch (36:12): [True: 909k, False: 58.7k]
  ------------------
   37|   968k|}
lf_mask.c:iclip:
   51|  2.72M|static inline int iclip(const int v, const int min, const int max) {
   52|  2.72M|    return v < min ? min : v > max ? max : v;
  ------------------
  |  Branch (52:12): [True: 273k, False: 2.45M]
  |  Branch (52:28): [True: 45.1k, False: 2.40M]
  ------------------
   53|  2.72M|}
msac.c:inv_recenter:
   75|   131k|static inline unsigned inv_recenter(const unsigned r, const unsigned v) {
   76|   131k|    if (v > (r << 1))
  ------------------
  |  Branch (76:9): [True: 29.9k, False: 101k]
  ------------------
   77|  29.9k|        return v;
   78|   101k|    else if ((v & 1) == 0)
  ------------------
  |  Branch (78:14): [True: 47.8k, False: 53.7k]
  ------------------
   79|  47.8k|        return (v >> 1) + r;
   80|  53.7k|    else
   81|  53.7k|        return r - ((v + 1) >> 1);
   82|   131k|}
warpmv.c:apply_sign:
   59|   474k|static inline int apply_sign(const int v, const int s) {
   60|   474k|    return s < 0 ? -v : v;
  ------------------
  |  Branch (60:12): [True: 98.1k, False: 376k]
  ------------------
   61|   474k|}
warpmv.c:ulog2:
   67|  94.8k|static inline int ulog2(const unsigned v) {
   68|  94.8k|    return 31 ^ clz(v);
   69|  94.8k|}
warpmv.c:apply_sign64:
   63|   635k|static inline int apply_sign64(const int v, const int64_t s) {
   64|   635k|    return s < 0 ? -v : v;
  ------------------
  |  Branch (64:12): [True: 55.8k, False: 580k]
  ------------------
   65|   635k|}
warpmv.c:iclip:
   51|   914k|static inline int iclip(const int v, const int min, const int max) {
   52|   914k|    return v < min ? min : v > max ? max : v;
  ------------------
  |  Branch (52:12): [True: 16.2k, False: 898k]
  |  Branch (52:28): [True: 24.0k, False: 874k]
  ------------------
   53|   914k|}
warpmv.c:u64log2:
   71|  89.2k|static inline int u64log2(const uint64_t v) {
   72|  89.2k|    return 63 ^ clzll(v);
   73|  89.2k|}
itx_tmpl.c:iclip:
   51|   312M|static inline int iclip(const int v, const int min, const int max) {
   52|   312M|    return v < min ? min : v > max ? max : v;
  ------------------
  |  Branch (52:12): [True: 15.0M, False: 297M]
  |  Branch (52:28): [True: 15.7M, False: 281M]
  ------------------
   53|   312M|}
itx_tmpl.c:imin:
   39|   202k|static inline int imin(const int a, const int b) {
   40|   202k|    return a < b ? a : b;
  ------------------
  |  Branch (40:12): [True: 43.4k, False: 159k]
  ------------------
   41|   202k|}
looprestoration_tmpl.c:iclip:
   51|  57.9M|static inline int iclip(const int v, const int min, const int max) {
   52|  57.9M|    return v < min ? min : v > max ? max : v;
  ------------------
  |  Branch (52:12): [True: 6.14k, False: 57.9M]
  |  Branch (52:28): [True: 5.03k, False: 57.9M]
  ------------------
   53|  57.9M|}
looprestoration_tmpl.c:imax:
   35|  60.2M|static inline int imax(const int a, const int b) {
   36|  60.2M|    return a > b ? a : b;
  ------------------
  |  Branch (36:12): [True: 3.65M, False: 56.5M]
  ------------------
   37|  60.2M|}
looprestoration_tmpl.c:umin:
   47|  60.2M|static inline unsigned umin(const unsigned a, const unsigned b) {
   48|  60.2M|    return a < b ? a : b;
  ------------------
  |  Branch (48:12): [True: 60.2M, False: 25.1k]
  ------------------
   49|  60.2M|}
recon_tmpl.c:ulog2:
   67|  16.1M|static inline int ulog2(const unsigned v) {
   68|  16.1M|    return 31 ^ clz(v);
   69|  16.1M|}
recon_tmpl.c:imin:
   39|  53.2M|static inline int imin(const int a, const int b) {
   40|  53.2M|    return a < b ? a : b;
  ------------------
  |  Branch (40:12): [True: 44.4M, False: 8.82M]
  ------------------
   41|  53.2M|}
recon_tmpl.c:imax:
   35|  3.69M|static inline int imax(const int a, const int b) {
   36|  3.69M|    return a > b ? a : b;
  ------------------
  |  Branch (36:12): [True: 2.13M, False: 1.56M]
  ------------------
   37|  3.69M|}
recon_tmpl.c:umin:
   47|   161M|static inline unsigned umin(const unsigned a, const unsigned b) {
   48|   161M|    return a < b ? a : b;
  ------------------
  |  Branch (48:12): [True: 90.9M, False: 70.2M]
  ------------------
   49|   161M|}
recon_tmpl.c:apply_sign64:
   63|  2.39M|static inline int apply_sign64(const int v, const int64_t s) {
   64|  2.39M|    return s < 0 ? -v : v;
  ------------------
  |  Branch (64:12): [True: 219k, False: 2.18M]
  ------------------
   65|  2.39M|}
recon_tmpl.c:iclip:
   51|   761k|static inline int iclip(const int v, const int min, const int max) {
   52|   761k|    return v < min ? min : v > max ? max : v;
  ------------------
  |  Branch (52:12): [True: 57.1k, False: 704k]
  |  Branch (52:28): [True: 7.90k, False: 696k]
  ------------------
   53|   761k|}
itx_1d.c:iclip:
   51|   800M|static inline int iclip(const int v, const int min, const int max) {
   52|   800M|    return v < min ? min : v > max ? max : v;
  ------------------
  |  Branch (52:12): [True: 8.78M, False: 791M]
  |  Branch (52:28): [True: 8.59M, False: 783M]
  ------------------
   53|   800M|}
scan.c:imax:
   35|  3.34k|static inline int imax(const int a, const int b) {
   36|  3.34k|    return a > b ? a : b;
  ------------------
  |  Branch (36:12): [True: 2.82k, False: 523]
  ------------------
   37|  3.34k|}
cdef_apply_tmpl.c:imin:
   39|  1.78M|static inline int imin(const int a, const int b) {
   40|  1.78M|    return a < b ? a : b;
  ------------------
  |  Branch (40:12): [True: 1.52M, False: 261k]
  ------------------
   41|  1.78M|}
cdef_apply_tmpl.c:ulog2:
   67|   317k|static inline int ulog2(const unsigned v) {
   68|   317k|    return 31 ^ clz(v);
   69|   317k|}
ipred_prepare_tmpl.c:imin:
   39|  20.5M|static inline int imin(const int a, const int b) {
   40|  20.5M|    return a < b ? a : b;
  ------------------
  |  Branch (40:12): [True: 18.1M, False: 2.39M]
  ------------------
   41|  20.5M|}
lf_apply_tmpl.c:imin:
   39|   637k|static inline int imin(const int a, const int b) {
   40|   637k|    return a < b ? a : b;
  ------------------
  |  Branch (40:12): [True: 205k, False: 431k]
  ------------------
   41|   637k|}
lr_apply_tmpl.c:imin:
   39|  88.6k|static inline int imin(const int a, const int b) {
   40|  88.6k|    return a < b ? a : b;
  ------------------
  |  Branch (40:12): [True: 37.1k, False: 51.4k]
  ------------------
   41|  88.6k|}

dav1d_cdef_brow_8bpc:
  102|  68.9k|{
  103|  68.9k|    Dav1dFrameContext *const f = (Dav1dFrameContext *)tc->f;
  104|  68.9k|    const int bitdepth_min_8 = BITDEPTH == 8 ? 0 : f->cur.p.bpc - 8;
  ------------------
  |  Branch (104:32): [True: 68.9k, Folded]
  ------------------
  105|  68.9k|    const Dav1dDSPContext *const dsp = f->dsp;
  106|  68.9k|    enum CdefEdgeFlags edges = CDEF_HAVE_BOTTOM | (by_start > 0 ? CDEF_HAVE_TOP : 0);
  ------------------
  |  Branch (106:52): [True: 66.0k, False: 2.86k]
  ------------------
  107|  68.9k|    pixel *ptrs[3] = { p[0], p[1], p[2] };
  108|  68.9k|    const int sbsz = 16;
  109|  68.9k|    const int sb64w = f->sb128w << 1;
  110|  68.9k|    const int damping = f->frame_hdr->cdef.damping + bitdepth_min_8;
  111|  68.9k|    const enum Dav1dPixelLayout layout = f->cur.p.layout;
  112|  68.9k|    const int uv_idx = DAV1D_PIXEL_LAYOUT_I444 - layout;
  113|  68.9k|    const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420;
  114|  68.9k|    const int ss_hor = layout != DAV1D_PIXEL_LAYOUT_I444;
  115|  68.9k|    static const uint8_t uv_dirs[2][8] = { { 0, 1, 2, 3, 4, 5, 6, 7 },
  116|  68.9k|                                           { 7, 0, 2, 4, 5, 6, 6, 6 } };
  117|  68.9k|    const uint8_t *uv_dir = uv_dirs[layout == DAV1D_PIXEL_LAYOUT_I422];
  118|  68.9k|    const int have_tt = f->c->n_tc > 1;
  119|  68.9k|    const int sb128 = f->seq_hdr->sb128;
  120|  68.9k|    const int resize = f->frame_hdr->width[0] != f->frame_hdr->width[1];
  121|  68.9k|    const ptrdiff_t y_stride = PXSTRIDE(f->cur.stride[0]);
  ------------------
  |  |   53|  68.9k|#define PXSTRIDE(x) (x)
  ------------------
  122|  68.9k|    const ptrdiff_t uv_stride = PXSTRIDE(f->cur.stride[1]);
  ------------------
  |  |   53|  68.9k|#define PXSTRIDE(x) (x)
  ------------------
  123|       |
  124|   391k|    for (int bit = 0, by = by_start; by < by_end; by += 2, edges |= CDEF_HAVE_TOP) {
  ------------------
  |  Branch (124:38): [True: 322k, False: 68.9k]
  ------------------
  125|   322k|        const int tf = tc->top_pre_cdef_toggle;
  126|   322k|        const int by_idx = (by & 30) >> 1;
  127|   322k|        if (by + 2 >= f->bh) edges &= ~CDEF_HAVE_BOTTOM;
  ------------------
  |  Branch (127:13): [True: 2.63k, False: 319k]
  ------------------
  128|       |
  129|   322k|        if ((!have_tt || sbrow_start || by + 2 < by_end) &&
  ------------------
  |  Branch (129:14): [True: 322k, False: 0]
  |  Branch (129:26): [True: 0, False: 0]
  |  Branch (129:41): [True: 0, False: 0]
  ------------------
  130|   322k|            edges & CDEF_HAVE_BOTTOM)
  ------------------
  |  Branch (130:13): [True: 319k, False: 2.63k]
  ------------------
  131|   319k|        {
  132|       |            // backup pre-filter data for next iteration
  133|   319k|            pixel *const cdef_top_bak[3] = {
  134|   319k|                f->lf.cdef_line[!tf][0] + have_tt * sby * 4 * y_stride,
  135|   319k|                f->lf.cdef_line[!tf][1] + have_tt * sby * 8 * uv_stride,
  136|   319k|                f->lf.cdef_line[!tf][2] + have_tt * sby * 8 * uv_stride
  137|   319k|            };
  138|   319k|            backup2lines(cdef_top_bak, ptrs, f->cur.stride, layout);
  139|   319k|        }
  140|       |
  141|   322k|        ALIGN_STK_16(pixel, lr_bak, 2 /* idx */, [3 /* plane */][8 /* y */][2 /* x */]);
  ------------------
  |  |  100|   322k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|   322k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
  142|   322k|        pixel *iptrs[3] = { ptrs[0], ptrs[1], ptrs[2] };
  143|   322k|        edges &= ~CDEF_HAVE_LEFT;
  144|   322k|        edges |= CDEF_HAVE_RIGHT;
  145|   322k|        enum Backup2x8Flags prev_flag = 0;
  146|  1.22M|        for (int sbx = 0; sbx < sb64w; sbx++, edges |= CDEF_HAVE_LEFT) {
  ------------------
  |  Branch (146:27): [True: 903k, False: 322k]
  ------------------
  147|   903k|            const int sb128x = sbx >> 1;
  148|   903k|            const int sb64_idx = ((by & sbsz) >> 3) + (sbx & 1);
  149|   903k|            const int cdef_idx = lflvl[sb128x].cdef_idx[sb64_idx];
  150|   903k|            if (cdef_idx == -1 ||
  ------------------
  |  Branch (150:17): [True: 672k, False: 230k]
  ------------------
  151|   230k|                (!f->frame_hdr->cdef.y_strength[cdef_idx] &&
  ------------------
  |  Branch (151:18): [True: 96.5k, False: 133k]
  ------------------
  152|  96.5k|                 !f->frame_hdr->cdef.uv_strength[cdef_idx]))
  ------------------
  |  Branch (152:18): [True: 85.0k, False: 11.4k]
  ------------------
  153|   758k|            {
  154|   758k|                prev_flag = 0;
  155|   758k|                goto next_sb;
  156|   758k|            }
  157|       |
  158|       |            // Create a complete 32-bit mask for the sb row ahead of time.
  159|   145k|            const uint16_t (*noskip_row)[2] = &lflvl[sb128x].noskip_mask[by_idx];
  160|   145k|            const unsigned noskip_mask = (unsigned) noskip_row[0][1] << 16 |
  161|   145k|                                                    noskip_row[0][0];
  162|       |
  163|   145k|            const int y_lvl = f->frame_hdr->cdef.y_strength[cdef_idx];
  164|   145k|            const int uv_lvl = f->frame_hdr->cdef.uv_strength[cdef_idx];
  165|   145k|            const enum Backup2x8Flags flag = !!y_lvl + (!!uv_lvl << 1);
  166|       |
  167|   145k|            const int y_pri_lvl = (y_lvl >> 2) << bitdepth_min_8;
  168|   145k|            int y_sec_lvl = y_lvl & 3;
  169|   145k|            y_sec_lvl += y_sec_lvl == 3;
  170|   145k|            y_sec_lvl <<= bitdepth_min_8;
  171|       |
  172|   145k|            const int uv_pri_lvl = (uv_lvl >> 2) << bitdepth_min_8;
  173|   145k|            int uv_sec_lvl = uv_lvl & 3;
  174|   145k|            uv_sec_lvl += uv_sec_lvl == 3;
  175|   145k|            uv_sec_lvl <<= bitdepth_min_8;
  176|       |
  177|   145k|            pixel *bptrs[3] = { iptrs[0], iptrs[1], iptrs[2] };
  178|  1.16M|            for (int bx = sbx * sbsz; bx < imin((sbx + 1) * sbsz, f->bw);
  ------------------
  |  Branch (178:39): [True: 1.02M, False: 145k]
  ------------------
  179|  1.02M|                 bx += 2, edges |= CDEF_HAVE_LEFT)
  180|  1.02M|            {
  181|  1.02M|                if (bx + 2 >= f->bw) edges &= ~CDEF_HAVE_RIGHT;
  ------------------
  |  Branch (181:21): [True: 30.4k, False: 990k]
  ------------------
  182|       |
  183|       |                // check if this 8x8 block had any coded coefficients; if not,
  184|       |                // go to the next block
  185|  1.02M|                const uint32_t bx_mask = 3U << (bx & 30);
  186|  1.02M|                if (!(noskip_mask & bx_mask)) {
  ------------------
  |  Branch (186:21): [True: 178k, False: 841k]
  ------------------
  187|   178k|                    prev_flag = 0;
  188|   178k|                    goto next_b;
  189|   178k|                }
  190|   841k|                const enum Backup2x8Flags do_left = (prev_flag ^ flag) & flag;
  191|   841k|                prev_flag = flag;
  192|   841k|                if (do_left && edges & CDEF_HAVE_LEFT) {
  ------------------
  |  Branch (192:21): [True: 61.6k, False: 779k]
  |  Branch (192:32): [True: 33.9k, False: 27.7k]
  ------------------
  193|       |                    // we didn't backup the prefilter data because it wasn't
  194|       |                    // there, so do it here instead
  195|  33.9k|                    backup2x8(lr_bak[bit], bptrs, f->cur.stride, 0, layout, do_left);
  196|  33.9k|                }
  197|   841k|                if (edges & CDEF_HAVE_RIGHT) {
  ------------------
  |  Branch (197:21): [True: 815k, False: 26.4k]
  ------------------
  198|       |                    // backup pre-filter data for next iteration
  199|   815k|                    backup2x8(lr_bak[!bit], bptrs, f->cur.stride, 8, layout, flag);
  200|   815k|                }
  201|       |
  202|   841k|                int dir;
  203|   841k|                unsigned variance;
  204|   841k|                if (y_pri_lvl || uv_pri_lvl)
  ------------------
  |  Branch (204:21): [True: 640k, False: 201k]
  |  Branch (204:34): [True: 115k, False: 85.9k]
  ------------------
  205|   755k|                    dir = dsp->cdef.dir(bptrs[0], f->cur.stride[0],
  206|   755k|                                        &variance HIGHBD_CALL_SUFFIX);
  207|       |
  208|   841k|                const pixel *top, *bot;
  209|   841k|                ptrdiff_t offset;
  210|       |
  211|   841k|                if (!have_tt) goto st_y;
  ------------------
  |  Branch (211:21): [True: 841k, False: 0]
  ------------------
  212|      0|                if (sbrow_start && by == by_start) {
  ------------------
  |  Branch (212:21): [True: 0, False: 0]
  |  Branch (212:36): [True: 0, False: 0]
  ------------------
  213|      0|                    if (resize) {
  ------------------
  |  Branch (213:25): [True: 0, False: 0]
  ------------------
  214|      0|                        offset = (sby - 1) * 4 * y_stride + bx * 4;
  215|      0|                        top = &f->lf.cdef_lpf_line[0][offset];
  216|      0|                    } else {
  217|      0|                        offset = (sby * (4 << sb128) - 4) * y_stride + bx * 4;
  218|      0|                        top = &f->lf.lr_lpf_line[0][offset];
  219|      0|                    }
  220|      0|                    bot = bptrs[0] + 8 * y_stride;
  221|      0|                } else if (!sbrow_start && by + 2 >= by_end) {
  ------------------
  |  Branch (221:28): [True: 0, False: 0]
  |  Branch (221:44): [True: 0, False: 0]
  ------------------
  222|      0|                    top = &f->lf.cdef_line[tf][0][sby * 4 * y_stride + bx * 4];
  223|      0|                    if (resize) {
  ------------------
  |  Branch (223:25): [True: 0, False: 0]
  ------------------
  224|      0|                        offset = (sby * 4 + 2) * y_stride + bx * 4;
  225|      0|                        bot = &f->lf.cdef_lpf_line[0][offset];
  226|      0|                    } else {
  227|      0|                        const int line = sby * (4 << sb128) + 4 * sb128 + 2;
  228|      0|                        offset = line * y_stride + bx * 4;
  229|      0|                        bot = &f->lf.lr_lpf_line[0][offset];
  230|      0|                    }
  231|      0|                } else {
  232|   841k|            st_y:;
  233|   841k|                    offset = sby * 4 * y_stride;
  234|   841k|                    top = &f->lf.cdef_line[tf][0][have_tt * offset + bx * 4];
  235|   841k|                    bot = bptrs[0] + 8 * y_stride;
  236|   841k|                }
  237|   841k|                if (y_pri_lvl) {
  ------------------
  |  Branch (237:21): [True: 640k, False: 201k]
  ------------------
  238|   640k|                    const int adj_y_pri_lvl = adjust_strength(y_pri_lvl, variance);
  239|   640k|                    if (adj_y_pri_lvl || y_sec_lvl)
  ------------------
  |  Branch (239:25): [True: 340k, False: 299k]
  |  Branch (239:42): [True: 202k, False: 97.4k]
  ------------------
  240|   542k|                        dsp->cdef.fb[0](bptrs[0], f->cur.stride[0], lr_bak[bit][0],
  241|   542k|                                        top, bot, adj_y_pri_lvl, y_sec_lvl,
  242|   542k|                                        dir, damping, edges HIGHBD_CALL_SUFFIX);
  243|   640k|                } else if (y_sec_lvl)
  ------------------
  |  Branch (243:28): [True: 126k, False: 75.0k]
  ------------------
  244|   126k|                    dsp->cdef.fb[0](bptrs[0], f->cur.stride[0], lr_bak[bit][0],
  245|   126k|                                    top, bot, 0, y_sec_lvl, 0, damping,
  246|   126k|                                    edges HIGHBD_CALL_SUFFIX);
  247|       |
  248|   841k|                if (!uv_lvl) goto skip_uv;
  ------------------
  |  Branch (248:21): [True: 160k, False: 681k]
  ------------------
  249|   841k|                assert(layout != DAV1D_PIXEL_LAYOUT_I400);
  ------------------
  |  Branch (249:17): [True: 681k, False: 0]
  ------------------
  250|       |
  251|   681k|                const int uvdir = uv_pri_lvl ? uv_dir[dir] : 0;
  ------------------
  |  Branch (251:35): [True: 601k, False: 79.8k]
  ------------------
  252|  2.04M|                for (int pl = 1; pl <= 2; pl++) {
  ------------------
  |  Branch (252:34): [True: 1.36M, False: 681k]
  ------------------
  253|  1.36M|                    if (!have_tt) goto st_uv;
  ------------------
  |  Branch (253:25): [True: 1.36M, False: 0]
  ------------------
  254|      0|                    if (sbrow_start && by == by_start) {
  ------------------
  |  Branch (254:25): [True: 0, False: 0]
  |  Branch (254:40): [True: 0, False: 0]
  ------------------
  255|      0|                        if (resize) {
  ------------------
  |  Branch (255:29): [True: 0, False: 0]
  ------------------
  256|      0|                            offset = (sby - 1) * 4 * uv_stride + (bx * 4 >> ss_hor);
  257|      0|                            top = &f->lf.cdef_lpf_line[pl][offset];
  258|      0|                        } else {
  259|      0|                            const int line = sby * (4 << sb128) - 4;
  260|      0|                            offset = line * uv_stride + (bx * 4 >> ss_hor);
  261|      0|                            top = &f->lf.lr_lpf_line[pl][offset];
  262|      0|                        }
  263|      0|                        bot = bptrs[pl] + (8 >> ss_ver) * uv_stride;
  264|      0|                    } else if (!sbrow_start && by + 2 >= by_end) {
  ------------------
  |  Branch (264:32): [True: 0, False: 0]
  |  Branch (264:48): [True: 0, False: 0]
  ------------------
  265|      0|                        const ptrdiff_t top_offset = sby * 8 * uv_stride +
  266|      0|                                                     (bx * 4 >> ss_hor);
  267|      0|                        top = &f->lf.cdef_line[tf][pl][top_offset];
  268|      0|                        if (resize) {
  ------------------
  |  Branch (268:29): [True: 0, False: 0]
  ------------------
  269|      0|                            offset = (sby * 4 + 2) * uv_stride + (bx * 4 >> ss_hor);
  270|      0|                            bot = &f->lf.cdef_lpf_line[pl][offset];
  271|      0|                        } else {
  272|      0|                            const int line = sby * (4 << sb128) + 4 * sb128 + 2;
  273|      0|                            offset = line * uv_stride + (bx * 4 >> ss_hor);
  274|      0|                            bot = &f->lf.lr_lpf_line[pl][offset];
  275|      0|                        }
  276|      0|                    } else {
  277|  1.36M|                st_uv:;
  278|  1.36M|                        const ptrdiff_t offset = sby * 8 * uv_stride;
  279|  1.36M|                        top = &f->lf.cdef_line[tf][pl][have_tt * offset + (bx * 4 >> ss_hor)];
  280|  1.36M|                        bot = bptrs[pl] + (8 >> ss_ver) * uv_stride;
  281|  1.36M|                    }
  282|  1.36M|                    dsp->cdef.fb[uv_idx](bptrs[pl], f->cur.stride[1],
  283|  1.36M|                                         lr_bak[bit][pl], top, bot,
  284|  1.36M|                                         uv_pri_lvl, uv_sec_lvl, uvdir,
  285|  1.36M|                                         damping - 1, edges HIGHBD_CALL_SUFFIX);
  286|  1.36M|                }
  287|       |
  288|   841k|            skip_uv:
  289|   841k|                bit ^= 1;
  290|       |
  291|  1.02M|            next_b:
  292|  1.02M|                bptrs[0] += 8;
  293|  1.02M|                bptrs[1] += 8 >> ss_hor;
  294|  1.02M|                bptrs[2] += 8 >> ss_hor;
  295|  1.02M|            }
  296|       |
  297|   903k|        next_sb:
  298|   903k|            iptrs[0] += sbsz * 4;
  299|   903k|            iptrs[1] += sbsz * 4 >> ss_hor;
  300|   903k|            iptrs[2] += sbsz * 4 >> ss_hor;
  301|   903k|        }
  302|       |
  303|   322k|        ptrs[0] += 8 * PXSTRIDE(f->cur.stride[0]);
  ------------------
  |  |   53|   322k|#define PXSTRIDE(x) (x)
  ------------------
  304|   322k|        ptrs[1] += 8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver;
  ------------------
  |  |   53|   322k|#define PXSTRIDE(x) (x)
  ------------------
  305|   322k|        ptrs[2] += 8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver;
  ------------------
  |  |   53|   322k|#define PXSTRIDE(x) (x)
  ------------------
  306|   322k|        tc->top_pre_cdef_toggle ^= 1;
  307|   322k|    }
  308|  68.9k|}
cdef_apply_tmpl.c:backup2lines:
   44|   588k|{
   45|   588k|    const ptrdiff_t y_stride = PXSTRIDE(stride[0]);
  ------------------
  |  |   53|   588k|#define PXSTRIDE(x) (x)
  ------------------
   46|   588k|    if (y_stride < 0)
  ------------------
  |  Branch (46:9): [True: 0, False: 588k]
  ------------------
   47|      0|        pixel_copy(dst[0] + y_stride, src[0] + 7 * y_stride, -2 * y_stride);
  ------------------
  |  |   47|      0|#define pixel_copy memcpy
  ------------------
   48|   588k|    else
   49|   588k|        pixel_copy(dst[0], src[0] + 6 * y_stride, 2 * y_stride);
  ------------------
  |  |   47|   588k|#define pixel_copy memcpy
  ------------------
   50|       |
   51|   588k|    if (layout != DAV1D_PIXEL_LAYOUT_I400) {
  ------------------
  |  Branch (51:9): [True: 224k, False: 363k]
  ------------------
   52|   224k|        const ptrdiff_t uv_stride = PXSTRIDE(stride[1]);
  ------------------
  |  |   53|   224k|#define PXSTRIDE(x) (x)
  ------------------
   53|   224k|        if (uv_stride < 0) {
  ------------------
  |  Branch (53:13): [True: 0, False: 224k]
  ------------------
   54|      0|            const int uv_off = layout == DAV1D_PIXEL_LAYOUT_I420 ? 3 : 7;
  ------------------
  |  Branch (54:32): [True: 0, False: 0]
  ------------------
   55|      0|            pixel_copy(dst[1] + uv_stride, src[1] + uv_off * uv_stride, -2 * uv_stride);
  ------------------
  |  |   47|      0|#define pixel_copy memcpy
  ------------------
   56|      0|            pixel_copy(dst[2] + uv_stride, src[2] + uv_off * uv_stride, -2 * uv_stride);
  ------------------
  |  |   47|      0|#define pixel_copy memcpy
  ------------------
   57|   224k|        } else {
   58|   224k|            const int uv_off = layout == DAV1D_PIXEL_LAYOUT_I420 ? 2 : 6;
  ------------------
  |  Branch (58:32): [True: 105k, False: 118k]
  ------------------
   59|   224k|            pixel_copy(dst[1], src[1] + uv_off * uv_stride, 2 * uv_stride);
  ------------------
  |  |   47|   224k|#define pixel_copy memcpy
  ------------------
   60|   224k|            pixel_copy(dst[2], src[2] + uv_off * uv_stride, 2 * uv_stride);
  ------------------
  |  |   47|   224k|#define pixel_copy memcpy
  ------------------
   61|   224k|        }
   62|   224k|    }
   63|   588k|}
cdef_apply_tmpl.c:backup2x8:
   70|  1.07M|{
   71|  1.07M|    ptrdiff_t y_off = 0;
   72|  1.07M|    if (flag & BACKUP_2X8_Y) {
  ------------------
  |  Branch (72:9): [True: 955k, False: 117k]
  ------------------
   73|  8.60M|        for (int y = 0; y < 8; y++, y_off += PXSTRIDE(src_stride[0]))
  ------------------
  |  |   53|  7.64M|#define PXSTRIDE(x) (x)
  ------------------
  |  Branch (73:25): [True: 7.64M, False: 955k]
  ------------------
   74|  7.64M|            pixel_copy(dst[0][y], &src[0][y_off + x_off - 2], 2);
  ------------------
  |  |   47|  7.64M|#define pixel_copy memcpy
  ------------------
   75|   955k|    }
   76|       |
   77|  1.07M|    if (layout == DAV1D_PIXEL_LAYOUT_I400 || !(flag & BACKUP_2X8_UV))
  ------------------
  |  Branch (77:9): [True: 127k, False: 945k]
  |  Branch (77:46): [True: 140k, False: 805k]
  ------------------
   78|   267k|        return;
   79|       |
   80|   805k|    const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420;
   81|   805k|    const int ss_hor = layout != DAV1D_PIXEL_LAYOUT_I444;
   82|       |
   83|   805k|    x_off >>= ss_hor;
   84|   805k|    y_off = 0;
   85|  4.67M|    for (int y = 0; y < (8 >> ss_ver); y++, y_off += PXSTRIDE(src_stride[1])) {
  ------------------
  |  |   53|  3.86M|#define PXSTRIDE(x) (x)
  ------------------
  |  Branch (85:21): [True: 3.86M, False: 805k]
  ------------------
   86|  3.86M|        pixel_copy(dst[1][y], &src[1][y_off + x_off - 2], 2);
  ------------------
  |  |   47|  3.86M|#define pixel_copy memcpy
  ------------------
   87|  3.86M|        pixel_copy(dst[2][y], &src[2][y_off + x_off - 2], 2);
  ------------------
  |  |   47|  3.86M|#define pixel_copy memcpy
  ------------------
   88|  3.86M|    }
   89|   805k|}
cdef_apply_tmpl.c:adjust_strength:
   91|   813k|static int adjust_strength(const int strength, const unsigned var) {
   92|   813k|    if (!var) return 0;
  ------------------
  |  Branch (92:9): [True: 431k, False: 381k]
  ------------------
   93|   381k|    const int i = var >> 6 ? imin(ulog2(var >> 6), 12) : 0;
  ------------------
  |  Branch (93:19): [True: 317k, False: 63.9k]
  ------------------
   94|   381k|    return (strength * (4 + i) + 8) >> 4;
   95|   813k|}
dav1d_cdef_brow_16bpc:
  102|  50.2k|{
  103|  50.2k|    Dav1dFrameContext *const f = (Dav1dFrameContext *)tc->f;
  104|  50.2k|    const int bitdepth_min_8 = BITDEPTH == 8 ? 0 : f->cur.p.bpc - 8;
  ------------------
  |  Branch (104:32): [Folded, False: 50.2k]
  ------------------
  105|  50.2k|    const Dav1dDSPContext *const dsp = f->dsp;
  106|  50.2k|    enum CdefEdgeFlags edges = CDEF_HAVE_BOTTOM | (by_start > 0 ? CDEF_HAVE_TOP : 0);
  ------------------
  |  Branch (106:52): [True: 47.5k, False: 2.73k]
  ------------------
  107|  50.2k|    pixel *ptrs[3] = { p[0], p[1], p[2] };
  108|  50.2k|    const int sbsz = 16;
  109|  50.2k|    const int sb64w = f->sb128w << 1;
  110|  50.2k|    const int damping = f->frame_hdr->cdef.damping + bitdepth_min_8;
  111|  50.2k|    const enum Dav1dPixelLayout layout = f->cur.p.layout;
  112|  50.2k|    const int uv_idx = DAV1D_PIXEL_LAYOUT_I444 - layout;
  113|  50.2k|    const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420;
  114|  50.2k|    const int ss_hor = layout != DAV1D_PIXEL_LAYOUT_I444;
  115|  50.2k|    static const uint8_t uv_dirs[2][8] = { { 0, 1, 2, 3, 4, 5, 6, 7 },
  116|  50.2k|                                           { 7, 0, 2, 4, 5, 6, 6, 6 } };
  117|  50.2k|    const uint8_t *uv_dir = uv_dirs[layout == DAV1D_PIXEL_LAYOUT_I422];
  118|  50.2k|    const int have_tt = f->c->n_tc > 1;
  119|  50.2k|    const int sb128 = f->seq_hdr->sb128;
  120|  50.2k|    const int resize = f->frame_hdr->width[0] != f->frame_hdr->width[1];
  121|  50.2k|    const ptrdiff_t y_stride = PXSTRIDE(f->cur.stride[0]);
  122|  50.2k|    const ptrdiff_t uv_stride = PXSTRIDE(f->cur.stride[1]);
  123|       |
  124|   321k|    for (int bit = 0, by = by_start; by < by_end; by += 2, edges |= CDEF_HAVE_TOP) {
  ------------------
  |  Branch (124:38): [True: 270k, False: 50.2k]
  ------------------
  125|   270k|        const int tf = tc->top_pre_cdef_toggle;
  126|   270k|        const int by_idx = (by & 30) >> 1;
  127|   270k|        if (by + 2 >= f->bh) edges &= ~CDEF_HAVE_BOTTOM;
  ------------------
  |  Branch (127:13): [True: 2.51k, False: 268k]
  ------------------
  128|       |
  129|   270k|        if ((!have_tt || sbrow_start || by + 2 < by_end) &&
  ------------------
  |  Branch (129:14): [True: 270k, False: 0]
  |  Branch (129:26): [True: 0, False: 0]
  |  Branch (129:41): [True: 0, False: 0]
  ------------------
  130|   270k|            edges & CDEF_HAVE_BOTTOM)
  ------------------
  |  Branch (130:13): [True: 268k, False: 2.51k]
  ------------------
  131|   268k|        {
  132|       |            // backup pre-filter data for next iteration
  133|   268k|            pixel *const cdef_top_bak[3] = {
  134|   268k|                f->lf.cdef_line[!tf][0] + have_tt * sby * 4 * y_stride,
  135|   268k|                f->lf.cdef_line[!tf][1] + have_tt * sby * 8 * uv_stride,
  136|   268k|                f->lf.cdef_line[!tf][2] + have_tt * sby * 8 * uv_stride
  137|   268k|            };
  138|   268k|            backup2lines(cdef_top_bak, ptrs, f->cur.stride, layout);
  139|   268k|        }
  140|       |
  141|   270k|        ALIGN_STK_16(pixel, lr_bak, 2 /* idx */, [3 /* plane */][8 /* y */][2 /* x */]);
  ------------------
  |  |  100|   270k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|   270k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
  142|   270k|        pixel *iptrs[3] = { ptrs[0], ptrs[1], ptrs[2] };
  143|   270k|        edges &= ~CDEF_HAVE_LEFT;
  144|   270k|        edges |= CDEF_HAVE_RIGHT;
  145|   270k|        enum Backup2x8Flags prev_flag = 0;
  146|   927k|        for (int sbx = 0; sbx < sb64w; sbx++, edges |= CDEF_HAVE_LEFT) {
  ------------------
  |  Branch (146:27): [True: 656k, False: 270k]
  ------------------
  147|   656k|            const int sb128x = sbx >> 1;
  148|   656k|            const int sb64_idx = ((by & sbsz) >> 3) + (sbx & 1);
  149|   656k|            const int cdef_idx = lflvl[sb128x].cdef_idx[sb64_idx];
  150|   656k|            if (cdef_idx == -1 ||
  ------------------
  |  Branch (150:17): [True: 545k, False: 110k]
  ------------------
  151|   110k|                (!f->frame_hdr->cdef.y_strength[cdef_idx] &&
  ------------------
  |  Branch (151:18): [True: 70.3k, False: 40.4k]
  ------------------
  152|  70.3k|                 !f->frame_hdr->cdef.uv_strength[cdef_idx]))
  ------------------
  |  Branch (152:18): [True: 61.5k, False: 8.80k]
  ------------------
  153|   607k|            {
  154|   607k|                prev_flag = 0;
  155|   607k|                goto next_sb;
  156|   607k|            }
  157|       |
  158|       |            // Create a complete 32-bit mask for the sb row ahead of time.
  159|  49.2k|            const uint16_t (*noskip_row)[2] = &lflvl[sb128x].noskip_mask[by_idx];
  160|  49.2k|            const unsigned noskip_mask = (unsigned) noskip_row[0][1] << 16 |
  161|  49.2k|                                                    noskip_row[0][0];
  162|       |
  163|  49.2k|            const int y_lvl = f->frame_hdr->cdef.y_strength[cdef_idx];
  164|  49.2k|            const int uv_lvl = f->frame_hdr->cdef.uv_strength[cdef_idx];
  165|  49.2k|            const enum Backup2x8Flags flag = !!y_lvl + (!!uv_lvl << 1);
  166|       |
  167|  49.2k|            const int y_pri_lvl = (y_lvl >> 2) << bitdepth_min_8;
  168|  49.2k|            int y_sec_lvl = y_lvl & 3;
  169|  49.2k|            y_sec_lvl += y_sec_lvl == 3;
  170|  49.2k|            y_sec_lvl <<= bitdepth_min_8;
  171|       |
  172|  49.2k|            const int uv_pri_lvl = (uv_lvl >> 2) << bitdepth_min_8;
  173|  49.2k|            int uv_sec_lvl = uv_lvl & 3;
  174|  49.2k|            uv_sec_lvl += uv_sec_lvl == 3;
  175|  49.2k|            uv_sec_lvl <<= bitdepth_min_8;
  176|       |
  177|  49.2k|            pixel *bptrs[3] = { iptrs[0], iptrs[1], iptrs[2] };
  178|   304k|            for (int bx = sbx * sbsz; bx < imin((sbx + 1) * sbsz, f->bw);
  ------------------
  |  Branch (178:39): [True: 255k, False: 49.2k]
  ------------------
  179|   255k|                 bx += 2, edges |= CDEF_HAVE_LEFT)
  180|   255k|            {
  181|   255k|                if (bx + 2 >= f->bw) edges &= ~CDEF_HAVE_RIGHT;
  ------------------
  |  Branch (181:21): [True: 18.6k, False: 236k]
  ------------------
  182|       |
  183|       |                // check if this 8x8 block had any coded coefficients; if not,
  184|       |                // go to the next block
  185|   255k|                const uint32_t bx_mask = 3U << (bx & 30);
  186|   255k|                if (!(noskip_mask & bx_mask)) {
  ------------------
  |  Branch (186:21): [True: 15.4k, False: 240k]
  ------------------
  187|  15.4k|                    prev_flag = 0;
  188|  15.4k|                    goto next_b;
  189|  15.4k|                }
  190|   240k|                const enum Backup2x8Flags do_left = (prev_flag ^ flag) & flag;
  191|   240k|                prev_flag = flag;
  192|   240k|                if (do_left && edges & CDEF_HAVE_LEFT) {
  ------------------
  |  Branch (192:21): [True: 20.5k, False: 219k]
  |  Branch (192:32): [True: 2.37k, False: 18.1k]
  ------------------
  193|       |                    // we didn't backup the prefilter data because it wasn't
  194|       |                    // there, so do it here instead
  195|  2.37k|                    backup2x8(lr_bak[bit], bptrs, f->cur.stride, 0, layout, do_left);
  196|  2.37k|                }
  197|   240k|                if (edges & CDEF_HAVE_RIGHT) {
  ------------------
  |  Branch (197:21): [True: 221k, False: 18.2k]
  ------------------
  198|       |                    // backup pre-filter data for next iteration
  199|   221k|                    backup2x8(lr_bak[!bit], bptrs, f->cur.stride, 8, layout, flag);
  200|   221k|                }
  201|       |
  202|   240k|                int dir;
  203|   240k|                unsigned variance;
  204|   240k|                if (y_pri_lvl || uv_pri_lvl)
  ------------------
  |  Branch (204:21): [True: 173k, False: 66.9k]
  |  Branch (204:34): [True: 54.5k, False: 12.4k]
  ------------------
  205|   227k|                    dir = dsp->cdef.dir(bptrs[0], f->cur.stride[0],
  206|   227k|                                        &variance HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|   227k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
  207|       |
  208|   240k|                const pixel *top, *bot;
  209|   240k|                ptrdiff_t offset;
  210|       |
  211|   240k|                if (!have_tt) goto st_y;
  ------------------
  |  Branch (211:21): [True: 240k, False: 0]
  ------------------
  212|      0|                if (sbrow_start && by == by_start) {
  ------------------
  |  Branch (212:21): [True: 0, False: 0]
  |  Branch (212:36): [True: 0, False: 0]
  ------------------
  213|      0|                    if (resize) {
  ------------------
  |  Branch (213:25): [True: 0, False: 0]
  ------------------
  214|      0|                        offset = (sby - 1) * 4 * y_stride + bx * 4;
  215|      0|                        top = &f->lf.cdef_lpf_line[0][offset];
  216|      0|                    } else {
  217|      0|                        offset = (sby * (4 << sb128) - 4) * y_stride + bx * 4;
  218|      0|                        top = &f->lf.lr_lpf_line[0][offset];
  219|      0|                    }
  220|      0|                    bot = bptrs[0] + 8 * y_stride;
  221|      0|                } else if (!sbrow_start && by + 2 >= by_end) {
  ------------------
  |  Branch (221:28): [True: 0, False: 0]
  |  Branch (221:44): [True: 0, False: 0]
  ------------------
  222|      0|                    top = &f->lf.cdef_line[tf][0][sby * 4 * y_stride + bx * 4];
  223|      0|                    if (resize) {
  ------------------
  |  Branch (223:25): [True: 0, False: 0]
  ------------------
  224|      0|                        offset = (sby * 4 + 2) * y_stride + bx * 4;
  225|      0|                        bot = &f->lf.cdef_lpf_line[0][offset];
  226|      0|                    } else {
  227|      0|                        const int line = sby * (4 << sb128) + 4 * sb128 + 2;
  228|      0|                        offset = line * y_stride + bx * 4;
  229|      0|                        bot = &f->lf.lr_lpf_line[0][offset];
  230|      0|                    }
  231|      0|                } else {
  232|   240k|            st_y:;
  233|   240k|                    offset = sby * 4 * y_stride;
  234|   240k|                    top = &f->lf.cdef_line[tf][0][have_tt * offset + bx * 4];
  235|   240k|                    bot = bptrs[0] + 8 * y_stride;
  236|   240k|                }
  237|   240k|                if (y_pri_lvl) {
  ------------------
  |  Branch (237:21): [True: 173k, False: 66.9k]
  ------------------
  238|   173k|                    const int adj_y_pri_lvl = adjust_strength(y_pri_lvl, variance);
  239|   173k|                    if (adj_y_pri_lvl || y_sec_lvl)
  ------------------
  |  Branch (239:25): [True: 11.0k, False: 161k]
  |  Branch (239:42): [True: 84.8k, False: 77.1k]
  ------------------
  240|  95.8k|                        dsp->cdef.fb[0](bptrs[0], f->cur.stride[0], lr_bak[bit][0],
  241|  95.8k|                                        top, bot, adj_y_pri_lvl, y_sec_lvl,
  242|  95.8k|                                        dir, damping, edges HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  95.8k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
  243|   173k|                } else if (y_sec_lvl)
  ------------------
  |  Branch (243:28): [True: 23.7k, False: 43.2k]
  ------------------
  244|  23.7k|                    dsp->cdef.fb[0](bptrs[0], f->cur.stride[0], lr_bak[bit][0],
  245|  23.7k|                                    top, bot, 0, y_sec_lvl, 0, damping,
  246|  23.7k|                                    edges HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  23.7k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
  247|       |
  248|   240k|                if (!uv_lvl) goto skip_uv;
  ------------------
  |  Branch (248:21): [True: 111k, False: 128k]
  ------------------
  249|   240k|                assert(layout != DAV1D_PIXEL_LAYOUT_I400);
  ------------------
  |  Branch (249:17): [True: 128k, False: 0]
  ------------------
  250|       |
  251|   128k|                const int uvdir = uv_pri_lvl ? uv_dir[dir] : 0;
  ------------------
  |  Branch (251:35): [True: 122k, False: 5.81k]
  ------------------
  252|   384k|                for (int pl = 1; pl <= 2; pl++) {
  ------------------
  |  Branch (252:34): [True: 256k, False: 128k]
  ------------------
  253|   256k|                    if (!have_tt) goto st_uv;
  ------------------
  |  Branch (253:25): [True: 256k, False: 0]
  ------------------
  254|      0|                    if (sbrow_start && by == by_start) {
  ------------------
  |  Branch (254:25): [True: 0, False: 0]
  |  Branch (254:40): [True: 0, False: 0]
  ------------------
  255|      0|                        if (resize) {
  ------------------
  |  Branch (255:29): [True: 0, False: 0]
  ------------------
  256|      0|                            offset = (sby - 1) * 4 * uv_stride + (bx * 4 >> ss_hor);
  257|      0|                            top = &f->lf.cdef_lpf_line[pl][offset];
  258|      0|                        } else {
  259|      0|                            const int line = sby * (4 << sb128) - 4;
  260|      0|                            offset = line * uv_stride + (bx * 4 >> ss_hor);
  261|      0|                            top = &f->lf.lr_lpf_line[pl][offset];
  262|      0|                        }
  263|      0|                        bot = bptrs[pl] + (8 >> ss_ver) * uv_stride;
  264|      0|                    } else if (!sbrow_start && by + 2 >= by_end) {
  ------------------
  |  Branch (264:32): [True: 0, False: 0]
  |  Branch (264:48): [True: 0, False: 0]
  ------------------
  265|      0|                        const ptrdiff_t top_offset = sby * 8 * uv_stride +
  266|      0|                                                     (bx * 4 >> ss_hor);
  267|      0|                        top = &f->lf.cdef_line[tf][pl][top_offset];
  268|      0|                        if (resize) {
  ------------------
  |  Branch (268:29): [True: 0, False: 0]
  ------------------
  269|      0|                            offset = (sby * 4 + 2) * uv_stride + (bx * 4 >> ss_hor);
  270|      0|                            bot = &f->lf.cdef_lpf_line[pl][offset];
  271|      0|                        } else {
  272|      0|                            const int line = sby * (4 << sb128) + 4 * sb128 + 2;
  273|      0|                            offset = line * uv_stride + (bx * 4 >> ss_hor);
  274|      0|                            bot = &f->lf.lr_lpf_line[pl][offset];
  275|      0|                        }
  276|      0|                    } else {
  277|   256k|                st_uv:;
  278|   256k|                        const ptrdiff_t offset = sby * 8 * uv_stride;
  279|   256k|                        top = &f->lf.cdef_line[tf][pl][have_tt * offset + (bx * 4 >> ss_hor)];
  280|   256k|                        bot = bptrs[pl] + (8 >> ss_ver) * uv_stride;
  281|   256k|                    }
  282|   256k|                    dsp->cdef.fb[uv_idx](bptrs[pl], f->cur.stride[1],
  283|   256k|                                         lr_bak[bit][pl], top, bot,
  284|   256k|                                         uv_pri_lvl, uv_sec_lvl, uvdir,
  285|   256k|                                         damping - 1, edges HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|   256k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
  286|   256k|                }
  287|       |
  288|   240k|            skip_uv:
  289|   240k|                bit ^= 1;
  290|       |
  291|   255k|            next_b:
  292|   255k|                bptrs[0] += 8;
  293|   255k|                bptrs[1] += 8 >> ss_hor;
  294|   255k|                bptrs[2] += 8 >> ss_hor;
  295|   255k|            }
  296|       |
  297|   656k|        next_sb:
  298|   656k|            iptrs[0] += sbsz * 4;
  299|   656k|            iptrs[1] += sbsz * 4 >> ss_hor;
  300|   656k|            iptrs[2] += sbsz * 4 >> ss_hor;
  301|   656k|        }
  302|       |
  303|   270k|        ptrs[0] += 8 * PXSTRIDE(f->cur.stride[0]);
  304|   270k|        ptrs[1] += 8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver;
  305|   270k|        ptrs[2] += 8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver;
  306|   270k|        tc->top_pre_cdef_toggle ^= 1;
  307|   270k|    }
  308|  50.2k|}

dav1d_cdef_dsp_init_8bpc:
  320|  3.41k|COLD void bitfn(dav1d_cdef_dsp_init)(Dav1dCdefDSPContext *const c) {
  321|  3.41k|    c->dir = cdef_find_dir_c;
  322|  3.41k|    c->fb[0] = cdef_filter_block_8x8_c;
  323|  3.41k|    c->fb[1] = cdef_filter_block_4x8_c;
  324|  3.41k|    c->fb[2] = cdef_filter_block_4x4_c;
  325|       |
  326|  3.41k|#if HAVE_ASM
  327|       |#if ARCH_AARCH64 || ARCH_ARM
  328|       |    cdef_dsp_init_arm(c);
  329|       |#elif ARCH_PPC64LE
  330|       |    cdef_dsp_init_ppc(c);
  331|       |#elif ARCH_RISCV
  332|       |    cdef_dsp_init_riscv(c);
  333|       |#elif ARCH_X86
  334|       |    cdef_dsp_init_x86(c);
  335|       |#elif ARCH_LOONGARCH64
  336|       |    cdef_dsp_init_loongarch(c);
  337|       |#endif
  338|  3.41k|#endif
  339|  3.41k|}
dav1d_cdef_dsp_init_16bpc:
  320|  4.61k|COLD void bitfn(dav1d_cdef_dsp_init)(Dav1dCdefDSPContext *const c) {
  321|  4.61k|    c->dir = cdef_find_dir_c;
  322|  4.61k|    c->fb[0] = cdef_filter_block_8x8_c;
  323|  4.61k|    c->fb[1] = cdef_filter_block_4x8_c;
  324|  4.61k|    c->fb[2] = cdef_filter_block_4x4_c;
  325|       |
  326|  4.61k|#if HAVE_ASM
  327|       |#if ARCH_AARCH64 || ARCH_ARM
  328|       |    cdef_dsp_init_arm(c);
  329|       |#elif ARCH_PPC64LE
  330|       |    cdef_dsp_init_ppc(c);
  331|       |#elif ARCH_RISCV
  332|       |    cdef_dsp_init_riscv(c);
  333|       |#elif ARCH_X86
  334|       |    cdef_dsp_init_x86(c);
  335|       |#elif ARCH_LOONGARCH64
  336|       |    cdef_dsp_init_loongarch(c);
  337|       |#endif
  338|  4.61k|#endif
  339|  4.61k|}

dav1d_cdf_thread_update:
 3918|  11.0k|{
 3919|  11.0k|#define update_cdf_1d(n1d, name) \
 3920|  11.0k|    do { \
 3921|  11.0k|        dst->name[n1d] = 0; \
 3922|  11.0k|    } while (0)
 3923|  11.0k|#define update_cdf_2d(n1d, n2d, name) \
 3924|  11.0k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
 3925|  11.0k|#define update_cdf_3d(n1d, n2d, n3d, name) \
 3926|  11.0k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
 3927|  11.0k|#define update_cdf_4d(n1d, n2d, n3d, n4d, name) \
 3928|  11.0k|    for (int l = 0; l < (n1d); l++) update_cdf_3d(n2d, n3d, n4d, name[l])
 3929|       |
 3930|  11.0k|    memcpy(dst, src, offsetof(CdfContext, m.intrabc));
 3931|       |
 3932|  11.0k|    update_cdf_3d(2, 2, 4, coef.eob_bin_16);
  ------------------
  |  | 3926|  33.1k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  ------------------
  |  |  |  | 3924|  66.2k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3920|  44.1k|    do { \
  |  |  |  |  |  | 3921|  44.1k|        dst->name[n1d] = 0; \
  |  |  |  |  |  | 3922|  44.1k|    } while (0)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 44.1k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3924:21): [True: 44.1k, False: 22.0k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3926:21): [True: 22.0k, False: 11.0k]
  |  |  ------------------
  ------------------
 3933|  11.0k|    update_cdf_3d(2, 2, 5, coef.eob_bin_32);
  ------------------
  |  | 3926|  33.1k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  ------------------
  |  |  |  | 3924|  66.2k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3920|  44.1k|    do { \
  |  |  |  |  |  | 3921|  44.1k|        dst->name[n1d] = 0; \
  |  |  |  |  |  | 3922|  44.1k|    } while (0)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 44.1k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3924:21): [True: 44.1k, False: 22.0k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3926:21): [True: 22.0k, False: 11.0k]
  |  |  ------------------
  ------------------
 3934|  11.0k|    update_cdf_3d(2, 2, 6, coef.eob_bin_64);
  ------------------
  |  | 3926|  33.1k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  ------------------
  |  |  |  | 3924|  66.2k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3920|  44.1k|    do { \
  |  |  |  |  |  | 3921|  44.1k|        dst->name[n1d] = 0; \
  |  |  |  |  |  | 3922|  44.1k|    } while (0)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 44.1k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3924:21): [True: 44.1k, False: 22.0k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3926:21): [True: 22.0k, False: 11.0k]
  |  |  ------------------
  ------------------
 3935|  11.0k|    update_cdf_3d(2, 2, 7, coef.eob_bin_128);
  ------------------
  |  | 3926|  33.1k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  ------------------
  |  |  |  | 3924|  66.2k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3920|  44.1k|    do { \
  |  |  |  |  |  | 3921|  44.1k|        dst->name[n1d] = 0; \
  |  |  |  |  |  | 3922|  44.1k|    } while (0)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 44.1k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3924:21): [True: 44.1k, False: 22.0k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3926:21): [True: 22.0k, False: 11.0k]
  |  |  ------------------
  ------------------
 3936|  11.0k|    update_cdf_3d(2, 2, 8, coef.eob_bin_256);
  ------------------
  |  | 3926|  33.1k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  ------------------
  |  |  |  | 3924|  66.2k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3920|  44.1k|    do { \
  |  |  |  |  |  | 3921|  44.1k|        dst->name[n1d] = 0; \
  |  |  |  |  |  | 3922|  44.1k|    } while (0)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 44.1k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3924:21): [True: 44.1k, False: 22.0k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3926:21): [True: 22.0k, False: 11.0k]
  |  |  ------------------
  ------------------
 3937|  11.0k|    update_cdf_2d(2, 9, coef.eob_bin_512);
  ------------------
  |  | 3924|  33.1k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  22.0k|    do { \
  |  |  |  | 3921|  22.0k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  22.0k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 22.0k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 22.0k, False: 11.0k]
  |  |  ------------------
  ------------------
 3938|  11.0k|    update_cdf_2d(2, 10, coef.eob_bin_1024);
  ------------------
  |  | 3924|  33.1k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  22.0k|    do { \
  |  |  |  | 3921|  22.0k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  22.0k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 22.0k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 22.0k, False: 11.0k]
  |  |  ------------------
  ------------------
 3939|  11.0k|    update_cdf_4d(N_TX_SIZES, 2, 4, 2, coef.eob_base_tok);
  ------------------
  |  | 3928|  66.2k|    for (int l = 0; l < (n1d); l++) update_cdf_3d(n2d, n3d, n4d, name[l])
  |  |  ------------------
  |  |  |  | 3926|   165k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3924|   552k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  | 3920|   441k|    do { \
  |  |  |  |  |  |  |  | 3921|   441k|        dst->name[n1d] = 0; \
  |  |  |  |  |  |  |  | 3922|   441k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 441k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3924:21): [True: 441k, False: 110k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3926:21): [True: 110k, False: 55.2k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3928:21): [True: 55.2k, False: 11.0k]
  |  |  ------------------
  ------------------
 3940|  11.0k|    update_cdf_4d(N_TX_SIZES, 2, 41 /*42*/, 3, coef.base_tok);
  ------------------
  |  | 3928|  66.2k|    for (int l = 0; l < (n1d); l++) update_cdf_3d(n2d, n3d, n4d, name[l])
  |  |  ------------------
  |  |  |  | 3926|   165k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3924|  4.63M|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  | 3920|  4.52M|    do { \
  |  |  |  |  |  |  |  | 3921|  4.52M|        dst->name[n1d] = 0; \
  |  |  |  |  |  |  |  | 3922|  4.52M|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 4.52M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3924:21): [True: 4.52M, False: 110k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3926:21): [True: 110k, False: 55.2k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3928:21): [True: 55.2k, False: 11.0k]
  |  |  ------------------
  ------------------
 3941|  11.0k|    update_cdf_4d(4, 2, 21, 3, coef.br_tok);
  ------------------
  |  | 3928|  55.2k|    for (int l = 0; l < (n1d); l++) update_cdf_3d(n2d, n3d, n4d, name[l])
  |  |  ------------------
  |  |  |  | 3926|   132k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3924|  1.94M|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  | 3920|  1.85M|    do { \
  |  |  |  |  |  |  |  | 3921|  1.85M|        dst->name[n1d] = 0; \
  |  |  |  |  |  |  |  | 3922|  1.85M|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 1.85M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3924:21): [True: 1.85M, False: 88.3k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3926:21): [True: 88.3k, False: 44.1k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3928:21): [True: 44.1k, False: 11.0k]
  |  |  ------------------
  ------------------
 3942|  11.0k|    update_cdf_4d(N_TX_SIZES, 2, 9, 1, coef.eob_hi_bit);
  ------------------
  |  | 3928|  66.2k|    for (int l = 0; l < (n1d); l++) update_cdf_3d(n2d, n3d, n4d, name[l])
  |  |  ------------------
  |  |  |  | 3926|   165k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3924|  1.10M|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  | 3920|   993k|    do { \
  |  |  |  |  |  |  |  | 3921|   993k|        dst->name[n1d] = 0; \
  |  |  |  |  |  |  |  | 3922|   993k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 993k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3924:21): [True: 993k, False: 110k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3926:21): [True: 110k, False: 55.2k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3928:21): [True: 55.2k, False: 11.0k]
  |  |  ------------------
  ------------------
 3943|  11.0k|    update_cdf_3d(N_TX_SIZES, 13, 1, coef.skip);
  ------------------
  |  | 3926|  66.2k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  ------------------
  |  |  |  | 3924|   772k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3920|   717k|    do { \
  |  |  |  |  |  | 3921|   717k|        dst->name[n1d] = 0; \
  |  |  |  |  |  | 3922|   717k|    } while (0)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 717k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3924:21): [True: 717k, False: 55.2k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3926:21): [True: 55.2k, False: 11.0k]
  |  |  ------------------
  ------------------
 3944|  11.0k|    update_cdf_3d(2, 3, 1, coef.dc_sign);
  ------------------
  |  | 3926|  33.1k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  ------------------
  |  |  |  | 3924|  88.3k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3920|  66.2k|    do { \
  |  |  |  |  |  | 3921|  66.2k|        dst->name[n1d] = 0; \
  |  |  |  |  |  | 3922|  66.2k|    } while (0)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 66.2k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3924:21): [True: 66.2k, False: 22.0k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3926:21): [True: 22.0k, False: 11.0k]
  |  |  ------------------
  ------------------
 3945|       |
 3946|  11.0k|    update_cdf_3d(2, N_INTRA_PRED_MODES, N_UV_INTRA_PRED_MODES - 1 - !k, m.uv_mode);
  ------------------
  |  | 3926|  33.1k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  ------------------
  |  |  |  | 3924|   309k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3920|   287k|    do { \
  |  |  |  |  |  | 3921|   287k|        dst->name[n1d] = 0; \
  |  |  |  |  |  | 3922|   287k|    } while (0)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 287k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3924:21): [True: 287k, False: 22.0k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3926:21): [True: 22.0k, False: 11.0k]
  |  |  ------------------
  ------------------
 3947|  11.0k|    update_cdf_2d(4, N_PARTITIONS - 3, m.partition[BL_128X128]);
  ------------------
  |  | 3924|  55.2k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  44.1k|    do { \
  |  |  |  | 3921|  44.1k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  44.1k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 44.1k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 44.1k, False: 11.0k]
  |  |  ------------------
  ------------------
 3948|  44.1k|    for (int k = BL_64X64; k < BL_8X8; k++)
  ------------------
  |  Branch (3948:28): [True: 33.1k, False: 11.0k]
  ------------------
 3949|  33.1k|        update_cdf_2d(4, N_PARTITIONS - 1, m.partition[k]);
  ------------------
  |  | 3924|   165k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|   132k|    do { \
  |  |  |  | 3921|   132k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|   132k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 132k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 132k, False: 33.1k]
  |  |  ------------------
  ------------------
 3950|  11.0k|    update_cdf_2d(4, N_SUB8X8_PARTITIONS - 1, m.partition[BL_8X8]);
  ------------------
  |  | 3924|  55.2k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  44.1k|    do { \
  |  |  |  | 3921|  44.1k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  44.1k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 44.1k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 44.1k, False: 11.0k]
  |  |  ------------------
  ------------------
 3951|  11.0k|    update_cdf_2d(6, 15, m.cfl_alpha);
  ------------------
  |  | 3924|  77.2k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  66.2k|    do { \
  |  |  |  | 3921|  66.2k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  66.2k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 66.2k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 66.2k, False: 11.0k]
  |  |  ------------------
  ------------------
 3952|  11.0k|    update_cdf_2d(2, 15, m.txtp_inter1);
  ------------------
  |  | 3924|  33.1k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  22.0k|    do { \
  |  |  |  | 3921|  22.0k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  22.0k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 22.0k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 22.0k, False: 11.0k]
  |  |  ------------------
  ------------------
 3953|  11.0k|    update_cdf_1d(11, m.txtp_inter2);
  ------------------
  |  | 3920|  11.0k|    do { \
  |  | 3921|  11.0k|        dst->name[n1d] = 0; \
  |  | 3922|  11.0k|    } while (0)
  |  |  ------------------
  |  |  |  Branch (3922:14): [Folded, False: 11.0k]
  |  |  ------------------
  ------------------
 3954|  11.0k|    update_cdf_3d(2, N_INTRA_PRED_MODES, 6, m.txtp_intra1);
  ------------------
  |  | 3926|  33.1k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  ------------------
  |  |  |  | 3924|   309k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3920|   287k|    do { \
  |  |  |  |  |  | 3921|   287k|        dst->name[n1d] = 0; \
  |  |  |  |  |  | 3922|   287k|    } while (0)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 287k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3924:21): [True: 287k, False: 22.0k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3926:21): [True: 22.0k, False: 11.0k]
  |  |  ------------------
  ------------------
 3955|  11.0k|    update_cdf_3d(3, N_INTRA_PRED_MODES, 4, m.txtp_intra2);
  ------------------
  |  | 3926|  44.1k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  ------------------
  |  |  |  | 3924|   463k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3920|   430k|    do { \
  |  |  |  |  |  | 3921|   430k|        dst->name[n1d] = 0; \
  |  |  |  |  |  | 3922|   430k|    } while (0)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 430k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3924:21): [True: 430k, False: 33.1k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3926:21): [True: 33.1k, False: 11.0k]
  |  |  ------------------
  ------------------
 3956|  11.0k|    update_cdf_1d(7, m.cfl_sign);
  ------------------
  |  | 3920|  11.0k|    do { \
  |  | 3921|  11.0k|        dst->name[n1d] = 0; \
  |  | 3922|  11.0k|    } while (0)
  |  |  ------------------
  |  |  |  Branch (3922:14): [Folded, False: 11.0k]
  |  |  ------------------
  ------------------
 3957|  11.0k|    update_cdf_2d(8, 6, m.angle_delta);
  ------------------
  |  | 3924|  99.3k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  88.3k|    do { \
  |  |  |  | 3921|  88.3k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  88.3k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 88.3k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 88.3k, False: 11.0k]
  |  |  ------------------
  ------------------
 3958|  11.0k|    update_cdf_1d(4, m.filter_intra);
  ------------------
  |  | 3920|  11.0k|    do { \
  |  | 3921|  11.0k|        dst->name[n1d] = 0; \
  |  | 3922|  11.0k|    } while (0)
  |  |  ------------------
  |  |  |  Branch (3922:14): [Folded, False: 11.0k]
  |  |  ------------------
  ------------------
 3959|  11.0k|    update_cdf_2d(3, DAV1D_MAX_SEGMENTS - 1, m.seg_id);
  ------------------
  |  | 3924|  44.1k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  33.1k|    do { \
  |  |  |  | 3921|  33.1k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  33.1k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 33.1k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 33.1k, False: 11.0k]
  |  |  ------------------
  ------------------
 3960|  11.0k|    update_cdf_3d(2, 7, 6, m.pal_sz);
  ------------------
  |  | 3926|  33.1k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  ------------------
  |  |  |  | 3924|   176k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3920|   154k|    do { \
  |  |  |  |  |  | 3921|   154k|        dst->name[n1d] = 0; \
  |  |  |  |  |  | 3922|   154k|    } while (0)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 154k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3924:21): [True: 154k, False: 22.0k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3926:21): [True: 22.0k, False: 11.0k]
  |  |  ------------------
  ------------------
 3961|  11.0k|    update_cdf_4d(2, 7, 5, k + 1, m.color_map);
  ------------------
  |  | 3928|  33.1k|    for (int l = 0; l < (n1d); l++) update_cdf_3d(n2d, n3d, n4d, name[l])
  |  |  ------------------
  |  |  |  | 3926|   176k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3924|   927k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  | 3920|   772k|    do { \
  |  |  |  |  |  |  |  | 3921|   772k|        dst->name[n1d] = 0; \
  |  |  |  |  |  |  |  | 3922|   772k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 772k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3924:21): [True: 772k, False: 154k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3926:21): [True: 154k, False: 22.0k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3928:21): [True: 22.0k, False: 11.0k]
  |  |  ------------------
  ------------------
 3962|  11.0k|    update_cdf_3d(N_TX_SIZES - 1, 3, imin(k + 1, 2), m.txsz);
  ------------------
  |  | 3926|  55.2k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  ------------------
  |  |  |  | 3924|   176k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3920|   132k|    do { \
  |  |  |  |  |  | 3921|   132k|        dst->name[n1d] = 0; \
  |  |  |  |  |  | 3922|   132k|    } while (0)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 132k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3924:21): [True: 132k, False: 44.1k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3926:21): [True: 44.1k, False: 11.0k]
  |  |  ------------------
  ------------------
 3963|  11.0k|    update_cdf_1d(3, m.delta_q);
  ------------------
  |  | 3920|  11.0k|    do { \
  |  | 3921|  11.0k|        dst->name[n1d] = 0; \
  |  | 3922|  11.0k|    } while (0)
  |  |  ------------------
  |  |  |  Branch (3922:14): [Folded, False: 11.0k]
  |  |  ------------------
  ------------------
 3964|  11.0k|    update_cdf_2d(5, 3, m.delta_lf);
  ------------------
  |  | 3924|  66.2k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  55.2k|    do { \
  |  |  |  | 3921|  55.2k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  55.2k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 55.2k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 55.2k, False: 11.0k]
  |  |  ------------------
  ------------------
 3965|  11.0k|    update_cdf_1d(2, m.restore_switchable);
  ------------------
  |  | 3920|  11.0k|    do { \
  |  | 3921|  11.0k|        dst->name[n1d] = 0; \
  |  | 3922|  11.0k|    } while (0)
  |  |  ------------------
  |  |  |  Branch (3922:14): [Folded, False: 11.0k]
  |  |  ------------------
  ------------------
 3966|  11.0k|    update_cdf_1d(1, m.restore_wiener);
  ------------------
  |  | 3920|  11.0k|    do { \
  |  | 3921|  11.0k|        dst->name[n1d] = 0; \
  |  | 3922|  11.0k|    } while (0)
  |  |  ------------------
  |  |  |  Branch (3922:14): [Folded, False: 11.0k]
  |  |  ------------------
  ------------------
 3967|  11.0k|    update_cdf_1d(1, m.restore_sgrproj);
  ------------------
  |  | 3920|  11.0k|    do { \
  |  | 3921|  11.0k|        dst->name[n1d] = 0; \
  |  | 3922|  11.0k|    } while (0)
  |  |  ------------------
  |  |  |  Branch (3922:14): [Folded, False: 11.0k]
  |  |  ------------------
  ------------------
 3968|  11.0k|    update_cdf_2d(4, 1, m.txtp_inter3);
  ------------------
  |  | 3924|  55.2k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  44.1k|    do { \
  |  |  |  | 3921|  44.1k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  44.1k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 44.1k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 44.1k, False: 11.0k]
  |  |  ------------------
  ------------------
 3969|  11.0k|    update_cdf_2d(N_BS_SIZES, 1, m.use_filter_intra);
  ------------------
  |  | 3924|   253k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|   242k|    do { \
  |  |  |  | 3921|   242k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|   242k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 242k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 242k, False: 11.0k]
  |  |  ------------------
  ------------------
 3970|  11.0k|    update_cdf_3d(7, 3, 1, m.txpart);
  ------------------
  |  | 3926|  88.3k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  ------------------
  |  |  |  | 3924|   309k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3920|   231k|    do { \
  |  |  |  |  |  | 3921|   231k|        dst->name[n1d] = 0; \
  |  |  |  |  |  | 3922|   231k|    } while (0)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 231k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3924:21): [True: 231k, False: 77.2k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3926:21): [True: 77.2k, False: 11.0k]
  |  |  ------------------
  ------------------
 3971|  11.0k|    update_cdf_2d(3, 1, m.skip);
  ------------------
  |  | 3924|  44.1k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  33.1k|    do { \
  |  |  |  | 3921|  33.1k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  33.1k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 33.1k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 33.1k, False: 11.0k]
  |  |  ------------------
  ------------------
 3972|  11.0k|    update_cdf_3d(7, 3, 1, m.pal_y);
  ------------------
  |  | 3926|  88.3k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  ------------------
  |  |  |  | 3924|   309k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3920|   231k|    do { \
  |  |  |  |  |  | 3921|   231k|        dst->name[n1d] = 0; \
  |  |  |  |  |  | 3922|   231k|    } while (0)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 231k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3924:21): [True: 231k, False: 77.2k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3926:21): [True: 77.2k, False: 11.0k]
  |  |  ------------------
  ------------------
 3973|  11.0k|    update_cdf_2d(2, 1, m.pal_uv);
  ------------------
  |  | 3924|  33.1k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  22.0k|    do { \
  |  |  |  | 3921|  22.0k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  22.0k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 22.0k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 22.0k, False: 11.0k]
  |  |  ------------------
  ------------------
 3974|       |
 3975|  11.0k|    if (IS_KEY_OR_INTRA(hdr))
  ------------------
  |  |   43|  11.0k|    (!IS_INTER_OR_SWITCH(frame_header))
  |  |  ------------------
  |  |  |  |   36|  11.0k|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (43:5): [True: 5.24k, False: 5.79k]
  |  |  ------------------
  ------------------
 3976|  5.24k|        return;
 3977|       |
 3978|  5.79k|    memcpy(dst->m.y_mode, src->m.y_mode,
 3979|  5.79k|           offsetof(CdfContext, kfym) - offsetof(CdfContext, m.y_mode));
 3980|       |
 3981|  5.79k|    update_cdf_2d(4, N_INTRA_PRED_MODES - 1, m.y_mode);
  ------------------
  |  | 3924|  28.9k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  28.9k|    do { \
  |  |  |  | 3921|  23.1k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  23.1k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 23.1k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 23.1k, False: 5.79k]
  |  |  ------------------
  ------------------
 3982|  5.79k|    update_cdf_2d(9, 15, m.wedge_idx);
  ------------------
  |  | 3924|  57.9k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  57.9k|    do { \
  |  |  |  | 3921|  52.1k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  52.1k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 52.1k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 52.1k, False: 5.79k]
  |  |  ------------------
  ------------------
 3983|  5.79k|    update_cdf_2d(8, N_COMP_INTER_PRED_MODES - 1, m.comp_inter_mode);
  ------------------
  |  | 3924|  52.1k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  52.1k|    do { \
  |  |  |  | 3921|  46.3k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  46.3k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 46.3k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 46.3k, False: 5.79k]
  |  |  ------------------
  ------------------
 3984|  5.79k|    update_cdf_3d(2, 8, DAV1D_N_SWITCHABLE_FILTERS - 1, m.filter);
  ------------------
  |  | 3926|  17.3k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  ------------------
  |  |  |  | 3924|   104k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3920|  98.5k|    do { \
  |  |  |  |  |  | 3921|  92.7k|        dst->name[n1d] = 0; \
  |  |  |  |  |  | 3922|  92.7k|    } while (0)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 92.7k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3924:21): [True: 92.7k, False: 11.5k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3926:21): [True: 11.5k, False: 5.79k]
  |  |  ------------------
  ------------------
 3985|  5.79k|    update_cdf_2d(4, 3, m.interintra_mode);
  ------------------
  |  | 3924|  28.9k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  28.9k|    do { \
  |  |  |  | 3921|  23.1k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  23.1k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 23.1k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 23.1k, False: 5.79k]
  |  |  ------------------
  ------------------
 3986|  5.79k|    update_cdf_2d(N_BS_SIZES, 2, m.motion_mode);
  ------------------
  |  | 3924|   133k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|   133k|    do { \
  |  |  |  | 3921|   127k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|   127k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 127k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 127k, False: 5.79k]
  |  |  ------------------
  ------------------
 3987|  5.79k|    update_cdf_2d(3, 1, m.skip_mode);
  ------------------
  |  | 3924|  23.1k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  23.1k|    do { \
  |  |  |  | 3921|  17.3k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  17.3k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 17.3k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 17.3k, False: 5.79k]
  |  |  ------------------
  ------------------
 3988|  5.79k|    update_cdf_2d(6, 1, m.newmv_mode);
  ------------------
  |  | 3924|  40.5k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  40.5k|    do { \
  |  |  |  | 3921|  34.7k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  34.7k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 34.7k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 34.7k, False: 5.79k]
  |  |  ------------------
  ------------------
 3989|  5.79k|    update_cdf_2d(2, 1, m.globalmv_mode);
  ------------------
  |  | 3924|  17.3k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  17.3k|    do { \
  |  |  |  | 3921|  11.5k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  11.5k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 11.5k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 11.5k, False: 5.79k]
  |  |  ------------------
  ------------------
 3990|  5.79k|    update_cdf_2d(6, 1, m.refmv_mode);
  ------------------
  |  | 3924|  40.5k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  40.5k|    do { \
  |  |  |  | 3921|  34.7k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  34.7k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 34.7k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 34.7k, False: 5.79k]
  |  |  ------------------
  ------------------
 3991|  5.79k|    update_cdf_2d(3, 1, m.drl_bit);
  ------------------
  |  | 3924|  23.1k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  23.1k|    do { \
  |  |  |  | 3921|  17.3k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  17.3k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 17.3k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 17.3k, False: 5.79k]
  |  |  ------------------
  ------------------
 3992|  5.79k|    update_cdf_2d(4, 1, m.intra);
  ------------------
  |  | 3924|  28.9k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  28.9k|    do { \
  |  |  |  | 3921|  23.1k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  23.1k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 23.1k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 23.1k, False: 5.79k]
  |  |  ------------------
  ------------------
 3993|  5.79k|    update_cdf_2d(5, 1, m.comp);
  ------------------
  |  | 3924|  34.7k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  34.7k|    do { \
  |  |  |  | 3921|  28.9k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  28.9k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 28.9k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 28.9k, False: 5.79k]
  |  |  ------------------
  ------------------
 3994|  5.79k|    update_cdf_2d(5, 1, m.comp_dir);
  ------------------
  |  | 3924|  34.7k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  34.7k|    do { \
  |  |  |  | 3921|  28.9k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  28.9k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 28.9k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 28.9k, False: 5.79k]
  |  |  ------------------
  ------------------
 3995|  5.79k|    update_cdf_2d(6, 1, m.jnt_comp);
  ------------------
  |  | 3924|  40.5k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  40.5k|    do { \
  |  |  |  | 3921|  34.7k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  34.7k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 34.7k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 34.7k, False: 5.79k]
  |  |  ------------------
  ------------------
 3996|  5.79k|    update_cdf_2d(6, 1, m.mask_comp);
  ------------------
  |  | 3924|  40.5k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  40.5k|    do { \
  |  |  |  | 3921|  34.7k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  34.7k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 34.7k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 34.7k, False: 5.79k]
  |  |  ------------------
  ------------------
 3997|  5.79k|    update_cdf_2d(9, 1, m.wedge_comp);
  ------------------
  |  | 3924|  57.9k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  57.9k|    do { \
  |  |  |  | 3921|  52.1k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  52.1k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 52.1k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 52.1k, False: 5.79k]
  |  |  ------------------
  ------------------
 3998|  5.79k|    update_cdf_3d(6, 3, 1, m.ref);
  ------------------
  |  | 3926|  40.5k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  ------------------
  |  |  |  | 3924|   139k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3920|   110k|    do { \
  |  |  |  |  |  | 3921|   104k|        dst->name[n1d] = 0; \
  |  |  |  |  |  | 3922|   104k|    } while (0)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 104k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3924:21): [True: 104k, False: 34.7k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3926:21): [True: 34.7k, False: 5.79k]
  |  |  ------------------
  ------------------
 3999|  5.79k|    update_cdf_3d(3, 3, 1, m.comp_fwd_ref);
  ------------------
  |  | 3926|  23.1k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  ------------------
  |  |  |  | 3924|  69.5k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3920|  57.9k|    do { \
  |  |  |  |  |  | 3921|  52.1k|        dst->name[n1d] = 0; \
  |  |  |  |  |  | 3922|  52.1k|    } while (0)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 52.1k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3924:21): [True: 52.1k, False: 17.3k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3926:21): [True: 17.3k, False: 5.79k]
  |  |  ------------------
  ------------------
 4000|  5.79k|    update_cdf_3d(2, 3, 1, m.comp_bwd_ref);
  ------------------
  |  | 3926|  17.3k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  ------------------
  |  |  |  | 3924|  46.3k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3920|  40.5k|    do { \
  |  |  |  |  |  | 3921|  34.7k|        dst->name[n1d] = 0; \
  |  |  |  |  |  | 3922|  34.7k|    } while (0)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 34.7k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3924:21): [True: 34.7k, False: 11.5k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3926:21): [True: 11.5k, False: 5.79k]
  |  |  ------------------
  ------------------
 4001|  5.79k|    update_cdf_3d(3, 3, 1, m.comp_uni_ref);
  ------------------
  |  | 3926|  23.1k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  ------------------
  |  |  |  | 3924|  69.5k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3920|  57.9k|    do { \
  |  |  |  |  |  | 3921|  52.1k|        dst->name[n1d] = 0; \
  |  |  |  |  |  | 3922|  52.1k|    } while (0)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 52.1k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3924:21): [True: 52.1k, False: 17.3k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3926:21): [True: 17.3k, False: 5.79k]
  |  |  ------------------
  ------------------
 4002|  5.79k|    update_cdf_2d(3, 1, m.seg_pred);
  ------------------
  |  | 3924|  23.1k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  23.1k|    do { \
  |  |  |  | 3921|  17.3k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  17.3k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 17.3k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 17.3k, False: 5.79k]
  |  |  ------------------
  ------------------
 4003|  5.79k|    update_cdf_2d(4, 1, m.interintra);
  ------------------
  |  | 3924|  28.9k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  28.9k|    do { \
  |  |  |  | 3921|  23.1k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  23.1k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 23.1k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 23.1k, False: 5.79k]
  |  |  ------------------
  ------------------
 4004|  5.79k|    update_cdf_2d(7, 1, m.interintra_wedge);
  ------------------
  |  | 3924|  46.3k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  46.3k|    do { \
  |  |  |  | 3921|  40.5k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  40.5k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 40.5k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 40.5k, False: 5.79k]
  |  |  ------------------
  ------------------
 4005|  5.79k|    update_cdf_2d(N_BS_SIZES, 1, m.obmc);
  ------------------
  |  | 3924|   133k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|   133k|    do { \
  |  |  |  | 3921|   127k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|   127k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 127k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 127k, False: 5.79k]
  |  |  ------------------
  ------------------
 4006|       |
 4007|  17.3k|    for (int k = 0; k < 2; k++) {
  ------------------
  |  Branch (4007:21): [True: 11.5k, False: 5.79k]
  ------------------
 4008|  11.5k|        update_cdf_1d(10, mv.comp[k].classes);
  ------------------
  |  | 3920|  11.5k|    do { \
  |  | 3921|  11.5k|        dst->name[n1d] = 0; \
  |  | 3922|  11.5k|    } while (0)
  |  |  ------------------
  |  |  |  Branch (3922:14): [Folded, False: 11.5k]
  |  |  ------------------
  ------------------
 4009|  11.5k|        update_cdf_1d(1, mv.comp[k].sign);
  ------------------
  |  | 3920|  11.5k|    do { \
  |  | 3921|  11.5k|        dst->name[n1d] = 0; \
  |  | 3922|  11.5k|    } while (0)
  |  |  ------------------
  |  |  |  Branch (3922:14): [Folded, False: 11.5k]
  |  |  ------------------
  ------------------
 4010|  11.5k|        update_cdf_1d(1, mv.comp[k].class0);
  ------------------
  |  | 3920|  11.5k|    do { \
  |  | 3921|  11.5k|        dst->name[n1d] = 0; \
  |  | 3922|  11.5k|    } while (0)
  |  |  ------------------
  |  |  |  Branch (3922:14): [Folded, False: 11.5k]
  |  |  ------------------
  ------------------
 4011|  11.5k|        update_cdf_2d(2, 3, mv.comp[k].class0_fp);
  ------------------
  |  | 3924|  34.7k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  23.1k|    do { \
  |  |  |  | 3921|  23.1k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  23.1k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 23.1k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 23.1k, False: 11.5k]
  |  |  ------------------
  ------------------
 4012|  11.5k|        update_cdf_1d(1, mv.comp[k].class0_hp);
  ------------------
  |  | 3920|  11.5k|    do { \
  |  | 3921|  11.5k|        dst->name[n1d] = 0; \
  |  | 3922|  11.5k|    } while (0)
  |  |  ------------------
  |  |  |  Branch (3922:14): [Folded, False: 11.5k]
  |  |  ------------------
  ------------------
 4013|  11.5k|        update_cdf_2d(10, 1, mv.comp[k].classN);
  ------------------
  |  | 3924|   127k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|   115k|    do { \
  |  |  |  | 3921|   115k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|   115k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 115k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 115k, False: 11.5k]
  |  |  ------------------
  ------------------
 4014|  11.5k|        update_cdf_1d(3, mv.comp[k].classN_fp);
  ------------------
  |  | 3920|  11.5k|    do { \
  |  | 3921|  11.5k|        dst->name[n1d] = 0; \
  |  | 3922|  11.5k|    } while (0)
  |  |  ------------------
  |  |  |  Branch (3922:14): [Folded, False: 11.5k]
  |  |  ------------------
  ------------------
 4015|  11.5k|        update_cdf_1d(1, mv.comp[k].classN_hp);
  ------------------
  |  | 3920|  11.5k|    do { \
  |  | 3921|  11.5k|        dst->name[n1d] = 0; \
  |  | 3922|  11.5k|    } while (0)
  |  |  ------------------
  |  |  |  Branch (3922:14): [Folded, False: 11.5k]
  |  |  ------------------
  ------------------
 4016|  11.5k|    }
 4017|  5.79k|    update_cdf_1d(N_MV_JOINTS - 1, mv.joint);
  ------------------
  |  | 3920|  5.79k|    do { \
  |  | 3921|  5.79k|        dst->name[n1d] = 0; \
  |  | 3922|  5.79k|    } while (0)
  |  |  ------------------
  |  |  |  Branch (3922:14): [Folded, False: 5.79k]
  |  |  ------------------
  ------------------
 4018|  5.79k|}
dav1d_cdf_thread_init_static:
 4023|  31.2k|void dav1d_cdf_thread_init_static(CdfThreadContext *const cdf, const unsigned qidx) {
 4024|       |    cdf->ref = NULL;
 4025|  31.2k|    cdf->data.qcat = (qidx > 20) + (qidx > 60) + (qidx > 120);
 4026|  31.2k|}
dav1d_cdf_thread_copy:
 4028|  59.5k|void dav1d_cdf_thread_copy(CdfContext *const dst, const CdfThreadContext *const src) {
 4029|  59.5k|    if (src->ref) {
  ------------------
  |  Branch (4029:9): [True: 12.3k, False: 47.2k]
  ------------------
 4030|  12.3k|        memcpy(dst, src->data.cdf, sizeof(*dst));
 4031|  47.2k|    } else {
 4032|  47.2k|        dst->coef = default_coef_cdf[src->data.qcat];
 4033|  47.2k|        memcpy(&dst->m, &default_cdf.m,
 4034|  47.2k|               offsetof(CdfDefaultContext, mv.joint));
 4035|  47.2k|        memcpy(&dst->mv.comp[1], &default_cdf.mv.comp,
 4036|       |               sizeof(default_cdf) - offsetof(CdfDefaultContext, mv.comp));
 4037|  47.2k|    }
 4038|  59.5k|}
dav1d_cdf_thread_alloc:
 4042|  13.1k|{
 4043|  13.1k|    cdf->ref = dav1d_ref_create_using_pool(c->cdf_pool,
 4044|  13.1k|                                           sizeof(CdfContext) + sizeof(atomic_uint));
 4045|  13.1k|    if (!cdf->ref) return DAV1D_ERR(ENOMEM);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  |  Branch (4045:9): [True: 0, False: 13.1k]
  ------------------
 4046|  13.1k|    cdf->data.cdf = cdf->ref->data;
 4047|  13.1k|    if (have_frame_mt) {
  ------------------
  |  Branch (4047:9): [True: 0, False: 13.1k]
  ------------------
 4048|      0|        cdf->progress = (atomic_uint *) &cdf->data.cdf[1];
 4049|       |        atomic_init(cdf->progress, 0);
 4050|      0|    }
 4051|  13.1k|    return 0;
 4052|  13.1k|}
dav1d_cdf_thread_ref:
 4056|   314k|{
 4057|   314k|    *dst = *src;
 4058|   314k|    if (src->ref)
  ------------------
  |  Branch (4058:9): [True: 82.2k, False: 232k]
  ------------------
 4059|  82.2k|        dav1d_ref_inc(src->ref);
 4060|   314k|}
dav1d_cdf_thread_unref:
 4062|   736k|void dav1d_cdf_thread_unref(CdfThreadContext *const cdf) {
 4063|       |    memset(&cdf->data, 0, sizeof(*cdf) - offsetof(CdfThreadContext, data));
 4064|   736k|    dav1d_ref_dec(&cdf->ref);
 4065|   736k|}

dav1d_init_cpu:
   63|      1|COLD void dav1d_init_cpu(void) {
   64|      1|#if HAVE_ASM && !__has_feature(memory_sanitizer)
   65|       |// memory sanitizer is inherently incompatible with asm
   66|       |#if ARCH_AARCH64 || ARCH_ARM
   67|       |    dav1d_cpu_flags = dav1d_get_cpu_flags_arm();
   68|       |#elif ARCH_LOONGARCH
   69|       |    dav1d_cpu_flags = dav1d_get_cpu_flags_loongarch();
   70|       |#elif ARCH_PPC64LE
   71|       |    dav1d_cpu_flags = dav1d_get_cpu_flags_ppc();
   72|       |#elif ARCH_RISCV
   73|       |    dav1d_cpu_flags = dav1d_get_cpu_flags_riscv();
   74|       |#elif ARCH_X86
   75|       |    dav1d_cpu_flags = dav1d_get_cpu_flags_x86();
   76|      1|#endif
   77|      1|#endif
   78|      1|}

cpu.c:dav1d_get_default_cpu_flags:
   58|      1|static ALWAYS_INLINE unsigned dav1d_get_default_cpu_flags(void) {
   59|      1|    unsigned flags = 0;
   60|       |
   61|       |#if ARCH_AARCH64 || ARCH_ARM
   62|       |#if defined(__ARM_NEON) || defined(__APPLE__) || defined(_WIN32) || ARCH_AARCH64
   63|       |    flags |= DAV1D_ARM_CPU_FLAG_NEON;
   64|       |#endif
   65|       |#ifdef __ARM_FEATURE_DOTPROD
   66|       |    flags |= DAV1D_ARM_CPU_FLAG_DOTPROD;
   67|       |#endif
   68|       |#ifdef __ARM_FEATURE_MATMUL_INT8
   69|       |    flags |= DAV1D_ARM_CPU_FLAG_I8MM;
   70|       |#endif
   71|       |#if ARCH_AARCH64
   72|       |#ifdef __ARM_FEATURE_SVE
   73|       |    flags |= DAV1D_ARM_CPU_FLAG_SVE;
   74|       |#endif
   75|       |#ifdef __ARM_FEATURE_SVE2
   76|       |    flags |= DAV1D_ARM_CPU_FLAG_SVE2;
   77|       |#endif
   78|       |#endif /* ARCH_AARCH64 */
   79|       |#elif ARCH_PPC64LE
   80|       |#if defined(__VSX__)
   81|       |    flags |= DAV1D_PPC_CPU_FLAG_VSX;
   82|       |#endif
   83|       |#if defined(__POWER9_VECTOR__)
   84|       |    flags |= DAV1D_PPC_CPU_FLAG_PWR9;
   85|       |#endif
   86|       |#elif ARCH_RISCV
   87|       |#if defined(__riscv_v)
   88|       |    flags |= DAV1D_RISCV_CPU_FLAG_V;
   89|       |#endif
   90|       |#elif ARCH_X86
   91|       |#if defined(__AVX512F__) && defined(__AVX512CD__) && \
   92|       |    defined(__AVX512BW__) && defined(__AVX512DQ__) && \
   93|       |    defined(__AVX512VL__) && defined(__AVX512VNNI__) && \
   94|       |    defined(__AVX512IFMA__) && defined(__AVX512VBMI__) && \
   95|       |    defined(__AVX512VBMI2__) && defined(__AVX512VPOPCNTDQ__) && \
   96|       |    defined(__AVX512BITALG__) && defined(__GFNI__) && \
   97|       |    defined(__VAES__) && defined(__VPCLMULQDQ__)
   98|       |    flags |= DAV1D_X86_CPU_FLAG_AVX512ICL |
   99|       |             DAV1D_X86_CPU_FLAG_AVX2 |
  100|       |             DAV1D_X86_CPU_FLAG_SSE41 |
  101|       |             DAV1D_X86_CPU_FLAG_SSSE3 |
  102|       |             DAV1D_X86_CPU_FLAG_SSE2;
  103|       |#elif defined(__AVX2__)
  104|       |    flags |= DAV1D_X86_CPU_FLAG_AVX2 |
  105|       |             DAV1D_X86_CPU_FLAG_SSE41 |
  106|       |             DAV1D_X86_CPU_FLAG_SSSE3 |
  107|       |             DAV1D_X86_CPU_FLAG_SSE2;
  108|       |#elif defined(__SSE4_1__) || defined(__AVX__)
  109|       |    flags |= DAV1D_X86_CPU_FLAG_SSE41 |
  110|       |             DAV1D_X86_CPU_FLAG_SSSE3 |
  111|       |             DAV1D_X86_CPU_FLAG_SSE2;
  112|       |#elif defined(__SSSE3__)
  113|       |    flags |= DAV1D_X86_CPU_FLAG_SSSE3 |
  114|       |             DAV1D_X86_CPU_FLAG_SSE2;
  115|       |#elif ARCH_X86_64 || defined(__SSE2__) || \
  116|       |      (defined(_M_IX86_FP) && _M_IX86_FP >= 2)
  117|       |    flags |= DAV1D_X86_CPU_FLAG_SSE2;
  118|      1|#endif
  119|      1|#endif
  120|       |
  121|      1|    return flags;
  122|      1|}
pal.c:dav1d_get_cpu_flags:
  124|  9.51k|static ALWAYS_INLINE unsigned dav1d_get_cpu_flags(void) {
  125|  9.51k|    unsigned flags = dav1d_cpu_flags & dav1d_cpu_flags_mask;
  126|       |
  127|       |#if TRIM_DSP_FUNCTIONS
  128|       |/* Since this function is inlined, unconditionally setting a flag here will
  129|       | * enable dead code elimination in the calling function. */
  130|       |    flags |= dav1d_get_default_cpu_flags();
  131|       |#endif
  132|       |
  133|  9.51k|    return flags;
  134|  9.51k|}
refmvs.c:dav1d_get_cpu_flags:
  124|  9.51k|static ALWAYS_INLINE unsigned dav1d_get_cpu_flags(void) {
  125|  9.51k|    unsigned flags = dav1d_cpu_flags & dav1d_cpu_flags_mask;
  126|       |
  127|       |#if TRIM_DSP_FUNCTIONS
  128|       |/* Since this function is inlined, unconditionally setting a flag here will
  129|       | * enable dead code elimination in the calling function. */
  130|       |    flags |= dav1d_get_default_cpu_flags();
  131|       |#endif
  132|       |
  133|  9.51k|    return flags;
  134|  9.51k|}
msac.c:dav1d_get_cpu_flags:
  124|  46.3k|static ALWAYS_INLINE unsigned dav1d_get_cpu_flags(void) {
  125|  46.3k|    unsigned flags = dav1d_cpu_flags & dav1d_cpu_flags_mask;
  126|       |
  127|       |#if TRIM_DSP_FUNCTIONS
  128|       |/* Since this function is inlined, unconditionally setting a flag here will
  129|       | * enable dead code elimination in the calling function. */
  130|       |    flags |= dav1d_get_default_cpu_flags();
  131|       |#endif
  132|       |
  133|  46.3k|    return flags;
  134|  46.3k|}
cdef_tmpl.c:dav1d_get_cpu_flags:
  124|  8.03k|static ALWAYS_INLINE unsigned dav1d_get_cpu_flags(void) {
  125|  8.03k|    unsigned flags = dav1d_cpu_flags & dav1d_cpu_flags_mask;
  126|       |
  127|       |#if TRIM_DSP_FUNCTIONS
  128|       |/* Since this function is inlined, unconditionally setting a flag here will
  129|       | * enable dead code elimination in the calling function. */
  130|       |    flags |= dav1d_get_default_cpu_flags();
  131|       |#endif
  132|       |
  133|  8.03k|    return flags;
  134|  8.03k|}
filmgrain_tmpl.c:dav1d_get_cpu_flags:
  124|  8.03k|static ALWAYS_INLINE unsigned dav1d_get_cpu_flags(void) {
  125|  8.03k|    unsigned flags = dav1d_cpu_flags & dav1d_cpu_flags_mask;
  126|       |
  127|       |#if TRIM_DSP_FUNCTIONS
  128|       |/* Since this function is inlined, unconditionally setting a flag here will
  129|       | * enable dead code elimination in the calling function. */
  130|       |    flags |= dav1d_get_default_cpu_flags();
  131|       |#endif
  132|       |
  133|  8.03k|    return flags;
  134|  8.03k|}
ipred_tmpl.c:dav1d_get_cpu_flags:
  124|  8.03k|static ALWAYS_INLINE unsigned dav1d_get_cpu_flags(void) {
  125|  8.03k|    unsigned flags = dav1d_cpu_flags & dav1d_cpu_flags_mask;
  126|       |
  127|       |#if TRIM_DSP_FUNCTIONS
  128|       |/* Since this function is inlined, unconditionally setting a flag here will
  129|       | * enable dead code elimination in the calling function. */
  130|       |    flags |= dav1d_get_default_cpu_flags();
  131|       |#endif
  132|       |
  133|  8.03k|    return flags;
  134|  8.03k|}
itx_tmpl.c:dav1d_get_cpu_flags:
  124|  8.03k|static ALWAYS_INLINE unsigned dav1d_get_cpu_flags(void) {
  125|  8.03k|    unsigned flags = dav1d_cpu_flags & dav1d_cpu_flags_mask;
  126|       |
  127|       |#if TRIM_DSP_FUNCTIONS
  128|       |/* Since this function is inlined, unconditionally setting a flag here will
  129|       | * enable dead code elimination in the calling function. */
  130|       |    flags |= dav1d_get_default_cpu_flags();
  131|       |#endif
  132|       |
  133|  8.03k|    return flags;
  134|  8.03k|}
loopfilter_tmpl.c:dav1d_get_cpu_flags:
  124|  8.03k|static ALWAYS_INLINE unsigned dav1d_get_cpu_flags(void) {
  125|  8.03k|    unsigned flags = dav1d_cpu_flags & dav1d_cpu_flags_mask;
  126|       |
  127|       |#if TRIM_DSP_FUNCTIONS
  128|       |/* Since this function is inlined, unconditionally setting a flag here will
  129|       | * enable dead code elimination in the calling function. */
  130|       |    flags |= dav1d_get_default_cpu_flags();
  131|       |#endif
  132|       |
  133|  8.03k|    return flags;
  134|  8.03k|}
looprestoration_tmpl.c:dav1d_get_cpu_flags:
  124|  8.03k|static ALWAYS_INLINE unsigned dav1d_get_cpu_flags(void) {
  125|  8.03k|    unsigned flags = dav1d_cpu_flags & dav1d_cpu_flags_mask;
  126|       |
  127|       |#if TRIM_DSP_FUNCTIONS
  128|       |/* Since this function is inlined, unconditionally setting a flag here will
  129|       | * enable dead code elimination in the calling function. */
  130|       |    flags |= dav1d_get_default_cpu_flags();
  131|       |#endif
  132|       |
  133|  8.03k|    return flags;
  134|  8.03k|}
mc_tmpl.c:dav1d_get_cpu_flags:
  124|  8.03k|static ALWAYS_INLINE unsigned dav1d_get_cpu_flags(void) {
  125|  8.03k|    unsigned flags = dav1d_cpu_flags & dav1d_cpu_flags_mask;
  126|       |
  127|       |#if TRIM_DSP_FUNCTIONS
  128|       |/* Since this function is inlined, unconditionally setting a flag here will
  129|       | * enable dead code elimination in the calling function. */
  130|       |    flags |= dav1d_get_default_cpu_flags();
  131|       |#endif
  132|       |
  133|  8.03k|    return flags;
  134|  8.03k|}

ctx.c:memset_w1:
   34|  17.8M|static void memset_w1(void *const ptr, const int value) {
   35|  17.8M|    set_ctx1((uint8_t *) ptr, 0, value);
  ------------------
  |  |   56|  17.8M|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  ------------------
   36|  17.8M|}
ctx.c:memset_w2:
   38|  8.45M|static void memset_w2(void *const ptr, const int value) {
   39|  8.45M|    set_ctx2((uint8_t *) ptr, 0, value);
  ------------------
  |  |   58|  8.45M|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  ------------------
   40|  8.45M|}
ctx.c:memset_w4:
   42|  7.00M|static void memset_w4(void *const ptr, const int value) {
   43|  7.00M|    set_ctx4((uint8_t *) ptr, 0, value);
  ------------------
  |  |   60|  7.00M|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  ------------------
   44|  7.00M|}
ctx.c:memset_w8:
   46|  5.34M|static void memset_w8(void *const ptr, const int value) {
   47|  5.34M|    set_ctx8((uint8_t *) ptr, 0, value);
  ------------------
  |  |   62|  5.34M|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  ------------------
   48|  5.34M|}
ctx.c:memset_w16:
   50|  2.18M|static void memset_w16(void *const ptr, const int value) {
   51|  2.18M|    set_ctx16((uint8_t *) ptr, 0, value);
  ------------------
  |  |   63|  2.18M|#define set_ctx16(var, off, val) do { \
  |  |   64|  2.18M|        memset(&(var)[off], val, 16); \
  |  |   65|  2.18M|    } while (0)
  |  |  ------------------
  |  |  |  Branch (65:14): [Folded, False: 2.18M]
  |  |  ------------------
  ------------------
   52|  2.18M|}
ctx.c:memset_w32:
   54|   228k|static void memset_w32(void *const ptr, const int value) {
   55|   228k|    set_ctx32((uint8_t *) ptr, 0, value);
  ------------------
  |  |   66|   228k|#define set_ctx32(var, off, val) do { \
  |  |   67|   228k|        memset(&(var)[off], val, 32); \
  |  |   68|   228k|    } while (0)
  |  |  ------------------
  |  |  |  Branch (68:14): [Folded, False: 228k]
  |  |  ------------------
  ------------------
   56|   228k|}

lf_mask.c:dav1d_memset_likely_pow2:
   44|  1.87M|static inline void dav1d_memset_likely_pow2(void *const ptr, const int value, const int n) {
   45|  1.87M|    assert(n >= 1 && n <= 32);
  ------------------
  |  Branch (45:5): [True: 1.87M, False: 0]
  |  Branch (45:5): [True: 1.87M, False: 0]
  ------------------
   46|  1.87M|    if ((n&(n-1)) == 0) {
  ------------------
  |  Branch (46:9): [True: 1.78M, False: 90.8k]
  ------------------
   47|  1.78M|        dav1d_memset_pow2[ulog2(n)](ptr, value);
   48|  1.78M|    } else {
   49|  90.8k|        memset(ptr, value, n);
   50|  90.8k|    }
   51|  1.87M|}
recon_tmpl.c:dav1d_memset_likely_pow2:
   44|  15.3M|static inline void dav1d_memset_likely_pow2(void *const ptr, const int value, const int n) {
   45|  15.3M|    assert(n >= 1 && n <= 32);
  ------------------
  |  Branch (45:5): [True: 15.3M, False: 0]
  |  Branch (45:5): [True: 15.3M, False: 0]
  ------------------
   46|  15.3M|    if ((n&(n-1)) == 0) {
  ------------------
  |  Branch (46:9): [True: 15.1M, False: 145k]
  ------------------
   47|  15.1M|        dav1d_memset_pow2[ulog2(n)](ptr, value);
   48|  15.1M|    } else {
   49|   145k|        memset(ptr, value, n);
   50|   145k|    }
   51|  15.3M|}

dav1d_data_create_internal:
   43|  71.9k|uint8_t *dav1d_data_create_internal(Dav1dData *const buf, const size_t sz) {
   44|  71.9k|    validate_input_or_ret(buf != NULL, NULL);
  ------------------
  |  |   52|  71.9k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:9): [True: 0, False: 71.9k]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
   45|       |
   46|  71.9k|    if (sz > SIZE_MAX / 2) return NULL;
  ------------------
  |  Branch (46:9): [True: 0, False: 71.9k]
  ------------------
   47|  71.9k|    buf->ref = dav1d_ref_create(ALLOC_DAV1DDATA, sz);
  ------------------
  |  |   49|  71.9k|#define dav1d_ref_create(type, size) dav1d_ref_create(size)
  ------------------
   48|  71.9k|    if (!buf->ref) return NULL;
  ------------------
  |  Branch (48:9): [True: 0, False: 71.9k]
  ------------------
   49|  71.9k|    buf->data = buf->ref->const_data;
   50|  71.9k|    buf->sz = sz;
   51|  71.9k|    dav1d_data_props_set_defaults(&buf->m);
   52|  71.9k|    buf->m.size = sz;
   53|       |
   54|  71.9k|    return buf->ref->data;
   55|  71.9k|}
dav1d_data_ref:
   98|   118k|void dav1d_data_ref(Dav1dData *const dst, const Dav1dData *const src) {
   99|   118k|    assert(dst != NULL);
  ------------------
  |  Branch (99:5): [True: 118k, False: 0]
  ------------------
  100|   118k|    assert(dst->data == NULL);
  ------------------
  |  Branch (100:5): [True: 118k, False: 0]
  ------------------
  101|   118k|    assert(src != NULL);
  ------------------
  |  Branch (101:5): [True: 118k, False: 0]
  ------------------
  102|       |
  103|   118k|    if (src->ref) {
  ------------------
  |  Branch (103:9): [True: 118k, False: 0]
  ------------------
  104|   118k|        assert(src->data != NULL);
  ------------------
  |  Branch (104:9): [True: 118k, False: 0]
  ------------------
  105|   118k|        dav1d_ref_inc(src->ref);
  106|   118k|    }
  107|   118k|    if (src->m.user_data.ref) dav1d_ref_inc(src->m.user_data.ref);
  ------------------
  |  Branch (107:9): [True: 0, False: 118k]
  ------------------
  108|   118k|    *dst = *src;
  109|   118k|}
dav1d_data_props_copy:
  113|   106k|{
  114|   106k|    assert(dst != NULL);
  ------------------
  |  Branch (114:5): [True: 106k, False: 0]
  ------------------
  115|   106k|    assert(src != NULL);
  ------------------
  |  Branch (115:5): [True: 106k, False: 0]
  ------------------
  116|       |
  117|   106k|    dav1d_ref_dec(&dst->user_data.ref);
  118|   106k|    *dst = *src;
  119|   106k|    if (dst->user_data.ref) dav1d_ref_inc(dst->user_data.ref);
  ------------------
  |  Branch (119:9): [True: 0, False: 106k]
  ------------------
  120|   106k|}
dav1d_data_props_set_defaults:
  122|   969k|void dav1d_data_props_set_defaults(Dav1dDataProps *const props) {
  123|   969k|    assert(props != NULL);
  ------------------
  |  Branch (123:5): [True: 969k, False: 0]
  ------------------
  124|       |
  125|   969k|    memset(props, 0, sizeof(*props));
  126|       |    props->timestamp = INT64_MIN;
  127|   969k|    props->offset = -1;
  128|   969k|}
dav1d_data_props_unref_internal:
  130|  9.51k|void dav1d_data_props_unref_internal(Dav1dDataProps *const props) {
  131|  9.51k|    validate_input(props != NULL);
  ------------------
  |  |   59|  9.51k|#define validate_input(x) validate_input_or_ret(x, )
  |  |  ------------------
  |  |  |  |   52|  9.51k|    if (!(x)) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (52:9): [True: 0, False: 9.51k]
  |  |  |  |  ------------------
  |  |  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  |  |  ------------------
  |  |  |  |   54|      0|                    #x, __func__); \
  |  |  |  |   55|      0|        debug_abort(); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   39|      0|#define debug_abort abort
  |  |  |  |  ------------------
  |  |  |  |   56|      0|        return r; \
  |  |  |  |   57|      0|    }
  |  |  ------------------
  ------------------
  132|       |
  133|  9.51k|    struct Dav1dRef *user_data_ref = props->user_data.ref;
  134|  9.51k|    dav1d_data_props_set_defaults(props);
  135|  9.51k|    dav1d_ref_dec(&user_data_ref);
  136|  9.51k|}
dav1d_data_unref_internal:
  138|   199k|void dav1d_data_unref_internal(Dav1dData *const buf) {
  139|   199k|    validate_input(buf != NULL);
  ------------------
  |  |   59|   199k|#define validate_input(x) validate_input_or_ret(x, )
  |  |  ------------------
  |  |  |  |   52|   199k|    if (!(x)) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (52:9): [True: 0, False: 199k]
  |  |  |  |  ------------------
  |  |  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  |  |  ------------------
  |  |  |  |   54|      0|                    #x, __func__); \
  |  |  |  |   55|      0|        debug_abort(); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   39|      0|#define debug_abort abort
  |  |  |  |  ------------------
  |  |  |  |   56|      0|        return r; \
  |  |  |  |   57|      0|    }
  |  |  ------------------
  ------------------
  140|       |
  141|   199k|    struct Dav1dRef *user_data_ref = buf->m.user_data.ref;
  142|   199k|    if (buf->ref) {
  ------------------
  |  Branch (142:9): [True: 190k, False: 9.51k]
  ------------------
  143|   190k|        validate_input(buf->data != NULL);
  ------------------
  |  |   59|   190k|#define validate_input(x) validate_input_or_ret(x, )
  |  |  ------------------
  |  |  |  |   52|   190k|    if (!(x)) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (52:9): [True: 0, False: 190k]
  |  |  |  |  ------------------
  |  |  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  |  |  ------------------
  |  |  |  |   54|      0|                    #x, __func__); \
  |  |  |  |   55|      0|        debug_abort(); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   39|      0|#define debug_abort abort
  |  |  |  |  ------------------
  |  |  |  |   56|      0|        return r; \
  |  |  |  |   57|      0|    }
  |  |  ------------------
  ------------------
  144|   190k|        dav1d_ref_dec(&buf->ref);
  145|   190k|    }
  146|   199k|    memset(buf, 0, sizeof(*buf));
  147|   199k|    dav1d_data_props_set_defaults(&buf->m);
  148|   199k|    dav1d_ref_dec(&user_data_ref);
  149|   199k|}

dav1d_decode_tile_sbrow:
 2594|   159k|int dav1d_decode_tile_sbrow(Dav1dTaskContext *const t) {
 2595|   159k|    const Dav1dFrameContext *const f = t->f;
 2596|   159k|    const enum BlockLevel root_bl = f->seq_hdr->sb128 ? BL_128X128 : BL_64X64;
  ------------------
  |  Branch (2596:37): [True: 90.7k, False: 68.6k]
  ------------------
 2597|   159k|    Dav1dTileState *const ts = t->ts;
 2598|   159k|    const Dav1dContext *const c = f->c;
 2599|   159k|    const int sb_step = f->sb_step;
 2600|   159k|    const int tile_row = ts->tiling.row, tile_col = ts->tiling.col;
 2601|   159k|    const int col_sb_start = f->frame_hdr->tiling.col_start_sb[tile_col];
 2602|   159k|    const int col_sb128_start = col_sb_start >> !f->seq_hdr->sb128;
 2603|       |
 2604|   159k|    if (IS_INTER_OR_SWITCH(f->frame_hdr) || f->frame_hdr->allow_intrabc) {
  ------------------
  |  |   36|   318k|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 53.7k, False: 105k]
  |  |  ------------------
  ------------------
  |  Branch (2604:45): [True: 77.3k, False: 28.3k]
  ------------------
 2605|   131k|        dav1d_refmvs_tile_sbrow_init(&t->rt, &f->rf, ts->tiling.col_start,
 2606|   131k|                                     ts->tiling.col_end, ts->tiling.row_start,
 2607|   131k|                                     ts->tiling.row_end, t->by >> f->sb_shift,
 2608|   131k|                                     ts->tiling.row, t->frame_thread.pass);
 2609|   131k|    }
 2610|       |
 2611|   159k|    if (IS_INTER_OR_SWITCH(f->frame_hdr) && c->n_fc > 1) {
  ------------------
  |  |   36|   318k|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 53.7k, False: 105k]
  |  |  ------------------
  ------------------
  |  Branch (2611:45): [True: 0, False: 53.7k]
  ------------------
 2612|      0|        const int sby = (t->by - ts->tiling.row_start) >> f->sb_shift;
 2613|      0|        int (*const lowest_px)[2] = ts->lowest_pixel[sby];
 2614|      0|        for (int n = 0; n < 7; n++)
  ------------------
  |  Branch (2614:25): [True: 0, False: 0]
  ------------------
 2615|      0|            for (int m = 0; m < 2; m++)
  ------------------
  |  Branch (2615:29): [True: 0, False: 0]
  ------------------
 2616|      0|                lowest_px[n][m] = INT_MIN;
 2617|      0|    }
 2618|       |
 2619|   159k|    reset_context(&t->l, IS_KEY_OR_INTRA(f->frame_hdr), t->frame_thread.pass);
  ------------------
  |  |   43|   159k|    (!IS_INTER_OR_SWITCH(frame_header))
  |  |  ------------------
  |  |  |  |   36|   159k|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  ------------------
 2620|   159k|    if (t->frame_thread.pass == 2) {
  ------------------
  |  Branch (2620:9): [True: 0, False: 159k]
  ------------------
 2621|      0|        const int off_2pass = c->n_tc > 1 ? f->sb128w * f->frame_hdr->tiling.rows : 0;
  ------------------
  |  Branch (2621:31): [True: 0, False: 0]
  ------------------
 2622|      0|        for (t->bx = ts->tiling.col_start,
 2623|      0|             t->a = f->a + off_2pass + col_sb128_start + tile_row * f->sb128w;
 2624|      0|             t->bx < ts->tiling.col_end; t->bx += sb_step)
  ------------------
  |  Branch (2624:14): [True: 0, False: 0]
  ------------------
 2625|      0|        {
 2626|      0|            if (atomic_load_explicit(c->flush, memory_order_acquire))
  ------------------
  |  Branch (2626:17): [True: 0, False: 0]
  ------------------
 2627|      0|                return 1;
 2628|      0|            if (decode_sb(t, root_bl, dav1d_intra_edge_tree[root_bl]))
  ------------------
  |  Branch (2628:17): [True: 0, False: 0]
  ------------------
 2629|      0|                return 1;
 2630|      0|            if (t->bx & 16 || f->seq_hdr->sb128)
  ------------------
  |  Branch (2630:17): [True: 0, False: 0]
  |  Branch (2630:31): [True: 0, False: 0]
  ------------------
 2631|      0|                t->a++;
 2632|      0|        }
 2633|      0|        f->bd_fn.backup_ipred_edge(t);
 2634|      0|        return 0;
 2635|      0|    }
 2636|       |
 2637|   159k|    if (f->c->n_tc > 1 && f->frame_hdr->use_ref_frame_mvs) {
  ------------------
  |  Branch (2637:9): [True: 0, False: 159k]
  |  Branch (2637:27): [True: 0, False: 0]
  ------------------
 2638|      0|        f->c->refmvs_dsp.load_tmvs(&f->rf, ts->tiling.row,
 2639|      0|                                   ts->tiling.col_start >> 1, ts->tiling.col_end >> 1,
 2640|      0|                                   t->by >> 1, (t->by + sb_step) >> 1);
 2641|      0|    }
 2642|   159k|    memset(t->pal_sz_uv[1], 0, sizeof(*t->pal_sz_uv));
 2643|   159k|    const int sb128y = t->by >> 5;
 2644|   159k|    for (t->bx = ts->tiling.col_start, t->a = f->a + col_sb128_start + tile_row * f->sb128w,
 2645|   159k|         t->lf_mask = f->lf.mask + sb128y * f->sb128w + col_sb128_start;
 2646|   717k|         t->bx < ts->tiling.col_end; t->bx += sb_step)
  ------------------
  |  Branch (2646:10): [True: 566k, False: 150k]
  ------------------
 2647|   566k|    {
 2648|   566k|        if (atomic_load_explicit(c->flush, memory_order_acquire))
  ------------------
  |  Branch (2648:13): [True: 0, False: 566k]
  ------------------
 2649|      0|            return 1;
 2650|   566k|        if (root_bl == BL_128X128) {
  ------------------
  |  Branch (2650:13): [True: 213k, False: 353k]
  ------------------
 2651|   213k|            t->cur_sb_cdef_idx_ptr = t->lf_mask->cdef_idx;
 2652|   213k|            t->cur_sb_cdef_idx_ptr[0] = -1;
 2653|   213k|            t->cur_sb_cdef_idx_ptr[1] = -1;
 2654|   213k|            t->cur_sb_cdef_idx_ptr[2] = -1;
 2655|   213k|            t->cur_sb_cdef_idx_ptr[3] = -1;
 2656|   353k|        } else {
 2657|   353k|            t->cur_sb_cdef_idx_ptr =
 2658|   353k|                &t->lf_mask->cdef_idx[((t->bx & 16) >> 4) +
 2659|   353k|                                      ((t->by & 16) >> 3)];
 2660|   353k|            t->cur_sb_cdef_idx_ptr[0] = -1;
 2661|   353k|        }
 2662|       |        // Restoration filter
 2663|  2.26M|        for (int p = 0; p < 3; p++) {
  ------------------
  |  Branch (2663:25): [True: 1.69M, False: 566k]
  ------------------
 2664|  1.69M|            if (!((f->lf.restore_planes >> p) & 1U))
  ------------------
  |  Branch (2664:17): [True: 1.55M, False: 142k]
  ------------------
 2665|  1.55M|                continue;
 2666|       |
 2667|   142k|            const int ss_ver = p && f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
  ------------------
  |  Branch (2667:32): [True: 55.3k, False: 86.7k]
  |  Branch (2667:37): [True: 16.9k, False: 38.3k]
  ------------------
 2668|   142k|            const int ss_hor = p && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
  ------------------
  |  Branch (2668:32): [True: 55.3k, False: 86.7k]
  |  Branch (2668:37): [True: 20.4k, False: 34.9k]
  ------------------
 2669|   142k|            const int unit_size_log2 = f->frame_hdr->restoration.unit_size[!!p];
 2670|   142k|            const int y = t->by * 4 >> ss_ver;
 2671|   142k|            const int h = (f->cur.p.h + ss_ver) >> ss_ver;
 2672|       |
 2673|   142k|            const int unit_size = 1 << unit_size_log2;
 2674|   142k|            const unsigned mask = unit_size - 1;
 2675|   142k|            if (y & mask) continue;
  ------------------
  |  Branch (2675:17): [True: 32.7k, False: 109k]
  ------------------
 2676|   109k|            const int half_unit = unit_size >> 1;
 2677|       |            // Round half up at frame boundaries, if there's more than one
 2678|       |            // restoration unit
 2679|   109k|            if (y && y + half_unit > h) continue;
  ------------------
  |  Branch (2679:17): [True: 27.3k, False: 81.9k]
  |  Branch (2679:22): [True: 2.46k, False: 24.9k]
  ------------------
 2680|       |
 2681|   106k|            const enum Dav1dRestorationType frame_type = f->frame_hdr->restoration.type[p];
 2682|       |
 2683|   106k|            if (f->frame_hdr->width[0] != f->frame_hdr->width[1]) {
  ------------------
  |  Branch (2683:17): [True: 18.1k, False: 88.7k]
  ------------------
 2684|  18.1k|                const int w = (f->sr_cur.p.p.w + ss_hor) >> ss_hor;
 2685|  18.1k|                const int n_units = imax(1, (w + half_unit) >> unit_size_log2);
 2686|       |
 2687|  18.1k|                const int d = f->frame_hdr->super_res.width_scale_denominator;
 2688|  18.1k|                const int rnd = unit_size * 8 - 1, shift = unit_size_log2 + 3;
 2689|  18.1k|                const int x0 = ((4 *  t->bx            * d >> ss_hor) + rnd) >> shift;
 2690|  18.1k|                const int x1 = ((4 * (t->bx + sb_step) * d >> ss_hor) + rnd) >> shift;
 2691|       |
 2692|  41.9k|                for (int x = x0; x < imin(x1, n_units); x++) {
  ------------------
  |  Branch (2692:34): [True: 23.8k, False: 18.1k]
  ------------------
 2693|  23.8k|                    const int px_x = x << (unit_size_log2 + ss_hor);
 2694|  23.8k|                    const int sb_idx = (t->by >> 5) * f->sr_sb128w + (px_x >> 7);
 2695|  23.8k|                    const int unit_idx = ((t->by & 16) >> 3) + ((px_x & 64) >> 6);
 2696|  23.8k|                    Av1RestorationUnit *const lr = &f->lf.lr_mask[sb_idx].lr[p][unit_idx];
 2697|       |
 2698|  23.8k|                    read_restoration_info(t, lr, p, frame_type);
 2699|  23.8k|                }
 2700|  88.7k|            } else {
 2701|  88.7k|                const int x = 4 * t->bx >> ss_hor;
 2702|  88.7k|                if (x & mask) continue;
  ------------------
  |  Branch (2702:21): [True: 11.5k, False: 77.1k]
  ------------------
 2703|  77.1k|                const int w = (f->cur.p.w + ss_hor) >> ss_hor;
 2704|       |                // Round half up at frame boundaries, if there's more than one
 2705|       |                // restoration unit
 2706|  77.1k|                if (x && x + half_unit > w) continue;
  ------------------
  |  Branch (2706:21): [True: 51.9k, False: 25.1k]
  |  Branch (2706:26): [True: 1.47k, False: 50.5k]
  ------------------
 2707|  75.6k|                const int sb_idx = (t->by >> 5) * f->sr_sb128w + (t->bx >> 5);
 2708|  75.6k|                const int unit_idx = ((t->by & 16) >> 3) + ((t->bx & 16) >> 4);
 2709|  75.6k|                Av1RestorationUnit *const lr = &f->lf.lr_mask[sb_idx].lr[p][unit_idx];
 2710|       |
 2711|  75.6k|                read_restoration_info(t, lr, p, frame_type);
 2712|  75.6k|            }
 2713|   106k|        }
 2714|   566k|        if (decode_sb(t, root_bl, dav1d_intra_edge_tree[root_bl]))
  ------------------
  |  Branch (2714:13): [True: 8.47k, False: 557k]
  ------------------
 2715|  8.47k|            return 1;
 2716|   557k|        if (t->bx & 16 || f->seq_hdr->sb128) {
  ------------------
  |  Branch (2716:13): [True: 168k, False: 389k]
  |  Branch (2716:27): [True: 207k, False: 181k]
  ------------------
 2717|   376k|            t->a++;
 2718|   376k|            t->lf_mask++;
 2719|   376k|        }
 2720|   557k|    }
 2721|       |
 2722|   150k|    if (f->seq_hdr->ref_frame_mvs && f->c->n_tc > 1 && IS_INTER_OR_SWITCH(f->frame_hdr)) {
  ------------------
  |  |   36|      0|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  |  Branch (2722:9): [True: 106k, False: 43.9k]
  |  Branch (2722:38): [True: 0, False: 106k]
  ------------------
 2723|      0|        dav1d_refmvs_save_tmvs(&f->c->refmvs_dsp, &t->rt,
 2724|      0|                               ts->tiling.col_start >> 1, ts->tiling.col_end >> 1,
 2725|      0|                               t->by >> 1, (t->by + sb_step) >> 1);
 2726|      0|    }
 2727|       |
 2728|       |    // backup pre-loopfilter pixels for intra prediction of the next sbrow
 2729|   150k|    if (t->frame_thread.pass != 1)
  ------------------
  |  Branch (2729:9): [True: 150k, False: 0]
  ------------------
 2730|   150k|        f->bd_fn.backup_ipred_edge(t);
 2731|       |
 2732|       |    // backup t->a/l.tx_lpf_y/uv at tile boundaries to use them to "fix"
 2733|       |    // up the initial value in neighbour tiles when running the loopfilter
 2734|   150k|    int align_h = (f->bh + 31) & ~31;
 2735|   150k|    memcpy(&f->lf.tx_lpf_right_edge[0][align_h * tile_col + t->by],
 2736|   150k|           &t->l.tx_lpf_y[t->by & 16], sb_step);
 2737|   150k|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
 2738|   150k|    align_h >>= ss_ver;
 2739|   150k|    memcpy(&f->lf.tx_lpf_right_edge[1][align_h * tile_col + (t->by >> ss_ver)],
 2740|   150k|           &t->l.tx_lpf_uv[(t->by & 16) >> ss_ver], sb_step >> ss_ver);
 2741|       |
 2742|       |    // error out on symbol decoder overread
 2743|   150k|    if (ts->msac.cnt <= -15) return 1;
  ------------------
  |  Branch (2743:9): [True: 13.8k, False: 137k]
  ------------------
 2744|       |
 2745|   137k|    return c->strict_std_compliance &&
  ------------------
  |  Branch (2745:12): [True: 0, False: 137k]
  ------------------
 2746|      0|           (t->by >> f->sb_shift) + 1 >= f->frame_hdr->tiling.row_start_sb[tile_row + 1] &&
  ------------------
  |  Branch (2746:12): [True: 0, False: 0]
  ------------------
 2747|      0|           check_trailing_bits_after_symbol_coder(&ts->msac);
  ------------------
  |  Branch (2747:12): [True: 0, False: 0]
  ------------------
 2748|   150k|}
dav1d_decode_frame_init:
 2750|  41.8k|int dav1d_decode_frame_init(Dav1dFrameContext *const f) {
 2751|  41.8k|    const Dav1dContext *const c = f->c;
 2752|  41.8k|    int retval = DAV1D_ERR(ENOMEM);
  ------------------
  |  |   58|  41.8k|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
 2753|       |
 2754|  41.8k|    if (f->sbh > f->lf.start_of_tile_row_sz) {
  ------------------
  |  Branch (2754:9): [True: 8.37k, False: 33.4k]
  ------------------
 2755|  8.37k|        dav1d_free(f->lf.start_of_tile_row);
  ------------------
  |  |  135|  8.37k|#define dav1d_free(ptr) free(ptr)
  ------------------
 2756|  8.37k|        f->lf.start_of_tile_row = dav1d_malloc(ALLOC_TILE, f->sbh * sizeof(uint8_t));
  ------------------
  |  |  132|  8.37k|#define dav1d_malloc(type, sz) malloc(sz)
  ------------------
 2757|  8.37k|        if (!f->lf.start_of_tile_row) {
  ------------------
  |  Branch (2757:13): [True: 0, False: 8.37k]
  ------------------
 2758|      0|            f->lf.start_of_tile_row_sz = 0;
 2759|      0|            goto error;
 2760|      0|        }
 2761|  8.37k|        f->lf.start_of_tile_row_sz = f->sbh;
 2762|  8.37k|    }
 2763|  41.8k|    int sby = 0;
 2764|  86.5k|    for (int tile_row = 0; tile_row < f->frame_hdr->tiling.rows; tile_row++) {
  ------------------
  |  Branch (2764:28): [True: 44.7k, False: 41.8k]
  ------------------
 2765|  44.7k|        f->lf.start_of_tile_row[sby++] = tile_row;
 2766|   399k|        while (sby < f->frame_hdr->tiling.row_start_sb[tile_row + 1])
  ------------------
  |  Branch (2766:16): [True: 354k, False: 44.7k]
  ------------------
 2767|   354k|            f->lf.start_of_tile_row[sby++] = 0;
 2768|  44.7k|    }
 2769|       |
 2770|  41.8k|    const int n_ts = f->frame_hdr->tiling.cols * f->frame_hdr->tiling.rows;
 2771|  41.8k|    if (n_ts != f->n_ts) {
  ------------------
  |  Branch (2771:9): [True: 9.03k, False: 32.7k]
  ------------------
 2772|  9.03k|        if (c->n_fc > 1) {
  ------------------
  |  Branch (2772:13): [True: 0, False: 9.03k]
  ------------------
 2773|      0|            dav1d_free(f->frame_thread.tile_start_off);
  ------------------
  |  |  135|      0|#define dav1d_free(ptr) free(ptr)
  ------------------
 2774|      0|            f->frame_thread.tile_start_off =
 2775|      0|                dav1d_malloc(ALLOC_TILE, sizeof(*f->frame_thread.tile_start_off) * n_ts);
  ------------------
  |  |  132|      0|#define dav1d_malloc(type, sz) malloc(sz)
  ------------------
 2776|      0|            if (!f->frame_thread.tile_start_off) {
  ------------------
  |  Branch (2776:17): [True: 0, False: 0]
  ------------------
 2777|      0|                f->n_ts = 0;
 2778|      0|                goto error;
 2779|      0|            }
 2780|      0|        }
 2781|  9.03k|        dav1d_free_aligned(f->ts);
  ------------------
  |  |  136|  9.03k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
 2782|  9.03k|        f->ts = dav1d_alloc_aligned(ALLOC_TILE, sizeof(*f->ts) * n_ts, 32);
  ------------------
  |  |  134|  9.03k|#define dav1d_alloc_aligned(type, sz, align) dav1d_alloc_aligned_internal(sz, align)
  ------------------
 2783|  9.03k|        if (!f->ts) goto error;
  ------------------
  |  Branch (2783:13): [True: 0, False: 9.03k]
  ------------------
 2784|  9.03k|        f->n_ts = n_ts;
 2785|  9.03k|    }
 2786|       |
 2787|  41.8k|    const int a_sz = f->sb128w * f->frame_hdr->tiling.rows * (1 + (c->n_fc > 1 && c->n_tc > 1));
  ------------------
  |  Branch (2787:68): [True: 0, False: 41.8k]
  |  Branch (2787:83): [True: 0, False: 0]
  ------------------
 2788|  41.8k|    if (a_sz != f->a_sz) {
  ------------------
  |  Branch (2788:9): [True: 9.75k, False: 32.0k]
  ------------------
 2789|  9.75k|        dav1d_free(f->a);
  ------------------
  |  |  135|  9.75k|#define dav1d_free(ptr) free(ptr)
  ------------------
 2790|  9.75k|        f->a = dav1d_malloc(ALLOC_TILE, sizeof(*f->a) * a_sz);
  ------------------
  |  |  132|  9.75k|#define dav1d_malloc(type, sz) malloc(sz)
  ------------------
 2791|  9.75k|        if (!f->a) {
  ------------------
  |  Branch (2791:13): [True: 0, False: 9.75k]
  ------------------
 2792|      0|            f->a_sz = 0;
 2793|      0|            goto error;
 2794|      0|        }
 2795|  9.75k|        f->a_sz = a_sz;
 2796|  9.75k|    }
 2797|       |
 2798|  41.8k|    const int num_sb128 = f->sb128w * f->sb128h;
 2799|  41.8k|    const uint8_t *const size_mul = ss_size_mul[f->cur.p.layout];
 2800|  41.8k|    const int hbd = !!f->seq_hdr->hbd;
 2801|  41.8k|    if (c->n_fc > 1) {
  ------------------
  |  Branch (2801:9): [True: 0, False: 41.8k]
  ------------------
 2802|      0|        const unsigned sb_step4 = f->sb_step * 4;
 2803|      0|        int tile_idx = 0;
 2804|      0|        for (int tile_row = 0; tile_row < f->frame_hdr->tiling.rows; tile_row++) {
  ------------------
  |  Branch (2804:32): [True: 0, False: 0]
  ------------------
 2805|      0|            const unsigned row_off = f->frame_hdr->tiling.row_start_sb[tile_row] *
 2806|      0|                                     sb_step4 * f->sb128w * 128;
 2807|      0|            const unsigned b_diff = (f->frame_hdr->tiling.row_start_sb[tile_row + 1] -
 2808|      0|                                     f->frame_hdr->tiling.row_start_sb[tile_row]) * sb_step4;
 2809|      0|            for (int tile_col = 0; tile_col < f->frame_hdr->tiling.cols; tile_col++) {
  ------------------
  |  Branch (2809:36): [True: 0, False: 0]
  ------------------
 2810|      0|                f->frame_thread.tile_start_off[tile_idx++] = row_off + b_diff *
 2811|      0|                    f->frame_hdr->tiling.col_start_sb[tile_col] * sb_step4;
 2812|      0|            }
 2813|      0|        }
 2814|       |
 2815|      0|        const int lowest_pixel_mem_sz = f->frame_hdr->tiling.cols * f->sbh;
 2816|      0|        if (lowest_pixel_mem_sz != f->tile_thread.lowest_pixel_mem_sz) {
  ------------------
  |  Branch (2816:13): [True: 0, False: 0]
  ------------------
 2817|      0|            dav1d_free(f->tile_thread.lowest_pixel_mem);
  ------------------
  |  |  135|      0|#define dav1d_free(ptr) free(ptr)
  ------------------
 2818|      0|            f->tile_thread.lowest_pixel_mem =
 2819|      0|                dav1d_malloc(ALLOC_TILE, lowest_pixel_mem_sz *
  ------------------
  |  |  132|      0|#define dav1d_malloc(type, sz) malloc(sz)
  ------------------
 2820|      0|                             sizeof(*f->tile_thread.lowest_pixel_mem));
 2821|      0|            if (!f->tile_thread.lowest_pixel_mem) {
  ------------------
  |  Branch (2821:17): [True: 0, False: 0]
  ------------------
 2822|      0|                f->tile_thread.lowest_pixel_mem_sz = 0;
 2823|      0|                goto error;
 2824|      0|            }
 2825|      0|            f->tile_thread.lowest_pixel_mem_sz = lowest_pixel_mem_sz;
 2826|      0|        }
 2827|      0|        int (*lowest_pixel_ptr)[7][2] = f->tile_thread.lowest_pixel_mem;
 2828|      0|        for (int tile_row = 0, tile_row_base = 0; tile_row < f->frame_hdr->tiling.rows;
  ------------------
  |  Branch (2828:51): [True: 0, False: 0]
  ------------------
 2829|      0|             tile_row++, tile_row_base += f->frame_hdr->tiling.cols)
 2830|      0|        {
 2831|      0|            const int tile_row_sb_h = f->frame_hdr->tiling.row_start_sb[tile_row + 1] -
 2832|      0|                                      f->frame_hdr->tiling.row_start_sb[tile_row];
 2833|      0|            for (int tile_col = 0; tile_col < f->frame_hdr->tiling.cols; tile_col++) {
  ------------------
  |  Branch (2833:36): [True: 0, False: 0]
  ------------------
 2834|      0|                f->ts[tile_row_base + tile_col].lowest_pixel = lowest_pixel_ptr;
 2835|      0|                lowest_pixel_ptr += tile_row_sb_h;
 2836|      0|            }
 2837|      0|        }
 2838|       |
 2839|      0|        const int cbi_sz = num_sb128 * size_mul[0];
 2840|      0|        if (cbi_sz != f->frame_thread.cbi_sz) {
  ------------------
  |  Branch (2840:13): [True: 0, False: 0]
  ------------------
 2841|      0|            dav1d_free_aligned(f->frame_thread.cbi);
  ------------------
  |  |  136|      0|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
 2842|      0|            f->frame_thread.cbi =
 2843|      0|                dav1d_alloc_aligned(ALLOC_BLOCK, sizeof(*f->frame_thread.cbi) *
  ------------------
  |  |  134|      0|#define dav1d_alloc_aligned(type, sz, align) dav1d_alloc_aligned_internal(sz, align)
  ------------------
 2844|      0|                                    cbi_sz * 32 * 32 / 4, 64);
 2845|      0|            if (!f->frame_thread.cbi) {
  ------------------
  |  Branch (2845:17): [True: 0, False: 0]
  ------------------
 2846|      0|                f->frame_thread.cbi_sz = 0;
 2847|      0|                goto error;
 2848|      0|            }
 2849|      0|            f->frame_thread.cbi_sz = cbi_sz;
 2850|      0|        }
 2851|       |
 2852|      0|        const int cf_sz = (num_sb128 * size_mul[0]) << hbd;
 2853|      0|        if (cf_sz != f->frame_thread.cf_sz) {
  ------------------
  |  Branch (2853:13): [True: 0, False: 0]
  ------------------
 2854|      0|            dav1d_free_aligned(f->frame_thread.cf);
  ------------------
  |  |  136|      0|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
 2855|      0|            f->frame_thread.cf =
 2856|      0|                dav1d_alloc_aligned(ALLOC_COEF, (size_t)cf_sz * 128 * 128 / 2, 64);
  ------------------
  |  |  134|      0|#define dav1d_alloc_aligned(type, sz, align) dav1d_alloc_aligned_internal(sz, align)
  ------------------
 2857|      0|            if (!f->frame_thread.cf) {
  ------------------
  |  Branch (2857:17): [True: 0, False: 0]
  ------------------
 2858|      0|                f->frame_thread.cf_sz = 0;
 2859|      0|                goto error;
 2860|      0|            }
 2861|      0|            memset(f->frame_thread.cf, 0, (size_t)cf_sz * 128 * 128 / 2);
 2862|      0|            f->frame_thread.cf_sz = cf_sz;
 2863|      0|        }
 2864|       |
 2865|      0|        if (f->frame_hdr->allow_screen_content_tools) {
  ------------------
  |  Branch (2865:13): [True: 0, False: 0]
  ------------------
 2866|      0|            const int pal_sz = num_sb128 << hbd;
 2867|      0|            if (pal_sz != f->frame_thread.pal_sz) {
  ------------------
  |  Branch (2867:17): [True: 0, False: 0]
  ------------------
 2868|      0|                dav1d_free_aligned(f->frame_thread.pal);
  ------------------
  |  |  136|      0|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
 2869|      0|                f->frame_thread.pal =
 2870|      0|                    dav1d_alloc_aligned(ALLOC_PAL, sizeof(*f->frame_thread.pal) *
  ------------------
  |  |  134|      0|#define dav1d_alloc_aligned(type, sz, align) dav1d_alloc_aligned_internal(sz, align)
  ------------------
 2871|      0|                                        pal_sz * 16 * 16, 64);
 2872|      0|                if (!f->frame_thread.pal) {
  ------------------
  |  Branch (2872:21): [True: 0, False: 0]
  ------------------
 2873|      0|                    f->frame_thread.pal_sz = 0;
 2874|      0|                    goto error;
 2875|      0|                }
 2876|      0|                f->frame_thread.pal_sz = pal_sz;
 2877|      0|            }
 2878|       |
 2879|      0|            const int pal_idx_sz = num_sb128 * size_mul[1];
 2880|      0|            if (pal_idx_sz != f->frame_thread.pal_idx_sz) {
  ------------------
  |  Branch (2880:17): [True: 0, False: 0]
  ------------------
 2881|      0|                dav1d_free_aligned(f->frame_thread.pal_idx);
  ------------------
  |  |  136|      0|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
 2882|      0|                f->frame_thread.pal_idx =
 2883|      0|                    dav1d_alloc_aligned(ALLOC_PAL, sizeof(*f->frame_thread.pal_idx) *
  ------------------
  |  |  134|      0|#define dav1d_alloc_aligned(type, sz, align) dav1d_alloc_aligned_internal(sz, align)
  ------------------
 2884|      0|                                        pal_idx_sz * 128 * 128 / 8, 64);
 2885|      0|                if (!f->frame_thread.pal_idx) {
  ------------------
  |  Branch (2885:21): [True: 0, False: 0]
  ------------------
 2886|      0|                    f->frame_thread.pal_idx_sz = 0;
 2887|      0|                    goto error;
 2888|      0|                }
 2889|      0|                f->frame_thread.pal_idx_sz = pal_idx_sz;
 2890|      0|            }
 2891|      0|        } else if (f->frame_thread.pal) {
  ------------------
  |  Branch (2891:20): [True: 0, False: 0]
  ------------------
 2892|      0|            dav1d_freep_aligned(&f->frame_thread.pal);
 2893|      0|            dav1d_freep_aligned(&f->frame_thread.pal_idx);
 2894|      0|            f->frame_thread.pal_sz = f->frame_thread.pal_idx_sz = 0;
 2895|      0|        }
 2896|      0|    }
 2897|       |
 2898|       |    // update allocation of block contexts for above
 2899|  41.8k|    ptrdiff_t y_stride = f->cur.stride[0], uv_stride = f->cur.stride[1];
 2900|  41.8k|    const int has_resize = f->frame_hdr->width[0] != f->frame_hdr->width[1];
 2901|  41.8k|    const int need_cdef_lpf_copy = c->n_tc > 1 && has_resize;
  ------------------
  |  Branch (2901:36): [True: 0, False: 41.8k]
  |  Branch (2901:51): [True: 0, False: 0]
  ------------------
 2902|  41.8k|    if (y_stride * f->sbh * 4 != f->lf.cdef_buf_plane_sz[0] ||
  ------------------
  |  Branch (2902:9): [True: 9.46k, False: 32.3k]
  ------------------
 2903|  32.3k|        uv_stride * f->sbh * 8 != f->lf.cdef_buf_plane_sz[1] ||
  ------------------
  |  Branch (2903:9): [True: 228, False: 32.1k]
  ------------------
 2904|  32.1k|        need_cdef_lpf_copy != f->lf.need_cdef_lpf_copy ||
  ------------------
  |  Branch (2904:9): [True: 0, False: 32.1k]
  ------------------
 2905|  32.1k|        f->sbh != f->lf.cdef_buf_sbh)
  ------------------
  |  Branch (2905:9): [True: 353, False: 31.7k]
  ------------------
 2906|  10.0k|    {
 2907|  10.0k|        dav1d_free_aligned(f->lf.cdef_line_buf);
  ------------------
  |  |  136|  10.0k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
 2908|  10.0k|        size_t alloc_sz = 64;
 2909|  10.0k|        alloc_sz += (size_t)llabs(y_stride) * 4 * f->sbh << need_cdef_lpf_copy;
 2910|  10.0k|        alloc_sz += (size_t)llabs(uv_stride) * 8 * f->sbh << need_cdef_lpf_copy;
 2911|  10.0k|        uint8_t *ptr = f->lf.cdef_line_buf = dav1d_alloc_aligned(ALLOC_CDEF, alloc_sz, 32);
  ------------------
  |  |  134|  10.0k|#define dav1d_alloc_aligned(type, sz, align) dav1d_alloc_aligned_internal(sz, align)
  ------------------
 2912|  10.0k|        if (!ptr) {
  ------------------
  |  Branch (2912:13): [True: 0, False: 10.0k]
  ------------------
 2913|      0|            f->lf.cdef_buf_plane_sz[0] = f->lf.cdef_buf_plane_sz[1] = 0;
 2914|      0|            goto error;
 2915|      0|        }
 2916|       |
 2917|  10.0k|        ptr += 32;
 2918|  10.0k|        if (y_stride < 0) {
  ------------------
  |  Branch (2918:13): [True: 0, False: 10.0k]
  ------------------
 2919|      0|            f->lf.cdef_line[0][0] = ptr - y_stride * (f->sbh * 4 - 1);
 2920|      0|            f->lf.cdef_line[1][0] = ptr - y_stride * (f->sbh * 4 - 3);
 2921|  10.0k|        } else {
 2922|  10.0k|            f->lf.cdef_line[0][0] = ptr + y_stride * 0;
 2923|  10.0k|            f->lf.cdef_line[1][0] = ptr + y_stride * 2;
 2924|  10.0k|        }
 2925|  10.0k|        ptr += llabs(y_stride) * f->sbh * 4;
 2926|  10.0k|        if (uv_stride < 0) {
  ------------------
  |  Branch (2926:13): [True: 0, False: 10.0k]
  ------------------
 2927|      0|            f->lf.cdef_line[0][1] = ptr - uv_stride * (f->sbh * 8 - 1);
 2928|      0|            f->lf.cdef_line[0][2] = ptr - uv_stride * (f->sbh * 8 - 3);
 2929|      0|            f->lf.cdef_line[1][1] = ptr - uv_stride * (f->sbh * 8 - 5);
 2930|      0|            f->lf.cdef_line[1][2] = ptr - uv_stride * (f->sbh * 8 - 7);
 2931|  10.0k|        } else {
 2932|  10.0k|            f->lf.cdef_line[0][1] = ptr + uv_stride * 0;
 2933|  10.0k|            f->lf.cdef_line[0][2] = ptr + uv_stride * 2;
 2934|  10.0k|            f->lf.cdef_line[1][1] = ptr + uv_stride * 4;
 2935|  10.0k|            f->lf.cdef_line[1][2] = ptr + uv_stride * 6;
 2936|  10.0k|        }
 2937|       |
 2938|  10.0k|        if (need_cdef_lpf_copy) {
  ------------------
  |  Branch (2938:13): [True: 0, False: 10.0k]
  ------------------
 2939|      0|            ptr += llabs(uv_stride) * f->sbh * 8;
 2940|      0|            if (y_stride < 0)
  ------------------
  |  Branch (2940:17): [True: 0, False: 0]
  ------------------
 2941|      0|                f->lf.cdef_lpf_line[0] = ptr - y_stride * (f->sbh * 4 - 1);
 2942|      0|            else
 2943|      0|                f->lf.cdef_lpf_line[0] = ptr;
 2944|      0|            ptr += llabs(y_stride) * f->sbh * 4;
 2945|      0|            if (uv_stride < 0) {
  ------------------
  |  Branch (2945:17): [True: 0, False: 0]
  ------------------
 2946|      0|                f->lf.cdef_lpf_line[1] = ptr - uv_stride * (f->sbh * 4 - 1);
 2947|      0|                f->lf.cdef_lpf_line[2] = ptr - uv_stride * (f->sbh * 8 - 1);
 2948|      0|            } else {
 2949|      0|                f->lf.cdef_lpf_line[1] = ptr;
 2950|      0|                f->lf.cdef_lpf_line[2] = ptr + uv_stride * f->sbh * 4;
 2951|      0|            }
 2952|      0|        }
 2953|       |
 2954|  10.0k|        f->lf.cdef_buf_plane_sz[0] = (int) y_stride * f->sbh * 4;
 2955|  10.0k|        f->lf.cdef_buf_plane_sz[1] = (int) uv_stride * f->sbh * 8;
 2956|  10.0k|        f->lf.need_cdef_lpf_copy = need_cdef_lpf_copy;
 2957|  10.0k|        f->lf.cdef_buf_sbh = f->sbh;
 2958|  10.0k|    }
 2959|       |
 2960|  41.8k|    const int sb128 = f->seq_hdr->sb128;
 2961|  41.8k|    const int num_lines = c->n_tc > 1 ? f->sbh * 4 << sb128 : 12;
  ------------------
  |  Branch (2961:27): [True: 0, False: 41.8k]
  ------------------
 2962|  41.8k|    y_stride = f->sr_cur.p.stride[0], uv_stride = f->sr_cur.p.stride[1];
 2963|  41.8k|    if (y_stride * num_lines != f->lf.lr_buf_plane_sz[0] ||
  ------------------
  |  Branch (2963:9): [True: 9.43k, False: 32.3k]
  ------------------
 2964|  32.3k|        uv_stride * num_lines * 2 != f->lf.lr_buf_plane_sz[1])
  ------------------
  |  Branch (2964:9): [True: 232, False: 32.1k]
  ------------------
 2965|  9.66k|    {
 2966|  9.66k|        dav1d_free_aligned(f->lf.lr_line_buf);
  ------------------
  |  |  136|  9.66k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
 2967|       |        // lr simd may overread the input, so slightly over-allocate the lpf buffer
 2968|  9.66k|        size_t alloc_sz = 128;
 2969|  9.66k|        alloc_sz += (size_t)llabs(y_stride) * num_lines;
 2970|  9.66k|        alloc_sz += (size_t)llabs(uv_stride) * num_lines * 2;
 2971|  9.66k|        uint8_t *ptr = f->lf.lr_line_buf = dav1d_alloc_aligned(ALLOC_LR, alloc_sz, 64);
  ------------------
  |  |  134|  9.66k|#define dav1d_alloc_aligned(type, sz, align) dav1d_alloc_aligned_internal(sz, align)
  ------------------
 2972|  9.66k|        if (!ptr) {
  ------------------
  |  Branch (2972:13): [True: 0, False: 9.66k]
  ------------------
 2973|      0|            f->lf.lr_buf_plane_sz[0] = f->lf.lr_buf_plane_sz[1] = 0;
 2974|      0|            goto error;
 2975|      0|        }
 2976|       |
 2977|  9.66k|        ptr += 64;
 2978|  9.66k|        if (y_stride < 0)
  ------------------
  |  Branch (2978:13): [True: 0, False: 9.66k]
  ------------------
 2979|      0|            f->lf.lr_lpf_line[0] = ptr - y_stride * (num_lines - 1);
 2980|  9.66k|        else
 2981|  9.66k|            f->lf.lr_lpf_line[0] = ptr;
 2982|  9.66k|        ptr += llabs(y_stride) * num_lines;
 2983|  9.66k|        if (uv_stride < 0) {
  ------------------
  |  Branch (2983:13): [True: 0, False: 9.66k]
  ------------------
 2984|      0|            f->lf.lr_lpf_line[1] = ptr - uv_stride * (num_lines * 1 - 1);
 2985|      0|            f->lf.lr_lpf_line[2] = ptr - uv_stride * (num_lines * 2 - 1);
 2986|  9.66k|        } else {
 2987|  9.66k|            f->lf.lr_lpf_line[1] = ptr;
 2988|  9.66k|            f->lf.lr_lpf_line[2] = ptr + uv_stride * num_lines;
 2989|  9.66k|        }
 2990|       |
 2991|  9.66k|        f->lf.lr_buf_plane_sz[0] = (int) y_stride * num_lines;
 2992|  9.66k|        f->lf.lr_buf_plane_sz[1] = (int) uv_stride * num_lines * 2;
 2993|  9.66k|    }
 2994|       |
 2995|       |    // update allocation for loopfilter masks
 2996|  41.8k|    if (num_sb128 != f->lf.mask_sz) {
  ------------------
  |  Branch (2996:9): [True: 9.43k, False: 32.3k]
  ------------------
 2997|  9.43k|        dav1d_free(f->lf.mask);
  ------------------
  |  |  135|  9.43k|#define dav1d_free(ptr) free(ptr)
  ------------------
 2998|  9.43k|        dav1d_free(f->lf.level);
  ------------------
  |  |  135|  9.43k|#define dav1d_free(ptr) free(ptr)
  ------------------
 2999|  9.43k|        f->lf.mask = dav1d_malloc(ALLOC_LF, sizeof(*f->lf.mask) * num_sb128);
  ------------------
  |  |  132|  9.43k|#define dav1d_malloc(type, sz) malloc(sz)
  ------------------
 3000|       |        // over-allocate by 3 bytes since some of the SIMD implementations
 3001|       |        // index this from the level type and can thus over-read by up to 3
 3002|  9.43k|        f->lf.level = dav1d_malloc(ALLOC_LF, sizeof(*f->lf.level) * num_sb128 * 32 * 32 + 3);
  ------------------
  |  |  132|  9.43k|#define dav1d_malloc(type, sz) malloc(sz)
  ------------------
 3003|  9.43k|        if (!f->lf.mask || !f->lf.level) {
  ------------------
  |  Branch (3003:13): [True: 0, False: 9.43k]
  |  Branch (3003:28): [True: 0, False: 9.43k]
  ------------------
 3004|      0|            f->lf.mask_sz = 0;
 3005|      0|            goto error;
 3006|      0|        }
 3007|  9.43k|        if (c->n_fc > 1) {
  ------------------
  |  Branch (3007:13): [True: 0, False: 9.43k]
  ------------------
 3008|      0|            dav1d_free(f->frame_thread.b);
  ------------------
  |  |  135|      0|#define dav1d_free(ptr) free(ptr)
  ------------------
 3009|      0|            f->frame_thread.b = dav1d_malloc(ALLOC_BLOCK, sizeof(*f->frame_thread.b) *
  ------------------
  |  |  132|      0|#define dav1d_malloc(type, sz) malloc(sz)
  ------------------
 3010|      0|                                             num_sb128 * 32 * 32);
 3011|      0|            if (!f->frame_thread.b) {
  ------------------
  |  Branch (3011:17): [True: 0, False: 0]
  ------------------
 3012|      0|                f->lf.mask_sz = 0;
 3013|      0|                goto error;
 3014|      0|            }
 3015|      0|        }
 3016|  9.43k|        f->lf.mask_sz = num_sb128;
 3017|  9.43k|    }
 3018|       |
 3019|  41.8k|    f->sr_sb128w = (f->sr_cur.p.p.w + 127) >> 7;
 3020|  41.8k|    const int lr_mask_sz = f->sr_sb128w * f->sb128h;
 3021|  41.8k|    if (lr_mask_sz != f->lf.lr_mask_sz) {
  ------------------
  |  Branch (3021:9): [True: 9.22k, False: 32.5k]
  ------------------
 3022|  9.22k|        dav1d_free(f->lf.lr_mask);
  ------------------
  |  |  135|  9.22k|#define dav1d_free(ptr) free(ptr)
  ------------------
 3023|  9.22k|        f->lf.lr_mask = dav1d_malloc(ALLOC_LR, sizeof(*f->lf.lr_mask) * lr_mask_sz);
  ------------------
  |  |  132|  9.22k|#define dav1d_malloc(type, sz) malloc(sz)
  ------------------
 3024|  9.22k|        if (!f->lf.lr_mask) {
  ------------------
  |  Branch (3024:13): [True: 0, False: 9.22k]
  ------------------
 3025|      0|            f->lf.lr_mask_sz = 0;
 3026|      0|            goto error;
 3027|      0|        }
 3028|  9.22k|        f->lf.lr_mask_sz = lr_mask_sz;
 3029|  9.22k|    }
 3030|  41.8k|    f->lf.restore_planes =
 3031|  41.8k|        ((f->frame_hdr->restoration.type[0] != DAV1D_RESTORATION_NONE) << 0) +
 3032|  41.8k|        ((f->frame_hdr->restoration.type[1] != DAV1D_RESTORATION_NONE) << 1) +
 3033|  41.8k|        ((f->frame_hdr->restoration.type[2] != DAV1D_RESTORATION_NONE) << 2);
 3034|  41.8k|    if (f->frame_hdr->loopfilter.sharpness != f->lf.last_sharpness) {
  ------------------
  |  Branch (3034:9): [True: 15.1k, False: 26.6k]
  ------------------
 3035|  15.1k|        dav1d_calc_eih(&f->lf.lim_lut, f->frame_hdr->loopfilter.sharpness);
 3036|  15.1k|        f->lf.last_sharpness = f->frame_hdr->loopfilter.sharpness;
 3037|  15.1k|    }
 3038|  41.8k|    dav1d_calc_lf_values(f->lf.lvl, f->frame_hdr, (int8_t[4]) { 0, 0, 0, 0 });
 3039|  41.8k|    memset(f->lf.mask, 0, sizeof(*f->lf.mask) * num_sb128);
 3040|       |
 3041|  41.8k|    const int ipred_edge_sz = f->sbh * f->sb128w << hbd;
 3042|  41.8k|    if (ipred_edge_sz != f->ipred_edge_sz) {
  ------------------
  |  Branch (3042:9): [True: 9.46k, False: 32.3k]
  ------------------
 3043|  9.46k|        dav1d_free_aligned(f->ipred_edge[0]);
  ------------------
  |  |  136|  9.46k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
 3044|  9.46k|        uint8_t *ptr = f->ipred_edge[0] =
 3045|  9.46k|            dav1d_alloc_aligned(ALLOC_IPRED, ipred_edge_sz * 128 * 3, 64);
  ------------------
  |  |  134|  9.46k|#define dav1d_alloc_aligned(type, sz, align) dav1d_alloc_aligned_internal(sz, align)
  ------------------
 3046|  9.46k|        if (!ptr) {
  ------------------
  |  Branch (3046:13): [True: 0, False: 9.46k]
  ------------------
 3047|      0|            f->ipred_edge_sz = 0;
 3048|      0|            goto error;
 3049|      0|        }
 3050|  9.46k|        f->ipred_edge[1] = ptr + ipred_edge_sz * 128 * 1;
 3051|  9.46k|        f->ipred_edge[2] = ptr + ipred_edge_sz * 128 * 2;
 3052|  9.46k|        f->ipred_edge_sz = ipred_edge_sz;
 3053|  9.46k|    }
 3054|       |
 3055|  41.8k|    const int re_sz = f->sb128h * f->frame_hdr->tiling.cols;
 3056|  41.8k|    if (re_sz != f->lf.re_sz) {
  ------------------
  |  Branch (3056:9): [True: 9.04k, False: 32.7k]
  ------------------
 3057|  9.04k|        dav1d_free(f->lf.tx_lpf_right_edge[0]);
  ------------------
  |  |  135|  9.04k|#define dav1d_free(ptr) free(ptr)
  ------------------
 3058|  9.04k|        f->lf.tx_lpf_right_edge[0] = dav1d_malloc(ALLOC_LF, re_sz * 32 * 2);
  ------------------
  |  |  132|  9.04k|#define dav1d_malloc(type, sz) malloc(sz)
  ------------------
 3059|  9.04k|        if (!f->lf.tx_lpf_right_edge[0]) {
  ------------------
  |  Branch (3059:13): [True: 0, False: 9.04k]
  ------------------
 3060|      0|            f->lf.re_sz = 0;
 3061|      0|            goto error;
 3062|      0|        }
 3063|  9.04k|        f->lf.tx_lpf_right_edge[1] = f->lf.tx_lpf_right_edge[0] + re_sz * 32;
 3064|  9.04k|        f->lf.re_sz = re_sz;
 3065|  9.04k|    }
 3066|       |
 3067|       |    // init ref mvs
 3068|  41.8k|    if (IS_INTER_OR_SWITCH(f->frame_hdr) || f->frame_hdr->allow_intrabc) {
  ------------------
  |  |   36|  83.6k|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 12.1k, False: 29.6k]
  |  |  ------------------
  ------------------
  |  Branch (3068:45): [True: 16.1k, False: 13.5k]
  ------------------
 3069|  28.2k|        const int ret =
 3070|  28.2k|            dav1d_refmvs_init_frame(&f->rf, f->seq_hdr, f->frame_hdr,
 3071|  28.2k|                                    f->refpoc, f->mvs, f->refrefpoc, f->ref_mvs,
 3072|  28.2k|                                    f->c->n_tc, f->c->n_fc);
 3073|  28.2k|        if (ret < 0) goto error;
  ------------------
  |  Branch (3073:13): [True: 0, False: 28.2k]
  ------------------
 3074|  28.2k|    }
 3075|       |
 3076|       |    // setup dequant tables
 3077|  41.8k|    init_quant_tables(f->seq_hdr, f->frame_hdr, f->frame_hdr->quant.yac, f->dq);
 3078|  41.8k|    if (f->frame_hdr->quant.qm)
  ------------------
  |  Branch (3078:9): [True: 6.52k, False: 35.2k]
  ------------------
 3079|   130k|        for (int i = 0; i < N_RECT_TX_SIZES; i++) {
  ------------------
  |  Branch (3079:25): [True: 123k, False: 6.52k]
  ------------------
 3080|   123k|            f->qm[i][0] = dav1d_qm_tbl[f->frame_hdr->quant.qm_y][0][i];
 3081|   123k|            f->qm[i][1] = dav1d_qm_tbl[f->frame_hdr->quant.qm_u][1][i];
 3082|   123k|            f->qm[i][2] = dav1d_qm_tbl[f->frame_hdr->quant.qm_v][1][i];
 3083|   123k|        }
 3084|  35.2k|    else
 3085|  35.2k|        memset(f->qm, 0, sizeof(f->qm));
 3086|       |
 3087|       |    // setup jnt_comp weights
 3088|  41.8k|    if (f->frame_hdr->switchable_comp_refs) {
  ------------------
  |  Branch (3088:9): [True: 9.70k, False: 32.1k]
  ------------------
 3089|  77.6k|        for (int i = 0; i < 7; i++) {
  ------------------
  |  Branch (3089:25): [True: 67.9k, False: 9.70k]
  ------------------
 3090|  67.9k|            const unsigned ref0poc = f->refp[i].p.frame_hdr->frame_offset;
 3091|       |
 3092|   271k|            for (int j = i + 1; j < 7; j++) {
  ------------------
  |  Branch (3092:33): [True: 203k, False: 67.9k]
  ------------------
 3093|   203k|                const unsigned ref1poc = f->refp[j].p.frame_hdr->frame_offset;
 3094|       |
 3095|   203k|                const unsigned d1 =
 3096|   203k|                    imin(abs(get_poc_diff(f->seq_hdr->order_hint_n_bits, ref0poc,
 3097|   203k|                                          f->cur.frame_hdr->frame_offset)), 31);
 3098|   203k|                const unsigned d0 =
 3099|   203k|                    imin(abs(get_poc_diff(f->seq_hdr->order_hint_n_bits, ref1poc,
 3100|   203k|                                          f->cur.frame_hdr->frame_offset)), 31);
 3101|   203k|                const int order = d0 <= d1;
 3102|       |
 3103|   203k|                static const uint8_t quant_dist_weight[3][2] = {
 3104|   203k|                    { 2, 3 }, { 2, 5 }, { 2, 7 }
 3105|   203k|                };
 3106|   203k|                static const uint8_t quant_dist_lookup_table[4][2] = {
 3107|   203k|                    { 9, 7 }, { 11, 5 }, { 12, 4 }, { 13, 3 }
 3108|   203k|                };
 3109|       |
 3110|   203k|                int k;
 3111|   644k|                for (k = 0; k < 3; k++) {
  ------------------
  |  Branch (3111:29): [True: 501k, False: 142k]
  ------------------
 3112|   501k|                    const int c0 = quant_dist_weight[k][order];
 3113|   501k|                    const int c1 = quant_dist_weight[k][!order];
 3114|   501k|                    const int d0_c0 = d0 * c0;
 3115|   501k|                    const int d1_c1 = d1 * c1;
 3116|   501k|                    if ((d0 > d1 && d0_c0 < d1_c1) || (d0 <= d1 && d0_c0 > d1_c1)) break;
  ------------------
  |  Branch (3116:26): [True: 126k, False: 375k]
  |  Branch (3116:37): [True: 9.90k, False: 117k]
  |  Branch (3116:56): [True: 375k, False: 117k]
  |  Branch (3116:68): [True: 51.3k, False: 323k]
  ------------------
 3117|   501k|                }
 3118|       |
 3119|   203k|                f->jnt_weights[i][j] = quant_dist_lookup_table[k][order];
 3120|   203k|            }
 3121|  67.9k|        }
 3122|  9.70k|    }
 3123|       |
 3124|       |    /* Init loopfilter pointers. Increasing NULL pointers is technically UB,
 3125|       |     * so just point the chroma pointers in 4:0:0 to the luma plane here to
 3126|       |     * avoid having additional in-loop branches in various places. We never
 3127|       |     * dereference those pointers so it doesn't really matter what they
 3128|       |     * point at, as long as the pointers are valid. */
 3129|  41.8k|    const int has_chroma = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400;
 3130|  41.8k|    f->lf.p[0] = f->cur.data[0];
 3131|  41.8k|    f->lf.p[1] = f->cur.data[has_chroma ? 1 : 0];
  ------------------
  |  Branch (3131:30): [True: 22.6k, False: 19.2k]
  ------------------
 3132|  41.8k|    f->lf.p[2] = f->cur.data[has_chroma ? 2 : 0];
  ------------------
  |  Branch (3132:30): [True: 22.6k, False: 19.2k]
  ------------------
 3133|  41.8k|    f->lf.sr_p[0] = f->sr_cur.p.data[0];
 3134|  41.8k|    f->lf.sr_p[1] = f->sr_cur.p.data[has_chroma ? 1 : 0];
  ------------------
  |  Branch (3134:38): [True: 22.6k, False: 19.2k]
  ------------------
 3135|  41.8k|    f->lf.sr_p[2] = f->sr_cur.p.data[has_chroma ? 2 : 0];
  ------------------
  |  Branch (3135:38): [True: 22.6k, False: 19.2k]
  ------------------
 3136|       |
 3137|  41.8k|    retval = 0;
 3138|  41.8k|error:
 3139|  41.8k|    return retval;
 3140|  41.8k|}
dav1d_decode_frame_init_cdf:
 3142|  41.8k|int dav1d_decode_frame_init_cdf(Dav1dFrameContext *const f) {
 3143|  41.8k|    const Dav1dContext *const c = f->c;
 3144|  41.8k|    int retval = DAV1D_ERR(EINVAL);
  ------------------
  |  |   58|  41.8k|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
 3145|       |
 3146|  41.8k|    if (f->frame_hdr->refresh_context)
  ------------------
  |  Branch (3146:9): [True: 13.1k, False: 28.6k]
  ------------------
 3147|  13.1k|        dav1d_cdf_thread_copy(f->out_cdf.data.cdf, &f->in_cdf);
 3148|       |
 3149|       |    // parse individual tiles per tile group
 3150|  41.8k|    int tile_row = 0, tile_col = 0;
 3151|  41.8k|    f->task_thread.update_set = 0;
 3152|  82.2k|    for (int i = 0; i < f->n_tile_data; i++) {
  ------------------
  |  Branch (3152:21): [True: 41.9k, False: 40.3k]
  ------------------
 3153|  41.9k|        const uint8_t *data = f->tile[i].data.data;
 3154|  41.9k|        size_t size = f->tile[i].data.sz;
 3155|       |
 3156|  88.2k|        for (int j = f->tile[i].start; j <= f->tile[i].end; j++) {
  ------------------
  |  Branch (3156:40): [True: 47.8k, False: 40.4k]
  ------------------
 3157|  47.8k|            size_t tile_sz;
 3158|  47.8k|            if (j == f->tile[i].end) {
  ------------------
  |  Branch (3158:17): [True: 40.4k, False: 7.41k]
  ------------------
 3159|  40.4k|                tile_sz = size;
 3160|  40.4k|            } else {
 3161|  7.41k|                if (f->frame_hdr->tiling.n_bytes > size) goto error;
  ------------------
  |  Branch (3161:21): [True: 515, False: 6.89k]
  ------------------
 3162|  6.89k|                tile_sz = 0;
 3163|  14.8k|                for (unsigned k = 0; k < f->frame_hdr->tiling.n_bytes; k++)
  ------------------
  |  Branch (3163:38): [True: 7.99k, False: 6.89k]
  ------------------
 3164|  7.99k|                    tile_sz |= (unsigned)*data++ << (k * 8);
 3165|  6.89k|                tile_sz++;
 3166|  6.89k|                size -= f->frame_hdr->tiling.n_bytes;
 3167|  6.89k|                if (tile_sz > size) goto error;
  ------------------
  |  Branch (3167:21): [True: 970, False: 5.92k]
  ------------------
 3168|  6.89k|            }
 3169|       |
 3170|  46.3k|            setup_tile(&f->ts[j], f, data, tile_sz, tile_row, tile_col++,
 3171|  46.3k|                       c->n_fc > 1 ? f->frame_thread.tile_start_off[j] : 0);
  ------------------
  |  Branch (3171:24): [True: 0, False: 46.3k]
  ------------------
 3172|       |
 3173|  46.3k|            if (tile_col == f->frame_hdr->tiling.cols) {
  ------------------
  |  Branch (3173:17): [True: 41.8k, False: 4.50k]
  ------------------
 3174|  41.8k|                tile_col = 0;
 3175|  41.8k|                tile_row++;
 3176|  41.8k|            }
 3177|  46.3k|            if (j == f->frame_hdr->tiling.update && f->frame_hdr->refresh_context)
  ------------------
  |  Branch (3177:17): [True: 40.4k, False: 5.86k]
  |  Branch (3177:53): [True: 12.9k, False: 27.5k]
  ------------------
 3178|  12.9k|                f->task_thread.update_set = 1;
 3179|  46.3k|            data += tile_sz;
 3180|  46.3k|            size -= tile_sz;
 3181|  46.3k|        }
 3182|  41.9k|    }
 3183|       |
 3184|  40.3k|    if (c->n_tc > 1) {
  ------------------
  |  Branch (3184:9): [True: 0, False: 40.3k]
  ------------------
 3185|      0|        const int uses_2pass = c->n_fc > 1;
 3186|      0|        for (int n = 0; n < f->sb128w * f->frame_hdr->tiling.rows * (1 + uses_2pass); n++)
  ------------------
  |  Branch (3186:25): [True: 0, False: 0]
  ------------------
 3187|      0|            reset_context(&f->a[n], IS_KEY_OR_INTRA(f->frame_hdr),
  ------------------
  |  |   43|      0|    (!IS_INTER_OR_SWITCH(frame_header))
  |  |  ------------------
  |  |  |  |   36|      0|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  ------------------
 3188|      0|                          uses_2pass ? 1 + (n >= f->sb128w * f->frame_hdr->tiling.rows) : 0);
  ------------------
  |  Branch (3188:27): [True: 0, False: 0]
  ------------------
 3189|      0|    }
 3190|       |
 3191|  40.3k|    retval = 0;
 3192|  41.8k|error:
 3193|  41.8k|    return retval;
 3194|  40.3k|}
dav1d_decode_frame_main:
 3196|  40.3k|int dav1d_decode_frame_main(Dav1dFrameContext *const f) {
 3197|  40.3k|    const Dav1dContext *const c = f->c;
 3198|  40.3k|    int retval = DAV1D_ERR(EINVAL);
  ------------------
  |  |   58|  40.3k|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
 3199|       |
 3200|  40.3k|    assert(f->c->n_tc == 1);
  ------------------
  |  Branch (3200:5): [True: 40.3k, False: 0]
  ------------------
 3201|       |
 3202|  40.3k|    Dav1dTaskContext *const t = &c->tc[f - c->fc];
 3203|  40.3k|    t->f = f;
 3204|  40.3k|    t->frame_thread.pass = 0;
 3205|       |
 3206|   370k|    for (int n = 0; n < f->sb128w * f->frame_hdr->tiling.rows; n++)
  ------------------
  |  Branch (3206:21): [True: 330k, False: 40.3k]
  ------------------
 3207|   330k|        reset_context(&f->a[n], IS_KEY_OR_INTRA(f->frame_hdr), 0);
  ------------------
  |  |   43|   330k|    (!IS_INTER_OR_SWITCH(frame_header))
  |  |  ------------------
  |  |  |  |   36|   330k|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  ------------------
 3208|       |
 3209|       |    // no threading - we explicitly interleave tile/sbrow decoding
 3210|       |    // and post-filtering, so that the full process runs in-line
 3211|  59.3k|    for (int tile_row = 0; tile_row < f->frame_hdr->tiling.rows; tile_row++) {
  ------------------
  |  Branch (3211:28): [True: 41.3k, False: 17.9k]
  ------------------
 3212|  41.3k|        const int sbh_end =
 3213|  41.3k|            imin(f->frame_hdr->tiling.row_start_sb[tile_row + 1], f->sbh);
 3214|  41.3k|        for (int sby = f->frame_hdr->tiling.row_start_sb[tile_row];
 3215|   175k|             sby < sbh_end; sby++)
  ------------------
  |  Branch (3215:14): [True: 156k, False: 19.0k]
  ------------------
 3216|   156k|        {
 3217|   156k|            t->by = sby << (4 + f->seq_hdr->sb128);
 3218|   156k|            const int by_end = (t->by + f->sb_step) >> 1;
 3219|   156k|            if (f->frame_hdr->use_ref_frame_mvs) {
  ------------------
  |  Branch (3219:17): [True: 19.7k, False: 137k]
  ------------------
 3220|  19.7k|                f->c->refmvs_dsp.load_tmvs(&f->rf, tile_row,
 3221|  19.7k|                                           0, f->bw >> 1, t->by >> 1, by_end);
 3222|  19.7k|            }
 3223|   293k|            for (int tile_col = 0; tile_col < f->frame_hdr->tiling.cols; tile_col++) {
  ------------------
  |  Branch (3223:36): [True: 159k, False: 134k]
  ------------------
 3224|   159k|                t->ts = &f->ts[tile_row * f->frame_hdr->tiling.cols + tile_col];
 3225|   159k|                if (dav1d_decode_tile_sbrow(t)) goto error;
  ------------------
  |  Branch (3225:21): [True: 22.3k, False: 137k]
  ------------------
 3226|   159k|            }
 3227|   134k|            if (IS_INTER_OR_SWITCH(f->frame_hdr)) {
  ------------------
  |  |   36|   134k|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 48.3k, False: 86.2k]
  |  |  ------------------
  ------------------
 3228|  48.3k|                dav1d_refmvs_save_tmvs(&f->c->refmvs_dsp, &t->rt,
 3229|  48.3k|                                       0, f->bw >> 1, t->by >> 1, by_end);
 3230|  48.3k|            }
 3231|       |
 3232|       |            // loopfilter + cdef + restoration
 3233|   134k|            f->bd_fn.filter_sbrow(f, sby);
 3234|   134k|        }
 3235|  41.3k|    }
 3236|       |
 3237|  17.9k|    retval = 0;
 3238|  40.3k|error:
 3239|  40.3k|    return retval;
 3240|  17.9k|}
dav1d_decode_frame_exit:
 3242|  41.8k|void dav1d_decode_frame_exit(Dav1dFrameContext *const f, int retval) {
 3243|  41.8k|    const Dav1dContext *const c = f->c;
 3244|       |
 3245|  41.8k|    if (f->sr_cur.p.data[0])
  ------------------
  |  Branch (3245:9): [True: 41.8k, False: 0]
  ------------------
 3246|  41.8k|        atomic_init(&f->task_thread.error, 0);
 3247|       |
 3248|  41.8k|    if (c->n_fc > 1 && retval && f->frame_thread.cf) {
  ------------------
  |  Branch (3248:9): [True: 0, False: 41.8k]
  |  Branch (3248:24): [True: 0, False: 0]
  |  Branch (3248:34): [True: 0, False: 0]
  ------------------
 3249|      0|        memset(f->frame_thread.cf, 0,
 3250|      0|               (size_t)f->frame_thread.cf_sz * 128 * 128 / 2);
 3251|      0|    }
 3252|   334k|    for (int i = 0; i < 7; i++) {
  ------------------
  |  Branch (3252:21): [True: 292k, False: 41.8k]
  ------------------
 3253|   292k|        if (f->refp[i].p.frame_hdr) {
  ------------------
  |  Branch (3253:13): [True: 84.7k, False: 207k]
  ------------------
 3254|  84.7k|            if (!retval && c->n_fc > 1 && c->strict_std_compliance &&
  ------------------
  |  Branch (3254:17): [True: 53.5k, False: 31.2k]
  |  Branch (3254:28): [True: 0, False: 53.5k]
  |  Branch (3254:43): [True: 0, False: 0]
  ------------------
 3255|  84.7k|                atomic_load(&f->refp[i].progress[1]) == FRAME_ERROR)
  ------------------
  |  |   35|      0|#define FRAME_ERROR (UINT_MAX - 1)
  ------------------
  |  Branch (3255:17): [True: 0, False: 0]
  ------------------
 3256|      0|            {
 3257|      0|                retval = DAV1D_ERR(EINVAL);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
 3258|      0|                atomic_store(&f->task_thread.error, 1);
 3259|      0|                atomic_store(&f->sr_cur.progress[1], FRAME_ERROR);
 3260|      0|            }
 3261|  84.7k|            dav1d_thread_picture_unref(&f->refp[i]);
 3262|  84.7k|        }
 3263|   292k|        dav1d_ref_dec(&f->ref_mvs_ref[i]);
 3264|   292k|    }
 3265|       |
 3266|  41.8k|    dav1d_picture_unref_internal(&f->cur);
 3267|  41.8k|    dav1d_thread_picture_unref(&f->sr_cur);
 3268|  41.8k|    dav1d_cdf_thread_unref(&f->in_cdf);
 3269|  41.8k|    if (f->frame_hdr && f->frame_hdr->refresh_context) {
  ------------------
  |  Branch (3269:9): [True: 41.8k, False: 0]
  |  Branch (3269:25): [True: 13.1k, False: 28.6k]
  ------------------
 3270|  13.1k|        if (f->out_cdf.progress)
  ------------------
  |  Branch (3270:13): [True: 0, False: 13.1k]
  ------------------
 3271|  13.1k|            atomic_store(f->out_cdf.progress, retval == 0 ? 1 : TILE_ERROR);
  ------------------
  |  Branch (3271:13): [True: 0, False: 0]
  ------------------
 3272|  13.1k|        dav1d_cdf_thread_unref(&f->out_cdf);
 3273|  13.1k|    }
 3274|  41.8k|    dav1d_ref_dec(&f->cur_segmap_ref);
 3275|  41.8k|    dav1d_ref_dec(&f->prev_segmap_ref);
 3276|  41.8k|    dav1d_ref_dec(&f->mvs_ref);
 3277|  41.8k|    dav1d_ref_dec(&f->seq_hdr_ref);
 3278|  41.8k|    dav1d_ref_dec(&f->frame_hdr_ref);
 3279|       |
 3280|  83.7k|    for (int i = 0; i < f->n_tile_data; i++)
  ------------------
  |  Branch (3280:21): [True: 41.9k, False: 41.8k]
  ------------------
 3281|  41.9k|        dav1d_data_unref_internal(&f->tile[i].data);
 3282|  41.8k|    f->task_thread.retval = retval;
 3283|  41.8k|}
dav1d_decode_frame:
 3285|  41.8k|int dav1d_decode_frame(Dav1dFrameContext *const f) {
 3286|  41.8k|    assert(f->c->n_fc == 1);
  ------------------
  |  Branch (3286:5): [True: 41.8k, False: 0]
  ------------------
 3287|       |    // if n_tc > 1 (but n_fc == 1), we could run init/exit in the task
 3288|       |    // threads also. Not sure it makes a measurable difference.
 3289|  41.8k|    int res = dav1d_decode_frame_init(f);
 3290|  41.8k|    if (!res) res = dav1d_decode_frame_init_cdf(f);
  ------------------
  |  Branch (3290:9): [True: 41.8k, False: 0]
  ------------------
 3291|       |    // wait until all threads have completed
 3292|  41.8k|    if (!res) {
  ------------------
  |  Branch (3292:9): [True: 40.3k, False: 1.48k]
  ------------------
 3293|  40.3k|        if (f->c->n_tc > 1) {
  ------------------
  |  Branch (3293:13): [True: 0, False: 40.3k]
  ------------------
 3294|      0|            res = dav1d_task_create_tile_sbrow(f, 0, 1);
 3295|      0|            pthread_mutex_lock(&f->task_thread.ttd->lock);
 3296|      0|            pthread_cond_signal(&f->task_thread.ttd->cond);
 3297|      0|            if (!res) {
  ------------------
  |  Branch (3297:17): [True: 0, False: 0]
  ------------------
 3298|      0|                while (!f->task_thread.done[0] ||
  ------------------
  |  Branch (3298:24): [True: 0, False: 0]
  ------------------
 3299|      0|                       atomic_load(&f->task_thread.task_counter) > 0)
  ------------------
  |  Branch (3299:24): [True: 0, False: 0]
  ------------------
 3300|      0|                {
 3301|      0|                    pthread_cond_wait(&f->task_thread.cond,
 3302|      0|                                      &f->task_thread.ttd->lock);
 3303|      0|                }
 3304|      0|            }
 3305|      0|            pthread_mutex_unlock(&f->task_thread.ttd->lock);
 3306|      0|            res = f->task_thread.retval;
 3307|  40.3k|        } else {
 3308|  40.3k|            res = dav1d_decode_frame_main(f);
 3309|  40.3k|            if (!res && f->frame_hdr->refresh_context && f->task_thread.update_set) {
  ------------------
  |  Branch (3309:17): [True: 17.9k, False: 22.3k]
  |  Branch (3309:25): [True: 11.0k, False: 6.93k]
  |  Branch (3309:58): [True: 11.0k, False: 0]
  ------------------
 3310|  11.0k|                dav1d_cdf_thread_update(f->frame_hdr, f->out_cdf.data.cdf,
 3311|  11.0k|                                        &f->ts[f->frame_hdr->tiling.update].cdf);
 3312|  11.0k|            }
 3313|  40.3k|        }
 3314|  40.3k|    }
 3315|  41.8k|    dav1d_decode_frame_exit(f, res);
 3316|  41.8k|    res = f->task_thread.retval;
 3317|  41.8k|    f->n_tile_data = 0;
 3318|  41.8k|    return res;
 3319|  41.8k|}
dav1d_submit_frame:
 3327|  45.0k|int dav1d_submit_frame(Dav1dContext *const c) {
 3328|  45.0k|    Dav1dFrameContext *f;
 3329|  45.0k|    int res = -1;
 3330|       |
 3331|       |    // wait for c->out_delayed[next] and move into c->out if visible
 3332|  45.0k|    Dav1dThreadPicture *out_delayed;
 3333|  45.0k|    if (c->n_fc > 1) {
  ------------------
  |  Branch (3333:9): [True: 0, False: 45.0k]
  ------------------
 3334|      0|        pthread_mutex_lock(&c->task_thread.lock);
 3335|      0|        const unsigned next = c->frame_thread.next++;
 3336|      0|        if (c->frame_thread.next == c->n_fc)
  ------------------
  |  Branch (3336:13): [True: 0, False: 0]
  ------------------
 3337|      0|            c->frame_thread.next = 0;
 3338|       |
 3339|      0|        f = &c->fc[next];
 3340|      0|        while (f->n_tile_data > 0)
  ------------------
  |  Branch (3340:16): [True: 0, False: 0]
  ------------------
 3341|      0|            pthread_cond_wait(&f->task_thread.cond,
 3342|      0|                              &c->task_thread.lock);
 3343|      0|        out_delayed = &c->frame_thread.out_delayed[next];
 3344|      0|        if (out_delayed->p.data[0] || atomic_load(&f->task_thread.error)) {
  ------------------
  |  Branch (3344:13): [True: 0, False: 0]
  |  Branch (3344:39): [True: 0, False: 0]
  ------------------
 3345|      0|            unsigned first = atomic_load(&c->task_thread.first);
 3346|      0|            if (first + 1U < c->n_fc)
  ------------------
  |  Branch (3346:17): [True: 0, False: 0]
  ------------------
 3347|      0|                atomic_fetch_add(&c->task_thread.first, 1U);
 3348|      0|            else
 3349|      0|                atomic_store(&c->task_thread.first, 0);
 3350|      0|            atomic_compare_exchange_strong(&c->task_thread.reset_task_cur,
 3351|      0|                                           &first, UINT_MAX);
 3352|      0|            if (c->task_thread.cur && c->task_thread.cur < c->n_fc)
  ------------------
  |  Branch (3352:17): [True: 0, False: 0]
  |  Branch (3352:39): [True: 0, False: 0]
  ------------------
 3353|      0|                c->task_thread.cur--;
 3354|      0|        }
 3355|      0|        const int error = f->task_thread.retval;
 3356|      0|        if (error) {
  ------------------
  |  Branch (3356:13): [True: 0, False: 0]
  ------------------
 3357|      0|            f->task_thread.retval = 0;
 3358|      0|            c->cached_error = error;
 3359|      0|            dav1d_data_props_copy(&c->cached_error_props, &out_delayed->p.m);
 3360|      0|            dav1d_thread_picture_unref(out_delayed);
 3361|      0|        } else if (out_delayed->p.data[0]) {
  ------------------
  |  Branch (3361:20): [True: 0, False: 0]
  ------------------
 3362|      0|            const unsigned progress = atomic_load_explicit(&out_delayed->progress[1],
 3363|      0|                                                           memory_order_relaxed);
 3364|      0|            if ((out_delayed->visible || c->output_invisible_frames) &&
  ------------------
  |  Branch (3364:18): [True: 0, False: 0]
  |  Branch (3364:42): [True: 0, False: 0]
  ------------------
 3365|      0|                progress != FRAME_ERROR)
  ------------------
  |  |   35|      0|#define FRAME_ERROR (UINT_MAX - 1)
  ------------------
  |  Branch (3365:17): [True: 0, False: 0]
  ------------------
 3366|      0|            {
 3367|      0|                dav1d_thread_picture_ref(&c->out, out_delayed);
 3368|      0|                c->event_flags |= dav1d_picture_get_event_flags(out_delayed);
 3369|      0|            }
 3370|      0|            dav1d_thread_picture_unref(out_delayed);
 3371|      0|        }
 3372|  45.0k|    } else {
 3373|  45.0k|        f = c->fc;
 3374|  45.0k|    }
 3375|       |
 3376|  45.0k|    f->seq_hdr = c->seq_hdr;
 3377|  45.0k|    f->seq_hdr_ref = c->seq_hdr_ref;
 3378|  45.0k|    dav1d_ref_inc(f->seq_hdr_ref);
 3379|  45.0k|    f->frame_hdr = c->frame_hdr;
 3380|  45.0k|    f->frame_hdr_ref = c->frame_hdr_ref;
 3381|  45.0k|    c->frame_hdr = NULL;
 3382|  45.0k|    c->frame_hdr_ref = NULL;
 3383|  45.0k|    f->dsp = &c->dsp[f->seq_hdr->hbd];
 3384|       |
 3385|  45.0k|    const int bpc = 8 + 2 * f->seq_hdr->hbd;
 3386|       |
 3387|  45.0k|    if (!f->dsp->ipred.intra_pred[DC_PRED]) {
  ------------------
  |  Branch (3387:9): [True: 8.03k, False: 37.0k]
  ------------------
 3388|  8.03k|        Dav1dDSPContext *const dsp = &c->dsp[f->seq_hdr->hbd];
 3389|       |
 3390|  8.03k|        switch (bpc) {
 3391|      0|#define assign_bitdepth_case(bd) \
 3392|      0|            dav1d_cdef_dsp_init_##bd##bpc(&dsp->cdef); \
 3393|      0|            dav1d_intra_pred_dsp_init_##bd##bpc(&dsp->ipred); \
 3394|      0|            dav1d_itx_dsp_init_##bd##bpc(&dsp->itx, bpc); \
 3395|      0|            dav1d_loop_filter_dsp_init_##bd##bpc(&dsp->lf); \
 3396|      0|            dav1d_loop_restoration_dsp_init_##bd##bpc(&dsp->lr, bpc); \
 3397|      0|            dav1d_mc_dsp_init_##bd##bpc(&dsp->mc); \
 3398|      0|            dav1d_film_grain_dsp_init_##bd##bpc(&dsp->fg); \
 3399|      0|            break
 3400|      0|#if CONFIG_8BPC
 3401|  3.41k|        case 8:
  ------------------
  |  Branch (3401:9): [True: 3.41k, False: 4.61k]
  ------------------
 3402|  3.41k|            assign_bitdepth_case(8);
  ------------------
  |  | 3392|  3.41k|            dav1d_cdef_dsp_init_##bd##bpc(&dsp->cdef); \
  |  | 3393|  3.41k|            dav1d_intra_pred_dsp_init_##bd##bpc(&dsp->ipred); \
  |  | 3394|  3.41k|            dav1d_itx_dsp_init_##bd##bpc(&dsp->itx, bpc); \
  |  | 3395|  3.41k|            dav1d_loop_filter_dsp_init_##bd##bpc(&dsp->lf); \
  |  | 3396|  3.41k|            dav1d_loop_restoration_dsp_init_##bd##bpc(&dsp->lr, bpc); \
  |  | 3397|  3.41k|            dav1d_mc_dsp_init_##bd##bpc(&dsp->mc); \
  |  | 3398|  3.41k|            dav1d_film_grain_dsp_init_##bd##bpc(&dsp->fg); \
  |  | 3399|  3.41k|            break
  ------------------
 3403|      0|#endif
 3404|      0|#if CONFIG_16BPC
 3405|  2.25k|        case 10:
  ------------------
  |  Branch (3405:9): [True: 2.25k, False: 5.77k]
  ------------------
 3406|  4.61k|        case 12:
  ------------------
  |  Branch (3406:9): [True: 2.35k, False: 5.67k]
  ------------------
 3407|  4.61k|            assign_bitdepth_case(16);
  ------------------
  |  | 3392|  4.61k|            dav1d_cdef_dsp_init_##bd##bpc(&dsp->cdef); \
  |  | 3393|  4.61k|            dav1d_intra_pred_dsp_init_##bd##bpc(&dsp->ipred); \
  |  | 3394|  4.61k|            dav1d_itx_dsp_init_##bd##bpc(&dsp->itx, bpc); \
  |  | 3395|  4.61k|            dav1d_loop_filter_dsp_init_##bd##bpc(&dsp->lf); \
  |  | 3396|  4.61k|            dav1d_loop_restoration_dsp_init_##bd##bpc(&dsp->lr, bpc); \
  |  | 3397|  4.61k|            dav1d_mc_dsp_init_##bd##bpc(&dsp->mc); \
  |  | 3398|  4.61k|            dav1d_film_grain_dsp_init_##bd##bpc(&dsp->fg); \
  |  | 3399|  4.61k|            break
  ------------------
 3408|      0|#endif
 3409|      0|#undef assign_bitdepth_case
 3410|      0|        default:
  ------------------
  |  Branch (3410:9): [True: 0, False: 8.03k]
  ------------------
 3411|      0|            dav1d_log(c, "Compiled without support for %d-bit decoding\n",
  ------------------
  |  |   44|      0|#define dav1d_log(...) do { } while(0)
  |  |  ------------------
  |  |  |  Branch (44:37): [Folded, False: 0]
  |  |  ------------------
  ------------------
 3412|      0|                    8 + 2 * f->seq_hdr->hbd);
 3413|      0|            res = DAV1D_ERR(ENOPROTOOPT);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
 3414|      0|            goto error;
 3415|  8.03k|        }
 3416|  8.03k|    }
 3417|       |
 3418|  45.0k|#define assign_bitdepth_case(bd) \
 3419|  45.0k|        f->bd_fn.recon_b_inter = dav1d_recon_b_inter_##bd##bpc; \
 3420|  45.0k|        f->bd_fn.recon_b_intra = dav1d_recon_b_intra_##bd##bpc; \
 3421|  45.0k|        f->bd_fn.filter_sbrow = dav1d_filter_sbrow_##bd##bpc; \
 3422|  45.0k|        f->bd_fn.filter_sbrow_deblock_cols = dav1d_filter_sbrow_deblock_cols_##bd##bpc; \
 3423|  45.0k|        f->bd_fn.filter_sbrow_deblock_rows = dav1d_filter_sbrow_deblock_rows_##bd##bpc; \
 3424|  45.0k|        f->bd_fn.filter_sbrow_cdef = dav1d_filter_sbrow_cdef_##bd##bpc; \
 3425|  45.0k|        f->bd_fn.filter_sbrow_resize = dav1d_filter_sbrow_resize_##bd##bpc; \
 3426|  45.0k|        f->bd_fn.filter_sbrow_lr = dav1d_filter_sbrow_lr_##bd##bpc; \
 3427|  45.0k|        f->bd_fn.backup_ipred_edge = dav1d_backup_ipred_edge_##bd##bpc; \
 3428|  45.0k|        f->bd_fn.read_coef_blocks = dav1d_read_coef_blocks_##bd##bpc; \
 3429|  45.0k|        f->bd_fn.copy_pal_block_y = dav1d_copy_pal_block_y_##bd##bpc; \
 3430|  45.0k|        f->bd_fn.copy_pal_block_uv = dav1d_copy_pal_block_uv_##bd##bpc; \
 3431|  45.0k|        f->bd_fn.read_pal_plane = dav1d_read_pal_plane_##bd##bpc; \
 3432|  45.0k|        f->bd_fn.read_pal_uv = dav1d_read_pal_uv_##bd##bpc
 3433|  45.0k|    if (!f->seq_hdr->hbd) {
  ------------------
  |  Branch (3433:9): [True: 23.7k, False: 21.3k]
  ------------------
 3434|  23.7k|#if CONFIG_8BPC
 3435|  23.7k|        assign_bitdepth_case(8);
  ------------------
  |  | 3419|  23.7k|        f->bd_fn.recon_b_inter = dav1d_recon_b_inter_##bd##bpc; \
  |  | 3420|  23.7k|        f->bd_fn.recon_b_intra = dav1d_recon_b_intra_##bd##bpc; \
  |  | 3421|  23.7k|        f->bd_fn.filter_sbrow = dav1d_filter_sbrow_##bd##bpc; \
  |  | 3422|  23.7k|        f->bd_fn.filter_sbrow_deblock_cols = dav1d_filter_sbrow_deblock_cols_##bd##bpc; \
  |  | 3423|  23.7k|        f->bd_fn.filter_sbrow_deblock_rows = dav1d_filter_sbrow_deblock_rows_##bd##bpc; \
  |  | 3424|  23.7k|        f->bd_fn.filter_sbrow_cdef = dav1d_filter_sbrow_cdef_##bd##bpc; \
  |  | 3425|  23.7k|        f->bd_fn.filter_sbrow_resize = dav1d_filter_sbrow_resize_##bd##bpc; \
  |  | 3426|  23.7k|        f->bd_fn.filter_sbrow_lr = dav1d_filter_sbrow_lr_##bd##bpc; \
  |  | 3427|  23.7k|        f->bd_fn.backup_ipred_edge = dav1d_backup_ipred_edge_##bd##bpc; \
  |  | 3428|  23.7k|        f->bd_fn.read_coef_blocks = dav1d_read_coef_blocks_##bd##bpc; \
  |  | 3429|  23.7k|        f->bd_fn.copy_pal_block_y = dav1d_copy_pal_block_y_##bd##bpc; \
  |  | 3430|  23.7k|        f->bd_fn.copy_pal_block_uv = dav1d_copy_pal_block_uv_##bd##bpc; \
  |  | 3431|  23.7k|        f->bd_fn.read_pal_plane = dav1d_read_pal_plane_##bd##bpc; \
  |  | 3432|  23.7k|        f->bd_fn.read_pal_uv = dav1d_read_pal_uv_##bd##bpc
  ------------------
 3436|  23.7k|#endif
 3437|  23.7k|    } else {
 3438|  21.3k|#if CONFIG_16BPC
 3439|  21.3k|        assign_bitdepth_case(16);
  ------------------
  |  | 3419|  21.3k|        f->bd_fn.recon_b_inter = dav1d_recon_b_inter_##bd##bpc; \
  |  | 3420|  21.3k|        f->bd_fn.recon_b_intra = dav1d_recon_b_intra_##bd##bpc; \
  |  | 3421|  21.3k|        f->bd_fn.filter_sbrow = dav1d_filter_sbrow_##bd##bpc; \
  |  | 3422|  21.3k|        f->bd_fn.filter_sbrow_deblock_cols = dav1d_filter_sbrow_deblock_cols_##bd##bpc; \
  |  | 3423|  21.3k|        f->bd_fn.filter_sbrow_deblock_rows = dav1d_filter_sbrow_deblock_rows_##bd##bpc; \
  |  | 3424|  21.3k|        f->bd_fn.filter_sbrow_cdef = dav1d_filter_sbrow_cdef_##bd##bpc; \
  |  | 3425|  21.3k|        f->bd_fn.filter_sbrow_resize = dav1d_filter_sbrow_resize_##bd##bpc; \
  |  | 3426|  21.3k|        f->bd_fn.filter_sbrow_lr = dav1d_filter_sbrow_lr_##bd##bpc; \
  |  | 3427|  21.3k|        f->bd_fn.backup_ipred_edge = dav1d_backup_ipred_edge_##bd##bpc; \
  |  | 3428|  21.3k|        f->bd_fn.read_coef_blocks = dav1d_read_coef_blocks_##bd##bpc; \
  |  | 3429|  21.3k|        f->bd_fn.copy_pal_block_y = dav1d_copy_pal_block_y_##bd##bpc; \
  |  | 3430|  21.3k|        f->bd_fn.copy_pal_block_uv = dav1d_copy_pal_block_uv_##bd##bpc; \
  |  | 3431|  21.3k|        f->bd_fn.read_pal_plane = dav1d_read_pal_plane_##bd##bpc; \
  |  | 3432|  21.3k|        f->bd_fn.read_pal_uv = dav1d_read_pal_uv_##bd##bpc
  ------------------
 3440|  21.3k|#endif
 3441|  21.3k|    }
 3442|  45.0k|#undef assign_bitdepth_case
 3443|       |
 3444|  45.0k|    int ref_coded_width[7];
 3445|  45.0k|    if (IS_INTER_OR_SWITCH(f->frame_hdr)) {
  ------------------
  |  |   36|  45.0k|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 15.3k, False: 29.6k]
  |  |  ------------------
  ------------------
 3446|  15.3k|        if (f->frame_hdr->primary_ref_frame != DAV1D_PRIMARY_REF_NONE) {
  ------------------
  |  |   45|  15.3k|#define DAV1D_PRIMARY_REF_NONE 7
  ------------------
  |  Branch (3446:13): [True: 13.3k, False: 1.97k]
  ------------------
 3447|  13.3k|            const int pri_ref = f->frame_hdr->refidx[f->frame_hdr->primary_ref_frame];
 3448|  13.3k|            if (!c->refs[pri_ref].p.p.data[0]) {
  ------------------
  |  Branch (3448:17): [True: 257, False: 13.1k]
  ------------------
 3449|    257|                res = DAV1D_ERR(EINVAL);
  ------------------
  |  |   58|    257|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
 3450|    257|                goto error;
 3451|    257|            }
 3452|  13.3k|        }
 3453|   107k|        for (int i = 0; i < 7; i++) {
  ------------------
  |  Branch (3453:25): [True: 95.4k, False: 12.1k]
  ------------------
 3454|  95.4k|            const int refidx = f->frame_hdr->refidx[i];
 3455|  95.4k|            if (!c->refs[refidx].p.p.data[0] ||
  ------------------
  |  Branch (3455:17): [True: 520, False: 94.9k]
  ------------------
 3456|  94.9k|                f->frame_hdr->width[0] * 2 < c->refs[refidx].p.p.p.w ||
  ------------------
  |  Branch (3456:17): [True: 601, False: 94.3k]
  ------------------
 3457|  94.3k|                f->frame_hdr->height * 2 < c->refs[refidx].p.p.p.h ||
  ------------------
  |  Branch (3457:17): [True: 501, False: 93.8k]
  ------------------
 3458|  93.8k|                f->frame_hdr->width[0] > c->refs[refidx].p.p.p.w * 16 ||
  ------------------
  |  Branch (3458:17): [True: 1.08k, False: 92.7k]
  ------------------
 3459|  92.7k|                f->frame_hdr->height > c->refs[refidx].p.p.p.h * 16 ||
  ------------------
  |  Branch (3459:17): [True: 297, False: 92.4k]
  ------------------
 3460|  92.4k|                f->seq_hdr->layout != c->refs[refidx].p.p.p.layout ||
  ------------------
  |  Branch (3460:17): [True: 0, False: 92.4k]
  ------------------
 3461|  92.4k|                bpc != c->refs[refidx].p.p.p.bpc)
  ------------------
  |  Branch (3461:17): [True: 0, False: 92.4k]
  ------------------
 3462|  3.00k|            {
 3463|  10.6k|                for (int j = 0; j < i; j++)
  ------------------
  |  Branch (3463:33): [True: 7.67k, False: 3.00k]
  ------------------
 3464|  7.67k|                    dav1d_thread_picture_unref(&f->refp[j]);
 3465|  3.00k|                res = DAV1D_ERR(EINVAL);
  ------------------
  |  |   58|  3.00k|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
 3466|  3.00k|                goto error;
 3467|  3.00k|            }
 3468|  92.4k|            dav1d_thread_picture_ref(&f->refp[i], &c->refs[refidx].p);
 3469|  92.4k|            ref_coded_width[i] = c->refs[refidx].p.p.frame_hdr->width[0];
 3470|  92.4k|            if (f->frame_hdr->width[0] != c->refs[refidx].p.p.p.w ||
  ------------------
  |  Branch (3470:17): [True: 13.6k, False: 78.8k]
  ------------------
 3471|  78.8k|                f->frame_hdr->height != c->refs[refidx].p.p.p.h)
  ------------------
  |  Branch (3471:17): [True: 1.98k, False: 76.8k]
  ------------------
 3472|  15.6k|            {
 3473|  15.6k|#define scale_fac(ref_sz, this_sz) \
 3474|  15.6k|    ((((ref_sz) << 14) + ((this_sz) >> 1)) / (this_sz))
 3475|  15.6k|                f->svc[i][0].scale = scale_fac(c->refs[refidx].p.p.p.w,
  ------------------
  |  | 3474|  15.6k|    ((((ref_sz) << 14) + ((this_sz) >> 1)) / (this_sz))
  ------------------
 3476|  15.6k|                                               f->frame_hdr->width[0]);
 3477|  15.6k|                f->svc[i][1].scale = scale_fac(c->refs[refidx].p.p.p.h,
  ------------------
  |  | 3474|  15.6k|    ((((ref_sz) << 14) + ((this_sz) >> 1)) / (this_sz))
  ------------------
 3478|  15.6k|                                               f->frame_hdr->height);
 3479|  15.6k|                f->svc[i][0].step = (f->svc[i][0].scale + 8) >> 4;
 3480|  15.6k|                f->svc[i][1].step = (f->svc[i][1].scale + 8) >> 4;
 3481|  76.8k|            } else {
 3482|  76.8k|                f->svc[i][0].scale = f->svc[i][1].scale = 0;
 3483|  76.8k|            }
 3484|  92.4k|            f->gmv_warp_allowed[i] = f->frame_hdr->gmv[i].type > DAV1D_WM_TYPE_TRANSLATION &&
  ------------------
  |  Branch (3484:38): [True: 6.47k, False: 85.9k]
  ------------------
 3485|  6.47k|                                     !f->frame_hdr->force_integer_mv &&
  ------------------
  |  Branch (3485:38): [True: 5.57k, False: 897]
  ------------------
 3486|  5.57k|                                     !dav1d_get_shear_params(&f->frame_hdr->gmv[i]) &&
  ------------------
  |  Branch (3486:38): [True: 4.64k, False: 932]
  ------------------
 3487|  4.64k|                                     !f->svc[i][0].scale;
  ------------------
  |  Branch (3487:38): [True: 3.61k, False: 1.02k]
  ------------------
 3488|  92.4k|        }
 3489|  15.1k|    }
 3490|       |
 3491|       |    // setup entropy
 3492|  41.8k|    if (f->frame_hdr->primary_ref_frame == DAV1D_PRIMARY_REF_NONE) {
  ------------------
  |  |   45|  41.8k|#define DAV1D_PRIMARY_REF_NONE 7
  ------------------
  |  Branch (3492:9): [True: 31.2k, False: 10.5k]
  ------------------
 3493|  31.2k|        dav1d_cdf_thread_init_static(&f->in_cdf, f->frame_hdr->quant.yac);
 3494|  31.2k|    } else {
 3495|  10.5k|        const int pri_ref = f->frame_hdr->refidx[f->frame_hdr->primary_ref_frame];
 3496|  10.5k|        dav1d_cdf_thread_ref(&f->in_cdf, &c->cdf[pri_ref]);
 3497|  10.5k|    }
 3498|  41.8k|    if (f->frame_hdr->refresh_context) {
  ------------------
  |  Branch (3498:9): [True: 13.1k, False: 28.6k]
  ------------------
 3499|  13.1k|        res = dav1d_cdf_thread_alloc(c, &f->out_cdf, c->n_fc > 1);
 3500|  13.1k|        if (res < 0) goto error;
  ------------------
  |  Branch (3500:13): [True: 0, False: 13.1k]
  ------------------
 3501|  13.1k|    }
 3502|       |
 3503|       |    // FIXME qsort so tiles are in order (for frame threading)
 3504|  41.8k|    if (f->n_tile_data_alloc < c->n_tile_data) {
  ------------------
  |  Branch (3504:9): [True: 7.96k, False: 33.8k]
  ------------------
 3505|  7.96k|        dav1d_free(f->tile);
  ------------------
  |  |  135|  7.96k|#define dav1d_free(ptr) free(ptr)
  ------------------
 3506|  7.96k|        assert(c->n_tile_data < INT_MAX / (int)sizeof(*f->tile));
  ------------------
  |  Branch (3506:9): [True: 7.96k, False: 0]
  ------------------
 3507|  7.96k|        f->tile = dav1d_malloc(ALLOC_TILE, c->n_tile_data * sizeof(*f->tile));
  ------------------
  |  |  132|  7.96k|#define dav1d_malloc(type, sz) malloc(sz)
  ------------------
 3508|  7.96k|        if (!f->tile) {
  ------------------
  |  Branch (3508:13): [True: 0, False: 7.96k]
  ------------------
 3509|      0|            f->n_tile_data_alloc = f->n_tile_data = 0;
 3510|      0|            res = DAV1D_ERR(ENOMEM);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
 3511|      0|            goto error;
 3512|      0|        }
 3513|  7.96k|        f->n_tile_data_alloc = c->n_tile_data;
 3514|  7.96k|    }
 3515|  41.8k|    memcpy(f->tile, c->tile, c->n_tile_data * sizeof(*f->tile));
 3516|  41.8k|    memset(c->tile, 0, c->n_tile_data * sizeof(*c->tile));
 3517|  41.8k|    f->n_tile_data = c->n_tile_data;
 3518|  41.8k|    c->n_tile_data = 0;
 3519|       |
 3520|       |    // allocate frame
 3521|  41.8k|    res = dav1d_thread_picture_alloc(c, f, bpc);
 3522|  41.8k|    if (res < 0) goto error;
  ------------------
  |  Branch (3522:9): [True: 0, False: 41.8k]
  ------------------
 3523|       |
 3524|  41.8k|    if (f->frame_hdr->width[0] != f->frame_hdr->width[1]) {
  ------------------
  |  Branch (3524:9): [True: 4.11k, False: 37.6k]
  ------------------
 3525|  4.11k|        res = dav1d_picture_alloc_copy(c, &f->cur, f->frame_hdr->width[0], &f->sr_cur.p);
 3526|  4.11k|        if (res < 0) goto error;
  ------------------
  |  Branch (3526:13): [True: 0, False: 4.11k]
  ------------------
 3527|  37.6k|    } else {
 3528|  37.6k|        dav1d_picture_ref(&f->cur, &f->sr_cur.p);
 3529|  37.6k|    }
 3530|       |
 3531|  41.8k|    if (f->frame_hdr->width[0] != f->frame_hdr->width[1]) {
  ------------------
  |  Branch (3531:9): [True: 4.11k, False: 37.6k]
  ------------------
 3532|  4.11k|        f->resize_step[0] = scale_fac(f->cur.p.w, f->sr_cur.p.p.w);
  ------------------
  |  | 3474|  4.11k|    ((((ref_sz) << 14) + ((this_sz) >> 1)) / (this_sz))
  ------------------
 3533|  4.11k|        const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
 3534|  4.11k|        const int in_cw = (f->cur.p.w + ss_hor) >> ss_hor;
 3535|  4.11k|        const int out_cw = (f->sr_cur.p.p.w + ss_hor) >> ss_hor;
 3536|  4.11k|        f->resize_step[1] = scale_fac(in_cw, out_cw);
  ------------------
  |  | 3474|  4.11k|    ((((ref_sz) << 14) + ((this_sz) >> 1)) / (this_sz))
  ------------------
 3537|  4.11k|#undef scale_fac
 3538|  4.11k|        f->resize_start[0] = get_upscale_x0(f->cur.p.w, f->sr_cur.p.p.w, f->resize_step[0]);
 3539|  4.11k|        f->resize_start[1] = get_upscale_x0(in_cw, out_cw, f->resize_step[1]);
 3540|  4.11k|    }
 3541|       |
 3542|       |    // move f->cur into output queue
 3543|  41.8k|    if (c->n_fc == 1) {
  ------------------
  |  Branch (3543:9): [True: 41.8k, False: 0]
  ------------------
 3544|  41.8k|        if (f->frame_hdr->show_frame || c->output_invisible_frames) {
  ------------------
  |  Branch (3544:13): [True: 38.2k, False: 3.60k]
  |  Branch (3544:41): [True: 0, False: 3.60k]
  ------------------
 3545|  38.2k|            dav1d_thread_picture_ref(&c->out, &f->sr_cur);
 3546|  38.2k|            c->event_flags |= dav1d_picture_get_event_flags(&f->sr_cur);
 3547|  38.2k|        }
 3548|  41.8k|    } else {
 3549|      0|        dav1d_thread_picture_ref(out_delayed, &f->sr_cur);
 3550|      0|    }
 3551|       |
 3552|  41.8k|    f->w4 = (f->frame_hdr->width[0] + 3) >> 2;
 3553|  41.8k|    f->h4 = (f->frame_hdr->height + 3) >> 2;
 3554|  41.8k|    f->bw = ((f->frame_hdr->width[0] + 7) >> 3) << 1;
 3555|  41.8k|    f->bh = ((f->frame_hdr->height + 7) >> 3) << 1;
 3556|  41.8k|    f->sb128w = (f->bw + 31) >> 5;
 3557|  41.8k|    f->sb128h = (f->bh + 31) >> 5;
 3558|  41.8k|    f->sb_shift = 4 + f->seq_hdr->sb128;
 3559|  41.8k|    f->sb_step = 16 << f->seq_hdr->sb128;
 3560|  41.8k|    f->sbh = (f->bh + f->sb_step - 1) >> f->sb_shift;
 3561|  41.8k|    f->b4_stride = (f->bw + 31) & ~31;
 3562|  41.8k|    f->bitdepth_max = (1 << f->cur.p.bpc) - 1;
 3563|  41.8k|    atomic_init(&f->task_thread.error, 0);
 3564|  41.8k|    const int uses_2pass = c->n_fc > 1;
 3565|  41.8k|    const int cols = f->frame_hdr->tiling.cols;
 3566|  41.8k|    const int rows = f->frame_hdr->tiling.rows;
 3567|  41.8k|    atomic_store(&f->task_thread.task_counter,
 3568|  41.8k|                 (cols * rows + f->sbh) << uses_2pass);
 3569|       |
 3570|       |    // ref_mvs
 3571|  41.8k|    if (IS_INTER_OR_SWITCH(f->frame_hdr) || f->frame_hdr->allow_intrabc) {
  ------------------
  |  |   36|  83.6k|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 12.1k, False: 29.6k]
  |  |  ------------------
  ------------------
  |  Branch (3571:45): [True: 16.1k, False: 13.5k]
  ------------------
 3572|  28.2k|        f->mvs_ref = dav1d_ref_create_using_pool(c->refmvs_pool,
 3573|  28.2k|            sizeof(*f->mvs) * f->sb128h * 16 * (f->b4_stride >> 1));
 3574|  28.2k|        if (!f->mvs_ref) {
  ------------------
  |  Branch (3574:13): [True: 0, False: 28.2k]
  ------------------
 3575|      0|            res = DAV1D_ERR(ENOMEM);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
 3576|      0|            goto error;
 3577|      0|        }
 3578|  28.2k|        f->mvs = f->mvs_ref->data;
 3579|  28.2k|        if (!f->frame_hdr->allow_intrabc) {
  ------------------
  |  Branch (3579:13): [True: 12.1k, False: 16.1k]
  ------------------
 3580|  96.8k|            for (int i = 0; i < 7; i++)
  ------------------
  |  Branch (3580:29): [True: 84.7k, False: 12.1k]
  ------------------
 3581|  84.7k|                f->refpoc[i] = f->refp[i].p.frame_hdr->frame_offset;
 3582|  16.1k|        } else {
 3583|  16.1k|            memset(f->refpoc, 0, sizeof(f->refpoc));
 3584|  16.1k|        }
 3585|  28.2k|        if (f->frame_hdr->use_ref_frame_mvs) {
  ------------------
  |  Branch (3585:13): [True: 7.76k, False: 20.5k]
  ------------------
 3586|  62.1k|            for (int i = 0; i < 7; i++) {
  ------------------
  |  Branch (3586:29): [True: 54.3k, False: 7.76k]
  ------------------
 3587|  54.3k|                const int refidx = f->frame_hdr->refidx[i];
 3588|  54.3k|                const int ref_w = ((ref_coded_width[i] + 7) >> 3) << 1;
 3589|  54.3k|                const int ref_h = ((f->refp[i].p.p.h + 7) >> 3) << 1;
 3590|  54.3k|                if (c->refs[refidx].refmvs != NULL &&
  ------------------
  |  Branch (3590:21): [True: 36.5k, False: 17.7k]
  ------------------
 3591|  36.5k|                    ref_w == f->bw && ref_h == f->bh)
  ------------------
  |  Branch (3591:21): [True: 35.5k, False: 970]
  |  Branch (3591:39): [True: 35.2k, False: 324]
  ------------------
 3592|  35.2k|                {
 3593|  35.2k|                    f->ref_mvs_ref[i] = c->refs[refidx].refmvs;
 3594|  35.2k|                    dav1d_ref_inc(f->ref_mvs_ref[i]);
 3595|  35.2k|                    f->ref_mvs[i] = c->refs[refidx].refmvs->data;
 3596|  35.2k|                } else {
 3597|  19.0k|                    f->ref_mvs[i] = NULL;
 3598|  19.0k|                    f->ref_mvs_ref[i] = NULL;
 3599|  19.0k|                }
 3600|  54.3k|                memcpy(f->refrefpoc[i], c->refs[refidx].refpoc,
 3601|  54.3k|                       sizeof(*f->refrefpoc));
 3602|  54.3k|            }
 3603|  20.5k|        } else {
 3604|  20.5k|            memset(f->ref_mvs_ref, 0, sizeof(f->ref_mvs_ref));
 3605|  20.5k|        }
 3606|  28.2k|    } else {
 3607|  13.5k|        f->mvs_ref = NULL;
 3608|  13.5k|        memset(f->ref_mvs_ref, 0, sizeof(f->ref_mvs_ref));
 3609|  13.5k|    }
 3610|       |
 3611|       |    // segmap
 3612|  41.8k|    if (f->frame_hdr->segmentation.enabled) {
  ------------------
  |  Branch (3612:9): [True: 8.14k, False: 33.6k]
  ------------------
 3613|       |        // By default, the previous segmentation map is not initialised.
 3614|  8.14k|        f->prev_segmap_ref = NULL;
 3615|  8.14k|        f->prev_segmap = NULL;
 3616|       |
 3617|       |        // We might need a previous frame's segmentation map. This
 3618|       |        // happens if there is either no update or a temporal update.
 3619|  8.14k|        if (f->frame_hdr->segmentation.temporal || !f->frame_hdr->segmentation.update_map) {
  ------------------
  |  Branch (3619:13): [True: 575, False: 7.56k]
  |  Branch (3619:52): [True: 5.47k, False: 2.09k]
  ------------------
 3620|  6.05k|            const int pri_ref = f->frame_hdr->primary_ref_frame;
 3621|  6.05k|            assert(pri_ref != DAV1D_PRIMARY_REF_NONE);
  ------------------
  |  Branch (3621:13): [True: 6.05k, False: 0]
  ------------------
 3622|  6.05k|            const int ref_w = ((ref_coded_width[pri_ref] + 7) >> 3) << 1;
 3623|  6.05k|            const int ref_h = ((f->refp[pri_ref].p.p.h + 7) >> 3) << 1;
 3624|  6.05k|            if (ref_w == f->bw && ref_h == f->bh) {
  ------------------
  |  Branch (3624:17): [True: 5.57k, False: 478]
  |  Branch (3624:35): [True: 5.32k, False: 252]
  ------------------
 3625|  5.32k|                f->prev_segmap_ref = c->refs[f->frame_hdr->refidx[pri_ref]].segmap;
 3626|  5.32k|                if (f->prev_segmap_ref) {
  ------------------
  |  Branch (3626:21): [True: 4.70k, False: 616]
  ------------------
 3627|  4.70k|                    dav1d_ref_inc(f->prev_segmap_ref);
 3628|  4.70k|                    f->prev_segmap = f->prev_segmap_ref->data;
 3629|  4.70k|                }
 3630|  5.32k|            }
 3631|  6.05k|        }
 3632|       |
 3633|  8.14k|        if (f->frame_hdr->segmentation.update_map) {
  ------------------
  |  Branch (3633:13): [True: 2.66k, False: 5.47k]
  ------------------
 3634|       |            // We're updating an existing map, but need somewhere to
 3635|       |            // put the new values. Allocate them here (the data
 3636|       |            // actually gets set elsewhere)
 3637|  2.66k|            f->cur_segmap_ref = dav1d_ref_create_using_pool(c->segmap_pool,
 3638|  2.66k|                sizeof(*f->cur_segmap) * f->b4_stride * 32 * f->sb128h);
 3639|  2.66k|            if (!f->cur_segmap_ref) {
  ------------------
  |  Branch (3639:17): [True: 0, False: 2.66k]
  ------------------
 3640|      0|                dav1d_ref_dec(&f->prev_segmap_ref);
 3641|      0|                res = DAV1D_ERR(ENOMEM);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
 3642|      0|                goto error;
 3643|      0|            }
 3644|  2.66k|            f->cur_segmap = f->cur_segmap_ref->data;
 3645|  5.47k|        } else if (f->prev_segmap_ref) {
  ------------------
  |  Branch (3645:20): [True: 4.35k, False: 1.11k]
  ------------------
 3646|       |            // We're not updating an existing map, and we have a valid
 3647|       |            // reference. Use that.
 3648|  4.35k|            f->cur_segmap_ref = f->prev_segmap_ref;
 3649|  4.35k|            dav1d_ref_inc(f->cur_segmap_ref);
 3650|  4.35k|            f->cur_segmap = f->prev_segmap_ref->data;
 3651|  4.35k|        } else {
 3652|       |            // We need to make a new map. Allocate one here and zero it out.
 3653|  1.11k|            const size_t segmap_size = sizeof(*f->cur_segmap) * f->b4_stride * 32 * f->sb128h;
 3654|  1.11k|            f->cur_segmap_ref = dav1d_ref_create_using_pool(c->segmap_pool, segmap_size);
 3655|  1.11k|            if (!f->cur_segmap_ref) {
  ------------------
  |  Branch (3655:17): [True: 0, False: 1.11k]
  ------------------
 3656|      0|                res = DAV1D_ERR(ENOMEM);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
 3657|      0|                goto error;
 3658|      0|            }
 3659|  1.11k|            f->cur_segmap = f->cur_segmap_ref->data;
 3660|  1.11k|            memset(f->cur_segmap, 0, segmap_size);
 3661|  1.11k|        }
 3662|  33.6k|    } else {
 3663|  33.6k|        f->cur_segmap = NULL;
 3664|  33.6k|        f->cur_segmap_ref = NULL;
 3665|  33.6k|        f->prev_segmap_ref = NULL;
 3666|  33.6k|    }
 3667|       |
 3668|       |    // update references etc.
 3669|  41.8k|    const unsigned refresh_frame_flags = f->frame_hdr->refresh_frame_flags;
 3670|   376k|    for (int i = 0; i < 8; i++) {
  ------------------
  |  Branch (3670:21): [True: 334k, False: 41.8k]
  ------------------
 3671|   334k|        if (refresh_frame_flags & (1 << i)) {
  ------------------
  |  Branch (3671:13): [True: 270k, False: 63.8k]
  ------------------
 3672|   270k|            if (c->refs[i].p.p.frame_hdr)
  ------------------
  |  Branch (3672:17): [True: 86.3k, False: 184k]
  ------------------
 3673|  86.3k|                dav1d_thread_picture_unref(&c->refs[i].p);
 3674|   270k|            dav1d_thread_picture_ref(&c->refs[i].p, &f->sr_cur);
 3675|       |
 3676|   270k|            dav1d_cdf_thread_unref(&c->cdf[i]);
 3677|   270k|            if (f->frame_hdr->refresh_context) {
  ------------------
  |  Branch (3677:17): [True: 70.4k, False: 200k]
  ------------------
 3678|  70.4k|                dav1d_cdf_thread_ref(&c->cdf[i], &f->out_cdf);
 3679|   200k|            } else {
 3680|   200k|                dav1d_cdf_thread_ref(&c->cdf[i], &f->in_cdf);
 3681|   200k|            }
 3682|       |
 3683|   270k|            dav1d_ref_dec(&c->refs[i].segmap);
 3684|   270k|            c->refs[i].segmap = f->cur_segmap_ref;
 3685|   270k|            if (f->cur_segmap_ref)
  ------------------
  |  Branch (3685:17): [True: 43.8k, False: 226k]
  ------------------
 3686|  43.8k|                dav1d_ref_inc(f->cur_segmap_ref);
 3687|   270k|            dav1d_ref_dec(&c->refs[i].refmvs);
 3688|   270k|            if (!f->frame_hdr->allow_intrabc) {
  ------------------
  |  Branch (3688:17): [True: 142k, False: 128k]
  ------------------
 3689|   142k|                c->refs[i].refmvs = f->mvs_ref;
 3690|   142k|                if (f->mvs_ref)
  ------------------
  |  Branch (3690:21): [True: 39.2k, False: 102k]
  ------------------
 3691|  39.2k|                    dav1d_ref_inc(f->mvs_ref);
 3692|   142k|            }
 3693|   270k|            memcpy(c->refs[i].refpoc, f->refpoc, sizeof(f->refpoc));
 3694|   270k|        }
 3695|   334k|    }
 3696|       |
 3697|  41.8k|    if (c->n_fc == 1) {
  ------------------
  |  Branch (3697:9): [True: 41.8k, False: 0]
  ------------------
 3698|  41.8k|        if ((res = dav1d_decode_frame(f)) < 0) {
  ------------------
  |  Branch (3698:13): [True: 23.8k, False: 17.9k]
  ------------------
 3699|  23.8k|            dav1d_thread_picture_unref(&c->out);
 3700|   214k|            for (int i = 0; i < 8; i++) {
  ------------------
  |  Branch (3700:29): [True: 190k, False: 23.8k]
  ------------------
 3701|   190k|                if (refresh_frame_flags & (1 << i)) {
  ------------------
  |  Branch (3701:21): [True: 161k, False: 29.0k]
  ------------------
 3702|   161k|                    if (c->refs[i].p.p.frame_hdr)
  ------------------
  |  Branch (3702:25): [True: 161k, False: 0]
  ------------------
 3703|   161k|                        dav1d_thread_picture_unref(&c->refs[i].p);
 3704|   161k|                    dav1d_cdf_thread_unref(&c->cdf[i]);
 3705|   161k|                    dav1d_ref_dec(&c->refs[i].segmap);
 3706|   161k|                    dav1d_ref_dec(&c->refs[i].refmvs);
 3707|   161k|                }
 3708|   190k|            }
 3709|  23.8k|            goto error;
 3710|  23.8k|        }
 3711|  41.8k|    } else {
 3712|      0|        dav1d_task_frame_init(f);
 3713|      0|        pthread_mutex_unlock(&c->task_thread.lock);
 3714|      0|    }
 3715|       |
 3716|  17.9k|    return 0;
 3717|  27.0k|error:
 3718|  27.0k|    atomic_init(&f->task_thread.error, 1);
 3719|  27.0k|    dav1d_cdf_thread_unref(&f->in_cdf);
 3720|  27.0k|    if (f->frame_hdr->refresh_context)
  ------------------
  |  Branch (3720:9): [True: 5.14k, False: 21.9k]
  ------------------
 3721|  5.14k|        dav1d_cdf_thread_unref(&f->out_cdf);
 3722|   216k|    for (int i = 0; i < 7; i++) {
  ------------------
  |  Branch (3722:21): [True: 189k, False: 27.0k]
  ------------------
 3723|   189k|        if (f->refp[i].p.frame_hdr)
  ------------------
  |  Branch (3723:13): [True: 0, False: 189k]
  ------------------
 3724|      0|            dav1d_thread_picture_unref(&f->refp[i]);
 3725|   189k|        dav1d_ref_dec(&f->ref_mvs_ref[i]);
 3726|   189k|    }
 3727|  27.0k|    if (c->n_fc == 1)
  ------------------
  |  Branch (3727:9): [True: 27.0k, False: 0]
  ------------------
 3728|  27.0k|        dav1d_thread_picture_unref(&c->out);
 3729|      0|    else
 3730|      0|        dav1d_thread_picture_unref(out_delayed);
 3731|  27.0k|    dav1d_picture_unref_internal(&f->cur);
 3732|  27.0k|    dav1d_thread_picture_unref(&f->sr_cur);
 3733|  27.0k|    dav1d_ref_dec(&f->mvs_ref);
 3734|  27.0k|    dav1d_ref_dec(&f->seq_hdr_ref);
 3735|  27.0k|    dav1d_ref_dec(&f->frame_hdr_ref);
 3736|  27.0k|    dav1d_data_props_copy(&c->cached_error_props, &c->in.m);
 3737|       |
 3738|  27.0k|    for (int i = 0; i < f->n_tile_data; i++)
  ------------------
  |  Branch (3738:21): [True: 0, False: 27.0k]
  ------------------
 3739|      0|        dav1d_data_unref_internal(&f->tile[i].data);
 3740|  27.0k|    f->n_tile_data = 0;
 3741|       |
 3742|  27.0k|    if (c->n_fc > 1)
  ------------------
  |  Branch (3742:9): [True: 0, False: 27.0k]
  ------------------
 3743|      0|        pthread_mutex_unlock(&c->task_thread.lock);
 3744|       |
 3745|  27.0k|    return res;
 3746|  41.8k|}
decode.c:reset_context:
 2390|   489k|static void reset_context(BlockContext *const ctx, const int keyframe, const int pass) {
 2391|   489k|    memset(ctx->intra, keyframe, sizeof(ctx->intra));
 2392|   489k|    memset(ctx->uvmode, DC_PRED, sizeof(ctx->uvmode));
 2393|   489k|    if (keyframe)
  ------------------
  |  Branch (2393:9): [True: 398k, False: 91.1k]
  ------------------
 2394|   398k|        memset(ctx->mode, DC_PRED, sizeof(ctx->mode));
 2395|       |
 2396|   489k|    if (pass == 2) return;
  ------------------
  |  Branch (2396:9): [True: 0, False: 489k]
  ------------------
 2397|       |
 2398|   489k|    memset(ctx->partition, 0, sizeof(ctx->partition));
 2399|   489k|    memset(ctx->skip, 0, sizeof(ctx->skip));
 2400|   489k|    memset(ctx->skip_mode, 0, sizeof(ctx->skip_mode));
 2401|   489k|    memset(ctx->tx_lpf_y, 2, sizeof(ctx->tx_lpf_y));
 2402|   489k|    memset(ctx->tx_lpf_uv, 1, sizeof(ctx->tx_lpf_uv));
 2403|   489k|    memset(ctx->tx_intra, -1, sizeof(ctx->tx_intra));
 2404|   489k|    memset(ctx->tx, TX_64X64, sizeof(ctx->tx));
 2405|   489k|    if (!keyframe) {
  ------------------
  |  Branch (2405:9): [True: 91.1k, False: 398k]
  ------------------
 2406|  91.1k|        memset(ctx->ref, -1, sizeof(ctx->ref));
 2407|  91.1k|        memset(ctx->comp_type, 0, sizeof(ctx->comp_type));
 2408|  91.1k|        memset(ctx->mode, NEARESTMV, sizeof(ctx->mode));
 2409|  91.1k|    }
 2410|   489k|    memset(ctx->lcoef, 0x40, sizeof(ctx->lcoef));
 2411|   489k|    memset(ctx->ccoef, 0x40, sizeof(ctx->ccoef));
 2412|   489k|    memset(ctx->filter, DAV1D_N_SWITCHABLE_FILTERS, sizeof(ctx->filter));
 2413|   489k|    memset(ctx->seg_pred, 0, sizeof(ctx->seg_pred));
 2414|   489k|    memset(ctx->pal_sz, 0, sizeof(ctx->pal_sz));
 2415|   489k|}
decode.c:decode_sb:
 2119|  2.85M|{
 2120|  2.85M|    const Dav1dFrameContext *const f = t->f;
 2121|  2.85M|    Dav1dTileState *const ts = t->ts;
 2122|  2.85M|    const int hsz = 16 >> bl;
 2123|  2.85M|    const int have_h_split = f->bw > t->bx + hsz;
 2124|  2.85M|    const int have_v_split = f->bh > t->by + hsz;
 2125|       |
 2126|  2.85M|    if (!have_h_split && !have_v_split) {
  ------------------
  |  Branch (2126:9): [True: 100k, False: 2.75M]
  |  Branch (2126:26): [True: 45.3k, False: 54.6k]
  ------------------
 2127|  45.3k|        assert(bl < BL_8X8);
  ------------------
  |  Branch (2127:9): [True: 45.3k, False: 0]
  ------------------
 2128|  45.3k|        return decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 0));
  ------------------
  |  |   51|  45.3k|    ((const EdgeNode*)((uintptr_t)(n) + ((const EdgeBranch*)(n))->split_offset[i]))
  ------------------
 2129|  45.3k|    }
 2130|       |
 2131|  2.81M|    uint16_t *pc;
 2132|  2.81M|    enum BlockPartition bp;
 2133|  2.81M|    int ctx, bx8, by8;
 2134|  2.81M|    if (t->frame_thread.pass != 2) {
  ------------------
  |  Branch (2134:9): [True: 2.81M, False: 0]
  ------------------
 2135|  2.81M|        if (0 && bl == BL_64X64)
  ------------------
  |  Branch (2135:13): [Folded, False: 2.81M]
  |  Branch (2135:18): [True: 0, False: 0]
  ------------------
 2136|      0|            printf("poc=%d,y=%d,x=%d,bl=%d,r=%d\n",
 2137|      0|                   f->frame_hdr->frame_offset, t->by, t->bx, bl, ts->msac.rng);
 2138|  2.81M|        bx8 = (t->bx & 31) >> 1;
 2139|  2.81M|        by8 = (t->by & 31) >> 1;
 2140|  2.81M|        ctx = get_partition_ctx(t->a, &t->l, bl, by8, bx8);
 2141|  2.81M|        pc = ts->cdf.m.partition[bl][ctx];
 2142|  2.81M|    }
 2143|       |
 2144|  2.81M|    if (have_h_split && have_v_split) {
  ------------------
  |  Branch (2144:9): [True: 2.75M, False: 54.6k]
  |  Branch (2144:25): [True: 2.26M, False: 491k]
  ------------------
 2145|  2.26M|        if (t->frame_thread.pass == 2) {
  ------------------
  |  Branch (2145:13): [True: 0, False: 2.26M]
  ------------------
 2146|      0|            const Av1Block *const b = &f->frame_thread.b[t->by * f->b4_stride + t->bx];
 2147|      0|            bp = b->bl == bl ? b->bp : PARTITION_SPLIT;
  ------------------
  |  Branch (2147:18): [True: 0, False: 0]
  ------------------
 2148|  2.26M|        } else {
 2149|  2.26M|            bp = dav1d_msac_decode_symbol_adapt16(&ts->msac, pc,
  ------------------
  |  |   57|  2.26M|#define dav1d_msac_decode_symbol_adapt16(ctx, cdf, symb) ((ctx)->symbol_adapt16(ctx, cdf, symb))
  ------------------
 2150|  2.26M|                                                  dav1d_partition_type_count[bl]);
 2151|  2.26M|            if (f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I422 &&
  ------------------
  |  Branch (2151:17): [True: 12.4k, False: 2.25M]
  ------------------
 2152|  12.4k|                (bp == PARTITION_V || bp == PARTITION_V4 ||
  ------------------
  |  Branch (2152:18): [True: 433, False: 11.9k]
  |  Branch (2152:39): [True: 409, False: 11.5k]
  ------------------
 2153|  11.5k|                 bp == PARTITION_T_LEFT_SPLIT || bp == PARTITION_T_RIGHT_SPLIT))
  ------------------
  |  Branch (2153:18): [True: 159, False: 11.4k]
  |  Branch (2153:50): [True: 372, False: 11.0k]
  ------------------
 2154|  1.37k|            {
 2155|  1.37k|                return 1;
 2156|  1.37k|            }
 2157|  2.26M|            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  2.26M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 2.26M]
  |  |  ------------------
  |  |   35|  2.26M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  2.26M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 2158|      0|                printf("poc=%d,y=%d,x=%d,bl=%d,ctx=%d,bp=%d: r=%d\n",
 2159|      0|                       f->frame_hdr->frame_offset, t->by, t->bx, bl, ctx, bp,
 2160|      0|                       ts->msac.rng);
 2161|  2.26M|        }
 2162|  2.26M|        const uint8_t *const b = dav1d_block_sizes[bl][bp];
 2163|       |
 2164|  2.26M|        switch (bp) {
 2165|   894k|        case PARTITION_NONE:
  ------------------
  |  Branch (2165:9): [True: 894k, False: 1.36M]
  ------------------
 2166|   894k|            if (decode_b(t, bl, b[0], PARTITION_NONE, node->o))
  ------------------
  |  Branch (2166:17): [True: 353, False: 893k]
  ------------------
 2167|    353|                return -1;
 2168|   893k|            break;
 2169|   893k|        case PARTITION_H:
  ------------------
  |  Branch (2169:9): [True: 259k, False: 2.00M]
  ------------------
 2170|   259k|            if (decode_b(t, bl, b[0], PARTITION_H, node->h[0]))
  ------------------
  |  Branch (2170:17): [True: 262, False: 259k]
  ------------------
 2171|    262|                return -1;
 2172|   259k|            t->by += hsz;
 2173|   259k|            if (decode_b(t, bl, b[0], PARTITION_H, node->h[1]))
  ------------------
  |  Branch (2173:17): [True: 272, False: 258k]
  ------------------
 2174|    272|                return -1;
 2175|   258k|            t->by -= hsz;
 2176|   258k|            break;
 2177|   170k|        case PARTITION_V:
  ------------------
  |  Branch (2177:9): [True: 170k, False: 2.09M]
  ------------------
 2178|   170k|            if (decode_b(t, bl, b[0], PARTITION_V, node->v[0]))
  ------------------
  |  Branch (2178:17): [True: 80, False: 169k]
  ------------------
 2179|     80|                return -1;
 2180|   169k|            t->bx += hsz;
 2181|   169k|            if (decode_b(t, bl, b[0], PARTITION_V, node->v[1]))
  ------------------
  |  Branch (2181:17): [True: 232, False: 169k]
  ------------------
 2182|    232|                return -1;
 2183|   169k|            t->bx -= hsz;
 2184|   169k|            break;
 2185|   536k|        case PARTITION_SPLIT:
  ------------------
  |  Branch (2185:9): [True: 536k, False: 1.72M]
  ------------------
 2186|   536k|            if (bl == BL_8X8) {
  ------------------
  |  Branch (2186:17): [True: 148k, False: 388k]
  ------------------
 2187|   148k|                const EdgeTip *const tip = (const EdgeTip *) node;
 2188|   148k|                assert(hsz == 1);
  ------------------
  |  Branch (2188:17): [True: 148k, False: 0]
  ------------------
 2189|   148k|                if (decode_b(t, bl, BS_4x4, PARTITION_SPLIT, EDGE_ALL_TR_AND_BL))
  ------------------
  |  Branch (2189:21): [True: 202, False: 147k]
  ------------------
 2190|    202|                    return -1;
 2191|   147k|                const enum Filter2d tl_filter = t->tl_4x4_filter;
 2192|   147k|                t->bx++;
 2193|   147k|                if (decode_b(t, bl, BS_4x4, PARTITION_SPLIT, tip->split[0]))
  ------------------
  |  Branch (2193:21): [True: 179, False: 147k]
  ------------------
 2194|    179|                    return -1;
 2195|   147k|                t->bx--;
 2196|   147k|                t->by++;
 2197|   147k|                if (decode_b(t, bl, BS_4x4, PARTITION_SPLIT, tip->split[1]))
  ------------------
  |  Branch (2197:21): [True: 209, False: 147k]
  ------------------
 2198|    209|                    return -1;
 2199|   147k|                t->bx++;
 2200|   147k|                t->tl_4x4_filter = tl_filter;
 2201|   147k|                if (decode_b(t, bl, BS_4x4, PARTITION_SPLIT, tip->split[2]))
  ------------------
  |  Branch (2201:21): [True: 195, False: 147k]
  ------------------
 2202|    195|                    return -1;
 2203|   147k|                t->bx--;
 2204|   147k|                t->by--;
 2205|   147k|#if ARCH_X86_64
 2206|   147k|                if (t->frame_thread.pass) {
  ------------------
  |  Branch (2206:21): [True: 0, False: 147k]
  ------------------
 2207|       |                    /* In 8-bit mode with 2-pass decoding the coefficient buffer
 2208|       |                     * can end up misaligned due to skips here. Work around
 2209|       |                     * the issue by explicitly realigning the buffer. */
 2210|      0|                    const int p = t->frame_thread.pass & 1;
 2211|      0|                    ts->frame_thread[p].cf =
 2212|      0|                        (void*)(((uintptr_t)ts->frame_thread[p].cf + 63) & ~63);
 2213|      0|                }
 2214|   147k|#endif
 2215|   388k|            } else {
 2216|   388k|                if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 0)))
  ------------------
  |  |   51|   388k|    ((const EdgeNode*)((uintptr_t)(n) + ((const EdgeBranch*)(n))->split_offset[i]))
  ------------------
  |  Branch (2216:21): [True: 1.44k, False: 386k]
  ------------------
 2217|  1.44k|                    return 1;
 2218|   386k|                t->bx += hsz;
 2219|   386k|                if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 1)))
  ------------------
  |  |   51|   386k|    ((const EdgeNode*)((uintptr_t)(n) + ((const EdgeBranch*)(n))->split_offset[i]))
  ------------------
  |  Branch (2219:21): [True: 1.38k, False: 385k]
  ------------------
 2220|  1.38k|                    return 1;
 2221|   385k|                t->bx -= hsz;
 2222|   385k|                t->by += hsz;
 2223|   385k|                if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 2)))
  ------------------
  |  |   51|   385k|    ((const EdgeNode*)((uintptr_t)(n) + ((const EdgeBranch*)(n))->split_offset[i]))
  ------------------
  |  Branch (2223:21): [True: 528, False: 384k]
  ------------------
 2224|    528|                    return 1;
 2225|   384k|                t->bx += hsz;
 2226|   384k|                if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 3)))
  ------------------
  |  |   51|   384k|    ((const EdgeNode*)((uintptr_t)(n) + ((const EdgeBranch*)(n))->split_offset[i]))
  ------------------
  |  Branch (2226:21): [True: 1.10k, False: 383k]
  ------------------
 2227|  1.10k|                    return 1;
 2228|   383k|                t->bx -= hsz;
 2229|   383k|                t->by -= hsz;
 2230|   383k|            }
 2231|   531k|            break;
 2232|   531k|        case PARTITION_T_TOP_SPLIT: {
  ------------------
  |  Branch (2232:9): [True: 39.8k, False: 2.22M]
  ------------------
 2233|  39.8k|            if (decode_b(t, bl, b[0], PARTITION_T_TOP_SPLIT, EDGE_ALL_TR_AND_BL))
  ------------------
  |  Branch (2233:17): [True: 196, False: 39.6k]
  ------------------
 2234|    196|                return -1;
 2235|  39.6k|            t->bx += hsz;
 2236|  39.6k|            if (decode_b(t, bl, b[0], PARTITION_T_TOP_SPLIT, node->v[1]))
  ------------------
  |  Branch (2236:17): [True: 197, False: 39.4k]
  ------------------
 2237|    197|                return -1;
 2238|  39.4k|            t->bx -= hsz;
 2239|  39.4k|            t->by += hsz;
 2240|  39.4k|            if (decode_b(t, bl, b[1], PARTITION_T_TOP_SPLIT, node->h[1]))
  ------------------
  |  Branch (2240:17): [True: 194, False: 39.2k]
  ------------------
 2241|    194|                return -1;
 2242|  39.2k|            t->by -= hsz;
 2243|  39.2k|            break;
 2244|  39.4k|        }
 2245|  41.7k|        case PARTITION_T_BOTTOM_SPLIT: {
  ------------------
  |  Branch (2245:9): [True: 41.7k, False: 2.22M]
  ------------------
 2246|  41.7k|            if (decode_b(t, bl, b[0], PARTITION_T_BOTTOM_SPLIT, node->h[0]))
  ------------------
  |  Branch (2246:17): [True: 331, False: 41.4k]
  ------------------
 2247|    331|                return -1;
 2248|  41.4k|            t->by += hsz;
 2249|  41.4k|            if (decode_b(t, bl, b[1], PARTITION_T_BOTTOM_SPLIT, node->v[0]))
  ------------------
  |  Branch (2249:17): [True: 69, False: 41.3k]
  ------------------
 2250|     69|                return -1;
 2251|  41.3k|            t->bx += hsz;
 2252|  41.3k|            if (decode_b(t, bl, b[1], PARTITION_T_BOTTOM_SPLIT, 0))
  ------------------
  |  Branch (2252:17): [True: 228, False: 41.1k]
  ------------------
 2253|    228|                return -1;
 2254|  41.1k|            t->bx -= hsz;
 2255|  41.1k|            t->by -= hsz;
 2256|  41.1k|            break;
 2257|  41.3k|        }
 2258|  30.0k|        case PARTITION_T_LEFT_SPLIT: {
  ------------------
  |  Branch (2258:9): [True: 30.0k, False: 2.23M]
  ------------------
 2259|  30.0k|            if (decode_b(t, bl, b[0], PARTITION_T_LEFT_SPLIT, EDGE_ALL_TR_AND_BL))
  ------------------
  |  Branch (2259:17): [True: 327, False: 29.7k]
  ------------------
 2260|    327|                return -1;
 2261|  29.7k|            t->by += hsz;
 2262|  29.7k|            if (decode_b(t, bl, b[0], PARTITION_T_LEFT_SPLIT, node->h[1]))
  ------------------
  |  Branch (2262:17): [True: 206, False: 29.5k]
  ------------------
 2263|    206|                return -1;
 2264|  29.5k|            t->by -= hsz;
 2265|  29.5k|            t->bx += hsz;
 2266|  29.5k|            if (decode_b(t, bl, b[1], PARTITION_T_LEFT_SPLIT, node->v[1]))
  ------------------
  |  Branch (2266:17): [True: 197, False: 29.3k]
  ------------------
 2267|    197|                return -1;
 2268|  29.3k|            t->bx -= hsz;
 2269|  29.3k|            break;
 2270|  29.5k|        }
 2271|  54.9k|        case PARTITION_T_RIGHT_SPLIT: {
  ------------------
  |  Branch (2271:9): [True: 54.9k, False: 2.20M]
  ------------------
 2272|  54.9k|            if (decode_b(t, bl, b[0], PARTITION_T_RIGHT_SPLIT, node->v[0]))
  ------------------
  |  Branch (2272:17): [True: 200, False: 54.7k]
  ------------------
 2273|    200|                return -1;
 2274|  54.7k|            t->bx += hsz;
 2275|  54.7k|            if (decode_b(t, bl, b[1], PARTITION_T_RIGHT_SPLIT, node->h[0]))
  ------------------
  |  Branch (2275:17): [True: 196, False: 54.5k]
  ------------------
 2276|    196|                return -1;
 2277|  54.5k|            t->by += hsz;
 2278|  54.5k|            if (decode_b(t, bl, b[1], PARTITION_T_RIGHT_SPLIT, 0))
  ------------------
  |  Branch (2278:17): [True: 74, False: 54.4k]
  ------------------
 2279|     74|                return -1;
 2280|  54.4k|            t->by -= hsz;
 2281|  54.4k|            t->bx -= hsz;
 2282|  54.4k|            break;
 2283|  54.5k|        }
 2284|   102k|        case PARTITION_H4: {
  ------------------
  |  Branch (2284:9): [True: 102k, False: 2.15M]
  ------------------
 2285|   102k|            const EdgeBranch *const branch = (const EdgeBranch *) node;
 2286|   102k|            if (decode_b(t, bl, b[0], PARTITION_H4, node->h[0]))
  ------------------
  |  Branch (2286:17): [True: 192, False: 102k]
  ------------------
 2287|    192|                return -1;
 2288|   102k|            t->by += hsz >> 1;
 2289|   102k|            if (decode_b(t, bl, b[0], PARTITION_H4, branch->h4))
  ------------------
  |  Branch (2289:17): [True: 213, False: 102k]
  ------------------
 2290|    213|                return -1;
 2291|   102k|            t->by += hsz >> 1;
 2292|   102k|            if (decode_b(t, bl, b[0], PARTITION_H4, EDGE_ALL_LEFT_HAS_BOTTOM))
  ------------------
  |  Branch (2292:17): [True: 194, False: 102k]
  ------------------
 2293|    194|                return -1;
 2294|   102k|            t->by += hsz >> 1;
 2295|   102k|            if (t->by < f->bh)
  ------------------
  |  Branch (2295:17): [True: 98.2k, False: 3.98k]
  ------------------
 2296|  98.2k|                if (decode_b(t, bl, b[0], PARTITION_H4, node->h[1]))
  ------------------
  |  Branch (2296:21): [True: 230, False: 98.0k]
  ------------------
 2297|    230|                    return -1;
 2298|   102k|            t->by -= hsz * 3 >> 1;
 2299|   102k|            break;
 2300|   102k|        }
 2301|   133k|        case PARTITION_V4: {
  ------------------
  |  Branch (2301:9): [True: 133k, False: 2.12M]
  ------------------
 2302|   133k|            const EdgeBranch *const branch = (const EdgeBranch *) node;
 2303|   133k|            if (decode_b(t, bl, b[0], PARTITION_V4, node->v[0]))
  ------------------
  |  Branch (2303:17): [True: 316, False: 132k]
  ------------------
 2304|    316|                return -1;
 2305|   132k|            t->bx += hsz >> 1;
 2306|   132k|            if (decode_b(t, bl, b[0], PARTITION_V4, branch->v4))
  ------------------
  |  Branch (2306:17): [True: 292, False: 132k]
  ------------------
 2307|    292|                return -1;
 2308|   132k|            t->bx += hsz >> 1;
 2309|   132k|            if (decode_b(t, bl, b[0], PARTITION_V4, EDGE_ALL_TOP_HAS_RIGHT))
  ------------------
  |  Branch (2309:17): [True: 235, False: 132k]
  ------------------
 2310|    235|                return -1;
 2311|   132k|            t->bx += hsz >> 1;
 2312|   132k|            if (t->bx < f->bw)
  ------------------
  |  Branch (2312:17): [True: 128k, False: 3.26k]
  ------------------
 2313|   128k|                if (decode_b(t, bl, b[0], PARTITION_V4, node->v[1]))
  ------------------
  |  Branch (2313:21): [True: 195, False: 128k]
  ------------------
 2314|    195|                    return -1;
 2315|   132k|            t->bx -= hsz * 3 >> 1;
 2316|   132k|            break;
 2317|   132k|        }
 2318|      0|        default: assert(0);
  ------------------
  |  Branch (2318:9): [True: 0, False: 2.26M]
  |  Branch (2318:18): [Folded, False: 0]
  ------------------
 2319|  2.26M|        }
 2320|  2.26M|    } else if (have_h_split) {
  ------------------
  |  Branch (2320:16): [True: 491k, False: 54.6k]
  ------------------
 2321|   491k|        unsigned is_split;
 2322|   491k|        if (t->frame_thread.pass == 2) {
  ------------------
  |  Branch (2322:13): [True: 0, False: 491k]
  ------------------
 2323|      0|            const Av1Block *const b = &f->frame_thread.b[t->by * f->b4_stride + t->bx];
 2324|      0|            is_split = b->bl != bl;
 2325|   491k|        } else {
 2326|   491k|            is_split = dav1d_msac_decode_bool(&ts->msac,
  ------------------
  |  |   54|   491k|#define dav1d_msac_decode_bool           dav1d_msac_decode_bool_sse2
  ------------------
 2327|   491k|                           gather_top_partition_prob(pc, bl));
 2328|   491k|            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   491k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 491k]
  |  |  ------------------
  |  |   35|   491k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   491k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 2329|      0|                printf("poc=%d,y=%d,x=%d,bl=%d,ctx=%d,bp=%d: r=%d\n",
 2330|      0|                       f->frame_hdr->frame_offset, t->by, t->bx, bl, ctx,
 2331|      0|                       is_split ? PARTITION_SPLIT : PARTITION_H, ts->msac.rng);
  ------------------
  |  Branch (2331:24): [True: 0, False: 0]
  ------------------
 2332|   491k|        }
 2333|       |
 2334|   491k|        assert(bl < BL_8X8);
  ------------------
  |  Branch (2334:9): [True: 491k, False: 0]
  ------------------
 2335|   491k|        if (is_split) {
  ------------------
  |  Branch (2335:13): [True: 322k, False: 169k]
  ------------------
 2336|   322k|            bp = PARTITION_SPLIT;
 2337|   322k|            if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 0))) return 1;
  ------------------
  |  |   51|   322k|    ((const EdgeNode*)((uintptr_t)(n) + ((const EdgeBranch*)(n))->split_offset[i]))
  ------------------
  |  Branch (2337:17): [True: 4.47k, False: 317k]
  ------------------
 2338|   317k|            t->bx += hsz;
 2339|   317k|            if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 1))) return 1;
  ------------------
  |  |   51|   317k|    ((const EdgeNode*)((uintptr_t)(n) + ((const EdgeBranch*)(n))->split_offset[i]))
  ------------------
  |  Branch (2339:17): [True: 2.73k, False: 314k]
  ------------------
 2340|   314k|            t->bx -= hsz;
 2341|   314k|        } else {
 2342|   169k|            bp = PARTITION_H;
 2343|   169k|            if (decode_b(t, bl, dav1d_block_sizes[bl][PARTITION_H][0],
  ------------------
  |  Branch (2343:17): [True: 249, False: 169k]
  ------------------
 2344|   169k|                         PARTITION_H, node->h[0]))
 2345|    249|                return -1;
 2346|   169k|        }
 2347|   491k|    } else {
 2348|  54.6k|        assert(have_v_split);
  ------------------
  |  Branch (2348:9): [True: 54.6k, False: 0]
  ------------------
 2349|  54.6k|        unsigned is_split;
 2350|  54.6k|        if (t->frame_thread.pass == 2) {
  ------------------
  |  Branch (2350:13): [True: 0, False: 54.6k]
  ------------------
 2351|      0|            const Av1Block *const b = &f->frame_thread.b[t->by * f->b4_stride + t->bx];
 2352|      0|            is_split = b->bl != bl;
 2353|  54.6k|        } else {
 2354|  54.6k|            is_split = dav1d_msac_decode_bool(&ts->msac,
  ------------------
  |  |   54|  54.6k|#define dav1d_msac_decode_bool           dav1d_msac_decode_bool_sse2
  ------------------
 2355|  54.6k|                           gather_left_partition_prob(pc, bl));
 2356|  54.6k|            if (f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I422 && !is_split)
  ------------------
  |  Branch (2356:17): [True: 1.36k, False: 53.3k]
  |  Branch (2356:63): [True: 245, False: 1.12k]
  ------------------
 2357|    245|                return 1;
 2358|  54.4k|            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  54.4k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 54.4k]
  |  |  ------------------
  |  |   35|  54.4k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  54.4k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 2359|      0|                printf("poc=%d,y=%d,x=%d,bl=%d,ctx=%d,bp=%d: r=%d\n",
 2360|      0|                       f->frame_hdr->frame_offset, t->by, t->bx, bl, ctx,
 2361|      0|                       is_split ? PARTITION_SPLIT : PARTITION_V, ts->msac.rng);
  ------------------
  |  Branch (2361:24): [True: 0, False: 0]
  ------------------
 2362|  54.4k|        }
 2363|       |
 2364|  54.6k|        assert(bl < BL_8X8);
  ------------------
  |  Branch (2364:9): [True: 54.4k, False: 0]
  ------------------
 2365|  54.4k|        if (is_split) {
  ------------------
  |  Branch (2365:13): [True: 31.1k, False: 23.2k]
  ------------------
 2366|  31.1k|            bp = PARTITION_SPLIT;
 2367|  31.1k|            if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 0))) return 1;
  ------------------
  |  |   51|  31.1k|    ((const EdgeNode*)((uintptr_t)(n) + ((const EdgeBranch*)(n))->split_offset[i]))
  ------------------
  |  Branch (2367:17): [True: 3.54k, False: 27.6k]
  ------------------
 2368|  27.6k|            t->by += hsz;
 2369|  27.6k|            if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 2))) return 1;
  ------------------
  |  |   51|  27.6k|    ((const EdgeNode*)((uintptr_t)(n) + ((const EdgeBranch*)(n))->split_offset[i]))
  ------------------
  |  Branch (2369:17): [True: 1.76k, False: 25.8k]
  ------------------
 2370|  25.8k|            t->by -= hsz;
 2371|  25.8k|        } else {
 2372|  23.2k|            bp = PARTITION_V;
 2373|  23.2k|            if (decode_b(t, bl, dav1d_block_sizes[bl][PARTITION_V][0],
  ------------------
  |  Branch (2373:17): [True: 337, False: 22.9k]
  ------------------
 2374|  23.2k|                         PARTITION_V, node->v[0]))
 2375|    337|                return -1;
 2376|  23.2k|        }
 2377|  54.4k|    }
 2378|       |
 2379|  2.78M|    if (t->frame_thread.pass != 2 && (bp != PARTITION_SPLIT || bl == BL_8X8)) {
  ------------------
  |  Branch (2379:9): [True: 2.78M, False: 0]
  |  Branch (2379:39): [True: 1.91M, False: 871k]
  |  Branch (2379:64): [True: 147k, False: 724k]
  ------------------
 2380|  2.06M|#define set_ctx(rep_macro) \
 2381|  2.06M|        rep_macro(t->a->partition, bx8, dav1d_al_part_ctx[0][bl][bp]); \
 2382|  2.06M|        rep_macro(t->l.partition, by8, dav1d_al_part_ctx[1][bl][bp])
 2383|  2.06M|        case_set_upto16(ulog2(hsz));
  ------------------
  |  |   80|  2.06M|    switch (var) { \
  |  |   81|   599k|    case 0: set_ctx(set_ctx1); break; \
  |  |  ------------------
  |  |  |  | 2381|   599k|        rep_macro(t->a->partition, bx8, dav1d_al_part_ctx[0][bl][bp]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   81|   599k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   599k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 2382|   599k|        rep_macro(t->l.partition, by8, dav1d_al_part_ctx[1][bl][bp])
  |  |  |  |  ------------------
  |  |  |  |  |  |   81|   599k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   599k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (81:5): [True: 599k, False: 1.46M]
  |  |  ------------------
  |  |   82|   671k|    case 1: set_ctx(set_ctx2); break; \
  |  |  ------------------
  |  |  |  | 2381|   671k|        rep_macro(t->a->partition, bx8, dav1d_al_part_ctx[0][bl][bp]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   82|   671k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   671k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 2382|   671k|        rep_macro(t->l.partition, by8, dav1d_al_part_ctx[1][bl][bp])
  |  |  |  |  ------------------
  |  |  |  |  |  |   82|   671k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   671k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (82:5): [True: 671k, False: 1.38M]
  |  |  ------------------
  |  |   83|   371k|    case 2: set_ctx(set_ctx4); break; \
  |  |  ------------------
  |  |  |  | 2381|   371k|        rep_macro(t->a->partition, bx8, dav1d_al_part_ctx[0][bl][bp]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   83|   371k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   371k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 2382|   371k|        rep_macro(t->l.partition, by8, dav1d_al_part_ctx[1][bl][bp])
  |  |  |  |  ------------------
  |  |  |  |  |  |   83|   371k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   371k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (83:5): [True: 371k, False: 1.68M]
  |  |  ------------------
  |  |   84|   269k|    case 3: set_ctx(set_ctx8); break; \
  |  |  ------------------
  |  |  |  | 2381|   269k|        rep_macro(t->a->partition, bx8, dav1d_al_part_ctx[0][bl][bp]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|   269k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   269k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 2382|   269k|        rep_macro(t->l.partition, by8, dav1d_al_part_ctx[1][bl][bp])
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|   269k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   269k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (84:5): [True: 269k, False: 1.79M]
  |  |  ------------------
  |  |   85|   148k|    case 4: set_ctx(set_ctx16); break; \
  |  |  ------------------
  |  |  |  | 2381|   148k|        rep_macro(t->a->partition, bx8, dav1d_al_part_ctx[0][bl][bp]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   85|   148k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   148k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   148k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   148k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 148k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 2382|   148k|        rep_macro(t->l.partition, by8, dav1d_al_part_ctx[1][bl][bp])
  |  |  |  |  ------------------
  |  |  |  |  |  |   85|   148k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   148k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   148k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   148k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 148k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (85:5): [True: 148k, False: 1.91M]
  |  |  ------------------
  |  |   86|      0|    default: assert(0); \
  |  |  ------------------
  |  |  |  Branch (86:5): [True: 0, False: 2.06M]
  |  |  ------------------
  |  |   87|  2.06M|    }
  ------------------
  |  Branch (2383:9): [Folded, False: 0]
  ------------------
 2384|  2.06M|#undef set_ctx
 2385|  2.06M|    }
 2386|       |
 2387|  2.78M|    return 0;
 2388|  2.78M|}
decode.c:decode_b:
  687|  3.96M|                    const enum EdgeFlags intra_edge_flags) {
  688|  3.96M|    Dav1dTileState *const ts = t->ts;
  689|  3.96M|    const Dav1dFrameContext *const f = t->f;
  690|  3.96M|    Av1Block b_mem, *const b = t->frame_thread.pass ?
  ------------------
  |  Branch (690:32): [True: 0, False: 3.96M]
  ------------------
  691|  3.96M|        &f->frame_thread.b[t->by * f->b4_stride + t->bx] : &b_mem;
  692|  3.96M|    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
  693|  3.96M|    const int bx4 = t->bx & 31, by4 = t->by & 31;
  694|  3.96M|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
  695|  3.96M|    const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
  696|  3.96M|    const int cbx4 = bx4 >> ss_hor, cby4 = by4 >> ss_ver;
  697|  3.96M|    const int bw4 = b_dim[0], bh4 = b_dim[1];
  698|  3.96M|    const int w4 = imin(bw4, f->bw - t->bx), h4 = imin(bh4, f->bh - t->by);
  699|  3.96M|    const int cbw4 = (bw4 + ss_hor) >> ss_hor, cbh4 = (bh4 + ss_ver) >> ss_ver;
  700|  3.96M|    const int have_left = t->bx > ts->tiling.col_start;
  701|  3.96M|    const int have_top = t->by > ts->tiling.row_start;
  702|  3.96M|    const int has_chroma = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400 &&
  ------------------
  |  Branch (702:28): [True: 2.42M, False: 1.54M]
  ------------------
  703|  2.42M|                           (bw4 > ss_hor || t->bx & 1) &&
  ------------------
  |  Branch (703:29): [True: 2.28M, False: 136k]
  |  Branch (703:45): [True: 68.1k, False: 68.1k]
  ------------------
  704|  2.35M|                           (bh4 > ss_ver || t->by & 1);
  ------------------
  |  Branch (704:29): [True: 2.25M, False: 97.9k]
  |  Branch (704:45): [True: 48.9k, False: 48.9k]
  ------------------
  705|       |
  706|  3.96M|    if (t->frame_thread.pass == 2) {
  ------------------
  |  Branch (706:9): [True: 0, False: 3.96M]
  ------------------
  707|      0|        if (b->intra) {
  ------------------
  |  Branch (707:13): [True: 0, False: 0]
  ------------------
  708|      0|            f->bd_fn.recon_b_intra(t, bs, intra_edge_flags, b);
  709|       |
  710|      0|            const enum IntraPredMode y_mode_nofilt =
  711|      0|                b->y_mode == FILTER_PRED ? DC_PRED : b->y_mode;
  ------------------
  |  Branch (711:17): [True: 0, False: 0]
  ------------------
  712|      0|#define set_ctx(rep_macro) \
  713|      0|            rep_macro(edge->mode, off, y_mode_nofilt); \
  714|      0|            rep_macro(edge->intra, off, 1)
  715|      0|            BlockContext *edge = t->a;
  716|      0|            for (int i = 0, off = bx4; i < 2; i++, off = by4, edge = &t->l) {
  ------------------
  |  Branch (716:40): [True: 0, False: 0]
  ------------------
  717|      0|                case_set(b_dim[2 + i]);
  ------------------
  |  |   70|      0|    switch (var) { \
  |  |   71|      0|    case 0: set_ctx(set_ctx1); break; \
  |  |  ------------------
  |  |  |  |  713|      0|            rep_macro(edge->mode, off, y_mode_nofilt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|      0|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|      0|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  714|      0|            rep_macro(edge->intra, off, 1)
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|      0|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|      0|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (71:5): [True: 0, False: 0]
  |  |  ------------------
  |  |   72|      0|    case 1: set_ctx(set_ctx2); break; \
  |  |  ------------------
  |  |  |  |  713|      0|            rep_macro(edge->mode, off, y_mode_nofilt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|      0|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|      0|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  714|      0|            rep_macro(edge->intra, off, 1)
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|      0|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|      0|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (72:5): [True: 0, False: 0]
  |  |  ------------------
  |  |   73|      0|    case 2: set_ctx(set_ctx4); break; \
  |  |  ------------------
  |  |  |  |  713|      0|            rep_macro(edge->mode, off, y_mode_nofilt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|      0|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|      0|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  714|      0|            rep_macro(edge->intra, off, 1)
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|      0|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|      0|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (73:5): [True: 0, False: 0]
  |  |  ------------------
  |  |   74|      0|    case 3: set_ctx(set_ctx8); break; \
  |  |  ------------------
  |  |  |  |  713|      0|            rep_macro(edge->mode, off, y_mode_nofilt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|      0|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|      0|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  714|      0|            rep_macro(edge->intra, off, 1)
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|      0|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|      0|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (74:5): [True: 0, False: 0]
  |  |  ------------------
  |  |   75|      0|    case 4: set_ctx(set_ctx16); break; \
  |  |  ------------------
  |  |  |  |  713|      0|            rep_macro(edge->mode, off, y_mode_nofilt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|      0|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|      0|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|      0|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|      0|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 0]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  714|      0|            rep_macro(edge->intra, off, 1)
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|      0|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|      0|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|      0|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|      0|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 0]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (75:5): [True: 0, False: 0]
  |  |  ------------------
  |  |   76|      0|    case 5: set_ctx(set_ctx32); break; \
  |  |  ------------------
  |  |  |  |  713|      0|            rep_macro(edge->mode, off, y_mode_nofilt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|      0|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|      0|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|      0|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|      0|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 0]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  714|      0|            rep_macro(edge->intra, off, 1)
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|      0|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|      0|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|      0|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|      0|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 0]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (76:5): [True: 0, False: 0]
  |  |  ------------------
  |  |   77|      0|    default: assert(0); \
  |  |  ------------------
  |  |  |  Branch (77:5): [True: 0, False: 0]
  |  |  ------------------
  |  |   78|      0|    }
  ------------------
  |  Branch (717:17): [Folded, False: 0]
  ------------------
  718|      0|            }
  719|      0|#undef set_ctx
  720|      0|            if (IS_INTER_OR_SWITCH(f->frame_hdr)) {
  ------------------
  |  |   36|      0|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  721|      0|                refmvs_block *const r = &t->rt.r[(t->by & 31) + 5 + bh4 - 1][t->bx];
  722|      0|                for (int x = 0; x < bw4; x++) {
  ------------------
  |  Branch (722:33): [True: 0, False: 0]
  ------------------
  723|      0|                    r[x].ref.ref[0] = 0;
  724|      0|                    r[x].bs = bs;
  725|      0|                }
  726|      0|                refmvs_block *const *rr = &t->rt.r[(t->by & 31) + 5];
  727|      0|                for (int y = 0; y < bh4 - 1; y++) {
  ------------------
  |  Branch (727:33): [True: 0, False: 0]
  ------------------
  728|      0|                    rr[y][t->bx + bw4 - 1].ref.ref[0] = 0;
  729|      0|                    rr[y][t->bx + bw4 - 1].bs = bs;
  730|      0|                }
  731|      0|            }
  732|       |
  733|      0|            if (has_chroma) {
  ------------------
  |  Branch (733:17): [True: 0, False: 0]
  ------------------
  734|      0|                uint8_t uv_mode = b->uv_mode;
  735|      0|                dav1d_memset_pow2[ulog2(cbw4)](&t->a->uvmode[cbx4], uv_mode);
  736|      0|                dav1d_memset_pow2[ulog2(cbh4)](&t->l.uvmode[cby4], uv_mode);
  737|      0|            }
  738|      0|        } else {
  739|      0|            if (IS_INTER_OR_SWITCH(f->frame_hdr) /* not intrabc */ &&
  ------------------
  |  |   36|      0|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  740|      0|                b->comp_type == COMP_INTER_NONE && b->motion_mode == MM_WARP)
  ------------------
  |  Branch (740:17): [True: 0, False: 0]
  |  Branch (740:52): [True: 0, False: 0]
  ------------------
  741|      0|            {
  742|      0|                if (b->matrix[0] == INT16_MIN) {
  ------------------
  |  Branch (742:21): [True: 0, False: 0]
  ------------------
  743|      0|                    t->warpmv.type = DAV1D_WM_TYPE_IDENTITY;
  744|      0|                } else {
  745|      0|                    t->warpmv.type = DAV1D_WM_TYPE_AFFINE;
  746|      0|                    t->warpmv.matrix[2] = b->matrix[0] + 0x10000;
  747|      0|                    t->warpmv.matrix[3] = b->matrix[1];
  748|      0|                    t->warpmv.matrix[4] = b->matrix[2];
  749|      0|                    t->warpmv.matrix[5] = b->matrix[3] + 0x10000;
  750|      0|                    dav1d_set_affine_mv2d(bw4, bh4, b->mv2d, &t->warpmv,
  751|      0|                                          t->bx, t->by);
  752|      0|                    dav1d_get_shear_params(&t->warpmv);
  753|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  754|      0|                    if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|      0|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 0]
  |  |  ------------------
  |  |   35|      0|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|      0|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  755|      0|                        printf("[ %c%x %c%x %c%x\n  %c%x %c%x %c%x ]\n"
  756|      0|                               "alpha=%c%x, beta=%c%x, gamma=%c%x, delta=%c%x, mv=y:%d,x:%d\n",
  757|      0|                               signabs(t->warpmv.matrix[0]),
  ------------------
  |  |  753|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (753:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  758|      0|                               signabs(t->warpmv.matrix[1]),
  ------------------
  |  |  753|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (753:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  759|      0|                               signabs(t->warpmv.matrix[2]),
  ------------------
  |  |  753|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (753:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  760|      0|                               signabs(t->warpmv.matrix[3]),
  ------------------
  |  |  753|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (753:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  761|      0|                               signabs(t->warpmv.matrix[4]),
  ------------------
  |  |  753|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (753:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  762|      0|                               signabs(t->warpmv.matrix[5]),
  ------------------
  |  |  753|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (753:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  763|      0|                               signabs(t->warpmv.u.p.alpha),
  ------------------
  |  |  753|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (753:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  764|      0|                               signabs(t->warpmv.u.p.beta),
  ------------------
  |  |  753|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (753:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  765|      0|                               signabs(t->warpmv.u.p.gamma),
  ------------------
  |  |  753|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (753:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  766|      0|                               signabs(t->warpmv.u.p.delta),
  ------------------
  |  |  753|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (753:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  767|      0|                               b->mv2d.y, b->mv2d.x);
  768|      0|#undef signabs
  769|      0|                }
  770|      0|            }
  771|      0|            if (f->bd_fn.recon_b_inter(t, bs, b)) return -1;
  ------------------
  |  Branch (771:17): [True: 0, False: 0]
  ------------------
  772|       |
  773|      0|            const uint8_t *const filter = dav1d_filter_dir[b->filter2d];
  774|      0|            BlockContext *edge = t->a;
  775|      0|            for (int i = 0, off = bx4; i < 2; i++, off = by4, edge = &t->l) {
  ------------------
  |  Branch (775:40): [True: 0, False: 0]
  ------------------
  776|      0|#define set_ctx(rep_macro) \
  777|      0|                rep_macro(edge->filter[0], off, filter[0]); \
  778|      0|                rep_macro(edge->filter[1], off, filter[1]); \
  779|      0|                rep_macro(edge->intra, off, 0)
  780|      0|                case_set(b_dim[2 + i]);
  ------------------
  |  |   70|      0|    switch (var) { \
  |  |   71|      0|    case 0: set_ctx(set_ctx1); break; \
  |  |  ------------------
  |  |  |  |  777|      0|                rep_macro(edge->filter[0], off, filter[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|      0|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|      0|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  778|      0|                rep_macro(edge->filter[1], off, filter[1]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|      0|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|      0|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  779|      0|                rep_macro(edge->intra, off, 0)
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|      0|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|      0|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (71:5): [True: 0, False: 0]
  |  |  ------------------
  |  |   72|      0|    case 1: set_ctx(set_ctx2); break; \
  |  |  ------------------
  |  |  |  |  777|      0|                rep_macro(edge->filter[0], off, filter[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|      0|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|      0|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  778|      0|                rep_macro(edge->filter[1], off, filter[1]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|      0|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|      0|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  779|      0|                rep_macro(edge->intra, off, 0)
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|      0|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|      0|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (72:5): [True: 0, False: 0]
  |  |  ------------------
  |  |   73|      0|    case 2: set_ctx(set_ctx4); break; \
  |  |  ------------------
  |  |  |  |  777|      0|                rep_macro(edge->filter[0], off, filter[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|      0|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|      0|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  778|      0|                rep_macro(edge->filter[1], off, filter[1]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|      0|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|      0|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  779|      0|                rep_macro(edge->intra, off, 0)
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|      0|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|      0|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (73:5): [True: 0, False: 0]
  |  |  ------------------
  |  |   74|      0|    case 3: set_ctx(set_ctx8); break; \
  |  |  ------------------
  |  |  |  |  777|      0|                rep_macro(edge->filter[0], off, filter[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|      0|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|      0|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  778|      0|                rep_macro(edge->filter[1], off, filter[1]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|      0|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|      0|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  779|      0|                rep_macro(edge->intra, off, 0)
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|      0|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|      0|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (74:5): [True: 0, False: 0]
  |  |  ------------------
  |  |   75|      0|    case 4: set_ctx(set_ctx16); break; \
  |  |  ------------------
  |  |  |  |  777|      0|                rep_macro(edge->filter[0], off, filter[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|      0|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|      0|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|      0|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|      0|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 0]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  778|      0|                rep_macro(edge->filter[1], off, filter[1]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|      0|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|      0|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|      0|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|      0|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 0]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  779|      0|                rep_macro(edge->intra, off, 0)
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|      0|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|      0|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|      0|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|      0|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 0]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (75:5): [True: 0, False: 0]
  |  |  ------------------
  |  |   76|      0|    case 5: set_ctx(set_ctx32); break; \
  |  |  ------------------
  |  |  |  |  777|      0|                rep_macro(edge->filter[0], off, filter[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|      0|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|      0|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|      0|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|      0|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 0]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  778|      0|                rep_macro(edge->filter[1], off, filter[1]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|      0|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|      0|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|      0|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|      0|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 0]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  779|      0|                rep_macro(edge->intra, off, 0)
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|      0|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|      0|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|      0|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|      0|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 0]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (76:5): [True: 0, False: 0]
  |  |  ------------------
  |  |   77|      0|    default: assert(0); \
  |  |  ------------------
  |  |  |  Branch (77:5): [True: 0, False: 0]
  |  |  ------------------
  |  |   78|      0|    }
  ------------------
  |  Branch (780:17): [Folded, False: 0]
  ------------------
  781|      0|#undef set_ctx
  782|      0|            }
  783|       |
  784|      0|            if (IS_INTER_OR_SWITCH(f->frame_hdr)) {
  ------------------
  |  |   36|      0|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  785|      0|                refmvs_block *const r = &t->rt.r[(t->by & 31) + 5 + bh4 - 1][t->bx];
  786|      0|                for (int x = 0; x < bw4; x++) {
  ------------------
  |  Branch (786:33): [True: 0, False: 0]
  ------------------
  787|      0|                    r[x].ref.ref[0] = b->ref[0] + 1;
  788|      0|                    r[x].mv.mv[0] = b->mv[0];
  789|      0|                    r[x].bs = bs;
  790|      0|                }
  791|      0|                refmvs_block *const *rr = &t->rt.r[(t->by & 31) + 5];
  792|      0|                for (int y = 0; y < bh4 - 1; y++) {
  ------------------
  |  Branch (792:33): [True: 0, False: 0]
  ------------------
  793|      0|                    rr[y][t->bx + bw4 - 1].ref.ref[0] = b->ref[0] + 1;
  794|      0|                    rr[y][t->bx + bw4 - 1].mv.mv[0] = b->mv[0];
  795|      0|                    rr[y][t->bx + bw4 - 1].bs = bs;
  796|      0|                }
  797|      0|            }
  798|       |
  799|      0|            if (has_chroma) {
  ------------------
  |  Branch (799:17): [True: 0, False: 0]
  ------------------
  800|      0|                dav1d_memset_pow2[ulog2(cbw4)](&t->a->uvmode[cbx4], DC_PRED);
  801|      0|                dav1d_memset_pow2[ulog2(cbh4)](&t->l.uvmode[cby4], DC_PRED);
  802|      0|            }
  803|      0|        }
  804|      0|        return 0;
  805|      0|    }
  806|       |
  807|  3.96M|    const int cw4 = (w4 + ss_hor) >> ss_hor, ch4 = (h4 + ss_ver) >> ss_ver;
  808|       |
  809|  3.96M|    b->bl = bl;
  810|  3.96M|    b->bp = bp;
  811|  3.96M|    b->bs = bs;
  812|       |
  813|  3.96M|    const Dav1dSegmentationData *seg = NULL;
  814|       |
  815|       |    // segment_id (if seg_feature for skip/ref/gmv is enabled)
  816|  3.96M|    int seg_pred = 0;
  817|  3.96M|    if (f->frame_hdr->segmentation.enabled) {
  ------------------
  |  Branch (817:9): [True: 1.27M, False: 2.69M]
  ------------------
  818|  1.27M|        if (!f->frame_hdr->segmentation.update_map) {
  ------------------
  |  Branch (818:13): [True: 269k, False: 1.00M]
  ------------------
  819|   269k|            if (f->prev_segmap) {
  ------------------
  |  Branch (819:17): [True: 180k, False: 89.0k]
  ------------------
  820|   180k|                unsigned seg_id = get_prev_frame_segid(f, t->by, t->bx, w4, h4,
  821|   180k|                                                       f->prev_segmap,
  822|   180k|                                                       f->b4_stride);
  823|   180k|                if (seg_id >= 8) return -1;
  ------------------
  |  Branch (823:21): [True: 0, False: 180k]
  ------------------
  824|   180k|                b->seg_id = seg_id;
  825|   180k|            } else {
  826|  89.0k|                b->seg_id = 0;
  827|  89.0k|            }
  828|   269k|            seg = &f->frame_hdr->segmentation.seg_data.d[b->seg_id];
  829|  1.00M|        } else if (f->frame_hdr->segmentation.seg_data.preskip) {
  ------------------
  |  Branch (829:20): [True: 524k, False: 482k]
  ------------------
  830|   524k|            if (f->frame_hdr->segmentation.temporal &&
  ------------------
  |  Branch (830:17): [True: 47.6k, False: 476k]
  ------------------
  831|  47.6k|                (seg_pred = dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  47.6k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  |  Branch (831:17): [True: 22.1k, False: 25.4k]
  ------------------
  832|  47.6k|                                ts->cdf.m.seg_pred[t->a->seg_pred[bx4] +
  833|  47.6k|                                t->l.seg_pred[by4]])))
  834|  22.1k|            {
  835|       |                // temporal predicted seg_id
  836|  22.1k|                if (f->prev_segmap) {
  ------------------
  |  Branch (836:21): [True: 5.11k, False: 17.0k]
  ------------------
  837|  5.11k|                    unsigned seg_id = get_prev_frame_segid(f, t->by, t->bx,
  838|  5.11k|                                                           w4, h4,
  839|  5.11k|                                                           f->prev_segmap,
  840|  5.11k|                                                           f->b4_stride);
  841|  5.11k|                    if (seg_id >= 8) return -1;
  ------------------
  |  Branch (841:25): [True: 0, False: 5.11k]
  ------------------
  842|  5.11k|                    b->seg_id = seg_id;
  843|  17.0k|                } else {
  844|  17.0k|                    b->seg_id = 0;
  845|  17.0k|                }
  846|   502k|            } else {
  847|   502k|                int seg_ctx;
  848|   502k|                const unsigned pred_seg_id =
  849|   502k|                    get_cur_frame_segid(t->by, t->bx, have_top, have_left,
  850|   502k|                                        &seg_ctx, f->cur_segmap, f->b4_stride);
  851|   502k|                const unsigned diff = dav1d_msac_decode_symbol_adapt8(&ts->msac,
  ------------------
  |  |   48|   502k|#define dav1d_msac_decode_symbol_adapt8  dav1d_msac_decode_symbol_adapt8_sse2
  ------------------
  852|   502k|                                          ts->cdf.m.seg_id[seg_ctx],
  853|   502k|                                          DAV1D_MAX_SEGMENTS - 1);
  ------------------
  |  |   43|   502k|#define DAV1D_MAX_SEGMENTS 8
  ------------------
  854|   502k|                const unsigned last_active_seg_id =
  855|   502k|                    f->frame_hdr->segmentation.seg_data.last_active_segid;
  856|   502k|                b->seg_id = neg_deinterleave(diff, pred_seg_id,
  857|   502k|                                             last_active_seg_id + 1);
  858|   502k|                if (b->seg_id > last_active_seg_id) b->seg_id = 0; // error?
  ------------------
  |  Branch (858:21): [True: 27.7k, False: 474k]
  ------------------
  859|   502k|                if (b->seg_id >= DAV1D_MAX_SEGMENTS) b->seg_id = 0; // error?
  ------------------
  |  |   43|   502k|#define DAV1D_MAX_SEGMENTS 8
  ------------------
  |  Branch (859:21): [True: 0, False: 502k]
  ------------------
  860|   502k|            }
  861|       |
  862|   524k|            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   524k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 524k]
  |  |  ------------------
  |  |   35|   524k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   524k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  863|      0|                printf("Post-segid[preskip;%d]: r=%d\n",
  864|      0|                       b->seg_id, ts->msac.rng);
  865|       |
  866|   524k|            seg = &f->frame_hdr->segmentation.seg_data.d[b->seg_id];
  867|   524k|        }
  868|  2.69M|    } else {
  869|  2.69M|        b->seg_id = 0;
  870|  2.69M|    }
  871|       |
  872|       |    // skip_mode
  873|  3.96M|    if ((!seg || (!seg->globalmv && seg->ref == -1 && !seg->skip)) &&
  ------------------
  |  Branch (873:10): [True: 3.17M, False: 794k]
  |  Branch (873:19): [True: 465k, False: 328k]
  |  Branch (873:37): [True: 181k, False: 283k]
  |  Branch (873:55): [True: 85.3k, False: 96.6k]
  ------------------
  874|  3.25M|        f->frame_hdr->skip_mode_enabled && imin(bw4, bh4) > 1)
  ------------------
  |  Branch (874:9): [True: 113k, False: 3.14M]
  |  Branch (874:44): [True: 91.0k, False: 22.4k]
  ------------------
  875|  91.0k|    {
  876|  91.0k|        const int smctx = t->a->skip_mode[bx4] + t->l.skip_mode[by4];
  877|  91.0k|        b->skip_mode = dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  91.0k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  878|  91.0k|                           ts->cdf.m.skip_mode[smctx]);
  879|  91.0k|        if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  91.0k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 91.0k]
  |  |  ------------------
  |  |   35|  91.0k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  91.0k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  880|      0|            printf("Post-skipmode[%d]: r=%d\n", b->skip_mode, ts->msac.rng);
  881|  3.87M|    } else {
  882|  3.87M|        b->skip_mode = 0;
  883|  3.87M|    }
  884|       |
  885|       |    // skip
  886|  3.96M|    if (b->skip_mode || (seg && seg->skip)) {
  ------------------
  |  Branch (886:9): [True: 23.8k, False: 3.94M]
  |  Branch (886:26): [True: 794k, False: 3.14M]
  |  Branch (886:33): [True: 614k, False: 179k]
  ------------------
  887|   638k|        b->skip = 1;
  888|  3.32M|    } else {
  889|  3.32M|        const int sctx = t->a->skip[bx4] + t->l.skip[by4];
  890|  3.32M|        b->skip = dav1d_msac_decode_bool_adapt(&ts->msac, ts->cdf.m.skip[sctx]);
  ------------------
  |  |   52|  3.32M|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  891|  3.32M|        if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  3.32M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 3.32M]
  |  |  ------------------
  |  |   35|  3.32M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  3.32M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  892|      0|            printf("Post-skip[%d]: r=%d\n", b->skip, ts->msac.rng);
  893|  3.32M|    }
  894|       |
  895|       |    // segment_id
  896|  3.96M|    if (f->frame_hdr->segmentation.enabled &&
  ------------------
  |  Branch (896:9): [True: 1.27M, False: 2.69M]
  ------------------
  897|  1.27M|        f->frame_hdr->segmentation.update_map &&
  ------------------
  |  Branch (897:9): [True: 1.00M, False: 269k]
  ------------------
  898|  1.00M|        !f->frame_hdr->segmentation.seg_data.preskip)
  ------------------
  |  Branch (898:9): [True: 482k, False: 524k]
  ------------------
  899|   482k|    {
  900|   482k|        if (!b->skip && f->frame_hdr->segmentation.temporal &&
  ------------------
  |  Branch (900:13): [True: 208k, False: 273k]
  |  Branch (900:25): [True: 8.32k, False: 200k]
  ------------------
  901|  8.32k|            (seg_pred = dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  8.32k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  |  Branch (901:13): [True: 4.04k, False: 4.28k]
  ------------------
  902|  8.32k|                            ts->cdf.m.seg_pred[t->a->seg_pred[bx4] +
  903|  8.32k|                            t->l.seg_pred[by4]])))
  904|  4.04k|        {
  905|       |            // temporal predicted seg_id
  906|  4.04k|            if (f->prev_segmap) {
  ------------------
  |  Branch (906:17): [True: 1.34k, False: 2.70k]
  ------------------
  907|  1.34k|                unsigned seg_id = get_prev_frame_segid(f, t->by, t->bx, w4, h4,
  908|  1.34k|                                                       f->prev_segmap,
  909|  1.34k|                                                       f->b4_stride);
  910|  1.34k|                if (seg_id >= 8) return -1;
  ------------------
  |  Branch (910:21): [True: 0, False: 1.34k]
  ------------------
  911|  1.34k|                b->seg_id = seg_id;
  912|  2.70k|            } else {
  913|  2.70k|                b->seg_id = 0;
  914|  2.70k|            }
  915|   478k|        } else {
  916|   478k|            int seg_ctx;
  917|   478k|            const unsigned pred_seg_id =
  918|   478k|                get_cur_frame_segid(t->by, t->bx, have_top, have_left,
  919|   478k|                                    &seg_ctx, f->cur_segmap, f->b4_stride);
  920|   478k|            if (b->skip) {
  ------------------
  |  Branch (920:17): [True: 273k, False: 204k]
  ------------------
  921|   273k|                b->seg_id = pred_seg_id;
  922|   273k|            } else {
  923|   204k|                const unsigned diff = dav1d_msac_decode_symbol_adapt8(&ts->msac,
  ------------------
  |  |   48|   204k|#define dav1d_msac_decode_symbol_adapt8  dav1d_msac_decode_symbol_adapt8_sse2
  ------------------
  924|   204k|                                          ts->cdf.m.seg_id[seg_ctx],
  925|   204k|                                          DAV1D_MAX_SEGMENTS - 1);
  ------------------
  |  |   43|   204k|#define DAV1D_MAX_SEGMENTS 8
  ------------------
  926|   204k|                const unsigned last_active_seg_id =
  927|   204k|                    f->frame_hdr->segmentation.seg_data.last_active_segid;
  928|   204k|                b->seg_id = neg_deinterleave(diff, pred_seg_id,
  929|   204k|                                             last_active_seg_id + 1);
  930|   204k|                if (b->seg_id > last_active_seg_id) b->seg_id = 0; // error?
  ------------------
  |  Branch (930:21): [True: 3.85k, False: 200k]
  ------------------
  931|   204k|            }
  932|   478k|            if (b->seg_id >= DAV1D_MAX_SEGMENTS) b->seg_id = 0; // error?
  ------------------
  |  |   43|   478k|#define DAV1D_MAX_SEGMENTS 8
  ------------------
  |  Branch (932:17): [True: 7.37k, False: 470k]
  ------------------
  933|   478k|        }
  934|       |
  935|   482k|        seg = &f->frame_hdr->segmentation.seg_data.d[b->seg_id];
  936|       |
  937|   482k|        if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   482k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 482k]
  |  |  ------------------
  |  |   35|   482k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   482k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  938|      0|            printf("Post-segid[postskip;%d]: r=%d\n",
  939|      0|                   b->seg_id, ts->msac.rng);
  940|   482k|    }
  941|       |
  942|       |    // cdef index
  943|  3.96M|    if (!b->skip) {
  ------------------
  |  Branch (943:9): [True: 2.05M, False: 1.90M]
  ------------------
  944|  2.05M|        const int idx = f->seq_hdr->sb128 ? ((t->bx & 16) >> 4) +
  ------------------
  |  Branch (944:25): [True: 918k, False: 1.13M]
  ------------------
  945|  1.13M|                                           ((t->by & 16) >> 3) : 0;
  946|  2.05M|        if (t->cur_sb_cdef_idx_ptr[idx] == -1) {
  ------------------
  |  Branch (946:13): [True: 334k, False: 1.72M]
  ------------------
  947|   334k|            const int v = dav1d_msac_decode_bools(&ts->msac,
  948|   334k|                              f->frame_hdr->cdef.n_bits);
  949|   334k|            t->cur_sb_cdef_idx_ptr[idx] = v;
  950|   334k|            if (bw4 > 16) t->cur_sb_cdef_idx_ptr[idx + 1] = v;
  ------------------
  |  Branch (950:17): [True: 32.4k, False: 302k]
  ------------------
  951|   334k|            if (bh4 > 16) t->cur_sb_cdef_idx_ptr[idx + 2] = v;
  ------------------
  |  Branch (951:17): [True: 25.4k, False: 309k]
  ------------------
  952|   334k|            if (bw4 == 32 && bh4 == 32) t->cur_sb_cdef_idx_ptr[idx + 3] = v;
  ------------------
  |  Branch (952:17): [True: 32.4k, False: 302k]
  |  Branch (952:30): [True: 22.0k, False: 10.3k]
  ------------------
  953|       |
  954|   334k|            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   334k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 334k]
  |  |  ------------------
  |  |   35|   334k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   334k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  955|      0|                printf("Post-cdef_idx[%d]: r=%d\n",
  956|      0|                        *t->cur_sb_cdef_idx_ptr, ts->msac.rng);
  957|   334k|        }
  958|  2.05M|    }
  959|       |
  960|       |    // delta-q/lf
  961|  3.96M|    if (!((t->bx | t->by) & (31 >> !f->seq_hdr->sb128))) {
  ------------------
  |  Branch (961:9): [True: 565k, False: 3.40M]
  ------------------
  962|   565k|        const int prev_qidx = ts->last_qidx;
  963|   565k|        const int have_delta_q = f->frame_hdr->delta.q.present &&
  ------------------
  |  Branch (963:34): [True: 199k, False: 365k]
  ------------------
  964|   199k|            (bs != (f->seq_hdr->sb128 ? BS_128x128 : BS_64x64) || !b->skip);
  ------------------
  |  Branch (964:14): [True: 114k, False: 85.2k]
  |  Branch (964:21): [True: 20.9k, False: 178k]
  |  Branch (964:67): [True: 9.01k, False: 76.2k]
  ------------------
  965|       |
  966|   565k|        uint32_t prev_delta_lf = ts->last_delta_lf.u32;
  967|       |
  968|   565k|        if (have_delta_q) {
  ------------------
  |  Branch (968:13): [True: 123k, False: 441k]
  ------------------
  969|   123k|            int delta_q = dav1d_msac_decode_symbol_adapt4(&ts->msac,
  ------------------
  |  |   47|   123k|#define dav1d_msac_decode_symbol_adapt4  dav1d_msac_decode_symbol_adapt4_sse2
  ------------------
  970|   123k|                                                          ts->cdf.m.delta_q, 3);
  971|   123k|            if (delta_q == 3) {
  ------------------
  |  Branch (971:17): [True: 24.7k, False: 98.5k]
  ------------------
  972|  24.7k|                const int n_bits = 1 + dav1d_msac_decode_bools(&ts->msac, 3);
  973|  24.7k|                delta_q = dav1d_msac_decode_bools(&ts->msac, n_bits) +
  974|  24.7k|                          1 + (1 << n_bits);
  975|  24.7k|            }
  976|   123k|            if (delta_q) {
  ------------------
  |  Branch (976:17): [True: 36.0k, False: 87.3k]
  ------------------
  977|  36.0k|                if (dav1d_msac_decode_bool_equi(&ts->msac)) delta_q = -delta_q;
  ------------------
  |  |   53|  36.0k|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
  |  Branch (977:21): [True: 29.0k, False: 6.91k]
  ------------------
  978|  36.0k|                delta_q *= 1 << f->frame_hdr->delta.q.res_log2;
  979|  36.0k|            }
  980|   123k|            ts->last_qidx = iclip(ts->last_qidx + delta_q, 1, 255);
  981|   123k|            if (have_delta_q && DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   123k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 123k]
  |  |  ------------------
  |  |   35|   123k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   123k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  |  Branch (981:17): [True: 123k, False: 0]
  ------------------
  982|      0|                printf("Post-delta_q[%d->%d]: r=%d\n",
  983|      0|                       delta_q, ts->last_qidx, ts->msac.rng);
  984|       |
  985|   123k|            if (f->frame_hdr->delta.lf.present) {
  ------------------
  |  Branch (985:17): [True: 43.8k, False: 79.4k]
  ------------------
  986|  43.8k|                const int n_lfs = f->frame_hdr->delta.lf.multi ?
  ------------------
  |  Branch (986:35): [True: 36.1k, False: 7.66k]
  ------------------
  987|  36.1k|                    f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400 ? 4 : 2 : 1;
  ------------------
  |  Branch (987:21): [True: 28.0k, False: 8.08k]
  ------------------
  988|       |
  989|   180k|                for (int i = 0; i < n_lfs; i++) {
  ------------------
  |  Branch (989:33): [True: 136k, False: 43.8k]
  ------------------
  990|   136k|                    int delta_lf = dav1d_msac_decode_symbol_adapt4(&ts->msac,
  ------------------
  |  |   47|   136k|#define dav1d_msac_decode_symbol_adapt4  dav1d_msac_decode_symbol_adapt4_sse2
  ------------------
  991|   136k|                        ts->cdf.m.delta_lf[i + f->frame_hdr->delta.lf.multi], 3);
  992|   136k|                    if (delta_lf == 3) {
  ------------------
  |  Branch (992:25): [True: 9.52k, False: 126k]
  ------------------
  993|  9.52k|                        const int n_bits = 1 + dav1d_msac_decode_bools(&ts->msac, 3);
  994|  9.52k|                        delta_lf = dav1d_msac_decode_bools(&ts->msac, n_bits) +
  995|  9.52k|                                   1 + (1 << n_bits);
  996|  9.52k|                    }
  997|   136k|                    if (delta_lf) {
  ------------------
  |  Branch (997:25): [True: 28.4k, False: 107k]
  ------------------
  998|  28.4k|                        if (dav1d_msac_decode_bool_equi(&ts->msac))
  ------------------
  |  |   53|  28.4k|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
  |  Branch (998:29): [True: 19.0k, False: 9.36k]
  ------------------
  999|  19.0k|                            delta_lf = -delta_lf;
 1000|  28.4k|                        delta_lf *= 1 << f->frame_hdr->delta.lf.res_log2;
 1001|  28.4k|                    }
 1002|   136k|                    ts->last_delta_lf.i8[i] =
 1003|   136k|                        iclip(ts->last_delta_lf.i8[i] + delta_lf, -63, 63);
 1004|   136k|                    if (have_delta_q && DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   136k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 136k]
  |  |  ------------------
  |  |   35|   136k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   136k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  |  Branch (1004:25): [True: 136k, False: 0]
  ------------------
 1005|      0|                        printf("Post-delta_lf[%d:%d]: r=%d\n", i, delta_lf,
 1006|      0|                               ts->msac.rng);
 1007|   136k|                }
 1008|  43.8k|            }
 1009|   123k|        }
 1010|   565k|        if (ts->last_qidx == f->frame_hdr->quant.yac) {
  ------------------
  |  Branch (1010:13): [True: 471k, False: 93.5k]
  ------------------
 1011|       |            // assign frame-wide q values to this sb
 1012|   471k|            ts->dq = f->dq;
 1013|   471k|        } else if (ts->last_qidx != prev_qidx) {
  ------------------
  |  Branch (1013:20): [True: 10.4k, False: 83.0k]
  ------------------
 1014|       |            // find sb-specific quant parameters
 1015|  10.4k|            init_quant_tables(f->seq_hdr, f->frame_hdr, ts->last_qidx, ts->dqmem);
 1016|  10.4k|            ts->dq = ts->dqmem;
 1017|  10.4k|        }
 1018|   565k|        if (!ts->last_delta_lf.u32) {
  ------------------
  |  Branch (1018:13): [True: 520k, False: 44.7k]
  ------------------
 1019|       |            // assign frame-wide lf values to this sb
 1020|   520k|            ts->lflvl = f->lf.lvl;
 1021|   520k|        } else if (ts->last_delta_lf.u32 != prev_delta_lf) {
  ------------------
  |  Branch (1021:20): [True: 15.1k, False: 29.5k]
  ------------------
 1022|       |            // find sb-specific lf lvl parameters
 1023|  15.1k|            ts->lflvl = ts->lflvlmem;
 1024|  15.1k|            dav1d_calc_lf_values(ts->lflvlmem, f->frame_hdr, ts->last_delta_lf.i8);
 1025|  15.1k|        }
 1026|   565k|    }
 1027|       |
 1028|  3.96M|    if (b->skip_mode) {
  ------------------
  |  Branch (1028:9): [True: 23.8k, False: 3.94M]
  ------------------
 1029|  23.8k|        b->intra = 0;
 1030|  3.94M|    } else if (IS_INTER_OR_SWITCH(f->frame_hdr)) {
  ------------------
  |  |   36|  3.94M|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 1.22M, False: 2.72M]
  |  |  ------------------
  ------------------
 1031|  1.22M|        if (seg && (seg->ref >= 0 || seg->globalmv)) {
  ------------------
  |  Branch (1031:13): [True: 424k, False: 796k]
  |  Branch (1031:21): [True: 301k, False: 123k]
  |  Branch (1031:38): [True: 46.2k, False: 76.8k]
  ------------------
 1032|   347k|            b->intra = !seg->ref;
 1033|   873k|        } else {
 1034|   873k|            const int ictx = get_intra_ctx(t->a, &t->l, by4, bx4,
 1035|   873k|                                           have_top, have_left);
 1036|   873k|            b->intra = !dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   873k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1037|   873k|                            ts->cdf.m.intra[ictx]);
 1038|   873k|            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   873k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 873k]
  |  |  ------------------
  |  |   35|   873k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   873k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1039|      0|                printf("Post-intra[%d]: r=%d\n", b->intra, ts->msac.rng);
 1040|   873k|        }
 1041|  2.72M|    } else if (f->frame_hdr->allow_intrabc) {
  ------------------
  |  Branch (1041:16): [True: 2.08M, False: 636k]
  ------------------
 1042|  2.08M|        b->intra = !dav1d_msac_decode_bool_adapt(&ts->msac, ts->cdf.m.intrabc);
  ------------------
  |  |   52|  2.08M|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1043|  2.08M|        if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  2.08M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 2.08M]
  |  |  ------------------
  |  |   35|  2.08M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  2.08M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1044|      0|            printf("Post-intrabcflag[%d]: r=%d\n", b->intra, ts->msac.rng);
 1045|  2.08M|    } else {
 1046|   636k|        b->intra = 1;
 1047|   636k|    }
 1048|       |
 1049|       |    // intra/inter-specific stuff
 1050|  3.96M|    if (b->intra) {
  ------------------
  |  Branch (1050:9): [True: 2.20M, False: 1.76M]
  ------------------
 1051|  2.20M|        uint16_t *const ymode_cdf = IS_INTER_OR_SWITCH(f->frame_hdr) ?
  ------------------
  |  |   36|  2.20M|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 103k, False: 2.10M]
  |  |  ------------------
  ------------------
 1052|   103k|            ts->cdf.m.y_mode[dav1d_ymode_size_context[bs]] :
 1053|  2.20M|            ts->cdf.kfym[dav1d_intra_mode_context[t->a->mode[bx4]]]
 1054|  2.10M|                        [dav1d_intra_mode_context[t->l.mode[by4]]];
 1055|  2.20M|        b->y_mode = dav1d_msac_decode_symbol_adapt16(&ts->msac, ymode_cdf,
  ------------------
  |  |   57|  2.20M|#define dav1d_msac_decode_symbol_adapt16(ctx, cdf, symb) ((ctx)->symbol_adapt16(ctx, cdf, symb))
  ------------------
 1056|  2.20M|                                                     N_INTRA_PRED_MODES - 1);
 1057|  2.20M|        if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  2.20M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 2.20M]
  |  |  ------------------
  |  |   35|  2.20M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  2.20M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1058|      0|            printf("Post-ymode[%d]: r=%d\n", b->y_mode, ts->msac.rng);
 1059|       |
 1060|       |        // angle delta
 1061|  2.20M|        if (b_dim[2] + b_dim[3] >= 2 && b->y_mode >= VERT_PRED &&
  ------------------
  |  Branch (1061:13): [True: 1.74M, False: 460k]
  |  Branch (1061:41): [True: 900k, False: 845k]
  ------------------
 1062|   900k|            b->y_mode <= VERT_LEFT_PRED)
  ------------------
  |  Branch (1062:13): [True: 490k, False: 409k]
  ------------------
 1063|   490k|        {
 1064|   490k|            uint16_t *const acdf = ts->cdf.m.angle_delta[b->y_mode - VERT_PRED];
 1065|   490k|            const int angle = dav1d_msac_decode_symbol_adapt8(&ts->msac, acdf, 6);
  ------------------
  |  |   48|   490k|#define dav1d_msac_decode_symbol_adapt8  dav1d_msac_decode_symbol_adapt8_sse2
  ------------------
 1066|   490k|            b->y_angle = angle - 3;
 1067|  1.71M|        } else {
 1068|  1.71M|            b->y_angle = 0;
 1069|  1.71M|        }
 1070|       |
 1071|  2.20M|        if (has_chroma) {
  ------------------
  |  Branch (1071:13): [True: 1.48M, False: 722k]
  ------------------
 1072|  1.48M|            const int cfl_allowed = f->frame_hdr->segmentation.lossless[b->seg_id] ?
  ------------------
  |  Branch (1072:37): [True: 38.5k, False: 1.44M]
  ------------------
 1073|  1.44M|                cbw4 == 1 && cbh4 == 1 : !!(cfl_allowed_mask & (1 << bs));
  ------------------
  |  Branch (1073:17): [True: 17.3k, False: 21.1k]
  |  Branch (1073:30): [True: 11.1k, False: 6.19k]
  ------------------
 1074|  1.48M|            uint16_t *const uvmode_cdf = ts->cdf.m.uv_mode[cfl_allowed][b->y_mode];
 1075|  1.48M|            b->uv_mode = dav1d_msac_decode_symbol_adapt16(&ts->msac, uvmode_cdf,
  ------------------
  |  |   57|  1.48M|#define dav1d_msac_decode_symbol_adapt16(ctx, cdf, symb) ((ctx)->symbol_adapt16(ctx, cdf, symb))
  ------------------
 1076|  1.48M|                             N_UV_INTRA_PRED_MODES - 1 - !cfl_allowed);
 1077|  1.48M|            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  1.48M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 1.48M]
  |  |  ------------------
  |  |   35|  1.48M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  1.48M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1078|      0|                printf("Post-uvmode[%d]: r=%d\n", b->uv_mode, ts->msac.rng);
 1079|       |
 1080|  1.48M|            b->uv_angle = 0;
 1081|  1.48M|            if (b->uv_mode == CFL_PRED) {
  ------------------
  |  Branch (1081:17): [True: 304k, False: 1.17M]
  ------------------
 1082|   304k|#define SIGN(a) (!!(a) + ((a) > 0))
 1083|   304k|                const int sign = dav1d_msac_decode_symbol_adapt8(&ts->msac,
  ------------------
  |  |   48|   304k|#define dav1d_msac_decode_symbol_adapt8  dav1d_msac_decode_symbol_adapt8_sse2
  ------------------
 1084|   304k|                                     ts->cdf.m.cfl_sign, 7) + 1;
 1085|   304k|                const int sign_u = sign * 0x56 >> 8, sign_v = sign - sign_u * 3;
 1086|   304k|                assert(sign_u == sign / 3);
  ------------------
  |  Branch (1086:17): [True: 304k, False: 0]
  ------------------
 1087|   304k|                if (sign_u) {
  ------------------
  |  Branch (1087:21): [True: 282k, False: 21.9k]
  ------------------
 1088|   282k|                    const int ctx = (sign_u == 2) * 3 + sign_v;
 1089|   282k|                    b->cfl_alpha[0] = dav1d_msac_decode_symbol_adapt16(&ts->msac,
  ------------------
  |  |   57|   282k|#define dav1d_msac_decode_symbol_adapt16(ctx, cdf, symb) ((ctx)->symbol_adapt16(ctx, cdf, symb))
  ------------------
 1090|   282k|                                          ts->cdf.m.cfl_alpha[ctx], 15) + 1;
 1091|   282k|                    if (sign_u == 1) b->cfl_alpha[0] = -b->cfl_alpha[0];
  ------------------
  |  Branch (1091:25): [True: 206k, False: 76.0k]
  ------------------
 1092|   282k|                } else {
 1093|  21.9k|                    b->cfl_alpha[0] = 0;
 1094|  21.9k|                }
 1095|   304k|                if (sign_v) {
  ------------------
  |  Branch (1095:21): [True: 203k, False: 101k]
  ------------------
 1096|   203k|                    const int ctx = (sign_v == 2) * 3 + sign_u;
 1097|   203k|                    b->cfl_alpha[1] = dav1d_msac_decode_symbol_adapt16(&ts->msac,
  ------------------
  |  |   57|   203k|#define dav1d_msac_decode_symbol_adapt16(ctx, cdf, symb) ((ctx)->symbol_adapt16(ctx, cdf, symb))
  ------------------
 1098|   203k|                                          ts->cdf.m.cfl_alpha[ctx], 15) + 1;
 1099|   203k|                    if (sign_v == 1) b->cfl_alpha[1] = -b->cfl_alpha[1];
  ------------------
  |  Branch (1099:25): [True: 89.0k, False: 114k]
  ------------------
 1100|   203k|                } else {
 1101|   101k|                    b->cfl_alpha[1] = 0;
 1102|   101k|                }
 1103|   304k|#undef SIGN
 1104|   304k|                if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   304k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 304k]
  |  |  ------------------
  |  |   35|   304k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   304k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1105|      0|                    printf("Post-uvalphas[%d/%d]: r=%d\n",
 1106|      0|                           b->cfl_alpha[0], b->cfl_alpha[1], ts->msac.rng);
 1107|  1.17M|            } else if (b_dim[2] + b_dim[3] >= 2 && b->uv_mode >= VERT_PRED &&
  ------------------
  |  Branch (1107:24): [True: 988k, False: 190k]
  |  Branch (1107:52): [True: 583k, False: 404k]
  ------------------
 1108|   583k|                       b->uv_mode <= VERT_LEFT_PRED)
  ------------------
  |  Branch (1108:24): [True: 297k, False: 285k]
  ------------------
 1109|   297k|            {
 1110|   297k|                uint16_t *const acdf = ts->cdf.m.angle_delta[b->uv_mode - VERT_PRED];
 1111|   297k|                const int angle = dav1d_msac_decode_symbol_adapt8(&ts->msac, acdf, 6);
  ------------------
  |  |   48|   297k|#define dav1d_msac_decode_symbol_adapt8  dav1d_msac_decode_symbol_adapt8_sse2
  ------------------
 1112|   297k|                b->uv_angle = angle - 3;
 1113|   297k|            }
 1114|  1.48M|        }
 1115|       |
 1116|  2.20M|        b->pal_sz[0] = b->pal_sz[1] = 0;
 1117|  2.20M|        if (f->frame_hdr->allow_screen_content_tools &&
  ------------------
  |  Branch (1117:13): [True: 1.62M, False: 576k]
  ------------------
 1118|  1.62M|            imax(bw4, bh4) <= 16 && bw4 + bh4 >= 4)
  ------------------
  |  Branch (1118:13): [True: 1.55M, False: 77.8k]
  |  Branch (1118:37): [True: 1.28M, False: 264k]
  ------------------
 1119|  1.28M|        {
 1120|  1.28M|            const int sz_ctx = b_dim[2] + b_dim[3] - 2;
 1121|  1.28M|            if (b->y_mode == DC_PRED) {
  ------------------
  |  Branch (1121:17): [True: 638k, False: 649k]
  ------------------
 1122|   638k|                const int pal_ctx = (t->a->pal_sz[bx4] > 0) + (t->l.pal_sz[by4] > 0);
 1123|   638k|                const int use_y_pal = dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   638k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1124|   638k|                                          ts->cdf.m.pal_y[sz_ctx][pal_ctx]);
 1125|   638k|                if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   638k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 638k]
  |  |  ------------------
  |  |   35|   638k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   638k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1126|      0|                    printf("Post-y_pal[%d]: r=%d\n", use_y_pal, ts->msac.rng);
 1127|   638k|                if (use_y_pal)
  ------------------
  |  Branch (1127:21): [True: 72.2k, False: 566k]
  ------------------
 1128|  72.2k|                    f->bd_fn.read_pal_plane(t, b, 0, sz_ctx, bx4, by4);
 1129|   638k|            }
 1130|       |
 1131|  1.28M|            if (has_chroma && b->uv_mode == DC_PRED) {
  ------------------
  |  Branch (1131:17): [True: 978k, False: 309k]
  |  Branch (1131:31): [True: 307k, False: 670k]
  ------------------
 1132|   307k|                const int pal_ctx = b->pal_sz[0] > 0;
 1133|   307k|                const int use_uv_pal = dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   307k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1134|   307k|                                           ts->cdf.m.pal_uv[pal_ctx]);
 1135|   307k|                if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   307k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 307k]
  |  |  ------------------
  |  |   35|   307k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   307k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1136|      0|                    printf("Post-uv_pal[%d]: r=%d\n", use_uv_pal, ts->msac.rng);
 1137|   307k|                if (use_uv_pal) // see aomedia bug 2183 for why we use luma coordinates
  ------------------
  |  Branch (1137:21): [True: 18.4k, False: 289k]
  ------------------
 1138|  18.4k|                    f->bd_fn.read_pal_uv(t, b, sz_ctx, bx4, by4);
 1139|   307k|            }
 1140|  1.28M|        }
 1141|       |
 1142|  2.20M|        if (b->y_mode == DC_PRED && !b->pal_sz[0] &&
  ------------------
  |  Branch (1142:13): [True: 1.00M, False: 1.20M]
  |  Branch (1142:37): [True: 931k, False: 72.2k]
  ------------------
 1143|   931k|            imax(b_dim[2], b_dim[3]) <= 3 && f->seq_hdr->filter_intra)
  ------------------
  |  Branch (1143:13): [True: 711k, False: 220k]
  |  Branch (1143:46): [True: 471k, False: 239k]
  ------------------
 1144|   471k|        {
 1145|   471k|            const int is_filter = dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   471k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1146|   471k|                                      ts->cdf.m.use_filter_intra[bs]);
 1147|   471k|            if (is_filter) {
  ------------------
  |  Branch (1147:17): [True: 314k, False: 157k]
  ------------------
 1148|   314k|                b->y_mode = FILTER_PRED;
 1149|   314k|                b->y_angle = dav1d_msac_decode_symbol_adapt8(&ts->msac,
  ------------------
  |  |   48|   314k|#define dav1d_msac_decode_symbol_adapt8  dav1d_msac_decode_symbol_adapt8_sse2
  ------------------
 1150|   314k|                                 ts->cdf.m.filter_intra, 4);
 1151|   314k|            }
 1152|   471k|            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   471k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 471k]
  |  |  ------------------
  |  |   35|   471k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   471k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1153|      0|                printf("Post-filterintramode[%d/%d]: r=%d\n",
 1154|      0|                       b->y_mode, b->y_angle, ts->msac.rng);
 1155|   471k|        }
 1156|       |
 1157|  2.20M|        if (b->pal_sz[0]) {
  ------------------
  |  Branch (1157:13): [True: 72.2k, False: 2.13M]
  ------------------
 1158|  72.2k|            uint8_t *pal_idx;
 1159|  72.2k|            if (t->frame_thread.pass) {
  ------------------
  |  Branch (1159:17): [True: 0, False: 72.2k]
  ------------------
 1160|      0|                const int p = t->frame_thread.pass & 1;
 1161|      0|                assert(ts->frame_thread[p].pal_idx);
  ------------------
  |  Branch (1161:17): [True: 0, False: 0]
  ------------------
 1162|      0|                pal_idx = ts->frame_thread[p].pal_idx;
 1163|      0|                ts->frame_thread[p].pal_idx += bw4 * bh4 * 8;
 1164|      0|            } else
 1165|  72.2k|                pal_idx = t->scratch.pal_idx_y;
 1166|  72.2k|            read_pal_indices(t, pal_idx, b->pal_sz[0], 0, w4, h4, bw4, bh4);
 1167|  72.2k|            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  72.2k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 72.2k]
  |  |  ------------------
  |  |   35|  72.2k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  72.2k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1168|      0|                printf("Post-y-pal-indices: r=%d\n", ts->msac.rng);
 1169|  72.2k|        }
 1170|       |
 1171|  2.20M|        if (has_chroma && b->pal_sz[1]) {
  ------------------
  |  Branch (1171:13): [True: 1.48M, False: 722k]
  |  Branch (1171:27): [True: 18.4k, False: 1.46M]
  ------------------
 1172|  18.4k|            uint8_t *pal_idx;
 1173|  18.4k|            if (t->frame_thread.pass) {
  ------------------
  |  Branch (1173:17): [True: 0, False: 18.4k]
  ------------------
 1174|      0|                const int p = t->frame_thread.pass & 1;
 1175|      0|                assert(ts->frame_thread[p].pal_idx);
  ------------------
  |  Branch (1175:17): [True: 0, False: 0]
  ------------------
 1176|      0|                pal_idx = ts->frame_thread[p].pal_idx;
 1177|      0|                ts->frame_thread[p].pal_idx += cbw4 * cbh4 * 8;
 1178|      0|            } else
 1179|  18.4k|                pal_idx = t->scratch.pal_idx_uv;
 1180|  18.4k|            read_pal_indices(t, pal_idx, b->pal_sz[1], 1, cw4, ch4, cbw4, cbh4);
 1181|  18.4k|            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  18.4k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 18.4k]
  |  |  ------------------
  |  |   35|  18.4k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  18.4k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1182|      0|                printf("Post-uv-pal-indices: r=%d\n", ts->msac.rng);
 1183|  18.4k|        }
 1184|       |
 1185|  2.20M|        const TxfmInfo *t_dim;
 1186|  2.20M|        if (f->frame_hdr->segmentation.lossless[b->seg_id]) {
  ------------------
  |  Branch (1186:13): [True: 48.6k, False: 2.15M]
  ------------------
 1187|  48.6k|            b->tx = b->uvtx = (int) TX_4X4;
 1188|  48.6k|            t_dim = &dav1d_txfm_dimensions[TX_4X4];
 1189|  2.15M|        } else {
 1190|  2.15M|            b->tx = dav1d_max_txfm_size_for_bs[bs][0];
 1191|  2.15M|            b->uvtx = dav1d_max_txfm_size_for_bs[bs][f->cur.p.layout];
 1192|  2.15M|            t_dim = &dav1d_txfm_dimensions[b->tx];
 1193|  2.15M|            if (f->frame_hdr->txfm_mode == DAV1D_TX_SWITCHABLE && t_dim->max > TX_4X4) {
  ------------------
  |  Branch (1193:17): [True: 605k, False: 1.55M]
  |  Branch (1193:67): [True: 480k, False: 125k]
  ------------------
 1194|   480k|                const int tctx = get_tx_ctx(t->a, &t->l, t_dim, by4, bx4);
 1195|   480k|                uint16_t *const tx_cdf = ts->cdf.m.txsz[t_dim->max - 1][tctx];
 1196|   480k|                int depth = dav1d_msac_decode_symbol_adapt4(&ts->msac, tx_cdf,
  ------------------
  |  |   47|   480k|#define dav1d_msac_decode_symbol_adapt4  dav1d_msac_decode_symbol_adapt4_sse2
  ------------------
 1197|   480k|                                imin(t_dim->max, 2));
 1198|       |
 1199|   923k|                while (depth--) {
  ------------------
  |  Branch (1199:24): [True: 442k, False: 480k]
  ------------------
 1200|   442k|                    b->tx = t_dim->sub;
 1201|   442k|                    t_dim = &dav1d_txfm_dimensions[b->tx];
 1202|   442k|                }
 1203|   480k|            }
 1204|  2.15M|            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  2.15M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 2.15M]
  |  |  ------------------
  |  |   35|  2.15M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  2.15M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1205|      0|                printf("Post-tx[%d]: r=%d\n", b->tx, ts->msac.rng);
 1206|  2.15M|        }
 1207|       |
 1208|       |        // reconstruction
 1209|  2.20M|        if (t->frame_thread.pass == 1) {
  ------------------
  |  Branch (1209:13): [True: 0, False: 2.20M]
  ------------------
 1210|      0|            f->bd_fn.read_coef_blocks(t, bs, b);
 1211|  2.20M|        } else {
 1212|  2.20M|            f->bd_fn.recon_b_intra(t, bs, intra_edge_flags, b);
 1213|  2.20M|        }
 1214|       |
 1215|  2.20M|        if (f->frame_hdr->loopfilter.level_y[0] ||
  ------------------
  |  Branch (1215:13): [True: 339k, False: 1.86M]
  ------------------
 1216|  1.86M|            f->frame_hdr->loopfilter.level_y[1])
  ------------------
  |  Branch (1216:13): [True: 107k, False: 1.75M]
  ------------------
 1217|   447k|        {
 1218|   447k|            dav1d_create_lf_mask_intra(t->lf_mask, f->lf.level, f->b4_stride,
 1219|   447k|                                       (const uint8_t (*)[8][2])
 1220|   447k|                                       &ts->lflvl[b->seg_id][0][0][0],
 1221|   447k|                                       t->bx, t->by, f->w4, f->h4, bs,
 1222|   447k|                                       b->tx, b->uvtx, f->cur.p.layout,
 1223|   447k|                                       &t->a->tx_lpf_y[bx4], &t->l.tx_lpf_y[by4],
 1224|   447k|                                       has_chroma ? &t->a->tx_lpf_uv[cbx4] : NULL,
  ------------------
  |  Branch (1224:40): [True: 268k, False: 178k]
  ------------------
 1225|   447k|                                       has_chroma ? &t->l.tx_lpf_uv[cby4] : NULL);
  ------------------
  |  Branch (1225:40): [True: 268k, False: 178k]
  ------------------
 1226|   447k|        }
 1227|       |        // update contexts
 1228|  2.20M|        const enum IntraPredMode y_mode_nofilt =
 1229|  2.20M|            b->y_mode == FILTER_PRED ? DC_PRED : b->y_mode;
  ------------------
  |  Branch (1229:13): [True: 314k, False: 1.89M]
  ------------------
 1230|  2.20M|        BlockContext *edge = t->a;
 1231|  6.61M|        for (int i = 0, off = bx4; i < 2; i++, off = by4, edge = &t->l) {
  ------------------
  |  Branch (1231:36): [True: 4.41M, False: 2.20M]
  ------------------
 1232|  4.41M|            int t_lsz = ((uint8_t *) &t_dim->lw)[i]; // lw then lh
 1233|  4.41M|#define set_ctx(rep_macro) \
 1234|  4.41M|            rep_macro(edge->tx_intra, off, t_lsz); \
 1235|  4.41M|            rep_macro(edge->tx, off, t_lsz); \
 1236|  4.41M|            rep_macro(edge->mode, off, y_mode_nofilt); \
 1237|  4.41M|            rep_macro(edge->pal_sz, off, b->pal_sz[0]); \
 1238|  4.41M|            rep_macro(edge->seg_pred, off, seg_pred); \
 1239|  4.41M|            rep_macro(edge->skip_mode, off, 0); \
 1240|  4.41M|            rep_macro(edge->intra, off, 1); \
 1241|  4.41M|            rep_macro(edge->skip, off, b->skip); \
 1242|       |            /* see aomedia bug 2183 for why we use luma coordinates here */ \
 1243|  4.41M|            rep_macro(t->pal_sz_uv[i], off, (has_chroma ? b->pal_sz[1] : 0)); \
 1244|  4.41M|            if (IS_INTER_OR_SWITCH(f->frame_hdr)) { \
 1245|  4.41M|                rep_macro(edge->comp_type, off, COMP_INTER_NONE); \
 1246|  4.41M|                rep_macro(edge->ref[0], off, ((uint8_t) -1)); \
 1247|  4.41M|                rep_macro(edge->ref[1], off, ((uint8_t) -1)); \
 1248|  4.41M|                rep_macro(edge->filter[0], off, DAV1D_N_SWITCHABLE_FILTERS); \
 1249|  4.41M|                rep_macro(edge->filter[1], off, DAV1D_N_SWITCHABLE_FILTERS); \
 1250|  4.41M|            }
 1251|  4.41M|            case_set(b_dim[2 + i]);
  ------------------
  |  |   70|  4.41M|    switch (var) { \
  |  |   71|   935k|    case 0: set_ctx(set_ctx1); break; \
  |  |  ------------------
  |  |  |  | 1234|   935k|            rep_macro(edge->tx_intra, off, t_lsz); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   935k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   935k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1235|   935k|            rep_macro(edge->tx, off, t_lsz); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   935k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   935k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1236|   935k|            rep_macro(edge->mode, off, y_mode_nofilt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   935k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   935k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1237|   935k|            rep_macro(edge->pal_sz, off, b->pal_sz[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   935k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   935k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1238|   935k|            rep_macro(edge->seg_pred, off, seg_pred); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   935k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   935k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1239|   935k|            rep_macro(edge->skip_mode, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   935k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   935k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1240|   935k|            rep_macro(edge->intra, off, 1); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   935k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   935k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1241|   935k|            rep_macro(edge->skip, off, b->skip); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   935k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   935k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1242|   935k|            /* see aomedia bug 2183 for why we use luma coordinates here */ \
  |  |  |  | 1243|   935k|            rep_macro(t->pal_sz_uv[i], off, (has_chroma ? b->pal_sz[1] : 0)); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   935k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|  1.87M|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (56:43): [True: 544k, False: 391k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1244|   935k|            if (IS_INTER_OR_SWITCH(f->frame_hdr)) { \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|   935k|    ((frame_header)->frame_type & 1)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (36:5): [True: 44.4k, False: 891k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1245|  44.4k|                rep_macro(edge->comp_type, off, COMP_INTER_NONE); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|  44.4k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|  44.4k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1246|  44.4k|                rep_macro(edge->ref[0], off, ((uint8_t) -1)); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|  44.4k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|  44.4k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1247|  44.4k|                rep_macro(edge->ref[1], off, ((uint8_t) -1)); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|  44.4k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|  44.4k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1248|  44.4k|                rep_macro(edge->filter[0], off, DAV1D_N_SWITCHABLE_FILTERS); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|  44.4k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|  44.4k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1249|  44.4k|                rep_macro(edge->filter[1], off, DAV1D_N_SWITCHABLE_FILTERS); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|  44.4k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|  44.4k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1250|  44.4k|            }
  |  |  ------------------
  |  |  |  Branch (71:5): [True: 935k, False: 3.47M]
  |  |  ------------------
  |  |   72|  1.23M|    case 1: set_ctx(set_ctx2); break; \
  |  |  ------------------
  |  |  |  | 1234|  1.23M|            rep_macro(edge->tx_intra, off, t_lsz); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|  1.23M|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  1.23M|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1235|  1.23M|            rep_macro(edge->tx, off, t_lsz); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|  1.23M|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  1.23M|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1236|  1.23M|            rep_macro(edge->mode, off, y_mode_nofilt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|  1.23M|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  1.23M|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1237|  1.23M|            rep_macro(edge->pal_sz, off, b->pal_sz[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|  1.23M|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  1.23M|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1238|  1.23M|            rep_macro(edge->seg_pred, off, seg_pred); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|  1.23M|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  1.23M|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1239|  1.23M|            rep_macro(edge->skip_mode, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|  1.23M|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  1.23M|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1240|  1.23M|            rep_macro(edge->intra, off, 1); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|  1.23M|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  1.23M|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1241|  1.23M|            rep_macro(edge->skip, off, b->skip); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|  1.23M|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  1.23M|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1242|  1.23M|            /* see aomedia bug 2183 for why we use luma coordinates here */ \
  |  |  |  | 1243|  1.23M|            rep_macro(t->pal_sz_uv[i], off, (has_chroma ? b->pal_sz[1] : 0)); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|  1.23M|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  2.47M|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (58:45): [True: 912k, False: 326k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1244|  1.23M|            if (IS_INTER_OR_SWITCH(f->frame_hdr)) { \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  1.23M|    ((frame_header)->frame_type & 1)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (36:5): [True: 77.4k, False: 1.16M]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1245|  77.4k|                rep_macro(edge->comp_type, off, COMP_INTER_NONE); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|  77.4k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  77.4k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1246|  77.4k|                rep_macro(edge->ref[0], off, ((uint8_t) -1)); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|  77.4k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  77.4k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1247|  77.4k|                rep_macro(edge->ref[1], off, ((uint8_t) -1)); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|  77.4k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  77.4k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1248|  77.4k|                rep_macro(edge->filter[0], off, DAV1D_N_SWITCHABLE_FILTERS); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|  77.4k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  77.4k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1249|  77.4k|                rep_macro(edge->filter[1], off, DAV1D_N_SWITCHABLE_FILTERS); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|  77.4k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  77.4k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1250|  77.4k|            }
  |  |  ------------------
  |  |  |  Branch (72:5): [True: 1.23M, False: 3.17M]
  |  |  ------------------
  |  |   73|  1.10M|    case 2: set_ctx(set_ctx4); break; \
  |  |  ------------------
  |  |  |  | 1234|  1.10M|            rep_macro(edge->tx_intra, off, t_lsz); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|  1.10M|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|  1.10M|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1235|  1.10M|            rep_macro(edge->tx, off, t_lsz); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|  1.10M|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|  1.10M|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1236|  1.10M|            rep_macro(edge->mode, off, y_mode_nofilt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|  1.10M|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|  1.10M|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1237|  1.10M|            rep_macro(edge->pal_sz, off, b->pal_sz[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|  1.10M|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|  1.10M|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1238|  1.10M|            rep_macro(edge->seg_pred, off, seg_pred); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|  1.10M|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|  1.10M|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1239|  1.10M|            rep_macro(edge->skip_mode, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|  1.10M|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|  1.10M|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1240|  1.10M|            rep_macro(edge->intra, off, 1); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|  1.10M|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|  1.10M|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1241|  1.10M|            rep_macro(edge->skip, off, b->skip); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|  1.10M|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|  1.10M|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1242|  1.10M|            /* see aomedia bug 2183 for why we use luma coordinates here */ \
  |  |  |  | 1243|  1.10M|            rep_macro(t->pal_sz_uv[i], off, (has_chroma ? b->pal_sz[1] : 0)); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|  1.10M|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|  2.20M|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (60:45): [True: 834k, False: 265k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1244|  1.10M|            if (IS_INTER_OR_SWITCH(f->frame_hdr)) { \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  1.10M|    ((frame_header)->frame_type & 1)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (36:5): [True: 55.4k, False: 1.04M]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1245|  55.4k|                rep_macro(edge->comp_type, off, COMP_INTER_NONE); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|  55.4k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|  55.4k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1246|  55.4k|                rep_macro(edge->ref[0], off, ((uint8_t) -1)); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|  55.4k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|  55.4k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1247|  55.4k|                rep_macro(edge->ref[1], off, ((uint8_t) -1)); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|  55.4k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|  55.4k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1248|  55.4k|                rep_macro(edge->filter[0], off, DAV1D_N_SWITCHABLE_FILTERS); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|  55.4k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|  55.4k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1249|  55.4k|                rep_macro(edge->filter[1], off, DAV1D_N_SWITCHABLE_FILTERS); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|  55.4k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|  55.4k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1250|  55.4k|            }
  |  |  ------------------
  |  |  |  Branch (73:5): [True: 1.10M, False: 3.31M]
  |  |  ------------------
  |  |   74|   592k|    case 3: set_ctx(set_ctx8); break; \
  |  |  ------------------
  |  |  |  | 1234|   592k|            rep_macro(edge->tx_intra, off, t_lsz); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   592k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   592k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1235|   592k|            rep_macro(edge->tx, off, t_lsz); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   592k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   592k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1236|   592k|            rep_macro(edge->mode, off, y_mode_nofilt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   592k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   592k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1237|   592k|            rep_macro(edge->pal_sz, off, b->pal_sz[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   592k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   592k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1238|   592k|            rep_macro(edge->seg_pred, off, seg_pred); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   592k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   592k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1239|   592k|            rep_macro(edge->skip_mode, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   592k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   592k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1240|   592k|            rep_macro(edge->intra, off, 1); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   592k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   592k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1241|   592k|            rep_macro(edge->skip, off, b->skip); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   592k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   592k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1242|   592k|            /* see aomedia bug 2183 for why we use luma coordinates here */ \
  |  |  |  | 1243|   592k|            rep_macro(t->pal_sz_uv[i], off, (has_chroma ? b->pal_sz[1] : 0)); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   592k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|  1.18M|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (62:45): [True: 412k, False: 180k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1244|   592k|            if (IS_INTER_OR_SWITCH(f->frame_hdr)) { \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|   592k|    ((frame_header)->frame_type & 1)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (36:5): [True: 17.7k, False: 574k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1245|  17.7k|                rep_macro(edge->comp_type, off, COMP_INTER_NONE); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|  17.7k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|  17.7k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1246|  17.7k|                rep_macro(edge->ref[0], off, ((uint8_t) -1)); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|  17.7k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|  17.7k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1247|  17.7k|                rep_macro(edge->ref[1], off, ((uint8_t) -1)); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|  17.7k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|  17.7k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1248|  17.7k|                rep_macro(edge->filter[0], off, DAV1D_N_SWITCHABLE_FILTERS); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|  17.7k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|  17.7k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1249|  17.7k|                rep_macro(edge->filter[1], off, DAV1D_N_SWITCHABLE_FILTERS); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|  17.7k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|  17.7k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1250|  17.7k|            }
  |  |  ------------------
  |  |  |  Branch (74:5): [True: 592k, False: 3.81M]
  |  |  ------------------
  |  |   75|   374k|    case 4: set_ctx(set_ctx16); break; \
  |  |  ------------------
  |  |  |  | 1234|   374k|            rep_macro(edge->tx_intra, off, t_lsz); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   374k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   374k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   374k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   374k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 374k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1235|   374k|            rep_macro(edge->tx, off, t_lsz); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   374k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   374k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   374k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   374k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 374k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1236|   374k|            rep_macro(edge->mode, off, y_mode_nofilt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   374k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   374k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   374k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   374k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 374k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1237|   374k|            rep_macro(edge->pal_sz, off, b->pal_sz[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   374k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   374k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   374k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   374k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 374k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1238|   374k|            rep_macro(edge->seg_pred, off, seg_pred); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   374k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   374k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   374k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   374k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 374k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1239|   374k|            rep_macro(edge->skip_mode, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   374k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   374k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   374k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   374k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 374k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1240|   374k|            rep_macro(edge->intra, off, 1); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   374k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   374k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   374k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   374k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 374k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1241|   374k|            rep_macro(edge->skip, off, b->skip); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   374k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   374k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   374k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   374k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 374k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1242|   374k|            /* see aomedia bug 2183 for why we use luma coordinates here */ \
  |  |  |  | 1243|   374k|            rep_macro(t->pal_sz_uv[i], off, (has_chroma ? b->pal_sz[1] : 0)); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   374k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   374k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   748k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (64:29): [True: 169k, False: 204k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   65|   374k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 374k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1244|   374k|            if (IS_INTER_OR_SWITCH(f->frame_hdr)) { \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|   374k|    ((frame_header)->frame_type & 1)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (36:5): [True: 6.04k, False: 368k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1245|  6.04k|                rep_macro(edge->comp_type, off, COMP_INTER_NONE); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|  6.04k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|  6.04k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|  6.04k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|  6.04k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 6.04k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1246|  6.04k|                rep_macro(edge->ref[0], off, ((uint8_t) -1)); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|  6.04k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|  6.04k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|  6.04k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|  6.04k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 6.04k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1247|  6.04k|                rep_macro(edge->ref[1], off, ((uint8_t) -1)); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|  6.04k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|  6.04k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|  6.04k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|  6.04k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 6.04k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1248|  6.04k|                rep_macro(edge->filter[0], off, DAV1D_N_SWITCHABLE_FILTERS); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|  6.04k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|  6.04k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|  6.04k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|  6.04k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 6.04k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1249|  6.04k|                rep_macro(edge->filter[1], off, DAV1D_N_SWITCHABLE_FILTERS); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|  6.04k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|  6.04k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|  6.04k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|  6.04k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 6.04k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1250|  6.04k|            }
  |  |  ------------------
  |  |  |  Branch (75:5): [True: 374k, False: 4.03M]
  |  |  ------------------
  |  |   76|   170k|    case 5: set_ctx(set_ctx32); break; \
  |  |  ------------------
  |  |  |  | 1234|   170k|            rep_macro(edge->tx_intra, off, t_lsz); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|   170k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|   170k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|   170k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|   170k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 170k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1235|   170k|            rep_macro(edge->tx, off, t_lsz); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|   170k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|   170k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|   170k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|   170k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 170k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1236|   170k|            rep_macro(edge->mode, off, y_mode_nofilt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|   170k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|   170k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|   170k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|   170k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 170k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1237|   170k|            rep_macro(edge->pal_sz, off, b->pal_sz[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|   170k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|   170k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|   170k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|   170k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 170k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1238|   170k|            rep_macro(edge->seg_pred, off, seg_pred); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|   170k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|   170k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|   170k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|   170k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 170k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1239|   170k|            rep_macro(edge->skip_mode, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|   170k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|   170k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|   170k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|   170k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 170k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1240|   170k|            rep_macro(edge->intra, off, 1); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|   170k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|   170k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|   170k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|   170k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 170k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1241|   170k|            rep_macro(edge->skip, off, b->skip); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|   170k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|   170k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|   170k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|   170k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 170k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1242|   170k|            /* see aomedia bug 2183 for why we use luma coordinates here */ \
  |  |  |  | 1243|   170k|            rep_macro(t->pal_sz_uv[i], off, (has_chroma ? b->pal_sz[1] : 0)); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|   170k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|   170k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|   341k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (67:29): [True: 93.8k, False: 76.9k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   68|   170k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 170k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1244|   170k|            if (IS_INTER_OR_SWITCH(f->frame_hdr)) { \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|   170k|    ((frame_header)->frame_type & 1)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (36:5): [True: 5.71k, False: 165k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1245|  5.71k|                rep_macro(edge->comp_type, off, COMP_INTER_NONE); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  5.71k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  5.71k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  5.71k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  5.71k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 5.71k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1246|  5.71k|                rep_macro(edge->ref[0], off, ((uint8_t) -1)); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  5.71k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  5.71k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  5.71k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  5.71k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 5.71k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1247|  5.71k|                rep_macro(edge->ref[1], off, ((uint8_t) -1)); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  5.71k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  5.71k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  5.71k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  5.71k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 5.71k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1248|  5.71k|                rep_macro(edge->filter[0], off, DAV1D_N_SWITCHABLE_FILTERS); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  5.71k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  5.71k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  5.71k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  5.71k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 5.71k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1249|  5.71k|                rep_macro(edge->filter[1], off, DAV1D_N_SWITCHABLE_FILTERS); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  5.71k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  5.71k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  5.71k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  5.71k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 5.71k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1250|  5.71k|            }
  |  |  ------------------
  |  |  |  Branch (76:5): [True: 170k, False: 4.24M]
  |  |  ------------------
  |  |   77|      0|    default: assert(0); \
  |  |  ------------------
  |  |  |  Branch (77:5): [True: 0, False: 4.41M]
  |  |  ------------------
  |  |   78|  4.41M|    }
  ------------------
  |  Branch (1251:13): [Folded, False: 0]
  ------------------
 1252|  4.41M|#undef set_ctx
 1253|  4.41M|        }
 1254|  2.20M|        if (b->pal_sz[0])
  ------------------
  |  Branch (1254:13): [True: 72.2k, False: 2.13M]
  ------------------
 1255|  72.2k|            f->bd_fn.copy_pal_block_y(t, bx4, by4, bw4, bh4);
 1256|  2.20M|        if (has_chroma) {
  ------------------
  |  Branch (1256:13): [True: 1.48M, False: 722k]
  ------------------
 1257|  1.48M|            uint8_t uv_mode = b->uv_mode;
 1258|  1.48M|            dav1d_memset_pow2[ulog2(cbw4)](&t->a->uvmode[cbx4], uv_mode);
 1259|  1.48M|            dav1d_memset_pow2[ulog2(cbh4)](&t->l.uvmode[cby4], uv_mode);
 1260|  1.48M|            if (b->pal_sz[1])
  ------------------
  |  Branch (1260:17): [True: 18.4k, False: 1.46M]
  ------------------
 1261|  18.4k|                f->bd_fn.copy_pal_block_uv(t, bx4, by4, bw4, bh4);
 1262|  1.48M|        }
 1263|  2.20M|        if (IS_INTER_OR_SWITCH(f->frame_hdr) || f->frame_hdr->allow_intrabc)
  ------------------
  |  |   36|  4.41M|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 103k, False: 2.10M]
  |  |  ------------------
  ------------------
  |  Branch (1263:49): [True: 1.46M, False: 636k]
  ------------------
 1264|  1.56M|            splat_intraref(f->c, t, bs, bw4, bh4);
 1265|  2.20M|    } else if (IS_KEY_OR_INTRA(f->frame_hdr)) {
  ------------------
  |  |   43|  1.76M|    (!IS_INTER_OR_SWITCH(frame_header))
  |  |  ------------------
  |  |  |  |   36|  1.76M|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (43:5): [True: 620k, False: 1.14M]
  |  |  ------------------
  ------------------
 1266|       |        // intra block copy
 1267|   620k|        refmvs_candidate mvstack[8];
 1268|   620k|        int n_mvs, ctx;
 1269|   620k|        dav1d_refmvs_find(&t->rt, mvstack, &n_mvs, &ctx,
 1270|   620k|                          (union refmvs_refpair) { .ref = { 0, -1 }},
 1271|   620k|                          bs, intra_edge_flags, t->by, t->bx);
 1272|       |
 1273|   620k|        if (mvstack[0].mv.mv[0].n)
  ------------------
  |  Branch (1273:13): [True: 566k, False: 53.7k]
  ------------------
 1274|   566k|            b->mv[0] = mvstack[0].mv.mv[0];
 1275|  53.7k|        else if (mvstack[1].mv.mv[0].n)
  ------------------
  |  Branch (1275:18): [True: 0, False: 53.7k]
  ------------------
 1276|      0|            b->mv[0] = mvstack[1].mv.mv[0];
 1277|  53.7k|        else {
 1278|  53.7k|            if (t->by - (16 << f->seq_hdr->sb128) < ts->tiling.row_start) {
  ------------------
  |  Branch (1278:17): [True: 52.4k, False: 1.36k]
  ------------------
 1279|  52.4k|                b->mv[0].y = 0;
 1280|  52.4k|                b->mv[0].x = -(512 << f->seq_hdr->sb128) - 2048;
 1281|  52.4k|            } else {
 1282|  1.36k|                b->mv[0].y = -(512 << f->seq_hdr->sb128);
 1283|  1.36k|                b->mv[0].x = 0;
 1284|  1.36k|            }
 1285|  53.7k|        }
 1286|       |
 1287|   620k|        const union mv ref = b->mv[0];
 1288|   620k|        read_mv_residual(ts, &b->mv[0], -1);
 1289|       |
 1290|       |        // clip intrabc motion vector to decoded parts of current tile
 1291|   620k|        int border_left = ts->tiling.col_start * 4;
 1292|   620k|        int border_top  = ts->tiling.row_start * 4;
 1293|   620k|        if (has_chroma) {
  ------------------
  |  Branch (1293:13): [True: 275k, False: 344k]
  ------------------
 1294|   275k|            if (bw4 < 2 &&  ss_hor)
  ------------------
  |  Branch (1294:17): [True: 93.3k, False: 182k]
  |  Branch (1294:29): [True: 6.67k, False: 86.6k]
  ------------------
 1295|  6.67k|                border_left += 4;
 1296|   275k|            if (bh4 < 2 &&  ss_ver)
  ------------------
  |  Branch (1296:17): [True: 76.9k, False: 198k]
  |  Branch (1296:29): [True: 4.95k, False: 72.0k]
  ------------------
 1297|  4.95k|                border_top  += 4;
 1298|   275k|        }
 1299|   620k|        int src_left   = t->bx * 4 + (b->mv[0].x >> 3);
 1300|   620k|        int src_top    = t->by * 4 + (b->mv[0].y >> 3);
 1301|   620k|        int src_right  = src_left + bw4 * 4;
 1302|   620k|        int src_bottom = src_top  + bh4 * 4;
 1303|   620k|        const int border_right = ((ts->tiling.col_end + (bw4 - 1)) & ~(bw4 - 1)) * 4;
 1304|       |
 1305|       |        // check against left or right tile boundary and adjust if necessary
 1306|   620k|        if (src_left < border_left) {
  ------------------
  |  Branch (1306:13): [True: 196k, False: 423k]
  ------------------
 1307|   196k|            src_right += border_left - src_left;
 1308|   196k|            src_left  += border_left - src_left;
 1309|   423k|        } else if (src_right > border_right) {
  ------------------
  |  Branch (1309:20): [True: 187k, False: 236k]
  ------------------
 1310|   187k|            src_left  -= src_right - border_right;
 1311|   187k|            src_right -= src_right - border_right;
 1312|   187k|        }
 1313|       |        // check against top tile boundary and adjust if necessary
 1314|   620k|        if (src_top < border_top) {
  ------------------
  |  Branch (1314:13): [True: 502k, False: 118k]
  ------------------
 1315|   502k|            src_bottom += border_top - src_top;
 1316|   502k|            src_top    += border_top - src_top;
 1317|   502k|        }
 1318|       |
 1319|   620k|        const int sbx = (t->bx >> (4 + f->seq_hdr->sb128)) << (6 + f->seq_hdr->sb128);
 1320|   620k|        const int sby = (t->by >> (4 + f->seq_hdr->sb128)) << (6 + f->seq_hdr->sb128);
 1321|   620k|        const int sb_size = 1 << (6 + f->seq_hdr->sb128);
 1322|       |        // check for overlap with current superblock
 1323|   620k|        if (src_bottom > sby && src_right > sbx) {
  ------------------
  |  Branch (1323:13): [True: 580k, False: 39.7k]
  |  Branch (1323:33): [True: 194k, False: 385k]
  ------------------
 1324|   194k|            if (src_top - border_top >= src_bottom - sby) {
  ------------------
  |  Branch (1324:17): [True: 1.26k, False: 193k]
  ------------------
 1325|       |                // if possible move src up into the previous superblock row
 1326|  1.26k|                src_top    -= src_bottom - sby;
 1327|  1.26k|                src_bottom -= src_bottom - sby;
 1328|   193k|            } else if (src_left - border_left >= src_right - sbx) {
  ------------------
  |  Branch (1328:24): [True: 186k, False: 6.85k]
  ------------------
 1329|       |                // if possible move src left into the previous superblock
 1330|   186k|                src_left  -= src_right - sbx;
 1331|   186k|                src_right -= src_right - sbx;
 1332|   186k|            }
 1333|   194k|        }
 1334|       |        // move src up if it is below current superblock row
 1335|   620k|        if (src_bottom > sby + sb_size) {
  ------------------
  |  Branch (1335:13): [True: 3.67k, False: 616k]
  ------------------
 1336|  3.67k|            src_top    -= src_bottom - (sby + sb_size);
 1337|  3.67k|            src_bottom -= src_bottom - (sby + sb_size);
 1338|  3.67k|        }
 1339|       |        // error out if mv still overlaps with the current superblock
 1340|   620k|        if (src_bottom > sby && src_right > sbx)
  ------------------
  |  Branch (1340:13): [True: 579k, False: 41.0k]
  |  Branch (1340:33): [True: 6.85k, False: 572k]
  ------------------
 1341|  6.85k|            return -1;
 1342|       |
 1343|   613k|        b->mv[0].x = (src_left - t->bx * 4) * 8;
 1344|   613k|        b->mv[0].y = (src_top  - t->by * 4) * 8;
 1345|       |
 1346|   613k|        if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   613k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 613k]
  |  |  ------------------
  |  |   35|   613k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   613k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1347|      0|            printf("Post-dmv[%d/%d,ref=%d/%d|%d/%d]: r=%d\n",
 1348|      0|                   b->mv[0].y, b->mv[0].x, ref.y, ref.x,
 1349|      0|                   mvstack[0].mv.mv[0].y, mvstack[0].mv.mv[0].x, ts->msac.rng);
 1350|   613k|        read_vartx_tree(t, b, bs, bx4, by4);
 1351|       |
 1352|       |        // reconstruction
 1353|   613k|        if (t->frame_thread.pass == 1) {
  ------------------
  |  Branch (1353:13): [True: 0, False: 613k]
  ------------------
 1354|      0|            f->bd_fn.read_coef_blocks(t, bs, b);
 1355|      0|            b->filter2d = FILTER_2D_BILINEAR;
 1356|   613k|        } else {
 1357|   613k|            if (f->bd_fn.recon_b_inter(t, bs, b)) return -1;
  ------------------
  |  Branch (1357:17): [True: 0, False: 613k]
  ------------------
 1358|   613k|        }
 1359|       |
 1360|   613k|        splat_intrabc_mv(f->c, t, bs, b, bw4, bh4);
 1361|   613k|        BlockContext *edge = t->a;
 1362|  1.84M|        for (int i = 0, off = bx4; i < 2; i++, off = by4, edge = &t->l) {
  ------------------
  |  Branch (1362:36): [True: 1.22M, False: 613k]
  ------------------
 1363|  1.22M|#define set_ctx(rep_macro) \
 1364|  1.22M|            rep_macro(edge->tx_intra, off, b_dim[2 + i]); \
 1365|  1.22M|            rep_macro(edge->mode, off, DC_PRED); \
 1366|  1.22M|            rep_macro(edge->pal_sz, off, 0); \
 1367|       |            /* see aomedia bug 2183 for why this is outside if (has_chroma) */ \
 1368|  1.22M|            rep_macro(t->pal_sz_uv[i], off, 0); \
 1369|  1.22M|            rep_macro(edge->seg_pred, off, seg_pred); \
 1370|  1.22M|            rep_macro(edge->skip_mode, off, 0); \
 1371|  1.22M|            rep_macro(edge->intra, off, 0); \
 1372|  1.22M|            rep_macro(edge->skip, off, b->skip)
 1373|  1.22M|            case_set(b_dim[2 + i]);
  ------------------
  |  |   70|  1.22M|    switch (var) { \
  |  |   71|   666k|    case 0: set_ctx(set_ctx1); break; \
  |  |  ------------------
  |  |  |  | 1364|   666k|            rep_macro(edge->tx_intra, off, b_dim[2 + i]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   666k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   666k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1365|   666k|            rep_macro(edge->mode, off, DC_PRED); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   666k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   666k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1366|   666k|            rep_macro(edge->pal_sz, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   666k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   666k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1367|   666k|            /* see aomedia bug 2183 for why this is outside if (has_chroma) */ \
  |  |  |  | 1368|   666k|            rep_macro(t->pal_sz_uv[i], off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   666k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   666k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1369|   666k|            rep_macro(edge->seg_pred, off, seg_pred); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   666k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   666k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1370|   666k|            rep_macro(edge->skip_mode, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   666k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   666k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1371|   666k|            rep_macro(edge->intra, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   666k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   666k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1372|   666k|            rep_macro(edge->skip, off, b->skip)
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   666k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   666k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (71:5): [True: 666k, False: 560k]
  |  |  ------------------
  |  |   72|   144k|    case 1: set_ctx(set_ctx2); break; \
  |  |  ------------------
  |  |  |  | 1364|   144k|            rep_macro(edge->tx_intra, off, b_dim[2 + i]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   144k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   144k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1365|   144k|            rep_macro(edge->mode, off, DC_PRED); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   144k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   144k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1366|   144k|            rep_macro(edge->pal_sz, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   144k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   144k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1367|   144k|            /* see aomedia bug 2183 for why this is outside if (has_chroma) */ \
  |  |  |  | 1368|   144k|            rep_macro(t->pal_sz_uv[i], off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   144k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   144k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1369|   144k|            rep_macro(edge->seg_pred, off, seg_pred); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   144k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   144k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1370|   144k|            rep_macro(edge->skip_mode, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   144k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   144k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1371|   144k|            rep_macro(edge->intra, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   144k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   144k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1372|   144k|            rep_macro(edge->skip, off, b->skip)
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   144k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   144k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (72:5): [True: 144k, False: 1.08M]
  |  |  ------------------
  |  |   73|   213k|    case 2: set_ctx(set_ctx4); break; \
  |  |  ------------------
  |  |  |  | 1364|   213k|            rep_macro(edge->tx_intra, off, b_dim[2 + i]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   213k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   213k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1365|   213k|            rep_macro(edge->mode, off, DC_PRED); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   213k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   213k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1366|   213k|            rep_macro(edge->pal_sz, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   213k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   213k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1367|   213k|            /* see aomedia bug 2183 for why this is outside if (has_chroma) */ \
  |  |  |  | 1368|   213k|            rep_macro(t->pal_sz_uv[i], off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   213k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   213k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1369|   213k|            rep_macro(edge->seg_pred, off, seg_pred); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   213k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   213k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1370|   213k|            rep_macro(edge->skip_mode, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   213k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   213k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1371|   213k|            rep_macro(edge->intra, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   213k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   213k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1372|   213k|            rep_macro(edge->skip, off, b->skip)
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   213k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   213k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (73:5): [True: 213k, False: 1.01M]
  |  |  ------------------
  |  |   74|  84.7k|    case 3: set_ctx(set_ctx8); break; \
  |  |  ------------------
  |  |  |  | 1364|  84.7k|            rep_macro(edge->tx_intra, off, b_dim[2 + i]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|  84.7k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|  84.7k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1365|  84.7k|            rep_macro(edge->mode, off, DC_PRED); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|  84.7k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|  84.7k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1366|  84.7k|            rep_macro(edge->pal_sz, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|  84.7k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|  84.7k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1367|  84.7k|            /* see aomedia bug 2183 for why this is outside if (has_chroma) */ \
  |  |  |  | 1368|  84.7k|            rep_macro(t->pal_sz_uv[i], off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|  84.7k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|  84.7k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1369|  84.7k|            rep_macro(edge->seg_pred, off, seg_pred); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|  84.7k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|  84.7k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1370|  84.7k|            rep_macro(edge->skip_mode, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|  84.7k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|  84.7k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1371|  84.7k|            rep_macro(edge->intra, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|  84.7k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|  84.7k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1372|  84.7k|            rep_macro(edge->skip, off, b->skip)
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|  84.7k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|  84.7k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (74:5): [True: 84.7k, False: 1.14M]
  |  |  ------------------
  |  |   75|   108k|    case 4: set_ctx(set_ctx16); break; \
  |  |  ------------------
  |  |  |  | 1364|   108k|            rep_macro(edge->tx_intra, off, b_dim[2 + i]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   108k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   108k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   108k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   108k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 108k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1365|   108k|            rep_macro(edge->mode, off, DC_PRED); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   108k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   108k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   108k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   108k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 108k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1366|   108k|            rep_macro(edge->pal_sz, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   108k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   108k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   108k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   108k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 108k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1367|   108k|            /* see aomedia bug 2183 for why this is outside if (has_chroma) */ \
  |  |  |  | 1368|   108k|            rep_macro(t->pal_sz_uv[i], off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   108k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   108k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   108k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   108k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 108k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1369|   108k|            rep_macro(edge->seg_pred, off, seg_pred); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   108k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   108k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   108k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   108k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 108k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1370|   108k|            rep_macro(edge->skip_mode, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   108k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   108k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   108k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   108k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 108k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1371|   108k|            rep_macro(edge->intra, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   108k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   108k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   108k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   108k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 108k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1372|   108k|            rep_macro(edge->skip, off, b->skip)
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   108k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   108k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   108k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   108k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 108k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (75:5): [True: 108k, False: 1.11M]
  |  |  ------------------
  |  |   76|  9.64k|    case 5: set_ctx(set_ctx32); break; \
  |  |  ------------------
  |  |  |  | 1364|  9.64k|            rep_macro(edge->tx_intra, off, b_dim[2 + i]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  9.64k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  9.64k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  9.64k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  9.64k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 9.64k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1365|  9.64k|            rep_macro(edge->mode, off, DC_PRED); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  9.64k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  9.64k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  9.64k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  9.64k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 9.64k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1366|  9.64k|            rep_macro(edge->pal_sz, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  9.64k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  9.64k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  9.64k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  9.64k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 9.64k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1367|  9.64k|            /* see aomedia bug 2183 for why this is outside if (has_chroma) */ \
  |  |  |  | 1368|  9.64k|            rep_macro(t->pal_sz_uv[i], off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  9.64k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  9.64k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  9.64k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  9.64k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 9.64k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1369|  9.64k|            rep_macro(edge->seg_pred, off, seg_pred); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  9.64k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  9.64k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  9.64k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  9.64k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 9.64k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1370|  9.64k|            rep_macro(edge->skip_mode, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  9.64k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  9.64k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  9.64k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  9.64k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 9.64k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1371|  9.64k|            rep_macro(edge->intra, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  9.64k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  9.64k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  9.64k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  9.64k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 9.64k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1372|  9.64k|            rep_macro(edge->skip, off, b->skip)
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  9.64k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  9.64k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  9.64k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  9.64k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 9.64k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (76:5): [True: 9.64k, False: 1.21M]
  |  |  ------------------
  |  |   77|      0|    default: assert(0); \
  |  |  ------------------
  |  |  |  Branch (77:5): [True: 0, False: 1.22M]
  |  |  ------------------
  |  |   78|  1.22M|    }
  ------------------
  |  Branch (1373:13): [Folded, False: 0]
  ------------------
 1374|  1.22M|#undef set_ctx
 1375|  1.22M|        }
 1376|   613k|        if (has_chroma) {
  ------------------
  |  Branch (1376:13): [True: 269k, False: 343k]
  ------------------
 1377|   269k|            dav1d_memset_pow2[ulog2(cbw4)](&t->a->uvmode[cbx4], DC_PRED);
 1378|   269k|            dav1d_memset_pow2[ulog2(cbh4)](&t->l.uvmode[cby4], DC_PRED);
 1379|   269k|        }
 1380|  1.14M|    } else {
 1381|       |        // inter-specific mode/mv coding
 1382|  1.14M|        int is_comp, has_subpel_filter;
 1383|       |
 1384|  1.14M|        if (b->skip_mode) {
  ------------------
  |  Branch (1384:13): [True: 23.8k, False: 1.11M]
  ------------------
 1385|  23.8k|            is_comp = 1;
 1386|  1.11M|        } else if ((!seg || (seg->ref == -1 && !seg->globalmv && !seg->skip)) &&
  ------------------
  |  Branch (1386:21): [True: 708k, False: 408k]
  |  Branch (1386:30): [True: 112k, False: 296k]
  |  Branch (1386:48): [True: 65.8k, False: 46.2k]
  |  Branch (1386:66): [True: 47.2k, False: 18.5k]
  ------------------
 1387|   756k|                   f->frame_hdr->switchable_comp_refs && imin(bw4, bh4) > 1)
  ------------------
  |  Branch (1387:20): [True: 527k, False: 228k]
  |  Branch (1387:58): [True: 366k, False: 161k]
  ------------------
 1388|   366k|        {
 1389|   366k|            const int ctx = get_comp_ctx(t->a, &t->l, by4, bx4,
 1390|   366k|                                         have_top, have_left);
 1391|   366k|            is_comp = dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   366k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1392|   366k|                          ts->cdf.m.comp[ctx]);
 1393|   366k|            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   366k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 366k]
  |  |  ------------------
  |  |   35|   366k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   366k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1394|      0|                printf("Post-compflag[%d]: r=%d\n", is_comp, ts->msac.rng);
 1395|   750k|        } else {
 1396|   750k|            is_comp = 0;
 1397|   750k|        }
 1398|       |
 1399|  1.14M|        if (b->skip_mode) {
  ------------------
  |  Branch (1399:13): [True: 23.8k, False: 1.11M]
  ------------------
 1400|  23.8k|            b->ref[0] = f->frame_hdr->skip_mode_refs[0];
 1401|  23.8k|            b->ref[1] = f->frame_hdr->skip_mode_refs[1];
 1402|  23.8k|            b->comp_type = COMP_INTER_AVG;
 1403|  23.8k|            b->inter_mode = NEARESTMV_NEARESTMV;
 1404|  23.8k|            b->drl_idx = NEAREST_DRL;
 1405|  23.8k|            has_subpel_filter = 0;
 1406|       |
 1407|  23.8k|            refmvs_candidate mvstack[8];
 1408|  23.8k|            int n_mvs, ctx;
 1409|  23.8k|            dav1d_refmvs_find(&t->rt, mvstack, &n_mvs, &ctx,
 1410|  23.8k|                              (union refmvs_refpair) { .ref = {
 1411|  23.8k|                                    b->ref[0] + 1, b->ref[1] + 1 }},
 1412|  23.8k|                              bs, intra_edge_flags, t->by, t->bx);
 1413|       |
 1414|  23.8k|            b->mv[0] = mvstack[0].mv.mv[0];
 1415|  23.8k|            b->mv[1] = mvstack[0].mv.mv[1];
 1416|  23.8k|            fix_mv_precision(f->frame_hdr, &b->mv[0]);
 1417|  23.8k|            fix_mv_precision(f->frame_hdr, &b->mv[1]);
 1418|  23.8k|            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  23.8k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 23.8k]
  |  |  ------------------
  |  |   35|  23.8k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  23.8k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1419|      0|                printf("Post-skipmodeblock[mv=1:y=%d,x=%d,2:y=%d,x=%d,refs=%d+%d\n",
 1420|      0|                       b->mv[0].y, b->mv[0].x, b->mv[1].y, b->mv[1].x,
 1421|      0|                       b->ref[0], b->ref[1]);
 1422|  1.11M|        } else if (is_comp) {
  ------------------
  |  Branch (1422:20): [True: 189k, False: 927k]
  ------------------
 1423|   189k|            const int dir_ctx = get_comp_dir_ctx(t->a, &t->l, by4, bx4,
 1424|   189k|                                                 have_top, have_left);
 1425|   189k|            if (dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   189k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  |  Branch (1425:17): [True: 163k, False: 26.0k]
  ------------------
 1426|   189k|                    ts->cdf.m.comp_dir[dir_ctx]))
 1427|   163k|            {
 1428|       |                // bidir - first reference (fw)
 1429|   163k|                const int ctx1 = av1_get_fwd_ref_ctx(t->a, &t->l, by4, bx4,
 1430|   163k|                                                     have_top, have_left);
 1431|   163k|                if (dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   163k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  |  Branch (1431:21): [True: 56.5k, False: 106k]
  ------------------
 1432|   163k|                        ts->cdf.m.comp_fwd_ref[0][ctx1]))
 1433|  56.5k|                {
 1434|  56.5k|                    const int ctx2 = av1_get_fwd_ref_2_ctx(t->a, &t->l, by4, bx4,
 1435|  56.5k|                                                           have_top, have_left);
 1436|  56.5k|                    b->ref[0] = 2 + dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  56.5k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1437|  56.5k|                                        ts->cdf.m.comp_fwd_ref[2][ctx2]);
 1438|   106k|                } else {
 1439|   106k|                    const int ctx2 = av1_get_fwd_ref_1_ctx(t->a, &t->l, by4, bx4,
 1440|   106k|                                                           have_top, have_left);
 1441|   106k|                    b->ref[0] = dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   106k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1442|   106k|                                    ts->cdf.m.comp_fwd_ref[1][ctx2]);
 1443|   106k|                }
 1444|       |
 1445|       |                // second reference (bw)
 1446|   163k|                const int ctx3 = av1_get_bwd_ref_ctx(t->a, &t->l, by4, bx4,
 1447|   163k|                                                     have_top, have_left);
 1448|   163k|                if (dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   163k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  |  Branch (1448:21): [True: 78.4k, False: 84.7k]
  ------------------
 1449|   163k|                        ts->cdf.m.comp_bwd_ref[0][ctx3]))
 1450|  78.4k|                {
 1451|  78.4k|                    b->ref[1] = 6;
 1452|  84.7k|                } else {
 1453|  84.7k|                    const int ctx4 = av1_get_bwd_ref_1_ctx(t->a, &t->l, by4, bx4,
 1454|  84.7k|                                                           have_top, have_left);
 1455|  84.7k|                    b->ref[1] = 4 + dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  84.7k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1456|  84.7k|                                        ts->cdf.m.comp_bwd_ref[1][ctx4]);
 1457|  84.7k|                }
 1458|   163k|            } else {
 1459|       |                // unidir
 1460|  26.0k|                const int uctx_p = av1_get_uni_p_ctx(t->a, &t->l, by4, bx4,
  ------------------
  |  |  280|  26.0k|#define av1_get_uni_p_ctx av1_get_ref_ctx
  ------------------
 1461|  26.0k|                                                     have_top, have_left);
 1462|  26.0k|                if (dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  26.0k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  |  Branch (1462:21): [True: 5.66k, False: 20.3k]
  ------------------
 1463|  26.0k|                        ts->cdf.m.comp_uni_ref[0][uctx_p]))
 1464|  5.66k|                {
 1465|  5.66k|                    b->ref[0] = 4;
 1466|  5.66k|                    b->ref[1] = 6;
 1467|  20.3k|                } else {
 1468|  20.3k|                    const int uctx_p1 = av1_get_uni_p1_ctx(t->a, &t->l, by4, bx4,
 1469|  20.3k|                                                           have_top, have_left);
 1470|  20.3k|                    b->ref[0] = 0;
 1471|  20.3k|                    b->ref[1] = 1 + dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  20.3k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1472|  20.3k|                                        ts->cdf.m.comp_uni_ref[1][uctx_p1]);
 1473|  20.3k|                    if (b->ref[1] == 2) {
  ------------------
  |  Branch (1473:25): [True: 12.9k, False: 7.44k]
  ------------------
 1474|  12.9k|                        const int uctx_p2 = av1_get_uni_p2_ctx(t->a, &t->l, by4, bx4,
  ------------------
  |  |  281|  12.9k|#define av1_get_uni_p2_ctx av1_get_fwd_ref_2_ctx
  ------------------
 1475|  12.9k|                                                               have_top, have_left);
 1476|  12.9k|                        b->ref[1] += dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  12.9k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1477|  12.9k|                                         ts->cdf.m.comp_uni_ref[2][uctx_p2]);
 1478|  12.9k|                    }
 1479|  20.3k|                }
 1480|  26.0k|            }
 1481|   189k|            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   189k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 189k]
  |  |  ------------------
  |  |   35|   189k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   189k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1482|      0|                printf("Post-refs[%d/%d]: r=%d\n",
 1483|      0|                       b->ref[0], b->ref[1], ts->msac.rng);
 1484|       |
 1485|   189k|            refmvs_candidate mvstack[8];
 1486|   189k|            int n_mvs, ctx;
 1487|   189k|            dav1d_refmvs_find(&t->rt, mvstack, &n_mvs, &ctx,
 1488|   189k|                              (union refmvs_refpair) { .ref = {
 1489|   189k|                                    b->ref[0] + 1, b->ref[1] + 1 }},
 1490|   189k|                              bs, intra_edge_flags, t->by, t->bx);
 1491|       |
 1492|   189k|            b->inter_mode = dav1d_msac_decode_symbol_adapt8(&ts->msac,
  ------------------
  |  |   48|   189k|#define dav1d_msac_decode_symbol_adapt8  dav1d_msac_decode_symbol_adapt8_sse2
  ------------------
 1493|   189k|                                ts->cdf.m.comp_inter_mode[ctx],
 1494|   189k|                                N_COMP_INTER_PRED_MODES - 1);
 1495|   189k|            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   189k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 189k]
  |  |  ------------------
  |  |   35|   189k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   189k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1496|      0|                printf("Post-compintermode[%d,ctx=%d,n_mvs=%d]: r=%d\n",
 1497|      0|                       b->inter_mode, ctx, n_mvs, ts->msac.rng);
 1498|       |
 1499|   189k|            const uint8_t *const im = dav1d_comp_inter_pred_modes[b->inter_mode];
 1500|   189k|            b->drl_idx = NEAREST_DRL;
 1501|   189k|            if (b->inter_mode == NEWMV_NEWMV) {
  ------------------
  |  Branch (1501:17): [True: 37.4k, False: 151k]
  ------------------
 1502|  37.4k|                if (n_mvs > 1) { // NEARER, NEAR or NEARISH
  ------------------
  |  Branch (1502:21): [True: 37.4k, False: 0]
  ------------------
 1503|  37.4k|                    const int drl_ctx_v1 = get_drl_context(mvstack, 0);
 1504|  37.4k|                    b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  37.4k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1505|  37.4k|                                      ts->cdf.m.drl_bit[drl_ctx_v1]);
 1506|  37.4k|                    if (b->drl_idx == NEARER_DRL && n_mvs > 2) {
  ------------------
  |  Branch (1506:25): [True: 25.5k, False: 11.9k]
  |  Branch (1506:53): [True: 7.77k, False: 17.7k]
  ------------------
 1507|  7.77k|                        const int drl_ctx_v2 = get_drl_context(mvstack, 1);
 1508|  7.77k|                        b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  7.77k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1509|  7.77k|                                          ts->cdf.m.drl_bit[drl_ctx_v2]);
 1510|  7.77k|                    }
 1511|  37.4k|                    if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  37.4k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 37.4k]
  |  |  ------------------
  |  |   35|  37.4k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  37.4k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1512|      0|                        printf("Post-drlidx[%d,n_mvs=%d]: r=%d\n",
 1513|      0|                               b->drl_idx, n_mvs, ts->msac.rng);
 1514|  37.4k|                }
 1515|   151k|            } else if (im[0] == NEARMV || im[1] == NEARMV) {
  ------------------
  |  Branch (1515:24): [True: 40.5k, False: 111k]
  |  Branch (1515:43): [True: 5.10k, False: 106k]
  ------------------
 1516|  45.6k|                b->drl_idx = NEARER_DRL;
 1517|  45.6k|                if (n_mvs > 2) { // NEAR or NEARISH
  ------------------
  |  Branch (1517:21): [True: 10.6k, False: 34.9k]
  ------------------
 1518|  10.6k|                    const int drl_ctx_v2 = get_drl_context(mvstack, 1);
 1519|  10.6k|                    b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  10.6k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1520|  10.6k|                                      ts->cdf.m.drl_bit[drl_ctx_v2]);
 1521|  10.6k|                    if (b->drl_idx == NEAR_DRL && n_mvs > 3) {
  ------------------
  |  Branch (1521:25): [True: 5.09k, False: 5.59k]
  |  Branch (1521:51): [True: 2.81k, False: 2.27k]
  ------------------
 1522|  2.81k|                        const int drl_ctx_v3 = get_drl_context(mvstack, 2);
 1523|  2.81k|                        b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  2.81k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1524|  2.81k|                                          ts->cdf.m.drl_bit[drl_ctx_v3]);
 1525|  2.81k|                    }
 1526|  10.6k|                    if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  10.6k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 10.6k]
  |  |  ------------------
  |  |   35|  10.6k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  10.6k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1527|      0|                        printf("Post-drlidx[%d,n_mvs=%d]: r=%d\n",
 1528|      0|                               b->drl_idx, n_mvs, ts->msac.rng);
 1529|  10.6k|                }
 1530|  45.6k|            }
 1531|   189k|            assert(b->drl_idx >= NEAREST_DRL && b->drl_idx <= NEARISH_DRL);
  ------------------
  |  Branch (1531:13): [True: 189k, False: 0]
  |  Branch (1531:13): [True: 189k, False: 0]
  ------------------
 1532|       |
 1533|   189k|#define assign_comp_mv(idx) \
 1534|   189k|            switch (im[idx]) { \
 1535|   189k|            case NEARMV: \
 1536|   189k|            case NEARESTMV: \
 1537|   189k|                b->mv[idx] = mvstack[b->drl_idx].mv.mv[idx]; \
 1538|   189k|                fix_mv_precision(f->frame_hdr, &b->mv[idx]); \
 1539|   189k|                break; \
 1540|   189k|            case GLOBALMV: \
 1541|   189k|                has_subpel_filter |= \
 1542|   189k|                    f->frame_hdr->gmv[b->ref[idx]].type == DAV1D_WM_TYPE_TRANSLATION; \
 1543|   189k|                b->mv[idx] = get_gmv_2d(&f->frame_hdr->gmv[b->ref[idx]], \
 1544|   189k|                                        t->bx, t->by, bw4, bh4, f->frame_hdr); \
 1545|   189k|                break; \
 1546|   189k|            case NEWMV: \
 1547|   189k|                b->mv[idx] = mvstack[b->drl_idx].mv.mv[idx]; \
 1548|   189k|                const int mv_prec = f->frame_hdr->hp - f->frame_hdr->force_integer_mv; \
 1549|   189k|                read_mv_residual(ts, &b->mv[idx], mv_prec); \
 1550|   189k|                break; \
 1551|   189k|            }
 1552|   189k|            has_subpel_filter = imin(bw4, bh4) == 1 ||
  ------------------
  |  Branch (1552:33): [True: 0, False: 189k]
  ------------------
 1553|   189k|                                b->inter_mode != GLOBALMV_GLOBALMV;
  ------------------
  |  Branch (1553:33): [True: 174k, False: 14.8k]
  ------------------
 1554|   189k|            assign_comp_mv(0);
  ------------------
  |  | 1534|   189k|            switch (im[idx]) { \
  |  |  ------------------
  |  |  |  Branch (1534:21): [True: 189k, False: 0]
  |  |  ------------------
  |  | 1535|  40.5k|            case NEARMV: \
  |  |  ------------------
  |  |  |  Branch (1535:13): [True: 40.5k, False: 148k]
  |  |  ------------------
  |  | 1536|   122k|            case NEARESTMV: \
  |  |  ------------------
  |  |  |  Branch (1536:13): [True: 81.4k, False: 107k]
  |  |  ------------------
  |  | 1537|   122k|                b->mv[idx] = mvstack[b->drl_idx].mv.mv[idx]; \
  |  | 1538|   122k|                fix_mv_precision(f->frame_hdr, &b->mv[idx]); \
  |  | 1539|   122k|                break; \
  |  | 1540|  40.5k|            case GLOBALMV: \
  |  |  ------------------
  |  |  |  Branch (1540:13): [True: 14.8k, False: 174k]
  |  |  ------------------
  |  | 1541|  14.8k|                has_subpel_filter |= \
  |  | 1542|  14.8k|                    f->frame_hdr->gmv[b->ref[idx]].type == DAV1D_WM_TYPE_TRANSLATION; \
  |  | 1543|  14.8k|                b->mv[idx] = get_gmv_2d(&f->frame_hdr->gmv[b->ref[idx]], \
  |  | 1544|  14.8k|                                        t->bx, t->by, bw4, bh4, f->frame_hdr); \
  |  | 1545|  14.8k|                break; \
  |  | 1546|  52.4k|            case NEWMV: \
  |  |  ------------------
  |  |  |  Branch (1546:13): [True: 52.4k, False: 136k]
  |  |  ------------------
  |  | 1547|  52.4k|                b->mv[idx] = mvstack[b->drl_idx].mv.mv[idx]; \
  |  | 1548|  52.4k|                const int mv_prec = f->frame_hdr->hp - f->frame_hdr->force_integer_mv; \
  |  | 1549|  52.4k|                read_mv_residual(ts, &b->mv[idx], mv_prec); \
  |  | 1550|  52.4k|                break; \
  |  | 1551|   189k|            }
  ------------------
 1555|   189k|            assign_comp_mv(1);
  ------------------
  |  | 1534|   189k|            switch (im[idx]) { \
  |  |  ------------------
  |  |  |  Branch (1534:21): [True: 189k, False: 0]
  |  |  ------------------
  |  | 1535|  40.2k|            case NEARMV: \
  |  |  ------------------
  |  |  |  Branch (1535:13): [True: 40.2k, False: 149k]
  |  |  ------------------
  |  | 1536|   120k|            case NEARESTMV: \
  |  |  ------------------
  |  |  |  Branch (1536:13): [True: 80.3k, False: 108k]
  |  |  ------------------
  |  | 1537|   120k|                b->mv[idx] = mvstack[b->drl_idx].mv.mv[idx]; \
  |  | 1538|   120k|                fix_mv_precision(f->frame_hdr, &b->mv[idx]); \
  |  | 1539|   120k|                break; \
  |  | 1540|  40.2k|            case GLOBALMV: \
  |  |  ------------------
  |  |  |  Branch (1540:13): [True: 14.8k, False: 174k]
  |  |  ------------------
  |  | 1541|  14.8k|                has_subpel_filter |= \
  |  | 1542|  14.8k|                    f->frame_hdr->gmv[b->ref[idx]].type == DAV1D_WM_TYPE_TRANSLATION; \
  |  | 1543|  14.8k|                b->mv[idx] = get_gmv_2d(&f->frame_hdr->gmv[b->ref[idx]], \
  |  | 1544|  14.8k|                                        t->bx, t->by, bw4, bh4, f->frame_hdr); \
  |  | 1545|  14.8k|                break; \
  |  | 1546|  53.8k|            case NEWMV: \
  |  |  ------------------
  |  |  |  Branch (1546:13): [True: 53.8k, False: 135k]
  |  |  ------------------
  |  | 1547|  53.8k|                b->mv[idx] = mvstack[b->drl_idx].mv.mv[idx]; \
  |  | 1548|  53.8k|                const int mv_prec = f->frame_hdr->hp - f->frame_hdr->force_integer_mv; \
  |  | 1549|  53.8k|                read_mv_residual(ts, &b->mv[idx], mv_prec); \
  |  | 1550|  53.8k|                break; \
  |  | 1551|   189k|            }
  ------------------
 1556|   189k|#undef assign_comp_mv
 1557|   189k|            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   189k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 189k]
  |  |  ------------------
  |  |   35|   189k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   189k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1558|      0|                printf("Post-residual_mv[1:y=%d,x=%d,2:y=%d,x=%d]: r=%d\n",
 1559|      0|                       b->mv[0].y, b->mv[0].x, b->mv[1].y, b->mv[1].x,
 1560|      0|                       ts->msac.rng);
 1561|       |
 1562|       |            // jnt_comp vs. seg vs. wedge
 1563|   189k|            int is_segwedge = 0;
 1564|   189k|            if (f->seq_hdr->masked_compound) {
  ------------------
  |  Branch (1564:17): [True: 163k, False: 25.4k]
  ------------------
 1565|   163k|                const int mask_ctx = get_mask_comp_ctx(t->a, &t->l, by4, bx4);
 1566|       |
 1567|   163k|                is_segwedge = dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   163k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1568|   163k|                                  ts->cdf.m.mask_comp[mask_ctx]);
 1569|   163k|                if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   163k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 163k]
  |  |  ------------------
  |  |   35|   163k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   163k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1570|      0|                    printf("Post-segwedge_vs_jntavg[%d,ctx=%d]: r=%d\n",
 1571|      0|                           is_segwedge, mask_ctx, ts->msac.rng);
 1572|   163k|            }
 1573|       |
 1574|   189k|            if (!is_segwedge) {
  ------------------
  |  Branch (1574:17): [True: 136k, False: 52.8k]
  ------------------
 1575|   136k|                if (f->seq_hdr->jnt_comp) {
  ------------------
  |  Branch (1575:21): [True: 100k, False: 35.7k]
  ------------------
 1576|   100k|                    const int jnt_ctx =
 1577|   100k|                        get_jnt_comp_ctx(f->seq_hdr->order_hint_n_bits,
 1578|   100k|                                         f->cur.frame_hdr->frame_offset,
 1579|   100k|                                         f->refp[b->ref[0]].p.frame_hdr->frame_offset,
 1580|   100k|                                         f->refp[b->ref[1]].p.frame_hdr->frame_offset,
 1581|   100k|                                         t->a, &t->l, by4, bx4);
 1582|   100k|                    b->comp_type = COMP_INTER_WEIGHTED_AVG +
 1583|   100k|                                   dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   100k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1584|   100k|                                       ts->cdf.m.jnt_comp[jnt_ctx]);
 1585|   100k|                    if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   100k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 100k]
  |  |  ------------------
  |  |   35|   100k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   100k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1586|      0|                        printf("Post-jnt_comp[%d,ctx=%d[ac:%d,ar:%d,lc:%d,lr:%d]]: r=%d\n",
 1587|      0|                               b->comp_type == COMP_INTER_AVG,
 1588|      0|                               jnt_ctx, t->a->comp_type[bx4], t->a->ref[0][bx4],
 1589|      0|                               t->l.comp_type[by4], t->l.ref[0][by4],
 1590|      0|                               ts->msac.rng);
 1591|   100k|                } else {
 1592|  35.7k|                    b->comp_type = COMP_INTER_AVG;
 1593|  35.7k|                }
 1594|   136k|            } else {
 1595|  52.8k|                if (wedge_allowed_mask & (1 << bs)) {
  ------------------
  |  Branch (1595:21): [True: 43.4k, False: 9.33k]
  ------------------
 1596|  43.4k|                    const int ctx = dav1d_wedge_ctx_lut[bs];
 1597|  43.4k|                    b->comp_type = COMP_INTER_WEDGE -
 1598|  43.4k|                                   dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  43.4k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1599|  43.4k|                                       ts->cdf.m.wedge_comp[ctx]);
 1600|  43.4k|                    if (b->comp_type == COMP_INTER_WEDGE)
  ------------------
  |  Branch (1600:25): [True: 15.7k, False: 27.7k]
  ------------------
 1601|  15.7k|                        b->wedge_idx = dav1d_msac_decode_symbol_adapt16(&ts->msac,
  ------------------
  |  |   57|  15.7k|#define dav1d_msac_decode_symbol_adapt16(ctx, cdf, symb) ((ctx)->symbol_adapt16(ctx, cdf, symb))
  ------------------
 1602|  43.4k|                                           ts->cdf.m.wedge_idx[ctx], 15);
 1603|  43.4k|                } else {
 1604|  9.33k|                    b->comp_type = COMP_INTER_SEG;
 1605|  9.33k|                }
 1606|  52.8k|                b->mask_sign = dav1d_msac_decode_bool_equi(&ts->msac);
  ------------------
  |  |   53|  52.8k|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
 1607|  52.8k|                if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  52.8k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 52.8k]
  |  |  ------------------
  |  |   35|  52.8k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  52.8k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1608|      0|                    printf("Post-seg/wedge[%d,wedge_idx=%d,sign=%d]: r=%d\n",
 1609|      0|                           b->comp_type == COMP_INTER_WEDGE,
 1610|      0|                           b->wedge_idx, b->mask_sign, ts->msac.rng);
 1611|  52.8k|            }
 1612|   927k|        } else {
 1613|   927k|            b->comp_type = COMP_INTER_NONE;
 1614|       |
 1615|       |            // ref
 1616|   927k|            if (seg && seg->ref > 0) {
  ------------------
  |  Branch (1616:17): [True: 397k, False: 530k]
  |  Branch (1616:24): [True: 296k, False: 101k]
  ------------------
 1617|   296k|                b->ref[0] = seg->ref - 1;
 1618|   631k|            } else if (seg && (seg->globalmv || seg->skip)) {
  ------------------
  |  Branch (1618:24): [True: 101k, False: 530k]
  |  Branch (1618:32): [True: 46.2k, False: 54.8k]
  |  Branch (1618:49): [True: 18.5k, False: 36.2k]
  ------------------
 1619|  64.7k|                b->ref[0] = 0;
 1620|   566k|            } else {
 1621|   566k|                const int ctx1 = av1_get_ref_ctx(t->a, &t->l, by4, bx4,
 1622|   566k|                                                 have_top, have_left);
 1623|   566k|                if (dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   566k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  |  Branch (1623:21): [True: 210k, False: 355k]
  ------------------
 1624|   566k|                                                 ts->cdf.m.ref[0][ctx1]))
 1625|   210k|                {
 1626|   210k|                    const int ctx2 = av1_get_ref_2_ctx(t->a, &t->l, by4, bx4,
  ------------------
  |  |  275|   210k|#define av1_get_ref_2_ctx av1_get_bwd_ref_ctx
  ------------------
 1627|   210k|                                                       have_top, have_left);
 1628|   210k|                    if (dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   210k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  |  Branch (1628:25): [True: 140k, False: 70.5k]
  ------------------
 1629|   210k|                                                     ts->cdf.m.ref[1][ctx2]))
 1630|   140k|                    {
 1631|   140k|                        b->ref[0] = 6;
 1632|   140k|                    } else {
 1633|  70.5k|                        const int ctx3 = av1_get_ref_6_ctx(t->a, &t->l, by4, bx4,
  ------------------
  |  |  279|  70.5k|#define av1_get_ref_6_ctx av1_get_bwd_ref_1_ctx
  ------------------
 1634|  70.5k|                                                           have_top, have_left);
 1635|  70.5k|                        b->ref[0] = 4 + dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  70.5k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1636|  70.5k|                                            ts->cdf.m.ref[5][ctx3]);
 1637|  70.5k|                    }
 1638|   355k|                } else {
 1639|   355k|                    const int ctx2 = av1_get_ref_3_ctx(t->a, &t->l, by4, bx4,
  ------------------
  |  |  276|   355k|#define av1_get_ref_3_ctx av1_get_fwd_ref_ctx
  ------------------
 1640|   355k|                                                       have_top, have_left);
 1641|   355k|                    if (dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   355k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  |  Branch (1641:25): [True: 69.9k, False: 285k]
  ------------------
 1642|   355k|                                                     ts->cdf.m.ref[2][ctx2]))
 1643|  69.9k|                    {
 1644|  69.9k|                        const int ctx3 = av1_get_ref_5_ctx(t->a, &t->l, by4, bx4,
  ------------------
  |  |  278|  69.9k|#define av1_get_ref_5_ctx av1_get_fwd_ref_2_ctx
  ------------------
 1645|  69.9k|                                                           have_top, have_left);
 1646|  69.9k|                        b->ref[0] = 2 + dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  69.9k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1647|  69.9k|                                            ts->cdf.m.ref[4][ctx3]);
 1648|   285k|                    } else {
 1649|   285k|                        const int ctx3 = av1_get_ref_4_ctx(t->a, &t->l, by4, bx4,
  ------------------
  |  |  277|   285k|#define av1_get_ref_4_ctx av1_get_fwd_ref_1_ctx
  ------------------
 1650|   285k|                                                           have_top, have_left);
 1651|   285k|                        b->ref[0] = dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   285k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1652|   285k|                                        ts->cdf.m.ref[3][ctx3]);
 1653|   285k|                    }
 1654|   355k|                }
 1655|   566k|                if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   566k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 566k]
  |  |  ------------------
  |  |   35|   566k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   566k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1656|      0|                    printf("Post-ref[%d]: r=%d\n", b->ref[0], ts->msac.rng);
 1657|   566k|            }
 1658|   927k|            b->ref[1] = -1;
 1659|       |
 1660|   927k|            refmvs_candidate mvstack[8];
 1661|   927k|            int n_mvs, ctx;
 1662|   927k|            dav1d_refmvs_find(&t->rt, mvstack, &n_mvs, &ctx,
 1663|   927k|                              (union refmvs_refpair) { .ref = { b->ref[0] + 1, -1 }},
 1664|   927k|                              bs, intra_edge_flags, t->by, t->bx);
 1665|       |
 1666|       |            // mode parsing and mv derivation from ref_mvs
 1667|   927k|            if ((seg && (seg->skip || seg->globalmv)) ||
  ------------------
  |  Branch (1667:18): [True: 397k, False: 530k]
  |  Branch (1667:26): [True: 334k, False: 62.0k]
  |  Branch (1667:39): [True: 22.3k, False: 39.7k]
  ------------------
 1668|   570k|                dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   570k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  |  Branch (1668:17): [True: 408k, False: 161k]
  ------------------
 1669|   570k|                                             ts->cdf.m.newmv_mode[ctx & 7]))
 1670|   765k|            {
 1671|   765k|                if ((seg && (seg->skip || seg->globalmv)) ||
  ------------------
  |  Branch (1671:22): [True: 383k, False: 382k]
  |  Branch (1671:30): [True: 334k, False: 48.8k]
  |  Branch (1671:43): [True: 22.3k, False: 26.4k]
  ------------------
 1672|   408k|                    !dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   408k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  |  Branch (1672:21): [True: 27.6k, False: 381k]
  ------------------
 1673|   408k|                         ts->cdf.m.globalmv_mode[(ctx >> 3) & 1]))
 1674|   384k|                {
 1675|   384k|                    b->inter_mode = GLOBALMV;
 1676|   384k|                    b->mv[0] = get_gmv_2d(&f->frame_hdr->gmv[b->ref[0]],
 1677|   384k|                                          t->bx, t->by, bw4, bh4, f->frame_hdr);
 1678|   384k|                    has_subpel_filter = imin(bw4, bh4) == 1 ||
  ------------------
  |  Branch (1678:41): [True: 116k, False: 268k]
  ------------------
 1679|   268k|                        f->frame_hdr->gmv[b->ref[0]].type == DAV1D_WM_TYPE_TRANSLATION;
  ------------------
  |  Branch (1679:25): [True: 99.7k, False: 169k]
  ------------------
 1680|   384k|                } else {
 1681|   381k|                    has_subpel_filter = 1;
 1682|   381k|                    if (dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   381k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  |  Branch (1682:25): [True: 161k, False: 219k]
  ------------------
 1683|   381k|                            ts->cdf.m.refmv_mode[(ctx >> 4) & 15]))
 1684|   161k|                    { // NEAREST, NEARER, NEAR or NEARISH
 1685|   161k|                        b->inter_mode = NEARMV;
 1686|   161k|                        b->drl_idx = NEARER_DRL;
 1687|   161k|                        if (n_mvs > 2) { // NEARER, NEAR or NEARISH
  ------------------
  |  Branch (1687:29): [True: 71.6k, False: 89.6k]
  ------------------
 1688|  71.6k|                            const int drl_ctx_v2 = get_drl_context(mvstack, 1);
 1689|  71.6k|                            b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  71.6k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1690|  71.6k|                                              ts->cdf.m.drl_bit[drl_ctx_v2]);
 1691|  71.6k|                            if (b->drl_idx == NEAR_DRL && n_mvs > 3) { // NEAR or NEARISH
  ------------------
  |  Branch (1691:33): [True: 40.4k, False: 31.1k]
  |  Branch (1691:59): [True: 23.7k, False: 16.6k]
  ------------------
 1692|  23.7k|                                const int drl_ctx_v3 =
 1693|  23.7k|                                    get_drl_context(mvstack, 2);
 1694|  23.7k|                                b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  23.7k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1695|  23.7k|                                                  ts->cdf.m.drl_bit[drl_ctx_v3]);
 1696|  23.7k|                            }
 1697|  71.6k|                        }
 1698|   219k|                    } else {
 1699|   219k|                        b->inter_mode = NEARESTMV;
 1700|   219k|                        b->drl_idx = NEAREST_DRL;
 1701|   219k|                    }
 1702|   381k|                    assert(b->drl_idx >= NEAREST_DRL && b->drl_idx <= NEARISH_DRL);
  ------------------
  |  Branch (1702:21): [True: 381k, False: 0]
  |  Branch (1702:21): [True: 381k, False: 0]
  ------------------
 1703|   381k|                    b->mv[0] = mvstack[b->drl_idx].mv.mv[0];
 1704|   381k|                    if (b->drl_idx < NEAR_DRL)
  ------------------
  |  Branch (1704:25): [True: 340k, False: 40.4k]
  ------------------
 1705|   340k|                        fix_mv_precision(f->frame_hdr, &b->mv[0]);
 1706|   381k|                }
 1707|       |
 1708|   765k|                if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   765k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 765k]
  |  |  ------------------
  |  |   35|   765k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   765k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1709|      0|                    printf("Post-intermode[%d,drl=%d,mv=y:%d,x:%d,n_mvs=%d]: r=%d\n",
 1710|      0|                           b->inter_mode, b->drl_idx, b->mv[0].y, b->mv[0].x, n_mvs,
 1711|      0|                           ts->msac.rng);
 1712|   765k|            } else {
 1713|   161k|                has_subpel_filter = 1;
 1714|   161k|                b->inter_mode = NEWMV;
 1715|   161k|                b->drl_idx = NEAREST_DRL;
 1716|   161k|                if (n_mvs > 1) { // NEARER, NEAR or NEARISH
  ------------------
  |  Branch (1716:21): [True: 123k, False: 37.9k]
  ------------------
 1717|   123k|                    const int drl_ctx_v1 = get_drl_context(mvstack, 0);
 1718|   123k|                    b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   123k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1719|   123k|                                      ts->cdf.m.drl_bit[drl_ctx_v1]);
 1720|   123k|                    if (b->drl_idx == NEARER_DRL && n_mvs > 2) { // NEAR or NEARISH
  ------------------
  |  Branch (1720:25): [True: 56.9k, False: 66.7k]
  |  Branch (1720:53): [True: 33.1k, False: 23.7k]
  ------------------
 1721|  33.1k|                        const int drl_ctx_v2 = get_drl_context(mvstack, 1);
 1722|  33.1k|                        b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  33.1k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1723|  33.1k|                                          ts->cdf.m.drl_bit[drl_ctx_v2]);
 1724|  33.1k|                    }
 1725|   123k|                }
 1726|   161k|                assert(b->drl_idx >= NEAREST_DRL && b->drl_idx <= NEARISH_DRL);
  ------------------
  |  Branch (1726:17): [True: 161k, False: 0]
  |  Branch (1726:17): [True: 161k, False: 0]
  ------------------
 1727|   161k|                if (n_mvs > 1) {
  ------------------
  |  Branch (1727:21): [True: 123k, False: 37.9k]
  ------------------
 1728|   123k|                    b->mv[0] = mvstack[b->drl_idx].mv.mv[0];
 1729|   123k|                } else {
 1730|  37.9k|                    assert(!b->drl_idx);
  ------------------
  |  Branch (1730:21): [True: 37.9k, False: 0]
  ------------------
 1731|  37.9k|                    b->mv[0] = mvstack[0].mv.mv[0];
 1732|  37.9k|                    fix_mv_precision(f->frame_hdr, &b->mv[0]);
 1733|  37.9k|                }
 1734|   161k|                if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   161k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 161k]
  |  |  ------------------
  |  |   35|   161k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   161k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1735|      0|                    printf("Post-intermode[%d,drl=%d]: r=%d\n",
 1736|      0|                           b->inter_mode, b->drl_idx, ts->msac.rng);
 1737|   161k|                const int mv_prec = f->frame_hdr->hp - f->frame_hdr->force_integer_mv;
 1738|   161k|                read_mv_residual(ts, &b->mv[0], mv_prec);
 1739|   161k|                if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   161k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 161k]
  |  |  ------------------
  |  |   35|   161k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   161k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1740|      0|                    printf("Post-residualmv[mv=y:%d,x:%d]: r=%d\n",
 1741|      0|                           b->mv[0].y, b->mv[0].x, ts->msac.rng);
 1742|   161k|            }
 1743|       |
 1744|       |            // interintra flags
 1745|   927k|            const int ii_sz_grp = dav1d_ymode_size_context[bs];
 1746|   927k|            if (f->seq_hdr->inter_intra &&
  ------------------
  |  Branch (1746:17): [True: 764k, False: 162k]
  ------------------
 1747|   764k|                interintra_allowed_mask & (1 << bs) &&
  ------------------
  |  Branch (1747:17): [True: 343k, False: 420k]
  ------------------
 1748|   343k|                dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   343k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  |  Branch (1748:17): [True: 41.5k, False: 302k]
  ------------------
 1749|   343k|                                             ts->cdf.m.interintra[ii_sz_grp]))
 1750|  41.5k|            {
 1751|  41.5k|                b->interintra_mode = dav1d_msac_decode_symbol_adapt4(&ts->msac,
  ------------------
  |  |   47|  41.5k|#define dav1d_msac_decode_symbol_adapt4  dav1d_msac_decode_symbol_adapt4_sse2
  ------------------
 1752|  41.5k|                                         ts->cdf.m.interintra_mode[ii_sz_grp],
 1753|  41.5k|                                         N_INTER_INTRA_PRED_MODES - 1);
 1754|  41.5k|                const int wedge_ctx = dav1d_wedge_ctx_lut[bs];
 1755|  41.5k|                b->interintra_type = INTER_INTRA_BLEND +
 1756|  41.5k|                                     dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  41.5k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1757|  41.5k|                                         ts->cdf.m.interintra_wedge[wedge_ctx]);
 1758|  41.5k|                if (b->interintra_type == INTER_INTRA_WEDGE)
  ------------------
  |  Branch (1758:21): [True: 10.1k, False: 31.4k]
  ------------------
 1759|  10.1k|                    b->wedge_idx = dav1d_msac_decode_symbol_adapt16(&ts->msac,
  ------------------
  |  |   57|  10.1k|#define dav1d_msac_decode_symbol_adapt16(ctx, cdf, symb) ((ctx)->symbol_adapt16(ctx, cdf, symb))
  ------------------
 1760|  41.5k|                                       ts->cdf.m.wedge_idx[wedge_ctx], 15);
 1761|   886k|            } else {
 1762|   886k|                b->interintra_type = INTER_INTRA_NONE;
 1763|   886k|            }
 1764|   927k|            if (DEBUG_BLOCK_INFO && f->seq_hdr->inter_intra &&
  ------------------
  |  |   34|   927k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 927k]
  |  |  ------------------
  |  |   35|   927k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   927k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  |  Branch (1764:37): [True: 0, False: 0]
  ------------------
 1765|      0|                interintra_allowed_mask & (1 << bs))
  ------------------
  |  Branch (1765:17): [True: 0, False: 0]
  ------------------
 1766|      0|            {
 1767|      0|                printf("Post-interintra[t=%d,m=%d,w=%d]: r=%d\n",
 1768|      0|                       b->interintra_type, b->interintra_mode,
 1769|      0|                       b->wedge_idx, ts->msac.rng);
 1770|      0|            }
 1771|       |
 1772|       |            // motion variation
 1773|   927k|            if (f->frame_hdr->switchable_motion_mode &&
  ------------------
  |  Branch (1773:17): [True: 859k, False: 68.0k]
  ------------------
 1774|   859k|                b->interintra_type == INTER_INTRA_NONE && imin(bw4, bh4) >= 2 &&
  ------------------
  |  Branch (1774:17): [True: 820k, False: 38.8k]
  |  Branch (1774:59): [True: 512k, False: 308k]
  ------------------
 1775|       |                // is not warped global motion
 1776|   512k|                !(!f->frame_hdr->force_integer_mv && b->inter_mode == GLOBALMV &&
  ------------------
  |  Branch (1776:19): [True: 400k, False: 112k]
  |  Branch (1776:54): [True: 165k, False: 235k]
  ------------------
 1777|   165k|                  f->frame_hdr->gmv[b->ref[0]].type > DAV1D_WM_TYPE_TRANSLATION) &&
  ------------------
  |  Branch (1777:19): [True: 21.3k, False: 143k]
  ------------------
 1778|       |                // has overlappable neighbours
 1779|   491k|                ((have_left && findoddzero(&t->l.intra[by4 + 1], h4 >> 1)) ||
  ------------------
  |  Branch (1779:19): [True: 445k, False: 45.7k]
  |  Branch (1779:32): [True: 431k, False: 14.3k]
  ------------------
 1780|  60.0k|                 (have_top && findoddzero(&t->a->intra[bx4 + 1], w4 >> 1))))
  ------------------
  |  Branch (1780:19): [True: 51.8k, False: 8.26k]
  |  Branch (1780:31): [True: 48.4k, False: 3.34k]
  ------------------
 1781|   479k|            {
 1782|       |                // reaching here means the block allows obmc - check warp by
 1783|       |                // finding matching-ref blocks in top/left edges
 1784|   479k|                uint64_t mask[2] = { 0, 0 };
 1785|   479k|                find_matching_ref(t, intra_edge_flags, bw4, bh4, w4, h4,
 1786|   479k|                                  have_left, have_top, b->ref[0], mask);
 1787|   479k|                const int allow_warp = !f->svc[b->ref[0]][0].scale &&
  ------------------
  |  Branch (1787:40): [True: 346k, False: 133k]
  ------------------
 1788|   346k|                    !f->frame_hdr->force_integer_mv &&
  ------------------
  |  Branch (1788:21): [True: 338k, False: 8.28k]
  ------------------
 1789|   338k|                    f->frame_hdr->warp_motion && (mask[0] | mask[1]);
  ------------------
  |  Branch (1789:21): [True: 280k, False: 57.2k]
  |  Branch (1789:50): [True: 262k, False: 18.6k]
  ------------------
 1790|       |
 1791|   479k|                b->motion_mode = allow_warp ?
  ------------------
  |  Branch (1791:34): [True: 262k, False: 217k]
  ------------------
 1792|   262k|                    dav1d_msac_decode_symbol_adapt4(&ts->msac,
  ------------------
  |  |   47|   262k|#define dav1d_msac_decode_symbol_adapt4  dav1d_msac_decode_symbol_adapt4_sse2
  ------------------
 1793|   262k|                        ts->cdf.m.motion_mode[bs], 2) :
 1794|   479k|                    dav1d_msac_decode_bool_adapt(&ts->msac, ts->cdf.m.obmc[bs]);
  ------------------
  |  |   52|   217k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1795|   479k|                if (b->motion_mode == MM_WARP) {
  ------------------
  |  Branch (1795:21): [True: 92.5k, False: 387k]
  ------------------
 1796|  92.5k|                    has_subpel_filter = 0;
 1797|  92.5k|                    derive_warpmv(t, bw4, bh4, mask, b->mv[0], &t->warpmv);
 1798|  92.5k|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
 1799|  92.5k|                    if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  92.5k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 92.5k]
  |  |  ------------------
  |  |   35|  92.5k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  92.5k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1800|      0|                        printf("[ %c%x %c%x %c%x\n  %c%x %c%x %c%x ]\n"
 1801|      0|                               "alpha=%c%x, beta=%c%x, gamma=%c%x, delta=%c%x, "
 1802|      0|                               "mv=y:%d,x:%d\n",
 1803|      0|                               signabs(t->warpmv.matrix[0]),
  ------------------
  |  | 1798|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (1798:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1804|      0|                               signabs(t->warpmv.matrix[1]),
  ------------------
  |  | 1798|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (1798:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1805|      0|                               signabs(t->warpmv.matrix[2]),
  ------------------
  |  | 1798|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (1798:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1806|      0|                               signabs(t->warpmv.matrix[3]),
  ------------------
  |  | 1798|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (1798:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1807|      0|                               signabs(t->warpmv.matrix[4]),
  ------------------
  |  | 1798|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (1798:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1808|      0|                               signabs(t->warpmv.matrix[5]),
  ------------------
  |  | 1798|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (1798:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1809|      0|                               signabs(t->warpmv.u.p.alpha),
  ------------------
  |  | 1798|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (1798:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1810|      0|                               signabs(t->warpmv.u.p.beta),
  ------------------
  |  | 1798|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (1798:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1811|      0|                               signabs(t->warpmv.u.p.gamma),
  ------------------
  |  | 1798|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (1798:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1812|      0|                               signabs(t->warpmv.u.p.delta),
  ------------------
  |  | 1798|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (1798:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1813|      0|                               b->mv[0].y, b->mv[0].x);
 1814|  92.5k|#undef signabs
 1815|  92.5k|                    if (t->frame_thread.pass) {
  ------------------
  |  Branch (1815:25): [True: 0, False: 92.5k]
  ------------------
 1816|      0|                        if (t->warpmv.type == DAV1D_WM_TYPE_AFFINE) {
  ------------------
  |  Branch (1816:29): [True: 0, False: 0]
  ------------------
 1817|      0|                            b->matrix[0] = t->warpmv.matrix[2] - 0x10000;
 1818|      0|                            b->matrix[1] = t->warpmv.matrix[3];
 1819|      0|                            b->matrix[2] = t->warpmv.matrix[4];
 1820|      0|                            b->matrix[3] = t->warpmv.matrix[5] - 0x10000;
 1821|      0|                        } else {
 1822|      0|                            b->matrix[0] = INT16_MIN;
 1823|      0|                        }
 1824|      0|                    }
 1825|  92.5k|                }
 1826|       |
 1827|   479k|                if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   479k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 479k]
  |  |  ------------------
  |  |   35|   479k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   479k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1828|      0|                    printf("Post-motionmode[%d]: r=%d [mask: 0x%" PRIx64 "/0x%"
 1829|      0|                           PRIx64 "]\n", b->motion_mode, ts->msac.rng, mask[0],
 1830|      0|                            mask[1]);
 1831|   479k|            } else {
 1832|   447k|                b->motion_mode = MM_TRANSLATION;
 1833|   447k|            }
 1834|   927k|        }
 1835|       |
 1836|       |        // subpel filter
 1837|  1.14M|        enum Dav1dFilterMode filter[2];
 1838|  1.14M|        if (f->frame_hdr->subpel_filter_mode == DAV1D_FILTER_SWITCHABLE) {
  ------------------
  |  Branch (1838:13): [True: 513k, False: 627k]
  ------------------
 1839|   513k|            if (has_subpel_filter) {
  ------------------
  |  Branch (1839:17): [True: 348k, False: 165k]
  ------------------
 1840|   348k|                const int comp = b->comp_type != COMP_INTER_NONE;
 1841|   348k|                const int ctx1 = get_filter_ctx(t->a, &t->l, comp, 0, b->ref[0],
 1842|   348k|                                                by4, bx4);
 1843|   348k|                filter[0] = dav1d_msac_decode_symbol_adapt4(&ts->msac,
  ------------------
  |  |   47|   348k|#define dav1d_msac_decode_symbol_adapt4  dav1d_msac_decode_symbol_adapt4_sse2
  ------------------
 1844|   348k|                               ts->cdf.m.filter[0][ctx1],
 1845|   348k|                               DAV1D_N_SWITCHABLE_FILTERS - 1);
 1846|   348k|                if (f->seq_hdr->dual_filter) {
  ------------------
  |  Branch (1846:21): [True: 271k, False: 76.6k]
  ------------------
 1847|   271k|                    const int ctx2 = get_filter_ctx(t->a, &t->l, comp, 1,
 1848|   271k|                                                    b->ref[0], by4, bx4);
 1849|   271k|                    if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   271k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 271k]
  |  |  ------------------
  |  |   35|   271k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   271k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1850|      0|                        printf("Post-subpel_filter1[%d,ctx=%d]: r=%d\n",
 1851|      0|                               filter[0], ctx1, ts->msac.rng);
 1852|   271k|                    filter[1] = dav1d_msac_decode_symbol_adapt4(&ts->msac,
  ------------------
  |  |   47|   271k|#define dav1d_msac_decode_symbol_adapt4  dav1d_msac_decode_symbol_adapt4_sse2
  ------------------
 1853|   271k|                                    ts->cdf.m.filter[1][ctx2],
 1854|   271k|                                    DAV1D_N_SWITCHABLE_FILTERS - 1);
 1855|   271k|                    if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   271k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 271k]
  |  |  ------------------
  |  |   35|   271k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   271k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1856|      0|                        printf("Post-subpel_filter2[%d,ctx=%d]: r=%d\n",
 1857|      0|                               filter[1], ctx2, ts->msac.rng);
 1858|   271k|                } else {
 1859|  76.6k|                    filter[1] = filter[0];
 1860|  76.6k|                    if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  76.6k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 76.6k]
  |  |  ------------------
  |  |   35|  76.6k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  76.6k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1861|      0|                        printf("Post-subpel_filter[%d,ctx=%d]: r=%d\n",
 1862|      0|                               filter[0], ctx1, ts->msac.rng);
 1863|  76.6k|                }
 1864|   348k|            } else {
 1865|   165k|                filter[0] = filter[1] = DAV1D_FILTER_8TAP_REGULAR;
 1866|   165k|            }
 1867|   627k|        } else {
 1868|   627k|            filter[0] = filter[1] = f->frame_hdr->subpel_filter_mode;
 1869|   627k|        }
 1870|  1.14M|        b->filter2d = dav1d_filter_2d[filter[1]][filter[0]];
 1871|       |
 1872|  1.14M|        read_vartx_tree(t, b, bs, bx4, by4);
 1873|       |
 1874|       |        // reconstruction
 1875|  1.14M|        if (t->frame_thread.pass == 1) {
  ------------------
  |  Branch (1875:13): [True: 0, False: 1.14M]
  ------------------
 1876|      0|            f->bd_fn.read_coef_blocks(t, bs, b);
 1877|  1.14M|        } else {
 1878|  1.14M|            if (f->bd_fn.recon_b_inter(t, bs, b)) return -1;
  ------------------
  |  Branch (1878:17): [True: 0, False: 1.14M]
  ------------------
 1879|  1.14M|        }
 1880|       |
 1881|  1.14M|        if (f->frame_hdr->loopfilter.level_y[0] ||
  ------------------
  |  Branch (1881:13): [True: 692k, False: 448k]
  ------------------
 1882|   448k|            f->frame_hdr->loopfilter.level_y[1])
  ------------------
  |  Branch (1882:13): [True: 106k, False: 342k]
  ------------------
 1883|   798k|        {
 1884|   798k|            const int is_globalmv =
 1885|   798k|                b->inter_mode == (is_comp ? GLOBALMV_GLOBALMV : GLOBALMV);
  ------------------
  |  Branch (1885:35): [True: 110k, False: 687k]
  ------------------
 1886|   798k|            const uint8_t (*const lf_lvls)[8][2] = (const uint8_t (*)[8][2])
 1887|   798k|                &ts->lflvl[b->seg_id][0][b->ref[0] + 1][!is_globalmv];
 1888|   798k|            const uint16_t tx_split[2] = { b->tx_split0, b->tx_split1 };
 1889|   798k|            enum RectTxfmSize ytx = b->max_ytx, uvtx = b->uvtx;
 1890|   798k|            if (f->frame_hdr->segmentation.lossless[b->seg_id]) {
  ------------------
  |  Branch (1890:17): [True: 4.06k, False: 794k]
  ------------------
 1891|  4.06k|                ytx  = (enum RectTxfmSize) TX_4X4;
 1892|  4.06k|                uvtx = (enum RectTxfmSize) TX_4X4;
 1893|  4.06k|            }
 1894|   798k|            dav1d_create_lf_mask_inter(t->lf_mask, f->lf.level, f->b4_stride, lf_lvls,
 1895|   798k|                                       t->bx, t->by, f->w4, f->h4, b->skip, bs,
 1896|   798k|                                       ytx, tx_split, uvtx, f->cur.p.layout,
 1897|   798k|                                       &t->a->tx_lpf_y[bx4], &t->l.tx_lpf_y[by4],
 1898|   798k|                                       has_chroma ? &t->a->tx_lpf_uv[cbx4] : NULL,
  ------------------
  |  Branch (1898:40): [True: 258k, False: 539k]
  ------------------
 1899|   798k|                                       has_chroma ? &t->l.tx_lpf_uv[cby4] : NULL);
  ------------------
  |  Branch (1899:40): [True: 258k, False: 539k]
  ------------------
 1900|   798k|        }
 1901|       |
 1902|       |        // context updates
 1903|  1.14M|        if (is_comp)
  ------------------
  |  Branch (1903:13): [True: 213k, False: 927k]
  ------------------
 1904|   213k|            splat_tworef_mv(f->c, t, bs, b, bw4, bh4);
 1905|   927k|        else
 1906|   927k|            splat_oneref_mv(f->c, t, bs, b, bw4, bh4);
 1907|  1.14M|        BlockContext *edge = t->a;
 1908|  3.42M|        for (int i = 0, off = bx4; i < 2; i++, off = by4, edge = &t->l) {
  ------------------
  |  Branch (1908:36): [True: 2.28M, False: 1.14M]
  ------------------
 1909|  2.28M|#define set_ctx(rep_macro) \
 1910|  2.28M|            rep_macro(edge->seg_pred, off, seg_pred); \
 1911|  2.28M|            rep_macro(edge->skip_mode, off, b->skip_mode); \
 1912|  2.28M|            rep_macro(edge->intra, off, 0); \
 1913|  2.28M|            rep_macro(edge->skip, off, b->skip); \
 1914|  2.28M|            rep_macro(edge->pal_sz, off, 0); \
 1915|       |            /* see aomedia bug 2183 for why this is outside if (has_chroma) */ \
 1916|  2.28M|            rep_macro(t->pal_sz_uv[i], off, 0); \
 1917|  2.28M|            rep_macro(edge->tx_intra, off, b_dim[2 + i]); \
 1918|  2.28M|            rep_macro(edge->comp_type, off, b->comp_type); \
 1919|  2.28M|            rep_macro(edge->filter[0], off, filter[0]); \
 1920|  2.28M|            rep_macro(edge->filter[1], off, filter[1]); \
 1921|  2.28M|            rep_macro(edge->mode, off, b->inter_mode); \
 1922|  2.28M|            rep_macro(edge->ref[0], off, b->ref[0]); \
 1923|  2.28M|            rep_macro(edge->ref[1], off, ((uint8_t) b->ref[1]))
 1924|  2.28M|            case_set(b_dim[2 + i]);
  ------------------
  |  |   70|  2.28M|    switch (var) { \
  |  |   71|   425k|    case 0: set_ctx(set_ctx1); break; \
  |  |  ------------------
  |  |  |  | 1910|   425k|            rep_macro(edge->seg_pred, off, seg_pred); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   425k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   425k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1911|   425k|            rep_macro(edge->skip_mode, off, b->skip_mode); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   425k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   425k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1912|   425k|            rep_macro(edge->intra, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   425k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   425k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1913|   425k|            rep_macro(edge->skip, off, b->skip); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   425k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   425k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1914|   425k|            rep_macro(edge->pal_sz, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   425k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   425k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1915|   425k|            /* see aomedia bug 2183 for why this is outside if (has_chroma) */ \
  |  |  |  | 1916|   425k|            rep_macro(t->pal_sz_uv[i], off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   425k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   425k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1917|   425k|            rep_macro(edge->tx_intra, off, b_dim[2 + i]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   425k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   425k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1918|   425k|            rep_macro(edge->comp_type, off, b->comp_type); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   425k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   425k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1919|   425k|            rep_macro(edge->filter[0], off, filter[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   425k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   425k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1920|   425k|            rep_macro(edge->filter[1], off, filter[1]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   425k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   425k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1921|   425k|            rep_macro(edge->mode, off, b->inter_mode); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   425k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   425k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1922|   425k|            rep_macro(edge->ref[0], off, b->ref[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   425k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   425k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1923|   425k|            rep_macro(edge->ref[1], off, ((uint8_t) b->ref[1]))
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   425k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   425k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (71:5): [True: 425k, False: 1.85M]
  |  |  ------------------
  |  |   72|   757k|    case 1: set_ctx(set_ctx2); break; \
  |  |  ------------------
  |  |  |  | 1910|   757k|            rep_macro(edge->seg_pred, off, seg_pred); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   757k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   757k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1911|   757k|            rep_macro(edge->skip_mode, off, b->skip_mode); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   757k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   757k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1912|   757k|            rep_macro(edge->intra, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   757k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   757k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1913|   757k|            rep_macro(edge->skip, off, b->skip); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   757k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   757k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1914|   757k|            rep_macro(edge->pal_sz, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   757k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   757k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1915|   757k|            /* see aomedia bug 2183 for why this is outside if (has_chroma) */ \
  |  |  |  | 1916|   757k|            rep_macro(t->pal_sz_uv[i], off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   757k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   757k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1917|   757k|            rep_macro(edge->tx_intra, off, b_dim[2 + i]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   757k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   757k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1918|   757k|            rep_macro(edge->comp_type, off, b->comp_type); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   757k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   757k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1919|   757k|            rep_macro(edge->filter[0], off, filter[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   757k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   757k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1920|   757k|            rep_macro(edge->filter[1], off, filter[1]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   757k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   757k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1921|   757k|            rep_macro(edge->mode, off, b->inter_mode); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   757k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   757k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1922|   757k|            rep_macro(edge->ref[0], off, b->ref[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   757k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   757k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1923|   757k|            rep_macro(edge->ref[1], off, ((uint8_t) b->ref[1]))
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   757k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   757k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (72:5): [True: 757k, False: 1.52M]
  |  |  ------------------
  |  |   73|   578k|    case 2: set_ctx(set_ctx4); break; \
  |  |  ------------------
  |  |  |  | 1910|   578k|            rep_macro(edge->seg_pred, off, seg_pred); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   578k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   578k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1911|   578k|            rep_macro(edge->skip_mode, off, b->skip_mode); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   578k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   578k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1912|   578k|            rep_macro(edge->intra, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   578k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   578k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1913|   578k|            rep_macro(edge->skip, off, b->skip); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   578k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   578k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1914|   578k|            rep_macro(edge->pal_sz, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   578k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   578k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1915|   578k|            /* see aomedia bug 2183 for why this is outside if (has_chroma) */ \
  |  |  |  | 1916|   578k|            rep_macro(t->pal_sz_uv[i], off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   578k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   578k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1917|   578k|            rep_macro(edge->tx_intra, off, b_dim[2 + i]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   578k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   578k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1918|   578k|            rep_macro(edge->comp_type, off, b->comp_type); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   578k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   578k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1919|   578k|            rep_macro(edge->filter[0], off, filter[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   578k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   578k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1920|   578k|            rep_macro(edge->filter[1], off, filter[1]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   578k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   578k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1921|   578k|            rep_macro(edge->mode, off, b->inter_mode); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   578k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   578k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1922|   578k|            rep_macro(edge->ref[0], off, b->ref[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   578k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   578k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1923|   578k|            rep_macro(edge->ref[1], off, ((uint8_t) b->ref[1]))
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   578k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   578k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (73:5): [True: 578k, False: 1.70M]
  |  |  ------------------
  |  |   74|   230k|    case 3: set_ctx(set_ctx8); break; \
  |  |  ------------------
  |  |  |  | 1910|   230k|            rep_macro(edge->seg_pred, off, seg_pred); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   230k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   230k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1911|   230k|            rep_macro(edge->skip_mode, off, b->skip_mode); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   230k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   230k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1912|   230k|            rep_macro(edge->intra, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   230k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   230k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1913|   230k|            rep_macro(edge->skip, off, b->skip); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   230k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   230k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1914|   230k|            rep_macro(edge->pal_sz, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   230k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   230k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1915|   230k|            /* see aomedia bug 2183 for why this is outside if (has_chroma) */ \
  |  |  |  | 1916|   230k|            rep_macro(t->pal_sz_uv[i], off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   230k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   230k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1917|   230k|            rep_macro(edge->tx_intra, off, b_dim[2 + i]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   230k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   230k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1918|   230k|            rep_macro(edge->comp_type, off, b->comp_type); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   230k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   230k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1919|   230k|            rep_macro(edge->filter[0], off, filter[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   230k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   230k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1920|   230k|            rep_macro(edge->filter[1], off, filter[1]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   230k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   230k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1921|   230k|            rep_macro(edge->mode, off, b->inter_mode); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   230k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   230k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1922|   230k|            rep_macro(edge->ref[0], off, b->ref[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   230k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   230k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1923|   230k|            rep_macro(edge->ref[1], off, ((uint8_t) b->ref[1]))
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   230k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   230k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (74:5): [True: 230k, False: 2.05M]
  |  |  ------------------
  |  |   75|   215k|    case 4: set_ctx(set_ctx16); break; \
  |  |  ------------------
  |  |  |  | 1910|   215k|            rep_macro(edge->seg_pred, off, seg_pred); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   215k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   215k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   215k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   215k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 215k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1911|   215k|            rep_macro(edge->skip_mode, off, b->skip_mode); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   215k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   215k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   215k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   215k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 215k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1912|   215k|            rep_macro(edge->intra, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   215k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   215k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   215k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   215k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 215k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1913|   215k|            rep_macro(edge->skip, off, b->skip); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   215k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   215k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   215k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   215k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 215k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1914|   215k|            rep_macro(edge->pal_sz, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   215k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   215k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   215k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   215k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 215k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1915|   215k|            /* see aomedia bug 2183 for why this is outside if (has_chroma) */ \
  |  |  |  | 1916|   215k|            rep_macro(t->pal_sz_uv[i], off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   215k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   215k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   215k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   215k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 215k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1917|   215k|            rep_macro(edge->tx_intra, off, b_dim[2 + i]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   215k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   215k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   215k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   215k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 215k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1918|   215k|            rep_macro(edge->comp_type, off, b->comp_type); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   215k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   215k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   215k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   215k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 215k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1919|   215k|            rep_macro(edge->filter[0], off, filter[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   215k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   215k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   215k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   215k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 215k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1920|   215k|            rep_macro(edge->filter[1], off, filter[1]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   215k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   215k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   215k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   215k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 215k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1921|   215k|            rep_macro(edge->mode, off, b->inter_mode); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   215k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   215k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   215k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   215k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 215k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1922|   215k|            rep_macro(edge->ref[0], off, b->ref[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   215k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   215k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   215k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   215k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 215k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1923|   215k|            rep_macro(edge->ref[1], off, ((uint8_t) b->ref[1]))
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   215k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   215k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   215k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   215k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 215k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (75:5): [True: 215k, False: 2.06M]
  |  |  ------------------
  |  |   76|  73.3k|    case 5: set_ctx(set_ctx32); break; \
  |  |  ------------------
  |  |  |  | 1910|  73.3k|            rep_macro(edge->seg_pred, off, seg_pred); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  73.3k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  73.3k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  73.3k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  73.3k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 73.3k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1911|  73.3k|            rep_macro(edge->skip_mode, off, b->skip_mode); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  73.3k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  73.3k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  73.3k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  73.3k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 73.3k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1912|  73.3k|            rep_macro(edge->intra, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  73.3k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  73.3k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  73.3k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  73.3k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 73.3k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1913|  73.3k|            rep_macro(edge->skip, off, b->skip); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  73.3k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  73.3k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  73.3k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  73.3k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 73.3k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1914|  73.3k|            rep_macro(edge->pal_sz, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  73.3k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  73.3k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  73.3k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  73.3k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 73.3k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1915|  73.3k|            /* see aomedia bug 2183 for why this is outside if (has_chroma) */ \
  |  |  |  | 1916|  73.3k|            rep_macro(t->pal_sz_uv[i], off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  73.3k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  73.3k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  73.3k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  73.3k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 73.3k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1917|  73.3k|            rep_macro(edge->tx_intra, off, b_dim[2 + i]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  73.3k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  73.3k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  73.3k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  73.3k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 73.3k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1918|  73.3k|            rep_macro(edge->comp_type, off, b->comp_type); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  73.3k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  73.3k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  73.3k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  73.3k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 73.3k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1919|  73.3k|            rep_macro(edge->filter[0], off, filter[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  73.3k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  73.3k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  73.3k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  73.3k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 73.3k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1920|  73.3k|            rep_macro(edge->filter[1], off, filter[1]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  73.3k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  73.3k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  73.3k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  73.3k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 73.3k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1921|  73.3k|            rep_macro(edge->mode, off, b->inter_mode); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  73.3k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  73.3k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  73.3k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  73.3k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 73.3k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1922|  73.3k|            rep_macro(edge->ref[0], off, b->ref[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  73.3k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  73.3k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  73.3k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  73.3k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 73.3k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1923|  73.3k|            rep_macro(edge->ref[1], off, ((uint8_t) b->ref[1]))
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  73.3k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  73.3k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  73.3k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  73.3k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 73.3k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (76:5): [True: 73.3k, False: 2.20M]
  |  |  ------------------
  |  |   77|      0|    default: assert(0); \
  |  |  ------------------
  |  |  |  Branch (77:5): [True: 0, False: 2.28M]
  |  |  ------------------
  |  |   78|  2.28M|    }
  ------------------
  |  Branch (1924:13): [Folded, False: 0]
  ------------------
 1925|  2.28M|#undef set_ctx
 1926|  2.28M|        }
 1927|  1.14M|        if (has_chroma) {
  ------------------
  |  Branch (1927:13): [True: 549k, False: 591k]
  ------------------
 1928|   549k|            dav1d_memset_pow2[ulog2(cbw4)](&t->a->uvmode[cbx4], DC_PRED);
 1929|   549k|            dav1d_memset_pow2[ulog2(cbh4)](&t->l.uvmode[cby4], DC_PRED);
 1930|   549k|        }
 1931|  1.14M|    }
 1932|       |
 1933|       |    // update contexts
 1934|  3.96M|    if (f->frame_hdr->segmentation.enabled &&
  ------------------
  |  Branch (1934:9): [True: 1.27M, False: 2.68M]
  ------------------
 1935|  1.27M|        f->frame_hdr->segmentation.update_map)
  ------------------
  |  Branch (1935:9): [True: 1.00M, False: 269k]
  ------------------
 1936|  1.00M|    {
 1937|  1.00M|        uint8_t *seg_ptr = &f->cur_segmap[t->by * f->b4_stride + t->bx];
 1938|  1.00M|#define set_ctx(rep_macro) \
 1939|  1.00M|        for (int y = 0; y < bh4; y++) { \
 1940|  1.00M|            rep_macro(seg_ptr, 0, b->seg_id); \
 1941|  1.00M|            seg_ptr += f->b4_stride; \
 1942|  1.00M|        }
 1943|  1.00M|        case_set(b_dim[2]);
  ------------------
  |  |   70|  1.00M|    switch (var) { \
  |  |   71|   276k|    case 0: set_ctx(set_ctx1); break; \
  |  |  ------------------
  |  |  |  | 1939|   671k|        for (int y = 0; y < bh4; y++) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (1939:25): [True: 395k, False: 276k]
  |  |  |  |  ------------------
  |  |  |  | 1940|   395k|            rep_macro(seg_ptr, 0, b->seg_id); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   395k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   395k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1941|   395k|            seg_ptr += f->b4_stride; \
  |  |  |  | 1942|   395k|        }
  |  |  ------------------
  |  |  |  Branch (71:5): [True: 276k, False: 730k]
  |  |  ------------------
  |  |   72|   195k|    case 1: set_ctx(set_ctx2); break; \
  |  |  ------------------
  |  |  |  | 1939|   760k|        for (int y = 0; y < bh4; y++) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (1939:25): [True: 564k, False: 195k]
  |  |  |  |  ------------------
  |  |  |  | 1940|   564k|            rep_macro(seg_ptr, 0, b->seg_id); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   564k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   564k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1941|   564k|            seg_ptr += f->b4_stride; \
  |  |  |  | 1942|   564k|        }
  |  |  ------------------
  |  |  |  Branch (72:5): [True: 195k, False: 810k]
  |  |  ------------------
  |  |   73|   230k|    case 2: set_ctx(set_ctx4); break; \
  |  |  ------------------
  |  |  |  | 1939|   989k|        for (int y = 0; y < bh4; y++) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (1939:25): [True: 758k, False: 230k]
  |  |  |  |  ------------------
  |  |  |  | 1940|   758k|            rep_macro(seg_ptr, 0, b->seg_id); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   758k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   758k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1941|   758k|            seg_ptr += f->b4_stride; \
  |  |  |  | 1942|   758k|        }
  |  |  ------------------
  |  |  |  Branch (73:5): [True: 230k, False: 775k]
  |  |  ------------------
  |  |   74|  99.2k|    case 3: set_ctx(set_ctx8); break; \
  |  |  ------------------
  |  |  |  | 1939|   647k|        for (int y = 0; y < bh4; y++) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (1939:25): [True: 548k, False: 99.2k]
  |  |  |  |  ------------------
  |  |  |  | 1940|   548k|            rep_macro(seg_ptr, 0, b->seg_id); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   548k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   548k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1941|   548k|            seg_ptr += f->b4_stride; \
  |  |  |  | 1942|   548k|        }
  |  |  ------------------
  |  |  |  Branch (74:5): [True: 99.2k, False: 907k]
  |  |  ------------------
  |  |   75|   130k|    case 4: set_ctx(set_ctx16); break; \
  |  |  ------------------
  |  |  |  | 1939|  2.15M|        for (int y = 0; y < bh4; y++) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (1939:25): [True: 2.01M, False: 130k]
  |  |  |  |  ------------------
  |  |  |  | 1940|  2.01M|            rep_macro(seg_ptr, 0, b->seg_id); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|  2.01M|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|  2.01M|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|  2.01M|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|  2.01M|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 2.01M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1941|  2.01M|            seg_ptr += f->b4_stride; \
  |  |  |  | 1942|  2.01M|        }
  |  |  ------------------
  |  |  |  Branch (75:5): [True: 130k, False: 875k]
  |  |  ------------------
  |  |   76|  73.4k|    case 5: set_ctx(set_ctx32); break; \
  |  |  ------------------
  |  |  |  | 1939|  2.39M|        for (int y = 0; y < bh4; y++) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (1939:25): [True: 2.32M, False: 73.4k]
  |  |  |  |  ------------------
  |  |  |  | 1940|  2.32M|            rep_macro(seg_ptr, 0, b->seg_id); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  2.32M|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  2.32M|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  2.32M|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  2.32M|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 2.32M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1941|  2.32M|            seg_ptr += f->b4_stride; \
  |  |  |  | 1942|  2.32M|        }
  |  |  ------------------
  |  |  |  Branch (76:5): [True: 73.4k, False: 933k]
  |  |  ------------------
  |  |   77|      0|    default: assert(0); \
  |  |  ------------------
  |  |  |  Branch (77:5): [True: 0, False: 1.00M]
  |  |  ------------------
  |  |   78|  1.00M|    }
  ------------------
  |  Branch (1943:9): [Folded, False: 0]
  ------------------
 1944|  1.00M|#undef set_ctx
 1945|  1.00M|    }
 1946|  3.96M|    if (!b->skip) {
  ------------------
  |  Branch (1946:9): [True: 2.05M, False: 1.90M]
  ------------------
 1947|  2.05M|        uint16_t (*noskip_mask)[2] = &t->lf_mask->noskip_mask[by4 >> 1];
 1948|  2.05M|        const unsigned mask = (~0U >> (32 - bw4)) << (bx4 & 15);
 1949|  2.05M|        const int bx_idx = (bx4 & 16) >> 4;
 1950|  6.34M|        for (int y = 0; y < bh4; y += 2, noskip_mask++) {
  ------------------
  |  Branch (1950:25): [True: 4.29M, False: 2.05M]
  ------------------
 1951|  4.29M|            (*noskip_mask)[bx_idx] |= mask;
 1952|  4.29M|            if (bw4 == 32) // this should be mask >> 16, but it's 0xffffffff anyway
  ------------------
  |  Branch (1952:17): [True: 435k, False: 3.86M]
  ------------------
 1953|   435k|                (*noskip_mask)[1] |= mask;
 1954|  4.29M|        }
 1955|  2.05M|    }
 1956|       |
 1957|  3.96M|    if (t->frame_thread.pass == 1 && !b->intra && IS_INTER_OR_SWITCH(f->frame_hdr)) {
  ------------------
  |  |   36|      0|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  |  Branch (1957:9): [True: 0, False: 3.96M]
  |  Branch (1957:38): [True: 0, False: 0]
  ------------------
 1958|      0|        const int sby = (t->by - ts->tiling.row_start) >> f->sb_shift;
 1959|      0|        int (*const lowest_px)[2] = ts->lowest_pixel[sby];
 1960|       |
 1961|       |        // keep track of motion vectors for each reference
 1962|      0|        if (b->comp_type == COMP_INTER_NONE) {
  ------------------
  |  Branch (1962:13): [True: 0, False: 0]
  ------------------
 1963|       |            // y
 1964|      0|            if (imin(bw4, bh4) > 1 &&
  ------------------
  |  Branch (1964:17): [True: 0, False: 0]
  ------------------
 1965|      0|                ((b->inter_mode == GLOBALMV && f->gmv_warp_allowed[b->ref[0]]) ||
  ------------------
  |  Branch (1965:19): [True: 0, False: 0]
  |  Branch (1965:48): [True: 0, False: 0]
  ------------------
 1966|      0|                 (b->motion_mode == MM_WARP && t->warpmv.type > DAV1D_WM_TYPE_TRANSLATION)))
  ------------------
  |  Branch (1966:19): [True: 0, False: 0]
  |  Branch (1966:48): [True: 0, False: 0]
  ------------------
 1967|      0|            {
 1968|      0|                affine_lowest_px_luma(t, &lowest_px[b->ref[0]][0], b_dim,
 1969|      0|                                      b->motion_mode == MM_WARP ? &t->warpmv :
  ------------------
  |  Branch (1969:39): [True: 0, False: 0]
  ------------------
 1970|      0|                                      &f->frame_hdr->gmv[b->ref[0]]);
 1971|      0|            } else {
 1972|      0|                mc_lowest_px(&lowest_px[b->ref[0]][0], t->by, bh4, b->mv[0].y,
 1973|      0|                             0, &f->svc[b->ref[0]][1]);
 1974|      0|                if (b->motion_mode == MM_OBMC) {
  ------------------
  |  Branch (1974:21): [True: 0, False: 0]
  ------------------
 1975|      0|                    obmc_lowest_px(t, lowest_px, 0, b_dim, bx4, by4, w4, h4);
 1976|      0|                }
 1977|      0|            }
 1978|       |
 1979|       |            // uv
 1980|      0|            if (has_chroma) {
  ------------------
  |  Branch (1980:17): [True: 0, False: 0]
  ------------------
 1981|       |                // sub8x8 derivation
 1982|      0|                int is_sub8x8 = bw4 == ss_hor || bh4 == ss_ver;
  ------------------
  |  Branch (1982:33): [True: 0, False: 0]
  |  Branch (1982:50): [True: 0, False: 0]
  ------------------
 1983|      0|                refmvs_block *const *r;
 1984|      0|                if (is_sub8x8) {
  ------------------
  |  Branch (1984:21): [True: 0, False: 0]
  ------------------
 1985|      0|                    assert(ss_hor == 1);
  ------------------
  |  Branch (1985:21): [True: 0, False: 0]
  ------------------
 1986|      0|                    r = &t->rt.r[(t->by & 31) + 5];
 1987|      0|                    if (bw4 == 1) is_sub8x8 &= r[0][t->bx - 1].ref.ref[0] > 0;
  ------------------
  |  Branch (1987:25): [True: 0, False: 0]
  ------------------
 1988|      0|                    if (bh4 == ss_ver) is_sub8x8 &= r[-1][t->bx].ref.ref[0] > 0;
  ------------------
  |  Branch (1988:25): [True: 0, False: 0]
  ------------------
 1989|      0|                    if (bw4 == 1 && bh4 == ss_ver)
  ------------------
  |  Branch (1989:25): [True: 0, False: 0]
  |  Branch (1989:37): [True: 0, False: 0]
  ------------------
 1990|      0|                        is_sub8x8 &= r[-1][t->bx - 1].ref.ref[0] > 0;
 1991|      0|                }
 1992|       |
 1993|       |                // chroma prediction
 1994|      0|                if (is_sub8x8) {
  ------------------
  |  Branch (1994:21): [True: 0, False: 0]
  ------------------
 1995|      0|                    assert(ss_hor == 1);
  ------------------
  |  Branch (1995:21): [True: 0, False: 0]
  ------------------
 1996|      0|                    if (bw4 == 1 && bh4 == ss_ver) {
  ------------------
  |  Branch (1996:25): [True: 0, False: 0]
  |  Branch (1996:37): [True: 0, False: 0]
  ------------------
 1997|      0|                        const refmvs_block *const rr = &r[-1][t->bx - 1];
 1998|      0|                        mc_lowest_px(&lowest_px[rr->ref.ref[0] - 1][1],
 1999|      0|                                     t->by - 1, bh4, rr->mv.mv[0].y, ss_ver,
 2000|      0|                                     &f->svc[rr->ref.ref[0] - 1][1]);
 2001|      0|                    }
 2002|      0|                    if (bw4 == 1) {
  ------------------
  |  Branch (2002:25): [True: 0, False: 0]
  ------------------
 2003|      0|                        const refmvs_block *const rr = &r[0][t->bx - 1];
 2004|      0|                        mc_lowest_px(&lowest_px[rr->ref.ref[0] - 1][1],
 2005|      0|                                     t->by, bh4, rr->mv.mv[0].y, ss_ver,
 2006|      0|                                     &f->svc[rr->ref.ref[0] - 1][1]);
 2007|      0|                    }
 2008|      0|                    if (bh4 == ss_ver) {
  ------------------
  |  Branch (2008:25): [True: 0, False: 0]
  ------------------
 2009|      0|                        const refmvs_block *const rr = &r[-1][t->bx];
 2010|      0|                        mc_lowest_px(&lowest_px[rr->ref.ref[0] - 1][1],
 2011|      0|                                     t->by - 1, bh4, rr->mv.mv[0].y, ss_ver,
 2012|      0|                                     &f->svc[rr->ref.ref[0] - 1][1]);
 2013|      0|                    }
 2014|      0|                    mc_lowest_px(&lowest_px[b->ref[0]][1], t->by, bh4,
 2015|      0|                                 b->mv[0].y, ss_ver, &f->svc[b->ref[0]][1]);
 2016|      0|                } else {
 2017|      0|                    if (imin(cbw4, cbh4) > 1 &&
  ------------------
  |  Branch (2017:25): [True: 0, False: 0]
  ------------------
 2018|      0|                        ((b->inter_mode == GLOBALMV && f->gmv_warp_allowed[b->ref[0]]) ||
  ------------------
  |  Branch (2018:27): [True: 0, False: 0]
  |  Branch (2018:56): [True: 0, False: 0]
  ------------------
 2019|      0|                         (b->motion_mode == MM_WARP && t->warpmv.type > DAV1D_WM_TYPE_TRANSLATION)))
  ------------------
  |  Branch (2019:27): [True: 0, False: 0]
  |  Branch (2019:56): [True: 0, False: 0]
  ------------------
 2020|      0|                    {
 2021|      0|                        affine_lowest_px_chroma(t, &lowest_px[b->ref[0]][1], b_dim,
 2022|      0|                                                b->motion_mode == MM_WARP ? &t->warpmv :
  ------------------
  |  Branch (2022:49): [True: 0, False: 0]
  ------------------
 2023|      0|                                                &f->frame_hdr->gmv[b->ref[0]]);
 2024|      0|                    } else {
 2025|      0|                        mc_lowest_px(&lowest_px[b->ref[0]][1],
 2026|      0|                                     t->by & ~ss_ver, bh4 << (bh4 == ss_ver),
 2027|      0|                                     b->mv[0].y, ss_ver, &f->svc[b->ref[0]][1]);
 2028|      0|                        if (b->motion_mode == MM_OBMC) {
  ------------------
  |  Branch (2028:29): [True: 0, False: 0]
  ------------------
 2029|      0|                            obmc_lowest_px(t, lowest_px, 1, b_dim, bx4, by4, w4, h4);
 2030|      0|                        }
 2031|      0|                    }
 2032|      0|                }
 2033|      0|            }
 2034|      0|        } else {
 2035|       |            // y
 2036|      0|            for (int i = 0; i < 2; i++) {
  ------------------
  |  Branch (2036:29): [True: 0, False: 0]
  ------------------
 2037|      0|                if (b->inter_mode == GLOBALMV_GLOBALMV && f->gmv_warp_allowed[b->ref[i]]) {
  ------------------
  |  Branch (2037:21): [True: 0, False: 0]
  |  Branch (2037:59): [True: 0, False: 0]
  ------------------
 2038|      0|                    affine_lowest_px_luma(t, &lowest_px[b->ref[i]][0], b_dim,
 2039|      0|                                          &f->frame_hdr->gmv[b->ref[i]]);
 2040|      0|                } else {
 2041|      0|                    mc_lowest_px(&lowest_px[b->ref[i]][0], t->by, bh4,
 2042|      0|                                 b->mv[i].y, 0, &f->svc[b->ref[i]][1]);
 2043|      0|                }
 2044|      0|            }
 2045|       |
 2046|       |            // uv
 2047|      0|            if (has_chroma) for (int i = 0; i < 2; i++) {
  ------------------
  |  Branch (2047:17): [True: 0, False: 0]
  |  Branch (2047:45): [True: 0, False: 0]
  ------------------
 2048|      0|                if (b->inter_mode == GLOBALMV_GLOBALMV &&
  ------------------
  |  Branch (2048:21): [True: 0, False: 0]
  ------------------
 2049|      0|                    imin(cbw4, cbh4) > 1 && f->gmv_warp_allowed[b->ref[i]])
  ------------------
  |  Branch (2049:21): [True: 0, False: 0]
  |  Branch (2049:45): [True: 0, False: 0]
  ------------------
 2050|      0|                {
 2051|      0|                    affine_lowest_px_chroma(t, &lowest_px[b->ref[i]][1], b_dim,
 2052|      0|                                            &f->frame_hdr->gmv[b->ref[i]]);
 2053|      0|                } else {
 2054|      0|                    mc_lowest_px(&lowest_px[b->ref[i]][1], t->by, bh4,
 2055|      0|                                 b->mv[i].y, ss_ver, &f->svc[b->ref[i]][1]);
 2056|      0|                }
 2057|      0|            }
 2058|      0|        }
 2059|      0|    }
 2060|       |
 2061|  3.96M|    return 0;
 2062|  3.96M|}
decode.c:get_prev_frame_segid:
  499|   186k|{
  500|   186k|    assert(f->frame_hdr->primary_ref_frame != DAV1D_PRIMARY_REF_NONE);
  ------------------
  |  Branch (500:5): [True: 186k, False: 0]
  ------------------
  501|       |
  502|   186k|    unsigned seg_id = 8;
  503|   186k|    ref_seg_map += by * stride + bx;
  504|   197k|    do {
  505|  1.32M|        for (int x = 0; x < w4; x++)
  ------------------
  |  Branch (505:25): [True: 1.12M, False: 197k]
  ------------------
  506|  1.12M|            seg_id = imin(seg_id, ref_seg_map[x]);
  507|   197k|        ref_seg_map += stride;
  508|   197k|    } while (--h4 > 0 && seg_id);
  ------------------
  |  Branch (508:14): [True: 154k, False: 43.4k]
  |  Branch (508:26): [True: 11.0k, False: 143k]
  ------------------
  509|   186k|    assert(seg_id < 8);
  ------------------
  |  Branch (509:5): [True: 186k, False: 0]
  ------------------
  510|       |
  511|   186k|    return seg_id;
  512|   186k|}
decode.c:neg_deinterleave:
  169|   707k|static int neg_deinterleave(int diff, int ref, int max) {
  170|   707k|    if (!ref) return diff;
  ------------------
  |  Branch (170:9): [True: 367k, False: 340k]
  ------------------
  171|   340k|    if (ref >= (max - 1)) return max - diff - 1;
  ------------------
  |  Branch (171:9): [True: 68.3k, False: 271k]
  ------------------
  172|   271k|    if (2 * ref < max) {
  ------------------
  |  Branch (172:9): [True: 167k, False: 104k]
  ------------------
  173|   167k|        if (diff <= 2 * ref) {
  ------------------
  |  Branch (173:13): [True: 129k, False: 38.4k]
  ------------------
  174|   129k|            if (diff & 1)
  ------------------
  |  Branch (174:17): [True: 15.3k, False: 113k]
  ------------------
  175|  15.3k|                return ref + ((diff + 1) >> 1);
  176|   113k|            else
  177|   113k|                return ref - (diff >> 1);
  178|   129k|        }
  179|  38.4k|        return diff;
  180|   167k|    } else {
  181|   104k|        if (diff <= 2 * (max - ref - 1)) {
  ------------------
  |  Branch (181:13): [True: 81.3k, False: 22.8k]
  ------------------
  182|  81.3k|            if (diff & 1)
  ------------------
  |  Branch (182:17): [True: 11.9k, False: 69.4k]
  ------------------
  183|  11.9k|                return ref + ((diff + 1) >> 1);
  184|  69.4k|            else
  185|  69.4k|                return ref - (diff >> 1);
  186|  81.3k|        }
  187|  22.8k|        return max - (diff + 1);
  188|   104k|    }
  189|   271k|}
decode.c:read_pal_indices:
  419|  90.7k|{
  420|  90.7k|    Dav1dTileState *const ts = t->ts;
  421|  90.7k|    const ptrdiff_t stride = bw4 * 4;
  422|  90.7k|    assert(pal_idx);
  ------------------
  |  Branch (422:5): [True: 90.7k, False: 0]
  ------------------
  423|  90.7k|    uint8_t *const pal_tmp = t->scratch.pal_idx_uv;
  424|  90.7k|    pal_tmp[0] = dav1d_msac_decode_uniform(&ts->msac, pal_sz);
  425|  90.7k|    uint16_t (*const color_map_cdf)[8] =
  426|  90.7k|        ts->cdf.m.color_map[pl][pal_sz - 2];
  427|  90.7k|    uint8_t (*const order)[8] = t->scratch.pal_order;
  428|  90.7k|    uint8_t *const ctx = t->scratch.pal_ctx;
  429|  2.42M|    for (int i = 1; i < 4 * (w4 + h4) - 1; i++) {
  ------------------
  |  Branch (429:21): [True: 2.33M, False: 90.7k]
  ------------------
  430|       |        // top/left-to-bottom/right diagonals ("wave-front")
  431|  2.33M|        const int first = imin(i, w4 * 4 - 1);
  432|  2.33M|        const int last = imax(0, i - h4 * 4 + 1);
  433|  2.33M|        order_palette(pal_tmp, stride, i, first, last, order, ctx);
  434|  20.0M|        for (int j = first, m = 0; j >= last; j--, m++) {
  ------------------
  |  Branch (434:36): [True: 17.6M, False: 2.33M]
  ------------------
  435|  17.6M|            const int color_idx = dav1d_msac_decode_symbol_adapt8(&ts->msac,
  ------------------
  |  |   48|  17.6M|#define dav1d_msac_decode_symbol_adapt8  dav1d_msac_decode_symbol_adapt8_sse2
  ------------------
  436|  17.6M|                                      color_map_cdf[ctx[m]], pal_sz - 1);
  437|  17.6M|            pal_tmp[(i - j) * stride + j] = order[m][color_idx];
  438|  17.6M|        }
  439|  2.33M|    }
  440|       |
  441|  90.7k|    t->c->pal_dsp.pal_idx_finish(pal_idx, pal_tmp, bw4 * 4, bh4 * 4,
  442|  90.7k|                                 w4 * 4, h4 * 4);
  443|  90.7k|}
decode.c:order_palette:
  356|  2.33M|{
  357|  2.33M|    int have_top = i > first;
  358|       |
  359|  2.33M|    assert(pal_idx);
  ------------------
  |  Branch (359:5): [True: 2.33M, False: 0]
  ------------------
  360|  2.33M|    pal_idx += first + (i - first) * stride;
  361|  20.0M|    for (int j = first, n = 0; j >= last; have_top = 1, j--, n++, pal_idx += stride - 1) {
  ------------------
  |  Branch (361:32): [True: 17.6M, False: 2.33M]
  ------------------
  362|  17.6M|        const int have_left = j > 0;
  363|       |
  364|  17.6M|        assert(have_left || have_top);
  ------------------
  |  Branch (364:9): [True: 16.7M, False: 900k]
  |  Branch (364:9): [True: 900k, False: 0]
  ------------------
  365|       |
  366|  17.6M|#define add(v_in) do { \
  367|  17.6M|        const int v = v_in; \
  368|  17.6M|        assert((unsigned)v < 8U); \
  369|  17.6M|        order[n][o_idx++] = v; \
  370|  17.6M|        mask |= 1 << v; \
  371|  17.6M|    } while (0)
  372|       |
  373|  17.6M|        unsigned mask = 0;
  374|  17.6M|        int o_idx = 0;
  375|  17.6M|        if (!have_left) {
  ------------------
  |  Branch (375:13): [True: 900k, False: 16.7M]
  ------------------
  376|   900k|            ctx[n] = 0;
  377|   900k|            add(pal_idx[-stride]);
  ------------------
  |  |  366|   900k|#define add(v_in) do { \
  |  |  367|   900k|        const int v = v_in; \
  |  |  368|   900k|        assert((unsigned)v < 8U); \
  |  |  369|   900k|        order[n][o_idx++] = v; \
  |  |  370|   900k|        mask |= 1 << v; \
  |  |  371|   900k|    } while (0)
  |  |  ------------------
  |  |  |  Branch (371:14): [Folded, False: 900k]
  |  |  ------------------
  ------------------
  |  Branch (377:13): [True: 900k, False: 0]
  ------------------
  378|  16.7M|        } else if (!have_top) {
  ------------------
  |  Branch (378:20): [True: 1.43M, False: 15.3M]
  ------------------
  379|  1.43M|            ctx[n] = 0;
  380|  1.43M|            add(pal_idx[-1]);
  ------------------
  |  |  366|  1.43M|#define add(v_in) do { \
  |  |  367|  1.43M|        const int v = v_in; \
  |  |  368|  1.43M|        assert((unsigned)v < 8U); \
  |  |  369|  1.43M|        order[n][o_idx++] = v; \
  |  |  370|  1.43M|        mask |= 1 << v; \
  |  |  371|  1.43M|    } while (0)
  |  |  ------------------
  |  |  |  Branch (371:14): [Folded, False: 1.43M]
  |  |  ------------------
  ------------------
  |  Branch (380:13): [True: 1.43M, False: 0]
  ------------------
  381|  15.3M|        } else {
  382|  15.3M|            const int l = pal_idx[-1], t = pal_idx[-stride], tl = pal_idx[-(stride + 1)];
  383|  15.3M|            const int same_t_l = t == l;
  384|  15.3M|            const int same_t_tl = t == tl;
  385|  15.3M|            const int same_l_tl = l == tl;
  386|  15.3M|            const int same_all = same_t_l & same_t_tl & same_l_tl;
  387|       |
  388|  15.3M|            if (same_all) {
  ------------------
  |  Branch (388:17): [True: 8.57M, False: 6.75M]
  ------------------
  389|  8.57M|                ctx[n] = 4;
  390|  8.57M|                add(t);
  ------------------
  |  |  366|  8.57M|#define add(v_in) do { \
  |  |  367|  8.57M|        const int v = v_in; \
  |  |  368|  8.57M|        assert((unsigned)v < 8U); \
  |  |  369|  8.57M|        order[n][o_idx++] = v; \
  |  |  370|  8.57M|        mask |= 1 << v; \
  |  |  371|  8.57M|    } while (0)
  |  |  ------------------
  |  |  |  Branch (371:14): [Folded, False: 8.57M]
  |  |  ------------------
  ------------------
  |  Branch (390:17): [True: 8.57M, False: 0]
  ------------------
  391|  8.57M|            } else if (same_t_l) {
  ------------------
  |  Branch (391:24): [True: 465k, False: 6.28M]
  ------------------
  392|   465k|                ctx[n] = 3;
  393|   465k|                add(t);
  ------------------
  |  |  366|   465k|#define add(v_in) do { \
  |  |  367|   465k|        const int v = v_in; \
  |  |  368|   465k|        assert((unsigned)v < 8U); \
  |  |  369|   465k|        order[n][o_idx++] = v; \
  |  |  370|   465k|        mask |= 1 << v; \
  |  |  371|   465k|    } while (0)
  |  |  ------------------
  |  |  |  Branch (371:14): [Folded, False: 465k]
  |  |  ------------------
  ------------------
  |  Branch (393:17): [True: 465k, False: 0]
  ------------------
  394|   465k|                add(tl);
  ------------------
  |  |  366|   465k|#define add(v_in) do { \
  |  |  367|   465k|        const int v = v_in; \
  |  |  368|   465k|        assert((unsigned)v < 8U); \
  |  |  369|   465k|        order[n][o_idx++] = v; \
  |  |  370|   465k|        mask |= 1 << v; \
  |  |  371|   465k|    } while (0)
  |  |  ------------------
  |  |  |  Branch (371:14): [Folded, False: 465k]
  |  |  ------------------
  ------------------
  |  Branch (394:17): [True: 465k, False: 0]
  ------------------
  395|  6.28M|            } else if (same_t_tl | same_l_tl) {
  ------------------
  |  Branch (395:24): [True: 5.10M, False: 1.18M]
  ------------------
  396|  5.10M|                ctx[n] = 2;
  397|  5.10M|                add(tl);
  ------------------
  |  |  366|  5.10M|#define add(v_in) do { \
  |  |  367|  5.10M|        const int v = v_in; \
  |  |  368|  5.10M|        assert((unsigned)v < 8U); \
  |  |  369|  5.10M|        order[n][o_idx++] = v; \
  |  |  370|  5.10M|        mask |= 1 << v; \
  |  |  371|  5.10M|    } while (0)
  |  |  ------------------
  |  |  |  Branch (371:14): [Folded, False: 5.10M]
  |  |  ------------------
  ------------------
  |  Branch (397:17): [True: 5.10M, False: 0]
  ------------------
  398|  5.10M|                add(same_t_tl ? l : t);
  ------------------
  |  |  366|  5.10M|#define add(v_in) do { \
  |  |  367|  10.2M|        const int v = v_in; \
  |  |  ------------------
  |  |  |  Branch (367:23): [True: 2.60M, False: 2.50M]
  |  |  ------------------
  |  |  368|  5.10M|        assert((unsigned)v < 8U); \
  |  |  369|  5.10M|        order[n][o_idx++] = v; \
  |  |  370|  5.10M|        mask |= 1 << v; \
  |  |  371|  5.10M|    } while (0)
  |  |  ------------------
  |  |  |  Branch (371:14): [Folded, False: 5.10M]
  |  |  ------------------
  ------------------
  |  Branch (398:17): [True: 5.10M, False: 0]
  ------------------
  399|  5.10M|            } else {
  400|  1.18M|                ctx[n] = 1;
  401|  1.18M|                add(imin(t, l));
  ------------------
  |  |  366|  1.18M|#define add(v_in) do { \
  |  |  367|  1.18M|        const int v = v_in; \
  |  |  368|  1.18M|        assert((unsigned)v < 8U); \
  |  |  369|  1.18M|        order[n][o_idx++] = v; \
  |  |  370|  1.18M|        mask |= 1 << v; \
  |  |  371|  1.18M|    } while (0)
  |  |  ------------------
  |  |  |  Branch (371:14): [Folded, False: 1.18M]
  |  |  ------------------
  ------------------
  |  Branch (401:17): [True: 1.18M, False: 0]
  ------------------
  402|  1.18M|                add(imax(t, l));
  ------------------
  |  |  366|  1.18M|#define add(v_in) do { \
  |  |  367|  1.18M|        const int v = v_in; \
  |  |  368|  1.18M|        assert((unsigned)v < 8U); \
  |  |  369|  1.18M|        order[n][o_idx++] = v; \
  |  |  370|  1.18M|        mask |= 1 << v; \
  |  |  371|  1.18M|    } while (0)
  |  |  ------------------
  |  |  |  Branch (371:14): [Folded, False: 1.18M]
  |  |  ------------------
  ------------------
  |  Branch (402:17): [True: 1.18M, False: 0]
  ------------------
  403|  1.18M|                add(tl);
  ------------------
  |  |  366|  1.18M|#define add(v_in) do { \
  |  |  367|  1.18M|        const int v = v_in; \
  |  |  368|  1.18M|        assert((unsigned)v < 8U); \
  |  |  369|  1.18M|        order[n][o_idx++] = v; \
  |  |  370|  1.18M|        mask |= 1 << v; \
  |  |  371|  1.18M|    } while (0)
  |  |  ------------------
  |  |  |  Branch (371:14): [Folded, False: 1.18M]
  |  |  ------------------
  ------------------
  |  Branch (403:17): [True: 1.18M, False: 0]
  ------------------
  404|  1.18M|            }
  405|  15.3M|        }
  406|   159M|        for (unsigned m = 1, bit = 0; m < 0x100; m <<= 1, bit++)
  ------------------
  |  Branch (406:39): [True: 141M, False: 17.6M]
  ------------------
  407|   141M|            if (!(mask & m))
  ------------------
  |  Branch (407:17): [True: 115M, False: 25.6M]
  ------------------
  408|   115M|                order[n][o_idx++] = bit;
  409|       |        assert(o_idx == 8);
  ------------------
  |  Branch (409:9): [True: 17.6M, False: 0]
  ------------------
  410|  17.6M|#undef add
  411|  17.6M|    }
  412|  2.33M|}
decode.c:splat_intraref:
  566|  1.56M|{
  567|  1.56M|    const refmvs_block ALIGN(tmpl, 16) = (refmvs_block) {
  568|  1.56M|        .ref.ref = { 0, -1 },
  569|  1.56M|        .mv.mv[0].n = INVALID_MV,
  ------------------
  |  |   40|  1.56M|#define INVALID_MV 0x80008000
  ------------------
  570|  1.56M|        .bs = bs,
  571|  1.56M|        .mf = 0,
  572|  1.56M|    };
  573|  1.56M|    c->refmvs_dsp.splat_mv(&t->rt.r[(t->by & 31) + 5], &tmpl, t->bx, bw4, bh4);
  574|  1.56M|}
decode.c:read_mv_residual:
  109|   888k|{
  110|   888k|    MsacContext *const msac = &ts->msac;
  111|   888k|    const enum MVJoint mv_joint =
  112|   888k|        dav1d_msac_decode_symbol_adapt4(msac, ts->cdf.mv.joint, N_MV_JOINTS - 1);
  ------------------
  |  |   47|   888k|#define dav1d_msac_decode_symbol_adapt4  dav1d_msac_decode_symbol_adapt4_sse2
  ------------------
  113|   888k|    if (mv_joint & MV_JOINT_V)
  ------------------
  |  Branch (113:9): [True: 740k, False: 147k]
  ------------------
  114|   740k|        ref_mv->y += read_mv_component_diff(msac, &ts->cdf.mv.comp[0], mv_prec);
  115|   888k|    if (mv_joint & MV_JOINT_H)
  ------------------
  |  Branch (115:9): [True: 745k, False: 143k]
  ------------------
  116|   745k|        ref_mv->x += read_mv_component_diff(msac, &ts->cdf.mv.comp[1], mv_prec);
  117|   888k|}
decode.c:read_mv_component_diff:
   79|  1.48M|{
   80|  1.48M|    const int sign = dav1d_msac_decode_bool_adapt(msac, mv_comp->sign);
  ------------------
  |  |   52|  1.48M|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
   81|  1.48M|    const int cl = dav1d_msac_decode_symbol_adapt16(msac, mv_comp->classes, 10);
  ------------------
  |  |   57|  1.48M|#define dav1d_msac_decode_symbol_adapt16(ctx, cdf, symb) ((ctx)->symbol_adapt16(ctx, cdf, symb))
  ------------------
   82|  1.48M|    int up, fp = 3, hp = 1;
   83|       |
   84|  1.48M|    if (!cl) {
  ------------------
  |  Branch (84:9): [True: 362k, False: 1.12M]
  ------------------
   85|   362k|        up = dav1d_msac_decode_bool_adapt(msac, mv_comp->class0);
  ------------------
  |  |   52|   362k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
   86|   362k|        if (mv_prec >= 0) {  // !force_integer_mv
  ------------------
  |  Branch (86:13): [True: 202k, False: 160k]
  ------------------
   87|   202k|            fp = dav1d_msac_decode_symbol_adapt4(msac, mv_comp->class0_fp[up], 3);
  ------------------
  |  |   47|   202k|#define dav1d_msac_decode_symbol_adapt4  dav1d_msac_decode_symbol_adapt4_sse2
  ------------------
   88|   202k|            if (mv_prec > 0) // allow_high_precision_mv
  ------------------
  |  Branch (88:17): [True: 151k, False: 50.9k]
  ------------------
   89|   151k|                hp = dav1d_msac_decode_bool_adapt(msac, mv_comp->class0_hp);
  ------------------
  |  |   52|   151k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
   90|   202k|        }
   91|  1.12M|    } else {
   92|  1.12M|        up = 1 << cl;
   93|  11.9M|        for (int n = 0; n < cl; n++)
  ------------------
  |  Branch (93:25): [True: 10.7M, False: 1.12M]
  ------------------
   94|  10.7M|            up |= dav1d_msac_decode_bool_adapt(msac, mv_comp->classN[n]) << n;
  ------------------
  |  |   52|  10.7M|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
   95|  1.12M|        if (mv_prec >= 0) {  // !force_integer_mv
  ------------------
  |  Branch (95:13): [True: 72.6k, False: 1.05M]
  ------------------
   96|  72.6k|            fp = dav1d_msac_decode_symbol_adapt4(msac, mv_comp->classN_fp, 3);
  ------------------
  |  |   47|  72.6k|#define dav1d_msac_decode_symbol_adapt4  dav1d_msac_decode_symbol_adapt4_sse2
  ------------------
   97|  72.6k|            if (mv_prec > 0) // allow_high_precision_mv
  ------------------
  |  Branch (97:17): [True: 52.8k, False: 19.7k]
  ------------------
   98|  52.8k|                hp = dav1d_msac_decode_bool_adapt(msac, mv_comp->classN_hp);
  ------------------
  |  |   52|  52.8k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
   99|  72.6k|        }
  100|  1.12M|    }
  101|       |
  102|  1.48M|    const int diff = ((up << 3) | (fp << 1) | hp) + 1;
  103|       |
  104|  1.48M|    return sign ? -diff : diff;
  ------------------
  |  Branch (104:12): [True: 1.27M, False: 214k]
  ------------------
  105|  1.48M|}
decode.c:read_vartx_tree:
  448|  1.75M|{
  449|  1.75M|    const Dav1dFrameContext *const f = t->f;
  450|  1.75M|    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
  451|  1.75M|    const int bw4 = b_dim[0], bh4 = b_dim[1];
  452|       |
  453|       |    // var-tx tree coding
  454|  1.75M|    uint16_t tx_split[2] = { 0 };
  455|  1.75M|    b->max_ytx = dav1d_max_txfm_size_for_bs[bs][0];
  456|  1.75M|    if (!b->skip && (f->frame_hdr->segmentation.lossless[b->seg_id] ||
  ------------------
  |  Branch (456:9): [True: 539k, False: 1.21M]
  |  Branch (456:22): [True: 22.1k, False: 517k]
  ------------------
  457|   517k|                     b->max_ytx == TX_4X4))
  ------------------
  |  Branch (457:22): [True: 32.1k, False: 485k]
  ------------------
  458|  54.3k|    {
  459|  54.3k|        b->max_ytx = b->uvtx = TX_4X4;
  460|  54.3k|        if (f->frame_hdr->txfm_mode == DAV1D_TX_SWITCHABLE) {
  ------------------
  |  Branch (460:13): [True: 10.0k, False: 44.2k]
  ------------------
  461|  10.0k|            dav1d_memset_pow2[b_dim[2]](&t->a->tx[bx4], TX_4X4);
  462|  10.0k|            dav1d_memset_pow2[b_dim[3]](&t->l.tx[by4], TX_4X4);
  463|  10.0k|        }
  464|  1.69M|    } else if (f->frame_hdr->txfm_mode != DAV1D_TX_SWITCHABLE || b->skip) {
  ------------------
  |  Branch (464:16): [True: 1.36M, False: 335k]
  |  Branch (464:66): [True: 184k, False: 150k]
  ------------------
  465|  1.54M|        if (f->frame_hdr->txfm_mode == DAV1D_TX_SWITCHABLE) {
  ------------------
  |  Branch (465:13): [True: 184k, False: 1.36M]
  ------------------
  466|   184k|            dav1d_memset_pow2[b_dim[2]](&t->a->tx[bx4], b_dim[2 + 0]);
  467|   184k|            dav1d_memset_pow2[b_dim[3]](&t->l.tx[by4], b_dim[2 + 1]);
  468|   184k|        }
  469|  1.54M|        b->uvtx = dav1d_max_txfm_size_for_bs[bs][f->cur.p.layout];
  470|  1.54M|    } else {
  471|   150k|        assert(bw4 <= 16 || bh4 <= 16 || b->max_ytx == TX_64X64);
  ------------------
  |  Branch (471:9): [True: 147k, False: 2.94k]
  |  Branch (471:9): [True: 419, False: 2.52k]
  |  Branch (471:9): [True: 2.52k, False: 0]
  ------------------
  472|   150k|        int y, x, y_off, x_off;
  473|   150k|        const TxfmInfo *const ytx = &dav1d_txfm_dimensions[b->max_ytx];
  474|   304k|        for (y = 0, y_off = 0; y < bh4; y += ytx->h, y_off++) {
  ------------------
  |  Branch (474:32): [True: 153k, False: 150k]
  ------------------
  475|   312k|            for (x = 0, x_off = 0; x < bw4; x += ytx->w, x_off++) {
  ------------------
  |  Branch (475:36): [True: 158k, False: 153k]
  ------------------
  476|   158k|                read_tx_tree(t, b->max_ytx, 0, tx_split, x_off, y_off);
  477|       |                // contexts are updated inside read_tx_tree()
  478|   158k|                t->bx += ytx->w;
  479|   158k|            }
  480|   153k|            t->bx -= x;
  481|   153k|            t->by += ytx->h;
  482|   153k|        }
  483|   150k|        t->by -= y;
  484|   150k|        if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   150k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 150k]
  |  |  ------------------
  |  |   35|   150k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   150k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  485|      0|            printf("Post-vartxtree[%x/%x]: r=%d\n",
  486|      0|                   tx_split[0], tx_split[1], t->ts->msac.rng);
  487|   150k|        b->uvtx = dav1d_max_txfm_size_for_bs[bs][f->cur.p.layout];
  488|   150k|    }
  489|  1.75M|    assert(!(tx_split[0] & ~0x33));
  ------------------
  |  Branch (489:5): [True: 1.75M, False: 0]
  ------------------
  490|  1.75M|    b->tx_split0 = (uint8_t)tx_split[0];
  491|  1.75M|    b->tx_split1 = tx_split[1];
  492|  1.75M|}
decode.c:read_tx_tree:
  123|   313k|{
  124|   313k|    const Dav1dFrameContext *const f = t->f;
  125|   313k|    const int bx4 = t->bx & 31, by4 = t->by & 31;
  126|   313k|    const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[from];
  127|   313k|    const int txw = t_dim->lw, txh = t_dim->lh;
  128|   313k|    int is_split;
  129|       |
  130|   313k|    if (depth < 2 && from > (int) TX_4X4) {
  ------------------
  |  Branch (130:9): [True: 260k, False: 52.6k]
  |  Branch (130:22): [True: 260k, False: 0]
  ------------------
  131|   260k|        const int cat = 2 * (TX_64X64 - t_dim->max) - depth;
  132|   260k|        const int a = t->a->tx[bx4] < txw;
  133|   260k|        const int l = t->l.tx[by4] < txh;
  134|       |
  135|   260k|        is_split = dav1d_msac_decode_bool_adapt(&t->ts->msac,
  ------------------
  |  |   52|   260k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  136|   260k|                       t->ts->cdf.m.txpart[cat][a + l]);
  137|   260k|        if (is_split)
  ------------------
  |  Branch (137:13): [True: 72.6k, False: 188k]
  ------------------
  138|  72.6k|            masks[depth] |= 1 << (y_off * 4 + x_off);
  139|   260k|    } else {
  140|  52.6k|        is_split = 0;
  141|  52.6k|    }
  142|       |
  143|   313k|    if (is_split && t_dim->max > TX_8X8) {
  ------------------
  |  Branch (143:9): [True: 72.6k, False: 240k]
  |  Branch (143:21): [True: 54.5k, False: 18.0k]
  ------------------
  144|  54.5k|        const enum RectTxfmSize sub = t_dim->sub;
  145|  54.5k|        const TxfmInfo *const sub_t_dim = &dav1d_txfm_dimensions[sub];
  146|  54.5k|        const int txsw = sub_t_dim->w, txsh = sub_t_dim->h;
  147|       |
  148|  54.5k|        read_tx_tree(t, sub, depth + 1, masks, x_off * 2 + 0, y_off * 2 + 0);
  149|  54.5k|        t->bx += txsw;
  150|  54.5k|        if (txw >= txh && t->bx < f->bw)
  ------------------
  |  Branch (150:13): [True: 42.3k, False: 12.1k]
  |  Branch (150:27): [True: 41.8k, False: 559]
  ------------------
  151|  41.8k|            read_tx_tree(t, sub, depth + 1, masks, x_off * 2 + 1, y_off * 2 + 0);
  152|  54.5k|        t->bx -= txsw;
  153|  54.5k|        t->by += txsh;
  154|  54.5k|        if (txh >= txw && t->by < f->bh) {
  ------------------
  |  Branch (154:13): [True: 36.4k, False: 18.1k]
  |  Branch (154:27): [True: 35.3k, False: 1.08k]
  ------------------
  155|  35.3k|            read_tx_tree(t, sub, depth + 1, masks, x_off * 2 + 0, y_off * 2 + 1);
  156|  35.3k|            t->bx += txsw;
  157|  35.3k|            if (txw >= txh && t->bx < f->bw)
  ------------------
  |  Branch (157:17): [True: 23.2k, False: 12.1k]
  |  Branch (157:31): [True: 22.6k, False: 541]
  ------------------
  158|  22.6k|                read_tx_tree(t, sub, depth + 1, masks,
  159|  22.6k|                             x_off * 2 + 1, y_off * 2 + 1);
  160|  35.3k|            t->bx -= txsw;
  161|  35.3k|        }
  162|  54.5k|        t->by -= txsh;
  163|   258k|    } else {
  164|   258k|        dav1d_memset_pow2[t_dim->lw](&t->a->tx[bx4], is_split ? TX_4X4 : txw);
  ------------------
  |  Branch (164:54): [True: 18.0k, False: 240k]
  ------------------
  165|   258k|        dav1d_memset_pow2[t_dim->lh](&t->l.tx[by4], is_split ? TX_4X4 : txh);
  ------------------
  |  Branch (165:53): [True: 18.0k, False: 240k]
  ------------------
  166|   258k|    }
  167|   313k|}
decode.c:splat_intrabc_mv:
  535|   613k|{
  536|   613k|    const refmvs_block ALIGN(tmpl, 16) = (refmvs_block) {
  537|   613k|        .ref.ref = { 0, -1 },
  538|   613k|        .mv.mv[0] = b->mv[0],
  539|   613k|        .bs = bs,
  540|   613k|        .mf = 0,
  541|   613k|    };
  542|   613k|    c->refmvs_dsp.splat_mv(&t->rt.r[(t->by & 31) + 5], &tmpl, t->bx, bw4, bh4);
  543|   613k|}
decode.c:findoddzero:
  339|   497k|static inline int findoddzero(const uint8_t *buf, int len) {
  340|   536k|    for (int n = 0; n < len; n++)
  ------------------
  |  Branch (340:21): [True: 518k, False: 17.7k]
  ------------------
  341|   518k|        if (!buf[n * 2]) return 1;
  ------------------
  |  Branch (341:13): [True: 479k, False: 38.8k]
  ------------------
  342|  17.7k|    return 0;
  343|   497k|}
decode.c:find_matching_ref:
  197|   479k|{
  198|   479k|    /*const*/ refmvs_block *const *r = &t->rt.r[(t->by & 31) + 5];
  199|   479k|    int count = 0;
  200|   479k|    int have_topleft = have_top && have_left;
  ------------------
  |  Branch (200:24): [True: 442k, False: 37.5k]
  |  Branch (200:36): [True: 403k, False: 38.1k]
  ------------------
  201|   479k|    int have_topright = imax(bw4, bh4) < 32 &&
  ------------------
  |  Branch (201:25): [True: 448k, False: 31.6k]
  ------------------
  202|   448k|                        have_top && t->bx + bw4 < t->ts->tiling.col_end &&
  ------------------
  |  Branch (202:25): [True: 416k, False: 31.9k]
  |  Branch (202:37): [True: 382k, False: 33.8k]
  ------------------
  203|   382k|                        (intra_edge_flags & EDGE_I444_TOP_HAS_RIGHT);
  ------------------
  |  Branch (203:25): [True: 240k, False: 141k]
  ------------------
  204|       |
  205|   479k|#define bs(rp) dav1d_block_dimensions[(rp)->bs]
  206|   479k|#define matches(rp) ((rp)->ref.ref[0] == ref + 1 && (rp)->ref.ref[1] == -1)
  207|       |
  208|   479k|    if (have_top) {
  ------------------
  |  Branch (208:9): [True: 442k, False: 37.5k]
  ------------------
  209|   442k|        const refmvs_block *r2 = &r[-1][t->bx];
  210|   442k|        if (matches(r2)) {
  ------------------
  |  |  206|   442k|#define matches(rp) ((rp)->ref.ref[0] == ref + 1 && (rp)->ref.ref[1] == -1)
  |  |  ------------------
  |  |  |  Branch (206:22): [True: 377k, False: 64.7k]
  |  |  |  Branch (206:53): [True: 348k, False: 29.3k]
  |  |  ------------------
  ------------------
  211|   348k|            masks[0] |= 1;
  212|   348k|            count = 1;
  213|   348k|        }
  214|   442k|        int aw4 = bs(r2)[0];
  ------------------
  |  |  205|   442k|#define bs(rp) dav1d_block_dimensions[(rp)->bs]
  ------------------
  215|   442k|        if (aw4 >= bw4) {
  ------------------
  |  Branch (215:13): [True: 383k, False: 58.2k]
  ------------------
  216|   383k|            const int off = t->bx & (aw4 - 1);
  217|   383k|            if (off) have_topleft = 0;
  ------------------
  |  Branch (217:17): [True: 69.3k, False: 314k]
  ------------------
  218|   383k|            if (aw4 - off > bw4) have_topright = 0;
  ------------------
  |  Branch (218:17): [True: 70.9k, False: 312k]
  ------------------
  219|   383k|        } else {
  220|  58.2k|            unsigned mask = 1 << aw4;
  221|   144k|            for (int x = aw4; x < w4; x += aw4) {
  ------------------
  |  Branch (221:31): [True: 87.1k, False: 57.6k]
  ------------------
  222|  87.1k|                r2 += aw4;
  223|  87.1k|                if (matches(r2)) {
  ------------------
  |  |  206|  87.1k|#define matches(rp) ((rp)->ref.ref[0] == ref + 1 && (rp)->ref.ref[1] == -1)
  |  |  ------------------
  |  |  |  Branch (206:22): [True: 69.1k, False: 18.0k]
  |  |  |  Branch (206:53): [True: 64.9k, False: 4.22k]
  |  |  ------------------
  ------------------
  224|  64.9k|                    masks[0] |= mask;
  225|  64.9k|                    if (++count >= 8) return;
  ------------------
  |  Branch (225:25): [True: 540, False: 64.3k]
  ------------------
  226|  64.9k|                }
  227|  86.6k|                aw4 = bs(r2)[0];
  ------------------
  |  |  205|  86.6k|#define bs(rp) dav1d_block_dimensions[(rp)->bs]
  ------------------
  228|  86.6k|                mask <<= aw4;
  229|  86.6k|            }
  230|  58.2k|        }
  231|   442k|    }
  232|   479k|    if (have_left) {
  ------------------
  |  Branch (232:9): [True: 441k, False: 38.1k]
  ------------------
  233|   441k|        /*const*/ refmvs_block *const *r2 = r;
  234|   441k|        if (matches(&r2[0][t->bx - 1])) {
  ------------------
  |  |  206|   441k|#define matches(rp) ((rp)->ref.ref[0] == ref + 1 && (rp)->ref.ref[1] == -1)
  |  |  ------------------
  |  |  |  Branch (206:22): [True: 373k, False: 67.0k]
  |  |  |  Branch (206:53): [True: 344k, False: 29.2k]
  |  |  ------------------
  ------------------
  235|   344k|            masks[1] |= 1;
  236|   344k|            if (++count >= 8) return;
  ------------------
  |  Branch (236:17): [True: 310, False: 344k]
  ------------------
  237|   344k|        }
  238|   440k|        int lh4 = bs(&r2[0][t->bx - 1])[1];
  ------------------
  |  |  205|   440k|#define bs(rp) dav1d_block_dimensions[(rp)->bs]
  ------------------
  239|   440k|        if (lh4 >= bh4) {
  ------------------
  |  Branch (239:13): [True: 369k, False: 70.9k]
  ------------------
  240|   369k|            if (t->by & (lh4 - 1)) have_topleft = 0;
  ------------------
  |  Branch (240:17): [True: 70.2k, False: 299k]
  ------------------
  241|   369k|        } else {
  242|  70.9k|            unsigned mask = 1 << lh4;
  243|   174k|            for (int y = lh4; y < h4; y += lh4) {
  ------------------
  |  Branch (243:31): [True: 104k, False: 69.6k]
  ------------------
  244|   104k|                r2 += lh4;
  245|   104k|                if (matches(&r2[0][t->bx - 1])) {
  ------------------
  |  |  206|   104k|#define matches(rp) ((rp)->ref.ref[0] == ref + 1 && (rp)->ref.ref[1] == -1)
  |  |  ------------------
  |  |  |  Branch (206:22): [True: 77.6k, False: 26.9k]
  |  |  |  Branch (206:53): [True: 72.3k, False: 5.28k]
  |  |  ------------------
  ------------------
  246|  72.3k|                    masks[1] |= mask;
  247|  72.3k|                    if (++count >= 8) return;
  ------------------
  |  Branch (247:25): [True: 1.33k, False: 71.0k]
  ------------------
  248|  72.3k|                }
  249|   103k|                lh4 = bs(&r2[0][t->bx - 1])[1];
  ------------------
  |  |  205|   103k|#define bs(rp) dav1d_block_dimensions[(rp)->bs]
  ------------------
  250|   103k|                mask <<= lh4;
  251|   103k|            }
  252|  70.9k|        }
  253|   440k|    }
  254|   477k|    if (have_topleft && matches(&r[-1][t->bx - 1])) {
  ------------------
  |  |  206|   262k|#define matches(rp) ((rp)->ref.ref[0] == ref + 1 && (rp)->ref.ref[1] == -1)
  |  |  ------------------
  |  |  |  Branch (206:22): [True: 212k, False: 49.7k]
  |  |  |  Branch (206:53): [True: 194k, False: 18.2k]
  |  |  ------------------
  ------------------
  |  Branch (254:9): [True: 262k, False: 215k]
  ------------------
  255|   194k|        masks[1] |= 1ULL << 32;
  256|   194k|        if (++count >= 8) return;
  ------------------
  |  Branch (256:13): [True: 976, False: 193k]
  ------------------
  257|   194k|    }
  258|   476k|    if (have_topright && matches(&r[-1][t->bx + bw4])) {
  ------------------
  |  |  206|   169k|#define matches(rp) ((rp)->ref.ref[0] == ref + 1 && (rp)->ref.ref[1] == -1)
  |  |  ------------------
  |  |  |  Branch (206:22): [True: 135k, False: 34.3k]
  |  |  |  Branch (206:53): [True: 124k, False: 10.9k]
  |  |  ------------------
  ------------------
  |  Branch (258:9): [True: 169k, False: 307k]
  ------------------
  259|   124k|        masks[0] |= 1ULL << 32;
  260|   124k|    }
  261|   476k|#undef matches
  262|   476k|}
decode.c:derive_warpmv:
  268|  92.5k|{
  269|  92.5k|    int pts[8][2 /* in, out */][2 /* x, y */], np = 0;
  270|  92.5k|    /*const*/ refmvs_block *const *r = &t->rt.r[(t->by & 31) + 5];
  271|       |
  272|  92.5k|#define add_sample(dx, dy, sx, sy, rp) do { \
  273|  92.5k|    pts[np][0][0] = 16 * (2 * dx + sx * bs(rp)[0]) - 8; \
  274|  92.5k|    pts[np][0][1] = 16 * (2 * dy + sy * bs(rp)[1]) - 8; \
  275|  92.5k|    pts[np][1][0] = pts[np][0][0] + (rp)->mv.mv[0].x; \
  276|  92.5k|    pts[np][1][1] = pts[np][0][1] + (rp)->mv.mv[0].y; \
  277|  92.5k|    np++; \
  278|  92.5k|} while (0)
  279|       |
  280|       |    // use masks[] to find the projectable motion vectors in the edges
  281|  92.5k|    if ((unsigned) masks[0] == 1 && !(masks[1] >> 32)) {
  ------------------
  |  Branch (281:9): [True: 71.8k, False: 20.6k]
  |  Branch (281:37): [True: 29.1k, False: 42.6k]
  ------------------
  282|  29.1k|        const int off = t->bx & (bs(&r[-1][t->bx])[0] - 1);
  ------------------
  |  |  205|  29.1k|#define bs(rp) dav1d_block_dimensions[(rp)->bs]
  ------------------
  283|  29.1k|        add_sample(-off, 0, 1, -1, &r[-1][t->bx]);
  ------------------
  |  |  272|  29.1k|#define add_sample(dx, dy, sx, sy, rp) do { \
  |  |  273|  29.1k|    pts[np][0][0] = 16 * (2 * dx + sx * bs(rp)[0]) - 8; \
  |  |  ------------------
  |  |  |  |  205|  29.1k|#define bs(rp) dav1d_block_dimensions[(rp)->bs]
  |  |  ------------------
  |  |  274|  29.1k|    pts[np][0][1] = 16 * (2 * dy + sy * bs(rp)[1]) - 8; \
  |  |  ------------------
  |  |  |  |  205|  29.1k|#define bs(rp) dav1d_block_dimensions[(rp)->bs]
  |  |  ------------------
  |  |  275|  29.1k|    pts[np][1][0] = pts[np][0][0] + (rp)->mv.mv[0].x; \
  |  |  276|  29.1k|    pts[np][1][1] = pts[np][0][1] + (rp)->mv.mv[0].y; \
  |  |  277|  29.1k|    np++; \
  |  |  278|  29.1k|} while (0)
  |  |  ------------------
  |  |  |  Branch (278:10): [Folded, False: 29.1k]
  |  |  ------------------
  ------------------
  284|   128k|    } else for (unsigned off = 0, xmask = (uint32_t) masks[0]; np < 8 && xmask;) { // top
  ------------------
  |  Branch (284:64): [True: 128k, False: 328]
  |  Branch (284:74): [True: 65.0k, False: 63.0k]
  ------------------
  285|  65.0k|        const int tz = ctz(xmask);
  286|  65.0k|        off += tz;
  287|  65.0k|        xmask >>= tz;
  288|  65.0k|        add_sample(off, 0, 1, -1, &r[-1][t->bx + off]);
  ------------------
  |  |  272|  65.0k|#define add_sample(dx, dy, sx, sy, rp) do { \
  |  |  273|  65.0k|    pts[np][0][0] = 16 * (2 * dx + sx * bs(rp)[0]) - 8; \
  |  |  ------------------
  |  |  |  |  205|  65.0k|#define bs(rp) dav1d_block_dimensions[(rp)->bs]
  |  |  ------------------
  |  |  274|  65.0k|    pts[np][0][1] = 16 * (2 * dy + sy * bs(rp)[1]) - 8; \
  |  |  ------------------
  |  |  |  |  205|  65.0k|#define bs(rp) dav1d_block_dimensions[(rp)->bs]
  |  |  ------------------
  |  |  275|  65.0k|    pts[np][1][0] = pts[np][0][0] + (rp)->mv.mv[0].x; \
  |  |  276|  65.0k|    pts[np][1][1] = pts[np][0][1] + (rp)->mv.mv[0].y; \
  |  |  277|  65.0k|    np++; \
  |  |  278|  65.0k|} while (0)
  |  |  ------------------
  |  |  |  Branch (278:10): [Folded, False: 65.0k]
  |  |  ------------------
  ------------------
  289|  65.0k|        xmask &= ~1;
  290|  65.0k|    }
  291|  92.5k|    if (np < 8 && masks[1] == 1) {
  ------------------
  |  Branch (291:9): [True: 92.1k, False: 328]
  |  Branch (291:19): [True: 30.1k, False: 62.0k]
  ------------------
  292|  30.1k|        const int off = t->by & (bs(&r[0][t->bx - 1])[1] - 1);
  ------------------
  |  |  205|  30.1k|#define bs(rp) dav1d_block_dimensions[(rp)->bs]
  ------------------
  293|  30.1k|        add_sample(0, -off, -1, 1, &r[-off][t->bx - 1]);
  ------------------
  |  |  272|  30.1k|#define add_sample(dx, dy, sx, sy, rp) do { \
  |  |  273|  30.1k|    pts[np][0][0] = 16 * (2 * dx + sx * bs(rp)[0]) - 8; \
  |  |  ------------------
  |  |  |  |  205|  30.1k|#define bs(rp) dav1d_block_dimensions[(rp)->bs]
  |  |  ------------------
  |  |  274|  30.1k|    pts[np][0][1] = 16 * (2 * dy + sy * bs(rp)[1]) - 8; \
  |  |  ------------------
  |  |  |  |  205|  30.1k|#define bs(rp) dav1d_block_dimensions[(rp)->bs]
  |  |  ------------------
  |  |  275|  30.1k|    pts[np][1][0] = pts[np][0][0] + (rp)->mv.mv[0].x; \
  |  |  276|  30.1k|    pts[np][1][1] = pts[np][0][1] + (rp)->mv.mv[0].y; \
  |  |  277|  30.1k|    np++; \
  |  |  278|  30.1k|} while (0)
  |  |  ------------------
  |  |  |  Branch (278:10): [Folded, False: 30.1k]
  |  |  ------------------
  ------------------
  294|   131k|    } else for (unsigned off = 0, ymask = (uint32_t) masks[1]; np < 8 && ymask;) { // left
  ------------------
  |  Branch (294:64): [True: 130k, False: 917]
  |  Branch (294:74): [True: 68.6k, False: 61.4k]
  ------------------
  295|  68.6k|        const int tz = ctz(ymask);
  296|  68.6k|        off += tz;
  297|  68.6k|        ymask >>= tz;
  298|  68.6k|        add_sample(0, off, -1, 1, &r[off][t->bx - 1]);
  ------------------
  |  |  272|  68.6k|#define add_sample(dx, dy, sx, sy, rp) do { \
  |  |  273|  68.6k|    pts[np][0][0] = 16 * (2 * dx + sx * bs(rp)[0]) - 8; \
  |  |  ------------------
  |  |  |  |  205|  68.6k|#define bs(rp) dav1d_block_dimensions[(rp)->bs]
  |  |  ------------------
  |  |  274|  68.6k|    pts[np][0][1] = 16 * (2 * dy + sy * bs(rp)[1]) - 8; \
  |  |  ------------------
  |  |  |  |  205|  68.6k|#define bs(rp) dav1d_block_dimensions[(rp)->bs]
  |  |  ------------------
  |  |  275|  68.6k|    pts[np][1][0] = pts[np][0][0] + (rp)->mv.mv[0].x; \
  |  |  276|  68.6k|    pts[np][1][1] = pts[np][0][1] + (rp)->mv.mv[0].y; \
  |  |  277|  68.6k|    np++; \
  |  |  278|  68.6k|} while (0)
  |  |  ------------------
  |  |  |  Branch (278:10): [Folded, False: 68.6k]
  |  |  ------------------
  ------------------
  299|  68.6k|        ymask &= ~1;
  300|  68.6k|    }
  301|  92.5k|    if (np < 8 && masks[1] >> 32) // top/left
  ------------------
  |  Branch (301:9): [True: 91.4k, False: 1.09k]
  |  Branch (301:19): [True: 50.3k, False: 41.0k]
  ------------------
  302|  50.3k|        add_sample(0, 0, -1, -1, &r[-1][t->bx - 1]);
  ------------------
  |  |  272|  50.3k|#define add_sample(dx, dy, sx, sy, rp) do { \
  |  |  273|  50.3k|    pts[np][0][0] = 16 * (2 * dx + sx * bs(rp)[0]) - 8; \
  |  |  ------------------
  |  |  |  |  205|  50.3k|#define bs(rp) dav1d_block_dimensions[(rp)->bs]
  |  |  ------------------
  |  |  274|  50.3k|    pts[np][0][1] = 16 * (2 * dy + sy * bs(rp)[1]) - 8; \
  |  |  ------------------
  |  |  |  |  205|  50.3k|#define bs(rp) dav1d_block_dimensions[(rp)->bs]
  |  |  ------------------
  |  |  275|  50.3k|    pts[np][1][0] = pts[np][0][0] + (rp)->mv.mv[0].x; \
  |  |  276|  50.3k|    pts[np][1][1] = pts[np][0][1] + (rp)->mv.mv[0].y; \
  |  |  277|  50.3k|    np++; \
  |  |  278|  50.3k|} while (0)
  |  |  ------------------
  |  |  |  Branch (278:10): [Folded, False: 50.3k]
  |  |  ------------------
  ------------------
  303|  92.5k|    if (np < 8 && masks[0] >> 32) // top/right
  ------------------
  |  Branch (303:9): [True: 91.0k, False: 1.46k]
  |  Branch (303:19): [True: 34.0k, False: 56.9k]
  ------------------
  304|  34.0k|        add_sample(bw4, 0, 1, -1, &r[-1][t->bx + bw4]);
  ------------------
  |  |  272|  34.0k|#define add_sample(dx, dy, sx, sy, rp) do { \
  |  |  273|  34.0k|    pts[np][0][0] = 16 * (2 * dx + sx * bs(rp)[0]) - 8; \
  |  |  ------------------
  |  |  |  |  205|  34.0k|#define bs(rp) dav1d_block_dimensions[(rp)->bs]
  |  |  ------------------
  |  |  274|  34.0k|    pts[np][0][1] = 16 * (2 * dy + sy * bs(rp)[1]) - 8; \
  |  |  ------------------
  |  |  |  |  205|  34.0k|#define bs(rp) dav1d_block_dimensions[(rp)->bs]
  |  |  ------------------
  |  |  275|  34.0k|    pts[np][1][0] = pts[np][0][0] + (rp)->mv.mv[0].x; \
  |  |  276|  34.0k|    pts[np][1][1] = pts[np][0][1] + (rp)->mv.mv[0].y; \
  |  |  277|  34.0k|    np++; \
  |  |  278|  34.0k|} while (0)
  |  |  ------------------
  |  |  |  Branch (278:10): [Folded, False: 34.0k]
  |  |  ------------------
  ------------------
  305|  92.5k|    assert(np > 0 && np <= 8);
  ------------------
  |  Branch (305:5): [True: 92.5k, False: 0]
  |  Branch (305:5): [True: 92.5k, False: 0]
  ------------------
  306|  92.5k|#undef bs
  307|       |
  308|       |    // select according to motion vector difference against a threshold
  309|  92.5k|    int mvd[8], ret = 0;
  310|  92.5k|    const int thresh = 4 * iclip(imax(bw4, bh4), 4, 28);
  311|   369k|    for (int i = 0; i < np; i++) {
  ------------------
  |  Branch (311:21): [True: 277k, False: 92.5k]
  ------------------
  312|   277k|        mvd[i] = abs(pts[i][1][0] - pts[i][0][0] - mv.x) +
  313|   277k|                 abs(pts[i][1][1] - pts[i][0][1] - mv.y);
  314|   277k|        if (mvd[i] > thresh)
  ------------------
  |  Branch (314:13): [True: 54.8k, False: 222k]
  ------------------
  315|  54.8k|            mvd[i] = -1;
  316|   222k|        else
  317|   222k|            ret++;
  318|   277k|    }
  319|  92.5k|    if (!ret) {
  ------------------
  |  Branch (319:9): [True: 11.0k, False: 81.4k]
  ------------------
  320|  11.0k|        ret = 1;
  321|  90.2k|    } else for (int i = 0, j = np - 1, k = 0; k < np - ret; k++, i++, j--) {
  ------------------
  |  Branch (321:47): [True: 22.5k, False: 67.6k]
  ------------------
  322|  36.4k|        while (mvd[i] != -1) i++;
  ------------------
  |  Branch (322:16): [True: 13.8k, False: 22.5k]
  ------------------
  323|  45.1k|        while (mvd[j] == -1) j--;
  ------------------
  |  Branch (323:16): [True: 22.5k, False: 22.5k]
  ------------------
  324|  22.5k|        assert(i != j);
  ------------------
  |  Branch (324:9): [True: 22.5k, False: 0]
  ------------------
  325|  22.5k|        if (i > j) break;
  ------------------
  |  Branch (325:13): [True: 13.7k, False: 8.82k]
  ------------------
  326|       |        // replace the discarded samples;
  327|  8.82k|        mvd[i] = mvd[j];
  328|  8.82k|        memcpy(pts[i], pts[j], sizeof(*pts));
  329|  8.82k|    }
  330|       |
  331|  92.5k|    if (!dav1d_find_affine_int(pts, ret, bw4, bh4, mv, wmp, t->bx, t->by) &&
  ------------------
  |  Branch (331:9): [True: 89.2k, False: 3.26k]
  ------------------
  332|  89.2k|        !dav1d_get_shear_params(wmp))
  ------------------
  |  Branch (332:9): [True: 86.4k, False: 2.81k]
  ------------------
  333|  86.4k|    {
  334|  86.4k|        wmp->type = DAV1D_WM_TYPE_AFFINE;
  335|  86.4k|    } else
  336|  6.07k|        wmp->type = DAV1D_WM_TYPE_IDENTITY;
  337|  92.5k|}
decode.c:splat_tworef_mv:
  550|   213k|{
  551|   213k|    assert(bw4 >= 2 && bh4 >= 2);
  ------------------
  |  Branch (551:5): [True: 213k, False: 0]
  |  Branch (551:5): [True: 213k, False: 0]
  ------------------
  552|   213k|    const enum CompInterPredMode mode = b->inter_mode;
  553|   213k|    const refmvs_block ALIGN(tmpl, 16) = (refmvs_block) {
  554|   213k|        .ref.ref = { b->ref[0] + 1, b->ref[1] + 1 },
  555|   213k|        .mv.mv = { b->mv[0], b->mv[1] },
  556|   213k|        .bs = bs,
  557|   213k|        .mf = (mode == GLOBALMV_GLOBALMV) | !!((1 << mode) & (0xbc)) * 2,
  558|   213k|    };
  559|   213k|    c->refmvs_dsp.splat_mv(&t->rt.r[(t->by & 31) + 5], &tmpl, t->bx, bw4, bh4);
  560|   213k|}
decode.c:splat_oneref_mv:
  519|   927k|{
  520|   927k|    const enum InterPredMode mode = b->inter_mode;
  521|   927k|    const refmvs_block ALIGN(tmpl, 16) = (refmvs_block) {
  522|   927k|        .ref.ref = { b->ref[0] + 1, b->interintra_type ? 0 : -1 },
  ------------------
  |  Branch (522:37): [True: 41.5k, False: 886k]
  ------------------
  523|   927k|        .mv.mv[0] = b->mv[0],
  524|   927k|        .bs = bs,
  525|   927k|        .mf = (mode == GLOBALMV && imin(bw4, bh4) >= 2) | ((mode == NEWMV) * 2),
  ------------------
  |  Branch (525:16): [True: 384k, False: 542k]
  |  Branch (525:36): [True: 268k, False: 116k]
  ------------------
  526|   927k|    };
  527|   927k|    c->refmvs_dsp.splat_mv(&t->rt.r[(t->by & 31) + 5], &tmpl, t->bx, bw4, bh4);
  528|   927k|}
decode.c:read_restoration_info:
 2514|  99.5k|{
 2515|  99.5k|    const Dav1dFrameContext *const f = t->f;
 2516|  99.5k|    Dav1dTileState *const ts = t->ts;
 2517|       |
 2518|  99.5k|    if (frame_type == DAV1D_RESTORATION_SWITCHABLE) {
  ------------------
  |  Branch (2518:9): [True: 35.3k, False: 64.1k]
  ------------------
 2519|  35.3k|        const int filter = dav1d_msac_decode_symbol_adapt4(&ts->msac,
  ------------------
  |  |   47|  35.3k|#define dav1d_msac_decode_symbol_adapt4  dav1d_msac_decode_symbol_adapt4_sse2
  ------------------
 2520|  35.3k|                               ts->cdf.m.restore_switchable, 2);
 2521|  35.3k|        lr->type = filter + !!filter; /* NONE/WIENER/SGRPROJ */
 2522|  64.1k|    } else {
 2523|  64.1k|        const unsigned type =
 2524|  64.1k|            dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  64.1k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 2525|  64.1k|                frame_type == DAV1D_RESTORATION_WIENER ?
  ------------------
  |  Branch (2525:17): [True: 20.7k, False: 43.3k]
  ------------------
 2526|  43.3k|                ts->cdf.m.restore_wiener : ts->cdf.m.restore_sgrproj);
 2527|  64.1k|        lr->type = type ? frame_type : DAV1D_RESTORATION_NONE;
  ------------------
  |  Branch (2527:20): [True: 31.8k, False: 32.3k]
  ------------------
 2528|  64.1k|    }
 2529|       |
 2530|  99.5k|    if (lr->type == DAV1D_RESTORATION_WIENER) {
  ------------------
  |  Branch (2530:9): [True: 16.4k, False: 83.0k]
  ------------------
 2531|  16.4k|        lr->filter_v[0] = p ? 0 :
  ------------------
  |  Branch (2531:27): [True: 8.86k, False: 7.58k]
  ------------------
 2532|  16.4k|            dav1d_msac_decode_subexp(&ts->msac,
 2533|  7.58k|                ts->lr_ref[p]->filter_v[0] + 5, 16, 1) - 5;
 2534|  16.4k|        lr->filter_v[1] =
 2535|  16.4k|            dav1d_msac_decode_subexp(&ts->msac,
 2536|  16.4k|                ts->lr_ref[p]->filter_v[1] + 23, 32, 2) - 23;
 2537|  16.4k|        lr->filter_v[2] =
 2538|  16.4k|            dav1d_msac_decode_subexp(&ts->msac,
 2539|  16.4k|                ts->lr_ref[p]->filter_v[2] + 17, 64, 3) - 17;
 2540|       |
 2541|  16.4k|        lr->filter_h[0] = p ? 0 :
  ------------------
  |  Branch (2541:27): [True: 8.86k, False: 7.58k]
  ------------------
 2542|  16.4k|            dav1d_msac_decode_subexp(&ts->msac,
 2543|  7.58k|                ts->lr_ref[p]->filter_h[0] + 5, 16, 1) - 5;
 2544|  16.4k|        lr->filter_h[1] =
 2545|  16.4k|            dav1d_msac_decode_subexp(&ts->msac,
 2546|  16.4k|                ts->lr_ref[p]->filter_h[1] + 23, 32, 2) - 23;
 2547|  16.4k|        lr->filter_h[2] =
 2548|  16.4k|            dav1d_msac_decode_subexp(&ts->msac,
 2549|  16.4k|                ts->lr_ref[p]->filter_h[2] + 17, 64, 3) - 17;
 2550|  16.4k|        memcpy(lr->sgr_weights, ts->lr_ref[p]->sgr_weights, sizeof(lr->sgr_weights));
 2551|  16.4k|        ts->lr_ref[p] = lr;
 2552|  16.4k|        if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  16.4k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 16.4k]
  |  |  ------------------
  |  |   35|  16.4k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  16.4k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 2553|      0|            printf("Post-lr_wiener[pl=%d,v[%d,%d,%d],h[%d,%d,%d]]: r=%d\n",
 2554|      0|                   p, lr->filter_v[0], lr->filter_v[1],
 2555|      0|                   lr->filter_v[2], lr->filter_h[0],
 2556|      0|                   lr->filter_h[1], lr->filter_h[2], ts->msac.rng);
 2557|  83.0k|    } else if (lr->type == DAV1D_RESTORATION_SGRPROJ) {
  ------------------
  |  Branch (2557:16): [True: 33.4k, False: 49.6k]
  ------------------
 2558|  33.4k|        const unsigned idx = dav1d_msac_decode_bools(&ts->msac, 4);
 2559|  33.4k|        const uint16_t *const sgr_params = dav1d_sgr_params[idx];
 2560|  33.4k|        lr->type += idx;
 2561|  33.4k|        lr->sgr_weights[0] = sgr_params[0] ? dav1d_msac_decode_subexp(&ts->msac,
  ------------------
  |  Branch (2561:30): [True: 25.9k, False: 7.50k]
  ------------------
 2562|  25.9k|            ts->lr_ref[p]->sgr_weights[0] + 96, 128, 4) - 96 : 0;
 2563|  33.4k|        lr->sgr_weights[1] = sgr_params[1] ? dav1d_msac_decode_subexp(&ts->msac,
  ------------------
  |  Branch (2563:30): [True: 24.6k, False: 8.85k]
  ------------------
 2564|  24.6k|            ts->lr_ref[p]->sgr_weights[1] + 32, 128, 4) - 32 : 95;
 2565|  33.4k|        memcpy(lr->filter_v, ts->lr_ref[p]->filter_v, sizeof(lr->filter_v));
 2566|  33.4k|        memcpy(lr->filter_h, ts->lr_ref[p]->filter_h, sizeof(lr->filter_h));
 2567|  33.4k|        ts->lr_ref[p] = lr;
 2568|  33.4k|        if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  33.4k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 33.4k]
  |  |  ------------------
  |  |   35|  33.4k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  33.4k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 2569|      0|            printf("Post-lr_sgrproj[pl=%d,idx=%d,w[%d,%d]]: r=%d\n",
 2570|      0|                   p, idx, lr->sgr_weights[0],
 2571|      0|                   lr->sgr_weights[1], ts->msac.rng);
 2572|  33.4k|    }
 2573|  99.5k|}
decode.c:init_quant_tables:
   57|  52.3k|{
   58|   182k|    for (int i = 0; i < (frame_hdr->segmentation.enabled ? 8 : 1); i++) {
  ------------------
  |  Branch (58:21): [True: 130k, False: 52.3k]
  |  Branch (58:26): [True: 100k, False: 82.3k]
  ------------------
   59|   130k|        const int yac = frame_hdr->segmentation.enabled ?
  ------------------
  |  Branch (59:25): [True: 89.2k, False: 41.1k]
  ------------------
   60|  89.2k|            iclip_u8(qidx + frame_hdr->segmentation.seg_data.d[i].delta_q) : qidx;
   61|   130k|        const int ydc = iclip_u8(yac + frame_hdr->quant.ydc_delta);
   62|   130k|        const int uac = iclip_u8(yac + frame_hdr->quant.uac_delta);
   63|   130k|        const int udc = iclip_u8(yac + frame_hdr->quant.udc_delta);
   64|   130k|        const int vac = iclip_u8(yac + frame_hdr->quant.vac_delta);
   65|   130k|        const int vdc = iclip_u8(yac + frame_hdr->quant.vdc_delta);
   66|       |
   67|   130k|        dq[i][0][0] = dav1d_dq_tbl[seq_hdr->hbd][ydc][0];
   68|   130k|        dq[i][0][1] = dav1d_dq_tbl[seq_hdr->hbd][yac][1];
   69|   130k|        dq[i][1][0] = dav1d_dq_tbl[seq_hdr->hbd][udc][0];
   70|   130k|        dq[i][1][1] = dav1d_dq_tbl[seq_hdr->hbd][uac][1];
   71|   130k|        dq[i][2][0] = dav1d_dq_tbl[seq_hdr->hbd][vdc][0];
   72|   130k|        dq[i][2][1] = dav1d_dq_tbl[seq_hdr->hbd][vac][1];
   73|   130k|    }
   74|  52.3k|}
decode.c:setup_tile:
 2430|  46.3k|{
 2431|  46.3k|    const int col_sb_start = f->frame_hdr->tiling.col_start_sb[tile_col];
 2432|  46.3k|    const int col_sb128_start = col_sb_start >> !f->seq_hdr->sb128;
 2433|  46.3k|    const int col_sb_end = f->frame_hdr->tiling.col_start_sb[tile_col + 1];
 2434|  46.3k|    const int row_sb_start = f->frame_hdr->tiling.row_start_sb[tile_row];
 2435|  46.3k|    const int row_sb_end = f->frame_hdr->tiling.row_start_sb[tile_row + 1];
 2436|  46.3k|    const int sb_shift = f->sb_shift;
 2437|       |
 2438|  46.3k|    const uint8_t *const size_mul = ss_size_mul[f->cur.p.layout];
 2439|   139k|    for (int p = 0; p < 2; p++) {
  ------------------
  |  Branch (2439:21): [True: 92.7k, False: 46.3k]
  ------------------
 2440|  92.7k|        ts->frame_thread[p].pal_idx = f->frame_thread.pal_idx ?
  ------------------
  |  Branch (2440:39): [True: 0, False: 92.7k]
  ------------------
 2441|      0|            &f->frame_thread.pal_idx[(size_t)tile_start_off * size_mul[1] / 8] :
 2442|  92.7k|            NULL;
 2443|  92.7k|        ts->frame_thread[p].cbi = f->frame_thread.cbi ?
  ------------------
  |  Branch (2443:35): [True: 0, False: 92.7k]
  ------------------
 2444|      0|            &f->frame_thread.cbi[(size_t)tile_start_off * size_mul[0] / 64] :
 2445|  92.7k|            NULL;
 2446|  92.7k|        ts->frame_thread[p].cf = f->frame_thread.cf ?
  ------------------
  |  Branch (2446:34): [True: 0, False: 92.7k]
  ------------------
 2447|      0|            (uint8_t*)f->frame_thread.cf +
 2448|      0|                (((size_t)tile_start_off * size_mul[0]) >> !f->seq_hdr->hbd) :
 2449|  92.7k|            NULL;
 2450|  92.7k|    }
 2451|       |
 2452|  46.3k|    dav1d_cdf_thread_copy(&ts->cdf, &f->in_cdf);
 2453|  46.3k|    ts->last_qidx = f->frame_hdr->quant.yac;
 2454|  46.3k|    ts->last_delta_lf.u32 = 0;
 2455|       |
 2456|  46.3k|    dav1d_msac_init(&ts->msac, data, sz, f->frame_hdr->disable_cdf_update);
 2457|       |
 2458|  46.3k|    ts->tiling.row = tile_row;
 2459|  46.3k|    ts->tiling.col = tile_col;
 2460|  46.3k|    ts->tiling.col_start = col_sb_start << sb_shift;
 2461|  46.3k|    ts->tiling.col_end = imin(col_sb_end << sb_shift, f->bw);
 2462|  46.3k|    ts->tiling.row_start = row_sb_start << sb_shift;
 2463|  46.3k|    ts->tiling.row_end = imin(row_sb_end << sb_shift, f->bh);
 2464|       |
 2465|       |    // Reference Restoration Unit (used for exp coding)
 2466|  46.3k|    int sb_idx, unit_idx;
 2467|  46.3k|    if (f->frame_hdr->width[0] != f->frame_hdr->width[1]) {
  ------------------
  |  Branch (2467:9): [True: 4.50k, False: 41.8k]
  ------------------
 2468|       |        // vertical components only
 2469|  4.50k|        sb_idx = (ts->tiling.row_start >> 5) * f->sr_sb128w;
 2470|  4.50k|        unit_idx = (ts->tiling.row_start & 16) >> 3;
 2471|  41.8k|    } else {
 2472|  41.8k|        sb_idx = (ts->tiling.row_start >> 5) * f->sb128w + col_sb128_start;
 2473|  41.8k|        unit_idx = ((ts->tiling.row_start & 16) >> 3) +
 2474|  41.8k|                   ((ts->tiling.col_start & 16) >> 4);
 2475|  41.8k|    }
 2476|   185k|    for (int p = 0; p < 3; p++) {
  ------------------
  |  Branch (2476:21): [True: 139k, False: 46.3k]
  ------------------
 2477|   139k|        if (!((f->lf.restore_planes >> p) & 1U))
  ------------------
  |  Branch (2477:13): [True: 117k, False: 21.0k]
  ------------------
 2478|   117k|            continue;
 2479|       |
 2480|  21.0k|        if (f->frame_hdr->width[0] != f->frame_hdr->width[1]) {
  ------------------
  |  Branch (2480:13): [True: 5.90k, False: 15.1k]
  ------------------
 2481|  5.90k|            const int ss_hor = p && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
  ------------------
  |  Branch (2481:32): [True: 3.44k, False: 2.45k]
  |  Branch (2481:37): [True: 2.42k, False: 1.01k]
  ------------------
 2482|  5.90k|            const int d = f->frame_hdr->super_res.width_scale_denominator;
 2483|  5.90k|            const int unit_size_log2 = f->frame_hdr->restoration.unit_size[!!p];
 2484|  5.90k|            const int rnd = (8 << unit_size_log2) - 1, shift = unit_size_log2 + 3;
 2485|  5.90k|            const int x = ((4 * ts->tiling.col_start * d >> ss_hor) + rnd) >> shift;
 2486|  5.90k|            const int px_x = x << (unit_size_log2 + ss_hor);
 2487|  5.90k|            const int u_idx = unit_idx + ((px_x & 64) >> 6);
 2488|  5.90k|            const int sb128x = px_x >> 7;
 2489|  5.90k|            if (sb128x >= f->sr_sb128w) continue;
  ------------------
  |  Branch (2489:17): [True: 361, False: 5.54k]
  ------------------
 2490|  5.54k|            ts->lr_ref[p] = &f->lf.lr_mask[sb_idx + sb128x].lr[p][u_idx];
 2491|  15.1k|        } else {
 2492|  15.1k|            ts->lr_ref[p] = &f->lf.lr_mask[sb_idx].lr[p][unit_idx];
 2493|  15.1k|        }
 2494|       |
 2495|  20.7k|        ts->lr_ref[p]->filter_v[0] = 3;
 2496|  20.7k|        ts->lr_ref[p]->filter_v[1] = -7;
 2497|  20.7k|        ts->lr_ref[p]->filter_v[2] = 15;
 2498|  20.7k|        ts->lr_ref[p]->filter_h[0] = 3;
 2499|  20.7k|        ts->lr_ref[p]->filter_h[1] = -7;
 2500|  20.7k|        ts->lr_ref[p]->filter_h[2] = 15;
 2501|  20.7k|        ts->lr_ref[p]->sgr_weights[0] = -32;
 2502|  20.7k|        ts->lr_ref[p]->sgr_weights[1] = 31;
 2503|  20.7k|    }
 2504|       |
 2505|  46.3k|    if (f->c->n_tc > 1) {
  ------------------
  |  Branch (2505:9): [True: 0, False: 46.3k]
  ------------------
 2506|      0|        for (int p = 0; p < 2; p++)
  ------------------
  |  Branch (2506:25): [True: 0, False: 0]
  ------------------
 2507|      0|            atomic_init(&ts->progress[p], row_sb_start);
 2508|      0|    }
 2509|  46.3k|}
decode.c:get_upscale_x0:
 3321|  8.23k|static int get_upscale_x0(const int in_w, const int out_w, const int step) {
 3322|  8.23k|    const int err = out_w * step - (in_w << 14);
 3323|  8.23k|    const int x0 = (-((out_w - in_w) << 13) + (out_w >> 1)) / out_w + 128 - (err / 2);
 3324|  8.23k|    return x0 & 0x3fff;
 3325|  8.23k|}

obu.c:get_poc_diff:
  239|   241k|{
  240|   241k|    if (!order_hint_n_bits) return 0;
  ------------------
  |  Branch (240:9): [True: 0, False: 241k]
  ------------------
  241|   241k|    const int mask = 1 << (order_hint_n_bits - 1);
  242|   241k|    const int diff = poc0 - poc1;
  243|   241k|    return (diff & (mask - 1)) - (diff & mask);
  244|   241k|}
refmvs.c:get_gmv_2d:
  482|  1.35M|{
  483|  1.35M|    switch (gmv->type) {
  484|   183k|    case DAV1D_WM_TYPE_ROT_ZOOM:
  ------------------
  |  Branch (484:5): [True: 183k, False: 1.16M]
  ------------------
  485|   183k|        assert(gmv->matrix[5] ==  gmv->matrix[2]);
  ------------------
  |  Branch (485:9): [True: 183k, False: 0]
  ------------------
  486|   183k|        assert(gmv->matrix[4] == -gmv->matrix[3]);
  ------------------
  |  Branch (486:9): [True: 183k, False: 0]
  ------------------
  487|       |        // fall-through
  488|   183k|    default:
  ------------------
  |  Branch (488:5): [True: 0, False: 1.35M]
  ------------------
  489|   216k|    case DAV1D_WM_TYPE_AFFINE: {
  ------------------
  |  Branch (489:5): [True: 32.8k, False: 1.32M]
  ------------------
  490|   216k|        const int x = bx4 * 4 + bw4 * 2 - 1;
  491|   216k|        const int y = by4 * 4 + bh4 * 2 - 1;
  492|   216k|        const int xc = (gmv->matrix[2] - (1 << 16)) * x +
  493|   216k|                       gmv->matrix[3] * y + gmv->matrix[0];
  494|   216k|        const int yc = (gmv->matrix[5] - (1 << 16)) * y +
  495|   216k|                       gmv->matrix[4] * x + gmv->matrix[1];
  496|   216k|        const int shift = 16 - (3 - !hdr->hp);
  497|   216k|        const int round = (1 << shift) >> 1;
  498|   216k|        mv res = (mv) {
  499|   216k|            .y = apply_sign(((abs(yc) + round) >> shift) << !hdr->hp, yc),
  500|   216k|            .x = apply_sign(((abs(xc) + round) >> shift) << !hdr->hp, xc),
  501|   216k|        };
  502|   216k|        if (hdr->force_integer_mv)
  ------------------
  |  Branch (502:13): [True: 57.3k, False: 159k]
  ------------------
  503|  57.3k|            fix_int_mv_precision(&res);
  504|   216k|        return res;
  505|   183k|    }
  506|   167k|    case DAV1D_WM_TYPE_TRANSLATION: {
  ------------------
  |  Branch (506:5): [True: 167k, False: 1.18M]
  ------------------
  507|   167k|        mv res = (mv) {
  508|   167k|            .y = gmv->matrix[0] >> 13,
  509|   167k|            .x = gmv->matrix[1] >> 13,
  510|   167k|        };
  511|   167k|        if (hdr->force_integer_mv)
  ------------------
  |  Branch (511:13): [True: 12.7k, False: 154k]
  ------------------
  512|  12.7k|            fix_int_mv_precision(&res);
  513|   167k|        return res;
  514|   183k|    }
  515|   969k|    case DAV1D_WM_TYPE_IDENTITY:
  ------------------
  |  Branch (515:5): [True: 969k, False: 383k]
  ------------------
  516|   969k|        return (mv) { .x = 0, .y = 0 };
  517|  1.35M|    }
  518|  1.35M|}
refmvs.c:fix_int_mv_precision:
  462|  88.0k|static inline void fix_int_mv_precision(mv *const mv) {
  463|  88.0k|    mv->x = (mv->x - (mv->x >> 15) + 3) & ~7U;
  464|  88.0k|    mv->y = (mv->y - (mv->y >> 15) + 3) & ~7U;
  465|  88.0k|}
refmvs.c:fix_mv_precision:
  469|  1.05M|{
  470|  1.05M|    if (hdr->force_integer_mv) {
  ------------------
  |  Branch (470:9): [True: 17.9k, False: 1.03M]
  ------------------
  471|  17.9k|        fix_int_mv_precision(mv);
  472|  1.03M|    } else if (!hdr->hp) {
  ------------------
  |  Branch (472:16): [True: 26.7k, False: 1.01M]
  ------------------
  473|  26.7k|        mv->x = (mv->x - (mv->x >> 15)) & ~1U;
  474|  26.7k|        mv->y = (mv->y - (mv->y >> 15)) & ~1U;
  475|  26.7k|    }
  476|  1.05M|}
refmvs.c:get_poc_diff:
  239|   516k|{
  240|   516k|    if (!order_hint_n_bits) return 0;
  ------------------
  |  Branch (240:9): [True: 217k, False: 298k]
  ------------------
  241|   298k|    const int mask = 1 << (order_hint_n_bits - 1);
  242|   298k|    const int diff = poc0 - poc1;
  243|   298k|    return (diff & (mask - 1)) - (diff & mask);
  244|   516k|}
decode.c:get_partition_ctx:
   87|  2.81M|{
   88|  2.81M|    return ((a->partition[xb8] >> (4 - bl)) & 1) +
   89|  2.81M|          (((l->partition[yb8] >> (4 - bl)) & 1) << 1);
   90|  2.81M|}
decode.c:get_cur_frame_segid:
  445|   980k|{
  446|   980k|    cur_seg_map += bx + by * stride;
  447|   980k|    if (have_left && have_top) {
  ------------------
  |  Branch (447:9): [True: 872k, False: 107k]
  |  Branch (447:22): [True: 657k, False: 215k]
  ------------------
  448|   657k|        const int l = cur_seg_map[-1];
  449|   657k|        const int a = cur_seg_map[-stride];
  450|   657k|        const int al = cur_seg_map[-(stride + 1)];
  451|       |
  452|   657k|        if (l == a && al == l) *seg_ctx = 2;
  ------------------
  |  Branch (452:13): [True: 387k, False: 269k]
  |  Branch (452:23): [True: 370k, False: 16.8k]
  ------------------
  453|   286k|        else if (l == a || al == l || a == al) *seg_ctx = 1;
  ------------------
  |  Branch (453:18): [True: 16.8k, False: 269k]
  |  Branch (453:28): [True: 116k, False: 152k]
  |  Branch (453:39): [True: 95.6k, False: 57.2k]
  ------------------
  454|  57.2k|        else *seg_ctx = 0;
  455|   657k|        return a == al ? a : l;
  ------------------
  |  Branch (455:16): [True: 466k, False: 190k]
  ------------------
  456|   657k|    } else {
  457|   323k|        *seg_ctx = 0;
  458|   323k|        return have_left ? cur_seg_map[-1] : have_top ? cur_seg_map[-stride] : 0;
  ------------------
  |  Branch (458:16): [True: 215k, False: 107k]
  |  Branch (458:46): [True: 104k, False: 3.68k]
  ------------------
  459|   323k|    }
  460|   980k|}
decode.c:get_intra_ctx:
   63|   873k|{
   64|   873k|    if (have_left) {
  ------------------
  |  Branch (64:9): [True: 837k, False: 35.5k]
  ------------------
   65|   837k|        if (have_top) {
  ------------------
  |  Branch (65:13): [True: 755k, False: 81.5k]
  ------------------
   66|   755k|            const int ctx = l->intra[yb4] + a->intra[xb4];
   67|   755k|            return ctx + (ctx == 2);
   68|   755k|        } else
   69|  81.5k|            return l->intra[yb4] * 2;
   70|   837k|    } else {
   71|  35.5k|        return have_top ? a->intra[xb4] * 2 : 0;
  ------------------
  |  Branch (71:16): [True: 24.5k, False: 10.9k]
  ------------------
   72|  35.5k|    }
   73|   873k|}
decode.c:get_tx_ctx:
   79|   480k|{
   80|   480k|    return (l->tx_intra[yb4] >= max_tx->lh) + (a->tx_intra[xb4] >= max_tx->lw);
   81|   480k|}
decode.c:get_comp_ctx:
  160|   366k|{
  161|   366k|    if (have_top) {
  ------------------
  |  Branch (161:9): [True: 320k, False: 46.2k]
  ------------------
  162|   320k|        if (have_left) {
  ------------------
  |  Branch (162:13): [True: 307k, False: 12.8k]
  ------------------
  163|   307k|            if (a->comp_type[xb4]) {
  ------------------
  |  Branch (163:17): [True: 135k, False: 171k]
  ------------------
  164|   135k|                if (l->comp_type[yb4]) {
  ------------------
  |  Branch (164:21): [True: 93.3k, False: 42.2k]
  ------------------
  165|  93.3k|                    return 4;
  166|  93.3k|                } else {
  167|       |                    // 4U means intra (-1) or bwd (>= 4)
  168|  42.2k|                    return 2 + ((unsigned)l->ref[0][yb4] >= 4U);
  169|  42.2k|                }
  170|   171k|            } else if (l->comp_type[yb4]) {
  ------------------
  |  Branch (170:24): [True: 45.2k, False: 126k]
  ------------------
  171|       |                // 4U means intra (-1) or bwd (>= 4)
  172|  45.2k|                return 2 + ((unsigned)a->ref[0][xb4] >= 4U);
  173|   126k|            } else {
  174|   126k|                return (l->ref[0][yb4] >= 4) ^ (a->ref[0][xb4] >= 4);
  175|   126k|            }
  176|   307k|        } else {
  177|  12.8k|            return a->comp_type[xb4] ? 3 : a->ref[0][xb4] >= 4;
  ------------------
  |  Branch (177:20): [True: 5.74k, False: 7.10k]
  ------------------
  178|  12.8k|        }
  179|   320k|    } else if (have_left) {
  ------------------
  |  Branch (179:16): [True: 41.8k, False: 4.34k]
  ------------------
  180|  41.8k|        return l->comp_type[yb4] ? 3 : l->ref[0][yb4] >= 4;
  ------------------
  |  Branch (180:16): [True: 20.9k, False: 20.9k]
  ------------------
  181|  41.8k|    } else {
  182|  4.34k|        return 1;
  183|  4.34k|    }
  184|   366k|}
decode.c:fix_mv_precision:
  469|   668k|{
  470|   668k|    if (hdr->force_integer_mv) {
  ------------------
  |  Branch (470:9): [True: 140k, False: 527k]
  ------------------
  471|   140k|        fix_int_mv_precision(mv);
  472|   527k|    } else if (!hdr->hp) {
  ------------------
  |  Branch (472:16): [True: 121k, False: 406k]
  ------------------
  473|   121k|        mv->x = (mv->x - (mv->x >> 15)) & ~1U;
  474|   121k|        mv->y = (mv->y - (mv->y >> 15)) & ~1U;
  475|   121k|    }
  476|   668k|}
decode.c:fix_int_mv_precision:
  462|   167k|static inline void fix_int_mv_precision(mv *const mv) {
  463|   167k|    mv->x = (mv->x - (mv->x >> 15) + 3) & ~7U;
  464|   167k|    mv->y = (mv->y - (mv->y >> 15) + 3) & ~7U;
  465|   167k|}
decode.c:get_comp_dir_ctx:
  190|   189k|{
  191|   189k|#define has_uni_comp(edge, off) \
  192|   189k|    ((edge->ref[0][off] < 4) == (edge->ref[1][off] < 4))
  193|       |
  194|   189k|    if (have_top && have_left) {
  ------------------
  |  Branch (194:9): [True: 164k, False: 24.4k]
  |  Branch (194:21): [True: 158k, False: 6.01k]
  ------------------
  195|   158k|        const int a_intra = a->intra[xb4], l_intra = l->intra[yb4];
  196|       |
  197|   158k|        if (a_intra && l_intra) return 2;
  ------------------
  |  Branch (197:13): [True: 5.75k, False: 153k]
  |  Branch (197:24): [True: 1.12k, False: 4.62k]
  ------------------
  198|   157k|        if (a_intra || l_intra) {
  ------------------
  |  Branch (198:13): [True: 4.62k, False: 153k]
  |  Branch (198:24): [True: 3.88k, False: 149k]
  ------------------
  199|  8.51k|            const BlockContext *const edge = a_intra ? l : a;
  ------------------
  |  Branch (199:46): [True: 4.62k, False: 3.88k]
  ------------------
  200|  8.51k|            const int off = a_intra ? yb4 : xb4;
  ------------------
  |  Branch (200:29): [True: 4.62k, False: 3.88k]
  ------------------
  201|       |
  202|  8.51k|            if (edge->comp_type[off] == COMP_INTER_NONE) return 2;
  ------------------
  |  Branch (202:17): [True: 2.37k, False: 6.13k]
  ------------------
  203|  6.13k|            return 1 + 2 * has_uni_comp(edge, off);
  ------------------
  |  |  192|  6.13k|    ((edge->ref[0][off] < 4) == (edge->ref[1][off] < 4))
  ------------------
  204|  8.51k|        }
  205|       |
  206|   149k|        const int a_comp = a->comp_type[xb4] != COMP_INTER_NONE;
  207|   149k|        const int l_comp = l->comp_type[yb4] != COMP_INTER_NONE;
  208|   149k|        const int a_ref0 = a->ref[0][xb4], l_ref0 = l->ref[0][yb4];
  209|       |
  210|   149k|        if (!a_comp && !l_comp) {
  ------------------
  |  Branch (210:13): [True: 40.5k, False: 108k]
  |  Branch (210:24): [True: 14.0k, False: 26.5k]
  ------------------
  211|  14.0k|            return 1 + 2 * ((a_ref0 >= 4) == (l_ref0 >= 4));
  212|   135k|        } else if (!a_comp || !l_comp) {
  ------------------
  |  Branch (212:20): [True: 26.5k, False: 108k]
  |  Branch (212:31): [True: 22.8k, False: 85.7k]
  ------------------
  213|  49.3k|            const BlockContext *const edge = a_comp ? a : l;
  ------------------
  |  Branch (213:46): [True: 22.8k, False: 26.5k]
  ------------------
  214|  49.3k|            const int off = a_comp ? xb4 : yb4;
  ------------------
  |  Branch (214:29): [True: 22.8k, False: 26.5k]
  ------------------
  215|       |
  216|  49.3k|            if (!has_uni_comp(edge, off)) return 1;
  ------------------
  |  |  192|  49.3k|    ((edge->ref[0][off] < 4) == (edge->ref[1][off] < 4))
  ------------------
  |  Branch (216:17): [True: 41.5k, False: 7.79k]
  ------------------
  217|  7.79k|            return 3 + ((a_ref0 >= 4) == (l_ref0 >= 4));
  218|  85.7k|        } else {
  219|  85.7k|            const int a_uni = has_uni_comp(a, xb4), l_uni = has_uni_comp(l, yb4);
  ------------------
  |  |  192|  85.7k|    ((edge->ref[0][off] < 4) == (edge->ref[1][off] < 4))
  ------------------
                          const int a_uni = has_uni_comp(a, xb4), l_uni = has_uni_comp(l, yb4);
  ------------------
  |  |  192|  85.7k|    ((edge->ref[0][off] < 4) == (edge->ref[1][off] < 4))
  ------------------
  220|       |
  221|  85.7k|            if (!a_uni && !l_uni) return 0;
  ------------------
  |  Branch (221:17): [True: 75.2k, False: 10.4k]
  |  Branch (221:27): [True: 69.8k, False: 5.33k]
  ------------------
  222|  15.8k|            if (!a_uni || !l_uni) return 2;
  ------------------
  |  Branch (222:17): [True: 5.33k, False: 10.4k]
  |  Branch (222:27): [True: 5.74k, False: 4.73k]
  ------------------
  223|  4.73k|            return 3 + ((a_ref0 == 4) == (l_ref0 == 4));
  224|  15.8k|        }
  225|   149k|    } else if (have_top || have_left) {
  ------------------
  |  Branch (225:16): [True: 6.01k, False: 24.4k]
  |  Branch (225:28): [True: 23.3k, False: 1.14k]
  ------------------
  226|  29.3k|        const BlockContext *const edge = have_left ? l : a;
  ------------------
  |  Branch (226:42): [True: 23.3k, False: 6.01k]
  ------------------
  227|  29.3k|        const int off = have_left ? yb4 : xb4;
  ------------------
  |  Branch (227:25): [True: 23.3k, False: 6.01k]
  ------------------
  228|       |
  229|  29.3k|        if (edge->intra[off]) return 2;
  ------------------
  |  Branch (229:13): [True: 770, False: 28.5k]
  ------------------
  230|  28.5k|        if (edge->comp_type[off] == COMP_INTER_NONE) return 2;
  ------------------
  |  Branch (230:13): [True: 6.72k, False: 21.8k]
  ------------------
  231|  21.8k|        return 4 * has_uni_comp(edge, off);
  ------------------
  |  |  192|  21.8k|    ((edge->ref[0][off] < 4) == (edge->ref[1][off] < 4))
  ------------------
  232|  28.5k|    } else {
  233|  1.14k|        return 2;
  234|  1.14k|    }
  235|   189k|}
decode.c:av1_get_fwd_ref_ctx:
  307|   519k|{
  308|   519k|    int cnt[4] = { 0 };
  309|       |
  310|   519k|    if (have_top && !a->intra[xb4]) {
  ------------------
  |  Branch (310:9): [True: 469k, False: 49.5k]
  |  Branch (310:21): [True: 446k, False: 23.3k]
  ------------------
  311|   446k|        if (a->ref[0][xb4] < 4) cnt[a->ref[0][xb4]]++;
  ------------------
  |  Branch (311:13): [True: 408k, False: 37.7k]
  ------------------
  312|   446k|        if (a->comp_type[xb4] && a->ref[1][xb4] < 4) cnt[a->ref[1][xb4]]++;
  ------------------
  |  Branch (312:13): [True: 136k, False: 309k]
  |  Branch (312:34): [True: 14.1k, False: 122k]
  ------------------
  313|   446k|    }
  314|       |
  315|   519k|    if (have_left && !l->intra[yb4]) {
  ------------------
  |  Branch (315:9): [True: 503k, False: 15.2k]
  |  Branch (315:22): [True: 480k, False: 23.7k]
  ------------------
  316|   480k|        if (l->ref[0][yb4] < 4) cnt[l->ref[0][yb4]]++;
  ------------------
  |  Branch (316:13): [True: 443k, False: 36.9k]
  ------------------
  317|   480k|        if (l->comp_type[yb4] && l->ref[1][yb4] < 4) cnt[l->ref[1][yb4]]++;
  ------------------
  |  Branch (317:13): [True: 157k, False: 322k]
  |  Branch (317:34): [True: 15.2k, False: 142k]
  ------------------
  318|   480k|    }
  319|       |
  320|   519k|    cnt[0] += cnt[1];
  321|   519k|    cnt[2] += cnt[3];
  322|       |
  323|   519k|    return cnt[0] == cnt[2] ? 1 : cnt[0] < cnt[2] ? 0 : 2;
  ------------------
  |  Branch (323:12): [True: 77.5k, False: 441k]
  |  Branch (323:35): [True: 93.7k, False: 347k]
  ------------------
  324|   519k|}
decode.c:av1_get_fwd_ref_2_ctx:
  350|   139k|{
  351|   139k|    int cnt[2] = { 0 };
  352|       |
  353|   139k|    if (have_top && !a->intra[xb4]) {
  ------------------
  |  Branch (353:9): [True: 117k, False: 22.1k]
  |  Branch (353:21): [True: 111k, False: 6.29k]
  ------------------
  354|   111k|        if ((a->ref[0][xb4] ^ 2U) < 2) cnt[a->ref[0][xb4] - 2]++;
  ------------------
  |  Branch (354:13): [True: 66.4k, False: 44.6k]
  ------------------
  355|   111k|        if (a->comp_type[xb4] && (a->ref[1][xb4] ^ 2U) < 2) cnt[a->ref[1][xb4] - 2]++;
  ------------------
  |  Branch (355:13): [True: 44.6k, False: 66.4k]
  |  Branch (355:34): [True: 6.10k, False: 38.5k]
  ------------------
  356|   111k|    }
  357|       |
  358|   139k|    if (have_left && !l->intra[yb4]) {
  ------------------
  |  Branch (358:9): [True: 134k, False: 5.43k]
  |  Branch (358:22): [True: 127k, False: 6.08k]
  ------------------
  359|   127k|        if ((l->ref[0][yb4] ^ 2U) < 2) cnt[l->ref[0][yb4] - 2]++;
  ------------------
  |  Branch (359:13): [True: 84.1k, False: 43.8k]
  ------------------
  360|   127k|        if (l->comp_type[yb4] && (l->ref[1][yb4] ^ 2U) < 2) cnt[l->ref[1][yb4] - 2]++;
  ------------------
  |  Branch (360:13): [True: 59.4k, False: 68.4k]
  |  Branch (360:34): [True: 7.30k, False: 52.1k]
  ------------------
  361|   127k|    }
  362|       |
  363|   139k|    return cnt[0] == cnt[1] ? 1 : cnt[0] < cnt[1] ? 0 : 2;
  ------------------
  |  Branch (363:12): [True: 32.8k, False: 106k]
  |  Branch (363:35): [True: 79.8k, False: 26.7k]
  ------------------
  364|   139k|}
decode.c:av1_get_fwd_ref_1_ctx:
  330|   392k|{
  331|   392k|    int cnt[2] = { 0 };
  332|       |
  333|   392k|    if (have_top && !a->intra[xb4]) {
  ------------------
  |  Branch (333:9): [True: 364k, False: 28.4k]
  |  Branch (333:21): [True: 346k, False: 17.5k]
  ------------------
  334|   346k|        if (a->ref[0][xb4] < 2) cnt[a->ref[0][xb4]]++;
  ------------------
  |  Branch (334:13): [True: 301k, False: 45.4k]
  ------------------
  335|   346k|        if (a->comp_type[xb4] && a->ref[1][xb4] < 2) cnt[a->ref[1][xb4]]++;
  ------------------
  |  Branch (335:13): [True: 99.3k, False: 247k]
  |  Branch (335:34): [True: 5.52k, False: 93.8k]
  ------------------
  336|   346k|    }
  337|       |
  338|   392k|    if (have_left && !l->intra[yb4]) {
  ------------------
  |  Branch (338:9): [True: 381k, False: 10.8k]
  |  Branch (338:22): [True: 363k, False: 18.0k]
  ------------------
  339|   363k|        if (l->ref[0][yb4] < 2) cnt[l->ref[0][yb4]]++;
  ------------------
  |  Branch (339:13): [True: 316k, False: 46.7k]
  ------------------
  340|   363k|        if (l->comp_type[yb4] && l->ref[1][yb4] < 2) cnt[l->ref[1][yb4]]++;
  ------------------
  |  Branch (340:13): [True: 106k, False: 257k]
  |  Branch (340:34): [True: 5.43k, False: 100k]
  ------------------
  341|   363k|    }
  342|       |
  343|   392k|    return cnt[0] == cnt[1] ? 1 : cnt[0] < cnt[1] ? 0 : 2;
  ------------------
  |  Branch (343:12): [True: 46.7k, False: 345k]
  |  Branch (343:35): [True: 25.3k, False: 320k]
  ------------------
  344|   392k|}
decode.c:av1_get_bwd_ref_ctx:
  370|   374k|{
  371|   374k|    int cnt[3] = { 0 };
  372|       |
  373|   374k|    if (have_top && !a->intra[xb4]) {
  ------------------
  |  Branch (373:9): [True: 322k, False: 51.7k]
  |  Branch (373:21): [True: 308k, False: 13.7k]
  ------------------
  374|   308k|        if (a->ref[0][xb4] >= 4) cnt[a->ref[0][xb4] - 4]++;
  ------------------
  |  Branch (374:13): [True: 147k, False: 161k]
  ------------------
  375|   308k|        if (a->comp_type[xb4] && a->ref[1][xb4] >= 4) cnt[a->ref[1][xb4] - 4]++;
  ------------------
  |  Branch (375:13): [True: 120k, False: 188k]
  |  Branch (375:34): [True: 113k, False: 6.71k]
  ------------------
  376|   308k|    }
  377|       |
  378|   374k|    if (have_left && !l->intra[yb4]) {
  ------------------
  |  Branch (378:9): [True: 359k, False: 14.7k]
  |  Branch (378:22): [True: 344k, False: 15.0k]
  ------------------
  379|   344k|        if (l->ref[0][yb4] >= 4) cnt[l->ref[0][yb4] - 4]++;
  ------------------
  |  Branch (379:13): [True: 161k, False: 182k]
  ------------------
  380|   344k|        if (l->comp_type[yb4] && l->ref[1][yb4] >= 4) cnt[l->ref[1][yb4] - 4]++;
  ------------------
  |  Branch (380:13): [True: 141k, False: 203k]
  |  Branch (380:34): [True: 134k, False: 6.54k]
  ------------------
  381|   344k|    }
  382|       |
  383|   374k|    cnt[1] += cnt[0];
  384|       |
  385|   374k|    return cnt[2] == cnt[1] ? 1 : cnt[1] < cnt[2] ? 0 : 2;
  ------------------
  |  Branch (385:12): [True: 58.1k, False: 315k]
  |  Branch (385:35): [True: 189k, False: 126k]
  ------------------
  386|   374k|}
decode.c:av1_get_bwd_ref_1_ctx:
  392|   155k|{
  393|   155k|    int cnt[3] = { 0 };
  394|       |
  395|   155k|    if (have_top && !a->intra[xb4]) {
  ------------------
  |  Branch (395:9): [True: 141k, False: 13.7k]
  |  Branch (395:21): [True: 134k, False: 6.59k]
  ------------------
  396|   134k|        if (a->ref[0][xb4] >= 4) cnt[a->ref[0][xb4] - 4]++;
  ------------------
  |  Branch (396:13): [True: 48.0k, False: 86.8k]
  ------------------
  397|   134k|        if (a->comp_type[xb4] && a->ref[1][xb4] >= 4) cnt[a->ref[1][xb4] - 4]++;
  ------------------
  |  Branch (397:13): [True: 66.4k, False: 68.5k]
  |  Branch (397:34): [True: 62.3k, False: 4.07k]
  ------------------
  398|   134k|    }
  399|       |
  400|   155k|    if (have_left && !l->intra[yb4]) {
  ------------------
  |  Branch (400:9): [True: 150k, False: 4.89k]
  |  Branch (400:22): [True: 143k, False: 7.30k]
  ------------------
  401|   143k|        if (l->ref[0][yb4] >= 4) cnt[l->ref[0][yb4] - 4]++;
  ------------------
  |  Branch (401:13): [True: 48.2k, False: 94.7k]
  ------------------
  402|   143k|        if (l->comp_type[yb4] && l->ref[1][yb4] >= 4) cnt[l->ref[1][yb4] - 4]++;
  ------------------
  |  Branch (402:13): [True: 74.0k, False: 69.0k]
  |  Branch (402:34): [True: 70.3k, False: 3.69k]
  ------------------
  403|   143k|    }
  404|       |
  405|   155k|    return cnt[0] == cnt[1] ? 1 : cnt[0] < cnt[1] ? 0 : 2;
  ------------------
  |  Branch (405:12): [True: 29.7k, False: 125k]
  |  Branch (405:35): [True: 39.4k, False: 86.0k]
  ------------------
  406|   155k|}
decode.c:av1_get_ref_ctx:
  287|   592k|{
  288|   592k|    int cnt[2] = { 0 };
  289|       |
  290|   592k|    if (have_top && !a->intra[xb4]) {
  ------------------
  |  Branch (290:9): [True: 532k, False: 60.5k]
  |  Branch (290:21): [True: 503k, False: 28.9k]
  ------------------
  291|   503k|        cnt[a->ref[0][xb4] >= 4]++;
  292|   503k|        if (a->comp_type[xb4]) cnt[a->ref[1][xb4] >= 4]++;
  ------------------
  |  Branch (292:13): [True: 69.3k, False: 433k]
  ------------------
  293|   503k|    }
  294|       |
  295|   592k|    if (have_left && !l->intra[yb4]) {
  ------------------
  |  Branch (295:9): [True: 571k, False: 21.6k]
  |  Branch (295:22): [True: 540k, False: 30.7k]
  ------------------
  296|   540k|        cnt[l->ref[0][yb4] >= 4]++;
  297|   540k|        if (l->comp_type[yb4]) cnt[l->ref[1][yb4] >= 4]++;
  ------------------
  |  Branch (297:13): [True: 77.2k, False: 463k]
  ------------------
  298|   540k|    }
  299|       |
  300|   592k|    return cnt[0] == cnt[1] ? 1 : cnt[0] < cnt[1] ? 0 : 2;
  ------------------
  |  Branch (300:12): [True: 82.9k, False: 509k]
  |  Branch (300:35): [True: 177k, False: 332k]
  ------------------
  301|   592k|}
decode.c:av1_get_uni_p1_ctx:
  412|  20.3k|{
  413|  20.3k|    int cnt[3] = { 0 };
  414|       |
  415|  20.3k|    if (have_top && !a->intra[xb4]) {
  ------------------
  |  Branch (415:9): [True: 18.5k, False: 1.79k]
  |  Branch (415:21): [True: 17.6k, False: 972]
  ------------------
  416|  17.6k|        if (a->ref[0][xb4] - 1U < 3) cnt[a->ref[0][xb4] - 1]++;
  ------------------
  |  Branch (416:13): [True: 3.75k, False: 13.8k]
  ------------------
  417|  17.6k|        if (a->comp_type[xb4] && a->ref[1][xb4] - 1U < 3) cnt[a->ref[1][xb4] - 1]++;
  ------------------
  |  Branch (417:13): [True: 11.5k, False: 6.04k]
  |  Branch (417:34): [True: 6.63k, False: 4.95k]
  ------------------
  418|  17.6k|    }
  419|       |
  420|  20.3k|    if (have_left && !l->intra[yb4]) {
  ------------------
  |  Branch (420:9): [True: 19.1k, False: 1.20k]
  |  Branch (420:22): [True: 18.2k, False: 962]
  ------------------
  421|  18.2k|        if (l->ref[0][yb4] - 1U < 3) cnt[l->ref[0][yb4] - 1]++;
  ------------------
  |  Branch (421:13): [True: 4.02k, False: 14.1k]
  ------------------
  422|  18.2k|        if (l->comp_type[yb4] && l->ref[1][yb4] - 1U < 3) cnt[l->ref[1][yb4] - 1]++;
  ------------------
  |  Branch (422:13): [True: 12.6k, False: 5.57k]
  |  Branch (422:34): [True: 7.24k, False: 5.40k]
  ------------------
  423|  18.2k|    }
  424|       |
  425|  20.3k|    cnt[1] += cnt[2];
  426|       |
  427|  20.3k|    return cnt[0] == cnt[1] ? 1 : cnt[0] < cnt[1] ? 0 : 2;
  ------------------
  |  Branch (427:12): [True: 6.41k, False: 13.9k]
  |  Branch (427:35): [True: 9.29k, False: 4.67k]
  ------------------
  428|  20.3k|}
decode.c:get_drl_context:
  432|   310k|{
  433|   310k|    if (ref_mv_stack[ref_idx].weight >= 640)
  ------------------
  |  Branch (433:9): [True: 245k, False: 64.9k]
  ------------------
  434|   245k|        return ref_mv_stack[ref_idx + 1].weight < 640;
  435|       |
  436|  64.9k|    return ref_mv_stack[ref_idx + 1].weight < 640 ? 2 : 0;
  ------------------
  |  Branch (436:12): [True: 64.9k, False: 0]
  ------------------
  437|   310k|}
decode.c:get_gmv_2d:
  482|   414k|{
  483|   414k|    switch (gmv->type) {
  484|  47.2k|    case DAV1D_WM_TYPE_ROT_ZOOM:
  ------------------
  |  Branch (484:5): [True: 47.2k, False: 367k]
  ------------------
  485|  47.2k|        assert(gmv->matrix[5] ==  gmv->matrix[2]);
  ------------------
  |  Branch (485:9): [True: 47.2k, False: 0]
  ------------------
  486|  47.2k|        assert(gmv->matrix[4] == -gmv->matrix[3]);
  ------------------
  |  Branch (486:9): [True: 47.2k, False: 0]
  ------------------
  487|       |        // fall-through
  488|  47.2k|    default:
  ------------------
  |  Branch (488:5): [True: 0, False: 414k]
  ------------------
  489|  61.5k|    case DAV1D_WM_TYPE_AFFINE: {
  ------------------
  |  Branch (489:5): [True: 14.2k, False: 400k]
  ------------------
  490|  61.5k|        const int x = bx4 * 4 + bw4 * 2 - 1;
  491|  61.5k|        const int y = by4 * 4 + bh4 * 2 - 1;
  492|  61.5k|        const int xc = (gmv->matrix[2] - (1 << 16)) * x +
  493|  61.5k|                       gmv->matrix[3] * y + gmv->matrix[0];
  494|  61.5k|        const int yc = (gmv->matrix[5] - (1 << 16)) * y +
  495|  61.5k|                       gmv->matrix[4] * x + gmv->matrix[1];
  496|  61.5k|        const int shift = 16 - (3 - !hdr->hp);
  497|  61.5k|        const int round = (1 << shift) >> 1;
  498|  61.5k|        mv res = (mv) {
  499|  61.5k|            .y = apply_sign(((abs(yc) + round) >> shift) << !hdr->hp, yc),
  500|  61.5k|            .x = apply_sign(((abs(xc) + round) >> shift) << !hdr->hp, xc),
  501|  61.5k|        };
  502|  61.5k|        if (hdr->force_integer_mv)
  ------------------
  |  Branch (502:13): [True: 21.8k, False: 39.7k]
  ------------------
  503|  21.8k|            fix_int_mv_precision(&res);
  504|  61.5k|        return res;
  505|  47.2k|    }
  506|   144k|    case DAV1D_WM_TYPE_TRANSLATION: {
  ------------------
  |  Branch (506:5): [True: 144k, False: 269k]
  ------------------
  507|   144k|        mv res = (mv) {
  508|   144k|            .y = gmv->matrix[0] >> 13,
  509|   144k|            .x = gmv->matrix[1] >> 13,
  510|   144k|        };
  511|   144k|        if (hdr->force_integer_mv)
  ------------------
  |  Branch (511:13): [True: 5.07k, False: 139k]
  ------------------
  512|  5.07k|            fix_int_mv_precision(&res);
  513|   144k|        return res;
  514|  47.2k|    }
  515|   208k|    case DAV1D_WM_TYPE_IDENTITY:
  ------------------
  |  Branch (515:5): [True: 208k, False: 206k]
  ------------------
  516|   208k|        return (mv) { .x = 0, .y = 0 };
  517|   414k|    }
  518|   414k|}
decode.c:get_mask_comp_ctx:
  266|   163k|{
  267|   163k|    const int a_ctx = a->comp_type[xb4] >= COMP_INTER_SEG ? 1 :
  ------------------
  |  Branch (267:23): [True: 28.2k, False: 135k]
  ------------------
  268|   163k|                      a->ref[0][xb4] == 6 ? 3 : 0;
  ------------------
  |  Branch (268:23): [True: 6.86k, False: 128k]
  ------------------
  269|   163k|    const int l_ctx = l->comp_type[yb4] >= COMP_INTER_SEG ? 1 :
  ------------------
  |  Branch (269:23): [True: 38.7k, False: 125k]
  ------------------
  270|   163k|                      l->ref[0][yb4] == 6 ? 3 : 0;
  ------------------
  |  Branch (270:23): [True: 6.50k, False: 118k]
  ------------------
  271|       |
  272|   163k|    return imin(a_ctx + l_ctx, 5);
  273|   163k|}
decode.c:get_jnt_comp_ctx:
  251|   100k|{
  252|   100k|    const int d0 = abs(get_poc_diff(order_hint_n_bits, ref0poc, poc));
  253|   100k|    const int d1 = abs(get_poc_diff(order_hint_n_bits, poc, ref1poc));
  254|   100k|    const int offset = d0 == d1;
  255|   100k|    const int a_ctx = a->comp_type[xb4] >= COMP_INTER_AVG ||
  ------------------
  |  Branch (255:23): [True: 48.7k, False: 51.8k]
  ------------------
  256|  51.8k|                      a->ref[0][xb4] == 6;
  ------------------
  |  Branch (256:23): [True: 3.65k, False: 48.2k]
  ------------------
  257|   100k|    const int l_ctx = l->comp_type[yb4] >= COMP_INTER_AVG ||
  ------------------
  |  Branch (257:23): [True: 49.9k, False: 50.7k]
  ------------------
  258|  50.7k|                      l->ref[0][yb4] == 6;
  ------------------
  |  Branch (258:23): [True: 3.99k, False: 46.7k]
  ------------------
  259|       |
  260|   100k|    return 3 * offset + a_ctx + l_ctx;
  261|   100k|}
decode.c:get_filter_ctx:
  139|   619k|{
  140|   619k|    const int a_filter = (a->ref[0][xb4] == ref || a->ref[1][xb4] == ref) ?
  ------------------
  |  Branch (140:27): [True: 413k, False: 206k]
  |  Branch (140:52): [True: 19.8k, False: 186k]
  ------------------
  141|   433k|                         a->filter[dir][xb4] : DAV1D_N_SWITCHABLE_FILTERS;
  142|   619k|    const int l_filter = (l->ref[0][yb4] == ref || l->ref[1][yb4] == ref) ?
  ------------------
  |  Branch (142:27): [True: 449k, False: 170k]
  |  Branch (142:52): [True: 20.3k, False: 150k]
  ------------------
  143|   469k|                         l->filter[dir][yb4] : DAV1D_N_SWITCHABLE_FILTERS;
  144|       |
  145|   619k|    if (a_filter == l_filter) {
  ------------------
  |  Branch (145:9): [True: 391k, False: 228k]
  ------------------
  146|   391k|        return comp * 4 + a_filter;
  147|   391k|    } else if (a_filter == DAV1D_N_SWITCHABLE_FILTERS) {
  ------------------
  |  Branch (147:16): [True: 117k, False: 110k]
  ------------------
  148|   117k|        return comp * 4 + l_filter;
  149|   117k|    } else if (l_filter == DAV1D_N_SWITCHABLE_FILTERS) {
  ------------------
  |  Branch (149:16): [True: 80.8k, False: 30.1k]
  ------------------
  150|  80.8k|        return comp * 4 + a_filter;
  151|  80.8k|    } else {
  152|  30.1k|        return comp * 4 + DAV1D_N_SWITCHABLE_FILTERS;
  153|  30.1k|    }
  154|   619k|}
decode.c:gather_top_partition_prob:
  106|   491k|{
  107|       |    // Exploit the fact that cdfs for PARTITION_V, PARTITION_SPLIT and
  108|       |    // PARTITION_T_TOP_SPLIT are neighbors.
  109|   491k|    unsigned out = in[PARTITION_V - 1] - in[PARTITION_T_TOP_SPLIT];
  110|       |    // Exploit the facts that cdfs for PARTITION_T_LEFT_SPLIT and
  111|       |    // PARTITION_T_RIGHT_SPLIT are neighbors, the probability for
  112|       |    // PARTITION_V4 is always zero, and the probability for
  113|       |    // PARTITION_T_RIGHT_SPLIT is zero in 128x128 blocks.
  114|   491k|    out += in[PARTITION_T_LEFT_SPLIT - 1];
  115|   491k|    if (bl != BL_128X128)
  ------------------
  |  Branch (115:9): [True: 454k, False: 37.3k]
  ------------------
  116|   454k|        out += in[PARTITION_V4 - 1] - in[PARTITION_T_RIGHT_SPLIT];
  117|   491k|    return out;
  118|   491k|}
decode.c:gather_left_partition_prob:
   94|  54.6k|{
   95|  54.6k|    unsigned out = in[PARTITION_H - 1] - in[PARTITION_H];
   96|       |    // Exploit the fact that cdfs for PARTITION_SPLIT, PARTITION_T_TOP_SPLIT,
   97|       |    // PARTITION_T_BOTTOM_SPLIT and PARTITION_T_LEFT_SPLIT are neighbors.
   98|  54.6k|    out += in[PARTITION_SPLIT - 1] - in[PARTITION_T_LEFT_SPLIT];
   99|  54.6k|    if (bl != BL_128X128)
  ------------------
  |  Branch (99:9): [True: 42.8k, False: 11.8k]
  ------------------
  100|  42.8k|        out += in[PARTITION_H4 - 1] - in[PARTITION_H4];
  101|  54.6k|    return out;
  102|  54.6k|}
decode.c:get_poc_diff:
  239|   608k|{
  240|   608k|    if (!order_hint_n_bits) return 0;
  ------------------
  |  Branch (240:9): [True: 42.3k, False: 566k]
  ------------------
  241|   566k|    const int mask = 1 << (order_hint_n_bits - 1);
  242|   566k|    const int diff = poc0 - poc1;
  243|   566k|    return (diff & (mask - 1)) - (diff & mask);
  244|   608k|}
recon_tmpl.c:get_uv_inter_txtp:
  122|   226k|{
  123|   226k|    if (uvt_dim->max == TX_32X32)
  ------------------
  |  Branch (123:9): [True: 60.6k, False: 166k]
  ------------------
  124|  60.6k|        return ytxtp == IDTX ? IDTX : DCT_DCT;
  ------------------
  |  Branch (124:16): [True: 2.18k, False: 58.4k]
  ------------------
  125|   166k|    if (uvt_dim->min == TX_16X16 &&
  ------------------
  |  Branch (125:9): [True: 20.1k, False: 145k]
  ------------------
  126|  20.1k|        ((1 << ytxtp) & ((1 << H_FLIPADST) | (1 << V_FLIPADST) |
  ------------------
  |  Branch (126:9): [True: 512, False: 19.6k]
  ------------------
  127|  20.1k|                         (1 << H_ADST) | (1 << V_ADST))))
  128|    512|    {
  129|    512|        return DCT_DCT;
  130|    512|    }
  131|       |
  132|   165k|    return ytxtp;
  133|   166k|}

dav1d_prep_grain_8bpc:
  105|  1.56k|{
  106|  1.56k|    const Dav1dFilmGrainData *const data = &out->frame_hdr->film_grain.data;
  107|       |#if BITDEPTH != 8
  108|       |    const int bitdepth_max = (1 << out->p.bpc) - 1;
  109|       |#endif
  110|       |
  111|       |    // Generate grain LUTs as needed
  112|  1.56k|    dsp->generate_grain_y(grain_lut[0], data HIGHBD_TAIL_SUFFIX); // always needed
  113|  1.56k|    if (data->num_uv_points[0] || data->chroma_scaling_from_luma)
  ------------------
  |  Branch (113:9): [True: 820, False: 740]
  |  Branch (113:35): [True: 275, False: 465]
  ------------------
  114|  1.09k|        dsp->generate_grain_uv[in->p.layout - 1](grain_lut[1], grain_lut[0],
  115|  1.09k|                                                 data, 0 HIGHBD_TAIL_SUFFIX);
  116|  1.56k|    if (data->num_uv_points[1] || data->chroma_scaling_from_luma)
  ------------------
  |  Branch (116:9): [True: 628, False: 932]
  |  Branch (116:35): [True: 275, False: 657]
  ------------------
  117|    903|        dsp->generate_grain_uv[in->p.layout - 1](grain_lut[2], grain_lut[0],
  118|    903|                                                 data, 1 HIGHBD_TAIL_SUFFIX);
  119|       |
  120|       |    // Generate scaling LUTs as needed
  121|  1.56k|    if (data->num_y_points || data->chroma_scaling_from_luma)
  ------------------
  |  Branch (121:9): [True: 865, False: 695]
  |  Branch (121:31): [True: 226, False: 469]
  ------------------
  122|  1.09k|        generate_scaling(in->p.bpc, data->y_points, data->num_y_points, scaling[0]);
  123|  1.56k|    if (data->num_uv_points[0])
  ------------------
  |  Branch (123:9): [True: 820, False: 740]
  ------------------
  124|    820|        generate_scaling(in->p.bpc, data->uv_points[0], data->num_uv_points[0], scaling[1]);
  125|  1.56k|    if (data->num_uv_points[1])
  ------------------
  |  Branch (125:9): [True: 628, False: 932]
  ------------------
  126|    628|        generate_scaling(in->p.bpc, data->uv_points[1], data->num_uv_points[1], scaling[2]);
  127|       |
  128|       |    // Copy over the non-modified planes
  129|  1.56k|    assert(out->stride[0] == in->stride[0]);
  ------------------
  |  Branch (129:5): [True: 1.56k, False: 0]
  ------------------
  130|  1.56k|    if (!data->num_y_points) {
  ------------------
  |  Branch (130:9): [True: 695, False: 865]
  ------------------
  131|    695|        const ptrdiff_t stride = out->stride[0];
  132|    695|        const ptrdiff_t sz = out->p.h * stride;
  133|    695|        if (sz < 0)
  ------------------
  |  Branch (133:13): [True: 0, False: 695]
  ------------------
  134|      0|            memcpy((uint8_t*) out->data[0] + sz - stride,
  135|      0|                   (uint8_t*) in->data[0] + sz - stride, -sz);
  136|    695|        else
  137|    695|            memcpy(out->data[0], in->data[0], sz);
  138|    695|    }
  139|       |
  140|  1.56k|    if (in->p.layout != DAV1D_PIXEL_LAYOUT_I400 && !data->chroma_scaling_from_luma) {
  ------------------
  |  Branch (140:9): [True: 1.27k, False: 281]
  |  Branch (140:52): [True: 1.00k, False: 275]
  ------------------
  141|  1.00k|        assert(out->stride[1] == in->stride[1]);
  ------------------
  |  Branch (141:9): [True: 1.00k, False: 0]
  ------------------
  142|  1.00k|        const int ss_ver = in->p.layout == DAV1D_PIXEL_LAYOUT_I420;
  143|  1.00k|        const ptrdiff_t stride = out->stride[1];
  144|  1.00k|        const ptrdiff_t sz = ((out->p.h + ss_ver) >> ss_ver) * stride;
  145|  1.00k|        if (sz < 0) {
  ------------------
  |  Branch (145:13): [True: 0, False: 1.00k]
  ------------------
  146|      0|            if (!data->num_uv_points[0])
  ------------------
  |  Branch (146:17): [True: 0, False: 0]
  ------------------
  147|      0|                memcpy((uint8_t*) out->data[1] + sz - stride,
  148|      0|                       (uint8_t*) in->data[1] + sz - stride, -sz);
  149|      0|            if (!data->num_uv_points[1])
  ------------------
  |  Branch (149:17): [True: 0, False: 0]
  ------------------
  150|      0|                memcpy((uint8_t*) out->data[2] + sz - stride,
  151|      0|                       (uint8_t*) in->data[2] + sz - stride, -sz);
  152|  1.00k|        } else {
  153|  1.00k|            if (!data->num_uv_points[0])
  ------------------
  |  Branch (153:17): [True: 184, False: 820]
  ------------------
  154|    184|                memcpy(out->data[1], in->data[1], sz);
  155|  1.00k|            if (!data->num_uv_points[1])
  ------------------
  |  Branch (155:17): [True: 376, False: 628]
  ------------------
  156|    376|                memcpy(out->data[2], in->data[2], sz);
  157|  1.00k|        }
  158|  1.00k|    }
  159|  1.56k|}
dav1d_apply_grain_row_8bpc:
  167|  6.14k|{
  168|       |    // Synthesize grain for the affected planes
  169|  6.14k|    const Dav1dFilmGrainData *const data = &out->frame_hdr->film_grain.data;
  170|  6.14k|    const int ss_y = in->p.layout == DAV1D_PIXEL_LAYOUT_I420;
  171|  6.14k|    const int ss_x = in->p.layout != DAV1D_PIXEL_LAYOUT_I444;
  172|  6.14k|    const int cpw = (out->p.w + ss_x) >> ss_x;
  173|  6.14k|    const int is_id = out->seq_hdr->mtrx == DAV1D_MC_IDENTITY;
  174|  6.14k|    pixel *const luma_src =
  175|  6.14k|        ((pixel *) in->data[0]) + row * FG_BLOCK_SIZE * PXSTRIDE(in->stride[0]);
  ------------------
  |  |   37|  6.14k|#define FG_BLOCK_SIZE 32
  ------------------
                      ((pixel *) in->data[0]) + row * FG_BLOCK_SIZE * PXSTRIDE(in->stride[0]);
  ------------------
  |  |   53|  6.14k|#define PXSTRIDE(x) (x)
  ------------------
  176|       |#if BITDEPTH != 8
  177|       |    const int bitdepth_max = (1 << out->p.bpc) - 1;
  178|       |#endif
  179|       |
  180|  6.14k|    if (data->num_y_points) {
  ------------------
  |  Branch (180:9): [True: 1.94k, False: 4.20k]
  ------------------
  181|  1.94k|        const int bh = imin(out->p.h - row * FG_BLOCK_SIZE, FG_BLOCK_SIZE);
  ------------------
  |  |   37|  1.94k|#define FG_BLOCK_SIZE 32
  ------------------
                      const int bh = imin(out->p.h - row * FG_BLOCK_SIZE, FG_BLOCK_SIZE);
  ------------------
  |  |   37|  1.94k|#define FG_BLOCK_SIZE 32
  ------------------
  182|  1.94k|        dsp->fgy_32x32xn(((pixel *) out->data[0]) + row * FG_BLOCK_SIZE * PXSTRIDE(out->stride[0]),
  ------------------
  |  |   37|  1.94k|#define FG_BLOCK_SIZE 32
  ------------------
                      dsp->fgy_32x32xn(((pixel *) out->data[0]) + row * FG_BLOCK_SIZE * PXSTRIDE(out->stride[0]),
  ------------------
  |  |   53|  1.94k|#define PXSTRIDE(x) (x)
  ------------------
  183|  1.94k|                         luma_src, out->stride[0], data,
  184|  1.94k|                         out->p.w, scaling[0], grain_lut[0], bh, row HIGHBD_TAIL_SUFFIX);
  185|  1.94k|    }
  186|       |
  187|  6.14k|    if (!data->num_uv_points[0] && !data->num_uv_points[1] &&
  ------------------
  |  Branch (187:9): [True: 1.95k, False: 4.19k]
  |  Branch (187:36): [True: 1.26k, False: 690]
  ------------------
  188|  1.26k|        !data->chroma_scaling_from_luma)
  ------------------
  |  Branch (188:9): [True: 631, False: 631]
  ------------------
  189|    631|    {
  190|    631|        return;
  191|    631|    }
  192|       |
  193|  5.51k|    const int bh = (imin(out->p.h - row * FG_BLOCK_SIZE, FG_BLOCK_SIZE) + ss_y) >> ss_y;
  ------------------
  |  |   37|  5.51k|#define FG_BLOCK_SIZE 32
  ------------------
                  const int bh = (imin(out->p.h - row * FG_BLOCK_SIZE, FG_BLOCK_SIZE) + ss_y) >> ss_y;
  ------------------
  |  |   37|  5.51k|#define FG_BLOCK_SIZE 32
  ------------------
  194|       |
  195|       |    // extend padding pixels
  196|  5.51k|    if (out->p.w & ss_x) {
  ------------------
  |  Branch (196:9): [True: 2.90k, False: 2.61k]
  ------------------
  197|  2.90k|        pixel *ptr = luma_src;
  198|  91.7k|        for (int y = 0; y < bh; y++) {
  ------------------
  |  Branch (198:25): [True: 88.8k, False: 2.90k]
  ------------------
  199|  88.8k|            ptr[out->p.w] = ptr[out->p.w - 1];
  200|  88.8k|            ptr += PXSTRIDE(in->stride[0]) << ss_y;
  ------------------
  |  |   53|  88.8k|#define PXSTRIDE(x) (x)
  ------------------
  201|  88.8k|        }
  202|  2.90k|    }
  203|       |
  204|  5.51k|    const ptrdiff_t uv_off = row * FG_BLOCK_SIZE * PXSTRIDE(out->stride[1]) >> ss_y;
  ------------------
  |  |   37|  5.51k|#define FG_BLOCK_SIZE 32
  ------------------
                  const ptrdiff_t uv_off = row * FG_BLOCK_SIZE * PXSTRIDE(out->stride[1]) >> ss_y;
  ------------------
  |  |   53|  5.51k|#define PXSTRIDE(x) (x)
  ------------------
  205|  5.51k|    if (data->chroma_scaling_from_luma) {
  ------------------
  |  Branch (205:9): [True: 631, False: 4.88k]
  ------------------
  206|  1.89k|        for (int pl = 0; pl < 2; pl++)
  ------------------
  |  Branch (206:26): [True: 1.26k, False: 631]
  ------------------
  207|  1.26k|            dsp->fguv_32x32xn[in->p.layout - 1](((pixel *) out->data[1 + pl]) + uv_off,
  208|  1.26k|                                                ((const pixel *) in->data[1 + pl]) + uv_off,
  209|  1.26k|                                                in->stride[1], data, cpw,
  210|  1.26k|                                                scaling[0], grain_lut[1 + pl],
  211|  1.26k|                                                bh, row, luma_src, in->stride[0],
  212|  1.26k|                                                pl, is_id HIGHBD_TAIL_SUFFIX);
  213|  4.88k|    } else {
  214|  14.6k|        for (int pl = 0; pl < 2; pl++)
  ------------------
  |  Branch (214:26): [True: 9.77k, False: 4.88k]
  ------------------
  215|  9.77k|            if (data->num_uv_points[pl])
  ------------------
  |  Branch (215:17): [True: 5.86k, False: 3.91k]
  ------------------
  216|  5.86k|                dsp->fguv_32x32xn[in->p.layout - 1](((pixel *) out->data[1 + pl]) + uv_off,
  217|  5.86k|                                                    ((const pixel *) in->data[1 + pl]) + uv_off,
  218|  5.86k|                                                    in->stride[1], data, cpw,
  219|  5.86k|                                                    scaling[1 + pl], grain_lut[1 + pl],
  220|  5.86k|                                                    bh, row, luma_src, in->stride[0],
  221|  5.86k|                                                    pl, is_id HIGHBD_TAIL_SUFFIX);
  222|  4.88k|    }
  223|  5.51k|}
dav1d_apply_grain_8bpc:
  228|  1.56k|{
  229|  1.56k|    ALIGN_STK_16(entry, grain_lut, 3,[GRAIN_HEIGHT + 1][GRAIN_WIDTH]);
  ------------------
  |  |  100|  1.56k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  1.56k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
  230|  1.56k|#if ARCH_X86_64 && BITDEPTH == 8
  231|  1.56k|    ALIGN_STK_64(uint8_t, scaling, 3,[SCALING_SIZE]);
  ------------------
  |  |   96|  1.56k|    ALIGN(type var[sz1d]sznd, ALIGN_64_VAL)
  |  |  ------------------
  |  |  |  |   86|  1.56k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
  232|       |#else
  233|       |    uint8_t scaling[3][SCALING_SIZE];
  234|       |#endif
  235|  1.56k|    const int rows = (out->p.h + FG_BLOCK_SIZE - 1) / FG_BLOCK_SIZE;
  ------------------
  |  |   37|  1.56k|#define FG_BLOCK_SIZE 32
  ------------------
                  const int rows = (out->p.h + FG_BLOCK_SIZE - 1) / FG_BLOCK_SIZE;
  ------------------
  |  |   37|  1.56k|#define FG_BLOCK_SIZE 32
  ------------------
  236|       |
  237|  1.56k|    bitfn(dav1d_prep_grain)(dsp, out, in, scaling, grain_lut);
  ------------------
  |  |   51|  1.56k|#define bitfn(x) x##_8bpc
  ------------------
  238|  7.70k|    for (int row = 0; row < rows; row++)
  ------------------
  |  Branch (238:23): [True: 6.14k, False: 1.56k]
  ------------------
  239|  6.14k|        bitfn(dav1d_apply_grain_row)(dsp, out, in, scaling, grain_lut, row);
  ------------------
  |  |   51|  6.14k|#define bitfn(x) x##_8bpc
  ------------------
  240|  1.56k|}
fg_apply_tmpl.c:generate_scaling:
   44|  2.53k|{
   45|  2.53k|#if BITDEPTH == 8
   46|  2.53k|    const int shift_x = 0;
   47|  2.53k|    const int scaling_size = SCALING_SIZE;
  ------------------
  |  |   39|  2.53k|#define SCALING_SIZE 256
  ------------------
   48|       |#else
   49|       |    assert(bitdepth > 8);
   50|       |    const int shift_x = bitdepth - 8;
   51|       |    const int scaling_size = 1 << bitdepth;
   52|       |#endif
   53|       |
   54|  2.53k|    if (num == 0) {
  ------------------
  |  Branch (54:9): [True: 226, False: 2.31k]
  ------------------
   55|    226|        memset(scaling, 0, scaling_size);
   56|    226|        return;
   57|    226|    }
   58|       |
   59|       |    // Fill up the preceding entries with the initial value
   60|  2.31k|    memset(scaling, points[0][1], points[0][0] << shift_x);
   61|       |
   62|       |    // Linearly interpolate the values in the middle
   63|  4.87k|    for (int i = 0; i < num - 1; i++) {
  ------------------
  |  Branch (63:21): [True: 2.55k, False: 2.31k]
  ------------------
   64|  2.55k|        const int bx = points[i][0];
   65|  2.55k|        const int by = points[i][1];
   66|  2.55k|        const int ex = points[i+1][0];
   67|  2.55k|        const int ey = points[i+1][1];
   68|  2.55k|        const int dx = ex - bx;
   69|  2.55k|        const int dy = ey - by;
   70|  2.55k|        assert(dx > 0);
  ------------------
  |  Branch (70:9): [True: 2.55k, False: 0]
  ------------------
   71|  2.55k|        const int delta = dy * ((0x10000 + (dx >> 1)) / dx);
   72|   181k|        for (int x = 0, d = 0x8000; x < dx; x++) {
  ------------------
  |  Branch (72:37): [True: 178k, False: 2.55k]
  ------------------
   73|   178k|            scaling[(bx + x) << shift_x] = by + (d >> 16);
   74|   178k|            d += delta;
   75|   178k|        }
   76|  2.55k|    }
   77|       |
   78|       |    // Fill up the remaining entries with the final value
   79|  2.31k|    const int n = points[num - 1][0] << shift_x;
   80|  2.31k|    memset(&scaling[n], points[num - 1][1], scaling_size - n);
   81|       |
   82|       |#if BITDEPTH != 8
   83|       |    const int pad = 1 << shift_x, rnd = pad >> 1;
   84|       |    for (int i = 0; i < num - 1; i++) {
   85|       |        const int bx = points[i][0] << shift_x;
   86|       |        const int ex = points[i+1][0] << shift_x;
   87|       |        const int dx = ex - bx;
   88|       |        for (int x = 0; x < dx; x += pad) {
   89|       |            const int range = scaling[bx + x + pad] - scaling[bx + x];
   90|       |            for (int n = 1, r = rnd; n < pad; n++) {
   91|       |                r += range;
   92|       |                scaling[bx + x + n] = scaling[bx + x] + (r >> shift_x);
   93|       |            }
   94|       |        }
   95|       |    }
   96|       |#endif
   97|  2.31k|}
dav1d_prep_grain_16bpc:
  105|  3.05k|{
  106|  3.05k|    const Dav1dFilmGrainData *const data = &out->frame_hdr->film_grain.data;
  107|  3.05k|#if BITDEPTH != 8
  108|  3.05k|    const int bitdepth_max = (1 << out->p.bpc) - 1;
  109|  3.05k|#endif
  110|       |
  111|       |    // Generate grain LUTs as needed
  112|  3.05k|    dsp->generate_grain_y(grain_lut[0], data HIGHBD_TAIL_SUFFIX); // always needed
  ------------------
  |  |   74|  3.05k|#define HIGHBD_TAIL_SUFFIX , bitdepth_max
  ------------------
  113|  3.05k|    if (data->num_uv_points[0] || data->chroma_scaling_from_luma)
  ------------------
  |  Branch (113:9): [True: 455, False: 2.60k]
  |  Branch (113:35): [True: 261, False: 2.34k]
  ------------------
  114|    716|        dsp->generate_grain_uv[in->p.layout - 1](grain_lut[1], grain_lut[0],
  115|    716|                                                 data, 0 HIGHBD_TAIL_SUFFIX);
  ------------------
  |  |   74|    716|#define HIGHBD_TAIL_SUFFIX , bitdepth_max
  ------------------
  116|  3.05k|    if (data->num_uv_points[1] || data->chroma_scaling_from_luma)
  ------------------
  |  Branch (116:9): [True: 620, False: 2.43k]
  |  Branch (116:35): [True: 261, False: 2.17k]
  ------------------
  117|    881|        dsp->generate_grain_uv[in->p.layout - 1](grain_lut[2], grain_lut[0],
  118|    881|                                                 data, 1 HIGHBD_TAIL_SUFFIX);
  ------------------
  |  |   74|    881|#define HIGHBD_TAIL_SUFFIX , bitdepth_max
  ------------------
  119|       |
  120|       |    // Generate scaling LUTs as needed
  121|  3.05k|    if (data->num_y_points || data->chroma_scaling_from_luma)
  ------------------
  |  Branch (121:9): [True: 2.41k, False: 638]
  |  Branch (121:31): [True: 243, False: 395]
  ------------------
  122|  2.66k|        generate_scaling(in->p.bpc, data->y_points, data->num_y_points, scaling[0]);
  123|  3.05k|    if (data->num_uv_points[0])
  ------------------
  |  Branch (123:9): [True: 455, False: 2.60k]
  ------------------
  124|    455|        generate_scaling(in->p.bpc, data->uv_points[0], data->num_uv_points[0], scaling[1]);
  125|  3.05k|    if (data->num_uv_points[1])
  ------------------
  |  Branch (125:9): [True: 620, False: 2.43k]
  ------------------
  126|    620|        generate_scaling(in->p.bpc, data->uv_points[1], data->num_uv_points[1], scaling[2]);
  127|       |
  128|       |    // Copy over the non-modified planes
  129|  3.05k|    assert(out->stride[0] == in->stride[0]);
  ------------------
  |  Branch (129:5): [True: 3.05k, False: 0]
  ------------------
  130|  3.05k|    if (!data->num_y_points) {
  ------------------
  |  Branch (130:9): [True: 638, False: 2.41k]
  ------------------
  131|    638|        const ptrdiff_t stride = out->stride[0];
  132|    638|        const ptrdiff_t sz = out->p.h * stride;
  133|    638|        if (sz < 0)
  ------------------
  |  Branch (133:13): [True: 0, False: 638]
  ------------------
  134|      0|            memcpy((uint8_t*) out->data[0] + sz - stride,
  135|      0|                   (uint8_t*) in->data[0] + sz - stride, -sz);
  136|    638|        else
  137|    638|            memcpy(out->data[0], in->data[0], sz);
  138|    638|    }
  139|       |
  140|  3.05k|    if (in->p.layout != DAV1D_PIXEL_LAYOUT_I400 && !data->chroma_scaling_from_luma) {
  ------------------
  |  Branch (140:9): [True: 1.10k, False: 1.95k]
  |  Branch (140:52): [True: 839, False: 261]
  ------------------
  141|    839|        assert(out->stride[1] == in->stride[1]);
  ------------------
  |  Branch (141:9): [True: 839, False: 0]
  ------------------
  142|    839|        const int ss_ver = in->p.layout == DAV1D_PIXEL_LAYOUT_I420;
  143|    839|        const ptrdiff_t stride = out->stride[1];
  144|    839|        const ptrdiff_t sz = ((out->p.h + ss_ver) >> ss_ver) * stride;
  145|    839|        if (sz < 0) {
  ------------------
  |  Branch (145:13): [True: 0, False: 839]
  ------------------
  146|      0|            if (!data->num_uv_points[0])
  ------------------
  |  Branch (146:17): [True: 0, False: 0]
  ------------------
  147|      0|                memcpy((uint8_t*) out->data[1] + sz - stride,
  148|      0|                       (uint8_t*) in->data[1] + sz - stride, -sz);
  149|      0|            if (!data->num_uv_points[1])
  ------------------
  |  Branch (149:17): [True: 0, False: 0]
  ------------------
  150|      0|                memcpy((uint8_t*) out->data[2] + sz - stride,
  151|      0|                       (uint8_t*) in->data[2] + sz - stride, -sz);
  152|    839|        } else {
  153|    839|            if (!data->num_uv_points[0])
  ------------------
  |  Branch (153:17): [True: 384, False: 455]
  ------------------
  154|    384|                memcpy(out->data[1], in->data[1], sz);
  155|    839|            if (!data->num_uv_points[1])
  ------------------
  |  Branch (155:17): [True: 219, False: 620]
  ------------------
  156|    219|                memcpy(out->data[2], in->data[2], sz);
  157|    839|        }
  158|    839|    }
  159|  3.05k|}
dav1d_apply_grain_row_16bpc:
  167|  6.98k|{
  168|       |    // Synthesize grain for the affected planes
  169|  6.98k|    const Dav1dFilmGrainData *const data = &out->frame_hdr->film_grain.data;
  170|  6.98k|    const int ss_y = in->p.layout == DAV1D_PIXEL_LAYOUT_I420;
  171|  6.98k|    const int ss_x = in->p.layout != DAV1D_PIXEL_LAYOUT_I444;
  172|  6.98k|    const int cpw = (out->p.w + ss_x) >> ss_x;
  173|  6.98k|    const int is_id = out->seq_hdr->mtrx == DAV1D_MC_IDENTITY;
  174|  6.98k|    pixel *const luma_src =
  175|  6.98k|        ((pixel *) in->data[0]) + row * FG_BLOCK_SIZE * PXSTRIDE(in->stride[0]);
  ------------------
  |  |   37|  6.98k|#define FG_BLOCK_SIZE 32
  ------------------
  176|  6.98k|#if BITDEPTH != 8
  177|  6.98k|    const int bitdepth_max = (1 << out->p.bpc) - 1;
  178|  6.98k|#endif
  179|       |
  180|  6.98k|    if (data->num_y_points) {
  ------------------
  |  Branch (180:9): [True: 4.59k, False: 2.38k]
  ------------------
  181|  4.59k|        const int bh = imin(out->p.h - row * FG_BLOCK_SIZE, FG_BLOCK_SIZE);
  ------------------
  |  |   37|  4.59k|#define FG_BLOCK_SIZE 32
  ------------------
                      const int bh = imin(out->p.h - row * FG_BLOCK_SIZE, FG_BLOCK_SIZE);
  ------------------
  |  |   37|  4.59k|#define FG_BLOCK_SIZE 32
  ------------------
  182|  4.59k|        dsp->fgy_32x32xn(((pixel *) out->data[0]) + row * FG_BLOCK_SIZE * PXSTRIDE(out->stride[0]),
  ------------------
  |  |   37|  4.59k|#define FG_BLOCK_SIZE 32
  ------------------
  183|  4.59k|                         luma_src, out->stride[0], data,
  184|  4.59k|                         out->p.w, scaling[0], grain_lut[0], bh, row HIGHBD_TAIL_SUFFIX);
  ------------------
  |  |   74|  4.59k|#define HIGHBD_TAIL_SUFFIX , bitdepth_max
  ------------------
  185|  4.59k|    }
  186|       |
  187|  6.98k|    if (!data->num_uv_points[0] && !data->num_uv_points[1] &&
  ------------------
  |  Branch (187:9): [True: 5.44k, False: 1.54k]
  |  Branch (187:36): [True: 4.65k, False: 783]
  ------------------
  188|  4.65k|        !data->chroma_scaling_from_luma)
  ------------------
  |  Branch (188:9): [True: 4.15k, False: 498]
  ------------------
  189|  4.15k|    {
  190|  4.15k|        return;
  191|  4.15k|    }
  192|       |
  193|  2.82k|    const int bh = (imin(out->p.h - row * FG_BLOCK_SIZE, FG_BLOCK_SIZE) + ss_y) >> ss_y;
  ------------------
  |  |   37|  2.82k|#define FG_BLOCK_SIZE 32
  ------------------
                  const int bh = (imin(out->p.h - row * FG_BLOCK_SIZE, FG_BLOCK_SIZE) + ss_y) >> ss_y;
  ------------------
  |  |   37|  2.82k|#define FG_BLOCK_SIZE 32
  ------------------
  194|       |
  195|       |    // extend padding pixels
  196|  2.82k|    if (out->p.w & ss_x) {
  ------------------
  |  Branch (196:9): [True: 866, False: 1.95k]
  ------------------
  197|    866|        pixel *ptr = luma_src;
  198|  21.7k|        for (int y = 0; y < bh; y++) {
  ------------------
  |  Branch (198:25): [True: 20.8k, False: 866]
  ------------------
  199|  20.8k|            ptr[out->p.w] = ptr[out->p.w - 1];
  200|  20.8k|            ptr += PXSTRIDE(in->stride[0]) << ss_y;
  201|  20.8k|        }
  202|    866|    }
  203|       |
  204|  2.82k|    const ptrdiff_t uv_off = row * FG_BLOCK_SIZE * PXSTRIDE(out->stride[1]) >> ss_y;
  ------------------
  |  |   37|  2.82k|#define FG_BLOCK_SIZE 32
  ------------------
  205|  2.82k|    if (data->chroma_scaling_from_luma) {
  ------------------
  |  Branch (205:9): [True: 498, False: 2.32k]
  ------------------
  206|  1.49k|        for (int pl = 0; pl < 2; pl++)
  ------------------
  |  Branch (206:26): [True: 996, False: 498]
  ------------------
  207|    996|            dsp->fguv_32x32xn[in->p.layout - 1](((pixel *) out->data[1 + pl]) + uv_off,
  208|    996|                                                ((const pixel *) in->data[1 + pl]) + uv_off,
  209|    996|                                                in->stride[1], data, cpw,
  210|    996|                                                scaling[0], grain_lut[1 + pl],
  211|    996|                                                bh, row, luma_src, in->stride[0],
  212|    996|                                                pl, is_id HIGHBD_TAIL_SUFFIX);
  ------------------
  |  |   74|    996|#define HIGHBD_TAIL_SUFFIX , bitdepth_max
  ------------------
  213|  2.32k|    } else {
  214|  6.98k|        for (int pl = 0; pl < 2; pl++)
  ------------------
  |  Branch (214:26): [True: 4.65k, False: 2.32k]
  ------------------
  215|  4.65k|            if (data->num_uv_points[pl])
  ------------------
  |  Branch (215:17): [True: 2.77k, False: 1.87k]
  ------------------
  216|  2.77k|                dsp->fguv_32x32xn[in->p.layout - 1](((pixel *) out->data[1 + pl]) + uv_off,
  217|  2.77k|                                                    ((const pixel *) in->data[1 + pl]) + uv_off,
  218|  2.77k|                                                    in->stride[1], data, cpw,
  219|  2.77k|                                                    scaling[1 + pl], grain_lut[1 + pl],
  220|  2.77k|                                                    bh, row, luma_src, in->stride[0],
  221|  2.77k|                                                    pl, is_id HIGHBD_TAIL_SUFFIX);
  ------------------
  |  |   74|  2.77k|#define HIGHBD_TAIL_SUFFIX , bitdepth_max
  ------------------
  222|  2.32k|    }
  223|  2.82k|}
dav1d_apply_grain_16bpc:
  228|  3.05k|{
  229|  3.05k|    ALIGN_STK_16(entry, grain_lut, 3,[GRAIN_HEIGHT + 1][GRAIN_WIDTH]);
  ------------------
  |  |  100|  3.05k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  3.05k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
  230|       |#if ARCH_X86_64 && BITDEPTH == 8
  231|       |    ALIGN_STK_64(uint8_t, scaling, 3,[SCALING_SIZE]);
  232|       |#else
  233|  3.05k|    uint8_t scaling[3][SCALING_SIZE];
  234|  3.05k|#endif
  235|  3.05k|    const int rows = (out->p.h + FG_BLOCK_SIZE - 1) / FG_BLOCK_SIZE;
  ------------------
  |  |   37|  3.05k|#define FG_BLOCK_SIZE 32
  ------------------
                  const int rows = (out->p.h + FG_BLOCK_SIZE - 1) / FG_BLOCK_SIZE;
  ------------------
  |  |   37|  3.05k|#define FG_BLOCK_SIZE 32
  ------------------
  236|       |
  237|  3.05k|    bitfn(dav1d_prep_grain)(dsp, out, in, scaling, grain_lut);
  ------------------
  |  |   77|  3.05k|#define bitfn(x) x##_16bpc
  ------------------
  238|  10.0k|    for (int row = 0; row < rows; row++)
  ------------------
  |  Branch (238:23): [True: 6.98k, False: 3.05k]
  ------------------
  239|  6.98k|        bitfn(dav1d_apply_grain_row)(dsp, out, in, scaling, grain_lut, row);
  ------------------
  |  |   77|  6.98k|#define bitfn(x) x##_16bpc
  ------------------
  240|  3.05k|}

dav1d_film_grain_dsp_init_8bpc:
  423|  3.41k|COLD void bitfn(dav1d_film_grain_dsp_init)(Dav1dFilmGrainDSPContext *const c) {
  424|  3.41k|    c->generate_grain_y = generate_grain_y_c;
  425|  3.41k|    c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I420 - 1] = generate_grain_uv_420_c;
  426|  3.41k|    c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I422 - 1] = generate_grain_uv_422_c;
  427|  3.41k|    c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I444 - 1] = generate_grain_uv_444_c;
  428|       |
  429|  3.41k|    c->fgy_32x32xn = fgy_32x32xn_c;
  430|  3.41k|    c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I420 - 1] = fguv_32x32xn_420_c;
  431|  3.41k|    c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I422 - 1] = fguv_32x32xn_422_c;
  432|  3.41k|    c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I444 - 1] = fguv_32x32xn_444_c;
  433|       |
  434|  3.41k|#if HAVE_ASM
  435|       |#if ARCH_AARCH64 || ARCH_ARM
  436|       |    film_grain_dsp_init_arm(c);
  437|       |#elif ARCH_X86
  438|       |    film_grain_dsp_init_x86(c);
  439|  3.41k|#endif
  440|  3.41k|#endif
  441|  3.41k|}
dav1d_film_grain_dsp_init_16bpc:
  423|  4.61k|COLD void bitfn(dav1d_film_grain_dsp_init)(Dav1dFilmGrainDSPContext *const c) {
  424|  4.61k|    c->generate_grain_y = generate_grain_y_c;
  425|  4.61k|    c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I420 - 1] = generate_grain_uv_420_c;
  426|  4.61k|    c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I422 - 1] = generate_grain_uv_422_c;
  427|  4.61k|    c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I444 - 1] = generate_grain_uv_444_c;
  428|       |
  429|  4.61k|    c->fgy_32x32xn = fgy_32x32xn_c;
  430|  4.61k|    c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I420 - 1] = fguv_32x32xn_420_c;
  431|  4.61k|    c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I422 - 1] = fguv_32x32xn_422_c;
  432|  4.61k|    c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I444 - 1] = fguv_32x32xn_444_c;
  433|       |
  434|  4.61k|#if HAVE_ASM
  435|       |#if ARCH_AARCH64 || ARCH_ARM
  436|       |    film_grain_dsp_init_arm(c);
  437|       |#elif ARCH_X86
  438|       |    film_grain_dsp_init_x86(c);
  439|  4.61k|#endif
  440|  4.61k|#endif
  441|  4.61k|}

dav1d_init_get_bits:
   38|   119k|{
   39|   119k|    assert(sz);
  ------------------
  |  Branch (39:5): [True: 119k, False: 0]
  ------------------
   40|   119k|    c->ptr = c->ptr_start = data;
   41|   119k|    c->ptr_end = &c->ptr_start[sz];
   42|   119k|    c->state = 0;
   43|   119k|    c->bits_left = 0;
   44|   119k|    c->error = 0;
   45|   119k|}
dav1d_get_bit:
   47|  2.91M|unsigned dav1d_get_bit(GetBits *const c) {
   48|  2.91M|    if (!c->bits_left) {
  ------------------
  |  Branch (48:9): [True: 438k, False: 2.47M]
  ------------------
   49|   438k|        if (c->ptr >= c->ptr_end) {
  ------------------
  |  Branch (49:13): [True: 4.66k, False: 434k]
  ------------------
   50|  4.66k|            c->error = 1;
   51|   434k|        } else {
   52|   434k|            const unsigned state = *c->ptr++;
   53|   434k|            c->bits_left = 7;
   54|   434k|            c->state = (uint64_t) state << 57;
   55|   434k|            return state >> 7;
   56|   434k|        }
   57|   438k|    }
   58|       |
   59|  2.48M|    const uint64_t state = c->state;
   60|  2.48M|    c->bits_left--;
   61|  2.48M|    c->state = state << 1;
   62|  2.48M|    return (unsigned) (state >> 63);
   63|  2.91M|}
dav1d_get_uleb128:
   95|  58.7k|unsigned dav1d_get_uleb128(GetBits *const c) {
   96|  58.7k|    uint64_t val = 0;
   97|  58.7k|    unsigned i = 0, more;
   98|       |
   99|  61.8k|    do {
  100|  61.8k|        const int v = dav1d_get_bits(c, 8);
  101|  61.8k|        more = v & 0x80;
  102|  61.8k|        val |= ((uint64_t) (v & 0x7F)) << i;
  103|  61.8k|        i += 7;
  104|  61.8k|    } while (more && i < 56);
  ------------------
  |  Branch (104:14): [True: 3.25k, False: 58.6k]
  |  Branch (104:22): [True: 3.14k, False: 109]
  ------------------
  105|       |
  106|  58.7k|    if (val > UINT32_MAX || more) {
  ------------------
  |  Branch (106:9): [True: 225, False: 58.4k]
  |  Branch (106:29): [True: 84, False: 58.4k]
  ------------------
  107|    309|        c->error = 1;
  108|    309|        return 0;
  109|    309|    }
  110|       |
  111|  58.4k|    return (unsigned) val;
  112|  58.7k|}
dav1d_get_uniform:
  114|  86.6k|unsigned dav1d_get_uniform(GetBits *const c, const unsigned max) {
  115|       |    // Output in range [0..max-1]
  116|       |    // max must be > 1, or else nothing is read from the bitstream
  117|  86.6k|    assert(max > 1);
  ------------------
  |  Branch (117:5): [True: 86.6k, False: 0]
  ------------------
  118|  86.6k|    const int l = ulog2(max) + 1;
  119|  86.6k|    assert(l > 1);
  ------------------
  |  Branch (119:5): [True: 86.6k, False: 0]
  ------------------
  120|  86.6k|    const unsigned m = (1U << l) - max;
  121|  86.6k|    const unsigned v = dav1d_get_bits(c, l - 1);
  122|  86.6k|    return v < m ? v : (v << 1) - m + dav1d_get_bit(c);
  ------------------
  |  Branch (122:12): [True: 80.9k, False: 5.64k]
  ------------------
  123|  86.6k|}
dav1d_get_vlc:
  125|  1.01k|unsigned dav1d_get_vlc(GetBits *const c) {
  126|  1.01k|    if (dav1d_get_bit(c))
  ------------------
  |  Branch (126:9): [True: 358, False: 653]
  ------------------
  127|    358|        return 0;
  128|       |
  129|    653|    int n_bits = 0;
  130|  11.9k|    do {
  131|  11.9k|        if (++n_bits == 32)
  ------------------
  |  Branch (131:13): [True: 196, False: 11.7k]
  ------------------
  132|    196|            return UINT32_MAX;
  133|  11.9k|    } while (!dav1d_get_bit(c));
  ------------------
  |  Branch (133:14): [True: 11.2k, False: 457]
  ------------------
  134|       |
  135|    457|    return ((1U << n_bits) - 1) + dav1d_get_bits(c, n_bits);
  136|    653|}
dav1d_get_bits_subexp:
  162|  40.6k|int dav1d_get_bits_subexp(GetBits *const c, const int ref, const unsigned n) {
  163|  40.6k|    return (int) get_bits_subexp_u(c, ref + (1 << n), 2 << n) - (1 << n);
  164|  40.6k|}
getbits.c:refill:
   65|   598k|static inline void refill(GetBits *const c, const int n) {
   66|   598k|    assert(c->bits_left >= 0 && c->bits_left < 32);
  ------------------
  |  Branch (66:5): [True: 598k, False: 0]
  |  Branch (66:5): [True: 598k, False: 0]
  ------------------
   67|   598k|    unsigned state = 0;
   68|   656k|    do {
   69|   656k|        if (c->ptr >= c->ptr_end) {
  ------------------
  |  Branch (69:13): [True: 17.4k, False: 638k]
  ------------------
   70|  17.4k|            c->error = 1;
   71|  17.4k|            if (state) break;
  ------------------
  |  Branch (71:17): [True: 2.08k, False: 15.3k]
  ------------------
   72|  15.3k|            return;
   73|  17.4k|        }
   74|   638k|        state = (state << 8) | *c->ptr++;
   75|   638k|        c->bits_left += 8;
   76|   638k|    } while (n > c->bits_left);
  ------------------
  |  Branch (76:14): [True: 58.2k, False: 580k]
  ------------------
   77|   582k|    c->state |= (uint64_t) state << (64 - c->bits_left);
   78|   582k|}
getbits.c:get_bits_subexp_u:
  140|  40.6k|{
  141|  40.6k|    unsigned v = 0;
  142|       |
  143|   110k|    for (int i = 0;; i++) {
  144|   110k|        const int b = i ? 3 + i - 1 : 3;
  ------------------
  |  Branch (144:23): [True: 70.3k, False: 40.6k]
  ------------------
  145|       |
  146|   110k|        if (n < v + 3 * (1 << b)) {
  ------------------
  |  Branch (146:13): [True: 4.85k, False: 106k]
  ------------------
  147|  4.85k|            v += dav1d_get_uniform(c, n - v + 1);
  148|  4.85k|            break;
  149|  4.85k|        }
  150|       |
  151|   106k|        if (!dav1d_get_bit(c)) {
  ------------------
  |  Branch (151:13): [True: 35.8k, False: 70.3k]
  ------------------
  152|  35.8k|            v += dav1d_get_bits(c, b);
  153|  35.8k|            break;
  154|  35.8k|        }
  155|       |
  156|  70.3k|        v += 1 << b;
  157|  70.3k|    }
  158|       |
  159|  40.6k|    return ref * 2 <= n ? inv_recenter(ref, v) : n - inv_recenter(n - ref, v);
  ------------------
  |  Branch (159:12): [True: 34.2k, False: 6.46k]
  ------------------
  160|  40.6k|}

obu.c:dav1d_bytealign_get_bits:
   52|   115k|static inline void dav1d_bytealign_get_bits(GetBits *c) {
   53|       |    // bits_left is never more than 7, because it is only incremented
   54|       |    // by refill(), called by dav1d_get_bits and that never reads more
   55|       |    // than 7 bits more than it needs.
   56|       |    //
   57|       |    // If this wasn't true, we would need to work out how many bits to
   58|       |    // discard (bits_left % 8), subtract that from bits_left and then
   59|       |    // shift state right by that amount.
   60|   115k|    assert(c->bits_left <= 7);
  ------------------
  |  Branch (60:5): [True: 115k, False: 0]
  ------------------
   61|       |
   62|   115k|    c->bits_left = 0;
   63|   115k|    c->state = 0;
   64|   115k|}

dav1d_init_intra_edge_tree:
  126|      1|COLD void dav1d_init_intra_edge_tree(void) {
  127|       |    // This function is guaranteed to be called only once
  128|      1|    struct ModeSelMem mem;
  129|       |
  130|      1|    mem.nwc[BL_128X128] = &nodes.branch_sb128[1];
  131|      1|    mem.nwc[BL_64X64] = &nodes.branch_sb128[1 + 4];
  132|      1|    mem.nwc[BL_32X32] = &nodes.branch_sb128[1 + 4 + 16];
  133|      1|    mem.nt = nodes.tip_sb128;
  134|      1|    init_mode_node(nodes.branch_sb128, BL_128X128, &mem, 1, 0);
  135|      1|    assert(mem.nwc[BL_128X128] == &nodes.branch_sb128[1 + 4]);
  ------------------
  |  Branch (135:5): [True: 1, False: 0]
  ------------------
  136|      1|    assert(mem.nwc[BL_64X64] == &nodes.branch_sb128[1 + 4 + 16]);
  ------------------
  |  Branch (136:5): [True: 1, False: 0]
  ------------------
  137|      1|    assert(mem.nwc[BL_32X32] == &nodes.branch_sb128[1 + 4 + 16 + 64]);
  ------------------
  |  Branch (137:5): [True: 1, False: 0]
  ------------------
  138|      1|    assert(mem.nt == &nodes.tip_sb128[256]);
  ------------------
  |  Branch (138:5): [True: 1, False: 0]
  ------------------
  139|       |
  140|      1|    mem.nwc[BL_128X128] = NULL;
  141|      1|    mem.nwc[BL_64X64] = &nodes.branch_sb64[1];
  142|      1|    mem.nwc[BL_32X32] = &nodes.branch_sb64[1 + 4];
  143|      1|    mem.nt = nodes.tip_sb64;
  144|      1|    init_mode_node(nodes.branch_sb64, BL_64X64, &mem, 1, 0);
  145|      1|    assert(mem.nwc[BL_64X64] == &nodes.branch_sb64[1 + 4]);
  ------------------
  |  Branch (145:5): [True: 1, False: 0]
  ------------------
  146|      1|    assert(mem.nwc[BL_32X32] == &nodes.branch_sb64[1 + 4 + 16]);
  ------------------
  |  Branch (146:5): [True: 1, False: 0]
  ------------------
  147|      1|    assert(mem.nt == &nodes.tip_sb64[64]);
  ------------------
  |  Branch (147:5): [True: 1, False: 0]
  ------------------
  148|      1|}
intra_edge.c:init_mode_node:
  101|    106|{
  102|    106|    init_edges(&nwc->node, bl,
  103|    106|               (top_has_right ? EDGE_ALL_TOP_HAS_RIGHT : 0) |
  ------------------
  |  Branch (103:17): [True: 73, False: 33]
  ------------------
  104|    106|               (left_has_bottom ? EDGE_ALL_LEFT_HAS_BOTTOM : 0));
  ------------------
  |  Branch (104:17): [True: 33, False: 73]
  ------------------
  105|    106|    if (bl == BL_16X16) {
  ------------------
  |  Branch (105:9): [True: 80, False: 26]
  ------------------
  106|    400|        for (int n = 0; n < 4; n++) {
  ------------------
  |  Branch (106:25): [True: 320, False: 80]
  ------------------
  107|    320|            EdgeTip *const nt = mem->nt++;
  108|    320|            nwc->split_offset[n] = PTR_OFFSET(nwc, nt);
  ------------------
  |  |   94|    320|#define PTR_OFFSET(a, b) ((uint16_t)((uintptr_t)(b) - (uintptr_t)(a)))
  ------------------
  109|    320|            init_edges(&nt->node, bl + 1,
  110|    320|                       ((n == 3 || (n == 1 && !top_has_right)) ? 0 :
  ------------------
  |  Branch (110:26): [True: 80, False: 240]
  |  Branch (110:37): [True: 80, False: 160]
  |  Branch (110:47): [True: 26, False: 54]
  ------------------
  111|    320|                        EDGE_ALL_TOP_HAS_RIGHT) |
  112|    320|                       (!(n == 0 || (n == 2 && left_has_bottom)) ? 0 :
  ------------------
  |  Branch (112:27): [True: 80, False: 240]
  |  Branch (112:38): [True: 80, False: 160]
  |  Branch (112:48): [True: 26, False: 54]
  ------------------
  113|    320|                        EDGE_ALL_LEFT_HAS_BOTTOM));
  114|    320|        }
  115|     80|    } else {
  116|    130|        for (int n = 0; n < 4; n++) {
  ------------------
  |  Branch (116:25): [True: 104, False: 26]
  ------------------
  117|    104|            EdgeBranch *const nwc_child = mem->nwc[bl]++;
  118|    104|            nwc->split_offset[n] = PTR_OFFSET(nwc, nwc_child);
  ------------------
  |  |   94|    104|#define PTR_OFFSET(a, b) ((uint16_t)((uintptr_t)(b) - (uintptr_t)(a)))
  ------------------
  119|    104|            init_mode_node(nwc_child, bl + 1, mem,
  120|    104|                           !(n == 3 || (n == 1 && !top_has_right)),
  ------------------
  |  Branch (120:30): [True: 26, False: 78]
  |  Branch (120:41): [True: 26, False: 52]
  |  Branch (120:51): [True: 7, False: 19]
  ------------------
  121|    104|                           n == 0 || (n == 2 && left_has_bottom));
  ------------------
  |  Branch (121:28): [True: 26, False: 78]
  |  Branch (121:39): [True: 26, False: 52]
  |  Branch (121:49): [True: 7, False: 19]
  ------------------
  122|    104|        }
  123|     26|    }
  124|    106|}
intra_edge.c:init_edges:
   58|    426|{
   59|    426|    node->o = edge_flags;
   60|    426|    node->h[0] = edge_flags | EDGE_ALL_LEFT_HAS_BOTTOM;
   61|    426|    node->v[0] = edge_flags | EDGE_ALL_TOP_HAS_RIGHT;
   62|       |
   63|    426|    if (bl == BL_8X8) {
  ------------------
  |  Branch (63:9): [True: 320, False: 106]
  ------------------
   64|    320|        EdgeTip *const nt = (EdgeTip *) node;
   65|       |
   66|    320|        node->h[1] = edge_flags & (EDGE_ALL_LEFT_HAS_BOTTOM |
   67|    320|                                   EDGE_I420_TOP_HAS_RIGHT);
   68|    320|        node->v[1] = edge_flags & (EDGE_ALL_TOP_HAS_RIGHT |
   69|    320|                                   EDGE_I420_LEFT_HAS_BOTTOM |
   70|    320|                                   EDGE_I422_LEFT_HAS_BOTTOM);
   71|       |
   72|    320|        nt->split[0] = (edge_flags & EDGE_ALL_TOP_HAS_RIGHT) |
   73|    320|                       EDGE_I422_LEFT_HAS_BOTTOM;
   74|    320|        nt->split[1] = edge_flags | EDGE_I444_TOP_HAS_RIGHT;
   75|    320|        nt->split[2] = edge_flags & (EDGE_I420_TOP_HAS_RIGHT |
   76|    320|                                     EDGE_I420_LEFT_HAS_BOTTOM |
   77|    320|                                     EDGE_I422_LEFT_HAS_BOTTOM);
   78|    320|    } else {
   79|    106|        EdgeBranch *const nwc = (EdgeBranch *) node;
   80|       |
   81|    106|        node->h[1] = edge_flags & EDGE_ALL_LEFT_HAS_BOTTOM;
   82|    106|        node->v[1] = edge_flags & EDGE_ALL_TOP_HAS_RIGHT;
   83|       |
   84|    106|        nwc->h4 = EDGE_ALL_LEFT_HAS_BOTTOM;
   85|    106|        nwc->v4 = EDGE_ALL_TOP_HAS_RIGHT;
   86|    106|        if (bl == BL_16X16) {
  ------------------
  |  Branch (86:13): [True: 80, False: 26]
  ------------------
   87|     80|            nwc->h4 |= edge_flags & EDGE_I420_TOP_HAS_RIGHT;
   88|     80|            nwc->v4 |= edge_flags & (EDGE_I420_LEFT_HAS_BOTTOM |
   89|     80|                                     EDGE_I422_LEFT_HAS_BOTTOM);
   90|     80|        }
   91|    106|    }
   92|    426|}

recon_tmpl.c:sm_flag:
   95|  4.90M|static inline int sm_flag(const BlockContext *const b, const int idx) {
   96|  4.90M|    if (!b->intra[idx]) return 0;
  ------------------
  |  Branch (96:9): [True: 210k, False: 4.69M]
  ------------------
   97|  4.69M|    const enum IntraPredMode m = b->mode[idx];
   98|  4.69M|    return (m == SMOOTH_PRED || m == SMOOTH_H_PRED ||
  ------------------
  |  Branch (98:13): [True: 307k, False: 4.38M]
  |  Branch (98:33): [True: 103k, False: 4.28M]
  ------------------
   99|  4.28M|            m == SMOOTH_V_PRED) ? ANGLE_SMOOTH_EDGE_FLAG : 0;
  ------------------
  |  |   93|   491k|#define ANGLE_SMOOTH_EDGE_FLAG      512
  ------------------
  |  Branch (99:13): [True: 81.0k, False: 4.20M]
  ------------------
  100|  4.90M|}
recon_tmpl.c:sm_uv_flag:
  102|  3.24M|static inline int sm_uv_flag(const BlockContext *const b, const int idx) {
  103|  3.24M|    const enum IntraPredMode m = b->uvmode[idx];
  104|  3.24M|    return (m == SMOOTH_PRED || m == SMOOTH_H_PRED ||
  ------------------
  |  Branch (104:13): [True: 241k, False: 2.99M]
  |  Branch (104:33): [True: 93.3k, False: 2.90M]
  ------------------
  105|  2.90M|            m == SMOOTH_V_PRED) ? ANGLE_SMOOTH_EDGE_FLAG : 0;
  ------------------
  |  |   93|   381k|#define ANGLE_SMOOTH_EDGE_FLAG      512
  ------------------
  |  Branch (105:13): [True: 46.1k, False: 2.86M]
  ------------------
  106|  3.24M|}

dav1d_prepare_intra_edges_8bpc:
   86|  5.32M|{
   87|  5.32M|    const int bitdepth = bitdepth_from_max(bitdepth_max);
  ------------------
  |  |   58|  5.32M|#define bitdepth_from_max(x) 8
  ------------------
   88|  5.32M|    assert(y < h && x < w);
  ------------------
  |  Branch (88:5): [True: 5.32M, False: 0]
  |  Branch (88:5): [True: 5.32M, False: 0]
  ------------------
   89|       |
   90|  5.32M|    switch (mode) {
   91|   169k|    case VERT_PRED:
  ------------------
  |  Branch (91:5): [True: 169k, False: 5.15M]
  ------------------
   92|   458k|    case HOR_PRED:
  ------------------
  |  Branch (92:5): [True: 288k, False: 5.03M]
  ------------------
   93|   514k|    case DIAG_DOWN_LEFT_PRED:
  ------------------
  |  Branch (93:5): [True: 55.2k, False: 5.26M]
  ------------------
   94|   577k|    case DIAG_DOWN_RIGHT_PRED:
  ------------------
  |  Branch (94:5): [True: 63.7k, False: 5.26M]
  ------------------
   95|   642k|    case VERT_RIGHT_PRED:
  ------------------
  |  Branch (95:5): [True: 65.0k, False: 5.25M]
  ------------------
   96|   720k|    case HOR_DOWN_PRED:
  ------------------
  |  Branch (96:5): [True: 77.4k, False: 5.24M]
  ------------------
   97|   848k|    case HOR_UP_PRED:
  ------------------
  |  Branch (97:5): [True: 128k, False: 5.19M]
  ------------------
   98|   932k|    case VERT_LEFT_PRED: {
  ------------------
  |  Branch (98:5): [True: 83.8k, False: 5.24M]
  ------------------
   99|   932k|        *angle = av1_mode_to_angle_map[mode - VERT_PRED] + 3 * *angle;
  100|       |
  101|   932k|        if (*angle <= 90)
  ------------------
  |  Branch (101:13): [True: 269k, False: 662k]
  ------------------
  102|   269k|            mode = *angle < 90 && have_top ? Z1_PRED : VERT_PRED;
  ------------------
  |  Branch (102:20): [True: 167k, False: 101k]
  |  Branch (102:35): [True: 143k, False: 24.3k]
  ------------------
  103|   662k|        else if (*angle < 180)
  ------------------
  |  Branch (103:18): [True: 302k, False: 359k]
  ------------------
  104|   302k|            mode = Z2_PRED;
  105|   359k|        else
  106|   359k|            mode = *angle > 180 && have_left ? Z3_PRED : HOR_PRED;
  ------------------
  |  Branch (106:20): [True: 186k, False: 173k]
  |  Branch (106:36): [True: 184k, False: 2.57k]
  ------------------
  107|   932k|        break;
  108|   848k|    }
  109|  3.25M|    case DC_PRED:
  ------------------
  |  Branch (109:5): [True: 3.25M, False: 2.07M]
  ------------------
  110|  3.67M|    case PAETH_PRED:
  ------------------
  |  Branch (110:5): [True: 419k, False: 4.90M]
  ------------------
  111|  3.67M|        mode = av1_mode_conv[mode][have_left][have_top];
  112|  3.67M|        break;
  113|   722k|    default:
  ------------------
  |  Branch (113:5): [True: 722k, False: 4.60M]
  ------------------
  114|   722k|        break;
  115|  5.32M|    }
  116|       |
  117|  5.32M|    const pixel *dst_top;
  118|  5.32M|    if (have_top &&
  ------------------
  |  Branch (118:9): [True: 4.67M, False: 645k]
  ------------------
  119|  4.67M|        (av1_intra_prediction_edges[mode].needs_top ||
  ------------------
  |  Branch (119:10): [True: 4.37M, False: 305k]
  ------------------
  120|   305k|         av1_intra_prediction_edges[mode].needs_topleft ||
  ------------------
  |  Branch (120:10): [True: 152k, False: 152k]
  ------------------
  121|   152k|         (av1_intra_prediction_edges[mode].needs_left && !have_left)))
  ------------------
  |  Branch (121:11): [True: 152k, False: 0]
  |  Branch (121:58): [True: 4.55k, False: 148k]
  ------------------
  122|  4.53M|    {
  123|  4.53M|        if (prefilter_toplevel_sb_edge) {
  ------------------
  |  Branch (123:13): [True: 321k, False: 4.20M]
  ------------------
  124|   321k|            dst_top = &prefilter_toplevel_sb_edge[x * 4];
  125|  4.20M|        } else {
  126|  4.20M|            dst_top = &dst[-PXSTRIDE(stride)];
  ------------------
  |  |   53|  4.20M|#define PXSTRIDE(x) (x)
  ------------------
  127|  4.20M|        }
  128|  4.53M|    }
  129|       |
  130|  5.32M|    if (av1_intra_prediction_edges[mode].needs_left) {
  ------------------
  |  Branch (130:9): [True: 4.76M, False: 562k]
  ------------------
  131|  4.76M|        const int sz = th << 2;
  132|  4.76M|        pixel *const left = &topleft_out[-sz];
  133|       |
  134|  4.76M|        if (have_left) {
  ------------------
  |  Branch (134:13): [True: 4.73M, False: 23.8k]
  ------------------
  135|  4.73M|            const int px_have = imin(sz, (h - y) << 2);
  136|       |
  137|  59.4M|            for (int i = 0; i < px_have; i++)
  ------------------
  |  Branch (137:29): [True: 54.6M, False: 4.73M]
  ------------------
  138|  54.6M|                left[sz - 1 - i] = dst[PXSTRIDE(stride) * i - 1];
  ------------------
  |  |   53|  54.6M|#define PXSTRIDE(x) (x)
  ------------------
  139|  4.73M|            if (px_have < sz)
  ------------------
  |  Branch (139:17): [True: 68.2k, False: 4.67M]
  ------------------
  140|  68.2k|                pixel_set(left, left[sz - px_have], sz - px_have);
  ------------------
  |  |   48|  68.2k|#define pixel_set memset
  ------------------
  141|  4.73M|        } else {
  142|  23.8k|            pixel_set(left, have_top ? *dst_top : ((1 << bitdepth) >> 1) + 1, sz);
  ------------------
  |  |   48|  23.8k|#define pixel_set memset
  ------------------
  |  Branch (142:29): [True: 16.7k, False: 7.08k]
  ------------------
  143|  23.8k|        }
  144|       |
  145|  4.76M|        if (av1_intra_prediction_edges[mode].needs_bottomleft) {
  ------------------
  |  Branch (145:13): [True: 184k, False: 4.57M]
  ------------------
  146|   184k|            const int have_bottomleft = (!have_left || y + th >= h) ? 0 :
  ------------------
  |  Branch (146:42): [True: 0, False: 184k]
  |  Branch (146:56): [True: 30.3k, False: 153k]
  ------------------
  147|   184k|                                        (edge_flags & EDGE_I444_LEFT_HAS_BOTTOM);
  148|       |
  149|   184k|            if (have_bottomleft) {
  ------------------
  |  Branch (149:17): [True: 54.7k, False: 129k]
  ------------------
  150|  54.7k|                const int px_have = imin(sz, (h - y - th) << 2);
  151|       |
  152|   625k|                for (int i = 0; i < px_have; i++)
  ------------------
  |  Branch (152:33): [True: 570k, False: 54.7k]
  ------------------
  153|   570k|                    left[-(i + 1)] = dst[(sz + i) * PXSTRIDE(stride) - 1];
  ------------------
  |  |   53|   570k|#define PXSTRIDE(x) (x)
  ------------------
  154|  54.7k|                if (px_have < sz)
  ------------------
  |  Branch (154:21): [True: 1.08k, False: 53.6k]
  ------------------
  155|  1.08k|                    pixel_set(left - sz, left[-px_have], sz - px_have);
  ------------------
  |  |   48|  1.08k|#define pixel_set memset
  ------------------
  156|   129k|            } else {
  157|   129k|                pixel_set(left - sz, left[0], sz);
  ------------------
  |  |   48|   129k|#define pixel_set memset
  ------------------
  158|   129k|            }
  159|   184k|        }
  160|  4.76M|    }
  161|       |
  162|  5.32M|    if (av1_intra_prediction_edges[mode].needs_top) {
  ------------------
  |  Branch (162:9): [True: 4.57M, False: 752k]
  ------------------
  163|  4.57M|        const int sz = tw << 2;
  164|  4.57M|        pixel *const top = &topleft_out[1];
  165|       |
  166|  4.57M|        if (have_top) {
  ------------------
  |  Branch (166:13): [True: 4.37M, False: 198k]
  ------------------
  167|  4.37M|            const int px_have = imin(sz, (w - x) << 2);
  168|  4.37M|            pixel_copy(top, dst_top, px_have);
  ------------------
  |  |   47|  4.37M|#define pixel_copy memcpy
  ------------------
  169|  4.37M|            if (px_have < sz)
  ------------------
  |  Branch (169:17): [True: 154k, False: 4.21M]
  ------------------
  170|   154k|                pixel_set(top + px_have, top[px_have - 1], sz - px_have);
  ------------------
  |  |   48|   154k|#define pixel_set memset
  ------------------
  171|  4.37M|        } else {
  172|   198k|            pixel_set(top, have_left ? dst[-1] : ((1 << bitdepth) >> 1) - 1, sz);
  ------------------
  |  |   48|   198k|#define pixel_set memset
  ------------------
  |  Branch (172:28): [True: 190k, False: 7.40k]
  ------------------
  173|   198k|        }
  174|       |
  175|  4.57M|        if (av1_intra_prediction_edges[mode].needs_topright) {
  ------------------
  |  Branch (175:13): [True: 143k, False: 4.42M]
  ------------------
  176|   143k|            const int have_topright = (!have_top || x + tw >= w) ? 0 :
  ------------------
  |  Branch (176:40): [True: 0, False: 143k]
  |  Branch (176:53): [True: 3.87k, False: 139k]
  ------------------
  177|   143k|                                      (edge_flags & EDGE_I444_TOP_HAS_RIGHT);
  178|       |
  179|   143k|            if (have_topright) {
  ------------------
  |  Branch (179:17): [True: 88.6k, False: 54.8k]
  ------------------
  180|  88.6k|                const int px_have = imin(sz, (w - x - tw) << 2);
  181|       |
  182|  88.6k|                pixel_copy(top + sz, &dst_top[sz], px_have);
  ------------------
  |  |   47|  88.6k|#define pixel_copy memcpy
  ------------------
  183|  88.6k|                if (px_have < sz)
  ------------------
  |  Branch (183:21): [True: 574, False: 88.1k]
  ------------------
  184|    574|                    pixel_set(top + sz + px_have, top[sz + px_have - 1],
  ------------------
  |  |   48|    574|#define pixel_set memset
  ------------------
  185|    574|                              sz - px_have);
  186|  88.6k|            } else {
  187|  54.8k|                pixel_set(top + sz, top[sz - 1], sz);
  ------------------
  |  |   48|  54.8k|#define pixel_set memset
  ------------------
  188|  54.8k|            }
  189|   143k|        }
  190|  4.57M|    }
  191|       |
  192|  5.32M|    if (av1_intra_prediction_edges[mode].needs_topleft) {
  ------------------
  |  Branch (192:9): [True: 1.20M, False: 4.12M]
  ------------------
  193|  1.20M|        if (have_left)
  ------------------
  |  Branch (193:13): [True: 1.19M, False: 10.4k]
  ------------------
  194|  1.19M|            *topleft_out = have_top ? dst_top[-1] : dst[-1];
  ------------------
  |  Branch (194:28): [True: 1.07M, False: 113k]
  ------------------
  195|  10.4k|        else
  196|  10.4k|            *topleft_out = have_top ? *dst_top : (1 << bitdepth) >> 1;
  ------------------
  |  Branch (196:28): [True: 7.97k, False: 2.45k]
  ------------------
  197|       |
  198|  1.20M|        if (mode == Z2_PRED && tw + th >= 6 && filter_edge)
  ------------------
  |  Branch (198:13): [True: 302k, False: 897k]
  |  Branch (198:32): [True: 127k, False: 175k]
  |  Branch (198:48): [True: 20.2k, False: 106k]
  ------------------
  199|  20.2k|            *topleft_out = ((topleft_out[-1] + topleft_out[1]) * 5 +
  200|  20.2k|                            topleft_out[0] * 6 + 8) >> 4;
  201|  1.20M|    }
  202|       |
  203|  5.32M|    return mode;
  204|  5.32M|}
dav1d_prepare_intra_edges_16bpc:
   86|  6.73M|{
   87|  6.73M|    const int bitdepth = bitdepth_from_max(bitdepth_max);
  ------------------
  |  |   75|  6.73M|#define bitdepth_from_max(bitdepth_max) (32 - clz(bitdepth_max))
  ------------------
   88|  6.73M|    assert(y < h && x < w);
  ------------------
  |  Branch (88:5): [True: 6.73M, False: 0]
  |  Branch (88:5): [True: 6.73M, False: 0]
  ------------------
   89|       |
   90|  6.73M|    switch (mode) {
   91|   200k|    case VERT_PRED:
  ------------------
  |  Branch (91:5): [True: 200k, False: 6.53M]
  ------------------
   92|   596k|    case HOR_PRED:
  ------------------
  |  Branch (92:5): [True: 395k, False: 6.33M]
  ------------------
   93|   671k|    case DIAG_DOWN_LEFT_PRED:
  ------------------
  |  Branch (93:5): [True: 75.4k, False: 6.65M]
  ------------------
   94|   739k|    case DIAG_DOWN_RIGHT_PRED:
  ------------------
  |  Branch (94:5): [True: 67.9k, False: 6.66M]
  ------------------
   95|   804k|    case VERT_RIGHT_PRED:
  ------------------
  |  Branch (95:5): [True: 65.4k, False: 6.66M]
  ------------------
   96|   936k|    case HOR_DOWN_PRED:
  ------------------
  |  Branch (96:5): [True: 131k, False: 6.60M]
  ------------------
   97|  1.11M|    case HOR_UP_PRED:
  ------------------
  |  Branch (97:5): [True: 182k, False: 6.55M]
  ------------------
   98|  1.21M|    case VERT_LEFT_PRED: {
  ------------------
  |  Branch (98:5): [True: 93.6k, False: 6.64M]
  ------------------
   99|  1.21M|        *angle = av1_mode_to_angle_map[mode - VERT_PRED] + 3 * *angle;
  100|       |
  101|  1.21M|        if (*angle <= 90)
  ------------------
  |  Branch (101:13): [True: 322k, False: 889k]
  ------------------
  102|   322k|            mode = *angle < 90 && have_top ? Z1_PRED : VERT_PRED;
  ------------------
  |  Branch (102:20): [True: 203k, False: 119k]
  |  Branch (102:35): [True: 154k, False: 48.3k]
  ------------------
  103|   889k|        else if (*angle < 180)
  ------------------
  |  Branch (103:18): [True: 394k, False: 494k]
  ------------------
  104|   394k|            mode = Z2_PRED;
  105|   494k|        else
  106|   494k|            mode = *angle > 180 && have_left ? Z3_PRED : HOR_PRED;
  ------------------
  |  Branch (106:20): [True: 267k, False: 226k]
  |  Branch (106:36): [True: 257k, False: 10.2k]
  ------------------
  107|  1.21M|        break;
  108|  1.11M|    }
  109|  3.78M|    case DC_PRED:
  ------------------
  |  Branch (109:5): [True: 3.78M, False: 2.94M]
  ------------------
  110|  4.63M|    case PAETH_PRED:
  ------------------
  |  Branch (110:5): [True: 843k, False: 5.89M]
  ------------------
  111|  4.63M|        mode = av1_mode_conv[mode][have_left][have_top];
  112|  4.63M|        break;
  113|   891k|    default:
  ------------------
  |  Branch (113:5): [True: 891k, False: 5.84M]
  ------------------
  114|   891k|        break;
  115|  6.73M|    }
  116|       |
  117|  6.73M|    const pixel *dst_top;
  118|  6.73M|    if (have_top &&
  ------------------
  |  Branch (118:9): [True: 5.34M, False: 1.38M]
  ------------------
  119|  5.34M|        (av1_intra_prediction_edges[mode].needs_top ||
  ------------------
  |  Branch (119:10): [True: 4.97M, False: 366k]
  ------------------
  120|   366k|         av1_intra_prediction_edges[mode].needs_topleft ||
  ------------------
  |  Branch (120:10): [True: 180k, False: 185k]
  ------------------
  121|   185k|         (av1_intra_prediction_edges[mode].needs_left && !have_left)))
  ------------------
  |  Branch (121:11): [True: 185k, False: 0]
  |  Branch (121:58): [True: 10.7k, False: 174k]
  ------------------
  122|  5.17M|    {
  123|  5.17M|        if (prefilter_toplevel_sb_edge) {
  ------------------
  |  Branch (123:13): [True: 239k, False: 4.93M]
  ------------------
  124|   239k|            dst_top = &prefilter_toplevel_sb_edge[x * 4];
  125|  4.93M|        } else {
  126|  4.93M|            dst_top = &dst[-PXSTRIDE(stride)];
  127|  4.93M|        }
  128|  5.17M|    }
  129|       |
  130|  6.73M|    if (av1_intra_prediction_edges[mode].needs_left) {
  ------------------
  |  Branch (130:9): [True: 6.15M, False: 576k]
  ------------------
  131|  6.15M|        const int sz = th << 2;
  132|  6.15M|        pixel *const left = &topleft_out[-sz];
  133|       |
  134|  6.15M|        if (have_left) {
  ------------------
  |  Branch (134:13): [True: 6.11M, False: 38.3k]
  ------------------
  135|  6.11M|            const int px_have = imin(sz, (h - y) << 2);
  136|       |
  137|  74.4M|            for (int i = 0; i < px_have; i++)
  ------------------
  |  Branch (137:29): [True: 68.3M, False: 6.11M]
  ------------------
  138|  68.3M|                left[sz - 1 - i] = dst[PXSTRIDE(stride) * i - 1];
  139|  6.11M|            if (px_have < sz)
  ------------------
  |  Branch (139:17): [True: 250k, False: 5.86M]
  ------------------
  140|   250k|                pixel_set(left, left[sz - px_have], sz - px_have);
  141|  6.11M|        } else {
  142|  38.3k|            pixel_set(left, have_top ? *dst_top : ((1 << bitdepth) >> 1) + 1, sz);
  ------------------
  |  Branch (142:29): [True: 30.5k, False: 7.76k]
  ------------------
  143|  38.3k|        }
  144|       |
  145|  6.15M|        if (av1_intra_prediction_edges[mode].needs_bottomleft) {
  ------------------
  |  Branch (145:13): [True: 257k, False: 5.90M]
  ------------------
  146|   257k|            const int have_bottomleft = (!have_left || y + th >= h) ? 0 :
  ------------------
  |  Branch (146:42): [True: 0, False: 257k]
  |  Branch (146:56): [True: 75.5k, False: 181k]
  ------------------
  147|   257k|                                        (edge_flags & EDGE_I444_LEFT_HAS_BOTTOM);
  148|       |
  149|   257k|            if (have_bottomleft) {
  ------------------
  |  Branch (149:17): [True: 80.0k, False: 177k]
  ------------------
  150|  80.0k|                const int px_have = imin(sz, (h - y - th) << 2);
  151|       |
  152|   892k|                for (int i = 0; i < px_have; i++)
  ------------------
  |  Branch (152:33): [True: 812k, False: 80.0k]
  ------------------
  153|   812k|                    left[-(i + 1)] = dst[(sz + i) * PXSTRIDE(stride) - 1];
  154|  80.0k|                if (px_have < sz)
  ------------------
  |  Branch (154:21): [True: 6.34k, False: 73.6k]
  ------------------
  155|  6.34k|                    pixel_set(left - sz, left[-px_have], sz - px_have);
  156|   177k|            } else {
  157|   177k|                pixel_set(left - sz, left[0], sz);
  158|   177k|            }
  159|   257k|        }
  160|  6.15M|    }
  161|       |
  162|  6.73M|    if (av1_intra_prediction_edges[mode].needs_top) {
  ------------------
  |  Branch (162:9): [True: 5.42M, False: 1.30M]
  ------------------
  163|  5.42M|        const int sz = tw << 2;
  164|  5.42M|        pixel *const top = &topleft_out[1];
  165|       |
  166|  5.42M|        if (have_top) {
  ------------------
  |  Branch (166:13): [True: 4.97M, False: 445k]
  ------------------
  167|  4.97M|            const int px_have = imin(sz, (w - x) << 2);
  168|  4.97M|            pixel_copy(top, dst_top, px_have);
  ------------------
  |  |   65|  4.97M|#define pixel_copy(a, b, c) memcpy(a, b, (c) << 1)
  ------------------
  169|  4.97M|            if (px_have < sz)
  ------------------
  |  Branch (169:17): [True: 107k, False: 4.87M]
  ------------------
  170|   107k|                pixel_set(top + px_have, top[px_have - 1], sz - px_have);
  171|  4.97M|        } else {
  172|   445k|            pixel_set(top, have_left ? dst[-1] : ((1 << bitdepth) >> 1) - 1, sz);
  ------------------
  |  Branch (172:28): [True: 439k, False: 6.73k]
  ------------------
  173|   445k|        }
  174|       |
  175|  5.42M|        if (av1_intra_prediction_edges[mode].needs_topright) {
  ------------------
  |  Branch (175:13): [True: 154k, False: 5.27M]
  ------------------
  176|   154k|            const int have_topright = (!have_top || x + tw >= w) ? 0 :
  ------------------
  |  Branch (176:40): [True: 0, False: 154k]
  |  Branch (176:53): [True: 4.65k, False: 150k]
  ------------------
  177|   154k|                                      (edge_flags & EDGE_I444_TOP_HAS_RIGHT);
  178|       |
  179|   154k|            if (have_topright) {
  ------------------
  |  Branch (179:17): [True: 88.6k, False: 66.0k]
  ------------------
  180|  88.6k|                const int px_have = imin(sz, (w - x - tw) << 2);
  181|       |
  182|  88.6k|                pixel_copy(top + sz, &dst_top[sz], px_have);
  ------------------
  |  |   65|  88.6k|#define pixel_copy(a, b, c) memcpy(a, b, (c) << 1)
  ------------------
  183|  88.6k|                if (px_have < sz)
  ------------------
  |  Branch (183:21): [True: 772, False: 87.8k]
  ------------------
  184|    772|                    pixel_set(top + sz + px_have, top[sz + px_have - 1],
  185|    772|                              sz - px_have);
  186|  88.6k|            } else {
  187|  66.0k|                pixel_set(top + sz, top[sz - 1], sz);
  188|  66.0k|            }
  189|   154k|        }
  190|  5.42M|    }
  191|       |
  192|  6.73M|    if (av1_intra_prediction_edges[mode].needs_topleft) {
  ------------------
  |  Branch (192:9): [True: 1.64M, False: 5.08M]
  ------------------
  193|  1.64M|        if (have_left)
  ------------------
  |  Branch (193:13): [True: 1.62M, False: 18.7k]
  ------------------
  194|  1.62M|            *topleft_out = have_top ? dst_top[-1] : dst[-1];
  ------------------
  |  Branch (194:28): [True: 1.38M, False: 244k]
  ------------------
  195|  18.7k|        else
  196|  18.7k|            *topleft_out = have_top ? *dst_top : (1 << bitdepth) >> 1;
  ------------------
  |  Branch (196:28): [True: 16.8k, False: 1.89k]
  ------------------
  197|       |
  198|  1.64M|        if (mode == Z2_PRED && tw + th >= 6 && filter_edge)
  ------------------
  |  Branch (198:13): [True: 394k, False: 1.25M]
  |  Branch (198:32): [True: 190k, False: 204k]
  |  Branch (198:48): [True: 50.2k, False: 140k]
  ------------------
  199|  50.2k|            *topleft_out = ((topleft_out[-1] + topleft_out[1]) * 5 +
  200|  50.2k|                            topleft_out[0] * 6 + 8) >> 4;
  201|  1.64M|    }
  202|       |
  203|  6.73M|    return mode;
  204|  6.73M|}

dav1d_intra_pred_dsp_init_8bpc:
  744|  3.41k|COLD void bitfn(dav1d_intra_pred_dsp_init)(Dav1dIntraPredDSPContext *const c) {
  745|  3.41k|    c->intra_pred[DC_PRED      ] = ipred_dc_c;
  746|  3.41k|    c->intra_pred[DC_128_PRED  ] = ipred_dc_128_c;
  747|  3.41k|    c->intra_pred[TOP_DC_PRED  ] = ipred_dc_top_c;
  748|  3.41k|    c->intra_pred[LEFT_DC_PRED ] = ipred_dc_left_c;
  749|  3.41k|    c->intra_pred[HOR_PRED     ] = ipred_h_c;
  750|  3.41k|    c->intra_pred[VERT_PRED    ] = ipred_v_c;
  751|  3.41k|    c->intra_pred[PAETH_PRED   ] = ipred_paeth_c;
  752|  3.41k|    c->intra_pred[SMOOTH_PRED  ] = ipred_smooth_c;
  753|  3.41k|    c->intra_pred[SMOOTH_V_PRED] = ipred_smooth_v_c;
  754|  3.41k|    c->intra_pred[SMOOTH_H_PRED] = ipred_smooth_h_c;
  755|  3.41k|    c->intra_pred[Z1_PRED      ] = ipred_z1_c;
  756|  3.41k|    c->intra_pred[Z2_PRED      ] = ipred_z2_c;
  757|  3.41k|    c->intra_pred[Z3_PRED      ] = ipred_z3_c;
  758|  3.41k|    c->intra_pred[FILTER_PRED  ] = ipred_filter_c;
  759|       |
  760|  3.41k|    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1] = cfl_ac_420_c;
  761|  3.41k|    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1] = cfl_ac_422_c;
  762|  3.41k|    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1] = cfl_ac_444_c;
  763|       |
  764|  3.41k|    c->cfl_pred[DC_PRED     ] = ipred_cfl_c;
  765|  3.41k|    c->cfl_pred[DC_128_PRED ] = ipred_cfl_128_c;
  766|  3.41k|    c->cfl_pred[TOP_DC_PRED ] = ipred_cfl_top_c;
  767|  3.41k|    c->cfl_pred[LEFT_DC_PRED] = ipred_cfl_left_c;
  768|       |
  769|  3.41k|    c->pal_pred = pal_pred_c;
  770|       |
  771|  3.41k|#if HAVE_ASM
  772|       |#if ARCH_AARCH64 || ARCH_ARM
  773|       |    intra_pred_dsp_init_arm(c);
  774|       |#elif ARCH_RISCV
  775|       |    intra_pred_dsp_init_riscv(c);
  776|       |#elif ARCH_X86
  777|       |    intra_pred_dsp_init_x86(c);
  778|       |#elif ARCH_LOONGARCH64
  779|       |    intra_pred_dsp_init_loongarch(c);
  780|       |#endif
  781|  3.41k|#endif
  782|  3.41k|}
dav1d_intra_pred_dsp_init_16bpc:
  744|  4.61k|COLD void bitfn(dav1d_intra_pred_dsp_init)(Dav1dIntraPredDSPContext *const c) {
  745|  4.61k|    c->intra_pred[DC_PRED      ] = ipred_dc_c;
  746|  4.61k|    c->intra_pred[DC_128_PRED  ] = ipred_dc_128_c;
  747|  4.61k|    c->intra_pred[TOP_DC_PRED  ] = ipred_dc_top_c;
  748|  4.61k|    c->intra_pred[LEFT_DC_PRED ] = ipred_dc_left_c;
  749|  4.61k|    c->intra_pred[HOR_PRED     ] = ipred_h_c;
  750|  4.61k|    c->intra_pred[VERT_PRED    ] = ipred_v_c;
  751|  4.61k|    c->intra_pred[PAETH_PRED   ] = ipred_paeth_c;
  752|  4.61k|    c->intra_pred[SMOOTH_PRED  ] = ipred_smooth_c;
  753|  4.61k|    c->intra_pred[SMOOTH_V_PRED] = ipred_smooth_v_c;
  754|  4.61k|    c->intra_pred[SMOOTH_H_PRED] = ipred_smooth_h_c;
  755|  4.61k|    c->intra_pred[Z1_PRED      ] = ipred_z1_c;
  756|  4.61k|    c->intra_pred[Z2_PRED      ] = ipred_z2_c;
  757|  4.61k|    c->intra_pred[Z3_PRED      ] = ipred_z3_c;
  758|  4.61k|    c->intra_pred[FILTER_PRED  ] = ipred_filter_c;
  759|       |
  760|  4.61k|    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1] = cfl_ac_420_c;
  761|  4.61k|    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1] = cfl_ac_422_c;
  762|  4.61k|    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1] = cfl_ac_444_c;
  763|       |
  764|  4.61k|    c->cfl_pred[DC_PRED     ] = ipred_cfl_c;
  765|  4.61k|    c->cfl_pred[DC_128_PRED ] = ipred_cfl_128_c;
  766|  4.61k|    c->cfl_pred[TOP_DC_PRED ] = ipred_cfl_top_c;
  767|  4.61k|    c->cfl_pred[LEFT_DC_PRED] = ipred_cfl_left_c;
  768|       |
  769|  4.61k|    c->pal_pred = pal_pred_c;
  770|       |
  771|  4.61k|#if HAVE_ASM
  772|       |#if ARCH_AARCH64 || ARCH_ARM
  773|       |    intra_pred_dsp_init_arm(c);
  774|       |#elif ARCH_RISCV
  775|       |    intra_pred_dsp_init_riscv(c);
  776|       |#elif ARCH_X86
  777|       |    intra_pred_dsp_init_x86(c);
  778|       |#elif ARCH_LOONGARCH64
  779|       |    intra_pred_dsp_init_loongarch(c);
  780|       |#endif
  781|  4.61k|#endif
  782|  4.61k|}

itx_1d.c:inv_dct4_1d_internal_c:
   68|  4.90M|{
   69|  4.90M|    assert(stride > 0);
  ------------------
  |  Branch (69:5): [True: 4.90M, False: 0]
  ------------------
   70|  4.90M|    const int in0 = c[0 * stride], in1 = c[1 * stride];
   71|       |
   72|  4.90M|    int t0, t1, t2, t3;
   73|  4.90M|    if (tx64) {
  ------------------
  |  Branch (73:9): [True: 1.39M, False: 3.50M]
  ------------------
   74|  1.39M|        t0 = t1 = (in0 * 181 + 128) >> 8;
   75|  1.39M|        t2 = (in1 * 1567 + 2048) >> 12;
   76|  1.39M|        t3 = (in1 * 3784 + 2048) >> 12;
   77|  3.50M|    } else {
   78|  3.50M|        const int in2 = c[2 * stride], in3 = c[3 * stride];
   79|       |
   80|  3.50M|        t0 = ((in0 + in2) * 181 + 128) >> 8;
   81|  3.50M|        t1 = ((in0 - in2) * 181 + 128) >> 8;
   82|  3.50M|        t2 = ((in1 *  1567         - in3 * (3784 - 4096) + 2048) >> 12) - in3;
   83|  3.50M|        t3 = ((in1 * (3784 - 4096) + in3 *  1567         + 2048) >> 12) + in1;
   84|  3.50M|    }
   85|       |
   86|  4.90M|    c[0 * stride] = CLIP(t0 + t3);
  ------------------
  |  |   37|  4.90M|#define CLIP(a) iclip(a, min, max)
  ------------------
   87|  4.90M|    c[1 * stride] = CLIP(t1 + t2);
  ------------------
  |  |   37|  4.90M|#define CLIP(a) iclip(a, min, max)
  ------------------
   88|  4.90M|    c[2 * stride] = CLIP(t1 - t2);
  ------------------
  |  |   37|  4.90M|#define CLIP(a) iclip(a, min, max)
  ------------------
   89|  4.90M|    c[3 * stride] = CLIP(t0 - t3);
  ------------------
  |  |   37|  4.90M|#define CLIP(a) iclip(a, min, max)
  ------------------
   90|  4.90M|}
itx_1d.c:inv_dct8_1d_internal_c:
  101|  4.90M|{
  102|  4.90M|    assert(stride > 0);
  ------------------
  |  Branch (102:5): [True: 4.90M, False: 0]
  ------------------
  103|  4.90M|    inv_dct4_1d_internal_c(c, stride << 1, min, max, tx64);
  104|       |
  105|  4.90M|    const int in1 = c[1 * stride], in3 = c[3 * stride];
  106|       |
  107|  4.90M|    int t4a, t5a, t6a, t7a;
  108|  4.90M|    if (tx64) {
  ------------------
  |  Branch (108:9): [True: 1.39M, False: 3.50M]
  ------------------
  109|  1.39M|        t4a = (in1 *   799 + 2048) >> 12;
  110|  1.39M|        t5a = (in3 * -2276 + 2048) >> 12;
  111|  1.39M|        t6a = (in3 *  3406 + 2048) >> 12;
  112|  1.39M|        t7a = (in1 *  4017 + 2048) >> 12;
  113|  3.50M|    } else {
  114|  3.50M|        const int in5 = c[5 * stride], in7 = c[7 * stride];
  115|       |
  116|  3.50M|        t4a = ((in1 *   799         - in7 * (4017 - 4096) + 2048) >> 12) - in7;
  117|  3.50M|        t5a =  (in5 *  1703         - in3 *  1138         + 1024) >> 11;
  118|  3.50M|        t6a =  (in5 *  1138         + in3 *  1703         + 1024) >> 11;
  119|  3.50M|        t7a = ((in1 * (4017 - 4096) + in7 *  799          + 2048) >> 12) + in1;
  120|  3.50M|    }
  121|       |
  122|  4.90M|    const int t4  = CLIP(t4a + t5a);
  ------------------
  |  |   37|  4.90M|#define CLIP(a) iclip(a, min, max)
  ------------------
  123|  4.90M|              t5a = CLIP(t4a - t5a);
  ------------------
  |  |   37|  4.90M|#define CLIP(a) iclip(a, min, max)
  ------------------
  124|  4.90M|    const int t7  = CLIP(t7a + t6a);
  ------------------
  |  |   37|  4.90M|#define CLIP(a) iclip(a, min, max)
  ------------------
  125|  4.90M|              t6a = CLIP(t7a - t6a);
  ------------------
  |  |   37|  4.90M|#define CLIP(a) iclip(a, min, max)
  ------------------
  126|       |
  127|  4.90M|    const int t5  = ((t6a - t5a) * 181 + 128) >> 8;
  128|  4.90M|    const int t6  = ((t6a + t5a) * 181 + 128) >> 8;
  129|       |
  130|  4.90M|    const int t0 = c[0 * stride];
  131|  4.90M|    const int t1 = c[2 * stride];
  132|  4.90M|    const int t2 = c[4 * stride];
  133|  4.90M|    const int t3 = c[6 * stride];
  134|       |
  135|  4.90M|    c[0 * stride] = CLIP(t0 + t7);
  ------------------
  |  |   37|  4.90M|#define CLIP(a) iclip(a, min, max)
  ------------------
  136|  4.90M|    c[1 * stride] = CLIP(t1 + t6);
  ------------------
  |  |   37|  4.90M|#define CLIP(a) iclip(a, min, max)
  ------------------
  137|  4.90M|    c[2 * stride] = CLIP(t2 + t5);
  ------------------
  |  |   37|  4.90M|#define CLIP(a) iclip(a, min, max)
  ------------------
  138|  4.90M|    c[3 * stride] = CLIP(t3 + t4);
  ------------------
  |  |   37|  4.90M|#define CLIP(a) iclip(a, min, max)
  ------------------
  139|  4.90M|    c[4 * stride] = CLIP(t3 - t4);
  ------------------
  |  |   37|  4.90M|#define CLIP(a) iclip(a, min, max)
  ------------------
  140|  4.90M|    c[5 * stride] = CLIP(t2 - t5);
  ------------------
  |  |   37|  4.90M|#define CLIP(a) iclip(a, min, max)
  ------------------
  141|  4.90M|    c[6 * stride] = CLIP(t1 - t6);
  ------------------
  |  |   37|  4.90M|#define CLIP(a) iclip(a, min, max)
  ------------------
  142|  4.90M|    c[7 * stride] = CLIP(t0 - t7);
  ------------------
  |  |   37|  4.90M|#define CLIP(a) iclip(a, min, max)
  ------------------
  143|  4.90M|}
itx_1d.c:inv_dct16_1d_c:
  242|  1.18M|{
  243|  1.18M|    inv_dct16_1d_internal_c(c, stride, min, max, 0);
  244|  1.18M|}
itx_1d.c:inv_dct16_1d_internal_c:
  154|  4.90M|{
  155|  4.90M|    assert(stride > 0);
  ------------------
  |  Branch (155:5): [True: 4.90M, False: 0]
  ------------------
  156|  4.90M|    inv_dct8_1d_internal_c(c, stride << 1, min, max, tx64);
  157|       |
  158|  4.90M|    const int in1 = c[1 * stride], in3 = c[3 * stride];
  159|  4.90M|    const int in5 = c[5 * stride], in7 = c[7 * stride];
  160|       |
  161|  4.90M|    int t8a, t9a, t10a, t11a, t12a, t13a, t14a, t15a;
  162|  4.90M|    if (tx64) {
  ------------------
  |  Branch (162:9): [True: 1.39M, False: 3.50M]
  ------------------
  163|  1.39M|        t8a  = (in1 *   401 + 2048) >> 12;
  164|  1.39M|        t9a  = (in7 * -2598 + 2048) >> 12;
  165|  1.39M|        t10a = (in5 *  1931 + 2048) >> 12;
  166|  1.39M|        t11a = (in3 * -1189 + 2048) >> 12;
  167|  1.39M|        t12a = (in3 *  3920 + 2048) >> 12;
  168|  1.39M|        t13a = (in5 *  3612 + 2048) >> 12;
  169|  1.39M|        t14a = (in7 *  3166 + 2048) >> 12;
  170|  1.39M|        t15a = (in1 *  4076 + 2048) >> 12;
  171|  3.50M|    } else {
  172|  3.50M|        const int in9  = c[ 9 * stride], in11 = c[11 * stride];
  173|  3.50M|        const int in13 = c[13 * stride], in15 = c[15 * stride];
  174|       |
  175|  3.50M|        t8a  = ((in1  *   401         - in15 * (4076 - 4096) + 2048) >> 12) - in15;
  176|  3.50M|        t9a  =  (in9  *  1583         - in7  *  1299         + 1024) >> 11;
  177|  3.50M|        t10a = ((in5  *  1931         - in11 * (3612 - 4096) + 2048) >> 12) - in11;
  178|  3.50M|        t11a = ((in13 * (3920 - 4096) - in3  *  1189         + 2048) >> 12) + in13;
  179|  3.50M|        t12a = ((in13 *  1189         + in3  * (3920 - 4096) + 2048) >> 12) + in3;
  180|  3.50M|        t13a = ((in5  * (3612 - 4096) + in11 *  1931         + 2048) >> 12) + in5;
  181|  3.50M|        t14a =  (in9  *  1299         + in7  *  1583         + 1024) >> 11;
  182|  3.50M|        t15a = ((in1  * (4076 - 4096) + in15 *   401         + 2048) >> 12) + in1;
  183|  3.50M|    }
  184|       |
  185|  4.90M|    int t8  = CLIP(t8a  + t9a);
  ------------------
  |  |   37|  4.90M|#define CLIP(a) iclip(a, min, max)
  ------------------
  186|  4.90M|    int t9  = CLIP(t8a  - t9a);
  ------------------
  |  |   37|  4.90M|#define CLIP(a) iclip(a, min, max)
  ------------------
  187|  4.90M|    int t10 = CLIP(t11a - t10a);
  ------------------
  |  |   37|  4.90M|#define CLIP(a) iclip(a, min, max)
  ------------------
  188|  4.90M|    int t11 = CLIP(t11a + t10a);
  ------------------
  |  |   37|  4.90M|#define CLIP(a) iclip(a, min, max)
  ------------------
  189|  4.90M|    int t12 = CLIP(t12a + t13a);
  ------------------
  |  |   37|  4.90M|#define CLIP(a) iclip(a, min, max)
  ------------------
  190|  4.90M|    int t13 = CLIP(t12a - t13a);
  ------------------
  |  |   37|  4.90M|#define CLIP(a) iclip(a, min, max)
  ------------------
  191|  4.90M|    int t14 = CLIP(t15a - t14a);
  ------------------
  |  |   37|  4.90M|#define CLIP(a) iclip(a, min, max)
  ------------------
  192|  4.90M|    int t15 = CLIP(t15a + t14a);
  ------------------
  |  |   37|  4.90M|#define CLIP(a) iclip(a, min, max)
  ------------------
  193|       |
  194|  4.90M|    t9a  = ((  t14 *  1567         - t9  * (3784 - 4096)  + 2048) >> 12) - t9;
  195|  4.90M|    t14a = ((  t14 * (3784 - 4096) + t9  *  1567          + 2048) >> 12) + t14;
  196|  4.90M|    t10a = ((-(t13 * (3784 - 4096) + t10 *  1567)         + 2048) >> 12) - t13;
  197|  4.90M|    t13a = ((  t13 *  1567         - t10 * (3784 - 4096)  + 2048) >> 12) - t10;
  198|       |
  199|  4.90M|    t8a  = CLIP(t8   + t11);
  ------------------
  |  |   37|  4.90M|#define CLIP(a) iclip(a, min, max)
  ------------------
  200|  4.90M|    t9   = CLIP(t9a  + t10a);
  ------------------
  |  |   37|  4.90M|#define CLIP(a) iclip(a, min, max)
  ------------------
  201|  4.90M|    t10  = CLIP(t9a  - t10a);
  ------------------
  |  |   37|  4.90M|#define CLIP(a) iclip(a, min, max)
  ------------------
  202|  4.90M|    t11a = CLIP(t8   - t11);
  ------------------
  |  |   37|  4.90M|#define CLIP(a) iclip(a, min, max)
  ------------------
  203|  4.90M|    t12a = CLIP(t15  - t12);
  ------------------
  |  |   37|  4.90M|#define CLIP(a) iclip(a, min, max)
  ------------------
  204|  4.90M|    t13  = CLIP(t14a - t13a);
  ------------------
  |  |   37|  4.90M|#define CLIP(a) iclip(a, min, max)
  ------------------
  205|  4.90M|    t14  = CLIP(t14a + t13a);
  ------------------
  |  |   37|  4.90M|#define CLIP(a) iclip(a, min, max)
  ------------------
  206|  4.90M|    t15a = CLIP(t15  + t12);
  ------------------
  |  |   37|  4.90M|#define CLIP(a) iclip(a, min, max)
  ------------------
  207|       |
  208|  4.90M|    t10a = ((t13  - t10)  * 181 + 128) >> 8;
  209|  4.90M|    t13a = ((t13  + t10)  * 181 + 128) >> 8;
  210|  4.90M|    t11  = ((t12a - t11a) * 181 + 128) >> 8;
  211|  4.90M|    t12  = ((t12a + t11a) * 181 + 128) >> 8;
  212|       |
  213|  4.90M|    const int t0 = c[ 0 * stride];
  214|  4.90M|    const int t1 = c[ 2 * stride];
  215|  4.90M|    const int t2 = c[ 4 * stride];
  216|  4.90M|    const int t3 = c[ 6 * stride];
  217|  4.90M|    const int t4 = c[ 8 * stride];
  218|  4.90M|    const int t5 = c[10 * stride];
  219|  4.90M|    const int t6 = c[12 * stride];
  220|  4.90M|    const int t7 = c[14 * stride];
  221|       |
  222|  4.90M|    c[ 0 * stride] = CLIP(t0 + t15a);
  ------------------
  |  |   37|  4.90M|#define CLIP(a) iclip(a, min, max)
  ------------------
  223|  4.90M|    c[ 1 * stride] = CLIP(t1 + t14);
  ------------------
  |  |   37|  4.90M|#define CLIP(a) iclip(a, min, max)
  ------------------
  224|  4.90M|    c[ 2 * stride] = CLIP(t2 + t13a);
  ------------------
  |  |   37|  4.90M|#define CLIP(a) iclip(a, min, max)
  ------------------
  225|  4.90M|    c[ 3 * stride] = CLIP(t3 + t12);
  ------------------
  |  |   37|  4.90M|#define CLIP(a) iclip(a, min, max)
  ------------------
  226|  4.90M|    c[ 4 * stride] = CLIP(t4 + t11);
  ------------------
  |  |   37|  4.90M|#define CLIP(a) iclip(a, min, max)
  ------------------
  227|  4.90M|    c[ 5 * stride] = CLIP(t5 + t10a);
  ------------------
  |  |   37|  4.90M|#define CLIP(a) iclip(a, min, max)
  ------------------
  228|  4.90M|    c[ 6 * stride] = CLIP(t6 + t9);
  ------------------
  |  |   37|  4.90M|#define CLIP(a) iclip(a, min, max)
  ------------------
  229|  4.90M|    c[ 7 * stride] = CLIP(t7 + t8a);
  ------------------
  |  |   37|  4.90M|#define CLIP(a) iclip(a, min, max)
  ------------------
  230|  4.90M|    c[ 8 * stride] = CLIP(t7 - t8a);
  ------------------
  |  |   37|  4.90M|#define CLIP(a) iclip(a, min, max)
  ------------------
  231|  4.90M|    c[ 9 * stride] = CLIP(t6 - t9);
  ------------------
  |  |   37|  4.90M|#define CLIP(a) iclip(a, min, max)
  ------------------
  232|  4.90M|    c[10 * stride] = CLIP(t5 - t10a);
  ------------------
  |  |   37|  4.90M|#define CLIP(a) iclip(a, min, max)
  ------------------
  233|  4.90M|    c[11 * stride] = CLIP(t4 - t11);
  ------------------
  |  |   37|  4.90M|#define CLIP(a) iclip(a, min, max)
  ------------------
  234|  4.90M|    c[12 * stride] = CLIP(t3 - t12);
  ------------------
  |  |   37|  4.90M|#define CLIP(a) iclip(a, min, max)
  ------------------
  235|  4.90M|    c[13 * stride] = CLIP(t2 - t13a);
  ------------------
  |  |   37|  4.90M|#define CLIP(a) iclip(a, min, max)
  ------------------
  236|  4.90M|    c[14 * stride] = CLIP(t1 - t14);
  ------------------
  |  |   37|  4.90M|#define CLIP(a) iclip(a, min, max)
  ------------------
  237|  4.90M|    c[15 * stride] = CLIP(t0 - t15a);
  ------------------
  |  |   37|  4.90M|#define CLIP(a) iclip(a, min, max)
  ------------------
  238|  4.90M|}
itx_1d.c:inv_dct32_1d_c:
  432|  2.32M|{
  433|  2.32M|    inv_dct32_1d_internal_c(c, stride, min, max, 0);
  434|  2.32M|}
itx_1d.c:inv_dct32_1d_internal_c:
  249|  3.71M|{
  250|  3.71M|    assert(stride > 0);
  ------------------
  |  Branch (250:5): [True: 3.71M, False: 0]
  ------------------
  251|  3.71M|    inv_dct16_1d_internal_c(c, stride << 1, min, max, tx64);
  252|       |
  253|  3.71M|    const int in1  = c[ 1 * stride], in3  = c[ 3 * stride];
  254|  3.71M|    const int in5  = c[ 5 * stride], in7  = c[ 7 * stride];
  255|  3.71M|    const int in9  = c[ 9 * stride], in11 = c[11 * stride];
  256|  3.71M|    const int in13 = c[13 * stride], in15 = c[15 * stride];
  257|       |
  258|  3.71M|    int t16a, t17a, t18a, t19a, t20a, t21a, t22a, t23a;
  259|  3.71M|    int t24a, t25a, t26a, t27a, t28a, t29a, t30a, t31a;
  260|  3.71M|    if (tx64) {
  ------------------
  |  Branch (260:9): [True: 1.39M, False: 2.32M]
  ------------------
  261|  1.39M|        t16a = (in1  *   201 + 2048) >> 12;
  262|  1.39M|        t17a = (in15 * -2751 + 2048) >> 12;
  263|  1.39M|        t18a = (in9  *  1751 + 2048) >> 12;
  264|  1.39M|        t19a = (in7  * -1380 + 2048) >> 12;
  265|  1.39M|        t20a = (in5  *   995 + 2048) >> 12;
  266|  1.39M|        t21a = (in11 * -2106 + 2048) >> 12;
  267|  1.39M|        t22a = (in13 *  2440 + 2048) >> 12;
  268|  1.39M|        t23a = (in3  *  -601 + 2048) >> 12;
  269|  1.39M|        t24a = (in3  *  4052 + 2048) >> 12;
  270|  1.39M|        t25a = (in13 *  3290 + 2048) >> 12;
  271|  1.39M|        t26a = (in11 *  3513 + 2048) >> 12;
  272|  1.39M|        t27a = (in5  *  3973 + 2048) >> 12;
  273|  1.39M|        t28a = (in7  *  3857 + 2048) >> 12;
  274|  1.39M|        t29a = (in9  *  3703 + 2048) >> 12;
  275|  1.39M|        t30a = (in15 *  3035 + 2048) >> 12;
  276|  1.39M|        t31a = (in1  *  4091 + 2048) >> 12;
  277|  2.32M|    } else {
  278|  2.32M|        const int in17 = c[17 * stride], in19 = c[19 * stride];
  279|  2.32M|        const int in21 = c[21 * stride], in23 = c[23 * stride];
  280|  2.32M|        const int in25 = c[25 * stride], in27 = c[27 * stride];
  281|  2.32M|        const int in29 = c[29 * stride], in31 = c[31 * stride];
  282|       |
  283|  2.32M|        t16a = ((in1  *   201         - in31 * (4091 - 4096) + 2048) >> 12) - in31;
  284|  2.32M|        t17a = ((in17 * (3035 - 4096) - in15 *  2751         + 2048) >> 12) + in17;
  285|  2.32M|        t18a = ((in9  *  1751         - in23 * (3703 - 4096) + 2048) >> 12) - in23;
  286|  2.32M|        t19a = ((in25 * (3857 - 4096) - in7  *  1380         + 2048) >> 12) + in25;
  287|  2.32M|        t20a = ((in5  *   995         - in27 * (3973 - 4096) + 2048) >> 12) - in27;
  288|  2.32M|        t21a = ((in21 * (3513 - 4096) - in11 *  2106         + 2048) >> 12) + in21;
  289|  2.32M|        t22a =  (in13 *  1220         - in19 *  1645         + 1024) >> 11;
  290|  2.32M|        t23a = ((in29 * (4052 - 4096) - in3  *   601         + 2048) >> 12) + in29;
  291|  2.32M|        t24a = ((in29 *   601         + in3  * (4052 - 4096) + 2048) >> 12) + in3;
  292|  2.32M|        t25a =  (in13 *  1645         + in19 *  1220         + 1024) >> 11;
  293|  2.32M|        t26a = ((in21 *  2106         + in11 * (3513 - 4096) + 2048) >> 12) + in11;
  294|  2.32M|        t27a = ((in5  * (3973 - 4096) + in27 *   995         + 2048) >> 12) + in5;
  295|  2.32M|        t28a = ((in25 *  1380         + in7  * (3857 - 4096) + 2048) >> 12) + in7;
  296|  2.32M|        t29a = ((in9  * (3703 - 4096) + in23 *  1751         + 2048) >> 12) + in9;
  297|  2.32M|        t30a = ((in17 *  2751         + in15 * (3035 - 4096) + 2048) >> 12) + in15;
  298|  2.32M|        t31a = ((in1  * (4091 - 4096) + in31 *   201         + 2048) >> 12) + in1;
  299|  2.32M|    }
  300|       |
  301|  3.71M|    int t16 = CLIP(t16a + t17a);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  302|  3.71M|    int t17 = CLIP(t16a - t17a);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  303|  3.71M|    int t18 = CLIP(t19a - t18a);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  304|  3.71M|    int t19 = CLIP(t19a + t18a);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  305|  3.71M|    int t20 = CLIP(t20a + t21a);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  306|  3.71M|    int t21 = CLIP(t20a - t21a);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  307|  3.71M|    int t22 = CLIP(t23a - t22a);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  308|  3.71M|    int t23 = CLIP(t23a + t22a);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  309|  3.71M|    int t24 = CLIP(t24a + t25a);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  310|  3.71M|    int t25 = CLIP(t24a - t25a);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  311|  3.71M|    int t26 = CLIP(t27a - t26a);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  312|  3.71M|    int t27 = CLIP(t27a + t26a);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  313|  3.71M|    int t28 = CLIP(t28a + t29a);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  314|  3.71M|    int t29 = CLIP(t28a - t29a);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  315|  3.71M|    int t30 = CLIP(t31a - t30a);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  316|  3.71M|    int t31 = CLIP(t31a + t30a);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  317|       |
  318|  3.71M|    t17a = ((  t30 *   799         - t17 * (4017 - 4096)  + 2048) >> 12) - t17;
  319|  3.71M|    t30a = ((  t30 * (4017 - 4096) + t17 *   799          + 2048) >> 12) + t30;
  320|  3.71M|    t18a = ((-(t29 * (4017 - 4096) + t18 *   799)         + 2048) >> 12) - t29;
  321|  3.71M|    t29a = ((  t29 *   799         - t18 * (4017 - 4096)  + 2048) >> 12) - t18;
  322|  3.71M|    t21a =  (  t26 *  1703         - t21 *  1138          + 1024) >> 11;
  323|  3.71M|    t26a =  (  t26 *  1138         + t21 *  1703          + 1024) >> 11;
  324|  3.71M|    t22a =  (-(t25 *  1138         + t22 *  1703        ) + 1024) >> 11;
  325|  3.71M|    t25a =  (  t25 *  1703         - t22 *  1138          + 1024) >> 11;
  326|       |
  327|  3.71M|    t16a = CLIP(t16  + t19);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  328|  3.71M|    t17  = CLIP(t17a + t18a);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  329|  3.71M|    t18  = CLIP(t17a - t18a);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  330|  3.71M|    t19a = CLIP(t16  - t19);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  331|  3.71M|    t20a = CLIP(t23  - t20);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  332|  3.71M|    t21  = CLIP(t22a - t21a);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  333|  3.71M|    t22  = CLIP(t22a + t21a);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  334|  3.71M|    t23a = CLIP(t23  + t20);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  335|  3.71M|    t24a = CLIP(t24  + t27);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  336|  3.71M|    t25  = CLIP(t25a + t26a);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  337|  3.71M|    t26  = CLIP(t25a - t26a);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  338|  3.71M|    t27a = CLIP(t24  - t27);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  339|  3.71M|    t28a = CLIP(t31  - t28);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  340|  3.71M|    t29  = CLIP(t30a - t29a);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  341|  3.71M|    t30  = CLIP(t30a + t29a);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  342|  3.71M|    t31a = CLIP(t31  + t28);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  343|       |
  344|  3.71M|    t18a = ((  t29  *  1567         - t18  * (3784 - 4096)  + 2048) >> 12) - t18;
  345|  3.71M|    t29a = ((  t29  * (3784 - 4096) + t18  *  1567          + 2048) >> 12) + t29;
  346|  3.71M|    t19  = ((  t28a *  1567         - t19a * (3784 - 4096)  + 2048) >> 12) - t19a;
  347|  3.71M|    t28  = ((  t28a * (3784 - 4096) + t19a *  1567          + 2048) >> 12) + t28a;
  348|  3.71M|    t20  = ((-(t27a * (3784 - 4096) + t20a *  1567)         + 2048) >> 12) - t27a;
  349|  3.71M|    t27  = ((  t27a *  1567         - t20a * (3784 - 4096)  + 2048) >> 12) - t20a;
  350|  3.71M|    t21a = ((-(t26  * (3784 - 4096) + t21  *  1567)         + 2048) >> 12) - t26;
  351|  3.71M|    t26a = ((  t26  *  1567         - t21  * (3784 - 4096)  + 2048) >> 12) - t21;
  352|       |
  353|  3.71M|    t16  = CLIP(t16a + t23a);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  354|  3.71M|    t17a = CLIP(t17  + t22);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  355|  3.71M|    t18  = CLIP(t18a + t21a);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  356|  3.71M|    t19a = CLIP(t19  + t20);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  357|  3.71M|    t20a = CLIP(t19  - t20);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  358|  3.71M|    t21  = CLIP(t18a - t21a);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  359|  3.71M|    t22a = CLIP(t17  - t22);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  360|  3.71M|    t23  = CLIP(t16a - t23a);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  361|  3.71M|    t24  = CLIP(t31a - t24a);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  362|  3.71M|    t25a = CLIP(t30  - t25);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  363|  3.71M|    t26  = CLIP(t29a - t26a);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  364|  3.71M|    t27a = CLIP(t28  - t27);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  365|  3.71M|    t28a = CLIP(t28  + t27);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  366|  3.71M|    t29  = CLIP(t29a + t26a);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  367|  3.71M|    t30a = CLIP(t30  + t25);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  368|  3.71M|    t31  = CLIP(t31a + t24a);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  369|       |
  370|  3.71M|    t20  = ((t27a - t20a) * 181 + 128) >> 8;
  371|  3.71M|    t27  = ((t27a + t20a) * 181 + 128) >> 8;
  372|  3.71M|    t21a = ((t26  - t21 ) * 181 + 128) >> 8;
  373|  3.71M|    t26a = ((t26  + t21 ) * 181 + 128) >> 8;
  374|  3.71M|    t22  = ((t25a - t22a) * 181 + 128) >> 8;
  375|  3.71M|    t25  = ((t25a + t22a) * 181 + 128) >> 8;
  376|  3.71M|    t23a = ((t24  - t23 ) * 181 + 128) >> 8;
  377|  3.71M|    t24a = ((t24  + t23 ) * 181 + 128) >> 8;
  378|       |
  379|  3.71M|    const int t0  = c[ 0 * stride];
  380|  3.71M|    const int t1  = c[ 2 * stride];
  381|  3.71M|    const int t2  = c[ 4 * stride];
  382|  3.71M|    const int t3  = c[ 6 * stride];
  383|  3.71M|    const int t4  = c[ 8 * stride];
  384|  3.71M|    const int t5  = c[10 * stride];
  385|  3.71M|    const int t6  = c[12 * stride];
  386|  3.71M|    const int t7  = c[14 * stride];
  387|  3.71M|    const int t8  = c[16 * stride];
  388|  3.71M|    const int t9  = c[18 * stride];
  389|  3.71M|    const int t10 = c[20 * stride];
  390|  3.71M|    const int t11 = c[22 * stride];
  391|  3.71M|    const int t12 = c[24 * stride];
  392|  3.71M|    const int t13 = c[26 * stride];
  393|  3.71M|    const int t14 = c[28 * stride];
  394|  3.71M|    const int t15 = c[30 * stride];
  395|       |
  396|  3.71M|    c[ 0 * stride] = CLIP(t0  + t31);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  397|  3.71M|    c[ 1 * stride] = CLIP(t1  + t30a);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  398|  3.71M|    c[ 2 * stride] = CLIP(t2  + t29);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  399|  3.71M|    c[ 3 * stride] = CLIP(t3  + t28a);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  400|  3.71M|    c[ 4 * stride] = CLIP(t4  + t27);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  401|  3.71M|    c[ 5 * stride] = CLIP(t5  + t26a);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  402|  3.71M|    c[ 6 * stride] = CLIP(t6  + t25);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  403|  3.71M|    c[ 7 * stride] = CLIP(t7  + t24a);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  404|  3.71M|    c[ 8 * stride] = CLIP(t8  + t23a);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  405|  3.71M|    c[ 9 * stride] = CLIP(t9  + t22);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  406|  3.71M|    c[10 * stride] = CLIP(t10 + t21a);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  407|  3.71M|    c[11 * stride] = CLIP(t11 + t20);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  408|  3.71M|    c[12 * stride] = CLIP(t12 + t19a);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  409|  3.71M|    c[13 * stride] = CLIP(t13 + t18);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  410|  3.71M|    c[14 * stride] = CLIP(t14 + t17a);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  411|  3.71M|    c[15 * stride] = CLIP(t15 + t16);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  412|  3.71M|    c[16 * stride] = CLIP(t15 - t16);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  413|  3.71M|    c[17 * stride] = CLIP(t14 - t17a);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  414|  3.71M|    c[18 * stride] = CLIP(t13 - t18);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  415|  3.71M|    c[19 * stride] = CLIP(t12 - t19a);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  416|  3.71M|    c[20 * stride] = CLIP(t11 - t20);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  417|  3.71M|    c[21 * stride] = CLIP(t10 - t21a);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  418|  3.71M|    c[22 * stride] = CLIP(t9  - t22);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  419|  3.71M|    c[23 * stride] = CLIP(t8  - t23a);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  420|  3.71M|    c[24 * stride] = CLIP(t7  - t24a);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  421|  3.71M|    c[25 * stride] = CLIP(t6  - t25);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  422|  3.71M|    c[26 * stride] = CLIP(t5  - t26a);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  423|  3.71M|    c[27 * stride] = CLIP(t4  - t27);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  424|  3.71M|    c[28 * stride] = CLIP(t3  - t28a);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  425|  3.71M|    c[29 * stride] = CLIP(t2  - t29);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  426|  3.71M|    c[30 * stride] = CLIP(t1  - t30a);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  427|  3.71M|    c[31 * stride] = CLIP(t0  - t31);
  ------------------
  |  |   37|  3.71M|#define CLIP(a) iclip(a, min, max)
  ------------------
  428|  3.71M|}
itx_1d.c:inv_dct64_1d_c:
  438|  1.39M|{
  439|  1.39M|    assert(stride > 0);
  ------------------
  |  Branch (439:5): [True: 1.39M, False: 0]
  ------------------
  440|  1.39M|    inv_dct32_1d_internal_c(c, stride << 1, min, max, 1);
  441|       |
  442|  1.39M|    const int in1  = c[ 1 * stride], in3  = c[ 3 * stride];
  443|  1.39M|    const int in5  = c[ 5 * stride], in7  = c[ 7 * stride];
  444|  1.39M|    const int in9  = c[ 9 * stride], in11 = c[11 * stride];
  445|  1.39M|    const int in13 = c[13 * stride], in15 = c[15 * stride];
  446|  1.39M|    const int in17 = c[17 * stride], in19 = c[19 * stride];
  447|  1.39M|    const int in21 = c[21 * stride], in23 = c[23 * stride];
  448|  1.39M|    const int in25 = c[25 * stride], in27 = c[27 * stride];
  449|  1.39M|    const int in29 = c[29 * stride], in31 = c[31 * stride];
  450|       |
  451|  1.39M|    int t32a = (in1  *   101 + 2048) >> 12;
  452|  1.39M|    int t33a = (in31 * -2824 + 2048) >> 12;
  453|  1.39M|    int t34a = (in17 *  1660 + 2048) >> 12;
  454|  1.39M|    int t35a = (in15 * -1474 + 2048) >> 12;
  455|  1.39M|    int t36a = (in9  *   897 + 2048) >> 12;
  456|  1.39M|    int t37a = (in23 * -2191 + 2048) >> 12;
  457|  1.39M|    int t38a = (in25 *  2359 + 2048) >> 12;
  458|  1.39M|    int t39a = (in7  *  -700 + 2048) >> 12;
  459|  1.39M|    int t40a = (in5  *   501 + 2048) >> 12;
  460|  1.39M|    int t41a = (in27 * -2520 + 2048) >> 12;
  461|  1.39M|    int t42a = (in21 *  2019 + 2048) >> 12;
  462|  1.39M|    int t43a = (in11 * -1092 + 2048) >> 12;
  463|  1.39M|    int t44a = (in13 *  1285 + 2048) >> 12;
  464|  1.39M|    int t45a = (in19 * -1842 + 2048) >> 12;
  465|  1.39M|    int t46a = (in29 *  2675 + 2048) >> 12;
  466|  1.39M|    int t47a = (in3  *  -301 + 2048) >> 12;
  467|  1.39M|    int t48a = (in3  *  4085 + 2048) >> 12;
  468|  1.39M|    int t49a = (in29 *  3102 + 2048) >> 12;
  469|  1.39M|    int t50a = (in19 *  3659 + 2048) >> 12;
  470|  1.39M|    int t51a = (in13 *  3889 + 2048) >> 12;
  471|  1.39M|    int t52a = (in11 *  3948 + 2048) >> 12;
  472|  1.39M|    int t53a = (in21 *  3564 + 2048) >> 12;
  473|  1.39M|    int t54a = (in27 *  3229 + 2048) >> 12;
  474|  1.39M|    int t55a = (in5  *  4065 + 2048) >> 12;
  475|  1.39M|    int t56a = (in7  *  4036 + 2048) >> 12;
  476|  1.39M|    int t57a = (in25 *  3349 + 2048) >> 12;
  477|  1.39M|    int t58a = (in23 *  3461 + 2048) >> 12;
  478|  1.39M|    int t59a = (in9  *  3996 + 2048) >> 12;
  479|  1.39M|    int t60a = (in15 *  3822 + 2048) >> 12;
  480|  1.39M|    int t61a = (in17 *  3745 + 2048) >> 12;
  481|  1.39M|    int t62a = (in31 *  2967 + 2048) >> 12;
  482|  1.39M|    int t63a = (in1  *  4095 + 2048) >> 12;
  483|       |
  484|  1.39M|    int t32 = CLIP(t32a + t33a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  485|  1.39M|    int t33 = CLIP(t32a - t33a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  486|  1.39M|    int t34 = CLIP(t35a - t34a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  487|  1.39M|    int t35 = CLIP(t35a + t34a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  488|  1.39M|    int t36 = CLIP(t36a + t37a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  489|  1.39M|    int t37 = CLIP(t36a - t37a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  490|  1.39M|    int t38 = CLIP(t39a - t38a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  491|  1.39M|    int t39 = CLIP(t39a + t38a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  492|  1.39M|    int t40 = CLIP(t40a + t41a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  493|  1.39M|    int t41 = CLIP(t40a - t41a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  494|  1.39M|    int t42 = CLIP(t43a - t42a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  495|  1.39M|    int t43 = CLIP(t43a + t42a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  496|  1.39M|    int t44 = CLIP(t44a + t45a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  497|  1.39M|    int t45 = CLIP(t44a - t45a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  498|  1.39M|    int t46 = CLIP(t47a - t46a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  499|  1.39M|    int t47 = CLIP(t47a + t46a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  500|  1.39M|    int t48 = CLIP(t48a + t49a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  501|  1.39M|    int t49 = CLIP(t48a - t49a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  502|  1.39M|    int t50 = CLIP(t51a - t50a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  503|  1.39M|    int t51 = CLIP(t51a + t50a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  504|  1.39M|    int t52 = CLIP(t52a + t53a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  505|  1.39M|    int t53 = CLIP(t52a - t53a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  506|  1.39M|    int t54 = CLIP(t55a - t54a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  507|  1.39M|    int t55 = CLIP(t55a + t54a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  508|  1.39M|    int t56 = CLIP(t56a + t57a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  509|  1.39M|    int t57 = CLIP(t56a - t57a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  510|  1.39M|    int t58 = CLIP(t59a - t58a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  511|  1.39M|    int t59 = CLIP(t59a + t58a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  512|  1.39M|    int t60 = CLIP(t60a + t61a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  513|  1.39M|    int t61 = CLIP(t60a - t61a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  514|  1.39M|    int t62 = CLIP(t63a - t62a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  515|  1.39M|    int t63 = CLIP(t63a + t62a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  516|       |
  517|  1.39M|    t33a = ((t33 * (4096 - 4076) + t62 *   401         + 2048) >> 12) - t33;
  518|  1.39M|    t34a = ((t34 *  -401         + t61 * (4096 - 4076) + 2048) >> 12) - t61;
  519|  1.39M|    t37a =  (t37 * -1299         + t58 *  1583         + 1024) >> 11;
  520|  1.39M|    t38a =  (t38 * -1583         + t57 * -1299         + 1024) >> 11;
  521|  1.39M|    t41a = ((t41 * (4096 - 3612) + t54 *  1931         + 2048) >> 12) - t41;
  522|  1.39M|    t42a = ((t42 * -1931         + t53 * (4096 - 3612) + 2048) >> 12) - t53;
  523|  1.39M|    t45a = ((t45 * -1189         + t50 * (3920 - 4096) + 2048) >> 12) + t50;
  524|  1.39M|    t46a = ((t46 * (4096 - 3920) + t49 * -1189         + 2048) >> 12) - t46;
  525|  1.39M|    t49a = ((t46 * -1189         + t49 * (3920 - 4096) + 2048) >> 12) + t49;
  526|  1.39M|    t50a = ((t45 * (3920 - 4096) + t50 *  1189         + 2048) >> 12) + t45;
  527|  1.39M|    t53a = ((t42 * (4096 - 3612) + t53 *  1931         + 2048) >> 12) - t42;
  528|  1.39M|    t54a = ((t41 *  1931         + t54 * (3612 - 4096) + 2048) >> 12) + t54;
  529|  1.39M|    t57a =  (t38 * -1299         + t57 *  1583         + 1024) >> 11;
  530|  1.39M|    t58a =  (t37 *  1583         + t58 *  1299         + 1024) >> 11;
  531|  1.39M|    t61a = ((t34 * (4096 - 4076) + t61 *   401         + 2048) >> 12) - t34;
  532|  1.39M|    t62a = ((t33 *   401         + t62 * (4076 - 4096) + 2048) >> 12) + t62;
  533|       |
  534|  1.39M|    t32a = CLIP(t32  + t35);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  535|  1.39M|    t33  = CLIP(t33a + t34a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  536|  1.39M|    t34  = CLIP(t33a - t34a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  537|  1.39M|    t35a = CLIP(t32  - t35);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  538|  1.39M|    t36a = CLIP(t39  - t36);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  539|  1.39M|    t37  = CLIP(t38a - t37a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  540|  1.39M|    t38  = CLIP(t38a + t37a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  541|  1.39M|    t39a = CLIP(t39  + t36);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  542|  1.39M|    t40a = CLIP(t40  + t43);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  543|  1.39M|    t41  = CLIP(t41a + t42a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  544|  1.39M|    t42  = CLIP(t41a - t42a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  545|  1.39M|    t43a = CLIP(t40  - t43);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  546|  1.39M|    t44a = CLIP(t47  - t44);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  547|  1.39M|    t45  = CLIP(t46a - t45a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  548|  1.39M|    t46  = CLIP(t46a + t45a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  549|  1.39M|    t47a = CLIP(t47  + t44);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  550|  1.39M|    t48a = CLIP(t48  + t51);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  551|  1.39M|    t49  = CLIP(t49a + t50a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  552|  1.39M|    t50  = CLIP(t49a - t50a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  553|  1.39M|    t51a = CLIP(t48  - t51);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  554|  1.39M|    t52a = CLIP(t55  - t52);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  555|  1.39M|    t53  = CLIP(t54a - t53a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  556|  1.39M|    t54  = CLIP(t54a + t53a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  557|  1.39M|    t55a = CLIP(t55  + t52);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  558|  1.39M|    t56a = CLIP(t56  + t59);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  559|  1.39M|    t57  = CLIP(t57a + t58a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  560|  1.39M|    t58  = CLIP(t57a - t58a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  561|  1.39M|    t59a = CLIP(t56  - t59);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  562|  1.39M|    t60a = CLIP(t63  - t60);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  563|  1.39M|    t61  = CLIP(t62a - t61a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  564|  1.39M|    t62  = CLIP(t62a + t61a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  565|  1.39M|    t63a = CLIP(t63  + t60);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  566|       |
  567|  1.39M|    t34a = ((t34  * (4096 - 4017) + t61  *   799         + 2048) >> 12) - t34;
  568|  1.39M|    t35  = ((t35a * (4096 - 4017) + t60a *   799         + 2048) >> 12) - t35a;
  569|  1.39M|    t36  = ((t36a *  -799         + t59a * (4096 - 4017) + 2048) >> 12) - t59a;
  570|  1.39M|    t37a = ((t37  *  -799         + t58  * (4096 - 4017) + 2048) >> 12) - t58;
  571|  1.39M|    t42a =  (t42  * -1138         + t53  *  1703         + 1024) >> 11;
  572|  1.39M|    t43  =  (t43a * -1138         + t52a *  1703         + 1024) >> 11;
  573|  1.39M|    t44  =  (t44a * -1703         + t51a * -1138         + 1024) >> 11;
  574|  1.39M|    t45a =  (t45  * -1703         + t50  * -1138         + 1024) >> 11;
  575|  1.39M|    t50a =  (t45  * -1138         + t50  *  1703         + 1024) >> 11;
  576|  1.39M|    t51  =  (t44a * -1138         + t51a *  1703         + 1024) >> 11;
  577|  1.39M|    t52  =  (t43a *  1703         + t52a *  1138         + 1024) >> 11;
  578|  1.39M|    t53a =  (t42  *  1703         + t53  *  1138         + 1024) >> 11;
  579|  1.39M|    t58a = ((t37  * (4096 - 4017) + t58  *   799         + 2048) >> 12) - t37;
  580|  1.39M|    t59  = ((t36a * (4096 - 4017) + t59a *   799         + 2048) >> 12) - t36a;
  581|  1.39M|    t60  = ((t35a *   799         + t60a * (4017 - 4096) + 2048) >> 12) + t60a;
  582|  1.39M|    t61a = ((t34  *   799         + t61  * (4017 - 4096) + 2048) >> 12) + t61;
  583|       |
  584|  1.39M|    t32  = CLIP(t32a + t39a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  585|  1.39M|    t33a = CLIP(t33  + t38);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  586|  1.39M|    t34  = CLIP(t34a + t37a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  587|  1.39M|    t35a = CLIP(t35  + t36);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  588|  1.39M|    t36a = CLIP(t35  - t36);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  589|  1.39M|    t37  = CLIP(t34a - t37a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  590|  1.39M|    t38a = CLIP(t33  - t38);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  591|  1.39M|    t39  = CLIP(t32a - t39a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  592|  1.39M|    t40  = CLIP(t47a - t40a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  593|  1.39M|    t41a = CLIP(t46  - t41);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  594|  1.39M|    t42  = CLIP(t45a - t42a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  595|  1.39M|    t43a = CLIP(t44  - t43);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  596|  1.39M|    t44a = CLIP(t44  + t43);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  597|  1.39M|    t45  = CLIP(t45a + t42a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  598|  1.39M|    t46a = CLIP(t46  + t41);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  599|  1.39M|    t47  = CLIP(t47a + t40a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  600|  1.39M|    t48  = CLIP(t48a + t55a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  601|  1.39M|    t49a = CLIP(t49  + t54);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  602|  1.39M|    t50  = CLIP(t50a + t53a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  603|  1.39M|    t51a = CLIP(t51  + t52);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  604|  1.39M|    t52a = CLIP(t51  - t52);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  605|  1.39M|    t53  = CLIP(t50a - t53a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  606|  1.39M|    t54a = CLIP(t49  - t54);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  607|  1.39M|    t55  = CLIP(t48a - t55a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  608|  1.39M|    t56  = CLIP(t63a - t56a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  609|  1.39M|    t57a = CLIP(t62  - t57);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  610|  1.39M|    t58  = CLIP(t61a - t58a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  611|  1.39M|    t59a = CLIP(t60  - t59);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  612|  1.39M|    t60a = CLIP(t60  + t59);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  613|  1.39M|    t61  = CLIP(t61a + t58a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  614|  1.39M|    t62a = CLIP(t62  + t57);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  615|  1.39M|    t63  = CLIP(t63a + t56a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  616|       |
  617|  1.39M|    t36  = ((t36a * (4096 - 3784) + t59a *  1567         + 2048) >> 12) - t36a;
  618|  1.39M|    t37a = ((t37  * (4096 - 3784) + t58  *  1567         + 2048) >> 12) - t37;
  619|  1.39M|    t38  = ((t38a * (4096 - 3784) + t57a *  1567         + 2048) >> 12) - t38a;
  620|  1.39M|    t39a = ((t39  * (4096 - 3784) + t56  *  1567         + 2048) >> 12) - t39;
  621|  1.39M|    t40a = ((t40  * -1567         + t55  * (4096 - 3784) + 2048) >> 12) - t55;
  622|  1.39M|    t41  = ((t41a * -1567         + t54a * (4096 - 3784) + 2048) >> 12) - t54a;
  623|  1.39M|    t42a = ((t42  * -1567         + t53  * (4096 - 3784) + 2048) >> 12) - t53;
  624|  1.39M|    t43  = ((t43a * -1567         + t52a * (4096 - 3784) + 2048) >> 12) - t52a;
  625|  1.39M|    t52  = ((t43a * (4096 - 3784) + t52a *  1567         + 2048) >> 12) - t43a;
  626|  1.39M|    t53a = ((t42  * (4096 - 3784) + t53  *  1567         + 2048) >> 12) - t42;
  627|  1.39M|    t54  = ((t41a * (4096 - 3784) + t54a *  1567         + 2048) >> 12) - t41a;
  628|  1.39M|    t55a = ((t40  * (4096 - 3784) + t55  *  1567         + 2048) >> 12) - t40;
  629|  1.39M|    t56a = ((t39  *  1567         + t56  * (3784 - 4096) + 2048) >> 12) + t56;
  630|  1.39M|    t57  = ((t38a *  1567         + t57a * (3784 - 4096) + 2048) >> 12) + t57a;
  631|  1.39M|    t58a = ((t37  *  1567         + t58  * (3784 - 4096) + 2048) >> 12) + t58;
  632|  1.39M|    t59  = ((t36a *  1567         + t59a * (3784 - 4096) + 2048) >> 12) + t59a;
  633|       |
  634|  1.39M|    t32a = CLIP(t32  + t47);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  635|  1.39M|    t33  = CLIP(t33a + t46a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  636|  1.39M|    t34a = CLIP(t34  + t45);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  637|  1.39M|    t35  = CLIP(t35a + t44a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  638|  1.39M|    t36a = CLIP(t36  + t43);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  639|  1.39M|    t37  = CLIP(t37a + t42a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  640|  1.39M|    t38a = CLIP(t38  + t41);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  641|  1.39M|    t39  = CLIP(t39a + t40a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  642|  1.39M|    t40  = CLIP(t39a - t40a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  643|  1.39M|    t41a = CLIP(t38  - t41);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  644|  1.39M|    t42  = CLIP(t37a - t42a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  645|  1.39M|    t43a = CLIP(t36  - t43);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  646|  1.39M|    t44  = CLIP(t35a - t44a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  647|  1.39M|    t45a = CLIP(t34  - t45);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  648|  1.39M|    t46  = CLIP(t33a - t46a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  649|  1.39M|    t47a = CLIP(t32  - t47);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  650|  1.39M|    t48a = CLIP(t63  - t48);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  651|  1.39M|    t49  = CLIP(t62a - t49a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  652|  1.39M|    t50a = CLIP(t61  - t50);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  653|  1.39M|    t51  = CLIP(t60a - t51a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  654|  1.39M|    t52a = CLIP(t59  - t52);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  655|  1.39M|    t53  = CLIP(t58a - t53a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  656|  1.39M|    t54a = CLIP(t57  - t54);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  657|  1.39M|    t55  = CLIP(t56a - t55a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  658|  1.39M|    t56  = CLIP(t56a + t55a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  659|  1.39M|    t57a = CLIP(t57  + t54);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  660|  1.39M|    t58  = CLIP(t58a + t53a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  661|  1.39M|    t59a = CLIP(t59  + t52);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  662|  1.39M|    t60  = CLIP(t60a + t51a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  663|  1.39M|    t61a = CLIP(t61  + t50);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  664|  1.39M|    t62  = CLIP(t62a + t49a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  665|  1.39M|    t63a = CLIP(t63  + t48);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  666|       |
  667|  1.39M|    t40a = ((t55  - t40 ) * 181 + 128) >> 8;
  668|  1.39M|    t41  = ((t54a - t41a) * 181 + 128) >> 8;
  669|  1.39M|    t42a = ((t53  - t42 ) * 181 + 128) >> 8;
  670|  1.39M|    t43  = ((t52a - t43a) * 181 + 128) >> 8;
  671|  1.39M|    t44a = ((t51  - t44 ) * 181 + 128) >> 8;
  672|  1.39M|    t45  = ((t50a - t45a) * 181 + 128) >> 8;
  673|  1.39M|    t46a = ((t49  - t46 ) * 181 + 128) >> 8;
  674|  1.39M|    t47  = ((t48a - t47a) * 181 + 128) >> 8;
  675|  1.39M|    t48  = ((t47a + t48a) * 181 + 128) >> 8;
  676|  1.39M|    t49a = ((t46  + t49 ) * 181 + 128) >> 8;
  677|  1.39M|    t50  = ((t45a + t50a) * 181 + 128) >> 8;
  678|  1.39M|    t51a = ((t44  + t51 ) * 181 + 128) >> 8;
  679|  1.39M|    t52  = ((t43a + t52a) * 181 + 128) >> 8;
  680|  1.39M|    t53a = ((t42  + t53 ) * 181 + 128) >> 8;
  681|  1.39M|    t54  = ((t41a + t54a) * 181 + 128) >> 8;
  682|  1.39M|    t55a = ((t40  + t55 ) * 181 + 128) >> 8;
  683|       |
  684|  1.39M|    const int t0  = c[ 0 * stride];
  685|  1.39M|    const int t1  = c[ 2 * stride];
  686|  1.39M|    const int t2  = c[ 4 * stride];
  687|  1.39M|    const int t3  = c[ 6 * stride];
  688|  1.39M|    const int t4  = c[ 8 * stride];
  689|  1.39M|    const int t5  = c[10 * stride];
  690|  1.39M|    const int t6  = c[12 * stride];
  691|  1.39M|    const int t7  = c[14 * stride];
  692|  1.39M|    const int t8  = c[16 * stride];
  693|  1.39M|    const int t9  = c[18 * stride];
  694|  1.39M|    const int t10 = c[20 * stride];
  695|  1.39M|    const int t11 = c[22 * stride];
  696|  1.39M|    const int t12 = c[24 * stride];
  697|  1.39M|    const int t13 = c[26 * stride];
  698|  1.39M|    const int t14 = c[28 * stride];
  699|  1.39M|    const int t15 = c[30 * stride];
  700|  1.39M|    const int t16 = c[32 * stride];
  701|  1.39M|    const int t17 = c[34 * stride];
  702|  1.39M|    const int t18 = c[36 * stride];
  703|  1.39M|    const int t19 = c[38 * stride];
  704|  1.39M|    const int t20 = c[40 * stride];
  705|  1.39M|    const int t21 = c[42 * stride];
  706|  1.39M|    const int t22 = c[44 * stride];
  707|  1.39M|    const int t23 = c[46 * stride];
  708|  1.39M|    const int t24 = c[48 * stride];
  709|  1.39M|    const int t25 = c[50 * stride];
  710|  1.39M|    const int t26 = c[52 * stride];
  711|  1.39M|    const int t27 = c[54 * stride];
  712|  1.39M|    const int t28 = c[56 * stride];
  713|  1.39M|    const int t29 = c[58 * stride];
  714|  1.39M|    const int t30 = c[60 * stride];
  715|  1.39M|    const int t31 = c[62 * stride];
  716|       |
  717|  1.39M|    c[ 0 * stride] = CLIP(t0  + t63a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  718|  1.39M|    c[ 1 * stride] = CLIP(t1  + t62);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  719|  1.39M|    c[ 2 * stride] = CLIP(t2  + t61a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  720|  1.39M|    c[ 3 * stride] = CLIP(t3  + t60);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  721|  1.39M|    c[ 4 * stride] = CLIP(t4  + t59a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  722|  1.39M|    c[ 5 * stride] = CLIP(t5  + t58);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  723|  1.39M|    c[ 6 * stride] = CLIP(t6  + t57a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  724|  1.39M|    c[ 7 * stride] = CLIP(t7  + t56);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  725|  1.39M|    c[ 8 * stride] = CLIP(t8  + t55a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  726|  1.39M|    c[ 9 * stride] = CLIP(t9  + t54);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  727|  1.39M|    c[10 * stride] = CLIP(t10 + t53a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  728|  1.39M|    c[11 * stride] = CLIP(t11 + t52);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  729|  1.39M|    c[12 * stride] = CLIP(t12 + t51a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  730|  1.39M|    c[13 * stride] = CLIP(t13 + t50);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  731|  1.39M|    c[14 * stride] = CLIP(t14 + t49a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  732|  1.39M|    c[15 * stride] = CLIP(t15 + t48);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  733|  1.39M|    c[16 * stride] = CLIP(t16 + t47);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  734|  1.39M|    c[17 * stride] = CLIP(t17 + t46a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  735|  1.39M|    c[18 * stride] = CLIP(t18 + t45);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  736|  1.39M|    c[19 * stride] = CLIP(t19 + t44a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  737|  1.39M|    c[20 * stride] = CLIP(t20 + t43);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  738|  1.39M|    c[21 * stride] = CLIP(t21 + t42a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  739|  1.39M|    c[22 * stride] = CLIP(t22 + t41);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  740|  1.39M|    c[23 * stride] = CLIP(t23 + t40a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  741|  1.39M|    c[24 * stride] = CLIP(t24 + t39);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  742|  1.39M|    c[25 * stride] = CLIP(t25 + t38a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  743|  1.39M|    c[26 * stride] = CLIP(t26 + t37);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  744|  1.39M|    c[27 * stride] = CLIP(t27 + t36a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  745|  1.39M|    c[28 * stride] = CLIP(t28 + t35);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  746|  1.39M|    c[29 * stride] = CLIP(t29 + t34a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  747|  1.39M|    c[30 * stride] = CLIP(t30 + t33);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  748|  1.39M|    c[31 * stride] = CLIP(t31 + t32a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  749|  1.39M|    c[32 * stride] = CLIP(t31 - t32a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  750|  1.39M|    c[33 * stride] = CLIP(t30 - t33);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  751|  1.39M|    c[34 * stride] = CLIP(t29 - t34a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  752|  1.39M|    c[35 * stride] = CLIP(t28 - t35);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  753|  1.39M|    c[36 * stride] = CLIP(t27 - t36a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  754|  1.39M|    c[37 * stride] = CLIP(t26 - t37);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  755|  1.39M|    c[38 * stride] = CLIP(t25 - t38a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  756|  1.39M|    c[39 * stride] = CLIP(t24 - t39);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  757|  1.39M|    c[40 * stride] = CLIP(t23 - t40a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  758|  1.39M|    c[41 * stride] = CLIP(t22 - t41);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  759|  1.39M|    c[42 * stride] = CLIP(t21 - t42a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  760|  1.39M|    c[43 * stride] = CLIP(t20 - t43);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  761|  1.39M|    c[44 * stride] = CLIP(t19 - t44a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  762|  1.39M|    c[45 * stride] = CLIP(t18 - t45);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  763|  1.39M|    c[46 * stride] = CLIP(t17 - t46a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  764|  1.39M|    c[47 * stride] = CLIP(t16 - t47);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  765|  1.39M|    c[48 * stride] = CLIP(t15 - t48);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  766|  1.39M|    c[49 * stride] = CLIP(t14 - t49a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  767|  1.39M|    c[50 * stride] = CLIP(t13 - t50);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  768|  1.39M|    c[51 * stride] = CLIP(t12 - t51a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  769|  1.39M|    c[52 * stride] = CLIP(t11 - t52);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  770|  1.39M|    c[53 * stride] = CLIP(t10 - t53a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  771|  1.39M|    c[54 * stride] = CLIP(t9  - t54);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  772|  1.39M|    c[55 * stride] = CLIP(t8  - t55a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  773|  1.39M|    c[56 * stride] = CLIP(t7  - t56);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  774|  1.39M|    c[57 * stride] = CLIP(t6  - t57a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  775|  1.39M|    c[58 * stride] = CLIP(t5  - t58);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  776|  1.39M|    c[59 * stride] = CLIP(t4  - t59a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  777|  1.39M|    c[60 * stride] = CLIP(t3  - t60);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  778|  1.39M|    c[61 * stride] = CLIP(t2  - t61a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  779|  1.39M|    c[62 * stride] = CLIP(t1  - t62);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  780|  1.39M|    c[63 * stride] = CLIP(t0  - t63a);
  ------------------
  |  |   37|  1.39M|#define CLIP(a) iclip(a, min, max)
  ------------------
  781|  1.39M|}

dav1d_itx_dsp_init_8bpc:
  220|  3.41k|COLD void bitfn(dav1d_itx_dsp_init)(Dav1dInvTxfmDSPContext *const c, int bpc) {
  221|  3.41k|#define assign_itx_all_fn64(w, h, pfx) \
  222|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  223|  3.41k|        inv_txfm_add_dct_dct_##w##x##h##_c
  224|       |
  225|  3.41k|#define assign_itx_all_fn32(w, h, pfx) \
  226|  3.41k|    assign_itx_all_fn64(w, h, pfx); \
  227|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  228|  3.41k|        inv_txfm_add_identity_identity_##w##x##h##_c
  229|       |
  230|  3.41k|#define assign_itx_all_fn16(w, h, pfx) \
  231|  3.41k|    assign_itx_all_fn32(w, h, pfx); \
  232|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  233|  3.41k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  234|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  235|  3.41k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  236|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  237|  3.41k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  238|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  239|  3.41k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  240|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  241|  3.41k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  242|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  243|  3.41k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  244|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  245|  3.41k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  246|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  247|  3.41k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  248|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  249|  3.41k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  250|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  251|  3.41k|        inv_txfm_add_identity_dct_##w##x##h##_c
  252|       |
  253|  3.41k|#define assign_itx_all_fn84(w, h, pfx) \
  254|  3.41k|    assign_itx_all_fn16(w, h, pfx); \
  255|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][H_FLIPADST] = \
  256|  3.41k|        inv_txfm_add_flipadst_identity_##w##x##h##_c; \
  257|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][V_FLIPADST] = \
  258|  3.41k|        inv_txfm_add_identity_flipadst_##w##x##h##_c; \
  259|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][H_ADST] = \
  260|  3.41k|        inv_txfm_add_adst_identity_##w##x##h##_c; \
  261|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][V_ADST] = \
  262|  3.41k|        inv_txfm_add_identity_adst_##w##x##h##_c; \
  263|  3.41k|
  264|  3.41k|#if !(HAVE_ASM && TRIM_DSP_FUNCTIONS && ( \
  265|  3.41k|  ARCH_AARCH64 || \
  266|  3.41k|  (ARCH_ARM && (defined(__ARM_NEON) || defined(__APPLE__) || defined(_WIN32))) \
  267|  3.41k|))
  268|  3.41k|    c->itxfm_add[TX_4X4][WHT_WHT] = inv_txfm_add_wht_wht_4x4_c;
  269|  3.41k|#endif
  270|  3.41k|    assign_itx_all_fn84( 4,  4, );
  ------------------
  |  |  254|  3.41k|    assign_itx_all_fn16(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  231|  3.41k|    assign_itx_all_fn32(w, h, pfx); \
  |  |  |  |  ------------------
  |  |  |  |  |  |  226|  3.41k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  222|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  |  |  |  |  223|  3.41k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  227|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  |  |  |  |  228|  3.41k|        inv_txfm_add_identity_identity_##w##x##h##_c
  |  |  |  |  ------------------
  |  |  |  |  232|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  |  |  |  |  233|  3.41k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  |  |  |  |  234|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  |  |  |  |  235|  3.41k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  |  |  |  |  236|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  |  |  |  |  237|  3.41k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  |  |  |  |  238|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  |  |  |  |  239|  3.41k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  |  |  |  |  240|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  |  |  |  |  241|  3.41k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  |  |  |  |  242|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  |  |  |  |  243|  3.41k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  |  |  |  |  244|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  |  |  |  |  245|  3.41k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  |  |  |  |  246|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  |  |  |  |  247|  3.41k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  |  |  |  |  248|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  |  |  |  |  249|  3.41k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  |  |  |  |  250|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  |  |  |  |  251|  3.41k|        inv_txfm_add_identity_dct_##w##x##h##_c
  |  |  ------------------
  |  |  255|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][H_FLIPADST] = \
  |  |  256|  3.41k|        inv_txfm_add_flipadst_identity_##w##x##h##_c; \
  |  |  257|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][V_FLIPADST] = \
  |  |  258|  3.41k|        inv_txfm_add_identity_flipadst_##w##x##h##_c; \
  |  |  259|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][H_ADST] = \
  |  |  260|  3.41k|        inv_txfm_add_adst_identity_##w##x##h##_c; \
  |  |  261|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][V_ADST] = \
  |  |  262|  3.41k|        inv_txfm_add_identity_adst_##w##x##h##_c; \
  ------------------
  271|  3.41k|    assign_itx_all_fn84( 4,  8, R);
  ------------------
  |  |  254|  3.41k|    assign_itx_all_fn16(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  231|  3.41k|    assign_itx_all_fn32(w, h, pfx); \
  |  |  |  |  ------------------
  |  |  |  |  |  |  226|  3.41k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  222|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  |  |  |  |  223|  3.41k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  227|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  |  |  |  |  228|  3.41k|        inv_txfm_add_identity_identity_##w##x##h##_c
  |  |  |  |  ------------------
  |  |  |  |  232|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  |  |  |  |  233|  3.41k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  |  |  |  |  234|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  |  |  |  |  235|  3.41k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  |  |  |  |  236|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  |  |  |  |  237|  3.41k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  |  |  |  |  238|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  |  |  |  |  239|  3.41k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  |  |  |  |  240|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  |  |  |  |  241|  3.41k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  |  |  |  |  242|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  |  |  |  |  243|  3.41k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  |  |  |  |  244|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  |  |  |  |  245|  3.41k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  |  |  |  |  246|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  |  |  |  |  247|  3.41k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  |  |  |  |  248|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  |  |  |  |  249|  3.41k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  |  |  |  |  250|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  |  |  |  |  251|  3.41k|        inv_txfm_add_identity_dct_##w##x##h##_c
  |  |  ------------------
  |  |  255|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][H_FLIPADST] = \
  |  |  256|  3.41k|        inv_txfm_add_flipadst_identity_##w##x##h##_c; \
  |  |  257|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][V_FLIPADST] = \
  |  |  258|  3.41k|        inv_txfm_add_identity_flipadst_##w##x##h##_c; \
  |  |  259|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][H_ADST] = \
  |  |  260|  3.41k|        inv_txfm_add_adst_identity_##w##x##h##_c; \
  |  |  261|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][V_ADST] = \
  |  |  262|  3.41k|        inv_txfm_add_identity_adst_##w##x##h##_c; \
  ------------------
  272|  3.41k|    assign_itx_all_fn84( 4, 16, R);
  ------------------
  |  |  254|  3.41k|    assign_itx_all_fn16(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  231|  3.41k|    assign_itx_all_fn32(w, h, pfx); \
  |  |  |  |  ------------------
  |  |  |  |  |  |  226|  3.41k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  222|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  |  |  |  |  223|  3.41k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  227|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  |  |  |  |  228|  3.41k|        inv_txfm_add_identity_identity_##w##x##h##_c
  |  |  |  |  ------------------
  |  |  |  |  232|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  |  |  |  |  233|  3.41k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  |  |  |  |  234|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  |  |  |  |  235|  3.41k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  |  |  |  |  236|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  |  |  |  |  237|  3.41k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  |  |  |  |  238|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  |  |  |  |  239|  3.41k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  |  |  |  |  240|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  |  |  |  |  241|  3.41k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  |  |  |  |  242|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  |  |  |  |  243|  3.41k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  |  |  |  |  244|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  |  |  |  |  245|  3.41k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  |  |  |  |  246|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  |  |  |  |  247|  3.41k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  |  |  |  |  248|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  |  |  |  |  249|  3.41k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  |  |  |  |  250|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  |  |  |  |  251|  3.41k|        inv_txfm_add_identity_dct_##w##x##h##_c
  |  |  ------------------
  |  |  255|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][H_FLIPADST] = \
  |  |  256|  3.41k|        inv_txfm_add_flipadst_identity_##w##x##h##_c; \
  |  |  257|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][V_FLIPADST] = \
  |  |  258|  3.41k|        inv_txfm_add_identity_flipadst_##w##x##h##_c; \
  |  |  259|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][H_ADST] = \
  |  |  260|  3.41k|        inv_txfm_add_adst_identity_##w##x##h##_c; \
  |  |  261|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][V_ADST] = \
  |  |  262|  3.41k|        inv_txfm_add_identity_adst_##w##x##h##_c; \
  ------------------
  273|  3.41k|    assign_itx_all_fn84( 8,  4, R);
  ------------------
  |  |  254|  3.41k|    assign_itx_all_fn16(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  231|  3.41k|    assign_itx_all_fn32(w, h, pfx); \
  |  |  |  |  ------------------
  |  |  |  |  |  |  226|  3.41k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  222|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  |  |  |  |  223|  3.41k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  227|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  |  |  |  |  228|  3.41k|        inv_txfm_add_identity_identity_##w##x##h##_c
  |  |  |  |  ------------------
  |  |  |  |  232|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  |  |  |  |  233|  3.41k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  |  |  |  |  234|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  |  |  |  |  235|  3.41k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  |  |  |  |  236|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  |  |  |  |  237|  3.41k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  |  |  |  |  238|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  |  |  |  |  239|  3.41k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  |  |  |  |  240|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  |  |  |  |  241|  3.41k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  |  |  |  |  242|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  |  |  |  |  243|  3.41k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  |  |  |  |  244|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  |  |  |  |  245|  3.41k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  |  |  |  |  246|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  |  |  |  |  247|  3.41k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  |  |  |  |  248|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  |  |  |  |  249|  3.41k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  |  |  |  |  250|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  |  |  |  |  251|  3.41k|        inv_txfm_add_identity_dct_##w##x##h##_c
  |  |  ------------------
  |  |  255|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][H_FLIPADST] = \
  |  |  256|  3.41k|        inv_txfm_add_flipadst_identity_##w##x##h##_c; \
  |  |  257|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][V_FLIPADST] = \
  |  |  258|  3.41k|        inv_txfm_add_identity_flipadst_##w##x##h##_c; \
  |  |  259|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][H_ADST] = \
  |  |  260|  3.41k|        inv_txfm_add_adst_identity_##w##x##h##_c; \
  |  |  261|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][V_ADST] = \
  |  |  262|  3.41k|        inv_txfm_add_identity_adst_##w##x##h##_c; \
  ------------------
  274|  3.41k|    assign_itx_all_fn84( 8,  8, );
  ------------------
  |  |  254|  3.41k|    assign_itx_all_fn16(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  231|  3.41k|    assign_itx_all_fn32(w, h, pfx); \
  |  |  |  |  ------------------
  |  |  |  |  |  |  226|  3.41k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  222|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  |  |  |  |  223|  3.41k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  227|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  |  |  |  |  228|  3.41k|        inv_txfm_add_identity_identity_##w##x##h##_c
  |  |  |  |  ------------------
  |  |  |  |  232|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  |  |  |  |  233|  3.41k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  |  |  |  |  234|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  |  |  |  |  235|  3.41k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  |  |  |  |  236|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  |  |  |  |  237|  3.41k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  |  |  |  |  238|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  |  |  |  |  239|  3.41k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  |  |  |  |  240|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  |  |  |  |  241|  3.41k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  |  |  |  |  242|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  |  |  |  |  243|  3.41k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  |  |  |  |  244|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  |  |  |  |  245|  3.41k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  |  |  |  |  246|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  |  |  |  |  247|  3.41k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  |  |  |  |  248|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  |  |  |  |  249|  3.41k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  |  |  |  |  250|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  |  |  |  |  251|  3.41k|        inv_txfm_add_identity_dct_##w##x##h##_c
  |  |  ------------------
  |  |  255|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][H_FLIPADST] = \
  |  |  256|  3.41k|        inv_txfm_add_flipadst_identity_##w##x##h##_c; \
  |  |  257|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][V_FLIPADST] = \
  |  |  258|  3.41k|        inv_txfm_add_identity_flipadst_##w##x##h##_c; \
  |  |  259|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][H_ADST] = \
  |  |  260|  3.41k|        inv_txfm_add_adst_identity_##w##x##h##_c; \
  |  |  261|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][V_ADST] = \
  |  |  262|  3.41k|        inv_txfm_add_identity_adst_##w##x##h##_c; \
  ------------------
  275|  3.41k|    assign_itx_all_fn84( 8, 16, R);
  ------------------
  |  |  254|  3.41k|    assign_itx_all_fn16(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  231|  3.41k|    assign_itx_all_fn32(w, h, pfx); \
  |  |  |  |  ------------------
  |  |  |  |  |  |  226|  3.41k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  222|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  |  |  |  |  223|  3.41k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  227|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  |  |  |  |  228|  3.41k|        inv_txfm_add_identity_identity_##w##x##h##_c
  |  |  |  |  ------------------
  |  |  |  |  232|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  |  |  |  |  233|  3.41k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  |  |  |  |  234|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  |  |  |  |  235|  3.41k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  |  |  |  |  236|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  |  |  |  |  237|  3.41k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  |  |  |  |  238|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  |  |  |  |  239|  3.41k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  |  |  |  |  240|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  |  |  |  |  241|  3.41k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  |  |  |  |  242|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  |  |  |  |  243|  3.41k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  |  |  |  |  244|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  |  |  |  |  245|  3.41k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  |  |  |  |  246|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  |  |  |  |  247|  3.41k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  |  |  |  |  248|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  |  |  |  |  249|  3.41k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  |  |  |  |  250|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  |  |  |  |  251|  3.41k|        inv_txfm_add_identity_dct_##w##x##h##_c
  |  |  ------------------
  |  |  255|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][H_FLIPADST] = \
  |  |  256|  3.41k|        inv_txfm_add_flipadst_identity_##w##x##h##_c; \
  |  |  257|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][V_FLIPADST] = \
  |  |  258|  3.41k|        inv_txfm_add_identity_flipadst_##w##x##h##_c; \
  |  |  259|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][H_ADST] = \
  |  |  260|  3.41k|        inv_txfm_add_adst_identity_##w##x##h##_c; \
  |  |  261|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][V_ADST] = \
  |  |  262|  3.41k|        inv_txfm_add_identity_adst_##w##x##h##_c; \
  ------------------
  276|  3.41k|    assign_itx_all_fn32( 8, 32, R);
  ------------------
  |  |  226|  3.41k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  222|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  223|  3.41k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  ------------------
  |  |  227|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  228|  3.41k|        inv_txfm_add_identity_identity_##w##x##h##_c
  ------------------
  277|  3.41k|    assign_itx_all_fn84(16,  4, R);
  ------------------
  |  |  254|  3.41k|    assign_itx_all_fn16(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  231|  3.41k|    assign_itx_all_fn32(w, h, pfx); \
  |  |  |  |  ------------------
  |  |  |  |  |  |  226|  3.41k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  222|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  |  |  |  |  223|  3.41k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  227|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  |  |  |  |  228|  3.41k|        inv_txfm_add_identity_identity_##w##x##h##_c
  |  |  |  |  ------------------
  |  |  |  |  232|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  |  |  |  |  233|  3.41k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  |  |  |  |  234|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  |  |  |  |  235|  3.41k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  |  |  |  |  236|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  |  |  |  |  237|  3.41k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  |  |  |  |  238|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  |  |  |  |  239|  3.41k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  |  |  |  |  240|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  |  |  |  |  241|  3.41k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  |  |  |  |  242|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  |  |  |  |  243|  3.41k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  |  |  |  |  244|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  |  |  |  |  245|  3.41k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  |  |  |  |  246|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  |  |  |  |  247|  3.41k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  |  |  |  |  248|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  |  |  |  |  249|  3.41k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  |  |  |  |  250|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  |  |  |  |  251|  3.41k|        inv_txfm_add_identity_dct_##w##x##h##_c
  |  |  ------------------
  |  |  255|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][H_FLIPADST] = \
  |  |  256|  3.41k|        inv_txfm_add_flipadst_identity_##w##x##h##_c; \
  |  |  257|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][V_FLIPADST] = \
  |  |  258|  3.41k|        inv_txfm_add_identity_flipadst_##w##x##h##_c; \
  |  |  259|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][H_ADST] = \
  |  |  260|  3.41k|        inv_txfm_add_adst_identity_##w##x##h##_c; \
  |  |  261|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][V_ADST] = \
  |  |  262|  3.41k|        inv_txfm_add_identity_adst_##w##x##h##_c; \
  ------------------
  278|  3.41k|    assign_itx_all_fn84(16,  8, R);
  ------------------
  |  |  254|  3.41k|    assign_itx_all_fn16(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  231|  3.41k|    assign_itx_all_fn32(w, h, pfx); \
  |  |  |  |  ------------------
  |  |  |  |  |  |  226|  3.41k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  222|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  |  |  |  |  223|  3.41k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  227|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  |  |  |  |  228|  3.41k|        inv_txfm_add_identity_identity_##w##x##h##_c
  |  |  |  |  ------------------
  |  |  |  |  232|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  |  |  |  |  233|  3.41k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  |  |  |  |  234|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  |  |  |  |  235|  3.41k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  |  |  |  |  236|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  |  |  |  |  237|  3.41k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  |  |  |  |  238|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  |  |  |  |  239|  3.41k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  |  |  |  |  240|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  |  |  |  |  241|  3.41k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  |  |  |  |  242|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  |  |  |  |  243|  3.41k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  |  |  |  |  244|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  |  |  |  |  245|  3.41k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  |  |  |  |  246|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  |  |  |  |  247|  3.41k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  |  |  |  |  248|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  |  |  |  |  249|  3.41k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  |  |  |  |  250|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  |  |  |  |  251|  3.41k|        inv_txfm_add_identity_dct_##w##x##h##_c
  |  |  ------------------
  |  |  255|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][H_FLIPADST] = \
  |  |  256|  3.41k|        inv_txfm_add_flipadst_identity_##w##x##h##_c; \
  |  |  257|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][V_FLIPADST] = \
  |  |  258|  3.41k|        inv_txfm_add_identity_flipadst_##w##x##h##_c; \
  |  |  259|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][H_ADST] = \
  |  |  260|  3.41k|        inv_txfm_add_adst_identity_##w##x##h##_c; \
  |  |  261|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][V_ADST] = \
  |  |  262|  3.41k|        inv_txfm_add_identity_adst_##w##x##h##_c; \
  ------------------
  279|  3.41k|    assign_itx_all_fn16(16, 16, );
  ------------------
  |  |  231|  3.41k|    assign_itx_all_fn32(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  226|  3.41k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  |  |  ------------------
  |  |  |  |  |  |  222|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  |  |  223|  3.41k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  |  |  ------------------
  |  |  |  |  227|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  |  |  228|  3.41k|        inv_txfm_add_identity_identity_##w##x##h##_c
  |  |  ------------------
  |  |  232|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  |  |  233|  3.41k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  |  |  234|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  |  |  235|  3.41k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  |  |  236|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  |  |  237|  3.41k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  |  |  238|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  |  |  239|  3.41k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  |  |  240|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  |  |  241|  3.41k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  |  |  242|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  |  |  243|  3.41k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  |  |  244|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  |  |  245|  3.41k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  |  |  246|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  |  |  247|  3.41k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  |  |  248|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  |  |  249|  3.41k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  |  |  250|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  |  |  251|  3.41k|        inv_txfm_add_identity_dct_##w##x##h##_c
  ------------------
  280|  3.41k|    assign_itx_all_fn32(16, 32, R);
  ------------------
  |  |  226|  3.41k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  222|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  223|  3.41k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  ------------------
  |  |  227|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  228|  3.41k|        inv_txfm_add_identity_identity_##w##x##h##_c
  ------------------
  281|  3.41k|    assign_itx_all_fn64(16, 64, R);
  ------------------
  |  |  222|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  223|  3.41k|        inv_txfm_add_dct_dct_##w##x##h##_c
  ------------------
  282|  3.41k|    assign_itx_all_fn32(32,  8, R);
  ------------------
  |  |  226|  3.41k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  222|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  223|  3.41k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  ------------------
  |  |  227|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  228|  3.41k|        inv_txfm_add_identity_identity_##w##x##h##_c
  ------------------
  283|  3.41k|    assign_itx_all_fn32(32, 16, R);
  ------------------
  |  |  226|  3.41k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  222|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  223|  3.41k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  ------------------
  |  |  227|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  228|  3.41k|        inv_txfm_add_identity_identity_##w##x##h##_c
  ------------------
  284|  3.41k|    assign_itx_all_fn32(32, 32, );
  ------------------
  |  |  226|  3.41k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  222|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  223|  3.41k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  ------------------
  |  |  227|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  228|  3.41k|        inv_txfm_add_identity_identity_##w##x##h##_c
  ------------------
  285|  3.41k|    assign_itx_all_fn64(32, 64, R);
  ------------------
  |  |  222|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  223|  3.41k|        inv_txfm_add_dct_dct_##w##x##h##_c
  ------------------
  286|  3.41k|    assign_itx_all_fn64(64, 16, R);
  ------------------
  |  |  222|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  223|  3.41k|        inv_txfm_add_dct_dct_##w##x##h##_c
  ------------------
  287|  3.41k|    assign_itx_all_fn64(64, 32, R);
  ------------------
  |  |  222|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  223|  3.41k|        inv_txfm_add_dct_dct_##w##x##h##_c
  ------------------
  288|  3.41k|    assign_itx_all_fn64(64, 64, );
  ------------------
  |  |  222|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  223|  3.41k|        inv_txfm_add_dct_dct_##w##x##h##_c
  ------------------
  289|       |
  290|  3.41k|    int all_simd = 0;
  291|  3.41k|#if HAVE_ASM
  292|       |#if ARCH_AARCH64 || ARCH_ARM
  293|       |    itx_dsp_init_arm(c, bpc, &all_simd);
  294|       |#endif
  295|       |#if ARCH_LOONGARCH64
  296|       |    itx_dsp_init_loongarch(c, bpc);
  297|       |#endif
  298|       |#if ARCH_PPC64LE
  299|       |    itx_dsp_init_ppc(c, bpc);
  300|       |#endif
  301|       |#if ARCH_RISCV
  302|       |    itx_dsp_init_riscv(c, bpc);
  303|       |#endif
  304|  3.41k|#if ARCH_X86
  305|  3.41k|    itx_dsp_init_x86(c, bpc, &all_simd);
  306|  3.41k|#endif
  307|  3.41k|#endif
  308|       |
  309|  3.41k|    if (!all_simd)
  ------------------
  |  Branch (309:9): [True: 0, False: 3.41k]
  ------------------
  310|      0|        dav1d_init_last_nonzero_col_from_eob_tables();
  311|  3.41k|}
itx_tmpl.c:inv_txfm_add_c:
   47|   141k|{
   48|   141k|    const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[tx];
   49|   141k|    const int w = 4 * t_dim->w, h = 4 * t_dim->h;
   50|   141k|    const int has_dconly = txtp == DCT_DCT;
   51|   141k|    assert(w >= 4 && w <= 64);
  ------------------
  |  Branch (51:5): [True: 141k, False: 0]
  |  Branch (51:5): [True: 141k, False: 0]
  ------------------
   52|   141k|    assert(h >= 4 && h <= 64);
  ------------------
  |  Branch (52:5): [True: 141k, False: 0]
  |  Branch (52:5): [True: 141k, False: 0]
  ------------------
   53|   141k|    assert(eob >= 0);
  ------------------
  |  Branch (53:5): [True: 141k, False: 0]
  ------------------
   54|       |
   55|   141k|    const int is_rect2 = w * 2 == h || h * 2 == w;
  ------------------
  |  Branch (55:26): [True: 17.8k, False: 123k]
  |  Branch (55:40): [True: 51.3k, False: 72.1k]
  ------------------
   56|   141k|    const int rnd = (1 << shift) >> 1;
   57|       |
   58|   141k|    if (eob < has_dconly) {
  ------------------
  |  Branch (58:9): [True: 39.8k, False: 101k]
  ------------------
   59|  39.8k|        int dc = coeff[0];
   60|  39.8k|        coeff[0] = 0;
   61|  39.8k|        if (is_rect2)
  ------------------
  |  Branch (61:13): [True: 18.4k, False: 21.3k]
  ------------------
   62|  18.4k|            dc = (dc * 181 + 128) >> 8;
   63|  39.8k|        dc = (dc * 181 + 128) >> 8;
   64|  39.8k|        dc = (dc + rnd) >> shift;
   65|  39.8k|        dc = (dc * 181 + 128 + 2048) >> 12;
   66|  1.36M|        for (int y = 0; y < h; y++, dst += PXSTRIDE(stride))
  ------------------
  |  |   53|  1.32M|#define PXSTRIDE(x) (x)
  ------------------
  |  Branch (66:25): [True: 1.32M, False: 39.8k]
  ------------------
   67|  58.6M|            for (int x = 0; x < w; x++)
  ------------------
  |  Branch (67:29): [True: 57.3M, False: 1.32M]
  ------------------
   68|  57.3M|                dst[x] = iclip_pixel(dst[x] + dc);
  ------------------
  |  |   49|  57.3M|#define iclip_pixel iclip_u8
  ------------------
   69|  39.8k|        return;
   70|  39.8k|    }
   71|       |
   72|   101k|    const uint8_t *const txtps = dav1d_tx1d_types[txtp];
   73|   101k|    const itx_1d_fn first_1d_fn = dav1d_tx1d_fns[t_dim->lw][txtps[0]];
   74|   101k|    const itx_1d_fn second_1d_fn = dav1d_tx1d_fns[t_dim->lh][txtps[1]];
   75|   101k|    const int sh = imin(h, 32), sw = imin(w, 32);
   76|   101k|#if BITDEPTH == 8
   77|   101k|    const int row_clip_min = INT16_MIN;
   78|   101k|    const int col_clip_min = INT16_MIN;
   79|       |#else
   80|       |    const int row_clip_min = (int) ((unsigned) ~bitdepth_max << 7);
   81|       |    const int col_clip_min = (int) ((unsigned) ~bitdepth_max << 5);
   82|       |#endif
   83|   101k|    const int row_clip_max = ~row_clip_min;
   84|   101k|    const int col_clip_max = ~col_clip_min;
   85|       |
   86|   101k|    int32_t tmp[64 * 64], *c = tmp;
   87|   101k|    int last_nonzero_col; // in first 1d itx
   88|   101k|    if (txtps[1] == IDENTITY && txtps[0] != IDENTITY) {
  ------------------
  |  Branch (88:9): [True: 0, False: 101k]
  |  Branch (88:33): [True: 0, False: 0]
  ------------------
   89|      0|        last_nonzero_col = imin(sh - 1, eob);
   90|   101k|    } else if (txtps[0] == IDENTITY && txtps[1] != IDENTITY) {
  ------------------
  |  Branch (90:16): [True: 0, False: 101k]
  |  Branch (90:40): [True: 0, False: 0]
  ------------------
   91|      0|        last_nonzero_col = eob >> (t_dim->lw + 2);
   92|   101k|    } else {
   93|   101k|        last_nonzero_col = dav1d_last_nonzero_col_from_eob[tx][eob];
   94|   101k|    }
   95|   101k|    assert(last_nonzero_col < sh);
  ------------------
  |  Branch (95:5): [True: 101k, False: 0]
  ------------------
   96|  1.00M|    for (int y = 0; y <= last_nonzero_col; y++, c += w) {
  ------------------
  |  Branch (96:21): [True: 901k, False: 101k]
  ------------------
   97|   901k|        if (is_rect2)
  ------------------
  |  Branch (97:13): [True: 373k, False: 528k]
  ------------------
   98|  11.1M|            for (int x = 0; x < sw; x++)
  ------------------
  |  Branch (98:29): [True: 10.8M, False: 373k]
  ------------------
   99|  10.8M|                c[x] = (coeff[y + x * sh] * 181 + 128) >> 8;
  100|   528k|        else
  101|  17.1M|            for (int x = 0; x < sw; x++)
  ------------------
  |  Branch (101:29): [True: 16.6M, False: 528k]
  ------------------
  102|  16.6M|                c[x] = coeff[y + x * sh];
  103|   901k|        first_1d_fn(c, 1, row_clip_min, row_clip_max);
  104|   901k|    }
  105|   101k|    if (last_nonzero_col + 1 < sh)
  ------------------
  |  Branch (105:9): [True: 91.9k, False: 9.53k]
  ------------------
  106|  91.9k|        memset(c, 0, sizeof(*c) * (sh - last_nonzero_col - 1) * w);
  107|       |
  108|   101k|    memset(coeff, 0, sizeof(*coeff) * sw * sh);
  109|   110M|    for (int i = 0; i < w * sh; i++)
  ------------------
  |  Branch (109:21): [True: 110M, False: 101k]
  ------------------
  110|   110M|        tmp[i] = iclip((tmp[i] + rnd) >> shift, col_clip_min, col_clip_max);
  111|       |
  112|  4.10M|    for (int x = 0; x < w; x++)
  ------------------
  |  Branch (112:21): [True: 3.99M, False: 101k]
  ------------------
  113|  3.99M|        second_1d_fn(&tmp[x], w, col_clip_min, col_clip_max);
  114|       |
  115|   101k|    c = tmp;
  116|  3.47M|    for (int y = 0; y < h; y++, dst += PXSTRIDE(stride))
  ------------------
  |  |   53|  3.36M|#define PXSTRIDE(x) (x)
  ------------------
  |  Branch (116:21): [True: 3.36M, False: 101k]
  ------------------
  117|   147M|        for (int x = 0; x < w; x++)
  ------------------
  |  Branch (117:25): [True: 144M, False: 3.36M]
  ------------------
  118|   144M|            dst[x] = iclip_pixel(dst[x] + ((*c++ + 8) >> 4));
  ------------------
  |  |   49|   144M|#define iclip_pixel iclip_u8
  ------------------
  119|   101k|}
itx_tmpl.c:inv_txfm_add_dct_dct_16x32_c:
  127|  15.3k|                                               HIGHBD_DECL_SUFFIX) \
  128|  15.3k|{ \
  129|  15.3k|    inv_txfm_add_c(dst, stride, coeff, eob, pfx##TX_##w##X##h, shift, type \
  130|  15.3k|                   HIGHBD_TAIL_SUFFIX); \
  131|  15.3k|}
itx_tmpl.c:inv_txfm_add_dct_dct_16x64_c:
  127|  2.03k|                                               HIGHBD_DECL_SUFFIX) \
  128|  2.03k|{ \
  129|  2.03k|    inv_txfm_add_c(dst, stride, coeff, eob, pfx##TX_##w##X##h, shift, type \
  130|  2.03k|                   HIGHBD_TAIL_SUFFIX); \
  131|  2.03k|}
itx_tmpl.c:inv_txfm_add_dct_dct_32x16_c:
  127|  34.3k|                                               HIGHBD_DECL_SUFFIX) \
  128|  34.3k|{ \
  129|  34.3k|    inv_txfm_add_c(dst, stride, coeff, eob, pfx##TX_##w##X##h, shift, type \
  130|  34.3k|                   HIGHBD_TAIL_SUFFIX); \
  131|  34.3k|}
itx_tmpl.c:inv_txfm_add_dct_dct_32x32_c:
  127|  45.1k|                                               HIGHBD_DECL_SUFFIX) \
  128|  45.1k|{ \
  129|  45.1k|    inv_txfm_add_c(dst, stride, coeff, eob, pfx##TX_##w##X##h, shift, type \
  130|  45.1k|                   HIGHBD_TAIL_SUFFIX); \
  131|  45.1k|}
itx_tmpl.c:inv_txfm_add_dct_dct_32x64_c:
  127|  2.54k|                                               HIGHBD_DECL_SUFFIX) \
  128|  2.54k|{ \
  129|  2.54k|    inv_txfm_add_c(dst, stride, coeff, eob, pfx##TX_##w##X##h, shift, type \
  130|  2.54k|                   HIGHBD_TAIL_SUFFIX); \
  131|  2.54k|}
itx_tmpl.c:inv_txfm_add_dct_dct_64x16_c:
  127|  4.53k|                                               HIGHBD_DECL_SUFFIX) \
  128|  4.53k|{ \
  129|  4.53k|    inv_txfm_add_c(dst, stride, coeff, eob, pfx##TX_##w##X##h, shift, type \
  130|  4.53k|                   HIGHBD_TAIL_SUFFIX); \
  131|  4.53k|}
itx_tmpl.c:inv_txfm_add_dct_dct_64x32_c:
  127|  17.0k|                                               HIGHBD_DECL_SUFFIX) \
  128|  17.0k|{ \
  129|  17.0k|    inv_txfm_add_c(dst, stride, coeff, eob, pfx##TX_##w##X##h, shift, type \
  130|  17.0k|                   HIGHBD_TAIL_SUFFIX); \
  131|  17.0k|}
itx_tmpl.c:inv_txfm_add_dct_dct_64x64_c:
  127|  20.3k|                                               HIGHBD_DECL_SUFFIX) \
  128|  20.3k|{ \
  129|  20.3k|    inv_txfm_add_c(dst, stride, coeff, eob, pfx##TX_##w##X##h, shift, type \
  130|  20.3k|                   HIGHBD_TAIL_SUFFIX); \
  131|  20.3k|}
dav1d_itx_dsp_init_16bpc:
  220|  4.61k|COLD void bitfn(dav1d_itx_dsp_init)(Dav1dInvTxfmDSPContext *const c, int bpc) {
  221|  4.61k|#define assign_itx_all_fn64(w, h, pfx) \
  222|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  223|  4.61k|        inv_txfm_add_dct_dct_##w##x##h##_c
  224|       |
  225|  4.61k|#define assign_itx_all_fn32(w, h, pfx) \
  226|  4.61k|    assign_itx_all_fn64(w, h, pfx); \
  227|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  228|  4.61k|        inv_txfm_add_identity_identity_##w##x##h##_c
  229|       |
  230|  4.61k|#define assign_itx_all_fn16(w, h, pfx) \
  231|  4.61k|    assign_itx_all_fn32(w, h, pfx); \
  232|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  233|  4.61k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  234|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  235|  4.61k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  236|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  237|  4.61k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  238|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  239|  4.61k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  240|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  241|  4.61k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  242|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  243|  4.61k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  244|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  245|  4.61k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  246|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  247|  4.61k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  248|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  249|  4.61k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  250|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  251|  4.61k|        inv_txfm_add_identity_dct_##w##x##h##_c
  252|       |
  253|  4.61k|#define assign_itx_all_fn84(w, h, pfx) \
  254|  4.61k|    assign_itx_all_fn16(w, h, pfx); \
  255|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][H_FLIPADST] = \
  256|  4.61k|        inv_txfm_add_flipadst_identity_##w##x##h##_c; \
  257|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][V_FLIPADST] = \
  258|  4.61k|        inv_txfm_add_identity_flipadst_##w##x##h##_c; \
  259|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][H_ADST] = \
  260|  4.61k|        inv_txfm_add_adst_identity_##w##x##h##_c; \
  261|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][V_ADST] = \
  262|  4.61k|        inv_txfm_add_identity_adst_##w##x##h##_c; \
  263|  4.61k|
  264|  4.61k|#if !(HAVE_ASM && TRIM_DSP_FUNCTIONS && ( \
  265|  4.61k|  ARCH_AARCH64 || \
  266|  4.61k|  (ARCH_ARM && (defined(__ARM_NEON) || defined(__APPLE__) || defined(_WIN32))) \
  267|  4.61k|))
  268|  4.61k|    c->itxfm_add[TX_4X4][WHT_WHT] = inv_txfm_add_wht_wht_4x4_c;
  269|  4.61k|#endif
  270|  4.61k|    assign_itx_all_fn84( 4,  4, );
  ------------------
  |  |  254|  4.61k|    assign_itx_all_fn16(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  231|  4.61k|    assign_itx_all_fn32(w, h, pfx); \
  |  |  |  |  ------------------
  |  |  |  |  |  |  226|  4.61k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  222|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  |  |  |  |  223|  4.61k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  227|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  |  |  |  |  228|  4.61k|        inv_txfm_add_identity_identity_##w##x##h##_c
  |  |  |  |  ------------------
  |  |  |  |  232|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  |  |  |  |  233|  4.61k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  |  |  |  |  234|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  |  |  |  |  235|  4.61k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  |  |  |  |  236|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  |  |  |  |  237|  4.61k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  |  |  |  |  238|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  |  |  |  |  239|  4.61k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  |  |  |  |  240|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  |  |  |  |  241|  4.61k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  |  |  |  |  242|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  |  |  |  |  243|  4.61k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  |  |  |  |  244|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  |  |  |  |  245|  4.61k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  |  |  |  |  246|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  |  |  |  |  247|  4.61k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  |  |  |  |  248|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  |  |  |  |  249|  4.61k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  |  |  |  |  250|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  |  |  |  |  251|  4.61k|        inv_txfm_add_identity_dct_##w##x##h##_c
  |  |  ------------------
  |  |  255|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][H_FLIPADST] = \
  |  |  256|  4.61k|        inv_txfm_add_flipadst_identity_##w##x##h##_c; \
  |  |  257|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][V_FLIPADST] = \
  |  |  258|  4.61k|        inv_txfm_add_identity_flipadst_##w##x##h##_c; \
  |  |  259|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][H_ADST] = \
  |  |  260|  4.61k|        inv_txfm_add_adst_identity_##w##x##h##_c; \
  |  |  261|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][V_ADST] = \
  |  |  262|  4.61k|        inv_txfm_add_identity_adst_##w##x##h##_c; \
  ------------------
  271|  4.61k|    assign_itx_all_fn84( 4,  8, R);
  ------------------
  |  |  254|  4.61k|    assign_itx_all_fn16(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  231|  4.61k|    assign_itx_all_fn32(w, h, pfx); \
  |  |  |  |  ------------------
  |  |  |  |  |  |  226|  4.61k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  222|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  |  |  |  |  223|  4.61k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  227|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  |  |  |  |  228|  4.61k|        inv_txfm_add_identity_identity_##w##x##h##_c
  |  |  |  |  ------------------
  |  |  |  |  232|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  |  |  |  |  233|  4.61k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  |  |  |  |  234|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  |  |  |  |  235|  4.61k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  |  |  |  |  236|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  |  |  |  |  237|  4.61k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  |  |  |  |  238|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  |  |  |  |  239|  4.61k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  |  |  |  |  240|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  |  |  |  |  241|  4.61k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  |  |  |  |  242|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  |  |  |  |  243|  4.61k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  |  |  |  |  244|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  |  |  |  |  245|  4.61k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  |  |  |  |  246|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  |  |  |  |  247|  4.61k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  |  |  |  |  248|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  |  |  |  |  249|  4.61k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  |  |  |  |  250|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  |  |  |  |  251|  4.61k|        inv_txfm_add_identity_dct_##w##x##h##_c
  |  |  ------------------
  |  |  255|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][H_FLIPADST] = \
  |  |  256|  4.61k|        inv_txfm_add_flipadst_identity_##w##x##h##_c; \
  |  |  257|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][V_FLIPADST] = \
  |  |  258|  4.61k|        inv_txfm_add_identity_flipadst_##w##x##h##_c; \
  |  |  259|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][H_ADST] = \
  |  |  260|  4.61k|        inv_txfm_add_adst_identity_##w##x##h##_c; \
  |  |  261|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][V_ADST] = \
  |  |  262|  4.61k|        inv_txfm_add_identity_adst_##w##x##h##_c; \
  ------------------
  272|  4.61k|    assign_itx_all_fn84( 4, 16, R);
  ------------------
  |  |  254|  4.61k|    assign_itx_all_fn16(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  231|  4.61k|    assign_itx_all_fn32(w, h, pfx); \
  |  |  |  |  ------------------
  |  |  |  |  |  |  226|  4.61k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  222|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  |  |  |  |  223|  4.61k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  227|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  |  |  |  |  228|  4.61k|        inv_txfm_add_identity_identity_##w##x##h##_c
  |  |  |  |  ------------------
  |  |  |  |  232|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  |  |  |  |  233|  4.61k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  |  |  |  |  234|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  |  |  |  |  235|  4.61k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  |  |  |  |  236|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  |  |  |  |  237|  4.61k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  |  |  |  |  238|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  |  |  |  |  239|  4.61k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  |  |  |  |  240|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  |  |  |  |  241|  4.61k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  |  |  |  |  242|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  |  |  |  |  243|  4.61k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  |  |  |  |  244|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  |  |  |  |  245|  4.61k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  |  |  |  |  246|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  |  |  |  |  247|  4.61k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  |  |  |  |  248|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  |  |  |  |  249|  4.61k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  |  |  |  |  250|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  |  |  |  |  251|  4.61k|        inv_txfm_add_identity_dct_##w##x##h##_c
  |  |  ------------------
  |  |  255|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][H_FLIPADST] = \
  |  |  256|  4.61k|        inv_txfm_add_flipadst_identity_##w##x##h##_c; \
  |  |  257|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][V_FLIPADST] = \
  |  |  258|  4.61k|        inv_txfm_add_identity_flipadst_##w##x##h##_c; \
  |  |  259|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][H_ADST] = \
  |  |  260|  4.61k|        inv_txfm_add_adst_identity_##w##x##h##_c; \
  |  |  261|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][V_ADST] = \
  |  |  262|  4.61k|        inv_txfm_add_identity_adst_##w##x##h##_c; \
  ------------------
  273|  4.61k|    assign_itx_all_fn84( 8,  4, R);
  ------------------
  |  |  254|  4.61k|    assign_itx_all_fn16(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  231|  4.61k|    assign_itx_all_fn32(w, h, pfx); \
  |  |  |  |  ------------------
  |  |  |  |  |  |  226|  4.61k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  222|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  |  |  |  |  223|  4.61k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  227|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  |  |  |  |  228|  4.61k|        inv_txfm_add_identity_identity_##w##x##h##_c
  |  |  |  |  ------------------
  |  |  |  |  232|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  |  |  |  |  233|  4.61k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  |  |  |  |  234|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  |  |  |  |  235|  4.61k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  |  |  |  |  236|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  |  |  |  |  237|  4.61k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  |  |  |  |  238|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  |  |  |  |  239|  4.61k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  |  |  |  |  240|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  |  |  |  |  241|  4.61k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  |  |  |  |  242|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  |  |  |  |  243|  4.61k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  |  |  |  |  244|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  |  |  |  |  245|  4.61k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  |  |  |  |  246|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  |  |  |  |  247|  4.61k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  |  |  |  |  248|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  |  |  |  |  249|  4.61k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  |  |  |  |  250|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  |  |  |  |  251|  4.61k|        inv_txfm_add_identity_dct_##w##x##h##_c
  |  |  ------------------
  |  |  255|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][H_FLIPADST] = \
  |  |  256|  4.61k|        inv_txfm_add_flipadst_identity_##w##x##h##_c; \
  |  |  257|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][V_FLIPADST] = \
  |  |  258|  4.61k|        inv_txfm_add_identity_flipadst_##w##x##h##_c; \
  |  |  259|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][H_ADST] = \
  |  |  260|  4.61k|        inv_txfm_add_adst_identity_##w##x##h##_c; \
  |  |  261|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][V_ADST] = \
  |  |  262|  4.61k|        inv_txfm_add_identity_adst_##w##x##h##_c; \
  ------------------
  274|  4.61k|    assign_itx_all_fn84( 8,  8, );
  ------------------
  |  |  254|  4.61k|    assign_itx_all_fn16(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  231|  4.61k|    assign_itx_all_fn32(w, h, pfx); \
  |  |  |  |  ------------------
  |  |  |  |  |  |  226|  4.61k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  222|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  |  |  |  |  223|  4.61k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  227|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  |  |  |  |  228|  4.61k|        inv_txfm_add_identity_identity_##w##x##h##_c
  |  |  |  |  ------------------
  |  |  |  |  232|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  |  |  |  |  233|  4.61k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  |  |  |  |  234|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  |  |  |  |  235|  4.61k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  |  |  |  |  236|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  |  |  |  |  237|  4.61k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  |  |  |  |  238|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  |  |  |  |  239|  4.61k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  |  |  |  |  240|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  |  |  |  |  241|  4.61k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  |  |  |  |  242|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  |  |  |  |  243|  4.61k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  |  |  |  |  244|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  |  |  |  |  245|  4.61k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  |  |  |  |  246|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  |  |  |  |  247|  4.61k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  |  |  |  |  248|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  |  |  |  |  249|  4.61k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  |  |  |  |  250|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  |  |  |  |  251|  4.61k|        inv_txfm_add_identity_dct_##w##x##h##_c
  |  |  ------------------
  |  |  255|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][H_FLIPADST] = \
  |  |  256|  4.61k|        inv_txfm_add_flipadst_identity_##w##x##h##_c; \
  |  |  257|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][V_FLIPADST] = \
  |  |  258|  4.61k|        inv_txfm_add_identity_flipadst_##w##x##h##_c; \
  |  |  259|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][H_ADST] = \
  |  |  260|  4.61k|        inv_txfm_add_adst_identity_##w##x##h##_c; \
  |  |  261|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][V_ADST] = \
  |  |  262|  4.61k|        inv_txfm_add_identity_adst_##w##x##h##_c; \
  ------------------
  275|  4.61k|    assign_itx_all_fn84( 8, 16, R);
  ------------------
  |  |  254|  4.61k|    assign_itx_all_fn16(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  231|  4.61k|    assign_itx_all_fn32(w, h, pfx); \
  |  |  |  |  ------------------
  |  |  |  |  |  |  226|  4.61k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  222|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  |  |  |  |  223|  4.61k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  227|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  |  |  |  |  228|  4.61k|        inv_txfm_add_identity_identity_##w##x##h##_c
  |  |  |  |  ------------------
  |  |  |  |  232|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  |  |  |  |  233|  4.61k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  |  |  |  |  234|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  |  |  |  |  235|  4.61k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  |  |  |  |  236|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  |  |  |  |  237|  4.61k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  |  |  |  |  238|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  |  |  |  |  239|  4.61k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  |  |  |  |  240|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  |  |  |  |  241|  4.61k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  |  |  |  |  242|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  |  |  |  |  243|  4.61k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  |  |  |  |  244|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  |  |  |  |  245|  4.61k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  |  |  |  |  246|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  |  |  |  |  247|  4.61k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  |  |  |  |  248|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  |  |  |  |  249|  4.61k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  |  |  |  |  250|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  |  |  |  |  251|  4.61k|        inv_txfm_add_identity_dct_##w##x##h##_c
  |  |  ------------------
  |  |  255|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][H_FLIPADST] = \
  |  |  256|  4.61k|        inv_txfm_add_flipadst_identity_##w##x##h##_c; \
  |  |  257|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][V_FLIPADST] = \
  |  |  258|  4.61k|        inv_txfm_add_identity_flipadst_##w##x##h##_c; \
  |  |  259|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][H_ADST] = \
  |  |  260|  4.61k|        inv_txfm_add_adst_identity_##w##x##h##_c; \
  |  |  261|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][V_ADST] = \
  |  |  262|  4.61k|        inv_txfm_add_identity_adst_##w##x##h##_c; \
  ------------------
  276|  4.61k|    assign_itx_all_fn32( 8, 32, R);
  ------------------
  |  |  226|  4.61k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  222|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  223|  4.61k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  ------------------
  |  |  227|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  228|  4.61k|        inv_txfm_add_identity_identity_##w##x##h##_c
  ------------------
  277|  4.61k|    assign_itx_all_fn84(16,  4, R);
  ------------------
  |  |  254|  4.61k|    assign_itx_all_fn16(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  231|  4.61k|    assign_itx_all_fn32(w, h, pfx); \
  |  |  |  |  ------------------
  |  |  |  |  |  |  226|  4.61k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  222|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  |  |  |  |  223|  4.61k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  227|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  |  |  |  |  228|  4.61k|        inv_txfm_add_identity_identity_##w##x##h##_c
  |  |  |  |  ------------------
  |  |  |  |  232|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  |  |  |  |  233|  4.61k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  |  |  |  |  234|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  |  |  |  |  235|  4.61k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  |  |  |  |  236|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  |  |  |  |  237|  4.61k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  |  |  |  |  238|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  |  |  |  |  239|  4.61k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  |  |  |  |  240|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  |  |  |  |  241|  4.61k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  |  |  |  |  242|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  |  |  |  |  243|  4.61k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  |  |  |  |  244|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  |  |  |  |  245|  4.61k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  |  |  |  |  246|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  |  |  |  |  247|  4.61k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  |  |  |  |  248|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  |  |  |  |  249|  4.61k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  |  |  |  |  250|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  |  |  |  |  251|  4.61k|        inv_txfm_add_identity_dct_##w##x##h##_c
  |  |  ------------------
  |  |  255|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][H_FLIPADST] = \
  |  |  256|  4.61k|        inv_txfm_add_flipadst_identity_##w##x##h##_c; \
  |  |  257|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][V_FLIPADST] = \
  |  |  258|  4.61k|        inv_txfm_add_identity_flipadst_##w##x##h##_c; \
  |  |  259|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][H_ADST] = \
  |  |  260|  4.61k|        inv_txfm_add_adst_identity_##w##x##h##_c; \
  |  |  261|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][V_ADST] = \
  |  |  262|  4.61k|        inv_txfm_add_identity_adst_##w##x##h##_c; \
  ------------------
  278|  4.61k|    assign_itx_all_fn84(16,  8, R);
  ------------------
  |  |  254|  4.61k|    assign_itx_all_fn16(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  231|  4.61k|    assign_itx_all_fn32(w, h, pfx); \
  |  |  |  |  ------------------
  |  |  |  |  |  |  226|  4.61k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  222|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  |  |  |  |  223|  4.61k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  227|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  |  |  |  |  228|  4.61k|        inv_txfm_add_identity_identity_##w##x##h##_c
  |  |  |  |  ------------------
  |  |  |  |  232|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  |  |  |  |  233|  4.61k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  |  |  |  |  234|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  |  |  |  |  235|  4.61k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  |  |  |  |  236|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  |  |  |  |  237|  4.61k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  |  |  |  |  238|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  |  |  |  |  239|  4.61k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  |  |  |  |  240|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  |  |  |  |  241|  4.61k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  |  |  |  |  242|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  |  |  |  |  243|  4.61k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  |  |  |  |  244|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  |  |  |  |  245|  4.61k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  |  |  |  |  246|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  |  |  |  |  247|  4.61k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  |  |  |  |  248|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  |  |  |  |  249|  4.61k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  |  |  |  |  250|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  |  |  |  |  251|  4.61k|        inv_txfm_add_identity_dct_##w##x##h##_c
  |  |  ------------------
  |  |  255|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][H_FLIPADST] = \
  |  |  256|  4.61k|        inv_txfm_add_flipadst_identity_##w##x##h##_c; \
  |  |  257|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][V_FLIPADST] = \
  |  |  258|  4.61k|        inv_txfm_add_identity_flipadst_##w##x##h##_c; \
  |  |  259|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][H_ADST] = \
  |  |  260|  4.61k|        inv_txfm_add_adst_identity_##w##x##h##_c; \
  |  |  261|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][V_ADST] = \
  |  |  262|  4.61k|        inv_txfm_add_identity_adst_##w##x##h##_c; \
  ------------------
  279|  4.61k|    assign_itx_all_fn16(16, 16, );
  ------------------
  |  |  231|  4.61k|    assign_itx_all_fn32(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  226|  4.61k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  |  |  ------------------
  |  |  |  |  |  |  222|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  |  |  223|  4.61k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  |  |  ------------------
  |  |  |  |  227|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  |  |  228|  4.61k|        inv_txfm_add_identity_identity_##w##x##h##_c
  |  |  ------------------
  |  |  232|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  |  |  233|  4.61k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  |  |  234|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  |  |  235|  4.61k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  |  |  236|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  |  |  237|  4.61k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  |  |  238|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  |  |  239|  4.61k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  |  |  240|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  |  |  241|  4.61k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  |  |  242|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  |  |  243|  4.61k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  |  |  244|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  |  |  245|  4.61k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  |  |  246|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  |  |  247|  4.61k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  |  |  248|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  |  |  249|  4.61k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  |  |  250|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  |  |  251|  4.61k|        inv_txfm_add_identity_dct_##w##x##h##_c
  ------------------
  280|  4.61k|    assign_itx_all_fn32(16, 32, R);
  ------------------
  |  |  226|  4.61k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  222|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  223|  4.61k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  ------------------
  |  |  227|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  228|  4.61k|        inv_txfm_add_identity_identity_##w##x##h##_c
  ------------------
  281|  4.61k|    assign_itx_all_fn64(16, 64, R);
  ------------------
  |  |  222|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  223|  4.61k|        inv_txfm_add_dct_dct_##w##x##h##_c
  ------------------
  282|  4.61k|    assign_itx_all_fn32(32,  8, R);
  ------------------
  |  |  226|  4.61k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  222|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  223|  4.61k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  ------------------
  |  |  227|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  228|  4.61k|        inv_txfm_add_identity_identity_##w##x##h##_c
  ------------------
  283|  4.61k|    assign_itx_all_fn32(32, 16, R);
  ------------------
  |  |  226|  4.61k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  222|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  223|  4.61k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  ------------------
  |  |  227|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  228|  4.61k|        inv_txfm_add_identity_identity_##w##x##h##_c
  ------------------
  284|  4.61k|    assign_itx_all_fn32(32, 32, );
  ------------------
  |  |  226|  4.61k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  222|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  223|  4.61k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  ------------------
  |  |  227|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  228|  4.61k|        inv_txfm_add_identity_identity_##w##x##h##_c
  ------------------
  285|  4.61k|    assign_itx_all_fn64(32, 64, R);
  ------------------
  |  |  222|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  223|  4.61k|        inv_txfm_add_dct_dct_##w##x##h##_c
  ------------------
  286|  4.61k|    assign_itx_all_fn64(64, 16, R);
  ------------------
  |  |  222|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  223|  4.61k|        inv_txfm_add_dct_dct_##w##x##h##_c
  ------------------
  287|  4.61k|    assign_itx_all_fn64(64, 32, R);
  ------------------
  |  |  222|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  223|  4.61k|        inv_txfm_add_dct_dct_##w##x##h##_c
  ------------------
  288|  4.61k|    assign_itx_all_fn64(64, 64, );
  ------------------
  |  |  222|  4.61k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  223|  4.61k|        inv_txfm_add_dct_dct_##w##x##h##_c
  ------------------
  289|       |
  290|  4.61k|    int all_simd = 0;
  291|  4.61k|#if HAVE_ASM
  292|       |#if ARCH_AARCH64 || ARCH_ARM
  293|       |    itx_dsp_init_arm(c, bpc, &all_simd);
  294|       |#endif
  295|       |#if ARCH_LOONGARCH64
  296|       |    itx_dsp_init_loongarch(c, bpc);
  297|       |#endif
  298|       |#if ARCH_PPC64LE
  299|       |    itx_dsp_init_ppc(c, bpc);
  300|       |#endif
  301|       |#if ARCH_RISCV
  302|       |    itx_dsp_init_riscv(c, bpc);
  303|       |#endif
  304|  4.61k|#if ARCH_X86
  305|  4.61k|    itx_dsp_init_x86(c, bpc, &all_simd);
  306|  4.61k|#endif
  307|  4.61k|#endif
  308|       |
  309|  4.61k|    if (!all_simd)
  ------------------
  |  Branch (309:9): [True: 2.35k, False: 2.25k]
  ------------------
  310|  2.35k|        dav1d_init_last_nonzero_col_from_eob_tables();
  311|  4.61k|}

dav1d_copy_lpf_8bpc:
  106|  46.0k|{
  107|  46.0k|    const int have_tt = f->c->n_tc > 1;
  108|  46.0k|    const int resize = f->frame_hdr->width[0] != f->frame_hdr->width[1];
  109|  46.0k|    const int offset = 8 * !!sby;
  110|  46.0k|    const ptrdiff_t *const src_stride = f->cur.stride;
  111|  46.0k|    const ptrdiff_t *const lr_stride = f->sr_cur.p.stride;
  112|  46.0k|    const int tt_off = have_tt * sby * (4 << f->seq_hdr->sb128);
  113|  46.0k|    pixel *const dst[3] = {
  114|  46.0k|        f->lf.lr_lpf_line[0] + tt_off * PXSTRIDE(lr_stride[0]),
  ------------------
  |  |   53|  46.0k|#define PXSTRIDE(x) (x)
  ------------------
  115|  46.0k|        f->lf.lr_lpf_line[1] + tt_off * PXSTRIDE(lr_stride[1]),
  ------------------
  |  |   53|  46.0k|#define PXSTRIDE(x) (x)
  ------------------
  116|  46.0k|        f->lf.lr_lpf_line[2] + tt_off * PXSTRIDE(lr_stride[1])
  ------------------
  |  |   53|  46.0k|#define PXSTRIDE(x) (x)
  ------------------
  117|  46.0k|    };
  118|       |
  119|       |    // TODO Also check block level restore type to reduce copying.
  120|  46.0k|    const int restore_planes = f->lf.restore_planes;
  121|       |
  122|  46.0k|    if (f->seq_hdr->cdef || restore_planes & LR_RESTORE_Y) {
  ------------------
  |  Branch (122:9): [True: 35.9k, False: 10.1k]
  |  Branch (122:29): [True: 7.62k, False: 2.47k]
  ------------------
  123|  43.5k|        const int h = f->cur.p.h;
  124|  43.5k|        const int w = f->bw << 2;
  125|  43.5k|        const int row_h = imin((sby + 1) << (6 + f->seq_hdr->sb128), h - 1);
  126|  43.5k|        const int y_stripe = (sby << (6 + f->seq_hdr->sb128)) - offset;
  127|  43.5k|        if (restore_planes & LR_RESTORE_Y || !resize)
  ------------------
  |  Branch (127:13): [True: 15.8k, False: 27.6k]
  |  Branch (127:46): [True: 26.9k, False: 758]
  ------------------
  128|  42.7k|            backup_lpf(f, dst[0], lr_stride[0],
  129|  42.7k|                       src[0] - offset * PXSTRIDE(src_stride[0]), src_stride[0],
  ------------------
  |  |   53|  42.7k|#define PXSTRIDE(x) (x)
  ------------------
  130|  42.7k|                       0, f->seq_hdr->sb128, y_stripe, row_h, w, h, 0, 1);
  131|  43.5k|        if (have_tt && resize) {
  ------------------
  |  Branch (131:13): [True: 0, False: 43.5k]
  |  Branch (131:24): [True: 0, False: 0]
  ------------------
  132|      0|            const ptrdiff_t cdef_off_y = sby * 4 * PXSTRIDE(src_stride[0]);
  ------------------
  |  |   53|      0|#define PXSTRIDE(x) (x)
  ------------------
  133|      0|            backup_lpf(f, f->lf.cdef_lpf_line[0] + cdef_off_y, src_stride[0],
  134|      0|                       src[0] - offset * PXSTRIDE(src_stride[0]), src_stride[0],
  ------------------
  |  |   53|      0|#define PXSTRIDE(x) (x)
  ------------------
  135|      0|                       0, f->seq_hdr->sb128, y_stripe, row_h, w, h, 0, 0);
  136|      0|        }
  137|  43.5k|    }
  138|  46.0k|    if ((f->seq_hdr->cdef || restore_planes & (LR_RESTORE_U | LR_RESTORE_V)) &&
  ------------------
  |  Branch (138:10): [True: 35.9k, False: 10.1k]
  |  Branch (138:30): [True: 3.75k, False: 6.34k]
  ------------------
  139|  39.6k|        f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400)
  ------------------
  |  Branch (139:9): [True: 12.9k, False: 26.6k]
  ------------------
  140|  12.9k|    {
  141|  12.9k|        const int ss_ver = f->sr_cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
  142|  12.9k|        const int ss_hor = f->sr_cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
  143|  12.9k|        const int h = (f->cur.p.h + ss_ver) >> ss_ver;
  144|  12.9k|        const int w = f->bw << (2 - ss_hor);
  145|  12.9k|        const int row_h = imin((sby + 1) << ((6 - ss_ver) + f->seq_hdr->sb128), h - 1);
  146|  12.9k|        const int offset_uv = offset >> ss_ver;
  147|  12.9k|        const int y_stripe = (sby << ((6 - ss_ver) + f->seq_hdr->sb128)) - offset_uv;
  148|  12.9k|        const ptrdiff_t cdef_off_uv = sby * 4 * PXSTRIDE(src_stride[1]);
  ------------------
  |  |   53|  12.9k|#define PXSTRIDE(x) (x)
  ------------------
  149|  12.9k|        if (f->seq_hdr->cdef || restore_planes & LR_RESTORE_U) {
  ------------------
  |  Branch (149:13): [True: 9.23k, False: 3.75k]
  |  Branch (149:33): [True: 765, False: 2.99k]
  ------------------
  150|  10.0k|            if (restore_planes & LR_RESTORE_U || !resize)
  ------------------
  |  Branch (150:17): [True: 1.92k, False: 8.07k]
  |  Branch (150:50): [True: 7.58k, False: 490]
  ------------------
  151|  9.51k|                backup_lpf(f, dst[1], lr_stride[1],
  152|  9.51k|                           src[1] - offset_uv * PXSTRIDE(src_stride[1]),
  ------------------
  |  |   53|  9.51k|#define PXSTRIDE(x) (x)
  ------------------
  153|  9.51k|                           src_stride[1], ss_ver, f->seq_hdr->sb128, y_stripe,
  154|  9.51k|                           row_h, w, h, ss_hor, 1);
  155|  10.0k|            if (have_tt && resize)
  ------------------
  |  Branch (155:17): [True: 0, False: 10.0k]
  |  Branch (155:28): [True: 0, False: 0]
  ------------------
  156|      0|                backup_lpf(f, f->lf.cdef_lpf_line[1] + cdef_off_uv, src_stride[1],
  157|      0|                           src[1] - offset_uv * PXSTRIDE(src_stride[1]),
  ------------------
  |  |   53|      0|#define PXSTRIDE(x) (x)
  ------------------
  158|      0|                           src_stride[1], ss_ver, f->seq_hdr->sb128, y_stripe,
  159|      0|                           row_h, w, h, ss_hor, 0);
  160|  10.0k|        }
  161|  12.9k|        if (f->seq_hdr->cdef || restore_planes & LR_RESTORE_V) {
  ------------------
  |  Branch (161:13): [True: 9.23k, False: 3.75k]
  |  Branch (161:33): [True: 3.44k, False: 314]
  ------------------
  162|  12.6k|            if (restore_planes & LR_RESTORE_V || !resize)
  ------------------
  |  Branch (162:17): [True: 5.02k, False: 7.65k]
  |  Branch (162:50): [True: 6.82k, False: 822]
  ------------------
  163|  11.8k|                backup_lpf(f, dst[2], lr_stride[1],
  164|  11.8k|                           src[2] - offset_uv * PXSTRIDE(src_stride[1]),
  ------------------
  |  |   53|  11.8k|#define PXSTRIDE(x) (x)
  ------------------
  165|  11.8k|                           src_stride[1], ss_ver, f->seq_hdr->sb128, y_stripe,
  166|  11.8k|                           row_h, w, h, ss_hor, 1);
  167|  12.6k|            if (have_tt && resize)
  ------------------
  |  Branch (167:17): [True: 0, False: 12.6k]
  |  Branch (167:28): [True: 0, False: 0]
  ------------------
  168|      0|                backup_lpf(f, f->lf.cdef_lpf_line[2] + cdef_off_uv, src_stride[1],
  169|      0|                           src[2] - offset_uv * PXSTRIDE(src_stride[1]),
  ------------------
  |  |   53|      0|#define PXSTRIDE(x) (x)
  ------------------
  170|      0|                           src_stride[1], ss_ver, f->seq_hdr->sb128, y_stripe,
  171|      0|                           row_h, w, h, ss_hor, 0);
  172|  12.6k|        }
  173|  12.9k|    }
  174|  46.0k|}
dav1d_loopfilter_sbrow_cols_8bpc:
  316|  30.5k|{
  317|  30.5k|    int x, have_left;
  318|       |    // Don't filter outside the frame
  319|  30.5k|    const int is_sb64 = !f->seq_hdr->sb128;
  320|  30.5k|    const int starty4 = (sby & is_sb64) << 4;
  321|  30.5k|    const int sbsz = 32 >> is_sb64;
  322|  30.5k|    const int sbl2 = 5 - is_sb64;
  323|  30.5k|    const int halign = (f->bh + 31) & ~31;
  324|  30.5k|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
  325|  30.5k|    const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
  326|  30.5k|    const int vmask = 16 >> ss_ver, hmask = 16 >> ss_hor;
  327|  30.5k|    const unsigned vmax = 1U << vmask, hmax = 1U << hmask;
  328|  30.5k|    const unsigned endy4 = starty4 + imin(f->h4 - sby * sbsz, sbsz);
  329|  30.5k|    const unsigned uv_endy4 = (endy4 + ss_ver) >> ss_ver;
  330|       |
  331|       |    // fix lpf strength at tile col boundaries
  332|  30.5k|    const uint8_t *lpf_y = &f->lf.tx_lpf_right_edge[0][sby << sbl2];
  333|  30.5k|    const uint8_t *lpf_uv = &f->lf.tx_lpf_right_edge[1][sby << (sbl2 - ss_ver)];
  334|  31.7k|    for (int tile_col = 1;; tile_col++) {
  335|  31.7k|        x = f->frame_hdr->tiling.col_start_sb[tile_col];
  336|  31.7k|        if ((x << sbl2) >= f->bw) break;
  ------------------
  |  Branch (336:13): [True: 30.5k, False: 1.20k]
  ------------------
  337|  1.20k|        const int bx4 = x & is_sb64 ? 16 : 0, cbx4 = bx4 >> ss_hor;
  ------------------
  |  Branch (337:25): [True: 518, False: 690]
  ------------------
  338|  1.20k|        x >>= is_sb64;
  339|       |
  340|  1.20k|        uint16_t (*const y_hmask)[2] = lflvl[x].filter_y[0][bx4];
  341|  29.3k|        for (unsigned y = starty4, mask = 1 << y; y < endy4; y++, mask <<= 1) {
  ------------------
  |  Branch (341:51): [True: 28.1k, False: 1.20k]
  ------------------
  342|  28.1k|            const int sidx = mask >= 0x10000U;
  343|  28.1k|            const unsigned smask = mask >> (sidx << 4);
  344|  28.1k|            const int idx = 2 * !!(y_hmask[2][sidx] & smask) +
  345|  28.1k|                                !!(y_hmask[1][sidx] & smask);
  346|  28.1k|            y_hmask[2][sidx] &= ~smask;
  347|  28.1k|            y_hmask[1][sidx] &= ~smask;
  348|  28.1k|            y_hmask[0][sidx] &= ~smask;
  349|  28.1k|            y_hmask[imin(idx, lpf_y[y - starty4])][sidx] |= smask;
  350|  28.1k|        }
  351|       |
  352|  1.20k|        if (f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400) {
  ------------------
  |  Branch (352:13): [True: 428, False: 780]
  ------------------
  353|    428|            uint16_t (*const uv_hmask)[2] = lflvl[x].filter_uv[0][cbx4];
  354|  5.48k|            for (unsigned y = starty4 >> ss_ver, uv_mask = 1 << y; y < uv_endy4;
  ------------------
  |  Branch (354:68): [True: 5.05k, False: 428]
  ------------------
  355|  5.05k|                 y++, uv_mask <<= 1)
  356|  5.05k|            {
  357|  5.05k|                const int sidx = uv_mask >= vmax;
  358|  5.05k|                const unsigned smask = uv_mask >> (sidx << (4 - ss_ver));
  359|  5.05k|                const int idx = !!(uv_hmask[1][sidx] & smask);
  360|  5.05k|                uv_hmask[1][sidx] &= ~smask;
  361|  5.05k|                uv_hmask[0][sidx] &= ~smask;
  362|  5.05k|                uv_hmask[imin(idx, lpf_uv[y - (starty4 >> ss_ver)])][sidx] |= smask;
  363|  5.05k|            }
  364|    428|        }
  365|  1.20k|        lpf_y  += halign;
  366|  1.20k|        lpf_uv += halign >> ss_ver;
  367|  1.20k|    }
  368|       |
  369|       |    // fix lpf strength at tile row boundaries
  370|  30.5k|    if (start_of_tile_row) {
  ------------------
  |  Branch (370:9): [True: 375, False: 30.1k]
  ------------------
  371|    375|        const BlockContext *a;
  372|    375|        for (x = 0, a = &f->a[f->sb128w * (start_of_tile_row - 1)];
  373|  1.72k|             x < f->sb128w; x++, a++)
  ------------------
  |  Branch (373:14): [True: 1.35k, False: 375]
  ------------------
  374|  1.35k|        {
  375|  1.35k|            uint16_t (*const y_vmask)[2] = lflvl[x].filter_y[1][starty4];
  376|  1.35k|            const unsigned w = imin(32, f->w4 - (x << 5));
  377|  37.1k|            for (unsigned mask = 1, i = 0; i < w; mask <<= 1, i++) {
  ------------------
  |  Branch (377:44): [True: 35.7k, False: 1.35k]
  ------------------
  378|  35.7k|                const int sidx = mask >= 0x10000U;
  379|  35.7k|                const unsigned smask = mask >> (sidx << 4);
  380|  35.7k|                const int idx = 2 * !!(y_vmask[2][sidx] & smask) +
  381|  35.7k|                                    !!(y_vmask[1][sidx] & smask);
  382|  35.7k|                y_vmask[2][sidx] &= ~smask;
  383|  35.7k|                y_vmask[1][sidx] &= ~smask;
  384|  35.7k|                y_vmask[0][sidx] &= ~smask;
  385|  35.7k|                y_vmask[imin(idx, a->tx_lpf_y[i])][sidx] |= smask;
  386|  35.7k|            }
  387|       |
  388|  1.35k|            if (f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400) {
  ------------------
  |  Branch (388:17): [True: 1.10k, False: 242]
  ------------------
  389|  1.10k|                const unsigned cw = (w + ss_hor) >> ss_hor;
  390|  1.10k|                uint16_t (*const uv_vmask)[2] = lflvl[x].filter_uv[1][starty4 >> ss_ver];
  391|  29.1k|                for (unsigned uv_mask = 1, i = 0; i < cw; uv_mask <<= 1, i++) {
  ------------------
  |  Branch (391:51): [True: 28.0k, False: 1.10k]
  ------------------
  392|  28.0k|                    const int sidx = uv_mask >= hmax;
  393|  28.0k|                    const unsigned smask = uv_mask >> (sidx << (4 - ss_hor));
  394|  28.0k|                    const int idx = !!(uv_vmask[1][sidx] & smask);
  395|  28.0k|                    uv_vmask[1][sidx] &= ~smask;
  396|  28.0k|                    uv_vmask[0][sidx] &= ~smask;
  397|  28.0k|                    uv_vmask[imin(idx, a->tx_lpf_uv[i])][sidx] |= smask;
  398|  28.0k|                }
  399|  1.10k|            }
  400|  1.35k|        }
  401|    375|    }
  402|       |
  403|  30.5k|    pixel *ptr;
  404|  30.5k|    uint8_t (*level_ptr)[4] = f->lf.level + f->b4_stride * sby * sbsz;
  405|  75.0k|    for (ptr = p[0], have_left = 0, x = 0; x < f->sb128w;
  ------------------
  |  Branch (405:44): [True: 44.5k, False: 30.5k]
  ------------------
  406|  44.5k|         x++, have_left = 1, ptr += 128, level_ptr += 32)
  407|  44.5k|    {
  408|  44.5k|        filter_plane_cols_y(f, have_left, level_ptr, f->b4_stride,
  409|  44.5k|                            lflvl[x].filter_y[0], ptr, f->cur.stride[0],
  410|  44.5k|                            imin(32, f->w4 - x * 32), starty4, endy4);
  411|  44.5k|    }
  412|       |
  413|  30.5k|    if (!f->frame_hdr->loopfilter.level_u && !f->frame_hdr->loopfilter.level_v)
  ------------------
  |  Branch (413:9): [True: 22.7k, False: 7.86k]
  |  Branch (413:46): [True: 19.4k, False: 3.28k]
  ------------------
  414|  19.4k|        return;
  415|       |
  416|  11.1k|    ptrdiff_t uv_off;
  417|  11.1k|    level_ptr = f->lf.level + f->b4_stride * (sby * sbsz >> ss_ver);
  418|  28.9k|    for (uv_off = 0, have_left = 0, x = 0; x < f->sb128w;
  ------------------
  |  Branch (418:44): [True: 17.7k, False: 11.1k]
  ------------------
  419|  17.7k|         x++, have_left = 1, uv_off += 128 >> ss_hor, level_ptr += 32 >> ss_hor)
  420|  17.7k|    {
  421|  17.7k|        filter_plane_cols_uv(f, have_left, level_ptr, f->b4_stride,
  422|  17.7k|                             lflvl[x].filter_uv[0],
  423|  17.7k|                             &p[1][uv_off], &p[2][uv_off], f->cur.stride[1],
  424|  17.7k|                             (imin(32, f->w4 - x * 32) + ss_hor) >> ss_hor,
  425|  17.7k|                             starty4 >> ss_ver, uv_endy4, ss_ver);
  426|  17.7k|    }
  427|  11.1k|}
dav1d_loopfilter_sbrow_rows_8bpc:
  432|  30.5k|{
  433|  30.5k|    int x;
  434|       |    // Don't filter outside the frame
  435|  30.5k|    const int have_top = sby > 0;
  436|  30.5k|    const int is_sb64 = !f->seq_hdr->sb128;
  437|  30.5k|    const int starty4 = (sby & is_sb64) << 4;
  438|  30.5k|    const int sbsz = 32 >> is_sb64;
  439|  30.5k|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
  440|  30.5k|    const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
  441|  30.5k|    const unsigned endy4 = starty4 + imin(f->h4 - sby * sbsz, sbsz);
  442|  30.5k|    const unsigned uv_endy4 = (endy4 + ss_ver) >> ss_ver;
  443|       |
  444|  30.5k|    pixel *ptr;
  445|  30.5k|    uint8_t (*level_ptr)[4] = f->lf.level + f->b4_stride * sby * sbsz;
  446|  75.0k|    for (ptr = p[0], x = 0; x < f->sb128w; x++, ptr += 128, level_ptr += 32) {
  ------------------
  |  Branch (446:29): [True: 44.5k, False: 30.5k]
  ------------------
  447|  44.5k|        filter_plane_rows_y(f, have_top, level_ptr, f->b4_stride,
  448|  44.5k|                            lflvl[x].filter_y[1], ptr, f->cur.stride[0],
  449|  44.5k|                            imin(32, f->w4 - x * 32), starty4, endy4);
  450|  44.5k|    }
  451|       |
  452|  30.5k|    if (!f->frame_hdr->loopfilter.level_u && !f->frame_hdr->loopfilter.level_v)
  ------------------
  |  Branch (452:9): [True: 22.7k, False: 7.86k]
  |  Branch (452:46): [True: 19.4k, False: 3.28k]
  ------------------
  453|  19.4k|        return;
  454|       |
  455|  11.1k|    ptrdiff_t uv_off;
  456|  11.1k|    level_ptr = f->lf.level + f->b4_stride * (sby * sbsz >> ss_ver);
  457|  28.9k|    for (uv_off = 0, x = 0; x < f->sb128w;
  ------------------
  |  Branch (457:29): [True: 17.7k, False: 11.1k]
  ------------------
  458|  17.7k|         x++, uv_off += 128 >> ss_hor, level_ptr += 32 >> ss_hor)
  459|  17.7k|    {
  460|  17.7k|        filter_plane_rows_uv(f, have_top, level_ptr, f->b4_stride,
  461|  17.7k|                             lflvl[x].filter_uv[1],
  462|  17.7k|                             &p[1][uv_off], &p[2][uv_off], f->cur.stride[1],
  463|  17.7k|                             (imin(32, f->w4 - x * 32) + ss_hor) >> ss_hor,
  464|  17.7k|                             starty4 >> ss_ver, uv_endy4, ss_hor);
  465|  17.7k|    }
  466|  11.1k|}
lf_apply_tmpl.c:backup_lpf:
   47|   114k|{
   48|   114k|    const int cdef_backup = !lr_backup;
   49|   114k|    const int dst_w = f->frame_hdr->super_res.enabled ?
  ------------------
  |  Branch (49:23): [True: 11.0k, False: 103k]
  ------------------
   50|   103k|                      (f->frame_hdr->width[1] + ss_hor) >> ss_hor : src_w;
   51|       |
   52|       |    // The first stripe of the frame is shorter by 8 luma pixel rows.
   53|   114k|    int stripe_h = ((64 << (cdef_backup & sb128)) - 8 * !row) >> ss_ver;
   54|   114k|    src += (stripe_h - 2) * PXSTRIDE(src_stride);
  ------------------
  |  |   53|   114k|#define PXSTRIDE(x) (x)
  ------------------
   55|       |
   56|   114k|    if (f->c->n_tc == 1) {
  ------------------
  |  Branch (56:9): [True: 114k, False: 0]
  ------------------
   57|   114k|        if (row) {
  ------------------
  |  Branch (57:13): [True: 92.4k, False: 22.0k]
  ------------------
   58|  92.4k|            const int top = 4 << sb128;
   59|       |            // Copy the top part of the stored loop filtered pixels from the
   60|       |            // previous sb row needed above the first stripe of this sb row.
   61|  92.4k|            pixel_copy(&dst[PXSTRIDE(dst_stride) *  0],
  ------------------
  |  |   47|  92.4k|#define pixel_copy memcpy
  ------------------
                          pixel_copy(&dst[PXSTRIDE(dst_stride) *  0],
  ------------------
  |  |   53|  92.4k|#define PXSTRIDE(x) (x)
  ------------------
   62|  92.4k|                       &dst[PXSTRIDE(dst_stride) *  top],      dst_w);
  ------------------
  |  |   53|  92.4k|#define PXSTRIDE(x) (x)
  ------------------
   63|  92.4k|            pixel_copy(&dst[PXSTRIDE(dst_stride) *  1],
  ------------------
  |  |   47|  92.4k|#define pixel_copy memcpy
  ------------------
                          pixel_copy(&dst[PXSTRIDE(dst_stride) *  1],
  ------------------
  |  |   53|  92.4k|#define PXSTRIDE(x) (x)
  ------------------
   64|  92.4k|                       &dst[PXSTRIDE(dst_stride) * (top + 1)], dst_w);
  ------------------
  |  |   53|  92.4k|#define PXSTRIDE(x) (x)
  ------------------
   65|  92.4k|            pixel_copy(&dst[PXSTRIDE(dst_stride) *  2],
  ------------------
  |  |   47|  92.4k|#define pixel_copy memcpy
  ------------------
                          pixel_copy(&dst[PXSTRIDE(dst_stride) *  2],
  ------------------
  |  |   53|  92.4k|#define PXSTRIDE(x) (x)
  ------------------
   66|  92.4k|                       &dst[PXSTRIDE(dst_stride) * (top + 2)], dst_w);
  ------------------
  |  |   53|  92.4k|#define PXSTRIDE(x) (x)
  ------------------
   67|  92.4k|            pixel_copy(&dst[PXSTRIDE(dst_stride) *  3],
  ------------------
  |  |   47|  92.4k|#define pixel_copy memcpy
  ------------------
                          pixel_copy(&dst[PXSTRIDE(dst_stride) *  3],
  ------------------
  |  |   53|  92.4k|#define PXSTRIDE(x) (x)
  ------------------
   68|  92.4k|                       &dst[PXSTRIDE(dst_stride) * (top + 3)], dst_w);
  ------------------
  |  |   53|  92.4k|#define PXSTRIDE(x) (x)
  ------------------
   69|  92.4k|        }
   70|   114k|        dst += 4 * PXSTRIDE(dst_stride);
  ------------------
  |  |   53|   114k|#define PXSTRIDE(x) (x)
  ------------------
   71|   114k|    }
   72|       |
   73|   114k|    if (lr_backup && (f->frame_hdr->width[0] != f->frame_hdr->width[1])) {
  ------------------
  |  Branch (73:9): [True: 114k, False: 0]
  |  Branch (73:22): [True: 7.96k, False: 106k]
  ------------------
   74|  18.5k|        while (row + stripe_h <= row_h) {
  ------------------
  |  Branch (74:16): [True: 10.5k, False: 7.96k]
  ------------------
   75|  10.5k|            const int n_lines = 4 - (row + stripe_h + 1 == h);
   76|  10.5k|            f->dsp->mc.resize(dst, dst_stride, src, src_stride,
   77|  10.5k|                              dst_w, n_lines, src_w, f->resize_step[ss_hor],
   78|  10.5k|                              f->resize_start[ss_hor] HIGHBD_CALL_SUFFIX);
   79|  10.5k|            row += stripe_h; // unmodified stripe_h for the 1st stripe
   80|  10.5k|            stripe_h = 64 >> ss_ver;
   81|  10.5k|            src += stripe_h * PXSTRIDE(src_stride);
  ------------------
  |  |   53|  10.5k|#define PXSTRIDE(x) (x)
  ------------------
   82|  10.5k|            dst += n_lines * PXSTRIDE(dst_stride);
  ------------------
  |  |   53|  10.5k|#define PXSTRIDE(x) (x)
  ------------------
   83|  10.5k|            if (n_lines == 3) {
  ------------------
  |  Branch (83:17): [True: 1.14k, False: 9.44k]
  ------------------
   84|  1.14k|                pixel_copy(dst, &dst[-PXSTRIDE(dst_stride)], dst_w);
  ------------------
  |  |   47|  1.14k|#define pixel_copy memcpy
  ------------------
                              pixel_copy(dst, &dst[-PXSTRIDE(dst_stride)], dst_w);
  ------------------
  |  |   53|  1.14k|#define PXSTRIDE(x) (x)
  ------------------
   85|  1.14k|                dst += PXSTRIDE(dst_stride);
  ------------------
  |  |   53|  1.14k|#define PXSTRIDE(x) (x)
  ------------------
   86|  1.14k|            }
   87|  10.5k|        }
   88|   106k|    } else {
   89|   229k|        while (row + stripe_h <= row_h) {
  ------------------
  |  Branch (89:16): [True: 123k, False: 106k]
  ------------------
   90|   123k|            const int n_lines = 4 - (row + stripe_h + 1 == h);
   91|   616k|            for (int i = 0; i < 4; i++) {
  ------------------
  |  Branch (91:29): [True: 493k, False: 123k]
  ------------------
   92|   493k|                pixel_copy(dst, i == n_lines ? &dst[-PXSTRIDE(dst_stride)] :
  ------------------
  |  |   47|   493k|#define pixel_copy memcpy
  ------------------
                              pixel_copy(dst, i == n_lines ? &dst[-PXSTRIDE(dst_stride)] :
  ------------------
  |  |   53|    864|#define PXSTRIDE(x) (x)
  ------------------
  |  Branch (92:33): [True: 864, False: 492k]
  ------------------
   93|   493k|                                               src, src_w);
   94|   493k|                dst += PXSTRIDE(dst_stride);
  ------------------
  |  |   53|   493k|#define PXSTRIDE(x) (x)
  ------------------
   95|   493k|                src += PXSTRIDE(src_stride);
  ------------------
  |  |   53|   493k|#define PXSTRIDE(x) (x)
  ------------------
   96|   493k|            }
   97|   123k|            row += stripe_h; // unmodified stripe_h for the 1st stripe
   98|   123k|            stripe_h = 64 >> ss_ver;
   99|   123k|            src += (stripe_h - 4) * PXSTRIDE(src_stride);
  ------------------
  |  |   53|   123k|#define PXSTRIDE(x) (x)
  ------------------
  100|   123k|        }
  101|   106k|    }
  102|   114k|}
lf_apply_tmpl.c:filter_plane_cols_y:
  184|  86.4k|{
  185|  86.4k|    const Dav1dDSPContext *const dsp = f->dsp;
  186|       |
  187|       |    // filter edges between columns (e.g. block1 | block2)
  188|  2.25M|    for (int x = 0; x < w; x++) {
  ------------------
  |  Branch (188:21): [True: 2.17M, False: 86.4k]
  ------------------
  189|  2.17M|        if (!have_left && !x) continue;
  ------------------
  |  Branch (189:13): [True: 1.11M, False: 1.05M]
  |  Branch (189:27): [True: 52.5k, False: 1.06M]
  ------------------
  190|  2.11M|        uint32_t hmask[4];
  191|  2.11M|        if (!starty4) {
  ------------------
  |  Branch (191:13): [True: 1.82M, False: 292k]
  ------------------
  192|  1.82M|            hmask[0] = mask[x][0][0];
  193|  1.82M|            hmask[1] = mask[x][1][0];
  194|  1.82M|            hmask[2] = mask[x][2][0];
  195|  1.82M|            if (endy4 > 16) {
  ------------------
  |  Branch (195:17): [True: 1.42M, False: 404k]
  ------------------
  196|  1.42M|                hmask[0] |= (unsigned) mask[x][0][1] << 16;
  197|  1.42M|                hmask[1] |= (unsigned) mask[x][1][1] << 16;
  198|  1.42M|                hmask[2] |= (unsigned) mask[x][2][1] << 16;
  199|  1.42M|            }
  200|  1.82M|        } else {
  201|   292k|            hmask[0] = mask[x][0][1];
  202|   292k|            hmask[1] = mask[x][1][1];
  203|   292k|            hmask[2] = mask[x][2][1];
  204|   292k|        }
  205|  2.11M|        hmask[3] = 0;
  206|  2.11M|        dsp->lf.loop_filter_sb[0][0](&dst[x * 4], ls, hmask,
  207|  2.11M|                                     (const uint8_t(*)[4]) &lvl[x][0], b4_stride,
  208|  2.11M|                                     &f->lf.lim_lut, endy4 - starty4 HIGHBD_CALL_SUFFIX);
  209|  2.11M|    }
  210|  86.4k|}
lf_apply_tmpl.c:filter_plane_cols_uv:
  251|  29.5k|{
  252|  29.5k|    const Dav1dDSPContext *const dsp = f->dsp;
  253|       |
  254|       |    // filter edges between columns (e.g. block1 | block2)
  255|   579k|    for (int x = 0; x < w; x++) {
  ------------------
  |  Branch (255:21): [True: 549k, False: 29.5k]
  ------------------
  256|   549k|        if (!have_left && !x) continue;
  ------------------
  |  Branch (256:13): [True: 277k, False: 272k]
  |  Branch (256:27): [True: 17.9k, False: 259k]
  ------------------
  257|   531k|        uint32_t hmask[3];
  258|   531k|        if (!starty4) {
  ------------------
  |  Branch (258:13): [True: 483k, False: 47.9k]
  ------------------
  259|   483k|            hmask[0] = mask[x][0][0];
  260|   483k|            hmask[1] = mask[x][1][0];
  261|   483k|            if (endy4 > (16 >> ss_ver)) {
  ------------------
  |  Branch (261:17): [True: 384k, False: 98.9k]
  ------------------
  262|   384k|                hmask[0] |= (unsigned) mask[x][0][1] << (16 >> ss_ver);
  263|   384k|                hmask[1] |= (unsigned) mask[x][1][1] << (16 >> ss_ver);
  264|   384k|            }
  265|   483k|        } else {
  266|  47.9k|            hmask[0] = mask[x][0][1];
  267|  47.9k|            hmask[1] = mask[x][1][1];
  268|  47.9k|        }
  269|   531k|        hmask[2] = 0;
  270|   531k|        dsp->lf.loop_filter_sb[1][0](&u[x * 4], ls, hmask,
  271|   531k|                                     (const uint8_t(*)[4]) &lvl[x][2], b4_stride,
  272|   531k|                                     &f->lf.lim_lut, endy4 - starty4 HIGHBD_CALL_SUFFIX);
  273|   531k|        dsp->lf.loop_filter_sb[1][0](&v[x * 4], ls, hmask,
  274|   531k|                                     (const uint8_t(*)[4]) &lvl[x][3], b4_stride,
  275|   531k|                                     &f->lf.lim_lut, endy4 - starty4 HIGHBD_CALL_SUFFIX);
  276|   531k|    }
  277|  29.5k|}
lf_apply_tmpl.c:filter_plane_rows_y:
  220|  86.4k|{
  221|  86.4k|    const Dav1dDSPContext *const dsp = f->dsp;
  222|       |
  223|       |    //                                 block1
  224|       |    // filter edges between rows (e.g. ------)
  225|       |    //                                 block2
  226|  2.06M|    for (int y = starty4; y < endy4;
  ------------------
  |  Branch (226:27): [True: 1.98M, False: 86.4k]
  ------------------
  227|  1.98M|         y++, dst += 4 * PXSTRIDE(ls), lvl += b4_stride)
  ------------------
  |  |   53|  1.98M|#define PXSTRIDE(x) (x)
  ------------------
  228|  1.98M|    {
  229|  1.98M|        if (!have_top && !y) continue;
  ------------------
  |  Branch (229:13): [True: 345k, False: 1.63M]
  |  Branch (229:26): [True: 22.3k, False: 322k]
  ------------------
  230|  1.96M|        const uint32_t vmask[4] = {
  231|  1.96M|            mask[y][0][0] | ((unsigned) mask[y][0][1] << 16),
  232|  1.96M|            mask[y][1][0] | ((unsigned) mask[y][1][1] << 16),
  233|  1.96M|            mask[y][2][0] | ((unsigned) mask[y][2][1] << 16),
  234|  1.96M|            0,
  235|  1.96M|        };
  236|  1.96M|        dsp->lf.loop_filter_sb[0][1](dst, ls, vmask,
  237|  1.96M|                                     (const uint8_t(*)[4]) &lvl[0][1], b4_stride,
  238|  1.96M|                                     &f->lf.lim_lut, w HIGHBD_CALL_SUFFIX);
  239|  1.96M|    }
  240|  86.4k|}
lf_apply_tmpl.c:filter_plane_rows_uv:
  288|  29.5k|{
  289|  29.5k|    const Dav1dDSPContext *const dsp = f->dsp;
  290|  29.5k|    ptrdiff_t off_l = 0;
  291|       |
  292|       |    //                                 block1
  293|       |    // filter edges between rows (e.g. ------)
  294|       |    //                                 block2
  295|   585k|    for (int y = starty4; y < endy4;
  ------------------
  |  Branch (295:27): [True: 555k, False: 29.5k]
  ------------------
  296|   555k|         y++, off_l += 4 * PXSTRIDE(ls), lvl += b4_stride)
  ------------------
  |  |   53|   555k|#define PXSTRIDE(x) (x)
  ------------------
  297|   555k|    {
  298|   555k|        if (!have_top && !y) continue;
  ------------------
  |  Branch (298:13): [True: 133k, False: 422k]
  |  Branch (298:26): [True: 6.67k, False: 126k]
  ------------------
  299|   549k|        const uint32_t vmask[3] = {
  300|   549k|            mask[y][0][0] | ((unsigned) mask[y][0][1] << (16 >> ss_hor)),
  301|   549k|            mask[y][1][0] | ((unsigned) mask[y][1][1] << (16 >> ss_hor)),
  302|   549k|            0,
  303|   549k|        };
  304|   549k|        dsp->lf.loop_filter_sb[1][1](&u[off_l], ls, vmask,
  305|   549k|                                     (const uint8_t(*)[4]) &lvl[0][2], b4_stride,
  306|   549k|                                     &f->lf.lim_lut, w HIGHBD_CALL_SUFFIX);
  307|   549k|        dsp->lf.loop_filter_sb[1][1](&v[off_l], ls, vmask,
  308|   549k|                                     (const uint8_t(*)[4]) &lvl[0][3], b4_stride,
  309|   549k|                                     &f->lf.lim_lut, w HIGHBD_CALL_SUFFIX);
  310|   549k|    }
  311|  29.5k|}
dav1d_copy_lpf_16bpc:
  106|  32.0k|{
  107|  32.0k|    const int have_tt = f->c->n_tc > 1;
  108|  32.0k|    const int resize = f->frame_hdr->width[0] != f->frame_hdr->width[1];
  109|  32.0k|    const int offset = 8 * !!sby;
  110|  32.0k|    const ptrdiff_t *const src_stride = f->cur.stride;
  111|  32.0k|    const ptrdiff_t *const lr_stride = f->sr_cur.p.stride;
  112|  32.0k|    const int tt_off = have_tt * sby * (4 << f->seq_hdr->sb128);
  113|  32.0k|    pixel *const dst[3] = {
  114|  32.0k|        f->lf.lr_lpf_line[0] + tt_off * PXSTRIDE(lr_stride[0]),
  115|  32.0k|        f->lf.lr_lpf_line[1] + tt_off * PXSTRIDE(lr_stride[1]),
  116|  32.0k|        f->lf.lr_lpf_line[2] + tt_off * PXSTRIDE(lr_stride[1])
  117|  32.0k|    };
  118|       |
  119|       |    // TODO Also check block level restore type to reduce copying.
  120|  32.0k|    const int restore_planes = f->lf.restore_planes;
  121|       |
  122|  32.0k|    if (f->seq_hdr->cdef || restore_planes & LR_RESTORE_Y) {
  ------------------
  |  Branch (122:9): [True: 26.5k, False: 5.55k]
  |  Branch (122:29): [True: 4.54k, False: 1.01k]
  ------------------
  123|  31.0k|        const int h = f->cur.p.h;
  124|  31.0k|        const int w = f->bw << 2;
  125|  31.0k|        const int row_h = imin((sby + 1) << (6 + f->seq_hdr->sb128), h - 1);
  126|  31.0k|        const int y_stripe = (sby << (6 + f->seq_hdr->sb128)) - offset;
  127|  31.0k|        if (restore_planes & LR_RESTORE_Y || !resize)
  ------------------
  |  Branch (127:13): [True: 13.5k, False: 17.4k]
  |  Branch (127:46): [True: 16.7k, False: 727]
  ------------------
  128|  30.3k|            backup_lpf(f, dst[0], lr_stride[0],
  129|  30.3k|                       src[0] - offset * PXSTRIDE(src_stride[0]), src_stride[0],
  130|  30.3k|                       0, f->seq_hdr->sb128, y_stripe, row_h, w, h, 0, 1);
  131|  31.0k|        if (have_tt && resize) {
  ------------------
  |  Branch (131:13): [True: 0, False: 31.0k]
  |  Branch (131:24): [True: 0, False: 0]
  ------------------
  132|      0|            const ptrdiff_t cdef_off_y = sby * 4 * PXSTRIDE(src_stride[0]);
  133|      0|            backup_lpf(f, f->lf.cdef_lpf_line[0] + cdef_off_y, src_stride[0],
  134|      0|                       src[0] - offset * PXSTRIDE(src_stride[0]), src_stride[0],
  135|      0|                       0, f->seq_hdr->sb128, y_stripe, row_h, w, h, 0, 0);
  136|      0|        }
  137|  31.0k|    }
  138|  32.0k|    if ((f->seq_hdr->cdef || restore_planes & (LR_RESTORE_U | LR_RESTORE_V)) &&
  ------------------
  |  Branch (138:10): [True: 26.5k, False: 5.55k]
  |  Branch (138:30): [True: 2.27k, False: 3.28k]
  ------------------
  139|  28.7k|        f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400)
  ------------------
  |  Branch (139:9): [True: 11.7k, False: 17.0k]
  ------------------
  140|  11.7k|    {
  141|  11.7k|        const int ss_ver = f->sr_cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
  142|  11.7k|        const int ss_hor = f->sr_cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
  143|  11.7k|        const int h = (f->cur.p.h + ss_ver) >> ss_ver;
  144|  11.7k|        const int w = f->bw << (2 - ss_hor);
  145|  11.7k|        const int row_h = imin((sby + 1) << ((6 - ss_ver) + f->seq_hdr->sb128), h - 1);
  146|  11.7k|        const int offset_uv = offset >> ss_ver;
  147|  11.7k|        const int y_stripe = (sby << ((6 - ss_ver) + f->seq_hdr->sb128)) - offset_uv;
  148|  11.7k|        const ptrdiff_t cdef_off_uv = sby * 4 * PXSTRIDE(src_stride[1]);
  149|  11.7k|        if (f->seq_hdr->cdef || restore_planes & LR_RESTORE_U) {
  ------------------
  |  Branch (149:13): [True: 9.49k, False: 2.27k]
  |  Branch (149:33): [True: 1.54k, False: 733]
  ------------------
  150|  11.0k|            if (restore_planes & LR_RESTORE_U || !resize)
  ------------------
  |  Branch (150:17): [True: 2.70k, False: 8.34k]
  |  Branch (150:50): [True: 7.42k, False: 920]
  ------------------
  151|  10.1k|                backup_lpf(f, dst[1], lr_stride[1],
  152|  10.1k|                           src[1] - offset_uv * PXSTRIDE(src_stride[1]),
  153|  10.1k|                           src_stride[1], ss_ver, f->seq_hdr->sb128, y_stripe,
  154|  10.1k|                           row_h, w, h, ss_hor, 1);
  155|  11.0k|            if (have_tt && resize)
  ------------------
  |  Branch (155:17): [True: 0, False: 11.0k]
  |  Branch (155:28): [True: 0, False: 0]
  ------------------
  156|      0|                backup_lpf(f, f->lf.cdef_lpf_line[1] + cdef_off_uv, src_stride[1],
  157|      0|                           src[1] - offset_uv * PXSTRIDE(src_stride[1]),
  158|      0|                           src_stride[1], ss_ver, f->seq_hdr->sb128, y_stripe,
  159|      0|                           row_h, w, h, ss_hor, 0);
  160|  11.0k|        }
  161|  11.7k|        if (f->seq_hdr->cdef || restore_planes & LR_RESTORE_V) {
  ------------------
  |  Branch (161:13): [True: 9.49k, False: 2.27k]
  |  Branch (161:33): [True: 1.55k, False: 725]
  ------------------
  162|  11.0k|            if (restore_planes & LR_RESTORE_V || !resize)
  ------------------
  |  Branch (162:17): [True: 3.05k, False: 7.99k]
  |  Branch (162:50): [True: 6.85k, False: 1.14k]
  ------------------
  163|  9.90k|                backup_lpf(f, dst[2], lr_stride[1],
  164|  9.90k|                           src[2] - offset_uv * PXSTRIDE(src_stride[1]),
  165|  9.90k|                           src_stride[1], ss_ver, f->seq_hdr->sb128, y_stripe,
  166|  9.90k|                           row_h, w, h, ss_hor, 1);
  167|  11.0k|            if (have_tt && resize)
  ------------------
  |  Branch (167:17): [True: 0, False: 11.0k]
  |  Branch (167:28): [True: 0, False: 0]
  ------------------
  168|      0|                backup_lpf(f, f->lf.cdef_lpf_line[2] + cdef_off_uv, src_stride[1],
  169|      0|                           src[2] - offset_uv * PXSTRIDE(src_stride[1]),
  170|      0|                           src_stride[1], ss_ver, f->seq_hdr->sb128, y_stripe,
  171|      0|                           row_h, w, h, ss_hor, 0);
  172|  11.0k|        }
  173|  11.7k|    }
  174|  32.0k|}
dav1d_loopfilter_sbrow_cols_16bpc:
  316|  21.9k|{
  317|  21.9k|    int x, have_left;
  318|       |    // Don't filter outside the frame
  319|  21.9k|    const int is_sb64 = !f->seq_hdr->sb128;
  320|  21.9k|    const int starty4 = (sby & is_sb64) << 4;
  321|  21.9k|    const int sbsz = 32 >> is_sb64;
  322|  21.9k|    const int sbl2 = 5 - is_sb64;
  323|  21.9k|    const int halign = (f->bh + 31) & ~31;
  324|  21.9k|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
  325|  21.9k|    const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
  326|  21.9k|    const int vmask = 16 >> ss_ver, hmask = 16 >> ss_hor;
  327|  21.9k|    const unsigned vmax = 1U << vmask, hmax = 1U << hmask;
  328|  21.9k|    const unsigned endy4 = starty4 + imin(f->h4 - sby * sbsz, sbsz);
  329|  21.9k|    const unsigned uv_endy4 = (endy4 + ss_ver) >> ss_ver;
  330|       |
  331|       |    // fix lpf strength at tile col boundaries
  332|  21.9k|    const uint8_t *lpf_y = &f->lf.tx_lpf_right_edge[0][sby << sbl2];
  333|  21.9k|    const uint8_t *lpf_uv = &f->lf.tx_lpf_right_edge[1][sby << (sbl2 - ss_ver)];
  334|  23.0k|    for (int tile_col = 1;; tile_col++) {
  335|  23.0k|        x = f->frame_hdr->tiling.col_start_sb[tile_col];
  336|  23.0k|        if ((x << sbl2) >= f->bw) break;
  ------------------
  |  Branch (336:13): [True: 21.9k, False: 1.07k]
  ------------------
  337|  1.07k|        const int bx4 = x & is_sb64 ? 16 : 0, cbx4 = bx4 >> ss_hor;
  ------------------
  |  Branch (337:25): [True: 483, False: 595]
  ------------------
  338|  1.07k|        x >>= is_sb64;
  339|       |
  340|  1.07k|        uint16_t (*const y_hmask)[2] = lflvl[x].filter_y[0][bx4];
  341|  26.1k|        for (unsigned y = starty4, mask = 1 << y; y < endy4; y++, mask <<= 1) {
  ------------------
  |  Branch (341:51): [True: 25.0k, False: 1.07k]
  ------------------
  342|  25.0k|            const int sidx = mask >= 0x10000U;
  343|  25.0k|            const unsigned smask = mask >> (sidx << 4);
  344|  25.0k|            const int idx = 2 * !!(y_hmask[2][sidx] & smask) +
  345|  25.0k|                                !!(y_hmask[1][sidx] & smask);
  346|  25.0k|            y_hmask[2][sidx] &= ~smask;
  347|  25.0k|            y_hmask[1][sidx] &= ~smask;
  348|  25.0k|            y_hmask[0][sidx] &= ~smask;
  349|  25.0k|            y_hmask[imin(idx, lpf_y[y - starty4])][sidx] |= smask;
  350|  25.0k|        }
  351|       |
  352|  1.07k|        if (f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400) {
  ------------------
  |  Branch (352:13): [True: 387, False: 691]
  ------------------
  353|    387|            uint16_t (*const uv_hmask)[2] = lflvl[x].filter_uv[0][cbx4];
  354|  4.18k|            for (unsigned y = starty4 >> ss_ver, uv_mask = 1 << y; y < uv_endy4;
  ------------------
  |  Branch (354:68): [True: 3.79k, False: 387]
  ------------------
  355|  3.79k|                 y++, uv_mask <<= 1)
  356|  3.79k|            {
  357|  3.79k|                const int sidx = uv_mask >= vmax;
  358|  3.79k|                const unsigned smask = uv_mask >> (sidx << (4 - ss_ver));
  359|  3.79k|                const int idx = !!(uv_hmask[1][sidx] & smask);
  360|  3.79k|                uv_hmask[1][sidx] &= ~smask;
  361|  3.79k|                uv_hmask[0][sidx] &= ~smask;
  362|  3.79k|                uv_hmask[imin(idx, lpf_uv[y - (starty4 >> ss_ver)])][sidx] |= smask;
  363|  3.79k|            }
  364|    387|        }
  365|  1.07k|        lpf_y  += halign;
  366|  1.07k|        lpf_uv += halign >> ss_ver;
  367|  1.07k|    }
  368|       |
  369|       |    // fix lpf strength at tile row boundaries
  370|  21.9k|    if (start_of_tile_row) {
  ------------------
  |  Branch (370:9): [True: 509, False: 21.4k]
  ------------------
  371|    509|        const BlockContext *a;
  372|    509|        for (x = 0, a = &f->a[f->sb128w * (start_of_tile_row - 1)];
  373|  2.17k|             x < f->sb128w; x++, a++)
  ------------------
  |  Branch (373:14): [True: 1.66k, False: 509]
  ------------------
  374|  1.66k|        {
  375|  1.66k|            uint16_t (*const y_vmask)[2] = lflvl[x].filter_y[1][starty4];
  376|  1.66k|            const unsigned w = imin(32, f->w4 - (x << 5));
  377|  43.4k|            for (unsigned mask = 1, i = 0; i < w; mask <<= 1, i++) {
  ------------------
  |  Branch (377:44): [True: 41.7k, False: 1.66k]
  ------------------
  378|  41.7k|                const int sidx = mask >= 0x10000U;
  379|  41.7k|                const unsigned smask = mask >> (sidx << 4);
  380|  41.7k|                const int idx = 2 * !!(y_vmask[2][sidx] & smask) +
  381|  41.7k|                                    !!(y_vmask[1][sidx] & smask);
  382|  41.7k|                y_vmask[2][sidx] &= ~smask;
  383|  41.7k|                y_vmask[1][sidx] &= ~smask;
  384|  41.7k|                y_vmask[0][sidx] &= ~smask;
  385|  41.7k|                y_vmask[imin(idx, a->tx_lpf_y[i])][sidx] |= smask;
  386|  41.7k|            }
  387|       |
  388|  1.66k|            if (f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400) {
  ------------------
  |  Branch (388:17): [True: 1.29k, False: 368]
  ------------------
  389|  1.29k|                const unsigned cw = (w + ss_hor) >> ss_hor;
  390|  1.29k|                uint16_t (*const uv_vmask)[2] = lflvl[x].filter_uv[1][starty4 >> ss_ver];
  391|  31.7k|                for (unsigned uv_mask = 1, i = 0; i < cw; uv_mask <<= 1, i++) {
  ------------------
  |  Branch (391:51): [True: 30.4k, False: 1.29k]
  ------------------
  392|  30.4k|                    const int sidx = uv_mask >= hmax;
  393|  30.4k|                    const unsigned smask = uv_mask >> (sidx << (4 - ss_hor));
  394|  30.4k|                    const int idx = !!(uv_vmask[1][sidx] & smask);
  395|  30.4k|                    uv_vmask[1][sidx] &= ~smask;
  396|  30.4k|                    uv_vmask[0][sidx] &= ~smask;
  397|  30.4k|                    uv_vmask[imin(idx, a->tx_lpf_uv[i])][sidx] |= smask;
  398|  30.4k|                }
  399|  1.29k|            }
  400|  1.66k|        }
  401|    509|    }
  402|       |
  403|  21.9k|    pixel *ptr;
  404|  21.9k|    uint8_t (*level_ptr)[4] = f->lf.level + f->b4_stride * sby * sbsz;
  405|  63.9k|    for (ptr = p[0], have_left = 0, x = 0; x < f->sb128w;
  ------------------
  |  Branch (405:44): [True: 41.9k, False: 21.9k]
  ------------------
  406|  41.9k|         x++, have_left = 1, ptr += 128, level_ptr += 32)
  407|  41.9k|    {
  408|  41.9k|        filter_plane_cols_y(f, have_left, level_ptr, f->b4_stride,
  409|  41.9k|                            lflvl[x].filter_y[0], ptr, f->cur.stride[0],
  410|  41.9k|                            imin(32, f->w4 - x * 32), starty4, endy4);
  411|  41.9k|    }
  412|       |
  413|  21.9k|    if (!f->frame_hdr->loopfilter.level_u && !f->frame_hdr->loopfilter.level_v)
  ------------------
  |  Branch (413:9): [True: 18.0k, False: 3.92k]
  |  Branch (413:46): [True: 15.1k, False: 2.91k]
  ------------------
  414|  15.1k|        return;
  415|       |
  416|  6.84k|    ptrdiff_t uv_off;
  417|  6.84k|    level_ptr = f->lf.level + f->b4_stride * (sby * sbsz >> ss_ver);
  418|  18.6k|    for (uv_off = 0, have_left = 0, x = 0; x < f->sb128w;
  ------------------
  |  Branch (418:44): [True: 11.7k, False: 6.84k]
  ------------------
  419|  11.7k|         x++, have_left = 1, uv_off += 128 >> ss_hor, level_ptr += 32 >> ss_hor)
  420|  11.7k|    {
  421|  11.7k|        filter_plane_cols_uv(f, have_left, level_ptr, f->b4_stride,
  422|  11.7k|                             lflvl[x].filter_uv[0],
  423|  11.7k|                             &p[1][uv_off], &p[2][uv_off], f->cur.stride[1],
  424|  11.7k|                             (imin(32, f->w4 - x * 32) + ss_hor) >> ss_hor,
  425|  11.7k|                             starty4 >> ss_ver, uv_endy4, ss_ver);
  426|  11.7k|    }
  427|  6.84k|}
dav1d_loopfilter_sbrow_rows_16bpc:
  432|  21.9k|{
  433|  21.9k|    int x;
  434|       |    // Don't filter outside the frame
  435|  21.9k|    const int have_top = sby > 0;
  436|  21.9k|    const int is_sb64 = !f->seq_hdr->sb128;
  437|  21.9k|    const int starty4 = (sby & is_sb64) << 4;
  438|  21.9k|    const int sbsz = 32 >> is_sb64;
  439|  21.9k|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
  440|  21.9k|    const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
  441|  21.9k|    const unsigned endy4 = starty4 + imin(f->h4 - sby * sbsz, sbsz);
  442|  21.9k|    const unsigned uv_endy4 = (endy4 + ss_ver) >> ss_ver;
  443|       |
  444|  21.9k|    pixel *ptr;
  445|  21.9k|    uint8_t (*level_ptr)[4] = f->lf.level + f->b4_stride * sby * sbsz;
  446|  63.9k|    for (ptr = p[0], x = 0; x < f->sb128w; x++, ptr += 128, level_ptr += 32) {
  ------------------
  |  Branch (446:29): [True: 41.9k, False: 21.9k]
  ------------------
  447|  41.9k|        filter_plane_rows_y(f, have_top, level_ptr, f->b4_stride,
  448|  41.9k|                            lflvl[x].filter_y[1], ptr, f->cur.stride[0],
  449|  41.9k|                            imin(32, f->w4 - x * 32), starty4, endy4);
  450|  41.9k|    }
  451|       |
  452|  21.9k|    if (!f->frame_hdr->loopfilter.level_u && !f->frame_hdr->loopfilter.level_v)
  ------------------
  |  Branch (452:9): [True: 18.0k, False: 3.92k]
  |  Branch (452:46): [True: 15.1k, False: 2.91k]
  ------------------
  453|  15.1k|        return;
  454|       |
  455|  6.84k|    ptrdiff_t uv_off;
  456|  6.84k|    level_ptr = f->lf.level + f->b4_stride * (sby * sbsz >> ss_ver);
  457|  18.6k|    for (uv_off = 0, x = 0; x < f->sb128w;
  ------------------
  |  Branch (457:29): [True: 11.7k, False: 6.84k]
  ------------------
  458|  11.7k|         x++, uv_off += 128 >> ss_hor, level_ptr += 32 >> ss_hor)
  459|  11.7k|    {
  460|  11.7k|        filter_plane_rows_uv(f, have_top, level_ptr, f->b4_stride,
  461|  11.7k|                             lflvl[x].filter_uv[1],
  462|  11.7k|                             &p[1][uv_off], &p[2][uv_off], f->cur.stride[1],
  463|  11.7k|                             (imin(32, f->w4 - x * 32) + ss_hor) >> ss_hor,
  464|  11.7k|                             starty4 >> ss_ver, uv_endy4, ss_hor);
  465|  11.7k|    }
  466|  6.84k|}

dav1d_create_lf_mask_intra:
  271|   447k|{
  272|   447k|    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
  273|   447k|    const int bw4 = imin(iw - bx, b_dim[0]);
  274|   447k|    const int bh4 = imin(ih - by, b_dim[1]);
  275|   447k|    const int bx4 = bx & 31;
  276|   447k|    const int by4 = by & 31;
  277|   447k|    assert(bw4 >= 0 && bh4 >= 0);
  ------------------
  |  Branch (277:5): [True: 447k, False: 0]
  |  Branch (277:5): [True: 447k, False: 0]
  ------------------
  278|       |
  279|   447k|    if (bw4 && bh4) {
  ------------------
  |  Branch (279:9): [True: 445k, False: 1.31k]
  |  Branch (279:16): [True: 425k, False: 20.4k]
  ------------------
  280|   425k|        uint8_t (*level_cache_ptr)[4] = level_cache + by * b4_stride + bx;
  281|  2.47M|        for (int y = 0; y < bh4; y++) {
  ------------------
  |  Branch (281:25): [True: 2.04M, False: 425k]
  ------------------
  282|  24.6M|            for (int x = 0; x < bw4; x++) {
  ------------------
  |  Branch (282:29): [True: 22.5M, False: 2.04M]
  ------------------
  283|  22.5M|                level_cache_ptr[x][0] = filter_level[0][0][0];
  284|  22.5M|                level_cache_ptr[x][1] = filter_level[1][0][0];
  285|  22.5M|            }
  286|  2.04M|            level_cache_ptr += b4_stride;
  287|  2.04M|        }
  288|       |
  289|   425k|        mask_edges_intra(lflvl->filter_y, by4, bx4, bw4, bh4, ytx, ay, ly);
  290|   425k|    }
  291|       |
  292|   447k|    if (!auv) return;
  ------------------
  |  Branch (292:9): [True: 178k, False: 268k]
  ------------------
  293|       |
  294|   268k|    const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420;
  295|   268k|    const int ss_hor = layout != DAV1D_PIXEL_LAYOUT_I444;
  296|   268k|    const int cbw4 = imin(((iw + ss_hor) >> ss_hor) - (bx >> ss_hor),
  297|   268k|                          (b_dim[0] + ss_hor) >> ss_hor);
  298|   268k|    const int cbh4 = imin(((ih + ss_ver) >> ss_ver) - (by >> ss_ver),
  299|   268k|                          (b_dim[1] + ss_ver) >> ss_ver);
  300|   268k|    assert(cbw4 >= 0 && cbh4 >= 0);
  ------------------
  |  Branch (300:5): [True: 268k, False: 0]
  |  Branch (300:5): [True: 268k, False: 0]
  ------------------
  301|       |
  302|   268k|    if (!cbw4 || !cbh4) return;
  ------------------
  |  Branch (302:9): [True: 224, False: 268k]
  |  Branch (302:18): [True: 10.9k, False: 257k]
  ------------------
  303|       |
  304|   257k|    const int cbx4 = bx4 >> ss_hor;
  305|   257k|    const int cby4 = by4 >> ss_ver;
  306|       |
  307|   257k|    uint8_t (*level_cache_ptr)[4] =
  308|   257k|        level_cache + (by >> ss_ver) * b4_stride + (bx >> ss_hor);
  309|  1.23M|    for (int y = 0; y < cbh4; y++) {
  ------------------
  |  Branch (309:21): [True: 979k, False: 257k]
  ------------------
  310|  9.54M|        for (int x = 0; x < cbw4; x++) {
  ------------------
  |  Branch (310:25): [True: 8.57M, False: 979k]
  ------------------
  311|  8.57M|            level_cache_ptr[x][2] = filter_level[2][0][0];
  312|  8.57M|            level_cache_ptr[x][3] = filter_level[3][0][0];
  313|  8.57M|        }
  314|   979k|        level_cache_ptr += b4_stride;
  315|   979k|    }
  316|       |
  317|   257k|    mask_edges_chroma(lflvl->filter_uv, cby4, cbx4, cbw4, cbh4, 0, uvtx,
  318|   257k|                      auv, luv, ss_hor, ss_ver);
  319|   257k|}
dav1d_create_lf_mask_inter:
  334|   798k|{
  335|   798k|    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
  336|   798k|    const int bw4 = imin(iw - bx, b_dim[0]);
  337|   798k|    const int bh4 = imin(ih - by, b_dim[1]);
  338|   798k|    const int bx4 = bx & 31;
  339|   798k|    const int by4 = by & 31;
  340|   798k|    assert(bw4 >= 0 && bh4 >= 0);
  ------------------
  |  Branch (340:5): [True: 798k, False: 0]
  |  Branch (340:5): [True: 798k, False: 0]
  ------------------
  341|       |
  342|   798k|    if (bw4 && bh4) {
  ------------------
  |  Branch (342:9): [True: 795k, False: 2.53k]
  |  Branch (342:16): [True: 792k, False: 3.77k]
  ------------------
  343|   792k|        uint8_t (*level_cache_ptr)[4] = level_cache + by * b4_stride + bx;
  344|  5.34M|        for (int y = 0; y < bh4; y++) {
  ------------------
  |  Branch (344:25): [True: 4.55M, False: 792k]
  ------------------
  345|  57.7M|            for (int x = 0; x < bw4; x++) {
  ------------------
  |  Branch (345:29): [True: 53.2M, False: 4.55M]
  ------------------
  346|  53.2M|                level_cache_ptr[x][0] = filter_level[0][0][0];
  347|  53.2M|                level_cache_ptr[x][1] = filter_level[1][0][0];
  348|  53.2M|            }
  349|  4.55M|            level_cache_ptr += b4_stride;
  350|  4.55M|        }
  351|       |
  352|   792k|        mask_edges_inter(lflvl->filter_y, by4, bx4, bw4, bh4, skip,
  353|   792k|                         max_ytx, tx_masks, ay, ly);
  354|   792k|    }
  355|       |
  356|   798k|    if (!auv) return;
  ------------------
  |  Branch (356:9): [True: 539k, False: 258k]
  ------------------
  357|       |
  358|   258k|    const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420;
  359|   258k|    const int ss_hor = layout != DAV1D_PIXEL_LAYOUT_I444;
  360|   258k|    const int cbw4 = imin(((iw + ss_hor) >> ss_hor) - (bx >> ss_hor),
  361|   258k|                          (b_dim[0] + ss_hor) >> ss_hor);
  362|   258k|    const int cbh4 = imin(((ih + ss_ver) >> ss_ver) - (by >> ss_ver),
  363|   258k|                          (b_dim[1] + ss_ver) >> ss_ver);
  364|   258k|    assert(cbw4 >= 0 && cbh4 >= 0);
  ------------------
  |  Branch (364:5): [True: 258k, False: 0]
  |  Branch (364:5): [True: 258k, False: 0]
  ------------------
  365|       |
  366|   258k|    if (!cbw4 || !cbh4) return;
  ------------------
  |  Branch (366:9): [True: 1.07k, False: 257k]
  |  Branch (366:18): [True: 1.00k, False: 256k]
  ------------------
  367|       |
  368|   256k|    const int cbx4 = bx4 >> ss_hor;
  369|   256k|    const int cby4 = by4 >> ss_ver;
  370|       |
  371|   256k|    uint8_t (*level_cache_ptr)[4] =
  372|   256k|        level_cache + (by >> ss_ver) * b4_stride + (bx >> ss_hor);
  373|  1.28M|    for (int y = 0; y < cbh4; y++) {
  ------------------
  |  Branch (373:21): [True: 1.02M, False: 256k]
  ------------------
  374|  10.8M|        for (int x = 0; x < cbw4; x++) {
  ------------------
  |  Branch (374:25): [True: 9.84M, False: 1.02M]
  ------------------
  375|  9.84M|            level_cache_ptr[x][2] = filter_level[2][0][0];
  376|  9.84M|            level_cache_ptr[x][3] = filter_level[3][0][0];
  377|  9.84M|        }
  378|  1.02M|        level_cache_ptr += b4_stride;
  379|  1.02M|    }
  380|       |
  381|   256k|    mask_edges_chroma(lflvl->filter_uv, cby4, cbx4, cbw4, cbh4, skip, uvtx,
  382|   256k|                      auv, luv, ss_hor, ss_ver);
  383|   256k|}
dav1d_calc_eih:
  385|  15.1k|void dav1d_calc_eih(Av1FilterLUT *const lim_lut, const int filter_sharpness) {
  386|       |    // set E/I/H values from loopfilter level
  387|  15.1k|    const int sharp = filter_sharpness;
  388|   983k|    for (int level = 0; level < 64; level++) {
  ------------------
  |  Branch (388:25): [True: 968k, False: 15.1k]
  ------------------
  389|   968k|        int limit = level;
  390|       |
  391|   968k|        if (sharp > 0) {
  ------------------
  |  Branch (391:13): [True: 492k, False: 475k]
  ------------------
  392|   492k|            limit >>= (sharp + 3) >> 2;
  393|   492k|            limit = imin(limit, 9 - sharp);
  394|   492k|        }
  395|   968k|        limit = imax(limit, 1);
  396|       |
  397|   968k|        lim_lut->i[level] = limit;
  398|   968k|        lim_lut->e[level] = 2 * (level + 2) + limit;
  399|   968k|    }
  400|  15.1k|    lim_lut->sharp[0] = (sharp + 3) >> 2;
  401|  15.1k|    lim_lut->sharp[1] = sharp ? 9 - sharp : 0xff;
  ------------------
  |  Branch (401:25): [True: 7.69k, False: 7.43k]
  ------------------
  402|  15.1k|}
dav1d_calc_lf_values:
  441|  56.9k|{
  442|  56.9k|    const int n_seg = hdr->segmentation.enabled ? 8 : 1;
  ------------------
  |  Branch (442:23): [True: 10.4k, False: 46.5k]
  ------------------
  443|       |
  444|  56.9k|    if (!hdr->loopfilter.level_y[0] && !hdr->loopfilter.level_y[1]) {
  ------------------
  |  Branch (444:9): [True: 37.9k, False: 19.0k]
  |  Branch (444:40): [True: 25.8k, False: 12.0k]
  ------------------
  445|  25.8k|        memset(lflvl_values, 0, sizeof(*lflvl_values) * n_seg);
  446|  25.8k|        return;
  447|  25.8k|    }
  448|       |
  449|  31.1k|    const Dav1dLoopfilterModeRefDeltas *const mr_deltas =
  450|  31.1k|        hdr->loopfilter.mode_ref_delta_enabled ?
  ------------------
  |  Branch (450:9): [True: 20.9k, False: 10.1k]
  ------------------
  451|  31.1k|        &hdr->loopfilter.mode_ref_deltas : NULL;
  452|   118k|    for (int s = 0; s < n_seg; s++) {
  ------------------
  |  Branch (452:21): [True: 87.3k, False: 31.1k]
  ------------------
  453|  87.3k|        const Dav1dSegmentationData *const segd =
  454|  87.3k|            hdr->segmentation.enabled ? &hdr->segmentation.seg_data.d[s] : NULL;
  ------------------
  |  Branch (454:13): [True: 64.2k, False: 23.0k]
  ------------------
  455|       |
  456|  87.3k|        calc_lf_value(lflvl_values[s][0], hdr->loopfilter.level_y[0],
  457|  87.3k|                      lf_delta[0], segd ? segd->delta_lf_y_v : 0, mr_deltas);
  ------------------
  |  Branch (457:36): [True: 64.2k, False: 23.0k]
  ------------------
  458|  87.3k|        calc_lf_value(lflvl_values[s][1], hdr->loopfilter.level_y[1],
  459|  87.3k|                      lf_delta[hdr->delta.lf.multi ? 1 : 0],
  ------------------
  |  Branch (459:32): [True: 29.9k, False: 57.3k]
  ------------------
  460|  87.3k|                      segd ? segd->delta_lf_y_h : 0, mr_deltas);
  ------------------
  |  Branch (460:23): [True: 64.2k, False: 23.0k]
  ------------------
  461|  87.3k|        calc_lf_value_chroma(lflvl_values[s][2], hdr->loopfilter.level_u,
  462|  87.3k|                             lf_delta[hdr->delta.lf.multi ? 2 : 0],
  ------------------
  |  Branch (462:39): [True: 29.9k, False: 57.3k]
  ------------------
  463|  87.3k|                             segd ? segd->delta_lf_u : 0, mr_deltas);
  ------------------
  |  Branch (463:30): [True: 64.2k, False: 23.0k]
  ------------------
  464|  87.3k|        calc_lf_value_chroma(lflvl_values[s][3], hdr->loopfilter.level_v,
  465|  87.3k|                             lf_delta[hdr->delta.lf.multi ? 3 : 0],
  ------------------
  |  Branch (465:39): [True: 29.9k, False: 57.3k]
  ------------------
  466|  87.3k|                             segd ? segd->delta_lf_v : 0, mr_deltas);
  ------------------
  |  Branch (466:30): [True: 64.2k, False: 23.0k]
  ------------------
  467|  87.3k|    }
  468|  31.1k|}
lf_mask.c:mask_edges_intra:
  152|   425k|{
  153|   425k|    const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[tx];
  154|   425k|    const int twl4 = t_dim->lw, thl4 = t_dim->lh;
  155|   425k|    const int twl4c = imin(2, twl4), thl4c = imin(2, thl4);
  156|   425k|    int y, x;
  157|       |
  158|       |    // left block edge
  159|   425k|    unsigned mask = 1U << by4;
  160|  2.47M|    for (y = 0; y < h4; y++, mask <<= 1) {
  ------------------
  |  Branch (160:17): [True: 2.04M, False: 425k]
  ------------------
  161|  2.04M|        const int sidx = mask >= 0x10000;
  162|  2.04M|        const unsigned smask = mask >> (sidx << 4);
  163|  2.04M|        masks[0][bx4][imin(twl4c, l[y])][sidx] |= smask;
  164|  2.04M|    }
  165|       |
  166|       |    // top block edge
  167|  2.85M|    for (x = 0, mask = 1U << bx4; x < w4; x++, mask <<= 1) {
  ------------------
  |  Branch (167:35): [True: 2.42M, False: 425k]
  ------------------
  168|  2.42M|        const int sidx = mask >= 0x10000;
  169|  2.42M|        const unsigned smask = mask >> (sidx << 4);
  170|  2.42M|        masks[1][by4][imin(thl4c, a[x])][sidx] |= smask;
  171|  2.42M|    }
  172|       |
  173|       |    // inner (tx) left|right edges
  174|   425k|    const int hstep = t_dim->w;
  175|   425k|    unsigned t = 1U << by4;
  176|   425k|    unsigned inner = (unsigned) ((((uint64_t) t) << h4) - t);
  177|   425k|    unsigned inner1 = inner & 0xffff, inner2 = inner >> 16;
  178|   569k|    for (x = hstep; x < w4; x += hstep) {
  ------------------
  |  Branch (178:21): [True: 143k, False: 425k]
  ------------------
  179|   143k|        if (inner1) masks[0][bx4 + x][twl4c][0] |= inner1;
  ------------------
  |  Branch (179:13): [True: 127k, False: 16.3k]
  ------------------
  180|   143k|        if (inner2) masks[0][bx4 + x][twl4c][1] |= inner2;
  ------------------
  |  Branch (180:13): [True: 37.4k, False: 106k]
  ------------------
  181|   143k|    }
  182|       |
  183|       |    //            top
  184|       |    // inner (tx) --- edges
  185|       |    //           bottom
  186|   425k|    const int vstep = t_dim->h;
  187|   425k|    t = 1U << bx4;
  188|   425k|    inner = (unsigned) ((((uint64_t) t) << w4) - t);
  189|   425k|    inner1 = inner & 0xffff;
  190|   425k|    inner2 = inner >> 16;
  191|   531k|    for (y = vstep; y < h4; y += vstep) {
  ------------------
  |  Branch (191:21): [True: 106k, False: 425k]
  ------------------
  192|   106k|        if (inner1) masks[1][by4 + y][thl4c][0] |= inner1;
  ------------------
  |  Branch (192:13): [True: 67.1k, False: 38.8k]
  ------------------
  193|   106k|        if (inner2) masks[1][by4 + y][thl4c][1] |= inner2;
  ------------------
  |  Branch (193:13): [True: 56.0k, False: 49.9k]
  ------------------
  194|   106k|    }
  195|       |
  196|   425k|    dav1d_memset_likely_pow2(a, thl4c, w4);
  197|   425k|    dav1d_memset_likely_pow2(l, twl4c, h4);
  198|   425k|}
lf_mask.c:mask_edges_chroma:
  207|   513k|{
  208|   513k|    const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[tx];
  209|   513k|    const int twl4 = t_dim->lw, thl4 = t_dim->lh;
  210|   513k|    const int twl4c = !!twl4, thl4c = !!thl4;
  211|   513k|    int y, x;
  212|   513k|    const int vbits = 4 - ss_ver, hbits = 4 - ss_hor;
  213|   513k|    const int vmask = 16 >> ss_ver, hmask = 16 >> ss_hor;
  214|   513k|    const unsigned vmax = 1 << vmask, hmax = 1 << hmask;
  215|       |
  216|       |    // left block edge
  217|   513k|    unsigned mask = 1U << cby4;
  218|  2.52M|    for (y = 0; y < ch4; y++, mask <<= 1) {
  ------------------
  |  Branch (218:17): [True: 2.00M, False: 513k]
  ------------------
  219|  2.00M|        const int sidx = mask >= vmax;
  220|  2.00M|        const unsigned smask = mask >> (sidx << vbits);
  221|  2.00M|        masks[0][cbx4][imin(twl4c, l[y])][sidx] |= smask;
  222|  2.00M|    }
  223|       |
  224|       |    // top block edge
  225|  2.80M|    for (x = 0, mask = 1U << cbx4; x < cw4; x++, mask <<= 1) {
  ------------------
  |  Branch (225:36): [True: 2.29M, False: 513k]
  ------------------
  226|  2.29M|        const int sidx = mask >= hmax;
  227|  2.29M|        const unsigned smask = mask >> (sidx << hbits);
  228|  2.29M|        masks[1][cby4][imin(thl4c, a[x])][sidx] |= smask;
  229|  2.29M|    }
  230|       |
  231|   513k|    if (!skip_inter) {
  ------------------
  |  Branch (231:9): [True: 381k, False: 132k]
  ------------------
  232|       |        // inner (tx) left|right edges
  233|   381k|        const int hstep = t_dim->w;
  234|   381k|        unsigned t = 1U << cby4;
  235|   381k|        unsigned inner = (unsigned) ((((uint64_t) t) << ch4) - t);
  236|   381k|        unsigned inner1 = inner & ((1 << vmask) - 1), inner2 = inner >> vmask;
  237|   417k|        for (x = hstep; x < cw4; x += hstep) {
  ------------------
  |  Branch (237:25): [True: 35.7k, False: 381k]
  ------------------
  238|  35.7k|            if (inner1) masks[0][cbx4 + x][twl4c][0] |= inner1;
  ------------------
  |  Branch (238:17): [True: 34.2k, False: 1.52k]
  ------------------
  239|  35.7k|            if (inner2) masks[0][cbx4 + x][twl4c][1] |= inner2;
  ------------------
  |  Branch (239:17): [True: 17.8k, False: 17.9k]
  ------------------
  240|  35.7k|        }
  241|       |
  242|       |        //            top
  243|       |        // inner (tx) --- edges
  244|       |        //           bottom
  245|   381k|        const int vstep = t_dim->h;
  246|   381k|        t = 1U << cbx4;
  247|   381k|        inner = (unsigned) ((((uint64_t) t) << cw4) - t);
  248|   381k|        inner1 = inner & ((1 << hmask) - 1), inner2 = inner >> hmask;
  249|   422k|        for (y = vstep; y < ch4; y += vstep) {
  ------------------
  |  Branch (249:25): [True: 40.8k, False: 381k]
  ------------------
  250|  40.8k|            if (inner1) masks[1][cby4 + y][thl4c][0] |= inner1;
  ------------------
  |  Branch (250:17): [True: 32.0k, False: 8.87k]
  ------------------
  251|  40.8k|            if (inner2) masks[1][cby4 + y][thl4c][1] |= inner2;
  ------------------
  |  Branch (251:17): [True: 27.6k, False: 13.2k]
  ------------------
  252|  40.8k|        }
  253|   381k|    }
  254|       |
  255|   513k|    dav1d_memset_likely_pow2(a, thl4c, cw4);
  256|   513k|    dav1d_memset_likely_pow2(l, twl4c, ch4);
  257|   513k|}
lf_mask.c:mask_edges_inter:
   85|   792k|{
   86|   792k|    const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[max_tx];
   87|   792k|    int y, x;
   88|       |
   89|   792k|    ALIGN_STK_16(uint8_t, txa, 2 /* edge */, [2 /* txsz, step */][32 /* y */][32 /* x */]);
  ------------------
  |  |  100|   792k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|   792k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
   90|  1.63M|    for (int y_off = 0, y = 0; y < h4; y += t_dim->h, y_off++)
  ------------------
  |  Branch (90:32): [True: 847k, False: 792k]
  ------------------
   91|  1.93M|        for (int x_off = 0, x = 0; x < w4; x += t_dim->w, x_off++)
  ------------------
  |  Branch (91:36): [True: 1.09M, False: 847k]
  ------------------
   92|  1.09M|            decomp_tx((uint8_t(*)[2][32][32]) &txa[0][0][y][x],
   93|  1.09M|                      max_tx, 0, y_off, x_off, tx_masks);
   94|       |
   95|       |    // left block edge
   96|   792k|    unsigned mask = 1U << by4;
   97|  5.34M|    for (y = 0; y < h4; y++, mask <<= 1) {
  ------------------
  |  Branch (97:17): [True: 4.55M, False: 792k]
  ------------------
   98|  4.55M|        const int sidx = mask >= 0x10000;
   99|  4.55M|        const unsigned smask = mask >> (sidx << 4);
  100|  4.55M|        masks[0][bx4][imin(txa[0][0][y][0], l[y])][sidx] |= smask;
  101|  4.55M|    }
  102|       |
  103|       |    // top block edge
  104|  4.94M|    for (x = 0, mask = 1U << bx4; x < w4; x++, mask <<= 1) {
  ------------------
  |  Branch (104:35): [True: 4.15M, False: 792k]
  ------------------
  105|  4.15M|        const int sidx = mask >= 0x10000;
  106|  4.15M|        const unsigned smask = mask >> (sidx << 4);
  107|  4.15M|        masks[1][by4][imin(txa[1][0][0][x], a[x])][sidx] |= smask;
  108|  4.15M|    }
  109|       |
  110|   792k|    if (!skip) {
  ------------------
  |  Branch (110:9): [True: 244k, False: 547k]
  ------------------
  111|       |        // inner (tx) left|right edges
  112|  1.20M|        for (y = 0, mask = 1U << by4; y < h4; y++, mask <<= 1) {
  ------------------
  |  Branch (112:39): [True: 963k, False: 244k]
  ------------------
  113|   963k|            const int sidx = mask >= 0x10000U;
  114|   963k|            const unsigned smask = mask >> (sidx << 4);
  115|   963k|            int ltx = txa[0][0][y][0];
  116|   963k|            int step = txa[0][1][y][0];
  117|  1.27M|            for (x = step; x < w4; x += step) {
  ------------------
  |  Branch (117:28): [True: 314k, False: 963k]
  ------------------
  118|   314k|                const int rtx = txa[0][0][y][x];
  119|   314k|                masks[0][bx4 + x][imin(rtx, ltx)][sidx] |= smask;
  120|   314k|                ltx = rtx;
  121|   314k|                step = txa[0][1][y][x];
  122|   314k|            }
  123|   963k|        }
  124|       |
  125|       |        //            top
  126|       |        // inner (tx) --- edges
  127|       |        //           bottom
  128|  1.27M|        for (x = 0, mask = 1U << bx4; x < w4; x++, mask <<= 1) {
  ------------------
  |  Branch (128:39): [True: 1.02M, False: 244k]
  ------------------
  129|  1.02M|            const int sidx = mask >= 0x10000U;
  130|  1.02M|            const unsigned smask = mask >> (sidx << 4);
  131|  1.02M|            int ttx = txa[1][0][0][x];
  132|  1.02M|            int step = txa[1][1][0][x];
  133|  1.33M|            for (y = step; y < h4; y += step) {
  ------------------
  |  Branch (133:28): [True: 307k, False: 1.02M]
  ------------------
  134|   307k|                const int btx = txa[1][0][y][x];
  135|   307k|                masks[1][by4 + y][imin(ttx, btx)][sidx] |= smask;
  136|   307k|                ttx = btx;
  137|   307k|                step = txa[1][1][y][x];
  138|   307k|            }
  139|  1.02M|        }
  140|   244k|    }
  141|       |
  142|  5.34M|    for (y = 0; y < h4; y++)
  ------------------
  |  Branch (142:17): [True: 4.55M, False: 792k]
  ------------------
  143|  4.55M|        l[y] = txa[0][0][y][w4 - 1];
  144|   792k|    memcpy(a, txa[1][0][h4 - 1], w4);
  145|   792k|}
lf_mask.c:decomp_tx:
   44|  1.26M|{
   45|  1.26M|    const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[from];
   46|  1.26M|    const int is_split = (from == (int) TX_4X4 || depth > 1) ? 0 :
  ------------------
  |  Branch (46:27): [True: 325k, False: 943k]
  |  Branch (46:51): [True: 45.9k, False: 897k]
  ------------------
   47|  1.26M|        (tx_masks[depth] >> (y_off * 4 + x_off)) & 1;
   48|       |
   49|  1.26M|    if (is_split) {
  ------------------
  |  Branch (49:9): [True: 58.5k, False: 1.20M]
  ------------------
   50|  58.5k|        const enum RectTxfmSize sub = t_dim->sub;
   51|  58.5k|        const int htw4 = t_dim->w >> 1, hth4 = t_dim->h >> 1;
   52|       |
   53|  58.5k|        decomp_tx(txa, sub, depth + 1, y_off * 2 + 0, x_off * 2 + 0, tx_masks);
   54|  58.5k|        if (t_dim->w >= t_dim->h)
  ------------------
  |  Branch (54:13): [True: 46.5k, False: 11.9k]
  ------------------
   55|  46.5k|            decomp_tx((uint8_t(*)[2][32][32]) &txa[0][0][0][htw4],
   56|  46.5k|                      sub, depth + 1, y_off * 2 + 0, x_off * 2 + 1, tx_masks);
   57|  58.5k|        if (t_dim->h >= t_dim->w) {
  ------------------
  |  Branch (57:13): [True: 42.0k, False: 16.4k]
  ------------------
   58|  42.0k|            decomp_tx((uint8_t(*)[2][32][32]) &txa[0][0][hth4][0],
   59|  42.0k|                      sub, depth + 1, y_off * 2 + 1, x_off * 2 + 0, tx_masks);
   60|  42.0k|            if (t_dim->w >= t_dim->h)
  ------------------
  |  Branch (60:17): [True: 30.1k, False: 11.9k]
  ------------------
   61|  30.1k|                decomp_tx((uint8_t(*)[2][32][32]) &txa[0][0][hth4][htw4],
   62|  30.1k|                          sub, depth + 1, y_off * 2 + 1, x_off * 2 + 1, tx_masks);
   63|  42.0k|        }
   64|  1.20M|    } else {
   65|  1.20M|        const int lw = imin(2, t_dim->lw), lh = imin(2, t_dim->lh);
   66|       |
   67|  1.20M|#define set_ctx(rep_macro) \
   68|  1.20M|        for (int y = 0; y < t_dim->h; y++) { \
   69|  1.20M|            rep_macro(txa[0][0][y], 0, lw); \
   70|  1.20M|            rep_macro(txa[1][0][y], 0, lh); \
   71|  1.20M|            txa[0][1][y][0] = t_dim->w; \
   72|  1.20M|        }
   73|  1.20M|        case_set_upto16(t_dim->lw);
  ------------------
  |  |   80|  1.20M|    switch (var) { \
  |  |   81|   400k|    case 0: set_ctx(set_ctx1); break; \
  |  |  ------------------
  |  |  |  |   68|   940k|        for (int y = 0; y < t_dim->h; y++) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (68:25): [True: 540k, False: 400k]
  |  |  |  |  ------------------
  |  |  |  |   69|   540k|            rep_macro(txa[0][0][y], 0, lw); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   81|   540k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   540k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   70|   540k|            rep_macro(txa[1][0][y], 0, lh); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   81|   540k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   540k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   71|   540k|            txa[0][1][y][0] = t_dim->w; \
  |  |  |  |   72|   540k|        }
  |  |  ------------------
  |  |  |  Branch (81:5): [True: 400k, False: 809k]
  |  |  ------------------
  |  |   82|   297k|    case 1: set_ctx(set_ctx2); break; \
  |  |  ------------------
  |  |  |  |   68|  1.10M|        for (int y = 0; y < t_dim->h; y++) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (68:25): [True: 807k, False: 297k]
  |  |  |  |  ------------------
  |  |  |  |   69|   807k|            rep_macro(txa[0][0][y], 0, lw); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   82|   807k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   807k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   70|   807k|            rep_macro(txa[1][0][y], 0, lh); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   82|   807k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   807k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   71|   807k|            txa[0][1][y][0] = t_dim->w; \
  |  |  |  |   72|   807k|        }
  |  |  ------------------
  |  |  |  Branch (82:5): [True: 297k, False: 911k]
  |  |  ------------------
  |  |   83|   251k|    case 2: set_ctx(set_ctx4); break; \
  |  |  ------------------
  |  |  |  |   68|  1.32M|        for (int y = 0; y < t_dim->h; y++) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (68:25): [True: 1.07M, False: 251k]
  |  |  |  |  ------------------
  |  |  |  |   69|  1.07M|            rep_macro(txa[0][0][y], 0, lw); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   83|  1.07M|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|  1.07M|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   70|  1.07M|            rep_macro(txa[1][0][y], 0, lh); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   83|  1.07M|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|  1.07M|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   71|  1.07M|            txa[0][1][y][0] = t_dim->w; \
  |  |  |  |   72|  1.07M|        }
  |  |  ------------------
  |  |  |  Branch (83:5): [True: 251k, False: 957k]
  |  |  ------------------
  |  |   84|  74.1k|    case 3: set_ctx(set_ctx8); break; \
  |  |  ------------------
  |  |  |  |   68|   532k|        for (int y = 0; y < t_dim->h; y++) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (68:25): [True: 458k, False: 74.1k]
  |  |  |  |  ------------------
  |  |  |  |   69|   458k|            rep_macro(txa[0][0][y], 0, lw); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|   458k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   458k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   70|   458k|            rep_macro(txa[1][0][y], 0, lh); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|   458k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   458k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   71|   458k|            txa[0][1][y][0] = t_dim->w; \
  |  |  |  |   72|   458k|        }
  |  |  ------------------
  |  |  |  Branch (84:5): [True: 74.1k, False: 1.13M]
  |  |  ------------------
  |  |   85|   185k|    case 4: set_ctx(set_ctx16); break; \
  |  |  ------------------
  |  |  |  |   68|  3.07M|        for (int y = 0; y < t_dim->h; y++) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (68:25): [True: 2.88M, False: 185k]
  |  |  |  |  ------------------
  |  |  |  |   69|  2.88M|            rep_macro(txa[0][0][y], 0, lw); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  2.88M|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|  2.88M|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|  2.88M|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|  2.88M|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 2.88M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   70|  2.88M|            rep_macro(txa[1][0][y], 0, lh); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  2.88M|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|  2.88M|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|  2.88M|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|  2.88M|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 2.88M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   71|  2.88M|            txa[0][1][y][0] = t_dim->w; \
  |  |  |  |   72|  2.88M|        }
  |  |  ------------------
  |  |  |  Branch (85:5): [True: 185k, False: 1.02M]
  |  |  ------------------
  |  |   86|      0|    default: assert(0); \
  |  |  ------------------
  |  |  |  Branch (86:5): [True: 0, False: 1.20M]
  |  |  ------------------
  |  |   87|  1.20M|    }
  ------------------
  |  Branch (73:9): [Folded, False: 0]
  ------------------
   74|  1.20M|#undef set_ctx
   75|  1.20M|        dav1d_memset_pow2[t_dim->lw](txa[1][1][0], t_dim->h);
   76|  1.20M|    }
   77|  1.26M|}
lf_mask.c:calc_lf_value:
  408|   233k|{
  409|   233k|    const int base = iclip(iclip(base_lvl + lf_delta, 0, 63) + seg_delta, 0, 63);
  410|       |
  411|   233k|    if (!mr_delta) {
  ------------------
  |  Branch (411:9): [True: 83.4k, False: 150k]
  ------------------
  412|  83.4k|        memset(lflvl_values, base, sizeof(*lflvl_values) * 8);
  413|   150k|    } else {
  414|   150k|        const int sh = base >= 32;
  415|   150k|        lflvl_values[0][0] = lflvl_values[0][1] =
  416|   150k|            iclip(base + (mr_delta->ref_delta[0] * (1 << sh)), 0, 63);
  417|  1.20M|        for (int r = 1; r < 8; r++) {
  ------------------
  |  Branch (417:25): [True: 1.05M, False: 150k]
  ------------------
  418|  3.16M|            for (int m = 0; m < 2; m++) {
  ------------------
  |  Branch (418:29): [True: 2.10M, False: 1.05M]
  ------------------
  419|  2.10M|                const int delta =
  420|  2.10M|                    mr_delta->mode_delta[m] + mr_delta->ref_delta[r];
  421|  2.10M|                lflvl_values[r][m] = iclip(base + (delta * (1 << sh)), 0, 63);
  422|  2.10M|            }
  423|  1.05M|        }
  424|   150k|    }
  425|   233k|}
lf_mask.c:calc_lf_value_chroma:
  431|   174k|{
  432|   174k|    if (!base_lvl)
  ------------------
  |  Branch (432:9): [True: 115k, False: 59.3k]
  ------------------
  433|   115k|        memset(lflvl_values, 0, sizeof(*lflvl_values) * 8);
  434|  59.3k|    else
  435|  59.3k|        calc_lf_value(lflvl_values, base_lvl, lf_delta, seg_delta, mr_delta);
  436|   174k|}

dav1d_version:
   61|  9.52k|COLD const char *dav1d_version(void) {
   62|  9.52k|    return DAV1D_VERSION;
  ------------------
  |  |    2|  9.52k|#define DAV1D_VERSION "1718ff9"
  ------------------
   63|  9.52k|}
dav1d_default_settings:
   71|  9.51k|COLD void dav1d_default_settings(Dav1dSettings *const s) {
   72|  9.51k|    s->n_threads = 0;
   73|  9.51k|    s->max_frame_delay = 0;
   74|  9.51k|    s->apply_grain = 1;
   75|  9.51k|    s->allocator.cookie = NULL;
   76|  9.51k|    s->allocator.alloc_picture_callback = dav1d_default_picture_alloc;
   77|  9.51k|    s->allocator.release_picture_callback = dav1d_default_picture_release;
   78|  9.51k|    s->logger.cookie = NULL;
   79|       |    s->logger.callback = dav1d_log_default_callback;
  ------------------
  |  |   43|  9.51k|#define dav1d_log_default_callback NULL
  ------------------
   80|  9.51k|    s->operating_point = 0;
   81|  9.51k|    s->all_layers = 1; // just until the tests are adjusted
   82|  9.51k|    s->frame_size_limit = 0;
   83|  9.51k|    s->strict_std_compliance = 0;
   84|  9.51k|    s->output_invisible_frames = 0;
   85|  9.51k|    s->inloop_filters = DAV1D_INLOOPFILTER_ALL;
   86|  9.51k|    s->decode_frame_type = DAV1D_DECODEFRAMETYPE_ALL;
   87|  9.51k|}
dav1d_open:
  140|  9.51k|COLD int dav1d_open(Dav1dContext **const c_out, const Dav1dSettings *const s) {
  141|  9.51k|    static pthread_once_t initted = PTHREAD_ONCE_INIT;
  142|  9.51k|    pthread_once(&initted, init_internal);
  143|       |
  144|  9.51k|    validate_input_or_ret(c_out != NULL, DAV1D_ERR(EINVAL));
  ------------------
  |  |   52|  9.51k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:9): [True: 0, False: 9.51k]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
  145|  9.51k|    validate_input_or_ret(s != NULL, DAV1D_ERR(EINVAL));
  ------------------
  |  |   52|  9.51k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:9): [True: 0, False: 9.51k]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
  146|  9.51k|    validate_input_or_ret(s->n_threads >= 0 &&
  ------------------
  |  |   52|  19.0k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:11): [True: 9.51k, False: 0]
  |  |  |  Branch (52:11): [True: 9.51k, False: 0]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
  147|  9.51k|                          s->n_threads <= DAV1D_MAX_THREADS, DAV1D_ERR(EINVAL));
  148|  9.51k|    validate_input_or_ret(s->max_frame_delay >= 0 &&
  ------------------
  |  |   52|  19.0k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:11): [True: 9.51k, False: 0]
  |  |  |  Branch (52:11): [True: 9.51k, False: 0]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
  149|  9.51k|                          s->max_frame_delay <= DAV1D_MAX_FRAME_DELAY, DAV1D_ERR(EINVAL));
  150|  9.51k|    validate_input_or_ret(s->allocator.alloc_picture_callback != NULL,
  ------------------
  |  |   52|  9.51k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:9): [True: 0, False: 9.51k]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
  151|  9.51k|                          DAV1D_ERR(EINVAL));
  152|  9.51k|    validate_input_or_ret(s->allocator.release_picture_callback != NULL,
  ------------------
  |  |   52|  9.51k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:9): [True: 0, False: 9.51k]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
  153|  9.51k|                          DAV1D_ERR(EINVAL));
  154|  9.51k|    validate_input_or_ret(s->operating_point >= 0 &&
  ------------------
  |  |   52|  19.0k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:11): [True: 9.51k, False: 0]
  |  |  |  Branch (52:11): [True: 9.51k, False: 0]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
  155|  9.51k|                          s->operating_point <= 31, DAV1D_ERR(EINVAL));
  156|  9.51k|    validate_input_or_ret(s->decode_frame_type >= DAV1D_DECODEFRAMETYPE_ALL &&
  ------------------
  |  |   52|  19.0k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:11): [True: 9.51k, False: 0]
  |  |  |  Branch (52:11): [True: 9.51k, False: 0]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
  157|  9.51k|                          s->decode_frame_type <= DAV1D_DECODEFRAMETYPE_KEY, DAV1D_ERR(EINVAL));
  158|       |
  159|  9.51k|    pthread_attr_t thread_attr;
  160|  9.51k|    if (pthread_attr_init(&thread_attr)) return DAV1D_ERR(ENOMEM);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  |  Branch (160:9): [True: 0, False: 9.51k]
  ------------------
  161|  9.51k|    size_t stack_size = 1024 * 1024 + get_stack_size_internal(&thread_attr);
  162|       |
  163|  9.51k|    pthread_attr_setstacksize(&thread_attr, stack_size);
  164|       |
  165|  9.51k|    Dav1dContext *const c = *c_out = dav1d_alloc_aligned(ALLOC_COMMON_CTX, sizeof(*c), 64);
  ------------------
  |  |  134|  9.51k|#define dav1d_alloc_aligned(type, sz, align) dav1d_alloc_aligned_internal(sz, align)
  ------------------
  166|  9.51k|    if (!c) goto error;
  ------------------
  |  Branch (166:9): [True: 0, False: 9.51k]
  ------------------
  167|  9.51k|    memset(c, 0, sizeof(*c));
  168|       |
  169|  9.51k|    c->allocator = s->allocator;
  170|  9.51k|    c->logger = s->logger;
  171|  9.51k|    c->apply_grain = s->apply_grain;
  172|  9.51k|    c->operating_point = s->operating_point;
  173|  9.51k|    c->all_layers = s->all_layers;
  174|  9.51k|    c->frame_size_limit = s->frame_size_limit;
  175|  9.51k|    c->strict_std_compliance = s->strict_std_compliance;
  176|  9.51k|    c->output_invisible_frames = s->output_invisible_frames;
  177|  9.51k|    c->inloop_filters = s->inloop_filters;
  178|  9.51k|    c->decode_frame_type = s->decode_frame_type;
  179|       |
  180|  9.51k|    dav1d_data_props_set_defaults(&c->cached_error_props);
  181|       |
  182|  9.51k|    if (dav1d_mem_pool_init(ALLOC_OBU_HDR, &c->seq_hdr_pool) ||
  ------------------
  |  |  131|  19.0k|#define dav1d_mem_pool_init(type, pool) dav1d_mem_pool_init(pool)
  |  |  ------------------
  |  |  |  Branch (131:41): [True: 0, False: 9.51k]
  |  |  ------------------
  ------------------
  183|  9.51k|        dav1d_mem_pool_init(ALLOC_OBU_HDR, &c->frame_hdr_pool) ||
  ------------------
  |  |  131|  19.0k|#define dav1d_mem_pool_init(type, pool) dav1d_mem_pool_init(pool)
  |  |  ------------------
  |  |  |  Branch (131:41): [True: 0, False: 9.51k]
  |  |  ------------------
  ------------------
  184|  9.51k|        dav1d_mem_pool_init(ALLOC_SEGMAP, &c->segmap_pool) ||
  ------------------
  |  |  131|  19.0k|#define dav1d_mem_pool_init(type, pool) dav1d_mem_pool_init(pool)
  |  |  ------------------
  |  |  |  Branch (131:41): [True: 0, False: 9.51k]
  |  |  ------------------
  ------------------
  185|  9.51k|        dav1d_mem_pool_init(ALLOC_REFMVS, &c->refmvs_pool) ||
  ------------------
  |  |  131|  19.0k|#define dav1d_mem_pool_init(type, pool) dav1d_mem_pool_init(pool)
  |  |  ------------------
  |  |  |  Branch (131:41): [True: 0, False: 9.51k]
  |  |  ------------------
  ------------------
  186|  9.51k|        dav1d_mem_pool_init(ALLOC_PIC_CTX, &c->pic_ctx_pool) ||
  ------------------
  |  |  131|  19.0k|#define dav1d_mem_pool_init(type, pool) dav1d_mem_pool_init(pool)
  |  |  ------------------
  |  |  |  Branch (131:41): [True: 0, False: 9.51k]
  |  |  ------------------
  ------------------
  187|  9.51k|        dav1d_mem_pool_init(ALLOC_CDF, &c->cdf_pool))
  ------------------
  |  |  131|  9.51k|#define dav1d_mem_pool_init(type, pool) dav1d_mem_pool_init(pool)
  |  |  ------------------
  |  |  |  Branch (131:41): [True: 0, False: 9.51k]
  |  |  ------------------
  ------------------
  188|      0|    {
  189|      0|        goto error;
  190|      0|    }
  191|       |
  192|  9.51k|    if (c->allocator.alloc_picture_callback   == dav1d_default_picture_alloc &&
  ------------------
  |  Branch (192:9): [True: 9.51k, False: 0]
  ------------------
  193|  9.51k|        c->allocator.release_picture_callback == dav1d_default_picture_release)
  ------------------
  |  Branch (193:9): [True: 9.51k, False: 0]
  ------------------
  194|  9.51k|    {
  195|  9.51k|        if (c->allocator.cookie) goto error;
  ------------------
  |  Branch (195:13): [True: 0, False: 9.51k]
  ------------------
  196|  9.51k|        if (dav1d_mem_pool_init(ALLOC_PIC, &c->picture_pool)) goto error;
  ------------------
  |  |  131|  9.51k|#define dav1d_mem_pool_init(type, pool) dav1d_mem_pool_init(pool)
  |  |  ------------------
  |  |  |  Branch (131:41): [True: 0, False: 9.51k]
  |  |  ------------------
  ------------------
  197|  9.51k|        c->allocator.cookie = c->picture_pool;
  198|  9.51k|    } else if (c->allocator.alloc_picture_callback   == dav1d_default_picture_alloc ||
  ------------------
  |  Branch (198:16): [True: 0, False: 0]
  ------------------
  199|      0|               c->allocator.release_picture_callback == dav1d_default_picture_release)
  ------------------
  |  Branch (199:16): [True: 0, False: 0]
  ------------------
  200|      0|    {
  201|      0|        goto error;
  202|      0|    }
  203|       |
  204|       |    /* On 32-bit systems extremely large frame sizes can cause overflows in
  205|       |     * dav1d_decode_frame() malloc size calculations. Prevent that from occuring
  206|       |     * by enforcing a maximum frame size limit, chosen to roughly correspond to
  207|       |     * the largest size possible to decode without exhausting virtual memory. */
  208|  9.51k|    if (sizeof(size_t) < 8 && s->frame_size_limit - 1 >= 8192 * 8192) {
  ------------------
  |  Branch (208:9): [Folded, False: 9.51k]
  |  Branch (208:31): [True: 0, False: 0]
  ------------------
  209|      0|        c->frame_size_limit = 8192 * 8192;
  210|      0|        if (s->frame_size_limit)
  ------------------
  |  Branch (210:13): [True: 0, False: 0]
  ------------------
  211|      0|            dav1d_log(c, "Frame size limit reduced from %u to %u.\n",
  ------------------
  |  |   44|      0|#define dav1d_log(...) do { } while(0)
  |  |  ------------------
  |  |  |  Branch (44:37): [Folded, False: 0]
  |  |  ------------------
  ------------------
  212|      0|                      s->frame_size_limit, c->frame_size_limit);
  213|      0|    }
  214|       |
  215|  9.51k|    c->flush = &c->flush_mem;
  216|  9.51k|    atomic_init(c->flush, 0);
  217|       |
  218|  9.51k|    get_num_threads(c, s, &c->n_tc, &c->n_fc);
  219|       |
  220|  9.51k|    c->fc = dav1d_alloc_aligned(ALLOC_THREAD_CTX, sizeof(*c->fc) * c->n_fc, 32);
  ------------------
  |  |  134|  9.51k|#define dav1d_alloc_aligned(type, sz, align) dav1d_alloc_aligned_internal(sz, align)
  ------------------
  221|  9.51k|    if (!c->fc) goto error;
  ------------------
  |  Branch (221:9): [True: 0, False: 9.51k]
  ------------------
  222|  9.51k|    memset(c->fc, 0, sizeof(*c->fc) * c->n_fc);
  223|       |
  224|  9.51k|    c->tc = dav1d_alloc_aligned(ALLOC_THREAD_CTX, sizeof(*c->tc) * c->n_tc, 64);
  ------------------
  |  |  134|  9.51k|#define dav1d_alloc_aligned(type, sz, align) dav1d_alloc_aligned_internal(sz, align)
  ------------------
  225|  9.51k|    if (!c->tc) goto error;
  ------------------
  |  Branch (225:9): [True: 0, False: 9.51k]
  ------------------
  226|  9.51k|    memset(c->tc, 0, sizeof(*c->tc) * c->n_tc);
  227|  9.51k|    if (c->n_tc > 1) {
  ------------------
  |  Branch (227:9): [True: 0, False: 9.51k]
  ------------------
  228|      0|        if (pthread_mutex_init(&c->task_thread.lock, NULL)) goto error;
  ------------------
  |  Branch (228:13): [True: 0, False: 0]
  ------------------
  229|      0|        if (pthread_cond_init(&c->task_thread.cond, NULL)) {
  ------------------
  |  Branch (229:13): [True: 0, False: 0]
  ------------------
  230|      0|            pthread_mutex_destroy(&c->task_thread.lock);
  231|      0|            goto error;
  232|      0|        }
  233|      0|        if (pthread_cond_init(&c->task_thread.delayed_fg.cond, NULL)) {
  ------------------
  |  Branch (233:13): [True: 0, False: 0]
  ------------------
  234|      0|            pthread_cond_destroy(&c->task_thread.cond);
  235|      0|            pthread_mutex_destroy(&c->task_thread.lock);
  236|      0|            goto error;
  237|      0|        }
  238|      0|        c->task_thread.cur = c->n_fc;
  239|      0|        atomic_init(&c->task_thread.reset_task_cur, UINT_MAX);
  240|      0|        atomic_init(&c->task_thread.cond_signaled, 0);
  241|      0|        c->task_thread.inited = 1;
  242|      0|    }
  243|       |
  244|  9.51k|    if (c->n_fc > 1) {
  ------------------
  |  Branch (244:9): [True: 0, False: 9.51k]
  ------------------
  245|      0|        const size_t out_delayed_sz = sizeof(*c->frame_thread.out_delayed) * c->n_fc;
  246|      0|        c->frame_thread.out_delayed =
  247|      0|            dav1d_malloc(ALLOC_THREAD_CTX, out_delayed_sz);
  ------------------
  |  |  132|      0|#define dav1d_malloc(type, sz) malloc(sz)
  ------------------
  248|      0|        if (!c->frame_thread.out_delayed) goto error;
  ------------------
  |  Branch (248:13): [True: 0, False: 0]
  ------------------
  249|      0|        memset(c->frame_thread.out_delayed, 0, out_delayed_sz);
  250|      0|    }
  251|  19.0k|    for (unsigned n = 0; n < c->n_fc; n++) {
  ------------------
  |  Branch (251:26): [True: 9.51k, False: 9.51k]
  ------------------
  252|  9.51k|        Dav1dFrameContext *const f = &c->fc[n];
  253|  9.51k|        if (c->n_tc > 1) {
  ------------------
  |  Branch (253:13): [True: 0, False: 9.51k]
  ------------------
  254|      0|            if (pthread_mutex_init(&f->task_thread.lock, NULL)) goto error;
  ------------------
  |  Branch (254:17): [True: 0, False: 0]
  ------------------
  255|      0|            if (pthread_cond_init(&f->task_thread.cond, NULL)) {
  ------------------
  |  Branch (255:17): [True: 0, False: 0]
  ------------------
  256|      0|                pthread_mutex_destroy(&f->task_thread.lock);
  257|      0|                goto error;
  258|      0|            }
  259|      0|            if (pthread_mutex_init(&f->task_thread.pending_tasks.lock, NULL)) {
  ------------------
  |  Branch (259:17): [True: 0, False: 0]
  ------------------
  260|      0|                pthread_cond_destroy(&f->task_thread.cond);
  261|      0|                pthread_mutex_destroy(&f->task_thread.lock);
  262|      0|                goto error;
  263|      0|            }
  264|      0|        }
  265|  9.51k|        f->c = c;
  266|  9.51k|        f->task_thread.ttd = &c->task_thread;
  267|  9.51k|        f->lf.last_sharpness = -1;
  268|  9.51k|    }
  269|       |
  270|  19.0k|    for (unsigned m = 0; m < c->n_tc; m++) {
  ------------------
  |  Branch (270:26): [True: 9.51k, False: 9.51k]
  ------------------
  271|  9.51k|        Dav1dTaskContext *const t = &c->tc[m];
  272|  9.51k|        t->f = &c->fc[0];
  273|  9.51k|        t->task_thread.ttd = &c->task_thread;
  274|  9.51k|        t->c = c;
  275|  9.51k|        memset(t->cf_16bpc, 0, sizeof(t->cf_16bpc));
  276|  9.51k|        if (c->n_tc > 1) {
  ------------------
  |  Branch (276:13): [True: 0, False: 9.51k]
  ------------------
  277|      0|            if (pthread_mutex_init(&t->task_thread.td.lock, NULL)) goto error;
  ------------------
  |  Branch (277:17): [True: 0, False: 0]
  ------------------
  278|      0|            if (pthread_cond_init(&t->task_thread.td.cond, NULL)) {
  ------------------
  |  Branch (278:17): [True: 0, False: 0]
  ------------------
  279|      0|                pthread_mutex_destroy(&t->task_thread.td.lock);
  280|      0|                goto error;
  281|      0|            }
  282|      0|            if (pthread_create(&t->task_thread.td.thread, &thread_attr, dav1d_worker_task, t)) {
  ------------------
  |  Branch (282:17): [True: 0, False: 0]
  ------------------
  283|      0|                pthread_cond_destroy(&t->task_thread.td.cond);
  284|      0|                pthread_mutex_destroy(&t->task_thread.td.lock);
  285|      0|                goto error;
  286|      0|            }
  287|      0|            t->task_thread.td.inited = 1;
  288|      0|        }
  289|  9.51k|    }
  290|  9.51k|    dav1d_pal_dsp_init(&c->pal_dsp);
  291|  9.51k|    dav1d_refmvs_dsp_init(&c->refmvs_dsp);
  292|       |
  293|  9.51k|    pthread_attr_destroy(&thread_attr);
  294|       |
  295|  9.51k|    return 0;
  296|       |
  297|      0|error:
  298|      0|    if (c) close_internal(c_out, 0);
  ------------------
  |  Branch (298:9): [True: 0, False: 0]
  ------------------
  299|      0|    pthread_attr_destroy(&thread_attr);
  300|      0|    return DAV1D_ERR(ENOMEM);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  301|  9.51k|}
dav1d_send_data:
  439|  75.6k|{
  440|  75.6k|    validate_input_or_ret(c != NULL, DAV1D_ERR(EINVAL));
  ------------------
  |  |   52|  75.6k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:9): [True: 0, False: 75.6k]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
  441|  75.6k|    validate_input_or_ret(in != NULL, DAV1D_ERR(EINVAL));
  ------------------
  |  |   52|  75.6k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:9): [True: 0, False: 75.6k]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
  442|       |
  443|  75.6k|    if (in->data) {
  ------------------
  |  Branch (443:9): [True: 75.6k, False: 0]
  ------------------
  444|  75.6k|        validate_input_or_ret(in->sz > 0 && in->sz <= SIZE_MAX / 2, DAV1D_ERR(EINVAL));
  ------------------
  |  |   52|   151k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:11): [True: 75.6k, False: 0]
  |  |  |  Branch (52:11): [True: 75.6k, False: 0]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
  445|  75.6k|        c->drain = 0;
  446|  75.6k|    }
  447|  75.6k|    if (c->in.data)
  ------------------
  |  Branch (447:9): [True: 3.87k, False: 71.8k]
  ------------------
  448|  3.87k|        return DAV1D_ERR(EAGAIN);
  ------------------
  |  |   58|  3.87k|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  449|  71.8k|    dav1d_data_ref(&c->in, in);
  450|       |
  451|  71.8k|    int res = gen_picture(c);
  452|  71.8k|    if (!res)
  ------------------
  |  Branch (452:9): [True: 22.5k, False: 49.2k]
  ------------------
  453|  22.5k|        dav1d_data_unref_internal(in);
  454|       |
  455|  71.8k|    return res;
  456|  75.6k|}
dav1d_get_picture:
  459|  37.7k|{
  460|  37.7k|    validate_input_or_ret(c != NULL, DAV1D_ERR(EINVAL));
  ------------------
  |  |   52|  37.7k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:9): [True: 0, False: 37.7k]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
  461|  37.7k|    validate_input_or_ret(out != NULL, DAV1D_ERR(EINVAL));
  ------------------
  |  |   52|  37.7k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:9): [True: 0, False: 37.7k]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
  462|       |
  463|  37.7k|    const int drain = c->drain;
  464|  37.7k|    c->drain = 1;
  465|       |
  466|  37.7k|    int res = gen_picture(c);
  467|  37.7k|    if (res < 0)
  ------------------
  |  Branch (467:9): [True: 1.75k, False: 36.0k]
  ------------------
  468|  1.75k|        return res;
  469|       |
  470|  36.0k|    if (c->cached_error) {
  ------------------
  |  Branch (470:9): [True: 0, False: 36.0k]
  ------------------
  471|      0|        const int res = c->cached_error;
  472|      0|        c->cached_error = 0;
  473|      0|        return res;
  474|      0|    }
  475|       |
  476|  36.0k|    if (output_picture_ready(c, c->n_fc == 1))
  ------------------
  |  Branch (476:9): [True: 21.5k, False: 14.4k]
  ------------------
  477|  21.5k|        return output_image(c, out);
  478|       |
  479|  14.4k|    if (c->n_fc > 1 && drain)
  ------------------
  |  Branch (479:9): [True: 0, False: 14.4k]
  |  Branch (479:24): [True: 0, False: 0]
  ------------------
  480|      0|        return drain_picture(c, out);
  481|       |
  482|  14.4k|    return DAV1D_ERR(EAGAIN);
  ------------------
  |  |   58|  14.4k|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  483|  14.4k|}
dav1d_apply_grain:
  487|  4.61k|{
  488|  4.61k|    validate_input_or_ret(c != NULL, DAV1D_ERR(EINVAL));
  ------------------
  |  |   52|  4.61k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:9): [True: 0, False: 4.61k]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
  489|  4.61k|    validate_input_or_ret(out != NULL, DAV1D_ERR(EINVAL));
  ------------------
  |  |   52|  4.61k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:9): [True: 0, False: 4.61k]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
  490|  4.61k|    validate_input_or_ret(in != NULL, DAV1D_ERR(EINVAL));
  ------------------
  |  |   52|  4.61k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:9): [True: 0, False: 4.61k]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
  491|       |
  492|  4.61k|    if (!has_grain(in)) {
  ------------------
  |  Branch (492:9): [True: 0, False: 4.61k]
  ------------------
  493|      0|        dav1d_picture_ref(out, in);
  494|      0|        return 0;
  495|      0|    }
  496|       |
  497|  4.61k|    int res = dav1d_picture_alloc_copy(c, out, in->p.w, in);
  498|  4.61k|    if (res < 0) goto error;
  ------------------
  |  Branch (498:9): [True: 0, False: 4.61k]
  ------------------
  499|       |
  500|  4.61k|    if (c->n_tc > 1) {
  ------------------
  |  Branch (500:9): [True: 0, False: 4.61k]
  ------------------
  501|      0|        dav1d_task_delayed_fg(c, out, in);
  502|  4.61k|    } else {
  503|  4.61k|        switch (out->p.bpc) {
  504|      0|#if CONFIG_8BPC
  505|  1.56k|        case 8:
  ------------------
  |  Branch (505:9): [True: 1.56k, False: 3.05k]
  ------------------
  506|  1.56k|            dav1d_apply_grain_8bpc(&c->dsp[0].fg, out, in);
  507|  1.56k|            break;
  508|      0|#endif
  509|      0|#if CONFIG_16BPC
  510|  2.49k|        case 10:
  ------------------
  |  Branch (510:9): [True: 2.49k, False: 2.12k]
  ------------------
  511|  3.05k|        case 12:
  ------------------
  |  Branch (511:9): [True: 562, False: 4.05k]
  ------------------
  512|  3.05k|            dav1d_apply_grain_16bpc(&c->dsp[(out->p.bpc >> 1) - 4].fg, out, in);
  513|  3.05k|            break;
  514|      0|#endif
  515|      0|        default: abort();
  ------------------
  |  Branch (515:9): [True: 0, False: 4.61k]
  ------------------
  516|  4.61k|        }
  517|  4.61k|    }
  518|       |
  519|  4.61k|    return 0;
  520|       |
  521|      0|error:
  522|      0|    dav1d_picture_unref_internal(out);
  523|      0|    return res;
  524|  4.61k|}
dav1d_flush:
  526|  9.51k|void dav1d_flush(Dav1dContext *const c) {
  527|  9.51k|    dav1d_data_unref_internal(&c->in);
  528|  9.51k|    if (c->out.p.frame_hdr)
  ------------------
  |  Branch (528:9): [True: 0, False: 9.51k]
  ------------------
  529|      0|        dav1d_thread_picture_unref(&c->out);
  530|  9.51k|    if (c->cache.p.frame_hdr)
  ------------------
  |  Branch (530:9): [True: 0, False: 9.51k]
  ------------------
  531|      0|        dav1d_thread_picture_unref(&c->cache);
  532|       |
  533|  9.51k|    c->drain = 0;
  534|  9.51k|    c->cached_error = 0;
  535|       |
  536|  85.6k|    for (int i = 0; i < 8; i++) {
  ------------------
  |  Branch (536:21): [True: 76.1k, False: 9.51k]
  ------------------
  537|  76.1k|        if (c->refs[i].p.p.frame_hdr)
  ------------------
  |  Branch (537:13): [True: 21.3k, False: 54.7k]
  ------------------
  538|  21.3k|            dav1d_thread_picture_unref(&c->refs[i].p);
  539|  76.1k|        dav1d_ref_dec(&c->refs[i].segmap);
  540|  76.1k|        dav1d_ref_dec(&c->refs[i].refmvs);
  541|  76.1k|        dav1d_cdf_thread_unref(&c->cdf[i]);
  542|  76.1k|    }
  543|  9.51k|    c->frame_hdr = NULL;
  544|  9.51k|    c->seq_hdr = NULL;
  545|  9.51k|    dav1d_ref_dec(&c->seq_hdr_ref);
  546|       |
  547|  9.51k|    c->mastering_display = NULL;
  548|  9.51k|    c->content_light = NULL;
  549|  9.51k|    c->itut_t35 = NULL;
  550|  9.51k|    c->n_itut_t35 = 0;
  551|  9.51k|    dav1d_ref_dec(&c->mastering_display_ref);
  552|  9.51k|    dav1d_ref_dec(&c->content_light_ref);
  553|  9.51k|    dav1d_ref_dec(&c->itut_t35_ref);
  554|       |
  555|  9.51k|    dav1d_data_props_unref_internal(&c->cached_error_props);
  556|       |
  557|  9.51k|    if (c->n_fc == 1 && c->n_tc == 1) return;
  ------------------
  |  Branch (557:9): [True: 9.51k, False: 0]
  |  Branch (557:25): [True: 9.51k, False: 0]
  ------------------
  558|  9.51k|    atomic_store(c->flush, 1);
  559|       |
  560|      0|    if (c->n_tc > 1) {
  ------------------
  |  Branch (560:9): [True: 0, False: 0]
  ------------------
  561|      0|        pthread_mutex_lock(&c->task_thread.lock);
  562|       |        // stop running tasks in worker threads
  563|      0|        for (unsigned i = 0; i < c->n_tc; i++) {
  ------------------
  |  Branch (563:30): [True: 0, False: 0]
  ------------------
  564|      0|            Dav1dTaskContext *const tc = &c->tc[i];
  565|      0|            while (!tc->task_thread.flushed) {
  ------------------
  |  Branch (565:20): [True: 0, False: 0]
  ------------------
  566|      0|                pthread_cond_wait(&tc->task_thread.td.cond, &c->task_thread.lock);
  567|      0|            }
  568|      0|        }
  569|      0|        for (unsigned i = 0; i < c->n_fc; i++) {
  ------------------
  |  Branch (569:30): [True: 0, False: 0]
  ------------------
  570|      0|            c->fc[i].task_thread.task_head = NULL;
  571|      0|            c->fc[i].task_thread.task_tail = NULL;
  572|      0|            c->fc[i].task_thread.task_cur_prev = NULL;
  573|      0|            c->fc[i].task_thread.pending_tasks.head = NULL;
  574|      0|            c->fc[i].task_thread.pending_tasks.tail = NULL;
  575|      0|            atomic_init(&c->fc[i].task_thread.pending_tasks.merge, 0);
  576|      0|        }
  577|      0|        atomic_init(&c->task_thread.first, 0);
  578|      0|        c->task_thread.cur = c->n_fc;
  579|      0|        atomic_store(&c->task_thread.reset_task_cur, UINT_MAX);
  580|      0|        atomic_store(&c->task_thread.cond_signaled, 0);
  581|      0|        pthread_mutex_unlock(&c->task_thread.lock);
  582|      0|    }
  583|       |
  584|      0|    if (c->n_fc > 1) {
  ------------------
  |  Branch (584:9): [True: 0, False: 0]
  ------------------
  585|      0|        for (unsigned n = 0, next = c->frame_thread.next; n < c->n_fc; n++, next++) {
  ------------------
  |  Branch (585:59): [True: 0, False: 0]
  ------------------
  586|      0|            if (next == c->n_fc) next = 0;
  ------------------
  |  Branch (586:17): [True: 0, False: 0]
  ------------------
  587|      0|            Dav1dFrameContext *const f = &c->fc[next];
  588|      0|            dav1d_decode_frame_exit(f, -1);
  589|      0|            f->n_tile_data = 0;
  590|      0|            f->task_thread.retval = 0;
  591|      0|            f->task_thread.error = 0;
  592|      0|            Dav1dThreadPicture *out_delayed = &c->frame_thread.out_delayed[next];
  593|      0|            if (out_delayed->p.frame_hdr) {
  ------------------
  |  Branch (593:17): [True: 0, False: 0]
  ------------------
  594|      0|                dav1d_thread_picture_unref(out_delayed);
  595|      0|            }
  596|      0|        }
  597|      0|        c->frame_thread.next = 0;
  598|      0|    }
  599|       |    atomic_store(c->flush, 0);
  600|      0|}
dav1d_close:
  602|  9.51k|COLD void dav1d_close(Dav1dContext **const c_out) {
  603|  9.51k|    validate_input(c_out != NULL);
  ------------------
  |  |   59|  9.51k|#define validate_input(x) validate_input_or_ret(x, )
  |  |  ------------------
  |  |  |  |   52|  9.51k|    if (!(x)) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (52:9): [True: 0, False: 9.51k]
  |  |  |  |  ------------------
  |  |  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  |  |  ------------------
  |  |  |  |   54|      0|                    #x, __func__); \
  |  |  |  |   55|      0|        debug_abort(); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   39|      0|#define debug_abort abort
  |  |  |  |  ------------------
  |  |  |  |   56|      0|        return r; \
  |  |  |  |   57|      0|    }
  |  |  ------------------
  ------------------
  604|       |#if TRACK_HEAP_ALLOCATIONS
  605|       |    dav1d_log_alloc_stats(*c_out);
  606|       |#endif
  607|  9.51k|    close_internal(c_out, 1);
  608|  9.51k|}
dav1d_picture_unref:
  727|  21.5k|void dav1d_picture_unref(Dav1dPicture *const p) {
  728|  21.5k|    dav1d_picture_unref_internal(p);
  729|  21.5k|}
dav1d_data_create:
  731|  71.9k|uint8_t *dav1d_data_create(Dav1dData *const buf, const size_t sz) {
  732|  71.9k|    return dav1d_data_create_internal(buf, sz);
  733|  71.9k|}
dav1d_data_unref:
  756|  49.3k|void dav1d_data_unref(Dav1dData *const buf) {
  757|  49.3k|    dav1d_data_unref_internal(buf);
  758|  49.3k|}
lib.c:get_num_threads:
  111|  9.51k|{
  112|       |    /* ceil(sqrt(n)) */
  113|  9.51k|    static const uint8_t fc_lut[49] = {
  114|  9.51k|        1,                                     /*     1 */
  115|  9.51k|        2, 2, 2,                               /*  2- 4 */
  116|  9.51k|        3, 3, 3, 3, 3,                         /*  5- 9 */
  117|  9.51k|        4, 4, 4, 4, 4, 4, 4,                   /* 10-16 */
  118|  9.51k|        5, 5, 5, 5, 5, 5, 5, 5, 5,             /* 17-25 */
  119|  9.51k|        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,       /* 26-36 */
  120|  9.51k|        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, /* 37-49 */
  121|  9.51k|    };
  122|  9.51k|    *n_tc = s->n_threads ? s->n_threads :
  ------------------
  |  Branch (122:13): [True: 9.51k, False: 0]
  ------------------
  123|  9.51k|        iclip(dav1d_num_logical_processors(c), 1, DAV1D_MAX_THREADS);
  ------------------
  |  |   46|      0|#define DAV1D_MAX_THREADS 256
  ------------------
  124|  9.51k|    *n_fc = s->max_frame_delay ? umin(s->max_frame_delay, *n_tc) :
  ------------------
  |  Branch (124:13): [True: 9.51k, False: 0]
  ------------------
  125|  9.51k|            *n_tc < 50 ? fc_lut[*n_tc - 1] : 8; // min(8, ceil(sqrt(n)))
  ------------------
  |  Branch (125:13): [True: 0, False: 0]
  ------------------
  126|  9.51k|}
lib.c:init_internal:
   53|      1|static COLD void init_internal(void) {
   54|      1|    dav1d_init_cpu();
   55|      1|    dav1d_init_ii_wedge_masks();
   56|      1|    dav1d_init_intra_edge_tree();
   57|      1|    dav1d_init_qm_tables();
   58|      1|    dav1d_init_thread();
  ------------------
  |  |  144|      1|#define dav1d_init_thread() do {} while (0)
  |  |  ------------------
  |  |  |  Branch (144:42): [Folded, False: 1]
  |  |  ------------------
  ------------------
   59|      1|}
lib.c:get_stack_size_internal:
   93|  9.51k|static COLD size_t get_stack_size_internal(const pthread_attr_t *const thread_attr) {
   94|       |    /* glibc has an issue where the size of the TLS is subtracted from the stack
   95|       |     * size instead of allocated separately. As a result the specified stack
   96|       |     * size may be insufficient when used in an application with large amounts
   97|       |     * of TLS data. The following is a workaround to compensate for that.
   98|       |     * See https://sourceware.org/bugzilla/show_bug.cgi?id=11787 */
   99|  9.51k|    size_t (*const get_minstack)(const pthread_attr_t*) =
  100|  9.51k|        dlsym(RTLD_DEFAULT, "__pthread_get_minstack");
  101|  9.51k|    if (get_minstack)
  ------------------
  |  Branch (101:9): [True: 9.51k, False: 0]
  ------------------
  102|  9.51k|        return get_minstack(thread_attr) - PTHREAD_STACK_MIN;
  103|      0|    return 0;
  104|  9.51k|}
lib.c:gen_picture:
  413|   109k|{
  414|   109k|    Dav1dData *const in = &c->in;
  415|       |
  416|   109k|    if (output_picture_ready(c, 0))
  ------------------
  |  Branch (416:9): [True: 16.9k, False: 92.5k]
  ------------------
  417|  16.9k|        return 0;
  418|       |
  419|   125k|    while (in->sz > 0) {
  ------------------
  |  Branch (419:12): [True: 105k, False: 20.0k]
  ------------------
  420|   105k|        const ptrdiff_t res = dav1d_parse_obus(c, in);
  421|   105k|        if (res < 0) {
  ------------------
  |  Branch (421:13): [True: 50.9k, False: 54.8k]
  ------------------
  422|  50.9k|            dav1d_data_unref_internal(in);
  423|  54.8k|        } else {
  424|  54.8k|            assert((size_t)res <= in->sz);
  ------------------
  |  Branch (424:13): [True: 54.8k, False: 0]
  ------------------
  425|  54.8k|            in->sz -= res;
  426|  54.8k|            in->data += res;
  427|  54.8k|            if (!in->sz) dav1d_data_unref_internal(in);
  ------------------
  |  Branch (427:17): [True: 20.8k, False: 34.0k]
  ------------------
  428|  54.8k|        }
  429|   105k|        if (output_picture_ready(c, 0))
  ------------------
  |  Branch (429:13): [True: 21.5k, False: 84.3k]
  ------------------
  430|  21.5k|            break;
  431|  84.3k|        if (res < 0)
  ------------------
  |  Branch (431:13): [True: 50.9k, False: 33.3k]
  ------------------
  432|  50.9k|            return (int)res;
  433|  84.3k|    }
  434|       |
  435|  41.6k|    return 0;
  436|  92.5k|}
lib.c:output_picture_ready:
  332|   251k|static int output_picture_ready(Dav1dContext *const c, const int drain) {
  333|   251k|    if (c->cached_error) return 1;
  ------------------
  |  Branch (333:9): [True: 0, False: 251k]
  ------------------
  334|   251k|    if (!c->all_layers && c->max_spatial_id) {
  ------------------
  |  Branch (334:9): [True: 0, False: 251k]
  |  Branch (334:27): [True: 0, False: 0]
  ------------------
  335|      0|        if (c->out.p.data[0] && c->cache.p.data[0]) {
  ------------------
  |  Branch (335:13): [True: 0, False: 0]
  |  Branch (335:33): [True: 0, False: 0]
  ------------------
  336|      0|            if (c->max_spatial_id == c->cache.p.frame_hdr->spatial_id ||
  ------------------
  |  Branch (336:17): [True: 0, False: 0]
  ------------------
  337|      0|                c->out.flags & PICTURE_FLAG_NEW_TEMPORAL_UNIT)
  ------------------
  |  Branch (337:17): [True: 0, False: 0]
  ------------------
  338|      0|                return 1;
  339|      0|            dav1d_thread_picture_unref(&c->cache);
  340|      0|            dav1d_thread_picture_move_ref(&c->cache, &c->out);
  341|      0|            return 0;
  342|      0|        } else if (c->cache.p.data[0] && drain) {
  ------------------
  |  Branch (342:20): [True: 0, False: 0]
  |  Branch (342:42): [True: 0, False: 0]
  ------------------
  343|      0|            return 1;
  344|      0|        } else if (c->out.p.data[0]) {
  ------------------
  |  Branch (344:20): [True: 0, False: 0]
  ------------------
  345|      0|            dav1d_thread_picture_move_ref(&c->cache, &c->out);
  346|      0|            return 0;
  347|      0|        }
  348|      0|    }
  349|       |
  350|   251k|    return !!c->out.p.data[0];
  351|   251k|}
lib.c:output_image:
  312|  21.5k|{
  313|  21.5k|    int res = 0;
  314|       |
  315|  21.5k|    Dav1dThreadPicture *const in = (c->all_layers || !c->max_spatial_id)
  ------------------
  |  Branch (315:37): [True: 21.5k, False: 0]
  |  Branch (315:54): [True: 0, False: 0]
  ------------------
  316|  21.5k|                                   ? &c->out : &c->cache;
  317|  21.5k|    if (!c->apply_grain || !has_grain(&in->p)) {
  ------------------
  |  Branch (317:9): [True: 0, False: 21.5k]
  |  Branch (317:28): [True: 16.9k, False: 4.61k]
  ------------------
  318|  16.9k|        dav1d_picture_move_ref(out, &in->p);
  319|  16.9k|        dav1d_thread_picture_unref(in);
  320|  16.9k|        goto end;
  321|  16.9k|    }
  322|       |
  323|  4.61k|    res = dav1d_apply_grain(c, out, &in->p);
  324|  4.61k|    dav1d_thread_picture_unref(in);
  325|  21.5k|end:
  326|  21.5k|    if (!c->all_layers && c->max_spatial_id && c->out.p.data[0]) {
  ------------------
  |  Branch (326:9): [True: 0, False: 21.5k]
  |  Branch (326:27): [True: 0, False: 0]
  |  Branch (326:48): [True: 0, False: 0]
  ------------------
  327|      0|        dav1d_thread_picture_move_ref(in, &c->out);
  328|      0|    }
  329|  21.5k|    return res;
  330|  4.61k|}
lib.c:has_grain:
  304|  26.1k|{
  305|  26.1k|    const Dav1dFilmGrainData *fgdata = &pic->frame_hdr->film_grain.data;
  306|  26.1k|    return fgdata->num_y_points || fgdata->num_uv_points[0] ||
  ------------------
  |  Branch (306:12): [True: 6.56k, False: 19.5k]
  |  Branch (306:36): [True: 1.10k, False: 18.4k]
  ------------------
  307|  18.4k|           fgdata->num_uv_points[1] || (fgdata->clip_to_restricted_range &&
  ------------------
  |  Branch (307:12): [True: 624, False: 17.8k]
  |  Branch (307:41): [True: 1.26k, False: 16.5k]
  ------------------
  308|  1.26k|                                        fgdata->chroma_scaling_from_luma);
  ------------------
  |  Branch (308:41): [True: 938, False: 327]
  ------------------
  309|  26.1k|}
lib.c:close_internal:
  610|  9.51k|static COLD void close_internal(Dav1dContext **const c_out, int flush) {
  611|  9.51k|    Dav1dContext *const c = *c_out;
  612|  9.51k|    if (!c) return;
  ------------------
  |  Branch (612:9): [True: 0, False: 9.51k]
  ------------------
  613|       |
  614|  9.51k|    if (flush) dav1d_flush(c);
  ------------------
  |  Branch (614:9): [True: 9.51k, False: 0]
  ------------------
  615|       |
  616|  9.51k|    if (c->tc) {
  ------------------
  |  Branch (616:9): [True: 9.51k, False: 0]
  ------------------
  617|  9.51k|        struct TaskThreadData *ttd = &c->task_thread;
  618|  9.51k|        if (ttd->inited) {
  ------------------
  |  Branch (618:13): [True: 0, False: 9.51k]
  ------------------
  619|      0|            pthread_mutex_lock(&ttd->lock);
  620|      0|            for (unsigned n = 0; n < c->n_tc && c->tc[n].task_thread.td.inited; n++)
  ------------------
  |  Branch (620:34): [True: 0, False: 0]
  |  Branch (620:49): [True: 0, False: 0]
  ------------------
  621|      0|                c->tc[n].task_thread.die = 1;
  622|      0|            pthread_cond_broadcast(&ttd->cond);
  623|      0|            pthread_mutex_unlock(&ttd->lock);
  624|      0|            for (unsigned n = 0; n < c->n_tc; n++) {
  ------------------
  |  Branch (624:34): [True: 0, False: 0]
  ------------------
  625|      0|                Dav1dTaskContext *const pf = &c->tc[n];
  626|      0|                if (!pf->task_thread.td.inited) break;
  ------------------
  |  Branch (626:21): [True: 0, False: 0]
  ------------------
  627|      0|                pthread_join(pf->task_thread.td.thread, NULL);
  628|      0|                pthread_cond_destroy(&pf->task_thread.td.cond);
  629|      0|                pthread_mutex_destroy(&pf->task_thread.td.lock);
  630|      0|            }
  631|      0|            pthread_cond_destroy(&ttd->delayed_fg.cond);
  632|      0|            pthread_cond_destroy(&ttd->cond);
  633|      0|            pthread_mutex_destroy(&ttd->lock);
  634|      0|        }
  635|  9.51k|        dav1d_free_aligned(c->tc);
  ------------------
  |  |  136|  9.51k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
  636|  9.51k|    }
  637|       |
  638|  19.0k|    for (unsigned n = 0; c->fc && n < c->n_fc; n++) {
  ------------------
  |  Branch (638:26): [True: 19.0k, False: 0]
  |  Branch (638:35): [True: 9.51k, False: 9.51k]
  ------------------
  639|  9.51k|        Dav1dFrameContext *const f = &c->fc[n];
  640|       |
  641|       |        // clean-up threading stuff
  642|  9.51k|        if (c->n_fc > 1) {
  ------------------
  |  Branch (642:13): [True: 0, False: 9.51k]
  ------------------
  643|      0|            dav1d_free(f->tile_thread.lowest_pixel_mem);
  ------------------
  |  |  135|      0|#define dav1d_free(ptr) free(ptr)
  ------------------
  644|      0|            dav1d_free(f->frame_thread.b);
  ------------------
  |  |  135|      0|#define dav1d_free(ptr) free(ptr)
  ------------------
  645|      0|            dav1d_free_aligned(f->frame_thread.cbi);
  ------------------
  |  |  136|      0|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
  646|      0|            dav1d_free_aligned(f->frame_thread.pal_idx);
  ------------------
  |  |  136|      0|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
  647|      0|            dav1d_free_aligned(f->frame_thread.cf);
  ------------------
  |  |  136|      0|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
  648|      0|            dav1d_free(f->frame_thread.tile_start_off);
  ------------------
  |  |  135|      0|#define dav1d_free(ptr) free(ptr)
  ------------------
  649|      0|            dav1d_free_aligned(f->frame_thread.pal);
  ------------------
  |  |  136|      0|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
  650|      0|        }
  651|  9.51k|        if (c->n_tc > 1) {
  ------------------
  |  Branch (651:13): [True: 0, False: 9.51k]
  ------------------
  652|      0|            pthread_mutex_destroy(&f->task_thread.pending_tasks.lock);
  653|      0|            pthread_cond_destroy(&f->task_thread.cond);
  654|      0|            pthread_mutex_destroy(&f->task_thread.lock);
  655|      0|        }
  656|  9.51k|        dav1d_free(f->frame_thread.frame_progress);
  ------------------
  |  |  135|  9.51k|#define dav1d_free(ptr) free(ptr)
  ------------------
  657|  9.51k|        dav1d_free(f->task_thread.tasks);
  ------------------
  |  |  135|  9.51k|#define dav1d_free(ptr) free(ptr)
  ------------------
  658|  9.51k|        dav1d_free(f->task_thread.tile_tasks[0]);
  ------------------
  |  |  135|  9.51k|#define dav1d_free(ptr) free(ptr)
  ------------------
  659|  9.51k|        dav1d_free_aligned(f->ts);
  ------------------
  |  |  136|  9.51k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
  660|  9.51k|        dav1d_free_aligned(f->ipred_edge[0]);
  ------------------
  |  |  136|  9.51k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
  661|  9.51k|        dav1d_free(f->a);
  ------------------
  |  |  135|  9.51k|#define dav1d_free(ptr) free(ptr)
  ------------------
  662|  9.51k|        dav1d_free(f->tile);
  ------------------
  |  |  135|  9.51k|#define dav1d_free(ptr) free(ptr)
  ------------------
  663|  9.51k|        dav1d_free(f->lf.mask);
  ------------------
  |  |  135|  9.51k|#define dav1d_free(ptr) free(ptr)
  ------------------
  664|  9.51k|        dav1d_free(f->lf.level);
  ------------------
  |  |  135|  9.51k|#define dav1d_free(ptr) free(ptr)
  ------------------
  665|  9.51k|        dav1d_free(f->lf.lr_mask);
  ------------------
  |  |  135|  9.51k|#define dav1d_free(ptr) free(ptr)
  ------------------
  666|  9.51k|        dav1d_free(f->lf.tx_lpf_right_edge[0]);
  ------------------
  |  |  135|  9.51k|#define dav1d_free(ptr) free(ptr)
  ------------------
  667|  9.51k|        dav1d_free(f->lf.start_of_tile_row);
  ------------------
  |  |  135|  9.51k|#define dav1d_free(ptr) free(ptr)
  ------------------
  668|  9.51k|        dav1d_free_aligned(f->rf.r);
  ------------------
  |  |  136|  9.51k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
  669|  9.51k|        dav1d_free_aligned(f->lf.cdef_line_buf);
  ------------------
  |  |  136|  9.51k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
  670|  9.51k|        dav1d_free_aligned(f->lf.lr_line_buf);
  ------------------
  |  |  136|  9.51k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
  671|  9.51k|    }
  672|  9.51k|    dav1d_free_aligned(c->fc);
  ------------------
  |  |  136|  9.51k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
  673|  9.51k|    if (c->n_fc > 1 && c->frame_thread.out_delayed) {
  ------------------
  |  Branch (673:9): [True: 0, False: 9.51k]
  |  Branch (673:24): [True: 0, False: 0]
  ------------------
  674|      0|        for (unsigned n = 0; n < c->n_fc; n++)
  ------------------
  |  Branch (674:30): [True: 0, False: 0]
  ------------------
  675|      0|            if (c->frame_thread.out_delayed[n].p.frame_hdr)
  ------------------
  |  Branch (675:17): [True: 0, False: 0]
  ------------------
  676|      0|                dav1d_thread_picture_unref(&c->frame_thread.out_delayed[n]);
  677|      0|        dav1d_free(c->frame_thread.out_delayed);
  ------------------
  |  |  135|      0|#define dav1d_free(ptr) free(ptr)
  ------------------
  678|      0|    }
  679|  9.65k|    for (int n = 0; n < c->n_tile_data; n++)
  ------------------
  |  Branch (679:21): [True: 134, False: 9.51k]
  ------------------
  680|    134|        dav1d_data_unref_internal(&c->tile[n].data);
  681|  9.51k|    dav1d_free(c->tile);
  ------------------
  |  |  135|  9.51k|#define dav1d_free(ptr) free(ptr)
  ------------------
  682|  85.6k|    for (int n = 0; n < 8; n++) {
  ------------------
  |  Branch (682:21): [True: 76.1k, False: 9.51k]
  ------------------
  683|  76.1k|        dav1d_cdf_thread_unref(&c->cdf[n]);
  684|  76.1k|        if (c->refs[n].p.p.frame_hdr)
  ------------------
  |  Branch (684:13): [True: 0, False: 76.1k]
  ------------------
  685|      0|            dav1d_thread_picture_unref(&c->refs[n].p);
  686|  76.1k|        dav1d_ref_dec(&c->refs[n].refmvs);
  687|  76.1k|        dav1d_ref_dec(&c->refs[n].segmap);
  688|  76.1k|    }
  689|  9.51k|    dav1d_ref_dec(&c->seq_hdr_ref);
  690|  9.51k|    dav1d_ref_dec(&c->frame_hdr_ref);
  691|       |
  692|  9.51k|    dav1d_ref_dec(&c->mastering_display_ref);
  693|  9.51k|    dav1d_ref_dec(&c->content_light_ref);
  694|  9.51k|    dav1d_ref_dec(&c->itut_t35_ref);
  695|       |
  696|  9.51k|    dav1d_mem_pool_end(c->seq_hdr_pool);
  697|  9.51k|    dav1d_mem_pool_end(c->frame_hdr_pool);
  698|  9.51k|    dav1d_mem_pool_end(c->segmap_pool);
  699|  9.51k|    dav1d_mem_pool_end(c->refmvs_pool);
  700|  9.51k|    dav1d_mem_pool_end(c->cdf_pool);
  701|  9.51k|    dav1d_mem_pool_end(c->picture_pool);
  702|  9.51k|    dav1d_mem_pool_end(c->pic_ctx_pool);
  703|       |
  704|  9.51k|    dav1d_freep_aligned(c_out);
  705|  9.51k|}

dav1d_loop_filter_dsp_init_8bpc:
  259|  3.41k|COLD void bitfn(dav1d_loop_filter_dsp_init)(Dav1dLoopFilterDSPContext *const c) {
  260|  3.41k|    c->loop_filter_sb[0][0] = loop_filter_h_sb128y_c;
  261|  3.41k|    c->loop_filter_sb[0][1] = loop_filter_v_sb128y_c;
  262|  3.41k|    c->loop_filter_sb[1][0] = loop_filter_h_sb128uv_c;
  263|  3.41k|    c->loop_filter_sb[1][1] = loop_filter_v_sb128uv_c;
  264|       |
  265|  3.41k|#if HAVE_ASM
  266|       |#if ARCH_AARCH64 || ARCH_ARM
  267|       |    loop_filter_dsp_init_arm(c);
  268|       |#elif ARCH_LOONGARCH64
  269|       |    loop_filter_dsp_init_loongarch(c);
  270|       |#elif ARCH_PPC64LE
  271|       |    loop_filter_dsp_init_ppc(c);
  272|       |#elif ARCH_X86
  273|       |    loop_filter_dsp_init_x86(c);
  274|  3.41k|#endif
  275|  3.41k|#endif
  276|  3.41k|}
dav1d_loop_filter_dsp_init_16bpc:
  259|  4.61k|COLD void bitfn(dav1d_loop_filter_dsp_init)(Dav1dLoopFilterDSPContext *const c) {
  260|  4.61k|    c->loop_filter_sb[0][0] = loop_filter_h_sb128y_c;
  261|  4.61k|    c->loop_filter_sb[0][1] = loop_filter_v_sb128y_c;
  262|  4.61k|    c->loop_filter_sb[1][0] = loop_filter_h_sb128uv_c;
  263|  4.61k|    c->loop_filter_sb[1][1] = loop_filter_v_sb128uv_c;
  264|       |
  265|  4.61k|#if HAVE_ASM
  266|       |#if ARCH_AARCH64 || ARCH_ARM
  267|       |    loop_filter_dsp_init_arm(c);
  268|       |#elif ARCH_LOONGARCH64
  269|       |    loop_filter_dsp_init_loongarch(c);
  270|       |#elif ARCH_PPC64LE
  271|       |    loop_filter_dsp_init_ppc(c);
  272|       |#elif ARCH_X86
  273|       |    loop_filter_dsp_init_x86(c);
  274|  4.61k|#endif
  275|  4.61k|#endif
  276|  4.61k|}

dav1d_loop_restoration_dsp_init_8bpc:
 1367|  3.41k|{
 1368|  3.41k|    c->wiener[0] = c->wiener[1] = wiener_c;
 1369|  3.41k|    c->sgr[0] = sgr_5x5_c;
 1370|  3.41k|    c->sgr[1] = sgr_3x3_c;
 1371|  3.41k|    c->sgr[2] = sgr_mix_c;
 1372|       |
 1373|  3.41k|#if HAVE_ASM
 1374|       |#if ARCH_AARCH64 || ARCH_ARM
 1375|       |    loop_restoration_dsp_init_arm(c, bpc);
 1376|       |#elif ARCH_LOONGARCH64
 1377|       |    loop_restoration_dsp_init_loongarch(c, bpc);
 1378|       |#elif ARCH_PPC64LE
 1379|       |    loop_restoration_dsp_init_ppc(c, bpc);
 1380|       |#elif ARCH_X86
 1381|       |    loop_restoration_dsp_init_x86(c, bpc);
 1382|  3.41k|#endif
 1383|  3.41k|#endif
 1384|  3.41k|}
looprestoration_tmpl.c:sgr_5x5_c:
  830|  5.50k|{
  831|  5.50k|    ALIGN_STK_16(int32_t, sumsq_buf, BUF_STRIDE * 5 + 16,);
  ------------------
  |  |  100|  5.50k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  5.50k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
  832|  5.50k|    ALIGN_STK_16(coef, sum_buf, BUF_STRIDE * 5 + 16,);
  ------------------
  |  |  100|  5.50k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  5.50k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
  833|  5.50k|    int32_t *sumsq_ptrs[5], *sumsq_rows[5];
  834|  5.50k|    coef *sum_ptrs[5], *sum_rows[5];
  835|  33.0k|    for (int i = 0; i < 5; i++) {
  ------------------
  |  Branch (835:21): [True: 27.5k, False: 5.50k]
  ------------------
  836|  27.5k|        sumsq_rows[i] = &sumsq_buf[i * BUF_STRIDE];
  ------------------
  |  |  685|  27.5k|#define BUF_STRIDE (384 + 16)
  ------------------
  837|  27.5k|        sum_rows[i] = &sum_buf[i * BUF_STRIDE];
  ------------------
  |  |  685|  27.5k|#define BUF_STRIDE (384 + 16)
  ------------------
  838|  27.5k|    }
  839|       |
  840|  5.50k|    ALIGN_STK_16(int32_t, A_buf, BUF_STRIDE * 2 + 16,);
  ------------------
  |  |  100|  5.50k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  5.50k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
  841|  5.50k|    ALIGN_STK_16(coef, B_buf, BUF_STRIDE * 2 + 16,);
  ------------------
  |  |  100|  5.50k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  5.50k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
  842|  5.50k|    int32_t *A_ptrs[2];
  843|  5.50k|    coef *B_ptrs[2];
  844|  16.5k|    for (int i = 0; i < 2; i++) {
  ------------------
  |  Branch (844:21): [True: 11.0k, False: 5.50k]
  ------------------
  845|  11.0k|        A_ptrs[i] = &A_buf[i * BUF_STRIDE];
  ------------------
  |  |  685|  11.0k|#define BUF_STRIDE (384 + 16)
  ------------------
  846|  11.0k|        B_ptrs[i] = &B_buf[i * BUF_STRIDE];
  ------------------
  |  |  685|  11.0k|#define BUF_STRIDE (384 + 16)
  ------------------
  847|  11.0k|    }
  848|  5.50k|    const pixel *src = dst;
  849|  5.50k|    const pixel *lpf_bottom = lpf + 6*PXSTRIDE(stride);
  ------------------
  |  |   53|  5.50k|#define PXSTRIDE(x) (x)
  ------------------
  850|       |
  851|  5.50k|    if (edges & LR_HAVE_TOP) {
  ------------------
  |  Branch (851:9): [True: 2.68k, False: 2.82k]
  ------------------
  852|  2.68k|        sumsq_ptrs[0] = sumsq_rows[0];
  853|  2.68k|        sumsq_ptrs[1] = sumsq_rows[0];
  854|  2.68k|        sumsq_ptrs[2] = sumsq_rows[1];
  855|  2.68k|        sumsq_ptrs[3] = sumsq_rows[2];
  856|  2.68k|        sumsq_ptrs[4] = sumsq_rows[3];
  857|  2.68k|        sum_ptrs[0] = sum_rows[0];
  858|  2.68k|        sum_ptrs[1] = sum_rows[0];
  859|  2.68k|        sum_ptrs[2] = sum_rows[1];
  860|  2.68k|        sum_ptrs[3] = sum_rows[2];
  861|  2.68k|        sum_ptrs[4] = sum_rows[3];
  862|       |
  863|  2.68k|        sgr_box5_row_h(sumsq_rows[0], sum_rows[0], NULL, lpf, w, edges);
  864|  2.68k|        lpf += PXSTRIDE(stride);
  ------------------
  |  |   53|  2.68k|#define PXSTRIDE(x) (x)
  ------------------
  865|  2.68k|        sgr_box5_row_h(sumsq_rows[1], sum_rows[1], NULL, lpf, w, edges);
  866|       |
  867|  2.68k|        sgr_box5_row_h(sumsq_rows[2], sum_rows[2], left, src, w, edges);
  868|  2.68k|        left++;
  869|  2.68k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|  2.68k|#define PXSTRIDE(x) (x)
  ------------------
  870|       |
  871|  2.68k|        if (--h <= 0)
  ------------------
  |  Branch (871:13): [True: 295, False: 2.38k]
  ------------------
  872|    295|            goto vert_1;
  873|       |
  874|  2.38k|        sgr_box5_row_h(sumsq_rows[3], sum_rows[3], left, src, w, edges);
  875|  2.38k|        left++;
  876|  2.38k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|  2.38k|#define PXSTRIDE(x) (x)
  ------------------
  877|  2.38k|        sgr_box5_vert(sumsq_ptrs, sum_ptrs, A_ptrs[1], B_ptrs[1],
  878|  2.38k|                      w, params->sgr.s0, BITDEPTH_MAX);
  ------------------
  |  |   59|  2.38k|#define BITDEPTH_MAX 0xff
  ------------------
  879|  2.38k|        rotate(A_ptrs, B_ptrs, 2);
  880|       |
  881|  2.38k|        if (--h <= 0)
  ------------------
  |  Branch (881:13): [True: 415, False: 1.97k]
  ------------------
  882|    415|            goto vert_2;
  883|       |
  884|       |        // ptrs are rotated by 2; both [3] and [4] now point at rows[0]; set
  885|       |        // one of them to point at the previously unused rows[4].
  886|  1.97k|        sumsq_ptrs[3] = sumsq_rows[4];
  887|  1.97k|        sum_ptrs[3] = sum_rows[4];
  888|  2.82k|    } else {
  889|  2.82k|        sumsq_ptrs[0] = sumsq_rows[0];
  890|  2.82k|        sumsq_ptrs[1] = sumsq_rows[0];
  891|  2.82k|        sumsq_ptrs[2] = sumsq_rows[0];
  892|  2.82k|        sumsq_ptrs[3] = sumsq_rows[0];
  893|  2.82k|        sumsq_ptrs[4] = sumsq_rows[0];
  894|  2.82k|        sum_ptrs[0] = sum_rows[0];
  895|  2.82k|        sum_ptrs[1] = sum_rows[0];
  896|  2.82k|        sum_ptrs[2] = sum_rows[0];
  897|  2.82k|        sum_ptrs[3] = sum_rows[0];
  898|  2.82k|        sum_ptrs[4] = sum_rows[0];
  899|       |
  900|  2.82k|        sgr_box5_row_h(sumsq_rows[0], sum_rows[0], left, src, w, edges);
  901|  2.82k|        left++;
  902|  2.82k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|  2.82k|#define PXSTRIDE(x) (x)
  ------------------
  903|       |
  904|  2.82k|        if (--h <= 0)
  ------------------
  |  Branch (904:13): [True: 312, False: 2.50k]
  ------------------
  905|    312|            goto vert_1;
  906|       |
  907|  2.50k|        sumsq_ptrs[4] = sumsq_rows[1];
  908|  2.50k|        sum_ptrs[4] = sum_rows[1];
  909|       |
  910|  2.50k|        sgr_box5_row_h(sumsq_rows[1], sum_rows[1], left, src, w, edges);
  911|  2.50k|        left++;
  912|  2.50k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|  2.50k|#define PXSTRIDE(x) (x)
  ------------------
  913|       |
  914|  2.50k|        sgr_box5_vert(sumsq_ptrs, sum_ptrs, A_ptrs[1], B_ptrs[1],
  915|  2.50k|                      w, params->sgr.s0, BITDEPTH_MAX);
  ------------------
  |  |   59|  2.50k|#define BITDEPTH_MAX 0xff
  ------------------
  916|  2.50k|        rotate(A_ptrs, B_ptrs, 2);
  917|       |
  918|  2.50k|        if (--h <= 0)
  ------------------
  |  Branch (918:13): [True: 438, False: 2.07k]
  ------------------
  919|    438|            goto vert_2;
  920|       |
  921|  2.07k|        sumsq_ptrs[3] = sumsq_rows[2];
  922|  2.07k|        sumsq_ptrs[4] = sumsq_rows[3];
  923|  2.07k|        sum_ptrs[3] = sum_rows[2];
  924|  2.07k|        sum_ptrs[4] = sum_rows[3];
  925|       |
  926|  2.07k|        sgr_box5_row_h(sumsq_rows[2], sum_rows[2], left, src, w, edges);
  927|  2.07k|        left++;
  928|  2.07k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|  2.07k|#define PXSTRIDE(x) (x)
  ------------------
  929|       |
  930|  2.07k|        if (--h <= 0)
  ------------------
  |  Branch (930:13): [True: 286, False: 1.78k]
  ------------------
  931|    286|            goto odd;
  932|       |
  933|  1.78k|        sgr_box5_row_h(sumsq_rows[3], sum_rows[3], left, src, w, edges);
  934|  1.78k|        left++;
  935|  1.78k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|  1.78k|#define PXSTRIDE(x) (x)
  ------------------
  936|       |
  937|  1.78k|        sgr_box5_vert(sumsq_ptrs, sum_ptrs, A_ptrs[1], B_ptrs[1],
  938|  1.78k|                      w, params->sgr.s0, BITDEPTH_MAX);
  ------------------
  |  |   59|  1.78k|#define BITDEPTH_MAX 0xff
  ------------------
  939|  1.78k|        sgr_finish2(&dst, stride, A_ptrs, B_ptrs,
  940|  1.78k|                    w, 2, params->sgr.w0 HIGHBD_TAIL_SUFFIX);
  941|       |
  942|  1.78k|        if (--h <= 0)
  ------------------
  |  Branch (942:13): [True: 291, False: 1.49k]
  ------------------
  943|    291|            goto vert_2;
  944|       |
  945|       |        // ptrs are rotated by 2; both [3] and [4] now point at rows[0]; set
  946|       |        // one of them to point at the previously unused rows[4].
  947|  1.49k|        sumsq_ptrs[3] = sumsq_rows[4];
  948|  1.49k|        sum_ptrs[3] = sum_rows[4];
  949|  1.49k|    }
  950|       |
  951|  84.5k|    do {
  952|  84.5k|        sgr_box5_row_h(sumsq_ptrs[3], sum_ptrs[3], left, src, w, edges);
  953|  84.5k|        left++;
  954|  84.5k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|  84.5k|#define PXSTRIDE(x) (x)
  ------------------
  955|       |
  956|  84.5k|        if (--h <= 0)
  ------------------
  |  Branch (956:13): [True: 329, False: 84.2k]
  ------------------
  957|    329|            goto odd;
  958|       |
  959|  84.2k|        sgr_box5_row_h(sumsq_ptrs[4], sum_ptrs[4], left, src, w, edges);
  960|  84.2k|        left++;
  961|  84.2k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|  84.2k|#define PXSTRIDE(x) (x)
  ------------------
  962|       |
  963|  84.2k|        sgr_box5_vert(sumsq_ptrs, sum_ptrs, A_ptrs[1], B_ptrs[1],
  964|  84.2k|                      w, params->sgr.s0, BITDEPTH_MAX);
  ------------------
  |  |   59|  84.2k|#define BITDEPTH_MAX 0xff
  ------------------
  965|  84.2k|        sgr_finish2(&dst, stride, A_ptrs, B_ptrs,
  966|  84.2k|                    w, 2, params->sgr.w0 HIGHBD_TAIL_SUFFIX);
  967|  84.2k|    } while (--h > 0);
  ------------------
  |  Branch (967:14): [True: 81.0k, False: 3.13k]
  ------------------
  968|       |
  969|  3.13k|    if (!(edges & LR_HAVE_BOTTOM))
  ------------------
  |  Branch (969:9): [True: 365, False: 2.77k]
  ------------------
  970|    365|        goto vert_2;
  971|       |
  972|  2.77k|    sgr_box5_row_h(sumsq_ptrs[3], sum_ptrs[3], NULL, lpf_bottom, w, edges);
  973|  2.77k|    lpf_bottom += PXSTRIDE(stride);
  ------------------
  |  |   53|  2.77k|#define PXSTRIDE(x) (x)
  ------------------
  974|  2.77k|    sgr_box5_row_h(sumsq_ptrs[4], sum_ptrs[4], NULL, lpf_bottom, w, edges);
  975|       |
  976|  4.28k|output_2:
  977|  4.28k|    sgr_box5_vert(sumsq_ptrs, sum_ptrs, A_ptrs[1], B_ptrs[1],
  978|  4.28k|                  w, params->sgr.s0, BITDEPTH_MAX);
  ------------------
  |  |   59|  4.28k|#define BITDEPTH_MAX 0xff
  ------------------
  979|  4.28k|    sgr_finish2(&dst, stride, A_ptrs, B_ptrs,
  980|  4.28k|                w, 2, params->sgr.w0 HIGHBD_TAIL_SUFFIX);
  981|  4.28k|    return;
  982|       |
  983|  1.50k|vert_2:
  984|       |    // Duplicate the last row twice more
  985|  1.50k|    sumsq_ptrs[3] = sumsq_ptrs[2];
  986|  1.50k|    sumsq_ptrs[4] = sumsq_ptrs[2];
  987|  1.50k|    sum_ptrs[3] = sum_ptrs[2];
  988|  1.50k|    sum_ptrs[4] = sum_ptrs[2];
  989|  1.50k|    goto output_2;
  990|       |
  991|    615|odd:
  992|       |    // Copy the last row as padding once
  993|    615|    sumsq_ptrs[4] = sumsq_ptrs[3];
  994|    615|    sum_ptrs[4] = sum_ptrs[3];
  995|       |
  996|    615|    sgr_box5_vert(sumsq_ptrs, sum_ptrs, A_ptrs[1], B_ptrs[1],
  997|    615|                  w, params->sgr.s0, BITDEPTH_MAX);
  ------------------
  |  |   59|    615|#define BITDEPTH_MAX 0xff
  ------------------
  998|    615|    sgr_finish2(&dst, stride, A_ptrs, B_ptrs,
  999|    615|                w, 2, params->sgr.w0 HIGHBD_TAIL_SUFFIX);
 1000|       |
 1001|  1.22k|output_1:
 1002|       |    // Duplicate the last row twice more
 1003|  1.22k|    sumsq_ptrs[3] = sumsq_ptrs[2];
 1004|  1.22k|    sumsq_ptrs[4] = sumsq_ptrs[2];
 1005|  1.22k|    sum_ptrs[3] = sum_ptrs[2];
 1006|  1.22k|    sum_ptrs[4] = sum_ptrs[2];
 1007|       |
 1008|  1.22k|    sgr_box5_vert(sumsq_ptrs, sum_ptrs, A_ptrs[1], B_ptrs[1],
 1009|  1.22k|                  w, params->sgr.s0, BITDEPTH_MAX);
  ------------------
  |  |   59|  1.22k|#define BITDEPTH_MAX 0xff
  ------------------
 1010|       |    // Output only one row
 1011|  1.22k|    sgr_finish2(&dst, stride, A_ptrs, B_ptrs,
 1012|  1.22k|                w, 1, params->sgr.w0 HIGHBD_TAIL_SUFFIX);
 1013|  1.22k|    return;
 1014|       |
 1015|    607|vert_1:
 1016|       |    // Copy the last row as padding once
 1017|    607|    sumsq_ptrs[4] = sumsq_ptrs[3];
 1018|    607|    sum_ptrs[4] = sum_ptrs[3];
 1019|       |
 1020|    607|    sgr_box5_vert(sumsq_ptrs, sum_ptrs, A_ptrs[1], B_ptrs[1],
 1021|    607|                  w, params->sgr.s0, BITDEPTH_MAX);
  ------------------
  |  |   59|    607|#define BITDEPTH_MAX 0xff
  ------------------
 1022|    607|    rotate(A_ptrs, B_ptrs, 2);
 1023|       |
 1024|    607|    goto output_1;
 1025|    615|}
looprestoration_tmpl.c:sgr_box5_row_h:
  441|   397k|{
  442|   397k|    sumsq++;
  443|   397k|    sum++;
  444|   397k|    int a = edges & LR_HAVE_LEFT ? (left ? left[0][1] : src[-3]) : src[0];
  ------------------
  |  Branch (444:13): [True: 324k, False: 72.6k]
  |  Branch (444:37): [True: 306k, False: 17.9k]
  ------------------
  445|   397k|    int b = edges & LR_HAVE_LEFT ? (left ? left[0][2] : src[-2]) : src[0];
  ------------------
  |  Branch (445:13): [True: 324k, False: 72.6k]
  |  Branch (445:37): [True: 306k, False: 17.9k]
  ------------------
  446|   397k|    int c = edges & LR_HAVE_LEFT ? (left ? left[0][3] : src[-1]) : src[0];
  ------------------
  |  Branch (446:13): [True: 324k, False: 72.6k]
  |  Branch (446:37): [True: 306k, False: 17.9k]
  ------------------
  447|   397k|    int d = src[0];
  448|  50.1M|    for (int x = -1; x < w + 1; x++) {
  ------------------
  |  Branch (448:22): [True: 49.7M, False: 397k]
  ------------------
  449|  49.7M|        int e = (x + 2 < w || (edges & LR_HAVE_RIGHT)) ? src[x + 2] : src[w - 1];
  ------------------
  |  Branch (449:18): [True: 48.5M, False: 1.19M]
  |  Branch (449:31): [True: 968k, False: 223k]
  ------------------
  450|  49.7M|        sum[x] = a + b + c + d + e;
  451|  49.7M|        sumsq[x] = a * a + b * b + c * c + d * d + e * e;
  452|  49.7M|        a = b;
  453|  49.7M|        b = c;
  454|  49.7M|        c = d;
  455|  49.7M|        d = e;
  456|  49.7M|    }
  457|   397k|}
looprestoration_tmpl.c:sgr_box5_vert:
  537|   200k|{
  538|   200k|    sgr_box5_row_v(sumsq, sum, sumsq_out, sum_out, w);
  539|   200k|    sgr_calc_row_ab(sumsq_out, sum_out, w, s, bitdepth_max, 25, 164);
  540|   200k|    rotate5_x2(sumsq, sum);
  541|   200k|}
looprestoration_tmpl.c:sgr_box5_row_v:
  488|   200k|{
  489|  25.0M|    for (int x = 0; x < w + 2; x++) {
  ------------------
  |  Branch (489:21): [True: 24.8M, False: 200k]
  ------------------
  490|  24.8M|        int sq_a = sumsq[0][x];
  491|  24.8M|        int sq_b = sumsq[1][x];
  492|  24.8M|        int sq_c = sumsq[2][x];
  493|  24.8M|        int sq_d = sumsq[3][x];
  494|  24.8M|        int sq_e = sumsq[4][x];
  495|  24.8M|        int s_a = sum[0][x];
  496|  24.8M|        int s_b = sum[1][x];
  497|  24.8M|        int s_c = sum[2][x];
  498|  24.8M|        int s_d = sum[3][x];
  499|  24.8M|        int s_e = sum[4][x];
  500|  24.8M|        sumsq_out[x] = sq_a + sq_b + sq_c + sq_d + sq_e;
  501|  24.8M|        sum_out[x] = s_a + s_b + s_c + s_d + s_e;
  502|  24.8M|    }
  503|   200k|}
looprestoration_tmpl.c:sgr_calc_row_ab:
  507|   510k|{
  508|   510k|    const int bitdepth_min_8 = bitdepth_from_max(bitdepth_max) - 8;
  ------------------
  |  |   58|   510k|#define bitdepth_from_max(x) 8
  ------------------
  509|  60.7M|    for (int i = 0; i < w + 2; i++) {
  ------------------
  |  Branch (509:21): [True: 60.2M, False: 510k]
  ------------------
  510|  60.2M|        const int a =
  511|  60.2M|            (AA[i] + ((1 << (2 * bitdepth_min_8)) >> 1)) >> (2 * bitdepth_min_8);
  512|  60.2M|        const int b =
  513|  60.2M|            (BB[i] + ((1 << bitdepth_min_8) >> 1)) >> bitdepth_min_8;
  514|       |
  515|  60.2M|        const unsigned p = imax(a * n - b * b, 0);
  516|  60.2M|        const unsigned z = (p * s + (1 << 19)) >> 20;
  517|  60.2M|        const unsigned x = dav1d_sgr_x_by_x[umin(z, 255)];
  518|       |
  519|       |        // This is where we invert A and B, so that B is of size coef.
  520|  60.2M|        AA[i] = (x * BB[i] * sgr_one_by_x + (1 << 11)) >> 12;
  521|  60.2M|        BB[i] = x;
  522|  60.2M|    }
  523|   510k|}
looprestoration_tmpl.c:rotate5_x2:
  402|   200k|{
  403|   200k|    int32_t *tmp32[2];
  404|   200k|    coef *tmpc[2];
  405|   601k|    for (int i = 0; i < 2; i++) {
  ------------------
  |  Branch (405:21): [True: 400k, False: 200k]
  ------------------
  406|   400k|        tmp32[i] = sumsq_ptrs[i];
  407|   400k|        tmpc[i] = sum_ptrs[i];
  408|   400k|    }
  409|   801k|    for (int i = 0; i < 3; i++) {
  ------------------
  |  Branch (409:21): [True: 601k, False: 200k]
  ------------------
  410|   601k|        sumsq_ptrs[i] = sumsq_ptrs[i + 2];
  411|   601k|        sum_ptrs[i] = sum_ptrs[i + 2];
  412|   601k|    }
  413|   601k|    for (int i = 0; i < 2; i++) {
  ------------------
  |  Branch (413:21): [True: 400k, False: 200k]
  ------------------
  414|   400k|        sumsq_ptrs[3 + i] = tmp32[i];
  415|   400k|        sum_ptrs[3 + i] = tmpc[i];
  416|   400k|    }
  417|   200k|}
looprestoration_tmpl.c:rotate:
  390|   822k|{
  391|   822k|    int32_t *tmp32 = sumsq_ptrs[0];
  392|   822k|    coef *tmpc = sum_ptrs[0];
  393|  2.47M|    for (int i = 0; i < n - 1; i++) {
  ------------------
  |  Branch (393:21): [True: 1.65M, False: 822k]
  ------------------
  394|  1.65M|        sumsq_ptrs[i] = sumsq_ptrs[i + 1];
  395|  1.65M|        sum_ptrs[i] = sum_ptrs[i + 1];
  396|  1.65M|    }
  397|   822k|    sumsq_ptrs[n - 1] = tmp32;
  398|   822k|    sum_ptrs[n - 1] = tmpc;
  399|   822k|}
looprestoration_tmpl.c:sgr_finish2:
  645|  92.1k|{
  646|  92.1k|    ALIGN_STK_16(coef, tmp, 2*FILTER_OUT_STRIDE,);
  ------------------
  |  |  100|  92.1k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  92.1k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
  647|       |
  648|  92.1k|    sgr_finish_filter2(tmp, *dst, stride, A_ptrs, B_ptrs, w, h);
  649|  92.1k|    sgr_weighted_row1(*dst, tmp, w, w1 HIGHBD_TAIL_SUFFIX);
  650|  92.1k|    *dst += PXSTRIDE(stride);
  ------------------
  |  |   53|  92.1k|#define PXSTRIDE(x) (x)
  ------------------
  651|  92.1k|    if (h > 1) {
  ------------------
  |  Branch (651:9): [True: 90.8k, False: 1.22k]
  ------------------
  652|  90.8k|        sgr_weighted_row1(*dst, tmp + FILTER_OUT_STRIDE, w, w1 HIGHBD_TAIL_SUFFIX);
  ------------------
  |  |  572|  90.8k|#define FILTER_OUT_STRIDE (384)
  ------------------
  653|  90.8k|        *dst += PXSTRIDE(stride);
  ------------------
  |  |   53|  90.8k|#define PXSTRIDE(x) (x)
  ------------------
  654|  90.8k|    }
  655|  92.1k|    rotate(A_ptrs, B_ptrs, 2);
  656|  92.1k|}
looprestoration_tmpl.c:sgr_finish_filter2:
  579|   188k|{
  580|   188k|#define SIX_NEIGHBORS(P, i)\
  581|   188k|    ((P[0][i]     + P[1][i]) * 6 +   \
  582|   188k|     (P[0][i - 1] + P[1][i - 1] +    \
  583|   188k|      P[0][i + 1] + P[1][i + 1]) * 5)
  584|  23.4M|    for (int i = 0; i < w; i++) {
  ------------------
  |  Branch (584:21): [True: 23.2M, False: 188k]
  ------------------
  585|  23.2M|        const int a = SIX_NEIGHBORS(B_ptrs, i + 1);
  ------------------
  |  |  581|  23.2M|    ((P[0][i]     + P[1][i]) * 6 +   \
  |  |  582|  23.2M|     (P[0][i - 1] + P[1][i - 1] +    \
  |  |  583|  23.2M|      P[0][i + 1] + P[1][i + 1]) * 5)
  ------------------
  586|  23.2M|        const int b = SIX_NEIGHBORS(A_ptrs, i + 1);
  ------------------
  |  |  581|  23.2M|    ((P[0][i]     + P[1][i]) * 6 +   \
  |  |  582|  23.2M|     (P[0][i - 1] + P[1][i - 1] +    \
  |  |  583|  23.2M|      P[0][i + 1] + P[1][i + 1]) * 5)
  ------------------
  587|  23.2M|        tmp[i] = (b - a * src[i] + (1 << 8)) >> 9;
  588|  23.2M|    }
  589|   188k|    if (h <= 1)
  ------------------
  |  Branch (589:9): [True: 2.95k, False: 185k]
  ------------------
  590|  2.95k|        return;
  591|   185k|    tmp += FILTER_OUT_STRIDE;
  ------------------
  |  |  572|   185k|#define FILTER_OUT_STRIDE (384)
  ------------------
  592|   185k|    src += PXSTRIDE(src_stride);
  ------------------
  |  |   53|   185k|#define PXSTRIDE(x) (x)
  ------------------
  593|   185k|    const int32_t *A = &A_ptrs[1][1];
  594|   185k|    const coef *B = &B_ptrs[1][1];
  595|  23.1M|    for (int i = 0; i < w; i++) {
  ------------------
  |  Branch (595:21): [True: 22.9M, False: 185k]
  ------------------
  596|  22.9M|        const int a = B[i] * 6 + (B[i - 1] + B[i + 1]) * 5;
  597|  22.9M|        const int b = A[i] * 6 + (A[i - 1] + A[i + 1]) * 5;
  598|  22.9M|        tmp[i] = (b - a * src[i] + (1 << 7)) >> 8;
  599|  22.9M|    }
  600|   185k|#undef SIX_NEIGHBORS
  601|   185k|}
looprestoration_tmpl.c:sgr_weighted_row1:
  605|   282k|{
  606|  37.1M|    for (int i = 0; i < w; i++) {
  ------------------
  |  Branch (606:21): [True: 36.8M, False: 282k]
  ------------------
  607|  36.8M|        const int v = w1 * t1[i];
  608|  36.8M|        dst[i] = iclip_pixel(dst[i] + ((v + (1 << 10)) >> 11));
  ------------------
  |  |   49|  36.8M|#define iclip_pixel iclip_u8
  ------------------
  609|  36.8M|    }
  610|   282k|}
looprestoration_tmpl.c:sgr_3x3_c:
  684|  3.18k|{
  685|  3.18k|#define BUF_STRIDE (384 + 16)
  686|  3.18k|    ALIGN_STK_16(int32_t, sumsq_buf, BUF_STRIDE * 3 + 16,);
  ------------------
  |  |  100|  3.18k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  3.18k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
  687|  3.18k|    ALIGN_STK_16(coef, sum_buf, BUF_STRIDE * 3 + 16,);
  ------------------
  |  |  100|  3.18k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  3.18k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
  688|  3.18k|    int32_t *sumsq_ptrs[3], *sumsq_rows[3];
  689|  3.18k|    coef *sum_ptrs[3], *sum_rows[3];
  690|  12.7k|    for (int i = 0; i < 3; i++) {
  ------------------
  |  Branch (690:21): [True: 9.54k, False: 3.18k]
  ------------------
  691|  9.54k|        sumsq_rows[i] = &sumsq_buf[i * BUF_STRIDE];
  ------------------
  |  |  685|  9.54k|#define BUF_STRIDE (384 + 16)
  ------------------
  692|  9.54k|        sum_rows[i] = &sum_buf[i * BUF_STRIDE];
  ------------------
  |  |  685|  9.54k|#define BUF_STRIDE (384 + 16)
  ------------------
  693|  9.54k|    }
  694|       |
  695|  3.18k|    ALIGN_STK_16(int32_t, A_buf, BUF_STRIDE * 3 + 16,);
  ------------------
  |  |  100|  3.18k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  3.18k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
  696|  3.18k|    ALIGN_STK_16(coef, B_buf, BUF_STRIDE * 3 + 16,);
  ------------------
  |  |  100|  3.18k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  3.18k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
  697|  3.18k|    int32_t *A_ptrs[3];
  698|  3.18k|    coef *B_ptrs[3];
  699|  12.7k|    for (int i = 0; i < 3; i++) {
  ------------------
  |  Branch (699:21): [True: 9.54k, False: 3.18k]
  ------------------
  700|  9.54k|        A_ptrs[i] = &A_buf[i * BUF_STRIDE];
  ------------------
  |  |  685|  9.54k|#define BUF_STRIDE (384 + 16)
  ------------------
  701|  9.54k|        B_ptrs[i] = &B_buf[i * BUF_STRIDE];
  ------------------
  |  |  685|  9.54k|#define BUF_STRIDE (384 + 16)
  ------------------
  702|  9.54k|    }
  703|  3.18k|    const pixel *src = dst;
  704|  3.18k|    const pixel *lpf_bottom = lpf + 6*PXSTRIDE(stride);
  ------------------
  |  |   53|  3.18k|#define PXSTRIDE(x) (x)
  ------------------
  705|       |
  706|  3.18k|    if (edges & LR_HAVE_TOP) {
  ------------------
  |  Branch (706:9): [True: 1.57k, False: 1.60k]
  ------------------
  707|  1.57k|        sumsq_ptrs[0] = sumsq_rows[0];
  708|  1.57k|        sumsq_ptrs[1] = sumsq_rows[1];
  709|  1.57k|        sumsq_ptrs[2] = sumsq_rows[2];
  710|  1.57k|        sum_ptrs[0] = sum_rows[0];
  711|  1.57k|        sum_ptrs[1] = sum_rows[1];
  712|  1.57k|        sum_ptrs[2] = sum_rows[2];
  713|       |
  714|  1.57k|        sgr_box3_row_h(sumsq_rows[0], sum_rows[0], NULL, lpf, w, edges);
  715|  1.57k|        lpf += PXSTRIDE(stride);
  ------------------
  |  |   53|  1.57k|#define PXSTRIDE(x) (x)
  ------------------
  716|  1.57k|        sgr_box3_row_h(sumsq_rows[1], sum_rows[1], NULL, lpf, w, edges);
  717|       |
  718|  1.57k|        sgr_box3_hv(sumsq_ptrs, sum_ptrs, A_ptrs[2], B_ptrs[2],
  719|  1.57k|                    left, src, w, params->sgr.s1, edges, BITDEPTH_MAX);
  ------------------
  |  |   59|  1.57k|#define BITDEPTH_MAX 0xff
  ------------------
  720|  1.57k|        left++;
  721|  1.57k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|  1.57k|#define PXSTRIDE(x) (x)
  ------------------
  722|  1.57k|        rotate(A_ptrs, B_ptrs, 3);
  723|       |
  724|  1.57k|        if (--h <= 0)
  ------------------
  |  Branch (724:13): [True: 457, False: 1.11k]
  ------------------
  725|    457|            goto vert_1;
  726|       |
  727|  1.11k|        sgr_box3_hv(sumsq_ptrs, sum_ptrs, A_ptrs[2], B_ptrs[2],
  728|  1.11k|                    left, src, w, params->sgr.s1, edges, BITDEPTH_MAX);
  ------------------
  |  |   59|  1.11k|#define BITDEPTH_MAX 0xff
  ------------------
  729|  1.11k|        left++;
  730|  1.11k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|  1.11k|#define PXSTRIDE(x) (x)
  ------------------
  731|  1.11k|        rotate(A_ptrs, B_ptrs, 3);
  732|       |
  733|  1.11k|        if (--h <= 0)
  ------------------
  |  Branch (733:13): [True: 133, False: 984]
  ------------------
  734|    133|            goto vert_2;
  735|  1.60k|    } else {
  736|  1.60k|        sumsq_ptrs[0] = sumsq_rows[0];
  737|  1.60k|        sumsq_ptrs[1] = sumsq_rows[0];
  738|  1.60k|        sumsq_ptrs[2] = sumsq_rows[0];
  739|  1.60k|        sum_ptrs[0] = sum_rows[0];
  740|  1.60k|        sum_ptrs[1] = sum_rows[0];
  741|  1.60k|        sum_ptrs[2] = sum_rows[0];
  742|       |
  743|  1.60k|        sgr_box3_row_h(sumsq_rows[0], sum_rows[0], left, src, w, edges);
  744|  1.60k|        left++;
  745|  1.60k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|  1.60k|#define PXSTRIDE(x) (x)
  ------------------
  746|       |
  747|  1.60k|        sgr_box3_vert(sumsq_ptrs, sum_ptrs, A_ptrs[2], B_ptrs[2],
  748|  1.60k|                      w, params->sgr.s1, BITDEPTH_MAX);
  ------------------
  |  |   59|  1.60k|#define BITDEPTH_MAX 0xff
  ------------------
  749|  1.60k|        rotate(A_ptrs, B_ptrs, 3);
  750|       |
  751|  1.60k|        if (--h <= 0)
  ------------------
  |  Branch (751:13): [True: 173, False: 1.43k]
  ------------------
  752|    173|            goto vert_1;
  753|       |
  754|  1.43k|        sumsq_ptrs[2] = sumsq_rows[1];
  755|  1.43k|        sum_ptrs[2] = sum_rows[1];
  756|       |
  757|  1.43k|        sgr_box3_hv(sumsq_ptrs, sum_ptrs, A_ptrs[2], B_ptrs[2],
  758|  1.43k|                    left, src, w, params->sgr.s1, edges, BITDEPTH_MAX);
  ------------------
  |  |   59|  1.43k|#define BITDEPTH_MAX 0xff
  ------------------
  759|  1.43k|        left++;
  760|  1.43k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|  1.43k|#define PXSTRIDE(x) (x)
  ------------------
  761|  1.43k|        rotate(A_ptrs, B_ptrs, 3);
  762|       |
  763|  1.43k|        if (--h <= 0)
  ------------------
  |  Branch (763:13): [True: 320, False: 1.11k]
  ------------------
  764|    320|            goto vert_2;
  765|       |
  766|  1.11k|        sumsq_ptrs[2] = sumsq_rows[2];
  767|  1.11k|        sum_ptrs[2] = sum_rows[2];
  768|  1.11k|    }
  769|       |
  770|  94.2k|    do {
  771|  94.2k|        sgr_box3_hv(sumsq_ptrs, sum_ptrs, A_ptrs[2], B_ptrs[2],
  772|  94.2k|                    left, src, w, params->sgr.s1, edges, BITDEPTH_MAX);
  ------------------
  |  |   59|  94.2k|#define BITDEPTH_MAX 0xff
  ------------------
  773|  94.2k|        left++;
  774|  94.2k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|  94.2k|#define PXSTRIDE(x) (x)
  ------------------
  775|       |
  776|  94.2k|        sgr_finish1(&dst, stride, A_ptrs, B_ptrs,
  777|  94.2k|                    w, params->sgr.w1 HIGHBD_TAIL_SUFFIX);
  778|  94.2k|    } while (--h > 0);
  ------------------
  |  Branch (778:14): [True: 92.1k, False: 2.09k]
  ------------------
  779|       |
  780|  2.09k|    if (!(edges & LR_HAVE_BOTTOM))
  ------------------
  |  Branch (780:9): [True: 468, False: 1.62k]
  ------------------
  781|    468|        goto vert_2;
  782|       |
  783|  1.62k|    sgr_box3_hv(sumsq_ptrs, sum_ptrs, A_ptrs[2], B_ptrs[2],
  784|  1.62k|                NULL, lpf_bottom, w, params->sgr.s1, edges, BITDEPTH_MAX);
  ------------------
  |  |   59|  1.62k|#define BITDEPTH_MAX 0xff
  ------------------
  785|  1.62k|    lpf_bottom += PXSTRIDE(stride);
  ------------------
  |  |   53|  1.62k|#define PXSTRIDE(x) (x)
  ------------------
  786|       |
  787|  1.62k|    sgr_finish1(&dst, stride, A_ptrs, B_ptrs,
  788|  1.62k|                w, params->sgr.w1 HIGHBD_TAIL_SUFFIX);
  789|       |
  790|  1.62k|    sgr_box3_hv(sumsq_ptrs, sum_ptrs, A_ptrs[2], B_ptrs[2],
  791|  1.62k|                NULL, lpf_bottom, w, params->sgr.s1, edges, BITDEPTH_MAX);
  ------------------
  |  |   59|  1.62k|#define BITDEPTH_MAX 0xff
  ------------------
  792|       |
  793|  1.62k|    sgr_finish1(&dst, stride, A_ptrs, B_ptrs,
  794|  1.62k|                w, params->sgr.w1 HIGHBD_TAIL_SUFFIX);
  795|  1.62k|    return;
  796|       |
  797|    921|vert_2:
  798|    921|    sumsq_ptrs[2] = sumsq_ptrs[1];
  799|    921|    sum_ptrs[2] = sum_ptrs[1];
  800|    921|    sgr_box3_vert(sumsq_ptrs, sum_ptrs, A_ptrs[2], B_ptrs[2],
  801|    921|                  w, params->sgr.s1, BITDEPTH_MAX);
  ------------------
  |  |   59|    921|#define BITDEPTH_MAX 0xff
  ------------------
  802|       |
  803|    921|    sgr_finish1(&dst, stride, A_ptrs, B_ptrs,
  804|    921|                w, params->sgr.w1 HIGHBD_TAIL_SUFFIX);
  805|       |
  806|  1.55k|output_1:
  807|  1.55k|    sumsq_ptrs[2] = sumsq_ptrs[1];
  808|  1.55k|    sum_ptrs[2] = sum_ptrs[1];
  809|  1.55k|    sgr_box3_vert(sumsq_ptrs, sum_ptrs, A_ptrs[2], B_ptrs[2],
  810|  1.55k|                  w, params->sgr.s1, BITDEPTH_MAX);
  ------------------
  |  |   59|  1.55k|#define BITDEPTH_MAX 0xff
  ------------------
  811|       |
  812|  1.55k|    sgr_finish1(&dst, stride, A_ptrs, B_ptrs,
  813|  1.55k|                w, params->sgr.w1 HIGHBD_TAIL_SUFFIX);
  814|  1.55k|    return;
  815|       |
  816|    630|vert_1:
  817|    630|    sumsq_ptrs[2] = sumsq_ptrs[1];
  818|    630|    sum_ptrs[2] = sum_ptrs[1];
  819|    630|    sgr_box3_vert(sumsq_ptrs, sum_ptrs, A_ptrs[2], B_ptrs[2],
  820|    630|                  w, params->sgr.s1, BITDEPTH_MAX);
  ------------------
  |  |   59|    630|#define BITDEPTH_MAX 0xff
  ------------------
  821|    630|    rotate(A_ptrs, B_ptrs, 3);
  822|    630|    goto output_1;
  823|    921|}
looprestoration_tmpl.c:sgr_box3_row_h:
  423|   309k|{
  424|   309k|    sumsq++;
  425|   309k|    sum++;
  426|   309k|    int a = edges & LR_HAVE_LEFT ? (left ? left[0][2] : src[-2]) : src[0];
  ------------------
  |  Branch (426:13): [True: 266k, False: 43.2k]
  |  Branch (426:37): [True: 251k, False: 15.3k]
  ------------------
  427|   309k|    int b = edges & LR_HAVE_LEFT ? (left ? left[0][3] : src[-1]) : src[0];
  ------------------
  |  Branch (427:13): [True: 266k, False: 43.2k]
  |  Branch (427:37): [True: 251k, False: 15.3k]
  ------------------
  428|  35.8M|    for (int x = -1; x < w + 1; x++) {
  ------------------
  |  Branch (428:22): [True: 35.5M, False: 309k]
  ------------------
  429|  35.5M|        int c = (x + 1 < w || (edges & LR_HAVE_RIGHT)) ? src[x + 1] : src[w - 1];
  ------------------
  |  Branch (429:18): [True: 34.9M, False: 619k]
  |  Branch (429:31): [True: 514k, False: 104k]
  ------------------
  430|  35.5M|        sum[x] = a + b + c;
  431|  35.5M|        sumsq[x] = a * a + b * b + c * c;
  432|  35.5M|        a = b;
  433|  35.5M|        b = c;
  434|  35.5M|    }
  435|   309k|}
looprestoration_tmpl.c:sgr_box3_hv:
  550|   101k|{
  551|   101k|    sgr_box3_row_h(sumsq[2], sum[2], left, src, w, edges);
  552|   101k|    sgr_box3_vert(sumsq, sum, AA, BB, w, s, bitdepth_max);
  553|   101k|}
looprestoration_tmpl.c:sgr_box3_vert:
  528|   310k|{
  529|   310k|    sgr_box3_row_v(sumsq, sum, sumsq_out, sum_out, w);
  530|   310k|    sgr_calc_row_ab(sumsq_out, sum_out, w, s, bitdepth_max, 9, 455);
  531|   310k|    rotate(sumsq, sum, 3);
  532|   310k|}
looprestoration_tmpl.c:sgr_box3_row_v:
  472|   310k|{
  473|  35.6M|    for (int x = 0; x < w + 2; x++) {
  ------------------
  |  Branch (473:21): [True: 35.3M, False: 310k]
  ------------------
  474|  35.3M|        int sq_a = sumsq[0][x];
  475|  35.3M|        int sq_b = sumsq[1][x];
  476|  35.3M|        int sq_c = sumsq[2][x];
  477|  35.3M|        int s_a = sum[0][x];
  478|  35.3M|        int s_b = sum[1][x];
  479|  35.3M|        int s_c = sum[2][x];
  480|  35.3M|        sumsq_out[x] = sq_a + sq_b + sq_c;
  481|  35.3M|        sum_out[x] = s_a + s_b + s_c;
  482|  35.3M|    }
  483|   310k|}
looprestoration_tmpl.c:sgr_finish1:
  631|  99.9k|{
  632|       |    // Only one single row, no stride needed
  633|  99.9k|    ALIGN_STK_16(coef, tmp, 384,);
  ------------------
  |  |  100|  99.9k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  99.9k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
  634|       |
  635|  99.9k|    sgr_finish_filter_row1(tmp, *dst, A_ptrs, B_ptrs, w);
  636|  99.9k|    sgr_weighted_row1(*dst, tmp, w, w1 HIGHBD_TAIL_SUFFIX);
  637|  99.9k|    *dst += PXSTRIDE(stride);
  ------------------
  |  |   53|  99.9k|#define PXSTRIDE(x) (x)
  ------------------
  638|  99.9k|    rotate(A_ptrs, B_ptrs, 3);
  639|  99.9k|}
looprestoration_tmpl.c:sgr_finish_filter_row1:
  559|   291k|{
  560|   291k|#define EIGHT_NEIGHBORS(P, i)\
  561|   291k|    ((P[1][i] + P[1][i - 1] + P[1][i + 1] + P[0][i] + P[2][i]) * 4 + \
  562|   291k|     (P[0][i - 1] + P[2][i - 1] +                           \
  563|   291k|      P[0][i + 1] + P[2][i + 1]) * 3)
  564|  33.1M|    for (int i = 0; i < w; i++) {
  ------------------
  |  Branch (564:21): [True: 32.8M, False: 291k]
  ------------------
  565|  32.8M|        const int a = EIGHT_NEIGHBORS(B_ptrs, i + 1);
  ------------------
  |  |  561|  32.8M|    ((P[1][i] + P[1][i - 1] + P[1][i + 1] + P[0][i] + P[2][i]) * 4 + \
  |  |  562|  32.8M|     (P[0][i - 1] + P[2][i - 1] +                           \
  |  |  563|  32.8M|      P[0][i + 1] + P[2][i + 1]) * 3)
  ------------------
  566|  32.8M|        const int b = EIGHT_NEIGHBORS(A_ptrs, i + 1);
  ------------------
  |  |  561|  32.8M|    ((P[1][i] + P[1][i - 1] + P[1][i + 1] + P[0][i] + P[2][i]) * 4 + \
  |  |  562|  32.8M|     (P[0][i - 1] + P[2][i - 1] +                           \
  |  |  563|  32.8M|      P[0][i + 1] + P[2][i + 1]) * 3)
  ------------------
  567|  32.8M|        tmp[i] = (b - a * src[i] + (1 << 8)) >> 9;
  568|  32.8M|    }
  569|   291k|#undef EIGHT_NEIGHBORS
  570|   291k|}
looprestoration_tmpl.c:sgr_mix_c:
 1032|  6.32k|{
 1033|  6.32k|    ALIGN_STK_16(int32_t, sumsq5_buf, BUF_STRIDE * 5 + 16,);
  ------------------
  |  |  100|  6.32k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  6.32k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
 1034|  6.32k|    ALIGN_STK_16(coef, sum5_buf, BUF_STRIDE * 5 + 16,);
  ------------------
  |  |  100|  6.32k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  6.32k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
 1035|  6.32k|    int32_t *sumsq5_ptrs[5], *sumsq5_rows[5];
 1036|  6.32k|    coef *sum5_ptrs[5], *sum5_rows[5];
 1037|  37.9k|    for (int i = 0; i < 5; i++) {
  ------------------
  |  Branch (1037:21): [True: 31.6k, False: 6.32k]
  ------------------
 1038|  31.6k|        sumsq5_rows[i] = &sumsq5_buf[i * BUF_STRIDE];
  ------------------
  |  |  685|  31.6k|#define BUF_STRIDE (384 + 16)
  ------------------
 1039|  31.6k|        sum5_rows[i] = &sum5_buf[i * BUF_STRIDE];
  ------------------
  |  |  685|  31.6k|#define BUF_STRIDE (384 + 16)
  ------------------
 1040|  31.6k|    }
 1041|  6.32k|    ALIGN_STK_16(int32_t, sumsq3_buf, BUF_STRIDE * 3 + 16,);
  ------------------
  |  |  100|  6.32k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  6.32k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
 1042|  6.32k|    ALIGN_STK_16(coef, sum3_buf, BUF_STRIDE * 3 + 16,);
  ------------------
  |  |  100|  6.32k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  6.32k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
 1043|  6.32k|    int32_t *sumsq3_ptrs[3], *sumsq3_rows[3];
 1044|  6.32k|    coef *sum3_ptrs[3], *sum3_rows[3];
 1045|  25.3k|    for (int i = 0; i < 3; i++) {
  ------------------
  |  Branch (1045:21): [True: 18.9k, False: 6.32k]
  ------------------
 1046|  18.9k|        sumsq3_rows[i] = &sumsq3_buf[i * BUF_STRIDE];
  ------------------
  |  |  685|  18.9k|#define BUF_STRIDE (384 + 16)
  ------------------
 1047|  18.9k|        sum3_rows[i] = &sum3_buf[i * BUF_STRIDE];
  ------------------
  |  |  685|  18.9k|#define BUF_STRIDE (384 + 16)
  ------------------
 1048|  18.9k|    }
 1049|       |
 1050|  6.32k|    ALIGN_STK_16(int32_t, A5_buf, BUF_STRIDE * 2 + 16,);
  ------------------
  |  |  100|  6.32k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  6.32k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
 1051|  6.32k|    ALIGN_STK_16(coef, B5_buf, BUF_STRIDE * 2 + 16,);
  ------------------
  |  |  100|  6.32k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  6.32k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
 1052|  6.32k|    int32_t *A5_ptrs[2];
 1053|  6.32k|    coef *B5_ptrs[2];
 1054|  18.9k|    for (int i = 0; i < 2; i++) {
  ------------------
  |  Branch (1054:21): [True: 12.6k, False: 6.32k]
  ------------------
 1055|  12.6k|        A5_ptrs[i] = &A5_buf[i * BUF_STRIDE];
  ------------------
  |  |  685|  12.6k|#define BUF_STRIDE (384 + 16)
  ------------------
 1056|  12.6k|        B5_ptrs[i] = &B5_buf[i * BUF_STRIDE];
  ------------------
  |  |  685|  12.6k|#define BUF_STRIDE (384 + 16)
  ------------------
 1057|  12.6k|    }
 1058|  6.32k|    ALIGN_STK_16(int32_t, A3_buf, BUF_STRIDE * 4 + 16,);
  ------------------
  |  |  100|  6.32k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  6.32k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
 1059|  6.32k|    ALIGN_STK_16(coef, B3_buf, BUF_STRIDE * 4 + 16,);
  ------------------
  |  |  100|  6.32k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  6.32k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
 1060|  6.32k|    int32_t *A3_ptrs[4];
 1061|  6.32k|    coef *B3_ptrs[4];
 1062|  31.6k|    for (int i = 0; i < 4; i++) {
  ------------------
  |  Branch (1062:21): [True: 25.3k, False: 6.32k]
  ------------------
 1063|  25.3k|        A3_ptrs[i] = &A3_buf[i * BUF_STRIDE];
  ------------------
  |  |  685|  25.3k|#define BUF_STRIDE (384 + 16)
  ------------------
 1064|  25.3k|        B3_ptrs[i] = &B3_buf[i * BUF_STRIDE];
  ------------------
  |  |  685|  25.3k|#define BUF_STRIDE (384 + 16)
  ------------------
 1065|  25.3k|    }
 1066|  6.32k|    const pixel *src = dst;
 1067|  6.32k|    const pixel *lpf_bottom = lpf + 6*PXSTRIDE(stride);
  ------------------
  |  |   53|  6.32k|#define PXSTRIDE(x) (x)
  ------------------
 1068|       |
 1069|  6.32k|    if (edges & LR_HAVE_TOP) {
  ------------------
  |  Branch (1069:9): [True: 2.96k, False: 3.36k]
  ------------------
 1070|  2.96k|        sumsq5_ptrs[0] = sumsq5_rows[0];
 1071|  2.96k|        sumsq5_ptrs[1] = sumsq5_rows[0];
 1072|  2.96k|        sumsq5_ptrs[2] = sumsq5_rows[1];
 1073|  2.96k|        sumsq5_ptrs[3] = sumsq5_rows[2];
 1074|  2.96k|        sumsq5_ptrs[4] = sumsq5_rows[3];
 1075|  2.96k|        sum5_ptrs[0] = sum5_rows[0];
 1076|  2.96k|        sum5_ptrs[1] = sum5_rows[0];
 1077|  2.96k|        sum5_ptrs[2] = sum5_rows[1];
 1078|  2.96k|        sum5_ptrs[3] = sum5_rows[2];
 1079|  2.96k|        sum5_ptrs[4] = sum5_rows[3];
 1080|       |
 1081|  2.96k|        sumsq3_ptrs[0] = sumsq3_rows[0];
 1082|  2.96k|        sumsq3_ptrs[1] = sumsq3_rows[1];
 1083|  2.96k|        sumsq3_ptrs[2] = sumsq3_rows[2];
 1084|  2.96k|        sum3_ptrs[0] = sum3_rows[0];
 1085|  2.96k|        sum3_ptrs[1] = sum3_rows[1];
 1086|  2.96k|        sum3_ptrs[2] = sum3_rows[2];
 1087|       |
 1088|  2.96k|        sgr_box35_row_h(sumsq3_rows[0], sum3_rows[0],
 1089|  2.96k|                        sumsq5_rows[0], sum5_rows[0],
 1090|  2.96k|                        NULL, lpf, w, edges);
 1091|  2.96k|        lpf += PXSTRIDE(stride);
  ------------------
  |  |   53|  2.96k|#define PXSTRIDE(x) (x)
  ------------------
 1092|  2.96k|        sgr_box35_row_h(sumsq3_rows[1], sum3_rows[1],
 1093|  2.96k|                        sumsq5_rows[1], sum5_rows[1],
 1094|  2.96k|                        NULL, lpf, w, edges);
 1095|       |
 1096|  2.96k|        sgr_box35_row_h(sumsq3_rows[2], sum3_rows[2],
 1097|  2.96k|                        sumsq5_rows[2], sum5_rows[2],
 1098|  2.96k|                        left, src, w, edges);
 1099|  2.96k|        left++;
 1100|  2.96k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|  2.96k|#define PXSTRIDE(x) (x)
  ------------------
 1101|       |
 1102|  2.96k|        sgr_box3_vert(sumsq3_ptrs, sum3_ptrs, A3_ptrs[3], B3_ptrs[3],
 1103|  2.96k|                      w, params->sgr.s1, BITDEPTH_MAX);
  ------------------
  |  |   59|  2.96k|#define BITDEPTH_MAX 0xff
  ------------------
 1104|  2.96k|        rotate(A3_ptrs, B3_ptrs, 4);
 1105|       |
 1106|  2.96k|        if (--h <= 0)
  ------------------
  |  Branch (1106:13): [True: 662, False: 2.30k]
  ------------------
 1107|    662|            goto vert_1;
 1108|       |
 1109|  2.30k|        sgr_box35_row_h(sumsq3_ptrs[2], sum3_ptrs[2],
 1110|  2.30k|                        sumsq5_rows[3], sum5_rows[3],
 1111|  2.30k|                        left, src, w, edges);
 1112|  2.30k|        left++;
 1113|  2.30k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|  2.30k|#define PXSTRIDE(x) (x)
  ------------------
 1114|  2.30k|        sgr_box5_vert(sumsq5_ptrs, sum5_ptrs, A5_ptrs[1], B5_ptrs[1],
 1115|  2.30k|                      w, params->sgr.s0, BITDEPTH_MAX);
  ------------------
  |  |   59|  2.30k|#define BITDEPTH_MAX 0xff
  ------------------
 1116|  2.30k|        rotate(A5_ptrs, B5_ptrs, 2);
 1117|  2.30k|        sgr_box3_vert(sumsq3_ptrs, sum3_ptrs, A3_ptrs[3], B3_ptrs[3],
 1118|  2.30k|                      w, params->sgr.s1, BITDEPTH_MAX);
  ------------------
  |  |   59|  2.30k|#define BITDEPTH_MAX 0xff
  ------------------
 1119|  2.30k|        rotate(A3_ptrs, B3_ptrs, 4);
 1120|       |
 1121|  2.30k|        if (--h <= 0)
  ------------------
  |  Branch (1121:13): [True: 294, False: 2.00k]
  ------------------
 1122|    294|            goto vert_2;
 1123|       |
 1124|       |        // ptrs are rotated by 2; both [3] and [4] now point at rows[0]; set
 1125|       |        // one of them to point at the previously unused rows[4].
 1126|  2.00k|        sumsq5_ptrs[3] = sumsq5_rows[4];
 1127|  2.00k|        sum5_ptrs[3] = sum5_rows[4];
 1128|  3.36k|    } else {
 1129|  3.36k|        sumsq5_ptrs[0] = sumsq5_rows[0];
 1130|  3.36k|        sumsq5_ptrs[1] = sumsq5_rows[0];
 1131|  3.36k|        sumsq5_ptrs[2] = sumsq5_rows[0];
 1132|  3.36k|        sumsq5_ptrs[3] = sumsq5_rows[0];
 1133|  3.36k|        sumsq5_ptrs[4] = sumsq5_rows[0];
 1134|  3.36k|        sum5_ptrs[0] = sum5_rows[0];
 1135|  3.36k|        sum5_ptrs[1] = sum5_rows[0];
 1136|  3.36k|        sum5_ptrs[2] = sum5_rows[0];
 1137|  3.36k|        sum5_ptrs[3] = sum5_rows[0];
 1138|  3.36k|        sum5_ptrs[4] = sum5_rows[0];
 1139|       |
 1140|  3.36k|        sumsq3_ptrs[0] = sumsq3_rows[0];
 1141|  3.36k|        sumsq3_ptrs[1] = sumsq3_rows[0];
 1142|  3.36k|        sumsq3_ptrs[2] = sumsq3_rows[0];
 1143|  3.36k|        sum3_ptrs[0] = sum3_rows[0];
 1144|  3.36k|        sum3_ptrs[1] = sum3_rows[0];
 1145|  3.36k|        sum3_ptrs[2] = sum3_rows[0];
 1146|       |
 1147|  3.36k|        sgr_box35_row_h(sumsq3_rows[0], sum3_rows[0],
 1148|  3.36k|                        sumsq5_rows[0], sum5_rows[0],
 1149|  3.36k|                        left, src, w, edges);
 1150|  3.36k|        left++;
 1151|  3.36k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|  3.36k|#define PXSTRIDE(x) (x)
  ------------------
 1152|       |
 1153|  3.36k|        sgr_box3_vert(sumsq3_ptrs, sum3_ptrs, A3_ptrs[3], B3_ptrs[3],
 1154|  3.36k|                      w, params->sgr.s1, BITDEPTH_MAX);
  ------------------
  |  |   59|  3.36k|#define BITDEPTH_MAX 0xff
  ------------------
 1155|  3.36k|        rotate(A3_ptrs, B3_ptrs, 4);
 1156|       |
 1157|  3.36k|        if (--h <= 0)
  ------------------
  |  Branch (1157:13): [True: 401, False: 2.96k]
  ------------------
 1158|    401|            goto vert_1;
 1159|       |
 1160|  2.96k|        sumsq5_ptrs[4] = sumsq5_rows[1];
 1161|  2.96k|        sum5_ptrs[4] = sum5_rows[1];
 1162|       |
 1163|  2.96k|        sumsq3_ptrs[2] = sumsq3_rows[1];
 1164|  2.96k|        sum3_ptrs[2] = sum3_rows[1];
 1165|       |
 1166|  2.96k|        sgr_box35_row_h(sumsq3_rows[1], sum3_rows[1],
 1167|  2.96k|                        sumsq5_rows[1], sum5_rows[1],
 1168|  2.96k|                        left, src, w, edges);
 1169|  2.96k|        left++;
 1170|  2.96k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|  2.96k|#define PXSTRIDE(x) (x)
  ------------------
 1171|       |
 1172|  2.96k|        sgr_box5_vert(sumsq5_ptrs, sum5_ptrs, A5_ptrs[1], B5_ptrs[1],
 1173|  2.96k|                      w, params->sgr.s0, BITDEPTH_MAX);
  ------------------
  |  |   59|  2.96k|#define BITDEPTH_MAX 0xff
  ------------------
 1174|  2.96k|        rotate(A5_ptrs, B5_ptrs, 2);
 1175|  2.96k|        sgr_box3_vert(sumsq3_ptrs, sum3_ptrs, A3_ptrs[3], B3_ptrs[3],
 1176|  2.96k|                      w, params->sgr.s1, BITDEPTH_MAX);
  ------------------
  |  |   59|  2.96k|#define BITDEPTH_MAX 0xff
  ------------------
 1177|  2.96k|        rotate(A3_ptrs, B3_ptrs, 4);
 1178|       |
 1179|  2.96k|        if (--h <= 0)
  ------------------
  |  Branch (1179:13): [True: 665, False: 2.29k]
  ------------------
 1180|    665|            goto vert_2;
 1181|       |
 1182|  2.29k|        sumsq5_ptrs[3] = sumsq5_rows[2];
 1183|  2.29k|        sumsq5_ptrs[4] = sumsq5_rows[3];
 1184|  2.29k|        sum5_ptrs[3] = sum5_rows[2];
 1185|  2.29k|        sum5_ptrs[4] = sum5_rows[3];
 1186|       |
 1187|  2.29k|        sumsq3_ptrs[2] = sumsq3_rows[2];
 1188|  2.29k|        sum3_ptrs[2] = sum3_rows[2];
 1189|       |
 1190|  2.29k|        sgr_box35_row_h(sumsq3_rows[2], sum3_rows[2],
 1191|  2.29k|                        sumsq5_rows[2], sum5_rows[2],
 1192|  2.29k|                        left, src, w, edges);
 1193|  2.29k|        left++;
 1194|  2.29k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|  2.29k|#define PXSTRIDE(x) (x)
  ------------------
 1195|       |
 1196|  2.29k|        sgr_box3_vert(sumsq3_ptrs, sum3_ptrs, A3_ptrs[3], B3_ptrs[3],
 1197|  2.29k|                      w, params->sgr.s1, BITDEPTH_MAX);
  ------------------
  |  |   59|  2.29k|#define BITDEPTH_MAX 0xff
  ------------------
 1198|  2.29k|        rotate(A3_ptrs, B3_ptrs, 4);
 1199|       |
 1200|  2.29k|        if (--h <= 0)
  ------------------
  |  Branch (1200:13): [True: 374, False: 1.92k]
  ------------------
 1201|    374|            goto odd;
 1202|       |
 1203|  1.92k|        sgr_box35_row_h(sumsq3_ptrs[2], sum3_ptrs[2],
 1204|  1.92k|                        sumsq5_rows[3], sum5_rows[3],
 1205|  1.92k|                        left, src, w, edges);
 1206|  1.92k|        left++;
 1207|  1.92k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|  1.92k|#define PXSTRIDE(x) (x)
  ------------------
 1208|       |
 1209|  1.92k|        sgr_box5_vert(sumsq5_ptrs, sum5_ptrs, A5_ptrs[1], B5_ptrs[1],
 1210|  1.92k|                      w, params->sgr.s0, BITDEPTH_MAX);
  ------------------
  |  |   59|  1.92k|#define BITDEPTH_MAX 0xff
  ------------------
 1211|  1.92k|        sgr_box3_vert(sumsq3_ptrs, sum3_ptrs, A3_ptrs[3], B3_ptrs[3],
 1212|  1.92k|                      w, params->sgr.s1, BITDEPTH_MAX);
  ------------------
  |  |   59|  1.92k|#define BITDEPTH_MAX 0xff
  ------------------
 1213|  1.92k|        sgr_finish_mix(&dst, stride, A5_ptrs, B5_ptrs, A3_ptrs, B3_ptrs,
 1214|  1.92k|                       w, 2, params->sgr.w0, params->sgr.w1
 1215|  1.92k|                       HIGHBD_TAIL_SUFFIX);
 1216|       |
 1217|  1.92k|        if (--h <= 0)
  ------------------
  |  Branch (1217:13): [True: 280, False: 1.64k]
  ------------------
 1218|    280|            goto vert_2;
 1219|       |
 1220|       |        // ptrs are rotated by 2; both [3] and [4] now point at rows[0]; set
 1221|       |        // one of them to point at the previously unused rows[4].
 1222|  1.64k|        sumsq5_ptrs[3] = sumsq5_rows[4];
 1223|  1.64k|        sum5_ptrs[3] = sum5_rows[4];
 1224|  1.64k|    }
 1225|       |
 1226|  87.8k|    do {
 1227|  87.8k|        sgr_box35_row_h(sumsq3_ptrs[2], sum3_ptrs[2],
 1228|  87.8k|                        sumsq5_ptrs[3], sum5_ptrs[3],
 1229|  87.8k|                        left, src, w, edges);
 1230|  87.8k|        left++;
 1231|  87.8k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|  87.8k|#define PXSTRIDE(x) (x)
  ------------------
 1232|       |
 1233|  87.8k|        sgr_box3_vert(sumsq3_ptrs, sum3_ptrs, A3_ptrs[3], B3_ptrs[3],
 1234|  87.8k|                      w, params->sgr.s1, BITDEPTH_MAX);
  ------------------
  |  |   59|  87.8k|#define BITDEPTH_MAX 0xff
  ------------------
 1235|  87.8k|        rotate(A3_ptrs, B3_ptrs, 4);
 1236|       |
 1237|  87.8k|        if (--h <= 0)
  ------------------
  |  Branch (1237:13): [True: 295, False: 87.5k]
  ------------------
 1238|    295|            goto odd;
 1239|       |
 1240|  87.5k|        sgr_box35_row_h(sumsq3_ptrs[2], sum3_ptrs[2],
 1241|  87.5k|                        sumsq5_ptrs[4], sum5_ptrs[4],
 1242|  87.5k|                        left, src, w, edges);
 1243|  87.5k|        left++;
 1244|  87.5k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|  87.5k|#define PXSTRIDE(x) (x)
  ------------------
 1245|       |
 1246|  87.5k|        sgr_box5_vert(sumsq5_ptrs, sum5_ptrs, A5_ptrs[1], B5_ptrs[1],
 1247|  87.5k|                      w, params->sgr.s0, BITDEPTH_MAX);
  ------------------
  |  |   59|  87.5k|#define BITDEPTH_MAX 0xff
  ------------------
 1248|  87.5k|        sgr_box3_vert(sumsq3_ptrs, sum3_ptrs, A3_ptrs[3], B3_ptrs[3],
 1249|  87.5k|                      w, params->sgr.s1, BITDEPTH_MAX);
  ------------------
  |  |   59|  87.5k|#define BITDEPTH_MAX 0xff
  ------------------
 1250|  87.5k|        sgr_finish_mix(&dst, stride, A5_ptrs, B5_ptrs, A3_ptrs, B3_ptrs,
 1251|  87.5k|                       w, 2, params->sgr.w0, params->sgr.w1
 1252|  87.5k|                       HIGHBD_TAIL_SUFFIX);
 1253|  87.5k|    } while (--h > 0);
  ------------------
  |  Branch (1253:14): [True: 84.2k, False: 3.35k]
  ------------------
 1254|       |
 1255|  3.35k|    if (!(edges & LR_HAVE_BOTTOM))
  ------------------
  |  Branch (1255:9): [True: 276, False: 3.07k]
  ------------------
 1256|    276|        goto vert_2;
 1257|       |
 1258|  3.07k|    sgr_box35_row_h(sumsq3_ptrs[2], sum3_ptrs[2],
 1259|  3.07k|                    sumsq5_ptrs[3], sum5_ptrs[3],
 1260|  3.07k|                    NULL, lpf_bottom, w, edges);
 1261|  3.07k|    lpf_bottom += PXSTRIDE(stride);
  ------------------
  |  |   53|  3.07k|#define PXSTRIDE(x) (x)
  ------------------
 1262|  3.07k|    sgr_box3_vert(sumsq3_ptrs, sum3_ptrs, A3_ptrs[3], B3_ptrs[3],
 1263|  3.07k|                  w, params->sgr.s1, BITDEPTH_MAX);
  ------------------
  |  |   59|  3.07k|#define BITDEPTH_MAX 0xff
  ------------------
 1264|  3.07k|    rotate(A3_ptrs, B3_ptrs, 4);
 1265|       |
 1266|  3.07k|    sgr_box35_row_h(sumsq3_ptrs[2], sum3_ptrs[2],
 1267|  3.07k|                    sumsq5_ptrs[4], sum5_ptrs[4],
 1268|  3.07k|                    NULL, lpf_bottom, w, edges);
 1269|       |
 1270|  4.59k|output_2:
 1271|  4.59k|    sgr_box5_vert(sumsq5_ptrs, sum5_ptrs, A5_ptrs[1], B5_ptrs[1],
 1272|  4.59k|                  w, params->sgr.s0, BITDEPTH_MAX);
  ------------------
  |  |   59|  4.59k|#define BITDEPTH_MAX 0xff
  ------------------
 1273|  4.59k|    sgr_box3_vert(sumsq3_ptrs, sum3_ptrs, A3_ptrs[3], B3_ptrs[3],
 1274|  4.59k|                  w, params->sgr.s1, BITDEPTH_MAX);
  ------------------
  |  |   59|  4.59k|#define BITDEPTH_MAX 0xff
  ------------------
 1275|  4.59k|    sgr_finish_mix(&dst, stride, A5_ptrs, B5_ptrs, A3_ptrs, B3_ptrs,
 1276|  4.59k|                   w, 2, params->sgr.w0, params->sgr.w1
 1277|  4.59k|                   HIGHBD_TAIL_SUFFIX);
 1278|  4.59k|    return;
 1279|       |
 1280|  1.51k|vert_2:
 1281|       |    // Duplicate the last row twice more
 1282|  1.51k|    sumsq5_ptrs[3] = sumsq5_ptrs[2];
 1283|  1.51k|    sumsq5_ptrs[4] = sumsq5_ptrs[2];
 1284|  1.51k|    sum5_ptrs[3] = sum5_ptrs[2];
 1285|  1.51k|    sum5_ptrs[4] = sum5_ptrs[2];
 1286|       |
 1287|  1.51k|    sumsq3_ptrs[2] = sumsq3_ptrs[1];
 1288|  1.51k|    sum3_ptrs[2] = sum3_ptrs[1];
 1289|  1.51k|    sgr_box3_vert(sumsq3_ptrs, sum3_ptrs, A3_ptrs[3], B3_ptrs[3],
 1290|  1.51k|                  w, params->sgr.s1, BITDEPTH_MAX);
  ------------------
  |  |   59|  1.51k|#define BITDEPTH_MAX 0xff
  ------------------
 1291|  1.51k|    rotate(A3_ptrs, B3_ptrs, 4);
 1292|       |
 1293|  1.51k|    sumsq3_ptrs[2] = sumsq3_ptrs[1];
 1294|  1.51k|    sum3_ptrs[2] = sum3_ptrs[1];
 1295|       |
 1296|  1.51k|    goto output_2;
 1297|       |
 1298|    669|odd:
 1299|       |    // Copy the last row as padding once
 1300|    669|    sumsq5_ptrs[4] = sumsq5_ptrs[3];
 1301|    669|    sum5_ptrs[4] = sum5_ptrs[3];
 1302|       |
 1303|    669|    sumsq3_ptrs[2] = sumsq3_ptrs[1];
 1304|    669|    sum3_ptrs[2] = sum3_ptrs[1];
 1305|       |
 1306|    669|    sgr_box5_vert(sumsq5_ptrs, sum5_ptrs, A5_ptrs[1], B5_ptrs[1],
 1307|    669|                  w, params->sgr.s0, BITDEPTH_MAX);
  ------------------
  |  |   59|    669|#define BITDEPTH_MAX 0xff
  ------------------
 1308|    669|    sgr_box3_vert(sumsq3_ptrs, sum3_ptrs, A3_ptrs[3], B3_ptrs[3],
 1309|    669|                  w, params->sgr.s1, BITDEPTH_MAX);
  ------------------
  |  |   59|    669|#define BITDEPTH_MAX 0xff
  ------------------
 1310|    669|    sgr_finish_mix(&dst, stride, A5_ptrs, B5_ptrs, A3_ptrs, B3_ptrs,
 1311|    669|                   w, 2, params->sgr.w0, params->sgr.w1
 1312|    669|                   HIGHBD_TAIL_SUFFIX);
 1313|       |
 1314|  1.73k|output_1:
 1315|       |    // Duplicate the last row twice more
 1316|  1.73k|    sumsq5_ptrs[3] = sumsq5_ptrs[2];
 1317|  1.73k|    sumsq5_ptrs[4] = sumsq5_ptrs[2];
 1318|  1.73k|    sum5_ptrs[3] = sum5_ptrs[2];
 1319|  1.73k|    sum5_ptrs[4] = sum5_ptrs[2];
 1320|       |
 1321|  1.73k|    sumsq3_ptrs[2] = sumsq3_ptrs[1];
 1322|  1.73k|    sum3_ptrs[2] = sum3_ptrs[1];
 1323|       |
 1324|  1.73k|    sgr_box5_vert(sumsq5_ptrs, sum5_ptrs, A5_ptrs[1], B5_ptrs[1],
 1325|  1.73k|                  w, params->sgr.s0, BITDEPTH_MAX);
  ------------------
  |  |   59|  1.73k|#define BITDEPTH_MAX 0xff
  ------------------
 1326|  1.73k|    sgr_box3_vert(sumsq3_ptrs, sum3_ptrs, A3_ptrs[3], B3_ptrs[3],
 1327|  1.73k|                  w, params->sgr.s1, BITDEPTH_MAX);
  ------------------
  |  |   59|  1.73k|#define BITDEPTH_MAX 0xff
  ------------------
 1328|  1.73k|    rotate(A3_ptrs, B3_ptrs, 4);
 1329|       |    // Output only one row
 1330|  1.73k|    sgr_finish_mix(&dst, stride, A5_ptrs, B5_ptrs, A3_ptrs, B3_ptrs,
 1331|  1.73k|                   w, 1, params->sgr.w0, params->sgr.w1
 1332|  1.73k|                   HIGHBD_TAIL_SUFFIX);
 1333|  1.73k|    return;
 1334|       |
 1335|  1.06k|vert_1:
 1336|       |    // Copy the last row as padding once
 1337|  1.06k|    sumsq5_ptrs[4] = sumsq5_ptrs[3];
 1338|  1.06k|    sum5_ptrs[4] = sum5_ptrs[3];
 1339|       |
 1340|  1.06k|    sumsq3_ptrs[2] = sumsq3_ptrs[1];
 1341|  1.06k|    sum3_ptrs[2] = sum3_ptrs[1];
 1342|       |
 1343|  1.06k|    sgr_box5_vert(sumsq5_ptrs, sum5_ptrs, A5_ptrs[1], B5_ptrs[1],
 1344|  1.06k|                  w, params->sgr.s0, BITDEPTH_MAX);
  ------------------
  |  |   59|  1.06k|#define BITDEPTH_MAX 0xff
  ------------------
 1345|  1.06k|    rotate(A5_ptrs, B5_ptrs, 2);
 1346|  1.06k|    sgr_box3_vert(sumsq3_ptrs, sum3_ptrs, A3_ptrs[3], B3_ptrs[3],
 1347|  1.06k|                  w, params->sgr.s1, BITDEPTH_MAX);
  ------------------
  |  |   59|  1.06k|#define BITDEPTH_MAX 0xff
  ------------------
 1348|  1.06k|    rotate(A3_ptrs, B3_ptrs, 4);
 1349|       |
 1350|  1.06k|    goto output_1;
 1351|    669|}
looprestoration_tmpl.c:sgr_box35_row_h:
  464|   203k|{
  465|   203k|    sgr_box3_row_h(sumsq3, sum3, left, src, w, edges);
  466|   203k|    sgr_box5_row_h(sumsq5, sum5, left, src, w, edges);
  467|   203k|}
looprestoration_tmpl.c:sgr_finish_mix:
  663|  96.4k|{
  664|  96.4k|    ALIGN_STK_16(coef, tmp5, 2*FILTER_OUT_STRIDE,);
  ------------------
  |  |  100|  96.4k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  96.4k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
  665|  96.4k|    ALIGN_STK_16(coef, tmp3, 2*FILTER_OUT_STRIDE,);
  ------------------
  |  |  100|  96.4k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  96.4k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
  666|       |
  667|  96.4k|    sgr_finish_filter2(tmp5, *dst, stride, A5_ptrs, B5_ptrs, w, h);
  668|  96.4k|    sgr_finish_filter_row1(tmp3, *dst, A3_ptrs, B3_ptrs, w);
  669|  96.4k|    if (h > 1)
  ------------------
  |  Branch (669:9): [True: 94.7k, False: 1.73k]
  ------------------
  670|  94.7k|        sgr_finish_filter_row1(tmp3 + FILTER_OUT_STRIDE, *dst + PXSTRIDE(stride),
  ------------------
  |  |  572|  94.7k|#define FILTER_OUT_STRIDE (384)
  ------------------
                      sgr_finish_filter_row1(tmp3 + FILTER_OUT_STRIDE, *dst + PXSTRIDE(stride),
  ------------------
  |  |   53|  94.7k|#define PXSTRIDE(x) (x)
  ------------------
  671|  94.7k|                               &A3_ptrs[1], &B3_ptrs[1], w);
  672|  96.4k|    sgr_weighted2(*dst, stride, tmp5, tmp3, w, h, w0, w1 HIGHBD_TAIL_SUFFIX);
  673|  96.4k|    *dst += h*PXSTRIDE(stride);
  ------------------
  |  |   53|  96.4k|#define PXSTRIDE(x) (x)
  ------------------
  674|  96.4k|    rotate(A5_ptrs, B5_ptrs, 2);
  675|  96.4k|    rotate(A3_ptrs, B3_ptrs, 4);
  676|  96.4k|}
looprestoration_tmpl.c:sgr_weighted2:
  616|  96.4k|{
  617|   287k|    for (int j = 0; j < h; j++) {
  ------------------
  |  Branch (617:21): [True: 191k, False: 96.4k]
  ------------------
  618|  21.2M|        for (int i = 0; i < w; i++) {
  ------------------
  |  Branch (618:25): [True: 21.0M, False: 191k]
  ------------------
  619|  21.0M|            const int v = w0 * t1[i] + w1 * t2[i];
  620|  21.0M|            dst[i] = iclip_pixel(dst[i] + ((v + (1 << 10)) >> 11));
  ------------------
  |  |   49|  21.0M|#define iclip_pixel iclip_u8
  ------------------
  621|  21.0M|        }
  622|   191k|        dst += PXSTRIDE(dst_stride);
  ------------------
  |  |   53|   191k|#define PXSTRIDE(x) (x)
  ------------------
  623|   191k|        t1 += FILTER_OUT_STRIDE;
  ------------------
  |  |  572|   191k|#define FILTER_OUT_STRIDE (384)
  ------------------
  624|   191k|        t2 += FILTER_OUT_STRIDE;
  ------------------
  |  |  572|   191k|#define FILTER_OUT_STRIDE (384)
  ------------------
  625|   191k|    }
  626|  96.4k|}
dav1d_loop_restoration_dsp_init_16bpc:
 1367|  4.61k|{
 1368|  4.61k|    c->wiener[0] = c->wiener[1] = wiener_c;
 1369|  4.61k|    c->sgr[0] = sgr_5x5_c;
 1370|  4.61k|    c->sgr[1] = sgr_3x3_c;
 1371|  4.61k|    c->sgr[2] = sgr_mix_c;
 1372|       |
 1373|  4.61k|#if HAVE_ASM
 1374|       |#if ARCH_AARCH64 || ARCH_ARM
 1375|       |    loop_restoration_dsp_init_arm(c, bpc);
 1376|       |#elif ARCH_LOONGARCH64
 1377|       |    loop_restoration_dsp_init_loongarch(c, bpc);
 1378|       |#elif ARCH_PPC64LE
 1379|       |    loop_restoration_dsp_init_ppc(c, bpc);
 1380|       |#elif ARCH_X86
 1381|       |    loop_restoration_dsp_init_x86(c, bpc);
 1382|  4.61k|#endif
 1383|  4.61k|#endif
 1384|  4.61k|}

dav1d_lr_sbrow_8bpc:
  170|  19.1k|{
  171|  19.1k|    const int offset_y = 8 * !!sby;
  172|  19.1k|    const ptrdiff_t *const dst_stride = f->sr_cur.p.stride;
  173|  19.1k|    const int restore_planes = f->lf.restore_planes;
  174|  19.1k|    const int not_last = sby + 1 < f->sbh;
  175|       |
  176|  19.1k|    if (restore_planes & LR_RESTORE_Y) {
  ------------------
  |  Branch (176:9): [True: 15.8k, False: 3.33k]
  ------------------
  177|  15.8k|        const int h = f->sr_cur.p.p.h;
  178|  15.8k|        const int w = f->sr_cur.p.p.w;
  179|  15.8k|        const int next_row_y = (sby + 1) << (6 + f->seq_hdr->sb128);
  180|  15.8k|        const int row_h = imin(next_row_y - 8 * not_last, h);
  181|  15.8k|        const int y_stripe = (sby << (6 + f->seq_hdr->sb128)) - offset_y;
  182|  15.8k|        lr_sbrow(f, dst[0] - offset_y * PXSTRIDE(dst_stride[0]), y_stripe, w,
  ------------------
  |  |   53|  15.8k|#define PXSTRIDE(x) (x)
  ------------------
  183|  15.8k|                 h, row_h, 0);
  184|  15.8k|    }
  185|  19.1k|    if (restore_planes & (LR_RESTORE_U | LR_RESTORE_V)) {
  ------------------
  |  Branch (185:9): [True: 5.92k, False: 13.2k]
  ------------------
  186|  5.92k|        const int ss_ver = f->sr_cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
  187|  5.92k|        const int ss_hor = f->sr_cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
  188|  5.92k|        const int h = (f->sr_cur.p.p.h + ss_ver) >> ss_ver;
  189|  5.92k|        const int w = (f->sr_cur.p.p.w + ss_hor) >> ss_hor;
  190|  5.92k|        const int next_row_y = (sby + 1) << ((6 - ss_ver) + f->seq_hdr->sb128);
  191|  5.92k|        const int row_h = imin(next_row_y - (8 >> ss_ver) * not_last, h);
  192|  5.92k|        const int offset_uv = offset_y >> ss_ver;
  193|  5.92k|        const int y_stripe = (sby << ((6 - ss_ver) + f->seq_hdr->sb128)) - offset_uv;
  194|  5.92k|        if (restore_planes & LR_RESTORE_U)
  ------------------
  |  Branch (194:13): [True: 1.92k, False: 3.99k]
  ------------------
  195|  1.92k|            lr_sbrow(f, dst[1] - offset_uv * PXSTRIDE(dst_stride[1]), y_stripe,
  ------------------
  |  |   53|  1.92k|#define PXSTRIDE(x) (x)
  ------------------
  196|  1.92k|                     w, h, row_h, 1);
  197|       |
  198|  5.92k|        if (restore_planes & LR_RESTORE_V)
  ------------------
  |  Branch (198:13): [True: 5.02k, False: 891]
  ------------------
  199|  5.02k|            lr_sbrow(f, dst[2] - offset_uv * PXSTRIDE(dst_stride[1]), y_stripe,
  ------------------
  |  |   53|  5.02k|#define PXSTRIDE(x) (x)
  ------------------
  200|  5.02k|                     w, h, row_h, 2);
  201|  5.92k|    }
  202|  19.1k|}
lr_apply_tmpl.c:lr_sbrow:
  109|  42.1k|{
  110|  42.1k|    const int chroma = !!plane;
  111|  42.1k|    const int ss_ver = chroma & (f->sr_cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420);
  112|  42.1k|    const int ss_hor = chroma & (f->sr_cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444);
  113|  42.1k|    const ptrdiff_t p_stride = f->sr_cur.p.stride[chroma];
  114|       |
  115|  42.1k|    const int unit_size_log2 = f->frame_hdr->restoration.unit_size[!!plane];
  116|  42.1k|    const int unit_size = 1 << unit_size_log2;
  117|  42.1k|    const int half_unit_size = unit_size >> 1;
  118|  42.1k|    const int max_unit_size = unit_size + half_unit_size;
  119|       |
  120|       |    // Y coordinate of the sbrow (y is 8 luma pixel rows above row_y)
  121|  42.1k|    const int row_y = y + ((8 >> ss_ver) * !!y);
  122|       |
  123|       |    // FIXME This is an ugly hack to lookup the proper AV1Filter unit for
  124|       |    // chroma planes. Question: For Multithreaded decoding, is it better
  125|       |    // to store the chroma LR information with collocated Luma information?
  126|       |    // In other words. For a chroma restoration unit locate at 128,128 and
  127|       |    // with a 4:2:0 chroma subsampling, do we store the filter information at
  128|       |    // the AV1Filter unit located at (128,128) or (256,256)
  129|       |    // TODO Support chroma subsampling.
  130|  42.1k|    const int shift_hor = 7 - ss_hor;
  131|       |
  132|       |    /* maximum sbrow height is 128 + 8 rows offset */
  133|  42.1k|    ALIGN_STK_16(pixel, pre_lr_border, 2, [128 + 8][4]);
  ------------------
  |  |  100|  42.1k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  42.1k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
  134|  42.1k|    const Av1RestorationUnit *lr[2];
  135|       |
  136|  42.1k|    enum LrEdgeFlags edges = (y > 0 ? LR_HAVE_TOP : 0) | LR_HAVE_RIGHT;
  ------------------
  |  Branch (136:31): [True: 28.1k, False: 13.9k]
  ------------------
  137|       |
  138|  42.1k|    int aligned_unit_pos = row_y & ~(unit_size - 1);
  139|  42.1k|    if (aligned_unit_pos && aligned_unit_pos + half_unit_size > h)
  ------------------
  |  Branch (139:9): [True: 26.6k, False: 15.5k]
  |  Branch (139:29): [True: 573, False: 26.0k]
  ------------------
  140|    573|        aligned_unit_pos -= unit_size;
  141|  42.1k|    aligned_unit_pos <<= ss_ver;
  142|  42.1k|    const int sb_idx = (aligned_unit_pos >> 7) * f->sr_sb128w;
  143|  42.1k|    const int unit_idx = ((aligned_unit_pos >> 6) & 1) << 1;
  144|  42.1k|    lr[0] = &f->lf.lr_mask[sb_idx].lr[plane][unit_idx];
  145|  42.1k|    int restore = lr[0]->type != DAV1D_RESTORATION_NONE;
  146|  42.1k|    int x = 0, bit = 0;
  147|  64.4k|    for (; x + max_unit_size <= w; p += unit_size, edges |= LR_HAVE_LEFT, bit ^= 1) {
  ------------------
  |  Branch (147:12): [True: 22.2k, False: 42.1k]
  ------------------
  148|  22.2k|        const int next_x = x + unit_size;
  149|  22.2k|        const int next_u_idx = unit_idx + ((next_x >> (shift_hor - 1)) & 1);
  150|  22.2k|        lr[!bit] =
  151|  22.2k|            &f->lf.lr_mask[sb_idx + (next_x >> shift_hor)].lr[plane][next_u_idx];
  152|  22.2k|        const int restore_next = lr[!bit]->type != DAV1D_RESTORATION_NONE;
  153|  22.2k|        if (restore_next)
  ------------------
  |  Branch (153:13): [True: 12.3k, False: 9.89k]
  ------------------
  154|  12.3k|            backup4xU(pre_lr_border[bit], p + unit_size - 4, p_stride, row_h - y);
  155|  22.2k|        if (restore)
  ------------------
  |  Branch (155:13): [True: 12.1k, False: 10.0k]
  ------------------
  156|  12.1k|            lr_stripe(f, p, pre_lr_border[!bit], x, y, plane, unit_size, row_h,
  157|  12.1k|                      lr[bit], edges);
  158|  22.2k|        x = next_x;
  159|  22.2k|        restore = restore_next;
  160|  22.2k|    }
  161|  42.1k|    if (restore) {
  ------------------
  |  Branch (161:9): [True: 6.98k, False: 35.1k]
  ------------------
  162|  6.98k|        edges &= ~LR_HAVE_RIGHT;
  163|  6.98k|        const int unit_w = w - x;
  164|  6.98k|        lr_stripe(f, p, pre_lr_border[!bit], x, y, plane, unit_w, row_h, lr[bit], edges);
  165|  6.98k|    }
  166|  42.1k|}
lr_apply_tmpl.c:backup4xU:
  102|  12.3k|{
  103|   750k|    for (; u > 0; u--, dst++, src += PXSTRIDE(src_stride))
  ------------------
  |  |   53|   737k|#define PXSTRIDE(x) (x)
  ------------------
  |  Branch (103:12): [True: 737k, False: 12.3k]
  ------------------
  104|   737k|        pixel_copy(dst, src, 4);
  ------------------
  |  |   47|   737k|#define pixel_copy memcpy
  ------------------
  105|  12.3k|}
lr_apply_tmpl.c:lr_stripe:
   40|  19.1k|{
   41|  19.1k|    const Dav1dDSPContext *const dsp = f->dsp;
   42|  19.1k|    const int chroma = !!plane;
   43|  19.1k|    const int ss_ver = chroma & (f->sr_cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420);
   44|  19.1k|    const ptrdiff_t stride = f->sr_cur.p.stride[chroma];
   45|  19.1k|    const int sby = (y + (y ? 8 << ss_ver : 0)) >> (6 - ss_ver + f->seq_hdr->sb128);
  ------------------
  |  Branch (45:27): [True: 7.70k, False: 11.4k]
  ------------------
   46|  19.1k|    const int have_tt = f->c->n_tc > 1;
   47|  19.1k|    const pixel *lpf = f->lf.lr_lpf_line[plane] +
   48|  19.1k|        have_tt * (sby * (4 << f->seq_hdr->sb128) - 4) * PXSTRIDE(stride) + x;
  ------------------
  |  |   53|  19.1k|#define PXSTRIDE(x) (x)
  ------------------
   49|       |
   50|       |    // The first stripe of the frame is shorter by 8 luma pixel rows.
   51|  19.1k|    int stripe_h = imin((64 - 8 * !y) >> ss_ver, row_h - y);
   52|       |
   53|  19.1k|    looprestorationfilter_fn lr_fn;
   54|  19.1k|    LooprestorationParams params;
   55|  19.1k|    if (lr->type == DAV1D_RESTORATION_WIENER) {
  ------------------
  |  Branch (55:9): [True: 4.44k, False: 14.7k]
  ------------------
   56|  4.44k|        int16_t (*const filter)[8] = params.filter;
   57|  4.44k|        filter[0][0] = filter[0][6] = lr->filter_h[0];
   58|  4.44k|        filter[0][1] = filter[0][5] = lr->filter_h[1];
   59|  4.44k|        filter[0][2] = filter[0][4] = lr->filter_h[2];
   60|  4.44k|        filter[0][3] = -(filter[0][0] + filter[0][1] + filter[0][2]) * 2;
   61|       |#if BITDEPTH != 8
   62|       |        /* For 8-bit SIMD it's beneficial to handle the +128 separately
   63|       |         * in order to avoid overflows. */
   64|       |        filter[0][3] += 128;
   65|       |#endif
   66|       |
   67|  4.44k|        filter[1][0] = filter[1][6] = lr->filter_v[0];
   68|  4.44k|        filter[1][1] = filter[1][5] = lr->filter_v[1];
   69|  4.44k|        filter[1][2] = filter[1][4] = lr->filter_v[2];
   70|  4.44k|        filter[1][3] = 128 - (filter[1][0] + filter[1][1] + filter[1][2]) * 2;
   71|       |
   72|  4.44k|        lr_fn = dsp->lr.wiener[!(filter[0][0] | filter[1][0])];
   73|  14.7k|    } else {
   74|  14.7k|        assert(lr->type >= DAV1D_RESTORATION_SGRPROJ);
  ------------------
  |  Branch (74:9): [True: 14.7k, False: 0]
  ------------------
   75|  14.7k|        const int sgr_idx = lr->type - DAV1D_RESTORATION_SGRPROJ;
   76|  14.7k|        const uint16_t *const sgr_params = dav1d_sgr_params[sgr_idx];
   77|  14.7k|        params.sgr.s0 = sgr_params[0];
   78|  14.7k|        params.sgr.s1 = sgr_params[1];
   79|  14.7k|        params.sgr.w0 = lr->sgr_weights[0];
   80|  14.7k|        params.sgr.w1 = 128 - (lr->sgr_weights[0] + lr->sgr_weights[1]);
   81|       |
   82|  14.7k|        lr_fn = dsp->lr.sgr[!!sgr_params[0] + !!sgr_params[1] * 2 - 1];
   83|  14.7k|    }
   84|       |
   85|  29.9k|    while (y + stripe_h <= row_h) {
  ------------------
  |  Branch (85:12): [True: 29.9k, False: 0]
  ------------------
   86|       |        // Change the HAVE_BOTTOM bit in edges to (sby + 1 != f->sbh || y + stripe_h != row_h)
   87|  29.9k|        edges ^= (-(sby + 1 != f->sbh || y + stripe_h != row_h) ^ edges) & LR_HAVE_BOTTOM;
  ------------------
  |  Branch (87:21): [True: 12.6k, False: 17.3k]
  |  Branch (87:42): [True: 6.35k, False: 10.9k]
  ------------------
   88|  29.9k|        lr_fn(p, stride, left, lpf, unit_w, stripe_h, &params, edges HIGHBD_CALL_SUFFIX);
   89|       |
   90|  29.9k|        left += stripe_h;
   91|  29.9k|        y += stripe_h;
   92|  29.9k|        p += stripe_h * PXSTRIDE(stride);
  ------------------
  |  |   53|  29.9k|#define PXSTRIDE(x) (x)
  ------------------
   93|  29.9k|        edges |= LR_HAVE_TOP;
   94|  29.9k|        stripe_h = imin(64 >> ss_ver, row_h - y);
   95|  29.9k|        if (stripe_h == 0) break;
  ------------------
  |  Branch (95:13): [True: 19.1k, False: 10.7k]
  ------------------
   96|  10.7k|        lpf += 4 * PXSTRIDE(stride);
  ------------------
  |  |   53|  10.7k|#define PXSTRIDE(x) (x)
  ------------------
   97|  10.7k|    }
   98|  19.1k|}
dav1d_lr_sbrow_16bpc:
  170|  15.1k|{
  171|  15.1k|    const int offset_y = 8 * !!sby;
  172|  15.1k|    const ptrdiff_t *const dst_stride = f->sr_cur.p.stride;
  173|  15.1k|    const int restore_planes = f->lf.restore_planes;
  174|  15.1k|    const int not_last = sby + 1 < f->sbh;
  175|       |
  176|  15.1k|    if (restore_planes & LR_RESTORE_Y) {
  ------------------
  |  Branch (176:9): [True: 13.5k, False: 1.54k]
  ------------------
  177|  13.5k|        const int h = f->sr_cur.p.p.h;
  178|  13.5k|        const int w = f->sr_cur.p.p.w;
  179|  13.5k|        const int next_row_y = (sby + 1) << (6 + f->seq_hdr->sb128);
  180|  13.5k|        const int row_h = imin(next_row_y - 8 * not_last, h);
  181|  13.5k|        const int y_stripe = (sby << (6 + f->seq_hdr->sb128)) - offset_y;
  182|  13.5k|        lr_sbrow(f, dst[0] - offset_y * PXSTRIDE(dst_stride[0]), y_stripe, w,
  183|  13.5k|                 h, row_h, 0);
  184|  13.5k|    }
  185|  15.1k|    if (restore_planes & (LR_RESTORE_U | LR_RESTORE_V)) {
  ------------------
  |  Branch (185:9): [True: 4.20k, False: 10.9k]
  ------------------
  186|  4.20k|        const int ss_ver = f->sr_cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
  187|  4.20k|        const int ss_hor = f->sr_cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
  188|  4.20k|        const int h = (f->sr_cur.p.p.h + ss_ver) >> ss_ver;
  189|  4.20k|        const int w = (f->sr_cur.p.p.w + ss_hor) >> ss_hor;
  190|  4.20k|        const int next_row_y = (sby + 1) << ((6 - ss_ver) + f->seq_hdr->sb128);
  191|  4.20k|        const int row_h = imin(next_row_y - (8 >> ss_ver) * not_last, h);
  192|  4.20k|        const int offset_uv = offset_y >> ss_ver;
  193|  4.20k|        const int y_stripe = (sby << ((6 - ss_ver) + f->seq_hdr->sb128)) - offset_uv;
  194|  4.20k|        if (restore_planes & LR_RESTORE_U)
  ------------------
  |  Branch (194:13): [True: 2.70k, False: 1.50k]
  ------------------
  195|  2.70k|            lr_sbrow(f, dst[1] - offset_uv * PXSTRIDE(dst_stride[1]), y_stripe,
  196|  2.70k|                     w, h, row_h, 1);
  197|       |
  198|  4.20k|        if (restore_planes & LR_RESTORE_V)
  ------------------
  |  Branch (198:13): [True: 3.05k, False: 1.15k]
  ------------------
  199|  3.05k|            lr_sbrow(f, dst[2] - offset_uv * PXSTRIDE(dst_stride[1]), y_stripe,
  200|  3.05k|                     w, h, row_h, 2);
  201|  4.20k|    }
  202|  15.1k|}

dav1d_mc_dsp_init_8bpc:
  960|  3.41k|COLD void bitfn(dav1d_mc_dsp_init)(Dav1dMCDSPContext *const c) {
  961|  3.41k|#define init_mc_fns(type, name) do { \
  962|  3.41k|    c->mc        [type] = put_##name##_c; \
  963|  3.41k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  964|  3.41k|    c->mct       [type] = prep_##name##_c; \
  965|  3.41k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  966|  3.41k|} while (0)
  967|       |
  968|  3.41k|    init_mc_fns(FILTER_2D_8TAP_REGULAR,        8tap_regular);
  ------------------
  |  |  961|  3.41k|#define init_mc_fns(type, name) do { \
  |  |  962|  3.41k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  3.41k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  3.41k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  3.41k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  3.41k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 3.41k]
  |  |  ------------------
  ------------------
  969|  3.41k|    init_mc_fns(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth);
  ------------------
  |  |  961|  3.41k|#define init_mc_fns(type, name) do { \
  |  |  962|  3.41k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  3.41k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  3.41k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  3.41k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  3.41k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 3.41k]
  |  |  ------------------
  ------------------
  970|  3.41k|    init_mc_fns(FILTER_2D_8TAP_REGULAR_SHARP,  8tap_regular_sharp);
  ------------------
  |  |  961|  3.41k|#define init_mc_fns(type, name) do { \
  |  |  962|  3.41k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  3.41k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  3.41k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  3.41k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  3.41k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 3.41k]
  |  |  ------------------
  ------------------
  971|  3.41k|    init_mc_fns(FILTER_2D_8TAP_SHARP_REGULAR,  8tap_sharp_regular);
  ------------------
  |  |  961|  3.41k|#define init_mc_fns(type, name) do { \
  |  |  962|  3.41k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  3.41k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  3.41k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  3.41k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  3.41k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 3.41k]
  |  |  ------------------
  ------------------
  972|  3.41k|    init_mc_fns(FILTER_2D_8TAP_SHARP_SMOOTH,   8tap_sharp_smooth);
  ------------------
  |  |  961|  3.41k|#define init_mc_fns(type, name) do { \
  |  |  962|  3.41k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  3.41k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  3.41k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  3.41k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  3.41k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 3.41k]
  |  |  ------------------
  ------------------
  973|  3.41k|    init_mc_fns(FILTER_2D_8TAP_SHARP,          8tap_sharp);
  ------------------
  |  |  961|  3.41k|#define init_mc_fns(type, name) do { \
  |  |  962|  3.41k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  3.41k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  3.41k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  3.41k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  3.41k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 3.41k]
  |  |  ------------------
  ------------------
  974|  3.41k|    init_mc_fns(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular);
  ------------------
  |  |  961|  3.41k|#define init_mc_fns(type, name) do { \
  |  |  962|  3.41k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  3.41k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  3.41k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  3.41k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  3.41k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 3.41k]
  |  |  ------------------
  ------------------
  975|  3.41k|    init_mc_fns(FILTER_2D_8TAP_SMOOTH,         8tap_smooth);
  ------------------
  |  |  961|  3.41k|#define init_mc_fns(type, name) do { \
  |  |  962|  3.41k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  3.41k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  3.41k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  3.41k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  3.41k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 3.41k]
  |  |  ------------------
  ------------------
  976|  3.41k|    init_mc_fns(FILTER_2D_8TAP_SMOOTH_SHARP,   8tap_smooth_sharp);
  ------------------
  |  |  961|  3.41k|#define init_mc_fns(type, name) do { \
  |  |  962|  3.41k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  3.41k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  3.41k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  3.41k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  3.41k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 3.41k]
  |  |  ------------------
  ------------------
  977|  3.41k|    init_mc_fns(FILTER_2D_BILINEAR,            bilin);
  ------------------
  |  |  961|  3.41k|#define init_mc_fns(type, name) do { \
  |  |  962|  3.41k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  3.41k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  3.41k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  3.41k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  3.41k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 3.41k]
  |  |  ------------------
  ------------------
  978|       |
  979|  3.41k|    c->avg      = avg_c;
  980|  3.41k|    c->w_avg    = w_avg_c;
  981|  3.41k|    c->mask     = mask_c;
  982|  3.41k|    c->blend    = blend_c;
  983|  3.41k|    c->blend_v  = blend_v_c;
  984|  3.41k|    c->blend_h  = blend_h_c;
  985|  3.41k|    c->w_mask[0] = w_mask_444_c;
  986|  3.41k|    c->w_mask[1] = w_mask_422_c;
  987|  3.41k|    c->w_mask[2] = w_mask_420_c;
  988|  3.41k|    c->warp8x8  = warp_affine_8x8_c;
  989|  3.41k|    c->warp8x8t = warp_affine_8x8t_c;
  990|  3.41k|    c->emu_edge = emu_edge_c;
  991|  3.41k|    c->resize   = resize_c;
  992|       |
  993|  3.41k|#if HAVE_ASM
  994|       |#if ARCH_AARCH64 || ARCH_ARM
  995|       |    mc_dsp_init_arm(c);
  996|       |#elif ARCH_LOONGARCH64
  997|       |    mc_dsp_init_loongarch(c);
  998|       |#elif ARCH_PPC64LE
  999|       |    mc_dsp_init_ppc(c);
 1000|       |#elif ARCH_RISCV
 1001|       |    mc_dsp_init_riscv(c);
 1002|       |#elif ARCH_X86
 1003|       |    mc_dsp_init_x86(c);
 1004|  3.41k|#endif
 1005|  3.41k|#endif
 1006|  3.41k|}
dav1d_mc_dsp_init_16bpc:
  960|  4.61k|COLD void bitfn(dav1d_mc_dsp_init)(Dav1dMCDSPContext *const c) {
  961|  4.61k|#define init_mc_fns(type, name) do { \
  962|  4.61k|    c->mc        [type] = put_##name##_c; \
  963|  4.61k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  964|  4.61k|    c->mct       [type] = prep_##name##_c; \
  965|  4.61k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  966|  4.61k|} while (0)
  967|       |
  968|  4.61k|    init_mc_fns(FILTER_2D_8TAP_REGULAR,        8tap_regular);
  ------------------
  |  |  961|  4.61k|#define init_mc_fns(type, name) do { \
  |  |  962|  4.61k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  4.61k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  4.61k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  4.61k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  4.61k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 4.61k]
  |  |  ------------------
  ------------------
  969|  4.61k|    init_mc_fns(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth);
  ------------------
  |  |  961|  4.61k|#define init_mc_fns(type, name) do { \
  |  |  962|  4.61k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  4.61k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  4.61k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  4.61k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  4.61k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 4.61k]
  |  |  ------------------
  ------------------
  970|  4.61k|    init_mc_fns(FILTER_2D_8TAP_REGULAR_SHARP,  8tap_regular_sharp);
  ------------------
  |  |  961|  4.61k|#define init_mc_fns(type, name) do { \
  |  |  962|  4.61k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  4.61k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  4.61k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  4.61k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  4.61k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 4.61k]
  |  |  ------------------
  ------------------
  971|  4.61k|    init_mc_fns(FILTER_2D_8TAP_SHARP_REGULAR,  8tap_sharp_regular);
  ------------------
  |  |  961|  4.61k|#define init_mc_fns(type, name) do { \
  |  |  962|  4.61k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  4.61k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  4.61k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  4.61k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  4.61k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 4.61k]
  |  |  ------------------
  ------------------
  972|  4.61k|    init_mc_fns(FILTER_2D_8TAP_SHARP_SMOOTH,   8tap_sharp_smooth);
  ------------------
  |  |  961|  4.61k|#define init_mc_fns(type, name) do { \
  |  |  962|  4.61k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  4.61k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  4.61k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  4.61k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  4.61k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 4.61k]
  |  |  ------------------
  ------------------
  973|  4.61k|    init_mc_fns(FILTER_2D_8TAP_SHARP,          8tap_sharp);
  ------------------
  |  |  961|  4.61k|#define init_mc_fns(type, name) do { \
  |  |  962|  4.61k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  4.61k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  4.61k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  4.61k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  4.61k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 4.61k]
  |  |  ------------------
  ------------------
  974|  4.61k|    init_mc_fns(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular);
  ------------------
  |  |  961|  4.61k|#define init_mc_fns(type, name) do { \
  |  |  962|  4.61k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  4.61k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  4.61k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  4.61k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  4.61k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 4.61k]
  |  |  ------------------
  ------------------
  975|  4.61k|    init_mc_fns(FILTER_2D_8TAP_SMOOTH,         8tap_smooth);
  ------------------
  |  |  961|  4.61k|#define init_mc_fns(type, name) do { \
  |  |  962|  4.61k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  4.61k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  4.61k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  4.61k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  4.61k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 4.61k]
  |  |  ------------------
  ------------------
  976|  4.61k|    init_mc_fns(FILTER_2D_8TAP_SMOOTH_SHARP,   8tap_smooth_sharp);
  ------------------
  |  |  961|  4.61k|#define init_mc_fns(type, name) do { \
  |  |  962|  4.61k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  4.61k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  4.61k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  4.61k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  4.61k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 4.61k]
  |  |  ------------------
  ------------------
  977|  4.61k|    init_mc_fns(FILTER_2D_BILINEAR,            bilin);
  ------------------
  |  |  961|  4.61k|#define init_mc_fns(type, name) do { \
  |  |  962|  4.61k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  4.61k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  4.61k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  4.61k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  4.61k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 4.61k]
  |  |  ------------------
  ------------------
  978|       |
  979|  4.61k|    c->avg      = avg_c;
  980|  4.61k|    c->w_avg    = w_avg_c;
  981|  4.61k|    c->mask     = mask_c;
  982|  4.61k|    c->blend    = blend_c;
  983|  4.61k|    c->blend_v  = blend_v_c;
  984|  4.61k|    c->blend_h  = blend_h_c;
  985|  4.61k|    c->w_mask[0] = w_mask_444_c;
  986|  4.61k|    c->w_mask[1] = w_mask_422_c;
  987|  4.61k|    c->w_mask[2] = w_mask_420_c;
  988|  4.61k|    c->warp8x8  = warp_affine_8x8_c;
  989|  4.61k|    c->warp8x8t = warp_affine_8x8t_c;
  990|  4.61k|    c->emu_edge = emu_edge_c;
  991|  4.61k|    c->resize   = resize_c;
  992|       |
  993|  4.61k|#if HAVE_ASM
  994|       |#if ARCH_AARCH64 || ARCH_ARM
  995|       |    mc_dsp_init_arm(c);
  996|       |#elif ARCH_LOONGARCH64
  997|       |    mc_dsp_init_loongarch(c);
  998|       |#elif ARCH_PPC64LE
  999|       |    mc_dsp_init_ppc(c);
 1000|       |#elif ARCH_RISCV
 1001|       |    mc_dsp_init_riscv(c);
 1002|       |#elif ARCH_X86
 1003|       |    mc_dsp_init_x86(c);
 1004|  4.61k|#endif
 1005|  4.61k|#endif
 1006|  4.61k|}

dav1d_mem_pool_push:
  224|   212k|void dav1d_mem_pool_push(Dav1dMemPool *const pool, void *const ptr) {
  225|   212k|    pthread_mutex_lock(&pool->lock);
  226|   212k|    Dav1dMemPoolBuffer *const buf = (Dav1dMemPoolBuffer*)((uintptr_t)ptr - 64);
  227|   212k|    const int ref_cnt = --pool->ref_cnt;
  228|   212k|    if (!pool->end) {
  ------------------
  |  Branch (228:9): [True: 211k, False: 952]
  ------------------
  229|   211k|        buf->next = pool->buf;
  230|   211k|        pool->buf = buf;
  231|   211k|        pthread_mutex_unlock(&pool->lock);
  232|   211k|        assert(ref_cnt > 0);
  ------------------
  |  Branch (232:9): [True: 211k, False: 0]
  ------------------
  233|   211k|    } else {
  234|    952|        pthread_mutex_unlock(&pool->lock);
  235|    952|        dav1d_free_aligned(buf);
  ------------------
  |  |  136|    952|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
  236|    952|        if (!ref_cnt) mem_pool_destroy(pool);
  ------------------
  |  Branch (236:13): [True: 952, False: 0]
  ------------------
  237|    952|    }
  238|   212k|}
dav1d_mem_pool_pop:
  240|   212k|void *dav1d_mem_pool_pop(Dav1dMemPool *const pool, const size_t size) {
  241|   212k|    pthread_mutex_lock(&pool->lock);
  242|   212k|    Dav1dMemPoolBuffer *buf = pool->buf;
  243|   212k|    pool->ref_cnt++;
  244|       |
  245|   212k|    if (buf) {
  ------------------
  |  Branch (245:9): [True: 153k, False: 59.4k]
  ------------------
  246|   153k|        pool->buf = buf->next;
  247|   153k|        pthread_mutex_unlock(&pool->lock);
  248|   153k|        if (buf->size != size) {
  ------------------
  |  Branch (248:13): [True: 2.96k, False: 150k]
  ------------------
  249|       |            /* Reallocate if the size has changed */
  250|  2.96k|            dav1d_free_aligned(buf);
  ------------------
  |  |  136|  2.96k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
  251|  2.96k|            goto alloc;
  252|  2.96k|        }
  253|       |#if TRACK_HEAP_ALLOCATIONS
  254|       |        dav1d_track_reuse(pool->type);
  255|       |#endif
  256|   153k|    } else {
  257|  59.4k|        pthread_mutex_unlock(&pool->lock);
  258|  62.3k|alloc:
  259|  62.3k|        buf = dav1d_alloc_aligned(pool->type, size + 64, 64);
  ------------------
  |  |  134|  62.3k|#define dav1d_alloc_aligned(type, sz, align) dav1d_alloc_aligned_internal(sz, align)
  ------------------
  260|  62.3k|        if (!buf) {
  ------------------
  |  Branch (260:13): [True: 0, False: 62.3k]
  ------------------
  261|      0|            pthread_mutex_lock(&pool->lock);
  262|      0|            const int ref_cnt = --pool->ref_cnt;
  263|      0|            pthread_mutex_unlock(&pool->lock);
  264|      0|            if (!ref_cnt) mem_pool_destroy(pool);
  ------------------
  |  Branch (264:17): [True: 0, False: 0]
  ------------------
  265|      0|            return NULL;
  266|      0|        }
  267|  62.3k|        buf->size = size;
  268|  62.3k|    }
  269|       |
  270|   212k|    return (void*)((uintptr_t)buf + 64);
  271|   212k|}
dav1d_mem_pool_init:
  275|  66.6k|{
  276|  66.6k|    Dav1dMemPool *const pool = dav1d_malloc(ALLOC_COMMON_CTX,
  ------------------
  |  |  132|  66.6k|#define dav1d_malloc(type, sz) malloc(sz)
  ------------------
  277|  66.6k|                                            sizeof(Dav1dMemPool));
  278|  66.6k|    if (pool) {
  ------------------
  |  Branch (278:9): [True: 66.6k, False: 0]
  ------------------
  279|  66.6k|        if (!pthread_mutex_init(&pool->lock, NULL)) {
  ------------------
  |  Branch (279:13): [True: 66.6k, False: 0]
  ------------------
  280|  66.6k|            pool->buf = NULL;
  281|  66.6k|            pool->ref_cnt = 1;
  282|  66.6k|            pool->end = 0;
  283|       |#if TRACK_HEAP_ALLOCATIONS
  284|       |            pool->type = type;
  285|       |#endif
  286|  66.6k|            *ppool = pool;
  287|  66.6k|            return 0;
  288|  66.6k|        }
  289|      0|        dav1d_free(pool);
  ------------------
  |  |  135|      0|#define dav1d_free(ptr) free(ptr)
  ------------------
  290|      0|    }
  291|      0|    *ppool = NULL;
  292|      0|    return DAV1D_ERR(ENOMEM);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  293|  66.6k|}
dav1d_mem_pool_end:
  295|  66.6k|COLD void dav1d_mem_pool_end(Dav1dMemPool *const pool) {
  296|  66.6k|    if (pool) {
  ------------------
  |  Branch (296:9): [True: 66.6k, False: 0]
  ------------------
  297|  66.6k|        pthread_mutex_lock(&pool->lock);
  298|  66.6k|        Dav1dMemPoolBuffer *buf = pool->buf;
  299|  66.6k|        const int ref_cnt = --pool->ref_cnt;
  300|  66.6k|        pool->buf = NULL;
  301|  66.6k|        pool->end = 1;
  302|  66.6k|        pthread_mutex_unlock(&pool->lock);
  303|       |
  304|   125k|        while (buf) {
  ------------------
  |  Branch (304:16): [True: 58.4k, False: 66.6k]
  ------------------
  305|  58.4k|            void *const ptr = buf;
  306|  58.4k|            buf = buf->next;
  307|  58.4k|            dav1d_free_aligned(ptr);
  ------------------
  |  |  136|  58.4k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
  308|  58.4k|        }
  309|  66.6k|        if (!ref_cnt) mem_pool_destroy(pool);
  ------------------
  |  Branch (309:13): [True: 65.6k, False: 952]
  ------------------
  310|  66.6k|    }
  311|  66.6k|}
mem.c:mem_pool_destroy:
  219|  66.6k|static COLD void mem_pool_destroy(Dav1dMemPool *const pool) {
  220|  66.6k|    pthread_mutex_destroy(&pool->lock);
  221|  66.6k|    dav1d_free(pool);
  ------------------
  |  |  135|  66.6k|#define dav1d_free(ptr) free(ptr)
  ------------------
  222|  66.6k|}

lib.c:dav1d_alloc_aligned_internal:
   89|  28.5k|static inline void *dav1d_alloc_aligned_internal(const size_t sz, const size_t align) {
   90|  28.5k|    assert(!(align & (align - 1)));
  ------------------
  |  Branch (90:5): [True: 28.5k, False: 0]
  ------------------
   91|       |#ifdef _WIN32
   92|       |    return _aligned_malloc(sz, align);
   93|       |#elif HAVE_POSIX_MEMALIGN
   94|  28.5k|    void *ptr;
   95|  28.5k|    if (posix_memalign(&ptr, align, sz)) return NULL;
  ------------------
  |  Branch (95:9): [True: 0, False: 28.5k]
  ------------------
   96|  28.5k|    return ptr;
   97|       |#elif HAVE_MEMALIGN
   98|       |    return memalign(align, sz);
   99|       |#elif HAVE_ALIGNED_ALLOC
  100|       |    // The C11 standard specifies that the size parameter
  101|       |    // must be an integral multiple of alignment.
  102|       |    return aligned_alloc(align, ROUND_UP(sz, align));
  103|       |#else
  104|       |    void *const buf = malloc(sz + align + sizeof(void *));
  105|       |    if (!buf) return NULL;
  106|       |
  107|       |    void *const ptr = (void *)(((uintptr_t)buf + sizeof(void *) + align - 1) & ~(align - 1));
  108|       |    ((void **)ptr)[-1] = buf;
  109|       |    return ptr;
  110|       |#endif
  111|  28.5k|}
lib.c:dav1d_free_aligned_internal:
  113|  76.1k|static inline void dav1d_free_aligned_internal(void *ptr) {
  114|       |#ifdef _WIN32
  115|       |    _aligned_free(ptr);
  116|       |#elif HAVE_POSIX_MEMALIGN || HAVE_MEMALIGN || HAVE_ALIGNED_ALLOC
  117|       |    free(ptr);
  118|       |#else
  119|       |    if (ptr) free(((void **)ptr)[-1]);
  120|       |#endif
  121|  76.1k|}
lib.c:dav1d_freep_aligned:
  144|  9.51k|static inline void dav1d_freep_aligned(void *ptr) {
  145|  9.51k|    void **mem = (void **) ptr;
  146|  9.51k|    if (*mem) {
  ------------------
  |  Branch (146:9): [True: 9.51k, False: 0]
  ------------------
  147|  9.51k|        dav1d_free_aligned(*mem);
  ------------------
  |  |  136|  9.51k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
  148|       |        *mem = NULL;
  149|  9.51k|    }
  150|  9.51k|}
mem.c:dav1d_free_aligned_internal:
  113|  62.3k|static inline void dav1d_free_aligned_internal(void *ptr) {
  114|       |#ifdef _WIN32
  115|       |    _aligned_free(ptr);
  116|       |#elif HAVE_POSIX_MEMALIGN || HAVE_MEMALIGN || HAVE_ALIGNED_ALLOC
  117|       |    free(ptr);
  118|       |#else
  119|       |    if (ptr) free(((void **)ptr)[-1]);
  120|       |#endif
  121|  62.3k|}
mem.c:dav1d_alloc_aligned_internal:
   89|  62.3k|static inline void *dav1d_alloc_aligned_internal(const size_t sz, const size_t align) {
   90|  62.3k|    assert(!(align & (align - 1)));
  ------------------
  |  Branch (90:5): [True: 62.3k, False: 0]
  ------------------
   91|       |#ifdef _WIN32
   92|       |    return _aligned_malloc(sz, align);
   93|       |#elif HAVE_POSIX_MEMALIGN
   94|  62.3k|    void *ptr;
   95|  62.3k|    if (posix_memalign(&ptr, align, sz)) return NULL;
  ------------------
  |  Branch (95:9): [True: 0, False: 62.3k]
  ------------------
   96|  62.3k|    return ptr;
   97|       |#elif HAVE_MEMALIGN
   98|       |    return memalign(align, sz);
   99|       |#elif HAVE_ALIGNED_ALLOC
  100|       |    // The C11 standard specifies that the size parameter
  101|       |    // must be an integral multiple of alignment.
  102|       |    return aligned_alloc(align, ROUND_UP(sz, align));
  103|       |#else
  104|       |    void *const buf = malloc(sz + align + sizeof(void *));
  105|       |    if (!buf) return NULL;
  106|       |
  107|       |    void *const ptr = (void *)(((uintptr_t)buf + sizeof(void *) + align - 1) & ~(align - 1));
  108|       |    ((void **)ptr)[-1] = buf;
  109|       |    return ptr;
  110|       |#endif
  111|  62.3k|}
ref.c:dav1d_alloc_aligned_internal:
   89|  72.9k|static inline void *dav1d_alloc_aligned_internal(const size_t sz, const size_t align) {
   90|  72.9k|    assert(!(align & (align - 1)));
  ------------------
  |  Branch (90:5): [True: 72.9k, False: 0]
  ------------------
   91|       |#ifdef _WIN32
   92|       |    return _aligned_malloc(sz, align);
   93|       |#elif HAVE_POSIX_MEMALIGN
   94|  72.9k|    void *ptr;
   95|  72.9k|    if (posix_memalign(&ptr, align, sz)) return NULL;
  ------------------
  |  Branch (95:9): [True: 0, False: 72.9k]
  ------------------
   96|  72.9k|    return ptr;
   97|       |#elif HAVE_MEMALIGN
   98|       |    return memalign(align, sz);
   99|       |#elif HAVE_ALIGNED_ALLOC
  100|       |    // The C11 standard specifies that the size parameter
  101|       |    // must be an integral multiple of alignment.
  102|       |    return aligned_alloc(align, ROUND_UP(sz, align));
  103|       |#else
  104|       |    void *const buf = malloc(sz + align + sizeof(void *));
  105|       |    if (!buf) return NULL;
  106|       |
  107|       |    void *const ptr = (void *)(((uintptr_t)buf + sizeof(void *) + align - 1) & ~(align - 1));
  108|       |    ((void **)ptr)[-1] = buf;
  109|       |    return ptr;
  110|       |#endif
  111|  72.9k|}
ref.c:dav1d_free_aligned_internal:
  113|  72.9k|static inline void dav1d_free_aligned_internal(void *ptr) {
  114|       |#ifdef _WIN32
  115|       |    _aligned_free(ptr);
  116|       |#elif HAVE_POSIX_MEMALIGN || HAVE_MEMALIGN || HAVE_ALIGNED_ALLOC
  117|       |    free(ptr);
  118|       |#else
  119|       |    if (ptr) free(((void **)ptr)[-1]);
  120|       |#endif
  121|  72.9k|}
refmvs.c:dav1d_free_aligned_internal:
  113|  6.56k|static inline void dav1d_free_aligned_internal(void *ptr) {
  114|       |#ifdef _WIN32
  115|       |    _aligned_free(ptr);
  116|       |#elif HAVE_POSIX_MEMALIGN || HAVE_MEMALIGN || HAVE_ALIGNED_ALLOC
  117|       |    free(ptr);
  118|       |#else
  119|       |    if (ptr) free(((void **)ptr)[-1]);
  120|       |#endif
  121|  6.56k|}
refmvs.c:dav1d_alloc_aligned_internal:
   89|  6.56k|static inline void *dav1d_alloc_aligned_internal(const size_t sz, const size_t align) {
   90|  6.56k|    assert(!(align & (align - 1)));
  ------------------
  |  Branch (90:5): [True: 6.56k, False: 0]
  ------------------
   91|       |#ifdef _WIN32
   92|       |    return _aligned_malloc(sz, align);
   93|       |#elif HAVE_POSIX_MEMALIGN
   94|  6.56k|    void *ptr;
   95|  6.56k|    if (posix_memalign(&ptr, align, sz)) return NULL;
  ------------------
  |  Branch (95:9): [True: 0, False: 6.56k]
  ------------------
   96|  6.56k|    return ptr;
   97|       |#elif HAVE_MEMALIGN
   98|       |    return memalign(align, sz);
   99|       |#elif HAVE_ALIGNED_ALLOC
  100|       |    // The C11 standard specifies that the size parameter
  101|       |    // must be an integral multiple of alignment.
  102|       |    return aligned_alloc(align, ROUND_UP(sz, align));
  103|       |#else
  104|       |    void *const buf = malloc(sz + align + sizeof(void *));
  105|       |    if (!buf) return NULL;
  106|       |
  107|       |    void *const ptr = (void *)(((uintptr_t)buf + sizeof(void *) + align - 1) & ~(align - 1));
  108|       |    ((void **)ptr)[-1] = buf;
  109|       |    return ptr;
  110|       |#endif
  111|  6.56k|}
decode.c:dav1d_free_aligned_internal:
  113|  38.2k|static inline void dav1d_free_aligned_internal(void *ptr) {
  114|       |#ifdef _WIN32
  115|       |    _aligned_free(ptr);
  116|       |#elif HAVE_POSIX_MEMALIGN || HAVE_MEMALIGN || HAVE_ALIGNED_ALLOC
  117|       |    free(ptr);
  118|       |#else
  119|       |    if (ptr) free(((void **)ptr)[-1]);
  120|       |#endif
  121|  38.2k|}
decode.c:dav1d_alloc_aligned_internal:
   89|  38.2k|static inline void *dav1d_alloc_aligned_internal(const size_t sz, const size_t align) {
   90|  38.2k|    assert(!(align & (align - 1)));
  ------------------
  |  Branch (90:5): [True: 38.2k, False: 0]
  ------------------
   91|       |#ifdef _WIN32
   92|       |    return _aligned_malloc(sz, align);
   93|       |#elif HAVE_POSIX_MEMALIGN
   94|  38.2k|    void *ptr;
   95|  38.2k|    if (posix_memalign(&ptr, align, sz)) return NULL;
  ------------------
  |  Branch (95:9): [True: 0, False: 38.2k]
  ------------------
   96|  38.2k|    return ptr;
   97|       |#elif HAVE_MEMALIGN
   98|       |    return memalign(align, sz);
   99|       |#elif HAVE_ALIGNED_ALLOC
  100|       |    // The C11 standard specifies that the size parameter
  101|       |    // must be an integral multiple of alignment.
  102|       |    return aligned_alloc(align, ROUND_UP(sz, align));
  103|       |#else
  104|       |    void *const buf = malloc(sz + align + sizeof(void *));
  105|       |    if (!buf) return NULL;
  106|       |
  107|       |    void *const ptr = (void *)(((uintptr_t)buf + sizeof(void *) + align - 1) & ~(align - 1));
  108|       |    ((void **)ptr)[-1] = buf;
  109|       |    return ptr;
  110|       |#endif
  111|  38.2k|}

dav1d_msac_decode_subexp:
   62|   131k|{
   63|   131k|    assert(n >> k == 8);
  ------------------
  |  Branch (63:5): [True: 131k, False: 0]
  ------------------
   64|       |
   65|   131k|    unsigned a = 0;
   66|   131k|    if (dav1d_msac_decode_bool_equi(s)) {
  ------------------
  |  |   53|   131k|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
  |  Branch (66:9): [True: 71.6k, False: 59.8k]
  ------------------
   67|  71.6k|        if (dav1d_msac_decode_bool_equi(s))
  ------------------
  |  |   53|  71.6k|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
  |  Branch (67:13): [True: 42.7k, False: 28.9k]
  ------------------
   68|  42.7k|            k += dav1d_msac_decode_bool_equi(s) + 1;
  ------------------
  |  |   53|  42.7k|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
   69|  71.6k|        a = 1 << k;
   70|  71.6k|    }
   71|   131k|    const unsigned v = dav1d_msac_decode_bools(s, k) + a;
   72|   131k|    return ref * 2 <= n ? inv_recenter(ref, v) :
  ------------------
  |  Branch (72:12): [True: 74.1k, False: 57.3k]
  ------------------
   73|   131k|                          n - 1 - inv_recenter(n - 1 - ref, v);
   74|   131k|}
dav1d_msac_init:
  206|  46.3k|{
  207|  46.3k|    s->buf_pos = data;
  208|  46.3k|    s->buf_end = data + sz;
  209|  46.3k|    s->dif = 0;
  210|  46.3k|    s->rng = 0x8000;
  211|  46.3k|    s->cnt = -15;
  212|  46.3k|    s->allow_update_cdf = !disable_cdf_update_flag;
  213|  46.3k|    ctx_refill(s);
  214|       |
  215|  46.3k|#if ARCH_X86_64 && HAVE_ASM
  216|  46.3k|    s->symbol_adapt16 = dav1d_msac_decode_symbol_adapt_c;
  217|       |
  218|  46.3k|    msac_init_x86(s);
  219|  46.3k|#endif
  220|  46.3k|}
msac.c:ctx_refill:
   41|  46.3k|static inline void ctx_refill(MsacContext *const s) {
   42|  46.3k|    const uint8_t *buf_pos = s->buf_pos;
   43|  46.3k|    const uint8_t *buf_end = s->buf_end;
   44|  46.3k|    int c = EC_WIN_SIZE - s->cnt - 24;
  ------------------
  |  |   39|  46.3k|#define EC_WIN_SIZE (sizeof(ec_win) << 3)
  ------------------
   45|  46.3k|    ec_win dif = s->dif;
   46|   160k|    do {
   47|   160k|        if (buf_pos >= buf_end) {
  ------------------
  |  Branch (47:13): [True: 38.7k, False: 121k]
  ------------------
   48|       |            // set remaining bits to 1;
   49|  38.7k|            dif |= ~(~(ec_win)0xff << c);
   50|  38.7k|            break;
   51|  38.7k|        }
   52|   121k|        dif |= (ec_win)(*buf_pos++ ^ 0xff) << c;
   53|   121k|        c -= 8;
   54|   121k|    } while (c >= 0);
  ------------------
  |  Branch (54:14): [True: 114k, False: 7.56k]
  ------------------
   55|  46.3k|    s->dif = dif;
   56|  46.3k|    s->cnt = EC_WIN_SIZE - c - 24;
  ------------------
  |  |   39|  46.3k|#define EC_WIN_SIZE (sizeof(ec_win) << 3)
  ------------------
   57|  46.3k|    s->buf_pos = buf_pos;
   58|  46.3k|}

decode.c:dav1d_msac_decode_bools:
   94|   527k|static inline unsigned dav1d_msac_decode_bools(MsacContext *const s, unsigned n) {
   95|   527k|    unsigned v = 0;
   96|  1.23M|    while (n--)
  ------------------
  |  Branch (96:12): [True: 712k, False: 527k]
  ------------------
   97|   712k|        v = (v << 1) | dav1d_msac_decode_bool_equi(s);
  ------------------
  |  |   53|   712k|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
   98|   527k|    return v;
   99|   527k|}
decode.c:dav1d_msac_decode_uniform:
  101|  90.7k|static inline int dav1d_msac_decode_uniform(MsacContext *const s, const unsigned n) {
  102|  90.7k|    assert(n > 0);
  ------------------
  |  Branch (102:5): [True: 90.7k, False: 0]
  ------------------
  103|  90.7k|    const int l = ulog2(n) + 1;
  104|  90.7k|    assert(l > 1);
  ------------------
  |  Branch (104:5): [True: 90.7k, False: 0]
  ------------------
  105|  90.7k|    const unsigned m = (1 << l) - n;
  106|  90.7k|    const unsigned v = dav1d_msac_decode_bools(s, l - 1);
  107|  90.7k|    return v < m ? v : (v << 1) - m + dav1d_msac_decode_bool_equi(s);
  ------------------
  |  |   53|  23.9k|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
  |  Branch (107:12): [True: 66.7k, False: 23.9k]
  ------------------
  108|  90.7k|}
msac.c:dav1d_msac_decode_bools:
   94|   131k|static inline unsigned dav1d_msac_decode_bools(MsacContext *const s, unsigned n) {
   95|   131k|    unsigned v = 0;
   96|   584k|    while (n--)
  ------------------
  |  Branch (96:12): [True: 452k, False: 131k]
  ------------------
   97|   452k|        v = (v << 1) | dav1d_msac_decode_bool_equi(s);
  ------------------
  |  |   53|   452k|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
   98|   131k|    return v;
   99|   131k|}
recon_tmpl.c:dav1d_msac_decode_bools:
   94|  3.01M|static inline unsigned dav1d_msac_decode_bools(MsacContext *const s, unsigned n) {
   95|  3.01M|    unsigned v = 0;
   96|  11.7M|    while (n--)
  ------------------
  |  Branch (96:12): [True: 8.76M, False: 3.01M]
  ------------------
   97|  8.76M|        v = (v << 1) | dav1d_msac_decode_bool_equi(s);
  ------------------
  |  |   53|  8.76M|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
   98|  3.01M|    return v;
   99|  3.01M|}

dav1d_parse_sequence_header:
  304|  13.4k|{
  305|  13.4k|    validate_input_or_ret(out != NULL, DAV1D_ERR(EINVAL));
  ------------------
  |  |   52|  13.4k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:9): [True: 0, False: 13.4k]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
  306|  13.4k|    validate_input_or_ret(ptr != NULL, DAV1D_ERR(EINVAL));
  ------------------
  |  |   52|  13.4k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:9): [True: 0, False: 13.4k]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
  307|  13.4k|    validate_input_or_ret(sz > 0 && sz <= SIZE_MAX / 2, DAV1D_ERR(EINVAL));
  ------------------
  |  |   52|  26.8k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:11): [True: 13.4k, False: 0]
  |  |  |  Branch (52:11): [True: 13.4k, False: 0]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
  308|       |
  309|  13.4k|    GetBits gb;
  310|  13.4k|    dav1d_init_get_bits(&gb, ptr, sz);
  311|  13.4k|    int res = DAV1D_ERR(ENOENT);
  ------------------
  |  |   58|  13.4k|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  312|       |
  313|  27.4k|    do {
  314|  27.4k|        dav1d_get_bit(&gb); // obu_forbidden_bit
  315|  27.4k|        const enum Dav1dObuType type = dav1d_get_bits(&gb, 4);
  316|  27.4k|        const int has_extension = dav1d_get_bit(&gb);
  317|  27.4k|        const int has_length_field = dav1d_get_bit(&gb);
  318|  27.4k|        dav1d_get_bits(&gb, 1 + 8 * has_extension); // ignore
  319|       |
  320|  27.4k|        const uint8_t *obu_end = gb.ptr_end;
  321|  27.4k|        if (has_length_field) {
  ------------------
  |  Branch (321:13): [True: 16.2k, False: 11.1k]
  ------------------
  322|  16.2k|            const size_t len = dav1d_get_uleb128(&gb);
  323|  16.2k|            if (len > (size_t)(obu_end - gb.ptr)) return DAV1D_ERR(EINVAL);
  ------------------
  |  |   58|    293|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  |  Branch (323:17): [True: 293, False: 15.9k]
  ------------------
  324|  15.9k|            obu_end = gb.ptr + len;
  325|  15.9k|        }
  326|       |
  327|  27.1k|        if (type == DAV1D_OBU_SEQ_HDR) {
  ------------------
  |  Branch (327:13): [True: 12.7k, False: 14.3k]
  ------------------
  328|  12.7k|            if ((res = parse_seq_hdr(out, &gb, 0)) < 0) return res;
  ------------------
  |  Branch (328:17): [True: 2.92k, False: 9.80k]
  ------------------
  329|  9.80k|            if (gb.ptr > obu_end) return DAV1D_ERR(EINVAL);
  ------------------
  |  |   58|    215|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  |  Branch (329:17): [True: 215, False: 9.58k]
  ------------------
  330|  9.58k|            dav1d_bytealign_get_bits(&gb);
  331|  9.58k|        }
  332|       |
  333|  23.9k|        if (gb.error) return DAV1D_ERR(EINVAL);
  ------------------
  |  |   58|    557|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  |  Branch (333:13): [True: 557, False: 23.4k]
  ------------------
  334|  23.9k|        assert(gb.state == 0 && gb.bits_left == 0);
  ------------------
  |  Branch (334:9): [True: 23.4k, False: 0]
  |  Branch (334:9): [True: 23.4k, False: 0]
  ------------------
  335|  23.4k|        gb.ptr = obu_end;
  336|  23.4k|    } while (gb.ptr < gb.ptr_end);
  ------------------
  |  Branch (336:14): [True: 13.9k, False: 9.45k]
  ------------------
  337|       |
  338|  9.45k|    return res;
  339|  13.4k|}
dav1d_parse_obus:
 1169|   105k|ptrdiff_t dav1d_parse_obus(Dav1dContext *const c, Dav1dData *const in) {
 1170|   105k|    GetBits gb;
 1171|   105k|    int res;
 1172|       |
 1173|   105k|    dav1d_init_get_bits(&gb, in->data, in->sz);
 1174|       |
 1175|       |    // obu header
 1176|   105k|    const int obu_forbidden_bit = dav1d_get_bit(&gb);
 1177|   105k|    if (c->strict_std_compliance && obu_forbidden_bit) goto error;
  ------------------
  |  Branch (1177:9): [True: 0, False: 105k]
  |  Branch (1177:37): [True: 0, False: 0]
  ------------------
 1178|   105k|    const enum Dav1dObuType type = dav1d_get_bits(&gb, 4);
 1179|   105k|    const int has_extension = dav1d_get_bit(&gb);
 1180|   105k|    const int has_length_field = dav1d_get_bit(&gb);
 1181|   105k|    dav1d_get_bit(&gb); // reserved
 1182|       |
 1183|   105k|    int temporal_id = 0, spatial_id = 0;
 1184|   105k|    if (has_extension) {
  ------------------
  |  Branch (1184:9): [True: 4.53k, False: 101k]
  ------------------
 1185|  4.53k|        temporal_id = dav1d_get_bits(&gb, 3);
 1186|  4.53k|        spatial_id = dav1d_get_bits(&gb, 2);
 1187|  4.53k|        dav1d_get_bits(&gb, 3); // reserved
 1188|  4.53k|    }
 1189|       |
 1190|   105k|    if (has_length_field) {
  ------------------
  |  Branch (1190:9): [True: 39.1k, False: 66.6k]
  ------------------
 1191|  39.1k|        const size_t len = dav1d_get_uleb128(&gb);
 1192|  39.1k|        if (len > (size_t)(gb.ptr_end - gb.ptr)) goto error;
  ------------------
  |  Branch (1192:13): [True: 798, False: 38.3k]
  ------------------
 1193|  38.3k|        gb.ptr_end = gb.ptr + len;
 1194|  38.3k|    }
 1195|   105k|    if (gb.error) goto error;
  ------------------
  |  Branch (1195:9): [True: 352, False: 104k]
  ------------------
 1196|       |
 1197|       |    // We must have read a whole number of bytes at this point (1 byte
 1198|       |    // for the header and whole bytes at a time when reading the
 1199|       |    // leb128 length field).
 1200|   105k|    assert(gb.bits_left == 0);
  ------------------
  |  Branch (1200:5): [True: 104k, False: 0]
  ------------------
 1201|       |
 1202|       |    // skip obu not belonging to the selected temporal/spatial layer
 1203|   104k|    if (type != DAV1D_OBU_SEQ_HDR && type != DAV1D_OBU_TD &&
  ------------------
  |  Branch (1203:9): [True: 84.9k, False: 19.7k]
  |  Branch (1203:38): [True: 81.3k, False: 3.59k]
  ------------------
 1204|  81.3k|        has_extension && c->operating_point_idc != 0)
  ------------------
  |  Branch (1204:9): [True: 3.78k, False: 77.5k]
  |  Branch (1204:26): [True: 968, False: 2.82k]
  ------------------
 1205|    968|    {
 1206|    968|        const int in_temporal_layer = (c->operating_point_idc >> temporal_id) & 1;
 1207|    968|        const int in_spatial_layer = (c->operating_point_idc >> (spatial_id + 8)) & 1;
 1208|    968|        if (!in_temporal_layer || !in_spatial_layer)
  ------------------
  |  Branch (1208:13): [True: 351, False: 617]
  |  Branch (1208:35): [True: 223, False: 394]
  ------------------
 1209|    574|            return gb.ptr_end - gb.ptr_start;
 1210|    968|    }
 1211|       |
 1212|   104k|    switch (type) {
 1213|  19.7k|    case DAV1D_OBU_SEQ_HDR: {
  ------------------
  |  Branch (1213:5): [True: 19.7k, False: 84.4k]
  ------------------
 1214|  19.7k|        Dav1dRef *ref = dav1d_ref_create_using_pool(c->seq_hdr_pool,
 1215|  19.7k|                                                    sizeof(Dav1dSequenceHeader));
 1216|  19.7k|        if (!ref) return DAV1D_ERR(ENOMEM);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  |  Branch (1216:13): [True: 0, False: 19.7k]
  ------------------
 1217|  19.7k|        Dav1dSequenceHeader *seq_hdr = ref->data;
 1218|  19.7k|        if ((res = parse_seq_hdr(seq_hdr, &gb, c->strict_std_compliance)) < 0) {
  ------------------
  |  Branch (1218:13): [True: 1.77k, False: 17.9k]
  ------------------
 1219|  1.77k|            dav1d_log(c, "Error parsing sequence header\n");
  ------------------
  |  |   44|  1.77k|#define dav1d_log(...) do { } while(0)
  |  |  ------------------
  |  |  |  Branch (44:37): [Folded, False: 1.77k]
  |  |  ------------------
  ------------------
 1220|  1.77k|            dav1d_ref_dec(&ref);
 1221|  1.77k|            goto error;
 1222|  1.77k|        }
 1223|       |
 1224|  17.9k|        const int op_idx =
 1225|  17.9k|            c->operating_point < seq_hdr->num_operating_points ? c->operating_point : 0;
  ------------------
  |  Branch (1225:13): [True: 17.9k, False: 0]
  ------------------
 1226|  17.9k|        c->operating_point_idc = seq_hdr->operating_points[op_idx].idc;
 1227|  17.9k|        const unsigned spatial_mask = c->operating_point_idc >> 8;
 1228|  17.9k|        c->max_spatial_id = spatial_mask ? ulog2(spatial_mask) : 0;
  ------------------
  |  Branch (1228:29): [True: 5.50k, False: 12.4k]
  ------------------
 1229|       |
 1230|       |        // If we have read a sequence header which is different from
 1231|       |        // the old one, this is a new video sequence and can't use any
 1232|       |        // previous state. Free that state.
 1233|       |
 1234|  17.9k|        if (!c->seq_hdr) {
  ------------------
  |  Branch (1234:13): [True: 9.15k, False: 8.78k]
  ------------------
 1235|  9.15k|            c->frame_hdr = NULL;
 1236|  9.15k|            c->frame_flags |= PICTURE_FLAG_NEW_SEQUENCE;
 1237|       |        // see 7.5, operating_parameter_info is allowed to change in
 1238|       |        // sequence headers of a single sequence
 1239|  9.15k|        } else if (memcmp(seq_hdr, c->seq_hdr, offsetof(Dav1dSequenceHeader, operating_parameter_info))) {
  ------------------
  |  Branch (1239:20): [True: 3.99k, False: 4.79k]
  ------------------
 1240|  3.99k|            c->frame_hdr = NULL;
 1241|  3.99k|            c->mastering_display = NULL;
 1242|  3.99k|            c->content_light = NULL;
 1243|  3.99k|            dav1d_ref_dec(&c->mastering_display_ref);
 1244|  3.99k|            dav1d_ref_dec(&c->content_light_ref);
 1245|  35.9k|            for (int i = 0; i < 8; i++) {
  ------------------
  |  Branch (1245:29): [True: 31.9k, False: 3.99k]
  ------------------
 1246|  31.9k|                if (c->refs[i].p.p.frame_hdr)
  ------------------
  |  Branch (1246:21): [True: 1.50k, False: 30.4k]
  ------------------
 1247|  1.50k|                    dav1d_thread_picture_unref(&c->refs[i].p);
 1248|  31.9k|                dav1d_ref_dec(&c->refs[i].segmap);
 1249|  31.9k|                dav1d_ref_dec(&c->refs[i].refmvs);
 1250|  31.9k|                dav1d_cdf_thread_unref(&c->cdf[i]);
 1251|  31.9k|            }
 1252|  3.99k|            c->frame_flags |= PICTURE_FLAG_NEW_SEQUENCE;
 1253|       |        // If operating_parameter_info changed, signal it
 1254|  4.79k|        } else if (memcmp(seq_hdr->operating_parameter_info, c->seq_hdr->operating_parameter_info,
  ------------------
  |  Branch (1254:20): [True: 256, False: 4.53k]
  ------------------
 1255|  4.79k|                          sizeof(seq_hdr->operating_parameter_info)))
 1256|    256|        {
 1257|    256|            c->frame_flags |= PICTURE_FLAG_NEW_OP_PARAMS_INFO;
 1258|    256|        }
 1259|  17.9k|        dav1d_ref_dec(&c->seq_hdr_ref);
 1260|  17.9k|        c->seq_hdr_ref = ref;
 1261|  17.9k|        c->seq_hdr = seq_hdr;
 1262|  17.9k|        break;
 1263|  19.7k|    }
 1264|  1.38k|    case DAV1D_OBU_REDUNDANT_FRAME_HDR:
  ------------------
  |  Branch (1264:5): [True: 1.38k, False: 102k]
  ------------------
 1265|  1.38k|        if (c->frame_hdr) break;
  ------------------
  |  Branch (1265:13): [True: 742, False: 643]
  ------------------
 1266|       |        // fall-through
 1267|  56.8k|    case DAV1D_OBU_FRAME:
  ------------------
  |  Branch (1267:5): [True: 56.2k, False: 47.8k]
  ------------------
 1268|  72.0k|    case DAV1D_OBU_FRAME_HDR:
  ------------------
  |  Branch (1268:5): [True: 15.1k, False: 89.0k]
  ------------------
 1269|  72.0k|        if (!c->seq_hdr) goto error;
  ------------------
  |  Branch (1269:13): [True: 194, False: 71.8k]
  ------------------
 1270|  71.8k|        if (!c->frame_hdr_ref) {
  ------------------
  |  Branch (1270:13): [True: 46.5k, False: 25.3k]
  ------------------
 1271|  46.5k|            c->frame_hdr_ref = dav1d_ref_create_using_pool(c->frame_hdr_pool,
 1272|  46.5k|                                                           sizeof(Dav1dFrameHeader));
 1273|  46.5k|            if (!c->frame_hdr_ref) return DAV1D_ERR(ENOMEM);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  |  Branch (1273:17): [True: 0, False: 46.5k]
  ------------------
 1274|  46.5k|        }
 1275|  71.8k|#ifndef NDEBUG
 1276|       |        // ensure that the reference is writable
 1277|  71.8k|        assert(dav1d_ref_is_writable(c->frame_hdr_ref));
  ------------------
  |  Branch (1277:9): [True: 71.8k, False: 0]
  ------------------
 1278|  71.8k|#endif
 1279|  71.8k|        c->frame_hdr = c->frame_hdr_ref->data;
 1280|  71.8k|        memset(c->frame_hdr, 0, sizeof(*c->frame_hdr));
 1281|  71.8k|        c->frame_hdr->temporal_id = temporal_id;
 1282|  71.8k|        c->frame_hdr->spatial_id = spatial_id;
 1283|  71.8k|        if ((res = parse_frame_hdr(c, &gb)) < 0) {
  ------------------
  |  Branch (1283:13): [True: 4.78k, False: 67.0k]
  ------------------
 1284|  4.78k|            c->frame_hdr = NULL;
 1285|  4.78k|            goto error;
 1286|  4.78k|        }
 1287|  70.5k|        for (int n = 0; n < c->n_tile_data; n++)
  ------------------
  |  Branch (1287:25): [True: 3.57k, False: 67.0k]
  ------------------
 1288|  3.57k|            dav1d_data_unref_internal(&c->tile[n].data);
 1289|  67.0k|        c->n_tile_data = 0;
 1290|  67.0k|        c->n_tiles = 0;
 1291|  67.0k|        if (type != DAV1D_OBU_FRAME) {
  ------------------
  |  Branch (1291:13): [True: 14.2k, False: 52.7k]
  ------------------
 1292|       |            // This is actually a frame header OBU so read the
 1293|       |            // trailing bit and check for overrun.
 1294|  14.2k|            if (check_trailing_bits(&gb, c->strict_std_compliance) < 0) {
  ------------------
  |  Branch (1294:17): [True: 6.09k, False: 8.20k]
  ------------------
 1295|  6.09k|                c->frame_hdr = NULL;
 1296|  6.09k|                goto error;
 1297|  6.09k|            }
 1298|  14.2k|        }
 1299|       |
 1300|  60.9k|        if (c->frame_size_limit && (int64_t)c->frame_hdr->width[1] *
  ------------------
  |  Branch (1300:13): [True: 60.9k, False: 0]
  |  Branch (1300:36): [True: 275, False: 60.6k]
  ------------------
 1301|  60.9k|            c->frame_hdr->height > c->frame_size_limit)
 1302|    275|        {
 1303|    275|            dav1d_log(c, "Frame size %dx%d exceeds limit %u\n", c->frame_hdr->width[1],
  ------------------
  |  |   44|    275|#define dav1d_log(...) do { } while(0)
  |  |  ------------------
  |  |  |  Branch (44:37): [Folded, False: 275]
  |  |  ------------------
  ------------------
 1304|    275|                      c->frame_hdr->height, c->frame_size_limit);
 1305|    275|            c->frame_hdr = NULL;
 1306|    275|            return DAV1D_ERR(ERANGE);
  ------------------
  |  |   58|    275|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
 1307|    275|        }
 1308|       |
 1309|  60.6k|        if (type != DAV1D_OBU_FRAME)
  ------------------
  |  Branch (1309:13): [True: 8.20k, False: 52.4k]
  ------------------
 1310|  8.20k|            break;
 1311|       |        // OBU_FRAMEs shouldn't be signaled with show_existing_frame
 1312|  52.4k|        if (c->frame_hdr->show_existing_frame) {
  ------------------
  |  Branch (1312:13): [True: 224, False: 52.2k]
  ------------------
 1313|    224|            c->frame_hdr = NULL;
 1314|    224|            goto error;
 1315|    224|        }
 1316|       |
 1317|       |        // This is the frame header at the start of a frame OBU.
 1318|       |        // There's no trailing bit at the end to skip, but we do need
 1319|       |        // to align to the next byte.
 1320|  52.2k|        dav1d_bytealign_get_bits(&gb);
 1321|       |        // fall-through
 1322|  54.3k|    case DAV1D_OBU_TILE_GRP: {
  ------------------
  |  Branch (1322:5): [True: 2.09k, False: 102k]
  ------------------
 1323|  54.3k|        if (!c->frame_hdr) goto error;
  ------------------
  |  Branch (1323:13): [True: 584, False: 53.7k]
  ------------------
 1324|  53.7k|        if (c->n_tile_data_alloc < c->n_tile_data + 1) {
  ------------------
  |  Branch (1324:13): [True: 8.46k, False: 45.2k]
  ------------------
 1325|  8.46k|            if ((c->n_tile_data + 1) > INT_MAX / (int)sizeof(*c->tile)) goto error;
  ------------------
  |  Branch (1325:17): [True: 0, False: 8.46k]
  ------------------
 1326|  8.46k|            struct Dav1dTileGroup *tile = dav1d_realloc(ALLOC_TILE, c->tile,
  ------------------
  |  |  133|  8.46k|#define dav1d_realloc(type, ptr, sz) realloc(ptr, sz)
  ------------------
 1327|  8.46k|                                                        (c->n_tile_data + 1) * sizeof(*c->tile));
 1328|  8.46k|            if (!tile) goto error;
  ------------------
  |  Branch (1328:17): [True: 0, False: 8.46k]
  ------------------
 1329|  8.46k|            c->tile = tile;
 1330|  8.46k|            memset(c->tile + c->n_tile_data, 0, sizeof(*c->tile));
 1331|  8.46k|            c->n_tile_data_alloc = c->n_tile_data + 1;
 1332|  8.46k|        }
 1333|  53.7k|        parse_tile_hdr(c, &gb);
 1334|       |        // Align to the next byte boundary and check for overrun.
 1335|  53.7k|        dav1d_bytealign_get_bits(&gb);
 1336|  53.7k|        if (gb.error) goto error;
  ------------------
  |  Branch (1336:13): [True: 7.12k, False: 46.6k]
  ------------------
 1337|       |
 1338|  46.6k|        dav1d_data_ref(&c->tile[c->n_tile_data].data, in);
 1339|  46.6k|        c->tile[c->n_tile_data].data.data = gb.ptr;
 1340|  46.6k|        c->tile[c->n_tile_data].data.sz = (size_t)(gb.ptr_end - gb.ptr);
 1341|       |        // ensure tile groups are in order and sane, see 6.10.1
 1342|  46.6k|        if (c->tile[c->n_tile_data].start > c->tile[c->n_tile_data].end ||
  ------------------
  |  Branch (1342:13): [True: 216, False: 46.3k]
  ------------------
 1343|  46.3k|            c->tile[c->n_tile_data].start != c->n_tiles)
  ------------------
  |  Branch (1343:13): [True: 471, False: 45.9k]
  ------------------
 1344|    687|        {
 1345|  1.66k|            for (int i = 0; i <= c->n_tile_data; i++)
  ------------------
  |  Branch (1345:29): [True: 982, False: 687]
  ------------------
 1346|    982|                dav1d_data_unref_internal(&c->tile[i].data);
 1347|    687|            c->n_tile_data = 0;
 1348|    687|            c->n_tiles = 0;
 1349|    687|            goto error;
 1350|    687|        }
 1351|  45.9k|        c->n_tiles += 1 + c->tile[c->n_tile_data].end -
 1352|  45.9k|                          c->tile[c->n_tile_data].start;
 1353|  45.9k|        c->n_tile_data++;
 1354|  45.9k|        break;
 1355|  46.6k|    }
 1356|  3.29k|    case DAV1D_OBU_METADATA: {
  ------------------
  |  Branch (1356:5): [True: 3.29k, False: 100k]
  ------------------
 1357|  3.29k|#define DEBUG_OBU_METADATA 0
 1358|       |#if DEBUG_OBU_METADATA
 1359|       |        const uint8_t *const init_ptr = gb.ptr;
 1360|       |#endif
 1361|       |        // obu metadata type field
 1362|  3.29k|        const enum ObuMetaType meta_type = dav1d_get_uleb128(&gb);
 1363|  3.29k|        if (gb.error) goto error;
  ------------------
  |  Branch (1363:13): [True: 239, False: 3.05k]
  ------------------
 1364|       |
 1365|  3.05k|        switch (meta_type) {
 1366|    483|        case OBU_META_HDR_CLL: {
  ------------------
  |  Branch (1366:9): [True: 483, False: 2.57k]
  ------------------
 1367|    483|            Dav1dRef *ref = dav1d_ref_create(ALLOC_OBU_META,
  ------------------
  |  |   49|    483|#define dav1d_ref_create(type, size) dav1d_ref_create(size)
  ------------------
 1368|    483|                                             sizeof(Dav1dContentLightLevel));
 1369|    483|            if (!ref) return DAV1D_ERR(ENOMEM);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  |  Branch (1369:17): [True: 0, False: 483]
  ------------------
 1370|    483|            Dav1dContentLightLevel *const content_light = ref->data;
 1371|       |
 1372|    483|            content_light->max_content_light_level = dav1d_get_bits(&gb, 16);
 1373|       |#if DEBUG_OBU_METADATA
 1374|       |            printf("CLLOBU: max-content-light-level: %d [off=%td]\n",
 1375|       |                   content_light->max_content_light_level,
 1376|       |                   (gb.ptr - init_ptr) * 8 - gb.bits_left);
 1377|       |#endif
 1378|    483|            content_light->max_frame_average_light_level = dav1d_get_bits(&gb, 16);
 1379|       |#if DEBUG_OBU_METADATA
 1380|       |            printf("CLLOBU: max-frame-average-light-level: %d [off=%td]\n",
 1381|       |                   content_light->max_frame_average_light_level,
 1382|       |                   (gb.ptr - init_ptr) * 8 - gb.bits_left);
 1383|       |#endif
 1384|       |
 1385|    483|            if (check_trailing_bits(&gb, c->strict_std_compliance) < 0) {
  ------------------
  |  Branch (1385:17): [True: 217, False: 266]
  ------------------
 1386|    217|                dav1d_ref_dec(&ref);
 1387|    217|                goto error;
 1388|    217|            }
 1389|       |
 1390|    266|            dav1d_ref_dec(&c->content_light_ref);
 1391|    266|            c->content_light = content_light;
 1392|    266|            c->content_light_ref = ref;
 1393|    266|            break;
 1394|    483|        }
 1395|    574|        case OBU_META_HDR_MDCV: {
  ------------------
  |  Branch (1395:9): [True: 574, False: 2.47k]
  ------------------
 1396|    574|            Dav1dRef *ref = dav1d_ref_create(ALLOC_OBU_META,
  ------------------
  |  |   49|    574|#define dav1d_ref_create(type, size) dav1d_ref_create(size)
  ------------------
 1397|    574|                                             sizeof(Dav1dMasteringDisplay));
 1398|    574|            if (!ref) return DAV1D_ERR(ENOMEM);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  |  Branch (1398:17): [True: 0, False: 574]
  ------------------
 1399|    574|            Dav1dMasteringDisplay *const mastering_display = ref->data;
 1400|       |
 1401|  2.29k|            for (int i = 0; i < 3; i++) {
  ------------------
  |  Branch (1401:29): [True: 1.72k, False: 574]
  ------------------
 1402|  1.72k|                mastering_display->primaries[i][0] = dav1d_get_bits(&gb, 16);
 1403|  1.72k|                mastering_display->primaries[i][1] = dav1d_get_bits(&gb, 16);
 1404|       |#if DEBUG_OBU_METADATA
 1405|       |                printf("MDCVOBU: primaries[%d]: (%d, %d) [off=%td]\n", i,
 1406|       |                       mastering_display->primaries[i][0],
 1407|       |                       mastering_display->primaries[i][1],
 1408|       |                       (gb.ptr - init_ptr) * 8 - gb.bits_left);
 1409|       |#endif
 1410|  1.72k|            }
 1411|    574|            mastering_display->white_point[0] = dav1d_get_bits(&gb, 16);
 1412|       |#if DEBUG_OBU_METADATA
 1413|       |            printf("MDCVOBU: white-point-x: %d [off=%td]\n",
 1414|       |                   mastering_display->white_point[0],
 1415|       |                   (gb.ptr - init_ptr) * 8 - gb.bits_left);
 1416|       |#endif
 1417|    574|            mastering_display->white_point[1] = dav1d_get_bits(&gb, 16);
 1418|       |#if DEBUG_OBU_METADATA
 1419|       |            printf("MDCVOBU: white-point-y: %d [off=%td]\n",
 1420|       |                   mastering_display->white_point[1],
 1421|       |                   (gb.ptr - init_ptr) * 8 - gb.bits_left);
 1422|       |#endif
 1423|    574|            mastering_display->max_luminance = dav1d_get_bits(&gb, 32);
 1424|       |#if DEBUG_OBU_METADATA
 1425|       |            printf("MDCVOBU: max-luminance: %d [off=%td]\n",
 1426|       |                   mastering_display->max_luminance,
 1427|       |                   (gb.ptr - init_ptr) * 8 - gb.bits_left);
 1428|       |#endif
 1429|    574|            mastering_display->min_luminance = dav1d_get_bits(&gb, 32);
 1430|       |#if DEBUG_OBU_METADATA
 1431|       |            printf("MDCVOBU: min-luminance: %d [off=%td]\n",
 1432|       |                   mastering_display->min_luminance,
 1433|       |                   (gb.ptr - init_ptr) * 8 - gb.bits_left);
 1434|       |#endif
 1435|    574|            if (check_trailing_bits(&gb, c->strict_std_compliance) < 0) {
  ------------------
  |  Branch (1435:17): [True: 66, False: 508]
  ------------------
 1436|     66|                dav1d_ref_dec(&ref);
 1437|     66|                goto error;
 1438|     66|            }
 1439|       |
 1440|    508|            dav1d_ref_dec(&c->mastering_display_ref);
 1441|    508|            c->mastering_display = mastering_display;
 1442|    508|            c->mastering_display_ref = ref;
 1443|    508|            break;
 1444|    574|        }
 1445|  1.53k|        case OBU_META_ITUT_T35: {
  ------------------
  |  Branch (1445:9): [True: 1.53k, False: 1.51k]
  ------------------
 1446|  1.53k|            ptrdiff_t payload_size = gb.ptr_end - gb.ptr;
 1447|       |            // Don't take into account all the trailing bits for payload_size
 1448|  1.89k|            while (payload_size > 0 && !gb.ptr[payload_size - 1])
  ------------------
  |  Branch (1448:20): [True: 1.61k, False: 278]
  |  Branch (1448:40): [True: 361, False: 1.25k]
  ------------------
 1449|    361|                payload_size--; // trailing_zero_bit x 8
 1450|  1.53k|            payload_size--; // trailing_one_bit + trailing_zero_bit x 7
 1451|       |
 1452|  1.53k|            int country_code_extension_byte = 0;
 1453|  1.53k|            const int country_code = dav1d_get_bits(&gb, 8);
 1454|  1.53k|            payload_size--;
 1455|  1.53k|            if (country_code == 0xFF) {
  ------------------
  |  Branch (1455:17): [True: 413, False: 1.12k]
  ------------------
 1456|    413|                country_code_extension_byte = dav1d_get_bits(&gb, 8);
 1457|    413|                payload_size--;
 1458|    413|            }
 1459|       |
 1460|  1.53k|            if (payload_size <= 0 || gb.ptr[payload_size] != 0x80) {
  ------------------
  |  Branch (1460:17): [True: 280, False: 1.25k]
  |  Branch (1460:38): [True: 119, False: 1.13k]
  ------------------
 1461|    399|                dav1d_log(c, "Malformed ITU-T T.35 metadata message format\n");
  ------------------
  |  |   44|    399|#define dav1d_log(...) do { } while(0)
  |  |  ------------------
  |  |  |  Branch (44:37): [Folded, False: 399]
  |  |  ------------------
  ------------------
 1462|    399|                break;
 1463|    399|            }
 1464|       |
 1465|  1.13k|            if ((c->n_itut_t35 + 1) > INT_MAX / (int)sizeof(*c->itut_t35)) goto error;
  ------------------
  |  Branch (1465:17): [True: 0, False: 1.13k]
  ------------------
 1466|  1.13k|            struct Dav1dITUTT35 *itut_t35 = dav1d_realloc(ALLOC_OBU_META, c->itut_t35,
  ------------------
  |  |  133|  1.13k|#define dav1d_realloc(type, ptr, sz) realloc(ptr, sz)
  ------------------
 1467|  1.13k|                                                          (c->n_itut_t35 + 1) * sizeof(*c->itut_t35));
 1468|  1.13k|            if (!itut_t35) goto error;
  ------------------
  |  Branch (1468:17): [True: 0, False: 1.13k]
  ------------------
 1469|  1.13k|            c->itut_t35 = itut_t35;
 1470|  1.13k|            memset(c->itut_t35 + c->n_itut_t35, 0, sizeof(*c->itut_t35));
 1471|       |
 1472|  1.13k|            struct itut_t35_ctx_context *itut_t35_ctx;
 1473|  1.13k|            if (!c->n_itut_t35) {
  ------------------
  |  Branch (1473:17): [True: 622, False: 513]
  ------------------
 1474|    622|                assert(!c->itut_t35_ref);
  ------------------
  |  Branch (1474:17): [True: 622, False: 0]
  ------------------
 1475|    622|                itut_t35_ctx = dav1d_malloc(ALLOC_OBU_META, sizeof(struct itut_t35_ctx_context));
  ------------------
  |  |  132|    622|#define dav1d_malloc(type, sz) malloc(sz)
  ------------------
 1476|    622|                if (!itut_t35_ctx) goto error;
  ------------------
  |  Branch (1476:21): [True: 0, False: 622]
  ------------------
 1477|    622|                c->itut_t35_ref = dav1d_ref_init(&itut_t35_ctx->ref, c->itut_t35,
 1478|    622|                                                 dav1d_picture_free_itut_t35, itut_t35_ctx, 0);
 1479|    622|            } else {
 1480|    513|                assert(c->itut_t35_ref && atomic_load(&c->itut_t35_ref->ref_cnt) == 1);
  ------------------
  |  Branch (1480:17): [True: 513, False: 0]
  |  Branch (1480:17): [True: 513, False: 0]
  ------------------
 1481|    513|                itut_t35_ctx = c->itut_t35_ref->user_data;
 1482|    513|                c->itut_t35_ref->const_data = (uint8_t *)c->itut_t35;
 1483|    513|            }
 1484|  1.13k|            itut_t35_ctx->itut_t35 = c->itut_t35;
 1485|  1.13k|            itut_t35_ctx->n_itut_t35 = c->n_itut_t35 + 1;
 1486|       |
 1487|  1.13k|            Dav1dITUTT35 *const itut_t35_metadata = &c->itut_t35[c->n_itut_t35];
 1488|  1.13k|            itut_t35_metadata->payload = dav1d_malloc(ALLOC_OBU_META, payload_size);
  ------------------
  |  |  132|  1.13k|#define dav1d_malloc(type, sz) malloc(sz)
  ------------------
 1489|  1.13k|            if (!itut_t35_metadata->payload) goto error;
  ------------------
  |  Branch (1489:17): [True: 0, False: 1.13k]
  ------------------
 1490|       |
 1491|  1.13k|            itut_t35_metadata->country_code = country_code;
 1492|  1.13k|            itut_t35_metadata->country_code_extension_byte = country_code_extension_byte;
 1493|  1.13k|            itut_t35_metadata->payload_size = payload_size;
 1494|       |
 1495|       |            // We know that we've read a whole number of bytes and that the
 1496|       |            // payload is within the OBU boundaries, so just use memcpy()
 1497|  1.13k|            assert(gb.bits_left == 0);
  ------------------
  |  Branch (1497:13): [True: 1.13k, False: 0]
  ------------------
 1498|  1.13k|            memcpy(itut_t35_metadata->payload, gb.ptr, payload_size);
 1499|       |
 1500|  1.13k|            c->n_itut_t35++;
 1501|  1.13k|            break;
 1502|  1.13k|        }
 1503|      0|        case OBU_META_SCALABILITY:
  ------------------
  |  Branch (1503:9): [True: 0, False: 3.05k]
  ------------------
 1504|      1|        case OBU_META_TIMECODE:
  ------------------
  |  Branch (1504:9): [True: 1, False: 3.05k]
  ------------------
 1505|       |            // ignore metadata OBUs we don't care about
 1506|      1|            break;
 1507|    461|        default:
  ------------------
  |  Branch (1507:9): [True: 461, False: 2.59k]
  ------------------
 1508|       |            // print a warning but don't fail for unknown types
 1509|    461|            if (meta_type > 31) // Types 6 to 31 are "Unregistered user private", so ignore them.
  ------------------
  |  Branch (1509:17): [True: 214, False: 247]
  ------------------
 1510|    214|                dav1d_log(c, "Unknown Metadata OBU type %d\n", meta_type);
  ------------------
  |  |   44|    214|#define dav1d_log(...) do { } while(0)
  |  |  ------------------
  |  |  |  Branch (44:37): [Folded, False: 214]
  |  |  ------------------
  ------------------
 1511|    461|            break;
 1512|  3.05k|        }
 1513|       |
 1514|  2.77k|        break;
 1515|  3.05k|    }
 1516|  3.59k|    case DAV1D_OBU_TD:
  ------------------
  |  Branch (1516:5): [True: 3.59k, False: 100k]
  ------------------
 1517|  3.59k|        c->frame_flags |= PICTURE_FLAG_NEW_TEMPORAL_UNIT;
 1518|  3.59k|        break;
 1519|    200|    case DAV1D_OBU_PADDING:
  ------------------
  |  Branch (1519:5): [True: 200, False: 103k]
  ------------------
 1520|       |        // ignore OBUs we don't care about
 1521|    200|        break;
 1522|  2.48k|    default:
  ------------------
  |  Branch (1522:5): [True: 2.48k, False: 101k]
  ------------------
 1523|       |        // print a warning but don't fail for unknown types
 1524|  2.48k|        dav1d_log(c, "Unknown OBU type %d of size %td\n", type, gb.ptr_end - gb.ptr);
  ------------------
  |  |   44|  2.48k|#define dav1d_log(...) do { } while(0)
  |  |  ------------------
  |  |  |  Branch (44:37): [Folded, False: 2.48k]
  |  |  ------------------
  ------------------
 1525|  2.48k|        break;
 1526|   104k|    }
 1527|       |
 1528|  81.8k|    if (c->seq_hdr && c->frame_hdr) {
  ------------------
  |  Branch (1528:9): [True: 80.9k, False: 962]
  |  Branch (1528:23): [True: 56.0k, False: 24.8k]
  ------------------
 1529|  56.0k|        if (c->frame_hdr->show_existing_frame) {
  ------------------
  |  Branch (1529:13): [True: 6.01k, False: 50.0k]
  ------------------
 1530|  6.01k|            if (!c->refs[c->frame_hdr->existing_frame_idx].p.p.frame_hdr) goto error;
  ------------------
  |  Branch (1530:17): [True: 479, False: 5.54k]
  ------------------
 1531|  5.54k|            switch (c->refs[c->frame_hdr->existing_frame_idx].p.p.frame_hdr->frame_type) {
 1532|    403|            case DAV1D_FRAME_TYPE_INTER:
  ------------------
  |  Branch (1532:13): [True: 403, False: 5.13k]
  ------------------
 1533|    597|            case DAV1D_FRAME_TYPE_SWITCH:
  ------------------
  |  Branch (1533:13): [True: 194, False: 5.34k]
  ------------------
 1534|    597|                if (c->decode_frame_type > DAV1D_DECODEFRAMETYPE_REFERENCE)
  ------------------
  |  Branch (1534:21): [True: 0, False: 597]
  ------------------
 1535|      0|                    goto skip;
 1536|    597|                break;
 1537|    597|            case DAV1D_FRAME_TYPE_INTRA:
  ------------------
  |  Branch (1537:13): [True: 203, False: 5.33k]
  ------------------
 1538|    203|                if (c->decode_frame_type > DAV1D_DECODEFRAMETYPE_INTRA)
  ------------------
  |  Branch (1538:21): [True: 0, False: 203]
  ------------------
 1539|      0|                    goto skip;
 1540|       |                // fall-through
 1541|  4.94k|            default:
  ------------------
  |  Branch (1541:13): [True: 4.74k, False: 800]
  ------------------
 1542|  4.94k|                break;
 1543|  5.54k|            }
 1544|  5.54k|            if (!c->refs[c->frame_hdr->existing_frame_idx].p.p.data[0]) goto error;
  ------------------
  |  Branch (1544:17): [True: 0, False: 5.54k]
  ------------------
 1545|  5.54k|            if (c->strict_std_compliance &&
  ------------------
  |  Branch (1545:17): [True: 0, False: 5.54k]
  ------------------
 1546|      0|                !c->refs[c->frame_hdr->existing_frame_idx].p.showable)
  ------------------
  |  Branch (1546:17): [True: 0, False: 0]
  ------------------
 1547|      0|            {
 1548|      0|                goto error;
 1549|      0|            }
 1550|  5.54k|            if (c->n_fc == 1) {
  ------------------
  |  Branch (1550:17): [True: 5.54k, False: 0]
  ------------------
 1551|  5.54k|                dav1d_thread_picture_ref(&c->out,
 1552|  5.54k|                                         &c->refs[c->frame_hdr->existing_frame_idx].p);
 1553|  5.54k|                dav1d_picture_copy_props(&c->out.p,
 1554|  5.54k|                                         c->content_light, c->content_light_ref,
 1555|  5.54k|                                         c->mastering_display, c->mastering_display_ref,
 1556|  5.54k|                                         c->itut_t35, c->itut_t35_ref, c->n_itut_t35,
 1557|  5.54k|                                         &in->m);
 1558|       |                // Must be removed from the context after being attached to the frame
 1559|  5.54k|                dav1d_ref_dec(&c->itut_t35_ref);
 1560|  5.54k|                c->itut_t35 = NULL;
 1561|  5.54k|                c->n_itut_t35 = 0;
 1562|  5.54k|                c->event_flags |= dav1d_picture_get_event_flags(&c->refs[c->frame_hdr->existing_frame_idx].p);
 1563|  5.54k|            } else {
 1564|      0|                pthread_mutex_lock(&c->task_thread.lock);
 1565|       |                // need to append this to the frame output queue
 1566|      0|                const unsigned next = c->frame_thread.next++;
 1567|      0|                if (c->frame_thread.next == c->n_fc)
  ------------------
  |  Branch (1567:21): [True: 0, False: 0]
  ------------------
 1568|      0|                    c->frame_thread.next = 0;
 1569|       |
 1570|      0|                Dav1dFrameContext *const f = &c->fc[next];
 1571|      0|                while (f->n_tile_data > 0)
  ------------------
  |  Branch (1571:24): [True: 0, False: 0]
  ------------------
 1572|      0|                    pthread_cond_wait(&f->task_thread.cond,
 1573|      0|                                      &f->task_thread.ttd->lock);
 1574|      0|                Dav1dThreadPicture *const out_delayed =
 1575|      0|                    &c->frame_thread.out_delayed[next];
 1576|      0|                if (out_delayed->p.data[0] || atomic_load(&f->task_thread.error)) {
  ------------------
  |  Branch (1576:21): [True: 0, False: 0]
  |  Branch (1576:47): [True: 0, False: 0]
  ------------------
 1577|      0|                    unsigned first = atomic_load(&c->task_thread.first);
 1578|      0|                    if (first + 1U < c->n_fc)
  ------------------
  |  Branch (1578:25): [True: 0, False: 0]
  ------------------
 1579|      0|                        atomic_fetch_add(&c->task_thread.first, 1U);
 1580|      0|                    else
 1581|      0|                        atomic_store(&c->task_thread.first, 0);
 1582|      0|                    atomic_compare_exchange_strong(&c->task_thread.reset_task_cur,
 1583|      0|                                                   &first, UINT_MAX);
 1584|      0|                    if (c->task_thread.cur && c->task_thread.cur < c->n_fc)
  ------------------
  |  Branch (1584:25): [True: 0, False: 0]
  |  Branch (1584:47): [True: 0, False: 0]
  ------------------
 1585|      0|                        c->task_thread.cur--;
 1586|      0|                }
 1587|      0|                const int error = f->task_thread.retval;
 1588|      0|                if (error) {
  ------------------
  |  Branch (1588:21): [True: 0, False: 0]
  ------------------
 1589|      0|                    c->cached_error = error;
 1590|      0|                    f->task_thread.retval = 0;
 1591|      0|                    dav1d_data_props_copy(&c->cached_error_props, &out_delayed->p.m);
 1592|      0|                    dav1d_thread_picture_unref(out_delayed);
 1593|      0|                } else if (out_delayed->p.data[0]) {
  ------------------
  |  Branch (1593:28): [True: 0, False: 0]
  ------------------
 1594|      0|                    const unsigned progress = atomic_load_explicit(&out_delayed->progress[1],
 1595|      0|                                                                   memory_order_relaxed);
 1596|      0|                    if ((out_delayed->visible || c->output_invisible_frames) &&
  ------------------
  |  Branch (1596:26): [True: 0, False: 0]
  |  Branch (1596:50): [True: 0, False: 0]
  ------------------
 1597|      0|                        progress != FRAME_ERROR)
  ------------------
  |  |   35|      0|#define FRAME_ERROR (UINT_MAX - 1)
  ------------------
  |  Branch (1597:25): [True: 0, False: 0]
  ------------------
 1598|      0|                    {
 1599|      0|                        dav1d_thread_picture_ref(&c->out, out_delayed);
 1600|      0|                        c->event_flags |= dav1d_picture_get_event_flags(out_delayed);
 1601|      0|                    }
 1602|      0|                    dav1d_thread_picture_unref(out_delayed);
 1603|      0|                }
 1604|      0|                dav1d_thread_picture_ref(out_delayed,
 1605|      0|                                         &c->refs[c->frame_hdr->existing_frame_idx].p);
 1606|      0|                out_delayed->visible = 1;
 1607|      0|                dav1d_picture_copy_props(&out_delayed->p,
 1608|      0|                                         c->content_light, c->content_light_ref,
 1609|      0|                                         c->mastering_display, c->mastering_display_ref,
 1610|      0|                                         c->itut_t35, c->itut_t35_ref, c->n_itut_t35,
 1611|      0|                                         &in->m);
 1612|       |                // Must be removed from the context after being attached to the frame
 1613|      0|                dav1d_ref_dec(&c->itut_t35_ref);
 1614|      0|                c->itut_t35 = NULL;
 1615|      0|                c->n_itut_t35 = 0;
 1616|       |
 1617|      0|                pthread_mutex_unlock(&c->task_thread.lock);
 1618|      0|            }
 1619|  5.54k|            if (c->refs[c->frame_hdr->existing_frame_idx].p.p.frame_hdr->frame_type == DAV1D_FRAME_TYPE_KEY) {
  ------------------
  |  Branch (1619:17): [True: 4.74k, False: 800]
  ------------------
 1620|  4.74k|                const int r = c->frame_hdr->existing_frame_idx;
 1621|  4.74k|                c->refs[r].p.showable = 0;
 1622|  42.6k|                for (int i = 0; i < 8; i++) {
  ------------------
  |  Branch (1622:33): [True: 37.9k, False: 4.74k]
  ------------------
 1623|  37.9k|                    if (i == r) continue;
  ------------------
  |  Branch (1623:25): [True: 4.74k, False: 33.1k]
  ------------------
 1624|       |
 1625|  33.1k|                    if (c->refs[i].p.p.frame_hdr)
  ------------------
  |  Branch (1625:25): [True: 32.9k, False: 268]
  ------------------
 1626|  32.9k|                        dav1d_thread_picture_unref(&c->refs[i].p);
 1627|  33.1k|                    dav1d_thread_picture_ref(&c->refs[i].p, &c->refs[r].p);
 1628|       |
 1629|  33.1k|                    dav1d_cdf_thread_unref(&c->cdf[i]);
 1630|  33.1k|                    dav1d_cdf_thread_ref(&c->cdf[i], &c->cdf[r]);
 1631|       |
 1632|  33.1k|                    dav1d_ref_dec(&c->refs[i].segmap);
 1633|  33.1k|                    c->refs[i].segmap = c->refs[r].segmap;
 1634|  33.1k|                    if (c->refs[r].segmap)
  ------------------
  |  Branch (1634:25): [True: 2.68k, False: 30.4k]
  ------------------
 1635|  2.68k|                        dav1d_ref_inc(c->refs[r].segmap);
 1636|  33.1k|                    dav1d_ref_dec(&c->refs[i].refmvs);
 1637|  33.1k|                }
 1638|  4.74k|            }
 1639|  5.54k|            c->frame_hdr = NULL;
 1640|  50.0k|        } else if (c->n_tiles == c->frame_hdr->tiling.cols * c->frame_hdr->tiling.rows) {
  ------------------
  |  Branch (1640:20): [True: 45.0k, False: 4.95k]
  ------------------
 1641|  45.0k|            switch (c->frame_hdr->frame_type) {
 1642|  15.0k|            case DAV1D_FRAME_TYPE_INTER:
  ------------------
  |  Branch (1642:13): [True: 15.0k, False: 29.9k]
  ------------------
 1643|  15.3k|            case DAV1D_FRAME_TYPE_SWITCH:
  ------------------
  |  Branch (1643:13): [True: 302, False: 44.7k]
  ------------------
 1644|  15.3k|                if (c->decode_frame_type > DAV1D_DECODEFRAMETYPE_REFERENCE ||
  ------------------
  |  Branch (1644:21): [True: 0, False: 15.3k]
  ------------------
 1645|  15.3k|                    (c->decode_frame_type == DAV1D_DECODEFRAMETYPE_REFERENCE &&
  ------------------
  |  Branch (1645:22): [True: 0, False: 15.3k]
  ------------------
 1646|      0|                     !c->frame_hdr->refresh_frame_flags))
  ------------------
  |  Branch (1646:22): [True: 0, False: 0]
  ------------------
 1647|      0|                    goto skip;
 1648|  15.3k|                break;
 1649|  15.3k|            case DAV1D_FRAME_TYPE_INTRA:
  ------------------
  |  Branch (1649:13): [True: 596, False: 44.4k]
  ------------------
 1650|    596|                if (c->decode_frame_type > DAV1D_DECODEFRAMETYPE_INTRA ||
  ------------------
  |  Branch (1650:21): [True: 0, False: 596]
  ------------------
 1651|    596|                    (c->decode_frame_type == DAV1D_DECODEFRAMETYPE_REFERENCE &&
  ------------------
  |  Branch (1651:22): [True: 0, False: 596]
  ------------------
 1652|      0|                     !c->frame_hdr->refresh_frame_flags))
  ------------------
  |  Branch (1652:22): [True: 0, False: 0]
  ------------------
 1653|      0|                    goto skip;
 1654|       |                // fall-through
 1655|  29.6k|            default:
  ------------------
  |  Branch (1655:13): [True: 29.1k, False: 15.9k]
  ------------------
 1656|  29.6k|                break;
 1657|  45.0k|            }
 1658|  45.0k|            if (!c->n_tile_data)
  ------------------
  |  Branch (1658:17): [True: 0, False: 45.0k]
  ------------------
 1659|      0|                goto error;
 1660|  45.0k|            if ((res = dav1d_submit_frame(c)) < 0)
  ------------------
  |  Branch (1660:17): [True: 27.0k, False: 17.9k]
  ------------------
 1661|  27.0k|                return res;
 1662|  45.0k|            assert(!c->n_tile_data);
  ------------------
  |  Branch (1662:13): [True: 17.9k, False: 0]
  ------------------
 1663|  17.9k|            c->frame_hdr = NULL;
 1664|  17.9k|            c->n_tiles = 0;
 1665|  17.9k|        }
 1666|  56.0k|    }
 1667|       |
 1668|  54.2k|    return gb.ptr_end - gb.ptr_start;
 1669|       |
 1670|      0|skip:
 1671|       |    // update refs with only the headers in case we skip the frame
 1672|      0|    for (int i = 0; i < 8; i++) {
  ------------------
  |  Branch (1672:21): [True: 0, False: 0]
  ------------------
 1673|      0|        if (c->frame_hdr->refresh_frame_flags & (1 << i)) {
  ------------------
  |  Branch (1673:13): [True: 0, False: 0]
  ------------------
 1674|      0|            dav1d_thread_picture_unref(&c->refs[i].p);
 1675|      0|            c->refs[i].p.p.frame_hdr = c->frame_hdr;
 1676|      0|            c->refs[i].p.p.seq_hdr = c->seq_hdr;
 1677|      0|            c->refs[i].p.p.frame_hdr_ref = c->frame_hdr_ref;
 1678|      0|            c->refs[i].p.p.seq_hdr_ref = c->seq_hdr_ref;
 1679|      0|            dav1d_ref_inc(c->frame_hdr_ref);
 1680|      0|            dav1d_ref_inc(c->seq_hdr_ref);
 1681|      0|        }
 1682|      0|    }
 1683|       |
 1684|      0|    dav1d_ref_dec(&c->frame_hdr_ref);
 1685|      0|    c->frame_hdr = NULL;
 1686|      0|    c->n_tiles = 0;
 1687|       |
 1688|      0|    return gb.ptr_end - gb.ptr_start;
 1689|       |
 1690|  23.6k|error:
 1691|  23.6k|    dav1d_data_props_copy(&c->cached_error_props, &in->m);
 1692|  23.6k|    dav1d_log(c, gb.error ? "Overrun in OBU bit buffer\n" :
  ------------------
  |  |   44|  23.6k|#define dav1d_log(...) do { } while(0)
  |  |  ------------------
  |  |  |  Branch (44:37): [Folded, False: 23.6k]
  |  |  ------------------
  ------------------
 1693|  23.6k|                            "Error parsing OBU data\n");
 1694|  23.6k|    return DAV1D_ERR(EINVAL);
  ------------------
  |  |   58|  23.6k|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
 1695|  81.8k|}
obu.c:parse_seq_hdr:
   75|  32.4k|{
   76|  32.4k|#define DEBUG_SEQ_HDR 0
   77|       |
   78|       |#if DEBUG_SEQ_HDR
   79|       |    const unsigned init_bit_pos = dav1d_get_bits_pos(gb);
   80|       |#endif
   81|       |
   82|  32.4k|    memset(hdr, 0, sizeof(*hdr));
   83|  32.4k|    hdr->profile = dav1d_get_bits(gb, 3);
   84|  32.4k|    if (hdr->profile > 2) goto error;
  ------------------
  |  Branch (84:9): [True: 974, False: 31.4k]
  ------------------
   85|       |#if DEBUG_SEQ_HDR
   86|       |    printf("SEQHDR: post-profile: off=%u\n",
   87|       |           dav1d_get_bits_pos(gb) - init_bit_pos);
   88|       |#endif
   89|       |
   90|  31.4k|    hdr->still_picture = dav1d_get_bit(gb);
   91|  31.4k|    hdr->reduced_still_picture_header = dav1d_get_bit(gb);
   92|  31.4k|    if (hdr->reduced_still_picture_header && !hdr->still_picture) goto error;
  ------------------
  |  Branch (92:9): [True: 19.3k, False: 12.1k]
  |  Branch (92:46): [True: 360, False: 18.9k]
  ------------------
   93|       |#if DEBUG_SEQ_HDR
   94|       |    printf("SEQHDR: post-stillpicture_flags: off=%u\n",
   95|       |           dav1d_get_bits_pos(gb) - init_bit_pos);
   96|       |#endif
   97|       |
   98|  31.1k|    if (hdr->reduced_still_picture_header) {
  ------------------
  |  Branch (98:9): [True: 18.9k, False: 12.1k]
  ------------------
   99|  18.9k|        hdr->num_operating_points = 1;
  100|  18.9k|        hdr->operating_points[0].major_level = dav1d_get_bits(gb, 3);
  101|  18.9k|        hdr->operating_points[0].minor_level = dav1d_get_bits(gb, 2);
  102|  18.9k|        hdr->operating_points[0].initial_display_delay = 10;
  103|  18.9k|    } else {
  104|  12.1k|        hdr->timing_info_present = dav1d_get_bit(gb);
  105|  12.1k|        if (hdr->timing_info_present) {
  ------------------
  |  Branch (105:13): [True: 1.97k, False: 10.1k]
  ------------------
  106|  1.97k|            hdr->num_units_in_tick = dav1d_get_bits(gb, 32);
  107|  1.97k|            hdr->time_scale = dav1d_get_bits(gb, 32);
  108|  1.97k|            if (strict_std_compliance && (!hdr->num_units_in_tick || !hdr->time_scale))
  ------------------
  |  Branch (108:17): [True: 0, False: 1.97k]
  |  Branch (108:43): [True: 0, False: 0]
  |  Branch (108:70): [True: 0, False: 0]
  ------------------
  109|      0|                goto error;
  110|  1.97k|            hdr->equal_picture_interval = dav1d_get_bit(gb);
  111|  1.97k|            if (hdr->equal_picture_interval) {
  ------------------
  |  Branch (111:17): [True: 1.01k, False: 965]
  ------------------
  112|  1.01k|                const unsigned num_ticks_per_picture = dav1d_get_vlc(gb);
  113|  1.01k|                if (num_ticks_per_picture == UINT32_MAX)
  ------------------
  |  Branch (113:21): [True: 196, False: 815]
  ------------------
  114|    196|                    goto error;
  115|    815|                hdr->num_ticks_per_picture = num_ticks_per_picture + 1;
  116|    815|            }
  117|       |
  118|  1.78k|            hdr->decoder_model_info_present = dav1d_get_bit(gb);
  119|  1.78k|            if (hdr->decoder_model_info_present) {
  ------------------
  |  Branch (119:17): [True: 1.09k, False: 681]
  ------------------
  120|  1.09k|                hdr->encoder_decoder_buffer_delay_length = dav1d_get_bits(gb, 5) + 1;
  121|  1.09k|                hdr->num_units_in_decoding_tick = dav1d_get_bits(gb, 32);
  122|  1.09k|                if (strict_std_compliance && !hdr->num_units_in_decoding_tick)
  ------------------
  |  Branch (122:21): [True: 0, False: 1.09k]
  |  Branch (122:46): [True: 0, False: 0]
  ------------------
  123|      0|                    goto error;
  124|  1.09k|                hdr->buffer_removal_delay_length = dav1d_get_bits(gb, 5) + 1;
  125|  1.09k|                hdr->frame_presentation_delay_length = dav1d_get_bits(gb, 5) + 1;
  126|  1.09k|            }
  127|  1.78k|        }
  128|       |#if DEBUG_SEQ_HDR
  129|       |        printf("SEQHDR: post-timinginfo: off=%u\n",
  130|       |               dav1d_get_bits_pos(gb) - init_bit_pos);
  131|       |#endif
  132|       |
  133|  11.9k|        hdr->display_model_info_present = dav1d_get_bit(gb);
  134|  11.9k|        hdr->num_operating_points = dav1d_get_bits(gb, 5) + 1;
  135|  31.3k|        for (int i = 0; i < hdr->num_operating_points; i++) {
  ------------------
  |  Branch (135:25): [True: 19.7k, False: 11.5k]
  ------------------
  136|  19.7k|            struct Dav1dSequenceHeaderOperatingPoint *const op =
  137|  19.7k|                &hdr->operating_points[i];
  138|  19.7k|            op->idc = dav1d_get_bits(gb, 12);
  139|  19.7k|            if (op->idc && (!(op->idc & 0xff) || !(op->idc & 0xf00)))
  ------------------
  |  Branch (139:17): [True: 12.1k, False: 7.53k]
  |  Branch (139:29): [True: 57, False: 12.1k]
  |  Branch (139:50): [True: 313, False: 11.8k]
  ------------------
  140|    370|                goto error;
  141|  19.3k|            op->major_level = 2 + dav1d_get_bits(gb, 3);
  142|  19.3k|            op->minor_level = dav1d_get_bits(gb, 2);
  143|  19.3k|            if (op->major_level > 3)
  ------------------
  |  Branch (143:17): [True: 4.98k, False: 14.3k]
  ------------------
  144|  4.98k|                op->tier = dav1d_get_bit(gb);
  145|  19.3k|            if (hdr->decoder_model_info_present) {
  ------------------
  |  Branch (145:17): [True: 7.74k, False: 11.5k]
  ------------------
  146|  7.74k|                op->decoder_model_param_present = dav1d_get_bit(gb);
  147|  7.74k|                if (op->decoder_model_param_present) {
  ------------------
  |  Branch (147:21): [True: 2.88k, False: 4.86k]
  ------------------
  148|  2.88k|                    struct Dav1dSequenceHeaderOperatingParameterInfo *const opi =
  149|  2.88k|                        &hdr->operating_parameter_info[i];
  150|  2.88k|                    opi->decoder_buffer_delay =
  151|  2.88k|                        dav1d_get_bits(gb, hdr->encoder_decoder_buffer_delay_length);
  152|  2.88k|                    opi->encoder_buffer_delay =
  153|  2.88k|                        dav1d_get_bits(gb, hdr->encoder_decoder_buffer_delay_length);
  154|  2.88k|                    opi->low_delay_mode = dav1d_get_bit(gb);
  155|  2.88k|                }
  156|  7.74k|            }
  157|  19.3k|            if (hdr->display_model_info_present)
  ------------------
  |  Branch (157:17): [True: 7.27k, False: 12.0k]
  ------------------
  158|  7.27k|                op->display_model_param_present = dav1d_get_bit(gb);
  159|  19.3k|            op->initial_display_delay =
  160|  19.3k|                op->display_model_param_present ? dav1d_get_bits(gb, 4) + 1 : 10;
  ------------------
  |  Branch (160:17): [True: 2.05k, False: 17.2k]
  ------------------
  161|  19.3k|        }
  162|       |#if DEBUG_SEQ_HDR
  163|       |        printf("SEQHDR: post-operating-points: off=%u\n",
  164|       |               dav1d_get_bits_pos(gb) - init_bit_pos);
  165|       |#endif
  166|  11.9k|    }
  167|       |
  168|  30.5k|    hdr->width_n_bits = dav1d_get_bits(gb, 4) + 1;
  169|  30.5k|    hdr->height_n_bits = dav1d_get_bits(gb, 4) + 1;
  170|  30.5k|    hdr->max_width = dav1d_get_bits(gb, hdr->width_n_bits) + 1;
  171|  30.5k|    hdr->max_height = dav1d_get_bits(gb, hdr->height_n_bits) + 1;
  172|       |#if DEBUG_SEQ_HDR
  173|       |    printf("SEQHDR: post-size: off=%u\n",
  174|       |           dav1d_get_bits_pos(gb) - init_bit_pos);
  175|       |#endif
  176|  30.5k|    if (!hdr->reduced_still_picture_header) {
  ------------------
  |  Branch (176:9): [True: 11.5k, False: 18.9k]
  ------------------
  177|  11.5k|        hdr->frame_id_numbers_present = dav1d_get_bit(gb);
  178|  11.5k|        if (hdr->frame_id_numbers_present) {
  ------------------
  |  Branch (178:13): [True: 1.58k, False: 10.0k]
  ------------------
  179|  1.58k|            hdr->delta_frame_id_n_bits = dav1d_get_bits(gb, 4) + 2;
  180|  1.58k|            hdr->frame_id_n_bits = dav1d_get_bits(gb, 3) + hdr->delta_frame_id_n_bits + 1;
  181|  1.58k|        }
  182|  11.5k|    }
  183|       |#if DEBUG_SEQ_HDR
  184|       |    printf("SEQHDR: post-frame-id-numbers-present: off=%u\n",
  185|       |           dav1d_get_bits_pos(gb) - init_bit_pos);
  186|       |#endif
  187|       |
  188|  30.5k|    hdr->sb128 = dav1d_get_bit(gb);
  189|  30.5k|    hdr->filter_intra = dav1d_get_bit(gb);
  190|  30.5k|    hdr->intra_edge_filter = dav1d_get_bit(gb);
  191|  30.5k|    if (hdr->reduced_still_picture_header) {
  ------------------
  |  Branch (191:9): [True: 18.9k, False: 11.5k]
  ------------------
  192|  18.9k|        hdr->screen_content_tools = DAV1D_ADAPTIVE;
  193|  18.9k|        hdr->force_integer_mv = DAV1D_ADAPTIVE;
  194|  18.9k|    } else {
  195|  11.5k|        hdr->inter_intra = dav1d_get_bit(gb);
  196|  11.5k|        hdr->masked_compound = dav1d_get_bit(gb);
  197|  11.5k|        hdr->warped_motion = dav1d_get_bit(gb);
  198|  11.5k|        hdr->dual_filter = dav1d_get_bit(gb);
  199|  11.5k|        hdr->order_hint = dav1d_get_bit(gb);
  200|  11.5k|        if (hdr->order_hint) {
  ------------------
  |  Branch (200:13): [True: 7.52k, False: 4.06k]
  ------------------
  201|  7.52k|            hdr->jnt_comp = dav1d_get_bit(gb);
  202|  7.52k|            hdr->ref_frame_mvs = dav1d_get_bit(gb);
  203|  7.52k|        }
  204|  11.5k|        hdr->screen_content_tools = dav1d_get_bit(gb) ? DAV1D_ADAPTIVE : dav1d_get_bit(gb);
  ------------------
  |  Branch (204:37): [True: 4.48k, False: 7.10k]
  ------------------
  205|       |    #if DEBUG_SEQ_HDR
  206|       |        printf("SEQHDR: post-screentools: off=%u\n",
  207|       |               dav1d_get_bits_pos(gb) - init_bit_pos);
  208|       |    #endif
  209|  11.5k|        hdr->force_integer_mv = hdr->screen_content_tools ?
  ------------------
  |  Branch (209:33): [True: 8.22k, False: 3.36k]
  ------------------
  210|  8.22k|                                dav1d_get_bit(gb) ? DAV1D_ADAPTIVE : dav1d_get_bit(gb) : 2;
  ------------------
  |  Branch (210:33): [True: 2.44k, False: 5.78k]
  ------------------
  211|  11.5k|        if (hdr->order_hint)
  ------------------
  |  Branch (211:13): [True: 7.52k, False: 4.06k]
  ------------------
  212|  7.52k|            hdr->order_hint_n_bits = dav1d_get_bits(gb, 3) + 1;
  213|  11.5k|    }
  214|  30.5k|    hdr->super_res = dav1d_get_bit(gb);
  215|  30.5k|    hdr->cdef = dav1d_get_bit(gb);
  216|  30.5k|    hdr->restoration = dav1d_get_bit(gb);
  217|       |#if DEBUG_SEQ_HDR
  218|       |    printf("SEQHDR: post-featurebits: off=%u\n",
  219|       |           dav1d_get_bits_pos(gb) - init_bit_pos);
  220|       |#endif
  221|       |
  222|  30.5k|    hdr->hbd = dav1d_get_bit(gb);
  223|  30.5k|    if (hdr->profile == 2 && hdr->hbd)
  ------------------
  |  Branch (223:9): [True: 12.5k, False: 17.9k]
  |  Branch (223:30): [True: 8.36k, False: 4.21k]
  ------------------
  224|  8.36k|        hdr->hbd += dav1d_get_bit(gb);
  225|  30.5k|    if (hdr->profile != 1)
  ------------------
  |  Branch (225:9): [True: 23.2k, False: 7.29k]
  ------------------
  226|  23.2k|        hdr->monochrome = dav1d_get_bit(gb);
  227|  30.5k|    hdr->color_description_present = dav1d_get_bit(gb);
  228|  30.5k|    if (hdr->color_description_present) {
  ------------------
  |  Branch (228:9): [True: 3.31k, False: 27.2k]
  ------------------
  229|  3.31k|        hdr->pri = dav1d_get_bits(gb, 8);
  230|  3.31k|        hdr->trc = dav1d_get_bits(gb, 8);
  231|  3.31k|        hdr->mtrx = dav1d_get_bits(gb, 8);
  232|  27.2k|    } else {
  233|  27.2k|        hdr->pri = DAV1D_COLOR_PRI_UNKNOWN;
  234|  27.2k|        hdr->trc = DAV1D_TRC_UNKNOWN;
  235|  27.2k|        hdr->mtrx = DAV1D_MC_UNKNOWN;
  236|  27.2k|    }
  237|  30.5k|    if (hdr->monochrome) {
  ------------------
  |  Branch (237:9): [True: 11.3k, False: 19.2k]
  ------------------
  238|  11.3k|        hdr->color_range = dav1d_get_bit(gb);
  239|  11.3k|        hdr->layout = DAV1D_PIXEL_LAYOUT_I400;
  240|  11.3k|        hdr->ss_hor = hdr->ss_ver = 1;
  241|  11.3k|        hdr->chr = DAV1D_CHR_UNKNOWN;
  242|  19.2k|    } else if (hdr->pri == DAV1D_COLOR_PRI_BT709 &&
  ------------------
  |  Branch (242:16): [True: 1.99k, False: 17.2k]
  ------------------
  243|  1.99k|               hdr->trc == DAV1D_TRC_SRGB &&
  ------------------
  |  Branch (243:16): [True: 1.35k, False: 638]
  ------------------
  244|  1.35k|               hdr->mtrx == DAV1D_MC_IDENTITY)
  ------------------
  |  Branch (244:16): [True: 943, False: 416]
  ------------------
  245|    943|    {
  246|    943|        hdr->layout = DAV1D_PIXEL_LAYOUT_I444;
  247|    943|        hdr->color_range = 1;
  248|    943|        if (hdr->profile != 1 && !(hdr->profile == 2 && hdr->hbd == 2))
  ------------------
  |  Branch (248:13): [True: 748, False: 195]
  |  Branch (248:36): [True: 396, False: 352]
  |  Branch (248:57): [True: 201, False: 195]
  ------------------
  249|    547|            goto error;
  250|  18.2k|    } else {
  251|  18.2k|        hdr->color_range = dav1d_get_bit(gb);
  252|  18.2k|        switch (hdr->profile) {
  ------------------
  |  Branch (252:17): [True: 18.2k, False: 0]
  ------------------
  253|  5.63k|        case 0: hdr->layout = DAV1D_PIXEL_LAYOUT_I420;
  ------------------
  |  Branch (253:9): [True: 5.63k, False: 12.6k]
  ------------------
  254|  5.63k|                hdr->ss_hor = hdr->ss_ver = 1;
  255|  5.63k|                break;
  256|  7.10k|        case 1: hdr->layout = DAV1D_PIXEL_LAYOUT_I444;
  ------------------
  |  Branch (256:9): [True: 7.10k, False: 11.1k]
  ------------------
  257|  7.10k|                break;
  258|  5.52k|        case 2:
  ------------------
  |  Branch (258:9): [True: 5.52k, False: 12.7k]
  ------------------
  259|  5.52k|            if (hdr->hbd == 2) {
  ------------------
  |  Branch (259:17): [True: 2.37k, False: 3.15k]
  ------------------
  260|  2.37k|                hdr->ss_hor = dav1d_get_bit(gb);
  261|  2.37k|                if (hdr->ss_hor)
  ------------------
  |  Branch (261:21): [True: 868, False: 1.50k]
  ------------------
  262|    868|                    hdr->ss_ver = dav1d_get_bit(gb);
  263|  2.37k|            } else
  264|  3.15k|                hdr->ss_hor = 1;
  265|  5.52k|            hdr->layout = hdr->ss_hor ?
  ------------------
  |  Branch (265:27): [True: 4.02k, False: 1.50k]
  ------------------
  266|  4.02k|                          hdr->ss_ver ? DAV1D_PIXEL_LAYOUT_I420 :
  ------------------
  |  Branch (266:27): [True: 480, False: 3.54k]
  ------------------
  267|  4.02k|                                        DAV1D_PIXEL_LAYOUT_I422 :
  268|  5.52k|                                        DAV1D_PIXEL_LAYOUT_I444;
  269|  5.52k|            break;
  270|  18.2k|        }
  271|  18.2k|        hdr->chr = (hdr->ss_hor & hdr->ss_ver) ?
  ------------------
  |  Branch (271:20): [True: 6.11k, False: 12.1k]
  ------------------
  272|  12.1k|                   dav1d_get_bits(gb, 2) : DAV1D_CHR_UNKNOWN;
  273|  18.2k|    }
  274|  30.0k|    if (strict_std_compliance &&
  ------------------
  |  Branch (274:9): [True: 0, False: 30.0k]
  ------------------
  275|      0|        hdr->mtrx == DAV1D_MC_IDENTITY && hdr->layout != DAV1D_PIXEL_LAYOUT_I444)
  ------------------
  |  Branch (275:9): [True: 0, False: 0]
  |  Branch (275:43): [True: 0, False: 0]
  ------------------
  276|      0|    {
  277|      0|        goto error;
  278|      0|    }
  279|  30.0k|    if (!hdr->monochrome)
  ------------------
  |  Branch (279:9): [True: 18.6k, False: 11.3k]
  ------------------
  280|  18.6k|        hdr->separate_uv_delta_q = dav1d_get_bit(gb);
  281|       |#if DEBUG_SEQ_HDR
  282|       |    printf("SEQHDR: post-colorinfo: off=%u\n",
  283|       |           dav1d_get_bits_pos(gb) - init_bit_pos);
  284|       |#endif
  285|       |
  286|  30.0k|    hdr->film_grain_present = dav1d_get_bit(gb);
  287|       |#if DEBUG_SEQ_HDR
  288|       |    printf("SEQHDR: post-filmgrain: off=%u\n",
  289|       |           dav1d_get_bits_pos(gb) - init_bit_pos);
  290|       |#endif
  291|       |
  292|       |    // We needn't bother flushing the OBU here: we'll check we didn't
  293|       |    // overrun in the caller and will then discard gb, so there's no
  294|       |    // point in setting its position properly.
  295|       |
  296|  30.0k|    return check_trailing_bits(gb, strict_std_compliance);
  297|       |
  298|  2.44k|error:
  299|  2.44k|    return DAV1D_ERR(EINVAL);
  ------------------
  |  |   58|  2.44k|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  300|  30.0k|}
obu.c:parse_frame_hdr:
  409|  71.8k|static int parse_frame_hdr(Dav1dContext *const c, GetBits *const gb) {
  410|  71.8k|#define DEBUG_FRAME_HDR 0
  411|       |
  412|       |#if DEBUG_FRAME_HDR
  413|       |    const uint8_t *const init_ptr = gb->ptr;
  414|       |#endif
  415|  71.8k|    const Dav1dSequenceHeader *const seqhdr = c->seq_hdr;
  416|  71.8k|    Dav1dFrameHeader *const hdr = c->frame_hdr;
  417|       |
  418|  71.8k|    if (!seqhdr->reduced_still_picture_header)
  ------------------
  |  Branch (418:9): [True: 41.1k, False: 30.6k]
  ------------------
  419|  41.1k|        hdr->show_existing_frame = dav1d_get_bit(gb);
  420|       |#if DEBUG_FRAME_HDR
  421|       |    printf("HDR: post-show_existing_frame: off=%td\n",
  422|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
  423|       |#endif
  424|  71.8k|    if (hdr->show_existing_frame) {
  ------------------
  |  Branch (424:9): [True: 6.69k, False: 65.1k]
  ------------------
  425|  6.69k|        hdr->existing_frame_idx = dav1d_get_bits(gb, 3);
  426|  6.69k|        if (seqhdr->decoder_model_info_present && !seqhdr->equal_picture_interval)
  ------------------
  |  Branch (426:13): [True: 539, False: 6.15k]
  |  Branch (426:51): [True: 191, False: 348]
  ------------------
  427|    191|            hdr->frame_presentation_delay = dav1d_get_bits(gb, seqhdr->frame_presentation_delay_length);
  428|  6.69k|        if (seqhdr->frame_id_numbers_present) {
  ------------------
  |  Branch (428:13): [True: 706, False: 5.98k]
  ------------------
  429|    706|            hdr->frame_id = dav1d_get_bits(gb, seqhdr->frame_id_n_bits);
  430|    706|            Dav1dFrameHeader *const ref_frame_hdr = c->refs[hdr->existing_frame_idx].p.p.frame_hdr;
  431|    706|            if (!ref_frame_hdr || ref_frame_hdr->frame_id != hdr->frame_id) goto error;
  ------------------
  |  Branch (431:17): [True: 276, False: 430]
  |  Branch (431:35): [True: 183, False: 247]
  ------------------
  432|    706|        }
  433|  6.23k|        return 0;
  434|  6.69k|    }
  435|       |
  436|  65.1k|    if (seqhdr->reduced_still_picture_header) {
  ------------------
  |  Branch (436:9): [True: 30.6k, False: 34.4k]
  ------------------
  437|  30.6k|        hdr->frame_type = DAV1D_FRAME_TYPE_KEY;
  438|  30.6k|        hdr->show_frame = 1;
  439|  34.4k|    } else {
  440|  34.4k|        hdr->frame_type = dav1d_get_bits(gb, 2);
  441|  34.4k|        hdr->show_frame = dav1d_get_bit(gb);
  442|  34.4k|    }
  443|  65.1k|    if (hdr->show_frame) {
  ------------------
  |  Branch (443:9): [True: 57.8k, False: 7.29k]
  ------------------
  444|  57.8k|        if (seqhdr->decoder_model_info_present && !seqhdr->equal_picture_interval)
  ------------------
  |  Branch (444:13): [True: 1.49k, False: 56.3k]
  |  Branch (444:51): [True: 1.16k, False: 335]
  ------------------
  445|  1.16k|            hdr->frame_presentation_delay = dav1d_get_bits(gb, seqhdr->frame_presentation_delay_length);
  446|  57.8k|        hdr->showable_frame = hdr->frame_type != DAV1D_FRAME_TYPE_KEY;
  447|  57.8k|    } else
  448|  7.29k|        hdr->showable_frame = dav1d_get_bit(gb);
  449|  65.1k|    hdr->error_resilient_mode =
  450|  65.1k|        (hdr->frame_type == DAV1D_FRAME_TYPE_KEY && hdr->show_frame) ||
  ------------------
  |  Branch (450:10): [True: 40.1k, False: 24.9k]
  |  Branch (450:53): [True: 38.4k, False: 1.75k]
  ------------------
  451|  26.7k|        hdr->frame_type == DAV1D_FRAME_TYPE_SWITCH ||
  ------------------
  |  Branch (451:9): [True: 2.38k, False: 24.3k]
  ------------------
  452|  24.3k|        seqhdr->reduced_still_picture_header || dav1d_get_bit(gb);
  ------------------
  |  Branch (452:9): [True: 0, False: 24.3k]
  |  Branch (452:49): [True: 1.68k, False: 22.6k]
  ------------------
  453|       |#if DEBUG_FRAME_HDR
  454|       |    printf("HDR: post-frametype_bits: off=%td\n",
  455|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
  456|       |#endif
  457|  65.1k|    hdr->disable_cdf_update = dav1d_get_bit(gb);
  458|  65.1k|    hdr->allow_screen_content_tools = seqhdr->screen_content_tools == DAV1D_ADAPTIVE ?
  ------------------
  |  Branch (458:39): [True: 45.3k, False: 19.7k]
  ------------------
  459|  45.3k|                                      dav1d_get_bit(gb) : seqhdr->screen_content_tools;
  460|  65.1k|    if (hdr->allow_screen_content_tools)
  ------------------
  |  Branch (460:9): [True: 40.6k, False: 24.4k]
  ------------------
  461|  40.6k|        hdr->force_integer_mv = seqhdr->force_integer_mv == DAV1D_ADAPTIVE ?
  ------------------
  |  Branch (461:33): [True: 26.2k, False: 14.3k]
  ------------------
  462|  26.2k|                                dav1d_get_bit(gb) : seqhdr->force_integer_mv;
  463|       |
  464|  65.1k|    if (IS_KEY_OR_INTRA(hdr))
  ------------------
  |  |   43|  65.1k|    (!IS_INTER_OR_SWITCH(frame_header))
  |  |  ------------------
  |  |  |  |   36|  65.1k|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (43:5): [True: 41.6k, False: 23.4k]
  |  |  ------------------
  ------------------
  465|  41.6k|        hdr->force_integer_mv = 1;
  466|       |
  467|  65.1k|    if (seqhdr->frame_id_numbers_present)
  ------------------
  |  Branch (467:9): [True: 1.74k, False: 63.3k]
  ------------------
  468|  1.74k|        hdr->frame_id = dav1d_get_bits(gb, seqhdr->frame_id_n_bits);
  469|       |
  470|  65.1k|    if (!seqhdr->reduced_still_picture_header)
  ------------------
  |  Branch (470:9): [True: 34.4k, False: 30.6k]
  ------------------
  471|  34.4k|        hdr->frame_size_override = hdr->frame_type == DAV1D_FRAME_TYPE_SWITCH ? 1 : dav1d_get_bit(gb);
  ------------------
  |  Branch (471:36): [True: 2.38k, False: 32.1k]
  ------------------
  472|       |#if DEBUG_FRAME_HDR
  473|       |    printf("HDR: post-frame_size_override_flag: off=%td\n",
  474|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
  475|       |#endif
  476|  65.1k|    if (seqhdr->order_hint)
  ------------------
  |  Branch (476:9): [True: 24.0k, False: 41.1k]
  ------------------
  477|  24.0k|        hdr->frame_offset = dav1d_get_bits(gb, seqhdr->order_hint_n_bits);
  478|  65.1k|    hdr->primary_ref_frame = !hdr->error_resilient_mode && IS_INTER_OR_SWITCH(hdr) ?
  ------------------
  |  |   36|  22.6k|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 20.4k, False: 2.19k]
  |  |  ------------------
  ------------------
  |  Branch (478:30): [True: 22.6k, False: 42.4k]
  ------------------
  479|  44.6k|                             dav1d_get_bits(gb, 3) : DAV1D_PRIMARY_REF_NONE;
  ------------------
  |  |   45|   109k|#define DAV1D_PRIMARY_REF_NONE 7
  ------------------
  480|       |
  481|  65.1k|    if (seqhdr->decoder_model_info_present) {
  ------------------
  |  Branch (481:9): [True: 1.50k, False: 63.6k]
  ------------------
  482|  1.50k|        hdr->buffer_removal_time_present = dav1d_get_bit(gb);
  483|  1.50k|        if (hdr->buffer_removal_time_present) {
  ------------------
  |  Branch (483:13): [True: 679, False: 829]
  ------------------
  484|  4.32k|            for (int i = 0; i < c->seq_hdr->num_operating_points; i++) {
  ------------------
  |  Branch (484:29): [True: 3.64k, False: 679]
  ------------------
  485|  3.64k|                const struct Dav1dSequenceHeaderOperatingPoint *const seqop = &seqhdr->operating_points[i];
  486|  3.64k|                struct Dav1dFrameHeaderOperatingPoint *const op = &hdr->operating_points[i];
  487|  3.64k|                if (seqop->decoder_model_param_present) {
  ------------------
  |  Branch (487:21): [True: 3.01k, False: 634]
  ------------------
  488|  3.01k|                    int in_temporal_layer = (seqop->idc >> hdr->temporal_id) & 1;
  489|  3.01k|                    int in_spatial_layer  = (seqop->idc >> (hdr->spatial_id + 8)) & 1;
  490|  3.01k|                    if (!seqop->idc || (in_temporal_layer && in_spatial_layer))
  ------------------
  |  Branch (490:25): [True: 255, False: 2.75k]
  |  Branch (490:41): [True: 2.16k, False: 597]
  |  Branch (490:62): [True: 1.65k, False: 505]
  ------------------
  491|  1.91k|                        op->buffer_removal_time = dav1d_get_bits(gb, seqhdr->buffer_removal_delay_length);
  492|  3.01k|                }
  493|  3.64k|            }
  494|    679|        }
  495|  1.50k|    }
  496|       |
  497|  65.1k|    if (IS_KEY_OR_INTRA(hdr)) {
  ------------------
  |  |   43|  65.1k|    (!IS_INTER_OR_SWITCH(frame_header))
  |  |  ------------------
  |  |  |  |   36|  65.1k|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (43:5): [True: 41.6k, False: 23.4k]
  |  |  ------------------
  ------------------
  498|  41.6k|        hdr->refresh_frame_flags = (hdr->frame_type == DAV1D_FRAME_TYPE_KEY &&
  ------------------
  |  Branch (498:37): [True: 40.1k, False: 1.46k]
  ------------------
  499|  40.1k|                                    hdr->show_frame) ? 0xff : dav1d_get_bits(gb, 8);
  ------------------
  |  Branch (499:37): [True: 38.4k, False: 1.75k]
  ------------------
  500|  41.6k|        if (hdr->refresh_frame_flags != 0xff && hdr->error_resilient_mode && seqhdr->order_hint)
  ------------------
  |  Branch (500:13): [True: 3.10k, False: 38.5k]
  |  Branch (500:49): [True: 947, False: 2.15k]
  |  Branch (500:78): [True: 708, False: 239]
  ------------------
  501|  6.37k|            for (int i = 0; i < 8; i++)
  ------------------
  |  Branch (501:29): [True: 5.66k, False: 708]
  ------------------
  502|  5.66k|                dav1d_get_bits(gb, seqhdr->order_hint_n_bits);
  503|  41.6k|        if (c->strict_std_compliance &&
  ------------------
  |  Branch (503:13): [True: 0, False: 41.6k]
  ------------------
  504|      0|            hdr->frame_type == DAV1D_FRAME_TYPE_INTRA && hdr->refresh_frame_flags == 0xff)
  ------------------
  |  Branch (504:13): [True: 0, False: 0]
  |  Branch (504:58): [True: 0, False: 0]
  ------------------
  505|      0|        {
  506|      0|            goto error;
  507|      0|        }
  508|  41.6k|        if (read_frame_size(c, gb, 0) < 0) goto error;
  ------------------
  |  Branch (508:13): [True: 0, False: 41.6k]
  ------------------
  509|  41.6k|        if (hdr->allow_screen_content_tools && !hdr->super_res.enabled)
  ------------------
  |  Branch (509:13): [True: 25.8k, False: 15.7k]
  |  Branch (509:48): [True: 24.6k, False: 1.21k]
  ------------------
  510|  24.6k|            hdr->allow_intrabc = dav1d_get_bit(gb);
  511|  41.6k|    } else {
  512|  23.4k|        hdr->refresh_frame_flags = hdr->frame_type == DAV1D_FRAME_TYPE_SWITCH ? 0xff :
  ------------------
  |  Branch (512:36): [True: 2.38k, False: 21.1k]
  ------------------
  513|  23.4k|                                   dav1d_get_bits(gb, 8);
  514|  23.4k|        if (hdr->error_resilient_mode && seqhdr->order_hint)
  ------------------
  |  Branch (514:13): [True: 3.05k, False: 20.4k]
  |  Branch (514:42): [True: 2.24k, False: 804]
  ------------------
  515|  20.2k|            for (int i = 0; i < 8; i++)
  ------------------
  |  Branch (515:29): [True: 17.9k, False: 2.24k]
  ------------------
  516|  17.9k|                dav1d_get_bits(gb, seqhdr->order_hint_n_bits);
  517|  23.4k|        if (seqhdr->order_hint) {
  ------------------
  |  Branch (517:13): [True: 18.4k, False: 5.08k]
  ------------------
  518|  18.4k|            hdr->frame_ref_short_signaling = dav1d_get_bit(gb);
  519|  18.4k|            if (hdr->frame_ref_short_signaling) {
  ------------------
  |  Branch (519:17): [True: 12.6k, False: 5.79k]
  ------------------
  520|  12.6k|                hdr->refidx[0] = dav1d_get_bits(gb, 3);
  521|  12.6k|                hdr->refidx[1] = hdr->refidx[2] = -1;
  522|  12.6k|                hdr->refidx[3] = dav1d_get_bits(gb, 3);
  523|       |
  524|       |                /* +1 allows for unconditional stores, as unused
  525|       |                 * values can be dumped into frame_offset[-1]. */
  526|  12.6k|                int frame_offset_mem[8+1];
  527|  12.6k|                int *const frame_offset = &frame_offset_mem[1];
  528|  12.6k|                int earliest_ref = -1;
  529|   112k|                for (int i = 0, earliest_offset = INT_MAX; i < 8; i++) {
  ------------------
  |  Branch (529:60): [True: 99.7k, False: 12.2k]
  ------------------
  530|  99.7k|                    const Dav1dFrameHeader *const refhdr = c->refs[i].p.p.frame_hdr;
  531|  99.7k|                    if (!refhdr) goto error;
  ------------------
  |  Branch (531:25): [True: 357, False: 99.4k]
  ------------------
  532|  99.4k|                    const int diff = get_poc_diff(seqhdr->order_hint_n_bits,
  533|  99.4k|                                                  refhdr->frame_offset,
  534|  99.4k|                                                  hdr->frame_offset);
  535|  99.4k|                    frame_offset[i] = diff;
  536|  99.4k|                    if (diff < earliest_offset) {
  ------------------
  |  Branch (536:25): [True: 15.3k, False: 84.0k]
  ------------------
  537|  15.3k|                        earliest_offset = diff;
  538|  15.3k|                        earliest_ref = i;
  539|  15.3k|                    }
  540|  99.4k|                }
  541|  12.2k|                frame_offset[hdr->refidx[0]] = INT_MIN; // = reference frame is used
  542|  12.2k|                frame_offset[hdr->refidx[3]] = INT_MIN;
  543|  12.2k|                assert(earliest_ref >= 0);
  ------------------
  |  Branch (543:17): [True: 12.2k, False: 0]
  ------------------
  544|       |
  545|  12.2k|                int refidx = -1;
  546|   110k|                for (int i = 0, latest_offset = 0; i < 8; i++) {
  ------------------
  |  Branch (546:52): [True: 98.0k, False: 12.2k]
  ------------------
  547|  98.0k|                    const int hint = frame_offset[i];
  548|  98.0k|                    if (hint >= latest_offset) {
  ------------------
  |  Branch (548:25): [True: 52.2k, False: 45.7k]
  ------------------
  549|  52.2k|                        latest_offset = hint;
  550|  52.2k|                        refidx = i;
  551|  52.2k|                    }
  552|  98.0k|                }
  553|  12.2k|                frame_offset[refidx] = INT_MIN;
  554|  12.2k|                hdr->refidx[6] = refidx;
  555|       |
  556|  36.7k|                for (int i = 4; i < 6; i++) {
  ------------------
  |  Branch (556:33): [True: 24.5k, False: 12.2k]
  ------------------
  557|       |                    /* Unsigned compares to handle negative values. */
  558|  24.5k|                    unsigned earliest_offset = UINT8_MAX;
  559|  24.5k|                    refidx = -1;
  560|   220k|                    for (int j = 0; j < 8; j++) {
  ------------------
  |  Branch (560:37): [True: 196k, False: 24.5k]
  ------------------
  561|   196k|                        const unsigned hint = frame_offset[j];
  562|   196k|                        if (hint < earliest_offset) {
  ------------------
  |  Branch (562:29): [True: 20.2k, False: 175k]
  ------------------
  563|  20.2k|                            earliest_offset = hint;
  564|  20.2k|                            refidx = j;
  565|  20.2k|                        }
  566|   196k|                    }
  567|  24.5k|                    frame_offset[refidx] = INT_MIN;
  568|  24.5k|                    hdr->refidx[i] = refidx;
  569|  24.5k|                }
  570|       |
  571|  85.7k|                for (int i = 1; i < 7; i++) {
  ------------------
  |  Branch (571:33): [True: 73.5k, False: 12.2k]
  ------------------
  572|  73.5k|                    refidx = hdr->refidx[i];
  573|  73.5k|                    if (refidx < 0) {
  ------------------
  |  Branch (573:25): [True: 31.6k, False: 41.8k]
  ------------------
  574|  31.6k|                        unsigned latest_offset = ~UINT8_MAX;
  575|   285k|                        for (int j = 0; j < 8; j++) {
  ------------------
  |  Branch (575:41): [True: 253k, False: 31.6k]
  ------------------
  576|   253k|                            const unsigned hint = frame_offset[j];
  577|   253k|                            if (hint >= latest_offset) {
  ------------------
  |  Branch (577:33): [True: 51.8k, False: 201k]
  ------------------
  578|  51.8k|                                latest_offset = hint;
  579|  51.8k|                                refidx = j;
  580|  51.8k|                            }
  581|   253k|                        }
  582|  31.6k|                        frame_offset[refidx] = INT_MIN;
  583|  31.6k|                        hdr->refidx[i] = refidx >= 0 ? refidx : earliest_ref;
  ------------------
  |  Branch (583:42): [True: 15.4k, False: 16.2k]
  ------------------
  584|  31.6k|                    }
  585|  73.5k|                }
  586|  12.2k|            }
  587|  18.4k|        }
  588|   180k|        for (int i = 0; i < 7; i++) {
  ------------------
  |  Branch (588:25): [True: 157k, False: 22.4k]
  ------------------
  589|   157k|            if (!hdr->frame_ref_short_signaling)
  ------------------
  |  Branch (589:17): [True: 72.1k, False: 85.7k]
  ------------------
  590|  72.1k|                hdr->refidx[i] = dav1d_get_bits(gb, 3);
  591|   157k|            if (seqhdr->frame_id_numbers_present) {
  ------------------
  |  Branch (591:17): [True: 917, False: 156k]
  ------------------
  592|    917|                const unsigned delta_ref_frame_id = dav1d_get_bits(gb, seqhdr->delta_frame_id_n_bits) + 1;
  593|    917|                const unsigned ref_frame_id = (hdr->frame_id + (1 << seqhdr->frame_id_n_bits) - delta_ref_frame_id) & ((1 << seqhdr->frame_id_n_bits) - 1);
  594|    917|                Dav1dFrameHeader *const ref_frame_hdr = c->refs[hdr->refidx[i]].p.p.frame_hdr;
  595|    917|                if (!ref_frame_hdr || ref_frame_hdr->frame_id != ref_frame_id) goto error;
  ------------------
  |  Branch (595:21): [True: 333, False: 584]
  |  Branch (595:39): [True: 377, False: 207]
  ------------------
  596|    917|            }
  597|   157k|        }
  598|  22.4k|        const int use_ref = !hdr->error_resilient_mode &&
  ------------------
  |  Branch (598:29): [True: 20.0k, False: 2.42k]
  ------------------
  599|  20.0k|                            hdr->frame_size_override;
  ------------------
  |  Branch (599:29): [True: 11.5k, False: 8.50k]
  ------------------
  600|  22.4k|        if (read_frame_size(c, gb, use_ref) < 0) goto error;
  ------------------
  |  Branch (600:13): [True: 68, False: 22.3k]
  ------------------
  601|  22.3k|        if (!hdr->force_integer_mv)
  ------------------
  |  Branch (601:13): [True: 13.6k, False: 8.70k]
  ------------------
  602|  13.6k|            hdr->hp = dav1d_get_bit(gb);
  603|  22.3k|        hdr->subpel_filter_mode = dav1d_get_bit(gb) ? DAV1D_FILTER_SWITCHABLE :
  ------------------
  |  Branch (603:35): [True: 4.72k, False: 17.6k]
  ------------------
  604|  22.3k|                                                      dav1d_get_bits(gb, 2);
  605|  22.3k|        hdr->switchable_motion_mode = dav1d_get_bit(gb);
  606|  22.3k|        if (!hdr->error_resilient_mode && seqhdr->ref_frame_mvs &&
  ------------------
  |  Branch (606:13): [True: 19.9k, False: 2.42k]
  |  Branch (606:43): [True: 13.4k, False: 6.45k]
  ------------------
  607|  13.4k|            seqhdr->order_hint && IS_INTER_OR_SWITCH(hdr))
  ------------------
  |  |   36|  13.4k|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 13.4k, False: 0]
  |  |  ------------------
  ------------------
  |  Branch (607:13): [True: 13.4k, False: 0]
  ------------------
  608|  13.4k|        {
  609|  13.4k|            hdr->use_ref_frame_mvs = dav1d_get_bit(gb);
  610|  13.4k|        }
  611|  22.3k|    }
  612|       |#if DEBUG_FRAME_HDR
  613|       |    printf("HDR: post-frametype-specific-bits: off=%td\n",
  614|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
  615|       |#endif
  616|       |
  617|  63.9k|    if (!seqhdr->reduced_still_picture_header && !hdr->disable_cdf_update)
  ------------------
  |  Branch (617:9): [True: 33.3k, False: 30.6k]
  |  Branch (617:50): [True: 27.5k, False: 5.82k]
  ------------------
  618|  27.5k|        hdr->refresh_context = !dav1d_get_bit(gb);
  619|       |#if DEBUG_FRAME_HDR
  620|       |    printf("HDR: post-refresh_context: off=%td\n",
  621|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
  622|       |#endif
  623|       |
  624|       |    // tile data
  625|  63.9k|    hdr->tiling.uniform = dav1d_get_bit(gb);
  626|  63.9k|    const int sbsz_min1 = (64 << seqhdr->sb128) - 1;
  627|  63.9k|    const int sbsz_log2 = 6 + seqhdr->sb128;
  628|  63.9k|    const int sbw = (hdr->width[0] + sbsz_min1) >> sbsz_log2;
  629|  63.9k|    const int sbh = (hdr->height + sbsz_min1) >> sbsz_log2;
  630|  63.9k|    const int max_tile_width_sb = 4096 >> sbsz_log2;
  631|  63.9k|    const int max_tile_area_sb = 4096 * 2304 >> (2 * sbsz_log2);
  632|  63.9k|    hdr->tiling.min_log2_cols = tile_log2(max_tile_width_sb, sbw);
  633|  63.9k|    hdr->tiling.max_log2_cols = tile_log2(1, imin(sbw, DAV1D_MAX_TILE_COLS));
  ------------------
  |  |   41|  63.9k|#define DAV1D_MAX_TILE_COLS 64
  ------------------
  634|  63.9k|    hdr->tiling.max_log2_rows = tile_log2(1, imin(sbh, DAV1D_MAX_TILE_ROWS));
  ------------------
  |  |   42|  63.9k|#define DAV1D_MAX_TILE_ROWS 64
  ------------------
  635|  63.9k|    const int min_log2_tiles = imax(tile_log2(max_tile_area_sb, sbw * sbh),
  636|  63.9k|                              hdr->tiling.min_log2_cols);
  637|  63.9k|    if (hdr->tiling.uniform) {
  ------------------
  |  Branch (637:9): [True: 38.0k, False: 25.9k]
  ------------------
  638|  38.0k|        for (hdr->tiling.log2_cols = hdr->tiling.min_log2_cols;
  639|  39.3k|             hdr->tiling.log2_cols < hdr->tiling.max_log2_cols && dav1d_get_bit(gb);
  ------------------
  |  Branch (639:14): [True: 18.0k, False: 21.2k]
  |  Branch (639:67): [True: 1.32k, False: 16.7k]
  ------------------
  640|  38.0k|             hdr->tiling.log2_cols++) ;
  641|  38.0k|        const int tile_w = 1 + ((sbw - 1) >> hdr->tiling.log2_cols);
  642|  38.0k|        hdr->tiling.cols = 0;
  643|  90.7k|        for (int sbx = 0; sbx < sbw; sbx += tile_w, hdr->tiling.cols++)
  ------------------
  |  Branch (643:27): [True: 52.7k, False: 38.0k]
  ------------------
  644|  52.7k|            hdr->tiling.col_start_sb[hdr->tiling.cols] = sbx;
  645|  38.0k|        hdr->tiling.min_log2_rows =
  646|  38.0k|            imax(min_log2_tiles - hdr->tiling.log2_cols, 0);
  647|       |
  648|  38.0k|        for (hdr->tiling.log2_rows = hdr->tiling.min_log2_rows;
  649|  39.2k|             hdr->tiling.log2_rows < hdr->tiling.max_log2_rows && dav1d_get_bit(gb);
  ------------------
  |  Branch (649:14): [True: 10.4k, False: 28.8k]
  |  Branch (649:67): [True: 1.22k, False: 9.19k]
  ------------------
  650|  38.0k|             hdr->tiling.log2_rows++) ;
  651|  38.0k|        const int tile_h = 1 + ((sbh - 1) >> hdr->tiling.log2_rows);
  652|  38.0k|        hdr->tiling.rows = 0;
  653|  82.5k|        for (int sby = 0; sby < sbh; sby += tile_h, hdr->tiling.rows++)
  ------------------
  |  Branch (653:27): [True: 44.5k, False: 38.0k]
  ------------------
  654|  44.5k|            hdr->tiling.row_start_sb[hdr->tiling.rows] = sby;
  655|  38.0k|    } else {
  656|  25.9k|        hdr->tiling.cols = 0;
  657|  25.9k|        int widest_tile = 0, max_tile_area_sb = sbw * sbh;
  658|  94.9k|        for (int sbx = 0; sbx < sbw && hdr->tiling.cols < DAV1D_MAX_TILE_COLS; hdr->tiling.cols++) {
  ------------------
  |  |   41|  69.3k|#define DAV1D_MAX_TILE_COLS 64
  ------------------
  |  Branch (658:27): [True: 69.3k, False: 25.5k]
  |  Branch (658:40): [True: 68.9k, False: 381]
  ------------------
  659|  68.9k|            const int tile_width_sb = imin(sbw - sbx, max_tile_width_sb);
  660|  68.9k|            const int tile_w = (tile_width_sb > 1) ? 1 + dav1d_get_uniform(gb, tile_width_sb) : 1;
  ------------------
  |  Branch (660:32): [True: 44.9k, False: 23.9k]
  ------------------
  661|  68.9k|            hdr->tiling.col_start_sb[hdr->tiling.cols] = sbx;
  662|  68.9k|            sbx += tile_w;
  663|  68.9k|            widest_tile = imax(widest_tile, tile_w);
  664|  68.9k|        }
  665|  25.9k|        hdr->tiling.log2_cols = tile_log2(1, hdr->tiling.cols);
  666|  25.9k|        if (min_log2_tiles) max_tile_area_sb >>= min_log2_tiles + 1;
  ------------------
  |  Branch (666:13): [True: 421, False: 25.5k]
  ------------------
  667|  25.9k|        const int max_tile_height_sb = imax(max_tile_area_sb / widest_tile, 1);
  668|       |
  669|  25.9k|        hdr->tiling.rows = 0;
  670|  87.7k|        for (int sby = 0; sby < sbh && hdr->tiling.rows < DAV1D_MAX_TILE_ROWS; hdr->tiling.rows++) {
  ------------------
  |  |   42|  62.2k|#define DAV1D_MAX_TILE_ROWS 64
  ------------------
  |  Branch (670:27): [True: 62.2k, False: 25.5k]
  |  Branch (670:40): [True: 61.7k, False: 454]
  ------------------
  671|  61.7k|            const int tile_height_sb = imin(sbh - sby, max_tile_height_sb);
  672|  61.7k|            const int tile_h = (tile_height_sb > 1) ? 1 + dav1d_get_uniform(gb, tile_height_sb) : 1;
  ------------------
  |  Branch (672:32): [True: 36.7k, False: 24.9k]
  ------------------
  673|  61.7k|            hdr->tiling.row_start_sb[hdr->tiling.rows] = sby;
  674|  61.7k|            sby += tile_h;
  675|  61.7k|        }
  676|  25.9k|        hdr->tiling.log2_rows = tile_log2(1, hdr->tiling.rows);
  677|  25.9k|    }
  678|  63.9k|    hdr->tiling.col_start_sb[hdr->tiling.cols] = sbw;
  679|  63.9k|    hdr->tiling.row_start_sb[hdr->tiling.rows] = sbh;
  680|  63.9k|    if (hdr->tiling.log2_cols || hdr->tiling.log2_rows) {
  ------------------
  |  Branch (680:9): [True: 5.87k, False: 58.1k]
  |  Branch (680:34): [True: 1.59k, False: 56.5k]
  ------------------
  681|  7.46k|        hdr->tiling.update = dav1d_get_bits(gb, hdr->tiling.log2_cols + hdr->tiling.log2_rows);
  682|  7.46k|        if (hdr->tiling.update >= hdr->tiling.cols * hdr->tiling.rows)
  ------------------
  |  Branch (682:13): [True: 321, False: 7.14k]
  ------------------
  683|    321|            goto error;
  684|  7.14k|        hdr->tiling.n_bytes = dav1d_get_bits(gb, 2) + 1;
  685|  7.14k|    }
  686|       |#if DEBUG_FRAME_HDR
  687|       |    printf("HDR: post-tiling: off=%td\n",
  688|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
  689|       |#endif
  690|       |
  691|       |    // quant data
  692|  63.6k|    hdr->quant.yac = dav1d_get_bits(gb, 8);
  693|  63.6k|    if (dav1d_get_bit(gb))
  ------------------
  |  Branch (693:9): [True: 7.76k, False: 55.8k]
  ------------------
  694|  7.76k|        hdr->quant.ydc_delta = dav1d_get_sbits(gb, 7);
  695|  63.6k|    if (!seqhdr->monochrome) {
  ------------------
  |  Branch (695:9): [True: 37.8k, False: 25.8k]
  ------------------
  696|       |        // If the sequence header says that delta_q might be different
  697|       |        // for U, V, we must check whether it actually is for this
  698|       |        // frame.
  699|  37.8k|        const int diff_uv_delta = seqhdr->separate_uv_delta_q ? dav1d_get_bit(gb) : 0;
  ------------------
  |  Branch (699:35): [True: 9.69k, False: 28.1k]
  ------------------
  700|  37.8k|        if (dav1d_get_bit(gb))
  ------------------
  |  Branch (700:13): [True: 3.83k, False: 34.0k]
  ------------------
  701|  3.83k|            hdr->quant.udc_delta = dav1d_get_sbits(gb, 7);
  702|  37.8k|        if (dav1d_get_bit(gb))
  ------------------
  |  Branch (702:13): [True: 3.79k, False: 34.0k]
  ------------------
  703|  3.79k|            hdr->quant.uac_delta = dav1d_get_sbits(gb, 7);
  704|  37.8k|        if (diff_uv_delta) {
  ------------------
  |  Branch (704:13): [True: 2.86k, False: 34.9k]
  ------------------
  705|  2.86k|            if (dav1d_get_bit(gb))
  ------------------
  |  Branch (705:17): [True: 1.69k, False: 1.16k]
  ------------------
  706|  1.69k|                hdr->quant.vdc_delta = dav1d_get_sbits(gb, 7);
  707|  2.86k|            if (dav1d_get_bit(gb))
  ------------------
  |  Branch (707:17): [True: 1.14k, False: 1.71k]
  ------------------
  708|  1.14k|                hdr->quant.vac_delta = dav1d_get_sbits(gb, 7);
  709|  34.9k|        } else {
  710|  34.9k|            hdr->quant.vdc_delta = hdr->quant.udc_delta;
  711|  34.9k|            hdr->quant.vac_delta = hdr->quant.uac_delta;
  712|  34.9k|        }
  713|  37.8k|    }
  714|       |#if DEBUG_FRAME_HDR
  715|       |    printf("HDR: post-quant: off=%td\n",
  716|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
  717|       |#endif
  718|  63.6k|    hdr->quant.qm = dav1d_get_bit(gb);
  719|  63.6k|    if (hdr->quant.qm) {
  ------------------
  |  Branch (719:9): [True: 8.21k, False: 55.4k]
  ------------------
  720|  8.21k|        hdr->quant.qm_y = dav1d_get_bits(gb, 4);
  721|  8.21k|        hdr->quant.qm_u = dav1d_get_bits(gb, 4);
  722|  8.21k|        hdr->quant.qm_v = seqhdr->separate_uv_delta_q ? dav1d_get_bits(gb, 4) :
  ------------------
  |  Branch (722:27): [True: 1.57k, False: 6.63k]
  ------------------
  723|  8.21k|                                                        hdr->quant.qm_u;
  724|  8.21k|    }
  725|       |#if DEBUG_FRAME_HDR
  726|       |    printf("HDR: post-qm: off=%td\n",
  727|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
  728|       |#endif
  729|       |
  730|       |    // segmentation data
  731|  63.6k|    hdr->segmentation.enabled = dav1d_get_bit(gb);
  732|  63.6k|    if (hdr->segmentation.enabled) {
  ------------------
  |  Branch (732:9): [True: 10.4k, False: 53.2k]
  ------------------
  733|  10.4k|        if (hdr->primary_ref_frame == DAV1D_PRIMARY_REF_NONE) {
  ------------------
  |  |   45|  10.4k|#define DAV1D_PRIMARY_REF_NONE 7
  ------------------
  |  Branch (733:13): [True: 3.01k, False: 7.39k]
  ------------------
  734|  3.01k|            hdr->segmentation.update_map = 1;
  735|  3.01k|            hdr->segmentation.update_data = 1;
  736|  7.39k|        } else {
  737|  7.39k|            hdr->segmentation.update_map = dav1d_get_bit(gb);
  738|  7.39k|            if (hdr->segmentation.update_map)
  ------------------
  |  Branch (738:17): [True: 1.15k, False: 6.23k]
  ------------------
  739|  1.15k|                hdr->segmentation.temporal = dav1d_get_bit(gb);
  740|  7.39k|            hdr->segmentation.update_data = dav1d_get_bit(gb);
  741|  7.39k|        }
  742|       |
  743|  10.4k|        if (hdr->segmentation.update_data) {
  ------------------
  |  Branch (743:13): [True: 3.89k, False: 6.51k]
  ------------------
  744|  3.89k|            hdr->segmentation.seg_data.last_active_segid = -1;
  745|  35.0k|            for (int i = 0; i < DAV1D_MAX_SEGMENTS; i++) {
  ------------------
  |  |   43|  35.0k|#define DAV1D_MAX_SEGMENTS 8
  ------------------
  |  Branch (745:29): [True: 31.1k, False: 3.89k]
  ------------------
  746|  31.1k|                Dav1dSegmentationData *const seg =
  747|  31.1k|                    &hdr->segmentation.seg_data.d[i];
  748|  31.1k|                if (dav1d_get_bit(gb)) {
  ------------------
  |  Branch (748:21): [True: 5.80k, False: 25.3k]
  ------------------
  749|  5.80k|                    seg->delta_q = dav1d_get_sbits(gb, 9);
  750|  5.80k|                    hdr->segmentation.seg_data.last_active_segid = i;
  751|  5.80k|                }
  752|  31.1k|                if (dav1d_get_bit(gb)) {
  ------------------
  |  Branch (752:21): [True: 3.65k, False: 27.4k]
  ------------------
  753|  3.65k|                    seg->delta_lf_y_v = dav1d_get_sbits(gb, 7);
  754|  3.65k|                    hdr->segmentation.seg_data.last_active_segid = i;
  755|  3.65k|                }
  756|  31.1k|                if (dav1d_get_bit(gb)) {
  ------------------
  |  Branch (756:21): [True: 4.98k, False: 26.1k]
  ------------------
  757|  4.98k|                    seg->delta_lf_y_h = dav1d_get_sbits(gb, 7);
  758|  4.98k|                    hdr->segmentation.seg_data.last_active_segid = i;
  759|  4.98k|                }
  760|  31.1k|                if (dav1d_get_bit(gb)) {
  ------------------
  |  Branch (760:21): [True: 4.36k, False: 26.7k]
  ------------------
  761|  4.36k|                    seg->delta_lf_u = dav1d_get_sbits(gb, 7);
  762|  4.36k|                    hdr->segmentation.seg_data.last_active_segid = i;
  763|  4.36k|                }
  764|  31.1k|                if (dav1d_get_bit(gb)) {
  ------------------
  |  Branch (764:21): [True: 3.25k, False: 27.8k]
  ------------------
  765|  3.25k|                    seg->delta_lf_v = dav1d_get_sbits(gb, 7);
  766|  3.25k|                    hdr->segmentation.seg_data.last_active_segid = i;
  767|  3.25k|                }
  768|  31.1k|                if (dav1d_get_bit(gb)) {
  ------------------
  |  Branch (768:21): [True: 3.44k, False: 27.6k]
  ------------------
  769|  3.44k|                    seg->ref = dav1d_get_bits(gb, 3);
  770|  3.44k|                    hdr->segmentation.seg_data.last_active_segid = i;
  771|  3.44k|                    hdr->segmentation.seg_data.preskip = 1;
  772|  27.6k|                } else {
  773|  27.6k|                    seg->ref = -1;
  774|  27.6k|                }
  775|  31.1k|                if ((seg->skip = dav1d_get_bit(gb))) {
  ------------------
  |  Branch (775:21): [True: 4.37k, False: 26.7k]
  ------------------
  776|  4.37k|                    hdr->segmentation.seg_data.last_active_segid = i;
  777|  4.37k|                    hdr->segmentation.seg_data.preskip = 1;
  778|  4.37k|                }
  779|  31.1k|                if ((seg->globalmv = dav1d_get_bit(gb))) {
  ------------------
  |  Branch (779:21): [True: 3.88k, False: 27.2k]
  ------------------
  780|  3.88k|                    hdr->segmentation.seg_data.last_active_segid = i;
  781|  3.88k|                    hdr->segmentation.seg_data.preskip = 1;
  782|  3.88k|                }
  783|  31.1k|            }
  784|  6.51k|        } else {
  785|       |            // segmentation.update_data was false so we should copy
  786|       |            // segmentation data from the reference frame.
  787|  6.51k|            assert(hdr->primary_ref_frame != DAV1D_PRIMARY_REF_NONE);
  ------------------
  |  Branch (787:13): [True: 6.51k, False: 0]
  ------------------
  788|  6.51k|            const int pri_ref = hdr->refidx[hdr->primary_ref_frame];
  789|  6.51k|            if (!c->refs[pri_ref].p.p.frame_hdr) goto error;
  ------------------
  |  Branch (789:17): [True: 242, False: 6.27k]
  ------------------
  790|  6.27k|            hdr->segmentation.seg_data =
  791|  6.27k|                c->refs[pri_ref].p.p.frame_hdr->segmentation.seg_data;
  792|  6.27k|        }
  793|  53.2k|    } else {
  794|   479k|        for (int i = 0; i < DAV1D_MAX_SEGMENTS; i++)
  ------------------
  |  |   43|   479k|#define DAV1D_MAX_SEGMENTS 8
  ------------------
  |  Branch (794:25): [True: 426k, False: 53.2k]
  ------------------
  795|   426k|            hdr->segmentation.seg_data.d[i].ref = -1;
  796|  53.2k|    }
  797|       |#if DEBUG_FRAME_HDR
  798|       |    printf("HDR: post-segmentation: off=%td\n",
  799|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
  800|       |#endif
  801|       |
  802|       |    // delta q
  803|  63.4k|    if (hdr->quant.yac) {
  ------------------
  |  Branch (803:9): [True: 47.7k, False: 15.6k]
  ------------------
  804|  47.7k|        hdr->delta.q.present = dav1d_get_bit(gb);
  805|  47.7k|        if (hdr->delta.q.present) {
  ------------------
  |  Branch (805:13): [True: 7.43k, False: 40.3k]
  ------------------
  806|  7.43k|            hdr->delta.q.res_log2 = dav1d_get_bits(gb, 2);
  807|  7.43k|            if (!hdr->allow_intrabc) {
  ------------------
  |  Branch (807:17): [True: 5.13k, False: 2.29k]
  ------------------
  808|  5.13k|                hdr->delta.lf.present = dav1d_get_bit(gb);
  809|  5.13k|                if (hdr->delta.lf.present) {
  ------------------
  |  Branch (809:21): [True: 1.67k, False: 3.46k]
  ------------------
  810|  1.67k|                    hdr->delta.lf.res_log2 = dav1d_get_bits(gb, 2);
  811|  1.67k|                    hdr->delta.lf.multi = dav1d_get_bit(gb);
  812|  1.67k|                }
  813|  5.13k|            }
  814|  7.43k|        }
  815|  47.7k|    }
  816|       |#if DEBUG_FRAME_HDR
  817|       |    printf("HDR: post-delta_q_lf_flags: off=%td\n",
  818|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
  819|       |#endif
  820|       |
  821|       |    // derive lossless flags
  822|  63.4k|    const int delta_lossless = !hdr->quant.ydc_delta && !hdr->quant.udc_delta &&
  ------------------
  |  Branch (822:32): [True: 56.4k, False: 6.96k]
  |  Branch (822:57): [True: 54.9k, False: 1.49k]
  ------------------
  823|  54.9k|        !hdr->quant.uac_delta && !hdr->quant.vdc_delta && !hdr->quant.vac_delta;
  ------------------
  |  Branch (823:9): [True: 54.0k, False: 941]
  |  Branch (823:34): [True: 53.4k, False: 607]
  |  Branch (823:59): [True: 53.2k, False: 144]
  ------------------
  824|  63.4k|    hdr->all_lossless = 1;
  825|   570k|    for (int i = 0; i < DAV1D_MAX_SEGMENTS; i++) {
  ------------------
  |  |   43|   570k|#define DAV1D_MAX_SEGMENTS 8
  ------------------
  |  Branch (825:21): [True: 507k, False: 63.4k]
  ------------------
  826|   507k|        hdr->segmentation.qidx[i] = hdr->segmentation.enabled ?
  ------------------
  |  Branch (826:37): [True: 81.3k, False: 426k]
  ------------------
  827|  81.3k|            iclip_u8(hdr->quant.yac + hdr->segmentation.seg_data.d[i].delta_q) :
  828|   507k|            hdr->quant.yac;
  829|   507k|        hdr->segmentation.lossless[i] =
  830|   507k|            !hdr->segmentation.qidx[i] && delta_lossless;
  ------------------
  |  Branch (830:13): [True: 126k, False: 380k]
  |  Branch (830:43): [True: 123k, False: 2.93k]
  ------------------
  831|   507k|        hdr->all_lossless &= hdr->segmentation.lossless[i];
  832|   507k|    }
  833|       |
  834|       |    // loopfilter
  835|  63.4k|    if (hdr->all_lossless || hdr->allow_intrabc) {
  ------------------
  |  Branch (835:9): [True: 15.1k, False: 48.2k]
  |  Branch (835:30): [True: 19.5k, False: 28.6k]
  ------------------
  836|  34.7k|        hdr->loopfilter.mode_ref_delta_enabled = 1;
  837|  34.7k|        hdr->loopfilter.mode_ref_delta_update = 1;
  838|  34.7k|        hdr->loopfilter.mode_ref_deltas = default_mode_ref_deltas;
  839|  34.7k|    } else {
  840|  28.6k|        hdr->loopfilter.level_y[0] = dav1d_get_bits(gb, 6);
  841|  28.6k|        hdr->loopfilter.level_y[1] = dav1d_get_bits(gb, 6);
  842|  28.6k|        if (!seqhdr->monochrome &&
  ------------------
  |  Branch (842:13): [True: 10.4k, False: 18.1k]
  ------------------
  843|  10.4k|            (hdr->loopfilter.level_y[0] || hdr->loopfilter.level_y[1]))
  ------------------
  |  Branch (843:14): [True: 4.09k, False: 6.35k]
  |  Branch (843:44): [True: 1.13k, False: 5.21k]
  ------------------
  844|  5.23k|        {
  845|  5.23k|            hdr->loopfilter.level_u = dav1d_get_bits(gb, 6);
  846|  5.23k|            hdr->loopfilter.level_v = dav1d_get_bits(gb, 6);
  847|  5.23k|        }
  848|  28.6k|        hdr->loopfilter.sharpness = dav1d_get_bits(gb, 3);
  849|       |
  850|  28.6k|        if (hdr->primary_ref_frame == DAV1D_PRIMARY_REF_NONE) {
  ------------------
  |  |   45|  28.6k|#define DAV1D_PRIMARY_REF_NONE 7
  ------------------
  |  Branch (850:13): [True: 14.8k, False: 13.8k]
  ------------------
  851|  14.8k|            hdr->loopfilter.mode_ref_deltas = default_mode_ref_deltas;
  852|  14.8k|        } else {
  853|  13.8k|            const int ref = hdr->refidx[hdr->primary_ref_frame];
  854|  13.8k|            if (!c->refs[ref].p.p.frame_hdr) goto error;
  ------------------
  |  Branch (854:17): [True: 505, False: 13.3k]
  ------------------
  855|  13.3k|            hdr->loopfilter.mode_ref_deltas =
  856|  13.3k|                c->refs[ref].p.p.frame_hdr->loopfilter.mode_ref_deltas;
  857|  13.3k|        }
  858|  28.1k|        hdr->loopfilter.mode_ref_delta_enabled = dav1d_get_bit(gb);
  859|  28.1k|        if (hdr->loopfilter.mode_ref_delta_enabled) {
  ------------------
  |  Branch (859:13): [True: 15.3k, False: 12.8k]
  ------------------
  860|  15.3k|            hdr->loopfilter.mode_ref_delta_update = dav1d_get_bit(gb);
  861|  15.3k|            if (hdr->loopfilter.mode_ref_delta_update) {
  ------------------
  |  Branch (861:17): [True: 1.64k, False: 13.6k]
  ------------------
  862|  14.8k|                for (int i = 0; i < 8; i++)
  ------------------
  |  Branch (862:33): [True: 13.1k, False: 1.64k]
  ------------------
  863|  13.1k|                    if (dav1d_get_bit(gb))
  ------------------
  |  Branch (863:25): [True: 3.61k, False: 9.56k]
  ------------------
  864|  3.61k|                        hdr->loopfilter.mode_ref_deltas.ref_delta[i] =
  865|  3.61k|                            dav1d_get_sbits(gb, 7);
  866|  4.94k|                for (int i = 0; i < 2; i++)
  ------------------
  |  Branch (866:33): [True: 3.29k, False: 1.64k]
  ------------------
  867|  3.29k|                    if (dav1d_get_bit(gb))
  ------------------
  |  Branch (867:25): [True: 843, False: 2.45k]
  ------------------
  868|    843|                        hdr->loopfilter.mode_ref_deltas.mode_delta[i] =
  869|    843|                            dav1d_get_sbits(gb, 7);
  870|  1.64k|            }
  871|  15.3k|        }
  872|  28.1k|    }
  873|       |#if DEBUG_FRAME_HDR
  874|       |    printf("HDR: post-lpf: off=%td\n",
  875|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
  876|       |#endif
  877|       |
  878|       |    // cdef
  879|  62.9k|    if (!hdr->all_lossless && seqhdr->cdef && !hdr->allow_intrabc) {
  ------------------
  |  Branch (879:9): [True: 47.7k, False: 15.1k]
  |  Branch (879:31): [True: 16.7k, False: 30.9k]
  |  Branch (879:47): [True: 7.91k, False: 8.87k]
  ------------------
  880|  7.91k|        hdr->cdef.damping = dav1d_get_bits(gb, 2) + 3;
  881|  7.91k|        hdr->cdef.n_bits = dav1d_get_bits(gb, 2);
  882|  19.8k|        for (int i = 0; i < (1 << hdr->cdef.n_bits); i++) {
  ------------------
  |  Branch (882:25): [True: 11.9k, False: 7.91k]
  ------------------
  883|  11.9k|            hdr->cdef.y_strength[i] = dav1d_get_bits(gb, 6);
  884|  11.9k|            if (!seqhdr->monochrome)
  ------------------
  |  Branch (884:17): [True: 8.32k, False: 3.61k]
  ------------------
  885|  8.32k|                hdr->cdef.uv_strength[i] = dav1d_get_bits(gb, 6);
  886|  11.9k|        }
  887|  7.91k|    }
  888|       |#if DEBUG_FRAME_HDR
  889|       |    printf("HDR: post-cdef: off=%td\n",
  890|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
  891|       |#endif
  892|       |
  893|       |    // restoration
  894|  62.9k|    if ((!hdr->all_lossless || hdr->super_res.enabled) &&
  ------------------
  |  Branch (894:10): [True: 47.7k, False: 15.1k]
  |  Branch (894:32): [True: 4.05k, False: 11.1k]
  ------------------
  895|  51.7k|        seqhdr->restoration && !hdr->allow_intrabc)
  ------------------
  |  Branch (895:9): [True: 34.6k, False: 17.1k]
  |  Branch (895:32): [True: 26.1k, False: 8.45k]
  ------------------
  896|  26.1k|    {
  897|  26.1k|        hdr->restoration.type[0] = dav1d_get_bits(gb, 2);
  898|  26.1k|        if (!seqhdr->monochrome) {
  ------------------
  |  Branch (898:13): [True: 9.75k, False: 16.4k]
  ------------------
  899|  9.75k|            hdr->restoration.type[1] = dav1d_get_bits(gb, 2);
  900|  9.75k|            hdr->restoration.type[2] = dav1d_get_bits(gb, 2);
  901|  9.75k|        }
  902|       |
  903|  26.1k|        if (hdr->restoration.type[0] || hdr->restoration.type[1] ||
  ------------------
  |  Branch (903:13): [True: 18.2k, False: 7.91k]
  |  Branch (903:41): [True: 447, False: 7.46k]
  ------------------
  904|  7.46k|            hdr->restoration.type[2])
  ------------------
  |  Branch (904:13): [True: 364, False: 7.10k]
  ------------------
  905|  19.0k|        {
  906|       |            // Log2 of the restoration unit size.
  907|  19.0k|            hdr->restoration.unit_size[0] = 6 + seqhdr->sb128;
  908|  19.0k|            if (dav1d_get_bit(gb)) {
  ------------------
  |  Branch (908:17): [True: 12.6k, False: 6.40k]
  ------------------
  909|  12.6k|                hdr->restoration.unit_size[0]++;
  910|  12.6k|                if (!seqhdr->sb128)
  ------------------
  |  Branch (910:21): [True: 2.27k, False: 10.3k]
  ------------------
  911|  2.27k|                    hdr->restoration.unit_size[0] += dav1d_get_bit(gb);
  912|  12.6k|            }
  913|  19.0k|            hdr->restoration.unit_size[1] = hdr->restoration.unit_size[0];
  914|  19.0k|            if ((hdr->restoration.type[1] || hdr->restoration.type[2]) &&
  ------------------
  |  Branch (914:18): [True: 2.99k, False: 16.0k]
  |  Branch (914:46): [True: 2.21k, False: 13.8k]
  ------------------
  915|  5.21k|                seqhdr->ss_hor == 1 && seqhdr->ss_ver == 1)
  ------------------
  |  Branch (915:17): [True: 3.00k, False: 2.21k]
  |  Branch (915:40): [True: 1.05k, False: 1.94k]
  ------------------
  916|  1.05k|            {
  917|  1.05k|                hdr->restoration.unit_size[1] -= dav1d_get_bit(gb);
  918|  1.05k|            }
  919|  19.0k|        } else {
  920|  7.10k|            hdr->restoration.unit_size[0] = 8;
  921|  7.10k|        }
  922|  26.1k|    }
  923|       |#if DEBUG_FRAME_HDR
  924|       |    printf("HDR: post-restoration: off=%td\n",
  925|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
  926|       |#endif
  927|       |
  928|  62.9k|    if (!hdr->all_lossless)
  ------------------
  |  Branch (928:9): [True: 47.7k, False: 15.1k]
  ------------------
  929|  47.7k|        hdr->txfm_mode = dav1d_get_bit(gb) ? DAV1D_TX_SWITCHABLE : DAV1D_TX_LARGEST;
  ------------------
  |  Branch (929:26): [True: 8.26k, False: 39.4k]
  ------------------
  930|       |#if DEBUG_FRAME_HDR
  931|       |    printf("HDR: post-txfmmode: off=%td\n",
  932|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
  933|       |#endif
  934|  62.9k|    if (IS_INTER_OR_SWITCH(hdr))
  ------------------
  |  |   36|  62.9k|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 21.6k, False: 41.3k]
  |  |  ------------------
  ------------------
  935|  21.6k|        hdr->switchable_comp_refs = dav1d_get_bit(gb);
  936|       |#if DEBUG_FRAME_HDR
  937|       |    printf("HDR: post-refmode: off=%td\n",
  938|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
  939|       |#endif
  940|  62.9k|    if (hdr->switchable_comp_refs && IS_INTER_OR_SWITCH(hdr) && seqhdr->order_hint) {
  ------------------
  |  |   36|  76.0k|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 13.1k, False: 0]
  |  |  ------------------
  ------------------
  |  Branch (940:9): [True: 13.1k, False: 49.7k]
  |  Branch (940:65): [True: 11.7k, False: 1.47k]
  ------------------
  941|  11.7k|        const int poc = hdr->frame_offset;
  942|  11.7k|        int off_before = -1, off_after = -1;
  943|  11.7k|        int off_before_idx, off_after_idx;
  944|  92.2k|        for (int i = 0; i < 7; i++) {
  ------------------
  |  Branch (944:25): [True: 80.7k, False: 11.4k]
  ------------------
  945|  80.7k|            if (!c->refs[hdr->refidx[i]].p.p.frame_hdr) goto error;
  ------------------
  |  Branch (945:17): [True: 203, False: 80.5k]
  ------------------
  946|  80.5k|            const int refpoc = c->refs[hdr->refidx[i]].p.p.frame_hdr->frame_offset;
  947|       |
  948|  80.5k|            const int diff = get_poc_diff(seqhdr->order_hint_n_bits, refpoc, poc);
  949|  80.5k|            if (diff > 0) {
  ------------------
  |  Branch (949:17): [True: 16.5k, False: 63.9k]
  ------------------
  950|  16.5k|                if (off_after < 0 || get_poc_diff(seqhdr->order_hint_n_bits,
  ------------------
  |  Branch (950:21): [True: 6.72k, False: 9.82k]
  |  Branch (950:38): [True: 530, False: 9.29k]
  ------------------
  951|  9.82k|                                                  off_after, refpoc) > 0)
  952|  7.25k|                {
  953|  7.25k|                    off_after = refpoc;
  954|  7.25k|                    off_after_idx = i;
  955|  7.25k|                }
  956|  63.9k|            } else if (diff < 0 && (off_before < 0 ||
  ------------------
  |  Branch (956:24): [True: 26.4k, False: 37.5k]
  |  Branch (956:37): [True: 5.69k, False: 20.7k]
  ------------------
  957|  20.7k|                                    get_poc_diff(seqhdr->order_hint_n_bits,
  ------------------
  |  Branch (957:37): [True: 824, False: 19.9k]
  ------------------
  958|  20.7k|                                                 refpoc, off_before) > 0))
  959|  6.52k|            {
  960|  6.52k|                off_before = refpoc;
  961|  6.52k|                off_before_idx = i;
  962|  6.52k|            }
  963|  80.5k|        }
  964|       |
  965|  11.4k|        if ((off_before | off_after) >= 0) {
  ------------------
  |  Branch (965:13): [True: 2.16k, False: 9.33k]
  ------------------
  966|  2.16k|            hdr->skip_mode_refs[0] = imin(off_before_idx, off_after_idx);
  967|  2.16k|            hdr->skip_mode_refs[1] = imax(off_before_idx, off_after_idx);
  968|  2.16k|            hdr->skip_mode_allowed = 1;
  969|  9.33k|        } else if (off_before >= 0) {
  ------------------
  |  Branch (969:20): [True: 3.52k, False: 5.80k]
  ------------------
  970|  3.52k|            int off_before2 = -1;
  971|  3.52k|            int off_before2_idx;
  972|  28.1k|            for (int i = 0; i < 7; i++) {
  ------------------
  |  Branch (972:29): [True: 24.6k, False: 3.52k]
  ------------------
  973|  24.6k|                if (!c->refs[hdr->refidx[i]].p.p.frame_hdr) goto error;
  ------------------
  |  Branch (973:21): [True: 0, False: 24.6k]
  ------------------
  974|  24.6k|                const int refpoc = c->refs[hdr->refidx[i]].p.p.frame_hdr->frame_offset;
  975|  24.6k|                if (get_poc_diff(seqhdr->order_hint_n_bits,
  ------------------
  |  Branch (975:21): [True: 8.83k, False: 15.8k]
  ------------------
  976|  24.6k|                                 refpoc, off_before) < 0) {
  977|  8.83k|                    if (off_before2 < 0 || get_poc_diff(seqhdr->order_hint_n_bits,
  ------------------
  |  Branch (977:25): [True: 2.11k, False: 6.72k]
  |  Branch (977:44): [True: 245, False: 6.47k]
  ------------------
  978|  6.72k|                                                        refpoc, off_before2) > 0)
  979|  2.36k|                    {
  980|  2.36k|                        off_before2 = refpoc;
  981|  2.36k|                        off_before2_idx = i;
  982|  2.36k|                    }
  983|  8.83k|                }
  984|  24.6k|            }
  985|       |
  986|  3.52k|            if (off_before2 >= 0) {
  ------------------
  |  Branch (986:17): [True: 2.11k, False: 1.40k]
  ------------------
  987|  2.11k|                hdr->skip_mode_refs[0] = imin(off_before_idx, off_before2_idx);
  988|  2.11k|                hdr->skip_mode_refs[1] = imax(off_before_idx, off_before2_idx);
  989|  2.11k|                hdr->skip_mode_allowed = 1;
  990|  2.11k|            }
  991|  3.52k|        }
  992|  11.4k|    }
  993|  62.7k|    if (hdr->skip_mode_allowed)
  ------------------
  |  Branch (993:9): [True: 4.28k, False: 58.4k]
  ------------------
  994|  4.28k|        hdr->skip_mode_enabled = dav1d_get_bit(gb);
  995|       |#if DEBUG_FRAME_HDR
  996|       |    printf("HDR: post-extskip: off=%td\n",
  997|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
  998|       |#endif
  999|  62.7k|    if (!hdr->error_resilient_mode && IS_INTER_OR_SWITCH(hdr) && seqhdr->warped_motion)
  ------------------
  |  |   36|  83.8k|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 18.9k, False: 2.19k]
  |  |  ------------------
  ------------------
  |  Branch (999:9): [True: 21.1k, False: 41.5k]
  |  Branch (999:66): [True: 14.4k, False: 4.57k]
  ------------------
 1000|  14.4k|        hdr->warp_motion = dav1d_get_bit(gb);
 1001|       |#if DEBUG_FRAME_HDR
 1002|       |    printf("HDR: post-warpmotionbit: off=%td\n",
 1003|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
 1004|       |#endif
 1005|  62.7k|    hdr->reduced_txtp_set = dav1d_get_bit(gb);
 1006|       |#if DEBUG_FRAME_HDR
 1007|       |    printf("HDR: post-reducedtxtpset: off=%td\n",
 1008|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
 1009|       |#endif
 1010|       |
 1011|   501k|    for (int i = 0; i < 7; i++)
  ------------------
  |  Branch (1011:21): [True: 438k, False: 62.7k]
  ------------------
 1012|   438k|        hdr->gmv[i] = dav1d_default_wm_params;
 1013|       |
 1014|  62.7k|    if (IS_INTER_OR_SWITCH(hdr)) {
  ------------------
  |  |   36|  62.7k|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 21.4k, False: 41.3k]
  |  |  ------------------
  ------------------
 1015|   170k|        for (int i = 0; i < 7; i++) {
  ------------------
  |  Branch (1015:25): [True: 149k, False: 21.2k]
  ------------------
 1016|   149k|            hdr->gmv[i].type = !dav1d_get_bit(gb) ? DAV1D_WM_TYPE_IDENTITY :
  ------------------
  |  Branch (1016:32): [True: 139k, False: 9.90k]
  ------------------
 1017|   149k|                                dav1d_get_bit(gb) ? DAV1D_WM_TYPE_ROT_ZOOM :
  ------------------
  |  Branch (1017:33): [True: 5.95k, False: 3.95k]
  ------------------
 1018|  9.90k|                                dav1d_get_bit(gb) ? DAV1D_WM_TYPE_TRANSLATION :
  ------------------
  |  Branch (1018:33): [True: 1.48k, False: 2.46k]
  ------------------
 1019|  3.95k|                                                    DAV1D_WM_TYPE_AFFINE;
 1020|       |
 1021|   149k|            if (hdr->gmv[i].type == DAV1D_WM_TYPE_IDENTITY) continue;
  ------------------
  |  Branch (1021:17): [True: 139k, False: 9.90k]
  ------------------
 1022|       |
 1023|  9.90k|            const Dav1dWarpedMotionParams *ref_gmv;
 1024|  9.90k|            if (hdr->primary_ref_frame == DAV1D_PRIMARY_REF_NONE) {
  ------------------
  |  |   45|  9.90k|#define DAV1D_PRIMARY_REF_NONE 7
  ------------------
  |  Branch (1024:17): [True: 1.07k, False: 8.82k]
  ------------------
 1025|  1.07k|                ref_gmv = &dav1d_default_wm_params;
 1026|  8.82k|            } else {
 1027|  8.82k|                const int pri_ref = hdr->refidx[hdr->primary_ref_frame];
 1028|  8.82k|                if (!c->refs[pri_ref].p.p.frame_hdr) goto error;
  ------------------
  |  Branch (1028:21): [True: 206, False: 8.61k]
  ------------------
 1029|  8.61k|                ref_gmv = &c->refs[pri_ref].p.p.frame_hdr->gmv[i];
 1030|  8.61k|            }
 1031|  9.69k|            int32_t *const mat = hdr->gmv[i].matrix;
 1032|  9.69k|            const int32_t *const ref_mat = ref_gmv->matrix;
 1033|  9.69k|            int bits, shift;
 1034|       |
 1035|  9.69k|            if (hdr->gmv[i].type >= DAV1D_WM_TYPE_ROT_ZOOM) {
  ------------------
  |  Branch (1035:17): [True: 8.22k, False: 1.47k]
  ------------------
 1036|  8.22k|                mat[2] = (1 << 16) + 2 *
 1037|  8.22k|                    dav1d_get_bits_subexp(gb, (ref_mat[2] - (1 << 16)) >> 1, 12);
 1038|  8.22k|                mat[3] = 2 * dav1d_get_bits_subexp(gb, ref_mat[3] >> 1, 12);
 1039|       |
 1040|  8.22k|                bits = 12;
 1041|  8.22k|                shift = 10;
 1042|  8.22k|            } else {
 1043|  1.47k|                bits = 9 - !hdr->hp;
 1044|  1.47k|                shift = 13 + !hdr->hp;
 1045|  1.47k|            }
 1046|       |
 1047|  9.69k|            if (hdr->gmv[i].type == DAV1D_WM_TYPE_AFFINE) {
  ------------------
  |  Branch (1047:17): [True: 2.42k, False: 7.27k]
  ------------------
 1048|  2.42k|                mat[4] = 2 * dav1d_get_bits_subexp(gb, ref_mat[4] >> 1, 12);
 1049|  2.42k|                mat[5] = (1 << 16) + 2 *
 1050|  2.42k|                    dav1d_get_bits_subexp(gb, (ref_mat[5] - (1 << 16)) >> 1, 12);
 1051|  7.27k|            } else {
 1052|  7.27k|                mat[4] = -mat[3];
 1053|  7.27k|                mat[5] = mat[2];
 1054|  7.27k|            }
 1055|       |
 1056|  9.69k|            mat[0] = dav1d_get_bits_subexp(gb, ref_mat[0] >> shift, bits) * (1 << shift);
 1057|  9.69k|            mat[1] = dav1d_get_bits_subexp(gb, ref_mat[1] >> shift, bits) * (1 << shift);
 1058|  9.69k|        }
 1059|  21.4k|    }
 1060|       |#if DEBUG_FRAME_HDR
 1061|       |    printf("HDR: post-gmv: off=%td\n",
 1062|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
 1063|       |#endif
 1064|       |
 1065|  62.5k|    if (seqhdr->film_grain_present && (hdr->show_frame || hdr->showable_frame)) {
  ------------------
  |  Branch (1065:9): [True: 18.7k, False: 43.7k]
  |  Branch (1065:40): [True: 15.4k, False: 3.27k]
  |  Branch (1065:59): [True: 2.24k, False: 1.02k]
  ------------------
 1066|  17.7k|        hdr->film_grain.present = dav1d_get_bit(gb);
 1067|  17.7k|        if (hdr->film_grain.present) {
  ------------------
  |  Branch (1067:13): [True: 6.31k, False: 11.4k]
  ------------------
 1068|  6.31k|            const unsigned seed = dav1d_get_bits(gb, 16);
 1069|  6.31k|            hdr->film_grain.update = hdr->frame_type != DAV1D_FRAME_TYPE_INTER || dav1d_get_bit(gb);
  ------------------
  |  Branch (1069:38): [True: 4.60k, False: 1.70k]
  |  Branch (1069:83): [True: 114, False: 1.59k]
  ------------------
 1070|  6.31k|            if (!hdr->film_grain.update) {
  ------------------
  |  Branch (1070:17): [True: 1.59k, False: 4.72k]
  ------------------
 1071|  1.59k|                const int refidx = dav1d_get_bits(gb, 3);
 1072|  1.59k|                int i;
 1073|  6.71k|                for (i = 0; i < 7; i++)
  ------------------
  |  Branch (1073:29): [True: 6.42k, False: 294]
  ------------------
 1074|  6.42k|                    if (hdr->refidx[i] == refidx)
  ------------------
  |  Branch (1074:25): [True: 1.29k, False: 5.12k]
  ------------------
 1075|  1.29k|                        break;
 1076|  1.59k|                if (i == 7 || !c->refs[refidx].p.p.frame_hdr) goto error;
  ------------------
  |  Branch (1076:21): [True: 294, False: 1.29k]
  |  Branch (1076:31): [True: 84, False: 1.21k]
  ------------------
 1077|  1.21k|                hdr->film_grain.data = c->refs[refidx].p.p.frame_hdr->film_grain.data;
 1078|  1.21k|                hdr->film_grain.data.seed = seed;
 1079|  4.72k|            } else {
 1080|  4.72k|                Dav1dFilmGrainData *const fgd = &hdr->film_grain.data;
 1081|  4.72k|                fgd->seed = seed;
 1082|       |
 1083|  4.72k|                fgd->num_y_points = dav1d_get_bits(gb, 4);
 1084|  4.72k|                if (fgd->num_y_points > 14) goto error;
  ------------------
  |  Branch (1084:21): [True: 203, False: 4.51k]
  ------------------
 1085|  7.50k|                for (int i = 0; i < fgd->num_y_points; i++) {
  ------------------
  |  Branch (1085:33): [True: 3.37k, False: 4.13k]
  ------------------
 1086|  3.37k|                    fgd->y_points[i][0] = dav1d_get_bits(gb, 8);
 1087|  3.37k|                    if (i && fgd->y_points[i - 1][0] >= fgd->y_points[i][0])
  ------------------
  |  Branch (1087:25): [True: 1.09k, False: 2.27k]
  |  Branch (1087:30): [True: 389, False: 704]
  ------------------
 1088|    389|                        goto error;
 1089|  2.98k|                    fgd->y_points[i][1] = dav1d_get_bits(gb, 8);
 1090|  2.98k|                }
 1091|       |
 1092|  4.13k|                if (!seqhdr->monochrome)
  ------------------
  |  Branch (1092:21): [True: 3.22k, False: 905]
  ------------------
 1093|  3.22k|                    fgd->chroma_scaling_from_luma = dav1d_get_bit(gb);
 1094|  4.13k|                if (seqhdr->monochrome || fgd->chroma_scaling_from_luma ||
  ------------------
  |  Branch (1094:21): [True: 905, False: 3.22k]
  |  Branch (1094:43): [True: 973, False: 2.25k]
  ------------------
 1095|  2.25k|                    (seqhdr->ss_ver == 1 && seqhdr->ss_hor == 1 && !fgd->num_y_points))
  ------------------
  |  Branch (1095:22): [True: 619, False: 1.63k]
  |  Branch (1095:45): [True: 619, False: 0]
  |  Branch (1095:68): [True: 228, False: 391]
  ------------------
 1096|  2.10k|                {
 1097|  2.10k|                    fgd->num_uv_points[0] = fgd->num_uv_points[1] = 0;
 1098|  5.12k|                } else for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1098:41): [True: 3.77k, False: 1.34k]
  ------------------
 1099|  3.77k|                    fgd->num_uv_points[pl] = dav1d_get_bits(gb, 4);
 1100|  3.77k|                    if (fgd->num_uv_points[pl] > 10) goto error;
  ------------------
  |  Branch (1100:25): [True: 219, False: 3.55k]
  ------------------
 1101|  6.13k|                    for (int i = 0; i < fgd->num_uv_points[pl]; i++) {
  ------------------
  |  Branch (1101:37): [True: 3.03k, False: 3.09k]
  ------------------
 1102|  3.03k|                        fgd->uv_points[pl][i][0] = dav1d_get_bits(gb, 8);
 1103|  3.03k|                        if (i && fgd->uv_points[pl][i - 1][0] >= fgd->uv_points[pl][i][0])
  ------------------
  |  Branch (1103:29): [True: 1.76k, False: 1.26k]
  |  Branch (1103:34): [True: 459, False: 1.30k]
  ------------------
 1104|    459|                            goto error;
 1105|  2.57k|                        fgd->uv_points[pl][i][1] = dav1d_get_bits(gb, 8);
 1106|  2.57k|                    }
 1107|  3.55k|                }
 1108|       |
 1109|  3.45k|                if (seqhdr->ss_hor == 1 && seqhdr->ss_ver == 1 &&
  ------------------
  |  Branch (1109:21): [True: 2.99k, False: 462]
  |  Branch (1109:44): [True: 1.50k, False: 1.48k]
  ------------------
 1110|  1.50k|                    !!fgd->num_uv_points[0] != !!fgd->num_uv_points[1])
  ------------------
  |  Branch (1110:21): [True: 66, False: 1.43k]
  ------------------
 1111|     66|                {
 1112|     66|                    goto error;
 1113|     66|                }
 1114|       |
 1115|  3.38k|                fgd->scaling_shift = dav1d_get_bits(gb, 2) + 8;
 1116|  3.38k|                fgd->ar_coeff_lag = dav1d_get_bits(gb, 2);
 1117|  3.38k|                const int num_y_pos = 2 * fgd->ar_coeff_lag * (fgd->ar_coeff_lag + 1);
 1118|  3.38k|                if (fgd->num_y_points)
  ------------------
  |  Branch (1118:21): [True: 1.77k, False: 1.61k]
  ------------------
 1119|  11.8k|                    for (int i = 0; i < num_y_pos; i++)
  ------------------
  |  Branch (1119:37): [True: 10.1k, False: 1.77k]
  ------------------
 1120|  10.1k|                        fgd->ar_coeffs_y[i] = dav1d_get_bits(gb, 8) - 128;
 1121|  10.1k|                for (int pl = 0; pl < 2; pl++)
  ------------------
  |  Branch (1121:34): [True: 6.77k, False: 3.38k]
  ------------------
 1122|  6.77k|                    if (fgd->num_uv_points[pl] || fgd->chroma_scaling_from_luma) {
  ------------------
  |  Branch (1122:25): [True: 648, False: 6.12k]
  |  Branch (1122:51): [True: 1.94k, False: 4.17k]
  ------------------
 1123|  2.59k|                        const int num_uv_pos = num_y_pos + !!fgd->num_y_points;
 1124|  30.0k|                        for (int i = 0; i < num_uv_pos; i++)
  ------------------
  |  Branch (1124:41): [True: 27.4k, False: 2.59k]
  ------------------
 1125|  27.4k|                            fgd->ar_coeffs_uv[pl][i] = dav1d_get_bits(gb, 8) - 128;
 1126|  2.59k|                        if (!fgd->num_y_points)
  ------------------
  |  Branch (1126:29): [True: 1.17k, False: 1.42k]
  ------------------
 1127|  1.17k|                            fgd->ar_coeffs_uv[pl][num_uv_pos] = 0;
 1128|  2.59k|                    }
 1129|  3.38k|                fgd->ar_coeff_shift = dav1d_get_bits(gb, 2) + 6;
 1130|  3.38k|                fgd->grain_scale_shift = dav1d_get_bits(gb, 2);
 1131|  10.1k|                for (int pl = 0; pl < 2; pl++)
  ------------------
  |  Branch (1131:34): [True: 6.77k, False: 3.38k]
  ------------------
 1132|  6.77k|                    if (fgd->num_uv_points[pl]) {
  ------------------
  |  Branch (1132:25): [True: 648, False: 6.12k]
  ------------------
 1133|    648|                        fgd->uv_mult[pl] = dav1d_get_bits(gb, 8) - 128;
 1134|    648|                        fgd->uv_luma_mult[pl] = dav1d_get_bits(gb, 8) - 128;
 1135|    648|                        fgd->uv_offset[pl] = dav1d_get_bits(gb, 9) - 256;
 1136|    648|                    }
 1137|  3.38k|                fgd->overlap_flag = dav1d_get_bit(gb);
 1138|  3.38k|                fgd->clip_to_restricted_range = dav1d_get_bit(gb);
 1139|  3.38k|            }
 1140|  6.31k|        }
 1141|  17.7k|    }
 1142|       |#if DEBUG_FRAME_HDR
 1143|       |    printf("HDR: post-filmgrain: off=%td\n",
 1144|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
 1145|       |#endif
 1146|       |
 1147|  60.7k|    return 0;
 1148|       |
 1149|  4.78k|error:
 1150|  4.78k|    dav1d_log(c, "Error parsing frame header\n");
  ------------------
  |  |   44|  4.78k|#define dav1d_log(...) do { } while(0)
  |  |  ------------------
  |  |  |  Branch (44:37): [Folded, False: 4.78k]
  |  |  ------------------
  ------------------
 1151|  4.78k|    return DAV1D_ERR(EINVAL);
  ------------------
  |  |   58|  4.78k|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
 1152|  62.5k|}
obu.c:read_frame_size:
  343|  64.0k|{
  344|  64.0k|    const Dav1dSequenceHeader *const seqhdr = c->seq_hdr;
  345|  64.0k|    Dav1dFrameHeader *const hdr = c->frame_hdr;
  346|       |
  347|  64.0k|    if (use_ref) {
  ------------------
  |  Branch (347:9): [True: 11.5k, False: 52.5k]
  ------------------
  348|  24.6k|        for (int i = 0; i < 7; i++) {
  ------------------
  |  Branch (348:25): [True: 23.6k, False: 965]
  ------------------
  349|  23.6k|            if (dav1d_get_bit(gb)) {
  ------------------
  |  Branch (349:17): [True: 10.5k, False: 13.1k]
  ------------------
  350|  10.5k|                const Dav1dThreadPicture *const ref =
  351|  10.5k|                    &c->refs[c->frame_hdr->refidx[i]].p;
  352|  10.5k|                if (!ref->p.frame_hdr) return -1;
  ------------------
  |  Branch (352:21): [True: 68, False: 10.4k]
  ------------------
  353|  10.4k|                hdr->width[1] = ref->p.frame_hdr->width[1];
  354|  10.4k|                hdr->height = ref->p.frame_hdr->height;
  355|  10.4k|                hdr->render_width = ref->p.frame_hdr->render_width;
  356|  10.4k|                hdr->render_height = ref->p.frame_hdr->render_height;
  357|  10.4k|                hdr->super_res.enabled = seqhdr->super_res && dav1d_get_bit(gb);
  ------------------
  |  Branch (357:42): [True: 1.55k, False: 8.92k]
  |  Branch (357:63): [True: 1.03k, False: 519]
  ------------------
  358|  10.4k|                if (hdr->super_res.enabled) {
  ------------------
  |  Branch (358:21): [True: 1.03k, False: 9.43k]
  ------------------
  359|  1.03k|                    const int d = hdr->super_res.width_scale_denominator =
  360|  1.03k|                        9 + dav1d_get_bits(gb, 3);
  361|  1.03k|                    hdr->width[0] = imax((hdr->width[1] * 8 + (d >> 1)) / d,
  362|  1.03k|                                         imin(16, hdr->width[1]));
  363|  9.43k|                } else {
  364|  9.43k|                    hdr->super_res.width_scale_denominator = 8;
  365|  9.43k|                    hdr->width[0] = hdr->width[1];
  366|  9.43k|                }
  367|  10.4k|                return 0;
  368|  10.5k|            }
  369|  23.6k|        }
  370|  11.5k|    }
  371|       |
  372|  53.5k|    if (hdr->frame_size_override) {
  ------------------
  |  Branch (372:9): [True: 8.17k, False: 45.3k]
  ------------------
  373|  8.17k|        hdr->width[1] = dav1d_get_bits(gb, seqhdr->width_n_bits) + 1;
  374|  8.17k|        hdr->height = dav1d_get_bits(gb, seqhdr->height_n_bits) + 1;
  375|  45.3k|    } else {
  376|  45.3k|        hdr->width[1] = seqhdr->max_width;
  377|  45.3k|        hdr->height = seqhdr->max_height;
  378|  45.3k|    }
  379|  53.5k|    hdr->super_res.enabled = seqhdr->super_res && dav1d_get_bit(gb);
  ------------------
  |  Branch (379:30): [True: 30.1k, False: 23.3k]
  |  Branch (379:51): [True: 7.36k, False: 22.7k]
  ------------------
  380|  53.5k|    if (hdr->super_res.enabled) {
  ------------------
  |  Branch (380:9): [True: 7.36k, False: 46.1k]
  ------------------
  381|  7.36k|        const int d = hdr->super_res.width_scale_denominator = 9 + dav1d_get_bits(gb, 3);
  382|  7.36k|        hdr->width[0] = imax((hdr->width[1] * 8 + (d >> 1)) / d, imin(16, hdr->width[1]));
  383|  46.1k|    } else {
  384|  46.1k|        hdr->super_res.width_scale_denominator = 8;
  385|  46.1k|        hdr->width[0] = hdr->width[1];
  386|  46.1k|    }
  387|  53.5k|    hdr->have_render_size = dav1d_get_bit(gb);
  388|  53.5k|    if (hdr->have_render_size) {
  ------------------
  |  Branch (388:9): [True: 2.06k, False: 51.4k]
  ------------------
  389|  2.06k|        hdr->render_width = dav1d_get_bits(gb, 16) + 1;
  390|  2.06k|        hdr->render_height = dav1d_get_bits(gb, 16) + 1;
  391|  51.4k|    } else {
  392|  51.4k|        hdr->render_width = hdr->width[1];
  393|  51.4k|        hdr->render_height = hdr->height;
  394|  51.4k|    }
  395|  53.5k|    return 0;
  396|  64.0k|}
obu.c:tile_log2:
  398|   307k|static inline int tile_log2(const int sz, const int tgt) {
  399|   307k|    int k;
  400|   442k|    for (k = 0; (sz << k) < tgt; k++) ;
  ------------------
  |  Branch (400:17): [True: 134k, False: 307k]
  ------------------
  401|   307k|    return k;
  402|   307k|}
obu.c:check_trailing_bits:
   50|  45.3k|{
   51|  45.3k|    const int trailing_one_bit = dav1d_get_bit(gb);
   52|       |
   53|  45.3k|    if (gb->error)
  ------------------
  |  Branch (53:9): [True: 8.63k, False: 36.7k]
  ------------------
   54|  8.63k|        return DAV1D_ERR(EINVAL);
  ------------------
  |  |   58|  8.63k|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
   55|       |
   56|  36.7k|    if (!strict_std_compliance)
  ------------------
  |  Branch (56:9): [True: 36.7k, False: 0]
  ------------------
   57|  36.7k|        return 0;
   58|       |
   59|      0|    if (!trailing_one_bit || gb->state)
  ------------------
  |  Branch (59:9): [True: 0, False: 0]
  |  Branch (59:30): [True: 0, False: 0]
  ------------------
   60|      0|        return DAV1D_ERR(EINVAL);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
   61|       |
   62|      0|    ptrdiff_t size = gb->ptr_end - gb->ptr;
   63|      0|    while (size > 0 && gb->ptr[size - 1] == 0)
  ------------------
  |  Branch (63:12): [True: 0, False: 0]
  |  Branch (63:24): [True: 0, False: 0]
  ------------------
   64|      0|        size--;
   65|       |
   66|      0|    if (size)
  ------------------
  |  Branch (66:9): [True: 0, False: 0]
  ------------------
   67|      0|        return DAV1D_ERR(EINVAL);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
   68|       |
   69|      0|    return 0;
   70|      0|}
obu.c:parse_tile_hdr:
 1154|  53.7k|static void parse_tile_hdr(Dav1dContext *const c, GetBits *const gb) {
 1155|  53.7k|    const int n_tiles = c->frame_hdr->tiling.cols * c->frame_hdr->tiling.rows;
 1156|  53.7k|    const int have_tile_pos = n_tiles > 1 ? dav1d_get_bit(gb) : 0;
  ------------------
  |  Branch (1156:31): [True: 5.62k, False: 48.1k]
  ------------------
 1157|       |
 1158|  53.7k|    if (have_tile_pos) {
  ------------------
  |  Branch (1158:9): [True: 1.87k, False: 51.8k]
  ------------------
 1159|  1.87k|        const int n_bits = c->frame_hdr->tiling.log2_cols +
 1160|  1.87k|                           c->frame_hdr->tiling.log2_rows;
 1161|  1.87k|        c->tile[c->n_tile_data].start = dav1d_get_bits(gb, n_bits);
 1162|  1.87k|        c->tile[c->n_tile_data].end = dav1d_get_bits(gb, n_bits);
 1163|  51.8k|    } else {
 1164|  51.8k|        c->tile[c->n_tile_data].start = 0;
 1165|  51.8k|        c->tile[c->n_tile_data].end = n_tiles - 1;
 1166|  51.8k|    }
 1167|  53.7k|}

dav1d_pal_dsp_init:
   71|  9.51k|COLD void dav1d_pal_dsp_init(Dav1dPalDSPContext *const c) {
   72|  9.51k|    c->pal_idx_finish = pal_idx_finish_c;
   73|       |
   74|  9.51k|#if HAVE_ASM
   75|       |#if ARCH_RISCV
   76|       |    pal_dsp_init_riscv(c);
   77|       |#elif ARCH_X86
   78|       |    pal_dsp_init_x86(c);
   79|  9.51k|#endif
   80|  9.51k|#endif
   81|  9.51k|}

dav1d_default_picture_alloc:
   46|  50.5k|int dav1d_default_picture_alloc(Dav1dPicture *const p, void *const cookie) {
   47|  50.5k|    const int hbd = p->p.bpc > 8;
   48|  50.5k|    const int aligned_w = (p->p.w + 127) & ~127;
   49|  50.5k|    const int aligned_h = (p->p.h + 127) & ~127;
   50|  50.5k|    const int has_chroma = p->p.layout != DAV1D_PIXEL_LAYOUT_I400;
   51|  50.5k|    const int ss_ver = p->p.layout == DAV1D_PIXEL_LAYOUT_I420;
   52|  50.5k|    const int ss_hor = p->p.layout != DAV1D_PIXEL_LAYOUT_I444;
   53|  50.5k|    ptrdiff_t y_stride = aligned_w << hbd;
   54|  50.5k|    ptrdiff_t uv_stride = has_chroma ? y_stride >> ss_hor : 0;
  ------------------
  |  Branch (54:27): [True: 27.4k, False: 23.1k]
  ------------------
   55|       |    /* Due to how mapping of addresses to sets works in most L1 and L2 cache
   56|       |     * implementations, strides of multiples of certain power-of-two numbers
   57|       |     * may cause multiple rows of the same superblock to map to the same set,
   58|       |     * causing evictions of previous rows resulting in a reduction in cache
   59|       |     * hit rate. Avoid that by slightly padding the stride when necessary. */
   60|  50.5k|    if (!(y_stride & 1023))
  ------------------
  |  Branch (60:9): [True: 4.50k, False: 46.0k]
  ------------------
   61|  4.50k|        y_stride += DAV1D_PICTURE_ALIGNMENT;
  ------------------
  |  |   44|  4.50k|#define DAV1D_PICTURE_ALIGNMENT 64
  ------------------
   62|  50.5k|    if (!(uv_stride & 1023) && has_chroma)
  ------------------
  |  Branch (62:9): [True: 25.8k, False: 24.6k]
  |  Branch (62:32): [True: 2.78k, False: 23.1k]
  ------------------
   63|  2.78k|        uv_stride += DAV1D_PICTURE_ALIGNMENT;
  ------------------
  |  |   44|  2.78k|#define DAV1D_PICTURE_ALIGNMENT 64
  ------------------
   64|  50.5k|    p->stride[0] = y_stride;
   65|  50.5k|    p->stride[1] = uv_stride;
   66|  50.5k|    const size_t y_sz = y_stride * aligned_h;
   67|  50.5k|    const size_t uv_sz = uv_stride * (aligned_h >> ss_ver);
   68|  50.5k|    const size_t pic_size = y_sz + 2 * uv_sz;
   69|       |
   70|  50.5k|    uint8_t *const buf = dav1d_mem_pool_pop(cookie, pic_size + DAV1D_PICTURE_ALIGNMENT);
  ------------------
  |  |   44|  50.5k|#define DAV1D_PICTURE_ALIGNMENT 64
  ------------------
   71|  50.5k|    if (!buf) return DAV1D_ERR(ENOMEM);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  |  Branch (71:9): [True: 0, False: 50.5k]
  ------------------
   72|  50.5k|    p->allocator_data = buf;
   73|  50.5k|    p->data[0] = buf;
   74|  50.5k|    p->data[1] = has_chroma ? buf + y_sz : NULL;
  ------------------
  |  Branch (74:18): [True: 27.4k, False: 23.1k]
  ------------------
   75|  50.5k|    p->data[2] = has_chroma ? buf + y_sz + uv_sz : NULL;
  ------------------
  |  Branch (75:18): [True: 27.4k, False: 23.1k]
  ------------------
   76|       |
   77|  50.5k|    return 0;
   78|  50.5k|}
dav1d_default_picture_release:
   80|  50.5k|void dav1d_default_picture_release(Dav1dPicture *const p, void *const cookie) {
   81|  50.5k|    dav1d_mem_pool_push(cookie, p->allocator_data);
   82|  50.5k|}
dav1d_picture_free_itut_t35:
   99|    622|void dav1d_picture_free_itut_t35(const uint8_t *const data, void *const user_data) {
  100|    622|    struct itut_t35_ctx_context *itut_t35_ctx = user_data;
  101|       |
  102|  1.75k|    for (size_t i = 0; i < itut_t35_ctx->n_itut_t35; i++)
  ------------------
  |  Branch (102:24): [True: 1.13k, False: 622]
  ------------------
  103|  1.13k|        dav1d_free(itut_t35_ctx->itut_t35[i].payload);
  ------------------
  |  |  135|  1.13k|#define dav1d_free(ptr) free(ptr)
  ------------------
  104|    622|    dav1d_free(itut_t35_ctx->itut_t35);
  ------------------
  |  |  135|    622|#define dav1d_free(ptr) free(ptr)
  ------------------
  105|    622|    dav1d_free(itut_t35_ctx);
  ------------------
  |  |  135|    622|#define dav1d_free(ptr) free(ptr)
  ------------------
  106|    622|}
dav1d_picture_copy_props:
  164|  52.4k|{
  165|  52.4k|    dav1d_data_props_copy(&p->m, props);
  166|       |
  167|  52.4k|    dav1d_ref_dec(&p->content_light_ref);
  168|  52.4k|    p->content_light_ref = content_light_ref;
  169|  52.4k|    p->content_light = content_light;
  170|  52.4k|    if (content_light_ref) dav1d_ref_inc(content_light_ref);
  ------------------
  |  Branch (170:9): [True: 2.28k, False: 50.1k]
  ------------------
  171|       |
  172|  52.4k|    dav1d_ref_dec(&p->mastering_display_ref);
  173|  52.4k|    p->mastering_display_ref = mastering_display_ref;
  174|  52.4k|    p->mastering_display = mastering_display;
  175|  52.4k|    if (mastering_display_ref) dav1d_ref_inc(mastering_display_ref);
  ------------------
  |  Branch (175:9): [True: 1.66k, False: 50.8k]
  ------------------
  176|       |
  177|  52.4k|    dav1d_ref_dec(&p->itut_t35_ref);
  178|  52.4k|    p->itut_t35_ref = itut_t35_ref;
  179|  52.4k|    p->itut_t35 = itut_t35;
  180|  52.4k|    p->n_itut_t35 = n_itut_t35;
  181|  52.4k|    if (itut_t35_ref) dav1d_ref_inc(itut_t35_ref);
  ------------------
  |  Branch (181:9): [True: 875, False: 51.6k]
  ------------------
  182|  52.4k|}
dav1d_thread_picture_alloc:
  186|  41.8k|{
  187|  41.8k|    Dav1dThreadPicture *const p = &f->sr_cur;
  188|       |
  189|  41.8k|    const int res = picture_alloc(c, &p->p, f->frame_hdr->width[1], f->frame_hdr->height,
  190|  41.8k|                                  f->seq_hdr, f->seq_hdr_ref,
  191|  41.8k|                                  f->frame_hdr, f->frame_hdr_ref,
  192|  41.8k|                                  bpc, &f->tile[0].data.m, &c->allocator,
  193|  41.8k|                                  (void **) &p->progress);
  194|  41.8k|    if (res) return res;
  ------------------
  |  Branch (194:9): [True: 0, False: 41.8k]
  ------------------
  195|       |
  196|       |    // Don't clear these flags from c->frame_flags if the frame is not going to be output.
  197|       |    // This way they will be added to the next visible frame too.
  198|  41.8k|    const int flags_mask = ((f->frame_hdr->show_frame || c->output_invisible_frames) &&
  ------------------
  |  Branch (198:30): [True: 38.2k, False: 3.60k]
  |  Branch (198:58): [True: 0, False: 3.60k]
  ------------------
  199|  38.2k|                            c->max_spatial_id == f->frame_hdr->spatial_id)
  ------------------
  |  Branch (199:29): [True: 33.3k, False: 4.90k]
  ------------------
  200|  41.8k|                           ? 0 : (PICTURE_FLAG_NEW_SEQUENCE | PICTURE_FLAG_NEW_OP_PARAMS_INFO);
  201|  41.8k|    p->flags = c->frame_flags;
  202|  41.8k|    c->frame_flags &= flags_mask;
  203|       |
  204|  41.8k|    p->visible = f->frame_hdr->show_frame;
  205|  41.8k|    p->showable = f->frame_hdr->showable_frame;
  206|       |
  207|  41.8k|    if (p->visible) {
  ------------------
  |  Branch (207:9): [True: 38.2k, False: 3.60k]
  ------------------
  208|       |        // Only add HDR10+ and T35 metadata when show frame flag is enabled
  209|  38.2k|        dav1d_picture_copy_props(&p->p, c->content_light, c->content_light_ref,
  210|  38.2k|                                 c->mastering_display, c->mastering_display_ref,
  211|  38.2k|                                 c->itut_t35, c->itut_t35_ref, c->n_itut_t35,
  212|  38.2k|                                 &f->tile[0].data.m);
  213|       |
  214|       |        // Must be removed from the context after being attached to the frame
  215|  38.2k|        dav1d_ref_dec(&c->itut_t35_ref);
  216|  38.2k|        c->itut_t35 = NULL;
  217|  38.2k|        c->n_itut_t35 = 0;
  218|  38.2k|    } else {
  219|  3.60k|        dav1d_data_props_copy(&p->p.m, &f->tile[0].data.m);
  220|  3.60k|    }
  221|       |
  222|  41.8k|    if (c->n_fc > 1) {
  ------------------
  |  Branch (222:9): [True: 0, False: 41.8k]
  ------------------
  223|      0|        atomic_init(&p->progress[0], 0);
  224|       |        atomic_init(&p->progress[1], 0);
  225|      0|    }
  226|  41.8k|    return res;
  227|  41.8k|}
dav1d_picture_alloc_copy:
  231|  8.73k|{
  232|  8.73k|    struct pic_ctx_context *const pic_ctx = (struct pic_ctx_context*)src->ref->const_data;
  233|  8.73k|    const int res = picture_alloc(c, dst, w, src->p.h,
  234|  8.73k|                                  src->seq_hdr, src->seq_hdr_ref,
  235|  8.73k|                                  src->frame_hdr, src->frame_hdr_ref,
  236|  8.73k|                                  src->p.bpc, &src->m, &pic_ctx->allocator,
  237|  8.73k|                                  NULL);
  238|  8.73k|    if (res) return res;
  ------------------
  |  Branch (238:9): [True: 0, False: 8.73k]
  ------------------
  239|       |
  240|  8.73k|    dav1d_picture_copy_props(dst, src->content_light, src->content_light_ref,
  241|  8.73k|                             src->mastering_display, src->mastering_display_ref,
  242|  8.73k|                             src->itut_t35, src->itut_t35_ref, src->n_itut_t35,
  243|  8.73k|                             &src->m);
  244|       |
  245|  8.73k|    return 0;
  246|  8.73k|}
dav1d_picture_ref:
  248|   477k|void dav1d_picture_ref(Dav1dPicture *const dst, const Dav1dPicture *const src) {
  249|   477k|    assert(dst != NULL);
  ------------------
  |  Branch (249:5): [True: 477k, False: 0]
  ------------------
  250|   477k|    assert(dst->data[0] == NULL);
  ------------------
  |  Branch (250:5): [True: 477k, False: 0]
  ------------------
  251|   477k|    assert(src != NULL);
  ------------------
  |  Branch (251:5): [True: 477k, False: 0]
  ------------------
  252|       |
  253|   477k|    if (src->ref) {
  ------------------
  |  Branch (253:9): [True: 477k, False: 0]
  ------------------
  254|   477k|        assert(src->data[0] != NULL);
  ------------------
  |  Branch (254:9): [True: 477k, False: 0]
  ------------------
  255|   477k|        dav1d_ref_inc(src->ref);
  256|   477k|    }
  257|   477k|    if (src->frame_hdr_ref) dav1d_ref_inc(src->frame_hdr_ref);
  ------------------
  |  Branch (257:9): [True: 477k, False: 0]
  ------------------
  258|   477k|    if (src->seq_hdr_ref) dav1d_ref_inc(src->seq_hdr_ref);
  ------------------
  |  Branch (258:9): [True: 477k, False: 0]
  ------------------
  259|   477k|    if (src->m.user_data.ref) dav1d_ref_inc(src->m.user_data.ref);
  ------------------
  |  Branch (259:9): [True: 0, False: 477k]
  ------------------
  260|   477k|    if (src->content_light_ref) dav1d_ref_inc(src->content_light_ref);
  ------------------
  |  Branch (260:9): [True: 10.5k, False: 467k]
  ------------------
  261|   477k|    if (src->mastering_display_ref) dav1d_ref_inc(src->mastering_display_ref);
  ------------------
  |  Branch (261:9): [True: 7.99k, False: 469k]
  ------------------
  262|   477k|    if (src->itut_t35_ref) dav1d_ref_inc(src->itut_t35_ref);
  ------------------
  |  Branch (262:9): [True: 2.46k, False: 475k]
  ------------------
  263|   477k|    *dst = *src;
  264|   477k|}
dav1d_picture_move_ref:
  266|  16.9k|void dav1d_picture_move_ref(Dav1dPicture *const dst, Dav1dPicture *const src) {
  267|  16.9k|    assert(dst != NULL);
  ------------------
  |  Branch (267:5): [True: 16.9k, False: 0]
  ------------------
  268|  16.9k|    assert(dst->data[0] == NULL);
  ------------------
  |  Branch (268:5): [True: 16.9k, False: 0]
  ------------------
  269|  16.9k|    assert(src != NULL);
  ------------------
  |  Branch (269:5): [True: 16.9k, False: 0]
  ------------------
  270|       |
  271|  16.9k|    if (src->ref)
  ------------------
  |  Branch (271:9): [True: 16.9k, False: 0]
  ------------------
  272|  16.9k|        assert(src->data[0] != NULL);
  ------------------
  |  Branch (272:9): [True: 16.9k, False: 0]
  ------------------
  273|       |
  274|  16.9k|    *dst = *src;
  275|  16.9k|    memset(src, 0, sizeof(*src));
  276|  16.9k|}
dav1d_thread_picture_ref:
  280|   440k|{
  281|   440k|    dav1d_picture_ref(&dst->p, &src->p);
  282|   440k|    dst->visible = src->visible;
  283|   440k|    dst->showable = src->showable;
  284|   440k|    dst->progress = src->progress;
  285|   440k|    dst->flags = src->flags;
  286|   440k|}
dav1d_picture_unref_internal:
  299|   628k|void dav1d_picture_unref_internal(Dav1dPicture *const p) {
  300|   628k|    validate_input(p != NULL);
  ------------------
  |  |   59|   628k|#define validate_input(x) validate_input_or_ret(x, )
  |  |  ------------------
  |  |  |  |   52|   628k|    if (!(x)) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (52:9): [True: 0, False: 628k]
  |  |  |  |  ------------------
  |  |  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  |  |  ------------------
  |  |  |  |   54|      0|                    #x, __func__); \
  |  |  |  |   55|      0|        debug_abort(); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   39|      0|#define debug_abort abort
  |  |  |  |  ------------------
  |  |  |  |   56|      0|        return r; \
  |  |  |  |   57|      0|    }
  |  |  ------------------
  ------------------
  301|       |
  302|   628k|    if (p->ref) {
  ------------------
  |  Branch (302:9): [True: 528k, False: 99.8k]
  ------------------
  303|   528k|        validate_input(p->data[0] != NULL);
  ------------------
  |  |   59|   528k|#define validate_input(x) validate_input_or_ret(x, )
  |  |  ------------------
  |  |  |  |   52|   528k|    if (!(x)) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (52:9): [True: 0, False: 528k]
  |  |  |  |  ------------------
  |  |  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  |  |  ------------------
  |  |  |  |   54|      0|                    #x, __func__); \
  |  |  |  |   55|      0|        debug_abort(); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   39|      0|#define debug_abort abort
  |  |  |  |  ------------------
  |  |  |  |   56|      0|        return r; \
  |  |  |  |   57|      0|    }
  |  |  ------------------
  ------------------
  304|   528k|        dav1d_ref_dec(&p->ref);
  305|   528k|    }
  306|   628k|    dav1d_ref_dec(&p->seq_hdr_ref);
  307|   628k|    dav1d_ref_dec(&p->frame_hdr_ref);
  308|   628k|    dav1d_ref_dec(&p->m.user_data.ref);
  309|   628k|    dav1d_ref_dec(&p->content_light_ref);
  310|   628k|    dav1d_ref_dec(&p->mastering_display_ref);
  311|   628k|    dav1d_ref_dec(&p->itut_t35_ref);
  312|   628k|    memset(p, 0, sizeof(*p));
  313|   628k|    dav1d_data_props_set_defaults(&p->m);
  314|   628k|}
dav1d_thread_picture_unref:
  316|   537k|void dav1d_thread_picture_unref(Dav1dThreadPicture *const p) {
  317|   537k|    dav1d_picture_unref_internal(&p->p);
  318|       |
  319|       |    p->progress = NULL;
  320|   537k|}
dav1d_picture_get_event_flags:
  322|  43.7k|enum Dav1dEventFlags dav1d_picture_get_event_flags(const Dav1dThreadPicture *const p) {
  323|  43.7k|    if (!p->flags)
  ------------------
  |  Branch (323:9): [True: 24.6k, False: 19.0k]
  ------------------
  324|  24.6k|        return 0;
  325|       |
  326|  19.0k|    enum Dav1dEventFlags flags = 0;
  327|  19.0k|    if (p->flags & PICTURE_FLAG_NEW_SEQUENCE)
  ------------------
  |  Branch (327:9): [True: 16.5k, False: 2.57k]
  ------------------
  328|  16.5k|       flags |= DAV1D_EVENT_FLAG_NEW_SEQUENCE;
  329|  19.0k|    if (p->flags & PICTURE_FLAG_NEW_OP_PARAMS_INFO)
  ------------------
  |  Branch (329:9): [True: 194, False: 18.9k]
  ------------------
  330|    194|       flags |= DAV1D_EVENT_FLAG_NEW_OP_PARAMS_INFO;
  331|       |
  332|  19.0k|    return flags;
  333|  43.7k|}
picture.c:picture_alloc:
  117|  50.5k|{
  118|  50.5k|    if (p->data[0]) {
  ------------------
  |  Branch (118:9): [True: 0, False: 50.5k]
  ------------------
  119|      0|        dav1d_log(c, "Picture already allocated!\n");
  ------------------
  |  |   44|      0|#define dav1d_log(...) do { } while(0)
  |  |  ------------------
  |  |  |  Branch (44:37): [Folded, False: 0]
  |  |  ------------------
  ------------------
  120|      0|        return -1;
  121|      0|    }
  122|  50.5k|    assert(bpc > 0 && bpc <= 16);
  ------------------
  |  Branch (122:5): [True: 50.5k, False: 0]
  |  Branch (122:5): [True: 50.5k, False: 0]
  ------------------
  123|       |
  124|  50.5k|    size_t extra = c->n_fc > 1 ? sizeof(atomic_int) * 2 : 0;
  ------------------
  |  Branch (124:20): [True: 0, False: 50.5k]
  ------------------
  125|  50.5k|    struct pic_ctx_context *pic_ctx = dav1d_mem_pool_pop(c->pic_ctx_pool, extra +
  126|  50.5k|                                                         sizeof(struct pic_ctx_context));
  127|  50.5k|    if (!pic_ctx)
  ------------------
  |  Branch (127:9): [True: 0, False: 50.5k]
  ------------------
  128|      0|        return DAV1D_ERR(ENOMEM);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  129|       |
  130|  50.5k|    p->p.w = w;
  131|  50.5k|    p->p.h = h;
  132|  50.5k|    p->seq_hdr = seq_hdr;
  133|  50.5k|    p->frame_hdr = frame_hdr;
  134|  50.5k|    p->p.layout = seq_hdr->layout;
  135|  50.5k|    p->p.bpc = bpc;
  136|  50.5k|    dav1d_data_props_set_defaults(&p->m);
  137|  50.5k|    const int res = p_allocator->alloc_picture_callback(p, p_allocator->cookie);
  138|  50.5k|    if (res < 0) {
  ------------------
  |  Branch (138:9): [True: 0, False: 50.5k]
  ------------------
  139|      0|        dav1d_mem_pool_push(c->pic_ctx_pool, pic_ctx);
  140|      0|        return res;
  141|      0|    }
  142|       |
  143|  50.5k|    pic_ctx->allocator = *p_allocator;
  144|  50.5k|    pic_ctx->pic = *p;
  145|  50.5k|    p->ref = dav1d_ref_init(&pic_ctx->ref, pic_ctx, free_buffer, c->pic_ctx_pool, 0);
  146|       |
  147|  50.5k|    p->seq_hdr_ref = seq_hdr_ref;
  148|  50.5k|    if (seq_hdr_ref) dav1d_ref_inc(seq_hdr_ref);
  ------------------
  |  Branch (148:9): [True: 50.5k, False: 0]
  ------------------
  149|       |
  150|  50.5k|    p->frame_hdr_ref = frame_hdr_ref;
  151|  50.5k|    if (frame_hdr_ref) dav1d_ref_inc(frame_hdr_ref);
  ------------------
  |  Branch (151:9): [True: 50.5k, False: 0]
  ------------------
  152|       |
  153|  50.5k|    if (extra && extra_ptr)
  ------------------
  |  Branch (153:9): [True: 0, False: 50.5k]
  |  Branch (153:18): [True: 0, False: 0]
  ------------------
  154|      0|        *extra_ptr = &pic_ctx->extra_data;
  155|       |
  156|  50.5k|    return 0;
  157|  50.5k|}
picture.c:free_buffer:
   91|  50.5k|static void free_buffer(const uint8_t *const data, void *const user_data) {
   92|  50.5k|    struct pic_ctx_context *pic_ctx = (struct pic_ctx_context*)data;
   93|       |
   94|  50.5k|    pic_ctx->allocator.release_picture_callback(&pic_ctx->pic,
   95|  50.5k|                                                pic_ctx->allocator.cookie);
   96|  50.5k|    dav1d_mem_pool_push(user_data, pic_ctx);
   97|  50.5k|}

dav1d_init_qm_tables:
 1648|      1|COLD void dav1d_init_qm_tables(void) {
 1649|       |    // This function is guaranteed to be called only once
 1650|       |
 1651|     16|    for (int i = 0; i < 15; i++)
  ------------------
  |  Branch (1651:21): [True: 15, False: 1]
  ------------------
 1652|     45|        for (int j = 0; j < 2; j++) {
  ------------------
  |  Branch (1652:25): [True: 30, False: 15]
  ------------------
 1653|       |            // note that the w/h in the assignment is inverted, this is on purpose
 1654|       |            // because we store coefficients transposed
 1655|     30|            dav1d_qm_tbl[i][j][RTX_4X8  ] = qm_tbl_8x4[i][j];
 1656|     30|            dav1d_qm_tbl[i][j][RTX_8X4  ] = qm_tbl_4x8[i][j];
 1657|     30|            dav1d_qm_tbl[i][j][RTX_4X16 ] = qm_tbl_16x4[i][j];
 1658|     30|            dav1d_qm_tbl[i][j][RTX_16X4 ] = qm_tbl_4x16[i][j];
 1659|     30|            dav1d_qm_tbl[i][j][RTX_8X16 ] = qm_tbl_16x8[i][j];
 1660|     30|            dav1d_qm_tbl[i][j][RTX_16X8 ] = qm_tbl_8x16[i][j];
 1661|     30|            dav1d_qm_tbl[i][j][RTX_8X32 ] = qm_tbl_32x8[i][j];
 1662|     30|            dav1d_qm_tbl[i][j][RTX_32X8 ] = qm_tbl_8x32[i][j];
 1663|     30|            dav1d_qm_tbl[i][j][RTX_16X32] = qm_tbl_32x16[i][j];
 1664|     30|            dav1d_qm_tbl[i][j][RTX_32X16] = qm_tbl_16x32[i][j];
 1665|       |
 1666|     30|            dav1d_qm_tbl[i][j][ TX_4X4  ] = qm_tbl_4x4[i][j];
 1667|     30|            dav1d_qm_tbl[i][j][ TX_8X8  ] = qm_tbl_8x8[i][j];
 1668|     30|            dav1d_qm_tbl[i][j][ TX_16X16] = qm_tbl_16x16[i][j];
 1669|     30|            dav1d_qm_tbl[i][j][ TX_32X32] = qm_tbl_32x32[i][j];
 1670|       |
 1671|     30|            untriangle(qm_tbl_32x32[i][j], qm_tbl_32x32_t[i][j], 32);
 1672|     30|            subsample(qm_tbl_4x4[i][j],   &qm_tbl_32x32[i][j][32*3+3], 32, 8, 8);
 1673|     30|            subsample(qm_tbl_8x4[i][j],   &qm_tbl_32x16[i][j][32*1+1], 16, 4, 4);
 1674|     30|            subsample(qm_tbl_8x8[i][j],   &qm_tbl_32x32[i][j][32*1+1], 32, 4, 4);
 1675|     30|            subsample(qm_tbl_16x4[i][j],  &qm_tbl_32x16[i][j][32*1+0], 16, 2, 4);
 1676|     30|            subsample(qm_tbl_16x8[i][j],  &qm_tbl_32x16[i][j][32*0+0], 16, 2, 2);
 1677|     30|            subsample(qm_tbl_16x16[i][j], &qm_tbl_32x32[i][j][32*0+0], 32, 2, 2);
 1678|     30|            subsample(qm_tbl_32x8[i][j],  &qm_tbl_32x16[i][j][32*0+0], 16, 1, 2);
 1679|     30|            transpose(qm_tbl_4x8[i][j], qm_tbl_8x4[i][j], 8, 4);
 1680|     30|            transpose(qm_tbl_4x16[i][j], qm_tbl_16x4[i][j], 16, 4);
 1681|     30|            transpose(qm_tbl_8x16[i][j], qm_tbl_16x8[i][j], 16, 8);
 1682|     30|            transpose(qm_tbl_8x32[i][j], qm_tbl_32x8[i][j], 32, 8);
 1683|     30|            transpose(qm_tbl_16x32[i][j], qm_tbl_32x16[i][j], 32, 16);
 1684|       |
 1685|     30|            dav1d_qm_tbl[i][j][ TX_64X64] = dav1d_qm_tbl[i][j][ TX_32X32];
 1686|     30|            dav1d_qm_tbl[i][j][RTX_64X32] = dav1d_qm_tbl[i][j][ TX_32X32];
 1687|     30|            dav1d_qm_tbl[i][j][RTX_64X16] = dav1d_qm_tbl[i][j][RTX_32X16];
 1688|     30|            dav1d_qm_tbl[i][j][RTX_32X64] = dav1d_qm_tbl[i][j][ TX_32X32];
 1689|     30|            dav1d_qm_tbl[i][j][RTX_16X64] = dav1d_qm_tbl[i][j][RTX_16X32];
 1690|     30|        }
 1691|       |
 1692|       |    // dav1d_qm_tbl[15][*][*] == NULL
 1693|      1|}
qm.c:untriangle:
 1635|     30|static void untriangle(uint8_t *dst, const uint8_t *src, const int sz) {
 1636|    990|    for (int y = 0; y < sz; y++) {
  ------------------
  |  Branch (1636:21): [True: 960, False: 30]
  ------------------
 1637|    960|        memcpy(dst, src, y + 1);
 1638|    960|        const uint8_t *src_ptr = &src[y];
 1639|  15.8k|        for (int x = y + 1; x < sz; x++) {
  ------------------
  |  Branch (1639:29): [True: 14.8k, False: 960]
  ------------------
 1640|  14.8k|            src_ptr += x;
 1641|  14.8k|            dst[x] = *src_ptr;
 1642|  14.8k|        }
 1643|    960|        dst += sz;
 1644|    960|        src += y + 1;
 1645|    960|    }
 1646|     30|}
qm.c:subsample:
 1621|    210|{
 1622|  1.77k|    for (int y = 0; y < h; y += vstep)
  ------------------
  |  Branch (1622:21): [True: 1.56k, False: 210]
  ------------------
 1623|  26.0k|        for (int x = 0; x < 32; x += hstep)
  ------------------
  |  Branch (1623:25): [True: 24.4k, False: 1.56k]
  ------------------
 1624|  24.4k|            *dst++ = src[y * 32 + x];
 1625|    210|}
qm.c:transpose:
 1629|    150|{
 1630|  1.35k|    for (int y = 0, y_off = 0; y < h; y++, y_off += w)
  ------------------
  |  Branch (1630:32): [True: 1.20k, False: 150]
  ------------------
 1631|  30.9k|        for (int x = 0, x_off = 0; x < w; x++, x_off += h)
  ------------------
  |  Branch (1631:36): [True: 29.7k, False: 1.20k]
  ------------------
 1632|  29.7k|            dst[x_off + y] = src[y_off + x];
 1633|    150|}

dav1d_recon_b_intra_8bpc:
 1179|   755k|{
 1180|   755k|    Dav1dTileState *const ts = t->ts;
 1181|   755k|    const Dav1dFrameContext *const f = t->f;
 1182|   755k|    const Dav1dDSPContext *const dsp = f->dsp;
 1183|   755k|    const int bx4 = t->bx & 31, by4 = t->by & 31;
 1184|   755k|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
 1185|   755k|    const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
 1186|   755k|    const int cbx4 = bx4 >> ss_hor, cby4 = by4 >> ss_ver;
 1187|   755k|    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
 1188|   755k|    const int bw4 = b_dim[0], bh4 = b_dim[1];
 1189|   755k|    const int w4 = imin(bw4, f->bw - t->bx), h4 = imin(bh4, f->bh - t->by);
 1190|   755k|    const int cw4 = (w4 + ss_hor) >> ss_hor, ch4 = (h4 + ss_ver) >> ss_ver;
 1191|   755k|    const int has_chroma = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400 &&
  ------------------
  |  Branch (1191:28): [True: 629k, False: 125k]
  ------------------
 1192|   629k|                           (bw4 > ss_hor || t->bx & 1) &&
  ------------------
  |  Branch (1192:29): [True: 596k, False: 32.8k]
  |  Branch (1192:45): [True: 16.5k, False: 16.3k]
  ------------------
 1193|   613k|                           (bh4 > ss_ver || t->by & 1);
  ------------------
  |  Branch (1193:29): [True: 586k, False: 26.7k]
  |  Branch (1193:45): [True: 13.3k, False: 13.3k]
  ------------------
 1194|   755k|    const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[b->tx];
 1195|   755k|    const TxfmInfo *const uv_t_dim = &dav1d_txfm_dimensions[b->uvtx];
 1196|       |
 1197|       |    // coefficient coding
 1198|   755k|    pixel *const edge = bitfn(t->scratch.edge) + 128;
  ------------------
  |  |   51|   755k|#define bitfn(x) x##_8bpc
  ------------------
 1199|   755k|    const int cbw4 = (bw4 + ss_hor) >> ss_hor, cbh4 = (bh4 + ss_ver) >> ss_ver;
 1200|       |
 1201|   755k|    const int intra_edge_filter_flag = f->seq_hdr->intra_edge_filter << 10;
 1202|       |
 1203|  1.54M|    for (int init_y = 0; init_y < h4; init_y += 16) {
  ------------------
  |  Branch (1203:26): [True: 792k, False: 755k]
  ------------------
 1204|   792k|        const int sub_h4 = imin(h4, 16 + init_y);
 1205|   792k|        const int sub_ch4 = imin(ch4, (init_y + 16) >> ss_ver);
 1206|  1.65M|        for (int init_x = 0; init_x < w4; init_x += 16) {
  ------------------
  |  Branch (1206:30): [True: 862k, False: 792k]
  ------------------
 1207|   862k|            if (b->pal_sz[0]) {
  ------------------
  |  Branch (1207:17): [True: 20.5k, False: 841k]
  ------------------
 1208|  20.5k|                pixel *dst = ((pixel *) f->cur.data[0]) +
 1209|  20.5k|                             4 * (t->by * PXSTRIDE(f->cur.stride[0]) + t->bx);
  ------------------
  |  |   53|  20.5k|#define PXSTRIDE(x) (x)
  ------------------
 1210|  20.5k|                const uint8_t *pal_idx;
 1211|  20.5k|                if (t->frame_thread.pass) {
  ------------------
  |  Branch (1211:21): [True: 0, False: 20.5k]
  ------------------
 1212|      0|                    const int p = t->frame_thread.pass & 1;
 1213|      0|                    assert(ts->frame_thread[p].pal_idx);
  ------------------
  |  Branch (1213:21): [True: 0, False: 0]
  ------------------
 1214|      0|                    pal_idx = ts->frame_thread[p].pal_idx;
 1215|      0|                    ts->frame_thread[p].pal_idx += bw4 * bh4 * 8;
 1216|  20.5k|                } else {
 1217|  20.5k|                    pal_idx = t->scratch.pal_idx_y;
 1218|  20.5k|                }
 1219|  20.5k|                const pixel *const pal = t->frame_thread.pass ?
  ------------------
  |  Branch (1219:42): [True: 0, False: 20.5k]
  ------------------
 1220|      0|                    f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
 1221|      0|                                        ((t->bx >> 1) + (t->by & 1))][0] :
 1222|  20.5k|                    bytefn(t->scratch.pal)[0];
  ------------------
  |  |   87|  20.5k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  20.5k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 1223|  20.5k|                f->dsp->ipred.pal_pred(dst, f->cur.stride[0], pal,
 1224|  20.5k|                                       pal_idx, bw4 * 4, bh4 * 4);
 1225|  20.5k|                if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   34|  20.5k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 20.5k]
  |  |  ------------------
  |  |   35|  20.5k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  20.5k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                              if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1226|      0|                    hex_dump(dst, PXSTRIDE(f->cur.stride[0]),
  ------------------
  |  |   53|      0|#define PXSTRIDE(x) (x)
  ------------------
 1227|      0|                             bw4 * 4, bh4 * 4, "y-pal-pred");
 1228|  20.5k|            }
 1229|       |
 1230|   862k|            const int intra_flags = (sm_flag(t->a, bx4) |
 1231|   862k|                                     sm_flag(&t->l, by4) |
 1232|   862k|                                     intra_edge_filter_flag);
 1233|   862k|            const int sb_has_tr = init_x + 16 < w4 ? 1 : init_y ? 0 :
  ------------------
  |  Branch (1233:35): [True: 70.4k, False: 792k]
  |  Branch (1233:58): [True: 36.1k, False: 755k]
  ------------------
 1234|   792k|                              intra_edge_flags & EDGE_I444_TOP_HAS_RIGHT;
 1235|   862k|            const int sb_has_bl = init_x ? 0 : init_y + 16 < h4 ? 1 :
  ------------------
  |  Branch (1235:35): [True: 70.4k, False: 792k]
  |  Branch (1235:48): [True: 36.1k, False: 755k]
  ------------------
 1236|   792k|                              intra_edge_flags & EDGE_I444_LEFT_HAS_BOTTOM;
 1237|   862k|            int y, x;
 1238|   862k|            const int sub_w4 = imin(w4, init_x + 16);
 1239|  1.98M|            for (y = init_y, t->by += init_y; y < sub_h4;
  ------------------
  |  Branch (1239:47): [True: 1.12M, False: 862k]
  ------------------
 1240|  1.12M|                 y += t_dim->h, t->by += t_dim->h)
 1241|  1.12M|            {
 1242|  1.12M|                pixel *dst = ((pixel *) f->cur.data[0]) +
 1243|  1.12M|                               4 * (t->by * PXSTRIDE(f->cur.stride[0]) +
  ------------------
  |  |   53|  1.12M|#define PXSTRIDE(x) (x)
  ------------------
 1244|  1.12M|                                    t->bx + init_x);
 1245|  3.68M|                for (x = init_x, t->bx += init_x; x < sub_w4;
  ------------------
  |  Branch (1245:51): [True: 2.56M, False: 1.12M]
  ------------------
 1246|  2.56M|                     x += t_dim->w, t->bx += t_dim->w)
 1247|  2.56M|                {
 1248|  2.56M|                    if (b->pal_sz[0]) goto skip_y_pred;
  ------------------
  |  Branch (1248:25): [True: 31.3k, False: 2.53M]
  ------------------
 1249|       |
 1250|  2.53M|                    int angle = b->y_angle;
 1251|  2.53M|                    const enum EdgeFlags edge_flags =
 1252|  2.53M|                        (((y > init_y || !sb_has_tr) && (x + t_dim->w >= sub_w4)) ?
  ------------------
  |  Branch (1252:28): [True: 1.39M, False: 1.14M]
  |  Branch (1252:42): [True: 403k, False: 737k]
  |  Branch (1252:57): [True: 546k, False: 1.25M]
  ------------------
 1253|  1.99M|                             0 : EDGE_I444_TOP_HAS_RIGHT) |
 1254|  2.53M|                        ((x > init_x || (!sb_has_bl && y + t_dim->h >= sub_h4)) ?
  ------------------
  |  Branch (1254:27): [True: 1.43M, False: 1.09M]
  |  Branch (1254:42): [True: 684k, False: 413k]
  |  Branch (1254:56): [True: 499k, False: 185k]
  ------------------
 1255|  1.93M|                             0 : EDGE_I444_LEFT_HAS_BOTTOM);
 1256|  2.53M|                    const pixel *top_sb_edge = NULL;
 1257|  2.53M|                    if (!(t->by & (f->sb_step - 1))) {
  ------------------
  |  Branch (1257:25): [True: 419k, False: 2.11M]
  ------------------
 1258|   419k|                        top_sb_edge = f->ipred_edge[0];
 1259|   419k|                        const int sby = t->by >> f->sb_shift;
 1260|   419k|                        top_sb_edge += f->sb128w * 128 * (sby - 1);
 1261|   419k|                    }
 1262|  2.53M|                    const enum IntraPredMode m =
 1263|  2.53M|                        bytefn(dav1d_prepare_intra_edges)(t->bx,
  ------------------
  |  |   87|  2.53M|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  2.53M|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 1264|  2.53M|                                                          t->bx > ts->tiling.col_start,
 1265|  2.53M|                                                          t->by,
 1266|  2.53M|                                                          t->by > ts->tiling.row_start,
 1267|  2.53M|                                                          ts->tiling.col_end,
 1268|  2.53M|                                                          ts->tiling.row_end,
 1269|  2.53M|                                                          edge_flags, dst,
 1270|  2.53M|                                                          f->cur.stride[0], top_sb_edge,
 1271|  2.53M|                                                          b->y_mode, &angle,
 1272|  2.53M|                                                          t_dim->w, t_dim->h,
 1273|  2.53M|                                                          f->seq_hdr->intra_edge_filter,
 1274|  2.53M|                                                          edge HIGHBD_CALL_SUFFIX);
 1275|  2.53M|                    dsp->ipred.intra_pred[m](dst, f->cur.stride[0], edge,
 1276|  2.53M|                                             t_dim->w * 4, t_dim->h * 4,
 1277|  2.53M|                                             angle | intra_flags,
 1278|  2.53M|                                             4 * f->bw - 4 * t->bx,
 1279|  2.53M|                                             4 * f->bh - 4 * t->by
 1280|  2.53M|                                             HIGHBD_CALL_SUFFIX);
 1281|       |
 1282|  2.53M|                    if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   34|  2.53M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 2.53M]
  |  |  ------------------
  |  |   35|  2.53M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  2.53M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                                  if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1283|      0|                        hex_dump(edge - t_dim->h * 4, t_dim->h * 4,
 1284|      0|                                 t_dim->h * 4, 2, "l");
 1285|      0|                        hex_dump(edge, 0, 1, 1, "tl");
 1286|      0|                        hex_dump(edge + 1, t_dim->w * 4,
 1287|      0|                                 t_dim->w * 4, 2, "t");
 1288|      0|                        hex_dump(dst, f->cur.stride[0],
 1289|      0|                                 t_dim->w * 4, t_dim->h * 4, "y-intra-pred");
 1290|      0|                    }
 1291|       |
 1292|  2.56M|                skip_y_pred: {}
 1293|  2.56M|                    if (!b->skip) {
  ------------------
  |  Branch (1293:25): [True: 1.15M, False: 1.41M]
  ------------------
 1294|  1.15M|                        coef *cf;
 1295|  1.15M|                        int eob;
 1296|  1.15M|                        enum TxfmType txtp;
 1297|  1.15M|                        if (t->frame_thread.pass) {
  ------------------
  |  Branch (1297:29): [True: 0, False: 1.15M]
  ------------------
 1298|      0|                            const int p = t->frame_thread.pass & 1;
 1299|      0|                            const int cbi = *ts->frame_thread[p].cbi++;
 1300|      0|                            cf = ts->frame_thread[p].cf;
 1301|      0|                            ts->frame_thread[p].cf += imin(t_dim->w, 8) * imin(t_dim->h, 8) * 16;
 1302|      0|                            eob  = cbi >> 5;
 1303|      0|                            txtp = cbi & 0x1f;
 1304|  1.15M|                        } else {
 1305|  1.15M|                            uint8_t cf_ctx;
 1306|  1.15M|                            cf = bitfn(t->cf);
  ------------------
  |  |   51|  1.15M|#define bitfn(x) x##_8bpc
  ------------------
 1307|  1.15M|                            eob = decode_coefs(t, &t->a->lcoef[bx4 + x],
 1308|  1.15M|                                               &t->l.lcoef[by4 + y], b->tx, bs,
 1309|  1.15M|                                               b, 1, 0, cf, &txtp, &cf_ctx);
 1310|  1.15M|                            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  1.15M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 1.15M]
  |  |  ------------------
  |  |   35|  1.15M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  1.15M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1311|      0|                                printf("Post-y-cf-blk[tx=%d,txtp=%d,eob=%d]: r=%d\n",
 1312|      0|                                       b->tx, txtp, eob, ts->msac.rng);
 1313|  1.15M|                            dav1d_memset_likely_pow2(&t->a->lcoef[bx4 + x], cf_ctx, imin(t_dim->w, f->bw - t->bx));
 1314|  1.15M|                            dav1d_memset_likely_pow2(&t->l.lcoef[by4 + y], cf_ctx, imin(t_dim->h, f->bh - t->by));
 1315|  1.15M|                        }
 1316|  1.15M|                        if (eob >= 0) {
  ------------------
  |  Branch (1316:29): [True: 698k, False: 458k]
  ------------------
 1317|   698k|                            if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   34|   698k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 698k]
  |  |  ------------------
  |  |   35|   698k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   698k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                                          if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1318|      0|                                coef_dump(cf, imin(t_dim->h, 8) * 4,
 1319|      0|                                          imin(t_dim->w, 8) * 4, 3, "dq");
 1320|   698k|                            dsp->itx.itxfm_add[b->tx]
 1321|   698k|                                              [txtp](dst,
 1322|   698k|                                                     f->cur.stride[0],
 1323|   698k|                                                     cf, eob HIGHBD_CALL_SUFFIX);
 1324|   698k|                            if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   34|   698k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 698k]
  |  |  ------------------
  |  |   35|   698k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   698k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                                          if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1325|      0|                                hex_dump(dst, f->cur.stride[0],
 1326|      0|                                         t_dim->w * 4, t_dim->h * 4, "recon");
 1327|   698k|                        }
 1328|  1.41M|                    } else if (!t->frame_thread.pass) {
  ------------------
  |  Branch (1328:32): [True: 1.41M, False: 0]
  ------------------
 1329|  1.41M|                        dav1d_memset_pow2[t_dim->lw](&t->a->lcoef[bx4 + x], 0x40);
 1330|  1.41M|                        dav1d_memset_pow2[t_dim->lh](&t->l.lcoef[by4 + y], 0x40);
 1331|  1.41M|                    }
 1332|  2.56M|                    dst += 4 * t_dim->w;
 1333|  2.56M|                }
 1334|  1.12M|                t->bx -= x;
 1335|  1.12M|            }
 1336|   862k|            t->by -= y;
 1337|       |
 1338|   862k|            if (!has_chroma) continue;
  ------------------
  |  Branch (1338:17): [True: 181k, False: 680k]
  ------------------
 1339|       |
 1340|   680k|            const ptrdiff_t stride = f->cur.stride[1];
 1341|       |
 1342|   680k|            if (b->uv_mode == CFL_PRED) {
  ------------------
  |  Branch (1342:17): [True: 115k, False: 565k]
  ------------------
 1343|   115k|                assert(!init_x && !init_y);
  ------------------
  |  Branch (1343:17): [True: 115k, False: 0]
  |  Branch (1343:17): [True: 115k, False: 0]
  ------------------
 1344|       |
 1345|   115k|                int16_t *const ac = t->scratch.ac;
 1346|   115k|                pixel *y_src = ((pixel *) f->cur.data[0]) + 4 * (t->bx & ~ss_hor) +
 1347|   115k|                                 4 * (t->by & ~ss_ver) * PXSTRIDE(f->cur.stride[0]);
  ------------------
  |  |   53|   115k|#define PXSTRIDE(x) (x)
  ------------------
 1348|   115k|                const ptrdiff_t uv_off = 4 * ((t->bx >> ss_hor) +
 1349|   115k|                                              (t->by >> ss_ver) * PXSTRIDE(stride));
  ------------------
  |  |   53|   115k|#define PXSTRIDE(x) (x)
  ------------------
 1350|   115k|                pixel *const uv_dst[2] = { ((pixel *) f->cur.data[1]) + uv_off,
 1351|   115k|                                           ((pixel *) f->cur.data[2]) + uv_off };
 1352|       |
 1353|   115k|                const int furthest_r =
 1354|   115k|                    ((cw4 << ss_hor) + t_dim->w - 1) & ~(t_dim->w - 1);
 1355|   115k|                const int furthest_b =
 1356|   115k|                    ((ch4 << ss_ver) + t_dim->h - 1) & ~(t_dim->h - 1);
 1357|   115k|                dsp->ipred.cfl_ac[f->cur.p.layout - 1](ac, y_src, f->cur.stride[0],
 1358|   115k|                                                         cbw4 - (furthest_r >> ss_hor),
 1359|   115k|                                                         cbh4 - (furthest_b >> ss_ver),
 1360|   115k|                                                         cbw4 * 4, cbh4 * 4);
 1361|   346k|                for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1361:34): [True: 231k, False: 115k]
  ------------------
 1362|   231k|                    if (!b->cfl_alpha[pl]) continue;
  ------------------
  |  Branch (1362:25): [True: 43.7k, False: 187k]
  ------------------
 1363|   187k|                    int angle = 0;
 1364|   187k|                    const pixel *top_sb_edge = NULL;
 1365|   187k|                    if (!((t->by & ~ss_ver) & (f->sb_step - 1))) {
  ------------------
  |  Branch (1365:25): [True: 51.7k, False: 135k]
  ------------------
 1366|  51.7k|                        top_sb_edge = f->ipred_edge[pl + 1];
 1367|  51.7k|                        const int sby = t->by >> f->sb_shift;
 1368|  51.7k|                        top_sb_edge += f->sb128w * 128 * (sby - 1);
 1369|  51.7k|                    }
 1370|   187k|                    const int xpos = t->bx >> ss_hor, ypos = t->by >> ss_ver;
 1371|   187k|                    const int xstart = ts->tiling.col_start >> ss_hor;
 1372|   187k|                    const int ystart = ts->tiling.row_start >> ss_ver;
 1373|   187k|                    const enum IntraPredMode m =
 1374|   187k|                        bytefn(dav1d_prepare_intra_edges)(xpos, xpos > xstart,
  ------------------
  |  |   87|   187k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|   187k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 1375|   187k|                                                          ypos, ypos > ystart,
 1376|   187k|                                                          ts->tiling.col_end >> ss_hor,
 1377|   187k|                                                          ts->tiling.row_end >> ss_ver,
 1378|   187k|                                                          0, uv_dst[pl], stride,
 1379|   187k|                                                          top_sb_edge, DC_PRED, &angle,
 1380|   187k|                                                          uv_t_dim->w, uv_t_dim->h, 0,
 1381|   187k|                                                          edge HIGHBD_CALL_SUFFIX);
 1382|   187k|                    dsp->ipred.cfl_pred[m](uv_dst[pl], stride, edge,
 1383|   187k|                                           uv_t_dim->w * 4,
 1384|   187k|                                           uv_t_dim->h * 4,
 1385|   187k|                                           ac, b->cfl_alpha[pl]
 1386|   187k|                                           HIGHBD_CALL_SUFFIX);
 1387|   187k|                }
 1388|   115k|                if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   34|   115k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 115k]
  |  |  ------------------
  |  |   35|   115k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   115k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                              if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1389|      0|                    ac_dump(ac, 4*cbw4, 4*cbh4, "ac");
 1390|      0|                    hex_dump(uv_dst[0], stride, cbw4 * 4, cbh4 * 4, "u-cfl-pred");
 1391|      0|                    hex_dump(uv_dst[1], stride, cbw4 * 4, cbh4 * 4, "v-cfl-pred");
 1392|      0|                }
 1393|   565k|            } else if (b->pal_sz[1]) {
  ------------------
  |  Branch (1393:24): [True: 7.20k, False: 558k]
  ------------------
 1394|  7.20k|                const ptrdiff_t uv_dstoff = 4 * ((t->bx >> ss_hor) +
 1395|  7.20k|                                              (t->by >> ss_ver) * PXSTRIDE(f->cur.stride[1]));
  ------------------
  |  |   53|  7.20k|#define PXSTRIDE(x) (x)
  ------------------
 1396|  7.20k|                const pixel (*pal)[8];
 1397|  7.20k|                const uint8_t *pal_idx;
 1398|  7.20k|                if (t->frame_thread.pass) {
  ------------------
  |  Branch (1398:21): [True: 0, False: 7.20k]
  ------------------
 1399|      0|                    const int p = t->frame_thread.pass & 1;
 1400|      0|                    assert(ts->frame_thread[p].pal_idx);
  ------------------
  |  Branch (1400:21): [True: 0, False: 0]
  ------------------
 1401|      0|                    pal = f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
 1402|      0|                                              ((t->bx >> 1) + (t->by & 1))];
 1403|      0|                    pal_idx = ts->frame_thread[p].pal_idx;
 1404|      0|                    ts->frame_thread[p].pal_idx += cbw4 * cbh4 * 8;
 1405|  7.20k|                } else {
 1406|  7.20k|                    pal = bytefn(t->scratch.pal);
  ------------------
  |  |   87|  7.20k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  7.20k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 1407|  7.20k|                    pal_idx = t->scratch.pal_idx_uv;
 1408|  7.20k|                }
 1409|       |
 1410|  7.20k|                f->dsp->ipred.pal_pred(((pixel *) f->cur.data[1]) + uv_dstoff,
 1411|  7.20k|                                       f->cur.stride[1], pal[1],
 1412|  7.20k|                                       pal_idx, cbw4 * 4, cbh4 * 4);
 1413|  7.20k|                f->dsp->ipred.pal_pred(((pixel *) f->cur.data[2]) + uv_dstoff,
 1414|  7.20k|                                       f->cur.stride[1], pal[2],
 1415|  7.20k|                                       pal_idx, cbw4 * 4, cbh4 * 4);
 1416|  7.20k|                if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   34|  7.20k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 7.20k]
  |  |  ------------------
  |  |   35|  7.20k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  7.20k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                              if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1417|      0|                    hex_dump(((pixel *) f->cur.data[1]) + uv_dstoff,
 1418|      0|                             PXSTRIDE(f->cur.stride[1]),
  ------------------
  |  |   53|      0|#define PXSTRIDE(x) (x)
  ------------------
 1419|      0|                             cbw4 * 4, cbh4 * 4, "u-pal-pred");
 1420|      0|                    hex_dump(((pixel *) f->cur.data[2]) + uv_dstoff,
 1421|      0|                             PXSTRIDE(f->cur.stride[1]),
  ------------------
  |  |   53|      0|#define PXSTRIDE(x) (x)
  ------------------
 1422|      0|                             cbw4 * 4, cbh4 * 4, "v-pal-pred");
 1423|      0|                }
 1424|  7.20k|            }
 1425|       |
 1426|   680k|            const int sm_uv_fl = sm_uv_flag(t->a, cbx4) |
 1427|   680k|                                 sm_uv_flag(&t->l, cby4);
 1428|   680k|            const int uv_sb_has_tr =
 1429|   680k|                ((init_x + 16) >> ss_hor) < cw4 ? 1 : init_y ? 0 :
  ------------------
  |  Branch (1429:17): [True: 53.4k, False: 627k]
  |  Branch (1429:55): [True: 27.3k, False: 600k]
  ------------------
 1430|   627k|                intra_edge_flags & (EDGE_I420_TOP_HAS_RIGHT >> (f->cur.p.layout - 1));
 1431|   680k|            const int uv_sb_has_bl =
 1432|   680k|                init_x ? 0 : ((init_y + 16) >> ss_ver) < ch4 ? 1 :
  ------------------
  |  Branch (1432:17): [True: 53.4k, False: 627k]
  |  Branch (1432:30): [True: 27.3k, False: 600k]
  ------------------
 1433|   627k|                intra_edge_flags & (EDGE_I420_LEFT_HAS_BOTTOM >> (f->cur.p.layout - 1));
 1434|   680k|            const int sub_cw4 = imin(cw4, (init_x + 16) >> ss_hor);
 1435|  2.04M|            for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1435:30): [True: 1.36M, False: 680k]
  ------------------
 1436|  2.99M|                for (y = init_y >> ss_ver, t->by += init_y; y < sub_ch4;
  ------------------
  |  Branch (1436:61): [True: 1.63M, False: 1.36M]
  ------------------
 1437|  1.63M|                     y += uv_t_dim->h, t->by += uv_t_dim->h << ss_ver)
 1438|  1.63M|                {
 1439|  1.63M|                    pixel *dst = ((pixel *) f->cur.data[1 + pl]) +
 1440|  1.63M|                                   4 * ((t->by >> ss_ver) * PXSTRIDE(stride) +
  ------------------
  |  |   53|  1.63M|#define PXSTRIDE(x) (x)
  ------------------
 1441|  1.63M|                                        ((t->bx + init_x) >> ss_hor));
 1442|  4.39M|                    for (x = init_x >> ss_hor, t->bx += init_x; x < sub_cw4;
  ------------------
  |  Branch (1442:65): [True: 2.76M, False: 1.63M]
  ------------------
 1443|  2.76M|                         x += uv_t_dim->w, t->bx += uv_t_dim->w << ss_hor)
 1444|  2.76M|                    {
 1445|  2.76M|                        if ((b->uv_mode == CFL_PRED && b->cfl_alpha[pl]) ||
  ------------------
  |  Branch (1445:30): [True: 231k, False: 2.53M]
  |  Branch (1445:56): [True: 187k, False: 43.7k]
  ------------------
 1446|  2.57M|                            b->pal_sz[1])
  ------------------
  |  Branch (1446:29): [True: 30.0k, False: 2.54M]
  ------------------
 1447|   217k|                        {
 1448|   217k|                            goto skip_uv_pred;
 1449|   217k|                        }
 1450|       |
 1451|  2.54M|                        int angle = b->uv_angle;
 1452|       |                        // this probably looks weird because we're using
 1453|       |                        // luma flags in a chroma loop, but that's because
 1454|       |                        // prepare_intra_edges() expects luma flags as input
 1455|  2.54M|                        const enum EdgeFlags edge_flags =
 1456|  2.54M|                            (((y > (init_y >> ss_ver) || !uv_sb_has_tr) &&
  ------------------
  |  Branch (1456:32): [True: 1.15M, False: 1.38M]
  |  Branch (1456:58): [True: 481k, False: 905k]
  ------------------
 1457|  1.63M|                              (x + uv_t_dim->w >= sub_cw4)) ?
  ------------------
  |  Branch (1457:31): [True: 703k, False: 934k]
  ------------------
 1458|  1.84M|                                 0 : EDGE_I444_TOP_HAS_RIGHT) |
 1459|  2.54M|                            ((x > (init_x >> ss_hor) ||
  ------------------
  |  Branch (1459:31): [True: 1.11M, False: 1.43M]
  ------------------
 1460|  1.43M|                              (!uv_sb_has_bl && y + uv_t_dim->h >= sub_ch4)) ?
  ------------------
  |  Branch (1460:32): [True: 870k, False: 562k]
  |  Branch (1460:49): [True: 663k, False: 206k]
  ------------------
 1461|  1.77M|                                 0 : EDGE_I444_LEFT_HAS_BOTTOM);
 1462|  2.54M|                        const pixel *top_sb_edge = NULL;
 1463|  2.54M|                        if (!((t->by & ~ss_ver) & (f->sb_step - 1))) {
  ------------------
  |  Branch (1463:29): [True: 492k, False: 2.05M]
  ------------------
 1464|   492k|                            top_sb_edge = f->ipred_edge[1 + pl];
 1465|   492k|                            const int sby = t->by >> f->sb_shift;
 1466|   492k|                            top_sb_edge += f->sb128w * 128 * (sby - 1);
 1467|   492k|                        }
 1468|  2.54M|                        const enum IntraPredMode uv_mode =
 1469|  2.54M|                             b->uv_mode == CFL_PRED ? DC_PRED : b->uv_mode;
  ------------------
  |  Branch (1469:30): [True: 43.7k, False: 2.50M]
  ------------------
 1470|  2.54M|                        const int xpos = t->bx >> ss_hor, ypos = t->by >> ss_ver;
 1471|  2.54M|                        const int xstart = ts->tiling.col_start >> ss_hor;
 1472|  2.54M|                        const int ystart = ts->tiling.row_start >> ss_ver;
 1473|  2.54M|                        const enum IntraPredMode m =
 1474|  2.54M|                            bytefn(dav1d_prepare_intra_edges)(xpos, xpos > xstart,
  ------------------
  |  |   87|  2.54M|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  2.54M|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 1475|  2.54M|                                                              ypos, ypos > ystart,
 1476|  2.54M|                                                              ts->tiling.col_end >> ss_hor,
 1477|  2.54M|                                                              ts->tiling.row_end >> ss_ver,
 1478|  2.54M|                                                              edge_flags, dst, stride,
 1479|  2.54M|                                                              top_sb_edge, uv_mode,
 1480|  2.54M|                                                              &angle, uv_t_dim->w,
 1481|  2.54M|                                                              uv_t_dim->h,
 1482|  2.54M|                                                              f->seq_hdr->intra_edge_filter,
 1483|  2.54M|                                                              edge HIGHBD_CALL_SUFFIX);
 1484|  2.54M|                        angle |= intra_edge_filter_flag;
 1485|  2.54M|                        dsp->ipred.intra_pred[m](dst, stride, edge,
 1486|  2.54M|                                                 uv_t_dim->w * 4,
 1487|  2.54M|                                                 uv_t_dim->h * 4,
 1488|  2.54M|                                                 angle | sm_uv_fl,
 1489|  2.54M|                                                 (4 * f->bw + ss_hor -
 1490|  2.54M|                                                  4 * (t->bx & ~ss_hor)) >> ss_hor,
 1491|  2.54M|                                                 (4 * f->bh + ss_ver -
 1492|  2.54M|                                                  4 * (t->by & ~ss_ver)) >> ss_ver
 1493|  2.54M|                                                 HIGHBD_CALL_SUFFIX);
 1494|  2.54M|                        if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   34|  2.54M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 2.54M]
  |  |  ------------------
  |  |   35|  2.54M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  2.54M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                                      if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1495|      0|                            hex_dump(edge - uv_t_dim->h * 4, uv_t_dim->h * 4,
 1496|      0|                                     uv_t_dim->h * 4, 2, "l");
 1497|      0|                            hex_dump(edge, 0, 1, 1, "tl");
 1498|      0|                            hex_dump(edge + 1, uv_t_dim->w * 4,
 1499|      0|                                     uv_t_dim->w * 4, 2, "t");
 1500|      0|                            hex_dump(dst, stride, uv_t_dim->w * 4,
 1501|      0|                                     uv_t_dim->h * 4, pl ? "v-intra-pred" : "u-intra-pred");
  ------------------
  |  Branch (1501:55): [True: 0, False: 0]
  ------------------
 1502|      0|                        }
 1503|       |
 1504|  2.76M|                    skip_uv_pred: {}
 1505|  2.76M|                        if (!b->skip) {
  ------------------
  |  Branch (1505:29): [True: 1.19M, False: 1.56M]
  ------------------
 1506|  1.19M|                            enum TxfmType txtp;
 1507|  1.19M|                            int eob;
 1508|  1.19M|                            coef *cf;
 1509|  1.19M|                            if (t->frame_thread.pass) {
  ------------------
  |  Branch (1509:33): [True: 0, False: 1.19M]
  ------------------
 1510|      0|                                const int p = t->frame_thread.pass & 1;
 1511|      0|                                const int cbi = *ts->frame_thread[p].cbi++;
 1512|      0|                                cf = ts->frame_thread[p].cf;
 1513|      0|                                ts->frame_thread[p].cf += uv_t_dim->w * uv_t_dim->h * 16;
 1514|      0|                                eob  = cbi >> 5;
 1515|      0|                                txtp = cbi & 0x1f;
 1516|  1.19M|                            } else {
 1517|  1.19M|                                uint8_t cf_ctx;
 1518|  1.19M|                                cf = bitfn(t->cf);
  ------------------
  |  |   51|  1.19M|#define bitfn(x) x##_8bpc
  ------------------
 1519|  1.19M|                                eob = decode_coefs(t, &t->a->ccoef[pl][cbx4 + x],
 1520|  1.19M|                                                   &t->l.ccoef[pl][cby4 + y],
 1521|  1.19M|                                                   b->uvtx, bs, b, 1, 1 + pl, cf,
 1522|  1.19M|                                                   &txtp, &cf_ctx);
 1523|  1.19M|                                if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  1.19M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 1.19M]
  |  |  ------------------
  |  |   35|  1.19M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  1.19M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1524|      0|                                    printf("Post-uv-cf-blk[pl=%d,tx=%d,"
 1525|      0|                                           "txtp=%d,eob=%d]: r=%d [x=%d,cbx4=%d]\n",
 1526|      0|                                           pl, b->uvtx, txtp, eob, ts->msac.rng, x, cbx4);
 1527|  1.19M|                                int ctw = imin(uv_t_dim->w, (f->bw - t->bx + ss_hor) >> ss_hor);
 1528|  1.19M|                                int cth = imin(uv_t_dim->h, (f->bh - t->by + ss_ver) >> ss_ver);
 1529|  1.19M|                                dav1d_memset_likely_pow2(&t->a->ccoef[pl][cbx4 + x], cf_ctx, ctw);
 1530|  1.19M|                                dav1d_memset_likely_pow2(&t->l.ccoef[pl][cby4 + y], cf_ctx, cth);
 1531|  1.19M|                            }
 1532|  1.19M|                            if (eob >= 0) {
  ------------------
  |  Branch (1532:33): [True: 315k, False: 879k]
  ------------------
 1533|   315k|                                if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   34|   315k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 315k]
  |  |  ------------------
  |  |   35|   315k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   315k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                                              if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1534|      0|                                    coef_dump(cf, uv_t_dim->h * 4,
 1535|      0|                                              uv_t_dim->w * 4, 3, "dq");
 1536|   315k|                                dsp->itx.itxfm_add[b->uvtx]
 1537|   315k|                                                  [txtp](dst, stride,
 1538|   315k|                                                         cf, eob HIGHBD_CALL_SUFFIX);
 1539|   315k|                                if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   34|   315k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 315k]
  |  |  ------------------
  |  |   35|   315k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   315k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                                              if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1540|      0|                                    hex_dump(dst, stride, uv_t_dim->w * 4,
 1541|      0|                                             uv_t_dim->h * 4, "recon");
 1542|   315k|                            }
 1543|  1.56M|                        } else if (!t->frame_thread.pass) {
  ------------------
  |  Branch (1543:36): [True: 1.56M, False: 0]
  ------------------
 1544|  1.56M|                            dav1d_memset_pow2[uv_t_dim->lw](&t->a->ccoef[pl][cbx4 + x], 0x40);
 1545|  1.56M|                            dav1d_memset_pow2[uv_t_dim->lh](&t->l.ccoef[pl][cby4 + y], 0x40);
 1546|  1.56M|                        }
 1547|  2.76M|                        dst += uv_t_dim->w * 4;
 1548|  2.76M|                    }
 1549|  1.63M|                    t->bx -= x << ss_hor;
 1550|  1.63M|                }
 1551|  1.36M|                t->by -= y << ss_ver;
 1552|  1.36M|            }
 1553|   680k|        }
 1554|   792k|    }
 1555|   755k|}
dav1d_recon_b_inter_8bpc:
 1559|   773k|{
 1560|   773k|    Dav1dTileState *const ts = t->ts;
 1561|   773k|    const Dav1dFrameContext *const f = t->f;
 1562|   773k|    const Dav1dDSPContext *const dsp = f->dsp;
 1563|   773k|    const int bx4 = t->bx & 31, by4 = t->by & 31;
 1564|   773k|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
 1565|   773k|    const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
 1566|   773k|    const int cbx4 = bx4 >> ss_hor, cby4 = by4 >> ss_ver;
 1567|   773k|    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
 1568|   773k|    const int bw4 = b_dim[0], bh4 = b_dim[1];
 1569|   773k|    const int w4 = imin(bw4, f->bw - t->bx), h4 = imin(bh4, f->bh - t->by);
 1570|   773k|    const int has_chroma = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400 &&
  ------------------
  |  Branch (1570:28): [True: 522k, False: 250k]
  ------------------
 1571|   522k|                           (bw4 > ss_hor || t->bx & 1) &&
  ------------------
  |  Branch (1571:29): [True: 473k, False: 49.2k]
  |  Branch (1571:45): [True: 24.4k, False: 24.8k]
  ------------------
 1572|   497k|                           (bh4 > ss_ver || t->by & 1);
  ------------------
  |  Branch (1572:29): [True: 465k, False: 31.8k]
  |  Branch (1572:45): [True: 15.9k, False: 15.9k]
  ------------------
 1573|   773k|    const int chr_layout_idx = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I400 ? 0 :
  ------------------
  |  Branch (1573:32): [True: 250k, False: 522k]
  ------------------
 1574|   773k|                               DAV1D_PIXEL_LAYOUT_I444 - f->cur.p.layout;
 1575|   773k|    int res;
 1576|       |
 1577|       |    // prediction
 1578|   773k|    const int cbh4 = (bh4 + ss_ver) >> ss_ver, cbw4 = (bw4 + ss_hor) >> ss_hor;
 1579|   773k|    pixel *dst = ((pixel *) f->cur.data[0]) +
 1580|   773k|        4 * (t->by * PXSTRIDE(f->cur.stride[0]) + t->bx);
  ------------------
  |  |   53|   773k|#define PXSTRIDE(x) (x)
  ------------------
 1581|   773k|    const ptrdiff_t uvdstoff =
 1582|   773k|        4 * ((t->bx >> ss_hor) + (t->by >> ss_ver) * PXSTRIDE(f->cur.stride[1]));
  ------------------
  |  |   53|   773k|#define PXSTRIDE(x) (x)
  ------------------
 1583|   773k|    if (IS_KEY_OR_INTRA(f->frame_hdr)) {
  ------------------
  |  |   43|   773k|    (!IS_INTER_OR_SWITCH(frame_header))
  |  |  ------------------
  |  |  |  |   36|   773k|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (43:5): [True: 163k, False: 610k]
  |  |  ------------------
  ------------------
 1584|       |        // intrabc
 1585|   163k|        assert(!f->frame_hdr->super_res.enabled);
  ------------------
  |  Branch (1585:9): [True: 163k, False: 0]
  ------------------
 1586|   163k|        res = mc(t, dst, NULL, f->cur.stride[0], bw4, bh4, t->bx, t->by, 0,
 1587|   163k|                 b->mv[0], &f->sr_cur, 0 /* unused */, FILTER_2D_BILINEAR);
 1588|   163k|        if (res) return res;
  ------------------
  |  Branch (1588:13): [True: 0, False: 163k]
  ------------------
 1589|   407k|        if (has_chroma) for (int pl = 1; pl < 3; pl++) {
  ------------------
  |  Branch (1589:13): [True: 135k, False: 27.1k]
  |  Branch (1589:42): [True: 271k, False: 135k]
  ------------------
 1590|   271k|            res = mc(t, ((pixel *)f->cur.data[pl]) + uvdstoff, NULL, f->cur.stride[1],
 1591|   271k|                     bw4 << (bw4 == ss_hor), bh4 << (bh4 == ss_ver),
 1592|   271k|                     t->bx & ~ss_hor, t->by & ~ss_ver, pl, b->mv[0],
 1593|   271k|                     &f->sr_cur, 0 /* unused */, FILTER_2D_BILINEAR);
 1594|   271k|            if (res) return res;
  ------------------
  |  Branch (1594:17): [True: 0, False: 271k]
  ------------------
 1595|   271k|        }
 1596|   610k|    } else if (b->comp_type == COMP_INTER_NONE) {
  ------------------
  |  Branch (1596:16): [True: 477k, False: 132k]
  ------------------
 1597|   477k|        const Dav1dThreadPicture *const refp = &f->refp[b->ref[0]];
 1598|   477k|        const enum Filter2d filter_2d = b->filter2d;
 1599|       |
 1600|   477k|        if (imin(bw4, bh4) > 1 &&
  ------------------
  |  Branch (1600:13): [True: 301k, False: 175k]
  ------------------
 1601|   301k|            ((b->inter_mode == GLOBALMV && f->gmv_warp_allowed[b->ref[0]]) ||
  ------------------
  |  Branch (1601:15): [True: 79.3k, False: 222k]
  |  Branch (1601:44): [True: 4.82k, False: 74.4k]
  ------------------
 1602|   296k|             (b->motion_mode == MM_WARP && t->warpmv.type > DAV1D_WM_TYPE_TRANSLATION)))
  ------------------
  |  Branch (1602:15): [True: 43.4k, False: 253k]
  |  Branch (1602:44): [True: 39.4k, False: 3.98k]
  ------------------
 1603|  44.2k|        {
 1604|  44.2k|            res = warp_affine(t, dst, NULL, f->cur.stride[0], b_dim, 0, refp,
 1605|  44.2k|                              b->motion_mode == MM_WARP ? &t->warpmv :
  ------------------
  |  Branch (1605:31): [True: 39.4k, False: 4.82k]
  ------------------
 1606|  44.2k|                                  &f->frame_hdr->gmv[b->ref[0]]);
 1607|  44.2k|            if (res) return res;
  ------------------
  |  Branch (1607:17): [True: 0, False: 44.2k]
  ------------------
 1608|   433k|        } else {
 1609|   433k|            res = mc(t, dst, NULL, f->cur.stride[0],
 1610|   433k|                     bw4, bh4, t->bx, t->by, 0, b->mv[0], refp, b->ref[0], filter_2d);
 1611|   433k|            if (res) return res;
  ------------------
  |  Branch (1611:17): [True: 0, False: 433k]
  ------------------
 1612|   433k|            if (b->motion_mode == MM_OBMC) {
  ------------------
  |  Branch (1612:17): [True: 102k, False: 330k]
  ------------------
 1613|   102k|                res = obmc(t, dst, f->cur.stride[0], b_dim, 0, bx4, by4, w4, h4);
 1614|   102k|                if (res) return res;
  ------------------
  |  Branch (1614:21): [True: 0, False: 102k]
  ------------------
 1615|   102k|            }
 1616|   433k|        }
 1617|   477k|        if (b->interintra_type) {
  ------------------
  |  Branch (1617:13): [True: 22.8k, False: 454k]
  ------------------
 1618|  22.8k|            pixel *const tl_edge = bitfn(t->scratch.edge) + 32;
  ------------------
  |  |   51|  22.8k|#define bitfn(x) x##_8bpc
  ------------------
 1619|  22.8k|            enum IntraPredMode m = b->interintra_mode == II_SMOOTH_PRED ?
  ------------------
  |  Branch (1619:36): [True: 3.22k, False: 19.6k]
  ------------------
 1620|  19.6k|                                   SMOOTH_PRED : b->interintra_mode;
 1621|  22.8k|            pixel *const tmp = bitfn(t->scratch.interintra);
  ------------------
  |  |   51|  22.8k|#define bitfn(x) x##_8bpc
  ------------------
 1622|  22.8k|            int angle = 0;
 1623|  22.8k|            const pixel *top_sb_edge = NULL;
 1624|  22.8k|            if (!(t->by & (f->sb_step - 1))) {
  ------------------
  |  Branch (1624:17): [True: 3.70k, False: 19.1k]
  ------------------
 1625|  3.70k|                top_sb_edge = f->ipred_edge[0];
 1626|  3.70k|                const int sby = t->by >> f->sb_shift;
 1627|  3.70k|                top_sb_edge += f->sb128w * 128 * (sby - 1);
 1628|  3.70k|            }
 1629|  22.8k|            m = bytefn(dav1d_prepare_intra_edges)(t->bx, t->bx > ts->tiling.col_start,
  ------------------
  |  |   87|  22.8k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  22.8k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 1630|  22.8k|                                                  t->by, t->by > ts->tiling.row_start,
 1631|  22.8k|                                                  ts->tiling.col_end, ts->tiling.row_end,
 1632|  22.8k|                                                  0, dst, f->cur.stride[0], top_sb_edge,
 1633|  22.8k|                                                  m, &angle, bw4, bh4, 0, tl_edge
 1634|  22.8k|                                                  HIGHBD_CALL_SUFFIX);
 1635|  22.8k|            dsp->ipred.intra_pred[m](tmp, 4 * bw4 * sizeof(pixel),
 1636|  22.8k|                                     tl_edge, bw4 * 4, bh4 * 4, 0, 0, 0
 1637|  22.8k|                                     HIGHBD_CALL_SUFFIX);
 1638|  22.8k|            dsp->mc.blend(dst, f->cur.stride[0], tmp,
 1639|  22.8k|                          bw4 * 4, bh4 * 4, II_MASK(0, bs, b));
  ------------------
  |  |   83|  22.8k|    ((const uint8_t*)((uintptr_t)&dav1d_masks + \
  |  |   84|  22.8k|    (size_t)((b)->interintra_type == INTER_INTRA_BLEND ? \
  |  |  ------------------
  |  |  |  Branch (84:14): [True: 17.3k, False: 5.47k]
  |  |  ------------------
  |  |   85|  22.8k|    dav1d_masks.offsets[c][(bs)-BS_32x32].ii[(b)->interintra_mode] : \
  |  |   86|  22.8k|    dav1d_masks.offsets[c][(bs)-BS_32x32].wedge[0][(b)->wedge_idx]) * 8))
  ------------------
 1640|  22.8k|        }
 1641|       |
 1642|   477k|        if (!has_chroma) goto skip_inter_chroma_pred;
  ------------------
  |  Branch (1642:13): [True: 214k, False: 263k]
  ------------------
 1643|       |
 1644|       |        // sub8x8 derivation
 1645|   263k|        int is_sub8x8 = bw4 == ss_hor || bh4 == ss_ver;
  ------------------
  |  Branch (1645:25): [True: 12.8k, False: 250k]
  |  Branch (1645:42): [True: 7.86k, False: 242k]
  ------------------
 1646|   263k|        refmvs_block *const *r;
 1647|   263k|        if (is_sub8x8) {
  ------------------
  |  Branch (1647:13): [True: 20.7k, False: 242k]
  ------------------
 1648|  20.7k|            assert(ss_hor == 1);
  ------------------
  |  Branch (1648:13): [True: 20.7k, False: 0]
  ------------------
 1649|  20.7k|            r = &t->rt.r[(t->by & 31) + 5];
 1650|  20.7k|            if (bw4 == 1) is_sub8x8 &= r[0][t->bx - 1].ref.ref[0] > 0;
  ------------------
  |  Branch (1650:17): [True: 12.8k, False: 7.86k]
  ------------------
 1651|  20.7k|            if (bh4 == ss_ver) is_sub8x8 &= r[-1][t->bx].ref.ref[0] > 0;
  ------------------
  |  Branch (1651:17): [True: 12.5k, False: 8.21k]
  ------------------
 1652|  20.7k|            if (bw4 == 1 && bh4 == ss_ver)
  ------------------
  |  Branch (1652:17): [True: 12.8k, False: 7.86k]
  |  Branch (1652:29): [True: 4.67k, False: 8.21k]
  ------------------
 1653|  4.67k|                is_sub8x8 &= r[-1][t->bx - 1].ref.ref[0] > 0;
 1654|  20.7k|        }
 1655|       |
 1656|       |        // chroma prediction
 1657|   263k|        if (is_sub8x8) {
  ------------------
  |  Branch (1657:13): [True: 19.1k, False: 244k]
  ------------------
 1658|  19.1k|            assert(ss_hor == 1);
  ------------------
  |  Branch (1658:13): [True: 19.1k, False: 0]
  ------------------
 1659|  19.1k|            ptrdiff_t h_off = 0, v_off = 0;
 1660|  19.1k|            if (bw4 == 1 && bh4 == ss_ver) {
  ------------------
  |  Branch (1660:17): [True: 12.0k, False: 7.06k]
  |  Branch (1660:29): [True: 4.34k, False: 7.73k]
  ------------------
 1661|  13.0k|                for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1661:34): [True: 8.69k, False: 4.34k]
  ------------------
 1662|  8.69k|                    res = mc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff,
 1663|  8.69k|                             NULL, f->cur.stride[1],
 1664|  8.69k|                             bw4, bh4, t->bx - 1, t->by - 1, 1 + pl,
 1665|  8.69k|                             r[-1][t->bx - 1].mv.mv[0],
 1666|  8.69k|                             &f->refp[r[-1][t->bx - 1].ref.ref[0] - 1],
 1667|  8.69k|                             r[-1][t->bx - 1].ref.ref[0] - 1,
 1668|  8.69k|                             t->frame_thread.pass != 2 ? t->tl_4x4_filter :
  ------------------
  |  Branch (1668:30): [True: 8.69k, False: 0]
  ------------------
 1669|  8.69k|                                 f->frame_thread.b[((t->by - 1) * f->b4_stride) + t->bx - 1].filter2d);
 1670|  8.69k|                    if (res) return res;
  ------------------
  |  Branch (1670:25): [True: 0, False: 8.69k]
  ------------------
 1671|  8.69k|                }
 1672|  4.34k|                v_off = 2 * PXSTRIDE(f->cur.stride[1]);
  ------------------
  |  |   53|  4.34k|#define PXSTRIDE(x) (x)
  ------------------
 1673|  4.34k|                h_off = 2;
 1674|  4.34k|            }
 1675|  19.1k|            if (bw4 == 1) {
  ------------------
  |  Branch (1675:17): [True: 12.0k, False: 7.06k]
  ------------------
 1676|  12.0k|                const enum Filter2d left_filter_2d =
 1677|  12.0k|                    dav1d_filter_2d[t->l.filter[1][by4]][t->l.filter[0][by4]];
 1678|  36.2k|                for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1678:34): [True: 24.1k, False: 12.0k]
  ------------------
 1679|  24.1k|                    res = mc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff + v_off, NULL,
 1680|  24.1k|                             f->cur.stride[1], bw4, bh4, t->bx - 1,
 1681|  24.1k|                             t->by, 1 + pl, r[0][t->bx - 1].mv.mv[0],
 1682|  24.1k|                             &f->refp[r[0][t->bx - 1].ref.ref[0] - 1],
 1683|  24.1k|                             r[0][t->bx - 1].ref.ref[0] - 1,
 1684|  24.1k|                             t->frame_thread.pass != 2 ? left_filter_2d :
  ------------------
  |  Branch (1684:30): [True: 24.1k, False: 0]
  ------------------
 1685|  24.1k|                                 f->frame_thread.b[(t->by * f->b4_stride) + t->bx - 1].filter2d);
 1686|  24.1k|                    if (res) return res;
  ------------------
  |  Branch (1686:25): [True: 0, False: 24.1k]
  ------------------
 1687|  24.1k|                }
 1688|  12.0k|                h_off = 2;
 1689|  12.0k|            }
 1690|  19.1k|            if (bh4 == ss_ver) {
  ------------------
  |  Branch (1690:17): [True: 11.4k, False: 7.73k]
  ------------------
 1691|  11.4k|                const enum Filter2d top_filter_2d =
 1692|  11.4k|                    dav1d_filter_2d[t->a->filter[1][bx4]][t->a->filter[0][bx4]];
 1693|  34.2k|                for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1693:34): [True: 22.8k, False: 11.4k]
  ------------------
 1694|  22.8k|                    res = mc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff + h_off, NULL,
 1695|  22.8k|                             f->cur.stride[1], bw4, bh4, t->bx, t->by - 1,
 1696|  22.8k|                             1 + pl, r[-1][t->bx].mv.mv[0],
 1697|  22.8k|                             &f->refp[r[-1][t->bx].ref.ref[0] - 1],
 1698|  22.8k|                             r[-1][t->bx].ref.ref[0] - 1,
 1699|  22.8k|                             t->frame_thread.pass != 2 ? top_filter_2d :
  ------------------
  |  Branch (1699:30): [True: 22.8k, False: 0]
  ------------------
 1700|  22.8k|                                 f->frame_thread.b[((t->by - 1) * f->b4_stride) + t->bx].filter2d);
 1701|  22.8k|                    if (res) return res;
  ------------------
  |  Branch (1701:25): [True: 0, False: 22.8k]
  ------------------
 1702|  22.8k|                }
 1703|  11.4k|                v_off = 2 * PXSTRIDE(f->cur.stride[1]);
  ------------------
  |  |   53|  11.4k|#define PXSTRIDE(x) (x)
  ------------------
 1704|  11.4k|            }
 1705|  57.4k|            for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1705:30): [True: 38.3k, False: 19.1k]
  ------------------
 1706|  38.3k|                res = mc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff + h_off + v_off, NULL, f->cur.stride[1],
 1707|  38.3k|                         bw4, bh4, t->bx, t->by, 1 + pl, b->mv[0],
 1708|  38.3k|                         refp, b->ref[0], filter_2d);
 1709|  38.3k|                if (res) return res;
  ------------------
  |  Branch (1709:21): [True: 0, False: 38.3k]
  ------------------
 1710|  38.3k|            }
 1711|   244k|        } else {
 1712|   244k|            if (imin(cbw4, cbh4) > 1 &&
  ------------------
  |  Branch (1712:17): [True: 133k, False: 110k]
  ------------------
 1713|   133k|                ((b->inter_mode == GLOBALMV && f->gmv_warp_allowed[b->ref[0]]) ||
  ------------------
  |  Branch (1713:19): [True: 22.8k, False: 111k]
  |  Branch (1713:48): [True: 751, False: 22.0k]
  ------------------
 1714|   133k|                 (b->motion_mode == MM_WARP && t->warpmv.type > DAV1D_WM_TYPE_TRANSLATION)))
  ------------------
  |  Branch (1714:19): [True: 11.9k, False: 121k]
  |  Branch (1714:48): [True: 11.3k, False: 620]
  ------------------
 1715|  12.1k|            {
 1716|  36.3k|                for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1716:34): [True: 24.2k, False: 12.1k]
  ------------------
 1717|  24.2k|                    res = warp_affine(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff, NULL,
 1718|  24.2k|                                      f->cur.stride[1], b_dim, 1 + pl, refp,
 1719|  24.2k|                                      b->motion_mode == MM_WARP ? &t->warpmv :
  ------------------
  |  Branch (1719:39): [True: 22.7k, False: 1.50k]
  ------------------
 1720|  24.2k|                                          &f->frame_hdr->gmv[b->ref[0]]);
 1721|  24.2k|                    if (res) return res;
  ------------------
  |  Branch (1721:25): [True: 0, False: 24.2k]
  ------------------
 1722|  24.2k|                }
 1723|   232k|            } else {
 1724|   697k|                for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1724:34): [True: 464k, False: 232k]
  ------------------
 1725|   464k|                    res = mc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff,
 1726|   464k|                             NULL, f->cur.stride[1],
 1727|   464k|                             bw4 << (bw4 == ss_hor), bh4 << (bh4 == ss_ver),
 1728|   464k|                             t->bx & ~ss_hor, t->by & ~ss_ver,
 1729|   464k|                             1 + pl, b->mv[0], refp, b->ref[0], filter_2d);
 1730|   464k|                    if (res) return res;
  ------------------
  |  Branch (1730:25): [True: 0, False: 464k]
  ------------------
 1731|   464k|                    if (b->motion_mode == MM_OBMC) {
  ------------------
  |  Branch (1731:25): [True: 146k, False: 317k]
  ------------------
 1732|   146k|                        res = obmc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff,
 1733|   146k|                                   f->cur.stride[1], b_dim, 1 + pl, bx4, by4, w4, h4);
 1734|   146k|                        if (res) return res;
  ------------------
  |  Branch (1734:29): [True: 0, False: 146k]
  ------------------
 1735|   146k|                    }
 1736|   464k|                }
 1737|   232k|            }
 1738|   244k|            if (b->interintra_type) {
  ------------------
  |  Branch (1738:17): [True: 16.6k, False: 227k]
  ------------------
 1739|       |                // FIXME for 8x32 with 4:2:2 subsampling, this probably does
 1740|       |                // the wrong thing since it will select 4x16, not 4x32, as a
 1741|       |                // transform size...
 1742|  16.6k|                const uint8_t *const ii_mask = II_MASK(chr_layout_idx, bs, b);
  ------------------
  |  |   83|  16.6k|    ((const uint8_t*)((uintptr_t)&dav1d_masks + \
  |  |   84|  16.6k|    (size_t)((b)->interintra_type == INTER_INTRA_BLEND ? \
  |  |  ------------------
  |  |  |  Branch (84:14): [True: 12.6k, False: 3.99k]
  |  |  ------------------
  |  |   85|  16.6k|    dav1d_masks.offsets[c][(bs)-BS_32x32].ii[(b)->interintra_mode] : \
  |  |   86|  16.6k|    dav1d_masks.offsets[c][(bs)-BS_32x32].wedge[0][(b)->wedge_idx]) * 8))
  ------------------
 1743|       |
 1744|  50.0k|                for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1744:34): [True: 33.3k, False: 16.6k]
  ------------------
 1745|  33.3k|                    pixel *const tmp = bitfn(t->scratch.interintra);
  ------------------
  |  |   51|  33.3k|#define bitfn(x) x##_8bpc
  ------------------
 1746|  33.3k|                    pixel *const tl_edge = bitfn(t->scratch.edge) + 32;
  ------------------
  |  |   51|  33.3k|#define bitfn(x) x##_8bpc
  ------------------
 1747|  33.3k|                    enum IntraPredMode m =
 1748|  33.3k|                        b->interintra_mode == II_SMOOTH_PRED ?
  ------------------
  |  Branch (1748:25): [True: 4.10k, False: 29.2k]
  ------------------
 1749|  29.2k|                        SMOOTH_PRED : b->interintra_mode;
 1750|  33.3k|                    int angle = 0;
 1751|  33.3k|                    pixel *const uvdst = ((pixel *) f->cur.data[1 + pl]) + uvdstoff;
 1752|  33.3k|                    const pixel *top_sb_edge = NULL;
 1753|  33.3k|                    if (!(t->by & (f->sb_step - 1))) {
  ------------------
  |  Branch (1753:25): [True: 6.17k, False: 27.1k]
  ------------------
 1754|  6.17k|                        top_sb_edge = f->ipred_edge[pl + 1];
 1755|  6.17k|                        const int sby = t->by >> f->sb_shift;
 1756|  6.17k|                        top_sb_edge += f->sb128w * 128 * (sby - 1);
 1757|  6.17k|                    }
 1758|  33.3k|                    m = bytefn(dav1d_prepare_intra_edges)(t->bx >> ss_hor,
  ------------------
  |  |   87|  33.3k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  33.3k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 1759|  33.3k|                                                          (t->bx >> ss_hor) >
 1760|  33.3k|                                                              (ts->tiling.col_start >> ss_hor),
 1761|  33.3k|                                                          t->by >> ss_ver,
 1762|  33.3k|                                                          (t->by >> ss_ver) >
 1763|  33.3k|                                                              (ts->tiling.row_start >> ss_ver),
 1764|  33.3k|                                                          ts->tiling.col_end >> ss_hor,
 1765|  33.3k|                                                          ts->tiling.row_end >> ss_ver,
 1766|  33.3k|                                                          0, uvdst, f->cur.stride[1],
 1767|  33.3k|                                                          top_sb_edge, m,
 1768|  33.3k|                                                          &angle, cbw4, cbh4, 0, tl_edge
 1769|  33.3k|                                                          HIGHBD_CALL_SUFFIX);
 1770|  33.3k|                    dsp->ipred.intra_pred[m](tmp, cbw4 * 4 * sizeof(pixel),
 1771|  33.3k|                                             tl_edge, cbw4 * 4, cbh4 * 4, 0, 0, 0
 1772|  33.3k|                                             HIGHBD_CALL_SUFFIX);
 1773|  33.3k|                    dsp->mc.blend(uvdst, f->cur.stride[1], tmp,
 1774|  33.3k|                                  cbw4 * 4, cbh4 * 4, ii_mask);
 1775|  33.3k|                }
 1776|  16.6k|            }
 1777|   244k|        }
 1778|       |
 1779|   477k|    skip_inter_chroma_pred: {}
 1780|   477k|        t->tl_4x4_filter = filter_2d;
 1781|   477k|    } else {
 1782|   132k|        const enum Filter2d filter_2d = b->filter2d;
 1783|       |        // Maximum super block size is 128x128
 1784|   132k|        int16_t (*tmp)[128 * 128] = t->scratch.compinter;
 1785|   132k|        int jnt_weight;
 1786|   132k|        uint8_t *const seg_mask = t->scratch.seg_mask;
 1787|   132k|        const uint8_t *mask;
 1788|       |
 1789|   397k|        for (int i = 0; i < 2; i++) {
  ------------------
  |  Branch (1789:25): [True: 265k, False: 132k]
  ------------------
 1790|   265k|            const Dav1dThreadPicture *const refp = &f->refp[b->ref[i]];
 1791|       |
 1792|   265k|            if (b->inter_mode == GLOBALMV_GLOBALMV && f->gmv_warp_allowed[b->ref[i]]) {
  ------------------
  |  Branch (1792:17): [True: 15.5k, False: 249k]
  |  Branch (1792:55): [True: 1.38k, False: 14.2k]
  ------------------
 1793|  1.38k|                res = warp_affine(t, NULL, tmp[i], bw4 * 4, b_dim, 0, refp,
 1794|  1.38k|                                  &f->frame_hdr->gmv[b->ref[i]]);
 1795|  1.38k|                if (res) return res;
  ------------------
  |  Branch (1795:21): [True: 0, False: 1.38k]
  ------------------
 1796|   263k|            } else {
 1797|   263k|                res = mc(t, NULL, tmp[i], 0, bw4, bh4, t->bx, t->by, 0,
 1798|   263k|                         b->mv[i], refp, b->ref[i], filter_2d);
 1799|   263k|                if (res) return res;
  ------------------
  |  Branch (1799:21): [True: 0, False: 263k]
  ------------------
 1800|   263k|            }
 1801|   265k|        }
 1802|   132k|        switch (b->comp_type) {
  ------------------
  |  Branch (1802:17): [True: 132k, False: 0]
  ------------------
 1803|  80.2k|        case COMP_INTER_AVG:
  ------------------
  |  Branch (1803:9): [True: 80.2k, False: 52.3k]
  ------------------
 1804|  80.2k|            dsp->mc.avg(dst, f->cur.stride[0], tmp[0], tmp[1],
 1805|  80.2k|                        bw4 * 4, bh4 * 4 HIGHBD_CALL_SUFFIX);
 1806|  80.2k|            break;
 1807|  22.6k|        case COMP_INTER_WEIGHTED_AVG:
  ------------------
  |  Branch (1807:9): [True: 22.6k, False: 109k]
  ------------------
 1808|  22.6k|            jnt_weight = f->jnt_weights[b->ref[0]][b->ref[1]];
 1809|  22.6k|            dsp->mc.w_avg(dst, f->cur.stride[0], tmp[0], tmp[1],
 1810|  22.6k|                          bw4 * 4, bh4 * 4, jnt_weight HIGHBD_CALL_SUFFIX);
 1811|  22.6k|            break;
 1812|  20.7k|        case COMP_INTER_SEG:
  ------------------
  |  Branch (1812:9): [True: 20.7k, False: 111k]
  ------------------
 1813|  20.7k|            dsp->mc.w_mask[chr_layout_idx](dst, f->cur.stride[0],
 1814|  20.7k|                                           tmp[b->mask_sign], tmp[!b->mask_sign],
 1815|  20.7k|                                           bw4 * 4, bh4 * 4, seg_mask,
 1816|  20.7k|                                           b->mask_sign HIGHBD_CALL_SUFFIX);
 1817|  20.7k|            mask = seg_mask;
 1818|  20.7k|            break;
 1819|  8.96k|        case COMP_INTER_WEDGE:
  ------------------
  |  Branch (1819:9): [True: 8.96k, False: 123k]
  ------------------
 1820|  8.96k|            mask = WEDGE_MASK(0, bs, 0, b->wedge_idx);
  ------------------
  |  |   89|  8.96k|    ((const uint8_t*)((uintptr_t)&dav1d_masks + \
  |  |   90|  8.96k|    (size_t)dav1d_masks.offsets[c][(bs)-BS_32x32].wedge[sign][idx] * 8))
  ------------------
 1821|  8.96k|            dsp->mc.mask(dst, f->cur.stride[0],
 1822|  8.96k|                         tmp[b->mask_sign], tmp[!b->mask_sign],
 1823|  8.96k|                         bw4 * 4, bh4 * 4, mask HIGHBD_CALL_SUFFIX);
 1824|  8.96k|            if (has_chroma)
  ------------------
  |  Branch (1824:17): [True: 6.68k, False: 2.27k]
  ------------------
 1825|  6.68k|                mask = WEDGE_MASK(chr_layout_idx, bs, b->mask_sign, b->wedge_idx);
  ------------------
  |  |   89|  6.68k|    ((const uint8_t*)((uintptr_t)&dav1d_masks + \
  |  |   90|  6.68k|    (size_t)dav1d_masks.offsets[c][(bs)-BS_32x32].wedge[sign][idx] * 8))
  ------------------
 1826|  8.96k|            break;
 1827|   132k|        }
 1828|       |
 1829|       |        // chroma
 1830|   246k|        if (has_chroma) for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1830:13): [True: 82.1k, False: 50.4k]
  |  Branch (1830:42): [True: 164k, False: 82.1k]
  ------------------
 1831|   492k|            for (int i = 0; i < 2; i++) {
  ------------------
  |  Branch (1831:29): [True: 328k, False: 164k]
  ------------------
 1832|   328k|                const Dav1dThreadPicture *const refp = &f->refp[b->ref[i]];
 1833|   328k|                if (b->inter_mode == GLOBALMV_GLOBALMV &&
  ------------------
  |  Branch (1833:21): [True: 19.8k, False: 308k]
  ------------------
 1834|  19.8k|                    imin(cbw4, cbh4) > 1 && f->gmv_warp_allowed[b->ref[i]])
  ------------------
  |  Branch (1834:21): [True: 17.7k, False: 2.06k]
  |  Branch (1834:45): [True: 1.17k, False: 16.5k]
  ------------------
 1835|  1.17k|                {
 1836|  1.17k|                    res = warp_affine(t, NULL, tmp[i], bw4 * 4 >> ss_hor,
 1837|  1.17k|                                      b_dim, 1 + pl,
 1838|  1.17k|                                      refp, &f->frame_hdr->gmv[b->ref[i]]);
 1839|  1.17k|                    if (res) return res;
  ------------------
  |  Branch (1839:25): [True: 0, False: 1.17k]
  ------------------
 1840|   327k|                } else {
 1841|   327k|                    res = mc(t, NULL, tmp[i], 0, bw4, bh4, t->bx, t->by,
 1842|   327k|                             1 + pl, b->mv[i], refp, b->ref[i], filter_2d);
 1843|   327k|                    if (res) return res;
  ------------------
  |  Branch (1843:25): [True: 0, False: 327k]
  ------------------
 1844|   327k|                }
 1845|   328k|            }
 1846|   164k|            pixel *const uvdst = ((pixel *) f->cur.data[1 + pl]) + uvdstoff;
 1847|   164k|            switch (b->comp_type) {
  ------------------
  |  Branch (1847:21): [True: 164k, False: 0]
  ------------------
 1848|  85.9k|            case COMP_INTER_AVG:
  ------------------
  |  Branch (1848:13): [True: 85.9k, False: 78.3k]
  ------------------
 1849|  85.9k|                dsp->mc.avg(uvdst, f->cur.stride[1], tmp[0], tmp[1],
 1850|  85.9k|                            bw4 * 4 >> ss_hor, bh4 * 4 >> ss_ver
 1851|  85.9k|                            HIGHBD_CALL_SUFFIX);
 1852|  85.9k|                break;
 1853|  36.7k|            case COMP_INTER_WEIGHTED_AVG:
  ------------------
  |  Branch (1853:13): [True: 36.7k, False: 127k]
  ------------------
 1854|  36.7k|                dsp->mc.w_avg(uvdst, f->cur.stride[1], tmp[0], tmp[1],
 1855|  36.7k|                              bw4 * 4 >> ss_hor, bh4 * 4 >> ss_ver, jnt_weight
 1856|  36.7k|                              HIGHBD_CALL_SUFFIX);
 1857|  36.7k|                break;
 1858|  13.3k|            case COMP_INTER_WEDGE:
  ------------------
  |  Branch (1858:13): [True: 13.3k, False: 150k]
  ------------------
 1859|  41.5k|            case COMP_INTER_SEG:
  ------------------
  |  Branch (1859:13): [True: 28.2k, False: 136k]
  ------------------
 1860|  41.5k|                dsp->mc.mask(uvdst, f->cur.stride[1],
 1861|  41.5k|                             tmp[b->mask_sign], tmp[!b->mask_sign],
 1862|  41.5k|                             bw4 * 4 >> ss_hor, bh4 * 4 >> ss_ver, mask
 1863|  41.5k|                             HIGHBD_CALL_SUFFIX);
 1864|  41.5k|                break;
 1865|   164k|            }
 1866|   164k|        }
 1867|   132k|    }
 1868|       |
 1869|   773k|    if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   34|   773k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 773k]
  |  |  ------------------
  |  |   35|   773k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   773k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                  if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1870|      0|        hex_dump(dst, f->cur.stride[0], b_dim[0] * 4, b_dim[1] * 4, "y-pred");
 1871|      0|        if (has_chroma) {
  ------------------
  |  Branch (1871:13): [True: 0, False: 0]
  ------------------
 1872|      0|            hex_dump(&((pixel *) f->cur.data[1])[uvdstoff], f->cur.stride[1],
 1873|      0|                     cbw4 * 4, cbh4 * 4, "u-pred");
 1874|      0|            hex_dump(&((pixel *) f->cur.data[2])[uvdstoff], f->cur.stride[1],
 1875|      0|                     cbw4 * 4, cbh4 * 4, "v-pred");
 1876|      0|        }
 1877|      0|    }
 1878|       |
 1879|   773k|    const int cw4 = (w4 + ss_hor) >> ss_hor, ch4 = (h4 + ss_ver) >> ss_ver;
 1880|       |
 1881|   773k|    if (b->skip) {
  ------------------
  |  Branch (1881:9): [True: 451k, False: 322k]
  ------------------
 1882|       |        // reset coef contexts
 1883|   451k|        BlockContext *const a = t->a;
 1884|   451k|        dav1d_memset_pow2[b_dim[2]](&a->lcoef[bx4], 0x40);
 1885|   451k|        dav1d_memset_pow2[b_dim[3]](&t->l.lcoef[by4], 0x40);
 1886|   451k|        if (has_chroma) {
  ------------------
  |  Branch (1886:13): [True: 250k, False: 200k]
  ------------------
 1887|   250k|            dav1d_memset_pow2_fn memset_cw = dav1d_memset_pow2[ulog2(cbw4)];
 1888|   250k|            dav1d_memset_pow2_fn memset_ch = dav1d_memset_pow2[ulog2(cbh4)];
 1889|   250k|            memset_cw(&a->ccoef[0][cbx4], 0x40);
 1890|   250k|            memset_cw(&a->ccoef[1][cbx4], 0x40);
 1891|   250k|            memset_ch(&t->l.ccoef[0][cby4], 0x40);
 1892|   250k|            memset_ch(&t->l.ccoef[1][cby4], 0x40);
 1893|   250k|        }
 1894|   451k|        return 0;
 1895|   451k|    }
 1896|       |
 1897|   322k|    const TxfmInfo *const uvtx = &dav1d_txfm_dimensions[b->uvtx];
 1898|   322k|    const TxfmInfo *const ytx = &dav1d_txfm_dimensions[b->max_ytx];
 1899|   322k|    const uint16_t tx_split[2] = { b->tx_split0, b->tx_split1 };
 1900|       |
 1901|   648k|    for (int init_y = 0; init_y < bh4; init_y += 16) {
  ------------------
  |  Branch (1901:26): [True: 326k, False: 322k]
  ------------------
 1902|   660k|        for (int init_x = 0; init_x < bw4; init_x += 16) {
  ------------------
  |  Branch (1902:30): [True: 333k, False: 326k]
  ------------------
 1903|       |            // coefficient coding & inverse transforms
 1904|   333k|            int y_off = !!init_y, y;
 1905|   333k|            dst += PXSTRIDE(f->cur.stride[0]) * 4 * init_y;
  ------------------
  |  |   53|   333k|#define PXSTRIDE(x) (x)
  ------------------
 1906|   694k|            for (y = init_y, t->by += init_y; y < imin(h4, init_y + 16);
  ------------------
  |  Branch (1906:47): [True: 360k, False: 333k]
  ------------------
 1907|   360k|                 y += ytx->h, y_off++)
 1908|   360k|            {
 1909|   360k|                int x, x_off = !!init_x;
 1910|   867k|                for (x = init_x, t->bx += init_x; x < imin(w4, init_x + 16);
  ------------------
  |  Branch (1910:51): [True: 506k, False: 360k]
  ------------------
 1911|   506k|                     x += ytx->w, x_off++)
 1912|   506k|                {
 1913|   506k|                    read_coef_tree(t, bs, b, b->max_ytx, 0, tx_split,
 1914|   506k|                                   x_off, y_off, &dst[x * 4]);
 1915|   506k|                    t->bx += ytx->w;
 1916|   506k|                }
 1917|   360k|                dst += PXSTRIDE(f->cur.stride[0]) * 4 * ytx->h;
  ------------------
  |  |   53|   360k|#define PXSTRIDE(x) (x)
  ------------------
 1918|   360k|                t->bx -= x;
 1919|   360k|                t->by += ytx->h;
 1920|   360k|            }
 1921|   333k|            dst -= PXSTRIDE(f->cur.stride[0]) * 4 * y;
  ------------------
  |  |   53|   333k|#define PXSTRIDE(x) (x)
  ------------------
 1922|   333k|            t->by -= y;
 1923|       |
 1924|       |            // chroma coefs and inverse transform
 1925|   709k|            if (has_chroma) for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1925:17): [True: 236k, False: 97.2k]
  |  Branch (1925:46): [True: 473k, False: 236k]
  ------------------
 1926|   473k|                pixel *uvdst = ((pixel *) f->cur.data[1 + pl]) + uvdstoff +
 1927|   473k|                    (PXSTRIDE(f->cur.stride[1]) * init_y * 4 >> ss_ver);
  ------------------
  |  |   53|   473k|#define PXSTRIDE(x) (x)
  ------------------
 1928|   473k|                for (y = init_y >> ss_ver, t->by += init_y;
 1929|   977k|                     y < imin(ch4, (init_y + 16) >> ss_ver); y += uvtx->h)
  ------------------
  |  Branch (1929:22): [True: 504k, False: 473k]
  ------------------
 1930|   504k|                {
 1931|   504k|                    int x;
 1932|   504k|                    for (x = init_x >> ss_hor, t->bx += init_x;
 1933|  1.10M|                         x < imin(cw4, (init_x + 16) >> ss_hor); x += uvtx->w)
  ------------------
  |  Branch (1933:26): [True: 599k, False: 504k]
  ------------------
 1934|   599k|                    {
 1935|   599k|                        coef *cf;
 1936|   599k|                        int eob;
 1937|   599k|                        enum TxfmType txtp;
 1938|   599k|                        if (t->frame_thread.pass) {
  ------------------
  |  Branch (1938:29): [True: 0, False: 599k]
  ------------------
 1939|      0|                            const int p = t->frame_thread.pass & 1;
 1940|      0|                            const int cbi = *ts->frame_thread[p].cbi++;
 1941|      0|                            cf = ts->frame_thread[p].cf;
 1942|      0|                            ts->frame_thread[p].cf += uvtx->w * uvtx->h * 16;
 1943|      0|                            eob  = cbi >> 5;
 1944|      0|                            txtp = cbi & 0x1f;
 1945|   599k|                        } else {
 1946|   599k|                            uint8_t cf_ctx;
 1947|   599k|                            cf = bitfn(t->cf);
  ------------------
  |  |   51|   599k|#define bitfn(x) x##_8bpc
  ------------------
 1948|   599k|                            txtp = t->scratch.txtp_map[(by4 + (y << ss_ver)) * 32 +
 1949|   599k|                                                        bx4 + (x << ss_hor)];
 1950|   599k|                            eob = decode_coefs(t, &t->a->ccoef[pl][cbx4 + x],
 1951|   599k|                                               &t->l.ccoef[pl][cby4 + y],
 1952|   599k|                                               b->uvtx, bs, b, 0, 1 + pl,
 1953|   599k|                                               cf, &txtp, &cf_ctx);
 1954|   599k|                            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   599k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 599k]
  |  |  ------------------
  |  |   35|   599k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   599k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1955|      0|                                printf("Post-uv-cf-blk[pl=%d,tx=%d,"
 1956|      0|                                       "txtp=%d,eob=%d]: r=%d\n",
 1957|      0|                                       pl, b->uvtx, txtp, eob, ts->msac.rng);
 1958|   599k|                            int ctw = imin(uvtx->w, (f->bw - t->bx + ss_hor) >> ss_hor);
 1959|   599k|                            int cth = imin(uvtx->h, (f->bh - t->by + ss_ver) >> ss_ver);
 1960|   599k|                            dav1d_memset_likely_pow2(&t->a->ccoef[pl][cbx4 + x], cf_ctx, ctw);
 1961|   599k|                            dav1d_memset_likely_pow2(&t->l.ccoef[pl][cby4 + y], cf_ctx, cth);
 1962|   599k|                        }
 1963|   599k|                        if (eob >= 0) {
  ------------------
  |  Branch (1963:29): [True: 183k, False: 416k]
  ------------------
 1964|   183k|                            if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   34|   183k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 183k]
  |  |  ------------------
  |  |   35|   183k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   183k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                                          if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1965|      0|                                coef_dump(cf, uvtx->h * 4, uvtx->w * 4, 3, "dq");
 1966|   183k|                            dsp->itx.itxfm_add[b->uvtx]
 1967|   183k|                                              [txtp](&uvdst[4 * x],
 1968|   183k|                                                     f->cur.stride[1],
 1969|   183k|                                                     cf, eob HIGHBD_CALL_SUFFIX);
 1970|   183k|                            if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   34|   183k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 183k]
  |  |  ------------------
  |  |   35|   183k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   183k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                                          if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1971|      0|                                hex_dump(&uvdst[4 * x], f->cur.stride[1],
 1972|      0|                                         uvtx->w * 4, uvtx->h * 4, "recon");
 1973|   183k|                        }
 1974|   599k|                        t->bx += uvtx->w << ss_hor;
 1975|   599k|                    }
 1976|   504k|                    uvdst += PXSTRIDE(f->cur.stride[1]) * 4 * uvtx->h;
  ------------------
  |  |   53|   504k|#define PXSTRIDE(x) (x)
  ------------------
 1977|   504k|                    t->bx -= x << ss_hor;
 1978|   504k|                    t->by += uvtx->h << ss_ver;
 1979|   504k|                }
 1980|   473k|                t->by -= y << ss_ver;
 1981|   473k|            }
 1982|   333k|        }
 1983|   326k|    }
 1984|   322k|    return 0;
 1985|   773k|}
dav1d_filter_sbrow_deblock_cols_8bpc:
 1987|  76.3k|void bytefn(dav1d_filter_sbrow_deblock_cols)(Dav1dFrameContext *const f, const int sby) {
 1988|  76.3k|    if (!(f->c->inloop_filters & DAV1D_INLOOPFILTER_DEBLOCK) ||
  ------------------
  |  Branch (1988:9): [True: 0, False: 76.3k]
  ------------------
 1989|  76.3k|        (!f->frame_hdr->loopfilter.level_y[0] && !f->frame_hdr->loopfilter.level_y[1]))
  ------------------
  |  Branch (1989:10): [True: 51.7k, False: 24.5k]
  |  Branch (1989:50): [True: 45.8k, False: 5.97k]
  ------------------
 1990|  45.8k|    {
 1991|  45.8k|        return;
 1992|  45.8k|    }
 1993|  30.5k|    const int y = sby * f->sb_step * 4;
 1994|  30.5k|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
 1995|  30.5k|    pixel *const p[3] = {
 1996|  30.5k|        f->lf.p[0] + y * PXSTRIDE(f->cur.stride[0]),
  ------------------
  |  |   53|  30.5k|#define PXSTRIDE(x) (x)
  ------------------
 1997|  30.5k|        f->lf.p[1] + (y * PXSTRIDE(f->cur.stride[1]) >> ss_ver),
  ------------------
  |  |   53|  30.5k|#define PXSTRIDE(x) (x)
  ------------------
 1998|  30.5k|        f->lf.p[2] + (y * PXSTRIDE(f->cur.stride[1]) >> ss_ver)
  ------------------
  |  |   53|  30.5k|#define PXSTRIDE(x) (x)
  ------------------
 1999|  30.5k|    };
 2000|  30.5k|    Av1Filter *mask = f->lf.mask + (sby >> !f->seq_hdr->sb128) * f->sb128w;
 2001|  30.5k|    bytefn(dav1d_loopfilter_sbrow_cols)(f, p, mask, sby,
  ------------------
  |  |   87|  30.5k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  30.5k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2002|  30.5k|                                        f->lf.start_of_tile_row[sby]);
 2003|  30.5k|}
dav1d_filter_sbrow_deblock_rows_8bpc:
 2005|  76.3k|void bytefn(dav1d_filter_sbrow_deblock_rows)(Dav1dFrameContext *const f, const int sby) {
 2006|  76.3k|    const int y = sby * f->sb_step * 4;
 2007|  76.3k|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
 2008|  76.3k|    pixel *const p[3] = {
 2009|  76.3k|        f->lf.p[0] + y * PXSTRIDE(f->cur.stride[0]),
  ------------------
  |  |   53|  76.3k|#define PXSTRIDE(x) (x)
  ------------------
 2010|  76.3k|        f->lf.p[1] + (y * PXSTRIDE(f->cur.stride[1]) >> ss_ver),
  ------------------
  |  |   53|  76.3k|#define PXSTRIDE(x) (x)
  ------------------
 2011|  76.3k|        f->lf.p[2] + (y * PXSTRIDE(f->cur.stride[1]) >> ss_ver)
  ------------------
  |  |   53|  76.3k|#define PXSTRIDE(x) (x)
  ------------------
 2012|  76.3k|    };
 2013|  76.3k|    Av1Filter *mask = f->lf.mask + (sby >> !f->seq_hdr->sb128) * f->sb128w;
 2014|  76.3k|    if (f->c->inloop_filters & DAV1D_INLOOPFILTER_DEBLOCK &&
  ------------------
  |  Branch (2014:9): [True: 76.3k, False: 0]
  ------------------
 2015|  76.3k|        (f->frame_hdr->loopfilter.level_y[0] || f->frame_hdr->loopfilter.level_y[1]))
  ------------------
  |  Branch (2015:10): [True: 24.5k, False: 51.7k]
  |  Branch (2015:49): [True: 5.97k, False: 45.8k]
  ------------------
 2016|  30.5k|    {
 2017|  30.5k|        bytefn(dav1d_loopfilter_sbrow_rows)(f, p, mask, sby);
  ------------------
  |  |   87|  30.5k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  30.5k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2018|  30.5k|    }
 2019|  76.3k|    if (f->seq_hdr->cdef || f->lf.restore_planes) {
  ------------------
  |  Branch (2019:9): [True: 35.9k, False: 40.4k]
  |  Branch (2019:29): [True: 10.1k, False: 30.3k]
  ------------------
 2020|       |        // Store loop filtered pixels required by CDEF / LR
 2021|  46.0k|        bytefn(dav1d_copy_lpf)(f, p, sby);
  ------------------
  |  |   87|  46.0k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  46.0k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2022|  46.0k|    }
 2023|  76.3k|}
dav1d_filter_sbrow_cdef_8bpc:
 2025|  35.9k|void bytefn(dav1d_filter_sbrow_cdef)(Dav1dTaskContext *const tc, const int sby) {
 2026|  35.9k|    const Dav1dFrameContext *const f = tc->f;
 2027|  35.9k|    if (!(f->c->inloop_filters & DAV1D_INLOOPFILTER_CDEF)) return;
  ------------------
  |  Branch (2027:9): [True: 0, False: 35.9k]
  ------------------
 2028|  35.9k|    const int sbsz = f->sb_step;
 2029|  35.9k|    const int y = sby * sbsz * 4;
 2030|  35.9k|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
 2031|  35.9k|    pixel *const p[3] = {
 2032|  35.9k|        f->lf.p[0] + y * PXSTRIDE(f->cur.stride[0]),
  ------------------
  |  |   53|  35.9k|#define PXSTRIDE(x) (x)
  ------------------
 2033|  35.9k|        f->lf.p[1] + (y * PXSTRIDE(f->cur.stride[1]) >> ss_ver),
  ------------------
  |  |   53|  35.9k|#define PXSTRIDE(x) (x)
  ------------------
 2034|  35.9k|        f->lf.p[2] + (y * PXSTRIDE(f->cur.stride[1]) >> ss_ver)
  ------------------
  |  |   53|  35.9k|#define PXSTRIDE(x) (x)
  ------------------
 2035|  35.9k|    };
 2036|  35.9k|    Av1Filter *prev_mask = f->lf.mask + ((sby - 1) >> !f->seq_hdr->sb128) * f->sb128w;
 2037|  35.9k|    Av1Filter *mask = f->lf.mask + (sby >> !f->seq_hdr->sb128) * f->sb128w;
 2038|  35.9k|    const int start = sby * sbsz;
 2039|  35.9k|    if (sby) {
  ------------------
  |  Branch (2039:9): [True: 33.0k, False: 2.86k]
  ------------------
 2040|  33.0k|        const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
 2041|  33.0k|        pixel *p_up[3] = {
 2042|  33.0k|            p[0] - 8 * PXSTRIDE(f->cur.stride[0]),
  ------------------
  |  |   53|  33.0k|#define PXSTRIDE(x) (x)
  ------------------
 2043|  33.0k|            p[1] - (8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver),
  ------------------
  |  |   53|  33.0k|#define PXSTRIDE(x) (x)
  ------------------
 2044|  33.0k|            p[2] - (8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver),
  ------------------
  |  |   53|  33.0k|#define PXSTRIDE(x) (x)
  ------------------
 2045|  33.0k|        };
 2046|  33.0k|        bytefn(dav1d_cdef_brow)(tc, p_up, prev_mask, start - 2, start, 1, sby);
  ------------------
  |  |   87|  33.0k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  33.0k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2047|  33.0k|    }
 2048|  35.9k|    const int n_blks = sbsz - 2 * (sby + 1 < f->sbh);
 2049|  35.9k|    const int end = imin(start + n_blks, f->bh);
 2050|  35.9k|    bytefn(dav1d_cdef_brow)(tc, p, mask, start, end, 0, sby);
  ------------------
  |  |   87|  35.9k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  35.9k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2051|  35.9k|}
dav1d_filter_sbrow_resize_8bpc:
 2053|  2.92k|void bytefn(dav1d_filter_sbrow_resize)(Dav1dFrameContext *const f, const int sby) {
 2054|  2.92k|    const int sbsz = f->sb_step;
 2055|  2.92k|    const int y = sby * sbsz * 4;
 2056|  2.92k|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
 2057|  2.92k|    const pixel *const p[3] = {
 2058|  2.92k|        f->lf.p[0] + y * PXSTRIDE(f->cur.stride[0]),
  ------------------
  |  |   53|  2.92k|#define PXSTRIDE(x) (x)
  ------------------
 2059|  2.92k|        f->lf.p[1] + (y * PXSTRIDE(f->cur.stride[1]) >> ss_ver),
  ------------------
  |  |   53|  2.92k|#define PXSTRIDE(x) (x)
  ------------------
 2060|  2.92k|        f->lf.p[2] + (y * PXSTRIDE(f->cur.stride[1]) >> ss_ver)
  ------------------
  |  |   53|  2.92k|#define PXSTRIDE(x) (x)
  ------------------
 2061|  2.92k|    };
 2062|  2.92k|    pixel *const sr_p[3] = {
 2063|  2.92k|        f->lf.sr_p[0] + y * PXSTRIDE(f->sr_cur.p.stride[0]),
  ------------------
  |  |   53|  2.92k|#define PXSTRIDE(x) (x)
  ------------------
 2064|  2.92k|        f->lf.sr_p[1] + (y * PXSTRIDE(f->sr_cur.p.stride[1]) >> ss_ver),
  ------------------
  |  |   53|  2.92k|#define PXSTRIDE(x) (x)
  ------------------
 2065|  2.92k|        f->lf.sr_p[2] + (y * PXSTRIDE(f->sr_cur.p.stride[1]) >> ss_ver)
  ------------------
  |  |   53|  2.92k|#define PXSTRIDE(x) (x)
  ------------------
 2066|  2.92k|    };
 2067|  2.92k|    const int has_chroma = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400;
 2068|  10.4k|    for (int pl = 0; pl < 1 + 2 * has_chroma; pl++) {
  ------------------
  |  Branch (2068:22): [True: 7.54k, False: 2.92k]
  ------------------
 2069|  7.54k|        const int ss_ver = pl && f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
  ------------------
  |  Branch (2069:28): [True: 4.61k, False: 2.92k]
  |  Branch (2069:34): [True: 1.59k, False: 3.01k]
  ------------------
 2070|  7.54k|        const int h_start = 8 * !!sby >> ss_ver;
 2071|  7.54k|        const ptrdiff_t dst_stride = f->sr_cur.p.stride[!!pl];
 2072|  7.54k|        pixel *dst = sr_p[pl] - h_start * PXSTRIDE(dst_stride);
  ------------------
  |  |   53|  7.54k|#define PXSTRIDE(x) (x)
  ------------------
 2073|  7.54k|        const ptrdiff_t src_stride = f->cur.stride[!!pl];
 2074|  7.54k|        const pixel *src = p[pl] - h_start * PXSTRIDE(src_stride);
  ------------------
  |  |   53|  7.54k|#define PXSTRIDE(x) (x)
  ------------------
 2075|  7.54k|        const int h_end = 4 * (sbsz - 2 * (sby + 1 < f->sbh)) >> ss_ver;
 2076|  7.54k|        const int ss_hor = pl && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
  ------------------
  |  Branch (2076:28): [True: 4.61k, False: 2.92k]
  |  Branch (2076:34): [True: 2.59k, False: 2.02k]
  ------------------
 2077|  7.54k|        const int dst_w = (f->sr_cur.p.p.w + ss_hor) >> ss_hor;
 2078|  7.54k|        const int src_w = (4 * f->bw + ss_hor) >> ss_hor;
 2079|  7.54k|        const int img_h = (f->cur.p.h - sbsz * 4 * sby + ss_ver) >> ss_ver;
 2080|       |
 2081|  7.54k|        f->dsp->mc.resize(dst, dst_stride, src, src_stride, dst_w,
 2082|  7.54k|                          imin(img_h, h_end) + h_start, src_w,
 2083|  7.54k|                          f->resize_step[!!pl], f->resize_start[!!pl]
 2084|  7.54k|                          HIGHBD_CALL_SUFFIX);
 2085|  7.54k|    }
 2086|  2.92k|}
dav1d_filter_sbrow_lr_8bpc:
 2088|  19.1k|void bytefn(dav1d_filter_sbrow_lr)(Dav1dFrameContext *const f, const int sby) {
 2089|  19.1k|    if (!(f->c->inloop_filters & DAV1D_INLOOPFILTER_RESTORATION)) return;
  ------------------
  |  Branch (2089:9): [True: 0, False: 19.1k]
  ------------------
 2090|  19.1k|    const int y = sby * f->sb_step * 4;
 2091|  19.1k|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
 2092|  19.1k|    pixel *const sr_p[3] = {
 2093|  19.1k|        f->lf.sr_p[0] + y * PXSTRIDE(f->sr_cur.p.stride[0]),
  ------------------
  |  |   53|  19.1k|#define PXSTRIDE(x) (x)
  ------------------
 2094|  19.1k|        f->lf.sr_p[1] + (y * PXSTRIDE(f->sr_cur.p.stride[1]) >> ss_ver),
  ------------------
  |  |   53|  19.1k|#define PXSTRIDE(x) (x)
  ------------------
 2095|  19.1k|        f->lf.sr_p[2] + (y * PXSTRIDE(f->sr_cur.p.stride[1]) >> ss_ver)
  ------------------
  |  |   53|  19.1k|#define PXSTRIDE(x) (x)
  ------------------
 2096|  19.1k|    };
 2097|  19.1k|    bytefn(dav1d_lr_sbrow)(f, sr_p, sby);
  ------------------
  |  |   87|  19.1k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  19.1k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2098|  19.1k|}
dav1d_filter_sbrow_8bpc:
 2100|  76.3k|void bytefn(dav1d_filter_sbrow)(Dav1dFrameContext *const f, const int sby) {
 2101|  76.3k|    bytefn(dav1d_filter_sbrow_deblock_cols)(f, sby);
  ------------------
  |  |   87|  76.3k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  76.3k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2102|  76.3k|    bytefn(dav1d_filter_sbrow_deblock_rows)(f, sby);
  ------------------
  |  |   87|  76.3k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  76.3k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2103|  76.3k|    if (f->seq_hdr->cdef)
  ------------------
  |  Branch (2103:9): [True: 35.9k, False: 40.4k]
  ------------------
 2104|  35.9k|        bytefn(dav1d_filter_sbrow_cdef)(f->c->tc, sby);
  ------------------
  |  |   87|  35.9k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  35.9k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2105|  76.3k|    if (f->frame_hdr->width[0] != f->frame_hdr->width[1])
  ------------------
  |  Branch (2105:9): [True: 2.92k, False: 73.4k]
  ------------------
 2106|  2.92k|        bytefn(dav1d_filter_sbrow_resize)(f, sby);
  ------------------
  |  |   87|  2.92k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  2.92k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2107|  76.3k|    if (f->lf.restore_planes)
  ------------------
  |  Branch (2107:9): [True: 19.1k, False: 57.1k]
  ------------------
 2108|  19.1k|        bytefn(dav1d_filter_sbrow_lr)(f, sby);
  ------------------
  |  |   87|  19.1k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  19.1k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2109|  76.3k|}
dav1d_backup_ipred_edge_8bpc:
 2111|  82.3k|void bytefn(dav1d_backup_ipred_edge)(Dav1dTaskContext *const t) {
 2112|  82.3k|    const Dav1dFrameContext *const f = t->f;
 2113|  82.3k|    Dav1dTileState *const ts = t->ts;
 2114|  82.3k|    const int sby = t->by >> f->sb_shift;
 2115|  82.3k|    const int sby_off = f->sb128w * 128 * sby;
 2116|  82.3k|    const int x_off = ts->tiling.col_start;
 2117|       |
 2118|  82.3k|    const pixel *const y =
 2119|  82.3k|        ((const pixel *) f->cur.data[0]) + x_off * 4 +
 2120|  82.3k|                    ((t->by + f->sb_step) * 4 - 1) * PXSTRIDE(f->cur.stride[0]);
  ------------------
  |  |   53|  82.3k|#define PXSTRIDE(x) (x)
  ------------------
 2121|  82.3k|    pixel_copy(&f->ipred_edge[0][sby_off + x_off * 4], y,
  ------------------
  |  |   47|  82.3k|#define pixel_copy memcpy
  ------------------
 2122|  82.3k|               4 * (ts->tiling.col_end - x_off));
 2123|       |
 2124|  82.3k|    if (f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400) {
  ------------------
  |  Branch (2124:9): [True: 38.8k, False: 43.5k]
  ------------------
 2125|  38.8k|        const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
 2126|  38.8k|        const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
 2127|       |
 2128|  38.8k|        const ptrdiff_t uv_off = (x_off * 4 >> ss_hor) +
 2129|  38.8k|            (((t->by + f->sb_step) * 4 >> ss_ver) - 1) * PXSTRIDE(f->cur.stride[1]);
  ------------------
  |  |   53|  38.8k|#define PXSTRIDE(x) (x)
  ------------------
 2130|   116k|        for (int pl = 1; pl <= 2; pl++)
  ------------------
  |  Branch (2130:26): [True: 77.6k, False: 38.8k]
  ------------------
 2131|  77.6k|            pixel_copy(&f->ipred_edge[pl][sby_off + (x_off * 4 >> ss_hor)],
  ------------------
  |  |   47|  77.6k|#define pixel_copy memcpy
  ------------------
 2132|  77.6k|                       &((const pixel *) f->cur.data[pl])[uv_off],
 2133|  77.6k|                       4 * (ts->tiling.col_end - x_off) >> ss_hor);
 2134|  38.8k|    }
 2135|  82.3k|}
dav1d_copy_pal_block_y_8bpc:
 2141|  20.5k|{
 2142|  20.5k|    const Dav1dFrameContext *const f = t->f;
 2143|  20.5k|    pixel *const pal = t->frame_thread.pass ?
  ------------------
  |  Branch (2143:24): [True: 0, False: 20.5k]
  ------------------
 2144|      0|        f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
 2145|      0|                            ((t->bx >> 1) + (t->by & 1))][0] :
 2146|  20.5k|        bytefn(t->scratch.pal)[0];
  ------------------
  |  |   87|  20.5k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  20.5k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2147|  95.4k|    for (int x = 0; x < bw4; x++)
  ------------------
  |  Branch (2147:21): [True: 74.9k, False: 20.5k]
  ------------------
 2148|  74.9k|        memcpy(bytefn(t->al_pal)[0][bx4 + x][0], pal, 8 * sizeof(pixel));
  ------------------
  |  |   87|  74.9k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  74.9k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2149|  84.1k|    for (int y = 0; y < bh4; y++)
  ------------------
  |  Branch (2149:21): [True: 63.5k, False: 20.5k]
  ------------------
 2150|  63.5k|        memcpy(bytefn(t->al_pal)[1][by4 + y][0], pal, 8 * sizeof(pixel));
  ------------------
  |  |   87|  63.5k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  63.5k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2151|  20.5k|}
dav1d_copy_pal_block_uv_8bpc:
 2157|  7.20k|{
 2158|  7.20k|    const Dav1dFrameContext *const f = t->f;
 2159|  7.20k|    const pixel (*const pal)[8] = t->frame_thread.pass ?
  ------------------
  |  Branch (2159:35): [True: 0, False: 7.20k]
  ------------------
 2160|      0|        f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
 2161|      0|                            ((t->bx >> 1) + (t->by & 1))] :
 2162|  7.20k|        bytefn(t->scratch.pal);
  ------------------
  |  |   87|  7.20k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  7.20k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2163|       |    // see aomedia bug 2183 for why we use luma coordinates here
 2164|  21.6k|    for (int pl = 1; pl <= 2; pl++) {
  ------------------
  |  Branch (2164:22): [True: 14.4k, False: 7.20k]
  ------------------
 2165|  67.0k|        for (int x = 0; x < bw4; x++)
  ------------------
  |  Branch (2165:25): [True: 52.6k, False: 14.4k]
  ------------------
 2166|  52.6k|            memcpy(bytefn(t->al_pal)[0][bx4 + x][pl], pal[pl], 8 * sizeof(pixel));
  ------------------
  |  |   87|  52.6k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  52.6k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2167|  65.6k|        for (int y = 0; y < bh4; y++)
  ------------------
  |  Branch (2167:25): [True: 51.2k, False: 14.4k]
  ------------------
 2168|  51.2k|            memcpy(bytefn(t->al_pal)[1][by4 + y][pl], pal[pl], 8 * sizeof(pixel));
  ------------------
  |  |   87|  51.2k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  51.2k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2169|  14.4k|    }
 2170|  7.20k|}
dav1d_read_pal_plane_8bpc:
 2175|  27.7k|{
 2176|  27.7k|    Dav1dTileState *const ts = t->ts;
 2177|  27.7k|    const Dav1dFrameContext *const f = t->f;
 2178|  27.7k|    const int pal_sz = b->pal_sz[pl] = dav1d_msac_decode_symbol_adapt8(&ts->msac,
  ------------------
  |  |   48|  27.7k|#define dav1d_msac_decode_symbol_adapt8  dav1d_msac_decode_symbol_adapt8_sse2
  ------------------
 2179|  27.7k|                                           ts->cdf.m.pal_sz[pl][sz_ctx], 6) + 2;
 2180|  27.7k|    pixel cache[16], used_cache[8];
 2181|  27.7k|    int l_cache = pl ? t->pal_sz_uv[1][by4] : t->l.pal_sz[by4];
  ------------------
  |  Branch (2181:19): [True: 7.20k, False: 20.5k]
  ------------------
 2182|  27.7k|    int n_cache = 0;
 2183|       |    // don't reuse above palette outside SB64 boundaries
 2184|  27.7k|    int a_cache = by4 & 15 ? pl ? t->pal_sz_uv[0][bx4] : t->a->pal_sz[bx4] : 0;
  ------------------
  |  Branch (2184:19): [True: 23.7k, False: 3.96k]
  |  Branch (2184:30): [True: 5.98k, False: 17.7k]
  ------------------
 2185|  27.7k|    const pixel *l = bytefn(t->al_pal)[1][by4][pl];
  ------------------
  |  |   87|  27.7k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  27.7k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2186|  27.7k|    const pixel *a = bytefn(t->al_pal)[0][bx4][pl];
  ------------------
  |  |   87|  27.7k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  27.7k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2187|       |
 2188|       |    // fill/sort cache
 2189|  63.3k|    while (l_cache && a_cache) {
  ------------------
  |  Branch (2189:12): [True: 45.2k, False: 18.0k]
  |  Branch (2189:23): [True: 35.5k, False: 9.70k]
  ------------------
 2190|  35.5k|        if (*l < *a) {
  ------------------
  |  Branch (2190:13): [True: 12.8k, False: 22.6k]
  ------------------
 2191|  12.8k|            if (!n_cache || cache[n_cache - 1] != *l)
  ------------------
  |  Branch (2191:17): [True: 2.05k, False: 10.8k]
  |  Branch (2191:29): [True: 10.5k, False: 323]
  ------------------
 2192|  12.5k|                cache[n_cache++] = *l;
 2193|  12.8k|            l++;
 2194|  12.8k|            l_cache--;
 2195|  22.6k|        } else {
 2196|  22.6k|            if (*a == *l) {
  ------------------
  |  Branch (2196:17): [True: 9.51k, False: 13.1k]
  ------------------
 2197|  9.51k|                l++;
 2198|  9.51k|                l_cache--;
 2199|  9.51k|            }
 2200|  22.6k|            if (!n_cache || cache[n_cache - 1] != *a)
  ------------------
  |  Branch (2200:17): [True: 3.91k, False: 18.7k]
  |  Branch (2200:29): [True: 17.6k, False: 1.15k]
  ------------------
 2201|  21.5k|                cache[n_cache++] = *a;
 2202|  22.6k|            a++;
 2203|  22.6k|            a_cache--;
 2204|  22.6k|        }
 2205|  35.5k|    }
 2206|  27.7k|    if (l_cache) {
  ------------------
  |  Branch (2206:9): [True: 9.70k, False: 18.0k]
  ------------------
 2207|  38.1k|        do {
 2208|  38.1k|            if (!n_cache || cache[n_cache - 1] != *l)
  ------------------
  |  Branch (2208:17): [True: 7.05k, False: 31.1k]
  |  Branch (2208:29): [True: 24.8k, False: 6.21k]
  ------------------
 2209|  31.9k|                cache[n_cache++] = *l;
 2210|  38.1k|            l++;
 2211|  38.1k|        } while (--l_cache > 0);
  ------------------
  |  Branch (2211:18): [True: 28.4k, False: 9.70k]
  ------------------
 2212|  18.0k|    } else if (a_cache) {
  ------------------
  |  Branch (2212:16): [True: 7.04k, False: 10.9k]
  ------------------
 2213|  29.6k|        do {
 2214|  29.6k|            if (!n_cache || cache[n_cache - 1] != *a)
  ------------------
  |  Branch (2214:17): [True: 4.72k, False: 24.9k]
  |  Branch (2214:29): [True: 18.6k, False: 6.32k]
  ------------------
 2215|  23.3k|                cache[n_cache++] = *a;
 2216|  29.6k|            a++;
 2217|  29.6k|        } while (--a_cache > 0);
  ------------------
  |  Branch (2217:18): [True: 22.6k, False: 7.04k]
  ------------------
 2218|  7.04k|    }
 2219|       |
 2220|       |    // find reused cache entries
 2221|  27.7k|    int i = 0;
 2222|   106k|    for (int n = 0; n < n_cache && i < pal_sz; n++)
  ------------------
  |  Branch (2222:21): [True: 82.6k, False: 24.1k]
  |  Branch (2222:36): [True: 79.0k, False: 3.56k]
  ------------------
 2223|  79.0k|        if (dav1d_msac_decode_bool_equi(&ts->msac))
  ------------------
  |  |   53|  79.0k|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
  |  Branch (2223:13): [True: 39.9k, False: 39.1k]
  ------------------
 2224|  39.9k|            used_cache[i++] = cache[n];
 2225|  27.7k|    const int n_used_cache = i;
 2226|       |
 2227|       |    // parse new entries
 2228|  27.7k|    pixel *const pal = t->frame_thread.pass ?
  ------------------
  |  Branch (2228:24): [True: 0, False: 27.7k]
  ------------------
 2229|      0|        f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
 2230|      0|                            ((t->bx >> 1) + (t->by & 1))][pl] :
 2231|  27.7k|        bytefn(t->scratch.pal)[pl];
  ------------------
  |  |   87|  27.7k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  27.7k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2232|  27.7k|    if (i < pal_sz) {
  ------------------
  |  Branch (2232:9): [True: 22.6k, False: 5.03k]
  ------------------
 2233|  22.6k|        const int bpc = BITDEPTH == 8 ? 8 : f->cur.p.bpc;
  ------------------
  |  Branch (2233:25): [True: 22.6k, Folded]
  ------------------
 2234|  22.6k|        int prev = pal[i++] = dav1d_msac_decode_bools(&ts->msac, bpc);
 2235|       |
 2236|  22.6k|        if (i < pal_sz) {
  ------------------
  |  Branch (2236:13): [True: 19.5k, False: 3.10k]
  ------------------
 2237|  19.5k|            int bits = bpc - 3 + dav1d_msac_decode_bools(&ts->msac, 2);
 2238|  19.5k|            const int max = (1 << bpc) - 1;
 2239|       |
 2240|  43.8k|            do {
 2241|  43.8k|                const int delta = dav1d_msac_decode_bools(&ts->msac, bits);
 2242|  43.8k|                prev = pal[i++] = imin(prev + delta + !pl, max);
 2243|  43.8k|                if (prev + !pl >= max) {
  ------------------
  |  Branch (2243:21): [True: 9.17k, False: 34.6k]
  ------------------
 2244|  25.4k|                    for (; i < pal_sz; i++)
  ------------------
  |  Branch (2244:28): [True: 16.3k, False: 9.17k]
  ------------------
 2245|  16.3k|                        pal[i] = max;
 2246|  9.17k|                    break;
 2247|  9.17k|                }
 2248|  34.6k|                bits = imin(bits, 1 + ulog2(max - prev - !pl));
 2249|  34.6k|            } while (i < pal_sz);
  ------------------
  |  Branch (2249:22): [True: 24.2k, False: 10.4k]
  ------------------
 2250|  19.5k|        }
 2251|       |
 2252|       |        // merge cache+new entries
 2253|  22.6k|        int n = 0, m = n_used_cache;
 2254|   130k|        for (i = 0; i < pal_sz; i++) {
  ------------------
  |  Branch (2254:21): [True: 107k, False: 22.6k]
  ------------------
 2255|   107k|            if (n < n_used_cache && (m >= pal_sz || used_cache[n] <= pal[m])) {
  ------------------
  |  Branch (2255:17): [True: 40.3k, False: 67.5k]
  |  Branch (2255:38): [True: 8.66k, False: 31.6k]
  |  Branch (2255:53): [True: 16.4k, False: 15.2k]
  ------------------
 2256|  25.0k|                pal[i] = used_cache[n++];
 2257|  82.8k|            } else {
 2258|  82.8k|                assert(m < pal_sz);
  ------------------
  |  Branch (2258:17): [True: 82.8k, False: 0]
  ------------------
 2259|  82.8k|                pal[i] = pal[m++];
 2260|  82.8k|            }
 2261|   107k|        }
 2262|  22.6k|    } else {
 2263|  5.03k|        memcpy(pal, used_cache, n_used_cache * sizeof(*used_cache));
 2264|  5.03k|    }
 2265|       |
 2266|  27.7k|    if (DEBUG_BLOCK_INFO) {
  ------------------
  |  |   34|  27.7k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 27.7k]
  |  |  ------------------
  |  |   35|  27.7k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  27.7k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 2267|      0|        printf("Post-pal[pl=%d,sz=%d,cache_size=%d,used_cache=%d]: r=%d, cache=",
 2268|      0|               pl, pal_sz, n_cache, n_used_cache, ts->msac.rng);
 2269|      0|        for (int n = 0; n < n_cache; n++)
  ------------------
  |  Branch (2269:25): [True: 0, False: 0]
  ------------------
 2270|      0|            printf("%c%02x", n ? ' ' : '[', cache[n]);
  ------------------
  |  Branch (2270:30): [True: 0, False: 0]
  ------------------
 2271|      0|        printf("%s, pal=", n_cache ? "]" : "[]");
  ------------------
  |  Branch (2271:28): [True: 0, False: 0]
  ------------------
 2272|      0|        for (int n = 0; n < pal_sz; n++)
  ------------------
  |  Branch (2272:25): [True: 0, False: 0]
  ------------------
 2273|      0|            printf("%c%02x", n ? ' ' : '[', pal[n]);
  ------------------
  |  Branch (2273:30): [True: 0, False: 0]
  ------------------
 2274|      0|        printf("]\n");
 2275|      0|    }
 2276|  27.7k|}
dav1d_read_pal_uv_8bpc:
 2280|  7.20k|{
 2281|  7.20k|    bytefn(dav1d_read_pal_plane)(t, b, 1, sz_ctx, bx4, by4);
  ------------------
  |  |   87|  7.20k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  7.20k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2282|       |
 2283|       |    // V pal coding
 2284|  7.20k|    Dav1dTileState *const ts = t->ts;
 2285|  7.20k|    const Dav1dFrameContext *const f = t->f;
 2286|  7.20k|    pixel *const pal = t->frame_thread.pass ?
  ------------------
  |  Branch (2286:24): [True: 0, False: 7.20k]
  ------------------
 2287|      0|        f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
 2288|      0|                            ((t->bx >> 1) + (t->by & 1))][2] :
 2289|  7.20k|        bytefn(t->scratch.pal)[2];
  ------------------
  |  |   87|  7.20k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  7.20k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2290|  7.20k|    const int bpc = BITDEPTH == 8 ? 8 : f->cur.p.bpc;
  ------------------
  |  Branch (2290:21): [True: 7.20k, Folded]
  ------------------
 2291|  7.20k|    if (dav1d_msac_decode_bool_equi(&ts->msac)) {
  ------------------
  |  |   53|  7.20k|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
  |  Branch (2291:9): [True: 4.15k, False: 3.05k]
  ------------------
 2292|  4.15k|        const int bits = bpc - 4 + dav1d_msac_decode_bools(&ts->msac, 2);
 2293|  4.15k|        int prev = pal[0] = dav1d_msac_decode_bools(&ts->msac, bpc);
 2294|  4.15k|        const int max = (1 << bpc) - 1;
 2295|  16.3k|        for (int i = 1; i < b->pal_sz[1]; i++) {
  ------------------
  |  Branch (2295:25): [True: 12.1k, False: 4.15k]
  ------------------
 2296|  12.1k|            int delta = dav1d_msac_decode_bools(&ts->msac, bits);
 2297|  12.1k|            if (delta && dav1d_msac_decode_bool_equi(&ts->msac)) delta = -delta;
  ------------------
  |  |   53|  11.6k|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
  |  Branch (2297:17): [True: 11.6k, False: 524]
  |  Branch (2297:26): [True: 4.85k, False: 6.79k]
  ------------------
 2298|  12.1k|            prev = pal[i] = (prev + delta) & max;
 2299|  12.1k|        }
 2300|  4.15k|    } else {
 2301|  14.5k|        for (int i = 0; i < b->pal_sz[1]; i++)
  ------------------
  |  Branch (2301:25): [True: 11.5k, False: 3.05k]
  ------------------
 2302|  11.5k|            pal[i] = dav1d_msac_decode_bools(&ts->msac, bpc);
 2303|  3.05k|    }
 2304|  7.20k|    if (DEBUG_BLOCK_INFO) {
  ------------------
  |  |   34|  7.20k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 7.20k]
  |  |  ------------------
  |  |   35|  7.20k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  7.20k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 2305|      0|        printf("Post-pal[pl=2]: r=%d ", ts->msac.rng);
 2306|      0|        for (int n = 0; n < b->pal_sz[1]; n++)
  ------------------
  |  Branch (2306:25): [True: 0, False: 0]
  ------------------
 2307|      0|            printf("%c%02x", n ? ' ' : '[', pal[n]);
  ------------------
  |  Branch (2307:30): [True: 0, False: 0]
  ------------------
 2308|      0|        printf("]\n");
 2309|      0|    }
 2310|  7.20k|}
recon_tmpl.c:read_coef_tree:
  736|  1.10M|{
  737|  1.10M|    const Dav1dFrameContext *const f = t->f;
  738|  1.10M|    Dav1dTileState *const ts = t->ts;
  739|  1.10M|    const Dav1dDSPContext *const dsp = f->dsp;
  740|  1.10M|    const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[ytx];
  741|  1.10M|    const int txw = t_dim->w, txh = t_dim->h;
  742|       |
  743|       |    /* y_off can be larger than 3 since lossless blocks use TX_4X4 but can't
  744|       |     * be splitted. Aviods an undefined left shift. */
  745|  1.10M|    if (depth < 2 && tx_split[depth] &&
  ------------------
  |  Branch (745:9): [True: 1.02M, False: 81.2k]
  |  Branch (745:22): [True: 95.2k, False: 925k]
  ------------------
  746|  95.2k|        tx_split[depth] & (1 << (y_off * 4 + x_off)))
  ------------------
  |  Branch (746:9): [True: 72.6k, False: 22.6k]
  ------------------
  747|  72.6k|    {
  748|  72.6k|        const enum RectTxfmSize sub = t_dim->sub;
  749|  72.6k|        const TxfmInfo *const sub_t_dim = &dav1d_txfm_dimensions[sub];
  750|  72.6k|        const int txsw = sub_t_dim->w, txsh = sub_t_dim->h;
  751|       |
  752|  72.6k|        read_coef_tree(t, bs, b, sub, depth + 1, tx_split,
  753|  72.6k|                       x_off * 2 + 0, y_off * 2 + 0, dst);
  754|  72.6k|        t->bx += txsw;
  755|  72.6k|        if (txw >= txh && t->bx < f->bw)
  ------------------
  |  Branch (755:13): [True: 57.7k, False: 14.8k]
  |  Branch (755:27): [True: 57.2k, False: 559]
  ------------------
  756|  57.2k|            read_coef_tree(t, bs, b, sub, depth + 1, tx_split, x_off * 2 + 1,
  757|  57.2k|                           y_off * 2 + 0, dst ? &dst[4 * txsw] : NULL);
  ------------------
  |  Branch (757:43): [True: 57.2k, False: 0]
  ------------------
  758|  72.6k|        t->bx -= txsw;
  759|  72.6k|        t->by += txsh;
  760|  72.6k|        if (txh >= txw && t->by < f->bh) {
  ------------------
  |  Branch (760:13): [True: 51.7k, False: 20.9k]
  |  Branch (760:27): [True: 50.6k, False: 1.08k]
  ------------------
  761|  50.6k|            if (dst)
  ------------------
  |  Branch (761:17): [True: 50.6k, False: 0]
  ------------------
  762|  50.6k|                dst += 4 * txsh * PXSTRIDE(f->cur.stride[0]);
  ------------------
  |  |   53|  50.6k|#define PXSTRIDE(x) (x)
  ------------------
  763|  50.6k|            read_coef_tree(t, bs, b, sub, depth + 1, tx_split,
  764|  50.6k|                           x_off * 2 + 0, y_off * 2 + 1, dst);
  765|  50.6k|            t->bx += txsw;
  766|  50.6k|            if (txw >= txh && t->bx < f->bw)
  ------------------
  |  Branch (766:17): [True: 35.8k, False: 14.7k]
  |  Branch (766:31): [True: 35.2k, False: 541]
  ------------------
  767|  35.2k|                read_coef_tree(t, bs, b, sub, depth + 1, tx_split, x_off * 2 + 1,
  768|  35.2k|                               y_off * 2 + 1, dst ? &dst[4 * txsw] : NULL);
  ------------------
  |  Branch (768:47): [True: 35.2k, False: 0]
  ------------------
  769|  50.6k|            t->bx -= txsw;
  770|  50.6k|        }
  771|  72.6k|        t->by -= txsh;
  772|  1.02M|    } else {
  773|  1.02M|        const int bx4 = t->bx & 31, by4 = t->by & 31;
  774|  1.02M|        enum TxfmType txtp;
  775|  1.02M|        uint8_t cf_ctx;
  776|  1.02M|        int eob;
  777|  1.02M|        coef *cf;
  778|       |
  779|  1.02M|        if (t->frame_thread.pass) {
  ------------------
  |  Branch (779:13): [True: 0, False: 1.02M]
  ------------------
  780|      0|            const int p = t->frame_thread.pass & 1;
  781|      0|            assert(ts->frame_thread[p].cf);
  ------------------
  |  Branch (781:13): [True: 0, False: 0]
  ------------------
  782|      0|            cf = ts->frame_thread[p].cf;
  783|      0|            ts->frame_thread[p].cf += imin(t_dim->w, 8) * imin(t_dim->h, 8) * 16;
  784|  1.02M|        } else {
  785|  1.02M|            cf = bitfn(t->cf);
  ------------------
  |  |   51|  1.02M|#define bitfn(x) x##_8bpc
  ------------------
  786|  1.02M|        }
  787|  1.02M|        if (t->frame_thread.pass != 2) {
  ------------------
  |  Branch (787:13): [True: 1.02M, False: 0]
  ------------------
  788|  1.02M|            eob = decode_coefs(t, &t->a->lcoef[bx4], &t->l.lcoef[by4],
  789|  1.02M|                               ytx, bs, b, 0, 0, cf, &txtp, &cf_ctx);
  790|  1.02M|            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  1.02M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 1.02M]
  |  |  ------------------
  |  |   35|  1.02M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  1.02M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  791|      0|                printf("Post-y-cf-blk[tx=%d,txtp=%d,eob=%d]: r=%d\n",
  792|      0|                       ytx, txtp, eob, ts->msac.rng);
  793|  1.02M|            dav1d_memset_likely_pow2(&t->a->lcoef[bx4], cf_ctx, imin(txw, f->bw - t->bx));
  794|  1.02M|            dav1d_memset_likely_pow2(&t->l.lcoef[by4], cf_ctx, imin(txh, f->bh - t->by));
  795|  1.02M|#define set_ctx(rep_macro) \
  796|  1.02M|            for (int y = 0; y < txh; y++) { \
  797|  1.02M|                rep_macro(txtp_map, 0, txtp); \
  798|  1.02M|                txtp_map += 32; \
  799|  1.02M|            }
  800|  1.02M|            uint8_t *txtp_map = &t->scratch.txtp_map[by4 * 32 + bx4];
  801|  1.02M|            case_set_upto16(t_dim->lw);
  ------------------
  |  |   80|  1.02M|    switch (var) { \
  |  |   81|   487k|    case 0: set_ctx(set_ctx1); break; \
  |  |  ------------------
  |  |  |  |  796|  1.05M|            for (int y = 0; y < txh; y++) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (796:29): [True: 562k, False: 487k]
  |  |  |  |  ------------------
  |  |  |  |  797|   562k|                rep_macro(txtp_map, 0, txtp); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   81|   562k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   562k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  798|   562k|                txtp_map += 32; \
  |  |  |  |  799|   562k|            }
  |  |  ------------------
  |  |  |  Branch (81:5): [True: 487k, False: 542k]
  |  |  ------------------
  |  |   82|   257k|    case 1: set_ctx(set_ctx2); break; \
  |  |  ------------------
  |  |  |  |  796|   866k|            for (int y = 0; y < txh; y++) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (796:29): [True: 608k, False: 257k]
  |  |  |  |  ------------------
  |  |  |  |  797|   608k|                rep_macro(txtp_map, 0, txtp); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   82|   608k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   608k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  798|   608k|                txtp_map += 32; \
  |  |  |  |  799|   608k|            }
  |  |  ------------------
  |  |  |  Branch (82:5): [True: 257k, False: 771k]
  |  |  ------------------
  |  |   83|   182k|    case 2: set_ctx(set_ctx4); break; \
  |  |  ------------------
  |  |  |  |  796|   760k|            for (int y = 0; y < txh; y++) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (796:29): [True: 578k, False: 182k]
  |  |  |  |  ------------------
  |  |  |  |  797|   578k|                rep_macro(txtp_map, 0, txtp); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   83|   578k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   578k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  798|   578k|                txtp_map += 32; \
  |  |  |  |  799|   578k|            }
  |  |  ------------------
  |  |  |  Branch (83:5): [True: 182k, False: 847k]
  |  |  ------------------
  |  |   84|  59.2k|    case 3: set_ctx(set_ctx8); break; \
  |  |  ------------------
  |  |  |  |  796|   390k|            for (int y = 0; y < txh; y++) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (796:29): [True: 331k, False: 59.2k]
  |  |  |  |  ------------------
  |  |  |  |  797|   331k|                rep_macro(txtp_map, 0, txtp); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|   331k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   331k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  798|   331k|                txtp_map += 32; \
  |  |  |  |  799|   331k|            }
  |  |  ------------------
  |  |  |  Branch (84:5): [True: 59.2k, False: 970k]
  |  |  ------------------
  |  |   85|  42.8k|    case 4: set_ctx(set_ctx16); break; \
  |  |  ------------------
  |  |  |  |  796|   640k|            for (int y = 0; y < txh; y++) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (796:29): [True: 597k, False: 42.8k]
  |  |  |  |  ------------------
  |  |  |  |  797|   597k|                rep_macro(txtp_map, 0, txtp); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   85|   597k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   597k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   597k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   597k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 597k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  798|   597k|                txtp_map += 32; \
  |  |  |  |  799|   597k|            }
  |  |  ------------------
  |  |  |  Branch (85:5): [True: 42.8k, False: 986k]
  |  |  ------------------
  |  |   86|      0|    default: assert(0); \
  |  |  ------------------
  |  |  |  Branch (86:5): [True: 0, False: 1.02M]
  |  |  ------------------
  |  |   87|  1.02M|    }
  ------------------
  |  Branch (801:13): [Folded, False: 0]
  ------------------
  802|  1.02M|#undef set_ctx
  803|  1.02M|            if (t->frame_thread.pass == 1)
  ------------------
  |  Branch (803:17): [True: 0, False: 1.02M]
  ------------------
  804|      0|                *ts->frame_thread[1].cbi++ = eob * (1 << 5) + txtp;
  805|  1.02M|        } else {
  806|      0|            const int cbi = *ts->frame_thread[0].cbi++;
  807|      0|            eob  = cbi >> 5;
  808|      0|            txtp = cbi & 0x1f;
  809|      0|        }
  810|  1.02M|        if (!(t->frame_thread.pass & 1)) {
  ------------------
  |  Branch (810:13): [True: 1.02M, False: 0]
  ------------------
  811|  1.02M|            assert(dst);
  ------------------
  |  Branch (811:13): [True: 1.02M, False: 0]
  ------------------
  812|  1.02M|            if (eob >= 0) {
  ------------------
  |  Branch (812:17): [True: 739k, False: 289k]
  ------------------
  813|   739k|                if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   34|   739k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 739k]
  |  |  ------------------
  |  |   35|   739k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   739k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                              if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
  814|      0|                    coef_dump(cf, imin(t_dim->h, 8) * 4, imin(t_dim->w, 8) * 4, 3, "dq");
  815|   739k|                dsp->itx.itxfm_add[ytx][txtp](dst, f->cur.stride[0], cf, eob
  816|   739k|                                              HIGHBD_CALL_SUFFIX);
  817|   739k|                if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   34|   739k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 739k]
  |  |  ------------------
  |  |   35|   739k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   739k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                              if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
  818|      0|                    hex_dump(dst, f->cur.stride[0], t_dim->w * 4, t_dim->h * 4, "recon");
  819|   739k|            }
  820|  1.02M|        }
  821|  1.02M|    }
  822|  1.10M|}
recon_tmpl.c:decode_coefs:
  327|  7.65M|{
  328|  7.65M|    Dav1dTileState *const ts = t->ts;
  329|  7.65M|    const int chroma = !!plane;
  330|  7.65M|    const Dav1dFrameContext *const f = t->f;
  331|  7.65M|    const int lossless = f->frame_hdr->segmentation.lossless[b->seg_id];
  332|  7.65M|    const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[tx];
  333|  7.65M|    const int dbg = DEBUG_BLOCK_INFO && plane && 0;
  ------------------
  |  |   34|  7.65M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 7.65M]
  |  |  ------------------
  |  |   35|  7.65M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  7.65M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  |  Branch (333:41): [True: 0, False: 0]
  |  Branch (333:50): [Folded, False: 0]
  ------------------
  334|       |
  335|  7.65M|    if (dbg)
  ------------------
  |  Branch (335:9): [Folded, False: 7.65M]
  ------------------
  336|      0|        printf("Start: r=%d\n", ts->msac.rng);
  337|       |
  338|       |    // does this block have any non-zero coefficients
  339|  7.65M|    const int sctx = get_skip_ctx(t_dim, bs, a, l, chroma, f->cur.p.layout);
  340|  7.65M|    const int all_skip = dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  7.65M|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  341|  7.65M|                             ts->cdf.coef.skip[t_dim->ctx][sctx]);
  342|  7.65M|    if (dbg)
  ------------------
  |  Branch (342:9): [Folded, False: 7.65M]
  ------------------
  343|      0|        printf("Post-non-zero[%d][%d][%d]: r=%d\n",
  344|      0|               t_dim->ctx, sctx, all_skip, ts->msac.rng);
  345|  7.65M|    if (all_skip) {
  ------------------
  |  Branch (345:9): [True: 3.95M, False: 3.69M]
  ------------------
  346|  3.95M|        *res_ctx = 0x40;
  347|  3.95M|        *txtp = lossless * WHT_WHT; /* lossless ? WHT_WHT : DCT_DCT */
  348|  3.95M|        return -1;
  349|  3.95M|    }
  350|       |
  351|       |    // transform type (chroma: derived, luma: explicitly coded)
  352|  3.69M|    if (lossless) {
  ------------------
  |  Branch (352:9): [True: 600k, False: 3.09M]
  ------------------
  353|   600k|        assert(t_dim->max == TX_4X4);
  ------------------
  |  Branch (353:9): [True: 600k, False: 0]
  ------------------
  354|   600k|        *txtp = WHT_WHT;
  355|  3.09M|    } else if (t_dim->max + intra >= TX_64X64) {
  ------------------
  |  Branch (355:16): [True: 684k, False: 2.41M]
  ------------------
  356|   684k|        *txtp = DCT_DCT;
  357|  2.41M|    } else if (chroma) {
  ------------------
  |  Branch (357:16): [True: 602k, False: 1.81M]
  ------------------
  358|       |        // inferred from either the luma txtp (inter) or a LUT (intra)
  359|   602k|        *txtp = intra ? dav1d_txtp_from_uvmode[b->uv_mode] :
  ------------------
  |  Branch (359:17): [True: 375k, False: 226k]
  ------------------
  360|   602k|                        get_uv_inter_txtp(t_dim, *txtp);
  361|  1.81M|    } else if (!f->frame_hdr->segmentation.qidx[b->seg_id]) {
  ------------------
  |  Branch (361:16): [True: 9.63k, False: 1.80M]
  ------------------
  362|       |        // In libaom, lossless is checked by a literal qidx == 0, but not all
  363|       |        // such blocks are actually lossless. The remainder gets an implicit
  364|       |        // transform type (for luma)
  365|  9.63k|        *txtp = DCT_DCT;
  366|  1.80M|    } else {
  367|  1.80M|        unsigned idx;
  368|  1.80M|        if (intra) {
  ------------------
  |  Branch (368:13): [True: 1.26M, False: 537k]
  ------------------
  369|  1.26M|            const enum IntraPredMode y_mode_nofilt = b->y_mode == FILTER_PRED ?
  ------------------
  |  Branch (369:54): [True: 254k, False: 1.00M]
  ------------------
  370|  1.00M|                dav1d_filter_mode_to_y_mode[b->y_angle] : b->y_mode;
  371|  1.26M|            if (f->frame_hdr->reduced_txtp_set || t_dim->min == TX_16X16) {
  ------------------
  |  Branch (371:17): [True: 205k, False: 1.05M]
  |  Branch (371:51): [True: 166k, False: 891k]
  ------------------
  372|   372k|                idx = dav1d_msac_decode_symbol_adapt8(&ts->msac,
  ------------------
  |  |   48|   372k|#define dav1d_msac_decode_symbol_adapt8  dav1d_msac_decode_symbol_adapt8_sse2
  ------------------
  373|   372k|                          ts->cdf.m.txtp_intra2[t_dim->min][y_mode_nofilt], 4);
  374|   372k|                *txtp = dav1d_tx_types_per_set[idx + 0];
  375|   891k|            } else {
  376|   891k|                idx = dav1d_msac_decode_symbol_adapt8(&ts->msac,
  ------------------
  |  |   48|   891k|#define dav1d_msac_decode_symbol_adapt8  dav1d_msac_decode_symbol_adapt8_sse2
  ------------------
  377|   891k|                          ts->cdf.m.txtp_intra1[t_dim->min][y_mode_nofilt], 6);
  378|   891k|                *txtp = dav1d_tx_types_per_set[idx + 5];
  379|   891k|            }
  380|  1.26M|            if (dbg)
  ------------------
  |  Branch (380:17): [Folded, False: 1.26M]
  ------------------
  381|      0|                printf("Post-txtp-intra[%d->%d][%d][%d->%d]: r=%d\n",
  382|      0|                       tx, t_dim->min, y_mode_nofilt, idx, *txtp, ts->msac.rng);
  383|  1.26M|        } else {
  384|   537k|            if (f->frame_hdr->reduced_txtp_set || t_dim->max == TX_32X32) {
  ------------------
  |  Branch (384:17): [True: 78.2k, False: 459k]
  |  Branch (384:51): [True: 55.7k, False: 403k]
  ------------------
  385|   134k|                idx = dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   134k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  386|   134k|                          ts->cdf.m.txtp_inter3[t_dim->min]);
  387|   134k|                *txtp = (idx - 1) & IDTX; /* idx ? DCT_DCT : IDTX */
  388|   403k|            } else if (t_dim->min == TX_16X16) {
  ------------------
  |  Branch (388:24): [True: 57.3k, False: 346k]
  ------------------
  389|  57.3k|                idx = dav1d_msac_decode_symbol_adapt16(&ts->msac,
  ------------------
  |  |   57|  57.3k|#define dav1d_msac_decode_symbol_adapt16(ctx, cdf, symb) ((ctx)->symbol_adapt16(ctx, cdf, symb))
  ------------------
  390|  57.3k|                          ts->cdf.m.txtp_inter2, 11);
  391|  57.3k|                *txtp = dav1d_tx_types_per_set[idx + 12];
  392|   346k|            } else {
  393|   346k|                idx = dav1d_msac_decode_symbol_adapt16(&ts->msac,
  ------------------
  |  |   57|   346k|#define dav1d_msac_decode_symbol_adapt16(ctx, cdf, symb) ((ctx)->symbol_adapt16(ctx, cdf, symb))
  ------------------
  394|   346k|                          ts->cdf.m.txtp_inter1[t_dim->min], 15);
  395|   346k|                *txtp = dav1d_tx_types_per_set[idx + 24];
  396|   346k|            }
  397|   537k|            if (dbg)
  ------------------
  |  Branch (397:17): [Folded, False: 537k]
  ------------------
  398|      0|                printf("Post-txtp-inter[%d->%d][%d->%d]: r=%d\n",
  399|      0|                       tx, t_dim->min, idx, *txtp, ts->msac.rng);
  400|   537k|        }
  401|  1.80M|    }
  402|       |
  403|       |    // find end-of-block (eob)
  404|  3.69M|    int eob;
  405|  3.69M|    const int slw = imin(t_dim->lw, TX_32X32), slh = imin(t_dim->lh, TX_32X32);
  406|  3.69M|    const int tx2dszctx = slw + slh;
  407|  3.69M|    const enum TxClass tx_class = dav1d_tx_type_class[*txtp];
  408|  3.69M|    const int is_1d = tx_class != TX_CLASS_2D;
  409|  3.69M|    switch (tx2dszctx) {
  ------------------
  |  Branch (409:13): [True: 3.69M, False: 0]
  ------------------
  410|      0|#define case_sz(sz, bin, ns, is_1d) \
  411|      0|    case sz: { \
  412|      0|        uint16_t *const eob_bin_cdf = ts->cdf.coef.eob_bin_##bin[chroma]is_1d; \
  413|      0|        eob = dav1d_msac_decode_symbol_adapt##ns(&ts->msac, eob_bin_cdf, 4 + sz); \
  414|      0|        break; \
  415|      0|    }
  416|  1.02M|    case_sz(0,   16,  8, [is_1d]);
  ------------------
  |  |  411|  1.02M|    case sz: { \
  |  |  ------------------
  |  |  |  Branch (411:5): [True: 1.02M, False: 2.67M]
  |  |  ------------------
  |  |  412|  1.02M|        uint16_t *const eob_bin_cdf = ts->cdf.coef.eob_bin_##bin[chroma]is_1d; \
  |  |  413|  1.02M|        eob = dav1d_msac_decode_symbol_adapt##ns(&ts->msac, eob_bin_cdf, 4 + sz); \
  |  |  ------------------
  |  |  |  |   48|  1.02M|#define dav1d_msac_decode_symbol_adapt8  dav1d_msac_decode_symbol_adapt8_sse2
  |  |  ------------------
  |  |  414|  1.02M|        break; \
  |  |  415|  1.02M|    }
  ------------------
  417|   315k|    case_sz(1,   32,  8, [is_1d]);
  ------------------
  |  |  411|   315k|    case sz: { \
  |  |  ------------------
  |  |  |  Branch (411:5): [True: 315k, False: 3.38M]
  |  |  ------------------
  |  |  412|   315k|        uint16_t *const eob_bin_cdf = ts->cdf.coef.eob_bin_##bin[chroma]is_1d; \
  |  |  413|   315k|        eob = dav1d_msac_decode_symbol_adapt##ns(&ts->msac, eob_bin_cdf, 4 + sz); \
  |  |  ------------------
  |  |  |  |   48|   315k|#define dav1d_msac_decode_symbol_adapt8  dav1d_msac_decode_symbol_adapt8_sse2
  |  |  ------------------
  |  |  414|   315k|        break; \
  |  |  415|   315k|    }
  ------------------
  418|   794k|    case_sz(2,   64,  8, [is_1d]);
  ------------------
  |  |  411|   794k|    case sz: { \
  |  |  ------------------
  |  |  |  Branch (411:5): [True: 794k, False: 2.90M]
  |  |  ------------------
  |  |  412|   794k|        uint16_t *const eob_bin_cdf = ts->cdf.coef.eob_bin_##bin[chroma]is_1d; \
  |  |  413|   794k|        eob = dav1d_msac_decode_symbol_adapt##ns(&ts->msac, eob_bin_cdf, 4 + sz); \
  |  |  ------------------
  |  |  |  |   48|   794k|#define dav1d_msac_decode_symbol_adapt8  dav1d_msac_decode_symbol_adapt8_sse2
  |  |  ------------------
  |  |  414|   794k|        break; \
  |  |  415|   794k|    }
  ------------------
  419|   417k|    case_sz(3,  128,  8, [is_1d]);
  ------------------
  |  |  411|   417k|    case sz: { \
  |  |  ------------------
  |  |  |  Branch (411:5): [True: 417k, False: 3.28M]
  |  |  ------------------
  |  |  412|   417k|        uint16_t *const eob_bin_cdf = ts->cdf.coef.eob_bin_##bin[chroma]is_1d; \
  |  |  413|   417k|        eob = dav1d_msac_decode_symbol_adapt##ns(&ts->msac, eob_bin_cdf, 4 + sz); \
  |  |  ------------------
  |  |  |  |   48|   417k|#define dav1d_msac_decode_symbol_adapt8  dav1d_msac_decode_symbol_adapt8_sse2
  |  |  ------------------
  |  |  414|   417k|        break; \
  |  |  415|   417k|    }
  ------------------
  420|   488k|    case_sz(4,  256, 16, [is_1d]);
  ------------------
  |  |  411|   488k|    case sz: { \
  |  |  ------------------
  |  |  |  Branch (411:5): [True: 488k, False: 3.21M]
  |  |  ------------------
  |  |  412|   488k|        uint16_t *const eob_bin_cdf = ts->cdf.coef.eob_bin_##bin[chroma]is_1d; \
  |  |  413|   488k|        eob = dav1d_msac_decode_symbol_adapt##ns(&ts->msac, eob_bin_cdf, 4 + sz); \
  |  |  ------------------
  |  |  |  |   57|   488k|#define dav1d_msac_decode_symbol_adapt16(ctx, cdf, symb) ((ctx)->symbol_adapt16(ctx, cdf, symb))
  |  |  ------------------
  |  |  414|   488k|        break; \
  |  |  415|   488k|    }
  ------------------
  421|   210k|    case_sz(5,  512, 16,        );
  ------------------
  |  |  411|   210k|    case sz: { \
  |  |  ------------------
  |  |  |  Branch (411:5): [True: 210k, False: 3.48M]
  |  |  ------------------
  |  |  412|   210k|        uint16_t *const eob_bin_cdf = ts->cdf.coef.eob_bin_##bin[chroma]is_1d; \
  |  |  413|   210k|        eob = dav1d_msac_decode_symbol_adapt##ns(&ts->msac, eob_bin_cdf, 4 + sz); \
  |  |  ------------------
  |  |  |  |   57|   210k|#define dav1d_msac_decode_symbol_adapt16(ctx, cdf, symb) ((ctx)->symbol_adapt16(ctx, cdf, symb))
  |  |  ------------------
  |  |  414|   210k|        break; \
  |  |  415|   210k|    }
  ------------------
  422|   450k|    case_sz(6, 1024, 16,        );
  ------------------
  |  |  411|   450k|    case sz: { \
  |  |  ------------------
  |  |  |  Branch (411:5): [True: 450k, False: 3.24M]
  |  |  ------------------
  |  |  412|   450k|        uint16_t *const eob_bin_cdf = ts->cdf.coef.eob_bin_##bin[chroma]is_1d; \
  |  |  413|   450k|        eob = dav1d_msac_decode_symbol_adapt##ns(&ts->msac, eob_bin_cdf, 4 + sz); \
  |  |  ------------------
  |  |  |  |   57|   450k|#define dav1d_msac_decode_symbol_adapt16(ctx, cdf, symb) ((ctx)->symbol_adapt16(ctx, cdf, symb))
  |  |  ------------------
  |  |  414|   450k|        break; \
  |  |  415|   450k|    }
  ------------------
  423|  3.69M|#undef case_sz
  424|  3.69M|    }
  425|  3.69M|    if (dbg)
  ------------------
  |  Branch (425:9): [Folded, False: 3.69M]
  ------------------
  426|      0|        printf("Post-eob_bin_%d[%d][%d][%d]: r=%d\n",
  427|      0|               16 << tx2dszctx, chroma, is_1d, eob, ts->msac.rng);
  428|  3.69M|    if (eob > 1) {
  ------------------
  |  Branch (428:9): [True: 2.61M, False: 1.07M]
  ------------------
  429|  2.61M|        const int eob_bin = eob - 2;
  430|  2.61M|        uint16_t *const eob_hi_bit_cdf =
  431|  2.61M|            ts->cdf.coef.eob_hi_bit[t_dim->ctx][chroma][eob_bin];
  432|  2.61M|        const int eob_hi_bit = dav1d_msac_decode_bool_adapt(&ts->msac, eob_hi_bit_cdf);
  ------------------
  |  |   52|  2.61M|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  433|  2.61M|        if (dbg)
  ------------------
  |  Branch (433:13): [Folded, False: 2.61M]
  ------------------
  434|      0|            printf("Post-eob_hi_bit[%d][%d][%d][%d]: r=%d\n",
  435|      0|                   t_dim->ctx, chroma, eob_bin, eob_hi_bit, ts->msac.rng);
  436|  2.61M|        eob = ((eob_hi_bit | 2) << eob_bin) | dav1d_msac_decode_bools(&ts->msac, eob_bin);
  437|  2.61M|        if (dbg)
  ------------------
  |  Branch (437:13): [Folded, False: 2.61M]
  ------------------
  438|      0|            printf("Post-eob[%d]: r=%d\n", eob, ts->msac.rng);
  439|  2.61M|    }
  440|  3.69M|    assert(eob >= 0);
  ------------------
  |  Branch (440:5): [True: 3.69M, False: 0]
  ------------------
  441|       |
  442|       |    // base tokens
  443|  3.69M|    uint16_t (*const eob_cdf)[4] = ts->cdf.coef.eob_base_tok[t_dim->ctx][chroma];
  444|  3.69M|    uint16_t (*const hi_cdf)[4] = ts->cdf.coef.br_tok[imin(t_dim->ctx, 3)][chroma];
  445|  3.69M|    unsigned rc, dc_tok;
  446|       |
  447|  3.69M|    if (eob) {
  ------------------
  |  Branch (447:9): [True: 2.76M, False: 936k]
  ------------------
  448|  2.76M|        uint16_t (*const lo_cdf)[4] = ts->cdf.coef.base_tok[t_dim->ctx][chroma];
  449|  2.76M|        uint8_t *const levels = t->scratch.levels; // bits 0-5: tok, 6-7: lo_tok
  450|       |
  451|       |        /* eob */
  452|  2.76M|        unsigned ctx = 1 + (eob > 2 << tx2dszctx) + (eob > 4 << tx2dszctx);
  453|  2.76M|        int eob_tok = dav1d_msac_decode_symbol_adapt4(&ts->msac, eob_cdf[ctx], 2);
  ------------------
  |  |   47|  2.76M|#define dav1d_msac_decode_symbol_adapt4  dav1d_msac_decode_symbol_adapt4_sse2
  ------------------
  454|  2.76M|        int tok = eob_tok + 1;
  455|  2.76M|        int level_tok = tok * 0x41;
  456|  2.76M|        unsigned mag;
  457|       |
  458|  2.76M|#define DECODE_COEFS_CLASS(tx_class) \
  459|  2.76M|        unsigned x, y; \
  460|  2.76M|        uint8_t *level; \
  461|  2.76M|        if (tx_class == TX_CLASS_2D) \
  462|  2.76M|            rc = scan[eob], x = rc >> shift, y = rc & mask; \
  463|  2.76M|        else if (tx_class == TX_CLASS_H) \
  464|       |            /* Transposing reduces the stride and padding requirements */ \
  465|  2.76M|            x = eob & mask, y = eob >> shift, rc = eob; \
  466|  2.76M|        else /* tx_class == TX_CLASS_V */ \
  467|  2.76M|            x = eob & mask, y = eob >> shift, rc = (x << shift2) | y; \
  468|  2.76M|        if (dbg) \
  469|  2.76M|            printf("Post-lo_tok[%d][%d][%d][%d=%d=%d]: r=%d\n", \
  470|  2.76M|                   t_dim->ctx, chroma, ctx, eob, rc, tok, ts->msac.rng); \
  471|  2.76M|        if (eob_tok == 2) { \
  472|  2.76M|            ctx = (tx_class == TX_CLASS_2D ? (x | y) > 1 : y != 0) ? 14 : 7; \
  473|  2.76M|            tok = dav1d_msac_decode_hi_tok(&ts->msac, hi_cdf[ctx]); \
  474|  2.76M|            level_tok = tok + (3 << 6); \
  475|  2.76M|            if (dbg) \
  476|  2.76M|                printf("Post-hi_tok[%d][%d][%d][%d=%d=%d]: r=%d\n", \
  477|  2.76M|                       imin(t_dim->ctx, 3), chroma, ctx, eob, rc, tok, \
  478|  2.76M|                       ts->msac.rng); \
  479|  2.76M|        } \
  480|  2.76M|        cf[rc] = tok << 11; \
  481|  2.76M|        if (tx_class == TX_CLASS_2D) \
  482|  2.76M|            level = levels + rc; \
  483|  2.76M|        else \
  484|  2.76M|            level = levels + x * stride + y; \
  485|  2.76M|        *level = (uint8_t) level_tok; \
  486|  2.76M|        for (int i = eob - 1; i > 0; i--) { /* ac */ \
  487|  2.76M|            unsigned rc_i; \
  488|  2.76M|            if (tx_class == TX_CLASS_2D) \
  489|  2.76M|                rc_i = scan[i], x = rc_i >> shift, y = rc_i & mask; \
  490|  2.76M|            else if (tx_class == TX_CLASS_H) \
  491|  2.76M|                x = i & mask, y = i >> shift, rc_i = i; \
  492|  2.76M|            else /* tx_class == TX_CLASS_V */ \
  493|  2.76M|                x = i & mask, y = i >> shift, rc_i = (x << shift2) | y; \
  494|  2.76M|            assert(x < 32 && y < 32); \
  495|  2.76M|            if (tx_class == TX_CLASS_2D) \
  496|  2.76M|                level = levels + rc_i; \
  497|  2.76M|            else \
  498|  2.76M|                level = levels + x * stride + y; \
  499|  2.76M|            ctx = get_lo_ctx(level, tx_class, &mag, lo_ctx_offsets, x, y, stride); \
  500|  2.76M|            if (tx_class == TX_CLASS_2D) \
  501|  2.76M|                y |= x; \
  502|  2.76M|            tok = dav1d_msac_decode_symbol_adapt4(&ts->msac, lo_cdf[ctx], 3); \
  503|  2.76M|            if (dbg) \
  504|  2.76M|                printf("Post-lo_tok[%d][%d][%d][%d=%d=%d]: r=%d\n", \
  505|  2.76M|                       t_dim->ctx, chroma, ctx, i, rc_i, tok, ts->msac.rng); \
  506|  2.76M|            if (tok == 3) { \
  507|  2.76M|                mag &= 63; \
  508|  2.76M|                ctx = (y > (tx_class == TX_CLASS_2D) ? 14 : 7) + \
  509|  2.76M|                      (mag > 12 ? 6 : (mag + 1) >> 1); \
  510|  2.76M|                tok = dav1d_msac_decode_hi_tok(&ts->msac, hi_cdf[ctx]); \
  511|  2.76M|                if (dbg) \
  512|  2.76M|                    printf("Post-hi_tok[%d][%d][%d][%d=%d=%d]: r=%d\n", \
  513|  2.76M|                           imin(t_dim->ctx, 3), chroma, ctx, i, rc_i, tok, \
  514|  2.76M|                           ts->msac.rng); \
  515|  2.76M|                *level = (uint8_t) (tok + (3 << 6)); \
  516|  2.76M|                cf[rc_i] = (tok << 11) | rc; \
  517|  2.76M|                rc = rc_i; \
  518|  2.76M|            } else { \
  519|       |                /* 0x1 for tok, 0x7ff as bitmask for rc, 0x41 for level_tok */ \
  520|  2.76M|                tok *= 0x17ff41; \
  521|  2.76M|                *level = (uint8_t) tok; \
  522|       |                /* tok ? (tok << 11) | rc : 0 */ \
  523|  2.76M|                tok = (tok >> 9) & (rc + ~0x7ffu); \
  524|  2.76M|                if (tok) rc = rc_i; \
  525|  2.76M|                cf[rc_i] = tok; \
  526|  2.76M|            } \
  527|  2.76M|        } \
  528|       |        /* dc */ \
  529|  2.76M|        ctx = (tx_class == TX_CLASS_2D) ? 0 : \
  530|  2.76M|            get_lo_ctx(levels, tx_class, &mag, lo_ctx_offsets, 0, 0, stride); \
  531|  2.76M|        dc_tok = dav1d_msac_decode_symbol_adapt4(&ts->msac, lo_cdf[ctx], 3); \
  532|  2.76M|        if (dbg) \
  533|  2.76M|            printf("Post-dc_lo_tok[%d][%d][%d][%d]: r=%d\n", \
  534|  2.76M|                   t_dim->ctx, chroma, ctx, dc_tok, ts->msac.rng); \
  535|  2.76M|        if (dc_tok == 3) { \
  536|  2.76M|            if (tx_class == TX_CLASS_2D) \
  537|  2.76M|                mag = levels[0 * stride + 1] + levels[1 * stride + 0] + \
  538|  2.76M|                      levels[1 * stride + 1]; \
  539|  2.76M|            mag &= 63; \
  540|  2.76M|            ctx = mag > 12 ? 6 : (mag + 1) >> 1; \
  541|  2.76M|            dc_tok = dav1d_msac_decode_hi_tok(&ts->msac, hi_cdf[ctx]); \
  542|  2.76M|            if (dbg) \
  543|  2.76M|                printf("Post-dc_hi_tok[%d][%d][0][%d]: r=%d\n", \
  544|  2.76M|                       imin(t_dim->ctx, 3), chroma, dc_tok, ts->msac.rng); \
  545|  2.76M|        } \
  546|  2.76M|        break
  547|       |
  548|  2.76M|        const uint16_t *scan;
  549|  2.76M|        switch (tx_class) {
  550|  2.49M|        case TX_CLASS_2D: {
  ------------------
  |  Branch (550:9): [True: 2.49M, False: 263k]
  ------------------
  551|  2.49M|            const unsigned nonsquare_tx = tx >= RTX_4X8;
  552|  2.49M|            const uint8_t (*const lo_ctx_offsets)[5] =
  553|  2.49M|                dav1d_lo_ctx_offsets[nonsquare_tx + (tx & nonsquare_tx)];
  554|  2.49M|            scan = dav1d_scans[tx];
  555|  2.49M|            const ptrdiff_t stride = 4 << slh;
  556|  2.49M|            const unsigned shift = slh + 2, shift2 = 0;
  557|  2.49M|            const unsigned mask = (4 << slh) - 1;
  558|  2.49M|            memset(levels, 0, stride * ((4 << slw) + 2));
  559|  2.49M|            DECODE_COEFS_CLASS(TX_CLASS_2D);
  ------------------
  |  |  459|  2.49M|        unsigned x, y; \
  |  |  460|  2.49M|        uint8_t *level; \
  |  |  461|  2.49M|        if (tx_class == TX_CLASS_2D) \
  |  |  ------------------
  |  |  |  Branch (461:13): [True: 2.49M, Folded]
  |  |  ------------------
  |  |  462|  2.49M|            rc = scan[eob], x = rc >> shift, y = rc & mask; \
  |  |  463|  2.49M|        else if (tx_class == TX_CLASS_H) \
  |  |  ------------------
  |  |  |  Branch (463:18): [Folded, False: 0]
  |  |  ------------------
  |  |  464|      0|            /* Transposing reduces the stride and padding requirements */ \
  |  |  465|      0|            x = eob & mask, y = eob >> shift, rc = eob; \
  |  |  466|      0|        else /* tx_class == TX_CLASS_V */ \
  |  |  467|      0|            x = eob & mask, y = eob >> shift, rc = (x << shift2) | y; \
  |  |  468|  2.49M|        if (dbg) \
  |  |  ------------------
  |  |  |  Branch (468:13): [Folded, False: 2.49M]
  |  |  ------------------
  |  |  469|  2.49M|            printf("Post-lo_tok[%d][%d][%d][%d=%d=%d]: r=%d\n", \
  |  |  470|      0|                   t_dim->ctx, chroma, ctx, eob, rc, tok, ts->msac.rng); \
  |  |  471|  2.49M|        if (eob_tok == 2) { \
  |  |  ------------------
  |  |  |  Branch (471:13): [True: 58.8k, False: 2.43M]
  |  |  ------------------
  |  |  472|  58.8k|            ctx = (tx_class == TX_CLASS_2D ? (x | y) > 1 : y != 0) ? 14 : 7; \
  |  |  ------------------
  |  |  |  Branch (472:19): [True: 54.9k, False: 3.90k]
  |  |  |  Branch (472:20): [True: 58.8k, Folded]
  |  |  ------------------
  |  |  473|  58.8k|            tok = dav1d_msac_decode_hi_tok(&ts->msac, hi_cdf[ctx]); \
  |  |  ------------------
  |  |  |  |   49|  58.8k|#define dav1d_msac_decode_hi_tok         dav1d_msac_decode_hi_tok_sse2
  |  |  ------------------
  |  |  474|  58.8k|            level_tok = tok + (3 << 6); \
  |  |  475|  58.8k|            if (dbg) \
  |  |  ------------------
  |  |  |  Branch (475:17): [Folded, False: 58.8k]
  |  |  ------------------
  |  |  476|  58.8k|                printf("Post-hi_tok[%d][%d][%d][%d=%d=%d]: r=%d\n", \
  |  |  477|      0|                       imin(t_dim->ctx, 3), chroma, ctx, eob, rc, tok, \
  |  |  478|      0|                       ts->msac.rng); \
  |  |  479|  58.8k|        } \
  |  |  480|  2.49M|        cf[rc] = tok << 11; \
  |  |  481|  2.49M|        if (tx_class == TX_CLASS_2D) \
  |  |  ------------------
  |  |  |  Branch (481:13): [True: 2.49M, Folded]
  |  |  ------------------
  |  |  482|  2.49M|            level = levels + rc; \
  |  |  483|  2.49M|        else \
  |  |  484|  2.49M|            level = levels + x * stride + y; \
  |  |  485|  2.49M|        *level = (uint8_t) level_tok; \
  |  |  486|  75.0M|        for (int i = eob - 1; i > 0; i--) { /* ac */ \
  |  |  ------------------
  |  |  |  Branch (486:31): [True: 72.5M, False: 2.49M]
  |  |  ------------------
  |  |  487|  72.5M|            unsigned rc_i; \
  |  |  488|  72.5M|            if (tx_class == TX_CLASS_2D) \
  |  |  ------------------
  |  |  |  Branch (488:17): [True: 72.5M, Folded]
  |  |  ------------------
  |  |  489|  72.5M|                rc_i = scan[i], x = rc_i >> shift, y = rc_i & mask; \
  |  |  490|  72.5M|            else if (tx_class == TX_CLASS_H) \
  |  |  ------------------
  |  |  |  Branch (490:22): [Folded, False: 0]
  |  |  ------------------
  |  |  491|      0|                x = i & mask, y = i >> shift, rc_i = i; \
  |  |  492|      0|            else /* tx_class == TX_CLASS_V */ \
  |  |  493|      0|                x = i & mask, y = i >> shift, rc_i = (x << shift2) | y; \
  |  |  494|  72.5M|            assert(x < 32 && y < 32); \
  |  |  495|  72.5M|            if (tx_class == TX_CLASS_2D) \
  |  |  ------------------
  |  |  |  Branch (495:17): [True: 72.5M, Folded]
  |  |  ------------------
  |  |  496|  72.5M|                level = levels + rc_i; \
  |  |  497|  72.5M|            else \
  |  |  498|  72.5M|                level = levels + x * stride + y; \
  |  |  499|  72.5M|            ctx = get_lo_ctx(level, tx_class, &mag, lo_ctx_offsets, x, y, stride); \
  |  |  500|  72.5M|            if (tx_class == TX_CLASS_2D) \
  |  |  ------------------
  |  |  |  Branch (500:17): [True: 72.5M, Folded]
  |  |  ------------------
  |  |  501|  72.5M|                y |= x; \
  |  |  502|  72.5M|            tok = dav1d_msac_decode_symbol_adapt4(&ts->msac, lo_cdf[ctx], 3); \
  |  |  ------------------
  |  |  |  |   47|  72.5M|#define dav1d_msac_decode_symbol_adapt4  dav1d_msac_decode_symbol_adapt4_sse2
  |  |  ------------------
  |  |  503|  72.5M|            if (dbg) \
  |  |  ------------------
  |  |  |  Branch (503:17): [Folded, False: 72.5M]
  |  |  ------------------
  |  |  504|  72.5M|                printf("Post-lo_tok[%d][%d][%d][%d=%d=%d]: r=%d\n", \
  |  |  505|      0|                       t_dim->ctx, chroma, ctx, i, rc_i, tok, ts->msac.rng); \
  |  |  506|  72.5M|            if (tok == 3) { \
  |  |  ------------------
  |  |  |  Branch (506:17): [True: 6.21M, False: 66.3M]
  |  |  ------------------
  |  |  507|  6.21M|                mag &= 63; \
  |  |  508|  6.21M|                ctx = (y > (tx_class == TX_CLASS_2D) ? 14 : 7) + \
  |  |  ------------------
  |  |  |  Branch (508:24): [True: 5.15M, False: 1.05M]
  |  |  ------------------
  |  |  509|  6.21M|                      (mag > 12 ? 6 : (mag + 1) >> 1); \
  |  |  ------------------
  |  |  |  Branch (509:24): [True: 1.66M, False: 4.54M]
  |  |  ------------------
  |  |  510|  6.21M|                tok = dav1d_msac_decode_hi_tok(&ts->msac, hi_cdf[ctx]); \
  |  |  ------------------
  |  |  |  |   49|  6.21M|#define dav1d_msac_decode_hi_tok         dav1d_msac_decode_hi_tok_sse2
  |  |  ------------------
  |  |  511|  6.21M|                if (dbg) \
  |  |  ------------------
  |  |  |  Branch (511:21): [Folded, False: 6.21M]
  |  |  ------------------
  |  |  512|  6.21M|                    printf("Post-hi_tok[%d][%d][%d][%d=%d=%d]: r=%d\n", \
  |  |  513|      0|                           imin(t_dim->ctx, 3), chroma, ctx, i, rc_i, tok, \
  |  |  514|      0|                           ts->msac.rng); \
  |  |  515|  6.21M|                *level = (uint8_t) (tok + (3 << 6)); \
  |  |  516|  6.21M|                cf[rc_i] = (tok << 11) | rc; \
  |  |  517|  6.21M|                rc = rc_i; \
  |  |  518|  66.3M|            } else { \
  |  |  519|  66.3M|                /* 0x1 for tok, 0x7ff as bitmask for rc, 0x41 for level_tok */ \
  |  |  520|  66.3M|                tok *= 0x17ff41; \
  |  |  521|  66.3M|                *level = (uint8_t) tok; \
  |  |  522|  66.3M|                /* tok ? (tok << 11) | rc : 0 */ \
  |  |  523|  66.3M|                tok = (tok >> 9) & (rc + ~0x7ffu); \
  |  |  524|  66.3M|                if (tok) rc = rc_i; \
  |  |  ------------------
  |  |  |  Branch (524:21): [True: 19.0M, False: 47.2M]
  |  |  ------------------
  |  |  525|  66.3M|                cf[rc_i] = tok; \
  |  |  526|  66.3M|            } \
  |  |  527|  72.5M|        } \
  |  |  528|  2.49M|        /* dc */ \
  |  |  529|  2.49M|        ctx = (tx_class == TX_CLASS_2D) ? 0 : \
  |  |  ------------------
  |  |  |  Branch (529:15): [True: 2.49M, Folded]
  |  |  ------------------
  |  |  530|  2.49M|            get_lo_ctx(levels, tx_class, &mag, lo_ctx_offsets, 0, 0, stride); \
  |  |  531|  2.49M|        dc_tok = dav1d_msac_decode_symbol_adapt4(&ts->msac, lo_cdf[ctx], 3); \
  |  |  ------------------
  |  |  |  |   47|  2.49M|#define dav1d_msac_decode_symbol_adapt4  dav1d_msac_decode_symbol_adapt4_sse2
  |  |  ------------------
  |  |  532|  2.49M|        if (dbg) \
  |  |  ------------------
  |  |  |  Branch (532:13): [Folded, False: 2.49M]
  |  |  ------------------
  |  |  533|  2.49M|            printf("Post-dc_lo_tok[%d][%d][%d][%d]: r=%d\n", \
  |  |  534|      0|                   t_dim->ctx, chroma, ctx, dc_tok, ts->msac.rng); \
  |  |  535|  2.49M|        if (dc_tok == 3) { \
  |  |  ------------------
  |  |  |  Branch (535:13): [True: 962k, False: 1.53M]
  |  |  ------------------
  |  |  536|   962k|            if (tx_class == TX_CLASS_2D) \
  |  |  ------------------
  |  |  |  Branch (536:17): [True: 962k, Folded]
  |  |  ------------------
  |  |  537|   962k|                mag = levels[0 * stride + 1] + levels[1 * stride + 0] + \
  |  |  538|   962k|                      levels[1 * stride + 1]; \
  |  |  539|   962k|            mag &= 63; \
  |  |  540|   962k|            ctx = mag > 12 ? 6 : (mag + 1) >> 1; \
  |  |  ------------------
  |  |  |  Branch (540:19): [True: 130k, False: 831k]
  |  |  ------------------
  |  |  541|   962k|            dc_tok = dav1d_msac_decode_hi_tok(&ts->msac, hi_cdf[ctx]); \
  |  |  ------------------
  |  |  |  |   49|   962k|#define dav1d_msac_decode_hi_tok         dav1d_msac_decode_hi_tok_sse2
  |  |  ------------------
  |  |  542|   962k|            if (dbg) \
  |  |  ------------------
  |  |  |  Branch (542:17): [Folded, False: 962k]
  |  |  ------------------
  |  |  543|   962k|                printf("Post-dc_hi_tok[%d][%d][0][%d]: r=%d\n", \
  |  |  544|      0|                       imin(t_dim->ctx, 3), chroma, dc_tok, ts->msac.rng); \
  |  |  545|   962k|        } \
  |  |  546|  2.49M|        break
  ------------------
  |  Branch (559:13): [True: 72.5M, False: 0]
  |  Branch (559:13): [True: 72.5M, False: 0]
  ------------------
  560|  2.49M|        }
  561|   167k|        case TX_CLASS_H: {
  ------------------
  |  Branch (561:9): [True: 167k, False: 2.59M]
  ------------------
  562|   167k|            const uint8_t (*const lo_ctx_offsets)[5] = NULL;
  563|   167k|            const ptrdiff_t stride = 16;
  564|   167k|            const unsigned shift = slh + 2, shift2 = 0;
  565|   167k|            const unsigned mask = (4 << slh) - 1;
  566|   167k|            memset(levels, 0, stride * ((4 << slh) + 2));
  567|   167k|            DECODE_COEFS_CLASS(TX_CLASS_H);
  ------------------
  |  |  459|   167k|        unsigned x, y; \
  |  |  460|   167k|        uint8_t *level; \
  |  |  461|   167k|        if (tx_class == TX_CLASS_2D) \
  |  |  ------------------
  |  |  |  Branch (461:13): [Folded, False: 167k]
  |  |  ------------------
  |  |  462|   167k|            rc = scan[eob], x = rc >> shift, y = rc & mask; \
  |  |  463|   167k|        else if (tx_class == TX_CLASS_H) \
  |  |  ------------------
  |  |  |  Branch (463:18): [True: 167k, Folded]
  |  |  ------------------
  |  |  464|   167k|            /* Transposing reduces the stride and padding requirements */ \
  |  |  465|   167k|            x = eob & mask, y = eob >> shift, rc = eob; \
  |  |  466|   167k|        else /* tx_class == TX_CLASS_V */ \
  |  |  467|   167k|            x = eob & mask, y = eob >> shift, rc = (x << shift2) | y; \
  |  |  468|   167k|        if (dbg) \
  |  |  ------------------
  |  |  |  Branch (468:13): [Folded, False: 167k]
  |  |  ------------------
  |  |  469|   167k|            printf("Post-lo_tok[%d][%d][%d][%d=%d=%d]: r=%d\n", \
  |  |  470|      0|                   t_dim->ctx, chroma, ctx, eob, rc, tok, ts->msac.rng); \
  |  |  471|   167k|        if (eob_tok == 2) { \
  |  |  ------------------
  |  |  |  Branch (471:13): [True: 4.20k, False: 163k]
  |  |  ------------------
  |  |  472|  4.20k|            ctx = (tx_class == TX_CLASS_2D ? (x | y) > 1 : y != 0) ? 14 : 7; \
  |  |  ------------------
  |  |  |  Branch (472:19): [True: 3.68k, False: 519]
  |  |  |  Branch (472:20): [Folded, False: 4.20k]
  |  |  ------------------
  |  |  473|  4.20k|            tok = dav1d_msac_decode_hi_tok(&ts->msac, hi_cdf[ctx]); \
  |  |  ------------------
  |  |  |  |   49|  4.20k|#define dav1d_msac_decode_hi_tok         dav1d_msac_decode_hi_tok_sse2
  |  |  ------------------
  |  |  474|  4.20k|            level_tok = tok + (3 << 6); \
  |  |  475|  4.20k|            if (dbg) \
  |  |  ------------------
  |  |  |  Branch (475:17): [Folded, False: 4.20k]
  |  |  ------------------
  |  |  476|  4.20k|                printf("Post-hi_tok[%d][%d][%d][%d=%d=%d]: r=%d\n", \
  |  |  477|      0|                       imin(t_dim->ctx, 3), chroma, ctx, eob, rc, tok, \
  |  |  478|      0|                       ts->msac.rng); \
  |  |  479|  4.20k|        } \
  |  |  480|   167k|        cf[rc] = tok << 11; \
  |  |  481|   167k|        if (tx_class == TX_CLASS_2D) \
  |  |  ------------------
  |  |  |  Branch (481:13): [Folded, False: 167k]
  |  |  ------------------
  |  |  482|   167k|            level = levels + rc; \
  |  |  483|   167k|        else \
  |  |  484|   167k|            level = levels + x * stride + y; \
  |  |  485|   167k|        *level = (uint8_t) level_tok; \
  |  |  486|  3.69M|        for (int i = eob - 1; i > 0; i--) { /* ac */ \
  |  |  ------------------
  |  |  |  Branch (486:31): [True: 3.52M, False: 167k]
  |  |  ------------------
  |  |  487|  3.52M|            unsigned rc_i; \
  |  |  488|  3.52M|            if (tx_class == TX_CLASS_2D) \
  |  |  ------------------
  |  |  |  Branch (488:17): [Folded, False: 3.52M]
  |  |  ------------------
  |  |  489|  3.52M|                rc_i = scan[i], x = rc_i >> shift, y = rc_i & mask; \
  |  |  490|  3.52M|            else if (tx_class == TX_CLASS_H) \
  |  |  ------------------
  |  |  |  Branch (490:22): [True: 3.52M, Folded]
  |  |  ------------------
  |  |  491|  3.52M|                x = i & mask, y = i >> shift, rc_i = i; \
  |  |  492|  3.52M|            else /* tx_class == TX_CLASS_V */ \
  |  |  493|  3.52M|                x = i & mask, y = i >> shift, rc_i = (x << shift2) | y; \
  |  |  494|  3.52M|            assert(x < 32 && y < 32); \
  |  |  495|  3.52M|            if (tx_class == TX_CLASS_2D) \
  |  |  ------------------
  |  |  |  Branch (495:17): [Folded, False: 3.52M]
  |  |  ------------------
  |  |  496|  3.52M|                level = levels + rc_i; \
  |  |  497|  3.52M|            else \
  |  |  498|  3.52M|                level = levels + x * stride + y; \
  |  |  499|  3.52M|            ctx = get_lo_ctx(level, tx_class, &mag, lo_ctx_offsets, x, y, stride); \
  |  |  500|  3.52M|            if (tx_class == TX_CLASS_2D) \
  |  |  ------------------
  |  |  |  Branch (500:17): [Folded, False: 3.52M]
  |  |  ------------------
  |  |  501|  3.52M|                y |= x; \
  |  |  502|  3.52M|            tok = dav1d_msac_decode_symbol_adapt4(&ts->msac, lo_cdf[ctx], 3); \
  |  |  ------------------
  |  |  |  |   47|  3.52M|#define dav1d_msac_decode_symbol_adapt4  dav1d_msac_decode_symbol_adapt4_sse2
  |  |  ------------------
  |  |  503|  3.52M|            if (dbg) \
  |  |  ------------------
  |  |  |  Branch (503:17): [Folded, False: 3.52M]
  |  |  ------------------
  |  |  504|  3.52M|                printf("Post-lo_tok[%d][%d][%d][%d=%d=%d]: r=%d\n", \
  |  |  505|      0|                       t_dim->ctx, chroma, ctx, i, rc_i, tok, ts->msac.rng); \
  |  |  506|  3.52M|            if (tok == 3) { \
  |  |  ------------------
  |  |  |  Branch (506:17): [True: 209k, False: 3.31M]
  |  |  ------------------
  |  |  507|   209k|                mag &= 63; \
  |  |  508|   209k|                ctx = (y > (tx_class == TX_CLASS_2D) ? 14 : 7) + \
  |  |  ------------------
  |  |  |  Branch (508:24): [True: 140k, False: 68.9k]
  |  |  ------------------
  |  |  509|   209k|                      (mag > 12 ? 6 : (mag + 1) >> 1); \
  |  |  ------------------
  |  |  |  Branch (509:24): [True: 30.4k, False: 178k]
  |  |  ------------------
  |  |  510|   209k|                tok = dav1d_msac_decode_hi_tok(&ts->msac, hi_cdf[ctx]); \
  |  |  ------------------
  |  |  |  |   49|   209k|#define dav1d_msac_decode_hi_tok         dav1d_msac_decode_hi_tok_sse2
  |  |  ------------------
  |  |  511|   209k|                if (dbg) \
  |  |  ------------------
  |  |  |  Branch (511:21): [Folded, False: 209k]
  |  |  ------------------
  |  |  512|   209k|                    printf("Post-hi_tok[%d][%d][%d][%d=%d=%d]: r=%d\n", \
  |  |  513|      0|                           imin(t_dim->ctx, 3), chroma, ctx, i, rc_i, tok, \
  |  |  514|      0|                           ts->msac.rng); \
  |  |  515|   209k|                *level = (uint8_t) (tok + (3 << 6)); \
  |  |  516|   209k|                cf[rc_i] = (tok << 11) | rc; \
  |  |  517|   209k|                rc = rc_i; \
  |  |  518|  3.31M|            } else { \
  |  |  519|  3.31M|                /* 0x1 for tok, 0x7ff as bitmask for rc, 0x41 for level_tok */ \
  |  |  520|  3.31M|                tok *= 0x17ff41; \
  |  |  521|  3.31M|                *level = (uint8_t) tok; \
  |  |  522|  3.31M|                /* tok ? (tok << 11) | rc : 0 */ \
  |  |  523|  3.31M|                tok = (tok >> 9) & (rc + ~0x7ffu); \
  |  |  524|  3.31M|                if (tok) rc = rc_i; \
  |  |  ------------------
  |  |  |  Branch (524:21): [True: 925k, False: 2.38M]
  |  |  ------------------
  |  |  525|  3.31M|                cf[rc_i] = tok; \
  |  |  526|  3.31M|            } \
  |  |  527|  3.52M|        } \
  |  |  528|   167k|        /* dc */ \
  |  |  529|   167k|        ctx = (tx_class == TX_CLASS_2D) ? 0 : \
  |  |  ------------------
  |  |  |  Branch (529:15): [Folded, False: 167k]
  |  |  ------------------
  |  |  530|   167k|            get_lo_ctx(levels, tx_class, &mag, lo_ctx_offsets, 0, 0, stride); \
  |  |  531|   167k|        dc_tok = dav1d_msac_decode_symbol_adapt4(&ts->msac, lo_cdf[ctx], 3); \
  |  |  ------------------
  |  |  |  |   47|   167k|#define dav1d_msac_decode_symbol_adapt4  dav1d_msac_decode_symbol_adapt4_sse2
  |  |  ------------------
  |  |  532|   167k|        if (dbg) \
  |  |  ------------------
  |  |  |  Branch (532:13): [Folded, False: 167k]
  |  |  ------------------
  |  |  533|   167k|            printf("Post-dc_lo_tok[%d][%d][%d][%d]: r=%d\n", \
  |  |  534|      0|                   t_dim->ctx, chroma, ctx, dc_tok, ts->msac.rng); \
  |  |  535|   167k|        if (dc_tok == 3) { \
  |  |  ------------------
  |  |  |  Branch (535:13): [True: 23.4k, False: 143k]
  |  |  ------------------
  |  |  536|  23.4k|            if (tx_class == TX_CLASS_2D) \
  |  |  ------------------
  |  |  |  Branch (536:17): [Folded, False: 23.4k]
  |  |  ------------------
  |  |  537|  23.4k|                mag = levels[0 * stride + 1] + levels[1 * stride + 0] + \
  |  |  538|      0|                      levels[1 * stride + 1]; \
  |  |  539|  23.4k|            mag &= 63; \
  |  |  540|  23.4k|            ctx = mag > 12 ? 6 : (mag + 1) >> 1; \
  |  |  ------------------
  |  |  |  Branch (540:19): [True: 5.77k, False: 17.7k]
  |  |  ------------------
  |  |  541|  23.4k|            dc_tok = dav1d_msac_decode_hi_tok(&ts->msac, hi_cdf[ctx]); \
  |  |  ------------------
  |  |  |  |   49|  23.4k|#define dav1d_msac_decode_hi_tok         dav1d_msac_decode_hi_tok_sse2
  |  |  ------------------
  |  |  542|  23.4k|            if (dbg) \
  |  |  ------------------
  |  |  |  Branch (542:17): [Folded, False: 23.4k]
  |  |  ------------------
  |  |  543|  23.4k|                printf("Post-dc_hi_tok[%d][%d][0][%d]: r=%d\n", \
  |  |  544|      0|                       imin(t_dim->ctx, 3), chroma, dc_tok, ts->msac.rng); \
  |  |  545|  23.4k|        } \
  |  |  546|   167k|        break
  ------------------
  |  Branch (567:13): [True: 3.52M, False: 0]
  |  Branch (567:13): [True: 3.52M, False: 0]
  ------------------
  568|   167k|        }
  569|  96.6k|        case TX_CLASS_V: {
  ------------------
  |  Branch (569:9): [True: 96.6k, False: 2.66M]
  ------------------
  570|  96.6k|            const uint8_t (*const lo_ctx_offsets)[5] = NULL;
  571|  96.6k|            const ptrdiff_t stride = 16;
  572|  96.6k|            const unsigned shift = slw + 2, shift2 = slh + 2;
  573|  96.6k|            const unsigned mask = (4 << slw) - 1;
  574|  96.6k|            memset(levels, 0, stride * ((4 << slw) + 2));
  575|  96.6k|            DECODE_COEFS_CLASS(TX_CLASS_V);
  ------------------
  |  |  459|  96.6k|        unsigned x, y; \
  |  |  460|  96.6k|        uint8_t *level; \
  |  |  461|  96.6k|        if (tx_class == TX_CLASS_2D) \
  |  |  ------------------
  |  |  |  Branch (461:13): [Folded, False: 96.6k]
  |  |  ------------------
  |  |  462|  96.6k|            rc = scan[eob], x = rc >> shift, y = rc & mask; \
  |  |  463|  96.6k|        else if (tx_class == TX_CLASS_H) \
  |  |  ------------------
  |  |  |  Branch (463:18): [Folded, False: 96.6k]
  |  |  ------------------
  |  |  464|  96.6k|            /* Transposing reduces the stride and padding requirements */ \
  |  |  465|  96.6k|            x = eob & mask, y = eob >> shift, rc = eob; \
  |  |  466|  96.6k|        else /* tx_class == TX_CLASS_V */ \
  |  |  467|  96.6k|            x = eob & mask, y = eob >> shift, rc = (x << shift2) | y; \
  |  |  468|  96.6k|        if (dbg) \
  |  |  ------------------
  |  |  |  Branch (468:13): [Folded, False: 96.6k]
  |  |  ------------------
  |  |  469|  96.6k|            printf("Post-lo_tok[%d][%d][%d][%d=%d=%d]: r=%d\n", \
  |  |  470|      0|                   t_dim->ctx, chroma, ctx, eob, rc, tok, ts->msac.rng); \
  |  |  471|  96.6k|        if (eob_tok == 2) { \
  |  |  ------------------
  |  |  |  Branch (471:13): [True: 3.18k, False: 93.4k]
  |  |  ------------------
  |  |  472|  3.18k|            ctx = (tx_class == TX_CLASS_2D ? (x | y) > 1 : y != 0) ? 14 : 7; \
  |  |  ------------------
  |  |  |  Branch (472:19): [True: 2.57k, False: 616]
  |  |  |  Branch (472:20): [Folded, False: 3.18k]
  |  |  ------------------
  |  |  473|  3.18k|            tok = dav1d_msac_decode_hi_tok(&ts->msac, hi_cdf[ctx]); \
  |  |  ------------------
  |  |  |  |   49|  3.18k|#define dav1d_msac_decode_hi_tok         dav1d_msac_decode_hi_tok_sse2
  |  |  ------------------
  |  |  474|  3.18k|            level_tok = tok + (3 << 6); \
  |  |  475|  3.18k|            if (dbg) \
  |  |  ------------------
  |  |  |  Branch (475:17): [Folded, False: 3.18k]
  |  |  ------------------
  |  |  476|  3.18k|                printf("Post-hi_tok[%d][%d][%d][%d=%d=%d]: r=%d\n", \
  |  |  477|      0|                       imin(t_dim->ctx, 3), chroma, ctx, eob, rc, tok, \
  |  |  478|      0|                       ts->msac.rng); \
  |  |  479|  3.18k|        } \
  |  |  480|  96.6k|        cf[rc] = tok << 11; \
  |  |  481|  96.6k|        if (tx_class == TX_CLASS_2D) \
  |  |  ------------------
  |  |  |  Branch (481:13): [Folded, False: 96.6k]
  |  |  ------------------
  |  |  482|  96.6k|            level = levels + rc; \
  |  |  483|  96.6k|        else \
  |  |  484|  96.6k|            level = levels + x * stride + y; \
  |  |  485|  96.6k|        *level = (uint8_t) level_tok; \
  |  |  486|  2.05M|        for (int i = eob - 1; i > 0; i--) { /* ac */ \
  |  |  ------------------
  |  |  |  Branch (486:31): [True: 1.96M, False: 96.6k]
  |  |  ------------------
  |  |  487|  1.96M|            unsigned rc_i; \
  |  |  488|  1.96M|            if (tx_class == TX_CLASS_2D) \
  |  |  ------------------
  |  |  |  Branch (488:17): [Folded, False: 1.96M]
  |  |  ------------------
  |  |  489|  1.96M|                rc_i = scan[i], x = rc_i >> shift, y = rc_i & mask; \
  |  |  490|  1.96M|            else if (tx_class == TX_CLASS_H) \
  |  |  ------------------
  |  |  |  Branch (490:22): [Folded, False: 1.96M]
  |  |  ------------------
  |  |  491|  1.96M|                x = i & mask, y = i >> shift, rc_i = i; \
  |  |  492|  1.96M|            else /* tx_class == TX_CLASS_V */ \
  |  |  493|  1.96M|                x = i & mask, y = i >> shift, rc_i = (x << shift2) | y; \
  |  |  494|  1.96M|            assert(x < 32 && y < 32); \
  |  |  495|  1.96M|            if (tx_class == TX_CLASS_2D) \
  |  |  ------------------
  |  |  |  Branch (495:17): [Folded, False: 1.96M]
  |  |  ------------------
  |  |  496|  1.96M|                level = levels + rc_i; \
  |  |  497|  1.96M|            else \
  |  |  498|  1.96M|                level = levels + x * stride + y; \
  |  |  499|  1.96M|            ctx = get_lo_ctx(level, tx_class, &mag, lo_ctx_offsets, x, y, stride); \
  |  |  500|  1.96M|            if (tx_class == TX_CLASS_2D) \
  |  |  ------------------
  |  |  |  Branch (500:17): [Folded, False: 1.96M]
  |  |  ------------------
  |  |  501|  1.96M|                y |= x; \
  |  |  502|  1.96M|            tok = dav1d_msac_decode_symbol_adapt4(&ts->msac, lo_cdf[ctx], 3); \
  |  |  ------------------
  |  |  |  |   47|  1.96M|#define dav1d_msac_decode_symbol_adapt4  dav1d_msac_decode_symbol_adapt4_sse2
  |  |  ------------------
  |  |  503|  1.96M|            if (dbg) \
  |  |  ------------------
  |  |  |  Branch (503:17): [Folded, False: 1.96M]
  |  |  ------------------
  |  |  504|  1.96M|                printf("Post-lo_tok[%d][%d][%d][%d=%d=%d]: r=%d\n", \
  |  |  505|      0|                       t_dim->ctx, chroma, ctx, i, rc_i, tok, ts->msac.rng); \
  |  |  506|  1.96M|            if (tok == 3) { \
  |  |  ------------------
  |  |  |  Branch (506:17): [True: 85.8k, False: 1.87M]
  |  |  ------------------
  |  |  507|  85.8k|                mag &= 63; \
  |  |  508|  85.8k|                ctx = (y > (tx_class == TX_CLASS_2D) ? 14 : 7) + \
  |  |  ------------------
  |  |  |  Branch (508:24): [True: 54.4k, False: 31.4k]
  |  |  ------------------
  |  |  509|  85.8k|                      (mag > 12 ? 6 : (mag + 1) >> 1); \
  |  |  ------------------
  |  |  |  Branch (509:24): [True: 12.2k, False: 73.6k]
  |  |  ------------------
  |  |  510|  85.8k|                tok = dav1d_msac_decode_hi_tok(&ts->msac, hi_cdf[ctx]); \
  |  |  ------------------
  |  |  |  |   49|  85.8k|#define dav1d_msac_decode_hi_tok         dav1d_msac_decode_hi_tok_sse2
  |  |  ------------------
  |  |  511|  85.8k|                if (dbg) \
  |  |  ------------------
  |  |  |  Branch (511:21): [Folded, False: 85.8k]
  |  |  ------------------
  |  |  512|  85.8k|                    printf("Post-hi_tok[%d][%d][%d][%d=%d=%d]: r=%d\n", \
  |  |  513|      0|                           imin(t_dim->ctx, 3), chroma, ctx, i, rc_i, tok, \
  |  |  514|      0|                           ts->msac.rng); \
  |  |  515|  85.8k|                *level = (uint8_t) (tok + (3 << 6)); \
  |  |  516|  85.8k|                cf[rc_i] = (tok << 11) | rc; \
  |  |  517|  85.8k|                rc = rc_i; \
  |  |  518|  1.87M|            } else { \
  |  |  519|  1.87M|                /* 0x1 for tok, 0x7ff as bitmask for rc, 0x41 for level_tok */ \
  |  |  520|  1.87M|                tok *= 0x17ff41; \
  |  |  521|  1.87M|                *level = (uint8_t) tok; \
  |  |  522|  1.87M|                /* tok ? (tok << 11) | rc : 0 */ \
  |  |  523|  1.87M|                tok = (tok >> 9) & (rc + ~0x7ffu); \
  |  |  524|  1.87M|                if (tok) rc = rc_i; \
  |  |  ------------------
  |  |  |  Branch (524:21): [True: 479k, False: 1.39M]
  |  |  ------------------
  |  |  525|  1.87M|                cf[rc_i] = tok; \
  |  |  526|  1.87M|            } \
  |  |  527|  1.96M|        } \
  |  |  528|  96.6k|        /* dc */ \
  |  |  529|  96.6k|        ctx = (tx_class == TX_CLASS_2D) ? 0 : \
  |  |  ------------------
  |  |  |  Branch (529:15): [Folded, False: 96.6k]
  |  |  ------------------
  |  |  530|  96.6k|            get_lo_ctx(levels, tx_class, &mag, lo_ctx_offsets, 0, 0, stride); \
  |  |  531|  96.6k|        dc_tok = dav1d_msac_decode_symbol_adapt4(&ts->msac, lo_cdf[ctx], 3); \
  |  |  ------------------
  |  |  |  |   47|  96.6k|#define dav1d_msac_decode_symbol_adapt4  dav1d_msac_decode_symbol_adapt4_sse2
  |  |  ------------------
  |  |  532|  96.6k|        if (dbg) \
  |  |  ------------------
  |  |  |  Branch (532:13): [Folded, False: 96.6k]
  |  |  ------------------
  |  |  533|  96.6k|            printf("Post-dc_lo_tok[%d][%d][%d][%d]: r=%d\n", \
  |  |  534|      0|                   t_dim->ctx, chroma, ctx, dc_tok, ts->msac.rng); \
  |  |  535|  96.6k|        if (dc_tok == 3) { \
  |  |  ------------------
  |  |  |  Branch (535:13): [True: 10.1k, False: 86.5k]
  |  |  ------------------
  |  |  536|  10.1k|            if (tx_class == TX_CLASS_2D) \
  |  |  ------------------
  |  |  |  Branch (536:17): [Folded, False: 10.1k]
  |  |  ------------------
  |  |  537|  10.1k|                mag = levels[0 * stride + 1] + levels[1 * stride + 0] + \
  |  |  538|      0|                      levels[1 * stride + 1]; \
  |  |  539|  10.1k|            mag &= 63; \
  |  |  540|  10.1k|            ctx = mag > 12 ? 6 : (mag + 1) >> 1; \
  |  |  ------------------
  |  |  |  Branch (540:19): [True: 1.90k, False: 8.20k]
  |  |  ------------------
  |  |  541|  10.1k|            dc_tok = dav1d_msac_decode_hi_tok(&ts->msac, hi_cdf[ctx]); \
  |  |  ------------------
  |  |  |  |   49|  10.1k|#define dav1d_msac_decode_hi_tok         dav1d_msac_decode_hi_tok_sse2
  |  |  ------------------
  |  |  542|  10.1k|            if (dbg) \
  |  |  ------------------
  |  |  |  Branch (542:17): [Folded, False: 10.1k]
  |  |  ------------------
  |  |  543|  10.1k|                printf("Post-dc_hi_tok[%d][%d][0][%d]: r=%d\n", \
  |  |  544|      0|                       imin(t_dim->ctx, 3), chroma, dc_tok, ts->msac.rng); \
  |  |  545|  10.1k|        } \
  |  |  546|  96.6k|        break
  ------------------
  |  Branch (575:13): [True: 1.96M, False: 0]
  |  Branch (575:13): [True: 1.96M, False: 0]
  ------------------
  576|  96.6k|        }
  577|      0|#undef DECODE_COEFS_CLASS
  578|      0|        default: assert(0);
  ------------------
  |  Branch (578:9): [True: 0, False: 2.76M]
  |  Branch (578:18): [Folded, False: 0]
  ------------------
  579|  2.76M|        }
  580|  2.76M|    } else { // dc-only
  581|   936k|        int tok_br = dav1d_msac_decode_symbol_adapt4(&ts->msac, eob_cdf[0], 2);
  ------------------
  |  |   47|   936k|#define dav1d_msac_decode_symbol_adapt4  dav1d_msac_decode_symbol_adapt4_sse2
  ------------------
  582|   936k|        dc_tok = 1 + tok_br;
  583|   936k|        if (dbg)
  ------------------
  |  Branch (583:13): [Folded, False: 936k]
  ------------------
  584|      0|            printf("Post-dc_lo_tok[%d][%d][%d][%d]: r=%d\n",
  585|      0|                   t_dim->ctx, chroma, 0, dc_tok, ts->msac.rng);
  586|   936k|        if (tok_br == 2) {
  ------------------
  |  Branch (586:13): [True: 60.1k, False: 876k]
  ------------------
  587|  60.1k|            dc_tok = dav1d_msac_decode_hi_tok(&ts->msac, hi_cdf[0]);
  ------------------
  |  |   49|  60.1k|#define dav1d_msac_decode_hi_tok         dav1d_msac_decode_hi_tok_sse2
  ------------------
  588|  60.1k|            if (dbg)
  ------------------
  |  Branch (588:17): [Folded, False: 60.1k]
  ------------------
  589|      0|                printf("Post-dc_hi_tok[%d][%d][0][%d]: r=%d\n",
  590|      0|                       imin(t_dim->ctx, 3), chroma, dc_tok, ts->msac.rng);
  591|  60.1k|        }
  592|   936k|        rc = 0;
  593|   936k|    }
  594|       |
  595|       |    // residual and sign
  596|  3.69M|    const uint16_t *const dq_tbl = ts->dq[b->seg_id][plane];
  597|  3.69M|    const uint8_t *const qm_tbl = *txtp < IDTX ? f->qm[tx][plane] : NULL;
  ------------------
  |  Branch (597:35): [True: 2.66M, False: 1.03M]
  ------------------
  598|  3.69M|    const int dq_shift = imax(0, t_dim->ctx - 2);
  599|  3.69M|    const int cf_max = ~(~127U << (BITDEPTH == 8 ? 8 : f->cur.p.bpc));
  ------------------
  |  Branch (599:36): [True: 1.63M, Folded]
  ------------------
  600|  3.69M|    unsigned cul_level, dc_sign_level;
  601|       |
  602|  3.69M|    if (!dc_tok) {
  ------------------
  |  Branch (602:9): [True: 694k, False: 3.00M]
  ------------------
  603|   694k|        cul_level = 0;
  604|   694k|        dc_sign_level = 1 << 6;
  605|   694k|        if (qm_tbl) goto ac_qm;
  ------------------
  |  Branch (605:13): [True: 73.1k, False: 620k]
  ------------------
  606|   620k|        goto ac_noqm;
  607|   694k|    }
  608|       |
  609|  3.00M|    const int dc_sign_ctx = get_dc_sign_ctx(tx, a, l);
  610|  3.00M|    uint16_t *const dc_sign_cdf = ts->cdf.coef.dc_sign[chroma][dc_sign_ctx];
  611|  3.00M|    const int dc_sign = dav1d_msac_decode_bool_adapt(&ts->msac, dc_sign_cdf);
  ------------------
  |  |   52|  3.00M|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  612|  3.00M|    if (dbg)
  ------------------
  |  Branch (612:9): [Folded, False: 3.00M]
  ------------------
  613|      0|        printf("Post-dc_sign[%d][%d][%d]: r=%d\n",
  614|      0|               chroma, dc_sign_ctx, dc_sign, ts->msac.rng);
  615|       |
  616|  3.00M|    int dc_dq = dq_tbl[0];
  617|  3.00M|    dc_sign_level = (dc_sign - 1) & (2 << 6);
  618|       |
  619|  3.00M|    if (qm_tbl) {
  ------------------
  |  Branch (619:9): [True: 378k, False: 2.62M]
  ------------------
  620|   378k|        dc_dq = (dc_dq * qm_tbl[0] + 16) >> 5;
  621|       |
  622|   378k|        if (dc_tok == 15) {
  ------------------
  |  Branch (622:13): [True: 21.0k, False: 357k]
  ------------------
  623|  21.0k|            dc_tok = read_golomb(&ts->msac) + 15;
  624|  21.0k|            if (dbg)
  ------------------
  |  Branch (624:17): [Folded, False: 21.0k]
  ------------------
  625|      0|                printf("Post-dc_residual[%d->%d]: r=%d\n",
  626|      0|                       dc_tok - 15, dc_tok, ts->msac.rng);
  627|       |
  628|  21.0k|            dc_tok &= 0xfffff;
  629|  21.0k|            dc_dq = (dc_dq * dc_tok) & 0xffffff;
  630|   357k|        } else {
  631|   357k|            dc_dq *= dc_tok;
  632|   357k|            assert(dc_dq <= 0xffffff);
  ------------------
  |  Branch (632:13): [True: 357k, False: 0]
  ------------------
  633|   357k|        }
  634|   378k|        cul_level = dc_tok;
  635|   378k|        dc_dq >>= dq_shift;
  636|   378k|        dc_dq = umin(dc_dq, cf_max + dc_sign);
  637|   378k|        cf[0] = (coef) (dc_sign ? -dc_dq : dc_dq);
  ------------------
  |  Branch (637:25): [True: 171k, False: 207k]
  ------------------
  638|       |
  639|   653k|        if (rc) ac_qm: {
  ------------------
  |  Branch (639:13): [True: 290k, False: 88.5k]
  ------------------
  640|   653k|            const unsigned ac_dq = dq_tbl[1];
  641|  7.49M|            do {
  642|  7.49M|                const int sign = dav1d_msac_decode_bool_equi(&ts->msac);
  ------------------
  |  |   53|  7.49M|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
  643|  7.49M|                if (dbg)
  ------------------
  |  Branch (643:21): [Folded, False: 7.49M]
  ------------------
  644|      0|                    printf("Post-sign[%d=%d]: r=%d\n", rc, sign, ts->msac.rng);
  645|  7.49M|                const unsigned rc_tok = cf[rc];
  646|  7.49M|                unsigned tok, dq = (ac_dq * qm_tbl[rc] + 16) >> 5;
  647|  7.49M|                int dq_sat;
  648|       |
  649|  7.49M|                if (rc_tok >= (15 << 11)) {
  ------------------
  |  Branch (649:21): [True: 546k, False: 6.94M]
  ------------------
  650|   546k|                    tok = read_golomb(&ts->msac) + 15;
  651|   546k|                    if (dbg)
  ------------------
  |  Branch (651:25): [Folded, False: 546k]
  ------------------
  652|      0|                        printf("Post-residual[%d=%d->%d]: r=%d\n",
  653|      0|                               rc, tok - 15, tok, ts->msac.rng);
  654|       |
  655|   546k|                    tok &= 0xfffff;
  656|   546k|                    dq = (dq * tok) & 0xffffff;
  657|  6.94M|                } else {
  658|  6.94M|                    tok = rc_tok >> 11;
  659|  6.94M|                    dq *= tok;
  660|  6.94M|                    assert(dq <= 0xffffff);
  ------------------
  |  Branch (660:21): [True: 6.94M, False: 0]
  ------------------
  661|  6.94M|                }
  662|  7.49M|                cul_level += tok;
  663|  7.49M|                dq >>= dq_shift;
  664|  7.49M|                dq_sat = umin(dq, cf_max + sign);
  665|  7.49M|                cf[rc] = (coef) (sign ? -dq_sat : dq_sat);
  ------------------
  |  Branch (665:34): [True: 3.86M, False: 3.63M]
  ------------------
  666|       |
  667|  7.49M|                rc = rc_tok & 0x3ff;
  668|  7.49M|            } while (rc);
  ------------------
  |  Branch (668:22): [True: 7.13M, False: 363k]
  ------------------
  669|   653k|        }
  670|  2.62M|    } else {
  671|       |        // non-qmatrix is the common case and allows for additional optimizations
  672|  2.62M|        if (dc_tok == 15) {
  ------------------
  |  Branch (672:13): [True: 102k, False: 2.52M]
  ------------------
  673|   102k|            dc_tok = read_golomb(&ts->msac) + 15;
  674|   102k|            if (dbg)
  ------------------
  |  Branch (674:17): [Folded, False: 102k]
  ------------------
  675|      0|                printf("Post-dc_residual[%d->%d]: r=%d\n",
  676|      0|                       dc_tok - 15, dc_tok, ts->msac.rng);
  677|       |
  678|   102k|            dc_tok &= 0xfffff;
  679|   102k|            dc_dq = ((dc_dq * dc_tok) & 0xffffff) >> dq_shift;
  680|   102k|            dc_dq = umin(dc_dq, cf_max + dc_sign);
  681|  2.52M|        } else {
  682|  2.52M|            dc_dq = ((dc_dq * dc_tok) >> dq_shift);
  683|  2.52M|            assert(dc_dq <= cf_max);
  ------------------
  |  Branch (683:13): [True: 2.52M, False: 0]
  ------------------
  684|  2.52M|        }
  685|  2.62M|        cul_level = dc_tok;
  686|  2.62M|        cf[0] = (coef) (dc_sign ? -dc_dq : dc_dq);
  ------------------
  |  Branch (686:25): [True: 1.32M, False: 1.30M]
  ------------------
  687|       |
  688|  4.17M|        if (rc) ac_noqm: {
  ------------------
  |  Branch (688:13): [True: 1.77M, False: 848k]
  ------------------
  689|  4.17M|            const unsigned ac_dq = dq_tbl[1];
  690|  22.2M|            do {
  691|  22.2M|                const int sign = dav1d_msac_decode_bool_equi(&ts->msac);
  ------------------
  |  |   53|  22.2M|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
  692|  22.2M|                if (dbg)
  ------------------
  |  Branch (692:21): [Folded, False: 22.2M]
  ------------------
  693|      0|                    printf("Post-sign[%d=%d]: r=%d\n", rc, sign, ts->msac.rng);
  694|  22.2M|                const unsigned rc_tok = cf[rc];
  695|  22.2M|                unsigned tok;
  696|  22.2M|                int dq;
  697|       |
  698|       |                // residual
  699|  22.2M|                if (rc_tok >= (15 << 11)) {
  ------------------
  |  Branch (699:21): [True: 524k, False: 21.7M]
  ------------------
  700|   524k|                    tok = read_golomb(&ts->msac) + 15;
  701|   524k|                    if (dbg)
  ------------------
  |  Branch (701:25): [Folded, False: 524k]
  ------------------
  702|      0|                        printf("Post-residual[%d=%d->%d]: r=%d\n",
  703|      0|                               rc, tok - 15, tok, ts->msac.rng);
  704|       |
  705|       |                    // coefficient parsing, see 5.11.39
  706|   524k|                    tok &= 0xfffff;
  707|       |
  708|       |                    // dequant, see 7.12.3
  709|   524k|                    dq = ((ac_dq * tok) & 0xffffff) >> dq_shift;
  710|   524k|                    dq = umin(dq, cf_max + sign);
  711|  21.7M|                } else {
  712|       |                    // cannot exceed cf_max, so we can avoid the clipping
  713|  21.7M|                    tok = rc_tok >> 11;
  714|  21.7M|                    dq = ((ac_dq * tok) >> dq_shift);
  715|  21.7M|                    assert(dq <= cf_max);
  ------------------
  |  Branch (715:21): [True: 21.7M, False: 0]
  ------------------
  716|  21.7M|                }
  717|  22.2M|                cul_level += tok;
  718|  22.2M|                cf[rc] = (coef) (sign ? -dq : dq);
  ------------------
  |  Branch (718:34): [True: 11.3M, False: 10.9M]
  ------------------
  719|       |
  720|  22.2M|                rc = rc_tok & 0x3ff; // next non-zero rc, zero if eob
  721|  22.2M|            } while (rc);
  ------------------
  |  Branch (721:22): [True: 19.8M, False: 2.39M]
  ------------------
  722|  4.17M|        }
  723|  2.62M|    }
  724|       |
  725|       |    // context
  726|  3.69M|    *res_ctx = umin(cul_level, 63) | dc_sign_level;
  727|       |
  728|  3.69M|    return eob;
  729|  3.00M|}
recon_tmpl.c:get_skip_ctx:
   65|  7.65M|{
   66|  7.65M|    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
   67|       |
   68|  7.65M|    if (chroma) {
  ------------------
  |  Branch (68:9): [True: 4.00M, False: 3.64M]
  ------------------
   69|  4.00M|        const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420;
   70|  4.00M|        const int ss_hor = layout != DAV1D_PIXEL_LAYOUT_I444;
   71|  4.00M|        const int not_one_blk = b_dim[2] - (!!b_dim[2] && ss_hor) > t_dim->lw ||
  ------------------
  |  Branch (71:33): [True: 1.19M, False: 2.81M]
  |  Branch (71:45): [True: 3.61M, False: 395k]
  |  Branch (71:59): [True: 601k, False: 3.00M]
  ------------------
   72|  2.81M|                                b_dim[3] - (!!b_dim[3] && ss_ver) > t_dim->lh;
  ------------------
  |  Branch (72:33): [True: 71.5k, False: 2.74M]
  |  Branch (72:45): [True: 2.17M, False: 641k]
  |  Branch (72:59): [True: 326k, False: 1.84M]
  ------------------
   73|  4.00M|        unsigned ca, cl;
   74|       |
   75|  4.00M|#define MERGE_CTX(dir, type, no_val) \
   76|  4.00M|        c##dir = *(const type *) dir != no_val; \
   77|  4.00M|        break
   78|       |
   79|  4.00M|        switch (t_dim->lw) {
   80|       |        /* For some reason the MSVC CRT _wassert() function is not flagged as
   81|       |         * __declspec(noreturn), so when using those headers the compiler will
   82|       |         * expect execution to continue after an assertion has been triggered
   83|       |         * and will therefore complain about the use of uninitialized variables
   84|       |         * when compiled in debug mode if we put the default case at the end. */
   85|      0|        default: assert(0); /* fall-through */
  ------------------
  |  Branch (85:9): [True: 0, False: 4.00M]
  |  Branch (85:18): [Folded, False: 0]
  ------------------
   86|   995k|        case TX_4X4:   MERGE_CTX(a, uint8_t,  0x40);
  ------------------
  |  |   76|   995k|        c##dir = *(const type *) dir != no_val; \
  |  |   77|   995k|        break
  ------------------
  |  Branch (86:9): [True: 995k, False: 3.01M]
  ------------------
   87|   866k|        case TX_8X8:   MERGE_CTX(a, uint16_t, 0x4040);
  ------------------
  |  |   76|   866k|        c##dir = *(const type *) dir != no_val; \
  |  |   77|   866k|        break
  ------------------
  |  Branch (87:9): [True: 866k, False: 3.13M]
  ------------------
   88|   920k|        case TX_16X16: MERGE_CTX(a, uint32_t, 0x40404040U);
  ------------------
  |  |   76|   920k|        c##dir = *(const type *) dir != no_val; \
  |  |   77|   920k|        break
  ------------------
  |  Branch (88:9): [True: 920k, False: 3.08M]
  ------------------
   89|  1.22M|        case TX_32X32: MERGE_CTX(a, uint64_t, 0x4040404040404040ULL);
  ------------------
  |  |   76|  1.22M|        c##dir = *(const type *) dir != no_val; \
  |  |   77|  1.22M|        break
  ------------------
  |  Branch (89:9): [True: 1.22M, False: 2.78M]
  ------------------
   90|  4.00M|        }
   91|  4.00M|        switch (t_dim->lh) {
   92|      0|        default: assert(0); /* fall-through */
  ------------------
  |  Branch (92:9): [True: 0, False: 4.00M]
  |  Branch (92:18): [Folded, False: 0]
  ------------------
   93|  1.26M|        case TX_4X4:   MERGE_CTX(l, uint8_t,  0x40);
  ------------------
  |  |   76|  1.26M|        c##dir = *(const type *) dir != no_val; \
  |  |   77|  1.26M|        break
  ------------------
  |  Branch (93:9): [True: 1.26M, False: 2.73M]
  ------------------
   94|   984k|        case TX_8X8:   MERGE_CTX(l, uint16_t, 0x4040);
  ------------------
  |  |   76|   984k|        c##dir = *(const type *) dir != no_val; \
  |  |   77|   984k|        break
  ------------------
  |  Branch (94:9): [True: 984k, False: 3.02M]
  ------------------
   95|   744k|        case TX_16X16: MERGE_CTX(l, uint32_t, 0x40404040U);
  ------------------
  |  |   76|   744k|        c##dir = *(const type *) dir != no_val; \
  |  |   77|   744k|        break
  ------------------
  |  Branch (95:9): [True: 744k, False: 3.26M]
  ------------------
   96|  1.01M|        case TX_32X32: MERGE_CTX(l, uint64_t, 0x4040404040404040ULL);
  ------------------
  |  |   76|  1.01M|        c##dir = *(const type *) dir != no_val; \
  |  |   77|  1.01M|        break
  ------------------
  |  Branch (96:9): [True: 1.01M, False: 2.99M]
  ------------------
   97|  4.00M|        }
   98|  4.00M|#undef MERGE_CTX
   99|       |
  100|  4.00M|        return 7 + not_one_blk * 3 + ca + cl;
  101|  4.00M|    } else if (b_dim[2] == t_dim->lw && b_dim[3] == t_dim->lh) {
  ------------------
  |  Branch (101:16): [True: 1.83M, False: 1.81M]
  |  Branch (101:41): [True: 1.73M, False: 97.7k]
  ------------------
  102|  1.73M|        return 0;
  103|  1.90M|    } else {
  104|  1.90M|        unsigned la, ll;
  105|       |
  106|  1.90M|#define MERGE_CTX(dir, type, tx) \
  107|  1.90M|        if (tx == TX_64X64) { \
  108|  1.90M|            uint64_t tmp = *(const uint64_t *) dir; \
  109|  1.90M|            tmp |= *(const uint64_t *) &dir[8]; \
  110|  1.90M|            l##dir = (unsigned) (tmp >> 32) | (unsigned) tmp; \
  111|  1.90M|        } else \
  112|  1.90M|            l##dir = *(const type *) dir; \
  113|  1.90M|        if (tx == TX_32X32) l##dir |= *(const type *) &dir[sizeof(type)]; \
  114|  1.90M|        if (tx >= TX_16X16) l##dir |= l##dir >> 16; \
  115|  1.90M|        if (tx >= TX_8X8)   l##dir |= l##dir >> 8; \
  116|  1.90M|        break
  117|       |
  118|  1.90M|        switch (t_dim->lw) {
  119|      0|        default: assert(0); /* fall-through */
  ------------------
  |  Branch (119:9): [True: 0, False: 1.90M]
  |  Branch (119:18): [Folded, False: 0]
  ------------------
  120|   951k|        case TX_4X4:   MERGE_CTX(a, uint8_t,  TX_4X4);
  ------------------
  |  |  107|   951k|        if (tx == TX_64X64) { \
  |  |  ------------------
  |  |  |  Branch (107:13): [Folded, False: 951k]
  |  |  ------------------
  |  |  108|      0|            uint64_t tmp = *(const uint64_t *) dir; \
  |  |  109|      0|            tmp |= *(const uint64_t *) &dir[8]; \
  |  |  110|      0|            l##dir = (unsigned) (tmp >> 32) | (unsigned) tmp; \
  |  |  111|      0|        } else \
  |  |  112|   951k|            l##dir = *(const type *) dir; \
  |  |  113|   951k|        if (tx == TX_32X32) l##dir |= *(const type *) &dir[sizeof(type)]; \
  |  |  ------------------
  |  |  |  Branch (113:13): [Folded, False: 951k]
  |  |  ------------------
  |  |  114|   951k|        if (tx >= TX_16X16) l##dir |= l##dir >> 16; \
  |  |  ------------------
  |  |  |  Branch (114:13): [Folded, False: 951k]
  |  |  ------------------
  |  |  115|   951k|        if (tx >= TX_8X8)   l##dir |= l##dir >> 8; \
  |  |  ------------------
  |  |  |  Branch (115:13): [Folded, False: 951k]
  |  |  ------------------
  |  |  116|   951k|        break
  ------------------
  |  Branch (120:9): [True: 951k, False: 956k]
  ------------------
  121|   523k|        case TX_8X8:   MERGE_CTX(a, uint16_t, TX_8X8);
  ------------------
  |  |  107|   523k|        if (tx == TX_64X64) { \
  |  |  ------------------
  |  |  |  Branch (107:13): [Folded, False: 523k]
  |  |  ------------------
  |  |  108|      0|            uint64_t tmp = *(const uint64_t *) dir; \
  |  |  109|      0|            tmp |= *(const uint64_t *) &dir[8]; \
  |  |  110|      0|            l##dir = (unsigned) (tmp >> 32) | (unsigned) tmp; \
  |  |  111|      0|        } else \
  |  |  112|   523k|            l##dir = *(const type *) dir; \
  |  |  113|   523k|        if (tx == TX_32X32) l##dir |= *(const type *) &dir[sizeof(type)]; \
  |  |  ------------------
  |  |  |  Branch (113:13): [Folded, False: 523k]
  |  |  ------------------
  |  |  114|   523k|        if (tx >= TX_16X16) l##dir |= l##dir >> 16; \
  |  |  ------------------
  |  |  |  Branch (114:13): [Folded, False: 523k]
  |  |  ------------------
  |  |  115|   523k|        if (tx >= TX_8X8)   l##dir |= l##dir >> 8; \
  |  |  ------------------
  |  |  |  Branch (115:13): [True: 523k, Folded]
  |  |  ------------------
  |  |  116|   523k|        break
  ------------------
  |  Branch (121:9): [True: 523k, False: 1.38M]
  ------------------
  122|   296k|        case TX_16X16: MERGE_CTX(a, uint32_t, TX_16X16);
  ------------------
  |  |  107|   296k|        if (tx == TX_64X64) { \
  |  |  ------------------
  |  |  |  Branch (107:13): [Folded, False: 296k]
  |  |  ------------------
  |  |  108|      0|            uint64_t tmp = *(const uint64_t *) dir; \
  |  |  109|      0|            tmp |= *(const uint64_t *) &dir[8]; \
  |  |  110|      0|            l##dir = (unsigned) (tmp >> 32) | (unsigned) tmp; \
  |  |  111|      0|        } else \
  |  |  112|   296k|            l##dir = *(const type *) dir; \
  |  |  113|   296k|        if (tx == TX_32X32) l##dir |= *(const type *) &dir[sizeof(type)]; \
  |  |  ------------------
  |  |  |  Branch (113:13): [Folded, False: 296k]
  |  |  ------------------
  |  |  114|   296k|        if (tx >= TX_16X16) l##dir |= l##dir >> 16; \
  |  |  ------------------
  |  |  |  Branch (114:13): [True: 296k, Folded]
  |  |  ------------------
  |  |  115|   296k|        if (tx >= TX_8X8)   l##dir |= l##dir >> 8; \
  |  |  ------------------
  |  |  |  Branch (115:13): [True: 296k, Folded]
  |  |  ------------------
  |  |  116|   296k|        break
  ------------------
  |  Branch (122:9): [True: 296k, False: 1.61M]
  ------------------
  123|  28.9k|        case TX_32X32: MERGE_CTX(a, uint32_t, TX_32X32);
  ------------------
  |  |  107|  28.9k|        if (tx == TX_64X64) { \
  |  |  ------------------
  |  |  |  Branch (107:13): [Folded, False: 28.9k]
  |  |  ------------------
  |  |  108|      0|            uint64_t tmp = *(const uint64_t *) dir; \
  |  |  109|      0|            tmp |= *(const uint64_t *) &dir[8]; \
  |  |  110|      0|            l##dir = (unsigned) (tmp >> 32) | (unsigned) tmp; \
  |  |  111|      0|        } else \
  |  |  112|  28.9k|            l##dir = *(const type *) dir; \
  |  |  113|  28.9k|        if (tx == TX_32X32) l##dir |= *(const type *) &dir[sizeof(type)]; \
  |  |  ------------------
  |  |  |  Branch (113:13): [True: 28.9k, Folded]
  |  |  ------------------
  |  |  114|  28.9k|        if (tx >= TX_16X16) l##dir |= l##dir >> 16; \
  |  |  ------------------
  |  |  |  Branch (114:13): [True: 28.9k, Folded]
  |  |  ------------------
  |  |  115|  28.9k|        if (tx >= TX_8X8)   l##dir |= l##dir >> 8; \
  |  |  ------------------
  |  |  |  Branch (115:13): [True: 28.9k, Folded]
  |  |  ------------------
  |  |  116|  28.9k|        break
  ------------------
  |  Branch (123:9): [True: 28.9k, False: 1.87M]
  ------------------
  124|   107k|        case TX_64X64: MERGE_CTX(a, uint32_t, TX_64X64);
  ------------------
  |  |  107|   107k|        if (tx == TX_64X64) { \
  |  |  ------------------
  |  |  |  Branch (107:13): [True: 107k, Folded]
  |  |  ------------------
  |  |  108|   107k|            uint64_t tmp = *(const uint64_t *) dir; \
  |  |  109|   107k|            tmp |= *(const uint64_t *) &dir[8]; \
  |  |  110|   107k|            l##dir = (unsigned) (tmp >> 32) | (unsigned) tmp; \
  |  |  111|   107k|        } else \
  |  |  112|   107k|            l##dir = *(const type *) dir; \
  |  |  113|   107k|        if (tx == TX_32X32) l##dir |= *(const type *) &dir[sizeof(type)]; \
  |  |  ------------------
  |  |  |  Branch (113:13): [Folded, False: 107k]
  |  |  ------------------
  |  |  114|   107k|        if (tx >= TX_16X16) l##dir |= l##dir >> 16; \
  |  |  ------------------
  |  |  |  Branch (114:13): [True: 107k, Folded]
  |  |  ------------------
  |  |  115|   107k|        if (tx >= TX_8X8)   l##dir |= l##dir >> 8; \
  |  |  ------------------
  |  |  |  Branch (115:13): [True: 107k, Folded]
  |  |  ------------------
  |  |  116|   107k|        break
  ------------------
  |  Branch (124:9): [True: 107k, False: 1.80M]
  ------------------
  125|  1.90M|        }
  126|  1.90M|        switch (t_dim->lh) {
  127|      0|        default: assert(0); /* fall-through */
  ------------------
  |  Branch (127:9): [True: 0, False: 1.90M]
  |  Branch (127:18): [Folded, False: 0]
  ------------------
  128|   961k|        case TX_4X4:   MERGE_CTX(l, uint8_t,  TX_4X4);
  ------------------
  |  |  107|   961k|        if (tx == TX_64X64) { \
  |  |  ------------------
  |  |  |  Branch (107:13): [Folded, False: 961k]
  |  |  ------------------
  |  |  108|      0|            uint64_t tmp = *(const uint64_t *) dir; \
  |  |  109|      0|            tmp |= *(const uint64_t *) &dir[8]; \
  |  |  110|      0|            l##dir = (unsigned) (tmp >> 32) | (unsigned) tmp; \
  |  |  111|      0|        } else \
  |  |  112|   961k|            l##dir = *(const type *) dir; \
  |  |  113|   961k|        if (tx == TX_32X32) l##dir |= *(const type *) &dir[sizeof(type)]; \
  |  |  ------------------
  |  |  |  Branch (113:13): [Folded, False: 961k]
  |  |  ------------------
  |  |  114|   961k|        if (tx >= TX_16X16) l##dir |= l##dir >> 16; \
  |  |  ------------------
  |  |  |  Branch (114:13): [Folded, False: 961k]
  |  |  ------------------
  |  |  115|   961k|        if (tx >= TX_8X8)   l##dir |= l##dir >> 8; \
  |  |  ------------------
  |  |  |  Branch (115:13): [Folded, False: 961k]
  |  |  ------------------
  |  |  116|   961k|        break
  ------------------
  |  Branch (128:9): [True: 961k, False: 946k]
  ------------------
  129|   517k|        case TX_8X8:   MERGE_CTX(l, uint16_t, TX_8X8);
  ------------------
  |  |  107|   517k|        if (tx == TX_64X64) { \
  |  |  ------------------
  |  |  |  Branch (107:13): [Folded, False: 517k]
  |  |  ------------------
  |  |  108|      0|            uint64_t tmp = *(const uint64_t *) dir; \
  |  |  109|      0|            tmp |= *(const uint64_t *) &dir[8]; \
  |  |  110|      0|            l##dir = (unsigned) (tmp >> 32) | (unsigned) tmp; \
  |  |  111|      0|        } else \
  |  |  112|   517k|            l##dir = *(const type *) dir; \
  |  |  113|   517k|        if (tx == TX_32X32) l##dir |= *(const type *) &dir[sizeof(type)]; \
  |  |  ------------------
  |  |  |  Branch (113:13): [Folded, False: 517k]
  |  |  ------------------
  |  |  114|   517k|        if (tx >= TX_16X16) l##dir |= l##dir >> 16; \
  |  |  ------------------
  |  |  |  Branch (114:13): [Folded, False: 517k]
  |  |  ------------------
  |  |  115|   517k|        if (tx >= TX_8X8)   l##dir |= l##dir >> 8; \
  |  |  ------------------
  |  |  |  Branch (115:13): [True: 517k, Folded]
  |  |  ------------------
  |  |  116|   517k|        break
  ------------------
  |  Branch (129:9): [True: 517k, False: 1.39M]
  ------------------
  130|   292k|        case TX_16X16: MERGE_CTX(l, uint32_t, TX_16X16);
  ------------------
  |  |  107|   292k|        if (tx == TX_64X64) { \
  |  |  ------------------
  |  |  |  Branch (107:13): [Folded, False: 292k]
  |  |  ------------------
  |  |  108|      0|            uint64_t tmp = *(const uint64_t *) dir; \
  |  |  109|      0|            tmp |= *(const uint64_t *) &dir[8]; \
  |  |  110|      0|            l##dir = (unsigned) (tmp >> 32) | (unsigned) tmp; \
  |  |  111|      0|        } else \
  |  |  112|   292k|            l##dir = *(const type *) dir; \
  |  |  113|   292k|        if (tx == TX_32X32) l##dir |= *(const type *) &dir[sizeof(type)]; \
  |  |  ------------------
  |  |  |  Branch (113:13): [Folded, False: 292k]
  |  |  ------------------
  |  |  114|   292k|        if (tx >= TX_16X16) l##dir |= l##dir >> 16; \
  |  |  ------------------
  |  |  |  Branch (114:13): [True: 292k, Folded]
  |  |  ------------------
  |  |  115|   292k|        if (tx >= TX_8X8)   l##dir |= l##dir >> 8; \
  |  |  ------------------
  |  |  |  Branch (115:13): [True: 292k, Folded]
  |  |  ------------------
  |  |  116|   292k|        break
  ------------------
  |  Branch (130:9): [True: 292k, False: 1.61M]
  ------------------
  131|  28.8k|        case TX_32X32: MERGE_CTX(l, uint32_t, TX_32X32);
  ------------------
  |  |  107|  28.8k|        if (tx == TX_64X64) { \
  |  |  ------------------
  |  |  |  Branch (107:13): [Folded, False: 28.8k]
  |  |  ------------------
  |  |  108|      0|            uint64_t tmp = *(const uint64_t *) dir; \
  |  |  109|      0|            tmp |= *(const uint64_t *) &dir[8]; \
  |  |  110|      0|            l##dir = (unsigned) (tmp >> 32) | (unsigned) tmp; \
  |  |  111|      0|        } else \
  |  |  112|  28.8k|            l##dir = *(const type *) dir; \
  |  |  113|  28.8k|        if (tx == TX_32X32) l##dir |= *(const type *) &dir[sizeof(type)]; \
  |  |  ------------------
  |  |  |  Branch (113:13): [True: 28.8k, Folded]
  |  |  ------------------
  |  |  114|  28.8k|        if (tx >= TX_16X16) l##dir |= l##dir >> 16; \
  |  |  ------------------
  |  |  |  Branch (114:13): [True: 28.8k, Folded]
  |  |  ------------------
  |  |  115|  28.8k|        if (tx >= TX_8X8)   l##dir |= l##dir >> 8; \
  |  |  ------------------
  |  |  |  Branch (115:13): [True: 28.8k, Folded]
  |  |  ------------------
  |  |  116|  28.8k|        break
  ------------------
  |  Branch (131:9): [True: 28.8k, False: 1.87M]
  ------------------
  132|   107k|        case TX_64X64: MERGE_CTX(l, uint32_t, TX_64X64);
  ------------------
  |  |  107|   107k|        if (tx == TX_64X64) { \
  |  |  ------------------
  |  |  |  Branch (107:13): [True: 107k, Folded]
  |  |  ------------------
  |  |  108|   107k|            uint64_t tmp = *(const uint64_t *) dir; \
  |  |  109|   107k|            tmp |= *(const uint64_t *) &dir[8]; \
  |  |  110|   107k|            l##dir = (unsigned) (tmp >> 32) | (unsigned) tmp; \
  |  |  111|   107k|        } else \
  |  |  112|   107k|            l##dir = *(const type *) dir; \
  |  |  113|   107k|        if (tx == TX_32X32) l##dir |= *(const type *) &dir[sizeof(type)]; \
  |  |  ------------------
  |  |  |  Branch (113:13): [Folded, False: 107k]
  |  |  ------------------
  |  |  114|   107k|        if (tx >= TX_16X16) l##dir |= l##dir >> 16; \
  |  |  ------------------
  |  |  |  Branch (114:13): [True: 107k, Folded]
  |  |  ------------------
  |  |  115|   107k|        if (tx >= TX_8X8)   l##dir |= l##dir >> 8; \
  |  |  ------------------
  |  |  |  Branch (115:13): [True: 107k, Folded]
  |  |  ------------------
  |  |  116|   107k|        break
  ------------------
  |  Branch (132:9): [True: 107k, False: 1.80M]
  ------------------
  133|  1.90M|        }
  134|  1.90M|#undef MERGE_CTX
  135|       |
  136|  1.90M|        return dav1d_skip_ctx[umin(la & 0x3F, 4)][umin(ll & 0x3F, 4)];
  137|  1.90M|    }
  138|  7.65M|}
recon_tmpl.c:get_lo_ctx:
  304|  78.3M|{
  305|  78.3M|    unsigned mag = levels[0 * stride + 1] + levels[1 * stride + 0];
  306|  78.3M|    unsigned offset;
  307|  78.3M|    if (tx_class == TX_CLASS_2D) {
  ------------------
  |  Branch (307:9): [True: 72.5M, False: 5.74M]
  ------------------
  308|  72.5M|        mag += levels[1 * stride + 1];
  309|  72.5M|        *hi_mag = mag;
  310|  72.5M|        mag += levels[0 * stride + 2] + levels[2 * stride + 0];
  311|  72.5M|        offset = ctx_offsets[umin(y, 4)][umin(x, 4)];
  312|  72.5M|    } else {
  313|  5.74M|        mag += levels[0 * stride + 2];
  314|  5.74M|        *hi_mag = mag;
  315|  5.74M|        mag += levels[0 * stride + 3] + levels[0 * stride + 4];
  316|  5.74M|        offset = 26 + (y > 1 ? 10 : y * 5);
  ------------------
  |  Branch (316:24): [True: 2.83M, False: 2.91M]
  ------------------
  317|  5.74M|    }
  318|  78.3M|    return offset + (mag > 512 ? 4 : (mag + 64) >> 7);
  ------------------
  |  Branch (318:22): [True: 5.59M, False: 72.7M]
  ------------------
  319|  78.3M|}
recon_tmpl.c:get_dc_sign_ctx:
  143|  3.00M|{
  144|  3.00M|    uint64_t mask = 0xC0C0C0C0C0C0C0C0ULL, mul = 0x0101010101010101ULL;
  145|  3.00M|    int s;
  146|       |
  147|  3.00M|#if ARCH_X86_64 && defined(__GNUC__)
  148|       |    /* Coerce compilers into producing better code. For some reason
  149|       |     * every x86-64 compiler is awful at handling 64-bit constants. */
  150|  3.00M|    __asm__("" : "+r"(mask), "+r"(mul));
  151|  3.00M|#endif
  152|       |
  153|  3.00M|    switch(tx) {
  154|      0|    default: assert(0); /* fall-through */
  ------------------
  |  Branch (154:5): [True: 0, False: 3.00M]
  |  Branch (154:14): [Folded, False: 0]
  ------------------
  155|   861k|    case TX_4X4: {
  ------------------
  |  Branch (155:5): [True: 861k, False: 2.14M]
  ------------------
  156|   861k|        int t = *(const uint8_t *) a >> 6;
  157|   861k|        t    += *(const uint8_t *) l >> 6;
  158|   861k|        s = t - 1 - 1;
  159|   861k|        break;
  160|      0|    }
  161|   395k|    case TX_8X8: {
  ------------------
  |  Branch (161:5): [True: 395k, False: 2.60M]
  ------------------
  162|   395k|        uint32_t t = *(const uint16_t *) a & (uint32_t) mask;
  163|   395k|        t         += *(const uint16_t *) l & (uint32_t) mask;
  164|   395k|        t *= 0x04040404U;
  165|   395k|        s = (int) (t >> 24) - 2 - 2;
  166|   395k|        break;
  167|      0|    }
  168|   267k|    case TX_16X16: {
  ------------------
  |  Branch (168:5): [True: 267k, False: 2.73M]
  ------------------
  169|   267k|        uint32_t t = (*(const uint32_t *) a & (uint32_t) mask) >> 6;
  170|   267k|        t         += (*(const uint32_t *) l & (uint32_t) mask) >> 6;
  171|   267k|        t *= (uint32_t) mul;
  172|   267k|        s = (int) (t >> 24) - 4 - 4;
  173|   267k|        break;
  174|      0|    }
  175|   271k|    case TX_32X32: {
  ------------------
  |  Branch (175:5): [True: 271k, False: 2.73M]
  ------------------
  176|   271k|        uint64_t t = (*(const uint64_t *) a & mask) >> 6;
  177|   271k|        t         += (*(const uint64_t *) l & mask) >> 6;
  178|   271k|        t *= mul;
  179|   271k|        s = (int) (t >> 56) - 8 - 8;
  180|   271k|        break;
  181|      0|    }
  182|  94.7k|    case TX_64X64: {
  ------------------
  |  Branch (182:5): [True: 94.7k, False: 2.90M]
  ------------------
  183|  94.7k|        uint64_t t = (*(const uint64_t *) &a[0] & mask) >> 6;
  184|  94.7k|        t         += (*(const uint64_t *) &a[8] & mask) >> 6;
  185|  94.7k|        t         += (*(const uint64_t *) &l[0] & mask) >> 6;
  186|  94.7k|        t         += (*(const uint64_t *) &l[8] & mask) >> 6;
  187|  94.7k|        t *= mul;
  188|  94.7k|        s = (int) (t >> 56) - 16 - 16;
  189|  94.7k|        break;
  190|      0|    }
  191|  89.1k|    case RTX_4X8: {
  ------------------
  |  Branch (191:5): [True: 89.1k, False: 2.91M]
  ------------------
  192|  89.1k|        uint32_t t = *(const uint8_t  *) a & (uint32_t) mask;
  193|  89.1k|        t         += *(const uint16_t *) l & (uint32_t) mask;
  194|  89.1k|        t *= 0x04040404U;
  195|  89.1k|        s = (int) (t >> 24) - 1 - 2;
  196|  89.1k|        break;
  197|      0|    }
  198|   149k|    case RTX_8X4: {
  ------------------
  |  Branch (198:5): [True: 149k, False: 2.85M]
  ------------------
  199|   149k|        uint32_t t = *(const uint16_t *) a & (uint32_t) mask;
  200|   149k|        t         += *(const uint8_t  *) l & (uint32_t) mask;
  201|   149k|        t *= 0x04040404U;
  202|   149k|        s = (int) (t >> 24) - 2 - 1;
  203|   149k|        break;
  204|      0|    }
  205|   108k|    case RTX_8X16: {
  ------------------
  |  Branch (205:5): [True: 108k, False: 2.89M]
  ------------------
  206|   108k|        uint32_t t = *(const uint16_t *) a & (uint32_t) mask;
  207|   108k|        t         += *(const uint32_t *) l & (uint32_t) mask;
  208|   108k|        t = (t >> 6) * (uint32_t) mul;
  209|   108k|        s = (int) (t >> 24) - 2 - 4;
  210|   108k|        break;
  211|      0|    }
  212|   209k|    case RTX_16X8: {
  ------------------
  |  Branch (212:5): [True: 209k, False: 2.79M]
  ------------------
  213|   209k|        uint32_t t = *(const uint32_t *) a & (uint32_t) mask;
  214|   209k|        t         += *(const uint16_t *) l & (uint32_t) mask;
  215|   209k|        t = (t >> 6) * (uint32_t) mul;
  216|   209k|        s = (int) (t >> 24) - 4 - 2;
  217|   209k|        break;
  218|      0|    }
  219|  50.2k|    case RTX_16X32: {
  ------------------
  |  Branch (219:5): [True: 50.2k, False: 2.95M]
  ------------------
  220|  50.2k|        uint64_t t = *(const uint32_t *) a & (uint32_t) mask;
  221|  50.2k|        t         += *(const uint64_t *) l & mask;
  222|  50.2k|        t = (t >> 6) * mul;
  223|  50.2k|        s = (int) (t >> 56) - 4 - 8;
  224|  50.2k|        break;
  225|      0|    }
  226|   105k|    case RTX_32X16: {
  ------------------
  |  Branch (226:5): [True: 105k, False: 2.89M]
  ------------------
  227|   105k|        uint64_t t = *(const uint64_t *) a & mask;
  228|   105k|        t         += *(const uint32_t *) l & (uint32_t) mask;
  229|   105k|        t = (t >> 6) * mul;
  230|   105k|        s = (int) (t >> 56) - 8 - 4;
  231|   105k|        break;
  232|      0|    }
  233|  7.31k|    case RTX_32X64: {
  ------------------
  |  Branch (233:5): [True: 7.31k, False: 2.99M]
  ------------------
  234|  7.31k|        uint64_t t = (*(const uint64_t *) &a[0] & mask) >> 6;
  235|  7.31k|        t         += (*(const uint64_t *) &l[0] & mask) >> 6;
  236|  7.31k|        t         += (*(const uint64_t *) &l[8] & mask) >> 6;
  237|  7.31k|        t *= mul;
  238|  7.31k|        s = (int) (t >> 56) - 8 - 16;
  239|  7.31k|        break;
  240|      0|    }
  241|  32.1k|    case RTX_64X32: {
  ------------------
  |  Branch (241:5): [True: 32.1k, False: 2.97M]
  ------------------
  242|  32.1k|        uint64_t t = (*(const uint64_t *) &a[0] & mask) >> 6;
  243|  32.1k|        t         += (*(const uint64_t *) &a[8] & mask) >> 6;
  244|  32.1k|        t         += (*(const uint64_t *) &l[0] & mask) >> 6;
  245|  32.1k|        t *= mul;
  246|  32.1k|        s = (int) (t >> 56) - 16 - 8;
  247|  32.1k|        break;
  248|      0|    }
  249|  61.2k|    case RTX_4X16: {
  ------------------
  |  Branch (249:5): [True: 61.2k, False: 2.94M]
  ------------------
  250|  61.2k|        uint32_t t = *(const uint8_t  *) a & (uint32_t) mask;
  251|  61.2k|        t         += *(const uint32_t *) l & (uint32_t) mask;
  252|  61.2k|        t = (t >> 6) * (uint32_t) mul;
  253|  61.2k|        s = (int) (t >> 24) - 1 - 4;
  254|  61.2k|        break;
  255|      0|    }
  256|   151k|    case RTX_16X4: {
  ------------------
  |  Branch (256:5): [True: 151k, False: 2.85M]
  ------------------
  257|   151k|        uint32_t t = *(const uint32_t *) a & (uint32_t) mask;
  258|   151k|        t         += *(const uint8_t  *) l & (uint32_t) mask;
  259|   151k|        t = (t >> 6) * (uint32_t) mul;
  260|   151k|        s = (int) (t >> 24) - 4 - 1;
  261|   151k|        break;
  262|      0|    }
  263|  35.1k|    case RTX_8X32: {
  ------------------
  |  Branch (263:5): [True: 35.1k, False: 2.96M]
  ------------------
  264|  35.1k|        uint64_t t = *(const uint16_t *) a & (uint32_t) mask;
  265|  35.1k|        t         += *(const uint64_t *) l & mask;
  266|  35.1k|        t = (t >> 6) * mul;
  267|  35.1k|        s = (int) (t >> 56) - 2 - 8;
  268|  35.1k|        break;
  269|      0|    }
  270|  89.5k|    case RTX_32X8: {
  ------------------
  |  Branch (270:5): [True: 89.5k, False: 2.91M]
  ------------------
  271|  89.5k|        uint64_t t = *(const uint64_t *) a & mask;
  272|  89.5k|        t         += *(const uint16_t *) l & (uint32_t) mask;
  273|  89.5k|        t = (t >> 6) * mul;
  274|  89.5k|        s = (int) (t >> 56) - 8 - 2;
  275|  89.5k|        break;
  276|      0|    }
  277|  9.35k|    case RTX_16X64: {
  ------------------
  |  Branch (277:5): [True: 9.35k, False: 2.99M]
  ------------------
  278|  9.35k|        uint64_t t = *(const uint32_t *) a & (uint32_t) mask;
  279|  9.35k|        t         += *(const uint64_t *) &l[0] & mask;
  280|  9.35k|        t = (t >> 6) + ((*(const uint64_t *) &l[8] & mask) >> 6);
  281|  9.35k|        t *= mul;
  282|  9.35k|        s = (int) (t >> 56) - 4 - 16;
  283|  9.35k|        break;
  284|      0|    }
  285|  15.7k|    case RTX_64X16: {
  ------------------
  |  Branch (285:5): [True: 15.7k, False: 2.98M]
  ------------------
  286|  15.7k|        uint64_t t = *(const uint64_t *) &a[0] & mask;
  287|  15.7k|        t         += *(const uint32_t *) l & (uint32_t) mask;
  288|  15.7k|        t = (t >> 6) + ((*(const uint64_t *) &a[8] & mask) >> 6);
  289|  15.7k|        t *= mul;
  290|  15.7k|        s = (int) (t >> 56) - 16 - 4;
  291|  15.7k|        break;
  292|      0|    }
  293|  3.00M|    }
  294|       |
  295|  3.00M|    return (s != 0) + (s > 0);
  296|  3.00M|}
recon_tmpl.c:read_golomb:
   49|  1.19M|static inline unsigned read_golomb(MsacContext *const msac) {
   50|  1.19M|    int len = 0;
   51|  1.19M|    unsigned val = 1;
   52|       |
   53|  2.22M|    while (!dav1d_msac_decode_bool_equi(msac) && len < 32) len++;
  ------------------
  |  |   53|  2.22M|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
  |  Branch (53:12): [True: 1.03M, False: 1.19M]
  |  Branch (53:50): [True: 1.03M, False: 37]
  ------------------
   54|  2.22M|    while (len--) val = (val << 1) + dav1d_msac_decode_bool_equi(msac);
  ------------------
  |  |   53|  1.03M|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
  |  Branch (54:12): [True: 1.03M, False: 1.19M]
  ------------------
   55|       |
   56|  1.19M|    return val - 1;
   57|  1.19M|}
recon_tmpl.c:mc:
  944|  4.55M|{
  945|  4.55M|    assert((dst8 != NULL) ^ (dst16 != NULL));
  ------------------
  |  Branch (945:5): [True: 4.55M, False: 0]
  ------------------
  946|  4.55M|    const Dav1dFrameContext *const f = t->f;
  947|  4.55M|    const int ss_ver = !!pl && f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
  ------------------
  |  Branch (947:24): [True: 2.30M, False: 2.24M]
  |  Branch (947:32): [True: 659k, False: 1.64M]
  ------------------
  948|  4.55M|    const int ss_hor = !!pl && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
  ------------------
  |  Branch (948:24): [True: 2.30M, False: 2.24M]
  |  Branch (948:32): [True: 666k, False: 1.64M]
  ------------------
  949|  4.55M|    const int h_mul = 4 >> ss_hor, v_mul = 4 >> ss_ver;
  950|  4.55M|    const int mvx = mv.x, mvy = mv.y;
  951|  4.55M|    const int mx = mvx & (15 >> !ss_hor), my = mvy & (15 >> !ss_ver);
  952|  4.55M|    ptrdiff_t ref_stride = refp->p.stride[!!pl];
  953|  4.55M|    const pixel *ref;
  954|       |
  955|  4.55M|    if (refp->p.p.w == f->cur.p.w && refp->p.p.h == f->cur.p.h) {
  ------------------
  |  Branch (955:9): [True: 3.43M, False: 1.11M]
  |  Branch (955:38): [True: 3.35M, False: 83.1k]
  ------------------
  956|  3.35M|        const int dx = bx * h_mul + (mvx >> (3 + ss_hor));
  957|  3.35M|        const int dy = by * v_mul + (mvy >> (3 + ss_ver));
  958|  3.35M|        int w, h;
  959|       |
  960|  3.35M|        if (refp->p.data[0] != f->cur.data[0]) { // i.e. not for intrabc
  ------------------
  |  Branch (960:13): [True: 2.20M, False: 1.15M]
  ------------------
  961|  2.20M|            w = (f->cur.p.w + ss_hor) >> ss_hor;
  962|  2.20M|            h = (f->cur.p.h + ss_ver) >> ss_ver;
  963|  2.20M|        } else {
  964|  1.15M|            w = f->bw * 4 >> ss_hor;
  965|  1.15M|            h = f->bh * 4 >> ss_ver;
  966|  1.15M|        }
  967|  3.35M|        if (dx < !!mx * 3 || dy < !!my * 3 ||
  ------------------
  |  Branch (967:13): [True: 45.1k, False: 3.30M]
  |  Branch (967:30): [True: 61.8k, False: 3.24M]
  ------------------
  968|  3.24M|            dx + bw4 * h_mul + !!mx * 4 > w ||
  ------------------
  |  Branch (968:13): [True: 141k, False: 3.10M]
  ------------------
  969|  3.10M|            dy + bh4 * v_mul + !!my * 4 > h)
  ------------------
  |  Branch (969:13): [True: 320k, False: 2.78M]
  ------------------
  970|   568k|        {
  971|   568k|            pixel *const emu_edge_buf = bitfn(t->scratch.emu_edge);
  ------------------
  |  |   51|   568k|#define bitfn(x) x##_8bpc
  ------------------
  972|   568k|            f->dsp->mc.emu_edge(bw4 * h_mul + !!mx * 7, bh4 * v_mul + !!my * 7,
  973|   568k|                                w, h, dx - !!mx * 3, dy - !!my * 3,
  974|   568k|                                emu_edge_buf, 192 * sizeof(pixel),
  975|   568k|                                refp->p.data[pl], ref_stride);
  976|   568k|            ref = &emu_edge_buf[192 * !!my * 3 + !!mx * 3];
  977|   568k|            ref_stride = 192 * sizeof(pixel);
  978|  2.78M|        } else {
  979|  2.78M|            ref = ((pixel *) refp->p.data[pl]) + PXSTRIDE(ref_stride) * dy + dx;
  ------------------
  |  |   53|  2.78M|#define PXSTRIDE(x) (x)
  ------------------
  980|  2.78M|        }
  981|       |
  982|  3.35M|        if (dst8 != NULL) {
  ------------------
  |  Branch (982:13): [True: 2.71M, False: 638k]
  ------------------
  983|  2.71M|            f->dsp->mc.mc[filter_2d](dst8, dst_stride, ref, ref_stride, bw4 * h_mul,
  984|  2.71M|                                     bh4 * v_mul, mx << !ss_hor, my << !ss_ver
  985|  2.71M|                                     HIGHBD_CALL_SUFFIX);
  986|  2.71M|        } else {
  987|   638k|            f->dsp->mc.mct[filter_2d](dst16, ref, ref_stride, bw4 * h_mul,
  988|   638k|                                      bh4 * v_mul, mx << !ss_hor, my << !ss_ver
  989|   638k|                                      HIGHBD_CALL_SUFFIX);
  990|   638k|        }
  991|  3.35M|    } else {
  992|  1.19M|        assert(refp != &f->sr_cur);
  ------------------
  |  Branch (992:9): [True: 1.19M, False: 0]
  ------------------
  993|       |
  994|  1.19M|        const int orig_pos_y = (by * v_mul << 4) + mvy * (1 << !ss_ver);
  995|  1.19M|        const int orig_pos_x = (bx * h_mul << 4) + mvx * (1 << !ss_hor);
  996|  1.19M|#define scale_mv(res, val, scale) do { \
  997|  1.19M|            const int64_t tmp = (int64_t)(val) * scale + (scale - 0x4000) * 8; \
  998|  1.19M|            res = apply_sign64((int) ((llabs(tmp) + 128) >> 8), tmp) + 32;     \
  999|  1.19M|        } while (0)
 1000|  1.19M|        int pos_y, pos_x;
 1001|  1.19M|        scale_mv(pos_x, orig_pos_x, f->svc[refidx][0].scale);
  ------------------
  |  |  996|  1.19M|#define scale_mv(res, val, scale) do { \
  |  |  997|  1.19M|            const int64_t tmp = (int64_t)(val) * scale + (scale - 0x4000) * 8; \
  |  |  998|  1.19M|            res = apply_sign64((int) ((llabs(tmp) + 128) >> 8), tmp) + 32;     \
  |  |  999|  1.19M|        } while (0)
  |  |  ------------------
  |  |  |  Branch (999:18): [Folded, False: 1.19M]
  |  |  ------------------
  ------------------
 1002|  1.19M|        scale_mv(pos_y, orig_pos_y, f->svc[refidx][1].scale);
  ------------------
  |  |  996|  1.19M|#define scale_mv(res, val, scale) do { \
  |  |  997|  1.19M|            const int64_t tmp = (int64_t)(val) * scale + (scale - 0x4000) * 8; \
  |  |  998|  1.19M|            res = apply_sign64((int) ((llabs(tmp) + 128) >> 8), tmp) + 32;     \
  |  |  999|  1.19M|        } while (0)
  |  |  ------------------
  |  |  |  Branch (999:18): [Folded, False: 1.19M]
  |  |  ------------------
  ------------------
 1003|  1.19M|#undef scale_mv
 1004|  1.19M|        const int left = pos_x >> 10;
 1005|  1.19M|        const int top = pos_y >> 10;
 1006|  1.19M|        const int right =
 1007|  1.19M|            ((pos_x + (bw4 * h_mul - 1) * f->svc[refidx][0].step) >> 10) + 1;
 1008|  1.19M|        const int bottom =
 1009|  1.19M|            ((pos_y + (bh4 * v_mul - 1) * f->svc[refidx][1].step) >> 10) + 1;
 1010|       |
 1011|  1.19M|        if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  1.19M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 1.19M]
  |  |  ------------------
  |  |   35|  1.19M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  1.19M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1012|      0|            printf("Off %dx%d [%d,%d,%d], size %dx%d [%d,%d]\n",
 1013|      0|                   left, top, orig_pos_x, f->svc[refidx][0].scale, refidx,
 1014|      0|                   right-left, bottom-top,
 1015|      0|                   f->svc[refidx][0].step, f->svc[refidx][1].step);
 1016|       |
 1017|  1.19M|        const int w = (refp->p.p.w + ss_hor) >> ss_hor;
 1018|  1.19M|        const int h = (refp->p.p.h + ss_ver) >> ss_ver;
 1019|  1.19M|        if (left < 3 || top < 3 || right + 4 > w || bottom + 4 > h) {
  ------------------
  |  Branch (1019:13): [True: 116k, False: 1.08M]
  |  Branch (1019:25): [True: 208k, False: 875k]
  |  Branch (1019:36): [True: 71.7k, False: 803k]
  |  Branch (1019:53): [True: 65.1k, False: 738k]
  ------------------
 1020|   461k|            pixel *const emu_edge_buf = bitfn(t->scratch.emu_edge);
  ------------------
  |  |   51|   461k|#define bitfn(x) x##_8bpc
  ------------------
 1021|   461k|            f->dsp->mc.emu_edge(right - left + 7, bottom - top + 7,
 1022|   461k|                                w, h, left - 3, top - 3,
 1023|   461k|                                emu_edge_buf, 320 * sizeof(pixel),
 1024|   461k|                                refp->p.data[pl], ref_stride);
 1025|   461k|            ref = &emu_edge_buf[320 * 3 + 3];
 1026|   461k|            ref_stride = 320 * sizeof(pixel);
 1027|   461k|            if (DEBUG_BLOCK_INFO) printf("Emu\n");
  ------------------
  |  |   34|   461k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 461k]
  |  |  ------------------
  |  |   35|   461k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   461k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1028|   738k|        } else {
 1029|   738k|            ref = ((pixel *) refp->p.data[pl]) + PXSTRIDE(ref_stride) * top + left;
  ------------------
  |  |   53|   738k|#define PXSTRIDE(x) (x)
  ------------------
 1030|   738k|        }
 1031|       |
 1032|  1.19M|        if (dst8 != NULL) {
  ------------------
  |  Branch (1032:13): [True: 857k, False: 342k]
  ------------------
 1033|   857k|            f->dsp->mc.mc_scaled[filter_2d](dst8, dst_stride, ref, ref_stride,
 1034|   857k|                                            bw4 * h_mul, bh4 * v_mul,
 1035|   857k|                                            pos_x & 0x3ff, pos_y & 0x3ff,
 1036|   857k|                                            f->svc[refidx][0].step,
 1037|   857k|                                            f->svc[refidx][1].step
 1038|   857k|                                            HIGHBD_CALL_SUFFIX);
 1039|   857k|        } else {
 1040|   342k|            f->dsp->mc.mct_scaled[filter_2d](dst16, ref, ref_stride,
 1041|   342k|                                             bw4 * h_mul, bh4 * v_mul,
 1042|   342k|                                             pos_x & 0x3ff, pos_y & 0x3ff,
 1043|   342k|                                             f->svc[refidx][0].step,
 1044|   342k|                                             f->svc[refidx][1].step
 1045|   342k|                                             HIGHBD_CALL_SUFFIX);
 1046|   342k|        }
 1047|  1.19M|    }
 1048|       |
 1049|  4.55M|    return 0;
 1050|  4.55M|}
recon_tmpl.c:warp_affine:
 1120|   145k|{
 1121|   145k|    assert((dst8 != NULL) ^ (dst16 != NULL));
  ------------------
  |  Branch (1121:5): [True: 145k, False: 0]
  ------------------
 1122|   145k|    const Dav1dFrameContext *const f = t->f;
 1123|   145k|    const Dav1dDSPContext *const dsp = f->dsp;
 1124|   145k|    const int ss_ver = !!pl && f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
  ------------------
  |  Branch (1124:24): [True: 38.8k, False: 106k]
  |  Branch (1124:32): [True: 15.1k, False: 23.7k]
  ------------------
 1125|   145k|    const int ss_hor = !!pl && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
  ------------------
  |  Branch (1125:24): [True: 38.8k, False: 106k]
  |  Branch (1125:32): [True: 15.1k, False: 23.7k]
  ------------------
 1126|   145k|    const int h_mul = 4 >> ss_hor, v_mul = 4 >> ss_ver;
 1127|   145k|    assert(!((b_dim[0] * h_mul) & 7) && !((b_dim[1] * v_mul) & 7));
  ------------------
  |  Branch (1127:5): [True: 145k, False: 0]
  |  Branch (1127:5): [True: 145k, False: 0]
  ------------------
 1128|   145k|    const int32_t *const mat = wmp->matrix;
 1129|   145k|    const int width = (refp->p.p.w + ss_hor) >> ss_hor;
 1130|   145k|    const int height = (refp->p.p.h + ss_ver) >> ss_ver;
 1131|       |
 1132|   944k|    for (int y = 0; y < b_dim[1] * v_mul; y += 8) {
  ------------------
  |  Branch (1132:21): [True: 798k, False: 145k]
  ------------------
 1133|   798k|        const int src_y = t->by * 4 + ((y + 4) << ss_ver);
 1134|   798k|        const int64_t mat3_y = (int64_t) mat[3] * src_y + mat[0];
 1135|   798k|        const int64_t mat5_y = (int64_t) mat[5] * src_y + mat[1];
 1136|  6.07M|        for (int x = 0; x < b_dim[0] * h_mul; x += 8) {
  ------------------
  |  Branch (1136:25): [True: 5.27M, False: 798k]
  ------------------
 1137|       |            // calculate transformation relative to center of 8x8 block in
 1138|       |            // luma pixel units
 1139|  5.27M|            const int src_x = t->bx * 4 + ((x + 4) << ss_hor);
 1140|  5.27M|            const int64_t mvx = ((int64_t) mat[2] * src_x + mat3_y) >> ss_hor;
 1141|  5.27M|            const int64_t mvy = ((int64_t) mat[4] * src_x + mat5_y) >> ss_ver;
 1142|       |
 1143|  5.27M|            const int dx = (int) (mvx >> 16) - 4;
 1144|  5.27M|            const int mx = (((int) mvx & 0xffff) - wmp->u.p.alpha * 4 -
 1145|  5.27M|                                                   wmp->u.p.beta  * 7) & ~0x3f;
 1146|  5.27M|            const int dy = (int) (mvy >> 16) - 4;
 1147|  5.27M|            const int my = (((int) mvy & 0xffff) - wmp->u.p.gamma * 4 -
 1148|  5.27M|                                                   wmp->u.p.delta * 4) & ~0x3f;
 1149|       |
 1150|  5.27M|            const pixel *ref_ptr;
 1151|  5.27M|            ptrdiff_t ref_stride = refp->p.stride[!!pl];
 1152|       |
 1153|  5.27M|            if (dx < 3 || dx + 8 + 4 > width || dy < 3 || dy + 8 + 4 > height) {
  ------------------
  |  Branch (1153:17): [True: 828k, False: 4.44M]
  |  Branch (1153:27): [True: 1.17M, False: 3.27M]
  |  Branch (1153:49): [True: 67.2k, False: 3.20M]
  |  Branch (1153:59): [True: 135k, False: 3.06M]
  ------------------
 1154|  2.20M|                pixel *const emu_edge_buf = bitfn(t->scratch.emu_edge);
  ------------------
  |  |   51|  2.20M|#define bitfn(x) x##_8bpc
  ------------------
 1155|  2.20M|                f->dsp->mc.emu_edge(15, 15, width, height, dx - 3, dy - 3,
 1156|  2.20M|                                    emu_edge_buf, 32 * sizeof(pixel),
 1157|  2.20M|                                    refp->p.data[pl], ref_stride);
 1158|  2.20M|                ref_ptr = &emu_edge_buf[32 * 3 + 3];
 1159|  2.20M|                ref_stride = 32 * sizeof(pixel);
 1160|  3.06M|            } else {
 1161|  3.06M|                ref_ptr = ((pixel *) refp->p.data[pl]) + PXSTRIDE(ref_stride) * dy + dx;
  ------------------
  |  |   53|  3.06M|#define PXSTRIDE(x) (x)
  ------------------
 1162|  3.06M|            }
 1163|  5.27M|            if (dst16 != NULL)
  ------------------
  |  Branch (1163:17): [True: 64.5k, False: 5.21M]
  ------------------
 1164|  64.5k|                dsp->mc.warp8x8t(&dst16[x], dstride, ref_ptr, ref_stride,
 1165|  64.5k|                                 wmp->u.abcd, mx, my HIGHBD_CALL_SUFFIX);
 1166|  5.21M|            else
 1167|  5.21M|                dsp->mc.warp8x8(&dst8[x], dstride, ref_ptr, ref_stride,
 1168|  5.21M|                                wmp->u.abcd, mx, my HIGHBD_CALL_SUFFIX);
 1169|  5.27M|        }
 1170|   798k|        if (dst8) dst8  += 8 * PXSTRIDE(dstride);
  ------------------
  |  |   53|   784k|#define PXSTRIDE(x) (x)
  ------------------
  |  Branch (1170:13): [True: 784k, False: 13.5k]
  ------------------
 1171|  13.5k|        else      dst16 += 8 * dstride;
 1172|   798k|    }
 1173|   145k|    return 0;
 1174|   145k|}
recon_tmpl.c:obmc:
 1056|   391k|{
 1057|   391k|    assert(!(t->bx & 1) && !(t->by & 1));
  ------------------
  |  Branch (1057:5): [True: 391k, False: 0]
  |  Branch (1057:5): [True: 391k, False: 0]
  ------------------
 1058|   391k|    const Dav1dFrameContext *const f = t->f;
 1059|   391k|    /*const*/ refmvs_block **r = &t->rt.r[(t->by & 31) + 5];
 1060|   391k|    pixel *const lap = bitfn(t->scratch.lap);
  ------------------
  |  |   51|   391k|#define bitfn(x) x##_8bpc
  ------------------
 1061|   391k|    const int ss_ver = !!pl && f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
  ------------------
  |  Branch (1061:24): [True: 201k, False: 189k]
  |  Branch (1061:32): [True: 71.5k, False: 130k]
  ------------------
 1062|   391k|    const int ss_hor = !!pl && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
  ------------------
  |  Branch (1062:24): [True: 201k, False: 189k]
  |  Branch (1062:32): [True: 71.5k, False: 130k]
  ------------------
 1063|   391k|    const int h_mul = 4 >> ss_hor, v_mul = 4 >> ss_ver;
 1064|   391k|    int res;
 1065|       |
 1066|   391k|    if (t->by > t->ts->tiling.row_start &&
  ------------------
  |  Branch (1066:9): [True: 363k, False: 27.4k]
  ------------------
 1067|   363k|        (!pl || b_dim[0] * h_mul + b_dim[1] * v_mul >= 16))
  ------------------
  |  Branch (1067:10): [True: 177k, False: 186k]
  |  Branch (1067:17): [True: 135k, False: 50.8k]
  ------------------
 1068|   313k|    {
 1069|   660k|        for (int i = 0, x = 0; x < w4 && i < imin(b_dim[2], 4); ) {
  ------------------
  |  Branch (1069:32): [True: 348k, False: 312k]
  |  Branch (1069:42): [True: 347k, False: 876]
  ------------------
 1070|       |            // only odd blocks are considered for overlap handling, hence +1
 1071|   347k|            const refmvs_block *const a_r = &r[-1][t->bx + x + 1];
 1072|   347k|            const uint8_t *const a_b_dim = dav1d_block_dimensions[a_r->bs];
 1073|   347k|            const int step4 = iclip(a_b_dim[0], 2, 16);
 1074|       |
 1075|   347k|            if (a_r->ref.ref[0] > 0) {
  ------------------
  |  Branch (1075:17): [True: 339k, False: 8.58k]
  ------------------
 1076|   339k|                const int ow4 = imin(step4, b_dim[0]);
 1077|   339k|                const int oh4 = imin(b_dim[1], 16) >> 1;
 1078|   339k|                res = mc(t, lap, NULL, ow4 * h_mul * sizeof(pixel), ow4, (oh4 * 3 + 3) >> 2,
 1079|   339k|                         t->bx + x, t->by, pl, a_r->mv.mv[0],
 1080|   339k|                         &f->refp[a_r->ref.ref[0] - 1], a_r->ref.ref[0] - 1,
 1081|   339k|                         dav1d_filter_2d[t->a->filter[1][bx4 + x + 1]][t->a->filter[0][bx4 + x + 1]]);
 1082|   339k|                if (res) return res;
  ------------------
  |  Branch (1082:21): [True: 0, False: 339k]
  ------------------
 1083|   339k|                f->dsp->mc.blend_h(&dst[x * h_mul], dst_stride, lap,
 1084|   339k|                                   h_mul * ow4, v_mul * oh4);
 1085|   339k|                i++;
 1086|   339k|            }
 1087|   347k|            x += step4;
 1088|   347k|        }
 1089|   313k|    }
 1090|       |
 1091|   391k|    if (t->bx > t->ts->tiling.col_start)
  ------------------
  |  Branch (1091:9): [True: 376k, False: 15.1k]
  ------------------
 1092|   790k|        for (int i = 0, y = 0; y < h4 && i < imin(b_dim[3], 4); ) {
  ------------------
  |  Branch (1092:32): [True: 415k, False: 374k]
  |  Branch (1092:42): [True: 413k, False: 1.66k]
  ------------------
 1093|       |            // only odd blocks are considered for overlap handling, hence +1
 1094|   413k|            const refmvs_block *const l_r = &r[y + 1][t->bx - 1];
 1095|   413k|            const uint8_t *const l_b_dim = dav1d_block_dimensions[l_r->bs];
 1096|   413k|            const int step4 = iclip(l_b_dim[1], 2, 16);
 1097|       |
 1098|   413k|            if (l_r->ref.ref[0] > 0) {
  ------------------
  |  Branch (1098:17): [True: 399k, False: 14.5k]
  ------------------
 1099|   399k|                const int ow4 = imin(b_dim[0], 16) >> 1;
 1100|   399k|                const int oh4 = imin(step4, b_dim[1]);
 1101|   399k|                res = mc(t, lap, NULL, h_mul * ow4 * sizeof(pixel), ow4, oh4,
 1102|   399k|                         t->bx, t->by + y, pl, l_r->mv.mv[0],
 1103|   399k|                         &f->refp[l_r->ref.ref[0] - 1], l_r->ref.ref[0] - 1,
 1104|   399k|                         dav1d_filter_2d[t->l.filter[1][by4 + y + 1]][t->l.filter[0][by4 + y + 1]]);
 1105|   399k|                if (res) return res;
  ------------------
  |  Branch (1105:21): [True: 0, False: 399k]
  ------------------
 1106|   399k|                f->dsp->mc.blend_v(&dst[y * v_mul * PXSTRIDE(dst_stride)],
  ------------------
  |  |   53|   399k|#define PXSTRIDE(x) (x)
  ------------------
 1107|   399k|                                   dst_stride, lap, h_mul * ow4, v_mul * oh4);
 1108|   399k|                i++;
 1109|   399k|            }
 1110|   413k|            y += step4;
 1111|   413k|        }
 1112|   391k|    return 0;
 1113|   391k|}
dav1d_recon_b_intra_16bpc:
 1179|  1.45M|{
 1180|  1.45M|    Dav1dTileState *const ts = t->ts;
 1181|  1.45M|    const Dav1dFrameContext *const f = t->f;
 1182|  1.45M|    const Dav1dDSPContext *const dsp = f->dsp;
 1183|  1.45M|    const int bx4 = t->bx & 31, by4 = t->by & 31;
 1184|  1.45M|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
 1185|  1.45M|    const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
 1186|  1.45M|    const int cbx4 = bx4 >> ss_hor, cby4 = by4 >> ss_ver;
 1187|  1.45M|    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
 1188|  1.45M|    const int bw4 = b_dim[0], bh4 = b_dim[1];
 1189|  1.45M|    const int w4 = imin(bw4, f->bw - t->bx), h4 = imin(bh4, f->bh - t->by);
 1190|  1.45M|    const int cw4 = (w4 + ss_hor) >> ss_hor, ch4 = (h4 + ss_ver) >> ss_ver;
 1191|  1.45M|    const int has_chroma = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400 &&
  ------------------
  |  Branch (1191:28): [True: 912k, False: 537k]
  ------------------
 1192|   912k|                           (bw4 > ss_hor || t->bx & 1) &&
  ------------------
  |  Branch (1192:29): [True: 881k, False: 31.2k]
  |  Branch (1192:45): [True: 15.1k, False: 16.0k]
  ------------------
 1193|   896k|                           (bh4 > ss_ver || t->by & 1);
  ------------------
  |  Branch (1193:29): [True: 870k, False: 26.2k]
  |  Branch (1193:45): [True: 13.1k, False: 13.1k]
  ------------------
 1194|  1.45M|    const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[b->tx];
 1195|  1.45M|    const TxfmInfo *const uv_t_dim = &dav1d_txfm_dimensions[b->uvtx];
 1196|       |
 1197|       |    // coefficient coding
 1198|  1.45M|    pixel *const edge = bitfn(t->scratch.edge) + 128;
  ------------------
  |  |   77|  1.45M|#define bitfn(x) x##_16bpc
  ------------------
 1199|  1.45M|    const int cbw4 = (bw4 + ss_hor) >> ss_hor, cbh4 = (bh4 + ss_ver) >> ss_ver;
 1200|       |
 1201|  1.45M|    const int intra_edge_filter_flag = f->seq_hdr->intra_edge_filter << 10;
 1202|       |
 1203|  2.94M|    for (int init_y = 0; init_y < h4; init_y += 16) {
  ------------------
  |  Branch (1203:26): [True: 1.49M, False: 1.45M]
  ------------------
 1204|  1.49M|        const int sub_h4 = imin(h4, 16 + init_y);
 1205|  1.49M|        const int sub_ch4 = imin(ch4, (init_y + 16) >> ss_ver);
 1206|  3.08M|        for (int init_x = 0; init_x < w4; init_x += 16) {
  ------------------
  |  Branch (1206:30): [True: 1.59M, False: 1.49M]
  ------------------
 1207|  1.59M|            if (b->pal_sz[0]) {
  ------------------
  |  Branch (1207:17): [True: 51.7k, False: 1.53M]
  ------------------
 1208|  51.7k|                pixel *dst = ((pixel *) f->cur.data[0]) +
 1209|  51.7k|                             4 * (t->by * PXSTRIDE(f->cur.stride[0]) + t->bx);
 1210|  51.7k|                const uint8_t *pal_idx;
 1211|  51.7k|                if (t->frame_thread.pass) {
  ------------------
  |  Branch (1211:21): [True: 0, False: 51.7k]
  ------------------
 1212|      0|                    const int p = t->frame_thread.pass & 1;
 1213|      0|                    assert(ts->frame_thread[p].pal_idx);
  ------------------
  |  Branch (1213:21): [True: 0, False: 0]
  ------------------
 1214|      0|                    pal_idx = ts->frame_thread[p].pal_idx;
 1215|      0|                    ts->frame_thread[p].pal_idx += bw4 * bh4 * 8;
 1216|  51.7k|                } else {
 1217|  51.7k|                    pal_idx = t->scratch.pal_idx_y;
 1218|  51.7k|                }
 1219|  51.7k|                const pixel *const pal = t->frame_thread.pass ?
  ------------------
  |  Branch (1219:42): [True: 0, False: 51.7k]
  ------------------
 1220|      0|                    f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
 1221|      0|                                        ((t->bx >> 1) + (t->by & 1))][0] :
 1222|  51.7k|                    bytefn(t->scratch.pal)[0];
  ------------------
  |  |   87|  51.7k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  51.7k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 1223|  51.7k|                f->dsp->ipred.pal_pred(dst, f->cur.stride[0], pal,
 1224|  51.7k|                                       pal_idx, bw4 * 4, bh4 * 4);
 1225|  51.7k|                if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   34|  51.7k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 51.7k]
  |  |  ------------------
  |  |   35|  51.7k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  51.7k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                              if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1226|      0|                    hex_dump(dst, PXSTRIDE(f->cur.stride[0]),
 1227|      0|                             bw4 * 4, bh4 * 4, "y-pal-pred");
 1228|  51.7k|            }
 1229|       |
 1230|  1.59M|            const int intra_flags = (sm_flag(t->a, bx4) |
 1231|  1.59M|                                     sm_flag(&t->l, by4) |
 1232|  1.59M|                                     intra_edge_filter_flag);
 1233|  1.59M|            const int sb_has_tr = init_x + 16 < w4 ? 1 : init_y ? 0 :
  ------------------
  |  Branch (1233:35): [True: 93.6k, False: 1.49M]
  |  Branch (1233:58): [True: 47.1k, False: 1.45M]
  ------------------
 1234|  1.49M|                              intra_edge_flags & EDGE_I444_TOP_HAS_RIGHT;
 1235|  1.59M|            const int sb_has_bl = init_x ? 0 : init_y + 16 < h4 ? 1 :
  ------------------
  |  Branch (1235:35): [True: 93.6k, False: 1.49M]
  |  Branch (1235:48): [True: 47.1k, False: 1.45M]
  ------------------
 1236|  1.49M|                              intra_edge_flags & EDGE_I444_LEFT_HAS_BOTTOM;
 1237|  1.59M|            int y, x;
 1238|  1.59M|            const int sub_w4 = imin(w4, init_x + 16);
 1239|  3.55M|            for (y = init_y, t->by += init_y; y < sub_h4;
  ------------------
  |  Branch (1239:47): [True: 1.96M, False: 1.59M]
  ------------------
 1240|  1.96M|                 y += t_dim->h, t->by += t_dim->h)
 1241|  1.96M|            {
 1242|  1.96M|                pixel *dst = ((pixel *) f->cur.data[0]) +
 1243|  1.96M|                               4 * (t->by * PXSTRIDE(f->cur.stride[0]) +
 1244|  1.96M|                                    t->bx + init_x);
 1245|  5.59M|                for (x = init_x, t->bx += init_x; x < sub_w4;
  ------------------
  |  Branch (1245:51): [True: 3.63M, False: 1.96M]
  ------------------
 1246|  3.63M|                     x += t_dim->w, t->bx += t_dim->w)
 1247|  3.63M|                {
 1248|  3.63M|                    if (b->pal_sz[0]) goto skip_y_pred;
  ------------------
  |  Branch (1248:25): [True: 81.4k, False: 3.55M]
  ------------------
 1249|       |
 1250|  3.55M|                    int angle = b->y_angle;
 1251|  3.55M|                    const enum EdgeFlags edge_flags =
 1252|  3.55M|                        (((y > init_y || !sb_has_tr) && (x + t_dim->w >= sub_w4)) ?
  ------------------
  |  Branch (1252:28): [True: 1.73M, False: 1.81M]
  |  Branch (1252:42): [True: 534k, False: 1.27M]
  |  Branch (1252:57): [True: 849k, False: 1.42M]
  ------------------
 1253|  2.70M|                             0 : EDGE_I444_TOP_HAS_RIGHT) |
 1254|  3.55M|                        ((x > init_x || (!sb_has_bl && y + t_dim->h >= sub_h4)) ?
  ------------------
  |  Branch (1254:27): [True: 1.64M, False: 1.90M]
  |  Branch (1254:42): [True: 1.11M, False: 789k]
  |  Branch (1254:56): [True: 830k, False: 285k]
  ------------------
 1255|  2.47M|                             0 : EDGE_I444_LEFT_HAS_BOTTOM);
 1256|  3.55M|                    const pixel *top_sb_edge = NULL;
 1257|  3.55M|                    if (!(t->by & (f->sb_step - 1))) {
  ------------------
  |  Branch (1257:25): [True: 822k, False: 2.72M]
  ------------------
 1258|   822k|                        top_sb_edge = f->ipred_edge[0];
 1259|   822k|                        const int sby = t->by >> f->sb_shift;
 1260|   822k|                        top_sb_edge += f->sb128w * 128 * (sby - 1);
 1261|   822k|                    }
 1262|  3.55M|                    const enum IntraPredMode m =
 1263|  3.55M|                        bytefn(dav1d_prepare_intra_edges)(t->bx,
  ------------------
  |  |   87|  3.55M|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  3.55M|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 1264|  3.55M|                                                          t->bx > ts->tiling.col_start,
 1265|  3.55M|                                                          t->by,
 1266|  3.55M|                                                          t->by > ts->tiling.row_start,
 1267|  3.55M|                                                          ts->tiling.col_end,
 1268|  3.55M|                                                          ts->tiling.row_end,
 1269|  3.55M|                                                          edge_flags, dst,
 1270|  3.55M|                                                          f->cur.stride[0], top_sb_edge,
 1271|  3.55M|                                                          b->y_mode, &angle,
 1272|  3.55M|                                                          t_dim->w, t_dim->h,
 1273|  3.55M|                                                          f->seq_hdr->intra_edge_filter,
 1274|  3.55M|                                                          edge HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  3.55M|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1275|  3.55M|                    dsp->ipred.intra_pred[m](dst, f->cur.stride[0], edge,
 1276|  3.55M|                                             t_dim->w * 4, t_dim->h * 4,
 1277|  3.55M|                                             angle | intra_flags,
 1278|  3.55M|                                             4 * f->bw - 4 * t->bx,
 1279|  3.55M|                                             4 * f->bh - 4 * t->by
 1280|  3.55M|                                             HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  3.55M|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1281|       |
 1282|  3.55M|                    if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   34|  3.55M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 3.55M]
  |  |  ------------------
  |  |   35|  3.55M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  3.55M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                                  if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1283|      0|                        hex_dump(edge - t_dim->h * 4, t_dim->h * 4,
 1284|      0|                                 t_dim->h * 4, 2, "l");
 1285|      0|                        hex_dump(edge, 0, 1, 1, "tl");
 1286|      0|                        hex_dump(edge + 1, t_dim->w * 4,
 1287|      0|                                 t_dim->w * 4, 2, "t");
 1288|      0|                        hex_dump(dst, f->cur.stride[0],
 1289|      0|                                 t_dim->w * 4, t_dim->h * 4, "y-intra-pred");
 1290|      0|                    }
 1291|       |
 1292|  3.63M|                skip_y_pred: {}
 1293|  3.63M|                    if (!b->skip) {
  ------------------
  |  Branch (1293:25): [True: 1.46M, False: 2.17M]
  ------------------
 1294|  1.46M|                        coef *cf;
 1295|  1.46M|                        int eob;
 1296|  1.46M|                        enum TxfmType txtp;
 1297|  1.46M|                        if (t->frame_thread.pass) {
  ------------------
  |  Branch (1297:29): [True: 0, False: 1.46M]
  ------------------
 1298|      0|                            const int p = t->frame_thread.pass & 1;
 1299|      0|                            const int cbi = *ts->frame_thread[p].cbi++;
 1300|      0|                            cf = ts->frame_thread[p].cf;
 1301|      0|                            ts->frame_thread[p].cf += imin(t_dim->w, 8) * imin(t_dim->h, 8) * 16;
 1302|      0|                            eob  = cbi >> 5;
 1303|      0|                            txtp = cbi & 0x1f;
 1304|  1.46M|                        } else {
 1305|  1.46M|                            uint8_t cf_ctx;
 1306|  1.46M|                            cf = bitfn(t->cf);
  ------------------
  |  |   77|  1.46M|#define bitfn(x) x##_16bpc
  ------------------
 1307|  1.46M|                            eob = decode_coefs(t, &t->a->lcoef[bx4 + x],
 1308|  1.46M|                                               &t->l.lcoef[by4 + y], b->tx, bs,
 1309|  1.46M|                                               b, 1, 0, cf, &txtp, &cf_ctx);
 1310|  1.46M|                            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  1.46M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 1.46M]
  |  |  ------------------
  |  |   35|  1.46M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  1.46M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1311|      0|                                printf("Post-y-cf-blk[tx=%d,txtp=%d,eob=%d]: r=%d\n",
 1312|      0|                                       b->tx, txtp, eob, ts->msac.rng);
 1313|  1.46M|                            dav1d_memset_likely_pow2(&t->a->lcoef[bx4 + x], cf_ctx, imin(t_dim->w, f->bw - t->bx));
 1314|  1.46M|                            dav1d_memset_likely_pow2(&t->l.lcoef[by4 + y], cf_ctx, imin(t_dim->h, f->bh - t->by));
 1315|  1.46M|                        }
 1316|  1.46M|                        if (eob >= 0) {
  ------------------
  |  Branch (1316:29): [True: 1.12M, False: 338k]
  ------------------
 1317|  1.12M|                            if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   34|  1.12M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 1.12M]
  |  |  ------------------
  |  |   35|  1.12M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  1.12M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                                          if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1318|      0|                                coef_dump(cf, imin(t_dim->h, 8) * 4,
 1319|      0|                                          imin(t_dim->w, 8) * 4, 3, "dq");
 1320|  1.12M|                            dsp->itx.itxfm_add[b->tx]
 1321|  1.12M|                                              [txtp](dst,
 1322|  1.12M|                                                     f->cur.stride[0],
 1323|  1.12M|                                                     cf, eob HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  1.12M|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1324|  1.12M|                            if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   34|  1.12M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 1.12M]
  |  |  ------------------
  |  |   35|  1.12M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  1.12M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                                          if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1325|      0|                                hex_dump(dst, f->cur.stride[0],
 1326|      0|                                         t_dim->w * 4, t_dim->h * 4, "recon");
 1327|  1.12M|                        }
 1328|  2.17M|                    } else if (!t->frame_thread.pass) {
  ------------------
  |  Branch (1328:32): [True: 2.17M, False: 0]
  ------------------
 1329|  2.17M|                        dav1d_memset_pow2[t_dim->lw](&t->a->lcoef[bx4 + x], 0x40);
 1330|  2.17M|                        dav1d_memset_pow2[t_dim->lh](&t->l.lcoef[by4 + y], 0x40);
 1331|  2.17M|                    }
 1332|  3.63M|                    dst += 4 * t_dim->w;
 1333|  3.63M|                }
 1334|  1.96M|                t->bx -= x;
 1335|  1.96M|            }
 1336|  1.59M|            t->by -= y;
 1337|       |
 1338|  1.59M|            if (!has_chroma) continue;
  ------------------
  |  Branch (1338:17): [True: 651k, False: 939k]
  ------------------
 1339|       |
 1340|   939k|            const ptrdiff_t stride = f->cur.stride[1];
 1341|       |
 1342|   939k|            if (b->uv_mode == CFL_PRED) {
  ------------------
  |  Branch (1342:17): [True: 189k, False: 750k]
  ------------------
 1343|   189k|                assert(!init_x && !init_y);
  ------------------
  |  Branch (1343:17): [True: 189k, False: 0]
  |  Branch (1343:17): [True: 189k, False: 0]
  ------------------
 1344|       |
 1345|   189k|                int16_t *const ac = t->scratch.ac;
 1346|   189k|                pixel *y_src = ((pixel *) f->cur.data[0]) + 4 * (t->bx & ~ss_hor) +
 1347|   189k|                                 4 * (t->by & ~ss_ver) * PXSTRIDE(f->cur.stride[0]);
 1348|   189k|                const ptrdiff_t uv_off = 4 * ((t->bx >> ss_hor) +
 1349|   189k|                                              (t->by >> ss_ver) * PXSTRIDE(stride));
 1350|   189k|                pixel *const uv_dst[2] = { ((pixel *) f->cur.data[1]) + uv_off,
 1351|   189k|                                           ((pixel *) f->cur.data[2]) + uv_off };
 1352|       |
 1353|   189k|                const int furthest_r =
 1354|   189k|                    ((cw4 << ss_hor) + t_dim->w - 1) & ~(t_dim->w - 1);
 1355|   189k|                const int furthest_b =
 1356|   189k|                    ((ch4 << ss_ver) + t_dim->h - 1) & ~(t_dim->h - 1);
 1357|   189k|                dsp->ipred.cfl_ac[f->cur.p.layout - 1](ac, y_src, f->cur.stride[0],
 1358|   189k|                                                         cbw4 - (furthest_r >> ss_hor),
 1359|   189k|                                                         cbh4 - (furthest_b >> ss_ver),
 1360|   189k|                                                         cbw4 * 4, cbh4 * 4);
 1361|   567k|                for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1361:34): [True: 378k, False: 189k]
  ------------------
 1362|   378k|                    if (!b->cfl_alpha[pl]) continue;
  ------------------
  |  Branch (1362:25): [True: 79.7k, False: 298k]
  ------------------
 1363|   298k|                    int angle = 0;
 1364|   298k|                    const pixel *top_sb_edge = NULL;
 1365|   298k|                    if (!((t->by & ~ss_ver) & (f->sb_step - 1))) {
  ------------------
  |  Branch (1365:25): [True: 94.3k, False: 204k]
  ------------------
 1366|  94.3k|                        top_sb_edge = f->ipred_edge[pl + 1];
 1367|  94.3k|                        const int sby = t->by >> f->sb_shift;
 1368|  94.3k|                        top_sb_edge += f->sb128w * 128 * (sby - 1);
 1369|  94.3k|                    }
 1370|   298k|                    const int xpos = t->bx >> ss_hor, ypos = t->by >> ss_ver;
 1371|   298k|                    const int xstart = ts->tiling.col_start >> ss_hor;
 1372|   298k|                    const int ystart = ts->tiling.row_start >> ss_ver;
 1373|   298k|                    const enum IntraPredMode m =
 1374|   298k|                        bytefn(dav1d_prepare_intra_edges)(xpos, xpos > xstart,
  ------------------
  |  |   87|   298k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|   298k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 1375|   298k|                                                          ypos, ypos > ystart,
 1376|   298k|                                                          ts->tiling.col_end >> ss_hor,
 1377|   298k|                                                          ts->tiling.row_end >> ss_ver,
 1378|   298k|                                                          0, uv_dst[pl], stride,
 1379|   298k|                                                          top_sb_edge, DC_PRED, &angle,
 1380|   298k|                                                          uv_t_dim->w, uv_t_dim->h, 0,
 1381|   298k|                                                          edge HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|   298k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1382|   298k|                    dsp->ipred.cfl_pred[m](uv_dst[pl], stride, edge,
 1383|   298k|                                           uv_t_dim->w * 4,
 1384|   298k|                                           uv_t_dim->h * 4,
 1385|   298k|                                           ac, b->cfl_alpha[pl]
 1386|   298k|                                           HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|   298k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1387|   298k|                }
 1388|   189k|                if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   34|   189k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 189k]
  |  |  ------------------
  |  |   35|   189k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   189k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                              if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1389|      0|                    ac_dump(ac, 4*cbw4, 4*cbh4, "ac");
 1390|      0|                    hex_dump(uv_dst[0], stride, cbw4 * 4, cbh4 * 4, "u-cfl-pred");
 1391|      0|                    hex_dump(uv_dst[1], stride, cbw4 * 4, cbh4 * 4, "v-cfl-pred");
 1392|      0|                }
 1393|   750k|            } else if (b->pal_sz[1]) {
  ------------------
  |  Branch (1393:24): [True: 11.2k, False: 739k]
  ------------------
 1394|  11.2k|                const ptrdiff_t uv_dstoff = 4 * ((t->bx >> ss_hor) +
 1395|  11.2k|                                              (t->by >> ss_ver) * PXSTRIDE(f->cur.stride[1]));
 1396|  11.2k|                const pixel (*pal)[8];
 1397|  11.2k|                const uint8_t *pal_idx;
 1398|  11.2k|                if (t->frame_thread.pass) {
  ------------------
  |  Branch (1398:21): [True: 0, False: 11.2k]
  ------------------
 1399|      0|                    const int p = t->frame_thread.pass & 1;
 1400|      0|                    assert(ts->frame_thread[p].pal_idx);
  ------------------
  |  Branch (1400:21): [True: 0, False: 0]
  ------------------
 1401|      0|                    pal = f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
 1402|      0|                                              ((t->bx >> 1) + (t->by & 1))];
 1403|      0|                    pal_idx = ts->frame_thread[p].pal_idx;
 1404|      0|                    ts->frame_thread[p].pal_idx += cbw4 * cbh4 * 8;
 1405|  11.2k|                } else {
 1406|  11.2k|                    pal = bytefn(t->scratch.pal);
  ------------------
  |  |   87|  11.2k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  11.2k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 1407|  11.2k|                    pal_idx = t->scratch.pal_idx_uv;
 1408|  11.2k|                }
 1409|       |
 1410|  11.2k|                f->dsp->ipred.pal_pred(((pixel *) f->cur.data[1]) + uv_dstoff,
 1411|  11.2k|                                       f->cur.stride[1], pal[1],
 1412|  11.2k|                                       pal_idx, cbw4 * 4, cbh4 * 4);
 1413|  11.2k|                f->dsp->ipred.pal_pred(((pixel *) f->cur.data[2]) + uv_dstoff,
 1414|  11.2k|                                       f->cur.stride[1], pal[2],
 1415|  11.2k|                                       pal_idx, cbw4 * 4, cbh4 * 4);
 1416|  11.2k|                if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   34|  11.2k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 11.2k]
  |  |  ------------------
  |  |   35|  11.2k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  11.2k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                              if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1417|      0|                    hex_dump(((pixel *) f->cur.data[1]) + uv_dstoff,
 1418|      0|                             PXSTRIDE(f->cur.stride[1]),
 1419|      0|                             cbw4 * 4, cbh4 * 4, "u-pal-pred");
 1420|      0|                    hex_dump(((pixel *) f->cur.data[2]) + uv_dstoff,
 1421|      0|                             PXSTRIDE(f->cur.stride[1]),
 1422|      0|                             cbw4 * 4, cbh4 * 4, "v-pal-pred");
 1423|      0|                }
 1424|  11.2k|            }
 1425|       |
 1426|   939k|            const int sm_uv_fl = sm_uv_flag(t->a, cbx4) |
 1427|   939k|                                 sm_uv_flag(&t->l, cby4);
 1428|   939k|            const int uv_sb_has_tr =
 1429|   939k|                ((init_x + 16) >> ss_hor) < cw4 ? 1 : init_y ? 0 :
  ------------------
  |  Branch (1429:17): [True: 37.5k, False: 902k]
  |  Branch (1429:55): [True: 18.5k, False: 883k]
  ------------------
 1430|   902k|                intra_edge_flags & (EDGE_I420_TOP_HAS_RIGHT >> (f->cur.p.layout - 1));
 1431|   939k|            const int uv_sb_has_bl =
 1432|   939k|                init_x ? 0 : ((init_y + 16) >> ss_ver) < ch4 ? 1 :
  ------------------
  |  Branch (1432:17): [True: 37.5k, False: 902k]
  |  Branch (1432:30): [True: 18.5k, False: 883k]
  ------------------
 1433|   902k|                intra_edge_flags & (EDGE_I420_LEFT_HAS_BOTTOM >> (f->cur.p.layout - 1));
 1434|   939k|            const int sub_cw4 = imin(cw4, (init_x + 16) >> ss_hor);
 1435|  2.81M|            for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1435:30): [True: 1.87M, False: 939k]
  ------------------
 1436|  4.06M|                for (y = init_y >> ss_ver, t->by += init_y; y < sub_ch4;
  ------------------
  |  Branch (1436:61): [True: 2.18M, False: 1.87M]
  ------------------
 1437|  2.18M|                     y += uv_t_dim->h, t->by += uv_t_dim->h << ss_ver)
 1438|  2.18M|                {
 1439|  2.18M|                    pixel *dst = ((pixel *) f->cur.data[1 + pl]) +
 1440|  2.18M|                                   4 * ((t->by >> ss_ver) * PXSTRIDE(stride) +
 1441|  2.18M|                                        ((t->bx + init_x) >> ss_hor));
 1442|  5.36M|                    for (x = init_x >> ss_hor, t->bx += init_x; x < sub_cw4;
  ------------------
  |  Branch (1442:65): [True: 3.18M, False: 2.18M]
  ------------------
 1443|  3.18M|                         x += uv_t_dim->w, t->bx += uv_t_dim->w << ss_hor)
 1444|  3.18M|                    {
 1445|  3.18M|                        if ((b->uv_mode == CFL_PRED && b->cfl_alpha[pl]) ||
  ------------------
  |  Branch (1445:30): [True: 378k, False: 2.80M]
  |  Branch (1445:56): [True: 298k, False: 79.7k]
  ------------------
 1446|  2.88M|                            b->pal_sz[1])
  ------------------
  |  Branch (1446:29): [True: 32.0k, False: 2.84M]
  ------------------
 1447|   330k|                        {
 1448|   330k|                            goto skip_uv_pred;
 1449|   330k|                        }
 1450|       |
 1451|  2.84M|                        int angle = b->uv_angle;
 1452|       |                        // this probably looks weird because we're using
 1453|       |                        // luma flags in a chroma loop, but that's because
 1454|       |                        // prepare_intra_edges() expects luma flags as input
 1455|  2.84M|                        const enum EdgeFlags edge_flags =
 1456|  2.84M|                            (((y > (init_y >> ss_ver) || !uv_sb_has_tr) &&
  ------------------
  |  Branch (1456:32): [True: 1.04M, False: 1.80M]
  |  Branch (1456:58): [True: 600k, False: 1.20M]
  ------------------
 1457|  1.64M|                              (x + uv_t_dim->w >= sub_cw4)) ?
  ------------------
  |  Branch (1457:31): [True: 863k, False: 780k]
  ------------------
 1458|  1.98M|                                 0 : EDGE_I444_TOP_HAS_RIGHT) |
 1459|  2.84M|                            ((x > (init_x >> ss_hor) ||
  ------------------
  |  Branch (1459:31): [True: 984k, False: 1.86M]
  ------------------
 1460|  1.86M|                              (!uv_sb_has_bl && y + uv_t_dim->h >= sub_ch4)) ?
  ------------------
  |  Branch (1460:32): [True: 1.03M, False: 826k]
  |  Branch (1460:49): [True: 797k, False: 241k]
  ------------------
 1461|  1.78M|                                 0 : EDGE_I444_LEFT_HAS_BOTTOM);
 1462|  2.84M|                        const pixel *top_sb_edge = NULL;
 1463|  2.84M|                        if (!((t->by & ~ss_ver) & (f->sb_step - 1))) {
  ------------------
  |  Branch (1463:29): [True: 707k, False: 2.14M]
  ------------------
 1464|   707k|                            top_sb_edge = f->ipred_edge[1 + pl];
 1465|   707k|                            const int sby = t->by >> f->sb_shift;
 1466|   707k|                            top_sb_edge += f->sb128w * 128 * (sby - 1);
 1467|   707k|                        }
 1468|  2.84M|                        const enum IntraPredMode uv_mode =
 1469|  2.84M|                             b->uv_mode == CFL_PRED ? DC_PRED : b->uv_mode;
  ------------------
  |  Branch (1469:30): [True: 79.7k, False: 2.76M]
  ------------------
 1470|  2.84M|                        const int xpos = t->bx >> ss_hor, ypos = t->by >> ss_ver;
 1471|  2.84M|                        const int xstart = ts->tiling.col_start >> ss_hor;
 1472|  2.84M|                        const int ystart = ts->tiling.row_start >> ss_ver;
 1473|  2.84M|                        const enum IntraPredMode m =
 1474|  2.84M|                            bytefn(dav1d_prepare_intra_edges)(xpos, xpos > xstart,
  ------------------
  |  |   87|  2.84M|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  2.84M|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 1475|  2.84M|                                                              ypos, ypos > ystart,
 1476|  2.84M|                                                              ts->tiling.col_end >> ss_hor,
 1477|  2.84M|                                                              ts->tiling.row_end >> ss_ver,
 1478|  2.84M|                                                              edge_flags, dst, stride,
 1479|  2.84M|                                                              top_sb_edge, uv_mode,
 1480|  2.84M|                                                              &angle, uv_t_dim->w,
 1481|  2.84M|                                                              uv_t_dim->h,
 1482|  2.84M|                                                              f->seq_hdr->intra_edge_filter,
 1483|  2.84M|                                                              edge HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  2.84M|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1484|  2.84M|                        angle |= intra_edge_filter_flag;
 1485|  2.84M|                        dsp->ipred.intra_pred[m](dst, stride, edge,
 1486|  2.84M|                                                 uv_t_dim->w * 4,
 1487|  2.84M|                                                 uv_t_dim->h * 4,
 1488|  2.84M|                                                 angle | sm_uv_fl,
 1489|  2.84M|                                                 (4 * f->bw + ss_hor -
 1490|  2.84M|                                                  4 * (t->bx & ~ss_hor)) >> ss_hor,
 1491|  2.84M|                                                 (4 * f->bh + ss_ver -
 1492|  2.84M|                                                  4 * (t->by & ~ss_ver)) >> ss_ver
 1493|  2.84M|                                                 HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  2.84M|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1494|  2.84M|                        if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   34|  2.84M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 2.84M]
  |  |  ------------------
  |  |   35|  2.84M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  2.84M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                                      if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1495|      0|                            hex_dump(edge - uv_t_dim->h * 4, uv_t_dim->h * 4,
 1496|      0|                                     uv_t_dim->h * 4, 2, "l");
 1497|      0|                            hex_dump(edge, 0, 1, 1, "tl");
 1498|      0|                            hex_dump(edge + 1, uv_t_dim->w * 4,
 1499|      0|                                     uv_t_dim->w * 4, 2, "t");
 1500|      0|                            hex_dump(dst, stride, uv_t_dim->w * 4,
 1501|      0|                                     uv_t_dim->h * 4, pl ? "v-intra-pred" : "u-intra-pred");
  ------------------
  |  Branch (1501:55): [True: 0, False: 0]
  ------------------
 1502|      0|                        }
 1503|       |
 1504|  3.18M|                    skip_uv_pred: {}
 1505|  3.18M|                        if (!b->skip) {
  ------------------
  |  Branch (1505:29): [True: 1.71M, False: 1.46M]
  ------------------
 1506|  1.71M|                            enum TxfmType txtp;
 1507|  1.71M|                            int eob;
 1508|  1.71M|                            coef *cf;
 1509|  1.71M|                            if (t->frame_thread.pass) {
  ------------------
  |  Branch (1509:33): [True: 0, False: 1.71M]
  ------------------
 1510|      0|                                const int p = t->frame_thread.pass & 1;
 1511|      0|                                const int cbi = *ts->frame_thread[p].cbi++;
 1512|      0|                                cf = ts->frame_thread[p].cf;
 1513|      0|                                ts->frame_thread[p].cf += uv_t_dim->w * uv_t_dim->h * 16;
 1514|      0|                                eob  = cbi >> 5;
 1515|      0|                                txtp = cbi & 0x1f;
 1516|  1.71M|                            } else {
 1517|  1.71M|                                uint8_t cf_ctx;
 1518|  1.71M|                                cf = bitfn(t->cf);
  ------------------
  |  |   77|  1.71M|#define bitfn(x) x##_16bpc
  ------------------
 1519|  1.71M|                                eob = decode_coefs(t, &t->a->ccoef[pl][cbx4 + x],
 1520|  1.71M|                                                   &t->l.ccoef[pl][cby4 + y],
 1521|  1.71M|                                                   b->uvtx, bs, b, 1, 1 + pl, cf,
 1522|  1.71M|                                                   &txtp, &cf_ctx);
 1523|  1.71M|                                if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  1.71M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 1.71M]
  |  |  ------------------
  |  |   35|  1.71M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  1.71M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1524|      0|                                    printf("Post-uv-cf-blk[pl=%d,tx=%d,"
 1525|      0|                                           "txtp=%d,eob=%d]: r=%d [x=%d,cbx4=%d]\n",
 1526|      0|                                           pl, b->uvtx, txtp, eob, ts->msac.rng, x, cbx4);
 1527|  1.71M|                                int ctw = imin(uv_t_dim->w, (f->bw - t->bx + ss_hor) >> ss_hor);
 1528|  1.71M|                                int cth = imin(uv_t_dim->h, (f->bh - t->by + ss_ver) >> ss_ver);
 1529|  1.71M|                                dav1d_memset_likely_pow2(&t->a->ccoef[pl][cbx4 + x], cf_ctx, ctw);
 1530|  1.71M|                                dav1d_memset_likely_pow2(&t->l.ccoef[pl][cby4 + y], cf_ctx, cth);
 1531|  1.71M|                            }
 1532|  1.71M|                            if (eob >= 0) {
  ------------------
  |  Branch (1532:33): [True: 481k, False: 1.23M]
  ------------------
 1533|   481k|                                if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   34|   481k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 481k]
  |  |  ------------------
  |  |   35|   481k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   481k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                                              if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1534|      0|                                    coef_dump(cf, uv_t_dim->h * 4,
 1535|      0|                                              uv_t_dim->w * 4, 3, "dq");
 1536|   481k|                                dsp->itx.itxfm_add[b->uvtx]
 1537|   481k|                                                  [txtp](dst, stride,
 1538|   481k|                                                         cf, eob HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|   481k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1539|   481k|                                if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   34|   481k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 481k]
  |  |  ------------------
  |  |   35|   481k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   481k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                                              if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1540|      0|                                    hex_dump(dst, stride, uv_t_dim->w * 4,
 1541|      0|                                             uv_t_dim->h * 4, "recon");
 1542|   481k|                            }
 1543|  1.71M|                        } else if (!t->frame_thread.pass) {
  ------------------
  |  Branch (1543:36): [True: 1.46M, False: 0]
  ------------------
 1544|  1.46M|                            dav1d_memset_pow2[uv_t_dim->lw](&t->a->ccoef[pl][cbx4 + x], 0x40);
 1545|  1.46M|                            dav1d_memset_pow2[uv_t_dim->lh](&t->l.ccoef[pl][cby4 + y], 0x40);
 1546|  1.46M|                        }
 1547|  3.18M|                        dst += uv_t_dim->w * 4;
 1548|  3.18M|                    }
 1549|  2.18M|                    t->bx -= x << ss_hor;
 1550|  2.18M|                }
 1551|  1.87M|                t->by -= y << ss_ver;
 1552|  1.87M|            }
 1553|   939k|        }
 1554|  1.49M|    }
 1555|  1.45M|}
dav1d_recon_b_inter_16bpc:
 1559|   980k|{
 1560|   980k|    Dav1dTileState *const ts = t->ts;
 1561|   980k|    const Dav1dFrameContext *const f = t->f;
 1562|   980k|    const Dav1dDSPContext *const dsp = f->dsp;
 1563|   980k|    const int bx4 = t->bx & 31, by4 = t->by & 31;
 1564|   980k|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
 1565|   980k|    const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
 1566|   980k|    const int cbx4 = bx4 >> ss_hor, cby4 = by4 >> ss_ver;
 1567|   980k|    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
 1568|   980k|    const int bw4 = b_dim[0], bh4 = b_dim[1];
 1569|   980k|    const int w4 = imin(bw4, f->bw - t->bx), h4 = imin(bh4, f->bh - t->by);
 1570|   980k|    const int has_chroma = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400 &&
  ------------------
  |  Branch (1570:28): [True: 354k, False: 626k]
  ------------------
 1571|   354k|                           (bw4 > ss_hor || t->bx & 1) &&
  ------------------
  |  Branch (1571:29): [True: 332k, False: 22.7k]
  |  Branch (1571:45): [True: 11.7k, False: 10.9k]
  ------------------
 1572|   343k|                           (bh4 > ss_ver || t->by & 1);
  ------------------
  |  Branch (1572:29): [True: 330k, False: 13.0k]
  |  Branch (1572:45): [True: 6.54k, False: 6.50k]
  ------------------
 1573|   980k|    const int chr_layout_idx = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I400 ? 0 :
  ------------------
  |  Branch (1573:32): [True: 626k, False: 354k]
  ------------------
 1574|   980k|                               DAV1D_PIXEL_LAYOUT_I444 - f->cur.p.layout;
 1575|   980k|    int res;
 1576|       |
 1577|       |    // prediction
 1578|   980k|    const int cbh4 = (bh4 + ss_ver) >> ss_ver, cbw4 = (bw4 + ss_hor) >> ss_hor;
 1579|   980k|    pixel *dst = ((pixel *) f->cur.data[0]) +
 1580|   980k|        4 * (t->by * PXSTRIDE(f->cur.stride[0]) + t->bx);
 1581|   980k|    const ptrdiff_t uvdstoff =
 1582|   980k|        4 * ((t->bx >> ss_hor) + (t->by >> ss_ver) * PXSTRIDE(f->cur.stride[1]));
 1583|   980k|    if (IS_KEY_OR_INTRA(f->frame_hdr)) {
  ------------------
  |  |   43|   980k|    (!IS_INTER_OR_SWITCH(frame_header))
  |  |  ------------------
  |  |  |  |   36|   980k|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (43:5): [True: 450k, False: 530k]
  |  |  ------------------
  ------------------
 1584|       |        // intrabc
 1585|   450k|        assert(!f->frame_hdr->super_res.enabled);
  ------------------
  |  Branch (1585:9): [True: 450k, False: 0]
  ------------------
 1586|   450k|        res = mc(t, dst, NULL, f->cur.stride[0], bw4, bh4, t->bx, t->by, 0,
 1587|   450k|                 b->mv[0], &f->sr_cur, 0 /* unused */, FILTER_2D_BILINEAR);
 1588|   450k|        if (res) return res;
  ------------------
  |  Branch (1588:13): [True: 0, False: 450k]
  ------------------
 1589|   450k|        if (has_chroma) for (int pl = 1; pl < 3; pl++) {
  ------------------
  |  Branch (1589:13): [True: 133k, False: 316k]
  |  Branch (1589:42): [True: 267k, False: 133k]
  ------------------
 1590|   267k|            res = mc(t, ((pixel *)f->cur.data[pl]) + uvdstoff, NULL, f->cur.stride[1],
 1591|   267k|                     bw4 << (bw4 == ss_hor), bh4 << (bh4 == ss_ver),
 1592|   267k|                     t->bx & ~ss_hor, t->by & ~ss_ver, pl, b->mv[0],
 1593|   267k|                     &f->sr_cur, 0 /* unused */, FILTER_2D_BILINEAR);
 1594|   267k|            if (res) return res;
  ------------------
  |  Branch (1594:17): [True: 0, False: 267k]
  ------------------
 1595|   267k|        }
 1596|   530k|    } else if (b->comp_type == COMP_INTER_NONE) {
  ------------------
  |  Branch (1596:16): [True: 449k, False: 80.4k]
  ------------------
 1597|   449k|        const Dav1dThreadPicture *const refp = &f->refp[b->ref[0]];
 1598|   449k|        const enum Filter2d filter_2d = b->filter2d;
 1599|       |
 1600|   449k|        if (imin(bw4, bh4) > 1 &&
  ------------------
  |  Branch (1600:13): [True: 288k, False: 161k]
  ------------------
 1601|   288k|            ((b->inter_mode == GLOBALMV && f->gmv_warp_allowed[b->ref[0]]) ||
  ------------------
  |  Branch (1601:15): [True: 189k, False: 98.7k]
  |  Branch (1601:44): [True: 13.6k, False: 175k]
  ------------------
 1602|   274k|             (b->motion_mode == MM_WARP && t->warpmv.type > DAV1D_WM_TYPE_TRANSLATION)))
  ------------------
  |  Branch (1602:15): [True: 49.0k, False: 225k]
  |  Branch (1602:44): [True: 46.9k, False: 2.09k]
  ------------------
 1603|  60.6k|        {
 1604|  60.6k|            res = warp_affine(t, dst, NULL, f->cur.stride[0], b_dim, 0, refp,
 1605|  60.6k|                              b->motion_mode == MM_WARP ? &t->warpmv :
  ------------------
  |  Branch (1605:31): [True: 46.9k, False: 13.6k]
  ------------------
 1606|  60.6k|                                  &f->frame_hdr->gmv[b->ref[0]]);
 1607|  60.6k|            if (res) return res;
  ------------------
  |  Branch (1607:17): [True: 0, False: 60.6k]
  ------------------
 1608|   389k|        } else {
 1609|   389k|            res = mc(t, dst, NULL, f->cur.stride[0],
 1610|   389k|                     bw4, bh4, t->bx, t->by, 0, b->mv[0], refp, b->ref[0], filter_2d);
 1611|   389k|            if (res) return res;
  ------------------
  |  Branch (1611:17): [True: 0, False: 389k]
  ------------------
 1612|   389k|            if (b->motion_mode == MM_OBMC) {
  ------------------
  |  Branch (1612:17): [True: 87.1k, False: 302k]
  ------------------
 1613|  87.1k|                res = obmc(t, dst, f->cur.stride[0], b_dim, 0, bx4, by4, w4, h4);
 1614|  87.1k|                if (res) return res;
  ------------------
  |  Branch (1614:21): [True: 0, False: 87.1k]
  ------------------
 1615|  87.1k|            }
 1616|   389k|        }
 1617|   449k|        if (b->interintra_type) {
  ------------------
  |  Branch (1617:13): [True: 18.7k, False: 431k]
  ------------------
 1618|  18.7k|            pixel *const tl_edge = bitfn(t->scratch.edge) + 32;
  ------------------
  |  |   77|  18.7k|#define bitfn(x) x##_16bpc
  ------------------
 1619|  18.7k|            enum IntraPredMode m = b->interintra_mode == II_SMOOTH_PRED ?
  ------------------
  |  Branch (1619:36): [True: 3.54k, False: 15.1k]
  ------------------
 1620|  15.1k|                                   SMOOTH_PRED : b->interintra_mode;
 1621|  18.7k|            pixel *const tmp = bitfn(t->scratch.interintra);
  ------------------
  |  |   77|  18.7k|#define bitfn(x) x##_16bpc
  ------------------
 1622|  18.7k|            int angle = 0;
 1623|  18.7k|            const pixel *top_sb_edge = NULL;
 1624|  18.7k|            if (!(t->by & (f->sb_step - 1))) {
  ------------------
  |  Branch (1624:17): [True: 2.64k, False: 16.0k]
  ------------------
 1625|  2.64k|                top_sb_edge = f->ipred_edge[0];
 1626|  2.64k|                const int sby = t->by >> f->sb_shift;
 1627|  2.64k|                top_sb_edge += f->sb128w * 128 * (sby - 1);
 1628|  2.64k|            }
 1629|  18.7k|            m = bytefn(dav1d_prepare_intra_edges)(t->bx, t->bx > ts->tiling.col_start,
  ------------------
  |  |   87|  18.7k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  18.7k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 1630|  18.7k|                                                  t->by, t->by > ts->tiling.row_start,
 1631|  18.7k|                                                  ts->tiling.col_end, ts->tiling.row_end,
 1632|  18.7k|                                                  0, dst, f->cur.stride[0], top_sb_edge,
 1633|  18.7k|                                                  m, &angle, bw4, bh4, 0, tl_edge
 1634|  18.7k|                                                  HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  18.7k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1635|  18.7k|            dsp->ipred.intra_pred[m](tmp, 4 * bw4 * sizeof(pixel),
 1636|  18.7k|                                     tl_edge, bw4 * 4, bh4 * 4, 0, 0, 0
 1637|  18.7k|                                     HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  18.7k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1638|  18.7k|            dsp->mc.blend(dst, f->cur.stride[0], tmp,
 1639|  18.7k|                          bw4 * 4, bh4 * 4, II_MASK(0, bs, b));
  ------------------
  |  |   83|  18.7k|    ((const uint8_t*)((uintptr_t)&dav1d_masks + \
  |  |   84|  18.7k|    (size_t)((b)->interintra_type == INTER_INTRA_BLEND ? \
  |  |  ------------------
  |  |  |  Branch (84:14): [True: 14.0k, False: 4.68k]
  |  |  ------------------
  |  |   85|  18.7k|    dav1d_masks.offsets[c][(bs)-BS_32x32].ii[(b)->interintra_mode] : \
  |  |   86|  18.7k|    dav1d_masks.offsets[c][(bs)-BS_32x32].wedge[0][(b)->wedge_idx]) * 8))
  ------------------
 1640|  18.7k|        }
 1641|       |
 1642|   449k|        if (!has_chroma) goto skip_inter_chroma_pred;
  ------------------
  |  Branch (1642:13): [True: 303k, False: 146k]
  ------------------
 1643|       |
 1644|       |        // sub8x8 derivation
 1645|   146k|        int is_sub8x8 = bw4 == ss_hor || bh4 == ss_ver;
  ------------------
  |  Branch (1645:25): [True: 5.01k, False: 141k]
  |  Branch (1645:42): [True: 2.35k, False: 138k]
  ------------------
 1646|   146k|        refmvs_block *const *r;
 1647|   146k|        if (is_sub8x8) {
  ------------------
  |  Branch (1647:13): [True: 7.36k, False: 138k]
  ------------------
 1648|  7.36k|            assert(ss_hor == 1);
  ------------------
  |  Branch (1648:13): [True: 7.36k, False: 0]
  ------------------
 1649|  7.36k|            r = &t->rt.r[(t->by & 31) + 5];
 1650|  7.36k|            if (bw4 == 1) is_sub8x8 &= r[0][t->bx - 1].ref.ref[0] > 0;
  ------------------
  |  Branch (1650:17): [True: 5.01k, False: 2.35k]
  ------------------
 1651|  7.36k|            if (bh4 == ss_ver) is_sub8x8 &= r[-1][t->bx].ref.ref[0] > 0;
  ------------------
  |  Branch (1651:17): [True: 4.99k, False: 2.37k]
  ------------------
 1652|  7.36k|            if (bw4 == 1 && bh4 == ss_ver)
  ------------------
  |  Branch (1652:17): [True: 5.01k, False: 2.35k]
  |  Branch (1652:29): [True: 2.64k, False: 2.37k]
  ------------------
 1653|  2.64k|                is_sub8x8 &= r[-1][t->bx - 1].ref.ref[0] > 0;
 1654|  7.36k|        }
 1655|       |
 1656|       |        // chroma prediction
 1657|   146k|        if (is_sub8x8) {
  ------------------
  |  Branch (1657:13): [True: 6.76k, False: 139k]
  ------------------
 1658|  6.76k|            assert(ss_hor == 1);
  ------------------
  |  Branch (1658:13): [True: 6.76k, False: 0]
  ------------------
 1659|  6.76k|            ptrdiff_t h_off = 0, v_off = 0;
 1660|  6.76k|            if (bw4 == 1 && bh4 == ss_ver) {
  ------------------
  |  Branch (1660:17): [True: 4.47k, False: 2.29k]
  |  Branch (1660:29): [True: 2.15k, False: 2.31k]
  ------------------
 1661|  6.46k|                for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1661:34): [True: 4.30k, False: 2.15k]
  ------------------
 1662|  4.30k|                    res = mc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff,
 1663|  4.30k|                             NULL, f->cur.stride[1],
 1664|  4.30k|                             bw4, bh4, t->bx - 1, t->by - 1, 1 + pl,
 1665|  4.30k|                             r[-1][t->bx - 1].mv.mv[0],
 1666|  4.30k|                             &f->refp[r[-1][t->bx - 1].ref.ref[0] - 1],
 1667|  4.30k|                             r[-1][t->bx - 1].ref.ref[0] - 1,
 1668|  4.30k|                             t->frame_thread.pass != 2 ? t->tl_4x4_filter :
  ------------------
  |  Branch (1668:30): [True: 4.30k, False: 0]
  ------------------
 1669|  4.30k|                                 f->frame_thread.b[((t->by - 1) * f->b4_stride) + t->bx - 1].filter2d);
 1670|  4.30k|                    if (res) return res;
  ------------------
  |  Branch (1670:25): [True: 0, False: 4.30k]
  ------------------
 1671|  4.30k|                }
 1672|  2.15k|                v_off = 2 * PXSTRIDE(f->cur.stride[1]);
 1673|  2.15k|                h_off = 2;
 1674|  2.15k|            }
 1675|  6.76k|            if (bw4 == 1) {
  ------------------
  |  Branch (1675:17): [True: 4.47k, False: 2.29k]
  ------------------
 1676|  4.47k|                const enum Filter2d left_filter_2d =
 1677|  4.47k|                    dav1d_filter_2d[t->l.filter[1][by4]][t->l.filter[0][by4]];
 1678|  13.4k|                for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1678:34): [True: 8.94k, False: 4.47k]
  ------------------
 1679|  8.94k|                    res = mc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff + v_off, NULL,
 1680|  8.94k|                             f->cur.stride[1], bw4, bh4, t->bx - 1,
 1681|  8.94k|                             t->by, 1 + pl, r[0][t->bx - 1].mv.mv[0],
 1682|  8.94k|                             &f->refp[r[0][t->bx - 1].ref.ref[0] - 1],
 1683|  8.94k|                             r[0][t->bx - 1].ref.ref[0] - 1,
 1684|  8.94k|                             t->frame_thread.pass != 2 ? left_filter_2d :
  ------------------
  |  Branch (1684:30): [True: 8.94k, False: 0]
  ------------------
 1685|  8.94k|                                 f->frame_thread.b[(t->by * f->b4_stride) + t->bx - 1].filter2d);
 1686|  8.94k|                    if (res) return res;
  ------------------
  |  Branch (1686:25): [True: 0, False: 8.94k]
  ------------------
 1687|  8.94k|                }
 1688|  4.47k|                h_off = 2;
 1689|  4.47k|            }
 1690|  6.76k|            if (bh4 == ss_ver) {
  ------------------
  |  Branch (1690:17): [True: 4.44k, False: 2.31k]
  ------------------
 1691|  4.44k|                const enum Filter2d top_filter_2d =
 1692|  4.44k|                    dav1d_filter_2d[t->a->filter[1][bx4]][t->a->filter[0][bx4]];
 1693|  13.3k|                for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1693:34): [True: 8.89k, False: 4.44k]
  ------------------
 1694|  8.89k|                    res = mc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff + h_off, NULL,
 1695|  8.89k|                             f->cur.stride[1], bw4, bh4, t->bx, t->by - 1,
 1696|  8.89k|                             1 + pl, r[-1][t->bx].mv.mv[0],
 1697|  8.89k|                             &f->refp[r[-1][t->bx].ref.ref[0] - 1],
 1698|  8.89k|                             r[-1][t->bx].ref.ref[0] - 1,
 1699|  8.89k|                             t->frame_thread.pass != 2 ? top_filter_2d :
  ------------------
  |  Branch (1699:30): [True: 8.89k, False: 0]
  ------------------
 1700|  8.89k|                                 f->frame_thread.b[((t->by - 1) * f->b4_stride) + t->bx].filter2d);
 1701|  8.89k|                    if (res) return res;
  ------------------
  |  Branch (1701:25): [True: 0, False: 8.89k]
  ------------------
 1702|  8.89k|                }
 1703|  4.44k|                v_off = 2 * PXSTRIDE(f->cur.stride[1]);
 1704|  4.44k|            }
 1705|  20.2k|            for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1705:30): [True: 13.5k, False: 6.76k]
  ------------------
 1706|  13.5k|                res = mc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff + h_off + v_off, NULL, f->cur.stride[1],
 1707|  13.5k|                         bw4, bh4, t->bx, t->by, 1 + pl, b->mv[0],
 1708|  13.5k|                         refp, b->ref[0], filter_2d);
 1709|  13.5k|                if (res) return res;
  ------------------
  |  Branch (1709:21): [True: 0, False: 13.5k]
  ------------------
 1710|  13.5k|            }
 1711|   139k|        } else {
 1712|   139k|            if (imin(cbw4, cbh4) > 1 &&
  ------------------
  |  Branch (1712:17): [True: 80.8k, False: 58.4k]
  ------------------
 1713|  80.8k|                ((b->inter_mode == GLOBALMV && f->gmv_warp_allowed[b->ref[0]]) ||
  ------------------
  |  Branch (1713:19): [True: 36.1k, False: 44.6k]
  |  Branch (1713:48): [True: 4.10k, False: 32.0k]
  ------------------
 1714|  76.7k|                 (b->motion_mode == MM_WARP && t->warpmv.type > DAV1D_WM_TYPE_TRANSLATION)))
  ------------------
  |  Branch (1714:19): [True: 3.69k, False: 73.0k]
  |  Branch (1714:48): [True: 2.43k, False: 1.26k]
  ------------------
 1715|  6.53k|            {
 1716|  19.6k|                for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1716:34): [True: 13.0k, False: 6.53k]
  ------------------
 1717|  13.0k|                    res = warp_affine(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff, NULL,
 1718|  13.0k|                                      f->cur.stride[1], b_dim, 1 + pl, refp,
 1719|  13.0k|                                      b->motion_mode == MM_WARP ? &t->warpmv :
  ------------------
  |  Branch (1719:39): [True: 4.86k, False: 8.21k]
  ------------------
 1720|  13.0k|                                          &f->frame_hdr->gmv[b->ref[0]]);
 1721|  13.0k|                    if (res) return res;
  ------------------
  |  Branch (1721:25): [True: 0, False: 13.0k]
  ------------------
 1722|  13.0k|                }
 1723|   132k|            } else {
 1724|   398k|                for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1724:34): [True: 265k, False: 132k]
  ------------------
 1725|   265k|                    res = mc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff,
 1726|   265k|                             NULL, f->cur.stride[1],
 1727|   265k|                             bw4 << (bw4 == ss_hor), bh4 << (bh4 == ss_ver),
 1728|   265k|                             t->bx & ~ss_hor, t->by & ~ss_ver,
 1729|   265k|                             1 + pl, b->mv[0], refp, b->ref[0], filter_2d);
 1730|   265k|                    if (res) return res;
  ------------------
  |  Branch (1730:25): [True: 0, False: 265k]
  ------------------
 1731|   265k|                    if (b->motion_mode == MM_OBMC) {
  ------------------
  |  Branch (1731:25): [True: 54.6k, False: 210k]
  ------------------
 1732|  54.6k|                        res = obmc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff,
 1733|  54.6k|                                   f->cur.stride[1], b_dim, 1 + pl, bx4, by4, w4, h4);
 1734|  54.6k|                        if (res) return res;
  ------------------
  |  Branch (1734:29): [True: 0, False: 54.6k]
  ------------------
 1735|  54.6k|                    }
 1736|   265k|                }
 1737|   132k|            }
 1738|   139k|            if (b->interintra_type) {
  ------------------
  |  Branch (1738:17): [True: 8.33k, False: 131k]
  ------------------
 1739|       |                // FIXME for 8x32 with 4:2:2 subsampling, this probably does
 1740|       |                // the wrong thing since it will select 4x16, not 4x32, as a
 1741|       |                // transform size...
 1742|  8.33k|                const uint8_t *const ii_mask = II_MASK(chr_layout_idx, bs, b);
  ------------------
  |  |   83|  8.33k|    ((const uint8_t*)((uintptr_t)&dav1d_masks + \
  |  |   84|  8.33k|    (size_t)((b)->interintra_type == INTER_INTRA_BLEND ? \
  |  |  ------------------
  |  |  |  Branch (84:14): [True: 6.05k, False: 2.27k]
  |  |  ------------------
  |  |   85|  8.33k|    dav1d_masks.offsets[c][(bs)-BS_32x32].ii[(b)->interintra_mode] : \
  |  |   86|  8.33k|    dav1d_masks.offsets[c][(bs)-BS_32x32].wedge[0][(b)->wedge_idx]) * 8))
  ------------------
 1743|       |
 1744|  24.9k|                for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1744:34): [True: 16.6k, False: 8.33k]
  ------------------
 1745|  16.6k|                    pixel *const tmp = bitfn(t->scratch.interintra);
  ------------------
  |  |   77|  16.6k|#define bitfn(x) x##_16bpc
  ------------------
 1746|  16.6k|                    pixel *const tl_edge = bitfn(t->scratch.edge) + 32;
  ------------------
  |  |   77|  16.6k|#define bitfn(x) x##_16bpc
  ------------------
 1747|  16.6k|                    enum IntraPredMode m =
 1748|  16.6k|                        b->interintra_mode == II_SMOOTH_PRED ?
  ------------------
  |  Branch (1748:25): [True: 3.05k, False: 13.6k]
  ------------------
 1749|  13.6k|                        SMOOTH_PRED : b->interintra_mode;
 1750|  16.6k|                    int angle = 0;
 1751|  16.6k|                    pixel *const uvdst = ((pixel *) f->cur.data[1 + pl]) + uvdstoff;
 1752|  16.6k|                    const pixel *top_sb_edge = NULL;
 1753|  16.6k|                    if (!(t->by & (f->sb_step - 1))) {
  ------------------
  |  Branch (1753:25): [True: 3.04k, False: 13.6k]
  ------------------
 1754|  3.04k|                        top_sb_edge = f->ipred_edge[pl + 1];
 1755|  3.04k|                        const int sby = t->by >> f->sb_shift;
 1756|  3.04k|                        top_sb_edge += f->sb128w * 128 * (sby - 1);
 1757|  3.04k|                    }
 1758|  16.6k|                    m = bytefn(dav1d_prepare_intra_edges)(t->bx >> ss_hor,
  ------------------
  |  |   87|  16.6k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  16.6k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 1759|  16.6k|                                                          (t->bx >> ss_hor) >
 1760|  16.6k|                                                              (ts->tiling.col_start >> ss_hor),
 1761|  16.6k|                                                          t->by >> ss_ver,
 1762|  16.6k|                                                          (t->by >> ss_ver) >
 1763|  16.6k|                                                              (ts->tiling.row_start >> ss_ver),
 1764|  16.6k|                                                          ts->tiling.col_end >> ss_hor,
 1765|  16.6k|                                                          ts->tiling.row_end >> ss_ver,
 1766|  16.6k|                                                          0, uvdst, f->cur.stride[1],
 1767|  16.6k|                                                          top_sb_edge, m,
 1768|  16.6k|                                                          &angle, cbw4, cbh4, 0, tl_edge
 1769|  16.6k|                                                          HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  16.6k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1770|  16.6k|                    dsp->ipred.intra_pred[m](tmp, cbw4 * 4 * sizeof(pixel),
 1771|  16.6k|                                             tl_edge, cbw4 * 4, cbh4 * 4, 0, 0, 0
 1772|  16.6k|                                             HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  16.6k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1773|  16.6k|                    dsp->mc.blend(uvdst, f->cur.stride[1], tmp,
 1774|  16.6k|                                  cbw4 * 4, cbh4 * 4, ii_mask);
 1775|  16.6k|                }
 1776|  8.33k|            }
 1777|   139k|        }
 1778|       |
 1779|   449k|    skip_inter_chroma_pred: {}
 1780|   449k|        t->tl_4x4_filter = filter_2d;
 1781|   449k|    } else {
 1782|  80.4k|        const enum Filter2d filter_2d = b->filter2d;
 1783|       |        // Maximum super block size is 128x128
 1784|  80.4k|        int16_t (*tmp)[128 * 128] = t->scratch.compinter;
 1785|  80.4k|        int jnt_weight;
 1786|  80.4k|        uint8_t *const seg_mask = t->scratch.seg_mask;
 1787|  80.4k|        const uint8_t *mask;
 1788|       |
 1789|   241k|        for (int i = 0; i < 2; i++) {
  ------------------
  |  Branch (1789:25): [True: 160k, False: 80.4k]
  ------------------
 1790|   160k|            const Dav1dThreadPicture *const refp = &f->refp[b->ref[i]];
 1791|       |
 1792|   160k|            if (b->inter_mode == GLOBALMV_GLOBALMV && f->gmv_warp_allowed[b->ref[i]]) {
  ------------------
  |  Branch (1792:17): [True: 14.0k, False: 146k]
  |  Branch (1792:55): [True: 520, False: 13.5k]
  ------------------
 1793|    520|                res = warp_affine(t, NULL, tmp[i], bw4 * 4, b_dim, 0, refp,
 1794|    520|                                  &f->frame_hdr->gmv[b->ref[i]]);
 1795|    520|                if (res) return res;
  ------------------
  |  Branch (1795:21): [True: 0, False: 520]
  ------------------
 1796|   160k|            } else {
 1797|   160k|                res = mc(t, NULL, tmp[i], 0, bw4, bh4, t->bx, t->by, 0,
 1798|   160k|                         b->mv[i], refp, b->ref[i], filter_2d);
 1799|   160k|                if (res) return res;
  ------------------
  |  Branch (1799:21): [True: 0, False: 160k]
  ------------------
 1800|   160k|            }
 1801|   160k|        }
 1802|  80.4k|        switch (b->comp_type) {
  ------------------
  |  Branch (1802:17): [True: 80.4k, False: 0]
  ------------------
 1803|  45.6k|        case COMP_INTER_AVG:
  ------------------
  |  Branch (1803:9): [True: 45.6k, False: 34.8k]
  ------------------
 1804|  45.6k|            dsp->mc.avg(dst, f->cur.stride[0], tmp[0], tmp[1],
 1805|  45.6k|                        bw4 * 4, bh4 * 4 HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  45.6k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1806|  45.6k|            break;
 1807|  11.6k|        case COMP_INTER_WEIGHTED_AVG:
  ------------------
  |  Branch (1807:9): [True: 11.6k, False: 68.7k]
  ------------------
 1808|  11.6k|            jnt_weight = f->jnt_weights[b->ref[0]][b->ref[1]];
 1809|  11.6k|            dsp->mc.w_avg(dst, f->cur.stride[0], tmp[0], tmp[1],
 1810|  11.6k|                          bw4 * 4, bh4 * 4, jnt_weight HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  11.6k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1811|  11.6k|            break;
 1812|  16.3k|        case COMP_INTER_SEG:
  ------------------
  |  Branch (1812:9): [True: 16.3k, False: 64.0k]
  ------------------
 1813|  16.3k|            dsp->mc.w_mask[chr_layout_idx](dst, f->cur.stride[0],
 1814|  16.3k|                                           tmp[b->mask_sign], tmp[!b->mask_sign],
 1815|  16.3k|                                           bw4 * 4, bh4 * 4, seg_mask,
 1816|  16.3k|                                           b->mask_sign HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  16.3k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1817|  16.3k|            mask = seg_mask;
 1818|  16.3k|            break;
 1819|  6.76k|        case COMP_INTER_WEDGE:
  ------------------
  |  Branch (1819:9): [True: 6.76k, False: 73.6k]
  ------------------
 1820|  6.76k|            mask = WEDGE_MASK(0, bs, 0, b->wedge_idx);
  ------------------
  |  |   89|  6.76k|    ((const uint8_t*)((uintptr_t)&dav1d_masks + \
  |  |   90|  6.76k|    (size_t)dav1d_masks.offsets[c][(bs)-BS_32x32].wedge[sign][idx] * 8))
  ------------------
 1821|  6.76k|            dsp->mc.mask(dst, f->cur.stride[0],
 1822|  6.76k|                         tmp[b->mask_sign], tmp[!b->mask_sign],
 1823|  6.76k|                         bw4 * 4, bh4 * 4, mask HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  6.76k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1824|  6.76k|            if (has_chroma)
  ------------------
  |  Branch (1824:17): [True: 4.97k, False: 1.79k]
  ------------------
 1825|  4.97k|                mask = WEDGE_MASK(chr_layout_idx, bs, b->mask_sign, b->wedge_idx);
  ------------------
  |  |   89|  4.97k|    ((const uint8_t*)((uintptr_t)&dav1d_masks + \
  |  |   90|  4.97k|    (size_t)dav1d_masks.offsets[c][(bs)-BS_32x32].wedge[sign][idx] * 8))
  ------------------
 1826|  6.76k|            break;
 1827|  80.4k|        }
 1828|       |
 1829|       |        // chroma
 1830|   172k|        if (has_chroma) for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1830:13): [True: 57.3k, False: 23.0k]
  |  Branch (1830:42): [True: 114k, False: 57.3k]
  ------------------
 1831|   344k|            for (int i = 0; i < 2; i++) {
  ------------------
  |  Branch (1831:29): [True: 229k, False: 114k]
  ------------------
 1832|   229k|                const Dav1dThreadPicture *const refp = &f->refp[b->ref[i]];
 1833|   229k|                if (b->inter_mode == GLOBALMV_GLOBALMV &&
  ------------------
  |  Branch (1833:21): [True: 21.2k, False: 208k]
  ------------------
 1834|  21.2k|                    imin(cbw4, cbh4) > 1 && f->gmv_warp_allowed[b->ref[i]])
  ------------------
  |  Branch (1834:21): [True: 20.1k, False: 1.08k]
  |  Branch (1834:45): [True: 370, False: 19.7k]
  ------------------
 1835|    370|                {
 1836|    370|                    res = warp_affine(t, NULL, tmp[i], bw4 * 4 >> ss_hor,
 1837|    370|                                      b_dim, 1 + pl,
 1838|    370|                                      refp, &f->frame_hdr->gmv[b->ref[i]]);
 1839|    370|                    if (res) return res;
  ------------------
  |  Branch (1839:25): [True: 0, False: 370]
  ------------------
 1840|   229k|                } else {
 1841|   229k|                    res = mc(t, NULL, tmp[i], 0, bw4, bh4, t->bx, t->by,
 1842|   229k|                             1 + pl, b->mv[i], refp, b->ref[i], filter_2d);
 1843|   229k|                    if (res) return res;
  ------------------
  |  Branch (1843:25): [True: 0, False: 229k]
  ------------------
 1844|   229k|                }
 1845|   229k|            }
 1846|   114k|            pixel *const uvdst = ((pixel *) f->cur.data[1 + pl]) + uvdstoff;
 1847|   114k|            switch (b->comp_type) {
  ------------------
  |  Branch (1847:21): [True: 114k, False: 0]
  ------------------
 1848|  65.8k|            case COMP_INTER_AVG:
  ------------------
  |  Branch (1848:13): [True: 65.8k, False: 48.8k]
  ------------------
 1849|  65.8k|                dsp->mc.avg(uvdst, f->cur.stride[1], tmp[0], tmp[1],
 1850|  65.8k|                            bw4 * 4 >> ss_hor, bh4 * 4 >> ss_ver
 1851|  65.8k|                            HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  65.8k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1852|  65.8k|                break;
 1853|  21.1k|            case COMP_INTER_WEIGHTED_AVG:
  ------------------
  |  Branch (1853:13): [True: 21.1k, False: 93.5k]
  ------------------
 1854|  21.1k|                dsp->mc.w_avg(uvdst, f->cur.stride[1], tmp[0], tmp[1],
 1855|  21.1k|                              bw4 * 4 >> ss_hor, bh4 * 4 >> ss_ver, jnt_weight
 1856|  21.1k|                              HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  21.1k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1857|  21.1k|                break;
 1858|  9.94k|            case COMP_INTER_WEDGE:
  ------------------
  |  Branch (1858:13): [True: 9.94k, False: 104k]
  ------------------
 1859|  27.7k|            case COMP_INTER_SEG:
  ------------------
  |  Branch (1859:13): [True: 17.7k, False: 96.9k]
  ------------------
 1860|  27.7k|                dsp->mc.mask(uvdst, f->cur.stride[1],
 1861|  27.7k|                             tmp[b->mask_sign], tmp[!b->mask_sign],
 1862|  27.7k|                             bw4 * 4 >> ss_hor, bh4 * 4 >> ss_ver, mask
 1863|  27.7k|                             HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  27.7k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1864|  27.7k|                break;
 1865|   114k|            }
 1866|   114k|        }
 1867|  80.4k|    }
 1868|       |
 1869|   980k|    if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   34|   980k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 980k]
  |  |  ------------------
  |  |   35|   980k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   980k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                  if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1870|      0|        hex_dump(dst, f->cur.stride[0], b_dim[0] * 4, b_dim[1] * 4, "y-pred");
 1871|      0|        if (has_chroma) {
  ------------------
  |  Branch (1871:13): [True: 0, False: 0]
  ------------------
 1872|      0|            hex_dump(&((pixel *) f->cur.data[1])[uvdstoff], f->cur.stride[1],
 1873|      0|                     cbw4 * 4, cbh4 * 4, "u-pred");
 1874|      0|            hex_dump(&((pixel *) f->cur.data[2])[uvdstoff], f->cur.stride[1],
 1875|      0|                     cbw4 * 4, cbh4 * 4, "v-pred");
 1876|      0|        }
 1877|      0|    }
 1878|       |
 1879|   980k|    const int cw4 = (w4 + ss_hor) >> ss_hor, ch4 = (h4 + ss_ver) >> ss_ver;
 1880|       |
 1881|   980k|    if (b->skip) {
  ------------------
  |  Branch (1881:9): [True: 763k, False: 217k]
  ------------------
 1882|       |        // reset coef contexts
 1883|   763k|        BlockContext *const a = t->a;
 1884|   763k|        dav1d_memset_pow2[b_dim[2]](&a->lcoef[bx4], 0x40);
 1885|   763k|        dav1d_memset_pow2[b_dim[3]](&t->l.lcoef[by4], 0x40);
 1886|   763k|        if (has_chroma) {
  ------------------
  |  Branch (1886:13): [True: 183k, False: 580k]
  ------------------
 1887|   183k|            dav1d_memset_pow2_fn memset_cw = dav1d_memset_pow2[ulog2(cbw4)];
 1888|   183k|            dav1d_memset_pow2_fn memset_ch = dav1d_memset_pow2[ulog2(cbh4)];
 1889|   183k|            memset_cw(&a->ccoef[0][cbx4], 0x40);
 1890|   183k|            memset_cw(&a->ccoef[1][cbx4], 0x40);
 1891|   183k|            memset_ch(&t->l.ccoef[0][cby4], 0x40);
 1892|   183k|            memset_ch(&t->l.ccoef[1][cby4], 0x40);
 1893|   183k|        }
 1894|   763k|        return 0;
 1895|   763k|    }
 1896|       |
 1897|   217k|    const TxfmInfo *const uvtx = &dav1d_txfm_dimensions[b->uvtx];
 1898|   217k|    const TxfmInfo *const ytx = &dav1d_txfm_dimensions[b->max_ytx];
 1899|   217k|    const uint16_t tx_split[2] = { b->tx_split0, b->tx_split1 };
 1900|       |
 1901|   437k|    for (int init_y = 0; init_y < bh4; init_y += 16) {
  ------------------
  |  Branch (1901:26): [True: 220k, False: 217k]
  ------------------
 1902|   445k|        for (int init_x = 0; init_x < bw4; init_x += 16) {
  ------------------
  |  Branch (1902:30): [True: 225k, False: 220k]
  ------------------
 1903|       |            // coefficient coding & inverse transforms
 1904|   225k|            int y_off = !!init_y, y;
 1905|   225k|            dst += PXSTRIDE(f->cur.stride[0]) * 4 * init_y;
 1906|   473k|            for (y = init_y, t->by += init_y; y < imin(h4, init_y + 16);
  ------------------
  |  Branch (1906:47): [True: 247k, False: 225k]
  ------------------
 1907|   247k|                 y += ytx->h, y_off++)
 1908|   247k|            {
 1909|   247k|                int x, x_off = !!init_x;
 1910|   627k|                for (x = init_x, t->bx += init_x; x < imin(w4, init_x + 16);
  ------------------
  |  Branch (1910:51): [True: 379k, False: 247k]
  ------------------
 1911|   379k|                     x += ytx->w, x_off++)
 1912|   379k|                {
 1913|   379k|                    read_coef_tree(t, bs, b, b->max_ytx, 0, tx_split,
 1914|   379k|                                   x_off, y_off, &dst[x * 4]);
 1915|   379k|                    t->bx += ytx->w;
 1916|   379k|                }
 1917|   247k|                dst += PXSTRIDE(f->cur.stride[0]) * 4 * ytx->h;
 1918|   247k|                t->bx -= x;
 1919|   247k|                t->by += ytx->h;
 1920|   247k|            }
 1921|   225k|            dst -= PXSTRIDE(f->cur.stride[0]) * 4 * y;
 1922|   225k|            t->by -= y;
 1923|       |
 1924|       |            // chroma coefs and inverse transform
 1925|   476k|            if (has_chroma) for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1925:17): [True: 158k, False: 66.8k]
  |  Branch (1925:46): [True: 317k, False: 158k]
  ------------------
 1926|   317k|                pixel *uvdst = ((pixel *) f->cur.data[1 + pl]) + uvdstoff +
 1927|   317k|                    (PXSTRIDE(f->cur.stride[1]) * init_y * 4 >> ss_ver);
 1928|   317k|                for (y = init_y >> ss_ver, t->by += init_y;
 1929|   671k|                     y < imin(ch4, (init_y + 16) >> ss_ver); y += uvtx->h)
  ------------------
  |  Branch (1929:22): [True: 354k, False: 317k]
  ------------------
 1930|   354k|                {
 1931|   354k|                    int x;
 1932|   354k|                    for (x = init_x >> ss_hor, t->bx += init_x;
 1933|   853k|                         x < imin(cw4, (init_x + 16) >> ss_hor); x += uvtx->w)
  ------------------
  |  Branch (1933:26): [True: 499k, False: 354k]
  ------------------
 1934|   499k|                    {
 1935|   499k|                        coef *cf;
 1936|   499k|                        int eob;
 1937|   499k|                        enum TxfmType txtp;
 1938|   499k|                        if (t->frame_thread.pass) {
  ------------------
  |  Branch (1938:29): [True: 0, False: 499k]
  ------------------
 1939|      0|                            const int p = t->frame_thread.pass & 1;
 1940|      0|                            const int cbi = *ts->frame_thread[p].cbi++;
 1941|      0|                            cf = ts->frame_thread[p].cf;
 1942|      0|                            ts->frame_thread[p].cf += uvtx->w * uvtx->h * 16;
 1943|      0|                            eob  = cbi >> 5;
 1944|      0|                            txtp = cbi & 0x1f;
 1945|   499k|                        } else {
 1946|   499k|                            uint8_t cf_ctx;
 1947|   499k|                            cf = bitfn(t->cf);
  ------------------
  |  |   77|   499k|#define bitfn(x) x##_16bpc
  ------------------
 1948|   499k|                            txtp = t->scratch.txtp_map[(by4 + (y << ss_ver)) * 32 +
 1949|   499k|                                                        bx4 + (x << ss_hor)];
 1950|   499k|                            eob = decode_coefs(t, &t->a->ccoef[pl][cbx4 + x],
 1951|   499k|                                               &t->l.ccoef[pl][cby4 + y],
 1952|   499k|                                               b->uvtx, bs, b, 0, 1 + pl,
 1953|   499k|                                               cf, &txtp, &cf_ctx);
 1954|   499k|                            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   499k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 499k]
  |  |  ------------------
  |  |   35|   499k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   499k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1955|      0|                                printf("Post-uv-cf-blk[pl=%d,tx=%d,"
 1956|      0|                                       "txtp=%d,eob=%d]: r=%d\n",
 1957|      0|                                       pl, b->uvtx, txtp, eob, ts->msac.rng);
 1958|   499k|                            int ctw = imin(uvtx->w, (f->bw - t->bx + ss_hor) >> ss_hor);
 1959|   499k|                            int cth = imin(uvtx->h, (f->bh - t->by + ss_ver) >> ss_ver);
 1960|   499k|                            dav1d_memset_likely_pow2(&t->a->ccoef[pl][cbx4 + x], cf_ctx, ctw);
 1961|   499k|                            dav1d_memset_likely_pow2(&t->l.ccoef[pl][cby4 + y], cf_ctx, cth);
 1962|   499k|                        }
 1963|   499k|                        if (eob >= 0) {
  ------------------
  |  Branch (1963:29): [True: 158k, False: 341k]
  ------------------
 1964|   158k|                            if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   34|   158k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 158k]
  |  |  ------------------
  |  |   35|   158k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   158k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                                          if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1965|      0|                                coef_dump(cf, uvtx->h * 4, uvtx->w * 4, 3, "dq");
 1966|   158k|                            dsp->itx.itxfm_add[b->uvtx]
 1967|   158k|                                              [txtp](&uvdst[4 * x],
 1968|   158k|                                                     f->cur.stride[1],
 1969|   158k|                                                     cf, eob HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|   158k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1970|   158k|                            if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   34|   158k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 158k]
  |  |  ------------------
  |  |   35|   158k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   158k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                                          if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1971|      0|                                hex_dump(&uvdst[4 * x], f->cur.stride[1],
 1972|      0|                                         uvtx->w * 4, uvtx->h * 4, "recon");
 1973|   158k|                        }
 1974|   499k|                        t->bx += uvtx->w << ss_hor;
 1975|   499k|                    }
 1976|   354k|                    uvdst += PXSTRIDE(f->cur.stride[1]) * 4 * uvtx->h;
 1977|   354k|                    t->bx -= x << ss_hor;
 1978|   354k|                    t->by += uvtx->h << ss_ver;
 1979|   354k|                }
 1980|   317k|                t->by -= y << ss_ver;
 1981|   317k|            }
 1982|   225k|        }
 1983|   220k|    }
 1984|   217k|    return 0;
 1985|   980k|}
dav1d_filter_sbrow_deblock_cols_16bpc:
 1987|  58.2k|void bytefn(dav1d_filter_sbrow_deblock_cols)(Dav1dFrameContext *const f, const int sby) {
 1988|  58.2k|    if (!(f->c->inloop_filters & DAV1D_INLOOPFILTER_DEBLOCK) ||
  ------------------
  |  Branch (1988:9): [True: 0, False: 58.2k]
  ------------------
 1989|  58.2k|        (!f->frame_hdr->loopfilter.level_y[0] && !f->frame_hdr->loopfilter.level_y[1]))
  ------------------
  |  Branch (1989:10): [True: 40.4k, False: 17.7k]
  |  Branch (1989:50): [True: 36.2k, False: 4.23k]
  ------------------
 1990|  36.2k|    {
 1991|  36.2k|        return;
 1992|  36.2k|    }
 1993|  21.9k|    const int y = sby * f->sb_step * 4;
 1994|  21.9k|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
 1995|  21.9k|    pixel *const p[3] = {
 1996|  21.9k|        f->lf.p[0] + y * PXSTRIDE(f->cur.stride[0]),
 1997|  21.9k|        f->lf.p[1] + (y * PXSTRIDE(f->cur.stride[1]) >> ss_ver),
 1998|  21.9k|        f->lf.p[2] + (y * PXSTRIDE(f->cur.stride[1]) >> ss_ver)
 1999|  21.9k|    };
 2000|  21.9k|    Av1Filter *mask = f->lf.mask + (sby >> !f->seq_hdr->sb128) * f->sb128w;
 2001|  21.9k|    bytefn(dav1d_loopfilter_sbrow_cols)(f, p, mask, sby,
  ------------------
  |  |   87|  21.9k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  21.9k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2002|  21.9k|                                        f->lf.start_of_tile_row[sby]);
 2003|  21.9k|}
dav1d_filter_sbrow_deblock_rows_16bpc:
 2005|  58.2k|void bytefn(dav1d_filter_sbrow_deblock_rows)(Dav1dFrameContext *const f, const int sby) {
 2006|  58.2k|    const int y = sby * f->sb_step * 4;
 2007|  58.2k|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
 2008|  58.2k|    pixel *const p[3] = {
 2009|  58.2k|        f->lf.p[0] + y * PXSTRIDE(f->cur.stride[0]),
 2010|  58.2k|        f->lf.p[1] + (y * PXSTRIDE(f->cur.stride[1]) >> ss_ver),
 2011|  58.2k|        f->lf.p[2] + (y * PXSTRIDE(f->cur.stride[1]) >> ss_ver)
 2012|  58.2k|    };
 2013|  58.2k|    Av1Filter *mask = f->lf.mask + (sby >> !f->seq_hdr->sb128) * f->sb128w;
 2014|  58.2k|    if (f->c->inloop_filters & DAV1D_INLOOPFILTER_DEBLOCK &&
  ------------------
  |  Branch (2014:9): [True: 58.2k, False: 0]
  ------------------
 2015|  58.2k|        (f->frame_hdr->loopfilter.level_y[0] || f->frame_hdr->loopfilter.level_y[1]))
  ------------------
  |  Branch (2015:10): [True: 17.7k, False: 40.4k]
  |  Branch (2015:49): [True: 4.23k, False: 36.2k]
  ------------------
 2016|  21.9k|    {
 2017|  21.9k|        bytefn(dav1d_loopfilter_sbrow_rows)(f, p, mask, sby);
  ------------------
  |  |   87|  21.9k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  21.9k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2018|  21.9k|    }
 2019|  58.2k|    if (f->seq_hdr->cdef || f->lf.restore_planes) {
  ------------------
  |  Branch (2019:9): [True: 26.5k, False: 31.7k]
  |  Branch (2019:29): [True: 5.55k, False: 26.1k]
  ------------------
 2020|       |        // Store loop filtered pixels required by CDEF / LR
 2021|  32.0k|        bytefn(dav1d_copy_lpf)(f, p, sby);
  ------------------
  |  |   87|  32.0k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  32.0k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2022|  32.0k|    }
 2023|  58.2k|}
dav1d_filter_sbrow_cdef_16bpc:
 2025|  26.5k|void bytefn(dav1d_filter_sbrow_cdef)(Dav1dTaskContext *const tc, const int sby) {
 2026|  26.5k|    const Dav1dFrameContext *const f = tc->f;
 2027|  26.5k|    if (!(f->c->inloop_filters & DAV1D_INLOOPFILTER_CDEF)) return;
  ------------------
  |  Branch (2027:9): [True: 0, False: 26.5k]
  ------------------
 2028|  26.5k|    const int sbsz = f->sb_step;
 2029|  26.5k|    const int y = sby * sbsz * 4;
 2030|  26.5k|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
 2031|  26.5k|    pixel *const p[3] = {
 2032|  26.5k|        f->lf.p[0] + y * PXSTRIDE(f->cur.stride[0]),
 2033|  26.5k|        f->lf.p[1] + (y * PXSTRIDE(f->cur.stride[1]) >> ss_ver),
 2034|  26.5k|        f->lf.p[2] + (y * PXSTRIDE(f->cur.stride[1]) >> ss_ver)
 2035|  26.5k|    };
 2036|  26.5k|    Av1Filter *prev_mask = f->lf.mask + ((sby - 1) >> !f->seq_hdr->sb128) * f->sb128w;
 2037|  26.5k|    Av1Filter *mask = f->lf.mask + (sby >> !f->seq_hdr->sb128) * f->sb128w;
 2038|  26.5k|    const int start = sby * sbsz;
 2039|  26.5k|    if (sby) {
  ------------------
  |  Branch (2039:9): [True: 23.7k, False: 2.73k]
  ------------------
 2040|  23.7k|        const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
 2041|  23.7k|        pixel *p_up[3] = {
 2042|  23.7k|            p[0] - 8 * PXSTRIDE(f->cur.stride[0]),
 2043|  23.7k|            p[1] - (8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver),
 2044|  23.7k|            p[2] - (8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver),
 2045|  23.7k|        };
 2046|  23.7k|        bytefn(dav1d_cdef_brow)(tc, p_up, prev_mask, start - 2, start, 1, sby);
  ------------------
  |  |   87|  23.7k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  23.7k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2047|  23.7k|    }
 2048|  26.5k|    const int n_blks = sbsz - 2 * (sby + 1 < f->sbh);
 2049|  26.5k|    const int end = imin(start + n_blks, f->bh);
 2050|  26.5k|    bytefn(dav1d_cdef_brow)(tc, p, mask, start, end, 0, sby);
  ------------------
  |  |   87|  26.5k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  26.5k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2051|  26.5k|}
dav1d_filter_sbrow_resize_16bpc:
 2053|  4.21k|void bytefn(dav1d_filter_sbrow_resize)(Dav1dFrameContext *const f, const int sby) {
 2054|  4.21k|    const int sbsz = f->sb_step;
 2055|  4.21k|    const int y = sby * sbsz * 4;
 2056|  4.21k|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
 2057|  4.21k|    const pixel *const p[3] = {
 2058|  4.21k|        f->lf.p[0] + y * PXSTRIDE(f->cur.stride[0]),
 2059|  4.21k|        f->lf.p[1] + (y * PXSTRIDE(f->cur.stride[1]) >> ss_ver),
 2060|  4.21k|        f->lf.p[2] + (y * PXSTRIDE(f->cur.stride[1]) >> ss_ver)
 2061|  4.21k|    };
 2062|  4.21k|    pixel *const sr_p[3] = {
 2063|  4.21k|        f->lf.sr_p[0] + y * PXSTRIDE(f->sr_cur.p.stride[0]),
 2064|  4.21k|        f->lf.sr_p[1] + (y * PXSTRIDE(f->sr_cur.p.stride[1]) >> ss_ver),
 2065|  4.21k|        f->lf.sr_p[2] + (y * PXSTRIDE(f->sr_cur.p.stride[1]) >> ss_ver)
 2066|  4.21k|    };
 2067|  4.21k|    const int has_chroma = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400;
 2068|  14.5k|    for (int pl = 0; pl < 1 + 2 * has_chroma; pl++) {
  ------------------
  |  Branch (2068:22): [True: 10.3k, False: 4.21k]
  ------------------
 2069|  10.3k|        const int ss_ver = pl && f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
  ------------------
  |  Branch (2069:28): [True: 6.12k, False: 4.21k]
  |  Branch (2069:34): [True: 2.33k, False: 3.78k]
  ------------------
 2070|  10.3k|        const int h_start = 8 * !!sby >> ss_ver;
 2071|  10.3k|        const ptrdiff_t dst_stride = f->sr_cur.p.stride[!!pl];
 2072|  10.3k|        pixel *dst = sr_p[pl] - h_start * PXSTRIDE(dst_stride);
 2073|  10.3k|        const ptrdiff_t src_stride = f->cur.stride[!!pl];
 2074|  10.3k|        const pixel *src = p[pl] - h_start * PXSTRIDE(src_stride);
 2075|  10.3k|        const int h_end = 4 * (sbsz - 2 * (sby + 1 < f->sbh)) >> ss_ver;
 2076|  10.3k|        const int ss_hor = pl && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
  ------------------
  |  Branch (2076:28): [True: 6.12k, False: 4.21k]
  |  Branch (2076:34): [True: 3.73k, False: 2.39k]
  ------------------
 2077|  10.3k|        const int dst_w = (f->sr_cur.p.p.w + ss_hor) >> ss_hor;
 2078|  10.3k|        const int src_w = (4 * f->bw + ss_hor) >> ss_hor;
 2079|  10.3k|        const int img_h = (f->cur.p.h - sbsz * 4 * sby + ss_ver) >> ss_ver;
 2080|       |
 2081|  10.3k|        f->dsp->mc.resize(dst, dst_stride, src, src_stride, dst_w,
 2082|  10.3k|                          imin(img_h, h_end) + h_start, src_w,
 2083|  10.3k|                          f->resize_step[!!pl], f->resize_start[!!pl]
 2084|  10.3k|                          HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  10.3k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 2085|  10.3k|    }
 2086|  4.21k|}
dav1d_filter_sbrow_lr_16bpc:
 2088|  15.1k|void bytefn(dav1d_filter_sbrow_lr)(Dav1dFrameContext *const f, const int sby) {
 2089|  15.1k|    if (!(f->c->inloop_filters & DAV1D_INLOOPFILTER_RESTORATION)) return;
  ------------------
  |  Branch (2089:9): [True: 0, False: 15.1k]
  ------------------
 2090|  15.1k|    const int y = sby * f->sb_step * 4;
 2091|  15.1k|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
 2092|  15.1k|    pixel *const sr_p[3] = {
 2093|  15.1k|        f->lf.sr_p[0] + y * PXSTRIDE(f->sr_cur.p.stride[0]),
 2094|  15.1k|        f->lf.sr_p[1] + (y * PXSTRIDE(f->sr_cur.p.stride[1]) >> ss_ver),
 2095|  15.1k|        f->lf.sr_p[2] + (y * PXSTRIDE(f->sr_cur.p.stride[1]) >> ss_ver)
 2096|  15.1k|    };
 2097|  15.1k|    bytefn(dav1d_lr_sbrow)(f, sr_p, sby);
  ------------------
  |  |   87|  15.1k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  15.1k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2098|  15.1k|}
dav1d_filter_sbrow_16bpc:
 2100|  58.2k|void bytefn(dav1d_filter_sbrow)(Dav1dFrameContext *const f, const int sby) {
 2101|  58.2k|    bytefn(dav1d_filter_sbrow_deblock_cols)(f, sby);
  ------------------
  |  |   87|  58.2k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  58.2k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2102|  58.2k|    bytefn(dav1d_filter_sbrow_deblock_rows)(f, sby);
  ------------------
  |  |   87|  58.2k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  58.2k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2103|  58.2k|    if (f->seq_hdr->cdef)
  ------------------
  |  Branch (2103:9): [True: 26.5k, False: 31.7k]
  ------------------
 2104|  26.5k|        bytefn(dav1d_filter_sbrow_cdef)(f->c->tc, sby);
  ------------------
  |  |   87|  26.5k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  26.5k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2105|  58.2k|    if (f->frame_hdr->width[0] != f->frame_hdr->width[1])
  ------------------
  |  Branch (2105:9): [True: 4.21k, False: 53.9k]
  ------------------
 2106|  4.21k|        bytefn(dav1d_filter_sbrow_resize)(f, sby);
  ------------------
  |  |   87|  4.21k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  4.21k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2107|  58.2k|    if (f->lf.restore_planes)
  ------------------
  |  Branch (2107:9): [True: 15.1k, False: 43.0k]
  ------------------
 2108|  15.1k|        bytefn(dav1d_filter_sbrow_lr)(f, sby);
  ------------------
  |  |   87|  15.1k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  15.1k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2109|  58.2k|}
dav1d_backup_ipred_edge_16bpc:
 2111|  68.5k|void bytefn(dav1d_backup_ipred_edge)(Dav1dTaskContext *const t) {
 2112|  68.5k|    const Dav1dFrameContext *const f = t->f;
 2113|  68.5k|    Dav1dTileState *const ts = t->ts;
 2114|  68.5k|    const int sby = t->by >> f->sb_shift;
 2115|  68.5k|    const int sby_off = f->sb128w * 128 * sby;
 2116|  68.5k|    const int x_off = ts->tiling.col_start;
 2117|       |
 2118|  68.5k|    const pixel *const y =
 2119|  68.5k|        ((const pixel *) f->cur.data[0]) + x_off * 4 +
 2120|  68.5k|                    ((t->by + f->sb_step) * 4 - 1) * PXSTRIDE(f->cur.stride[0]);
 2121|  68.5k|    pixel_copy(&f->ipred_edge[0][sby_off + x_off * 4], y,
  ------------------
  |  |   65|  68.5k|#define pixel_copy(a, b, c) memcpy(a, b, (c) << 1)
  ------------------
 2122|  68.5k|               4 * (ts->tiling.col_end - x_off));
 2123|       |
 2124|  68.5k|    if (f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400) {
  ------------------
  |  Branch (2124:9): [True: 31.8k, False: 36.6k]
  ------------------
 2125|  31.8k|        const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
 2126|  31.8k|        const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
 2127|       |
 2128|  31.8k|        const ptrdiff_t uv_off = (x_off * 4 >> ss_hor) +
 2129|  31.8k|            (((t->by + f->sb_step) * 4 >> ss_ver) - 1) * PXSTRIDE(f->cur.stride[1]);
 2130|  95.6k|        for (int pl = 1; pl <= 2; pl++)
  ------------------
  |  Branch (2130:26): [True: 63.7k, False: 31.8k]
  ------------------
 2131|  63.7k|            pixel_copy(&f->ipred_edge[pl][sby_off + (x_off * 4 >> ss_hor)],
  ------------------
  |  |   65|  63.7k|#define pixel_copy(a, b, c) memcpy(a, b, (c) << 1)
  ------------------
 2132|  31.8k|                       &((const pixel *) f->cur.data[pl])[uv_off],
 2133|  31.8k|                       4 * (ts->tiling.col_end - x_off) >> ss_hor);
 2134|  31.8k|    }
 2135|  68.5k|}
dav1d_copy_pal_block_y_16bpc:
 2141|  51.7k|{
 2142|  51.7k|    const Dav1dFrameContext *const f = t->f;
 2143|  51.7k|    pixel *const pal = t->frame_thread.pass ?
  ------------------
  |  Branch (2143:24): [True: 0, False: 51.7k]
  ------------------
 2144|      0|        f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
 2145|      0|                            ((t->bx >> 1) + (t->by & 1))][0] :
 2146|  51.7k|        bytefn(t->scratch.pal)[0];
  ------------------
  |  |   87|  51.7k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  51.7k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2147|   284k|    for (int x = 0; x < bw4; x++)
  ------------------
  |  Branch (2147:21): [True: 232k, False: 51.7k]
  ------------------
 2148|   232k|        memcpy(bytefn(t->al_pal)[0][bx4 + x][0], pal, 8 * sizeof(pixel));
  ------------------
  |  |   87|   232k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|   232k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2149|   196k|    for (int y = 0; y < bh4; y++)
  ------------------
  |  Branch (2149:21): [True: 144k, False: 51.7k]
  ------------------
 2150|   144k|        memcpy(bytefn(t->al_pal)[1][by4 + y][0], pal, 8 * sizeof(pixel));
  ------------------
  |  |   87|   144k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|   144k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2151|  51.7k|}
dav1d_copy_pal_block_uv_16bpc:
 2157|  11.2k|{
 2158|  11.2k|    const Dav1dFrameContext *const f = t->f;
 2159|  11.2k|    const pixel (*const pal)[8] = t->frame_thread.pass ?
  ------------------
  |  Branch (2159:35): [True: 0, False: 11.2k]
  ------------------
 2160|      0|        f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
 2161|      0|                            ((t->bx >> 1) + (t->by & 1))] :
 2162|  11.2k|        bytefn(t->scratch.pal);
  ------------------
  |  |   87|  11.2k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  11.2k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2163|       |    // see aomedia bug 2183 for why we use luma coordinates here
 2164|  33.8k|    for (int pl = 1; pl <= 2; pl++) {
  ------------------
  |  Branch (2164:22): [True: 22.5k, False: 11.2k]
  ------------------
 2165|   129k|        for (int x = 0; x < bw4; x++)
  ------------------
  |  Branch (2165:25): [True: 107k, False: 22.5k]
  ------------------
 2166|   107k|            memcpy(bytefn(t->al_pal)[0][bx4 + x][pl], pal[pl], 8 * sizeof(pixel));
  ------------------
  |  |   87|   107k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|   107k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2167|  97.7k|        for (int y = 0; y < bh4; y++)
  ------------------
  |  Branch (2167:25): [True: 75.1k, False: 22.5k]
  ------------------
 2168|  75.1k|            memcpy(bytefn(t->al_pal)[1][by4 + y][pl], pal[pl], 8 * sizeof(pixel));
  ------------------
  |  |   87|  75.1k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  75.1k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2169|  22.5k|    }
 2170|  11.2k|}
dav1d_read_pal_plane_16bpc:
 2175|  63.0k|{
 2176|  63.0k|    Dav1dTileState *const ts = t->ts;
 2177|  63.0k|    const Dav1dFrameContext *const f = t->f;
 2178|  63.0k|    const int pal_sz = b->pal_sz[pl] = dav1d_msac_decode_symbol_adapt8(&ts->msac,
  ------------------
  |  |   48|  63.0k|#define dav1d_msac_decode_symbol_adapt8  dav1d_msac_decode_symbol_adapt8_sse2
  ------------------
 2179|  63.0k|                                           ts->cdf.m.pal_sz[pl][sz_ctx], 6) + 2;
 2180|  63.0k|    pixel cache[16], used_cache[8];
 2181|  63.0k|    int l_cache = pl ? t->pal_sz_uv[1][by4] : t->l.pal_sz[by4];
  ------------------
  |  Branch (2181:19): [True: 11.2k, False: 51.7k]
  ------------------
 2182|  63.0k|    int n_cache = 0;
 2183|       |    // don't reuse above palette outside SB64 boundaries
 2184|  63.0k|    int a_cache = by4 & 15 ? pl ? t->pal_sz_uv[0][bx4] : t->a->pal_sz[bx4] : 0;
  ------------------
  |  Branch (2184:19): [True: 48.6k, False: 14.4k]
  |  Branch (2184:30): [True: 9.15k, False: 39.4k]
  ------------------
 2185|  63.0k|    const pixel *l = bytefn(t->al_pal)[1][by4][pl];
  ------------------
  |  |   87|  63.0k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  63.0k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2186|  63.0k|    const pixel *a = bytefn(t->al_pal)[0][bx4][pl];
  ------------------
  |  |   87|  63.0k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  63.0k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2187|       |
 2188|       |    // fill/sort cache
 2189|   121k|    while (l_cache && a_cache) {
  ------------------
  |  Branch (2189:12): [True: 80.4k, False: 41.2k]
  |  Branch (2189:23): [True: 58.7k, False: 21.7k]
  ------------------
 2190|  58.7k|        if (*l < *a) {
  ------------------
  |  Branch (2190:13): [True: 27.4k, False: 31.2k]
  ------------------
 2191|  27.4k|            if (!n_cache || cache[n_cache - 1] != *l)
  ------------------
  |  Branch (2191:17): [True: 5.47k, False: 22.0k]
  |  Branch (2191:29): [True: 21.1k, False: 861]
  ------------------
 2192|  26.6k|                cache[n_cache++] = *l;
 2193|  27.4k|            l++;
 2194|  27.4k|            l_cache--;
 2195|  31.2k|        } else {
 2196|  31.2k|            if (*a == *l) {
  ------------------
  |  Branch (2196:17): [True: 10.0k, False: 21.1k]
  ------------------
 2197|  10.0k|                l++;
 2198|  10.0k|                l_cache--;
 2199|  10.0k|            }
 2200|  31.2k|            if (!n_cache || cache[n_cache - 1] != *a)
  ------------------
  |  Branch (2200:17): [True: 4.23k, False: 26.9k]
  |  Branch (2200:29): [True: 26.7k, False: 246]
  ------------------
 2201|  30.9k|                cache[n_cache++] = *a;
 2202|  31.2k|            a++;
 2203|  31.2k|            a_cache--;
 2204|  31.2k|        }
 2205|  58.7k|    }
 2206|  63.0k|    if (l_cache) {
  ------------------
  |  Branch (2206:9): [True: 21.7k, False: 41.2k]
  ------------------
 2207|  96.8k|        do {
 2208|  96.8k|            if (!n_cache || cache[n_cache - 1] != *l)
  ------------------
  |  Branch (2208:17): [True: 18.2k, False: 78.6k]
  |  Branch (2208:29): [True: 65.4k, False: 13.1k]
  ------------------
 2209|  83.6k|                cache[n_cache++] = *l;
 2210|  96.8k|            l++;
 2211|  96.8k|        } while (--l_cache > 0);
  ------------------
  |  Branch (2211:18): [True: 75.0k, False: 21.7k]
  ------------------
 2212|  41.2k|    } else if (a_cache) {
  ------------------
  |  Branch (2212:16): [True: 15.3k, False: 25.9k]
  ------------------
 2213|  70.0k|        do {
 2214|  70.0k|            if (!n_cache || cache[n_cache - 1] != *a)
  ------------------
  |  Branch (2214:17): [True: 10.7k, False: 59.3k]
  |  Branch (2214:29): [True: 50.6k, False: 8.63k]
  ------------------
 2215|  61.4k|                cache[n_cache++] = *a;
 2216|  70.0k|            a++;
 2217|  70.0k|        } while (--a_cache > 0);
  ------------------
  |  Branch (2217:18): [True: 54.7k, False: 15.3k]
  ------------------
 2218|  15.3k|    }
 2219|       |
 2220|       |    // find reused cache entries
 2221|  63.0k|    int i = 0;
 2222|   237k|    for (int n = 0; n < n_cache && i < pal_sz; n++)
  ------------------
  |  Branch (2222:21): [True: 183k, False: 53.9k]
  |  Branch (2222:36): [True: 174k, False: 9.06k]
  ------------------
 2223|   174k|        if (dav1d_msac_decode_bool_equi(&ts->msac))
  ------------------
  |  |   53|   174k|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
  |  Branch (2223:13): [True: 78.9k, False: 95.1k]
  ------------------
 2224|  78.9k|            used_cache[i++] = cache[n];
 2225|  63.0k|    const int n_used_cache = i;
 2226|       |
 2227|       |    // parse new entries
 2228|  63.0k|    pixel *const pal = t->frame_thread.pass ?
  ------------------
  |  Branch (2228:24): [True: 0, False: 63.0k]
  ------------------
 2229|      0|        f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
 2230|      0|                            ((t->bx >> 1) + (t->by & 1))][pl] :
 2231|  63.0k|        bytefn(t->scratch.pal)[pl];
  ------------------
  |  |   87|  63.0k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  63.0k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2232|  63.0k|    if (i < pal_sz) {
  ------------------
  |  Branch (2232:9): [True: 52.0k, False: 10.9k]
  ------------------
 2233|  52.0k|        const int bpc = BITDEPTH == 8 ? 8 : f->cur.p.bpc;
  ------------------
  |  Branch (2233:25): [Folded, False: 52.0k]
  ------------------
 2234|  52.0k|        int prev = pal[i++] = dav1d_msac_decode_bools(&ts->msac, bpc);
 2235|       |
 2236|  52.0k|        if (i < pal_sz) {
  ------------------
  |  Branch (2236:13): [True: 46.3k, False: 5.70k]
  ------------------
 2237|  46.3k|            int bits = bpc - 3 + dav1d_msac_decode_bools(&ts->msac, 2);
 2238|  46.3k|            const int max = (1 << bpc) - 1;
 2239|       |
 2240|   122k|            do {
 2241|   122k|                const int delta = dav1d_msac_decode_bools(&ts->msac, bits);
 2242|   122k|                prev = pal[i++] = imin(prev + delta + !pl, max);
 2243|   122k|                if (prev + !pl >= max) {
  ------------------
  |  Branch (2243:21): [True: 17.5k, False: 104k]
  ------------------
 2244|  49.5k|                    for (; i < pal_sz; i++)
  ------------------
  |  Branch (2244:28): [True: 32.0k, False: 17.5k]
  ------------------
 2245|  32.0k|                        pal[i] = max;
 2246|  17.5k|                    break;
 2247|  17.5k|                }
 2248|   104k|                bits = imin(bits, 1 + ulog2(max - prev - !pl));
 2249|   104k|            } while (i < pal_sz);
  ------------------
  |  Branch (2249:22): [True: 75.9k, False: 28.8k]
  ------------------
 2250|  46.3k|        }
 2251|       |
 2252|       |        // merge cache+new entries
 2253|  52.0k|        int n = 0, m = n_used_cache;
 2254|   307k|        for (i = 0; i < pal_sz; i++) {
  ------------------
  |  Branch (2254:21): [True: 255k, False: 52.0k]
  ------------------
 2255|   255k|            if (n < n_used_cache && (m >= pal_sz || used_cache[n] <= pal[m])) {
  ------------------
  |  Branch (2255:17): [True: 83.6k, False: 171k]
  |  Branch (2255:38): [True: 17.8k, False: 65.7k]
  |  Branch (2255:53): [True: 30.9k, False: 34.8k]
  ------------------
 2256|  48.8k|                pal[i] = used_cache[n++];
 2257|   206k|            } else {
 2258|   206k|                assert(m < pal_sz);
  ------------------
  |  Branch (2258:17): [True: 206k, False: 0]
  ------------------
 2259|   206k|                pal[i] = pal[m++];
 2260|   206k|            }
 2261|   255k|        }
 2262|  52.0k|    } else {
 2263|  10.9k|        memcpy(pal, used_cache, n_used_cache * sizeof(*used_cache));
 2264|  10.9k|    }
 2265|       |
 2266|  63.0k|    if (DEBUG_BLOCK_INFO) {
  ------------------
  |  |   34|  63.0k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 63.0k]
  |  |  ------------------
  |  |   35|  63.0k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  63.0k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 2267|      0|        printf("Post-pal[pl=%d,sz=%d,cache_size=%d,used_cache=%d]: r=%d, cache=",
 2268|      0|               pl, pal_sz, n_cache, n_used_cache, ts->msac.rng);
 2269|      0|        for (int n = 0; n < n_cache; n++)
  ------------------
  |  Branch (2269:25): [True: 0, False: 0]
  ------------------
 2270|      0|            printf("%c%02x", n ? ' ' : '[', cache[n]);
  ------------------
  |  Branch (2270:30): [True: 0, False: 0]
  ------------------
 2271|      0|        printf("%s, pal=", n_cache ? "]" : "[]");
  ------------------
  |  Branch (2271:28): [True: 0, False: 0]
  ------------------
 2272|      0|        for (int n = 0; n < pal_sz; n++)
  ------------------
  |  Branch (2272:25): [True: 0, False: 0]
  ------------------
 2273|      0|            printf("%c%02x", n ? ' ' : '[', pal[n]);
  ------------------
  |  Branch (2273:30): [True: 0, False: 0]
  ------------------
 2274|      0|        printf("]\n");
 2275|      0|    }
 2276|  63.0k|}
dav1d_read_pal_uv_16bpc:
 2280|  11.2k|{
 2281|  11.2k|    bytefn(dav1d_read_pal_plane)(t, b, 1, sz_ctx, bx4, by4);
  ------------------
  |  |   87|  11.2k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  11.2k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2282|       |
 2283|       |    // V pal coding
 2284|  11.2k|    Dav1dTileState *const ts = t->ts;
 2285|  11.2k|    const Dav1dFrameContext *const f = t->f;
 2286|  11.2k|    pixel *const pal = t->frame_thread.pass ?
  ------------------
  |  Branch (2286:24): [True: 0, False: 11.2k]
  ------------------
 2287|      0|        f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
 2288|      0|                            ((t->bx >> 1) + (t->by & 1))][2] :
 2289|  11.2k|        bytefn(t->scratch.pal)[2];
  ------------------
  |  |   87|  11.2k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  11.2k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2290|  11.2k|    const int bpc = BITDEPTH == 8 ? 8 : f->cur.p.bpc;
  ------------------
  |  Branch (2290:21): [Folded, False: 11.2k]
  ------------------
 2291|  11.2k|    if (dav1d_msac_decode_bool_equi(&ts->msac)) {
  ------------------
  |  |   53|  11.2k|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
  |  Branch (2291:9): [True: 5.54k, False: 5.74k]
  ------------------
 2292|  5.54k|        const int bits = bpc - 4 + dav1d_msac_decode_bools(&ts->msac, 2);
 2293|  5.54k|        int prev = pal[0] = dav1d_msac_decode_bools(&ts->msac, bpc);
 2294|  5.54k|        const int max = (1 << bpc) - 1;
 2295|  26.1k|        for (int i = 1; i < b->pal_sz[1]; i++) {
  ------------------
  |  Branch (2295:25): [True: 20.6k, False: 5.54k]
  ------------------
 2296|  20.6k|            int delta = dav1d_msac_decode_bools(&ts->msac, bits);
 2297|  20.6k|            if (delta && dav1d_msac_decode_bool_equi(&ts->msac)) delta = -delta;
  ------------------
  |  |   53|  20.3k|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
  |  Branch (2297:17): [True: 20.3k, False: 215]
  |  Branch (2297:26): [True: 10.1k, False: 10.2k]
  ------------------
 2298|  20.6k|            prev = pal[i] = (prev + delta) & max;
 2299|  20.6k|        }
 2300|  5.74k|    } else {
 2301|  28.5k|        for (int i = 0; i < b->pal_sz[1]; i++)
  ------------------
  |  Branch (2301:25): [True: 22.8k, False: 5.74k]
  ------------------
 2302|  22.8k|            pal[i] = dav1d_msac_decode_bools(&ts->msac, bpc);
 2303|  5.74k|    }
 2304|  11.2k|    if (DEBUG_BLOCK_INFO) {
  ------------------
  |  |   34|  11.2k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 11.2k]
  |  |  ------------------
  |  |   35|  11.2k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  11.2k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 2305|      0|        printf("Post-pal[pl=2]: r=%d ", ts->msac.rng);
 2306|      0|        for (int n = 0; n < b->pal_sz[1]; n++)
  ------------------
  |  Branch (2306:25): [True: 0, False: 0]
  ------------------
 2307|      0|            printf("%c%02x", n ? ' ' : '[', pal[n]);
  ------------------
  |  Branch (2307:30): [True: 0, False: 0]
  ------------------
 2308|      0|        printf("]\n");
 2309|      0|    }
 2310|  11.2k|}

dav1d_ref_create:
   37|  72.9k|Dav1dRef *dav1d_ref_create(const enum AllocationType type, size_t size) {
   38|  72.9k|    size = (size + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
   39|       |
   40|  72.9k|    uint8_t *const data = dav1d_alloc_aligned(type, size + sizeof(Dav1dRef), 64);
  ------------------
  |  |  134|  72.9k|#define dav1d_alloc_aligned(type, sz, align) dav1d_alloc_aligned_internal(sz, align)
  ------------------
   41|  72.9k|    if (!data) return NULL;
  ------------------
  |  Branch (41:9): [True: 0, False: 72.9k]
  ------------------
   42|       |
   43|  72.9k|    Dav1dRef *const res = (Dav1dRef*)(data + size);
   44|  72.9k|    res->const_data = res->user_data = res->data = data;
   45|  72.9k|    atomic_init(&res->ref_cnt, 1);
   46|  72.9k|    res->free_ref = 0;
   47|  72.9k|    res->free_callback = default_free_callback;
   48|       |
   49|  72.9k|    return res;
   50|  72.9k|}
dav1d_ref_create_using_pool:
   56|   111k|Dav1dRef *dav1d_ref_create_using_pool(Dav1dMemPool *const pool, size_t size) {
   57|   111k|    void *const buf = dav1d_mem_pool_pop(pool, size);
   58|   111k|    if (!buf) return NULL;
  ------------------
  |  Branch (58:9): [True: 0, False: 111k]
  ------------------
   59|       |
   60|       |    /* Store Dav1dRef inside the Dav1dMemPoolBuffer alignment padding */
   61|   111k|    assert(sizeof(Dav1dMemPoolBuffer) + sizeof(Dav1dRef) <= 64);
  ------------------
  |  Branch (61:5): [True: 111k, Folded]
  ------------------
   62|   111k|    Dav1dRef *const res = &((Dav1dRef*)buf)[-1];
   63|   111k|    res->data = buf;
   64|   111k|    res->const_data = pool;
   65|   111k|    atomic_init(&res->ref_cnt, 1);
   66|   111k|    res->free_ref = 0;
   67|   111k|    res->free_callback = pool_free_callback;
   68|   111k|    res->user_data = buf;
   69|       |
   70|   111k|    return res;
   71|   111k|}
dav1d_ref_dec:
   73|  7.92M|void dav1d_ref_dec(Dav1dRef **const pref) {
   74|  7.92M|    assert(pref != NULL);
  ------------------
  |  Branch (74:5): [True: 7.92M, False: 0]
  ------------------
   75|       |
   76|  7.92M|    Dav1dRef *const ref = *pref;
   77|  7.92M|    if (!ref) return;
  ------------------
  |  Branch (77:9): [True: 5.75M, False: 2.17M]
  ------------------
   78|       |
   79|  2.17M|    *pref = NULL;
   80|  2.17M|    if (atomic_fetch_sub(&ref->ref_cnt, 1) == 1) {
  ------------------
  |  Branch (80:9): [True: 235k, False: 1.93M]
  ------------------
   81|   235k|        const int free_ref = ref->free_ref;
   82|   235k|        ref->free_callback(ref->const_data, ref->user_data);
   83|   235k|        if (free_ref) dav1d_free(ref);
  ------------------
  |  |  135|      0|#define dav1d_free(ptr) free(ptr)
  ------------------
  |  Branch (83:13): [True: 0, False: 235k]
  ------------------
   84|   235k|    }
   85|  2.17M|}
ref.c:default_free_callback:
   32|  72.9k|static void default_free_callback(const uint8_t *const data, void *const user_data) {
   33|  72.9k|    assert(data == user_data);
  ------------------
  |  Branch (33:5): [True: 72.9k, False: 0]
  ------------------
   34|  72.9k|    dav1d_free_aligned(user_data);
  ------------------
  |  |  136|  72.9k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
   35|  72.9k|}
ref.c:pool_free_callback:
   52|   111k|static void pool_free_callback(const uint8_t *const data, void *const user_data) {
   53|   111k|    dav1d_mem_pool_push((Dav1dMemPool*)data, user_data);
   54|   111k|}

obu.c:dav1d_ref_is_writable:
   73|  71.8k|static inline int dav1d_ref_is_writable(Dav1dRef *const ref) {
   74|  71.8k|    return atomic_load(&ref->ref_cnt) == 1 && ref->data;
  ------------------
  |  Branch (74:12): [True: 71.8k, False: 0]
  |  Branch (74:47): [True: 71.8k, False: 0]
  ------------------
   75|  71.8k|}
obu.c:dav1d_ref_init:
   59|    622|{
   60|    622|    ref->data = NULL;
   61|    622|    ref->const_data = ptr;
   62|       |    atomic_init(&ref->ref_cnt, 1);
   63|    622|    ref->free_ref = free_ref;
   64|    622|    ref->free_callback = free_callback;
   65|    622|    ref->user_data = user_data;
   66|    622|    return ref;
   67|    622|}
obu.c:dav1d_ref_inc:
   69|  2.68k|static inline void dav1d_ref_inc(Dav1dRef *const ref) {
   70|       |    atomic_fetch_add_explicit(&ref->ref_cnt, 1, memory_order_relaxed);
   71|  2.68k|}
picture.c:dav1d_ref_inc:
   69|  1.56M|static inline void dav1d_ref_inc(Dav1dRef *const ref) {
   70|       |    atomic_fetch_add_explicit(&ref->ref_cnt, 1, memory_order_relaxed);
   71|  1.56M|}
picture.c:dav1d_ref_init:
   59|  50.5k|{
   60|  50.5k|    ref->data = NULL;
   61|  50.5k|    ref->const_data = ptr;
   62|       |    atomic_init(&ref->ref_cnt, 1);
   63|  50.5k|    ref->free_ref = free_ref;
   64|  50.5k|    ref->free_callback = free_callback;
   65|  50.5k|    ref->user_data = user_data;
   66|  50.5k|    return ref;
   67|  50.5k|}
cdf.c:dav1d_ref_inc:
   69|  82.2k|static inline void dav1d_ref_inc(Dav1dRef *const ref) {
   70|       |    atomic_fetch_add_explicit(&ref->ref_cnt, 1, memory_order_relaxed);
   71|  82.2k|}
data.c:dav1d_ref_inc:
   69|   118k|static inline void dav1d_ref_inc(Dav1dRef *const ref) {
   70|       |    atomic_fetch_add_explicit(&ref->ref_cnt, 1, memory_order_relaxed);
   71|   118k|}
decode.c:dav1d_ref_inc:
   69|   172k|static inline void dav1d_ref_inc(Dav1dRef *const ref) {
   70|       |    atomic_fetch_add_explicit(&ref->ref_cnt, 1, memory_order_relaxed);
   71|   172k|}

dav1d_refmvs_find:
  354|  1.76M|{
  355|  1.76M|    const refmvs_frame *const rf = rt->rf;
  356|  1.76M|    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
  357|  1.76M|    const int bw4 = b_dim[0], w4 = imin(imin(bw4, 16), rt->tile_col.end - bx4);
  358|  1.76M|    const int bh4 = b_dim[1], h4 = imin(imin(bh4, 16), rt->tile_row.end - by4);
  359|  1.76M|    mv gmv[2], tgmv[2];
  360|       |
  361|  1.76M|    *cnt = 0;
  362|  1.76M|    assert(ref.ref[0] >=  0 && ref.ref[0] <= 8 &&
  ------------------
  |  Branch (362:5): [True: 1.76M, False: 0]
  |  Branch (362:5): [True: 1.76M, False: 0]
  |  Branch (362:5): [True: 1.76M, False: 0]
  |  Branch (362:5): [True: 1.76M, False: 0]
  ------------------
  363|  1.76M|           ref.ref[1] >= -1 && ref.ref[1] <= 8);
  364|  1.76M|    if (ref.ref[0] > 0) {
  ------------------
  |  Branch (364:9): [True: 1.14M, False: 620k]
  ------------------
  365|  1.14M|        tgmv[0] = get_gmv_2d(&rf->frm_hdr->gmv[ref.ref[0] - 1],
  366|  1.14M|                             bx4, by4, bw4, bh4, rf->frm_hdr);
  367|  1.14M|        gmv[0] = rf->frm_hdr->gmv[ref.ref[0] - 1].type > DAV1D_WM_TYPE_TRANSLATION ?
  ------------------
  |  Branch (367:18): [True: 189k, False: 951k]
  ------------------
  368|   951k|                 tgmv[0] : (mv) { .n = INVALID_MV };
  ------------------
  |  |   40|   951k|#define INVALID_MV 0x80008000
  ------------------
  369|  1.14M|    } else {
  370|   620k|        tgmv[0] = (mv) { .n = 0 };
  371|   620k|        gmv[0] = (mv) { .n = INVALID_MV };
  ------------------
  |  |   40|   620k|#define INVALID_MV 0x80008000
  ------------------
  372|   620k|    }
  373|  1.76M|    if (ref.ref[1] > 0) {
  ------------------
  |  Branch (373:9): [True: 213k, False: 1.54M]
  ------------------
  374|   213k|        tgmv[1] = get_gmv_2d(&rf->frm_hdr->gmv[ref.ref[1] - 1],
  375|   213k|                             bx4, by4, bw4, bh4, rf->frm_hdr);
  376|   213k|        gmv[1] = rf->frm_hdr->gmv[ref.ref[1] - 1].type > DAV1D_WM_TYPE_TRANSLATION ?
  ------------------
  |  Branch (376:18): [True: 27.3k, False: 185k]
  ------------------
  377|   185k|                 tgmv[1] : (mv) { .n = INVALID_MV };
  ------------------
  |  |   40|   185k|#define INVALID_MV 0x80008000
  ------------------
  378|   213k|    }
  379|       |
  380|       |    // top
  381|  1.76M|    int have_newmv = 0, have_col_mvs = 0, have_row_mvs = 0;
  382|  1.76M|    unsigned max_rows = 0, n_rows = ~0;
  383|  1.76M|    const refmvs_block *b_top;
  384|  1.76M|    if (by4 > rt->tile_row.start) {
  ------------------
  |  Branch (384:9): [True: 1.31M, False: 448k]
  ------------------
  385|  1.31M|        max_rows = imin((by4 - rt->tile_row.start + 1) >> 1, 2 + (bh4 > 1));
  386|  1.31M|        b_top = &rt->r[(by4 & 31) - 1 + 5][bx4];
  387|  1.31M|        n_rows = scan_row(mvstack, cnt, ref, gmv, b_top,
  388|  1.31M|                          bw4, w4, max_rows, bw4 >= 16 ? 4 : 1,
  ------------------
  |  Branch (388:46): [True: 115k, False: 1.19M]
  ------------------
  389|  1.31M|                          &have_newmv, &have_row_mvs);
  390|  1.31M|    }
  391|       |
  392|       |    // left
  393|  1.76M|    unsigned max_cols = 0, n_cols = ~0U;
  394|  1.76M|    refmvs_block *const *b_left;
  395|  1.76M|    if (bx4 > rt->tile_col.start) {
  ------------------
  |  Branch (395:9): [True: 1.68M, False: 77.8k]
  ------------------
  396|  1.68M|        max_cols = imin((bx4 - rt->tile_col.start + 1) >> 1, 2 + (bw4 > 1));
  397|  1.68M|        b_left = &rt->r[(by4 & 31) + 5];
  398|  1.68M|        n_cols = scan_col(mvstack, cnt, ref, gmv, b_left,
  399|  1.68M|                          bh4, h4, bx4 - 1, max_cols, bh4 >= 16 ? 4 : 1,
  ------------------
  |  Branch (399:55): [True: 199k, False: 1.48M]
  ------------------
  400|  1.68M|                          &have_newmv, &have_col_mvs);
  401|  1.68M|    }
  402|       |
  403|       |    // top/right
  404|  1.76M|    if (n_rows != ~0U && edge_flags & EDGE_I444_TOP_HAS_RIGHT &&
  ------------------
  |  Branch (404:9): [True: 1.31M, False: 448k]
  |  Branch (404:26): [True: 781k, False: 531k]
  ------------------
  405|   781k|        imax(bw4, bh4) <= 16 && bw4 + bx4 < rt->tile_col.end)
  ------------------
  |  Branch (405:9): [True: 749k, False: 32.2k]
  |  Branch (405:33): [True: 705k, False: 44.0k]
  ------------------
  406|   705k|    {
  407|   705k|        add_spatial_candidate(mvstack, cnt, 4, &b_top[bw4], ref, gmv,
  408|   705k|                              &have_newmv, &have_row_mvs);
  409|   705k|    }
  410|       |
  411|  1.76M|    const int nearest_match = have_col_mvs + have_row_mvs;
  412|  1.76M|    const int nearest_cnt = *cnt;
  413|  3.95M|    for (int n = 0; n < nearest_cnt; n++)
  ------------------
  |  Branch (413:21): [True: 2.18M, False: 1.76M]
  ------------------
  414|  2.18M|        mvstack[n].weight += 640;
  415|       |
  416|       |    // temporal
  417|  1.76M|    int globalmv_ctx = rf->frm_hdr->use_ref_frame_mvs;
  418|  1.76M|    if (rf->use_ref_frame_mvs) {
  ------------------
  |  Branch (418:9): [True: 306k, False: 1.45M]
  ------------------
  419|   306k|        const ptrdiff_t stride = rf->rp_stride;
  420|   306k|        const int by8 = by4 >> 1, bx8 = bx4 >> 1;
  421|   306k|        const refmvs_temporal_block *const rbi = &rt->rp_proj[(by8 & 15) * stride + bx8];
  422|   306k|        const refmvs_temporal_block *rb = rbi;
  423|   306k|        const int step_h = bw4 >= 16 ? 2 : 1, step_v = bh4 >= 16 ? 2 : 1;
  ------------------
  |  Branch (423:28): [True: 12.7k, False: 294k]
  |  Branch (423:56): [True: 15.7k, False: 291k]
  ------------------
  424|   306k|        const int w8 = imin((w4 + 1) >> 1, 8), h8 = imin((h4 + 1) >> 1, 8);
  425|   829k|        for (int y = 0; y < h8; y += step_v) {
  ------------------
  |  Branch (425:25): [True: 523k, False: 306k]
  ------------------
  426|  1.59M|            for (int x = 0; x < w8; x+= step_h) {
  ------------------
  |  Branch (426:29): [True: 1.07M, False: 523k]
  ------------------
  427|  1.07M|                add_temporal_candidate(rf, mvstack, cnt, &rb[x], ref,
  428|  1.07M|                                       !(x | y) ? &globalmv_ctx : NULL, tgmv);
  ------------------
  |  Branch (428:40): [True: 306k, False: 769k]
  ------------------
  429|  1.07M|            }
  430|   523k|            rb += stride * step_v;
  431|   523k|        }
  432|   306k|        if (imin(bw4, bh4) >= 2 && imax(bw4, bh4) < 16) {
  ------------------
  |  Branch (432:13): [True: 222k, False: 84.2k]
  |  Branch (432:36): [True: 204k, False: 17.6k]
  ------------------
  433|   204k|            const int bh8 = bh4 >> 1, bw8 = bw4 >> 1;
  434|   204k|            rb = &rbi[bh8 * stride];
  435|   204k|            const int has_bottom = by8 + bh8 < imin(rt->tile_row.end >> 1,
  436|   204k|                                                    (by8 & ~7) + 8);
  437|   204k|            if (has_bottom && bx8 - 1 >= imax(rt->tile_col.start >> 1, bx8 & ~7)) {
  ------------------
  |  Branch (437:17): [True: 143k, False: 61.5k]
  |  Branch (437:31): [True: 109k, False: 33.5k]
  ------------------
  438|   109k|                add_temporal_candidate(rf, mvstack, cnt, &rb[-1], ref,
  439|   109k|                                       NULL, NULL);
  440|   109k|            }
  441|   204k|            if (bx8 + bw8 < imin(rt->tile_col.end >> 1, (bx8 & ~7) + 8)) {
  ------------------
  |  Branch (441:17): [True: 153k, False: 50.8k]
  ------------------
  442|   153k|                if (has_bottom) {
  ------------------
  |  Branch (442:21): [True: 108k, False: 45.1k]
  ------------------
  443|   108k|                    add_temporal_candidate(rf, mvstack, cnt, &rb[bw8], ref,
  444|   108k|                                           NULL, NULL);
  445|   108k|                }
  446|   153k|                if (by8 + bh8 - 1 < imin(rt->tile_row.end >> 1, (by8 & ~7) + 8)) {
  ------------------
  |  Branch (446:21): [True: 153k, False: 741]
  ------------------
  447|   153k|                    add_temporal_candidate(rf, mvstack, cnt, &rb[bw8 - stride],
  448|   153k|                                           ref, NULL, NULL);
  449|   153k|                }
  450|   153k|            }
  451|   204k|        }
  452|   306k|    }
  453|  1.76M|    assert(*cnt <= 8);
  ------------------
  |  Branch (453:5): [True: 1.76M, False: 0]
  ------------------
  454|       |
  455|       |    // top/left (which, confusingly, is part of "secondary" references)
  456|  1.76M|    int have_dummy_newmv_match;
  457|  1.76M|    if ((n_rows | n_cols) != ~0U) {
  ------------------
  |  Branch (457:9): [True: 1.24M, False: 513k]
  ------------------
  458|  1.24M|        add_spatial_candidate(mvstack, cnt, 4, &b_top[-1], ref, gmv,
  459|  1.24M|                              &have_dummy_newmv_match, &have_row_mvs);
  460|  1.24M|    }
  461|       |
  462|       |    // "secondary" (non-direct neighbour) top & left edges
  463|       |    // what is different about secondary is that everything is now in 8x8 resolution
  464|  5.28M|    for (int n = 2; n <= 3; n++) {
  ------------------
  |  Branch (464:21): [True: 3.52M, False: 1.76M]
  ------------------
  465|  3.52M|        if ((unsigned) n > n_rows && (unsigned) n <= max_rows) {
  ------------------
  |  Branch (465:13): [True: 1.85M, False: 1.66M]
  |  Branch (465:38): [True: 1.26M, False: 591k]
  ------------------
  466|  1.26M|            n_rows += scan_row(mvstack, cnt, ref, gmv,
  467|  1.26M|                               &rt->r[(((by4 & 31) - 2 * n + 1) | 1) + 5][bx4 | 1],
  468|  1.26M|                               bw4, w4, 1 + max_rows - n, bw4 >= 16 ? 4 : 2,
  ------------------
  |  Branch (468:59): [True: 19.8k, False: 1.24M]
  ------------------
  469|  1.26M|                               &have_dummy_newmv_match, &have_row_mvs);
  470|  1.26M|        }
  471|       |
  472|  3.52M|        if ((unsigned) n > n_cols && (unsigned) n <= max_cols) {
  ------------------
  |  Branch (472:13): [True: 2.53M, False: 983k]
  |  Branch (472:38): [True: 1.94M, False: 595k]
  ------------------
  473|  1.94M|            n_cols += scan_col(mvstack, cnt, ref, gmv, &rt->r[((by4 & 31) | 1) + 5],
  474|  1.94M|                               bh4, h4, (bx4 - n * 2 + 1) | 1,
  475|  1.94M|                               1 + max_cols - n, bh4 >= 16 ? 4 : 2,
  ------------------
  |  Branch (475:50): [True: 135k, False: 1.80M]
  ------------------
  476|  1.94M|                               &have_dummy_newmv_match, &have_col_mvs);
  477|  1.94M|        }
  478|  3.52M|    }
  479|  1.76M|    assert(*cnt <= 8);
  ------------------
  |  Branch (479:5): [True: 1.76M, False: 0]
  ------------------
  480|       |
  481|  1.76M|    const int ref_match_count = have_col_mvs + have_row_mvs;
  482|       |
  483|       |    // context build-up
  484|  1.76M|    int refmv_ctx, newmv_ctx;
  485|  1.76M|    switch (nearest_match) {
  ------------------
  |  Branch (485:13): [True: 1.76M, False: 0]
  ------------------
  486|   202k|    case 0:
  ------------------
  |  Branch (486:5): [True: 202k, False: 1.55M]
  ------------------
  487|   202k|        refmv_ctx = imin(2, ref_match_count);
  488|   202k|        newmv_ctx = ref_match_count > 0;
  489|   202k|        break;
  490|   696k|    case 1:
  ------------------
  |  Branch (490:5): [True: 696k, False: 1.06M]
  ------------------
  491|   696k|        refmv_ctx = imin(ref_match_count * 3, 4);
  492|   696k|        newmv_ctx = 3 - have_newmv;
  493|   696k|        break;
  494|   861k|    case 2:
  ------------------
  |  Branch (494:5): [True: 861k, False: 899k]
  ------------------
  495|   861k|        refmv_ctx = 5;
  496|   861k|        newmv_ctx = 5 - have_newmv;
  497|   861k|        break;
  498|  1.76M|    }
  499|       |
  500|       |    // sorting (nearest, then "secondary")
  501|  1.76M|    int len = nearest_cnt;
  502|  3.61M|    while (len) {
  ------------------
  |  Branch (502:12): [True: 1.85M, False: 1.76M]
  ------------------
  503|  1.85M|        int last = 0;
  504|  2.59M|        for (int n = 1; n < len; n++) {
  ------------------
  |  Branch (504:25): [True: 747k, False: 1.85M]
  ------------------
  505|   747k|            if (mvstack[n - 1].weight < mvstack[n].weight) {
  ------------------
  |  Branch (505:17): [True: 317k, False: 430k]
  ------------------
  506|   317k|#define EXCHANGE(a, b) do { refmvs_candidate tmp = a; a = b; b = tmp; } while (0)
  507|   317k|                EXCHANGE(mvstack[n - 1], mvstack[n]);
  ------------------
  |  |  506|   317k|#define EXCHANGE(a, b) do { refmvs_candidate tmp = a; a = b; b = tmp; } while (0)
  |  |  ------------------
  |  |  |  Branch (506:80): [Folded, False: 317k]
  |  |  ------------------
  ------------------
  508|   317k|                last = n;
  509|   317k|            }
  510|   747k|        }
  511|  1.85M|        len = last;
  512|  1.85M|    }
  513|  1.76M|    len = *cnt;
  514|  2.80M|    while (len > nearest_cnt) {
  ------------------
  |  Branch (514:12): [True: 1.04M, False: 1.76M]
  ------------------
  515|  1.04M|        int last = nearest_cnt;
  516|  1.73M|        for (int n = nearest_cnt + 1; n < len; n++) {
  ------------------
  |  Branch (516:39): [True: 689k, False: 1.04M]
  ------------------
  517|   689k|            if (mvstack[n - 1].weight < mvstack[n].weight) {
  ------------------
  |  Branch (517:17): [True: 202k, False: 487k]
  ------------------
  518|   202k|                EXCHANGE(mvstack[n - 1], mvstack[n]);
  ------------------
  |  |  506|   202k|#define EXCHANGE(a, b) do { refmvs_candidate tmp = a; a = b; b = tmp; } while (0)
  |  |  ------------------
  |  |  |  Branch (506:80): [Folded, False: 202k]
  |  |  ------------------
  ------------------
  519|   202k|#undef EXCHANGE
  520|   202k|                last = n;
  521|   202k|            }
  522|   689k|        }
  523|  1.04M|        len = last;
  524|  1.04M|    }
  525|       |
  526|  1.76M|    if (ref.ref[1] > 0) {
  ------------------
  |  Branch (526:9): [True: 213k, False: 1.54M]
  ------------------
  527|   213k|        if (*cnt < 2) {
  ------------------
  |  Branch (527:13): [True: 131k, False: 81.6k]
  ------------------
  528|   131k|            const int sign0 = rf->sign_bias[ref.ref[0] - 1];
  529|   131k|            const int sign1 = rf->sign_bias[ref.ref[1] - 1];
  530|   131k|            const int sz4 = imin(w4, h4);
  531|   131k|            refmvs_candidate *const same = &mvstack[*cnt];
  532|   131k|            int same_count[4] = { 0 };
  533|       |
  534|       |            // non-self references in top
  535|   236k|            if (n_rows != ~0U) for (int x = 0; x < sz4;) {
  ------------------
  |  Branch (535:17): [True: 113k, False: 18.3k]
  |  Branch (535:48): [True: 123k, False: 113k]
  ------------------
  536|   123k|                const refmvs_block *const cand_b = &b_top[x];
  537|   123k|                add_compound_extended_candidate(same, same_count, cand_b,
  538|   123k|                                                sign0, sign1, ref, rf->sign_bias);
  539|   123k|                x += dav1d_block_dimensions[cand_b->bs][0];
  540|   123k|            }
  541|       |
  542|       |            // non-self references in left
  543|   267k|            if (n_cols != ~0U) for (int y = 0; y < sz4;) {
  ------------------
  |  Branch (543:17): [True: 124k, False: 7.34k]
  |  Branch (543:48): [True: 143k, False: 124k]
  ------------------
  544|   143k|                const refmvs_block *const cand_b = &b_left[y][bx4 - 1];
  545|   143k|                add_compound_extended_candidate(same, same_count, cand_b,
  546|   143k|                                                sign0, sign1, ref, rf->sign_bias);
  547|   143k|                y += dav1d_block_dimensions[cand_b->bs][1];
  548|   143k|            }
  549|       |
  550|   131k|            refmvs_candidate *const diff = &same[2];
  551|   131k|            const int *const diff_count = &same_count[2];
  552|       |
  553|       |            // merge together
  554|   394k|            for (int n = 0; n < 2; n++) {
  ------------------
  |  Branch (554:29): [True: 262k, False: 131k]
  ------------------
  555|   262k|                int m = same_count[n];
  556|       |
  557|   262k|                if (m >= 2) continue;
  ------------------
  |  Branch (557:21): [True: 89.1k, False: 173k]
  ------------------
  558|       |
  559|   173k|                const int l = diff_count[n];
  560|   173k|                if (l) {
  ------------------
  |  Branch (560:21): [True: 161k, False: 12.4k]
  ------------------
  561|   161k|                    same[m].mv.mv[n] = diff[0].mv.mv[n];
  562|   161k|                    if (++m == 2) continue;
  ------------------
  |  Branch (562:25): [True: 106k, False: 54.4k]
  ------------------
  563|  54.4k|                    if (l == 2) {
  ------------------
  |  Branch (563:25): [True: 45.8k, False: 8.65k]
  ------------------
  564|  45.8k|                        same[1].mv.mv[n] = diff[1].mv.mv[n];
  565|  45.8k|                        continue;
  566|  45.8k|                    }
  567|  54.4k|                }
  568|  27.7k|                do {
  569|  27.7k|                    same[m].mv.mv[n] = tgmv[n];
  570|  27.7k|                } while (++m < 2);
  ------------------
  |  Branch (570:26): [True: 6.65k, False: 21.1k]
  ------------------
  571|  21.1k|            }
  572|       |
  573|       |            // if the first extended was the same as the non-extended one,
  574|       |            // then replace it with the second extended one
  575|   131k|            int n = *cnt;
  576|   131k|            if (n == 1 && mvstack[0].mv.n == same[0].mv.n)
  ------------------
  |  Branch (576:17): [True: 80.0k, False: 51.3k]
  |  Branch (576:27): [True: 59.2k, False: 20.7k]
  ------------------
  577|  59.2k|                mvstack[1].mv = mvstack[2].mv;
  578|   182k|            do {
  579|   182k|                mvstack[n].weight = 2;
  580|   182k|            } while (++n < 2);
  ------------------
  |  Branch (580:22): [True: 51.3k, False: 131k]
  ------------------
  581|   131k|            *cnt = 2;
  582|   131k|        }
  583|       |
  584|       |        // clamping
  585|   213k|        const int left = -(bx4 + bw4 + 4) * 4 * 8;
  586|   213k|        const int right = (rf->iw4 - bx4 + 4) * 4 * 8;
  587|   213k|        const int top = -(by4 + bh4 + 4) * 4 * 8;
  588|   213k|        const int bottom = (rf->ih4 - by4 + 4) * 4 * 8;
  589|       |
  590|   213k|        const int n_refmvs = *cnt;
  591|   213k|        int n = 0;
  592|   498k|        do {
  593|   498k|            mvstack[n].mv.mv[0].x = iclip(mvstack[n].mv.mv[0].x, left, right);
  594|   498k|            mvstack[n].mv.mv[0].y = iclip(mvstack[n].mv.mv[0].y, top, bottom);
  595|   498k|            mvstack[n].mv.mv[1].x = iclip(mvstack[n].mv.mv[1].x, left, right);
  596|   498k|            mvstack[n].mv.mv[1].y = iclip(mvstack[n].mv.mv[1].y, top, bottom);
  597|   498k|        } while (++n < n_refmvs);
  ------------------
  |  Branch (597:18): [True: 285k, False: 213k]
  ------------------
  598|       |
  599|   213k|        switch (refmv_ctx >> 1) {
  ------------------
  |  Branch (599:17): [True: 213k, False: 0]
  ------------------
  600|  67.7k|        case 0:
  ------------------
  |  Branch (600:9): [True: 67.7k, False: 145k]
  ------------------
  601|  67.7k|            *ctx = imin(newmv_ctx, 1);
  602|  67.7k|            break;
  603|  72.3k|        case 1:
  ------------------
  |  Branch (603:9): [True: 72.3k, False: 140k]
  ------------------
  604|  72.3k|            *ctx = 1 + imin(newmv_ctx, 3);
  605|  72.3k|            break;
  606|  72.9k|        case 2:
  ------------------
  |  Branch (606:9): [True: 72.9k, False: 140k]
  ------------------
  607|  72.9k|            *ctx = iclip(3 + newmv_ctx, 4, 7);
  608|  72.9k|            break;
  609|   213k|        }
  610|       |
  611|   213k|        return;
  612|  1.54M|    } else if (*cnt < 2 && ref.ref[0] > 0) {
  ------------------
  |  Branch (612:16): [True: 659k, False: 888k]
  |  Branch (612:28): [True: 554k, False: 104k]
  ------------------
  613|   554k|        const int sign = rf->sign_bias[ref.ref[0] - 1];
  614|   554k|        const int sz4 = imin(w4, h4);
  615|       |
  616|       |        // non-self references in top
  617|  1.02M|        if (n_rows != ~0U) for (int x = 0; x < sz4 && *cnt < 2;) {
  ------------------
  |  Branch (617:13): [True: 492k, False: 62.1k]
  |  Branch (617:44): [True: 532k, False: 490k]
  |  Branch (617:55): [True: 530k, False: 2.10k]
  ------------------
  618|   530k|            const refmvs_block *const cand_b = &b_top[x];
  619|   530k|            add_single_extended_candidate(mvstack, cnt, cand_b, sign, rf->sign_bias);
  620|   530k|            x += dav1d_block_dimensions[cand_b->bs][0];
  621|   530k|        }
  622|       |
  623|       |        // non-self references in left
  624|   980k|        if (n_cols != ~0U) for (int y = 0; y < sz4 && *cnt < 2;) {
  ------------------
  |  Branch (624:13): [True: 494k, False: 60.2k]
  |  Branch (624:44): [True: 534k, False: 446k]
  |  Branch (624:55): [True: 486k, False: 47.4k]
  ------------------
  625|   486k|            const refmvs_block *const cand_b = &b_left[y][bx4 - 1];
  626|   486k|            add_single_extended_candidate(mvstack, cnt, cand_b, sign, rf->sign_bias);
  627|   486k|            y += dav1d_block_dimensions[cand_b->bs][1];
  628|   486k|        }
  629|   554k|    }
  630|  1.76M|    assert(*cnt <= 8);
  ------------------
  |  Branch (630:5): [True: 1.54M, False: 0]
  ------------------
  631|       |
  632|       |    // clamping
  633|  1.54M|    int n_refmvs = *cnt;
  634|  1.54M|    if (n_refmvs) {
  ------------------
  |  Branch (634:9): [True: 1.48M, False: 66.5k]
  ------------------
  635|  1.48M|        const int left = -(bx4 + bw4 + 4) * 4 * 8;
  636|  1.48M|        const int right = (rf->iw4 - bx4 + 4) * 4 * 8;
  637|  1.48M|        const int top = -(by4 + bh4 + 4) * 4 * 8;
  638|  1.48M|        const int bottom = (rf->ih4 - by4 + 4) * 4 * 8;
  639|       |
  640|  1.48M|        int n = 0;
  641|  3.44M|        do {
  642|  3.44M|            mvstack[n].mv.mv[0].x = iclip(mvstack[n].mv.mv[0].x, left, right);
  643|  3.44M|            mvstack[n].mv.mv[0].y = iclip(mvstack[n].mv.mv[0].y, top, bottom);
  644|  3.44M|        } while (++n < n_refmvs);
  ------------------
  |  Branch (644:18): [True: 1.96M, False: 1.48M]
  ------------------
  645|  1.48M|    }
  646|       |
  647|  2.18M|    for (int n = *cnt; n < 2; n++)
  ------------------
  |  Branch (647:24): [True: 632k, False: 1.54M]
  ------------------
  648|   632k|        mvstack[n].mv.mv[0] = tgmv[0];
  649|       |
  650|  1.54M|    *ctx = (refmv_ctx << 4) | (globalmv_ctx << 3) | newmv_ctx;
  651|  1.54M|}
dav1d_refmvs_tile_sbrow_init:
  657|   131k|{
  658|   131k|    if (rf->n_tile_threads == 1) tile_row_idx = 0;
  ------------------
  |  Branch (658:9): [True: 131k, False: 0]
  ------------------
  659|   131k|    rt->rp_proj = &rf->rp_proj[16 * rf->rp_stride * tile_row_idx];
  660|   131k|    const ptrdiff_t r_stride = rf->rp_stride * 2;
  661|   131k|    const ptrdiff_t pass_off = (rf->n_frame_threads > 1 && pass == 2) ?
  ------------------
  |  Branch (661:33): [True: 0, False: 131k]
  |  Branch (661:60): [True: 0, False: 0]
  ------------------
  662|   131k|        35 * 2 * rf->n_blocks : 0;
  663|   131k|    refmvs_block *r = &rf->r[35 * r_stride * tile_row_idx + pass_off];
  664|   131k|    const int sbsz = rf->sbsz;
  665|   131k|    const int off = (sbsz * sby) & 16;
  666|  3.43M|    for (int i = 0; i < sbsz; i++, r += r_stride)
  ------------------
  |  Branch (666:21): [True: 3.30M, False: 131k]
  ------------------
  667|  3.30M|        rt->r[off + 5 + i] = r;
  668|   131k|    rt->r[off + 0] = r;
  669|   131k|    r += r_stride;
  670|   131k|    rt->r[off + 1] = NULL;
  671|   131k|    rt->r[off + 2] = r;
  672|   131k|    r += r_stride;
  673|   131k|    rt->r[off + 3] = NULL;
  674|   131k|    rt->r[off + 4] = r;
  675|   131k|    if (sby & 1) {
  ------------------
  |  Branch (675:9): [True: 52.4k, False: 78.5k]
  ------------------
  676|  52.4k|#define EXCHANGE(a, b) do { void *const tmp = a; a = b; b = tmp; } while (0)
  677|  52.4k|        EXCHANGE(rt->r[off + 0], rt->r[off + sbsz + 0]);
  ------------------
  |  |  676|  52.4k|#define EXCHANGE(a, b) do { void *const tmp = a; a = b; b = tmp; } while (0)
  |  |  ------------------
  |  |  |  Branch (676:75): [Folded, False: 52.4k]
  |  |  ------------------
  ------------------
  678|  52.4k|        EXCHANGE(rt->r[off + 2], rt->r[off + sbsz + 2]);
  ------------------
  |  |  676|  52.4k|#define EXCHANGE(a, b) do { void *const tmp = a; a = b; b = tmp; } while (0)
  |  |  ------------------
  |  |  |  Branch (676:75): [Folded, False: 52.4k]
  |  |  ------------------
  ------------------
  679|  52.4k|        EXCHANGE(rt->r[off + 4], rt->r[off + sbsz + 4]);
  ------------------
  |  |  676|  52.4k|#define EXCHANGE(a, b) do { void *const tmp = a; a = b; b = tmp; } while (0)
  |  |  ------------------
  |  |  |  Branch (676:75): [Folded, False: 52.4k]
  |  |  ------------------
  ------------------
  680|  52.4k|#undef EXCHANGE
  681|  52.4k|    }
  682|       |
  683|   131k|    rt->rf = rf;
  684|   131k|    rt->tile_row.start = tile_row_start4;
  685|   131k|    rt->tile_row.end = imin(tile_row_end4, rf->ih4);
  686|   131k|    rt->tile_col.start = tile_col_start4;
  687|   131k|    rt->tile_col.end = imin(tile_col_end4, rf->iw4);
  688|   131k|}
dav1d_refmvs_init_frame:
  807|  28.2k|{
  808|  28.2k|    const int rp_stride = ((frm_hdr->width[0] + 127) & ~127) >> 3;
  809|  28.2k|    const int n_tile_rows = n_tile_threads > 1 ? frm_hdr->tiling.rows : 1;
  ------------------
  |  Branch (809:29): [True: 0, False: 28.2k]
  ------------------
  810|  28.2k|    const int n_blocks = rp_stride * n_tile_rows;
  811|       |
  812|  28.2k|    rf->sbsz = 16 << seq_hdr->sb128;
  813|  28.2k|    rf->frm_hdr = frm_hdr;
  814|  28.2k|    rf->iw8 = (frm_hdr->width[0] + 7) >> 3;
  815|  28.2k|    rf->ih8 = (frm_hdr->height + 7) >> 3;
  816|  28.2k|    rf->iw4 = rf->iw8 << 1;
  817|  28.2k|    rf->ih4 = rf->ih8 << 1;
  818|  28.2k|    rf->rp = rp;
  819|  28.2k|    rf->rp_stride = rp_stride;
  820|  28.2k|    rf->n_tile_threads = n_tile_threads;
  821|  28.2k|    rf->n_frame_threads = n_frame_threads;
  822|       |
  823|  28.2k|    if (n_blocks != rf->n_blocks) {
  ------------------
  |  Branch (823:9): [True: 6.56k, False: 21.7k]
  ------------------
  824|  6.56k|        const size_t r_sz = sizeof(*rf->r) * 35 * 2 * n_blocks * (1 + (n_frame_threads > 1));
  825|  6.56k|        const size_t rp_proj_sz = sizeof(*rf->rp_proj) * 16 * n_blocks;
  826|       |        /* Note that sizeof(*rf->r) == 12, but it's accessed using 16-byte unaligned
  827|       |         * loads in save_tmvs() asm which can overread 4 bytes into rp_proj. */
  828|  6.56k|        dav1d_free_aligned(rf->r);
  ------------------
  |  |  136|  6.56k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
  829|  6.56k|        rf->r = dav1d_alloc_aligned(ALLOC_REFMVS, r_sz + rp_proj_sz, 64);
  ------------------
  |  |  134|  6.56k|#define dav1d_alloc_aligned(type, sz, align) dav1d_alloc_aligned_internal(sz, align)
  ------------------
  830|  6.56k|        if (!rf->r) {
  ------------------
  |  Branch (830:13): [True: 0, False: 6.56k]
  ------------------
  831|      0|            rf->n_blocks = 0;
  832|      0|            return DAV1D_ERR(ENOMEM);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  833|      0|        }
  834|       |
  835|  6.56k|        rf->rp_proj = (refmvs_temporal_block*)((uintptr_t)rf->r + r_sz);
  836|  6.56k|        rf->n_blocks = n_blocks;
  837|  6.56k|    }
  838|       |
  839|  28.2k|    const int poc = frm_hdr->frame_offset;
  840|   226k|    for (int i = 0; i < 7; i++) {
  ------------------
  |  Branch (840:21): [True: 197k, False: 28.2k]
  ------------------
  841|   197k|        const int poc_diff = get_poc_diff(seq_hdr->order_hint_n_bits,
  842|   197k|                                          ref_poc[i], poc);
  843|   197k|        rf->sign_bias[i] = poc_diff > 0;
  844|   197k|        rf->mfmv_sign[i] = poc_diff < 0;
  845|   197k|        rf->pocdiff[i] = iclip(get_poc_diff(seq_hdr->order_hint_n_bits,
  846|   197k|                                            poc, ref_poc[i]), -31, 31);
  847|   197k|    }
  848|       |
  849|       |    // temporal MV setup
  850|  28.2k|    rf->n_mfmvs = 0;
  851|  28.2k|    rf->rp_ref = rp_ref;
  852|  28.2k|    if (frm_hdr->use_ref_frame_mvs && seq_hdr->order_hint_n_bits) {
  ------------------
  |  Branch (852:9): [True: 7.76k, False: 20.5k]
  |  Branch (852:39): [True: 7.76k, False: 0]
  ------------------
  853|  7.76k|        int total = 2;
  854|  7.76k|        if (rp_ref[0] && ref_ref_poc[0][6] != ref_poc[3] /* alt-of-last != gold */) {
  ------------------
  |  Branch (854:13): [True: 5.37k, False: 2.38k]
  |  Branch (854:26): [True: 4.07k, False: 1.30k]
  ------------------
  855|  4.07k|            rf->mfmv_ref[rf->n_mfmvs++] = 0; // last
  856|  4.07k|            total = 3;
  857|  4.07k|        }
  858|  7.76k|        if (rp_ref[4] && get_poc_diff(seq_hdr->order_hint_n_bits, ref_poc[4],
  ------------------
  |  Branch (858:13): [True: 4.43k, False: 3.33k]
  |  Branch (858:26): [True: 957, False: 3.47k]
  ------------------
  859|  4.43k|                                      frm_hdr->frame_offset) > 0)
  860|    957|        {
  861|    957|            rf->mfmv_ref[rf->n_mfmvs++] = 4; // bwd
  862|    957|        }
  863|  7.76k|        if (rp_ref[5] && get_poc_diff(seq_hdr->order_hint_n_bits, ref_poc[5],
  ------------------
  |  Branch (863:13): [True: 5.43k, False: 2.33k]
  |  Branch (863:26): [True: 921, False: 4.51k]
  ------------------
  864|  5.43k|                                      frm_hdr->frame_offset) > 0)
  865|    921|        {
  866|    921|            rf->mfmv_ref[rf->n_mfmvs++] = 5; // altref2
  867|    921|        }
  868|  7.76k|        if (rf->n_mfmvs < total && rp_ref[6] &&
  ------------------
  |  Branch (868:13): [True: 7.11k, False: 647]
  |  Branch (868:36): [True: 4.65k, False: 2.46k]
  ------------------
  869|  4.65k|            get_poc_diff(seq_hdr->order_hint_n_bits, ref_poc[6],
  ------------------
  |  Branch (869:13): [True: 3.59k, False: 1.06k]
  ------------------
  870|  4.65k|                         frm_hdr->frame_offset) > 0)
  871|  3.59k|        {
  872|  3.59k|            rf->mfmv_ref[rf->n_mfmvs++] = 6; // altref
  873|  3.59k|        }
  874|  7.76k|        if (rf->n_mfmvs < total && rp_ref[1])
  ------------------
  |  Branch (874:13): [True: 6.64k, False: 1.12k]
  |  Branch (874:36): [True: 3.94k, False: 2.70k]
  ------------------
  875|  3.94k|            rf->mfmv_ref[rf->n_mfmvs++] = 1; // last2
  876|       |
  877|  21.2k|        for (int n = 0; n < rf->n_mfmvs; n++) {
  ------------------
  |  Branch (877:25): [True: 13.4k, False: 7.76k]
  ------------------
  878|  13.4k|            const int rpoc = ref_poc[rf->mfmv_ref[n]];
  879|  13.4k|            const int diff1 = get_poc_diff(seq_hdr->order_hint_n_bits,
  880|  13.4k|                                           rpoc, frm_hdr->frame_offset);
  881|  13.4k|            if (abs(diff1) > 31) {
  ------------------
  |  Branch (881:17): [True: 292, False: 13.1k]
  ------------------
  882|    292|                rf->mfmv_ref2cur[n] = INVALID_REF2CUR;
  ------------------
  |  |   41|    292|#define INVALID_REF2CUR (-32)
  ------------------
  883|  13.1k|            } else {
  884|  13.1k|                rf->mfmv_ref2cur[n] = rf->mfmv_ref[n] < 4 ? -diff1 : diff1;
  ------------------
  |  Branch (884:39): [True: 7.91k, False: 5.27k]
  ------------------
  885|   105k|                for (int m = 0; m < 7; m++) {
  ------------------
  |  Branch (885:33): [True: 92.3k, False: 13.1k]
  ------------------
  886|  92.3k|                    const int rrpoc = ref_ref_poc[rf->mfmv_ref[n]][m];
  887|  92.3k|                    const int diff2 = get_poc_diff(seq_hdr->order_hint_n_bits,
  888|  92.3k|                                                   rpoc, rrpoc);
  889|       |                    // unsigned comparison also catches the < 0 case
  890|  92.3k|                    rf->mfmv_ref2ref[n][m] = (unsigned) diff2 > 31U ? 0 : diff2;
  ------------------
  |  Branch (890:46): [True: 23.5k, False: 68.7k]
  ------------------
  891|  92.3k|                }
  892|  13.1k|            }
  893|  13.4k|        }
  894|  7.76k|    }
  895|  28.2k|    rf->use_ref_frame_mvs = rf->n_mfmvs > 0;
  896|       |
  897|  28.2k|    return 0;
  898|  28.2k|}
dav1d_refmvs_dsp_init:
  921|  9.51k|{
  922|  9.51k|    c->load_tmvs = load_tmvs_c;
  923|  9.51k|    c->save_tmvs = save_tmvs_c;
  924|  9.51k|    c->splat_mv = splat_mv_c;
  925|       |
  926|  9.51k|#if HAVE_ASM
  927|       |#if ARCH_AARCH64 || ARCH_ARM
  928|       |    refmvs_dsp_init_arm(c);
  929|       |#elif ARCH_LOONGARCH64
  930|       |    refmvs_dsp_init_loongarch(c);
  931|       |#elif ARCH_X86
  932|       |    refmvs_dsp_init_x86(c);
  933|  9.51k|#endif
  934|  9.51k|#endif
  935|  9.51k|}
refmvs.c:scan_row:
  102|  2.57M|{
  103|  2.57M|    const refmvs_block *cand_b = b;
  104|  2.57M|    const enum BlockSize first_cand_bs = cand_b->bs;
  105|  2.57M|    const uint8_t *const first_cand_b_dim = dav1d_block_dimensions[first_cand_bs];
  106|  2.57M|    int cand_bw4 = first_cand_b_dim[0];
  107|  2.57M|    int len = imax(step, imin(bw4, cand_bw4));
  108|       |
  109|  2.57M|    if (bw4 <= cand_bw4) {
  ------------------
  |  Branch (109:9): [True: 2.23M, False: 337k]
  ------------------
  110|       |        // FIXME weight can be higher for odd blocks (bx4 & 1), but then the
  111|       |        // position of the first block has to be odd already, i.e. not just
  112|       |        // for row_offset=-3/-5
  113|       |        // FIXME why can this not be cand_bw4?
  114|  2.23M|        const int weight = bw4 == 1 ? 2 :
  ------------------
  |  Branch (114:28): [True: 687k, False: 1.54M]
  ------------------
  115|  2.23M|                           imax(2, imin(2 * max_rows, first_cand_b_dim[1]));
  116|  2.23M|        add_spatial_candidate(mvstack, cnt, len * weight, cand_b, ref, gmv,
  117|  2.23M|                              have_newmv_match, have_refmv_match);
  118|  2.23M|        return weight >> 1;
  119|  2.23M|    }
  120|       |
  121|   695k|    for (int x = 0;;) {
  122|       |        // FIXME if we overhang above, we could fill a bitmask so we don't have
  123|       |        // to repeat the add_spatial_candidate() for the next row, but just increase
  124|       |        // the weight here
  125|   695k|        add_spatial_candidate(mvstack, cnt, len * 2, cand_b, ref, gmv,
  126|   695k|                              have_newmv_match, have_refmv_match);
  127|   695k|        x += len;
  128|   695k|        if (x >= w4) return 1;
  ------------------
  |  Branch (128:13): [True: 337k, False: 358k]
  ------------------
  129|   358k|        cand_b = &b[x];
  130|   358k|        cand_bw4 = dav1d_block_dimensions[cand_b->bs][0];
  131|   358k|        assert(cand_bw4 < bw4);
  ------------------
  |  Branch (131:9): [True: 358k, False: 0]
  ------------------
  132|   358k|        len = imax(step, cand_bw4);
  133|   358k|    }
  134|   337k|}
refmvs.c:scan_col:
  141|  3.62M|{
  142|  3.62M|    const refmvs_block *cand_b = &b[0][bx4];
  143|  3.62M|    const enum BlockSize first_cand_bs = cand_b->bs;
  144|  3.62M|    const uint8_t *const first_cand_b_dim = dav1d_block_dimensions[first_cand_bs];
  145|  3.62M|    int cand_bh4 = first_cand_b_dim[1];
  146|  3.62M|    int len = imax(step, imin(bh4, cand_bh4));
  147|       |
  148|  3.62M|    if (bh4 <= cand_bh4) {
  ------------------
  |  Branch (148:9): [True: 3.17M, False: 453k]
  ------------------
  149|       |        // FIXME weight can be higher for odd blocks (by4 & 1), but then the
  150|       |        // position of the first block has to be odd already, i.e. not just
  151|       |        // for col_offset=-3/-5
  152|       |        // FIXME why can this not be cand_bh4?
  153|  3.17M|        const int weight = bh4 == 1 ? 2 :
  ------------------
  |  Branch (153:28): [True: 1.19M, False: 1.97M]
  ------------------
  154|  3.17M|                           imax(2, imin(2 * max_cols, first_cand_b_dim[0]));
  155|  3.17M|        add_spatial_candidate(mvstack, cnt, len * weight, cand_b, ref, gmv,
  156|  3.17M|                            have_newmv_match, have_refmv_match);
  157|  3.17M|        return weight >> 1;
  158|  3.17M|    }
  159|       |
  160|   883k|    for (int y = 0;;) {
  161|       |        // FIXME if we overhang above, we could fill a bitmask so we don't have
  162|       |        // to repeat the add_spatial_candidate() for the next row, but just increase
  163|       |        // the weight here
  164|   883k|        add_spatial_candidate(mvstack, cnt, len * 2, cand_b, ref, gmv,
  165|   883k|                              have_newmv_match, have_refmv_match);
  166|   883k|        y += len;
  167|   883k|        if (y >= h4) return 1;
  ------------------
  |  Branch (167:13): [True: 453k, False: 430k]
  ------------------
  168|   430k|        cand_b = &b[y][bx4];
  169|   430k|        cand_bh4 = dav1d_block_dimensions[cand_b->bs][1];
  170|   430k|        assert(cand_bh4 < bh4);
  ------------------
  |  Branch (170:9): [True: 430k, False: 0]
  ------------------
  171|   430k|        len = imax(step, cand_bh4);
  172|   430k|    }
  173|   453k|}
refmvs.c:add_spatial_candidate:
   46|  8.94M|{
   47|  8.94M|    if (b->mv.mv[0].n == INVALID_MV) return; // intra block, no intrabc
  ------------------
  |  |   40|  8.94M|#define INVALID_MV 0x80008000
  ------------------
  |  Branch (47:9): [True: 737k, False: 8.20M]
  ------------------
   48|       |
   49|  8.20M|    if (ref.ref[1] == -1) {
  ------------------
  |  Branch (49:9): [True: 6.97M, False: 1.23M]
  ------------------
   50|  8.82M|        for (int n = 0; n < 2; n++) {
  ------------------
  |  Branch (50:25): [True: 7.96M, False: 854k]
  ------------------
   51|  7.96M|            if (b->ref.ref[n] == ref.ref[0]) {
  ------------------
  |  Branch (51:17): [True: 6.11M, False: 1.85M]
  ------------------
   52|  6.11M|                const mv cand_mv = ((b->mf & 1) && gmv[0].n != INVALID_MV) ?
  ------------------
  |  |   40|  1.59M|#define INVALID_MV 0x80008000
  ------------------
  |  Branch (52:37): [True: 1.59M, False: 4.52M]
  |  Branch (52:52): [True: 195k, False: 1.39M]
  ------------------
   53|  5.92M|                                   gmv[0] : b->mv.mv[n];
   54|       |
   55|  6.11M|                *have_refmv_match = 1;
   56|  6.11M|                *have_newmv_match |= b->mf >> 1;
   57|       |
   58|  6.11M|                const int last = *cnt;
   59|  10.1M|                for (int m = 0; m < last; m++)
  ------------------
  |  Branch (59:33): [True: 6.96M, False: 3.17M]
  ------------------
   60|  6.96M|                    if (mvstack[m].mv.mv[0].n == cand_mv.n) {
  ------------------
  |  Branch (60:25): [True: 2.94M, False: 4.02M]
  ------------------
   61|  2.94M|                        mvstack[m].weight += weight;
   62|  2.94M|                        return;
   63|  2.94M|                    }
   64|       |
   65|  3.17M|                if (last < 8) {
  ------------------
  |  Branch (65:21): [True: 3.16M, False: 7.82k]
  ------------------
   66|  3.16M|                    mvstack[last].mv.mv[0] = cand_mv;
   67|  3.16M|                    mvstack[last].weight = weight;
   68|  3.16M|                    *cnt = last + 1;
   69|  3.16M|                }
   70|  3.17M|                return;
   71|  6.11M|            }
   72|  7.96M|        }
   73|  6.97M|    } else if (b->ref.pair == ref.pair) {
  ------------------
  |  Branch (73:16): [True: 491k, False: 740k]
  ------------------
   74|   491k|        const refmvs_mvpair cand_mv = { .mv = {
   75|   491k|            [0] = ((b->mf & 1) && gmv[0].n != INVALID_MV) ? gmv[0] : b->mv.mv[0],
  ------------------
  |  |   40|  19.0k|#define INVALID_MV 0x80008000
  ------------------
  |  Branch (75:20): [True: 19.0k, False: 472k]
  |  Branch (75:35): [True: 4.36k, False: 14.6k]
  ------------------
   76|   491k|            [1] = ((b->mf & 1) && gmv[1].n != INVALID_MV) ? gmv[1] : b->mv.mv[1],
  ------------------
  |  |   40|  19.0k|#define INVALID_MV 0x80008000
  ------------------
  |  Branch (76:20): [True: 19.0k, False: 472k]
  |  Branch (76:35): [True: 3.43k, False: 15.5k]
  ------------------
   77|   491k|        }};
   78|       |
   79|   491k|        *have_refmv_match = 1;
   80|   491k|        *have_newmv_match |= b->mf >> 1;
   81|       |
   82|   491k|        const int last = *cnt;
   83|   746k|        for (int n = 0; n < last; n++)
  ------------------
  |  Branch (83:25): [True: 483k, False: 262k]
  ------------------
   84|   483k|            if (mvstack[n].mv.n == cand_mv.n) {
  ------------------
  |  Branch (84:17): [True: 229k, False: 254k]
  ------------------
   85|   229k|                mvstack[n].weight += weight;
   86|   229k|                return;
   87|   229k|            }
   88|       |
   89|   262k|        if (last < 8) {
  ------------------
  |  Branch (89:13): [True: 261k, False: 705]
  ------------------
   90|   261k|            mvstack[last].mv = cand_mv;
   91|   261k|            mvstack[last].weight = weight;
   92|   261k|            *cnt = last + 1;
   93|   261k|        }
   94|   262k|    }
   95|  8.20M|}
refmvs.c:add_temporal_candidate:
  198|  1.44M|{
  199|  1.44M|    if (rb->mv.n == INVALID_MV) return;
  ------------------
  |  |   40|  1.44M|#define INVALID_MV 0x80008000
  ------------------
  |  Branch (199:9): [True: 667k, False: 781k]
  ------------------
  200|       |
  201|   781k|    union mv mv = mv_projection(rb->mv, rf->pocdiff[ref.ref[0] - 1], rb->ref);
  202|   781k|    fix_mv_precision(rf->frm_hdr, &mv);
  203|       |
  204|   781k|    const int last = *cnt;
  205|   781k|    if (ref.ref[1] == -1) {
  ------------------
  |  Branch (205:9): [True: 506k, False: 275k]
  ------------------
  206|   506k|        if (globalmv_ctx)
  ------------------
  |  Branch (206:13): [True: 111k, False: 394k]
  ------------------
  207|   111k|            *globalmv_ctx = (abs(mv.x - gmv[0].x) | abs(mv.y - gmv[0].y)) >= 16;
  208|       |
  209|  1.28M|        for (int n = 0; n < last; n++)
  ------------------
  |  Branch (209:25): [True: 1.12M, False: 155k]
  ------------------
  210|  1.12M|            if (mvstack[n].mv.mv[0].n == mv.n) {
  ------------------
  |  Branch (210:17): [True: 350k, False: 774k]
  ------------------
  211|   350k|                mvstack[n].weight += 2;
  212|   350k|                return;
  213|   350k|            }
  214|   155k|        if (last < 8) {
  ------------------
  |  Branch (214:13): [True: 154k, False: 603]
  ------------------
  215|   154k|            mvstack[last].mv.mv[0] = mv;
  216|   154k|            mvstack[last].weight = 2;
  217|   154k|            *cnt = last + 1;
  218|   154k|        }
  219|   275k|    } else {
  220|   275k|        refmvs_mvpair mvp = { .mv = {
  221|   275k|            [0] = mv,
  222|   275k|            [1] = mv_projection(rb->mv, rf->pocdiff[ref.ref[1] - 1], rb->ref),
  223|   275k|        }};
  224|   275k|        fix_mv_precision(rf->frm_hdr, &mvp.mv[1]);
  225|       |
  226|   490k|        for (int n = 0; n < last; n++)
  ------------------
  |  Branch (226:25): [True: 435k, False: 55.0k]
  ------------------
  227|   435k|            if (mvstack[n].mv.n == mvp.n) {
  ------------------
  |  Branch (227:17): [True: 220k, False: 214k]
  ------------------
  228|   220k|                mvstack[n].weight += 2;
  229|   220k|                return;
  230|   220k|            }
  231|  55.0k|        if (last < 8) {
  ------------------
  |  Branch (231:13): [True: 54.6k, False: 413]
  ------------------
  232|  54.6k|            mvstack[last].mv = mvp;
  233|  54.6k|            mvstack[last].weight = 2;
  234|  54.6k|            *cnt = last + 1;
  235|  54.6k|        }
  236|  55.0k|    }
  237|   781k|}
refmvs.c:mv_projection:
  175|  1.05M|static inline union mv mv_projection(const union mv mv, const int num, const int den) {
  176|  1.05M|    static const uint16_t div_mult[32] = {
  177|  1.05M|           0, 16384, 8192, 5461, 4096, 3276, 2730, 2340,
  178|  1.05M|        2048,  1820, 1638, 1489, 1365, 1260, 1170, 1092,
  179|  1.05M|        1024,   963,  910,  862,  819,  780,  744,  712,
  180|  1.05M|         682,   655,  630,  606,  585,  564,  546,  528
  181|  1.05M|    };
  182|  1.05M|    assert(den > 0 && den < 32);
  ------------------
  |  Branch (182:5): [True: 1.05M, False: 0]
  |  Branch (182:5): [True: 1.05M, False: 0]
  ------------------
  183|  1.05M|    assert(num > -32 && num < 32);
  ------------------
  |  Branch (183:5): [True: 1.05M, False: 0]
  |  Branch (183:5): [True: 1.05M, False: 0]
  ------------------
  184|  1.05M|    const int frac = num * div_mult[den];
  185|  1.05M|    const int y = mv.y * frac, x = mv.x * frac;
  186|       |    // Round and clip according to AV1 spec section 7.9.3
  187|  1.05M|    return (union mv) { // 0x3fff == (1 << 14) - 1
  188|  1.05M|        .y = iclip((y + 8192 + (y >> 31)) >> 14, -0x3fff, 0x3fff),
  189|  1.05M|        .x = iclip((x + 8192 + (x >> 31)) >> 14, -0x3fff, 0x3fff)
  190|  1.05M|    };
  191|  1.05M|}
refmvs.c:add_compound_extended_candidate:
  245|   266k|{
  246|   266k|    refmvs_candidate *const diff = &same[2];
  247|   266k|    int *const diff_count = &same_count[2];
  248|       |
  249|   688k|    for (int n = 0; n < 2; n++) {
  ------------------
  |  Branch (249:21): [True: 521k, False: 166k]
  ------------------
  250|   521k|        const int cand_ref = cand_b->ref.ref[n];
  251|       |
  252|   521k|        if (cand_ref <= 0) break;
  ------------------
  |  Branch (252:13): [True: 100k, False: 421k]
  ------------------
  253|       |
  254|   421k|        mv cand_mv = cand_b->mv.mv[n];
  255|   421k|        if (cand_ref == ref.ref[0]) {
  ------------------
  |  Branch (255:13): [True: 160k, False: 260k]
  ------------------
  256|   160k|            if (same_count[0] < 2)
  ------------------
  |  Branch (256:17): [True: 153k, False: 7.58k]
  ------------------
  257|   153k|                same[same_count[0]++].mv.mv[0] = cand_mv;
  258|   160k|            if (diff_count[1] < 2) {
  ------------------
  |  Branch (258:17): [True: 134k, False: 25.7k]
  ------------------
  259|   134k|                if (sign1 ^ sign_bias[cand_ref - 1]) {
  ------------------
  |  Branch (259:21): [True: 22.1k, False: 112k]
  ------------------
  260|  22.1k|                    cand_mv.y = -cand_mv.y;
  261|  22.1k|                    cand_mv.x = -cand_mv.x;
  262|  22.1k|                }
  263|   134k|                diff[diff_count[1]++].mv.mv[1] = cand_mv;
  264|   134k|            }
  265|   260k|        } else if (cand_ref == ref.ref[1]) {
  ------------------
  |  Branch (265:20): [True: 142k, False: 118k]
  ------------------
  266|   142k|            if (same_count[1] < 2)
  ------------------
  |  Branch (266:17): [True: 137k, False: 4.49k]
  ------------------
  267|   137k|                same[same_count[1]++].mv.mv[1] = cand_mv;
  268|   142k|            if (diff_count[0] < 2) {
  ------------------
  |  Branch (268:17): [True: 117k, False: 25.2k]
  ------------------
  269|   117k|                if (sign0 ^ sign_bias[cand_ref - 1]) {
  ------------------
  |  Branch (269:21): [True: 19.3k, False: 97.6k]
  ------------------
  270|  19.3k|                    cand_mv.y = -cand_mv.y;
  271|  19.3k|                    cand_mv.x = -cand_mv.x;
  272|  19.3k|                }
  273|   117k|                diff[diff_count[0]++].mv.mv[0] = cand_mv;
  274|   117k|            }
  275|   142k|        } else {
  276|   118k|            mv i_cand_mv = (union mv) {
  277|   118k|                .x = -cand_mv.x,
  278|   118k|                .y = -cand_mv.y
  279|   118k|            };
  280|       |
  281|   118k|            if (diff_count[0] < 2) {
  ------------------
  |  Branch (281:17): [True: 90.4k, False: 28.0k]
  ------------------
  282|  90.4k|                diff[diff_count[0]++].mv.mv[0] =
  283|  90.4k|                    sign0 ^ sign_bias[cand_ref - 1] ?
  ------------------
  |  Branch (283:21): [True: 11.0k, False: 79.4k]
  ------------------
  284|  79.4k|                    i_cand_mv : cand_mv;
  285|  90.4k|            }
  286|       |
  287|   118k|            if (diff_count[1] < 2) {
  ------------------
  |  Branch (287:17): [True: 81.9k, False: 36.5k]
  ------------------
  288|  81.9k|                diff[diff_count[1]++].mv.mv[1] =
  289|  81.9k|                    sign1 ^ sign_bias[cand_ref - 1] ?
  ------------------
  |  Branch (289:21): [True: 11.0k, False: 70.8k]
  ------------------
  290|  70.8k|                    i_cand_mv : cand_mv;
  291|  81.9k|            }
  292|   118k|        }
  293|   421k|    }
  294|   266k|}
refmvs.c:add_single_extended_candidate:
  299|  1.01M|{
  300|  2.03M|    for (int n = 0; n < 2; n++) {
  ------------------
  |  Branch (300:21): [True: 1.99M, False: 39.3k]
  ------------------
  301|  1.99M|        const int cand_ref = cand_b->ref.ref[n];
  302|       |
  303|  1.99M|        if (cand_ref <= 0) break;
  ------------------
  |  Branch (303:13): [True: 977k, False: 1.01M]
  ------------------
  304|       |        // we need to continue even if cand_ref == ref.ref[0], since
  305|       |        // the candidate could have been added as a globalmv variant,
  306|       |        // which changes the value
  307|       |        // FIXME if scan_{row,col}() returned a mask for the nearest
  308|       |        // edge, we could skip the appropriate ones here
  309|       |
  310|  1.01M|        mv cand_mv = cand_b->mv.mv[n];
  311|  1.01M|        if (sign ^ sign_bias[cand_ref - 1]) {
  ------------------
  |  Branch (311:13): [True: 19.4k, False: 994k]
  ------------------
  312|  19.4k|            cand_mv.y = -cand_mv.y;
  313|  19.4k|            cand_mv.x = -cand_mv.x;
  314|  19.4k|        }
  315|       |
  316|  1.01M|        int m;
  317|  1.01M|        const int last = *cnt;
  318|  1.11M|        for (m = 0; m < last; m++)
  ------------------
  |  Branch (318:21): [True: 986k, False: 129k]
  ------------------
  319|   986k|            if (cand_mv.n == mvstack[m].mv.mv[0].n)
  ------------------
  |  Branch (319:17): [True: 884k, False: 102k]
  ------------------
  320|   884k|                break;
  321|  1.01M|        if (m == last) {
  ------------------
  |  Branch (321:13): [True: 129k, False: 884k]
  ------------------
  322|   129k|            mvstack[m].mv.mv[0] = cand_mv;
  323|   129k|            mvstack[m].weight = 2; // "minimal"
  324|   129k|            *cnt = last + 1;
  325|   129k|        }
  326|  1.01M|    }
  327|  1.01M|}

decode.c:dav1d_refmvs_save_tmvs:
  145|  48.3k|{
  146|  48.3k|    const refmvs_frame *const rf = rt->rf;
  147|       |
  148|  48.3k|    assert(row_start8 >= 0);
  ------------------
  |  Branch (148:5): [True: 48.3k, False: 0]
  ------------------
  149|  48.3k|    assert((unsigned) (row_end8 - row_start8) <= 16U);
  ------------------
  |  Branch (149:5): [True: 48.3k, False: 0]
  ------------------
  150|  48.3k|    row_end8 = imin(row_end8, rf->ih8);
  151|  48.3k|    col_end8 = imin(col_end8, rf->iw8);
  152|       |
  153|  48.3k|    const ptrdiff_t stride = rf->rp_stride;
  154|  48.3k|    const uint8_t *const ref_sign = rf->mfmv_sign;
  155|  48.3k|    refmvs_temporal_block *rp = &rf->rp[row_start8 * stride];
  156|       |
  157|  48.3k|    dsp->save_tmvs(rp, stride, rt->r + 6, ref_sign,
  158|  48.3k|                   col_end8, row_end8, col_start8, row_start8);
  159|  48.3k|}

dav1d_init_last_nonzero_col_from_eob_tables:
  350|  2.35k|COLD void dav1d_init_last_nonzero_col_from_eob_tables(void) {
  351|       |    static pthread_once_t initted = PTHREAD_ONCE_INIT;
  352|  2.35k|    pthread_once(&initted, init_internal);
  353|  2.35k|}
scan.c:init_internal:
  333|      1|static COLD void init_internal(void) {
  334|      1|    init_tbl(last_nonzero_col_from_eob_4x4,   scan_4x4,    4,  4);
  335|      1|    init_tbl(last_nonzero_col_from_eob_8x8,   scan_8x8,    8,  8);
  336|      1|    init_tbl(last_nonzero_col_from_eob_16x16, scan_16x16, 16, 16);
  337|      1|    init_tbl(last_nonzero_col_from_eob_32x32, scan_32x32, 32, 32);
  338|      1|    init_tbl(last_nonzero_col_from_eob_4x8,   scan_4x8,    4,  8);
  339|      1|    init_tbl(last_nonzero_col_from_eob_8x4,   scan_8x4,    8,  4);
  340|      1|    init_tbl(last_nonzero_col_from_eob_8x16,  scan_8x16,   8, 16);
  341|      1|    init_tbl(last_nonzero_col_from_eob_16x8,  scan_16x8,  16,  8);
  342|      1|    init_tbl(last_nonzero_col_from_eob_16x32, scan_16x32, 16, 32);
  343|      1|    init_tbl(last_nonzero_col_from_eob_32x16, scan_32x16, 32, 16);
  344|      1|    init_tbl(last_nonzero_col_from_eob_4x16,  scan_4x16,   4, 16);
  345|      1|    init_tbl(last_nonzero_col_from_eob_16x4,  scan_16x4,  16,  4);
  346|      1|    init_tbl(last_nonzero_col_from_eob_8x32,  scan_8x32,   8, 32);
  347|      1|    init_tbl(last_nonzero_col_from_eob_32x8,  scan_32x8,  32,  8);
  348|      1|}
scan.c:init_tbl:
  321|     14|{
  322|     14|    int max_col = 0;
  323|    218|    for (int y = 0, n = 0; y < h; y++) {
  ------------------
  |  Branch (323:28): [True: 204, False: 14]
  ------------------
  324|  3.54k|        for (int x = 0; x < w; x++, n++) {
  ------------------
  |  Branch (324:25): [True: 3.34k, False: 204]
  ------------------
  325|  3.34k|            const int rc = scan[n];
  326|  3.34k|            const int rcx = rc & (h - 1);
  327|  3.34k|            max_col = imax(max_col, rcx);
  328|  3.34k|            last_nonzero_col_from_eob[n] = max_col;
  329|  3.34k|        }
  330|    204|    }
  331|     14|}

dav1d_get_shear_params:
   80|  94.8k|int dav1d_get_shear_params(Dav1dWarpedMotionParams *const wm) {
   81|  94.8k|    const int32_t *const mat = wm->matrix;
   82|       |
   83|  94.8k|    if (mat[2] <= 0) return 1;
  ------------------
  |  Branch (83:9): [True: 0, False: 94.8k]
  ------------------
   84|       |
   85|  94.8k|    wm->u.p.alpha = iclip_wmp(mat[2] - 0x10000);
   86|  94.8k|    wm->u.p.beta = iclip_wmp(mat[3]);
   87|       |
   88|  94.8k|    int shift;
   89|  94.8k|    const int y = apply_sign(resolve_divisor_32(abs(mat[2]), &shift), mat[2]);
   90|  94.8k|    const int64_t v1 = ((int64_t) mat[4] * 0x10000) * y;
   91|  94.8k|    const int rnd = (1 << shift) >> 1;
   92|  94.8k|    wm->u.p.gamma = iclip_wmp(apply_sign64((int) ((llabs(v1) + rnd) >> shift), v1));
   93|  94.8k|    const int64_t v2 = ((int64_t) mat[3] * mat[4]) * y;
   94|  94.8k|    wm->u.p.delta = iclip_wmp(mat[5] -
   95|  94.8k|                          apply_sign64((int) ((llabs(v2) + rnd) >> shift), v2) -
   96|  94.8k|                          0x10000);
   97|       |
   98|  94.8k|    return (4 * abs(wm->u.p.alpha) + 7 * abs(wm->u.p.beta) >= 0x10000) ||
  ------------------
  |  Branch (98:12): [True: 3.09k, False: 91.7k]
  ------------------
   99|  91.7k|           (4 * abs(wm->u.p.gamma) + 4 * abs(wm->u.p.delta) >= 0x10000);
  ------------------
  |  Branch (99:12): [True: 648, False: 91.0k]
  ------------------
  100|  94.8k|}
dav1d_find_affine_int:
  153|  92.5k|{
  154|  92.5k|    int32_t *const mat = wm->matrix;
  155|  92.5k|    int a[2][2] = { { 0, 0 }, { 0, 0 } };
  156|  92.5k|    int bx[2] = { 0, 0 };
  157|  92.5k|    int by[2] = { 0, 0 };
  158|  92.5k|    const int rsuy = 2 * bh4 - 1;
  159|  92.5k|    const int rsux = 2 * bw4 - 1;
  160|  92.5k|    const int suy = rsuy * 8;
  161|  92.5k|    const int sux = rsux * 8;
  162|  92.5k|    const int duy = suy + mv.y;
  163|  92.5k|    const int dux = sux + mv.x;
  164|  92.5k|    const int isuy = by4 * 4 + rsuy;
  165|  92.5k|    const int isux = bx4 * 4 + rsux;
  166|       |
  167|   326k|    for (int i = 0; i < np; i++) {
  ------------------
  |  Branch (167:21): [True: 233k, False: 92.5k]
  ------------------
  168|   233k|        const int dx = pts[i][1][0] - dux;
  169|   233k|        const int dy = pts[i][1][1] - duy;
  170|   233k|        const int sx = pts[i][0][0] - sux;
  171|   233k|        const int sy = pts[i][0][1] - suy;
  172|   233k|        if (abs(sx - dx) < 256 && abs(sy - dy) < 256) {
  ------------------
  |  Branch (172:13): [True: 231k, False: 2.02k]
  |  Branch (172:35): [True: 230k, False: 1.23k]
  ------------------
  173|   230k|            a[0][0] += ((sx * sx) >> 2) + sx * 2 + 8;
  174|   230k|            a[0][1] += ((sx * sy) >> 2) + sx + sy + 4;
  175|   230k|            a[1][1] += ((sy * sy) >> 2) + sy * 2 + 8;
  176|   230k|            bx[0] += ((sx * dx) >> 2) + sx + dx + 8;
  177|   230k|            bx[1] += ((sy * dx) >> 2) + sy + dx + 4;
  178|   230k|            by[0] += ((sx * dy) >> 2) + sx + dy + 4;
  179|   230k|            by[1] += ((sy * dy) >> 2) + sy + dy + 8;
  180|   230k|        }
  181|   233k|    }
  182|       |
  183|       |    // compute determinant of a
  184|  92.5k|    const int64_t det = (int64_t) a[0][0] * a[1][1] - (int64_t) a[0][1] * a[0][1];
  185|  92.5k|    if (det == 0) return 1;
  ------------------
  |  Branch (185:9): [True: 3.26k, False: 89.2k]
  ------------------
  186|  89.2k|    int shift, idet = apply_sign64(resolve_divisor_64(llabs(det), &shift), det);
  187|  89.2k|    shift -= 16;
  188|  89.2k|    if (shift < 0) {
  ------------------
  |  Branch (188:9): [True: 0, False: 89.2k]
  ------------------
  189|      0|        idet <<= -shift;
  190|      0|        shift = 0;
  191|      0|    }
  192|       |
  193|       |    // solve the least-squares
  194|  89.2k|    mat[2] = get_mult_shift_diag((int64_t) a[1][1] * bx[0] -
  195|  89.2k|                                 (int64_t) a[0][1] * bx[1], idet, shift);
  196|  89.2k|    mat[3] = get_mult_shift_ndiag((int64_t) a[0][0] * bx[1] -
  197|  89.2k|                                  (int64_t) a[0][1] * bx[0], idet, shift);
  198|  89.2k|    mat[4] = get_mult_shift_ndiag((int64_t) a[1][1] * by[0] -
  199|  89.2k|                                  (int64_t) a[0][1] * by[1], idet, shift);
  200|  89.2k|    mat[5] = get_mult_shift_diag((int64_t) a[0][0] * by[1] -
  201|  89.2k|                                 (int64_t) a[0][1] * by[0], idet, shift);
  202|       |
  203|  89.2k|    mat[0] = iclip(mv.x * 0x2000 - (isux * (mat[2] - 0x10000) + isuy * mat[3]),
  204|  89.2k|                   -0x800000, 0x7fffff);
  205|  89.2k|    mat[1] = iclip(mv.y * 0x2000 - (isux * mat[4] + isuy * (mat[5] - 0x10000)),
  206|  89.2k|                   -0x800000, 0x7fffff);
  207|       |
  208|  89.2k|    return 0;
  209|  92.5k|}
warpmv.c:iclip_wmp:
   63|   379k|static inline int iclip_wmp(const int v) {
   64|   379k|    const int cv = iclip(v, INT16_MIN, INT16_MAX);
   65|       |
   66|   379k|    return apply_sign((abs(cv) + 32) >> 6, cv) * (1 << 6);
   67|   379k|}
warpmv.c:resolve_divisor_32:
   69|  94.8k|static inline int resolve_divisor_32(const unsigned d, int *const shift) {
   70|  94.8k|    *shift = ulog2(d);
   71|  94.8k|    const int e = d - (1 << *shift);
   72|  94.8k|    const int f = *shift > 8 ? (e + (1 << (*shift - 9))) >> (*shift - 8) :
  ------------------
  |  Branch (72:19): [True: 94.8k, False: 0]
  ------------------
   73|  94.8k|                               e << (8 - *shift);
   74|  94.8k|    assert(f <= 256);
  ------------------
  |  Branch (74:5): [True: 94.8k, False: 0]
  ------------------
   75|  94.8k|    *shift += 14;
   76|       |    // Use f as lookup into the precomputed table of multipliers
   77|  94.8k|    return div_lut[f];
   78|  94.8k|}
warpmv.c:resolve_divisor_64:
  102|  89.2k|static int resolve_divisor_64(const uint64_t d, int *const shift) {
  103|  89.2k|    *shift = u64log2(d);
  104|  89.2k|    const int64_t e = d - (1LL << *shift);
  105|  89.2k|    const int64_t f = *shift > 8 ? (e + (1LL << (*shift - 9))) >> (*shift - 8) :
  ------------------
  |  Branch (105:23): [True: 89.2k, False: 0]
  ------------------
  106|  89.2k|                                   e << (8 - *shift);
  107|  89.2k|    assert(f <= 256);
  ------------------
  |  Branch (107:5): [True: 89.2k, False: 0]
  ------------------
  108|  89.2k|    *shift += 14;
  109|       |    // Use f as lookup into the precomputed table of multipliers
  110|  89.2k|    return div_lut[f];
  111|  89.2k|}
warpmv.c:get_mult_shift_diag:
  125|   178k|{
  126|   178k|    const int64_t v1 = px * idet;
  127|   178k|    const int v2 = apply_sign64((int) ((llabs(v1) +
  128|   178k|                                        ((1LL << shift) >> 1)) >> shift),
  129|   178k|                                v1);
  130|   178k|    return iclip(v2, 0xe001, 0x11fff);
  131|   178k|}
warpmv.c:get_mult_shift_ndiag:
  115|   178k|{
  116|   178k|    const int64_t v1 = px * idet;
  117|   178k|    const int v2 = apply_sign64((int) ((llabs(v1) +
  118|   178k|                                        ((1LL << shift) >> 1)) >> shift),
  119|   178k|                                v1);
  120|   178k|    return iclip(v2, -0x1fff, 0x1fff);
  121|   178k|}

dav1d_init_ii_wedge_masks:
  207|      1|COLD void dav1d_init_ii_wedge_masks(void) {
  208|       |    // This function is guaranteed to be called only once
  209|       |
  210|      1|    enum WedgeMasterLineType {
  211|      1|        WEDGE_MASTER_LINE_ODD,
  212|      1|        WEDGE_MASTER_LINE_EVEN,
  213|      1|        WEDGE_MASTER_LINE_VERT,
  214|      1|        N_WEDGE_MASTER_LINES,
  215|      1|    };
  216|      1|    static const uint8_t wedge_master_border[N_WEDGE_MASTER_LINES][8] = {
  217|      1|        [WEDGE_MASTER_LINE_ODD]  = {  1,  2,  6, 18, 37, 53, 60, 63 },
  218|      1|        [WEDGE_MASTER_LINE_EVEN] = {  1,  4, 11, 27, 46, 58, 62, 63 },
  219|      1|        [WEDGE_MASTER_LINE_VERT] = {  0,  2,  7, 21, 43, 57, 62, 64 },
  220|      1|    };
  221|      1|    uint8_t master[6][64 * 64];
  222|       |
  223|       |    // create master templates
  224|     65|    for (int y = 0, off = 0; y < 64; y++, off += 64)
  ------------------
  |  Branch (224:30): [True: 64, False: 1]
  ------------------
  225|     64|        insert_border(&master[WEDGE_VERTICAL][off],
  226|     64|                      wedge_master_border[WEDGE_MASTER_LINE_VERT], 32);
  227|     33|    for (int y = 0, off = 0, ctr = 48; y < 64; y += 2, off += 128, ctr--)
  ------------------
  |  Branch (227:40): [True: 32, False: 1]
  ------------------
  228|     32|    {
  229|     32|        insert_border(&master[WEDGE_OBLIQUE63][off],
  230|     32|                      wedge_master_border[WEDGE_MASTER_LINE_EVEN], ctr);
  231|     32|        insert_border(&master[WEDGE_OBLIQUE63][off + 64],
  232|     32|                      wedge_master_border[WEDGE_MASTER_LINE_ODD], ctr - 1);
  233|     32|    }
  234|       |
  235|      1|    transpose(master[WEDGE_OBLIQUE27], master[WEDGE_OBLIQUE63]);
  236|      1|    transpose(master[WEDGE_HORIZONTAL], master[WEDGE_VERTICAL]);
  237|      1|    hflip(master[WEDGE_OBLIQUE117], master[WEDGE_OBLIQUE63]);
  238|      1|    hflip(master[WEDGE_OBLIQUE153], master[WEDGE_OBLIQUE27]);
  239|       |
  240|      1|#define fill(w, h, sz_422, sz_420, hvsw, signs) \
  241|      1|    fill2d_16x2(w, h, BS_##w##x##h - BS_32x32, \
  242|      1|                master, wedge_codebook_16_##hvsw, \
  243|      1|                dav1d_masks.wedge_444_##w##x##h, \
  244|      1|                dav1d_masks.wedge_422_##sz_422, \
  245|      1|                dav1d_masks.wedge_420_##sz_420, signs)
  246|       |
  247|      1|    fill(32, 32, 16x32, 16x16, heqw, 0x7bfb);
  ------------------
  |  |  241|      1|    fill2d_16x2(w, h, BS_##w##x##h - BS_32x32, \
  |  |  242|      1|                master, wedge_codebook_16_##hvsw, \
  |  |  243|      1|                dav1d_masks.wedge_444_##w##x##h, \
  |  |  244|      1|                dav1d_masks.wedge_422_##sz_422, \
  |  |  245|      1|                dav1d_masks.wedge_420_##sz_420, signs)
  ------------------
  248|      1|    fill(32, 16, 16x16, 16x8,  hltw, 0x7beb);
  ------------------
  |  |  241|      1|    fill2d_16x2(w, h, BS_##w##x##h - BS_32x32, \
  |  |  242|      1|                master, wedge_codebook_16_##hvsw, \
  |  |  243|      1|                dav1d_masks.wedge_444_##w##x##h, \
  |  |  244|      1|                dav1d_masks.wedge_422_##sz_422, \
  |  |  245|      1|                dav1d_masks.wedge_420_##sz_420, signs)
  ------------------
  249|      1|    fill(32,  8, 16x8,  16x4,  hltw, 0x6beb);
  ------------------
  |  |  241|      1|    fill2d_16x2(w, h, BS_##w##x##h - BS_32x32, \
  |  |  242|      1|                master, wedge_codebook_16_##hvsw, \
  |  |  243|      1|                dav1d_masks.wedge_444_##w##x##h, \
  |  |  244|      1|                dav1d_masks.wedge_422_##sz_422, \
  |  |  245|      1|                dav1d_masks.wedge_420_##sz_420, signs)
  ------------------
  250|      1|    fill(16, 32,  8x32,  8x16, hgtw, 0x7beb);
  ------------------
  |  |  241|      1|    fill2d_16x2(w, h, BS_##w##x##h - BS_32x32, \
  |  |  242|      1|                master, wedge_codebook_16_##hvsw, \
  |  |  243|      1|                dav1d_masks.wedge_444_##w##x##h, \
  |  |  244|      1|                dav1d_masks.wedge_422_##sz_422, \
  |  |  245|      1|                dav1d_masks.wedge_420_##sz_420, signs)
  ------------------
  251|      1|    fill(16, 16,  8x16,  8x8,  heqw, 0x7bfb);
  ------------------
  |  |  241|      1|    fill2d_16x2(w, h, BS_##w##x##h - BS_32x32, \
  |  |  242|      1|                master, wedge_codebook_16_##hvsw, \
  |  |  243|      1|                dav1d_masks.wedge_444_##w##x##h, \
  |  |  244|      1|                dav1d_masks.wedge_422_##sz_422, \
  |  |  245|      1|                dav1d_masks.wedge_420_##sz_420, signs)
  ------------------
  252|      1|    fill(16,  8,  8x8,   8x4,  hltw, 0x7beb);
  ------------------
  |  |  241|      1|    fill2d_16x2(w, h, BS_##w##x##h - BS_32x32, \
  |  |  242|      1|                master, wedge_codebook_16_##hvsw, \
  |  |  243|      1|                dav1d_masks.wedge_444_##w##x##h, \
  |  |  244|      1|                dav1d_masks.wedge_422_##sz_422, \
  |  |  245|      1|                dav1d_masks.wedge_420_##sz_420, signs)
  ------------------
  253|      1|    fill( 8, 32,  4x32,  4x16, hgtw, 0x7aeb);
  ------------------
  |  |  241|      1|    fill2d_16x2(w, h, BS_##w##x##h - BS_32x32, \
  |  |  242|      1|                master, wedge_codebook_16_##hvsw, \
  |  |  243|      1|                dav1d_masks.wedge_444_##w##x##h, \
  |  |  244|      1|                dav1d_masks.wedge_422_##sz_422, \
  |  |  245|      1|                dav1d_masks.wedge_420_##sz_420, signs)
  ------------------
  254|      1|    fill( 8, 16,  4x16,  4x8,  hgtw, 0x7beb);
  ------------------
  |  |  241|      1|    fill2d_16x2(w, h, BS_##w##x##h - BS_32x32, \
  |  |  242|      1|                master, wedge_codebook_16_##hvsw, \
  |  |  243|      1|                dav1d_masks.wedge_444_##w##x##h, \
  |  |  244|      1|                dav1d_masks.wedge_422_##sz_422, \
  |  |  245|      1|                dav1d_masks.wedge_420_##sz_420, signs)
  ------------------
  255|      1|    fill( 8,  8,  4x8,   4x4,  heqw, 0x7bfb);
  ------------------
  |  |  241|      1|    fill2d_16x2(w, h, BS_##w##x##h - BS_32x32, \
  |  |  242|      1|                master, wedge_codebook_16_##hvsw, \
  |  |  243|      1|                dav1d_masks.wedge_444_##w##x##h, \
  |  |  244|      1|                dav1d_masks.wedge_422_##sz_422, \
  |  |  245|      1|                dav1d_masks.wedge_420_##sz_420, signs)
  ------------------
  256|      1|#undef fill
  257|       |
  258|      1|    memset(dav1d_masks.ii_dc, 32, 32 * 32);
  259|      4|    for (int c = 0; c < 3; c++) {
  ------------------
  |  Branch (259:21): [True: 3, False: 1]
  ------------------
  260|      3|        dav1d_masks.offsets[c][BS_32x32-BS_32x32].ii[II_DC_PRED] =
  261|      3|        dav1d_masks.offsets[c][BS_32x16-BS_32x32].ii[II_DC_PRED] =
  262|      3|        dav1d_masks.offsets[c][BS_16x32-BS_32x32].ii[II_DC_PRED] =
  263|      3|        dav1d_masks.offsets[c][BS_16x16-BS_32x32].ii[II_DC_PRED] =
  264|      3|        dav1d_masks.offsets[c][BS_16x8 -BS_32x32].ii[II_DC_PRED] =
  265|      3|        dav1d_masks.offsets[c][BS_8x16 -BS_32x32].ii[II_DC_PRED] =
  266|      3|        dav1d_masks.offsets[c][BS_8x8  -BS_32x32].ii[II_DC_PRED] =
  267|      3|            MASK_OFFSET(dav1d_masks.ii_dc);
  ------------------
  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  ------------------
  268|      3|    }
  269|       |
  270|      1|#define BUILD_NONDC_II_MASKS(w, h, step) \
  271|      1|    build_nondc_ii_masks(dav1d_masks.ii_nondc_##w##x##h, w, h, step)
  272|       |
  273|      1|#define ASSIGN_NONDC_II_OFFSET(bs, w444, h444, w422, h422, w420, h420) \
  274|      1|    dav1d_masks.offsets[0][bs-BS_32x32].ii[p + 1] = \
  275|      1|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w444##x##h444[p*w444*h444]); \
  276|      1|    dav1d_masks.offsets[1][bs-BS_32x32].ii[p + 1] = \
  277|      1|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w422##x##h422[p*w422*h422]); \
  278|      1|    dav1d_masks.offsets[2][bs-BS_32x32].ii[p + 1] = \
  279|      1|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w420##x##h420[p*w420*h420])
  280|       |
  281|      1|    BUILD_NONDC_II_MASKS(32, 32, 1);
  ------------------
  |  |  271|      1|    build_nondc_ii_masks(dav1d_masks.ii_nondc_##w##x##h, w, h, step)
  ------------------
  282|      1|    BUILD_NONDC_II_MASKS(16, 32, 1);
  ------------------
  |  |  271|      1|    build_nondc_ii_masks(dav1d_masks.ii_nondc_##w##x##h, w, h, step)
  ------------------
  283|      1|    BUILD_NONDC_II_MASKS(16, 16, 2);
  ------------------
  |  |  271|      1|    build_nondc_ii_masks(dav1d_masks.ii_nondc_##w##x##h, w, h, step)
  ------------------
  284|      1|    BUILD_NONDC_II_MASKS( 8, 32, 1);
  ------------------
  |  |  271|      1|    build_nondc_ii_masks(dav1d_masks.ii_nondc_##w##x##h, w, h, step)
  ------------------
  285|      1|    BUILD_NONDC_II_MASKS( 8, 16, 2);
  ------------------
  |  |  271|      1|    build_nondc_ii_masks(dav1d_masks.ii_nondc_##w##x##h, w, h, step)
  ------------------
  286|      1|    BUILD_NONDC_II_MASKS( 8,  8, 4);
  ------------------
  |  |  271|      1|    build_nondc_ii_masks(dav1d_masks.ii_nondc_##w##x##h, w, h, step)
  ------------------
  287|      1|    BUILD_NONDC_II_MASKS( 4, 16, 2);
  ------------------
  |  |  271|      1|    build_nondc_ii_masks(dav1d_masks.ii_nondc_##w##x##h, w, h, step)
  ------------------
  288|      1|    BUILD_NONDC_II_MASKS( 4,  8, 4);
  ------------------
  |  |  271|      1|    build_nondc_ii_masks(dav1d_masks.ii_nondc_##w##x##h, w, h, step)
  ------------------
  289|      1|    BUILD_NONDC_II_MASKS( 4,  4, 8);
  ------------------
  |  |  271|      1|    build_nondc_ii_masks(dav1d_masks.ii_nondc_##w##x##h, w, h, step)
  ------------------
  290|      4|    for (int p = 0; p < 3; p++) {
  ------------------
  |  Branch (290:21): [True: 3, False: 1]
  ------------------
  291|      3|        ASSIGN_NONDC_II_OFFSET(BS_32x32, 32, 32, 16, 32, 16, 16);
  ------------------
  |  |  274|      3|    dav1d_masks.offsets[0][bs-BS_32x32].ii[p + 1] = \
  |  |  275|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w444##x##h444[p*w444*h444]); \
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  |  |  276|      3|    dav1d_masks.offsets[1][bs-BS_32x32].ii[p + 1] = \
  |  |  277|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w422##x##h422[p*w422*h422]); \
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  |  |  278|      3|    dav1d_masks.offsets[2][bs-BS_32x32].ii[p + 1] = \
  |  |  279|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w420##x##h420[p*w420*h420])
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  ------------------
  292|      3|        ASSIGN_NONDC_II_OFFSET(BS_32x16, 32, 32, 16, 16, 16, 16);
  ------------------
  |  |  274|      3|    dav1d_masks.offsets[0][bs-BS_32x32].ii[p + 1] = \
  |  |  275|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w444##x##h444[p*w444*h444]); \
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  |  |  276|      3|    dav1d_masks.offsets[1][bs-BS_32x32].ii[p + 1] = \
  |  |  277|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w422##x##h422[p*w422*h422]); \
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  |  |  278|      3|    dav1d_masks.offsets[2][bs-BS_32x32].ii[p + 1] = \
  |  |  279|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w420##x##h420[p*w420*h420])
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  ------------------
  293|      3|        ASSIGN_NONDC_II_OFFSET(BS_16x32, 16, 32,  8, 32,  8, 16);
  ------------------
  |  |  274|      3|    dav1d_masks.offsets[0][bs-BS_32x32].ii[p + 1] = \
  |  |  275|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w444##x##h444[p*w444*h444]); \
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  |  |  276|      3|    dav1d_masks.offsets[1][bs-BS_32x32].ii[p + 1] = \
  |  |  277|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w422##x##h422[p*w422*h422]); \
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  |  |  278|      3|    dav1d_masks.offsets[2][bs-BS_32x32].ii[p + 1] = \
  |  |  279|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w420##x##h420[p*w420*h420])
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  ------------------
  294|      3|        ASSIGN_NONDC_II_OFFSET(BS_16x16, 16, 16,  8, 16,  8,  8);
  ------------------
  |  |  274|      3|    dav1d_masks.offsets[0][bs-BS_32x32].ii[p + 1] = \
  |  |  275|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w444##x##h444[p*w444*h444]); \
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  |  |  276|      3|    dav1d_masks.offsets[1][bs-BS_32x32].ii[p + 1] = \
  |  |  277|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w422##x##h422[p*w422*h422]); \
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  |  |  278|      3|    dav1d_masks.offsets[2][bs-BS_32x32].ii[p + 1] = \
  |  |  279|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w420##x##h420[p*w420*h420])
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  ------------------
  295|      3|        ASSIGN_NONDC_II_OFFSET(BS_16x8,  16, 16,  8,  8,  8,  8);
  ------------------
  |  |  274|      3|    dav1d_masks.offsets[0][bs-BS_32x32].ii[p + 1] = \
  |  |  275|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w444##x##h444[p*w444*h444]); \
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  |  |  276|      3|    dav1d_masks.offsets[1][bs-BS_32x32].ii[p + 1] = \
  |  |  277|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w422##x##h422[p*w422*h422]); \
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  |  |  278|      3|    dav1d_masks.offsets[2][bs-BS_32x32].ii[p + 1] = \
  |  |  279|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w420##x##h420[p*w420*h420])
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  ------------------
  296|      3|        ASSIGN_NONDC_II_OFFSET(BS_8x16,   8, 16,  4, 16,  4,  8);
  ------------------
  |  |  274|      3|    dav1d_masks.offsets[0][bs-BS_32x32].ii[p + 1] = \
  |  |  275|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w444##x##h444[p*w444*h444]); \
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  |  |  276|      3|    dav1d_masks.offsets[1][bs-BS_32x32].ii[p + 1] = \
  |  |  277|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w422##x##h422[p*w422*h422]); \
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  |  |  278|      3|    dav1d_masks.offsets[2][bs-BS_32x32].ii[p + 1] = \
  |  |  279|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w420##x##h420[p*w420*h420])
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  ------------------
  297|      3|        ASSIGN_NONDC_II_OFFSET(BS_8x8,    8,  8,  4,  8,  4,  4);
  ------------------
  |  |  274|      3|    dav1d_masks.offsets[0][bs-BS_32x32].ii[p + 1] = \
  |  |  275|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w444##x##h444[p*w444*h444]); \
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  |  |  276|      3|    dav1d_masks.offsets[1][bs-BS_32x32].ii[p + 1] = \
  |  |  277|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w422##x##h422[p*w422*h422]); \
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  |  |  278|      3|    dav1d_masks.offsets[2][bs-BS_32x32].ii[p + 1] = \
  |  |  279|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w420##x##h420[p*w420*h420])
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  ------------------
  298|      3|    }
  299|      1|}
wedge.c:insert_border:
   90|    128|{
   91|    128|    if (ctr > 4) memset(dst, 0, ctr - 4);
  ------------------
  |  Branch (91:9): [True: 128, False: 0]
  ------------------
   92|    128|    memcpy(dst + imax(ctr, 4) - 4, src + imax(4 - ctr, 0), imin(64 - ctr, 8));
   93|    128|    if (ctr < 64 - 4)
  ------------------
  |  Branch (93:9): [True: 128, False: 0]
  ------------------
   94|    128|        memset(dst + ctr + 4, 64, 64 - 4 - ctr);
   95|    128|}
wedge.c:transpose:
   97|      2|static void transpose(uint8_t *const dst, const uint8_t *const src) {
   98|    130|    for (int y = 0, y_off = 0; y < 64; y++, y_off += 64)
  ------------------
  |  Branch (98:32): [True: 128, False: 2]
  ------------------
   99|  8.32k|        for (int x = 0, x_off = 0; x < 64; x++, x_off += 64)
  ------------------
  |  Branch (99:36): [True: 8.19k, False: 128]
  ------------------
  100|  8.19k|            dst[x_off + y] = src[y_off + x];
  101|      2|}
wedge.c:hflip:
  103|      2|static void hflip(uint8_t *const dst, const uint8_t *const src) {
  104|    130|    for (int y = 0, y_off = 0; y < 64; y++, y_off += 64)
  ------------------
  |  Branch (104:32): [True: 128, False: 2]
  ------------------
  105|  8.32k|        for (int x = 0; x < 64; x++)
  ------------------
  |  Branch (105:25): [True: 8.19k, False: 128]
  ------------------
  106|  8.19k|            dst[y_off + 64 - 1 - x] = src[y_off + x];
  107|      2|}
wedge.c:fill2d_16x2:
  153|      9|{
  154|      9|    const int n_stride_444 = (w * h);
  155|      9|    const int n_stride_422 = n_stride_444 >> 1;
  156|      9|    const int n_stride_420 = n_stride_444 >> 2;
  157|      9|    const int sign_stride_422 = 16 * n_stride_422;
  158|      9|    const int sign_stride_420 = 16 * n_stride_420;
  159|       |
  160|       |    // assign pointer offsets in lookup table
  161|    153|    for (int n = 0; n < 16; n++) {
  ------------------
  |  Branch (161:21): [True: 144, False: 9]
  ------------------
  162|    144|        const int sign = signs & 1;
  163|       |
  164|    144|        copy2d(masks_444, master[cb[n].direction], sign, w, h,
  165|    144|               32 - (w * cb[n].x_offset >> 3), 32 - (h * cb[n].y_offset >> 3));
  166|       |
  167|       |        // not using !sign is intentional here, since 444 does not require
  168|       |        // any rounding since no chroma subsampling is applied.
  169|    144|        dav1d_masks.offsets[0][bs].wedge[0][n] =
  170|    144|        dav1d_masks.offsets[0][bs].wedge[1][n] = MASK_OFFSET(masks_444);
  ------------------
  |  |  129|    144|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  ------------------
  171|       |
  172|    144|        dav1d_masks.offsets[1][bs].wedge[0][n] =
  173|    144|            init_chroma(&masks_422[ sign * sign_stride_422], masks_444, 0, w, h, 0);
  174|    144|        dav1d_masks.offsets[1][bs].wedge[1][n] =
  175|    144|            init_chroma(&masks_422[!sign * sign_stride_422], masks_444, 1, w, h, 0);
  176|    144|        dav1d_masks.offsets[2][bs].wedge[0][n] =
  177|    144|            init_chroma(&masks_420[ sign * sign_stride_420], masks_444, 0, w, h, 1);
  178|    144|        dav1d_masks.offsets[2][bs].wedge[1][n] =
  179|    144|            init_chroma(&masks_420[!sign * sign_stride_420], masks_444, 1, w, h, 1);
  180|       |
  181|    144|        signs >>= 1;
  182|    144|        masks_444 += n_stride_444;
  183|    144|        masks_422 += n_stride_422;
  184|    144|        masks_420 += n_stride_420;
  185|    144|    }
  186|      9|}
wedge.c:copy2d:
  111|    144|{
  112|    144|    src += y_off * 64 + x_off;
  113|    144|    if (sign) {
  ------------------
  |  Branch (113:9): [True: 109, False: 35]
  ------------------
  114|  2.14k|        for (int y = 0; y < h; y++) {
  ------------------
  |  Branch (114:25): [True: 2.03k, False: 109]
  ------------------
  115|  40.4k|            for (int x = 0; x < w; x++)
  ------------------
  |  Branch (115:29): [True: 38.4k, False: 2.03k]
  ------------------
  116|  38.4k|                dst[x] = 64 - src[x];
  117|  2.03k|            src += 64;
  118|  2.03k|            dst += w;
  119|  2.03k|        }
  120|    109|    } else {
  121|    691|        for (int y = 0; y < h; y++) {
  ------------------
  |  Branch (121:25): [True: 656, False: 35]
  ------------------
  122|    656|            memcpy(dst, src, w);
  123|    656|            src += 64;
  124|    656|            dst += w;
  125|    656|        }
  126|     35|    }
  127|    144|}
wedge.c:init_chroma:
  134|    576|{
  135|    576|    const uint16_t offset = MASK_OFFSET(chroma);
  ------------------
  |  |  129|    576|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  ------------------
  136|  8.64k|    for (int y = 0; y < h; y += 1 + ss_ver) {
  ------------------
  |  Branch (136:21): [True: 8.06k, False: 576]
  ------------------
  137|  83.3k|        for (int x = 0; x < w; x += 2) {
  ------------------
  |  Branch (137:25): [True: 75.2k, False: 8.06k]
  ------------------
  138|  75.2k|            int sum = luma[x] + luma[x + 1] + 1;
  139|  75.2k|            if (ss_ver) sum += luma[w + x] + luma[w + x + 1] + 1;
  ------------------
  |  Branch (139:17): [True: 25.0k, False: 50.1k]
  ------------------
  140|  75.2k|            chroma[x >> 1] = (sum - sign) >> (1 + ss_ver);
  141|  75.2k|        }
  142|  8.06k|        luma += w << ss_ver;
  143|  8.06k|        chroma += w >> 1;
  144|  8.06k|    }
  145|    576|    return offset;
  146|    576|}
wedge.c:build_nondc_ii_masks:
  190|      9|{
  191|      9|    static const uint8_t ii_weights_1d[32] = {
  192|      9|        60, 52, 45, 39, 34, 30, 26, 22, 19, 17, 15, 13, 11, 10,  8,  7,
  193|      9|         6,  6,  5,  4,  4,  3,  3,  2,  2,  2,  2,  1,  1,  1,  1,  1,
  194|      9|    };
  195|       |
  196|      9|    uint8_t *const mask_h  = &mask_v[w * h];
  197|      9|    uint8_t *const mask_sm = &mask_h[w * h];
  198|    173|    for (int y = 0, off = 0; y < h; y++, off += w) {
  ------------------
  |  Branch (198:30): [True: 164, False: 9]
  ------------------
  199|    164|        memset(&mask_v[off], ii_weights_1d[y * step], w);
  200|  2.51k|        for (int x = 0; x < w; x++) {
  ------------------
  |  Branch (200:25): [True: 2.35k, False: 164]
  ------------------
  201|  2.35k|            mask_sm[off + x] = ii_weights_1d[imin(x, y) * step];
  202|  2.35k|            mask_h[off + x] = ii_weights_1d[x * step];
  203|  2.35k|        }
  204|    164|    }
  205|      9|}

cdef_tmpl.c:cdef_dsp_init_x86:
   46|  3.41k|static ALWAYS_INLINE void cdef_dsp_init_x86(Dav1dCdefDSPContext *const c) {
   47|  3.41k|    const unsigned flags = dav1d_get_cpu_flags();
   48|       |
   49|  3.41k|#if BITDEPTH == 8
   50|  3.41k|    if (!(flags & DAV1D_X86_CPU_FLAG_SSE2)) return;
  ------------------
  |  Branch (50:9): [True: 0, False: 3.41k]
  ------------------
   51|       |
   52|  3.41k|    c->fb[0] = BF(dav1d_cdef_filter_8x8, sse2);
  ------------------
  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   53|  3.41k|    c->fb[1] = BF(dav1d_cdef_filter_4x8, sse2);
  ------------------
  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   54|  3.41k|    c->fb[2] = BF(dav1d_cdef_filter_4x4, sse2);
  ------------------
  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   55|  3.41k|#endif
   56|       |
   57|  3.41k|    if (!(flags & DAV1D_X86_CPU_FLAG_SSSE3)) return;
  ------------------
  |  Branch (57:9): [True: 0, False: 3.41k]
  ------------------
   58|       |
   59|  3.41k|    c->dir = BF(dav1d_cdef_dir, ssse3);
  ------------------
  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   60|  3.41k|    c->fb[0] = BF(dav1d_cdef_filter_8x8, ssse3);
  ------------------
  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   61|  3.41k|    c->fb[1] = BF(dav1d_cdef_filter_4x8, ssse3);
  ------------------
  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   62|  3.41k|    c->fb[2] = BF(dav1d_cdef_filter_4x4, ssse3);
  ------------------
  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   63|       |
   64|  3.41k|    if (!(flags & DAV1D_X86_CPU_FLAG_SSE41)) return;
  ------------------
  |  Branch (64:9): [True: 0, False: 3.41k]
  ------------------
   65|       |
   66|  3.41k|    c->dir = BF(dav1d_cdef_dir, sse4);
  ------------------
  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   67|  3.41k|#if BITDEPTH == 8
   68|  3.41k|    c->fb[0] = BF(dav1d_cdef_filter_8x8, sse4);
  ------------------
  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   69|  3.41k|    c->fb[1] = BF(dav1d_cdef_filter_4x8, sse4);
  ------------------
  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   70|  3.41k|    c->fb[2] = BF(dav1d_cdef_filter_4x4, sse4);
  ------------------
  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   71|  3.41k|#endif
   72|       |
   73|  3.41k|#if ARCH_X86_64
   74|  3.41k|    if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return;
  ------------------
  |  Branch (74:9): [True: 0, False: 3.41k]
  ------------------
   75|       |
   76|  3.41k|    c->dir = BF(dav1d_cdef_dir, avx2);
  ------------------
  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   77|  3.41k|    c->fb[0] = BF(dav1d_cdef_filter_8x8, avx2);
  ------------------
  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   78|  3.41k|    c->fb[1] = BF(dav1d_cdef_filter_4x8, avx2);
  ------------------
  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   79|  3.41k|    c->fb[2] = BF(dav1d_cdef_filter_4x4, avx2);
  ------------------
  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   80|       |
   81|  3.41k|    if (!(flags & DAV1D_X86_CPU_FLAG_AVX512ICL)) return;
  ------------------
  |  Branch (81:9): [True: 3.41k, False: 0]
  ------------------
   82|       |
   83|      0|    c->fb[0] = BF(dav1d_cdef_filter_8x8, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   84|      0|    c->fb[1] = BF(dav1d_cdef_filter_4x8, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   85|      0|    c->fb[2] = BF(dav1d_cdef_filter_4x4, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   86|      0|#endif
   87|      0|}

dav1d_get_cpu_flags_x86:
   47|      1|COLD unsigned dav1d_get_cpu_flags_x86(void) {
   48|      1|    union {
   49|      1|        CpuidRegisters r;
   50|      1|        struct {
   51|      1|            uint32_t max_leaf;
   52|      1|            char vendor[12];
   53|      1|        };
   54|      1|    } cpu;
   55|      1|    dav1d_cpu_cpuid(&cpu.r, 0, 0);
   56|      1|    unsigned flags = dav1d_get_default_cpu_flags();
   57|       |
   58|      1|    if (cpu.max_leaf >= 1) {
  ------------------
  |  Branch (58:9): [True: 1, False: 0]
  ------------------
   59|      1|        CpuidRegisters r;
   60|      1|        dav1d_cpu_cpuid(&r, 1, 0);
   61|      1|        const unsigned family = ((r.eax >> 8) & 0x0f) + ((r.eax >> 20) & 0xff);
   62|       |
   63|      1|        if (X(r.edx, 0x06008000)) /* CMOV/SSE/SSE2 */ {
  ------------------
  |  |   45|      1|#define X(reg, mask) (((reg) & (mask)) == (mask))
  |  |  ------------------
  |  |  |  Branch (45:22): [True: 1, False: 0]
  |  |  ------------------
  ------------------
   64|      1|            flags |= DAV1D_X86_CPU_FLAG_SSE2;
   65|      1|            if (X(r.ecx, 0x00000201)) /* SSE3/SSSE3 */ {
  ------------------
  |  |   45|      1|#define X(reg, mask) (((reg) & (mask)) == (mask))
  |  |  ------------------
  |  |  |  Branch (45:22): [True: 1, False: 0]
  |  |  ------------------
  ------------------
   66|      1|                flags |= DAV1D_X86_CPU_FLAG_SSSE3;
   67|      1|                if (X(r.ecx, 0x00080000)) /* SSE4.1 */
  ------------------
  |  |   45|      1|#define X(reg, mask) (((reg) & (mask)) == (mask))
  |  |  ------------------
  |  |  |  Branch (45:22): [True: 1, False: 0]
  |  |  ------------------
  ------------------
   68|      1|                    flags |= DAV1D_X86_CPU_FLAG_SSE41;
   69|      1|            }
   70|      1|        }
   71|      1|#if ARCH_X86_64
   72|       |        /* We only support >128-bit SIMD on x86-64. */
   73|      1|        if (X(r.ecx, 0x18000000)) /* OSXSAVE/AVX */ {
  ------------------
  |  |   45|      1|#define X(reg, mask) (((reg) & (mask)) == (mask))
  |  |  ------------------
  |  |  |  Branch (45:22): [True: 1, False: 0]
  |  |  ------------------
  ------------------
   74|      1|            const uint64_t xcr0 = dav1d_cpu_xgetbv(0);
   75|      1|            if (X(xcr0, 0x00000006)) /* XMM/YMM */ {
  ------------------
  |  |   45|      1|#define X(reg, mask) (((reg) & (mask)) == (mask))
  |  |  ------------------
  |  |  |  Branch (45:22): [True: 1, False: 0]
  |  |  ------------------
  ------------------
   76|      1|                if (cpu.max_leaf >= 7) {
  ------------------
  |  Branch (76:21): [True: 1, False: 0]
  ------------------
   77|      1|                    dav1d_cpu_cpuid(&r, 7, 0);
   78|      1|                    if (X(r.ebx, 0x00000128)) /* BMI1/BMI2/AVX2 */ {
  ------------------
  |  |   45|      1|#define X(reg, mask) (((reg) & (mask)) == (mask))
  |  |  ------------------
  |  |  |  Branch (45:22): [True: 1, False: 0]
  |  |  ------------------
  ------------------
   79|      1|                        flags |= DAV1D_X86_CPU_FLAG_AVX2;
   80|      1|                        if (X(xcr0, 0x000000e0)) /* ZMM/OPMASK */ {
  ------------------
  |  |   45|      1|#define X(reg, mask) (((reg) & (mask)) == (mask))
  |  |  ------------------
  |  |  |  Branch (45:22): [True: 0, False: 1]
  |  |  ------------------
  ------------------
   81|      0|                            if (X(r.ebx, 0xd0230000) && X(r.ecx, 0x00005f42))
  ------------------
  |  |   45|      0|#define X(reg, mask) (((reg) & (mask)) == (mask))
  |  |  ------------------
  |  |  |  Branch (45:22): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                                          if (X(r.ebx, 0xd0230000) && X(r.ecx, 0x00005f42))
  ------------------
  |  |   45|      0|#define X(reg, mask) (((reg) & (mask)) == (mask))
  |  |  ------------------
  |  |  |  Branch (45:22): [True: 0, False: 0]
  |  |  ------------------
  ------------------
   82|      0|                                flags |= DAV1D_X86_CPU_FLAG_AVX512ICL;
   83|      0|                        }
   84|      1|                    }
   85|      1|                }
   86|      1|            }
   87|      1|        }
   88|      1|#endif
   89|      1|        if (!memcmp(cpu.vendor, "AuthenticAMD", sizeof(cpu.vendor))) {
  ------------------
  |  Branch (89:13): [True: 1, False: 0]
  ------------------
   90|      1|            if ((flags & DAV1D_X86_CPU_FLAG_AVX2) && family <= 0x19) {
  ------------------
  |  Branch (90:17): [True: 1, False: 0]
  |  Branch (90:54): [True: 1, False: 0]
  ------------------
   91|       |                /* Excavator, Zen, Zen+, Zen 2, Zen 3, Zen 3+, Zen 4 */
   92|      1|                flags |= DAV1D_X86_CPU_FLAG_SLOW_GATHER;
   93|      1|            }
   94|      1|        }
   95|      1|    }
   96|       |
   97|      1|    return flags;
   98|      1|}

filmgrain_tmpl.c:film_grain_dsp_init_x86:
   45|  8.03k|static ALWAYS_INLINE void film_grain_dsp_init_x86(Dav1dFilmGrainDSPContext *const c) {
   46|  8.03k|    const unsigned flags = dav1d_get_cpu_flags();
   47|       |
   48|  8.03k|    if (!(flags & DAV1D_X86_CPU_FLAG_SSSE3)) return;
  ------------------
  |  Branch (48:9): [True: 0, False: 8.03k]
  ------------------
   49|       |
   50|  8.03k|    c->generate_grain_y = BF(dav1d_generate_grain_y, ssse3);
  ------------------
  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   51|  8.03k|    c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I420 - 1] = BF(dav1d_generate_grain_uv_420, ssse3);
  ------------------
  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   52|  8.03k|    c->fgy_32x32xn = BF(dav1d_fgy_32x32xn, ssse3);
  ------------------
  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   53|  8.03k|    c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I420 - 1] = BF(dav1d_fguv_32x32xn_i420, ssse3);
  ------------------
  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   54|  8.03k|    c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I422 - 1] = BF(dav1d_generate_grain_uv_422, ssse3);
  ------------------
  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   55|  8.03k|    c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I444 - 1] = BF(dav1d_generate_grain_uv_444, ssse3);
  ------------------
  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   56|  8.03k|    c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I422 - 1] = BF(dav1d_fguv_32x32xn_i422, ssse3);
  ------------------
  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   57|  8.03k|    c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I444 - 1] = BF(dav1d_fguv_32x32xn_i444, ssse3);
  ------------------
  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   58|       |
   59|  8.03k|#if ARCH_X86_64
   60|  8.03k|    if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return;
  ------------------
  |  Branch (60:9): [True: 0, False: 8.03k]
  ------------------
   61|       |
   62|  8.03k|    c->generate_grain_y = BF(dav1d_generate_grain_y, avx2);
  ------------------
  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   63|  8.03k|    c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I420 - 1] = BF(dav1d_generate_grain_uv_420, avx2);
  ------------------
  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   64|  8.03k|    c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I422 - 1] = BF(dav1d_generate_grain_uv_422, avx2);
  ------------------
  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   65|  8.03k|    c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I444 - 1] = BF(dav1d_generate_grain_uv_444, avx2);
  ------------------
  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   66|       |
   67|  8.03k|    if (!(flags & DAV1D_X86_CPU_FLAG_SLOW_GATHER)) {
  ------------------
  |  Branch (67:9): [True: 0, False: 8.03k]
  ------------------
   68|      0|        c->fgy_32x32xn = BF(dav1d_fgy_32x32xn, avx2);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   69|      0|        c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I420 - 1] = BF(dav1d_fguv_32x32xn_i420, avx2);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   70|      0|        c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I422 - 1] = BF(dav1d_fguv_32x32xn_i422, avx2);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   71|      0|        c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I444 - 1] = BF(dav1d_fguv_32x32xn_i444, avx2);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   72|      0|    }
   73|       |
   74|  8.03k|    if (!(flags & DAV1D_X86_CPU_FLAG_AVX512ICL)) return;
  ------------------
  |  Branch (74:9): [True: 8.03k, False: 0]
  ------------------
   75|       |
   76|      0|    if (BITDEPTH == 8 || !(flags & DAV1D_X86_CPU_FLAG_SLOW_GATHER)) {
  ------------------
  |  Branch (76:9): [True: 0, Folded]
  |  Branch (76:26): [True: 0, False: 0]
  ------------------
   77|      0|        c->fgy_32x32xn = BF(dav1d_fgy_32x32xn, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   78|      0|        c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I420 - 1] = BF(dav1d_fguv_32x32xn_i420, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   79|      0|        c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I422 - 1] = BF(dav1d_fguv_32x32xn_i422, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   80|      0|        c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I444 - 1] = BF(dav1d_fguv_32x32xn_i444, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   81|      0|    }
   82|      0|#endif
   83|      0|}

ipred_tmpl.c:intra_pred_dsp_init_x86:
   71|  8.03k|static ALWAYS_INLINE void intra_pred_dsp_init_x86(Dav1dIntraPredDSPContext *const c) {
   72|  8.03k|    const unsigned flags = dav1d_get_cpu_flags();
   73|       |
   74|  8.03k|    if (!(flags & DAV1D_X86_CPU_FLAG_SSSE3)) return;
  ------------------
  |  Branch (74:9): [True: 0, False: 8.03k]
  ------------------
   75|       |
   76|  8.03k|    init_angular_ipred_fn(DC_PRED,       ipred_dc,       ssse3);
  ------------------
  |  |   39|  8.03k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.03k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   77|  8.03k|    init_angular_ipred_fn(DC_128_PRED,   ipred_dc_128,   ssse3);
  ------------------
  |  |   39|  8.03k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.03k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   78|  8.03k|    init_angular_ipred_fn(TOP_DC_PRED,   ipred_dc_top,   ssse3);
  ------------------
  |  |   39|  8.03k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.03k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   79|  8.03k|    init_angular_ipred_fn(LEFT_DC_PRED,  ipred_dc_left,  ssse3);
  ------------------
  |  |   39|  8.03k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.03k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   80|  8.03k|    init_angular_ipred_fn(HOR_PRED,      ipred_h,        ssse3);
  ------------------
  |  |   39|  8.03k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.03k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   81|  8.03k|    init_angular_ipred_fn(VERT_PRED,     ipred_v,        ssse3);
  ------------------
  |  |   39|  8.03k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.03k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   82|  8.03k|    init_angular_ipred_fn(PAETH_PRED,    ipred_paeth,    ssse3);
  ------------------
  |  |   39|  8.03k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.03k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   83|  8.03k|    init_angular_ipred_fn(SMOOTH_PRED,   ipred_smooth,   ssse3);
  ------------------
  |  |   39|  8.03k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.03k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   84|  8.03k|    init_angular_ipred_fn(SMOOTH_H_PRED, ipred_smooth_h, ssse3);
  ------------------
  |  |   39|  8.03k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.03k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   85|  8.03k|    init_angular_ipred_fn(SMOOTH_V_PRED, ipred_smooth_v, ssse3);
  ------------------
  |  |   39|  8.03k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.03k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   86|  8.03k|    init_angular_ipred_fn(Z1_PRED,       ipred_z1,       ssse3);
  ------------------
  |  |   39|  8.03k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.03k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   87|  8.03k|    init_angular_ipred_fn(Z2_PRED,       ipred_z2,       ssse3);
  ------------------
  |  |   39|  8.03k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.03k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   88|  8.03k|    init_angular_ipred_fn(Z3_PRED,       ipred_z3,       ssse3);
  ------------------
  |  |   39|  8.03k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.03k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   89|  8.03k|    init_angular_ipred_fn(FILTER_PRED,   ipred_filter,   ssse3);
  ------------------
  |  |   39|  8.03k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.03k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   90|       |
   91|  8.03k|    init_cfl_pred_fn(DC_PRED,      ipred_cfl,      ssse3);
  ------------------
  |  |   41|  8.03k|    init_fn(cfl_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.03k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   92|  8.03k|    init_cfl_pred_fn(DC_128_PRED,  ipred_cfl_128,  ssse3);
  ------------------
  |  |   41|  8.03k|    init_fn(cfl_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.03k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   93|  8.03k|    init_cfl_pred_fn(TOP_DC_PRED,  ipred_cfl_top,  ssse3);
  ------------------
  |  |   41|  8.03k|    init_fn(cfl_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.03k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   94|  8.03k|    init_cfl_pred_fn(LEFT_DC_PRED, ipred_cfl_left, ssse3);
  ------------------
  |  |   41|  8.03k|    init_fn(cfl_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.03k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   95|       |
   96|  8.03k|    init_cfl_ac_fn(DAV1D_PIXEL_LAYOUT_I420 - 1, ipred_cfl_ac_420, ssse3);
  ------------------
  |  |   43|  8.03k|    init_fn(cfl_ac, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.03k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   97|  8.03k|    init_cfl_ac_fn(DAV1D_PIXEL_LAYOUT_I422 - 1, ipred_cfl_ac_422, ssse3);
  ------------------
  |  |   43|  8.03k|    init_fn(cfl_ac, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.03k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   98|  8.03k|    init_cfl_ac_fn(DAV1D_PIXEL_LAYOUT_I444 - 1, ipred_cfl_ac_444, ssse3);
  ------------------
  |  |   43|  8.03k|    init_fn(cfl_ac, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.03k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   99|       |
  100|  8.03k|    c->pal_pred = BF(dav1d_pal_pred, ssse3);
  ------------------
  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  101|       |
  102|  8.03k|#if ARCH_X86_64
  103|  8.03k|    if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return;
  ------------------
  |  Branch (103:9): [True: 0, False: 8.03k]
  ------------------
  104|       |
  105|  8.03k|    init_angular_ipred_fn(DC_PRED,       ipred_dc,       avx2);
  ------------------
  |  |   39|  8.03k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.03k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  106|  8.03k|    init_angular_ipred_fn(DC_128_PRED,   ipred_dc_128,   avx2);
  ------------------
  |  |   39|  8.03k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.03k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  107|  8.03k|    init_angular_ipred_fn(TOP_DC_PRED,   ipred_dc_top,   avx2);
  ------------------
  |  |   39|  8.03k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.03k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  108|  8.03k|    init_angular_ipred_fn(LEFT_DC_PRED,  ipred_dc_left,  avx2);
  ------------------
  |  |   39|  8.03k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.03k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  109|  8.03k|    init_angular_ipred_fn(HOR_PRED,      ipred_h,        avx2);
  ------------------
  |  |   39|  8.03k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.03k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  110|  8.03k|    init_angular_ipred_fn(VERT_PRED,     ipred_v,        avx2);
  ------------------
  |  |   39|  8.03k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.03k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  111|  8.03k|    init_angular_ipred_fn(PAETH_PRED,    ipred_paeth,    avx2);
  ------------------
  |  |   39|  8.03k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.03k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  112|  8.03k|    init_angular_ipred_fn(SMOOTH_PRED,   ipred_smooth,   avx2);
  ------------------
  |  |   39|  8.03k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.03k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  113|  8.03k|    init_angular_ipred_fn(SMOOTH_H_PRED, ipred_smooth_h, avx2);
  ------------------
  |  |   39|  8.03k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.03k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  114|  8.03k|    init_angular_ipred_fn(SMOOTH_V_PRED, ipred_smooth_v, avx2);
  ------------------
  |  |   39|  8.03k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.03k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  115|  8.03k|    init_angular_ipred_fn(Z1_PRED,       ipred_z1,       avx2);
  ------------------
  |  |   39|  8.03k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.03k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  116|  8.03k|    init_angular_ipred_fn(Z2_PRED,       ipred_z2,       avx2);
  ------------------
  |  |   39|  8.03k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.03k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  117|  8.03k|    init_angular_ipred_fn(Z3_PRED,       ipred_z3,       avx2);
  ------------------
  |  |   39|  8.03k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.03k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  118|  8.03k|    init_angular_ipred_fn(FILTER_PRED,   ipred_filter,   avx2);
  ------------------
  |  |   39|  8.03k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.03k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  119|       |
  120|  8.03k|    init_cfl_pred_fn(DC_PRED,      ipred_cfl,      avx2);
  ------------------
  |  |   41|  8.03k|    init_fn(cfl_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.03k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  121|  8.03k|    init_cfl_pred_fn(DC_128_PRED,  ipred_cfl_128,  avx2);
  ------------------
  |  |   41|  8.03k|    init_fn(cfl_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.03k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  122|  8.03k|    init_cfl_pred_fn(TOP_DC_PRED,  ipred_cfl_top,  avx2);
  ------------------
  |  |   41|  8.03k|    init_fn(cfl_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.03k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  123|  8.03k|    init_cfl_pred_fn(LEFT_DC_PRED, ipred_cfl_left, avx2);
  ------------------
  |  |   41|  8.03k|    init_fn(cfl_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.03k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  124|       |
  125|  8.03k|    init_cfl_ac_fn(DAV1D_PIXEL_LAYOUT_I420 - 1, ipred_cfl_ac_420, avx2);
  ------------------
  |  |   43|  8.03k|    init_fn(cfl_ac, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.03k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  126|  8.03k|    init_cfl_ac_fn(DAV1D_PIXEL_LAYOUT_I422 - 1, ipred_cfl_ac_422, avx2);
  ------------------
  |  |   43|  8.03k|    init_fn(cfl_ac, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.03k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  127|  8.03k|    init_cfl_ac_fn(DAV1D_PIXEL_LAYOUT_I444 - 1, ipred_cfl_ac_444, avx2);
  ------------------
  |  |   43|  8.03k|    init_fn(cfl_ac, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.03k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  128|       |
  129|  8.03k|    c->pal_pred = BF(dav1d_pal_pred, avx2);
  ------------------
  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  130|       |
  131|  8.03k|    if (!(flags & DAV1D_X86_CPU_FLAG_AVX512ICL)) return;
  ------------------
  |  Branch (131:9): [True: 8.03k, False: 0]
  ------------------
  132|       |
  133|      0|#if BITDEPTH == 8
  134|      0|    init_angular_ipred_fn(DC_PRED,       ipred_dc,       avx512icl);
  ------------------
  |  |   39|      0|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.03k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  135|      0|    init_angular_ipred_fn(DC_128_PRED,   ipred_dc_128,   avx512icl);
  ------------------
  |  |   39|      0|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|      0|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  136|      0|    init_angular_ipred_fn(TOP_DC_PRED,   ipred_dc_top,   avx512icl);
  ------------------
  |  |   39|      0|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|      0|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  137|      0|    init_angular_ipred_fn(LEFT_DC_PRED,  ipred_dc_left,  avx512icl);
  ------------------
  |  |   39|      0|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|      0|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  138|      0|    init_angular_ipred_fn(HOR_PRED,      ipred_h,        avx512icl);
  ------------------
  |  |   39|      0|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|      0|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  139|      0|    init_angular_ipred_fn(VERT_PRED,     ipred_v,        avx512icl);
  ------------------
  |  |   39|      0|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|      0|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  140|      0|    init_angular_ipred_fn(Z2_PRED,       ipred_z2,       avx512icl);
  ------------------
  |  |   39|      0|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|      0|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  141|      0|#endif
  142|      0|    init_angular_ipred_fn(PAETH_PRED,    ipred_paeth,    avx512icl);
  ------------------
  |  |   39|      0|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|      0|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  143|      0|    init_angular_ipred_fn(SMOOTH_PRED,   ipred_smooth,   avx512icl);
  ------------------
  |  |   39|      0|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|      0|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  144|      0|    init_angular_ipred_fn(SMOOTH_H_PRED, ipred_smooth_h, avx512icl);
  ------------------
  |  |   39|      0|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|      0|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  145|      0|    init_angular_ipred_fn(SMOOTH_V_PRED, ipred_smooth_v, avx512icl);
  ------------------
  |  |   39|      0|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|      0|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  146|      0|    init_angular_ipred_fn(Z1_PRED,       ipred_z1,       avx512icl);
  ------------------
  |  |   39|      0|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|      0|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  147|      0|    init_angular_ipred_fn(Z2_PRED,       ipred_z2,       avx512icl);
  ------------------
  |  |   39|      0|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|      0|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  148|      0|    init_angular_ipred_fn(Z3_PRED,       ipred_z3,       avx512icl);
  ------------------
  |  |   39|      0|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|      0|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  149|      0|    init_angular_ipred_fn(FILTER_PRED,   ipred_filter,   avx512icl);
  ------------------
  |  |   39|      0|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|      0|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  150|       |
  151|      0|    c->pal_pred = BF(dav1d_pal_pred, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  152|      0|#endif
  153|      0|}

itx_tmpl.c:itx_dsp_init_x86:
  112|  3.41k|{
  113|  3.41k|#define assign_itx_bpc_fn(pfx, w, h, type, type_enum, bpc, ext) \
  114|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  115|  3.41k|        BF_BPC(dav1d_inv_txfm_add_##type##_##w##x##h, bpc, ext)
  116|       |
  117|  3.41k|#define assign_itx1_bpc_fn(pfx, w, h, bpc, ext) \
  118|  3.41k|    assign_itx_bpc_fn(pfx, w, h, dct_dct,           DCT_DCT,           bpc, ext)
  119|       |
  120|  3.41k|#define assign_itx2_bpc_fn(pfx, w, h, bpc, ext) \
  121|  3.41k|    assign_itx1_bpc_fn(pfx, w, h, bpc, ext); \
  122|  3.41k|    assign_itx_bpc_fn(pfx, w, h, identity_identity, IDTX,              bpc, ext)
  123|       |
  124|  3.41k|#define assign_itx12_bpc_fn(pfx, w, h, bpc, ext) \
  125|  3.41k|    assign_itx2_bpc_fn(pfx, w, h, bpc, ext); \
  126|  3.41k|    assign_itx_bpc_fn(pfx, w, h, dct_adst,          ADST_DCT,          bpc, ext); \
  127|  3.41k|    assign_itx_bpc_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      bpc, ext); \
  128|  3.41k|    assign_itx_bpc_fn(pfx, w, h, dct_identity,      H_DCT,             bpc, ext); \
  129|  3.41k|    assign_itx_bpc_fn(pfx, w, h, adst_dct,          DCT_ADST,          bpc, ext); \
  130|  3.41k|    assign_itx_bpc_fn(pfx, w, h, adst_adst,         ADST_ADST,         bpc, ext); \
  131|  3.41k|    assign_itx_bpc_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     bpc, ext); \
  132|  3.41k|    assign_itx_bpc_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      bpc, ext); \
  133|  3.41k|    assign_itx_bpc_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     bpc, ext); \
  134|  3.41k|    assign_itx_bpc_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, bpc, ext); \
  135|  3.41k|    assign_itx_bpc_fn(pfx, w, h, identity_dct,      V_DCT,             bpc, ext)
  136|       |
  137|  3.41k|#define assign_itx16_bpc_fn(pfx, w, h, bpc, ext) \
  138|  3.41k|    assign_itx12_bpc_fn(pfx, w, h, bpc, ext); \
  139|  3.41k|    assign_itx_bpc_fn(pfx, w, h, adst_identity,     H_ADST,            bpc, ext); \
  140|  3.41k|    assign_itx_bpc_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        bpc, ext); \
  141|  3.41k|    assign_itx_bpc_fn(pfx, w, h, identity_adst,     V_ADST,            bpc, ext); \
  142|  3.41k|    assign_itx_bpc_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        bpc, ext)
  143|       |
  144|  3.41k|    const unsigned flags = dav1d_get_cpu_flags();
  145|       |
  146|  3.41k|    if (!(flags & DAV1D_X86_CPU_FLAG_SSE2)) return;
  ------------------
  |  Branch (146:9): [True: 0, False: 3.41k]
  ------------------
  147|       |
  148|  3.41k|    assign_itx_fn(, 4, 4, wht_wht, WHT_WHT, sse2);
  ------------------
  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  ------------------
  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  149|       |
  150|  3.41k|    if (!(flags & DAV1D_X86_CPU_FLAG_SSSE3)) return;
  ------------------
  |  Branch (150:9): [True: 0, False: 3.41k]
  ------------------
  151|       |
  152|  3.41k|#if BITDEPTH == 8
  153|  3.41k|    assign_itx16_fn(,   4,  4, ssse3);
  ------------------
  |  |  101|  3.41k|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|  3.41k|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|  3.41k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|  3.41k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  3.41k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|  3.41k|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|  3.41k|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|  3.41k|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|  3.41k|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|  3.41k|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|  3.41k|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|  3.41k|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|  3.41k|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|  3.41k|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|  3.41k|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  154|  3.41k|    assign_itx16_fn(R,  4,  8, ssse3);
  ------------------
  |  |  101|  3.41k|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|  3.41k|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|  3.41k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|  3.41k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  3.41k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|  3.41k|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|  3.41k|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|  3.41k|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|  3.41k|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|  3.41k|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|  3.41k|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|  3.41k|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|  3.41k|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|  3.41k|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|  3.41k|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  155|  3.41k|    assign_itx16_fn(R,  8,  4, ssse3);
  ------------------
  |  |  101|  3.41k|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|  3.41k|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|  3.41k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|  3.41k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  3.41k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|  3.41k|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|  3.41k|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|  3.41k|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|  3.41k|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|  3.41k|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|  3.41k|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|  3.41k|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|  3.41k|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|  3.41k|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|  3.41k|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  156|  3.41k|    assign_itx16_fn(,   8,  8, ssse3);
  ------------------
  |  |  101|  3.41k|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|  3.41k|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|  3.41k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|  3.41k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  3.41k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|  3.41k|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|  3.41k|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|  3.41k|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|  3.41k|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|  3.41k|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|  3.41k|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|  3.41k|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|  3.41k|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|  3.41k|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|  3.41k|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  157|  3.41k|    assign_itx16_fn(R,  4, 16, ssse3);
  ------------------
  |  |  101|  3.41k|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|  3.41k|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|  3.41k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|  3.41k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  3.41k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|  3.41k|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|  3.41k|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|  3.41k|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|  3.41k|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|  3.41k|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|  3.41k|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|  3.41k|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|  3.41k|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|  3.41k|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|  3.41k|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  158|  3.41k|    assign_itx16_fn(R, 16,  4, ssse3);
  ------------------
  |  |  101|  3.41k|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|  3.41k|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|  3.41k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|  3.41k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  3.41k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|  3.41k|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|  3.41k|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|  3.41k|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|  3.41k|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|  3.41k|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|  3.41k|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|  3.41k|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|  3.41k|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|  3.41k|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|  3.41k|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  159|  3.41k|    assign_itx16_fn(R,  8, 16, ssse3);
  ------------------
  |  |  101|  3.41k|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|  3.41k|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|  3.41k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|  3.41k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  3.41k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|  3.41k|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|  3.41k|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|  3.41k|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|  3.41k|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|  3.41k|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|  3.41k|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|  3.41k|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|  3.41k|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|  3.41k|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|  3.41k|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  160|  3.41k|    assign_itx16_fn(R, 16,  8, ssse3);
  ------------------
  |  |  101|  3.41k|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|  3.41k|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|  3.41k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|  3.41k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  3.41k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|  3.41k|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|  3.41k|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|  3.41k|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|  3.41k|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|  3.41k|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|  3.41k|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|  3.41k|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|  3.41k|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|  3.41k|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|  3.41k|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  161|  3.41k|    assign_itx12_fn(,  16, 16, ssse3);
  ------------------
  |  |   88|  3.41k|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   84|  3.41k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   81|  3.41k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   85|  3.41k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   89|  3.41k|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   90|  3.41k|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   91|  3.41k|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   92|  3.41k|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   93|  3.41k|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   94|  3.41k|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   95|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   96|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   97|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   98|  3.41k|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  162|  3.41k|    assign_itx2_fn (R,  8, 32, ssse3);
  ------------------
  |  |   84|  3.41k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   81|  3.41k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   85|  3.41k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  163|  3.41k|    assign_itx2_fn (R, 32,  8, ssse3);
  ------------------
  |  |   84|  3.41k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   81|  3.41k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   85|  3.41k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  164|  3.41k|    assign_itx2_fn (R, 16, 32, ssse3);
  ------------------
  |  |   84|  3.41k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   81|  3.41k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   85|  3.41k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  165|  3.41k|    assign_itx2_fn (R, 32, 16, ssse3);
  ------------------
  |  |   84|  3.41k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   81|  3.41k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   85|  3.41k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  166|  3.41k|    assign_itx2_fn (,  32, 32, ssse3);
  ------------------
  |  |   84|  3.41k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   81|  3.41k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   85|  3.41k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  167|  3.41k|    assign_itx1_fn (R, 16, 64, ssse3);
  ------------------
  |  |   81|  3.41k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  168|  3.41k|    assign_itx1_fn (R, 32, 64, ssse3);
  ------------------
  |  |   81|  3.41k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  169|  3.41k|    assign_itx1_fn (R, 64, 16, ssse3);
  ------------------
  |  |   81|  3.41k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  170|  3.41k|    assign_itx1_fn (R, 64, 32, ssse3);
  ------------------
  |  |   81|  3.41k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  171|  3.41k|    assign_itx1_fn ( , 64, 64, ssse3);
  ------------------
  |  |   81|  3.41k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  172|  3.41k|    *all_simd = 1;
  173|  3.41k|#endif
  174|       |
  175|  3.41k|    if (!(flags & DAV1D_X86_CPU_FLAG_SSE41)) return;
  ------------------
  |  Branch (175:9): [True: 0, False: 3.41k]
  ------------------
  176|       |
  177|       |#if BITDEPTH == 16
  178|       |    if (bpc == 10) {
  179|       |        assign_itx16_fn(,   4,  4, sse4);
  180|       |        assign_itx16_fn(R,  4,  8, sse4);
  181|       |        assign_itx16_fn(R,  4, 16, sse4);
  182|       |        assign_itx16_fn(R,  8,  4, sse4);
  183|       |        assign_itx16_fn(,   8,  8, sse4);
  184|       |        assign_itx16_fn(R,  8, 16, sse4);
  185|       |        assign_itx16_fn(R, 16,  4, sse4);
  186|       |        assign_itx16_fn(R, 16,  8, sse4);
  187|       |        assign_itx12_fn(,  16, 16, sse4);
  188|       |        assign_itx2_fn (R,  8, 32, sse4);
  189|       |        assign_itx2_fn (R, 32,  8, sse4);
  190|       |        assign_itx2_fn (R, 16, 32, sse4);
  191|       |        assign_itx2_fn (R, 32, 16, sse4);
  192|       |        assign_itx2_fn (,  32, 32, sse4);
  193|       |        assign_itx1_fn (R, 16, 64, sse4);
  194|       |        assign_itx1_fn (R, 32, 64, sse4);
  195|       |        assign_itx1_fn (R, 64, 16, sse4);
  196|       |        assign_itx1_fn (R, 64, 32, sse4);
  197|       |        assign_itx1_fn (,  64, 64, sse4);
  198|       |        *all_simd = 1;
  199|       |    }
  200|       |#endif
  201|       |
  202|  3.41k|#if ARCH_X86_64
  203|  3.41k|    if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return;
  ------------------
  |  Branch (203:9): [True: 0, False: 3.41k]
  ------------------
  204|       |
  205|  3.41k|    assign_itx_fn(, 4, 4, wht_wht, WHT_WHT, avx2);
  ------------------
  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  ------------------
  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  206|       |
  207|  3.41k|#if BITDEPTH == 8
  208|  3.41k|    assign_itx16_fn( ,  4,  4, avx2);
  ------------------
  |  |  101|  3.41k|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|  3.41k|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|  3.41k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|  3.41k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  3.41k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|  3.41k|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|  3.41k|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|  3.41k|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|  3.41k|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|  3.41k|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|  3.41k|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|  3.41k|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|  3.41k|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|  3.41k|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|  3.41k|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  209|  3.41k|    assign_itx16_fn(R,  4,  8, avx2);
  ------------------
  |  |  101|  3.41k|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|  3.41k|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|  3.41k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|  3.41k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  3.41k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|  3.41k|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|  3.41k|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|  3.41k|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|  3.41k|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|  3.41k|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|  3.41k|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|  3.41k|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|  3.41k|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|  3.41k|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|  3.41k|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  210|  3.41k|    assign_itx16_fn(R,  4, 16, avx2);
  ------------------
  |  |  101|  3.41k|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|  3.41k|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|  3.41k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|  3.41k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  3.41k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|  3.41k|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|  3.41k|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|  3.41k|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|  3.41k|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|  3.41k|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|  3.41k|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|  3.41k|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|  3.41k|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|  3.41k|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|  3.41k|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  211|  3.41k|    assign_itx16_fn(R,  8,  4, avx2);
  ------------------
  |  |  101|  3.41k|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|  3.41k|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|  3.41k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|  3.41k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  3.41k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|  3.41k|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|  3.41k|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|  3.41k|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|  3.41k|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|  3.41k|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|  3.41k|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|  3.41k|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|  3.41k|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|  3.41k|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|  3.41k|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  212|  3.41k|    assign_itx16_fn( ,  8,  8, avx2);
  ------------------
  |  |  101|  3.41k|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|  3.41k|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|  3.41k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|  3.41k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  3.41k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|  3.41k|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|  3.41k|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|  3.41k|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|  3.41k|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|  3.41k|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|  3.41k|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|  3.41k|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|  3.41k|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|  3.41k|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|  3.41k|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  213|  3.41k|    assign_itx16_fn(R,  8, 16, avx2);
  ------------------
  |  |  101|  3.41k|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|  3.41k|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|  3.41k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|  3.41k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  3.41k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|  3.41k|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|  3.41k|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|  3.41k|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|  3.41k|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|  3.41k|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|  3.41k|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|  3.41k|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|  3.41k|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|  3.41k|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|  3.41k|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  214|  3.41k|    assign_itx2_fn (R,  8, 32, avx2);
  ------------------
  |  |   84|  3.41k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   81|  3.41k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   85|  3.41k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  215|  3.41k|    assign_itx16_fn(R, 16,  4, avx2);
  ------------------
  |  |  101|  3.41k|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|  3.41k|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|  3.41k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|  3.41k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  3.41k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|  3.41k|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|  3.41k|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|  3.41k|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|  3.41k|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|  3.41k|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|  3.41k|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|  3.41k|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|  3.41k|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|  3.41k|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|  3.41k|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  216|  3.41k|    assign_itx16_fn(R, 16,  8, avx2);
  ------------------
  |  |  101|  3.41k|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|  3.41k|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|  3.41k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|  3.41k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  3.41k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|  3.41k|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|  3.41k|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|  3.41k|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|  3.41k|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|  3.41k|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|  3.41k|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|  3.41k|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|  3.41k|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|  3.41k|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|  3.41k|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  217|  3.41k|    assign_itx12_fn( , 16, 16, avx2);
  ------------------
  |  |   88|  3.41k|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   84|  3.41k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   81|  3.41k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   85|  3.41k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   89|  3.41k|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   90|  3.41k|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   91|  3.41k|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   92|  3.41k|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   93|  3.41k|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   94|  3.41k|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   95|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   96|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   97|  3.41k|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   98|  3.41k|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  218|  3.41k|    assign_itx2_fn (R, 16, 32, avx2);
  ------------------
  |  |   84|  3.41k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   81|  3.41k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   85|  3.41k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  219|  3.41k|    assign_itx1_fn (R, 16, 64, avx2);
  ------------------
  |  |   81|  3.41k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  220|  3.41k|    assign_itx2_fn (R, 32,  8, avx2);
  ------------------
  |  |   84|  3.41k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   81|  3.41k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   85|  3.41k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  221|  3.41k|    assign_itx2_fn (R, 32, 16, avx2);
  ------------------
  |  |   84|  3.41k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   81|  3.41k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   85|  3.41k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  222|  3.41k|    assign_itx2_fn ( , 32, 32, avx2);
  ------------------
  |  |   84|  3.41k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   81|  3.41k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   85|  3.41k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  223|  3.41k|    assign_itx1_fn (R, 32, 64, avx2);
  ------------------
  |  |   81|  3.41k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  224|  3.41k|    assign_itx1_fn (R, 64, 16, avx2);
  ------------------
  |  |   81|  3.41k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  225|  3.41k|    assign_itx1_fn (R, 64, 32, avx2);
  ------------------
  |  |   81|  3.41k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  226|  3.41k|    assign_itx1_fn ( , 64, 64, avx2);
  ------------------
  |  |   81|  3.41k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  227|       |#else
  228|       |    if (bpc == 10) {
  229|       |        assign_itx16_bpc_fn( ,  4,  4, 10, avx2);
  230|       |        assign_itx16_bpc_fn(R,  4,  8, 10, avx2);
  231|       |        assign_itx16_bpc_fn(R,  4, 16, 10, avx2);
  232|       |        assign_itx16_bpc_fn(R,  8,  4, 10, avx2);
  233|       |        assign_itx16_bpc_fn( ,  8,  8, 10, avx2);
  234|       |        assign_itx16_bpc_fn(R,  8, 16, 10, avx2);
  235|       |        assign_itx2_bpc_fn (R,  8, 32, 10, avx2);
  236|       |        assign_itx16_bpc_fn(R, 16,  4, 10, avx2);
  237|       |        assign_itx16_bpc_fn(R, 16,  8, 10, avx2);
  238|       |        assign_itx12_bpc_fn( , 16, 16, 10, avx2);
  239|       |        assign_itx2_bpc_fn (R, 16, 32, 10, avx2);
  240|       |        assign_itx1_bpc_fn (R, 16, 64, 10, avx2);
  241|       |        assign_itx2_bpc_fn (R, 32,  8, 10, avx2);
  242|       |        assign_itx2_bpc_fn (R, 32, 16, 10, avx2);
  243|       |        assign_itx2_bpc_fn ( , 32, 32, 10, avx2);
  244|       |        assign_itx1_bpc_fn (R, 32, 64, 10, avx2);
  245|       |        assign_itx1_bpc_fn (R, 64, 16, 10, avx2);
  246|       |        assign_itx1_bpc_fn (R, 64, 32, 10, avx2);
  247|       |        assign_itx1_bpc_fn ( , 64, 64, 10, avx2);
  248|       |    } else {
  249|       |        assign_itx16_bpc_fn( ,  4,  4, 12, avx2);
  250|       |        assign_itx16_bpc_fn(R,  4,  8, 12, avx2);
  251|       |        assign_itx16_bpc_fn(R,  4, 16, 12, avx2);
  252|       |        assign_itx16_bpc_fn(R,  8,  4, 12, avx2);
  253|       |        assign_itx16_bpc_fn( ,  8,  8, 12, avx2);
  254|       |        assign_itx16_bpc_fn(R,  8, 16, 12, avx2);
  255|       |        assign_itx2_bpc_fn (R,  8, 32, 12, avx2);
  256|       |        assign_itx16_bpc_fn(R, 16,  4, 12, avx2);
  257|       |        assign_itx16_bpc_fn(R, 16,  8, 12, avx2);
  258|       |        assign_itx12_bpc_fn( , 16, 16, 12, avx2);
  259|       |        assign_itx2_bpc_fn (R, 32,  8, 12, avx2);
  260|       |        assign_itx_bpc_fn(R, 16, 32, identity_identity, IDTX, 12, avx2);
  261|       |        assign_itx_bpc_fn(R, 32, 16, identity_identity, IDTX, 12, avx2);
  262|       |        assign_itx_bpc_fn( , 32, 32, identity_identity, IDTX, 12, avx2);
  263|       |    }
  264|       |#endif
  265|       |
  266|  3.41k|    if (!(flags & DAV1D_X86_CPU_FLAG_AVX512ICL)) return;
  ------------------
  |  Branch (266:9): [True: 3.41k, False: 0]
  ------------------
  267|       |
  268|      0|#if BITDEPTH == 8
  269|  3.41k|    assign_itx16_fn( ,  4,  4, avx512icl); // no wht
  ------------------
  |  |  101|  3.41k|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|  3.41k|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|      0|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|      0|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  3.41k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|      0|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|      0|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|      0|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|      0|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|      0|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|      0|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|      0|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|      0|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|      0|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|  3.41k|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|      0|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|      0|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|      0|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|  3.41k|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|  3.41k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.41k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.41k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  270|      0|    assign_itx16_fn(R,  4,  8, avx512icl);
  ------------------
  |  |  101|      0|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|      0|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|      0|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|      0|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|      0|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|      0|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|      0|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|      0|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|      0|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|      0|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|      0|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|      0|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|      0|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|      0|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|      0|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|      0|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|      0|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|      0|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|      0|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  271|      0|    assign_itx16_fn(R,  4, 16, avx512icl);
  ------------------
  |  |  101|      0|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|      0|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|      0|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|      0|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|      0|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|      0|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|      0|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|      0|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|      0|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|      0|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|      0|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|      0|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|      0|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|      0|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|      0|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|      0|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|      0|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|      0|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|      0|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  272|      0|    assign_itx16_fn(R,  8,  4, avx512icl);
  ------------------
  |  |  101|      0|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|      0|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|      0|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|      0|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|      0|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|      0|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|      0|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|      0|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|      0|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|      0|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|      0|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|      0|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|      0|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|      0|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|      0|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|      0|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|      0|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|      0|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|      0|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  273|      0|    assign_itx16_fn( ,  8,  8, avx512icl);
  ------------------
  |  |  101|      0|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|      0|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|      0|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|      0|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|      0|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|      0|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|      0|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|      0|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|      0|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|      0|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|      0|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|      0|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|      0|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|      0|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|      0|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|      0|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|      0|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|      0|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|      0|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  274|      0|    assign_itx16_fn(R,  8, 16, avx512icl);
  ------------------
  |  |  101|      0|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|      0|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|      0|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|      0|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|      0|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|      0|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|      0|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|      0|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|      0|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|      0|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|      0|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|      0|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|      0|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|      0|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|      0|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|      0|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|      0|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|      0|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|      0|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  275|      0|    assign_itx2_fn (R,  8, 32, avx512icl);
  ------------------
  |  |   84|      0|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   81|      0|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   85|      0|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  276|      0|    assign_itx16_fn(R, 16,  4, avx512icl);
  ------------------
  |  |  101|      0|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|      0|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|      0|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|      0|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|      0|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|      0|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|      0|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|      0|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|      0|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|      0|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|      0|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|      0|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|      0|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|      0|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|      0|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|      0|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|      0|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|      0|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|      0|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  277|      0|    assign_itx16_fn(R, 16,  8, avx512icl);
  ------------------
  |  |  101|      0|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|      0|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|      0|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|      0|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|      0|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|      0|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|      0|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|      0|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|      0|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|      0|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|      0|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|      0|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|      0|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|      0|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|      0|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|      0|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|      0|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|      0|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|      0|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  278|      0|    assign_itx12_fn( , 16, 16, avx512icl);
  ------------------
  |  |   88|      0|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   84|      0|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   81|      0|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   85|      0|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   89|      0|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   90|      0|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   91|      0|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   92|      0|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   93|      0|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   94|      0|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   95|      0|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   96|      0|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   97|      0|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   98|      0|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  279|      0|    assign_itx2_fn (R, 16, 32, avx512icl);
  ------------------
  |  |   84|      0|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   81|      0|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   85|      0|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  280|      0|    assign_itx1_fn (R, 16, 64, avx512icl);
  ------------------
  |  |   81|      0|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  281|      0|    assign_itx2_fn (R, 32,  8, avx512icl);
  ------------------
  |  |   84|      0|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   81|      0|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   85|      0|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  282|      0|    assign_itx2_fn (R, 32, 16, avx512icl);
  ------------------
  |  |   84|      0|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   81|      0|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   85|      0|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  283|      0|    assign_itx2_fn ( , 32, 32, avx512icl);
  ------------------
  |  |   84|      0|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   81|      0|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   85|      0|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  284|      0|    assign_itx1_fn (R, 32, 64, avx512icl);
  ------------------
  |  |   81|      0|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  285|      0|    assign_itx1_fn (R, 64, 16, avx512icl);
  ------------------
  |  |   81|      0|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  286|      0|    assign_itx1_fn (R, 64, 32, avx512icl);
  ------------------
  |  |   81|      0|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  287|      0|    assign_itx1_fn ( , 64, 64, avx512icl);
  ------------------
  |  |   81|      0|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  288|       |#else
  289|       |    if (bpc == 10) {
  290|       |        assign_itx16_bpc_fn( ,  8,  8, 10, avx512icl);
  291|       |        assign_itx16_bpc_fn(R,  8, 16, 10, avx512icl);
  292|       |        assign_itx2_bpc_fn (R,  8, 32, 10, avx512icl);
  293|       |        assign_itx16_bpc_fn(R, 16,  8, 10, avx512icl);
  294|       |        assign_itx12_bpc_fn( , 16, 16, 10, avx512icl);
  295|       |        assign_itx2_bpc_fn (R, 16, 32, 10, avx512icl);
  296|       |        assign_itx2_bpc_fn (R, 32,  8, 10, avx512icl);
  297|       |        assign_itx2_bpc_fn (R, 32, 16, 10, avx512icl);
  298|       |        assign_itx2_bpc_fn ( , 32, 32, 10, avx512icl);
  299|       |        assign_itx1_bpc_fn (R, 16, 64, 10, avx512icl);
  300|       |        assign_itx1_bpc_fn (R, 32, 64, 10, avx512icl);
  301|       |        assign_itx1_bpc_fn (R, 64, 16, 10, avx512icl);
  302|       |        assign_itx1_bpc_fn (R, 64, 32, 10, avx512icl);
  303|       |        assign_itx1_bpc_fn ( , 64, 64, 10, avx512icl);
  304|       |    }
  305|       |#endif
  306|      0|#endif
  307|      0|}

loopfilter_tmpl.c:loop_filter_dsp_init_x86:
   41|  8.03k|static ALWAYS_INLINE void loop_filter_dsp_init_x86(Dav1dLoopFilterDSPContext *const c) {
   42|  8.03k|    const unsigned flags = dav1d_get_cpu_flags();
   43|       |
   44|  8.03k|    if (!(flags & DAV1D_X86_CPU_FLAG_SSSE3)) return;
  ------------------
  |  Branch (44:9): [True: 0, False: 8.03k]
  ------------------
   45|       |
   46|  8.03k|    c->loop_filter_sb[0][0] = BF(dav1d_lpf_h_sb_y, ssse3);
  ------------------
  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   47|  8.03k|    c->loop_filter_sb[0][1] = BF(dav1d_lpf_v_sb_y, ssse3);
  ------------------
  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   48|  8.03k|    c->loop_filter_sb[1][0] = BF(dav1d_lpf_h_sb_uv, ssse3);
  ------------------
  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   49|  8.03k|    c->loop_filter_sb[1][1] = BF(dav1d_lpf_v_sb_uv, ssse3);
  ------------------
  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   50|       |
   51|  8.03k|#if ARCH_X86_64
   52|  8.03k|    if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return;
  ------------------
  |  Branch (52:9): [True: 0, False: 8.03k]
  ------------------
   53|       |
   54|  8.03k|    c->loop_filter_sb[0][0] = BF(dav1d_lpf_h_sb_y, avx2);
  ------------------
  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   55|  8.03k|    c->loop_filter_sb[0][1] = BF(dav1d_lpf_v_sb_y, avx2);
  ------------------
  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   56|  8.03k|    c->loop_filter_sb[1][0] = BF(dav1d_lpf_h_sb_uv, avx2);
  ------------------
  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   57|  8.03k|    c->loop_filter_sb[1][1] = BF(dav1d_lpf_v_sb_uv, avx2);
  ------------------
  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   58|       |
   59|  8.03k|    if (!(flags & DAV1D_X86_CPU_FLAG_AVX512ICL)) return;
  ------------------
  |  Branch (59:9): [True: 8.03k, False: 0]
  ------------------
   60|       |
   61|      0|    c->loop_filter_sb[0][1] = BF(dav1d_lpf_v_sb_y, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   62|      0|    c->loop_filter_sb[1][1] = BF(dav1d_lpf_v_sb_uv, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   63|       |
   64|      0|    if (!(flags & DAV1D_X86_CPU_FLAG_SLOW_GATHER)) {
  ------------------
  |  Branch (64:9): [True: 0, False: 0]
  ------------------
   65|      0|        c->loop_filter_sb[0][0] = BF(dav1d_lpf_h_sb_y, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   66|      0|        c->loop_filter_sb[1][0] = BF(dav1d_lpf_h_sb_uv, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   67|      0|    }
   68|      0|#endif
   69|      0|}

looprestoration_tmpl.c:loop_restoration_dsp_init_x86:
   50|  8.03k|static ALWAYS_INLINE void loop_restoration_dsp_init_x86(Dav1dLoopRestorationDSPContext *const c, const int bpc) {
   51|  8.03k|    const unsigned flags = dav1d_get_cpu_flags();
   52|       |
   53|  8.03k|    if (!(flags & DAV1D_X86_CPU_FLAG_SSE2)) return;
  ------------------
  |  Branch (53:9): [True: 0, False: 8.03k]
  ------------------
   54|  8.03k|#if BITDEPTH == 8
   55|  8.03k|    c->wiener[0] = BF(dav1d_wiener_filter7, sse2);
  ------------------
  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   56|  8.03k|    c->wiener[1] = BF(dav1d_wiener_filter5, sse2);
  ------------------
  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   57|  8.03k|#endif
   58|       |
   59|  8.03k|    if (!(flags & DAV1D_X86_CPU_FLAG_SSSE3)) return;
  ------------------
  |  Branch (59:9): [True: 0, False: 8.03k]
  ------------------
   60|  8.03k|    c->wiener[0] = BF(dav1d_wiener_filter7, ssse3);
  ------------------
  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   61|  8.03k|    c->wiener[1] = BF(dav1d_wiener_filter5, ssse3);
  ------------------
  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   62|  8.03k|    if (BITDEPTH == 8 || bpc == 10) {
  ------------------
  |  Branch (62:9): [True: 3.41k, Folded]
  |  Branch (62:26): [True: 2.25k, False: 2.35k]
  ------------------
   63|  5.67k|        c->sgr[0] = BF(dav1d_sgr_filter_5x5, ssse3);
  ------------------
  |  |   52|  5.67k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   64|  5.67k|        c->sgr[1] = BF(dav1d_sgr_filter_3x3, ssse3);
  ------------------
  |  |   52|  5.67k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   65|  5.67k|        c->sgr[2] = BF(dav1d_sgr_filter_mix, ssse3);
  ------------------
  |  |   52|  5.67k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   66|  5.67k|    }
   67|       |
   68|  8.03k|#if ARCH_X86_64
   69|  8.03k|    if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return;
  ------------------
  |  Branch (69:9): [True: 0, False: 8.03k]
  ------------------
   70|       |
   71|  8.03k|    c->wiener[0] = BF(dav1d_wiener_filter7, avx2);
  ------------------
  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   72|  8.03k|    c->wiener[1] = BF(dav1d_wiener_filter5, avx2);
  ------------------
  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   73|  8.03k|    if (BITDEPTH == 8 || bpc == 10) {
  ------------------
  |  Branch (73:9): [True: 3.41k, Folded]
  |  Branch (73:26): [True: 2.25k, False: 2.35k]
  ------------------
   74|  5.67k|        c->sgr[0] = BF(dav1d_sgr_filter_5x5, avx2);
  ------------------
  |  |   52|  5.67k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   75|  5.67k|        c->sgr[1] = BF(dav1d_sgr_filter_3x3, avx2);
  ------------------
  |  |   52|  5.67k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   76|  5.67k|        c->sgr[2] = BF(dav1d_sgr_filter_mix, avx2);
  ------------------
  |  |   52|  5.67k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   77|  5.67k|    }
   78|       |
   79|  8.03k|    if (!(flags & DAV1D_X86_CPU_FLAG_AVX512ICL)) return;
  ------------------
  |  Branch (79:9): [True: 8.03k, False: 0]
  ------------------
   80|       |
   81|      0|    c->wiener[0] = BF(dav1d_wiener_filter7, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   82|      0|#if BITDEPTH == 8
   83|       |    /* With VNNI we don't need a 5-tap version. */
   84|      0|    c->wiener[1] = c->wiener[0];
   85|       |#else
   86|       |    c->wiener[1] = BF(dav1d_wiener_filter5, avx512icl);
   87|       |#endif
   88|      0|    if (BITDEPTH == 8 || bpc == 10) {
  ------------------
  |  Branch (88:9): [True: 0, Folded]
  |  Branch (88:26): [True: 0, False: 0]
  ------------------
   89|      0|        c->sgr[0] = BF(dav1d_sgr_filter_5x5, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   90|      0|        c->sgr[1] = BF(dav1d_sgr_filter_3x3, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   91|      0|        c->sgr[2] = BF(dav1d_sgr_filter_mix, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   92|      0|    }
   93|      0|#endif
   94|      0|}

mc_tmpl.c:mc_dsp_init_x86:
   92|  8.03k|static ALWAYS_INLINE void mc_dsp_init_x86(Dav1dMCDSPContext *const c) {
   93|  8.03k|    const unsigned flags = dav1d_get_cpu_flags();
   94|       |
   95|  8.03k|    if(!(flags & DAV1D_X86_CPU_FLAG_SSSE3))
  ------------------
  |  Branch (95:8): [True: 0, False: 8.03k]
  ------------------
   96|      0|        return;
   97|       |
   98|  8.03k|    init_8tap_fns(ssse3);
  ------------------
  |  |  143|  8.03k|    init_8tap_gen(mc,  opt); \
  |  |  ------------------
  |  |  |  |  132|  8.03k|    init_##name##_fn(FILTER_2D_8TAP_REGULAR,        8tap_regular,        opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  8.03k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  133|  8.03k|    init_##name##_fn(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth, opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  8.03k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  134|  8.03k|    init_##name##_fn(FILTER_2D_8TAP_REGULAR_SHARP,  8tap_regular_sharp,  opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  8.03k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  135|  8.03k|    init_##name##_fn(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular, opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  8.03k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  136|  8.03k|    init_##name##_fn(FILTER_2D_8TAP_SMOOTH,         8tap_smooth,         opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  8.03k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  137|  8.03k|    init_##name##_fn(FILTER_2D_8TAP_SMOOTH_SHARP,   8tap_smooth_sharp,   opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  8.03k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  138|  8.03k|    init_##name##_fn(FILTER_2D_8TAP_SHARP_REGULAR,  8tap_sharp_regular,  opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  8.03k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  139|  8.03k|    init_##name##_fn(FILTER_2D_8TAP_SHARP_SMOOTH,   8tap_sharp_smooth,   opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  8.03k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  140|  8.03k|    init_##name##_fn(FILTER_2D_8TAP_SHARP,          8tap_sharp,          opt)
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  8.03k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  144|  8.03k|    init_8tap_gen(mct, opt)
  |  |  ------------------
  |  |  |  |  132|  8.03k|    init_##name##_fn(FILTER_2D_8TAP_REGULAR,        8tap_regular,        opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|  8.03k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  133|  8.03k|    init_##name##_fn(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth, opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|  8.03k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  134|  8.03k|    init_##name##_fn(FILTER_2D_8TAP_REGULAR_SHARP,  8tap_regular_sharp,  opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|  8.03k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  135|  8.03k|    init_##name##_fn(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular, opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|  8.03k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  136|  8.03k|    init_##name##_fn(FILTER_2D_8TAP_SMOOTH,         8tap_smooth,         opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|  8.03k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  137|  8.03k|    init_##name##_fn(FILTER_2D_8TAP_SMOOTH_SHARP,   8tap_smooth_sharp,   opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|  8.03k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  138|  8.03k|    init_##name##_fn(FILTER_2D_8TAP_SHARP_REGULAR,  8tap_sharp_regular,  opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|  8.03k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  139|  8.03k|    init_##name##_fn(FILTER_2D_8TAP_SHARP_SMOOTH,   8tap_sharp_smooth,   opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|  8.03k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  140|  8.03k|    init_##name##_fn(FILTER_2D_8TAP_SHARP,          8tap_sharp,          opt)
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|  8.03k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   99|       |
  100|  8.03k|    init_mc_fn(FILTER_2D_BILINEAR,             bilin,               ssse3);
  ------------------
  |  |   36|  8.03k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  101|  8.03k|    init_mct_fn(FILTER_2D_BILINEAR,            bilin,               ssse3);
  ------------------
  |  |   38|  8.03k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  102|       |
  103|  8.03k|    init_mc_scaled_fn(FILTER_2D_8TAP_REGULAR,        8tap_scaled_regular,        ssse3);
  ------------------
  |  |   40|  8.03k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  104|  8.03k|    init_mc_scaled_fn(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_scaled_regular_smooth, ssse3);
  ------------------
  |  |   40|  8.03k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  105|  8.03k|    init_mc_scaled_fn(FILTER_2D_8TAP_REGULAR_SHARP,  8tap_scaled_regular_sharp,  ssse3);
  ------------------
  |  |   40|  8.03k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  106|  8.03k|    init_mc_scaled_fn(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_scaled_smooth_regular, ssse3);
  ------------------
  |  |   40|  8.03k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  107|  8.03k|    init_mc_scaled_fn(FILTER_2D_8TAP_SMOOTH,         8tap_scaled_smooth,         ssse3);
  ------------------
  |  |   40|  8.03k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  108|  8.03k|    init_mc_scaled_fn(FILTER_2D_8TAP_SMOOTH_SHARP,   8tap_scaled_smooth_sharp,   ssse3);
  ------------------
  |  |   40|  8.03k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  109|  8.03k|    init_mc_scaled_fn(FILTER_2D_8TAP_SHARP_REGULAR,  8tap_scaled_sharp_regular,  ssse3);
  ------------------
  |  |   40|  8.03k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  110|  8.03k|    init_mc_scaled_fn(FILTER_2D_8TAP_SHARP_SMOOTH,   8tap_scaled_sharp_smooth,   ssse3);
  ------------------
  |  |   40|  8.03k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  111|  8.03k|    init_mc_scaled_fn(FILTER_2D_8TAP_SHARP,          8tap_scaled_sharp,          ssse3);
  ------------------
  |  |   40|  8.03k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  112|  8.03k|    init_mc_scaled_fn(FILTER_2D_BILINEAR,            bilin_scaled,               ssse3);
  ------------------
  |  |   40|  8.03k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  113|       |
  114|  8.03k|    init_mct_scaled_fn(FILTER_2D_8TAP_REGULAR,        8tap_scaled_regular,        ssse3);
  ------------------
  |  |   42|  8.03k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  115|  8.03k|    init_mct_scaled_fn(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_scaled_regular_smooth, ssse3);
  ------------------
  |  |   42|  8.03k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  116|  8.03k|    init_mct_scaled_fn(FILTER_2D_8TAP_REGULAR_SHARP,  8tap_scaled_regular_sharp,  ssse3);
  ------------------
  |  |   42|  8.03k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  117|  8.03k|    init_mct_scaled_fn(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_scaled_smooth_regular, ssse3);
  ------------------
  |  |   42|  8.03k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  118|  8.03k|    init_mct_scaled_fn(FILTER_2D_8TAP_SMOOTH,         8tap_scaled_smooth,         ssse3);
  ------------------
  |  |   42|  8.03k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  119|  8.03k|    init_mct_scaled_fn(FILTER_2D_8TAP_SMOOTH_SHARP,   8tap_scaled_smooth_sharp,   ssse3);
  ------------------
  |  |   42|  8.03k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  120|  8.03k|    init_mct_scaled_fn(FILTER_2D_8TAP_SHARP_REGULAR,  8tap_scaled_sharp_regular,  ssse3);
  ------------------
  |  |   42|  8.03k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  121|  8.03k|    init_mct_scaled_fn(FILTER_2D_8TAP_SHARP_SMOOTH,   8tap_scaled_sharp_smooth,   ssse3);
  ------------------
  |  |   42|  8.03k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  122|  8.03k|    init_mct_scaled_fn(FILTER_2D_8TAP_SHARP,          8tap_scaled_sharp,          ssse3);
  ------------------
  |  |   42|  8.03k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  123|  8.03k|    init_mct_scaled_fn(FILTER_2D_BILINEAR,            bilin_scaled,               ssse3);
  ------------------
  |  |   42|  8.03k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  124|       |
  125|  8.03k|    c->avg = BF(dav1d_avg, ssse3);
  ------------------
  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  126|  8.03k|    c->w_avg = BF(dav1d_w_avg, ssse3);
  ------------------
  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  127|  8.03k|    c->mask = BF(dav1d_mask, ssse3);
  ------------------
  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  128|  8.03k|    c->w_mask[0] = BF(dav1d_w_mask_444, ssse3);
  ------------------
  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  129|  8.03k|    c->w_mask[1] = BF(dav1d_w_mask_422, ssse3);
  ------------------
  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  130|  8.03k|    c->w_mask[2] = BF(dav1d_w_mask_420, ssse3);
  ------------------
  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  131|  8.03k|    c->blend = BF(dav1d_blend, ssse3);
  ------------------
  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  132|  8.03k|    c->blend_v = BF(dav1d_blend_v, ssse3);
  ------------------
  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  133|  8.03k|    c->blend_h = BF(dav1d_blend_h, ssse3);
  ------------------
  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  134|  8.03k|    c->warp8x8  = BF(dav1d_warp_affine_8x8, ssse3);
  ------------------
  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  135|  8.03k|    c->warp8x8t = BF(dav1d_warp_affine_8x8t, ssse3);
  ------------------
  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  136|  8.03k|    c->emu_edge = BF(dav1d_emu_edge, ssse3);
  ------------------
  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  137|  8.03k|    c->resize = BF(dav1d_resize, ssse3);
  ------------------
  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  138|       |
  139|  8.03k|    if(!(flags & DAV1D_X86_CPU_FLAG_SSE41))
  ------------------
  |  Branch (139:8): [True: 0, False: 8.03k]
  ------------------
  140|      0|        return;
  141|       |
  142|  8.03k|#if BITDEPTH == 8
  143|  8.03k|    c->warp8x8  = BF(dav1d_warp_affine_8x8, sse4);
  ------------------
  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  144|  8.03k|    c->warp8x8t = BF(dav1d_warp_affine_8x8t, sse4);
  ------------------
  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  145|  8.03k|#endif
  146|       |
  147|  8.03k|#if ARCH_X86_64
  148|  8.03k|    if (!(flags & DAV1D_X86_CPU_FLAG_AVX2))
  ------------------
  |  Branch (148:9): [True: 0, False: 8.03k]
  ------------------
  149|      0|        return;
  150|       |
  151|  8.03k|    init_8tap_fns(avx2);
  ------------------
  |  |  143|  8.03k|    init_8tap_gen(mc,  opt); \
  |  |  ------------------
  |  |  |  |  132|  8.03k|    init_##name##_fn(FILTER_2D_8TAP_REGULAR,        8tap_regular,        opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  8.03k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  133|  8.03k|    init_##name##_fn(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth, opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  8.03k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  134|  8.03k|    init_##name##_fn(FILTER_2D_8TAP_REGULAR_SHARP,  8tap_regular_sharp,  opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  8.03k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  135|  8.03k|    init_##name##_fn(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular, opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  8.03k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  136|  8.03k|    init_##name##_fn(FILTER_2D_8TAP_SMOOTH,         8tap_smooth,         opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  8.03k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  137|  8.03k|    init_##name##_fn(FILTER_2D_8TAP_SMOOTH_SHARP,   8tap_smooth_sharp,   opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  8.03k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  138|  8.03k|    init_##name##_fn(FILTER_2D_8TAP_SHARP_REGULAR,  8tap_sharp_regular,  opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  8.03k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  139|  8.03k|    init_##name##_fn(FILTER_2D_8TAP_SHARP_SMOOTH,   8tap_sharp_smooth,   opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  8.03k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  140|  8.03k|    init_##name##_fn(FILTER_2D_8TAP_SHARP,          8tap_sharp,          opt)
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  8.03k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  144|  8.03k|    init_8tap_gen(mct, opt)
  |  |  ------------------
  |  |  |  |  132|  8.03k|    init_##name##_fn(FILTER_2D_8TAP_REGULAR,        8tap_regular,        opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|  8.03k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  133|  8.03k|    init_##name##_fn(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth, opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|  8.03k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  134|  8.03k|    init_##name##_fn(FILTER_2D_8TAP_REGULAR_SHARP,  8tap_regular_sharp,  opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|  8.03k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  135|  8.03k|    init_##name##_fn(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular, opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|  8.03k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  136|  8.03k|    init_##name##_fn(FILTER_2D_8TAP_SMOOTH,         8tap_smooth,         opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|  8.03k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  137|  8.03k|    init_##name##_fn(FILTER_2D_8TAP_SMOOTH_SHARP,   8tap_smooth_sharp,   opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|  8.03k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  138|  8.03k|    init_##name##_fn(FILTER_2D_8TAP_SHARP_REGULAR,  8tap_sharp_regular,  opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|  8.03k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  139|  8.03k|    init_##name##_fn(FILTER_2D_8TAP_SHARP_SMOOTH,   8tap_sharp_smooth,   opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|  8.03k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  140|  8.03k|    init_##name##_fn(FILTER_2D_8TAP_SHARP,          8tap_sharp,          opt)
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|  8.03k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  152|       |
  153|  8.03k|    init_mc_fn(FILTER_2D_BILINEAR,            bilin,               avx2);
  ------------------
  |  |   36|  8.03k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  154|  8.03k|    init_mct_fn(FILTER_2D_BILINEAR,           bilin,               avx2);
  ------------------
  |  |   38|  8.03k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  155|       |
  156|  8.03k|    init_mc_scaled_fn(FILTER_2D_8TAP_REGULAR,        8tap_scaled_regular,        avx2);
  ------------------
  |  |   40|  8.03k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  157|  8.03k|    init_mc_scaled_fn(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_scaled_regular_smooth, avx2);
  ------------------
  |  |   40|  8.03k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  158|  8.03k|    init_mc_scaled_fn(FILTER_2D_8TAP_REGULAR_SHARP,  8tap_scaled_regular_sharp,  avx2);
  ------------------
  |  |   40|  8.03k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  159|  8.03k|    init_mc_scaled_fn(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_scaled_smooth_regular, avx2);
  ------------------
  |  |   40|  8.03k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  160|  8.03k|    init_mc_scaled_fn(FILTER_2D_8TAP_SMOOTH,         8tap_scaled_smooth,         avx2);
  ------------------
  |  |   40|  8.03k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  161|  8.03k|    init_mc_scaled_fn(FILTER_2D_8TAP_SMOOTH_SHARP,   8tap_scaled_smooth_sharp,   avx2);
  ------------------
  |  |   40|  8.03k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  162|  8.03k|    init_mc_scaled_fn(FILTER_2D_8TAP_SHARP_REGULAR,  8tap_scaled_sharp_regular,  avx2);
  ------------------
  |  |   40|  8.03k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  163|  8.03k|    init_mc_scaled_fn(FILTER_2D_8TAP_SHARP_SMOOTH,   8tap_scaled_sharp_smooth,   avx2);
  ------------------
  |  |   40|  8.03k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  164|  8.03k|    init_mc_scaled_fn(FILTER_2D_8TAP_SHARP,          8tap_scaled_sharp,          avx2);
  ------------------
  |  |   40|  8.03k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  165|  8.03k|    init_mc_scaled_fn(FILTER_2D_BILINEAR,            bilin_scaled,               avx2);
  ------------------
  |  |   40|  8.03k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  166|       |
  167|  8.03k|    init_mct_scaled_fn(FILTER_2D_8TAP_REGULAR,        8tap_scaled_regular,        avx2);
  ------------------
  |  |   42|  8.03k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  168|  8.03k|    init_mct_scaled_fn(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_scaled_regular_smooth, avx2);
  ------------------
  |  |   42|  8.03k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  169|  8.03k|    init_mct_scaled_fn(FILTER_2D_8TAP_REGULAR_SHARP,  8tap_scaled_regular_sharp,  avx2);
  ------------------
  |  |   42|  8.03k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  170|  8.03k|    init_mct_scaled_fn(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_scaled_smooth_regular, avx2);
  ------------------
  |  |   42|  8.03k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  171|  8.03k|    init_mct_scaled_fn(FILTER_2D_8TAP_SMOOTH,         8tap_scaled_smooth,         avx2);
  ------------------
  |  |   42|  8.03k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  172|  8.03k|    init_mct_scaled_fn(FILTER_2D_8TAP_SMOOTH_SHARP,   8tap_scaled_smooth_sharp,   avx2);
  ------------------
  |  |   42|  8.03k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  173|  8.03k|    init_mct_scaled_fn(FILTER_2D_8TAP_SHARP_REGULAR,  8tap_scaled_sharp_regular,  avx2);
  ------------------
  |  |   42|  8.03k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  174|  8.03k|    init_mct_scaled_fn(FILTER_2D_8TAP_SHARP_SMOOTH,   8tap_scaled_sharp_smooth,   avx2);
  ------------------
  |  |   42|  8.03k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  175|  8.03k|    init_mct_scaled_fn(FILTER_2D_8TAP_SHARP,          8tap_scaled_sharp,          avx2);
  ------------------
  |  |   42|  8.03k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  176|  8.03k|    init_mct_scaled_fn(FILTER_2D_BILINEAR,            bilin_scaled,               avx2);
  ------------------
  |  |   42|  8.03k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  177|       |
  178|  8.03k|    c->avg = BF(dav1d_avg, avx2);
  ------------------
  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  179|  8.03k|    c->w_avg = BF(dav1d_w_avg, avx2);
  ------------------
  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  180|  8.03k|    c->mask = BF(dav1d_mask, avx2);
  ------------------
  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  181|  8.03k|    c->w_mask[0] = BF(dav1d_w_mask_444, avx2);
  ------------------
  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  182|  8.03k|    c->w_mask[1] = BF(dav1d_w_mask_422, avx2);
  ------------------
  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  183|  8.03k|    c->w_mask[2] = BF(dav1d_w_mask_420, avx2);
  ------------------
  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  184|  8.03k|    c->blend = BF(dav1d_blend, avx2);
  ------------------
  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  185|  8.03k|    c->blend_v = BF(dav1d_blend_v, avx2);
  ------------------
  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  186|  8.03k|    c->blend_h = BF(dav1d_blend_h, avx2);
  ------------------
  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  187|  8.03k|    c->warp8x8  = BF(dav1d_warp_affine_8x8, avx2);
  ------------------
  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  188|  8.03k|    c->warp8x8t = BF(dav1d_warp_affine_8x8t, avx2);
  ------------------
  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  189|  8.03k|    c->emu_edge = BF(dav1d_emu_edge, avx2);
  ------------------
  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  190|  8.03k|    c->resize = BF(dav1d_resize, avx2);
  ------------------
  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  191|       |
  192|  8.03k|    if (!(flags & DAV1D_X86_CPU_FLAG_AVX512ICL))
  ------------------
  |  Branch (192:9): [True: 8.03k, False: 0]
  ------------------
  193|  8.03k|        return;
  194|       |
  195|  8.03k|    init_8tap_fns(avx512icl);
  ------------------
  |  |  143|      0|    init_8tap_gen(mc,  opt); \
  |  |  ------------------
  |  |  |  |  132|      0|    init_##name##_fn(FILTER_2D_8TAP_REGULAR,        8tap_regular,        opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  8.03k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  133|      0|    init_##name##_fn(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth, opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|      0|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  134|      0|    init_##name##_fn(FILTER_2D_8TAP_REGULAR_SHARP,  8tap_regular_sharp,  opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|      0|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  135|      0|    init_##name##_fn(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular, opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|      0|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  136|      0|    init_##name##_fn(FILTER_2D_8TAP_SMOOTH,         8tap_smooth,         opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|      0|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  137|      0|    init_##name##_fn(FILTER_2D_8TAP_SMOOTH_SHARP,   8tap_smooth_sharp,   opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|      0|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  138|      0|    init_##name##_fn(FILTER_2D_8TAP_SHARP_REGULAR,  8tap_sharp_regular,  opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|      0|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  139|      0|    init_##name##_fn(FILTER_2D_8TAP_SHARP_SMOOTH,   8tap_sharp_smooth,   opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|      0|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  140|  8.03k|    init_##name##_fn(FILTER_2D_8TAP_SHARP,          8tap_sharp,          opt)
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  8.03k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  144|      0|    init_8tap_gen(mct, opt)
  |  |  ------------------
  |  |  |  |  132|      0|    init_##name##_fn(FILTER_2D_8TAP_REGULAR,        8tap_regular,        opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|      0|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  133|  8.03k|    init_##name##_fn(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth, opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|      0|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  134|  8.03k|    init_##name##_fn(FILTER_2D_8TAP_REGULAR_SHARP,  8tap_regular_sharp,  opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|      0|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  135|  8.03k|    init_##name##_fn(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular, opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|      0|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  136|  8.03k|    init_##name##_fn(FILTER_2D_8TAP_SMOOTH,         8tap_smooth,         opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|      0|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  137|  8.03k|    init_##name##_fn(FILTER_2D_8TAP_SMOOTH_SHARP,   8tap_smooth_sharp,   opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|      0|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  138|  8.03k|    init_##name##_fn(FILTER_2D_8TAP_SHARP_REGULAR,  8tap_sharp_regular,  opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|      0|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  139|  8.03k|    init_##name##_fn(FILTER_2D_8TAP_SHARP_SMOOTH,   8tap_sharp_smooth,   opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|      0|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  140|  8.03k|    init_##name##_fn(FILTER_2D_8TAP_SHARP,          8tap_sharp,          opt)
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|  8.03k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.03k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  196|       |
  197|      0|    init_mc_fn (FILTER_2D_BILINEAR,            bilin,               avx512icl);
  ------------------
  |  |   36|      0|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  198|      0|    init_mct_fn(FILTER_2D_BILINEAR,            bilin,               avx512icl);
  ------------------
  |  |   38|      0|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  199|       |
  200|      0|    c->avg = BF(dav1d_avg, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  201|      0|    c->w_avg = BF(dav1d_w_avg, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  202|      0|    c->mask = BF(dav1d_mask, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  203|      0|    c->w_mask[0] = BF(dav1d_w_mask_444, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  204|      0|    c->w_mask[1] = BF(dav1d_w_mask_422, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  205|      0|    c->w_mask[2] = BF(dav1d_w_mask_420, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  206|      0|    c->blend = BF(dav1d_blend, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  207|      0|    c->blend_v = BF(dav1d_blend_v, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  208|      0|    c->blend_h = BF(dav1d_blend_h, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  209|       |
  210|      0|    if (!(flags & DAV1D_X86_CPU_FLAG_SLOW_GATHER)) {
  ------------------
  |  Branch (210:9): [True: 0, False: 0]
  ------------------
  211|      0|        c->resize = BF(dav1d_resize, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  212|      0|        c->warp8x8  = BF(dav1d_warp_affine_8x8, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  213|      0|        c->warp8x8t = BF(dav1d_warp_affine_8x8t, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  214|      0|    }
  215|      0|#endif
  216|      0|}

msac.c:msac_init_x86:
   59|  46.3k|static ALWAYS_INLINE void msac_init_x86(MsacContext *const s) {
   60|  46.3k|    const unsigned flags = dav1d_get_cpu_flags();
   61|       |
   62|  46.3k|    if (flags & DAV1D_X86_CPU_FLAG_SSE2) {
  ------------------
  |  Branch (62:9): [True: 46.3k, False: 0]
  ------------------
   63|  46.3k|        s->symbol_adapt16 = dav1d_msac_decode_symbol_adapt16_sse2;
   64|  46.3k|    }
   65|       |
   66|  46.3k|    if (flags & DAV1D_X86_CPU_FLAG_AVX2) {
  ------------------
  |  Branch (66:9): [True: 46.3k, False: 0]
  ------------------
   67|  46.3k|        s->symbol_adapt16 = dav1d_msac_decode_symbol_adapt16_avx2;
   68|  46.3k|    }
   69|  46.3k|}

pal.c:pal_dsp_init_x86:
   34|  9.51k|static ALWAYS_INLINE void pal_dsp_init_x86(Dav1dPalDSPContext *const c) {
   35|  9.51k|    const unsigned flags = dav1d_get_cpu_flags();
   36|       |
   37|  9.51k|    if (!(flags & DAV1D_X86_CPU_FLAG_SSSE3)) return;
  ------------------
  |  Branch (37:9): [True: 0, False: 9.51k]
  ------------------
   38|       |
   39|  9.51k|    c->pal_idx_finish = dav1d_pal_idx_finish_ssse3;
   40|       |
   41|  9.51k|#if ARCH_X86_64
   42|  9.51k|    if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return;
  ------------------
  |  Branch (42:9): [True: 0, False: 9.51k]
  ------------------
   43|       |
   44|  9.51k|    c->pal_idx_finish = dav1d_pal_idx_finish_avx2;
   45|       |
   46|  9.51k|    if (!(flags & DAV1D_X86_CPU_FLAG_AVX512ICL)) return;
  ------------------
  |  Branch (46:9): [True: 9.51k, False: 0]
  ------------------
   47|       |
   48|      0|    c->pal_idx_finish = dav1d_pal_idx_finish_avx512icl;
   49|      0|#endif
   50|      0|}

refmvs.c:refmvs_dsp_init_x86:
   41|  9.51k|static ALWAYS_INLINE void refmvs_dsp_init_x86(Dav1dRefmvsDSPContext *const c) {
   42|  9.51k|    const unsigned flags = dav1d_get_cpu_flags();
   43|       |
   44|  9.51k|    if (!(flags & DAV1D_X86_CPU_FLAG_SSE2)) return;
  ------------------
  |  Branch (44:9): [True: 0, False: 9.51k]
  ------------------
   45|       |
   46|  9.51k|    c->splat_mv = dav1d_splat_mv_sse2;
   47|       |
   48|  9.51k|    if (!(flags & DAV1D_X86_CPU_FLAG_SSSE3)) return;
  ------------------
  |  Branch (48:9): [True: 0, False: 9.51k]
  ------------------
   49|       |
   50|  9.51k|    c->save_tmvs = dav1d_save_tmvs_ssse3;
   51|       |
   52|  9.51k|    if (!(flags & DAV1D_X86_CPU_FLAG_SSE41)) return;
  ------------------
  |  Branch (52:9): [True: 0, False: 9.51k]
  ------------------
   53|  9.51k|#if ARCH_X86_64
   54|  9.51k|    c->load_tmvs = dav1d_load_tmvs_sse4;
   55|       |
   56|  9.51k|    if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return;
  ------------------
  |  Branch (56:9): [True: 0, False: 9.51k]
  ------------------
   57|       |
   58|  9.51k|    c->save_tmvs = dav1d_save_tmvs_avx2;
   59|  9.51k|    c->splat_mv = dav1d_splat_mv_avx2;
   60|       |
   61|  9.51k|    if (!(flags & DAV1D_X86_CPU_FLAG_AVX512ICL)) return;
  ------------------
  |  Branch (61:9): [True: 9.51k, False: 0]
  ------------------
   62|       |
   63|      0|    c->save_tmvs = dav1d_save_tmvs_avx512icl;
   64|      0|    c->splat_mv = dav1d_splat_mv_avx512icl;
   65|      0|#endif
   66|      0|}

LLVMFuzzerInitialize:
   59|      2|int LLVMFuzzerInitialize(int *argc, char ***argv) {
   60|      2|    int i = 1;
   61|     11|    for (; i < *argc; i++) {
  ------------------
  |  Branch (61:12): [True: 9, False: 2]
  ------------------
   62|      9|        if (!strcmp((*argv)[i], "--cpumask")) {
  ------------------
  |  Branch (62:13): [True: 0, False: 9]
  ------------------
   63|      0|            const char * cpumask = (*argv)[i+1];
   64|      0|            if (cpumask) {
  ------------------
  |  Branch (64:17): [True: 0, False: 0]
  ------------------
   65|      0|                char *end;
   66|      0|                unsigned res;
   67|      0|                if (!strncmp(cpumask, "0x", 2)) {
  ------------------
  |  Branch (67:21): [True: 0, False: 0]
  ------------------
   68|      0|                    cpumask += 2;
   69|      0|                    res = (unsigned) strtoul(cpumask, &end, 16);
   70|      0|                } else {
   71|      0|                    res = (unsigned) strtoul(cpumask, &end, 0);
   72|      0|                }
   73|      0|                if (end != cpumask && !end[0]) {
  ------------------
  |  Branch (73:21): [True: 0, False: 0]
  |  Branch (73:39): [True: 0, False: 0]
  ------------------
   74|      0|                    dav1d_set_cpu_flags_mask(res);
   75|      0|                }
   76|      0|            }
   77|      0|            break;
   78|      0|        }
   79|      9|    }
   80|       |
   81|      2|    for (; i < *argc - 2; i++) {
  ------------------
  |  Branch (81:12): [True: 0, False: 2]
  ------------------
   82|      0|        (*argv)[i] = (*argv)[i + 2];
   83|      0|    }
   84|       |
   85|      2|    *argc = i;
   86|       |
   87|      2|    return 0;
   88|      2|}
LLVMFuzzerTestOneInput:
   94|  9.52k|{
   95|  9.52k|    Dav1dSettings settings = { 0 };
   96|  9.52k|    Dav1dContext * ctx = NULL;
   97|  9.52k|    Dav1dPicture pic;
   98|  9.52k|    const uint8_t *ptr = data;
   99|  9.52k|    int have_seq_hdr = 0;
  100|  9.52k|    int err;
  101|       |
  102|  9.52k|    dav1d_version();
  103|       |
  104|  9.52k|    if (size < 32) goto end;
  ------------------
  |  Branch (104:9): [True: 8, False: 9.51k]
  ------------------
  105|       |#ifdef DAV1D_ALLOC_FAIL
  106|       |    unsigned h = djb_xor(ptr, 32);
  107|       |    unsigned seed = h;
  108|       |    unsigned probability = h > (RAND_MAX >> 5) ? RAND_MAX >> 5 : h;
  109|       |    int max_frame_delay = (h & 0xf) + 1;
  110|       |    int n_threads = ((h >> 4) & 0x7) + 1;
  111|       |    if (max_frame_delay > 5) max_frame_delay = 1;
  112|       |    if (n_threads > 3) n_threads = 1;
  113|       |#endif
  114|  9.51k|    ptr += 32; // skip ivf header
  115|       |
  116|  9.51k|    dav1d_default_settings(&settings);
  117|       |
  118|       |#ifdef DAV1D_MT_FUZZING
  119|       |    settings.max_frame_delay = settings.n_threads = 4;
  120|       |#elif defined(DAV1D_ALLOC_FAIL)
  121|       |    settings.max_frame_delay = max_frame_delay;
  122|       |    settings.n_threads = n_threads;
  123|       |    dav1d_setup_alloc_fail(seed, probability);
  124|       |#else
  125|  9.51k|    settings.max_frame_delay = settings.n_threads = 1;
  126|  9.51k|#endif
  127|  9.51k|#if defined(DAV1D_FUZZ_MAX_SIZE)
  128|  9.51k|    settings.frame_size_limit = DAV1D_FUZZ_MAX_SIZE;
  ------------------
  |  |   56|  9.51k|#define DAV1D_FUZZ_MAX_SIZE 4096 * 4096
  ------------------
  129|  9.51k|#endif
  130|       |
  131|  9.51k|    err = dav1d_open(&ctx, &settings);
  132|  9.51k|    if (err < 0) goto end;
  ------------------
  |  Branch (132:9): [True: 0, False: 9.51k]
  ------------------
  133|       |
  134|  86.0k|    while (ptr <= data + size - 12) {
  ------------------
  |  Branch (134:12): [True: 76.9k, False: 9.00k]
  ------------------
  135|  76.9k|        Dav1dData buf;
  136|  76.9k|        uint8_t *p;
  137|       |
  138|  76.9k|        size_t frame_size = r32le(ptr);
  139|  76.9k|        ptr += 12;
  140|       |
  141|  76.9k|        if (frame_size > size || ptr > data + size - frame_size)
  ------------------
  |  Branch (141:13): [True: 382, False: 76.6k]
  |  Branch (141:34): [True: 131, False: 76.4k]
  ------------------
  142|    513|            break;
  143|       |
  144|  76.4k|        if (!frame_size) continue;
  ------------------
  |  Branch (144:13): [True: 280, False: 76.2k]
  ------------------
  145|       |
  146|  76.2k|        if (!have_seq_hdr) {
  ------------------
  |  Branch (146:13): [True: 13.4k, False: 62.7k]
  ------------------
  147|  13.4k|            Dav1dSequenceHeader seq;
  148|  13.4k|            int err = dav1d_parse_sequence_header(&seq, ptr, frame_size);
  149|       |            // skip frames until we see a sequence header
  150|  13.4k|            if  (err != 0) {
  ------------------
  |  Branch (150:18): [True: 4.28k, False: 9.16k]
  ------------------
  151|  4.28k|                ptr += frame_size;
  152|  4.28k|                continue;
  153|  4.28k|            }
  154|  9.16k|            have_seq_hdr = 1;
  155|  9.16k|        }
  156|       |
  157|       |        // copy frame data to a new buffer to catch reads past the end of input
  158|  71.9k|        p = dav1d_data_create(&buf, frame_size);
  159|  71.9k|        if (!p) goto cleanup;
  ------------------
  |  Branch (159:13): [True: 0, False: 71.9k]
  ------------------
  160|  71.9k|        memcpy(p, ptr, frame_size);
  161|  71.9k|        ptr += frame_size;
  162|       |
  163|  75.6k|        do {
  164|  75.6k|            if ((err = dav1d_send_data(ctx, &buf)) < 0) {
  ------------------
  |  Branch (164:17): [True: 53.1k, False: 22.5k]
  ------------------
  165|  53.1k|                if (err != DAV1D_ERR(EAGAIN))
  ------------------
  |  |   58|  53.1k|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  |  Branch (165:21): [True: 49.2k, False: 3.87k]
  ------------------
  166|  49.2k|                    break;
  167|  53.1k|            }
  168|  26.4k|            memset(&pic, 0, sizeof(pic));
  169|  26.4k|            err = dav1d_get_picture(ctx, &pic);
  170|  26.4k|            if (err == 0) {
  ------------------
  |  Branch (170:17): [True: 19.8k, False: 6.57k]
  ------------------
  171|  19.8k|                dav1d_picture_unref(&pic);
  172|  19.8k|            } else if (err != DAV1D_ERR(EAGAIN)) {
  ------------------
  |  |   58|  6.57k|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  |  Branch (172:24): [True: 98, False: 6.47k]
  ------------------
  173|     98|                break;
  174|     98|            }
  175|  26.4k|        } while (buf.sz > 0);
  ------------------
  |  Branch (175:18): [True: 3.77k, False: 22.5k]
  ------------------
  176|       |
  177|  71.9k|        if (buf.sz > 0)
  ------------------
  |  Branch (177:13): [True: 49.3k, False: 22.5k]
  ------------------
  178|  49.3k|            dav1d_data_unref(&buf);
  179|  71.9k|    }
  180|       |
  181|  9.51k|    memset(&pic, 0, sizeof(pic));
  182|  9.51k|    if ((err = dav1d_get_picture(ctx, &pic)) == 0) {
  ------------------
  |  Branch (182:9): [True: 238, False: 9.28k]
  ------------------
  183|       |        /* Test calling dav1d_picture_unref() after dav1d_close() */
  184|  1.77k|        do {
  185|  1.77k|            Dav1dPicture pic2 = { 0 };
  186|  1.77k|            if ((err = dav1d_get_picture(ctx, &pic2)) == 0)
  ------------------
  |  Branch (186:17): [True: 1.40k, False: 370]
  ------------------
  187|  1.40k|                dav1d_picture_unref(&pic2);
  188|  1.77k|        } while (err != DAV1D_ERR(EAGAIN));
  ------------------
  |  |   58|  1.77k|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  |  Branch (188:18): [True: 1.53k, False: 238]
  ------------------
  189|       |
  190|    238|        dav1d_close(&ctx);
  191|    238|        dav1d_picture_unref(&pic);
  192|    238|        return 0;
  193|    238|    }
  194|       |
  195|  9.28k|cleanup:
  196|  9.28k|    dav1d_close(&ctx);
  197|  9.28k|end:
  198|  9.28k|    return 0;
  199|  9.28k|}
dav1d_fuzzer.c:r32le:
   52|  76.9k|static unsigned r32le(const uint8_t *const p) {
   53|  76.9k|    return ((uint32_t)p[3] << 24U) | (p[2] << 16U) | (p[1] << 8U) | p[0];
   54|  76.9k|}

