obu.c:clz:
  186|  5.23k|static inline int clz(const unsigned int mask) {
  187|  5.23k|    return __builtin_clz(mask);
  188|  5.23k|}
decode.c:ctz:
  182|   108k|static inline int ctz(const unsigned int mask) {
  183|   108k|    return __builtin_ctz(mask);
  184|   108k|}
decode.c:clz:
  186|  6.75M|static inline int clz(const unsigned int mask) {
  187|  6.75M|    return __builtin_clz(mask);
  188|  6.75M|}
getbits.c:clz:
  186|   110k|static inline int clz(const unsigned int mask) {
  187|   110k|    return __builtin_clz(mask);
  188|   110k|}
lf_mask.c:clz:
  186|  1.95M|static inline int clz(const unsigned int mask) {
  187|  1.95M|    return __builtin_clz(mask);
  188|  1.95M|}
warpmv.c:clz:
  186|  81.4k|static inline int clz(const unsigned int mask) {
  187|  81.4k|    return __builtin_clz(mask);
  188|  81.4k|}
warpmv.c:clzll:
  190|  75.8k|static inline int clzll(const unsigned long long mask) {
  191|  75.8k|    return __builtin_clzll(mask);
  192|  75.8k|}
looprestoration_tmpl.c:clz:
  186|   433k|static inline int clz(const unsigned int mask) {
  187|   433k|    return __builtin_clz(mask);
  188|   433k|}
recon_tmpl.c:clz:
  186|  15.9M|static inline int clz(const unsigned int mask) {
  187|  15.9M|    return __builtin_clz(mask);
  188|  15.9M|}
cdef_apply_tmpl.c:clz:
  186|   412k|static inline int clz(const unsigned int mask) {
  187|   412k|    return __builtin_clz(mask);
  188|   412k|}
ipred_prepare_tmpl.c:clz:
  186|  6.82M|static inline int clz(const unsigned int mask) {
  187|  6.82M|    return __builtin_clz(mask);
  188|  6.82M|}

fg_apply_tmpl.c:PXSTRIDE:
   79|  42.1k|static inline ptrdiff_t PXSTRIDE(const ptrdiff_t x) {
   80|  42.1k|    assert(!(x & 1));
  ------------------
  |  Branch (80:5): [True: 42.1k, False: 0]
  ------------------
   81|  42.1k|    return x >> 1;
   82|  42.1k|}
itx_tmpl.c:PXSTRIDE:
   79|  4.82M|static inline ptrdiff_t PXSTRIDE(const ptrdiff_t x) {
   80|  4.82M|    assert(!(x & 1));
  ------------------
  |  Branch (80:5): [True: 4.82M, False: 0]
  ------------------
   81|  4.82M|    return x >> 1;
   82|  4.82M|}
looprestoration_tmpl.c:PXSTRIDE:
   79|  1.15M|static inline ptrdiff_t PXSTRIDE(const ptrdiff_t x) {
   80|  1.15M|    assert(!(x & 1));
  ------------------
  |  Branch (80:5): [True: 1.15M, False: 0]
  ------------------
   81|  1.15M|    return x >> 1;
   82|  1.15M|}
recon_tmpl.c:PXSTRIDE:
   79|  13.5M|static inline ptrdiff_t PXSTRIDE(const ptrdiff_t x) {
   80|  13.5M|    assert(!(x & 1));
  ------------------
  |  Branch (80:5): [True: 13.5M, False: 0]
  ------------------
   81|  13.5M|    return x >> 1;
   82|  13.5M|}
cdef_apply_tmpl.c:PXSTRIDE:
   79|  3.02M|static inline ptrdiff_t PXSTRIDE(const ptrdiff_t x) {
   80|  3.02M|    assert(!(x & 1));
  ------------------
  |  Branch (80:5): [True: 3.02M, False: 0]
  ------------------
   81|  3.02M|    return x >> 1;
   82|  3.02M|}
ipred_prepare_tmpl.c:PXSTRIDE:
   79|  74.7M|static inline ptrdiff_t PXSTRIDE(const ptrdiff_t x) {
   80|  74.7M|    assert(!(x & 1));
  ------------------
  |  Branch (80:5): [True: 74.7M, False: 0]
  ------------------
   81|  74.7M|    return x >> 1;
   82|  74.7M|}
ipred_prepare_tmpl.c:pixel_set:
   66|  1.14M|static inline void pixel_set(pixel *const dst, const int val, const int num) {
   67|  20.1M|    for (int n = 0; n < num; n++)
  ------------------
  |  Branch (67:21): [True: 19.0M, False: 1.14M]
  ------------------
   68|  19.0M|        dst[n] = val;
   69|  1.14M|}
lf_apply_tmpl.c:PXSTRIDE:
   79|  2.45M|static inline ptrdiff_t PXSTRIDE(const ptrdiff_t x) {
   80|  2.45M|    assert(!(x & 1));
  ------------------
  |  Branch (80:5): [True: 2.45M, False: 0]
  ------------------
   81|  2.45M|    return x >> 1;
   82|  2.45M|}
lr_apply_tmpl.c:PXSTRIDE:
   79|   455k|static inline ptrdiff_t PXSTRIDE(const ptrdiff_t x) {
   80|   455k|    assert(!(x & 1));
  ------------------
  |  Branch (80:5): [True: 455k, False: 0]
  ------------------
   81|   455k|    return x >> 1;
   82|   455k|}

lib.c:umin:
   47|  9.69k|static inline unsigned umin(const unsigned a, const unsigned b) {
   48|  9.69k|    return a < b ? a : b;
  ------------------
  |  Branch (48:12): [True: 0, False: 9.69k]
  ------------------
   49|  9.69k|}
obu.c:ulog2:
   67|  5.23k|static inline int ulog2(const unsigned v) {
   68|  5.23k|    return 31 ^ clz(v);
   69|  5.23k|}
obu.c:imin:
   39|   297k|static inline int imin(const int a, const int b) {
   40|   297k|    return a < b ? a : b;
  ------------------
  |  Branch (40:12): [True: 230k, False: 66.1k]
  ------------------
   41|   297k|}
obu.c:imax:
   35|   219k|static inline int imax(const int a, const int b) {
   36|   219k|    return a > b ? a : b;
  ------------------
  |  Branch (36:12): [True: 23.0k, False: 196k]
  ------------------
   37|   219k|}
obu.c:iclip_u8:
   55|  85.3k|static inline int iclip_u8(const int v) {
   56|  85.3k|    return iclip(v, 0, 255);
   57|  85.3k|}
obu.c:iclip:
   51|  85.3k|static inline int iclip(const int v, const int min, const int max) {
   52|  85.3k|    return v < min ? min : v > max ? max : v;
  ------------------
  |  Branch (52:12): [True: 2.19k, False: 83.1k]
  |  Branch (52:28): [True: 878, False: 82.2k]
  ------------------
   53|  85.3k|}
refmvs.c:imin:
   39|  22.2M|static inline int imin(const int a, const int b) {
   40|  22.2M|    return a < b ? a : b;
  ------------------
  |  Branch (40:12): [True: 10.4M, False: 11.7M]
  ------------------
   41|  22.2M|}
refmvs.c:apply_sign:
   59|   390k|static inline int apply_sign(const int v, const int s) {
   60|   390k|    return s < 0 ? -v : v;
  ------------------
  |  Branch (60:12): [True: 139k, False: 251k]
  ------------------
   61|   390k|}
refmvs.c:imax:
   35|  11.0M|static inline int imax(const int a, const int b) {
   36|  11.0M|    return a > b ? a : b;
  ------------------
  |  Branch (36:12): [True: 1.92M, False: 9.16M]
  ------------------
   37|  11.0M|}
refmvs.c:iclip:
   51|  9.82M|static inline int iclip(const int v, const int min, const int max) {
   52|  9.82M|    return v < min ? min : v > max ? max : v;
  ------------------
  |  Branch (52:12): [True: 201k, False: 9.62M]
  |  Branch (52:28): [True: 191k, False: 9.43M]
  ------------------
   53|  9.82M|}
wedge.c:imax:
   35|    256|static inline int imax(const int a, const int b) {
   36|    256|    return a > b ? a : b;
  ------------------
  |  Branch (36:12): [True: 128, False: 128]
  ------------------
   37|    256|}
wedge.c:imin:
   39|  2.48k|static inline int imin(const int a, const int b) {
   40|  2.48k|    return a < b ? a : b;
  ------------------
  |  Branch (40:12): [True: 1.41k, False: 1.06k]
  ------------------
   41|  2.48k|}
fg_apply_tmpl.c:imin:
   39|  15.9k|static inline int imin(const int a, const int b) {
   40|  15.9k|    return a < b ? a : b;
  ------------------
  |  Branch (40:12): [True: 5.77k, False: 10.1k]
  ------------------
   41|  15.9k|}
cdf.c:imin:
   39|   131k|static inline int imin(const int a, const int b) {
   40|   131k|    return a < b ? a : b;
  ------------------
  |  Branch (40:12): [True: 32.8k, False: 98.6k]
  ------------------
   41|   131k|}
decode.c:iclip:
   51|  1.08M|static inline int iclip(const int v, const int min, const int max) {
   52|  1.08M|    return v < min ? min : v > max ? max : v;
  ------------------
  |  Branch (52:12): [True: 75.5k, False: 1.01M]
  |  Branch (52:28): [True: 21.4k, False: 992k]
  ------------------
   53|  1.08M|}
decode.c:apply_sign:
   59|   118k|static inline int apply_sign(const int v, const int s) {
   60|   118k|    return s < 0 ? -v : v;
  ------------------
  |  Branch (60:12): [True: 55.7k, False: 62.6k]
  ------------------
   61|   118k|}
decode.c:ulog2:
   67|  6.75M|static inline int ulog2(const unsigned v) {
   68|  6.75M|    return 31 ^ clz(v);
   69|  6.75M|}
decode.c:imax:
   35|  6.26M|static inline int imax(const int a, const int b) {
   36|  6.26M|    return a > b ? a : b;
  ------------------
  |  Branch (36:12): [True: 2.35M, False: 3.90M]
  ------------------
   37|  6.26M|}
decode.c:imin:
   39|  15.6M|static inline int imin(const int a, const int b) {
   40|  15.6M|    return a < b ? a : b;
  ------------------
  |  Branch (40:12): [True: 9.49M, False: 6.17M]
  ------------------
   41|  15.6M|}
decode.c:iclip_u8:
   55|   733k|static inline int iclip_u8(const int v) {
   56|   733k|    return iclip(v, 0, 255);
   57|   733k|}
getbits.c:ulog2:
   67|   110k|static inline int ulog2(const unsigned v) {
   68|   110k|    return 31 ^ clz(v);
   69|   110k|}
getbits.c:inv_recenter:
   75|  40.7k|static inline unsigned inv_recenter(const unsigned r, const unsigned v) {
   76|  40.7k|    if (v > (r << 1))
  ------------------
  |  Branch (76:9): [True: 2.17k, False: 38.6k]
  ------------------
   77|  2.17k|        return v;
   78|  38.6k|    else if ((v & 1) == 0)
  ------------------
  |  Branch (78:14): [True: 24.7k, False: 13.8k]
  ------------------
   79|  24.7k|        return (v >> 1) + r;
   80|  13.8k|    else
   81|  13.8k|        return r - ((v + 1) >> 1);
   82|  40.7k|}
lf_mask.c:imin:
   39|  26.7M|static inline int imin(const int a, const int b) {
   40|  26.7M|    return a < b ? a : b;
  ------------------
  |  Branch (40:12): [True: 2.68M, False: 24.0M]
  ------------------
   41|  26.7M|}
lf_mask.c:ulog2:
   67|  1.95M|static inline int ulog2(const unsigned v) {
   68|  1.95M|    return 31 ^ clz(v);
   69|  1.95M|}
lf_mask.c:imax:
   35|   943k|static inline int imax(const int a, const int b) {
   36|   943k|    return a > b ? a : b;
  ------------------
  |  Branch (36:12): [True: 887k, False: 56.2k]
  ------------------
   37|   943k|}
lf_mask.c:iclip:
   51|  2.79M|static inline int iclip(const int v, const int min, const int max) {
   52|  2.79M|    return v < min ? min : v > max ? max : v;
  ------------------
  |  Branch (52:12): [True: 262k, False: 2.52M]
  |  Branch (52:28): [True: 60.4k, False: 2.46M]
  ------------------
   53|  2.79M|}
msac.c:inv_recenter:
   75|   134k|static inline unsigned inv_recenter(const unsigned r, const unsigned v) {
   76|   134k|    if (v > (r << 1))
  ------------------
  |  Branch (76:9): [True: 31.5k, False: 102k]
  ------------------
   77|  31.5k|        return v;
   78|   102k|    else if ((v & 1) == 0)
  ------------------
  |  Branch (78:14): [True: 48.0k, False: 54.8k]
  ------------------
   79|  48.0k|        return (v >> 1) + r;
   80|  54.8k|    else
   81|  54.8k|        return r - ((v + 1) >> 1);
   82|   134k|}
warpmv.c:apply_sign:
   59|   407k|static inline int apply_sign(const int v, const int s) {
   60|   407k|    return s < 0 ? -v : v;
  ------------------
  |  Branch (60:12): [True: 87.3k, False: 319k]
  ------------------
   61|   407k|}
warpmv.c:ulog2:
   67|  81.4k|static inline int ulog2(const unsigned v) {
   68|  81.4k|    return 31 ^ clz(v);
   69|  81.4k|}
warpmv.c:apply_sign64:
   63|   542k|static inline int apply_sign64(const int v, const int64_t s) {
   64|   542k|    return s < 0 ? -v : v;
  ------------------
  |  Branch (64:12): [True: 53.4k, False: 488k]
  ------------------
   65|   542k|}
warpmv.c:iclip:
   51|   780k|static inline int iclip(const int v, const int min, const int max) {
   52|   780k|    return v < min ? min : v > max ? max : v;
  ------------------
  |  Branch (52:12): [True: 13.3k, False: 767k]
  |  Branch (52:28): [True: 21.8k, False: 745k]
  ------------------
   53|   780k|}
warpmv.c:u64log2:
   71|  75.8k|static inline int u64log2(const uint64_t v) {
   72|  75.8k|    return 63 ^ clzll(v);
   73|  75.8k|}
itx_tmpl.c:iclip:
   51|   321M|static inline int iclip(const int v, const int min, const int max) {
   52|   321M|    return v < min ? min : v > max ? max : v;
  ------------------
  |  Branch (52:12): [True: 15.4M, False: 305M]
  |  Branch (52:28): [True: 16.8M, False: 288M]
  ------------------
   53|   321M|}
itx_tmpl.c:imin:
   39|   208k|static inline int imin(const int a, const int b) {
   40|   208k|    return a < b ? a : b;
  ------------------
  |  Branch (40:12): [True: 44.2k, False: 163k]
  ------------------
   41|   208k|}
looprestoration_tmpl.c:iclip:
   51|  50.6M|static inline int iclip(const int v, const int min, const int max) {
   52|  50.6M|    return v < min ? min : v > max ? max : v;
  ------------------
  |  Branch (52:12): [True: 5.30k, False: 50.6M]
  |  Branch (52:28): [True: 4.53k, False: 50.6M]
  ------------------
   53|  50.6M|}
looprestoration_tmpl.c:imax:
   35|  51.0M|static inline int imax(const int a, const int b) {
   36|  51.0M|    return a > b ? a : b;
  ------------------
  |  Branch (36:12): [True: 3.55M, False: 47.4M]
  ------------------
   37|  51.0M|}
looprestoration_tmpl.c:umin:
   47|  51.0M|static inline unsigned umin(const unsigned a, const unsigned b) {
   48|  51.0M|    return a < b ? a : b;
  ------------------
  |  Branch (48:12): [True: 51.0M, False: 18.2k]
  ------------------
   49|  51.0M|}
recon_tmpl.c:ulog2:
   67|  15.9M|static inline int ulog2(const unsigned v) {
   68|  15.9M|    return 31 ^ clz(v);
   69|  15.9M|}
recon_tmpl.c:imin:
   39|  53.3M|static inline int imin(const int a, const int b) {
   40|  53.3M|    return a < b ? a : b;
  ------------------
  |  Branch (40:12): [True: 44.4M, False: 8.90M]
  ------------------
   41|  53.3M|}
recon_tmpl.c:imax:
   35|  3.71M|static inline int imax(const int a, const int b) {
   36|  3.71M|    return a > b ? a : b;
  ------------------
  |  Branch (36:12): [True: 2.11M, False: 1.60M]
  ------------------
   37|  3.71M|}
recon_tmpl.c:umin:
   47|   166M|static inline unsigned umin(const unsigned a, const unsigned b) {
   48|   166M|    return a < b ? a : b;
  ------------------
  |  Branch (48:12): [True: 93.4M, False: 72.7M]
  ------------------
   49|   166M|}
recon_tmpl.c:apply_sign64:
   63|  2.50M|static inline int apply_sign64(const int v, const int64_t s) {
   64|  2.50M|    return s < 0 ? -v : v;
  ------------------
  |  Branch (64:12): [True: 208k, False: 2.29M]
  ------------------
   65|  2.50M|}
recon_tmpl.c:iclip:
   51|   757k|static inline int iclip(const int v, const int min, const int max) {
   52|   757k|    return v < min ? min : v > max ? max : v;
  ------------------
  |  Branch (52:12): [True: 55.5k, False: 701k]
  |  Branch (52:28): [True: 9.03k, False: 692k]
  ------------------
   53|   757k|}
itx_1d.c:iclip:
   51|   831M|static inline int iclip(const int v, const int min, const int max) {
   52|   831M|    return v < min ? min : v > max ? max : v;
  ------------------
  |  Branch (52:12): [True: 8.83M, False: 822M]
  |  Branch (52:28): [True: 8.68M, False: 814M]
  ------------------
   53|   831M|}
scan.c:imax:
   35|  3.34k|static inline int imax(const int a, const int b) {
   36|  3.34k|    return a > b ? a : b;
  ------------------
  |  Branch (36:12): [True: 2.82k, False: 523]
  ------------------
   37|  3.34k|}
cdef_apply_tmpl.c:imin:
   39|  1.84M|static inline int imin(const int a, const int b) {
   40|  1.84M|    return a < b ? a : b;
  ------------------
  |  Branch (40:12): [True: 1.60M, False: 241k]
  ------------------
   41|  1.84M|}
cdef_apply_tmpl.c:ulog2:
   67|   412k|static inline int ulog2(const unsigned v) {
   68|   412k|    return 31 ^ clz(v);
   69|   412k|}
ipred_prepare_tmpl.c:imin:
   39|  21.6M|static inline int imin(const int a, const int b) {
   40|  21.6M|    return a < b ? a : b;
  ------------------
  |  Branch (40:12): [True: 19.1M, False: 2.48M]
  ------------------
   41|  21.6M|}
lf_apply_tmpl.c:imin:
   39|   653k|static inline int imin(const int a, const int b) {
   40|   653k|    return a < b ? a : b;
  ------------------
  |  Branch (40:12): [True: 202k, False: 450k]
  ------------------
   41|   653k|}
lr_apply_tmpl.c:imin:
   39|  85.5k|static inline int imin(const int a, const int b) {
   40|  85.5k|    return a < b ? a : b;
  ------------------
  |  Branch (40:12): [True: 36.4k, False: 49.0k]
  ------------------
   41|  85.5k|}

dav1d_cdef_brow_8bpc:
  102|  62.8k|{
  103|  62.8k|    Dav1dFrameContext *const f = (Dav1dFrameContext *)tc->f;
  104|  62.8k|    const int bitdepth_min_8 = BITDEPTH == 8 ? 0 : f->cur.p.bpc - 8;
  ------------------
  |  Branch (104:32): [True: 62.8k, Folded]
  ------------------
  105|  62.8k|    const Dav1dDSPContext *const dsp = f->dsp;
  106|  62.8k|    enum CdefEdgeFlags edges = CDEF_HAVE_BOTTOM | (by_start > 0 ? CDEF_HAVE_TOP : 0);
  ------------------
  |  Branch (106:52): [True: 60.4k, False: 2.44k]
  ------------------
  107|  62.8k|    pixel *ptrs[3] = { p[0], p[1], p[2] };
  108|  62.8k|    const int sbsz = 16;
  109|  62.8k|    const int sb64w = f->sb128w << 1;
  110|  62.8k|    const int damping = f->frame_hdr->cdef.damping + bitdepth_min_8;
  111|  62.8k|    const enum Dav1dPixelLayout layout = f->cur.p.layout;
  112|  62.8k|    const int uv_idx = DAV1D_PIXEL_LAYOUT_I444 - layout;
  113|  62.8k|    const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420;
  114|  62.8k|    const int ss_hor = layout != DAV1D_PIXEL_LAYOUT_I444;
  115|  62.8k|    static const uint8_t uv_dirs[2][8] = { { 0, 1, 2, 3, 4, 5, 6, 7 },
  116|  62.8k|                                           { 7, 0, 2, 4, 5, 6, 6, 6 } };
  117|  62.8k|    const uint8_t *uv_dir = uv_dirs[layout == DAV1D_PIXEL_LAYOUT_I422];
  118|  62.8k|    const int have_tt = f->c->n_tc > 1;
  119|  62.8k|    const int sb128 = f->seq_hdr->sb128;
  120|  62.8k|    const int resize = f->frame_hdr->width[0] != f->frame_hdr->width[1];
  121|  62.8k|    const ptrdiff_t y_stride = PXSTRIDE(f->cur.stride[0]);
  ------------------
  |  |   53|  62.8k|#define PXSTRIDE(x) (x)
  ------------------
  122|  62.8k|    const ptrdiff_t uv_stride = PXSTRIDE(f->cur.stride[1]);
  ------------------
  |  |   53|  62.8k|#define PXSTRIDE(x) (x)
  ------------------
  123|       |
  124|   363k|    for (int bit = 0, by = by_start; by < by_end; by += 2, edges |= CDEF_HAVE_TOP) {
  ------------------
  |  Branch (124:38): [True: 300k, False: 62.8k]
  ------------------
  125|   300k|        const int tf = tc->top_pre_cdef_toggle;
  126|   300k|        const int by_idx = (by & 30) >> 1;
  127|   300k|        if (by + 2 >= f->bh) edges &= ~CDEF_HAVE_BOTTOM;
  ------------------
  |  Branch (127:13): [True: 2.21k, False: 298k]
  ------------------
  128|       |
  129|   300k|        if ((!have_tt || sbrow_start || by + 2 < by_end) &&
  ------------------
  |  Branch (129:14): [True: 300k, False: 0]
  |  Branch (129:26): [True: 0, False: 0]
  |  Branch (129:41): [True: 0, False: 0]
  ------------------
  130|   300k|            edges & CDEF_HAVE_BOTTOM)
  ------------------
  |  Branch (130:13): [True: 298k, False: 2.21k]
  ------------------
  131|   298k|        {
  132|       |            // backup pre-filter data for next iteration
  133|   298k|            pixel *const cdef_top_bak[3] = {
  134|   298k|                f->lf.cdef_line[!tf][0] + have_tt * sby * 4 * y_stride,
  135|   298k|                f->lf.cdef_line[!tf][1] + have_tt * sby * 8 * uv_stride,
  136|   298k|                f->lf.cdef_line[!tf][2] + have_tt * sby * 8 * uv_stride
  137|   298k|            };
  138|   298k|            backup2lines(cdef_top_bak, ptrs, f->cur.stride, layout);
  139|   298k|        }
  140|       |
  141|   300k|        ALIGN_STK_16(pixel, lr_bak, 2 /* idx */, [3 /* plane */][8 /* y */][2 /* x */]);
  ------------------
  |  |  100|   300k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|   300k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
  142|   300k|        pixel *iptrs[3] = { ptrs[0], ptrs[1], ptrs[2] };
  143|   300k|        edges &= ~CDEF_HAVE_LEFT;
  144|   300k|        edges |= CDEF_HAVE_RIGHT;
  145|   300k|        enum Backup2x8Flags prev_flag = 0;
  146|  1.19M|        for (int sbx = 0; sbx < sb64w; sbx++, edges |= CDEF_HAVE_LEFT) {
  ------------------
  |  Branch (146:27): [True: 896k, False: 300k]
  ------------------
  147|   896k|            const int sb128x = sbx >> 1;
  148|   896k|            const int sb64_idx = ((by & sbsz) >> 3) + (sbx & 1);
  149|   896k|            const int cdef_idx = lflvl[sb128x].cdef_idx[sb64_idx];
  150|   896k|            if (cdef_idx == -1 ||
  ------------------
  |  Branch (150:17): [True: 672k, False: 223k]
  ------------------
  151|   223k|                (!f->frame_hdr->cdef.y_strength[cdef_idx] &&
  ------------------
  |  Branch (151:18): [True: 85.4k, False: 138k]
  ------------------
  152|  85.4k|                 !f->frame_hdr->cdef.uv_strength[cdef_idx]))
  ------------------
  |  Branch (152:18): [True: 77.4k, False: 8.02k]
  ------------------
  153|   750k|            {
  154|   750k|                prev_flag = 0;
  155|   750k|                goto next_sb;
  156|   750k|            }
  157|       |
  158|       |            // Create a complete 32-bit mask for the sb row ahead of time.
  159|   146k|            const uint16_t (*noskip_row)[2] = &lflvl[sb128x].noskip_mask[by_idx];
  160|   146k|            const unsigned noskip_mask = (unsigned) noskip_row[0][1] << 16 |
  161|   146k|                                                    noskip_row[0][0];
  162|       |
  163|   146k|            const int y_lvl = f->frame_hdr->cdef.y_strength[cdef_idx];
  164|   146k|            const int uv_lvl = f->frame_hdr->cdef.uv_strength[cdef_idx];
  165|   146k|            const enum Backup2x8Flags flag = !!y_lvl + (!!uv_lvl << 1);
  166|       |
  167|   146k|            const int y_pri_lvl = (y_lvl >> 2) << bitdepth_min_8;
  168|   146k|            int y_sec_lvl = y_lvl & 3;
  169|   146k|            y_sec_lvl += y_sec_lvl == 3;
  170|   146k|            y_sec_lvl <<= bitdepth_min_8;
  171|       |
  172|   146k|            const int uv_pri_lvl = (uv_lvl >> 2) << bitdepth_min_8;
  173|   146k|            int uv_sec_lvl = uv_lvl & 3;
  174|   146k|            uv_sec_lvl += uv_sec_lvl == 3;
  175|   146k|            uv_sec_lvl <<= bitdepth_min_8;
  176|       |
  177|   146k|            pixel *bptrs[3] = { iptrs[0], iptrs[1], iptrs[2] };
  178|  1.18M|            for (int bx = sbx * sbsz; bx < imin((sbx + 1) * sbsz, f->bw);
  ------------------
  |  Branch (178:39): [True: 1.03M, False: 146k]
  ------------------
  179|  1.03M|                 bx += 2, edges |= CDEF_HAVE_LEFT)
  180|  1.03M|            {
  181|  1.03M|                if (bx + 2 >= f->bw) edges &= ~CDEF_HAVE_RIGHT;
  ------------------
  |  Branch (181:21): [True: 28.3k, False: 1.00M]
  ------------------
  182|       |
  183|       |                // check if this 8x8 block had any coded coefficients; if not,
  184|       |                // go to the next block
  185|  1.03M|                const uint32_t bx_mask = 3U << (bx & 30);
  186|  1.03M|                if (!(noskip_mask & bx_mask)) {
  ------------------
  |  Branch (186:21): [True: 138k, False: 895k]
  ------------------
  187|   138k|                    prev_flag = 0;
  188|   138k|                    goto next_b;
  189|   138k|                }
  190|   895k|                const enum Backup2x8Flags do_left = (prev_flag ^ flag) & flag;
  191|   895k|                prev_flag = flag;
  192|   895k|                if (do_left && edges & CDEF_HAVE_LEFT) {
  ------------------
  |  Branch (192:21): [True: 53.6k, False: 841k]
  |  Branch (192:32): [True: 26.7k, False: 26.9k]
  ------------------
  193|       |                    // we didn't backup the prefilter data because it wasn't
  194|       |                    // there, so do it here instead
  195|  26.7k|                    backup2x8(lr_bak[bit], bptrs, f->cur.stride, 0, layout, do_left);
  196|  26.7k|                }
  197|   895k|                if (edges & CDEF_HAVE_RIGHT) {
  ------------------
  |  Branch (197:21): [True: 869k, False: 25.3k]
  ------------------
  198|       |                    // backup pre-filter data for next iteration
  199|   869k|                    backup2x8(lr_bak[!bit], bptrs, f->cur.stride, 8, layout, flag);
  200|   869k|                }
  201|       |
  202|   895k|                int dir;
  203|   895k|                unsigned variance;
  204|   895k|                if (y_pri_lvl || uv_pri_lvl)
  ------------------
  |  Branch (204:21): [True: 727k, False: 168k]
  |  Branch (204:34): [True: 101k, False: 66.5k]
  ------------------
  205|   828k|                    dir = dsp->cdef.dir(bptrs[0], f->cur.stride[0],
  206|   828k|                                        &variance HIGHBD_CALL_SUFFIX);
  207|       |
  208|   895k|                const pixel *top, *bot;
  209|   895k|                ptrdiff_t offset;
  210|       |
  211|   895k|                if (!have_tt) goto st_y;
  ------------------
  |  Branch (211:21): [True: 895k, False: 0]
  ------------------
  212|      0|                if (sbrow_start && by == by_start) {
  ------------------
  |  Branch (212:21): [True: 0, False: 0]
  |  Branch (212:36): [True: 0, False: 0]
  ------------------
  213|      0|                    if (resize) {
  ------------------
  |  Branch (213:25): [True: 0, False: 0]
  ------------------
  214|      0|                        offset = (sby - 1) * 4 * y_stride + bx * 4;
  215|      0|                        top = &f->lf.cdef_lpf_line[0][offset];
  216|      0|                    } else {
  217|      0|                        offset = (sby * (4 << sb128) - 4) * y_stride + bx * 4;
  218|      0|                        top = &f->lf.lr_lpf_line[0][offset];
  219|      0|                    }
  220|      0|                    bot = bptrs[0] + 8 * y_stride;
  221|      0|                } else if (!sbrow_start && by + 2 >= by_end) {
  ------------------
  |  Branch (221:28): [True: 0, False: 0]
  |  Branch (221:44): [True: 0, False: 0]
  ------------------
  222|      0|                    top = &f->lf.cdef_line[tf][0][sby * 4 * y_stride + bx * 4];
  223|      0|                    if (resize) {
  ------------------
  |  Branch (223:25): [True: 0, False: 0]
  ------------------
  224|      0|                        offset = (sby * 4 + 2) * y_stride + bx * 4;
  225|      0|                        bot = &f->lf.cdef_lpf_line[0][offset];
  226|      0|                    } else {
  227|      0|                        const int line = sby * (4 << sb128) + 4 * sb128 + 2;
  228|      0|                        offset = line * y_stride + bx * 4;
  229|      0|                        bot = &f->lf.lr_lpf_line[0][offset];
  230|      0|                    }
  231|      0|                } else {
  232|   895k|            st_y:;
  233|   895k|                    offset = sby * 4 * y_stride;
  234|   895k|                    top = &f->lf.cdef_line[tf][0][have_tt * offset + bx * 4];
  235|   895k|                    bot = bptrs[0] + 8 * y_stride;
  236|   895k|                }
  237|   895k|                if (y_pri_lvl) {
  ------------------
  |  Branch (237:21): [True: 727k, False: 168k]
  ------------------
  238|   727k|                    const int adj_y_pri_lvl = adjust_strength(y_pri_lvl, variance);
  239|   727k|                    if (adj_y_pri_lvl || y_sec_lvl)
  ------------------
  |  Branch (239:25): [True: 441k, False: 285k]
  |  Branch (239:42): [True: 196k, False: 88.2k]
  ------------------
  240|   638k|                        dsp->cdef.fb[0](bptrs[0], f->cur.stride[0], lr_bak[bit][0],
  241|   638k|                                        top, bot, adj_y_pri_lvl, y_sec_lvl,
  242|   638k|                                        dir, damping, edges HIGHBD_CALL_SUFFIX);
  243|   727k|                } else if (y_sec_lvl)
  ------------------
  |  Branch (243:28): [True: 112k, False: 55.1k]
  ------------------
  244|   112k|                    dsp->cdef.fb[0](bptrs[0], f->cur.stride[0], lr_bak[bit][0],
  245|   112k|                                    top, bot, 0, y_sec_lvl, 0, damping,
  246|   112k|                                    edges HIGHBD_CALL_SUFFIX);
  247|       |
  248|   895k|                if (!uv_lvl) goto skip_uv;
  ------------------
  |  Branch (248:21): [True: 125k, False: 769k]
  ------------------
  249|   895k|                assert(layout != DAV1D_PIXEL_LAYOUT_I400);
  ------------------
  |  Branch (249:17): [True: 769k, False: 0]
  ------------------
  250|       |
  251|   769k|                const int uvdir = uv_pri_lvl ? uv_dir[dir] : 0;
  ------------------
  |  Branch (251:35): [True: 689k, False: 80.3k]
  ------------------
  252|  2.30M|                for (int pl = 1; pl <= 2; pl++) {
  ------------------
  |  Branch (252:34): [True: 1.53M, False: 769k]
  ------------------
  253|  1.53M|                    if (!have_tt) goto st_uv;
  ------------------
  |  Branch (253:25): [True: 1.53M, False: 0]
  ------------------
  254|      0|                    if (sbrow_start && by == by_start) {
  ------------------
  |  Branch (254:25): [True: 0, False: 0]
  |  Branch (254:40): [True: 0, False: 0]
  ------------------
  255|      0|                        if (resize) {
  ------------------
  |  Branch (255:29): [True: 0, False: 0]
  ------------------
  256|      0|                            offset = (sby - 1) * 4 * uv_stride + (bx * 4 >> ss_hor);
  257|      0|                            top = &f->lf.cdef_lpf_line[pl][offset];
  258|      0|                        } else {
  259|      0|                            const int line = sby * (4 << sb128) - 4;
  260|      0|                            offset = line * uv_stride + (bx * 4 >> ss_hor);
  261|      0|                            top = &f->lf.lr_lpf_line[pl][offset];
  262|      0|                        }
  263|      0|                        bot = bptrs[pl] + (8 >> ss_ver) * uv_stride;
  264|      0|                    } else if (!sbrow_start && by + 2 >= by_end) {
  ------------------
  |  Branch (264:32): [True: 0, False: 0]
  |  Branch (264:48): [True: 0, False: 0]
  ------------------
  265|      0|                        const ptrdiff_t top_offset = sby * 8 * uv_stride +
  266|      0|                                                     (bx * 4 >> ss_hor);
  267|      0|                        top = &f->lf.cdef_line[tf][pl][top_offset];
  268|      0|                        if (resize) {
  ------------------
  |  Branch (268:29): [True: 0, False: 0]
  ------------------
  269|      0|                            offset = (sby * 4 + 2) * uv_stride + (bx * 4 >> ss_hor);
  270|      0|                            bot = &f->lf.cdef_lpf_line[pl][offset];
  271|      0|                        } else {
  272|      0|                            const int line = sby * (4 << sb128) + 4 * sb128 + 2;
  273|      0|                            offset = line * uv_stride + (bx * 4 >> ss_hor);
  274|      0|                            bot = &f->lf.lr_lpf_line[pl][offset];
  275|      0|                        }
  276|      0|                    } else {
  277|  1.53M|                st_uv:;
  278|  1.53M|                        const ptrdiff_t offset = sby * 8 * uv_stride;
  279|  1.53M|                        top = &f->lf.cdef_line[tf][pl][have_tt * offset + (bx * 4 >> ss_hor)];
  280|  1.53M|                        bot = bptrs[pl] + (8 >> ss_ver) * uv_stride;
  281|  1.53M|                    }
  282|  1.53M|                    dsp->cdef.fb[uv_idx](bptrs[pl], f->cur.stride[1],
  283|  1.53M|                                         lr_bak[bit][pl], top, bot,
  284|  1.53M|                                         uv_pri_lvl, uv_sec_lvl, uvdir,
  285|  1.53M|                                         damping - 1, edges HIGHBD_CALL_SUFFIX);
  286|  1.53M|                }
  287|       |
  288|   895k|            skip_uv:
  289|   895k|                bit ^= 1;
  290|       |
  291|  1.03M|            next_b:
  292|  1.03M|                bptrs[0] += 8;
  293|  1.03M|                bptrs[1] += 8 >> ss_hor;
  294|  1.03M|                bptrs[2] += 8 >> ss_hor;
  295|  1.03M|            }
  296|       |
  297|   896k|        next_sb:
  298|   896k|            iptrs[0] += sbsz * 4;
  299|   896k|            iptrs[1] += sbsz * 4 >> ss_hor;
  300|   896k|            iptrs[2] += sbsz * 4 >> ss_hor;
  301|   896k|        }
  302|       |
  303|   300k|        ptrs[0] += 8 * PXSTRIDE(f->cur.stride[0]);
  ------------------
  |  |   53|   300k|#define PXSTRIDE(x) (x)
  ------------------
  304|   300k|        ptrs[1] += 8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver;
  ------------------
  |  |   53|   300k|#define PXSTRIDE(x) (x)
  ------------------
  305|   300k|        ptrs[2] += 8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver;
  ------------------
  |  |   53|   300k|#define PXSTRIDE(x) (x)
  ------------------
  306|   300k|        tc->top_pre_cdef_toggle ^= 1;
  307|   300k|    }
  308|  62.8k|}
cdef_apply_tmpl.c:backup2lines:
   44|   560k|{
   45|   560k|    const ptrdiff_t y_stride = PXSTRIDE(stride[0]);
  ------------------
  |  |   53|   560k|#define PXSTRIDE(x) (x)
  ------------------
   46|   560k|    if (y_stride < 0)
  ------------------
  |  Branch (46:9): [True: 0, False: 560k]
  ------------------
   47|      0|        pixel_copy(dst[0] + y_stride, src[0] + 7 * y_stride, -2 * y_stride);
  ------------------
  |  |   47|      0|#define pixel_copy memcpy
  ------------------
   48|   560k|    else
   49|   560k|        pixel_copy(dst[0], src[0] + 6 * y_stride, 2 * y_stride);
  ------------------
  |  |   47|   560k|#define pixel_copy memcpy
  ------------------
   50|       |
   51|   560k|    if (layout != DAV1D_PIXEL_LAYOUT_I400) {
  ------------------
  |  Branch (51:9): [True: 226k, False: 333k]
  ------------------
   52|   226k|        const ptrdiff_t uv_stride = PXSTRIDE(stride[1]);
  ------------------
  |  |   53|   226k|#define PXSTRIDE(x) (x)
  ------------------
   53|   226k|        if (uv_stride < 0) {
  ------------------
  |  Branch (53:13): [True: 0, False: 226k]
  ------------------
   54|      0|            const int uv_off = layout == DAV1D_PIXEL_LAYOUT_I420 ? 3 : 7;
  ------------------
  |  Branch (54:32): [True: 0, False: 0]
  ------------------
   55|      0|            pixel_copy(dst[1] + uv_stride, src[1] + uv_off * uv_stride, -2 * uv_stride);
  ------------------
  |  |   47|      0|#define pixel_copy memcpy
  ------------------
   56|      0|            pixel_copy(dst[2] + uv_stride, src[2] + uv_off * uv_stride, -2 * uv_stride);
  ------------------
  |  |   47|      0|#define pixel_copy memcpy
  ------------------
   57|   226k|        } else {
   58|   226k|            const int uv_off = layout == DAV1D_PIXEL_LAYOUT_I420 ? 2 : 6;
  ------------------
  |  Branch (58:32): [True: 111k, False: 115k]
  ------------------
   59|   226k|            pixel_copy(dst[1], src[1] + uv_off * uv_stride, 2 * uv_stride);
  ------------------
  |  |   47|   226k|#define pixel_copy memcpy
  ------------------
   60|   226k|            pixel_copy(dst[2], src[2] + uv_off * uv_stride, 2 * uv_stride);
  ------------------
  |  |   47|   226k|#define pixel_copy memcpy
  ------------------
   61|   226k|        }
   62|   226k|    }
   63|   560k|}
cdef_apply_tmpl.c:backup2x8:
   70|  1.08M|{
   71|  1.08M|    ptrdiff_t y_off = 0;
   72|  1.08M|    if (flag & BACKUP_2X8_Y) {
  ------------------
  |  Branch (72:9): [True: 1.00M, False: 74.5k]
  ------------------
   73|  9.07M|        for (int y = 0; y < 8; y++, y_off += PXSTRIDE(src_stride[0]))
  ------------------
  |  |   53|  8.06M|#define PXSTRIDE(x) (x)
  ------------------
  |  Branch (73:25): [True: 8.06M, False: 1.00M]
  ------------------
   74|  8.06M|            pixel_copy(dst[0][y], &src[0][y_off + x_off - 2], 2);
  ------------------
  |  |   47|  8.06M|#define pixel_copy memcpy
  ------------------
   75|  1.00M|    }
   76|       |
   77|  1.08M|    if (layout == DAV1D_PIXEL_LAYOUT_I400 || !(flag & BACKUP_2X8_UV))
  ------------------
  |  Branch (77:9): [True: 105k, False: 977k]
  |  Branch (77:46): [True: 119k, False: 858k]
  ------------------
   78|   224k|        return;
   79|       |
   80|   858k|    const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420;
   81|   858k|    const int ss_hor = layout != DAV1D_PIXEL_LAYOUT_I444;
   82|       |
   83|   858k|    x_off >>= ss_hor;
   84|   858k|    y_off = 0;
   85|  4.80M|    for (int y = 0; y < (8 >> ss_ver); y++, y_off += PXSTRIDE(src_stride[1])) {
  ------------------
  |  |   53|  3.94M|#define PXSTRIDE(x) (x)
  ------------------
  |  Branch (85:21): [True: 3.94M, False: 858k]
  ------------------
   86|  3.94M|        pixel_copy(dst[1][y], &src[1][y_off + x_off - 2], 2);
  ------------------
  |  |   47|  3.94M|#define pixel_copy memcpy
  ------------------
   87|  3.94M|        pixel_copy(dst[2][y], &src[2][y_off + x_off - 2], 2);
  ------------------
  |  |   47|  3.94M|#define pixel_copy memcpy
  ------------------
   88|  3.94M|    }
   89|   858k|}
cdef_apply_tmpl.c:adjust_strength:
   91|   883k|static int adjust_strength(const int strength, const unsigned var) {
   92|   883k|    if (!var) return 0;
  ------------------
  |  Branch (92:9): [True: 395k, False: 487k]
  ------------------
   93|   487k|    const int i = var >> 6 ? imin(ulog2(var >> 6), 12) : 0;
  ------------------
  |  Branch (93:19): [True: 412k, False: 75.4k]
  ------------------
   94|   487k|    return (strength * (4 + i) + 8) >> 4;
   95|   883k|}
dav1d_cdef_brow_16bpc:
  102|  49.6k|{
  103|  49.6k|    Dav1dFrameContext *const f = (Dav1dFrameContext *)tc->f;
  104|  49.6k|    const int bitdepth_min_8 = BITDEPTH == 8 ? 0 : f->cur.p.bpc - 8;
  ------------------
  |  Branch (104:32): [Folded, False: 49.6k]
  ------------------
  105|  49.6k|    const Dav1dDSPContext *const dsp = f->dsp;
  106|  49.6k|    enum CdefEdgeFlags edges = CDEF_HAVE_BOTTOM | (by_start > 0 ? CDEF_HAVE_TOP : 0);
  ------------------
  |  Branch (106:52): [True: 47.2k, False: 2.40k]
  ------------------
  107|  49.6k|    pixel *ptrs[3] = { p[0], p[1], p[2] };
  108|  49.6k|    const int sbsz = 16;
  109|  49.6k|    const int sb64w = f->sb128w << 1;
  110|  49.6k|    const int damping = f->frame_hdr->cdef.damping + bitdepth_min_8;
  111|  49.6k|    const enum Dav1dPixelLayout layout = f->cur.p.layout;
  112|  49.6k|    const int uv_idx = DAV1D_PIXEL_LAYOUT_I444 - layout;
  113|  49.6k|    const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420;
  114|  49.6k|    const int ss_hor = layout != DAV1D_PIXEL_LAYOUT_I444;
  115|  49.6k|    static const uint8_t uv_dirs[2][8] = { { 0, 1, 2, 3, 4, 5, 6, 7 },
  116|  49.6k|                                           { 7, 0, 2, 4, 5, 6, 6, 6 } };
  117|  49.6k|    const uint8_t *uv_dir = uv_dirs[layout == DAV1D_PIXEL_LAYOUT_I422];
  118|  49.6k|    const int have_tt = f->c->n_tc > 1;
  119|  49.6k|    const int sb128 = f->seq_hdr->sb128;
  120|  49.6k|    const int resize = f->frame_hdr->width[0] != f->frame_hdr->width[1];
  121|  49.6k|    const ptrdiff_t y_stride = PXSTRIDE(f->cur.stride[0]);
  122|  49.6k|    const ptrdiff_t uv_stride = PXSTRIDE(f->cur.stride[1]);
  123|       |
  124|   313k|    for (int bit = 0, by = by_start; by < by_end; by += 2, edges |= CDEF_HAVE_TOP) {
  ------------------
  |  Branch (124:38): [True: 263k, False: 49.6k]
  ------------------
  125|   263k|        const int tf = tc->top_pre_cdef_toggle;
  126|   263k|        const int by_idx = (by & 30) >> 1;
  127|   263k|        if (by + 2 >= f->bh) edges &= ~CDEF_HAVE_BOTTOM;
  ------------------
  |  Branch (127:13): [True: 2.18k, False: 261k]
  ------------------
  128|       |
  129|   263k|        if ((!have_tt || sbrow_start || by + 2 < by_end) &&
  ------------------
  |  Branch (129:14): [True: 263k, False: 0]
  |  Branch (129:26): [True: 0, False: 0]
  |  Branch (129:41): [True: 0, False: 0]
  ------------------
  130|   263k|            edges & CDEF_HAVE_BOTTOM)
  ------------------
  |  Branch (130:13): [True: 261k, False: 2.18k]
  ------------------
  131|   261k|        {
  132|       |            // backup pre-filter data for next iteration
  133|   261k|            pixel *const cdef_top_bak[3] = {
  134|   261k|                f->lf.cdef_line[!tf][0] + have_tt * sby * 4 * y_stride,
  135|   261k|                f->lf.cdef_line[!tf][1] + have_tt * sby * 8 * uv_stride,
  136|   261k|                f->lf.cdef_line[!tf][2] + have_tt * sby * 8 * uv_stride
  137|   261k|            };
  138|   261k|            backup2lines(cdef_top_bak, ptrs, f->cur.stride, layout);
  139|   261k|        }
  140|       |
  141|   263k|        ALIGN_STK_16(pixel, lr_bak, 2 /* idx */, [3 /* plane */][8 /* y */][2 /* x */]);
  ------------------
  |  |  100|   263k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|   263k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
  142|   263k|        pixel *iptrs[3] = { ptrs[0], ptrs[1], ptrs[2] };
  143|   263k|        edges &= ~CDEF_HAVE_LEFT;
  144|   263k|        edges |= CDEF_HAVE_RIGHT;
  145|   263k|        enum Backup2x8Flags prev_flag = 0;
  146|   913k|        for (int sbx = 0; sbx < sb64w; sbx++, edges |= CDEF_HAVE_LEFT) {
  ------------------
  |  Branch (146:27): [True: 650k, False: 263k]
  ------------------
  147|   650k|            const int sb128x = sbx >> 1;
  148|   650k|            const int sb64_idx = ((by & sbsz) >> 3) + (sbx & 1);
  149|   650k|            const int cdef_idx = lflvl[sb128x].cdef_idx[sb64_idx];
  150|   650k|            if (cdef_idx == -1 ||
  ------------------
  |  Branch (150:17): [True: 522k, False: 128k]
  ------------------
  151|   128k|                (!f->frame_hdr->cdef.y_strength[cdef_idx] &&
  ------------------
  |  Branch (151:18): [True: 92.2k, False: 35.7k]
  ------------------
  152|  92.2k|                 !f->frame_hdr->cdef.uv_strength[cdef_idx]))
  ------------------
  |  Branch (152:18): [True: 87.0k, False: 5.27k]
  ------------------
  153|   609k|            {
  154|   609k|                prev_flag = 0;
  155|   609k|                goto next_sb;
  156|   609k|            }
  157|       |
  158|       |            // Create a complete 32-bit mask for the sb row ahead of time.
  159|  41.0k|            const uint16_t (*noskip_row)[2] = &lflvl[sb128x].noskip_mask[by_idx];
  160|  41.0k|            const unsigned noskip_mask = (unsigned) noskip_row[0][1] << 16 |
  161|  41.0k|                                                    noskip_row[0][0];
  162|       |
  163|  41.0k|            const int y_lvl = f->frame_hdr->cdef.y_strength[cdef_idx];
  164|  41.0k|            const int uv_lvl = f->frame_hdr->cdef.uv_strength[cdef_idx];
  165|  41.0k|            const enum Backup2x8Flags flag = !!y_lvl + (!!uv_lvl << 1);
  166|       |
  167|  41.0k|            const int y_pri_lvl = (y_lvl >> 2) << bitdepth_min_8;
  168|  41.0k|            int y_sec_lvl = y_lvl & 3;
  169|  41.0k|            y_sec_lvl += y_sec_lvl == 3;
  170|  41.0k|            y_sec_lvl <<= bitdepth_min_8;
  171|       |
  172|  41.0k|            const int uv_pri_lvl = (uv_lvl >> 2) << bitdepth_min_8;
  173|  41.0k|            int uv_sec_lvl = uv_lvl & 3;
  174|  41.0k|            uv_sec_lvl += uv_sec_lvl == 3;
  175|  41.0k|            uv_sec_lvl <<= bitdepth_min_8;
  176|       |
  177|  41.0k|            pixel *bptrs[3] = { iptrs[0], iptrs[1], iptrs[2] };
  178|   254k|            for (int bx = sbx * sbsz; bx < imin((sbx + 1) * sbsz, f->bw);
  ------------------
  |  Branch (178:39): [True: 213k, False: 41.0k]
  ------------------
  179|   213k|                 bx += 2, edges |= CDEF_HAVE_LEFT)
  180|   213k|            {
  181|   213k|                if (bx + 2 >= f->bw) edges &= ~CDEF_HAVE_RIGHT;
  ------------------
  |  Branch (181:21): [True: 15.0k, False: 198k]
  ------------------
  182|       |
  183|       |                // check if this 8x8 block had any coded coefficients; if not,
  184|       |                // go to the next block
  185|   213k|                const uint32_t bx_mask = 3U << (bx & 30);
  186|   213k|                if (!(noskip_mask & bx_mask)) {
  ------------------
  |  Branch (186:21): [True: 15.0k, False: 198k]
  ------------------
  187|  15.0k|                    prev_flag = 0;
  188|  15.0k|                    goto next_b;
  189|  15.0k|                }
  190|   198k|                const enum Backup2x8Flags do_left = (prev_flag ^ flag) & flag;
  191|   198k|                prev_flag = flag;
  192|   198k|                if (do_left && edges & CDEF_HAVE_LEFT) {
  ------------------
  |  Branch (192:21): [True: 17.0k, False: 181k]
  |  Branch (192:32): [True: 2.23k, False: 14.8k]
  ------------------
  193|       |                    // we didn't backup the prefilter data because it wasn't
  194|       |                    // there, so do it here instead
  195|  2.23k|                    backup2x8(lr_bak[bit], bptrs, f->cur.stride, 0, layout, do_left);
  196|  2.23k|                }
  197|   198k|                if (edges & CDEF_HAVE_RIGHT) {
  ------------------
  |  Branch (197:21): [True: 183k, False: 14.8k]
  ------------------
  198|       |                    // backup pre-filter data for next iteration
  199|   183k|                    backup2x8(lr_bak[!bit], bptrs, f->cur.stride, 8, layout, flag);
  200|   183k|                }
  201|       |
  202|   198k|                int dir;
  203|   198k|                unsigned variance;
  204|   198k|                if (y_pri_lvl || uv_pri_lvl)
  ------------------
  |  Branch (204:21): [True: 156k, False: 42.4k]
  |  Branch (204:34): [True: 25.0k, False: 17.3k]
  ------------------
  205|   181k|                    dir = dsp->cdef.dir(bptrs[0], f->cur.stride[0],
  206|   181k|                                        &variance HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|   181k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
  207|       |
  208|   198k|                const pixel *top, *bot;
  209|   198k|                ptrdiff_t offset;
  210|       |
  211|   198k|                if (!have_tt) goto st_y;
  ------------------
  |  Branch (211:21): [True: 198k, False: 0]
  ------------------
  212|      0|                if (sbrow_start && by == by_start) {
  ------------------
  |  Branch (212:21): [True: 0, False: 0]
  |  Branch (212:36): [True: 0, False: 0]
  ------------------
  213|      0|                    if (resize) {
  ------------------
  |  Branch (213:25): [True: 0, False: 0]
  ------------------
  214|      0|                        offset = (sby - 1) * 4 * y_stride + bx * 4;
  215|      0|                        top = &f->lf.cdef_lpf_line[0][offset];
  216|      0|                    } else {
  217|      0|                        offset = (sby * (4 << sb128) - 4) * y_stride + bx * 4;
  218|      0|                        top = &f->lf.lr_lpf_line[0][offset];
  219|      0|                    }
  220|      0|                    bot = bptrs[0] + 8 * y_stride;
  221|      0|                } else if (!sbrow_start && by + 2 >= by_end) {
  ------------------
  |  Branch (221:28): [True: 0, False: 0]
  |  Branch (221:44): [True: 0, False: 0]
  ------------------
  222|      0|                    top = &f->lf.cdef_line[tf][0][sby * 4 * y_stride + bx * 4];
  223|      0|                    if (resize) {
  ------------------
  |  Branch (223:25): [True: 0, False: 0]
  ------------------
  224|      0|                        offset = (sby * 4 + 2) * y_stride + bx * 4;
  225|      0|                        bot = &f->lf.cdef_lpf_line[0][offset];
  226|      0|                    } else {
  227|      0|                        const int line = sby * (4 << sb128) + 4 * sb128 + 2;
  228|      0|                        offset = line * y_stride + bx * 4;
  229|      0|                        bot = &f->lf.lr_lpf_line[0][offset];
  230|      0|                    }
  231|      0|                } else {
  232|   198k|            st_y:;
  233|   198k|                    offset = sby * 4 * y_stride;
  234|   198k|                    top = &f->lf.cdef_line[tf][0][have_tt * offset + bx * 4];
  235|   198k|                    bot = bptrs[0] + 8 * y_stride;
  236|   198k|                }
  237|   198k|                if (y_pri_lvl) {
  ------------------
  |  Branch (237:21): [True: 156k, False: 42.4k]
  ------------------
  238|   156k|                    const int adj_y_pri_lvl = adjust_strength(y_pri_lvl, variance);
  239|   156k|                    if (adj_y_pri_lvl || y_sec_lvl)
  ------------------
  |  Branch (239:25): [True: 12.5k, False: 143k]
  |  Branch (239:42): [True: 76.3k, False: 67.1k]
  ------------------
  240|  88.9k|                        dsp->cdef.fb[0](bptrs[0], f->cur.stride[0], lr_bak[bit][0],
  241|  88.9k|                                        top, bot, adj_y_pri_lvl, y_sec_lvl,
  242|  88.9k|                                        dir, damping, edges HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  88.9k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
  243|   156k|                } else if (y_sec_lvl)
  ------------------
  |  Branch (243:28): [True: 23.1k, False: 19.3k]
  ------------------
  244|  23.1k|                    dsp->cdef.fb[0](bptrs[0], f->cur.stride[0], lr_bak[bit][0],
  245|  23.1k|                                    top, bot, 0, y_sec_lvl, 0, damping,
  246|  23.1k|                                    edges HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  23.1k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
  247|       |
  248|   198k|                if (!uv_lvl) goto skip_uv;
  ------------------
  |  Branch (248:21): [True: 105k, False: 93.0k]
  ------------------
  249|   198k|                assert(layout != DAV1D_PIXEL_LAYOUT_I400);
  ------------------
  |  Branch (249:17): [True: 93.0k, False: 0]
  ------------------
  250|       |
  251|  93.0k|                const int uvdir = uv_pri_lvl ? uv_dir[dir] : 0;
  ------------------
  |  Branch (251:35): [True: 82.3k, False: 10.6k]
  ------------------
  252|   279k|                for (int pl = 1; pl <= 2; pl++) {
  ------------------
  |  Branch (252:34): [True: 186k, False: 93.0k]
  ------------------
  253|   186k|                    if (!have_tt) goto st_uv;
  ------------------
  |  Branch (253:25): [True: 186k, False: 0]
  ------------------
  254|      0|                    if (sbrow_start && by == by_start) {
  ------------------
  |  Branch (254:25): [True: 0, False: 0]
  |  Branch (254:40): [True: 0, False: 0]
  ------------------
  255|      0|                        if (resize) {
  ------------------
  |  Branch (255:29): [True: 0, False: 0]
  ------------------
  256|      0|                            offset = (sby - 1) * 4 * uv_stride + (bx * 4 >> ss_hor);
  257|      0|                            top = &f->lf.cdef_lpf_line[pl][offset];
  258|      0|                        } else {
  259|      0|                            const int line = sby * (4 << sb128) - 4;
  260|      0|                            offset = line * uv_stride + (bx * 4 >> ss_hor);
  261|      0|                            top = &f->lf.lr_lpf_line[pl][offset];
  262|      0|                        }
  263|      0|                        bot = bptrs[pl] + (8 >> ss_ver) * uv_stride;
  264|      0|                    } else if (!sbrow_start && by + 2 >= by_end) {
  ------------------
  |  Branch (264:32): [True: 0, False: 0]
  |  Branch (264:48): [True: 0, False: 0]
  ------------------
  265|      0|                        const ptrdiff_t top_offset = sby * 8 * uv_stride +
  266|      0|                                                     (bx * 4 >> ss_hor);
  267|      0|                        top = &f->lf.cdef_line[tf][pl][top_offset];
  268|      0|                        if (resize) {
  ------------------
  |  Branch (268:29): [True: 0, False: 0]
  ------------------
  269|      0|                            offset = (sby * 4 + 2) * uv_stride + (bx * 4 >> ss_hor);
  270|      0|                            bot = &f->lf.cdef_lpf_line[pl][offset];
  271|      0|                        } else {
  272|      0|                            const int line = sby * (4 << sb128) + 4 * sb128 + 2;
  273|      0|                            offset = line * uv_stride + (bx * 4 >> ss_hor);
  274|      0|                            bot = &f->lf.lr_lpf_line[pl][offset];
  275|      0|                        }
  276|      0|                    } else {
  277|   186k|                st_uv:;
  278|   186k|                        const ptrdiff_t offset = sby * 8 * uv_stride;
  279|   186k|                        top = &f->lf.cdef_line[tf][pl][have_tt * offset + (bx * 4 >> ss_hor)];
  280|   186k|                        bot = bptrs[pl] + (8 >> ss_ver) * uv_stride;
  281|   186k|                    }
  282|   186k|                    dsp->cdef.fb[uv_idx](bptrs[pl], f->cur.stride[1],
  283|   186k|                                         lr_bak[bit][pl], top, bot,
  284|   186k|                                         uv_pri_lvl, uv_sec_lvl, uvdir,
  285|   186k|                                         damping - 1, edges HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|   186k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
  286|   186k|                }
  287|       |
  288|   198k|            skip_uv:
  289|   198k|                bit ^= 1;
  290|       |
  291|   213k|            next_b:
  292|   213k|                bptrs[0] += 8;
  293|   213k|                bptrs[1] += 8 >> ss_hor;
  294|   213k|                bptrs[2] += 8 >> ss_hor;
  295|   213k|            }
  296|       |
  297|   650k|        next_sb:
  298|   650k|            iptrs[0] += sbsz * 4;
  299|   650k|            iptrs[1] += sbsz * 4 >> ss_hor;
  300|   650k|            iptrs[2] += sbsz * 4 >> ss_hor;
  301|   650k|        }
  302|       |
  303|   263k|        ptrs[0] += 8 * PXSTRIDE(f->cur.stride[0]);
  304|   263k|        ptrs[1] += 8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver;
  305|   263k|        ptrs[2] += 8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver;
  306|   263k|        tc->top_pre_cdef_toggle ^= 1;
  307|   263k|    }
  308|  49.6k|}

dav1d_cdef_dsp_init_8bpc:
  320|  3.47k|COLD void bitfn(dav1d_cdef_dsp_init)(Dav1dCdefDSPContext *const c) {
  321|  3.47k|    c->dir = cdef_find_dir_c;
  322|  3.47k|    c->fb[0] = cdef_filter_block_8x8_c;
  323|  3.47k|    c->fb[1] = cdef_filter_block_4x8_c;
  324|  3.47k|    c->fb[2] = cdef_filter_block_4x4_c;
  325|       |
  326|  3.47k|#if HAVE_ASM
  327|       |#if ARCH_AARCH64 || ARCH_ARM
  328|       |    cdef_dsp_init_arm(c);
  329|       |#elif ARCH_PPC64LE
  330|       |    cdef_dsp_init_ppc(c);
  331|       |#elif ARCH_RISCV
  332|       |    cdef_dsp_init_riscv(c);
  333|       |#elif ARCH_X86
  334|       |    cdef_dsp_init_x86(c);
  335|       |#elif ARCH_LOONGARCH64
  336|       |    cdef_dsp_init_loongarch(c);
  337|       |#endif
  338|  3.47k|#endif
  339|  3.47k|}
dav1d_cdef_dsp_init_16bpc:
  320|  4.68k|COLD void bitfn(dav1d_cdef_dsp_init)(Dav1dCdefDSPContext *const c) {
  321|  4.68k|    c->dir = cdef_find_dir_c;
  322|  4.68k|    c->fb[0] = cdef_filter_block_8x8_c;
  323|  4.68k|    c->fb[1] = cdef_filter_block_4x8_c;
  324|  4.68k|    c->fb[2] = cdef_filter_block_4x4_c;
  325|       |
  326|  4.68k|#if HAVE_ASM
  327|       |#if ARCH_AARCH64 || ARCH_ARM
  328|       |    cdef_dsp_init_arm(c);
  329|       |#elif ARCH_PPC64LE
  330|       |    cdef_dsp_init_ppc(c);
  331|       |#elif ARCH_RISCV
  332|       |    cdef_dsp_init_riscv(c);
  333|       |#elif ARCH_X86
  334|       |    cdef_dsp_init_x86(c);
  335|       |#elif ARCH_LOONGARCH64
  336|       |    cdef_dsp_init_loongarch(c);
  337|       |#endif
  338|  4.68k|#endif
  339|  4.68k|}

dav1d_cdf_thread_update:
 3918|  10.9k|{
 3919|  10.9k|#define update_cdf_1d(n1d, name) \
 3920|  10.9k|    do { \
 3921|  10.9k|        dst->name[n1d] = 0; \
 3922|  10.9k|    } while (0)
 3923|  10.9k|#define update_cdf_2d(n1d, n2d, name) \
 3924|  10.9k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
 3925|  10.9k|#define update_cdf_3d(n1d, n2d, n3d, name) \
 3926|  10.9k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
 3927|  10.9k|#define update_cdf_4d(n1d, n2d, n3d, n4d, name) \
 3928|  10.9k|    for (int l = 0; l < (n1d); l++) update_cdf_3d(n2d, n3d, n4d, name[l])
 3929|       |
 3930|  10.9k|    memcpy(dst, src, offsetof(CdfContext, m.intrabc));
 3931|       |
 3932|  10.9k|    update_cdf_3d(2, 2, 4, coef.eob_bin_16);
  ------------------
  |  | 3926|  32.8k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  ------------------
  |  |  |  | 3924|  65.7k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3920|  43.8k|    do { \
  |  |  |  |  |  | 3921|  43.8k|        dst->name[n1d] = 0; \
  |  |  |  |  |  | 3922|  43.8k|    } while (0)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 43.8k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3924:21): [True: 43.8k, False: 21.9k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3926:21): [True: 21.9k, False: 10.9k]
  |  |  ------------------
  ------------------
 3933|  10.9k|    update_cdf_3d(2, 2, 5, coef.eob_bin_32);
  ------------------
  |  | 3926|  32.8k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  ------------------
  |  |  |  | 3924|  65.7k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3920|  43.8k|    do { \
  |  |  |  |  |  | 3921|  43.8k|        dst->name[n1d] = 0; \
  |  |  |  |  |  | 3922|  43.8k|    } while (0)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 43.8k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3924:21): [True: 43.8k, False: 21.9k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3926:21): [True: 21.9k, False: 10.9k]
  |  |  ------------------
  ------------------
 3934|  10.9k|    update_cdf_3d(2, 2, 6, coef.eob_bin_64);
  ------------------
  |  | 3926|  32.8k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  ------------------
  |  |  |  | 3924|  65.7k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3920|  43.8k|    do { \
  |  |  |  |  |  | 3921|  43.8k|        dst->name[n1d] = 0; \
  |  |  |  |  |  | 3922|  43.8k|    } while (0)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 43.8k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3924:21): [True: 43.8k, False: 21.9k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3926:21): [True: 21.9k, False: 10.9k]
  |  |  ------------------
  ------------------
 3935|  10.9k|    update_cdf_3d(2, 2, 7, coef.eob_bin_128);
  ------------------
  |  | 3926|  32.8k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  ------------------
  |  |  |  | 3924|  65.7k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3920|  43.8k|    do { \
  |  |  |  |  |  | 3921|  43.8k|        dst->name[n1d] = 0; \
  |  |  |  |  |  | 3922|  43.8k|    } while (0)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 43.8k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3924:21): [True: 43.8k, False: 21.9k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3926:21): [True: 21.9k, False: 10.9k]
  |  |  ------------------
  ------------------
 3936|  10.9k|    update_cdf_3d(2, 2, 8, coef.eob_bin_256);
  ------------------
  |  | 3926|  32.8k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  ------------------
  |  |  |  | 3924|  65.7k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3920|  43.8k|    do { \
  |  |  |  |  |  | 3921|  43.8k|        dst->name[n1d] = 0; \
  |  |  |  |  |  | 3922|  43.8k|    } while (0)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 43.8k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3924:21): [True: 43.8k, False: 21.9k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3926:21): [True: 21.9k, False: 10.9k]
  |  |  ------------------
  ------------------
 3937|  10.9k|    update_cdf_2d(2, 9, coef.eob_bin_512);
  ------------------
  |  | 3924|  32.8k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  21.9k|    do { \
  |  |  |  | 3921|  21.9k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  21.9k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 21.9k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 21.9k, False: 10.9k]
  |  |  ------------------
  ------------------
 3938|  10.9k|    update_cdf_2d(2, 10, coef.eob_bin_1024);
  ------------------
  |  | 3924|  32.8k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  21.9k|    do { \
  |  |  |  | 3921|  21.9k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  21.9k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 21.9k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 21.9k, False: 10.9k]
  |  |  ------------------
  ------------------
 3939|  10.9k|    update_cdf_4d(N_TX_SIZES, 2, 4, 2, coef.eob_base_tok);
  ------------------
  |  | 3928|  65.7k|    for (int l = 0; l < (n1d); l++) update_cdf_3d(n2d, n3d, n4d, name[l])
  |  |  ------------------
  |  |  |  | 3926|   164k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3924|   547k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  | 3920|   438k|    do { \
  |  |  |  |  |  |  |  | 3921|   438k|        dst->name[n1d] = 0; \
  |  |  |  |  |  |  |  | 3922|   438k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 438k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3924:21): [True: 438k, False: 109k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3926:21): [True: 109k, False: 54.7k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3928:21): [True: 54.7k, False: 10.9k]
  |  |  ------------------
  ------------------
 3940|  10.9k|    update_cdf_4d(N_TX_SIZES, 2, 41 /*42*/, 3, coef.base_tok);
  ------------------
  |  | 3928|  65.7k|    for (int l = 0; l < (n1d); l++) update_cdf_3d(n2d, n3d, n4d, name[l])
  |  |  ------------------
  |  |  |  | 3926|   164k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3924|  4.60M|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  | 3920|  4.49M|    do { \
  |  |  |  |  |  |  |  | 3921|  4.49M|        dst->name[n1d] = 0; \
  |  |  |  |  |  |  |  | 3922|  4.49M|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 4.49M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3924:21): [True: 4.49M, False: 109k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3926:21): [True: 109k, False: 54.7k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3928:21): [True: 54.7k, False: 10.9k]
  |  |  ------------------
  ------------------
 3941|  10.9k|    update_cdf_4d(4, 2, 21, 3, coef.br_tok);
  ------------------
  |  | 3928|  54.7k|    for (int l = 0; l < (n1d); l++) update_cdf_3d(n2d, n3d, n4d, name[l])
  |  |  ------------------
  |  |  |  | 3926|   131k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3924|  1.92M|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  | 3920|  1.84M|    do { \
  |  |  |  |  |  |  |  | 3921|  1.84M|        dst->name[n1d] = 0; \
  |  |  |  |  |  |  |  | 3922|  1.84M|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 1.84M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3924:21): [True: 1.84M, False: 87.6k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3926:21): [True: 87.6k, False: 43.8k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3928:21): [True: 43.8k, False: 10.9k]
  |  |  ------------------
  ------------------
 3942|  10.9k|    update_cdf_4d(N_TX_SIZES, 2, 9, 1, coef.eob_hi_bit);
  ------------------
  |  | 3928|  65.7k|    for (int l = 0; l < (n1d); l++) update_cdf_3d(n2d, n3d, n4d, name[l])
  |  |  ------------------
  |  |  |  | 3926|   164k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3924|  1.09M|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  | 3920|   986k|    do { \
  |  |  |  |  |  |  |  | 3921|   986k|        dst->name[n1d] = 0; \
  |  |  |  |  |  |  |  | 3922|   986k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 986k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3924:21): [True: 986k, False: 109k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3926:21): [True: 109k, False: 54.7k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3928:21): [True: 54.7k, False: 10.9k]
  |  |  ------------------
  ------------------
 3943|  10.9k|    update_cdf_3d(N_TX_SIZES, 13, 1, coef.skip);
  ------------------
  |  | 3926|  65.7k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  ------------------
  |  |  |  | 3924|   766k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3920|   712k|    do { \
  |  |  |  |  |  | 3921|   712k|        dst->name[n1d] = 0; \
  |  |  |  |  |  | 3922|   712k|    } while (0)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 712k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3924:21): [True: 712k, False: 54.7k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3926:21): [True: 54.7k, False: 10.9k]
  |  |  ------------------
  ------------------
 3944|  10.9k|    update_cdf_3d(2, 3, 1, coef.dc_sign);
  ------------------
  |  | 3926|  32.8k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  ------------------
  |  |  |  | 3924|  87.6k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3920|  65.7k|    do { \
  |  |  |  |  |  | 3921|  65.7k|        dst->name[n1d] = 0; \
  |  |  |  |  |  | 3922|  65.7k|    } while (0)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 65.7k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3924:21): [True: 65.7k, False: 21.9k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3926:21): [True: 21.9k, False: 10.9k]
  |  |  ------------------
  ------------------
 3945|       |
 3946|  10.9k|    update_cdf_3d(2, N_INTRA_PRED_MODES, N_UV_INTRA_PRED_MODES - 1 - !k, m.uv_mode);
  ------------------
  |  | 3926|  32.8k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  ------------------
  |  |  |  | 3924|   306k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3920|   284k|    do { \
  |  |  |  |  |  | 3921|   284k|        dst->name[n1d] = 0; \
  |  |  |  |  |  | 3922|   284k|    } while (0)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 284k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3924:21): [True: 284k, False: 21.9k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3926:21): [True: 21.9k, False: 10.9k]
  |  |  ------------------
  ------------------
 3947|  10.9k|    update_cdf_2d(4, N_PARTITIONS - 3, m.partition[BL_128X128]);
  ------------------
  |  | 3924|  54.7k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  43.8k|    do { \
  |  |  |  | 3921|  43.8k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  43.8k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 43.8k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 43.8k, False: 10.9k]
  |  |  ------------------
  ------------------
 3948|  43.8k|    for (int k = BL_64X64; k < BL_8X8; k++)
  ------------------
  |  Branch (3948:28): [True: 32.8k, False: 10.9k]
  ------------------
 3949|  32.8k|        update_cdf_2d(4, N_PARTITIONS - 1, m.partition[k]);
  ------------------
  |  | 3924|   164k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|   131k|    do { \
  |  |  |  | 3921|   131k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|   131k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 131k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 131k, False: 32.8k]
  |  |  ------------------
  ------------------
 3950|  10.9k|    update_cdf_2d(4, N_SUB8X8_PARTITIONS - 1, m.partition[BL_8X8]);
  ------------------
  |  | 3924|  54.7k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  43.8k|    do { \
  |  |  |  | 3921|  43.8k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  43.8k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 43.8k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 43.8k, False: 10.9k]
  |  |  ------------------
  ------------------
 3951|  10.9k|    update_cdf_2d(6, 15, m.cfl_alpha);
  ------------------
  |  | 3924|  76.6k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  65.7k|    do { \
  |  |  |  | 3921|  65.7k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  65.7k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 65.7k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 65.7k, False: 10.9k]
  |  |  ------------------
  ------------------
 3952|  10.9k|    update_cdf_2d(2, 15, m.txtp_inter1);
  ------------------
  |  | 3924|  32.8k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  21.9k|    do { \
  |  |  |  | 3921|  21.9k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  21.9k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 21.9k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 21.9k, False: 10.9k]
  |  |  ------------------
  ------------------
 3953|  10.9k|    update_cdf_1d(11, m.txtp_inter2);
  ------------------
  |  | 3920|  10.9k|    do { \
  |  | 3921|  10.9k|        dst->name[n1d] = 0; \
  |  | 3922|  10.9k|    } while (0)
  |  |  ------------------
  |  |  |  Branch (3922:14): [Folded, False: 10.9k]
  |  |  ------------------
  ------------------
 3954|  10.9k|    update_cdf_3d(2, N_INTRA_PRED_MODES, 6, m.txtp_intra1);
  ------------------
  |  | 3926|  32.8k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  ------------------
  |  |  |  | 3924|   306k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3920|   284k|    do { \
  |  |  |  |  |  | 3921|   284k|        dst->name[n1d] = 0; \
  |  |  |  |  |  | 3922|   284k|    } while (0)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 284k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3924:21): [True: 284k, False: 21.9k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3926:21): [True: 21.9k, False: 10.9k]
  |  |  ------------------
  ------------------
 3955|  10.9k|    update_cdf_3d(3, N_INTRA_PRED_MODES, 4, m.txtp_intra2);
  ------------------
  |  | 3926|  43.8k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  ------------------
  |  |  |  | 3924|   460k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3920|   427k|    do { \
  |  |  |  |  |  | 3921|   427k|        dst->name[n1d] = 0; \
  |  |  |  |  |  | 3922|   427k|    } while (0)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 427k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3924:21): [True: 427k, False: 32.8k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3926:21): [True: 32.8k, False: 10.9k]
  |  |  ------------------
  ------------------
 3956|  10.9k|    update_cdf_1d(7, m.cfl_sign);
  ------------------
  |  | 3920|  10.9k|    do { \
  |  | 3921|  10.9k|        dst->name[n1d] = 0; \
  |  | 3922|  10.9k|    } while (0)
  |  |  ------------------
  |  |  |  Branch (3922:14): [Folded, False: 10.9k]
  |  |  ------------------
  ------------------
 3957|  10.9k|    update_cdf_2d(8, 6, m.angle_delta);
  ------------------
  |  | 3924|  98.6k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  87.6k|    do { \
  |  |  |  | 3921|  87.6k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  87.6k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 87.6k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 87.6k, False: 10.9k]
  |  |  ------------------
  ------------------
 3958|  10.9k|    update_cdf_1d(4, m.filter_intra);
  ------------------
  |  | 3920|  10.9k|    do { \
  |  | 3921|  10.9k|        dst->name[n1d] = 0; \
  |  | 3922|  10.9k|    } while (0)
  |  |  ------------------
  |  |  |  Branch (3922:14): [Folded, False: 10.9k]
  |  |  ------------------
  ------------------
 3959|  10.9k|    update_cdf_2d(3, DAV1D_MAX_SEGMENTS - 1, m.seg_id);
  ------------------
  |  | 3924|  43.8k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  32.8k|    do { \
  |  |  |  | 3921|  32.8k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  32.8k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 32.8k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 32.8k, False: 10.9k]
  |  |  ------------------
  ------------------
 3960|  10.9k|    update_cdf_3d(2, 7, 6, m.pal_sz);
  ------------------
  |  | 3926|  32.8k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  ------------------
  |  |  |  | 3924|   175k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3920|   153k|    do { \
  |  |  |  |  |  | 3921|   153k|        dst->name[n1d] = 0; \
  |  |  |  |  |  | 3922|   153k|    } while (0)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 153k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3924:21): [True: 153k, False: 21.9k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3926:21): [True: 21.9k, False: 10.9k]
  |  |  ------------------
  ------------------
 3961|  10.9k|    update_cdf_4d(2, 7, 5, k + 1, m.color_map);
  ------------------
  |  | 3928|  32.8k|    for (int l = 0; l < (n1d); l++) update_cdf_3d(n2d, n3d, n4d, name[l])
  |  |  ------------------
  |  |  |  | 3926|   175k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3924|   920k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  | 3920|   766k|    do { \
  |  |  |  |  |  |  |  | 3921|   766k|        dst->name[n1d] = 0; \
  |  |  |  |  |  |  |  | 3922|   766k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 766k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3924:21): [True: 766k, False: 153k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3926:21): [True: 153k, False: 21.9k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3928:21): [True: 21.9k, False: 10.9k]
  |  |  ------------------
  ------------------
 3962|  10.9k|    update_cdf_3d(N_TX_SIZES - 1, 3, imin(k + 1, 2), m.txsz);
  ------------------
  |  | 3926|  54.7k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  ------------------
  |  |  |  | 3924|   175k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3920|   131k|    do { \
  |  |  |  |  |  | 3921|   131k|        dst->name[n1d] = 0; \
  |  |  |  |  |  | 3922|   131k|    } while (0)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 131k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3924:21): [True: 131k, False: 43.8k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3926:21): [True: 43.8k, False: 10.9k]
  |  |  ------------------
  ------------------
 3963|  10.9k|    update_cdf_1d(3, m.delta_q);
  ------------------
  |  | 3920|  10.9k|    do { \
  |  | 3921|  10.9k|        dst->name[n1d] = 0; \
  |  | 3922|  10.9k|    } while (0)
  |  |  ------------------
  |  |  |  Branch (3922:14): [Folded, False: 10.9k]
  |  |  ------------------
  ------------------
 3964|  10.9k|    update_cdf_2d(5, 3, m.delta_lf);
  ------------------
  |  | 3924|  65.7k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  54.7k|    do { \
  |  |  |  | 3921|  54.7k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  54.7k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 54.7k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 54.7k, False: 10.9k]
  |  |  ------------------
  ------------------
 3965|  10.9k|    update_cdf_1d(2, m.restore_switchable);
  ------------------
  |  | 3920|  10.9k|    do { \
  |  | 3921|  10.9k|        dst->name[n1d] = 0; \
  |  | 3922|  10.9k|    } while (0)
  |  |  ------------------
  |  |  |  Branch (3922:14): [Folded, False: 10.9k]
  |  |  ------------------
  ------------------
 3966|  10.9k|    update_cdf_1d(1, m.restore_wiener);
  ------------------
  |  | 3920|  10.9k|    do { \
  |  | 3921|  10.9k|        dst->name[n1d] = 0; \
  |  | 3922|  10.9k|    } while (0)
  |  |  ------------------
  |  |  |  Branch (3922:14): [Folded, False: 10.9k]
  |  |  ------------------
  ------------------
 3967|  10.9k|    update_cdf_1d(1, m.restore_sgrproj);
  ------------------
  |  | 3920|  10.9k|    do { \
  |  | 3921|  10.9k|        dst->name[n1d] = 0; \
  |  | 3922|  10.9k|    } while (0)
  |  |  ------------------
  |  |  |  Branch (3922:14): [Folded, False: 10.9k]
  |  |  ------------------
  ------------------
 3968|  10.9k|    update_cdf_2d(4, 1, m.txtp_inter3);
  ------------------
  |  | 3924|  54.7k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  43.8k|    do { \
  |  |  |  | 3921|  43.8k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  43.8k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 43.8k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 43.8k, False: 10.9k]
  |  |  ------------------
  ------------------
 3969|  10.9k|    update_cdf_2d(N_BS_SIZES, 1, m.use_filter_intra);
  ------------------
  |  | 3924|   252k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|   241k|    do { \
  |  |  |  | 3921|   241k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|   241k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 241k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 241k, False: 10.9k]
  |  |  ------------------
  ------------------
 3970|  10.9k|    update_cdf_3d(7, 3, 1, m.txpart);
  ------------------
  |  | 3926|  87.6k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  ------------------
  |  |  |  | 3924|   306k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3920|   230k|    do { \
  |  |  |  |  |  | 3921|   230k|        dst->name[n1d] = 0; \
  |  |  |  |  |  | 3922|   230k|    } while (0)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 230k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3924:21): [True: 230k, False: 76.6k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3926:21): [True: 76.6k, False: 10.9k]
  |  |  ------------------
  ------------------
 3971|  10.9k|    update_cdf_2d(3, 1, m.skip);
  ------------------
  |  | 3924|  43.8k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  32.8k|    do { \
  |  |  |  | 3921|  32.8k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  32.8k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 32.8k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 32.8k, False: 10.9k]
  |  |  ------------------
  ------------------
 3972|  10.9k|    update_cdf_3d(7, 3, 1, m.pal_y);
  ------------------
  |  | 3926|  87.6k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  ------------------
  |  |  |  | 3924|   306k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3920|   230k|    do { \
  |  |  |  |  |  | 3921|   230k|        dst->name[n1d] = 0; \
  |  |  |  |  |  | 3922|   230k|    } while (0)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 230k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3924:21): [True: 230k, False: 76.6k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3926:21): [True: 76.6k, False: 10.9k]
  |  |  ------------------
  ------------------
 3973|  10.9k|    update_cdf_2d(2, 1, m.pal_uv);
  ------------------
  |  | 3924|  32.8k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  21.9k|    do { \
  |  |  |  | 3921|  21.9k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  21.9k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 21.9k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 21.9k, False: 10.9k]
  |  |  ------------------
  ------------------
 3974|       |
 3975|  10.9k|    if (IS_KEY_OR_INTRA(hdr))
  ------------------
  |  |   43|  10.9k|    (!IS_INTER_OR_SWITCH(frame_header))
  |  |  ------------------
  |  |  |  |   36|  10.9k|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (43:5): [True: 4.99k, False: 5.96k]
  |  |  ------------------
  ------------------
 3976|  4.99k|        return;
 3977|       |
 3978|  5.96k|    memcpy(dst->m.y_mode, src->m.y_mode,
 3979|  5.96k|           offsetof(CdfContext, kfym) - offsetof(CdfContext, m.y_mode));
 3980|       |
 3981|  5.96k|    update_cdf_2d(4, N_INTRA_PRED_MODES - 1, m.y_mode);
  ------------------
  |  | 3924|  29.8k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  29.8k|    do { \
  |  |  |  | 3921|  23.8k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  23.8k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 23.8k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 23.8k, False: 5.96k]
  |  |  ------------------
  ------------------
 3982|  5.96k|    update_cdf_2d(9, 15, m.wedge_idx);
  ------------------
  |  | 3924|  59.6k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  59.6k|    do { \
  |  |  |  | 3921|  53.6k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  53.6k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 53.6k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 53.6k, False: 5.96k]
  |  |  ------------------
  ------------------
 3983|  5.96k|    update_cdf_2d(8, N_COMP_INTER_PRED_MODES - 1, m.comp_inter_mode);
  ------------------
  |  | 3924|  53.6k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  53.6k|    do { \
  |  |  |  | 3921|  47.6k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  47.6k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 47.6k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 47.6k, False: 5.96k]
  |  |  ------------------
  ------------------
 3984|  5.96k|    update_cdf_3d(2, 8, DAV1D_N_SWITCHABLE_FILTERS - 1, m.filter);
  ------------------
  |  | 3926|  17.8k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  ------------------
  |  |  |  | 3924|   107k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3920|   101k|    do { \
  |  |  |  |  |  | 3921|  95.3k|        dst->name[n1d] = 0; \
  |  |  |  |  |  | 3922|  95.3k|    } while (0)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 95.3k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3924:21): [True: 95.3k, False: 11.9k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3926:21): [True: 11.9k, False: 5.96k]
  |  |  ------------------
  ------------------
 3985|  5.96k|    update_cdf_2d(4, 3, m.interintra_mode);
  ------------------
  |  | 3924|  29.8k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  29.8k|    do { \
  |  |  |  | 3921|  23.8k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  23.8k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 23.8k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 23.8k, False: 5.96k]
  |  |  ------------------
  ------------------
 3986|  5.96k|    update_cdf_2d(N_BS_SIZES, 2, m.motion_mode);
  ------------------
  |  | 3924|   137k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|   137k|    do { \
  |  |  |  | 3921|   131k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|   131k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 131k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 131k, False: 5.96k]
  |  |  ------------------
  ------------------
 3987|  5.96k|    update_cdf_2d(3, 1, m.skip_mode);
  ------------------
  |  | 3924|  23.8k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  23.8k|    do { \
  |  |  |  | 3921|  17.8k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  17.8k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 17.8k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 17.8k, False: 5.96k]
  |  |  ------------------
  ------------------
 3988|  5.96k|    update_cdf_2d(6, 1, m.newmv_mode);
  ------------------
  |  | 3924|  41.7k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  41.7k|    do { \
  |  |  |  | 3921|  35.7k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  35.7k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 35.7k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 35.7k, False: 5.96k]
  |  |  ------------------
  ------------------
 3989|  5.96k|    update_cdf_2d(2, 1, m.globalmv_mode);
  ------------------
  |  | 3924|  17.8k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  17.8k|    do { \
  |  |  |  | 3921|  11.9k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  11.9k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 11.9k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 11.9k, False: 5.96k]
  |  |  ------------------
  ------------------
 3990|  5.96k|    update_cdf_2d(6, 1, m.refmv_mode);
  ------------------
  |  | 3924|  41.7k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  41.7k|    do { \
  |  |  |  | 3921|  35.7k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  35.7k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 35.7k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 35.7k, False: 5.96k]
  |  |  ------------------
  ------------------
 3991|  5.96k|    update_cdf_2d(3, 1, m.drl_bit);
  ------------------
  |  | 3924|  23.8k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  23.8k|    do { \
  |  |  |  | 3921|  17.8k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  17.8k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 17.8k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 17.8k, False: 5.96k]
  |  |  ------------------
  ------------------
 3992|  5.96k|    update_cdf_2d(4, 1, m.intra);
  ------------------
  |  | 3924|  29.8k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  29.8k|    do { \
  |  |  |  | 3921|  23.8k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  23.8k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 23.8k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 23.8k, False: 5.96k]
  |  |  ------------------
  ------------------
 3993|  5.96k|    update_cdf_2d(5, 1, m.comp);
  ------------------
  |  | 3924|  35.7k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  35.7k|    do { \
  |  |  |  | 3921|  29.8k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  29.8k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 29.8k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 29.8k, False: 5.96k]
  |  |  ------------------
  ------------------
 3994|  5.96k|    update_cdf_2d(5, 1, m.comp_dir);
  ------------------
  |  | 3924|  35.7k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  35.7k|    do { \
  |  |  |  | 3921|  29.8k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  29.8k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 29.8k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 29.8k, False: 5.96k]
  |  |  ------------------
  ------------------
 3995|  5.96k|    update_cdf_2d(6, 1, m.jnt_comp);
  ------------------
  |  | 3924|  41.7k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  41.7k|    do { \
  |  |  |  | 3921|  35.7k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  35.7k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 35.7k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 35.7k, False: 5.96k]
  |  |  ------------------
  ------------------
 3996|  5.96k|    update_cdf_2d(6, 1, m.mask_comp);
  ------------------
  |  | 3924|  41.7k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  41.7k|    do { \
  |  |  |  | 3921|  35.7k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  35.7k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 35.7k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 35.7k, False: 5.96k]
  |  |  ------------------
  ------------------
 3997|  5.96k|    update_cdf_2d(9, 1, m.wedge_comp);
  ------------------
  |  | 3924|  59.6k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  59.6k|    do { \
  |  |  |  | 3921|  53.6k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  53.6k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 53.6k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 53.6k, False: 5.96k]
  |  |  ------------------
  ------------------
 3998|  5.96k|    update_cdf_3d(6, 3, 1, m.ref);
  ------------------
  |  | 3926|  41.7k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  ------------------
  |  |  |  | 3924|   143k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3920|   113k|    do { \
  |  |  |  |  |  | 3921|   107k|        dst->name[n1d] = 0; \
  |  |  |  |  |  | 3922|   107k|    } while (0)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 107k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3924:21): [True: 107k, False: 35.7k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3926:21): [True: 35.7k, False: 5.96k]
  |  |  ------------------
  ------------------
 3999|  5.96k|    update_cdf_3d(3, 3, 1, m.comp_fwd_ref);
  ------------------
  |  | 3926|  23.8k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  ------------------
  |  |  |  | 3924|  71.5k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3920|  59.6k|    do { \
  |  |  |  |  |  | 3921|  53.6k|        dst->name[n1d] = 0; \
  |  |  |  |  |  | 3922|  53.6k|    } while (0)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 53.6k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3924:21): [True: 53.6k, False: 17.8k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3926:21): [True: 17.8k, False: 5.96k]
  |  |  ------------------
  ------------------
 4000|  5.96k|    update_cdf_3d(2, 3, 1, m.comp_bwd_ref);
  ------------------
  |  | 3926|  17.8k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  ------------------
  |  |  |  | 3924|  47.6k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3920|  41.7k|    do { \
  |  |  |  |  |  | 3921|  35.7k|        dst->name[n1d] = 0; \
  |  |  |  |  |  | 3922|  35.7k|    } while (0)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 35.7k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3924:21): [True: 35.7k, False: 11.9k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3926:21): [True: 11.9k, False: 5.96k]
  |  |  ------------------
  ------------------
 4001|  5.96k|    update_cdf_3d(3, 3, 1, m.comp_uni_ref);
  ------------------
  |  | 3926|  23.8k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  ------------------
  |  |  |  | 3924|  71.5k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3920|  59.6k|    do { \
  |  |  |  |  |  | 3921|  53.6k|        dst->name[n1d] = 0; \
  |  |  |  |  |  | 3922|  53.6k|    } while (0)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 53.6k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3924:21): [True: 53.6k, False: 17.8k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3926:21): [True: 17.8k, False: 5.96k]
  |  |  ------------------
  ------------------
 4002|  5.96k|    update_cdf_2d(3, 1, m.seg_pred);
  ------------------
  |  | 3924|  23.8k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  23.8k|    do { \
  |  |  |  | 3921|  17.8k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  17.8k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 17.8k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 17.8k, False: 5.96k]
  |  |  ------------------
  ------------------
 4003|  5.96k|    update_cdf_2d(4, 1, m.interintra);
  ------------------
  |  | 3924|  29.8k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  29.8k|    do { \
  |  |  |  | 3921|  23.8k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  23.8k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 23.8k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 23.8k, False: 5.96k]
  |  |  ------------------
  ------------------
 4004|  5.96k|    update_cdf_2d(7, 1, m.interintra_wedge);
  ------------------
  |  | 3924|  47.6k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  47.6k|    do { \
  |  |  |  | 3921|  41.7k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  41.7k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 41.7k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 41.7k, False: 5.96k]
  |  |  ------------------
  ------------------
 4005|  5.96k|    update_cdf_2d(N_BS_SIZES, 1, m.obmc);
  ------------------
  |  | 3924|   137k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|   137k|    do { \
  |  |  |  | 3921|   131k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|   131k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 131k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 131k, False: 5.96k]
  |  |  ------------------
  ------------------
 4006|       |
 4007|  17.8k|    for (int k = 0; k < 2; k++) {
  ------------------
  |  Branch (4007:21): [True: 11.9k, False: 5.96k]
  ------------------
 4008|  11.9k|        update_cdf_1d(10, mv.comp[k].classes);
  ------------------
  |  | 3920|  11.9k|    do { \
  |  | 3921|  11.9k|        dst->name[n1d] = 0; \
  |  | 3922|  11.9k|    } while (0)
  |  |  ------------------
  |  |  |  Branch (3922:14): [Folded, False: 11.9k]
  |  |  ------------------
  ------------------
 4009|  11.9k|        update_cdf_1d(1, mv.comp[k].sign);
  ------------------
  |  | 3920|  11.9k|    do { \
  |  | 3921|  11.9k|        dst->name[n1d] = 0; \
  |  | 3922|  11.9k|    } while (0)
  |  |  ------------------
  |  |  |  Branch (3922:14): [Folded, False: 11.9k]
  |  |  ------------------
  ------------------
 4010|  11.9k|        update_cdf_1d(1, mv.comp[k].class0);
  ------------------
  |  | 3920|  11.9k|    do { \
  |  | 3921|  11.9k|        dst->name[n1d] = 0; \
  |  | 3922|  11.9k|    } while (0)
  |  |  ------------------
  |  |  |  Branch (3922:14): [Folded, False: 11.9k]
  |  |  ------------------
  ------------------
 4011|  11.9k|        update_cdf_2d(2, 3, mv.comp[k].class0_fp);
  ------------------
  |  | 3924|  35.7k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  23.8k|    do { \
  |  |  |  | 3921|  23.8k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  23.8k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 23.8k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 23.8k, False: 11.9k]
  |  |  ------------------
  ------------------
 4012|  11.9k|        update_cdf_1d(1, mv.comp[k].class0_hp);
  ------------------
  |  | 3920|  11.9k|    do { \
  |  | 3921|  11.9k|        dst->name[n1d] = 0; \
  |  | 3922|  11.9k|    } while (0)
  |  |  ------------------
  |  |  |  Branch (3922:14): [Folded, False: 11.9k]
  |  |  ------------------
  ------------------
 4013|  11.9k|        update_cdf_2d(10, 1, mv.comp[k].classN);
  ------------------
  |  | 3924|   131k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|   119k|    do { \
  |  |  |  | 3921|   119k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|   119k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 119k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 119k, False: 11.9k]
  |  |  ------------------
  ------------------
 4014|  11.9k|        update_cdf_1d(3, mv.comp[k].classN_fp);
  ------------------
  |  | 3920|  11.9k|    do { \
  |  | 3921|  11.9k|        dst->name[n1d] = 0; \
  |  | 3922|  11.9k|    } while (0)
  |  |  ------------------
  |  |  |  Branch (3922:14): [Folded, False: 11.9k]
  |  |  ------------------
  ------------------
 4015|  11.9k|        update_cdf_1d(1, mv.comp[k].classN_hp);
  ------------------
  |  | 3920|  11.9k|    do { \
  |  | 3921|  11.9k|        dst->name[n1d] = 0; \
  |  | 3922|  11.9k|    } while (0)
  |  |  ------------------
  |  |  |  Branch (3922:14): [Folded, False: 11.9k]
  |  |  ------------------
  ------------------
 4016|  11.9k|    }
 4017|  5.96k|    update_cdf_1d(N_MV_JOINTS - 1, mv.joint);
  ------------------
  |  | 3920|  5.96k|    do { \
  |  | 3921|  5.96k|        dst->name[n1d] = 0; \
  |  | 3922|  5.96k|    } while (0)
  |  |  ------------------
  |  |  |  Branch (3922:14): [Folded, False: 5.96k]
  |  |  ------------------
  ------------------
 4018|  5.96k|}
dav1d_cdf_thread_init_static:
 4023|  30.4k|void dav1d_cdf_thread_init_static(CdfThreadContext *const cdf, const unsigned qidx) {
 4024|       |    cdf->ref = NULL;
 4025|  30.4k|    cdf->data.qcat = (qidx > 20) + (qidx > 60) + (qidx > 120);
 4026|  30.4k|}
dav1d_cdf_thread_copy:
 4028|  58.9k|void dav1d_cdf_thread_copy(CdfContext *const dst, const CdfThreadContext *const src) {
 4029|  58.9k|    if (src->ref) {
  ------------------
  |  Branch (4029:9): [True: 12.3k, False: 46.6k]
  ------------------
 4030|  12.3k|        memcpy(dst, src->data.cdf, sizeof(*dst));
 4031|  46.6k|    } else {
 4032|  46.6k|        dst->coef = default_coef_cdf[src->data.qcat];
 4033|  46.6k|        memcpy(&dst->m, &default_cdf.m,
 4034|  46.6k|               offsetof(CdfDefaultContext, mv.joint));
 4035|  46.6k|        memcpy(&dst->mv.comp[1], &default_cdf.mv.comp,
 4036|       |               sizeof(default_cdf) - offsetof(CdfDefaultContext, mv.comp));
 4037|  46.6k|    }
 4038|  58.9k|}
dav1d_cdf_thread_alloc:
 4042|  13.0k|{
 4043|  13.0k|    cdf->ref = dav1d_ref_create_using_pool(c->cdf_pool,
 4044|  13.0k|                                           sizeof(CdfContext) + sizeof(atomic_uint));
 4045|  13.0k|    if (!cdf->ref) return DAV1D_ERR(ENOMEM);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  |  Branch (4045:9): [True: 0, False: 13.0k]
  ------------------
 4046|  13.0k|    cdf->data.cdf = cdf->ref->data;
 4047|  13.0k|    if (have_frame_mt) {
  ------------------
  |  Branch (4047:9): [True: 0, False: 13.0k]
  ------------------
 4048|      0|        cdf->progress = (atomic_uint *) &cdf->data.cdf[1];
 4049|       |        atomic_init(cdf->progress, 0);
 4050|      0|    }
 4051|  13.0k|    return 0;
 4052|  13.0k|}
dav1d_cdf_thread_ref:
 4056|   311k|{
 4057|   311k|    *dst = *src;
 4058|   311k|    if (src->ref)
  ------------------
  |  Branch (4058:9): [True: 80.7k, False: 230k]
  ------------------
 4059|  80.7k|        dav1d_ref_inc(src->ref);
 4060|   311k|}
dav1d_cdf_thread_unref:
 4062|   735k|void dav1d_cdf_thread_unref(CdfThreadContext *const cdf) {
 4063|       |    memset(&cdf->data, 0, sizeof(*cdf) - offsetof(CdfThreadContext, data));
 4064|   735k|    dav1d_ref_dec(&cdf->ref);
 4065|   735k|}

dav1d_init_cpu:
   63|      1|COLD void dav1d_init_cpu(void) {
   64|      1|#if HAVE_ASM && !__has_feature(memory_sanitizer)
   65|       |// memory sanitizer is inherently incompatible with asm
   66|       |#if ARCH_AARCH64 || ARCH_ARM
   67|       |    dav1d_cpu_flags = dav1d_get_cpu_flags_arm();
   68|       |#elif ARCH_LOONGARCH
   69|       |    dav1d_cpu_flags = dav1d_get_cpu_flags_loongarch();
   70|       |#elif ARCH_PPC64LE
   71|       |    dav1d_cpu_flags = dav1d_get_cpu_flags_ppc();
   72|       |#elif ARCH_RISCV
   73|       |    dav1d_cpu_flags = dav1d_get_cpu_flags_riscv();
   74|       |#elif ARCH_X86
   75|       |    dav1d_cpu_flags = dav1d_get_cpu_flags_x86();
   76|      1|#endif
   77|      1|#endif
   78|      1|}

cpu.c:dav1d_get_default_cpu_flags:
   58|      1|static ALWAYS_INLINE unsigned dav1d_get_default_cpu_flags(void) {
   59|      1|    unsigned flags = 0;
   60|       |
   61|       |#if ARCH_AARCH64 || ARCH_ARM
   62|       |#if defined(__ARM_NEON) || defined(__APPLE__) || defined(_WIN32) || ARCH_AARCH64
   63|       |    flags |= DAV1D_ARM_CPU_FLAG_NEON;
   64|       |#endif
   65|       |#ifdef __ARM_FEATURE_DOTPROD
   66|       |    flags |= DAV1D_ARM_CPU_FLAG_DOTPROD;
   67|       |#endif
   68|       |#ifdef __ARM_FEATURE_MATMUL_INT8
   69|       |    flags |= DAV1D_ARM_CPU_FLAG_I8MM;
   70|       |#endif
   71|       |#if ARCH_AARCH64
   72|       |#ifdef __ARM_FEATURE_SVE
   73|       |    flags |= DAV1D_ARM_CPU_FLAG_SVE;
   74|       |#endif
   75|       |#ifdef __ARM_FEATURE_SVE2
   76|       |    flags |= DAV1D_ARM_CPU_FLAG_SVE2;
   77|       |#endif
   78|       |#endif /* ARCH_AARCH64 */
   79|       |#elif ARCH_PPC64LE
   80|       |#if defined(__VSX__)
   81|       |    flags |= DAV1D_PPC_CPU_FLAG_VSX;
   82|       |#endif
   83|       |#if defined(__POWER9_VECTOR__)
   84|       |    flags |= DAV1D_PPC_CPU_FLAG_PWR9;
   85|       |#endif
   86|       |#elif ARCH_RISCV
   87|       |#if defined(__riscv_v)
   88|       |    flags |= DAV1D_RISCV_CPU_FLAG_V;
   89|       |#endif
   90|       |#elif ARCH_X86
   91|       |#if defined(__AVX512F__) && defined(__AVX512CD__) && \
   92|       |    defined(__AVX512BW__) && defined(__AVX512DQ__) && \
   93|       |    defined(__AVX512VL__) && defined(__AVX512VNNI__) && \
   94|       |    defined(__AVX512IFMA__) && defined(__AVX512VBMI__) && \
   95|       |    defined(__AVX512VBMI2__) && defined(__AVX512VPOPCNTDQ__) && \
   96|       |    defined(__AVX512BITALG__) && defined(__GFNI__) && \
   97|       |    defined(__VAES__) && defined(__VPCLMULQDQ__)
   98|       |    flags |= DAV1D_X86_CPU_FLAG_AVX512ICL |
   99|       |             DAV1D_X86_CPU_FLAG_AVX2 |
  100|       |             DAV1D_X86_CPU_FLAG_SSE41 |
  101|       |             DAV1D_X86_CPU_FLAG_SSSE3 |
  102|       |             DAV1D_X86_CPU_FLAG_SSE2;
  103|       |#elif defined(__AVX2__)
  104|       |    flags |= DAV1D_X86_CPU_FLAG_AVX2 |
  105|       |             DAV1D_X86_CPU_FLAG_SSE41 |
  106|       |             DAV1D_X86_CPU_FLAG_SSSE3 |
  107|       |             DAV1D_X86_CPU_FLAG_SSE2;
  108|       |#elif defined(__SSE4_1__) || defined(__AVX__)
  109|       |    flags |= DAV1D_X86_CPU_FLAG_SSE41 |
  110|       |             DAV1D_X86_CPU_FLAG_SSSE3 |
  111|       |             DAV1D_X86_CPU_FLAG_SSE2;
  112|       |#elif defined(__SSSE3__)
  113|       |    flags |= DAV1D_X86_CPU_FLAG_SSSE3 |
  114|       |             DAV1D_X86_CPU_FLAG_SSE2;
  115|       |#elif ARCH_X86_64 || defined(__SSE2__) || \
  116|       |      (defined(_M_IX86_FP) && _M_IX86_FP >= 2)
  117|       |    flags |= DAV1D_X86_CPU_FLAG_SSE2;
  118|      1|#endif
  119|      1|#endif
  120|       |
  121|      1|    return flags;
  122|      1|}
pal.c:dav1d_get_cpu_flags:
  124|  9.69k|static ALWAYS_INLINE unsigned dav1d_get_cpu_flags(void) {
  125|  9.69k|    unsigned flags = dav1d_cpu_flags & dav1d_cpu_flags_mask;
  126|       |
  127|       |#if TRIM_DSP_FUNCTIONS
  128|       |/* Since this function is inlined, unconditionally setting a flag here will
  129|       | * enable dead code elimination in the calling function. */
  130|       |    flags |= dav1d_get_default_cpu_flags();
  131|       |#endif
  132|       |
  133|  9.69k|    return flags;
  134|  9.69k|}
refmvs.c:dav1d_get_cpu_flags:
  124|  9.69k|static ALWAYS_INLINE unsigned dav1d_get_cpu_flags(void) {
  125|  9.69k|    unsigned flags = dav1d_cpu_flags & dav1d_cpu_flags_mask;
  126|       |
  127|       |#if TRIM_DSP_FUNCTIONS
  128|       |/* Since this function is inlined, unconditionally setting a flag here will
  129|       | * enable dead code elimination in the calling function. */
  130|       |    flags |= dav1d_get_default_cpu_flags();
  131|       |#endif
  132|       |
  133|  9.69k|    return flags;
  134|  9.69k|}
msac.c:dav1d_get_cpu_flags:
  124|  45.9k|static ALWAYS_INLINE unsigned dav1d_get_cpu_flags(void) {
  125|  45.9k|    unsigned flags = dav1d_cpu_flags & dav1d_cpu_flags_mask;
  126|       |
  127|       |#if TRIM_DSP_FUNCTIONS
  128|       |/* Since this function is inlined, unconditionally setting a flag here will
  129|       | * enable dead code elimination in the calling function. */
  130|       |    flags |= dav1d_get_default_cpu_flags();
  131|       |#endif
  132|       |
  133|  45.9k|    return flags;
  134|  45.9k|}
cdef_tmpl.c:dav1d_get_cpu_flags:
  124|  8.15k|static ALWAYS_INLINE unsigned dav1d_get_cpu_flags(void) {
  125|  8.15k|    unsigned flags = dav1d_cpu_flags & dav1d_cpu_flags_mask;
  126|       |
  127|       |#if TRIM_DSP_FUNCTIONS
  128|       |/* Since this function is inlined, unconditionally setting a flag here will
  129|       | * enable dead code elimination in the calling function. */
  130|       |    flags |= dav1d_get_default_cpu_flags();
  131|       |#endif
  132|       |
  133|  8.15k|    return flags;
  134|  8.15k|}
filmgrain_tmpl.c:dav1d_get_cpu_flags:
  124|  8.15k|static ALWAYS_INLINE unsigned dav1d_get_cpu_flags(void) {
  125|  8.15k|    unsigned flags = dav1d_cpu_flags & dav1d_cpu_flags_mask;
  126|       |
  127|       |#if TRIM_DSP_FUNCTIONS
  128|       |/* Since this function is inlined, unconditionally setting a flag here will
  129|       | * enable dead code elimination in the calling function. */
  130|       |    flags |= dav1d_get_default_cpu_flags();
  131|       |#endif
  132|       |
  133|  8.15k|    return flags;
  134|  8.15k|}
ipred_tmpl.c:dav1d_get_cpu_flags:
  124|  8.15k|static ALWAYS_INLINE unsigned dav1d_get_cpu_flags(void) {
  125|  8.15k|    unsigned flags = dav1d_cpu_flags & dav1d_cpu_flags_mask;
  126|       |
  127|       |#if TRIM_DSP_FUNCTIONS
  128|       |/* Since this function is inlined, unconditionally setting a flag here will
  129|       | * enable dead code elimination in the calling function. */
  130|       |    flags |= dav1d_get_default_cpu_flags();
  131|       |#endif
  132|       |
  133|  8.15k|    return flags;
  134|  8.15k|}
itx_tmpl.c:dav1d_get_cpu_flags:
  124|  8.15k|static ALWAYS_INLINE unsigned dav1d_get_cpu_flags(void) {
  125|  8.15k|    unsigned flags = dav1d_cpu_flags & dav1d_cpu_flags_mask;
  126|       |
  127|       |#if TRIM_DSP_FUNCTIONS
  128|       |/* Since this function is inlined, unconditionally setting a flag here will
  129|       | * enable dead code elimination in the calling function. */
  130|       |    flags |= dav1d_get_default_cpu_flags();
  131|       |#endif
  132|       |
  133|  8.15k|    return flags;
  134|  8.15k|}
loopfilter_tmpl.c:dav1d_get_cpu_flags:
  124|  8.15k|static ALWAYS_INLINE unsigned dav1d_get_cpu_flags(void) {
  125|  8.15k|    unsigned flags = dav1d_cpu_flags & dav1d_cpu_flags_mask;
  126|       |
  127|       |#if TRIM_DSP_FUNCTIONS
  128|       |/* Since this function is inlined, unconditionally setting a flag here will
  129|       | * enable dead code elimination in the calling function. */
  130|       |    flags |= dav1d_get_default_cpu_flags();
  131|       |#endif
  132|       |
  133|  8.15k|    return flags;
  134|  8.15k|}
looprestoration_tmpl.c:dav1d_get_cpu_flags:
  124|  8.15k|static ALWAYS_INLINE unsigned dav1d_get_cpu_flags(void) {
  125|  8.15k|    unsigned flags = dav1d_cpu_flags & dav1d_cpu_flags_mask;
  126|       |
  127|       |#if TRIM_DSP_FUNCTIONS
  128|       |/* Since this function is inlined, unconditionally setting a flag here will
  129|       | * enable dead code elimination in the calling function. */
  130|       |    flags |= dav1d_get_default_cpu_flags();
  131|       |#endif
  132|       |
  133|  8.15k|    return flags;
  134|  8.15k|}
mc_tmpl.c:dav1d_get_cpu_flags:
  124|  8.15k|static ALWAYS_INLINE unsigned dav1d_get_cpu_flags(void) {
  125|  8.15k|    unsigned flags = dav1d_cpu_flags & dav1d_cpu_flags_mask;
  126|       |
  127|       |#if TRIM_DSP_FUNCTIONS
  128|       |/* Since this function is inlined, unconditionally setting a flag here will
  129|       | * enable dead code elimination in the calling function. */
  130|       |    flags |= dav1d_get_default_cpu_flags();
  131|       |#endif
  132|       |
  133|  8.15k|    return flags;
  134|  8.15k|}

ctx.c:memset_w1:
   34|  18.8M|static void memset_w1(void *const ptr, const int value) {
   35|  18.8M|    set_ctx1((uint8_t *) ptr, 0, value);
  ------------------
  |  |   56|  18.8M|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  ------------------
   36|  18.8M|}
ctx.c:memset_w2:
   38|  8.30M|static void memset_w2(void *const ptr, const int value) {
   39|  8.30M|    set_ctx2((uint8_t *) ptr, 0, value);
  ------------------
  |  |   58|  8.30M|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  ------------------
   40|  8.30M|}
ctx.c:memset_w4:
   42|  7.43M|static void memset_w4(void *const ptr, const int value) {
   43|  7.43M|    set_ctx4((uint8_t *) ptr, 0, value);
  ------------------
  |  |   60|  7.43M|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  ------------------
   44|  7.43M|}
ctx.c:memset_w8:
   46|  5.27M|static void memset_w8(void *const ptr, const int value) {
   47|  5.27M|    set_ctx8((uint8_t *) ptr, 0, value);
  ------------------
  |  |   62|  5.27M|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  ------------------
   48|  5.27M|}
ctx.c:memset_w16:
   50|  2.39M|static void memset_w16(void *const ptr, const int value) {
   51|  2.39M|    set_ctx16((uint8_t *) ptr, 0, value);
  ------------------
  |  |   63|  2.39M|#define set_ctx16(var, off, val) do { \
  |  |   64|  2.39M|        memset(&(var)[off], val, 16); \
  |  |   65|  2.39M|    } while (0)
  |  |  ------------------
  |  |  |  Branch (65:14): [Folded, False: 2.39M]
  |  |  ------------------
  ------------------
   52|  2.39M|}
ctx.c:memset_w32:
   54|   219k|static void memset_w32(void *const ptr, const int value) {
   55|   219k|    set_ctx32((uint8_t *) ptr, 0, value);
  ------------------
  |  |   66|   219k|#define set_ctx32(var, off, val) do { \
  |  |   67|   219k|        memset(&(var)[off], val, 32); \
  |  |   68|   219k|    } while (0)
  |  |  ------------------
  |  |  |  Branch (68:14): [Folded, False: 219k]
  |  |  ------------------
  ------------------
   56|   219k|}

lf_mask.c:dav1d_memset_likely_pow2:
   44|  2.10M|static inline void dav1d_memset_likely_pow2(void *const ptr, const int value, const int n) {
   45|  2.10M|    assert(n >= 1 && n <= 32);
  ------------------
  |  Branch (45:5): [True: 2.10M, False: 0]
  |  Branch (45:5): [True: 2.10M, False: 0]
  ------------------
   46|  2.10M|    if ((n&(n-1)) == 0) {
  ------------------
  |  Branch (46:9): [True: 1.95M, False: 148k]
  ------------------
   47|  1.95M|        dav1d_memset_pow2[ulog2(n)](ptr, value);
   48|  1.95M|    } else {
   49|   148k|        memset(ptr, value, n);
   50|   148k|    }
   51|  2.10M|}
recon_tmpl.c:dav1d_memset_likely_pow2:
   44|  15.1M|static inline void dav1d_memset_likely_pow2(void *const ptr, const int value, const int n) {
   45|  15.1M|    assert(n >= 1 && n <= 32);
  ------------------
  |  Branch (45:5): [True: 15.1M, False: 0]
  |  Branch (45:5): [True: 15.1M, False: 0]
  ------------------
   46|  15.1M|    if ((n&(n-1)) == 0) {
  ------------------
  |  Branch (46:9): [True: 14.9M, False: 153k]
  ------------------
   47|  14.9M|        dav1d_memset_pow2[ulog2(n)](ptr, value);
   48|  14.9M|    } else {
   49|   153k|        memset(ptr, value, n);
   50|   153k|    }
   51|  15.1M|}

dav1d_data_create_internal:
   43|  73.9k|uint8_t *dav1d_data_create_internal(Dav1dData *const buf, const size_t sz) {
   44|  73.9k|    validate_input_or_ret(buf != NULL, NULL);
  ------------------
  |  |   52|  73.9k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:9): [True: 0, False: 73.9k]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
   45|       |
   46|  73.9k|    if (sz > SIZE_MAX / 2) return NULL;
  ------------------
  |  Branch (46:9): [True: 0, False: 73.9k]
  ------------------
   47|  73.9k|    buf->ref = dav1d_ref_create(ALLOC_DAV1DDATA, sz);
  ------------------
  |  |   49|  73.9k|#define dav1d_ref_create(type, size) dav1d_ref_create(size)
  ------------------
   48|  73.9k|    if (!buf->ref) return NULL;
  ------------------
  |  Branch (48:9): [True: 0, False: 73.9k]
  ------------------
   49|  73.9k|    buf->data = buf->ref->const_data;
   50|  73.9k|    buf->sz = sz;
   51|  73.9k|    dav1d_data_props_set_defaults(&buf->m);
   52|  73.9k|    buf->m.size = sz;
   53|       |
   54|  73.9k|    return buf->ref->data;
   55|  73.9k|}
dav1d_data_ref:
   98|   120k|void dav1d_data_ref(Dav1dData *const dst, const Dav1dData *const src) {
   99|   120k|    assert(dst != NULL);
  ------------------
  |  Branch (99:5): [True: 120k, False: 0]
  ------------------
  100|   120k|    assert(dst->data == NULL);
  ------------------
  |  Branch (100:5): [True: 120k, False: 0]
  ------------------
  101|   120k|    assert(src != NULL);
  ------------------
  |  Branch (101:5): [True: 120k, False: 0]
  ------------------
  102|       |
  103|   120k|    if (src->ref) {
  ------------------
  |  Branch (103:9): [True: 120k, False: 0]
  ------------------
  104|   120k|        assert(src->data != NULL);
  ------------------
  |  Branch (104:9): [True: 120k, False: 0]
  ------------------
  105|   120k|        dav1d_ref_inc(src->ref);
  106|   120k|    }
  107|   120k|    if (src->m.user_data.ref) dav1d_ref_inc(src->m.user_data.ref);
  ------------------
  |  Branch (107:9): [True: 0, False: 120k]
  ------------------
  108|   120k|    *dst = *src;
  109|   120k|}
dav1d_data_props_copy:
  113|   107k|{
  114|   107k|    assert(dst != NULL);
  ------------------
  |  Branch (114:5): [True: 107k, False: 0]
  ------------------
  115|   107k|    assert(src != NULL);
  ------------------
  |  Branch (115:5): [True: 107k, False: 0]
  ------------------
  116|       |
  117|   107k|    dav1d_ref_dec(&dst->user_data.ref);
  118|   107k|    *dst = *src;
  119|   107k|    if (dst->user_data.ref) dav1d_ref_inc(dst->user_data.ref);
  ------------------
  |  Branch (119:9): [True: 0, False: 107k]
  ------------------
  120|   107k|}
dav1d_data_props_set_defaults:
  122|   968k|void dav1d_data_props_set_defaults(Dav1dDataProps *const props) {
  123|   968k|    assert(props != NULL);
  ------------------
  |  Branch (123:5): [True: 968k, False: 0]
  ------------------
  124|       |
  125|   968k|    memset(props, 0, sizeof(*props));
  126|       |    props->timestamp = INT64_MIN;
  127|   968k|    props->offset = -1;
  128|   968k|}
dav1d_data_props_unref_internal:
  130|  9.69k|void dav1d_data_props_unref_internal(Dav1dDataProps *const props) {
  131|  9.69k|    validate_input(props != NULL);
  ------------------
  |  |   59|  9.69k|#define validate_input(x) validate_input_or_ret(x, )
  |  |  ------------------
  |  |  |  |   52|  9.69k|    if (!(x)) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (52:9): [True: 0, False: 9.69k]
  |  |  |  |  ------------------
  |  |  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  |  |  ------------------
  |  |  |  |   54|      0|                    #x, __func__); \
  |  |  |  |   55|      0|        debug_abort(); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   39|      0|#define debug_abort abort
  |  |  |  |  ------------------
  |  |  |  |   56|      0|        return r; \
  |  |  |  |   57|      0|    }
  |  |  ------------------
  ------------------
  132|       |
  133|  9.69k|    struct Dav1dRef *user_data_ref = props->user_data.ref;
  134|  9.69k|    dav1d_data_props_set_defaults(props);
  135|  9.69k|    dav1d_ref_dec(&user_data_ref);
  136|  9.69k|}
dav1d_data_unref_internal:
  138|   203k|void dav1d_data_unref_internal(Dav1dData *const buf) {
  139|   203k|    validate_input(buf != NULL);
  ------------------
  |  |   59|   203k|#define validate_input(x) validate_input_or_ret(x, )
  |  |  ------------------
  |  |  |  |   52|   203k|    if (!(x)) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (52:9): [True: 0, False: 203k]
  |  |  |  |  ------------------
  |  |  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  |  |  ------------------
  |  |  |  |   54|      0|                    #x, __func__); \
  |  |  |  |   55|      0|        debug_abort(); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   39|      0|#define debug_abort abort
  |  |  |  |  ------------------
  |  |  |  |   56|      0|        return r; \
  |  |  |  |   57|      0|    }
  |  |  ------------------
  ------------------
  140|       |
  141|   203k|    struct Dav1dRef *user_data_ref = buf->m.user_data.ref;
  142|   203k|    if (buf->ref) {
  ------------------
  |  Branch (142:9): [True: 194k, False: 9.69k]
  ------------------
  143|   194k|        validate_input(buf->data != NULL);
  ------------------
  |  |   59|   194k|#define validate_input(x) validate_input_or_ret(x, )
  |  |  ------------------
  |  |  |  |   52|   194k|    if (!(x)) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (52:9): [True: 0, False: 194k]
  |  |  |  |  ------------------
  |  |  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  |  |  ------------------
  |  |  |  |   54|      0|                    #x, __func__); \
  |  |  |  |   55|      0|        debug_abort(); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   39|      0|#define debug_abort abort
  |  |  |  |  ------------------
  |  |  |  |   56|      0|        return r; \
  |  |  |  |   57|      0|    }
  |  |  ------------------
  ------------------
  144|   194k|        dav1d_ref_dec(&buf->ref);
  145|   194k|    }
  146|   203k|    memset(buf, 0, sizeof(*buf));
  147|   203k|    dav1d_data_props_set_defaults(&buf->m);
  148|   203k|    dav1d_ref_dec(&user_data_ref);
  149|   203k|}

dav1d_decode_tile_sbrow:
 2594|   158k|int dav1d_decode_tile_sbrow(Dav1dTaskContext *const t) {
 2595|   158k|    const Dav1dFrameContext *const f = t->f;
 2596|   158k|    const enum BlockLevel root_bl = f->seq_hdr->sb128 ? BL_128X128 : BL_64X64;
  ------------------
  |  Branch (2596:37): [True: 93.0k, False: 65.8k]
  ------------------
 2597|   158k|    Dav1dTileState *const ts = t->ts;
 2598|   158k|    const Dav1dContext *const c = f->c;
 2599|   158k|    const int sb_step = f->sb_step;
 2600|   158k|    const int tile_row = ts->tiling.row, tile_col = ts->tiling.col;
 2601|   158k|    const int col_sb_start = f->frame_hdr->tiling.col_start_sb[tile_col];
 2602|   158k|    const int col_sb128_start = col_sb_start >> !f->seq_hdr->sb128;
 2603|       |
 2604|   158k|    if (IS_INTER_OR_SWITCH(f->frame_hdr) || f->frame_hdr->allow_intrabc) {
  ------------------
  |  |   36|   317k|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 54.5k, False: 104k]
  |  |  ------------------
  ------------------
  |  Branch (2604:45): [True: 76.0k, False: 28.3k]
  ------------------
 2605|   130k|        dav1d_refmvs_tile_sbrow_init(&t->rt, &f->rf, ts->tiling.col_start,
 2606|   130k|                                     ts->tiling.col_end, ts->tiling.row_start,
 2607|   130k|                                     ts->tiling.row_end, t->by >> f->sb_shift,
 2608|   130k|                                     ts->tiling.row, t->frame_thread.pass);
 2609|   130k|    }
 2610|       |
 2611|   158k|    if (IS_INTER_OR_SWITCH(f->frame_hdr) && c->n_fc > 1) {
  ------------------
  |  |   36|   317k|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 54.5k, False: 104k]
  |  |  ------------------
  ------------------
  |  Branch (2611:45): [True: 0, False: 54.5k]
  ------------------
 2612|      0|        const int sby = (t->by - ts->tiling.row_start) >> f->sb_shift;
 2613|      0|        int (*const lowest_px)[2] = ts->lowest_pixel[sby];
 2614|      0|        for (int n = 0; n < 7; n++)
  ------------------
  |  Branch (2614:25): [True: 0, False: 0]
  ------------------
 2615|      0|            for (int m = 0; m < 2; m++)
  ------------------
  |  Branch (2615:29): [True: 0, False: 0]
  ------------------
 2616|      0|                lowest_px[n][m] = INT_MIN;
 2617|      0|    }
 2618|       |
 2619|   158k|    reset_context(&t->l, IS_KEY_OR_INTRA(f->frame_hdr), t->frame_thread.pass);
  ------------------
  |  |   43|   158k|    (!IS_INTER_OR_SWITCH(frame_header))
  |  |  ------------------
  |  |  |  |   36|   158k|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  ------------------
 2620|   158k|    if (t->frame_thread.pass == 2) {
  ------------------
  |  Branch (2620:9): [True: 0, False: 158k]
  ------------------
 2621|      0|        const int off_2pass = c->n_tc > 1 ? f->sb128w * f->frame_hdr->tiling.rows : 0;
  ------------------
  |  Branch (2621:31): [True: 0, False: 0]
  ------------------
 2622|      0|        for (t->bx = ts->tiling.col_start,
 2623|      0|             t->a = f->a + off_2pass + col_sb128_start + tile_row * f->sb128w;
 2624|      0|             t->bx < ts->tiling.col_end; t->bx += sb_step)
  ------------------
  |  Branch (2624:14): [True: 0, False: 0]
  ------------------
 2625|      0|        {
 2626|      0|            if (atomic_load_explicit(c->flush, memory_order_acquire))
  ------------------
  |  Branch (2626:17): [True: 0, False: 0]
  ------------------
 2627|      0|                return 1;
 2628|      0|            if (decode_sb(t, root_bl, dav1d_intra_edge_tree[root_bl]))
  ------------------
  |  Branch (2628:17): [True: 0, False: 0]
  ------------------
 2629|      0|                return 1;
 2630|      0|            if (t->bx & 16 || f->seq_hdr->sb128)
  ------------------
  |  Branch (2630:17): [True: 0, False: 0]
  |  Branch (2630:31): [True: 0, False: 0]
  ------------------
 2631|      0|                t->a++;
 2632|      0|        }
 2633|      0|        f->bd_fn.backup_ipred_edge(t);
 2634|      0|        return 0;
 2635|      0|    }
 2636|       |
 2637|   158k|    if (f->c->n_tc > 1 && f->frame_hdr->use_ref_frame_mvs) {
  ------------------
  |  Branch (2637:9): [True: 0, False: 158k]
  |  Branch (2637:27): [True: 0, False: 0]
  ------------------
 2638|      0|        f->c->refmvs_dsp.load_tmvs(&f->rf, ts->tiling.row,
 2639|      0|                                   ts->tiling.col_start >> 1, ts->tiling.col_end >> 1,
 2640|      0|                                   t->by >> 1, (t->by + sb_step) >> 1);
 2641|      0|    }
 2642|   158k|    memset(t->pal_sz_uv[1], 0, sizeof(*t->pal_sz_uv));
 2643|   158k|    const int sb128y = t->by >> 5;
 2644|   158k|    for (t->bx = ts->tiling.col_start, t->a = f->a + col_sb128_start + tile_row * f->sb128w,
 2645|   158k|         t->lf_mask = f->lf.mask + sb128y * f->sb128w + col_sb128_start;
 2646|   719k|         t->bx < ts->tiling.col_end; t->bx += sb_step)
  ------------------
  |  Branch (2646:10): [True: 568k, False: 150k]
  ------------------
 2647|   568k|    {
 2648|   568k|        if (atomic_load_explicit(c->flush, memory_order_acquire))
  ------------------
  |  Branch (2648:13): [True: 0, False: 568k]
  ------------------
 2649|      0|            return 1;
 2650|   568k|        if (root_bl == BL_128X128) {
  ------------------
  |  Branch (2650:13): [True: 221k, False: 347k]
  ------------------
 2651|   221k|            t->cur_sb_cdef_idx_ptr = t->lf_mask->cdef_idx;
 2652|   221k|            t->cur_sb_cdef_idx_ptr[0] = -1;
 2653|   221k|            t->cur_sb_cdef_idx_ptr[1] = -1;
 2654|   221k|            t->cur_sb_cdef_idx_ptr[2] = -1;
 2655|   221k|            t->cur_sb_cdef_idx_ptr[3] = -1;
 2656|   347k|        } else {
 2657|   347k|            t->cur_sb_cdef_idx_ptr =
 2658|   347k|                &t->lf_mask->cdef_idx[((t->bx & 16) >> 4) +
 2659|   347k|                                      ((t->by & 16) >> 3)];
 2660|   347k|            t->cur_sb_cdef_idx_ptr[0] = -1;
 2661|   347k|        }
 2662|       |        // Restoration filter
 2663|  2.27M|        for (int p = 0; p < 3; p++) {
  ------------------
  |  Branch (2663:25): [True: 1.70M, False: 568k]
  ------------------
 2664|  1.70M|            if (!((f->lf.restore_planes >> p) & 1U))
  ------------------
  |  Branch (2664:17): [True: 1.56M, False: 146k]
  ------------------
 2665|  1.56M|                continue;
 2666|       |
 2667|   146k|            const int ss_ver = p && f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
  ------------------
  |  Branch (2667:32): [True: 58.1k, False: 88.2k]
  |  Branch (2667:37): [True: 20.4k, False: 37.7k]
  ------------------
 2668|   146k|            const int ss_hor = p && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
  ------------------
  |  Branch (2668:32): [True: 58.1k, False: 88.2k]
  |  Branch (2668:37): [True: 22.9k, False: 35.1k]
  ------------------
 2669|   146k|            const int unit_size_log2 = f->frame_hdr->restoration.unit_size[!!p];
 2670|   146k|            const int y = t->by * 4 >> ss_ver;
 2671|   146k|            const int h = (f->cur.p.h + ss_ver) >> ss_ver;
 2672|       |
 2673|   146k|            const int unit_size = 1 << unit_size_log2;
 2674|   146k|            const unsigned mask = unit_size - 1;
 2675|   146k|            if (y & mask) continue;
  ------------------
  |  Branch (2675:17): [True: 36.7k, False: 109k]
  ------------------
 2676|   109k|            const int half_unit = unit_size >> 1;
 2677|       |            // Round half up at frame boundaries, if there's more than one
 2678|       |            // restoration unit
 2679|   109k|            if (y && y + half_unit > h) continue;
  ------------------
  |  Branch (2679:17): [True: 26.8k, False: 82.8k]
  |  Branch (2679:22): [True: 2.49k, False: 24.3k]
  ------------------
 2680|       |
 2681|   107k|            const enum Dav1dRestorationType frame_type = f->frame_hdr->restoration.type[p];
 2682|       |
 2683|   107k|            if (f->frame_hdr->width[0] != f->frame_hdr->width[1]) {
  ------------------
  |  Branch (2683:17): [True: 19.4k, False: 87.7k]
  ------------------
 2684|  19.4k|                const int w = (f->sr_cur.p.p.w + ss_hor) >> ss_hor;
 2685|  19.4k|                const int n_units = imax(1, (w + half_unit) >> unit_size_log2);
 2686|       |
 2687|  19.4k|                const int d = f->frame_hdr->super_res.width_scale_denominator;
 2688|  19.4k|                const int rnd = unit_size * 8 - 1, shift = unit_size_log2 + 3;
 2689|  19.4k|                const int x0 = ((4 *  t->bx            * d >> ss_hor) + rnd) >> shift;
 2690|  19.4k|                const int x1 = ((4 * (t->bx + sb_step) * d >> ss_hor) + rnd) >> shift;
 2691|       |
 2692|  43.6k|                for (int x = x0; x < imin(x1, n_units); x++) {
  ------------------
  |  Branch (2692:34): [True: 24.2k, False: 19.4k]
  ------------------
 2693|  24.2k|                    const int px_x = x << (unit_size_log2 + ss_hor);
 2694|  24.2k|                    const int sb_idx = (t->by >> 5) * f->sr_sb128w + (px_x >> 7);
 2695|  24.2k|                    const int unit_idx = ((t->by & 16) >> 3) + ((px_x & 64) >> 6);
 2696|  24.2k|                    Av1RestorationUnit *const lr = &f->lf.lr_mask[sb_idx].lr[p][unit_idx];
 2697|       |
 2698|  24.2k|                    read_restoration_info(t, lr, p, frame_type);
 2699|  24.2k|                }
 2700|  87.7k|            } else {
 2701|  87.7k|                const int x = 4 * t->bx >> ss_hor;
 2702|  87.7k|                if (x & mask) continue;
  ------------------
  |  Branch (2702:21): [True: 11.8k, False: 75.9k]
  ------------------
 2703|  75.9k|                const int w = (f->cur.p.w + ss_hor) >> ss_hor;
 2704|       |                // Round half up at frame boundaries, if there's more than one
 2705|       |                // restoration unit
 2706|  75.9k|                if (x && x + half_unit > w) continue;
  ------------------
  |  Branch (2706:21): [True: 52.1k, False: 23.7k]
  |  Branch (2706:26): [True: 1.75k, False: 50.4k]
  ------------------
 2707|  74.1k|                const int sb_idx = (t->by >> 5) * f->sr_sb128w + (t->bx >> 5);
 2708|  74.1k|                const int unit_idx = ((t->by & 16) >> 3) + ((t->bx & 16) >> 4);
 2709|  74.1k|                Av1RestorationUnit *const lr = &f->lf.lr_mask[sb_idx].lr[p][unit_idx];
 2710|       |
 2711|  74.1k|                read_restoration_info(t, lr, p, frame_type);
 2712|  74.1k|            }
 2713|   107k|        }
 2714|   568k|        if (decode_sb(t, root_bl, dav1d_intra_edge_tree[root_bl]))
  ------------------
  |  Branch (2714:13): [True: 8.55k, False: 560k]
  ------------------
 2715|  8.55k|            return 1;
 2716|   560k|        if (t->bx & 16 || f->seq_hdr->sb128) {
  ------------------
  |  Branch (2716:13): [True: 165k, False: 394k]
  |  Branch (2716:27): [True: 215k, False: 179k]
  ------------------
 2717|   381k|            t->a++;
 2718|   381k|            t->lf_mask++;
 2719|   381k|        }
 2720|   560k|    }
 2721|       |
 2722|   150k|    if (f->seq_hdr->ref_frame_mvs && f->c->n_tc > 1 && IS_INTER_OR_SWITCH(f->frame_hdr)) {
  ------------------
  |  |   36|      0|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  |  Branch (2722:9): [True: 107k, False: 43.3k]
  |  Branch (2722:38): [True: 0, False: 107k]
  ------------------
 2723|      0|        dav1d_refmvs_save_tmvs(&f->c->refmvs_dsp, &t->rt,
 2724|      0|                               ts->tiling.col_start >> 1, ts->tiling.col_end >> 1,
 2725|      0|                               t->by >> 1, (t->by + sb_step) >> 1);
 2726|      0|    }
 2727|       |
 2728|       |    // backup pre-loopfilter pixels for intra prediction of the next sbrow
 2729|   150k|    if (t->frame_thread.pass != 1)
  ------------------
  |  Branch (2729:9): [True: 150k, False: 0]
  ------------------
 2730|   150k|        f->bd_fn.backup_ipred_edge(t);
 2731|       |
 2732|       |    // backup t->a/l.tx_lpf_y/uv at tile boundaries to use them to "fix"
 2733|       |    // up the initial value in neighbour tiles when running the loopfilter
 2734|   150k|    int align_h = (f->bh + 31) & ~31;
 2735|   150k|    memcpy(&f->lf.tx_lpf_right_edge[0][align_h * tile_col + t->by],
 2736|   150k|           &t->l.tx_lpf_y[t->by & 16], sb_step);
 2737|   150k|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
 2738|   150k|    align_h >>= ss_ver;
 2739|   150k|    memcpy(&f->lf.tx_lpf_right_edge[1][align_h * tile_col + (t->by >> ss_ver)],
 2740|   150k|           &t->l.tx_lpf_uv[(t->by & 16) >> ss_ver], sb_step >> ss_ver);
 2741|       |
 2742|       |    // error out on symbol decoder overread
 2743|   150k|    if (ts->msac.cnt <= -15) return 1;
  ------------------
  |  Branch (2743:9): [True: 14.1k, False: 136k]
  ------------------
 2744|       |
 2745|   136k|    return c->strict_std_compliance &&
  ------------------
  |  Branch (2745:12): [True: 0, False: 136k]
  ------------------
 2746|      0|           (t->by >> f->sb_shift) + 1 >= f->frame_hdr->tiling.row_start_sb[tile_row + 1] &&
  ------------------
  |  Branch (2746:12): [True: 0, False: 0]
  ------------------
 2747|      0|           check_trailing_bits_after_symbol_coder(&ts->msac);
  ------------------
  |  Branch (2747:12): [True: 0, False: 0]
  ------------------
 2748|   150k|}
dav1d_decode_frame_init:
 2750|  41.2k|int dav1d_decode_frame_init(Dav1dFrameContext *const f) {
 2751|  41.2k|    const Dav1dContext *const c = f->c;
 2752|  41.2k|    int retval = DAV1D_ERR(ENOMEM);
  ------------------
  |  |   58|  41.2k|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
 2753|       |
 2754|  41.2k|    if (f->sbh > f->lf.start_of_tile_row_sz) {
  ------------------
  |  Branch (2754:9): [True: 8.49k, False: 32.7k]
  ------------------
 2755|  8.49k|        dav1d_free(f->lf.start_of_tile_row);
  ------------------
  |  |  135|  8.49k|#define dav1d_free(ptr) free(ptr)
  ------------------
 2756|  8.49k|        f->lf.start_of_tile_row = dav1d_malloc(ALLOC_TILE, f->sbh * sizeof(uint8_t));
  ------------------
  |  |  132|  8.49k|#define dav1d_malloc(type, sz) malloc(sz)
  ------------------
 2757|  8.49k|        if (!f->lf.start_of_tile_row) {
  ------------------
  |  Branch (2757:13): [True: 0, False: 8.49k]
  ------------------
 2758|      0|            f->lf.start_of_tile_row_sz = 0;
 2759|      0|            goto error;
 2760|      0|        }
 2761|  8.49k|        f->lf.start_of_tile_row_sz = f->sbh;
 2762|  8.49k|    }
 2763|  41.2k|    int sby = 0;
 2764|  85.6k|    for (int tile_row = 0; tile_row < f->frame_hdr->tiling.rows; tile_row++) {
  ------------------
  |  Branch (2764:28): [True: 44.3k, False: 41.2k]
  ------------------
 2765|  44.3k|        f->lf.start_of_tile_row[sby++] = tile_row;
 2766|   392k|        while (sby < f->frame_hdr->tiling.row_start_sb[tile_row + 1])
  ------------------
  |  Branch (2766:16): [True: 347k, False: 44.3k]
  ------------------
 2767|   347k|            f->lf.start_of_tile_row[sby++] = 0;
 2768|  44.3k|    }
 2769|       |
 2770|  41.2k|    const int n_ts = f->frame_hdr->tiling.cols * f->frame_hdr->tiling.rows;
 2771|  41.2k|    if (n_ts != f->n_ts) {
  ------------------
  |  Branch (2771:9): [True: 8.97k, False: 32.2k]
  ------------------
 2772|  8.97k|        if (c->n_fc > 1) {
  ------------------
  |  Branch (2772:13): [True: 0, False: 8.97k]
  ------------------
 2773|      0|            dav1d_free(f->frame_thread.tile_start_off);
  ------------------
  |  |  135|      0|#define dav1d_free(ptr) free(ptr)
  ------------------
 2774|      0|            f->frame_thread.tile_start_off =
 2775|      0|                dav1d_malloc(ALLOC_TILE, sizeof(*f->frame_thread.tile_start_off) * n_ts);
  ------------------
  |  |  132|      0|#define dav1d_malloc(type, sz) malloc(sz)
  ------------------
 2776|      0|            if (!f->frame_thread.tile_start_off) {
  ------------------
  |  Branch (2776:17): [True: 0, False: 0]
  ------------------
 2777|      0|                f->n_ts = 0;
 2778|      0|                goto error;
 2779|      0|            }
 2780|      0|        }
 2781|  8.97k|        dav1d_free_aligned(f->ts);
  ------------------
  |  |  136|  8.97k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
 2782|  8.97k|        f->ts = dav1d_alloc_aligned(ALLOC_TILE, sizeof(*f->ts) * n_ts, 32);
  ------------------
  |  |  134|  8.97k|#define dav1d_alloc_aligned(type, sz, align) dav1d_alloc_aligned_internal(sz, align)
  ------------------
 2783|  8.97k|        if (!f->ts) goto error;
  ------------------
  |  Branch (2783:13): [True: 0, False: 8.97k]
  ------------------
 2784|  8.97k|        f->n_ts = n_ts;
 2785|  8.97k|    }
 2786|       |
 2787|  41.2k|    const int a_sz = f->sb128w * f->frame_hdr->tiling.rows * (1 + (c->n_fc > 1 && c->n_tc > 1));
  ------------------
  |  Branch (2787:68): [True: 0, False: 41.2k]
  |  Branch (2787:83): [True: 0, False: 0]
  ------------------
 2788|  41.2k|    if (a_sz != f->a_sz) {
  ------------------
  |  Branch (2788:9): [True: 9.75k, False: 31.4k]
  ------------------
 2789|  9.75k|        dav1d_free(f->a);
  ------------------
  |  |  135|  9.75k|#define dav1d_free(ptr) free(ptr)
  ------------------
 2790|  9.75k|        f->a = dav1d_malloc(ALLOC_TILE, sizeof(*f->a) * a_sz);
  ------------------
  |  |  132|  9.75k|#define dav1d_malloc(type, sz) malloc(sz)
  ------------------
 2791|  9.75k|        if (!f->a) {
  ------------------
  |  Branch (2791:13): [True: 0, False: 9.75k]
  ------------------
 2792|      0|            f->a_sz = 0;
 2793|      0|            goto error;
 2794|      0|        }
 2795|  9.75k|        f->a_sz = a_sz;
 2796|  9.75k|    }
 2797|       |
 2798|  41.2k|    const int num_sb128 = f->sb128w * f->sb128h;
 2799|  41.2k|    const uint8_t *const size_mul = ss_size_mul[f->cur.p.layout];
 2800|  41.2k|    const int hbd = !!f->seq_hdr->hbd;
 2801|  41.2k|    if (c->n_fc > 1) {
  ------------------
  |  Branch (2801:9): [True: 0, False: 41.2k]
  ------------------
 2802|      0|        const unsigned sb_step4 = f->sb_step * 4;
 2803|      0|        int tile_idx = 0;
 2804|      0|        for (int tile_row = 0; tile_row < f->frame_hdr->tiling.rows; tile_row++) {
  ------------------
  |  Branch (2804:32): [True: 0, False: 0]
  ------------------
 2805|      0|            const unsigned row_off = f->frame_hdr->tiling.row_start_sb[tile_row] *
 2806|      0|                                     sb_step4 * f->sb128w * 128;
 2807|      0|            const unsigned b_diff = (f->frame_hdr->tiling.row_start_sb[tile_row + 1] -
 2808|      0|                                     f->frame_hdr->tiling.row_start_sb[tile_row]) * sb_step4;
 2809|      0|            for (int tile_col = 0; tile_col < f->frame_hdr->tiling.cols; tile_col++) {
  ------------------
  |  Branch (2809:36): [True: 0, False: 0]
  ------------------
 2810|      0|                f->frame_thread.tile_start_off[tile_idx++] = row_off + b_diff *
 2811|      0|                    f->frame_hdr->tiling.col_start_sb[tile_col] * sb_step4;
 2812|      0|            }
 2813|      0|        }
 2814|       |
 2815|      0|        const int lowest_pixel_mem_sz = f->frame_hdr->tiling.cols * f->sbh;
 2816|      0|        if (lowest_pixel_mem_sz != f->tile_thread.lowest_pixel_mem_sz) {
  ------------------
  |  Branch (2816:13): [True: 0, False: 0]
  ------------------
 2817|      0|            dav1d_free(f->tile_thread.lowest_pixel_mem);
  ------------------
  |  |  135|      0|#define dav1d_free(ptr) free(ptr)
  ------------------
 2818|      0|            f->tile_thread.lowest_pixel_mem =
 2819|      0|                dav1d_malloc(ALLOC_TILE, lowest_pixel_mem_sz *
  ------------------
  |  |  132|      0|#define dav1d_malloc(type, sz) malloc(sz)
  ------------------
 2820|      0|                             sizeof(*f->tile_thread.lowest_pixel_mem));
 2821|      0|            if (!f->tile_thread.lowest_pixel_mem) {
  ------------------
  |  Branch (2821:17): [True: 0, False: 0]
  ------------------
 2822|      0|                f->tile_thread.lowest_pixel_mem_sz = 0;
 2823|      0|                goto error;
 2824|      0|            }
 2825|      0|            f->tile_thread.lowest_pixel_mem_sz = lowest_pixel_mem_sz;
 2826|      0|        }
 2827|      0|        int (*lowest_pixel_ptr)[7][2] = f->tile_thread.lowest_pixel_mem;
 2828|      0|        for (int tile_row = 0, tile_row_base = 0; tile_row < f->frame_hdr->tiling.rows;
  ------------------
  |  Branch (2828:51): [True: 0, False: 0]
  ------------------
 2829|      0|             tile_row++, tile_row_base += f->frame_hdr->tiling.cols)
 2830|      0|        {
 2831|      0|            const int tile_row_sb_h = f->frame_hdr->tiling.row_start_sb[tile_row + 1] -
 2832|      0|                                      f->frame_hdr->tiling.row_start_sb[tile_row];
 2833|      0|            for (int tile_col = 0; tile_col < f->frame_hdr->tiling.cols; tile_col++) {
  ------------------
  |  Branch (2833:36): [True: 0, False: 0]
  ------------------
 2834|      0|                f->ts[tile_row_base + tile_col].lowest_pixel = lowest_pixel_ptr;
 2835|      0|                lowest_pixel_ptr += tile_row_sb_h;
 2836|      0|            }
 2837|      0|        }
 2838|       |
 2839|      0|        const int cbi_sz = num_sb128 * size_mul[0];
 2840|      0|        if (cbi_sz != f->frame_thread.cbi_sz) {
  ------------------
  |  Branch (2840:13): [True: 0, False: 0]
  ------------------
 2841|      0|            dav1d_free_aligned(f->frame_thread.cbi);
  ------------------
  |  |  136|      0|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
 2842|      0|            f->frame_thread.cbi =
 2843|      0|                dav1d_alloc_aligned(ALLOC_BLOCK, sizeof(*f->frame_thread.cbi) *
  ------------------
  |  |  134|      0|#define dav1d_alloc_aligned(type, sz, align) dav1d_alloc_aligned_internal(sz, align)
  ------------------
 2844|      0|                                    cbi_sz * 32 * 32 / 4, 64);
 2845|      0|            if (!f->frame_thread.cbi) {
  ------------------
  |  Branch (2845:17): [True: 0, False: 0]
  ------------------
 2846|      0|                f->frame_thread.cbi_sz = 0;
 2847|      0|                goto error;
 2848|      0|            }
 2849|      0|            f->frame_thread.cbi_sz = cbi_sz;
 2850|      0|        }
 2851|       |
 2852|      0|        const int cf_sz = (num_sb128 * size_mul[0]) << hbd;
 2853|      0|        if (cf_sz != f->frame_thread.cf_sz) {
  ------------------
  |  Branch (2853:13): [True: 0, False: 0]
  ------------------
 2854|      0|            dav1d_free_aligned(f->frame_thread.cf);
  ------------------
  |  |  136|      0|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
 2855|      0|            f->frame_thread.cf =
 2856|      0|                dav1d_alloc_aligned(ALLOC_COEF, (size_t)cf_sz * 128 * 128 / 2, 64);
  ------------------
  |  |  134|      0|#define dav1d_alloc_aligned(type, sz, align) dav1d_alloc_aligned_internal(sz, align)
  ------------------
 2857|      0|            if (!f->frame_thread.cf) {
  ------------------
  |  Branch (2857:17): [True: 0, False: 0]
  ------------------
 2858|      0|                f->frame_thread.cf_sz = 0;
 2859|      0|                goto error;
 2860|      0|            }
 2861|      0|            memset(f->frame_thread.cf, 0, (size_t)cf_sz * 128 * 128 / 2);
 2862|      0|            f->frame_thread.cf_sz = cf_sz;
 2863|      0|        }
 2864|       |
 2865|      0|        if (f->frame_hdr->allow_screen_content_tools) {
  ------------------
  |  Branch (2865:13): [True: 0, False: 0]
  ------------------
 2866|      0|            const int pal_sz = num_sb128 << hbd;
 2867|      0|            if (pal_sz != f->frame_thread.pal_sz) {
  ------------------
  |  Branch (2867:17): [True: 0, False: 0]
  ------------------
 2868|      0|                dav1d_free_aligned(f->frame_thread.pal);
  ------------------
  |  |  136|      0|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
 2869|      0|                f->frame_thread.pal =
 2870|      0|                    dav1d_alloc_aligned(ALLOC_PAL, sizeof(*f->frame_thread.pal) *
  ------------------
  |  |  134|      0|#define dav1d_alloc_aligned(type, sz, align) dav1d_alloc_aligned_internal(sz, align)
  ------------------
 2871|      0|                                        pal_sz * 16 * 16, 64);
 2872|      0|                if (!f->frame_thread.pal) {
  ------------------
  |  Branch (2872:21): [True: 0, False: 0]
  ------------------
 2873|      0|                    f->frame_thread.pal_sz = 0;
 2874|      0|                    goto error;
 2875|      0|                }
 2876|      0|                f->frame_thread.pal_sz = pal_sz;
 2877|      0|            }
 2878|       |
 2879|      0|            const int pal_idx_sz = num_sb128 * size_mul[1];
 2880|      0|            if (pal_idx_sz != f->frame_thread.pal_idx_sz) {
  ------------------
  |  Branch (2880:17): [True: 0, False: 0]
  ------------------
 2881|      0|                dav1d_free_aligned(f->frame_thread.pal_idx);
  ------------------
  |  |  136|      0|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
 2882|      0|                f->frame_thread.pal_idx =
 2883|      0|                    dav1d_alloc_aligned(ALLOC_PAL, sizeof(*f->frame_thread.pal_idx) *
  ------------------
  |  |  134|      0|#define dav1d_alloc_aligned(type, sz, align) dav1d_alloc_aligned_internal(sz, align)
  ------------------
 2884|      0|                                        pal_idx_sz * 128 * 128 / 8, 64);
 2885|      0|                if (!f->frame_thread.pal_idx) {
  ------------------
  |  Branch (2885:21): [True: 0, False: 0]
  ------------------
 2886|      0|                    f->frame_thread.pal_idx_sz = 0;
 2887|      0|                    goto error;
 2888|      0|                }
 2889|      0|                f->frame_thread.pal_idx_sz = pal_idx_sz;
 2890|      0|            }
 2891|      0|        } else if (f->frame_thread.pal) {
  ------------------
  |  Branch (2891:20): [True: 0, False: 0]
  ------------------
 2892|      0|            dav1d_freep_aligned(&f->frame_thread.pal);
 2893|      0|            dav1d_freep_aligned(&f->frame_thread.pal_idx);
 2894|      0|            f->frame_thread.pal_sz = f->frame_thread.pal_idx_sz = 0;
 2895|      0|        }
 2896|      0|    }
 2897|       |
 2898|       |    // update allocation of block contexts for above
 2899|  41.2k|    ptrdiff_t y_stride = f->cur.stride[0], uv_stride = f->cur.stride[1];
 2900|  41.2k|    const int has_resize = f->frame_hdr->width[0] != f->frame_hdr->width[1];
 2901|  41.2k|    const int need_cdef_lpf_copy = c->n_tc > 1 && has_resize;
  ------------------
  |  Branch (2901:36): [True: 0, False: 41.2k]
  |  Branch (2901:51): [True: 0, False: 0]
  ------------------
 2902|  41.2k|    if (y_stride * f->sbh * 4 != f->lf.cdef_buf_plane_sz[0] ||
  ------------------
  |  Branch (2902:9): [True: 9.45k, False: 31.7k]
  ------------------
 2903|  31.7k|        uv_stride * f->sbh * 8 != f->lf.cdef_buf_plane_sz[1] ||
  ------------------
  |  Branch (2903:9): [True: 227, False: 31.5k]
  ------------------
 2904|  31.5k|        need_cdef_lpf_copy != f->lf.need_cdef_lpf_copy ||
  ------------------
  |  Branch (2904:9): [True: 0, False: 31.5k]
  ------------------
 2905|  31.5k|        f->sbh != f->lf.cdef_buf_sbh)
  ------------------
  |  Branch (2905:9): [True: 321, False: 31.2k]
  ------------------
 2906|  10.0k|    {
 2907|  10.0k|        dav1d_free_aligned(f->lf.cdef_line_buf);
  ------------------
  |  |  136|  10.0k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
 2908|  10.0k|        size_t alloc_sz = 64;
 2909|  10.0k|        alloc_sz += (size_t)llabs(y_stride) * 4 * f->sbh << need_cdef_lpf_copy;
 2910|  10.0k|        alloc_sz += (size_t)llabs(uv_stride) * 8 * f->sbh << need_cdef_lpf_copy;
 2911|  10.0k|        uint8_t *ptr = f->lf.cdef_line_buf = dav1d_alloc_aligned(ALLOC_CDEF, alloc_sz, 32);
  ------------------
  |  |  134|  10.0k|#define dav1d_alloc_aligned(type, sz, align) dav1d_alloc_aligned_internal(sz, align)
  ------------------
 2912|  10.0k|        if (!ptr) {
  ------------------
  |  Branch (2912:13): [True: 0, False: 10.0k]
  ------------------
 2913|      0|            f->lf.cdef_buf_plane_sz[0] = f->lf.cdef_buf_plane_sz[1] = 0;
 2914|      0|            goto error;
 2915|      0|        }
 2916|       |
 2917|  10.0k|        ptr += 32;
 2918|  10.0k|        if (y_stride < 0) {
  ------------------
  |  Branch (2918:13): [True: 0, False: 10.0k]
  ------------------
 2919|      0|            f->lf.cdef_line[0][0] = ptr - y_stride * (f->sbh * 4 - 1);
 2920|      0|            f->lf.cdef_line[1][0] = ptr - y_stride * (f->sbh * 4 - 3);
 2921|  10.0k|        } else {
 2922|  10.0k|            f->lf.cdef_line[0][0] = ptr + y_stride * 0;
 2923|  10.0k|            f->lf.cdef_line[1][0] = ptr + y_stride * 2;
 2924|  10.0k|        }
 2925|  10.0k|        ptr += llabs(y_stride) * f->sbh * 4;
 2926|  10.0k|        if (uv_stride < 0) {
  ------------------
  |  Branch (2926:13): [True: 0, False: 10.0k]
  ------------------
 2927|      0|            f->lf.cdef_line[0][1] = ptr - uv_stride * (f->sbh * 8 - 1);
 2928|      0|            f->lf.cdef_line[0][2] = ptr - uv_stride * (f->sbh * 8 - 3);
 2929|      0|            f->lf.cdef_line[1][1] = ptr - uv_stride * (f->sbh * 8 - 5);
 2930|      0|            f->lf.cdef_line[1][2] = ptr - uv_stride * (f->sbh * 8 - 7);
 2931|  10.0k|        } else {
 2932|  10.0k|            f->lf.cdef_line[0][1] = ptr + uv_stride * 0;
 2933|  10.0k|            f->lf.cdef_line[0][2] = ptr + uv_stride * 2;
 2934|  10.0k|            f->lf.cdef_line[1][1] = ptr + uv_stride * 4;
 2935|  10.0k|            f->lf.cdef_line[1][2] = ptr + uv_stride * 6;
 2936|  10.0k|        }
 2937|       |
 2938|  10.0k|        if (need_cdef_lpf_copy) {
  ------------------
  |  Branch (2938:13): [True: 0, False: 10.0k]
  ------------------
 2939|      0|            ptr += llabs(uv_stride) * f->sbh * 8;
 2940|      0|            if (y_stride < 0)
  ------------------
  |  Branch (2940:17): [True: 0, False: 0]
  ------------------
 2941|      0|                f->lf.cdef_lpf_line[0] = ptr - y_stride * (f->sbh * 4 - 1);
 2942|      0|            else
 2943|      0|                f->lf.cdef_lpf_line[0] = ptr;
 2944|      0|            ptr += llabs(y_stride) * f->sbh * 4;
 2945|      0|            if (uv_stride < 0) {
  ------------------
  |  Branch (2945:17): [True: 0, False: 0]
  ------------------
 2946|      0|                f->lf.cdef_lpf_line[1] = ptr - uv_stride * (f->sbh * 4 - 1);
 2947|      0|                f->lf.cdef_lpf_line[2] = ptr - uv_stride * (f->sbh * 8 - 1);
 2948|      0|            } else {
 2949|      0|                f->lf.cdef_lpf_line[1] = ptr;
 2950|      0|                f->lf.cdef_lpf_line[2] = ptr + uv_stride * f->sbh * 4;
 2951|      0|            }
 2952|      0|        }
 2953|       |
 2954|  10.0k|        f->lf.cdef_buf_plane_sz[0] = (int) y_stride * f->sbh * 4;
 2955|  10.0k|        f->lf.cdef_buf_plane_sz[1] = (int) uv_stride * f->sbh * 8;
 2956|  10.0k|        f->lf.need_cdef_lpf_copy = need_cdef_lpf_copy;
 2957|  10.0k|        f->lf.cdef_buf_sbh = f->sbh;
 2958|  10.0k|    }
 2959|       |
 2960|  41.2k|    const int sb128 = f->seq_hdr->sb128;
 2961|  41.2k|    const int num_lines = c->n_tc > 1 ? f->sbh * 4 << sb128 : 12;
  ------------------
  |  Branch (2961:27): [True: 0, False: 41.2k]
  ------------------
 2962|  41.2k|    y_stride = f->sr_cur.p.stride[0], uv_stride = f->sr_cur.p.stride[1];
 2963|  41.2k|    if (y_stride * num_lines != f->lf.lr_buf_plane_sz[0] ||
  ------------------
  |  Branch (2963:9): [True: 9.48k, False: 31.7k]
  ------------------
 2964|  31.7k|        uv_stride * num_lines * 2 != f->lf.lr_buf_plane_sz[1])
  ------------------
  |  Branch (2964:9): [True: 231, False: 31.5k]
  ------------------
 2965|  9.71k|    {
 2966|  9.71k|        dav1d_free_aligned(f->lf.lr_line_buf);
  ------------------
  |  |  136|  9.71k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
 2967|       |        // lr simd may overread the input, so slightly over-allocate the lpf buffer
 2968|  9.71k|        size_t alloc_sz = 128;
 2969|  9.71k|        alloc_sz += (size_t)llabs(y_stride) * num_lines;
 2970|  9.71k|        alloc_sz += (size_t)llabs(uv_stride) * num_lines * 2;
 2971|  9.71k|        uint8_t *ptr = f->lf.lr_line_buf = dav1d_alloc_aligned(ALLOC_LR, alloc_sz, 64);
  ------------------
  |  |  134|  9.71k|#define dav1d_alloc_aligned(type, sz, align) dav1d_alloc_aligned_internal(sz, align)
  ------------------
 2972|  9.71k|        if (!ptr) {
  ------------------
  |  Branch (2972:13): [True: 0, False: 9.71k]
  ------------------
 2973|      0|            f->lf.lr_buf_plane_sz[0] = f->lf.lr_buf_plane_sz[1] = 0;
 2974|      0|            goto error;
 2975|      0|        }
 2976|       |
 2977|  9.71k|        ptr += 64;
 2978|  9.71k|        if (y_stride < 0)
  ------------------
  |  Branch (2978:13): [True: 0, False: 9.71k]
  ------------------
 2979|      0|            f->lf.lr_lpf_line[0] = ptr - y_stride * (num_lines - 1);
 2980|  9.71k|        else
 2981|  9.71k|            f->lf.lr_lpf_line[0] = ptr;
 2982|  9.71k|        ptr += llabs(y_stride) * num_lines;
 2983|  9.71k|        if (uv_stride < 0) {
  ------------------
  |  Branch (2983:13): [True: 0, False: 9.71k]
  ------------------
 2984|      0|            f->lf.lr_lpf_line[1] = ptr - uv_stride * (num_lines * 1 - 1);
 2985|      0|            f->lf.lr_lpf_line[2] = ptr - uv_stride * (num_lines * 2 - 1);
 2986|  9.71k|        } else {
 2987|  9.71k|            f->lf.lr_lpf_line[1] = ptr;
 2988|  9.71k|            f->lf.lr_lpf_line[2] = ptr + uv_stride * num_lines;
 2989|  9.71k|        }
 2990|       |
 2991|  9.71k|        f->lf.lr_buf_plane_sz[0] = (int) y_stride * num_lines;
 2992|  9.71k|        f->lf.lr_buf_plane_sz[1] = (int) uv_stride * num_lines * 2;
 2993|  9.71k|    }
 2994|       |
 2995|       |    // update allocation for loopfilter masks
 2996|  41.2k|    if (num_sb128 != f->lf.mask_sz) {
  ------------------
  |  Branch (2996:9): [True: 9.42k, False: 31.8k]
  ------------------
 2997|  9.42k|        dav1d_free(f->lf.mask);
  ------------------
  |  |  135|  9.42k|#define dav1d_free(ptr) free(ptr)
  ------------------
 2998|  9.42k|        dav1d_free(f->lf.level);
  ------------------
  |  |  135|  9.42k|#define dav1d_free(ptr) free(ptr)
  ------------------
 2999|  9.42k|        f->lf.mask = dav1d_malloc(ALLOC_LF, sizeof(*f->lf.mask) * num_sb128);
  ------------------
  |  |  132|  9.42k|#define dav1d_malloc(type, sz) malloc(sz)
  ------------------
 3000|       |        // over-allocate by 3 bytes since some of the SIMD implementations
 3001|       |        // index this from the level type and can thus over-read by up to 3
 3002|  9.42k|        f->lf.level = dav1d_malloc(ALLOC_LF, sizeof(*f->lf.level) * num_sb128 * 32 * 32 + 3);
  ------------------
  |  |  132|  9.42k|#define dav1d_malloc(type, sz) malloc(sz)
  ------------------
 3003|  9.42k|        if (!f->lf.mask || !f->lf.level) {
  ------------------
  |  Branch (3003:13): [True: 0, False: 9.42k]
  |  Branch (3003:28): [True: 0, False: 9.42k]
  ------------------
 3004|      0|            f->lf.mask_sz = 0;
 3005|      0|            goto error;
 3006|      0|        }
 3007|  9.42k|        if (c->n_fc > 1) {
  ------------------
  |  Branch (3007:13): [True: 0, False: 9.42k]
  ------------------
 3008|      0|            dav1d_free(f->frame_thread.b);
  ------------------
  |  |  135|      0|#define dav1d_free(ptr) free(ptr)
  ------------------
 3009|      0|            f->frame_thread.b = dav1d_malloc(ALLOC_BLOCK, sizeof(*f->frame_thread.b) *
  ------------------
  |  |  132|      0|#define dav1d_malloc(type, sz) malloc(sz)
  ------------------
 3010|      0|                                             num_sb128 * 32 * 32);
 3011|      0|            if (!f->frame_thread.b) {
  ------------------
  |  Branch (3011:17): [True: 0, False: 0]
  ------------------
 3012|      0|                f->lf.mask_sz = 0;
 3013|      0|                goto error;
 3014|      0|            }
 3015|      0|        }
 3016|  9.42k|        f->lf.mask_sz = num_sb128;
 3017|  9.42k|    }
 3018|       |
 3019|  41.2k|    f->sr_sb128w = (f->sr_cur.p.p.w + 127) >> 7;
 3020|  41.2k|    const int lr_mask_sz = f->sr_sb128w * f->sb128h;
 3021|  41.2k|    if (lr_mask_sz != f->lf.lr_mask_sz) {
  ------------------
  |  Branch (3021:9): [True: 9.30k, False: 31.9k]
  ------------------
 3022|  9.30k|        dav1d_free(f->lf.lr_mask);
  ------------------
  |  |  135|  9.30k|#define dav1d_free(ptr) free(ptr)
  ------------------
 3023|  9.30k|        f->lf.lr_mask = dav1d_malloc(ALLOC_LR, sizeof(*f->lf.lr_mask) * lr_mask_sz);
  ------------------
  |  |  132|  9.30k|#define dav1d_malloc(type, sz) malloc(sz)
  ------------------
 3024|  9.30k|        if (!f->lf.lr_mask) {
  ------------------
  |  Branch (3024:13): [True: 0, False: 9.30k]
  ------------------
 3025|      0|            f->lf.lr_mask_sz = 0;
 3026|      0|            goto error;
 3027|      0|        }
 3028|  9.30k|        f->lf.lr_mask_sz = lr_mask_sz;
 3029|  9.30k|    }
 3030|  41.2k|    f->lf.restore_planes =
 3031|  41.2k|        ((f->frame_hdr->restoration.type[0] != DAV1D_RESTORATION_NONE) << 0) +
 3032|  41.2k|        ((f->frame_hdr->restoration.type[1] != DAV1D_RESTORATION_NONE) << 1) +
 3033|  41.2k|        ((f->frame_hdr->restoration.type[2] != DAV1D_RESTORATION_NONE) << 2);
 3034|  41.2k|    if (f->frame_hdr->loopfilter.sharpness != f->lf.last_sharpness) {
  ------------------
  |  Branch (3034:9): [True: 14.7k, False: 26.4k]
  ------------------
 3035|  14.7k|        dav1d_calc_eih(&f->lf.lim_lut, f->frame_hdr->loopfilter.sharpness);
 3036|  14.7k|        f->lf.last_sharpness = f->frame_hdr->loopfilter.sharpness;
 3037|  14.7k|    }
 3038|  41.2k|    dav1d_calc_lf_values(f->lf.lvl, f->frame_hdr, (int8_t[4]) { 0, 0, 0, 0 });
 3039|  41.2k|    memset(f->lf.mask, 0, sizeof(*f->lf.mask) * num_sb128);
 3040|       |
 3041|  41.2k|    const int ipred_edge_sz = f->sbh * f->sb128w << hbd;
 3042|  41.2k|    if (ipred_edge_sz != f->ipred_edge_sz) {
  ------------------
  |  Branch (3042:9): [True: 9.45k, False: 31.7k]
  ------------------
 3043|  9.45k|        dav1d_free_aligned(f->ipred_edge[0]);
  ------------------
  |  |  136|  9.45k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
 3044|  9.45k|        uint8_t *ptr = f->ipred_edge[0] =
 3045|  9.45k|            dav1d_alloc_aligned(ALLOC_IPRED, ipred_edge_sz * 128 * 3, 64);
  ------------------
  |  |  134|  9.45k|#define dav1d_alloc_aligned(type, sz, align) dav1d_alloc_aligned_internal(sz, align)
  ------------------
 3046|  9.45k|        if (!ptr) {
  ------------------
  |  Branch (3046:13): [True: 0, False: 9.45k]
  ------------------
 3047|      0|            f->ipred_edge_sz = 0;
 3048|      0|            goto error;
 3049|      0|        }
 3050|  9.45k|        f->ipred_edge[1] = ptr + ipred_edge_sz * 128 * 1;
 3051|  9.45k|        f->ipred_edge[2] = ptr + ipred_edge_sz * 128 * 2;
 3052|  9.45k|        f->ipred_edge_sz = ipred_edge_sz;
 3053|  9.45k|    }
 3054|       |
 3055|  41.2k|    const int re_sz = f->sb128h * f->frame_hdr->tiling.cols;
 3056|  41.2k|    if (re_sz != f->lf.re_sz) {
  ------------------
  |  Branch (3056:9): [True: 9.01k, False: 32.2k]
  ------------------
 3057|  9.01k|        dav1d_free(f->lf.tx_lpf_right_edge[0]);
  ------------------
  |  |  135|  9.01k|#define dav1d_free(ptr) free(ptr)
  ------------------
 3058|  9.01k|        f->lf.tx_lpf_right_edge[0] = dav1d_malloc(ALLOC_LF, re_sz * 32 * 2);
  ------------------
  |  |  132|  9.01k|#define dav1d_malloc(type, sz) malloc(sz)
  ------------------
 3059|  9.01k|        if (!f->lf.tx_lpf_right_edge[0]) {
  ------------------
  |  Branch (3059:13): [True: 0, False: 9.01k]
  ------------------
 3060|      0|            f->lf.re_sz = 0;
 3061|      0|            goto error;
 3062|      0|        }
 3063|  9.01k|        f->lf.tx_lpf_right_edge[1] = f->lf.tx_lpf_right_edge[0] + re_sz * 32;
 3064|  9.01k|        f->lf.re_sz = re_sz;
 3065|  9.01k|    }
 3066|       |
 3067|       |    // init ref mvs
 3068|  41.2k|    if (IS_INTER_OR_SWITCH(f->frame_hdr) || f->frame_hdr->allow_intrabc) {
  ------------------
  |  |   36|  82.4k|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 12.1k, False: 29.1k]
  |  |  ------------------
  ------------------
  |  Branch (3068:45): [True: 16.2k, False: 12.8k]
  ------------------
 3069|  28.3k|        const int ret =
 3070|  28.3k|            dav1d_refmvs_init_frame(&f->rf, f->seq_hdr, f->frame_hdr,
 3071|  28.3k|                                    f->refpoc, f->mvs, f->refrefpoc, f->ref_mvs,
 3072|  28.3k|                                    f->c->n_tc, f->c->n_fc);
 3073|  28.3k|        if (ret < 0) goto error;
  ------------------
  |  Branch (3073:13): [True: 0, False: 28.3k]
  ------------------
 3074|  28.3k|    }
 3075|       |
 3076|       |    // setup dequant tables
 3077|  41.2k|    init_quant_tables(f->seq_hdr, f->frame_hdr, f->frame_hdr->quant.yac, f->dq);
 3078|  41.2k|    if (f->frame_hdr->quant.qm)
  ------------------
  |  Branch (3078:9): [True: 6.38k, False: 34.8k]
  ------------------
 3079|   127k|        for (int i = 0; i < N_RECT_TX_SIZES; i++) {
  ------------------
  |  Branch (3079:25): [True: 121k, False: 6.38k]
  ------------------
 3080|   121k|            f->qm[i][0] = dav1d_qm_tbl[f->frame_hdr->quant.qm_y][0][i];
 3081|   121k|            f->qm[i][1] = dav1d_qm_tbl[f->frame_hdr->quant.qm_u][1][i];
 3082|   121k|            f->qm[i][2] = dav1d_qm_tbl[f->frame_hdr->quant.qm_v][1][i];
 3083|   121k|        }
 3084|  34.8k|    else
 3085|  34.8k|        memset(f->qm, 0, sizeof(f->qm));
 3086|       |
 3087|       |    // setup jnt_comp weights
 3088|  41.2k|    if (f->frame_hdr->switchable_comp_refs) {
  ------------------
  |  Branch (3088:9): [True: 9.51k, False: 31.7k]
  ------------------
 3089|  76.0k|        for (int i = 0; i < 7; i++) {
  ------------------
  |  Branch (3089:25): [True: 66.5k, False: 9.51k]
  ------------------
 3090|  66.5k|            const unsigned ref0poc = f->refp[i].p.frame_hdr->frame_offset;
 3091|       |
 3092|   266k|            for (int j = i + 1; j < 7; j++) {
  ------------------
  |  Branch (3092:33): [True: 199k, False: 66.5k]
  ------------------
 3093|   199k|                const unsigned ref1poc = f->refp[j].p.frame_hdr->frame_offset;
 3094|       |
 3095|   199k|                const unsigned d1 =
 3096|   199k|                    imin(abs(get_poc_diff(f->seq_hdr->order_hint_n_bits, ref0poc,
 3097|   199k|                                          f->cur.frame_hdr->frame_offset)), 31);
 3098|   199k|                const unsigned d0 =
 3099|   199k|                    imin(abs(get_poc_diff(f->seq_hdr->order_hint_n_bits, ref1poc,
 3100|   199k|                                          f->cur.frame_hdr->frame_offset)), 31);
 3101|   199k|                const int order = d0 <= d1;
 3102|       |
 3103|   199k|                static const uint8_t quant_dist_weight[3][2] = {
 3104|   199k|                    { 2, 3 }, { 2, 5 }, { 2, 7 }
 3105|   199k|                };
 3106|   199k|                static const uint8_t quant_dist_lookup_table[4][2] = {
 3107|   199k|                    { 9, 7 }, { 11, 5 }, { 12, 4 }, { 13, 3 }
 3108|   199k|                };
 3109|       |
 3110|   199k|                int k;
 3111|   630k|                for (k = 0; k < 3; k++) {
  ------------------
  |  Branch (3111:29): [True: 488k, False: 141k]
  ------------------
 3112|   488k|                    const int c0 = quant_dist_weight[k][order];
 3113|   488k|                    const int c1 = quant_dist_weight[k][!order];
 3114|   488k|                    const int d0_c0 = d0 * c0;
 3115|   488k|                    const int d1_c1 = d1 * c1;
 3116|   488k|                    if ((d0 > d1 && d0_c0 < d1_c1) || (d0 <= d1 && d0_c0 > d1_c1)) break;
  ------------------
  |  Branch (3116:26): [True: 105k, False: 382k]
  |  Branch (3116:37): [True: 5.42k, False: 100k]
  |  Branch (3116:56): [True: 382k, False: 100k]
  |  Branch (3116:68): [True: 52.8k, False: 329k]
  ------------------
 3117|   488k|                }
 3118|       |
 3119|   199k|                f->jnt_weights[i][j] = quant_dist_lookup_table[k][order];
 3120|   199k|            }
 3121|  66.5k|        }
 3122|  9.51k|    }
 3123|       |
 3124|       |    /* Init loopfilter pointers. Increasing NULL pointers is technically UB,
 3125|       |     * so just point the chroma pointers in 4:0:0 to the luma plane here to
 3126|       |     * avoid having additional in-loop branches in various places. We never
 3127|       |     * dereference those pointers so it doesn't really matter what they
 3128|       |     * point at, as long as the pointers are valid. */
 3129|  41.2k|    const int has_chroma = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400;
 3130|  41.2k|    f->lf.p[0] = f->cur.data[0];
 3131|  41.2k|    f->lf.p[1] = f->cur.data[has_chroma ? 1 : 0];
  ------------------
  |  Branch (3131:30): [True: 22.3k, False: 18.9k]
  ------------------
 3132|  41.2k|    f->lf.p[2] = f->cur.data[has_chroma ? 2 : 0];
  ------------------
  |  Branch (3132:30): [True: 22.3k, False: 18.9k]
  ------------------
 3133|  41.2k|    f->lf.sr_p[0] = f->sr_cur.p.data[0];
 3134|  41.2k|    f->lf.sr_p[1] = f->sr_cur.p.data[has_chroma ? 1 : 0];
  ------------------
  |  Branch (3134:38): [True: 22.3k, False: 18.9k]
  ------------------
 3135|  41.2k|    f->lf.sr_p[2] = f->sr_cur.p.data[has_chroma ? 2 : 0];
  ------------------
  |  Branch (3135:38): [True: 22.3k, False: 18.9k]
  ------------------
 3136|       |
 3137|  41.2k|    retval = 0;
 3138|  41.2k|error:
 3139|  41.2k|    return retval;
 3140|  41.2k|}
dav1d_decode_frame_init_cdf:
 3142|  41.2k|int dav1d_decode_frame_init_cdf(Dav1dFrameContext *const f) {
 3143|  41.2k|    const Dav1dContext *const c = f->c;
 3144|  41.2k|    int retval = DAV1D_ERR(EINVAL);
  ------------------
  |  |   58|  41.2k|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
 3145|       |
 3146|  41.2k|    if (f->frame_hdr->refresh_context)
  ------------------
  |  Branch (3146:9): [True: 13.0k, False: 28.1k]
  ------------------
 3147|  13.0k|        dav1d_cdf_thread_copy(f->out_cdf.data.cdf, &f->in_cdf);
 3148|       |
 3149|       |    // parse individual tiles per tile group
 3150|  41.2k|    int tile_row = 0, tile_col = 0;
 3151|  41.2k|    f->task_thread.update_set = 0;
 3152|  81.3k|    for (int i = 0; i < f->n_tile_data; i++) {
  ------------------
  |  Branch (3152:21): [True: 41.4k, False: 39.9k]
  ------------------
 3153|  41.4k|        const uint8_t *data = f->tile[i].data.data;
 3154|  41.4k|        size_t size = f->tile[i].data.sz;
 3155|       |
 3156|  87.3k|        for (int j = f->tile[i].start; j <= f->tile[i].end; j++) {
  ------------------
  |  Branch (3156:40): [True: 47.2k, False: 40.1k]
  ------------------
 3157|  47.2k|            size_t tile_sz;
 3158|  47.2k|            if (j == f->tile[i].end) {
  ------------------
  |  Branch (3158:17): [True: 40.1k, False: 7.09k]
  ------------------
 3159|  40.1k|                tile_sz = size;
 3160|  40.1k|            } else {
 3161|  7.09k|                if (f->frame_hdr->tiling.n_bytes > size) goto error;
  ------------------
  |  Branch (3161:21): [True: 464, False: 6.63k]
  ------------------
 3162|  6.63k|                tile_sz = 0;
 3163|  14.1k|                for (unsigned k = 0; k < f->frame_hdr->tiling.n_bytes; k++)
  ------------------
  |  Branch (3163:38): [True: 7.49k, False: 6.63k]
  ------------------
 3164|  7.49k|                    tile_sz |= (unsigned)*data++ << (k * 8);
 3165|  6.63k|                tile_sz++;
 3166|  6.63k|                size -= f->frame_hdr->tiling.n_bytes;
 3167|  6.63k|                if (tile_sz > size) goto error;
  ------------------
  |  Branch (3167:21): [True: 858, False: 5.77k]
  ------------------
 3168|  6.63k|            }
 3169|       |
 3170|  45.9k|            setup_tile(&f->ts[j], f, data, tile_sz, tile_row, tile_col++,
 3171|  45.9k|                       c->n_fc > 1 ? f->frame_thread.tile_start_off[j] : 0);
  ------------------
  |  Branch (3171:24): [True: 0, False: 45.9k]
  ------------------
 3172|       |
 3173|  45.9k|            if (tile_col == f->frame_hdr->tiling.cols) {
  ------------------
  |  Branch (3173:17): [True: 41.4k, False: 4.41k]
  ------------------
 3174|  41.4k|                tile_col = 0;
 3175|  41.4k|                tile_row++;
 3176|  41.4k|            }
 3177|  45.9k|            if (j == f->frame_hdr->tiling.update && f->frame_hdr->refresh_context)
  ------------------
  |  Branch (3177:17): [True: 40.1k, False: 5.75k]
  |  Branch (3177:53): [True: 12.7k, False: 27.4k]
  ------------------
 3178|  12.7k|                f->task_thread.update_set = 1;
 3179|  45.9k|            data += tile_sz;
 3180|  45.9k|            size -= tile_sz;
 3181|  45.9k|        }
 3182|  41.4k|    }
 3183|       |
 3184|  39.9k|    if (c->n_tc > 1) {
  ------------------
  |  Branch (3184:9): [True: 0, False: 39.9k]
  ------------------
 3185|      0|        const int uses_2pass = c->n_fc > 1;
 3186|      0|        for (int n = 0; n < f->sb128w * f->frame_hdr->tiling.rows * (1 + uses_2pass); n++)
  ------------------
  |  Branch (3186:25): [True: 0, False: 0]
  ------------------
 3187|      0|            reset_context(&f->a[n], IS_KEY_OR_INTRA(f->frame_hdr),
  ------------------
  |  |   43|      0|    (!IS_INTER_OR_SWITCH(frame_header))
  |  |  ------------------
  |  |  |  |   36|      0|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  ------------------
 3188|      0|                          uses_2pass ? 1 + (n >= f->sb128w * f->frame_hdr->tiling.rows) : 0);
  ------------------
  |  Branch (3188:27): [True: 0, False: 0]
  ------------------
 3189|      0|    }
 3190|       |
 3191|  39.9k|    retval = 0;
 3192|  41.2k|error:
 3193|  41.2k|    return retval;
 3194|  39.9k|}
dav1d_decode_frame_main:
 3196|  39.9k|int dav1d_decode_frame_main(Dav1dFrameContext *const f) {
 3197|  39.9k|    const Dav1dContext *const c = f->c;
 3198|  39.9k|    int retval = DAV1D_ERR(EINVAL);
  ------------------
  |  |   58|  39.9k|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
 3199|       |
 3200|  39.9k|    assert(f->c->n_tc == 1);
  ------------------
  |  Branch (3200:5): [True: 39.9k, False: 0]
  ------------------
 3201|       |
 3202|  39.9k|    Dav1dTaskContext *const t = &c->tc[f - c->fc];
 3203|  39.9k|    t->f = f;
 3204|  39.9k|    t->frame_thread.pass = 0;
 3205|       |
 3206|   379k|    for (int n = 0; n < f->sb128w * f->frame_hdr->tiling.rows; n++)
  ------------------
  |  Branch (3206:21): [True: 339k, False: 39.9k]
  ------------------
 3207|   339k|        reset_context(&f->a[n], IS_KEY_OR_INTRA(f->frame_hdr), 0);
  ------------------
  |  |   43|   339k|    (!IS_INTER_OR_SWITCH(frame_header))
  |  |  ------------------
  |  |  |  |   36|   339k|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  ------------------
 3208|       |
 3209|       |    // no threading - we explicitly interleave tile/sbrow decoding
 3210|       |    // and post-filtering, so that the full process runs in-line
 3211|  58.2k|    for (int tile_row = 0; tile_row < f->frame_hdr->tiling.rows; tile_row++) {
  ------------------
  |  Branch (3211:28): [True: 41.0k, False: 17.2k]
  ------------------
 3212|  41.0k|        const int sbh_end =
 3213|  41.0k|            imin(f->frame_hdr->tiling.row_start_sb[tile_row + 1], f->sbh);
 3214|  41.0k|        for (int sby = f->frame_hdr->tiling.row_start_sb[tile_row];
 3215|   174k|             sby < sbh_end; sby++)
  ------------------
  |  Branch (3215:14): [True: 156k, False: 18.3k]
  ------------------
 3216|   156k|        {
 3217|   156k|            t->by = sby << (4 + f->seq_hdr->sb128);
 3218|   156k|            const int by_end = (t->by + f->sb_step) >> 1;
 3219|   156k|            if (f->frame_hdr->use_ref_frame_mvs) {
  ------------------
  |  Branch (3219:17): [True: 18.3k, False: 138k]
  ------------------
 3220|  18.3k|                f->c->refmvs_dsp.load_tmvs(&f->rf, tile_row,
 3221|  18.3k|                                           0, f->bw >> 1, t->by >> 1, by_end);
 3222|  18.3k|            }
 3223|   292k|            for (int tile_col = 0; tile_col < f->frame_hdr->tiling.cols; tile_col++) {
  ------------------
  |  Branch (3223:36): [True: 158k, False: 133k]
  ------------------
 3224|   158k|                t->ts = &f->ts[tile_row * f->frame_hdr->tiling.cols + tile_col];
 3225|   158k|                if (dav1d_decode_tile_sbrow(t)) goto error;
  ------------------
  |  Branch (3225:21): [True: 22.6k, False: 136k]
  ------------------
 3226|   158k|            }
 3227|   133k|            if (IS_INTER_OR_SWITCH(f->frame_hdr)) {
  ------------------
  |  |   36|   133k|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 48.9k, False: 84.8k]
  |  |  ------------------
  ------------------
 3228|  48.9k|                dav1d_refmvs_save_tmvs(&f->c->refmvs_dsp, &t->rt,
 3229|  48.9k|                                       0, f->bw >> 1, t->by >> 1, by_end);
 3230|  48.9k|            }
 3231|       |
 3232|       |            // loopfilter + cdef + restoration
 3233|   133k|            f->bd_fn.filter_sbrow(f, sby);
 3234|   133k|        }
 3235|  41.0k|    }
 3236|       |
 3237|  17.2k|    retval = 0;
 3238|  39.9k|error:
 3239|  39.9k|    return retval;
 3240|  17.2k|}
dav1d_decode_frame_exit:
 3242|  41.2k|void dav1d_decode_frame_exit(Dav1dFrameContext *const f, int retval) {
 3243|  41.2k|    const Dav1dContext *const c = f->c;
 3244|       |
 3245|  41.2k|    if (f->sr_cur.p.data[0])
  ------------------
  |  Branch (3245:9): [True: 41.2k, False: 0]
  ------------------
 3246|  41.2k|        atomic_init(&f->task_thread.error, 0);
 3247|       |
 3248|  41.2k|    if (c->n_fc > 1 && retval && f->frame_thread.cf) {
  ------------------
  |  Branch (3248:9): [True: 0, False: 41.2k]
  |  Branch (3248:24): [True: 0, False: 0]
  |  Branch (3248:34): [True: 0, False: 0]
  ------------------
 3249|      0|        memset(f->frame_thread.cf, 0,
 3250|      0|               (size_t)f->frame_thread.cf_sz * 128 * 128 / 2);
 3251|      0|    }
 3252|   329k|    for (int i = 0; i < 7; i++) {
  ------------------
  |  Branch (3252:21): [True: 288k, False: 41.2k]
  ------------------
 3253|   288k|        if (f->refp[i].p.frame_hdr) {
  ------------------
  |  Branch (3253:13): [True: 84.8k, False: 203k]
  ------------------
 3254|  84.8k|            if (!retval && c->n_fc > 1 && c->strict_std_compliance &&
  ------------------
  |  Branch (3254:17): [True: 50.5k, False: 34.2k]
  |  Branch (3254:28): [True: 0, False: 50.5k]
  |  Branch (3254:43): [True: 0, False: 0]
  ------------------
 3255|  84.8k|                atomic_load(&f->refp[i].progress[1]) == FRAME_ERROR)
  ------------------
  |  |   35|      0|#define FRAME_ERROR (UINT_MAX - 1)
  ------------------
  |  Branch (3255:17): [True: 0, False: 0]
  ------------------
 3256|      0|            {
 3257|      0|                retval = DAV1D_ERR(EINVAL);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
 3258|      0|                atomic_store(&f->task_thread.error, 1);
 3259|      0|                atomic_store(&f->sr_cur.progress[1], FRAME_ERROR);
 3260|      0|            }
 3261|  84.8k|            dav1d_thread_picture_unref(&f->refp[i]);
 3262|  84.8k|        }
 3263|   288k|        dav1d_ref_dec(&f->ref_mvs_ref[i]);
 3264|   288k|    }
 3265|       |
 3266|  41.2k|    dav1d_picture_unref_internal(&f->cur);
 3267|  41.2k|    dav1d_thread_picture_unref(&f->sr_cur);
 3268|  41.2k|    dav1d_cdf_thread_unref(&f->in_cdf);
 3269|  41.2k|    if (f->frame_hdr && f->frame_hdr->refresh_context) {
  ------------------
  |  Branch (3269:9): [True: 41.2k, False: 0]
  |  Branch (3269:25): [True: 13.0k, False: 28.1k]
  ------------------
 3270|  13.0k|        if (f->out_cdf.progress)
  ------------------
  |  Branch (3270:13): [True: 0, False: 13.0k]
  ------------------
 3271|  13.0k|            atomic_store(f->out_cdf.progress, retval == 0 ? 1 : TILE_ERROR);
  ------------------
  |  Branch (3271:13): [True: 0, False: 0]
  ------------------
 3272|  13.0k|        dav1d_cdf_thread_unref(&f->out_cdf);
 3273|  13.0k|    }
 3274|  41.2k|    dav1d_ref_dec(&f->cur_segmap_ref);
 3275|  41.2k|    dav1d_ref_dec(&f->prev_segmap_ref);
 3276|  41.2k|    dav1d_ref_dec(&f->mvs_ref);
 3277|  41.2k|    dav1d_ref_dec(&f->seq_hdr_ref);
 3278|  41.2k|    dav1d_ref_dec(&f->frame_hdr_ref);
 3279|       |
 3280|  82.6k|    for (int i = 0; i < f->n_tile_data; i++)
  ------------------
  |  Branch (3280:21): [True: 41.4k, False: 41.2k]
  ------------------
 3281|  41.4k|        dav1d_data_unref_internal(&f->tile[i].data);
 3282|  41.2k|    f->task_thread.retval = retval;
 3283|  41.2k|}
dav1d_decode_frame:
 3285|  41.2k|int dav1d_decode_frame(Dav1dFrameContext *const f) {
 3286|  41.2k|    assert(f->c->n_fc == 1);
  ------------------
  |  Branch (3286:5): [True: 41.2k, False: 0]
  ------------------
 3287|       |    // if n_tc > 1 (but n_fc == 1), we could run init/exit in the task
 3288|       |    // threads also. Not sure it makes a measurable difference.
 3289|  41.2k|    int res = dav1d_decode_frame_init(f);
 3290|  41.2k|    if (!res) res = dav1d_decode_frame_init_cdf(f);
  ------------------
  |  Branch (3290:9): [True: 41.2k, False: 0]
  ------------------
 3291|       |    // wait until all threads have completed
 3292|  41.2k|    if (!res) {
  ------------------
  |  Branch (3292:9): [True: 39.9k, False: 1.32k]
  ------------------
 3293|  39.9k|        if (f->c->n_tc > 1) {
  ------------------
  |  Branch (3293:13): [True: 0, False: 39.9k]
  ------------------
 3294|      0|            res = dav1d_task_create_tile_sbrow(f, 0, 1);
 3295|      0|            pthread_mutex_lock(&f->task_thread.ttd->lock);
 3296|      0|            pthread_cond_signal(&f->task_thread.ttd->cond);
 3297|      0|            if (!res) {
  ------------------
  |  Branch (3297:17): [True: 0, False: 0]
  ------------------
 3298|      0|                while (!f->task_thread.done[0] ||
  ------------------
  |  Branch (3298:24): [True: 0, False: 0]
  ------------------
 3299|      0|                       atomic_load(&f->task_thread.task_counter) > 0)
  ------------------
  |  Branch (3299:24): [True: 0, False: 0]
  ------------------
 3300|      0|                {
 3301|      0|                    pthread_cond_wait(&f->task_thread.cond,
 3302|      0|                                      &f->task_thread.ttd->lock);
 3303|      0|                }
 3304|      0|            }
 3305|      0|            pthread_mutex_unlock(&f->task_thread.ttd->lock);
 3306|      0|            res = f->task_thread.retval;
 3307|  39.9k|        } else {
 3308|  39.9k|            res = dav1d_decode_frame_main(f);
 3309|  39.9k|            if (!res && f->frame_hdr->refresh_context && f->task_thread.update_set) {
  ------------------
  |  Branch (3309:17): [True: 17.2k, False: 22.6k]
  |  Branch (3309:25): [True: 10.9k, False: 6.27k]
  |  Branch (3309:58): [True: 10.9k, False: 0]
  ------------------
 3310|  10.9k|                dav1d_cdf_thread_update(f->frame_hdr, f->out_cdf.data.cdf,
 3311|  10.9k|                                        &f->ts[f->frame_hdr->tiling.update].cdf);
 3312|  10.9k|            }
 3313|  39.9k|        }
 3314|  39.9k|    }
 3315|  41.2k|    dav1d_decode_frame_exit(f, res);
 3316|  41.2k|    res = f->task_thread.retval;
 3317|  41.2k|    f->n_tile_data = 0;
 3318|  41.2k|    return res;
 3319|  41.2k|}
dav1d_submit_frame:
 3327|  44.2k|int dav1d_submit_frame(Dav1dContext *const c) {
 3328|  44.2k|    Dav1dFrameContext *f;
 3329|  44.2k|    int res = -1;
 3330|       |
 3331|       |    // wait for c->out_delayed[next] and move into c->out if visible
 3332|  44.2k|    Dav1dThreadPicture *out_delayed;
 3333|  44.2k|    if (c->n_fc > 1) {
  ------------------
  |  Branch (3333:9): [True: 0, False: 44.2k]
  ------------------
 3334|      0|        pthread_mutex_lock(&c->task_thread.lock);
 3335|      0|        const unsigned next = c->frame_thread.next++;
 3336|      0|        if (c->frame_thread.next == c->n_fc)
  ------------------
  |  Branch (3336:13): [True: 0, False: 0]
  ------------------
 3337|      0|            c->frame_thread.next = 0;
 3338|       |
 3339|      0|        f = &c->fc[next];
 3340|      0|        while (f->n_tile_data > 0)
  ------------------
  |  Branch (3340:16): [True: 0, False: 0]
  ------------------
 3341|      0|            pthread_cond_wait(&f->task_thread.cond,
 3342|      0|                              &c->task_thread.lock);
 3343|      0|        out_delayed = &c->frame_thread.out_delayed[next];
 3344|      0|        if (out_delayed->p.data[0] || atomic_load(&f->task_thread.error)) {
  ------------------
  |  Branch (3344:13): [True: 0, False: 0]
  |  Branch (3344:39): [True: 0, False: 0]
  ------------------
 3345|      0|            unsigned first = atomic_load(&c->task_thread.first);
 3346|      0|            if (first + 1U < c->n_fc)
  ------------------
  |  Branch (3346:17): [True: 0, False: 0]
  ------------------
 3347|      0|                atomic_fetch_add(&c->task_thread.first, 1U);
 3348|      0|            else
 3349|      0|                atomic_store(&c->task_thread.first, 0);
 3350|      0|            atomic_compare_exchange_strong(&c->task_thread.reset_task_cur,
 3351|      0|                                           &first, UINT_MAX);
 3352|      0|            if (c->task_thread.cur && c->task_thread.cur < c->n_fc)
  ------------------
  |  Branch (3352:17): [True: 0, False: 0]
  |  Branch (3352:39): [True: 0, False: 0]
  ------------------
 3353|      0|                c->task_thread.cur--;
 3354|      0|        }
 3355|      0|        const int error = f->task_thread.retval;
 3356|      0|        if (error) {
  ------------------
  |  Branch (3356:13): [True: 0, False: 0]
  ------------------
 3357|      0|            f->task_thread.retval = 0;
 3358|      0|            c->cached_error = error;
 3359|      0|            dav1d_data_props_copy(&c->cached_error_props, &out_delayed->p.m);
 3360|      0|            dav1d_thread_picture_unref(out_delayed);
 3361|      0|        } else if (out_delayed->p.data[0]) {
  ------------------
  |  Branch (3361:20): [True: 0, False: 0]
  ------------------
 3362|      0|            const unsigned progress = atomic_load_explicit(&out_delayed->progress[1],
 3363|      0|                                                           memory_order_relaxed);
 3364|      0|            if ((out_delayed->visible || c->output_invisible_frames) &&
  ------------------
  |  Branch (3364:18): [True: 0, False: 0]
  |  Branch (3364:42): [True: 0, False: 0]
  ------------------
 3365|      0|                progress != FRAME_ERROR)
  ------------------
  |  |   35|      0|#define FRAME_ERROR (UINT_MAX - 1)
  ------------------
  |  Branch (3365:17): [True: 0, False: 0]
  ------------------
 3366|      0|            {
 3367|      0|                dav1d_thread_picture_ref(&c->out, out_delayed);
 3368|      0|                c->event_flags |= dav1d_picture_get_event_flags(out_delayed);
 3369|      0|            }
 3370|      0|            dav1d_thread_picture_unref(out_delayed);
 3371|      0|        }
 3372|  44.2k|    } else {
 3373|  44.2k|        f = c->fc;
 3374|  44.2k|    }
 3375|       |
 3376|  44.2k|    f->seq_hdr = c->seq_hdr;
 3377|  44.2k|    f->seq_hdr_ref = c->seq_hdr_ref;
 3378|  44.2k|    dav1d_ref_inc(f->seq_hdr_ref);
 3379|  44.2k|    f->frame_hdr = c->frame_hdr;
 3380|  44.2k|    f->frame_hdr_ref = c->frame_hdr_ref;
 3381|  44.2k|    c->frame_hdr = NULL;
 3382|  44.2k|    c->frame_hdr_ref = NULL;
 3383|  44.2k|    f->dsp = &c->dsp[f->seq_hdr->hbd];
 3384|       |
 3385|  44.2k|    const int bpc = 8 + 2 * f->seq_hdr->hbd;
 3386|       |
 3387|  44.2k|    if (!f->dsp->ipred.intra_pred[DC_PRED]) {
  ------------------
  |  Branch (3387:9): [True: 8.15k, False: 36.0k]
  ------------------
 3388|  8.15k|        Dav1dDSPContext *const dsp = &c->dsp[f->seq_hdr->hbd];
 3389|       |
 3390|  8.15k|        switch (bpc) {
 3391|      0|#define assign_bitdepth_case(bd) \
 3392|      0|            dav1d_cdef_dsp_init_##bd##bpc(&dsp->cdef); \
 3393|      0|            dav1d_intra_pred_dsp_init_##bd##bpc(&dsp->ipred); \
 3394|      0|            dav1d_itx_dsp_init_##bd##bpc(&dsp->itx, bpc); \
 3395|      0|            dav1d_loop_filter_dsp_init_##bd##bpc(&dsp->lf); \
 3396|      0|            dav1d_loop_restoration_dsp_init_##bd##bpc(&dsp->lr, bpc); \
 3397|      0|            dav1d_mc_dsp_init_##bd##bpc(&dsp->mc); \
 3398|      0|            dav1d_film_grain_dsp_init_##bd##bpc(&dsp->fg); \
 3399|      0|            break
 3400|      0|#if CONFIG_8BPC
 3401|  3.47k|        case 8:
  ------------------
  |  Branch (3401:9): [True: 3.47k, False: 4.68k]
  ------------------
 3402|  3.47k|            assign_bitdepth_case(8);
  ------------------
  |  | 3392|  3.47k|            dav1d_cdef_dsp_init_##bd##bpc(&dsp->cdef); \
  |  | 3393|  3.47k|            dav1d_intra_pred_dsp_init_##bd##bpc(&dsp->ipred); \
  |  | 3394|  3.47k|            dav1d_itx_dsp_init_##bd##bpc(&dsp->itx, bpc); \
  |  | 3395|  3.47k|            dav1d_loop_filter_dsp_init_##bd##bpc(&dsp->lf); \
  |  | 3396|  3.47k|            dav1d_loop_restoration_dsp_init_##bd##bpc(&dsp->lr, bpc); \
  |  | 3397|  3.47k|            dav1d_mc_dsp_init_##bd##bpc(&dsp->mc); \
  |  | 3398|  3.47k|            dav1d_film_grain_dsp_init_##bd##bpc(&dsp->fg); \
  |  | 3399|  3.47k|            break
  ------------------
 3403|      0|#endif
 3404|      0|#if CONFIG_16BPC
 3405|  2.31k|        case 10:
  ------------------
  |  Branch (3405:9): [True: 2.31k, False: 5.83k]
  ------------------
 3406|  4.68k|        case 12:
  ------------------
  |  Branch (3406:9): [True: 2.36k, False: 5.79k]
  ------------------
 3407|  4.68k|            assign_bitdepth_case(16);
  ------------------
  |  | 3392|  4.68k|            dav1d_cdef_dsp_init_##bd##bpc(&dsp->cdef); \
  |  | 3393|  4.68k|            dav1d_intra_pred_dsp_init_##bd##bpc(&dsp->ipred); \
  |  | 3394|  4.68k|            dav1d_itx_dsp_init_##bd##bpc(&dsp->itx, bpc); \
  |  | 3395|  4.68k|            dav1d_loop_filter_dsp_init_##bd##bpc(&dsp->lf); \
  |  | 3396|  4.68k|            dav1d_loop_restoration_dsp_init_##bd##bpc(&dsp->lr, bpc); \
  |  | 3397|  4.68k|            dav1d_mc_dsp_init_##bd##bpc(&dsp->mc); \
  |  | 3398|  4.68k|            dav1d_film_grain_dsp_init_##bd##bpc(&dsp->fg); \
  |  | 3399|  4.68k|            break
  ------------------
 3408|      0|#endif
 3409|      0|#undef assign_bitdepth_case
 3410|      0|        default:
  ------------------
  |  Branch (3410:9): [True: 0, False: 8.15k]
  ------------------
 3411|      0|            dav1d_log(c, "Compiled without support for %d-bit decoding\n",
  ------------------
  |  |   44|      0|#define dav1d_log(...) do { } while(0)
  |  |  ------------------
  |  |  |  Branch (44:37): [Folded, False: 0]
  |  |  ------------------
  ------------------
 3412|      0|                    8 + 2 * f->seq_hdr->hbd);
 3413|      0|            res = DAV1D_ERR(ENOPROTOOPT);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
 3414|      0|            goto error;
 3415|  8.15k|        }
 3416|  8.15k|    }
 3417|       |
 3418|  44.2k|#define assign_bitdepth_case(bd) \
 3419|  44.2k|        f->bd_fn.recon_b_inter = dav1d_recon_b_inter_##bd##bpc; \
 3420|  44.2k|        f->bd_fn.recon_b_intra = dav1d_recon_b_intra_##bd##bpc; \
 3421|  44.2k|        f->bd_fn.filter_sbrow = dav1d_filter_sbrow_##bd##bpc; \
 3422|  44.2k|        f->bd_fn.filter_sbrow_deblock_cols = dav1d_filter_sbrow_deblock_cols_##bd##bpc; \
 3423|  44.2k|        f->bd_fn.filter_sbrow_deblock_rows = dav1d_filter_sbrow_deblock_rows_##bd##bpc; \
 3424|  44.2k|        f->bd_fn.filter_sbrow_cdef = dav1d_filter_sbrow_cdef_##bd##bpc; \
 3425|  44.2k|        f->bd_fn.filter_sbrow_resize = dav1d_filter_sbrow_resize_##bd##bpc; \
 3426|  44.2k|        f->bd_fn.filter_sbrow_lr = dav1d_filter_sbrow_lr_##bd##bpc; \
 3427|  44.2k|        f->bd_fn.backup_ipred_edge = dav1d_backup_ipred_edge_##bd##bpc; \
 3428|  44.2k|        f->bd_fn.read_coef_blocks = dav1d_read_coef_blocks_##bd##bpc; \
 3429|  44.2k|        f->bd_fn.copy_pal_block_y = dav1d_copy_pal_block_y_##bd##bpc; \
 3430|  44.2k|        f->bd_fn.copy_pal_block_uv = dav1d_copy_pal_block_uv_##bd##bpc; \
 3431|  44.2k|        f->bd_fn.read_pal_plane = dav1d_read_pal_plane_##bd##bpc; \
 3432|  44.2k|        f->bd_fn.read_pal_uv = dav1d_read_pal_uv_##bd##bpc
 3433|  44.2k|    if (!f->seq_hdr->hbd) {
  ------------------
  |  Branch (3433:9): [True: 23.8k, False: 20.3k]
  ------------------
 3434|  23.8k|#if CONFIG_8BPC
 3435|  23.8k|        assign_bitdepth_case(8);
  ------------------
  |  | 3419|  23.8k|        f->bd_fn.recon_b_inter = dav1d_recon_b_inter_##bd##bpc; \
  |  | 3420|  23.8k|        f->bd_fn.recon_b_intra = dav1d_recon_b_intra_##bd##bpc; \
  |  | 3421|  23.8k|        f->bd_fn.filter_sbrow = dav1d_filter_sbrow_##bd##bpc; \
  |  | 3422|  23.8k|        f->bd_fn.filter_sbrow_deblock_cols = dav1d_filter_sbrow_deblock_cols_##bd##bpc; \
  |  | 3423|  23.8k|        f->bd_fn.filter_sbrow_deblock_rows = dav1d_filter_sbrow_deblock_rows_##bd##bpc; \
  |  | 3424|  23.8k|        f->bd_fn.filter_sbrow_cdef = dav1d_filter_sbrow_cdef_##bd##bpc; \
  |  | 3425|  23.8k|        f->bd_fn.filter_sbrow_resize = dav1d_filter_sbrow_resize_##bd##bpc; \
  |  | 3426|  23.8k|        f->bd_fn.filter_sbrow_lr = dav1d_filter_sbrow_lr_##bd##bpc; \
  |  | 3427|  23.8k|        f->bd_fn.backup_ipred_edge = dav1d_backup_ipred_edge_##bd##bpc; \
  |  | 3428|  23.8k|        f->bd_fn.read_coef_blocks = dav1d_read_coef_blocks_##bd##bpc; \
  |  | 3429|  23.8k|        f->bd_fn.copy_pal_block_y = dav1d_copy_pal_block_y_##bd##bpc; \
  |  | 3430|  23.8k|        f->bd_fn.copy_pal_block_uv = dav1d_copy_pal_block_uv_##bd##bpc; \
  |  | 3431|  23.8k|        f->bd_fn.read_pal_plane = dav1d_read_pal_plane_##bd##bpc; \
  |  | 3432|  23.8k|        f->bd_fn.read_pal_uv = dav1d_read_pal_uv_##bd##bpc
  ------------------
 3436|  23.8k|#endif
 3437|  23.8k|    } else {
 3438|  20.3k|#if CONFIG_16BPC
 3439|  20.3k|        assign_bitdepth_case(16);
  ------------------
  |  | 3419|  20.3k|        f->bd_fn.recon_b_inter = dav1d_recon_b_inter_##bd##bpc; \
  |  | 3420|  20.3k|        f->bd_fn.recon_b_intra = dav1d_recon_b_intra_##bd##bpc; \
  |  | 3421|  20.3k|        f->bd_fn.filter_sbrow = dav1d_filter_sbrow_##bd##bpc; \
  |  | 3422|  20.3k|        f->bd_fn.filter_sbrow_deblock_cols = dav1d_filter_sbrow_deblock_cols_##bd##bpc; \
  |  | 3423|  20.3k|        f->bd_fn.filter_sbrow_deblock_rows = dav1d_filter_sbrow_deblock_rows_##bd##bpc; \
  |  | 3424|  20.3k|        f->bd_fn.filter_sbrow_cdef = dav1d_filter_sbrow_cdef_##bd##bpc; \
  |  | 3425|  20.3k|        f->bd_fn.filter_sbrow_resize = dav1d_filter_sbrow_resize_##bd##bpc; \
  |  | 3426|  20.3k|        f->bd_fn.filter_sbrow_lr = dav1d_filter_sbrow_lr_##bd##bpc; \
  |  | 3427|  20.3k|        f->bd_fn.backup_ipred_edge = dav1d_backup_ipred_edge_##bd##bpc; \
  |  | 3428|  20.3k|        f->bd_fn.read_coef_blocks = dav1d_read_coef_blocks_##bd##bpc; \
  |  | 3429|  20.3k|        f->bd_fn.copy_pal_block_y = dav1d_copy_pal_block_y_##bd##bpc; \
  |  | 3430|  20.3k|        f->bd_fn.copy_pal_block_uv = dav1d_copy_pal_block_uv_##bd##bpc; \
  |  | 3431|  20.3k|        f->bd_fn.read_pal_plane = dav1d_read_pal_plane_##bd##bpc; \
  |  | 3432|  20.3k|        f->bd_fn.read_pal_uv = dav1d_read_pal_uv_##bd##bpc
  ------------------
 3440|  20.3k|#endif
 3441|  20.3k|    }
 3442|  44.2k|#undef assign_bitdepth_case
 3443|       |
 3444|  44.2k|    int ref_coded_width[7];
 3445|  44.2k|    if (IS_INTER_OR_SWITCH(f->frame_hdr)) {
  ------------------
  |  |   36|  44.2k|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 15.1k, False: 29.1k]
  |  |  ------------------
  ------------------
 3446|  15.1k|        if (f->frame_hdr->primary_ref_frame != DAV1D_PRIMARY_REF_NONE) {
  ------------------
  |  |   45|  15.1k|#define DAV1D_PRIMARY_REF_NONE 7
  ------------------
  |  Branch (3446:13): [True: 13.4k, False: 1.66k]
  ------------------
 3447|  13.4k|            const int pri_ref = f->frame_hdr->refidx[f->frame_hdr->primary_ref_frame];
 3448|  13.4k|            if (!c->refs[pri_ref].p.p.data[0]) {
  ------------------
  |  Branch (3448:17): [True: 148, False: 13.3k]
  ------------------
 3449|    148|                res = DAV1D_ERR(EINVAL);
  ------------------
  |  |   58|    148|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
 3450|    148|                goto error;
 3451|    148|            }
 3452|  13.4k|        }
 3453|   106k|        for (int i = 0; i < 7; i++) {
  ------------------
  |  Branch (3453:25): [True: 94.1k, False: 12.1k]
  ------------------
 3454|  94.1k|            const int refidx = f->frame_hdr->refidx[i];
 3455|  94.1k|            if (!c->refs[refidx].p.p.data[0] ||
  ------------------
  |  Branch (3455:17): [True: 492, False: 93.6k]
  ------------------
 3456|  93.6k|                f->frame_hdr->width[0] * 2 < c->refs[refidx].p.p.p.w ||
  ------------------
  |  Branch (3456:17): [True: 637, False: 93.0k]
  ------------------
 3457|  93.0k|                f->frame_hdr->height * 2 < c->refs[refidx].p.p.p.h ||
  ------------------
  |  Branch (3457:17): [True: 520, False: 92.4k]
  ------------------
 3458|  92.4k|                f->frame_hdr->width[0] > c->refs[refidx].p.p.p.w * 16 ||
  ------------------
  |  Branch (3458:17): [True: 937, False: 91.5k]
  ------------------
 3459|  91.5k|                f->frame_hdr->height > c->refs[refidx].p.p.p.h * 16 ||
  ------------------
  |  Branch (3459:17): [True: 280, False: 91.2k]
  ------------------
 3460|  91.2k|                f->seq_hdr->layout != c->refs[refidx].p.p.p.layout ||
  ------------------
  |  Branch (3460:17): [True: 0, False: 91.2k]
  ------------------
 3461|  91.2k|                bpc != c->refs[refidx].p.p.p.bpc)
  ------------------
  |  Branch (3461:17): [True: 0, False: 91.2k]
  ------------------
 3462|  2.86k|            {
 3463|  9.31k|                for (int j = 0; j < i; j++)
  ------------------
  |  Branch (3463:33): [True: 6.44k, False: 2.86k]
  ------------------
 3464|  6.44k|                    dav1d_thread_picture_unref(&f->refp[j]);
 3465|  2.86k|                res = DAV1D_ERR(EINVAL);
  ------------------
  |  |   58|  2.86k|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
 3466|  2.86k|                goto error;
 3467|  2.86k|            }
 3468|  91.2k|            dav1d_thread_picture_ref(&f->refp[i], &c->refs[refidx].p);
 3469|  91.2k|            ref_coded_width[i] = c->refs[refidx].p.p.frame_hdr->width[0];
 3470|  91.2k|            if (f->frame_hdr->width[0] != c->refs[refidx].p.p.p.w ||
  ------------------
  |  Branch (3470:17): [True: 14.6k, False: 76.6k]
  ------------------
 3471|  76.6k|                f->frame_hdr->height != c->refs[refidx].p.p.p.h)
  ------------------
  |  Branch (3471:17): [True: 2.43k, False: 74.2k]
  ------------------
 3472|  17.0k|            {
 3473|  17.0k|#define scale_fac(ref_sz, this_sz) \
 3474|  17.0k|    ((((ref_sz) << 14) + ((this_sz) >> 1)) / (this_sz))
 3475|  17.0k|                f->svc[i][0].scale = scale_fac(c->refs[refidx].p.p.p.w,
  ------------------
  |  | 3474|  17.0k|    ((((ref_sz) << 14) + ((this_sz) >> 1)) / (this_sz))
  ------------------
 3476|  17.0k|                                               f->frame_hdr->width[0]);
 3477|  17.0k|                f->svc[i][1].scale = scale_fac(c->refs[refidx].p.p.p.h,
  ------------------
  |  | 3474|  17.0k|    ((((ref_sz) << 14) + ((this_sz) >> 1)) / (this_sz))
  ------------------
 3478|  17.0k|                                               f->frame_hdr->height);
 3479|  17.0k|                f->svc[i][0].step = (f->svc[i][0].scale + 8) >> 4;
 3480|  17.0k|                f->svc[i][1].step = (f->svc[i][1].scale + 8) >> 4;
 3481|  74.2k|            } else {
 3482|  74.2k|                f->svc[i][0].scale = f->svc[i][1].scale = 0;
 3483|  74.2k|            }
 3484|  91.2k|            f->gmv_warp_allowed[i] = f->frame_hdr->gmv[i].type > DAV1D_WM_TYPE_TRANSLATION &&
  ------------------
  |  Branch (3484:38): [True: 6.29k, False: 84.9k]
  ------------------
 3485|  6.29k|                                     !f->frame_hdr->force_integer_mv &&
  ------------------
  |  Branch (3485:38): [True: 5.58k, False: 711]
  ------------------
 3486|  5.58k|                                     !dav1d_get_shear_params(&f->frame_hdr->gmv[i]) &&
  ------------------
  |  Branch (3486:38): [True: 4.57k, False: 1.00k]
  ------------------
 3487|  4.57k|                                     !f->svc[i][0].scale;
  ------------------
  |  Branch (3487:38): [True: 3.51k, False: 1.05k]
  ------------------
 3488|  91.2k|        }
 3489|  14.9k|    }
 3490|       |
 3491|       |    // setup entropy
 3492|  41.2k|    if (f->frame_hdr->primary_ref_frame == DAV1D_PRIMARY_REF_NONE) {
  ------------------
  |  |   45|  41.2k|#define DAV1D_PRIMARY_REF_NONE 7
  ------------------
  |  Branch (3492:9): [True: 30.4k, False: 10.8k]
  ------------------
 3493|  30.4k|        dav1d_cdf_thread_init_static(&f->in_cdf, f->frame_hdr->quant.yac);
 3494|  30.4k|    } else {
 3495|  10.8k|        const int pri_ref = f->frame_hdr->refidx[f->frame_hdr->primary_ref_frame];
 3496|  10.8k|        dav1d_cdf_thread_ref(&f->in_cdf, &c->cdf[pri_ref]);
 3497|  10.8k|    }
 3498|  41.2k|    if (f->frame_hdr->refresh_context) {
  ------------------
  |  Branch (3498:9): [True: 13.0k, False: 28.1k]
  ------------------
 3499|  13.0k|        res = dav1d_cdf_thread_alloc(c, &f->out_cdf, c->n_fc > 1);
 3500|  13.0k|        if (res < 0) goto error;
  ------------------
  |  Branch (3500:13): [True: 0, False: 13.0k]
  ------------------
 3501|  13.0k|    }
 3502|       |
 3503|       |    // FIXME qsort so tiles are in order (for frame threading)
 3504|  41.2k|    if (f->n_tile_data_alloc < c->n_tile_data) {
  ------------------
  |  Branch (3504:9): [True: 8.08k, False: 33.1k]
  ------------------
 3505|  8.08k|        dav1d_free(f->tile);
  ------------------
  |  |  135|  8.08k|#define dav1d_free(ptr) free(ptr)
  ------------------
 3506|  8.08k|        assert(c->n_tile_data < INT_MAX / (int)sizeof(*f->tile));
  ------------------
  |  Branch (3506:9): [True: 8.08k, False: 0]
  ------------------
 3507|  8.08k|        f->tile = dav1d_malloc(ALLOC_TILE, c->n_tile_data * sizeof(*f->tile));
  ------------------
  |  |  132|  8.08k|#define dav1d_malloc(type, sz) malloc(sz)
  ------------------
 3508|  8.08k|        if (!f->tile) {
  ------------------
  |  Branch (3508:13): [True: 0, False: 8.08k]
  ------------------
 3509|      0|            f->n_tile_data_alloc = f->n_tile_data = 0;
 3510|      0|            res = DAV1D_ERR(ENOMEM);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
 3511|      0|            goto error;
 3512|      0|        }
 3513|  8.08k|        f->n_tile_data_alloc = c->n_tile_data;
 3514|  8.08k|    }
 3515|  41.2k|    memcpy(f->tile, c->tile, c->n_tile_data * sizeof(*f->tile));
 3516|  41.2k|    memset(c->tile, 0, c->n_tile_data * sizeof(*c->tile));
 3517|  41.2k|    f->n_tile_data = c->n_tile_data;
 3518|  41.2k|    c->n_tile_data = 0;
 3519|       |
 3520|       |    // allocate frame
 3521|  41.2k|    res = dav1d_thread_picture_alloc(c, f, bpc);
 3522|  41.2k|    if (res < 0) goto error;
  ------------------
  |  Branch (3522:9): [True: 0, False: 41.2k]
  ------------------
 3523|       |
 3524|  41.2k|    if (f->frame_hdr->width[0] != f->frame_hdr->width[1]) {
  ------------------
  |  Branch (3524:9): [True: 3.60k, False: 37.6k]
  ------------------
 3525|  3.60k|        res = dav1d_picture_alloc_copy(c, &f->cur, f->frame_hdr->width[0], &f->sr_cur.p);
 3526|  3.60k|        if (res < 0) goto error;
  ------------------
  |  Branch (3526:13): [True: 0, False: 3.60k]
  ------------------
 3527|  37.6k|    } else {
 3528|  37.6k|        dav1d_picture_ref(&f->cur, &f->sr_cur.p);
 3529|  37.6k|    }
 3530|       |
 3531|  41.2k|    if (f->frame_hdr->width[0] != f->frame_hdr->width[1]) {
  ------------------
  |  Branch (3531:9): [True: 3.60k, False: 37.6k]
  ------------------
 3532|  3.60k|        f->resize_step[0] = scale_fac(f->cur.p.w, f->sr_cur.p.p.w);
  ------------------
  |  | 3474|  3.60k|    ((((ref_sz) << 14) + ((this_sz) >> 1)) / (this_sz))
  ------------------
 3533|  3.60k|        const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
 3534|  3.60k|        const int in_cw = (f->cur.p.w + ss_hor) >> ss_hor;
 3535|  3.60k|        const int out_cw = (f->sr_cur.p.p.w + ss_hor) >> ss_hor;
 3536|  3.60k|        f->resize_step[1] = scale_fac(in_cw, out_cw);
  ------------------
  |  | 3474|  3.60k|    ((((ref_sz) << 14) + ((this_sz) >> 1)) / (this_sz))
  ------------------
 3537|  3.60k|#undef scale_fac
 3538|  3.60k|        f->resize_start[0] = get_upscale_x0(f->cur.p.w, f->sr_cur.p.p.w, f->resize_step[0]);
 3539|  3.60k|        f->resize_start[1] = get_upscale_x0(in_cw, out_cw, f->resize_step[1]);
 3540|  3.60k|    }
 3541|       |
 3542|       |    // move f->cur into output queue
 3543|  41.2k|    if (c->n_fc == 1) {
  ------------------
  |  Branch (3543:9): [True: 41.2k, False: 0]
  ------------------
 3544|  41.2k|        if (f->frame_hdr->show_frame || c->output_invisible_frames) {
  ------------------
  |  Branch (3544:13): [True: 37.8k, False: 3.37k]
  |  Branch (3544:41): [True: 0, False: 3.37k]
  ------------------
 3545|  37.8k|            dav1d_thread_picture_ref(&c->out, &f->sr_cur);
 3546|  37.8k|            c->event_flags |= dav1d_picture_get_event_flags(&f->sr_cur);
 3547|  37.8k|        }
 3548|  41.2k|    } else {
 3549|      0|        dav1d_thread_picture_ref(out_delayed, &f->sr_cur);
 3550|      0|    }
 3551|       |
 3552|  41.2k|    f->w4 = (f->frame_hdr->width[0] + 3) >> 2;
 3553|  41.2k|    f->h4 = (f->frame_hdr->height + 3) >> 2;
 3554|  41.2k|    f->bw = ((f->frame_hdr->width[0] + 7) >> 3) << 1;
 3555|  41.2k|    f->bh = ((f->frame_hdr->height + 7) >> 3) << 1;
 3556|  41.2k|    f->sb128w = (f->bw + 31) >> 5;
 3557|  41.2k|    f->sb128h = (f->bh + 31) >> 5;
 3558|  41.2k|    f->sb_shift = 4 + f->seq_hdr->sb128;
 3559|  41.2k|    f->sb_step = 16 << f->seq_hdr->sb128;
 3560|  41.2k|    f->sbh = (f->bh + f->sb_step - 1) >> f->sb_shift;
 3561|  41.2k|    f->b4_stride = (f->bw + 31) & ~31;
 3562|  41.2k|    f->bitdepth_max = (1 << f->cur.p.bpc) - 1;
 3563|  41.2k|    atomic_init(&f->task_thread.error, 0);
 3564|  41.2k|    const int uses_2pass = c->n_fc > 1;
 3565|  41.2k|    const int cols = f->frame_hdr->tiling.cols;
 3566|  41.2k|    const int rows = f->frame_hdr->tiling.rows;
 3567|  41.2k|    atomic_store(&f->task_thread.task_counter,
 3568|  41.2k|                 (cols * rows + f->sbh) << uses_2pass);
 3569|       |
 3570|       |    // ref_mvs
 3571|  41.2k|    if (IS_INTER_OR_SWITCH(f->frame_hdr) || f->frame_hdr->allow_intrabc) {
  ------------------
  |  |   36|  82.4k|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 12.1k, False: 29.1k]
  |  |  ------------------
  ------------------
  |  Branch (3571:45): [True: 16.2k, False: 12.8k]
  ------------------
 3572|  28.3k|        f->mvs_ref = dav1d_ref_create_using_pool(c->refmvs_pool,
 3573|  28.3k|            sizeof(*f->mvs) * f->sb128h * 16 * (f->b4_stride >> 1));
 3574|  28.3k|        if (!f->mvs_ref) {
  ------------------
  |  Branch (3574:13): [True: 0, False: 28.3k]
  ------------------
 3575|      0|            res = DAV1D_ERR(ENOMEM);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
 3576|      0|            goto error;
 3577|      0|        }
 3578|  28.3k|        f->mvs = f->mvs_ref->data;
 3579|  28.3k|        if (!f->frame_hdr->allow_intrabc) {
  ------------------
  |  Branch (3579:13): [True: 12.1k, False: 16.2k]
  ------------------
 3580|  96.9k|            for (int i = 0; i < 7; i++)
  ------------------
  |  Branch (3580:29): [True: 84.8k, False: 12.1k]
  ------------------
 3581|  84.8k|                f->refpoc[i] = f->refp[i].p.frame_hdr->frame_offset;
 3582|  16.2k|        } else {
 3583|  16.2k|            memset(f->refpoc, 0, sizeof(f->refpoc));
 3584|  16.2k|        }
 3585|  28.3k|        if (f->frame_hdr->use_ref_frame_mvs) {
  ------------------
  |  Branch (3585:13): [True: 7.78k, False: 20.5k]
  ------------------
 3586|  62.2k|            for (int i = 0; i < 7; i++) {
  ------------------
  |  Branch (3586:29): [True: 54.5k, False: 7.78k]
  ------------------
 3587|  54.5k|                const int refidx = f->frame_hdr->refidx[i];
 3588|  54.5k|                const int ref_w = ((ref_coded_width[i] + 7) >> 3) << 1;
 3589|  54.5k|                const int ref_h = ((f->refp[i].p.p.h + 7) >> 3) << 1;
 3590|  54.5k|                if (c->refs[refidx].refmvs != NULL &&
  ------------------
  |  Branch (3590:21): [True: 36.0k, False: 18.4k]
  ------------------
 3591|  36.0k|                    ref_w == f->bw && ref_h == f->bh)
  ------------------
  |  Branch (3591:21): [True: 34.9k, False: 1.02k]
  |  Branch (3591:39): [True: 34.7k, False: 255]
  ------------------
 3592|  34.7k|                {
 3593|  34.7k|                    f->ref_mvs_ref[i] = c->refs[refidx].refmvs;
 3594|  34.7k|                    dav1d_ref_inc(f->ref_mvs_ref[i]);
 3595|  34.7k|                    f->ref_mvs[i] = c->refs[refidx].refmvs->data;
 3596|  34.7k|                } else {
 3597|  19.7k|                    f->ref_mvs[i] = NULL;
 3598|  19.7k|                    f->ref_mvs_ref[i] = NULL;
 3599|  19.7k|                }
 3600|  54.5k|                memcpy(f->refrefpoc[i], c->refs[refidx].refpoc,
 3601|  54.5k|                       sizeof(*f->refrefpoc));
 3602|  54.5k|            }
 3603|  20.5k|        } else {
 3604|  20.5k|            memset(f->ref_mvs_ref, 0, sizeof(f->ref_mvs_ref));
 3605|  20.5k|        }
 3606|  28.3k|    } else {
 3607|  12.8k|        f->mvs_ref = NULL;
 3608|  12.8k|        memset(f->ref_mvs_ref, 0, sizeof(f->ref_mvs_ref));
 3609|  12.8k|    }
 3610|       |
 3611|       |    // segmap
 3612|  41.2k|    if (f->frame_hdr->segmentation.enabled) {
  ------------------
  |  Branch (3612:9): [True: 8.29k, False: 32.9k]
  ------------------
 3613|       |        // By default, the previous segmentation map is not initialised.
 3614|  8.29k|        f->prev_segmap_ref = NULL;
 3615|  8.29k|        f->prev_segmap = NULL;
 3616|       |
 3617|       |        // We might need a previous frame's segmentation map. This
 3618|       |        // happens if there is either no update or a temporal update.
 3619|  8.29k|        if (f->frame_hdr->segmentation.temporal || !f->frame_hdr->segmentation.update_map) {
  ------------------
  |  Branch (3619:13): [True: 564, False: 7.73k]
  |  Branch (3619:52): [True: 5.82k, False: 1.90k]
  ------------------
 3620|  6.38k|            const int pri_ref = f->frame_hdr->primary_ref_frame;
 3621|  6.38k|            assert(pri_ref != DAV1D_PRIMARY_REF_NONE);
  ------------------
  |  Branch (3621:13): [True: 6.38k, False: 0]
  ------------------
 3622|  6.38k|            const int ref_w = ((ref_coded_width[pri_ref] + 7) >> 3) << 1;
 3623|  6.38k|            const int ref_h = ((f->refp[pri_ref].p.p.h + 7) >> 3) << 1;
 3624|  6.38k|            if (ref_w == f->bw && ref_h == f->bh) {
  ------------------
  |  Branch (3624:17): [True: 5.90k, False: 479]
  |  Branch (3624:35): [True: 5.53k, False: 373]
  ------------------
 3625|  5.53k|                f->prev_segmap_ref = c->refs[f->frame_hdr->refidx[pri_ref]].segmap;
 3626|  5.53k|                if (f->prev_segmap_ref) {
  ------------------
  |  Branch (3626:21): [True: 5.04k, False: 491]
  ------------------
 3627|  5.04k|                    dav1d_ref_inc(f->prev_segmap_ref);
 3628|  5.04k|                    f->prev_segmap = f->prev_segmap_ref->data;
 3629|  5.04k|                }
 3630|  5.53k|            }
 3631|  6.38k|        }
 3632|       |
 3633|  8.29k|        if (f->frame_hdr->segmentation.update_map) {
  ------------------
  |  Branch (3633:13): [True: 2.47k, False: 5.82k]
  ------------------
 3634|       |            // We're updating an existing map, but need somewhere to
 3635|       |            // put the new values. Allocate them here (the data
 3636|       |            // actually gets set elsewhere)
 3637|  2.47k|            f->cur_segmap_ref = dav1d_ref_create_using_pool(c->segmap_pool,
 3638|  2.47k|                sizeof(*f->cur_segmap) * f->b4_stride * 32 * f->sb128h);
 3639|  2.47k|            if (!f->cur_segmap_ref) {
  ------------------
  |  Branch (3639:17): [True: 0, False: 2.47k]
  ------------------
 3640|      0|                dav1d_ref_dec(&f->prev_segmap_ref);
 3641|      0|                res = DAV1D_ERR(ENOMEM);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
 3642|      0|                goto error;
 3643|      0|            }
 3644|  2.47k|            f->cur_segmap = f->cur_segmap_ref->data;
 3645|  5.82k|        } else if (f->prev_segmap_ref) {
  ------------------
  |  Branch (3645:20): [True: 4.70k, False: 1.12k]
  ------------------
 3646|       |            // We're not updating an existing map, and we have a valid
 3647|       |            // reference. Use that.
 3648|  4.70k|            f->cur_segmap_ref = f->prev_segmap_ref;
 3649|  4.70k|            dav1d_ref_inc(f->cur_segmap_ref);
 3650|  4.70k|            f->cur_segmap = f->prev_segmap_ref->data;
 3651|  4.70k|        } else {
 3652|       |            // We need to make a new map. Allocate one here and zero it out.
 3653|  1.12k|            const size_t segmap_size = sizeof(*f->cur_segmap) * f->b4_stride * 32 * f->sb128h;
 3654|  1.12k|            f->cur_segmap_ref = dav1d_ref_create_using_pool(c->segmap_pool, segmap_size);
 3655|  1.12k|            if (!f->cur_segmap_ref) {
  ------------------
  |  Branch (3655:17): [True: 0, False: 1.12k]
  ------------------
 3656|      0|                res = DAV1D_ERR(ENOMEM);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
 3657|      0|                goto error;
 3658|      0|            }
 3659|  1.12k|            f->cur_segmap = f->cur_segmap_ref->data;
 3660|  1.12k|            memset(f->cur_segmap, 0, segmap_size);
 3661|  1.12k|        }
 3662|  32.9k|    } else {
 3663|  32.9k|        f->cur_segmap = NULL;
 3664|  32.9k|        f->cur_segmap_ref = NULL;
 3665|  32.9k|        f->prev_segmap_ref = NULL;
 3666|  32.9k|    }
 3667|       |
 3668|       |    // update references etc.
 3669|  41.2k|    const unsigned refresh_frame_flags = f->frame_hdr->refresh_frame_flags;
 3670|   371k|    for (int i = 0; i < 8; i++) {
  ------------------
  |  Branch (3670:21): [True: 329k, False: 41.2k]
  ------------------
 3671|   329k|        if (refresh_frame_flags & (1 << i)) {
  ------------------
  |  Branch (3671:13): [True: 266k, False: 63.2k]
  ------------------
 3672|   266k|            if (c->refs[i].p.p.frame_hdr)
  ------------------
  |  Branch (3672:17): [True: 83.6k, False: 182k]
  ------------------
 3673|  83.6k|                dav1d_thread_picture_unref(&c->refs[i].p);
 3674|   266k|            dav1d_thread_picture_ref(&c->refs[i].p, &f->sr_cur);
 3675|       |
 3676|   266k|            dav1d_cdf_thread_unref(&c->cdf[i]);
 3677|   266k|            if (f->frame_hdr->refresh_context) {
  ------------------
  |  Branch (3677:17): [True: 69.6k, False: 196k]
  ------------------
 3678|  69.6k|                dav1d_cdf_thread_ref(&c->cdf[i], &f->out_cdf);
 3679|   196k|            } else {
 3680|   196k|                dav1d_cdf_thread_ref(&c->cdf[i], &f->in_cdf);
 3681|   196k|            }
 3682|       |
 3683|   266k|            dav1d_ref_dec(&c->refs[i].segmap);
 3684|   266k|            c->refs[i].segmap = f->cur_segmap_ref;
 3685|   266k|            if (f->cur_segmap_ref)
  ------------------
  |  Branch (3685:17): [True: 43.6k, False: 223k]
  ------------------
 3686|  43.6k|                dav1d_ref_inc(f->cur_segmap_ref);
 3687|   266k|            dav1d_ref_dec(&c->refs[i].refmvs);
 3688|   266k|            if (!f->frame_hdr->allow_intrabc) {
  ------------------
  |  Branch (3688:17): [True: 137k, False: 129k]
  ------------------
 3689|   137k|                c->refs[i].refmvs = f->mvs_ref;
 3690|   137k|                if (f->mvs_ref)
  ------------------
  |  Branch (3690:21): [True: 39.6k, False: 97.6k]
  ------------------
 3691|  39.6k|                    dav1d_ref_inc(f->mvs_ref);
 3692|   137k|            }
 3693|   266k|            memcpy(c->refs[i].refpoc, f->refpoc, sizeof(f->refpoc));
 3694|   266k|        }
 3695|   329k|    }
 3696|       |
 3697|  41.2k|    if (c->n_fc == 1) {
  ------------------
  |  Branch (3697:9): [True: 41.2k, False: 0]
  ------------------
 3698|  41.2k|        if ((res = dav1d_decode_frame(f)) < 0) {
  ------------------
  |  Branch (3698:13): [True: 24.0k, False: 17.2k]
  ------------------
 3699|  24.0k|            dav1d_thread_picture_unref(&c->out);
 3700|   216k|            for (int i = 0; i < 8; i++) {
  ------------------
  |  Branch (3700:29): [True: 192k, False: 24.0k]
  ------------------
 3701|   192k|                if (refresh_frame_flags & (1 << i)) {
  ------------------
  |  Branch (3701:21): [True: 160k, False: 32.0k]
  ------------------
 3702|   160k|                    if (c->refs[i].p.p.frame_hdr)
  ------------------
  |  Branch (3702:25): [True: 160k, False: 0]
  ------------------
 3703|   160k|                        dav1d_thread_picture_unref(&c->refs[i].p);
 3704|   160k|                    dav1d_cdf_thread_unref(&c->cdf[i]);
 3705|   160k|                    dav1d_ref_dec(&c->refs[i].segmap);
 3706|   160k|                    dav1d_ref_dec(&c->refs[i].refmvs);
 3707|   160k|                }
 3708|   192k|            }
 3709|  24.0k|            goto error;
 3710|  24.0k|        }
 3711|  41.2k|    } else {
 3712|      0|        dav1d_task_frame_init(f);
 3713|      0|        pthread_mutex_unlock(&c->task_thread.lock);
 3714|      0|    }
 3715|       |
 3716|  17.2k|    return 0;
 3717|  27.0k|error:
 3718|  27.0k|    atomic_init(&f->task_thread.error, 1);
 3719|  27.0k|    dav1d_cdf_thread_unref(&f->in_cdf);
 3720|  27.0k|    if (f->frame_hdr->refresh_context)
  ------------------
  |  Branch (3720:9): [True: 4.85k, False: 22.1k]
  ------------------
 3721|  4.85k|        dav1d_cdf_thread_unref(&f->out_cdf);
 3722|   216k|    for (int i = 0; i < 7; i++) {
  ------------------
  |  Branch (3722:21): [True: 189k, False: 27.0k]
  ------------------
 3723|   189k|        if (f->refp[i].p.frame_hdr)
  ------------------
  |  Branch (3723:13): [True: 0, False: 189k]
  ------------------
 3724|      0|            dav1d_thread_picture_unref(&f->refp[i]);
 3725|   189k|        dav1d_ref_dec(&f->ref_mvs_ref[i]);
 3726|   189k|    }
 3727|  27.0k|    if (c->n_fc == 1)
  ------------------
  |  Branch (3727:9): [True: 27.0k, False: 0]
  ------------------
 3728|  27.0k|        dav1d_thread_picture_unref(&c->out);
 3729|      0|    else
 3730|      0|        dav1d_thread_picture_unref(out_delayed);
 3731|  27.0k|    dav1d_picture_unref_internal(&f->cur);
 3732|  27.0k|    dav1d_thread_picture_unref(&f->sr_cur);
 3733|  27.0k|    dav1d_ref_dec(&f->mvs_ref);
 3734|  27.0k|    dav1d_ref_dec(&f->seq_hdr_ref);
 3735|  27.0k|    dav1d_ref_dec(&f->frame_hdr_ref);
 3736|  27.0k|    dav1d_data_props_copy(&c->cached_error_props, &c->in.m);
 3737|       |
 3738|  27.0k|    for (int i = 0; i < f->n_tile_data; i++)
  ------------------
  |  Branch (3738:21): [True: 0, False: 27.0k]
  ------------------
 3739|      0|        dav1d_data_unref_internal(&f->tile[i].data);
 3740|  27.0k|    f->n_tile_data = 0;
 3741|       |
 3742|  27.0k|    if (c->n_fc > 1)
  ------------------
  |  Branch (3742:9): [True: 0, False: 27.0k]
  ------------------
 3743|      0|        pthread_mutex_unlock(&c->task_thread.lock);
 3744|       |
 3745|  27.0k|    return res;
 3746|  41.2k|}
decode.c:reset_context:
 2390|   498k|static void reset_context(BlockContext *const ctx, const int keyframe, const int pass) {
 2391|   498k|    memset(ctx->intra, keyframe, sizeof(ctx->intra));
 2392|   498k|    memset(ctx->uvmode, DC_PRED, sizeof(ctx->uvmode));
 2393|   498k|    if (keyframe)
  ------------------
  |  Branch (2393:9): [True: 405k, False: 92.3k]
  ------------------
 2394|   405k|        memset(ctx->mode, DC_PRED, sizeof(ctx->mode));
 2395|       |
 2396|   498k|    if (pass == 2) return;
  ------------------
  |  Branch (2396:9): [True: 0, False: 498k]
  ------------------
 2397|       |
 2398|   498k|    memset(ctx->partition, 0, sizeof(ctx->partition));
 2399|   498k|    memset(ctx->skip, 0, sizeof(ctx->skip));
 2400|   498k|    memset(ctx->skip_mode, 0, sizeof(ctx->skip_mode));
 2401|   498k|    memset(ctx->tx_lpf_y, 2, sizeof(ctx->tx_lpf_y));
 2402|   498k|    memset(ctx->tx_lpf_uv, 1, sizeof(ctx->tx_lpf_uv));
 2403|   498k|    memset(ctx->tx_intra, -1, sizeof(ctx->tx_intra));
 2404|   498k|    memset(ctx->tx, TX_64X64, sizeof(ctx->tx));
 2405|   498k|    if (!keyframe) {
  ------------------
  |  Branch (2405:9): [True: 92.3k, False: 405k]
  ------------------
 2406|  92.3k|        memset(ctx->ref, -1, sizeof(ctx->ref));
 2407|  92.3k|        memset(ctx->comp_type, 0, sizeof(ctx->comp_type));
 2408|  92.3k|        memset(ctx->mode, NEARESTMV, sizeof(ctx->mode));
 2409|  92.3k|    }
 2410|   498k|    memset(ctx->lcoef, 0x40, sizeof(ctx->lcoef));
 2411|   498k|    memset(ctx->ccoef, 0x40, sizeof(ctx->ccoef));
 2412|   498k|    memset(ctx->filter, DAV1D_N_SWITCHABLE_FILTERS, sizeof(ctx->filter));
 2413|   498k|    memset(ctx->seg_pred, 0, sizeof(ctx->seg_pred));
 2414|   498k|    memset(ctx->pal_sz, 0, sizeof(ctx->pal_sz));
 2415|   498k|}
decode.c:decode_sb:
 2119|  2.79M|{
 2120|  2.79M|    const Dav1dFrameContext *const f = t->f;
 2121|  2.79M|    Dav1dTileState *const ts = t->ts;
 2122|  2.79M|    const int hsz = 16 >> bl;
 2123|  2.79M|    const int have_h_split = f->bw > t->bx + hsz;
 2124|  2.79M|    const int have_v_split = f->bh > t->by + hsz;
 2125|       |
 2126|  2.79M|    if (!have_h_split && !have_v_split) {
  ------------------
  |  Branch (2126:9): [True: 99.6k, False: 2.69M]
  |  Branch (2126:26): [True: 44.3k, False: 55.3k]
  ------------------
 2127|  44.3k|        assert(bl < BL_8X8);
  ------------------
  |  Branch (2127:9): [True: 44.3k, False: 0]
  ------------------
 2128|  44.3k|        return decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 0));
  ------------------
  |  |   51|  44.3k|    ((const EdgeNode*)((uintptr_t)(n) + ((const EdgeBranch*)(n))->split_offset[i]))
  ------------------
 2129|  44.3k|    }
 2130|       |
 2131|  2.74M|    uint16_t *pc;
 2132|  2.74M|    enum BlockPartition bp;
 2133|  2.74M|    int ctx, bx8, by8;
 2134|  2.74M|    if (t->frame_thread.pass != 2) {
  ------------------
  |  Branch (2134:9): [True: 2.74M, False: 0]
  ------------------
 2135|  2.74M|        if (0 && bl == BL_64X64)
  ------------------
  |  Branch (2135:13): [Folded, False: 2.74M]
  |  Branch (2135:18): [True: 0, False: 0]
  ------------------
 2136|      0|            printf("poc=%d,y=%d,x=%d,bl=%d,r=%d\n",
 2137|      0|                   f->frame_hdr->frame_offset, t->by, t->bx, bl, ts->msac.rng);
 2138|  2.74M|        bx8 = (t->bx & 31) >> 1;
 2139|  2.74M|        by8 = (t->by & 31) >> 1;
 2140|  2.74M|        ctx = get_partition_ctx(t->a, &t->l, bl, by8, bx8);
 2141|  2.74M|        pc = ts->cdf.m.partition[bl][ctx];
 2142|  2.74M|    }
 2143|       |
 2144|  2.74M|    if (have_h_split && have_v_split) {
  ------------------
  |  Branch (2144:9): [True: 2.69M, False: 55.3k]
  |  Branch (2144:25): [True: 2.20M, False: 489k]
  ------------------
 2145|  2.20M|        if (t->frame_thread.pass == 2) {
  ------------------
  |  Branch (2145:13): [True: 0, False: 2.20M]
  ------------------
 2146|      0|            const Av1Block *const b = &f->frame_thread.b[t->by * f->b4_stride + t->bx];
 2147|      0|            bp = b->bl == bl ? b->bp : PARTITION_SPLIT;
  ------------------
  |  Branch (2147:18): [True: 0, False: 0]
  ------------------
 2148|  2.20M|        } else {
 2149|  2.20M|            bp = dav1d_msac_decode_symbol_adapt16(&ts->msac, pc,
  ------------------
  |  |   57|  2.20M|#define dav1d_msac_decode_symbol_adapt16(ctx, cdf, symb) ((ctx)->symbol_adapt16(ctx, cdf, symb))
  ------------------
 2150|  2.20M|                                                  dav1d_partition_type_count[bl]);
 2151|  2.20M|            if (f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I422 &&
  ------------------
  |  Branch (2151:17): [True: 12.3k, False: 2.18M]
  ------------------
 2152|  12.3k|                (bp == PARTITION_V || bp == PARTITION_V4 ||
  ------------------
  |  Branch (2152:18): [True: 283, False: 12.0k]
  |  Branch (2152:39): [True: 405, False: 11.6k]
  ------------------
 2153|  11.6k|                 bp == PARTITION_T_LEFT_SPLIT || bp == PARTITION_T_RIGHT_SPLIT))
  ------------------
  |  Branch (2153:18): [True: 262, False: 11.3k]
  |  Branch (2153:50): [True: 244, False: 11.1k]
  ------------------
 2154|  1.19k|            {
 2155|  1.19k|                return 1;
 2156|  1.19k|            }
 2157|  2.20M|            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  2.20M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 2.20M]
  |  |  ------------------
  |  |   35|  2.20M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  2.20M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 2158|      0|                printf("poc=%d,y=%d,x=%d,bl=%d,ctx=%d,bp=%d: r=%d\n",
 2159|      0|                       f->frame_hdr->frame_offset, t->by, t->bx, bl, ctx, bp,
 2160|      0|                       ts->msac.rng);
 2161|  2.20M|        }
 2162|  2.20M|        const uint8_t *const b = dav1d_block_sizes[bl][bp];
 2163|       |
 2164|  2.20M|        switch (bp) {
 2165|   856k|        case PARTITION_NONE:
  ------------------
  |  Branch (2165:9): [True: 856k, False: 1.34M]
  ------------------
 2166|   856k|            if (decode_b(t, bl, b[0], PARTITION_NONE, node->o))
  ------------------
  |  Branch (2166:17): [True: 367, False: 855k]
  ------------------
 2167|    367|                return -1;
 2168|   855k|            break;
 2169|   855k|        case PARTITION_H:
  ------------------
  |  Branch (2169:9): [True: 253k, False: 1.94M]
  ------------------
 2170|   253k|            if (decode_b(t, bl, b[0], PARTITION_H, node->h[0]))
  ------------------
  |  Branch (2170:17): [True: 359, False: 252k]
  ------------------
 2171|    359|                return -1;
 2172|   252k|            t->by += hsz;
 2173|   252k|            if (decode_b(t, bl, b[0], PARTITION_H, node->h[1]))
  ------------------
  |  Branch (2173:17): [True: 195, False: 252k]
  ------------------
 2174|    195|                return -1;
 2175|   252k|            t->by -= hsz;
 2176|   252k|            break;
 2177|   165k|        case PARTITION_V:
  ------------------
  |  Branch (2177:9): [True: 165k, False: 2.03M]
  ------------------
 2178|   165k|            if (decode_b(t, bl, b[0], PARTITION_V, node->v[0]))
  ------------------
  |  Branch (2178:17): [True: 217, False: 164k]
  ------------------
 2179|    217|                return -1;
 2180|   164k|            t->bx += hsz;
 2181|   164k|            if (decode_b(t, bl, b[0], PARTITION_V, node->v[1]))
  ------------------
  |  Branch (2181:17): [True: 154, False: 164k]
  ------------------
 2182|    154|                return -1;
 2183|   164k|            t->bx -= hsz;
 2184|   164k|            break;
 2185|   514k|        case PARTITION_SPLIT:
  ------------------
  |  Branch (2185:9): [True: 514k, False: 1.68M]
  ------------------
 2186|   514k|            if (bl == BL_8X8) {
  ------------------
  |  Branch (2186:17): [True: 140k, False: 374k]
  ------------------
 2187|   140k|                const EdgeTip *const tip = (const EdgeTip *) node;
 2188|   140k|                assert(hsz == 1);
  ------------------
  |  Branch (2188:17): [True: 140k, False: 0]
  ------------------
 2189|   140k|                if (decode_b(t, bl, BS_4x4, PARTITION_SPLIT, EDGE_ALL_TR_AND_BL))
  ------------------
  |  Branch (2189:21): [True: 273, False: 140k]
  ------------------
 2190|    273|                    return -1;
 2191|   140k|                const enum Filter2d tl_filter = t->tl_4x4_filter;
 2192|   140k|                t->bx++;
 2193|   140k|                if (decode_b(t, bl, BS_4x4, PARTITION_SPLIT, tip->split[0]))
  ------------------
  |  Branch (2193:21): [True: 196, False: 139k]
  ------------------
 2194|    196|                    return -1;
 2195|   139k|                t->bx--;
 2196|   139k|                t->by++;
 2197|   139k|                if (decode_b(t, bl, BS_4x4, PARTITION_SPLIT, tip->split[1]))
  ------------------
  |  Branch (2197:21): [True: 210, False: 139k]
  ------------------
 2198|    210|                    return -1;
 2199|   139k|                t->bx++;
 2200|   139k|                t->tl_4x4_filter = tl_filter;
 2201|   139k|                if (decode_b(t, bl, BS_4x4, PARTITION_SPLIT, tip->split[2]))
  ------------------
  |  Branch (2201:21): [True: 196, False: 139k]
  ------------------
 2202|    196|                    return -1;
 2203|   139k|                t->bx--;
 2204|   139k|                t->by--;
 2205|   139k|#if ARCH_X86_64
 2206|   139k|                if (t->frame_thread.pass) {
  ------------------
  |  Branch (2206:21): [True: 0, False: 139k]
  ------------------
 2207|       |                    /* In 8-bit mode with 2-pass decoding the coefficient buffer
 2208|       |                     * can end up misaligned due to skips here. Work around
 2209|       |                     * the issue by explicitly realigning the buffer. */
 2210|      0|                    const int p = t->frame_thread.pass & 1;
 2211|      0|                    ts->frame_thread[p].cf =
 2212|      0|                        (void*)(((uintptr_t)ts->frame_thread[p].cf + 63) & ~63);
 2213|      0|                }
 2214|   139k|#endif
 2215|   374k|            } else {
 2216|   374k|                if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 0)))
  ------------------
  |  |   51|   374k|    ((const EdgeNode*)((uintptr_t)(n) + ((const EdgeBranch*)(n))->split_offset[i]))
  ------------------
  |  Branch (2216:21): [True: 1.54k, False: 372k]
  ------------------
 2217|  1.54k|                    return 1;
 2218|   372k|                t->bx += hsz;
 2219|   372k|                if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 1)))
  ------------------
  |  |   51|   372k|    ((const EdgeNode*)((uintptr_t)(n) + ((const EdgeBranch*)(n))->split_offset[i]))
  ------------------
  |  Branch (2219:21): [True: 1.34k, False: 371k]
  ------------------
 2220|  1.34k|                    return 1;
 2221|   371k|                t->bx -= hsz;
 2222|   371k|                t->by += hsz;
 2223|   371k|                if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 2)))
  ------------------
  |  |   51|   371k|    ((const EdgeNode*)((uintptr_t)(n) + ((const EdgeBranch*)(n))->split_offset[i]))
  ------------------
  |  Branch (2223:21): [True: 793, False: 370k]
  ------------------
 2224|    793|                    return 1;
 2225|   370k|                t->bx += hsz;
 2226|   370k|                if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 3)))
  ------------------
  |  |   51|   370k|    ((const EdgeNode*)((uintptr_t)(n) + ((const EdgeBranch*)(n))->split_offset[i]))
  ------------------
  |  Branch (2226:21): [True: 1.03k, False: 369k]
  ------------------
 2227|  1.03k|                    return 1;
 2228|   369k|                t->bx -= hsz;
 2229|   369k|                t->by -= hsz;
 2230|   369k|            }
 2231|   509k|            break;
 2232|   509k|        case PARTITION_T_TOP_SPLIT: {
  ------------------
  |  Branch (2232:9): [True: 39.5k, False: 2.16M]
  ------------------
 2233|  39.5k|            if (decode_b(t, bl, b[0], PARTITION_T_TOP_SPLIT, EDGE_ALL_TR_AND_BL))
  ------------------
  |  Branch (2233:17): [True: 196, False: 39.3k]
  ------------------
 2234|    196|                return -1;
 2235|  39.3k|            t->bx += hsz;
 2236|  39.3k|            if (decode_b(t, bl, b[0], PARTITION_T_TOP_SPLIT, node->v[1]))
  ------------------
  |  Branch (2236:17): [True: 197, False: 39.1k]
  ------------------
 2237|    197|                return -1;
 2238|  39.1k|            t->bx -= hsz;
 2239|  39.1k|            t->by += hsz;
 2240|  39.1k|            if (decode_b(t, bl, b[1], PARTITION_T_TOP_SPLIT, node->h[1]))
  ------------------
  |  Branch (2240:17): [True: 195, False: 38.9k]
  ------------------
 2241|    195|                return -1;
 2242|  38.9k|            t->by -= hsz;
 2243|  38.9k|            break;
 2244|  39.1k|        }
 2245|  41.4k|        case PARTITION_T_BOTTOM_SPLIT: {
  ------------------
  |  Branch (2245:9): [True: 41.4k, False: 2.15M]
  ------------------
 2246|  41.4k|            if (decode_b(t, bl, b[0], PARTITION_T_BOTTOM_SPLIT, node->h[0]))
  ------------------
  |  Branch (2246:17): [True: 263, False: 41.1k]
  ------------------
 2247|    263|                return -1;
 2248|  41.1k|            t->by += hsz;
 2249|  41.1k|            if (decode_b(t, bl, b[1], PARTITION_T_BOTTOM_SPLIT, node->v[0]))
  ------------------
  |  Branch (2249:17): [True: 195, False: 40.9k]
  ------------------
 2250|    195|                return -1;
 2251|  40.9k|            t->bx += hsz;
 2252|  40.9k|            if (decode_b(t, bl, b[1], PARTITION_T_BOTTOM_SPLIT, 0))
  ------------------
  |  Branch (2252:17): [True: 79, False: 40.9k]
  ------------------
 2253|     79|                return -1;
 2254|  40.9k|            t->bx -= hsz;
 2255|  40.9k|            t->by -= hsz;
 2256|  40.9k|            break;
 2257|  40.9k|        }
 2258|  27.6k|        case PARTITION_T_LEFT_SPLIT: {
  ------------------
  |  Branch (2258:9): [True: 27.6k, False: 2.17M]
  ------------------
 2259|  27.6k|            if (decode_b(t, bl, b[0], PARTITION_T_LEFT_SPLIT, EDGE_ALL_TR_AND_BL))
  ------------------
  |  Branch (2259:17): [True: 326, False: 27.3k]
  ------------------
 2260|    326|                return -1;
 2261|  27.3k|            t->by += hsz;
 2262|  27.3k|            if (decode_b(t, bl, b[0], PARTITION_T_LEFT_SPLIT, node->h[1]))
  ------------------
  |  Branch (2262:17): [True: 202, False: 27.1k]
  ------------------
 2263|    202|                return -1;
 2264|  27.1k|            t->by -= hsz;
 2265|  27.1k|            t->bx += hsz;
 2266|  27.1k|            if (decode_b(t, bl, b[1], PARTITION_T_LEFT_SPLIT, node->v[1]))
  ------------------
  |  Branch (2266:17): [True: 195, False: 26.9k]
  ------------------
 2267|    195|                return -1;
 2268|  26.9k|            t->bx -= hsz;
 2269|  26.9k|            break;
 2270|  27.1k|        }
 2271|  55.0k|        case PARTITION_T_RIGHT_SPLIT: {
  ------------------
  |  Branch (2271:9): [True: 55.0k, False: 2.14M]
  ------------------
 2272|  55.0k|            if (decode_b(t, bl, b[0], PARTITION_T_RIGHT_SPLIT, node->v[0]))
  ------------------
  |  Branch (2272:17): [True: 287, False: 54.7k]
  ------------------
 2273|    287|                return -1;
 2274|  54.7k|            t->bx += hsz;
 2275|  54.7k|            if (decode_b(t, bl, b[1], PARTITION_T_RIGHT_SPLIT, node->h[0]))
  ------------------
  |  Branch (2275:17): [True: 73, False: 54.6k]
  ------------------
 2276|     73|                return -1;
 2277|  54.6k|            t->by += hsz;
 2278|  54.6k|            if (decode_b(t, bl, b[1], PARTITION_T_RIGHT_SPLIT, 0))
  ------------------
  |  Branch (2278:17): [True: 240, False: 54.4k]
  ------------------
 2279|    240|                return -1;
 2280|  54.4k|            t->by -= hsz;
 2281|  54.4k|            t->bx -= hsz;
 2282|  54.4k|            break;
 2283|  54.6k|        }
 2284|   100k|        case PARTITION_H4: {
  ------------------
  |  Branch (2284:9): [True: 100k, False: 2.10M]
  ------------------
 2285|   100k|            const EdgeBranch *const branch = (const EdgeBranch *) node;
 2286|   100k|            if (decode_b(t, bl, b[0], PARTITION_H4, node->h[0]))
  ------------------
  |  Branch (2286:17): [True: 198, False: 100k]
  ------------------
 2287|    198|                return -1;
 2288|   100k|            t->by += hsz >> 1;
 2289|   100k|            if (decode_b(t, bl, b[0], PARTITION_H4, branch->h4))
  ------------------
  |  Branch (2289:17): [True: 210, False: 100k]
  ------------------
 2290|    210|                return -1;
 2291|   100k|            t->by += hsz >> 1;
 2292|   100k|            if (decode_b(t, bl, b[0], PARTITION_H4, EDGE_ALL_LEFT_HAS_BOTTOM))
  ------------------
  |  Branch (2292:17): [True: 205, False: 100k]
  ------------------
 2293|    205|                return -1;
 2294|   100k|            t->by += hsz >> 1;
 2295|   100k|            if (t->by < f->bh)
  ------------------
  |  Branch (2295:17): [True: 95.5k, False: 4.76k]
  ------------------
 2296|  95.5k|                if (decode_b(t, bl, b[0], PARTITION_H4, node->h[1]))
  ------------------
  |  Branch (2296:21): [True: 219, False: 95.3k]
  ------------------
 2297|    219|                    return -1;
 2298|   100k|            t->by -= hsz * 3 >> 1;
 2299|   100k|            break;
 2300|   100k|        }
 2301|   147k|        case PARTITION_V4: {
  ------------------
  |  Branch (2301:9): [True: 147k, False: 2.05M]
  ------------------
 2302|   147k|            const EdgeBranch *const branch = (const EdgeBranch *) node;
 2303|   147k|            if (decode_b(t, bl, b[0], PARTITION_V4, node->v[0]))
  ------------------
  |  Branch (2303:17): [True: 307, False: 147k]
  ------------------
 2304|    307|                return -1;
 2305|   147k|            t->bx += hsz >> 1;
 2306|   147k|            if (decode_b(t, bl, b[0], PARTITION_V4, branch->v4))
  ------------------
  |  Branch (2306:17): [True: 294, False: 146k]
  ------------------
 2307|    294|                return -1;
 2308|   146k|            t->bx += hsz >> 1;
 2309|   146k|            if (decode_b(t, bl, b[0], PARTITION_V4, EDGE_ALL_TOP_HAS_RIGHT))
  ------------------
  |  Branch (2309:17): [True: 250, False: 146k]
  ------------------
 2310|    250|                return -1;
 2311|   146k|            t->bx += hsz >> 1;
 2312|   146k|            if (t->bx < f->bw)
  ------------------
  |  Branch (2312:17): [True: 144k, False: 2.15k]
  ------------------
 2313|   144k|                if (decode_b(t, bl, b[0], PARTITION_V4, node->v[1]))
  ------------------
  |  Branch (2313:21): [True: 194, False: 144k]
  ------------------
 2314|    194|                    return -1;
 2315|   146k|            t->bx -= hsz * 3 >> 1;
 2316|   146k|            break;
 2317|   146k|        }
 2318|      0|        default: assert(0);
  ------------------
  |  Branch (2318:9): [True: 0, False: 2.20M]
  |  Branch (2318:18): [Folded, False: 0]
  ------------------
 2319|  2.20M|        }
 2320|  2.20M|    } else if (have_h_split) {
  ------------------
  |  Branch (2320:16): [True: 489k, False: 55.3k]
  ------------------
 2321|   489k|        unsigned is_split;
 2322|   489k|        if (t->frame_thread.pass == 2) {
  ------------------
  |  Branch (2322:13): [True: 0, False: 489k]
  ------------------
 2323|      0|            const Av1Block *const b = &f->frame_thread.b[t->by * f->b4_stride + t->bx];
 2324|      0|            is_split = b->bl != bl;
 2325|   489k|        } else {
 2326|   489k|            is_split = dav1d_msac_decode_bool(&ts->msac,
  ------------------
  |  |   54|   489k|#define dav1d_msac_decode_bool           dav1d_msac_decode_bool_sse2
  ------------------
 2327|   489k|                           gather_top_partition_prob(pc, bl));
 2328|   489k|            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   489k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 489k]
  |  |  ------------------
  |  |   35|   489k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   489k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 2329|      0|                printf("poc=%d,y=%d,x=%d,bl=%d,ctx=%d,bp=%d: r=%d\n",
 2330|      0|                       f->frame_hdr->frame_offset, t->by, t->bx, bl, ctx,
 2331|      0|                       is_split ? PARTITION_SPLIT : PARTITION_H, ts->msac.rng);
  ------------------
  |  Branch (2331:24): [True: 0, False: 0]
  ------------------
 2332|   489k|        }
 2333|       |
 2334|   489k|        assert(bl < BL_8X8);
  ------------------
  |  Branch (2334:9): [True: 489k, False: 0]
  ------------------
 2335|   489k|        if (is_split) {
  ------------------
  |  Branch (2335:13): [True: 316k, False: 173k]
  ------------------
 2336|   316k|            bp = PARTITION_SPLIT;
 2337|   316k|            if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 0))) return 1;
  ------------------
  |  |   51|   316k|    ((const EdgeNode*)((uintptr_t)(n) + ((const EdgeBranch*)(n))->split_offset[i]))
  ------------------
  |  Branch (2337:17): [True: 3.91k, False: 312k]
  ------------------
 2338|   312k|            t->bx += hsz;
 2339|   312k|            if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 1))) return 1;
  ------------------
  |  |   51|   312k|    ((const EdgeNode*)((uintptr_t)(n) + ((const EdgeBranch*)(n))->split_offset[i]))
  ------------------
  |  Branch (2339:17): [True: 2.55k, False: 309k]
  ------------------
 2340|   309k|            t->bx -= hsz;
 2341|   309k|        } else {
 2342|   173k|            bp = PARTITION_H;
 2343|   173k|            if (decode_b(t, bl, dav1d_block_sizes[bl][PARTITION_H][0],
  ------------------
  |  Branch (2343:17): [True: 120, False: 173k]
  ------------------
 2344|   173k|                         PARTITION_H, node->h[0]))
 2345|    120|                return -1;
 2346|   173k|        }
 2347|   489k|    } else {
 2348|  55.3k|        assert(have_v_split);
  ------------------
  |  Branch (2348:9): [True: 55.3k, False: 0]
  ------------------
 2349|  55.3k|        unsigned is_split;
 2350|  55.3k|        if (t->frame_thread.pass == 2) {
  ------------------
  |  Branch (2350:13): [True: 0, False: 55.3k]
  ------------------
 2351|      0|            const Av1Block *const b = &f->frame_thread.b[t->by * f->b4_stride + t->bx];
 2352|      0|            is_split = b->bl != bl;
 2353|  55.3k|        } else {
 2354|  55.3k|            is_split = dav1d_msac_decode_bool(&ts->msac,
  ------------------
  |  |   54|  55.3k|#define dav1d_msac_decode_bool           dav1d_msac_decode_bool_sse2
  ------------------
 2355|  55.3k|                           gather_left_partition_prob(pc, bl));
 2356|  55.3k|            if (f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I422 && !is_split)
  ------------------
  |  Branch (2356:17): [True: 1.49k, False: 53.8k]
  |  Branch (2356:63): [True: 397, False: 1.10k]
  ------------------
 2357|    397|                return 1;
 2358|  54.9k|            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  54.9k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 54.9k]
  |  |  ------------------
  |  |   35|  54.9k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  54.9k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 2359|      0|                printf("poc=%d,y=%d,x=%d,bl=%d,ctx=%d,bp=%d: r=%d\n",
 2360|      0|                       f->frame_hdr->frame_offset, t->by, t->bx, bl, ctx,
 2361|      0|                       is_split ? PARTITION_SPLIT : PARTITION_V, ts->msac.rng);
  ------------------
  |  Branch (2361:24): [True: 0, False: 0]
  ------------------
 2362|  54.9k|        }
 2363|       |
 2364|  55.3k|        assert(bl < BL_8X8);
  ------------------
  |  Branch (2364:9): [True: 54.9k, False: 0]
  ------------------
 2365|  54.9k|        if (is_split) {
  ------------------
  |  Branch (2365:13): [True: 31.8k, False: 23.0k]
  ------------------
 2366|  31.8k|            bp = PARTITION_SPLIT;
 2367|  31.8k|            if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 0))) return 1;
  ------------------
  |  |   51|  31.8k|    ((const EdgeNode*)((uintptr_t)(n) + ((const EdgeBranch*)(n))->split_offset[i]))
  ------------------
  |  Branch (2367:17): [True: 3.81k, False: 28.0k]
  ------------------
 2368|  28.0k|            t->by += hsz;
 2369|  28.0k|            if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 2))) return 1;
  ------------------
  |  |   51|  28.0k|    ((const EdgeNode*)((uintptr_t)(n) + ((const EdgeBranch*)(n))->split_offset[i]))
  ------------------
  |  Branch (2369:17): [True: 1.62k, False: 26.4k]
  ------------------
 2370|  26.4k|            t->by -= hsz;
 2371|  26.4k|        } else {
 2372|  23.0k|            bp = PARTITION_V;
 2373|  23.0k|            if (decode_b(t, bl, dav1d_block_sizes[bl][PARTITION_V][0],
  ------------------
  |  Branch (2373:17): [True: 349, False: 22.7k]
  ------------------
 2374|  23.0k|                         PARTITION_V, node->v[0]))
 2375|    349|                return -1;
 2376|  23.0k|        }
 2377|  54.9k|    }
 2378|       |
 2379|  2.72M|    if (t->frame_thread.pass != 2 && (bp != PARTITION_SPLIT || bl == BL_8X8)) {
  ------------------
  |  Branch (2379:9): [True: 2.72M, False: 0]
  |  Branch (2379:39): [True: 1.87M, False: 845k]
  |  Branch (2379:64): [True: 139k, False: 706k]
  ------------------
 2380|  2.01M|#define set_ctx(rep_macro) \
 2381|  2.01M|        rep_macro(t->a->partition, bx8, dav1d_al_part_ctx[0][bl][bp]); \
 2382|  2.01M|        rep_macro(t->l.partition, by8, dav1d_al_part_ctx[1][bl][bp])
 2383|  2.01M|        case_set_upto16(ulog2(hsz));
  ------------------
  |  |   80|  2.01M|    switch (var) { \
  |  |   81|   563k|    case 0: set_ctx(set_ctx1); break; \
  |  |  ------------------
  |  |  |  | 2381|   563k|        rep_macro(t->a->partition, bx8, dav1d_al_part_ctx[0][bl][bp]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   81|   563k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   563k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 2382|   563k|        rep_macro(t->l.partition, by8, dav1d_al_part_ctx[1][bl][bp])
  |  |  |  |  ------------------
  |  |  |  |  |  |   81|   563k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   563k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (81:5): [True: 563k, False: 1.45M]
  |  |  ------------------
  |  |   82|   659k|    case 1: set_ctx(set_ctx2); break; \
  |  |  ------------------
  |  |  |  | 2381|   659k|        rep_macro(t->a->partition, bx8, dav1d_al_part_ctx[0][bl][bp]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   82|   659k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   659k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 2382|   659k|        rep_macro(t->l.partition, by8, dav1d_al_part_ctx[1][bl][bp])
  |  |  |  |  ------------------
  |  |  |  |  |  |   82|   659k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   659k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (82:5): [True: 659k, False: 1.35M]
  |  |  ------------------
  |  |   83|   363k|    case 2: set_ctx(set_ctx4); break; \
  |  |  ------------------
  |  |  |  | 2381|   363k|        rep_macro(t->a->partition, bx8, dav1d_al_part_ctx[0][bl][bp]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   83|   363k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   363k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 2382|   363k|        rep_macro(t->l.partition, by8, dav1d_al_part_ctx[1][bl][bp])
  |  |  |  |  ------------------
  |  |  |  |  |  |   83|   363k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   363k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (83:5): [True: 363k, False: 1.65M]
  |  |  ------------------
  |  |   84|   276k|    case 3: set_ctx(set_ctx8); break; \
  |  |  ------------------
  |  |  |  | 2381|   276k|        rep_macro(t->a->partition, bx8, dav1d_al_part_ctx[0][bl][bp]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|   276k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   276k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 2382|   276k|        rep_macro(t->l.partition, by8, dav1d_al_part_ctx[1][bl][bp])
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|   276k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   276k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (84:5): [True: 276k, False: 1.73M]
  |  |  ------------------
  |  |   85|   153k|    case 4: set_ctx(set_ctx16); break; \
  |  |  ------------------
  |  |  |  | 2381|   153k|        rep_macro(t->a->partition, bx8, dav1d_al_part_ctx[0][bl][bp]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   85|   153k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   153k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   153k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   153k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 153k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 2382|   153k|        rep_macro(t->l.partition, by8, dav1d_al_part_ctx[1][bl][bp])
  |  |  |  |  ------------------
  |  |  |  |  |  |   85|   153k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   153k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   153k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   153k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 153k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (85:5): [True: 153k, False: 1.86M]
  |  |  ------------------
  |  |   86|      0|    default: assert(0); \
  |  |  ------------------
  |  |  |  Branch (86:5): [True: 0, False: 2.01M]
  |  |  ------------------
  |  |   87|  2.01M|    }
  ------------------
  |  Branch (2383:9): [Folded, False: 0]
  ------------------
 2384|  2.01M|#undef set_ctx
 2385|  2.01M|    }
 2386|       |
 2387|  2.72M|    return 0;
 2388|  2.72M|}
decode.c:decode_b:
  687|  3.91M|                    const enum EdgeFlags intra_edge_flags) {
  688|  3.91M|    Dav1dTileState *const ts = t->ts;
  689|  3.91M|    const Dav1dFrameContext *const f = t->f;
  690|  3.91M|    Av1Block b_mem, *const b = t->frame_thread.pass ?
  ------------------
  |  Branch (690:32): [True: 0, False: 3.91M]
  ------------------
  691|  3.91M|        &f->frame_thread.b[t->by * f->b4_stride + t->bx] : &b_mem;
  692|  3.91M|    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
  693|  3.91M|    const int bx4 = t->bx & 31, by4 = t->by & 31;
  694|  3.91M|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
  695|  3.91M|    const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
  696|  3.91M|    const int cbx4 = bx4 >> ss_hor, cby4 = by4 >> ss_ver;
  697|  3.91M|    const int bw4 = b_dim[0], bh4 = b_dim[1];
  698|  3.91M|    const int w4 = imin(bw4, f->bw - t->bx), h4 = imin(bh4, f->bh - t->by);
  699|  3.91M|    const int cbw4 = (bw4 + ss_hor) >> ss_hor, cbh4 = (bh4 + ss_ver) >> ss_ver;
  700|  3.91M|    const int have_left = t->bx > ts->tiling.col_start;
  701|  3.91M|    const int have_top = t->by > ts->tiling.row_start;
  702|  3.91M|    const int has_chroma = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400 &&
  ------------------
  |  Branch (702:28): [True: 2.46M, False: 1.45M]
  ------------------
  703|  2.46M|                           (bw4 > ss_hor || t->bx & 1) &&
  ------------------
  |  Branch (703:29): [True: 2.31M, False: 148k]
  |  Branch (703:45): [True: 74.3k, False: 74.3k]
  ------------------
  704|  2.38M|                           (bh4 > ss_ver || t->by & 1);
  ------------------
  |  Branch (704:29): [True: 2.28M, False: 100k]
  |  Branch (704:45): [True: 50.4k, False: 50.5k]
  ------------------
  705|       |
  706|  3.91M|    if (t->frame_thread.pass == 2) {
  ------------------
  |  Branch (706:9): [True: 0, False: 3.91M]
  ------------------
  707|      0|        if (b->intra) {
  ------------------
  |  Branch (707:13): [True: 0, False: 0]
  ------------------
  708|      0|            f->bd_fn.recon_b_intra(t, bs, intra_edge_flags, b);
  709|       |
  710|      0|            const enum IntraPredMode y_mode_nofilt =
  711|      0|                b->y_mode == FILTER_PRED ? DC_PRED : b->y_mode;
  ------------------
  |  Branch (711:17): [True: 0, False: 0]
  ------------------
  712|      0|#define set_ctx(rep_macro) \
  713|      0|            rep_macro(edge->mode, off, y_mode_nofilt); \
  714|      0|            rep_macro(edge->intra, off, 1)
  715|      0|            BlockContext *edge = t->a;
  716|      0|            for (int i = 0, off = bx4; i < 2; i++, off = by4, edge = &t->l) {
  ------------------
  |  Branch (716:40): [True: 0, False: 0]
  ------------------
  717|      0|                case_set(b_dim[2 + i]);
  ------------------
  |  |   70|      0|    switch (var) { \
  |  |   71|      0|    case 0: set_ctx(set_ctx1); break; \
  |  |  ------------------
  |  |  |  |  713|      0|            rep_macro(edge->mode, off, y_mode_nofilt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|      0|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|      0|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  714|      0|            rep_macro(edge->intra, off, 1)
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|      0|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|      0|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (71:5): [True: 0, False: 0]
  |  |  ------------------
  |  |   72|      0|    case 1: set_ctx(set_ctx2); break; \
  |  |  ------------------
  |  |  |  |  713|      0|            rep_macro(edge->mode, off, y_mode_nofilt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|      0|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|      0|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  714|      0|            rep_macro(edge->intra, off, 1)
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|      0|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|      0|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (72:5): [True: 0, False: 0]
  |  |  ------------------
  |  |   73|      0|    case 2: set_ctx(set_ctx4); break; \
  |  |  ------------------
  |  |  |  |  713|      0|            rep_macro(edge->mode, off, y_mode_nofilt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|      0|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|      0|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  714|      0|            rep_macro(edge->intra, off, 1)
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|      0|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|      0|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (73:5): [True: 0, False: 0]
  |  |  ------------------
  |  |   74|      0|    case 3: set_ctx(set_ctx8); break; \
  |  |  ------------------
  |  |  |  |  713|      0|            rep_macro(edge->mode, off, y_mode_nofilt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|      0|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|      0|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  714|      0|            rep_macro(edge->intra, off, 1)
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|      0|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|      0|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (74:5): [True: 0, False: 0]
  |  |  ------------------
  |  |   75|      0|    case 4: set_ctx(set_ctx16); break; \
  |  |  ------------------
  |  |  |  |  713|      0|            rep_macro(edge->mode, off, y_mode_nofilt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|      0|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|      0|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|      0|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|      0|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 0]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  714|      0|            rep_macro(edge->intra, off, 1)
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|      0|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|      0|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|      0|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|      0|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 0]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (75:5): [True: 0, False: 0]
  |  |  ------------------
  |  |   76|      0|    case 5: set_ctx(set_ctx32); break; \
  |  |  ------------------
  |  |  |  |  713|      0|            rep_macro(edge->mode, off, y_mode_nofilt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|      0|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|      0|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|      0|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|      0|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 0]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  714|      0|            rep_macro(edge->intra, off, 1)
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|      0|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|      0|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|      0|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|      0|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 0]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (76:5): [True: 0, False: 0]
  |  |  ------------------
  |  |   77|      0|    default: assert(0); \
  |  |  ------------------
  |  |  |  Branch (77:5): [True: 0, False: 0]
  |  |  ------------------
  |  |   78|      0|    }
  ------------------
  |  Branch (717:17): [Folded, False: 0]
  ------------------
  718|      0|            }
  719|      0|#undef set_ctx
  720|      0|            if (IS_INTER_OR_SWITCH(f->frame_hdr)) {
  ------------------
  |  |   36|      0|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  721|      0|                refmvs_block *const r = &t->rt.r[(t->by & 31) + 5 + bh4 - 1][t->bx];
  722|      0|                for (int x = 0; x < bw4; x++) {
  ------------------
  |  Branch (722:33): [True: 0, False: 0]
  ------------------
  723|      0|                    r[x].ref.ref[0] = 0;
  724|      0|                    r[x].bs = bs;
  725|      0|                }
  726|      0|                refmvs_block *const *rr = &t->rt.r[(t->by & 31) + 5];
  727|      0|                for (int y = 0; y < bh4 - 1; y++) {
  ------------------
  |  Branch (727:33): [True: 0, False: 0]
  ------------------
  728|      0|                    rr[y][t->bx + bw4 - 1].ref.ref[0] = 0;
  729|      0|                    rr[y][t->bx + bw4 - 1].bs = bs;
  730|      0|                }
  731|      0|            }
  732|       |
  733|      0|            if (has_chroma) {
  ------------------
  |  Branch (733:17): [True: 0, False: 0]
  ------------------
  734|      0|                uint8_t uv_mode = b->uv_mode;
  735|      0|                dav1d_memset_pow2[ulog2(cbw4)](&t->a->uvmode[cbx4], uv_mode);
  736|      0|                dav1d_memset_pow2[ulog2(cbh4)](&t->l.uvmode[cby4], uv_mode);
  737|      0|            }
  738|      0|        } else {
  739|      0|            if (IS_INTER_OR_SWITCH(f->frame_hdr) /* not intrabc */ &&
  ------------------
  |  |   36|      0|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  740|      0|                b->comp_type == COMP_INTER_NONE && b->motion_mode == MM_WARP)
  ------------------
  |  Branch (740:17): [True: 0, False: 0]
  |  Branch (740:52): [True: 0, False: 0]
  ------------------
  741|      0|            {
  742|      0|                if (b->matrix[0] == INT16_MIN) {
  ------------------
  |  Branch (742:21): [True: 0, False: 0]
  ------------------
  743|      0|                    t->warpmv.type = DAV1D_WM_TYPE_IDENTITY;
  744|      0|                } else {
  745|      0|                    t->warpmv.type = DAV1D_WM_TYPE_AFFINE;
  746|      0|                    t->warpmv.matrix[2] = b->matrix[0] + 0x10000;
  747|      0|                    t->warpmv.matrix[3] = b->matrix[1];
  748|      0|                    t->warpmv.matrix[4] = b->matrix[2];
  749|      0|                    t->warpmv.matrix[5] = b->matrix[3] + 0x10000;
  750|      0|                    dav1d_set_affine_mv2d(bw4, bh4, b->mv2d, &t->warpmv,
  751|      0|                                          t->bx, t->by);
  752|      0|                    dav1d_get_shear_params(&t->warpmv);
  753|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  754|      0|                    if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|      0|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 0]
  |  |  ------------------
  |  |   35|      0|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|      0|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  755|      0|                        printf("[ %c%x %c%x %c%x\n  %c%x %c%x %c%x ]\n"
  756|      0|                               "alpha=%c%x, beta=%c%x, gamma=%c%x, delta=%c%x, mv=y:%d,x:%d\n",
  757|      0|                               signabs(t->warpmv.matrix[0]),
  ------------------
  |  |  753|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (753:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  758|      0|                               signabs(t->warpmv.matrix[1]),
  ------------------
  |  |  753|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (753:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  759|      0|                               signabs(t->warpmv.matrix[2]),
  ------------------
  |  |  753|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (753:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  760|      0|                               signabs(t->warpmv.matrix[3]),
  ------------------
  |  |  753|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (753:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  761|      0|                               signabs(t->warpmv.matrix[4]),
  ------------------
  |  |  753|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (753:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  762|      0|                               signabs(t->warpmv.matrix[5]),
  ------------------
  |  |  753|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (753:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  763|      0|                               signabs(t->warpmv.u.p.alpha),
  ------------------
  |  |  753|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (753:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  764|      0|                               signabs(t->warpmv.u.p.beta),
  ------------------
  |  |  753|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (753:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  765|      0|                               signabs(t->warpmv.u.p.gamma),
  ------------------
  |  |  753|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (753:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  766|      0|                               signabs(t->warpmv.u.p.delta),
  ------------------
  |  |  753|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (753:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  767|      0|                               b->mv2d.y, b->mv2d.x);
  768|      0|#undef signabs
  769|      0|                }
  770|      0|            }
  771|      0|            if (f->bd_fn.recon_b_inter(t, bs, b)) return -1;
  ------------------
  |  Branch (771:17): [True: 0, False: 0]
  ------------------
  772|       |
  773|      0|            const uint8_t *const filter = dav1d_filter_dir[b->filter2d];
  774|      0|            BlockContext *edge = t->a;
  775|      0|            for (int i = 0, off = bx4; i < 2; i++, off = by4, edge = &t->l) {
  ------------------
  |  Branch (775:40): [True: 0, False: 0]
  ------------------
  776|      0|#define set_ctx(rep_macro) \
  777|      0|                rep_macro(edge->filter[0], off, filter[0]); \
  778|      0|                rep_macro(edge->filter[1], off, filter[1]); \
  779|      0|                rep_macro(edge->intra, off, 0)
  780|      0|                case_set(b_dim[2 + i]);
  ------------------
  |  |   70|      0|    switch (var) { \
  |  |   71|      0|    case 0: set_ctx(set_ctx1); break; \
  |  |  ------------------
  |  |  |  |  777|      0|                rep_macro(edge->filter[0], off, filter[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|      0|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|      0|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  778|      0|                rep_macro(edge->filter[1], off, filter[1]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|      0|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|      0|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  779|      0|                rep_macro(edge->intra, off, 0)
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|      0|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|      0|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (71:5): [True: 0, False: 0]
  |  |  ------------------
  |  |   72|      0|    case 1: set_ctx(set_ctx2); break; \
  |  |  ------------------
  |  |  |  |  777|      0|                rep_macro(edge->filter[0], off, filter[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|      0|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|      0|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  778|      0|                rep_macro(edge->filter[1], off, filter[1]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|      0|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|      0|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  779|      0|                rep_macro(edge->intra, off, 0)
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|      0|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|      0|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (72:5): [True: 0, False: 0]
  |  |  ------------------
  |  |   73|      0|    case 2: set_ctx(set_ctx4); break; \
  |  |  ------------------
  |  |  |  |  777|      0|                rep_macro(edge->filter[0], off, filter[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|      0|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|      0|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  778|      0|                rep_macro(edge->filter[1], off, filter[1]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|      0|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|      0|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  779|      0|                rep_macro(edge->intra, off, 0)
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|      0|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|      0|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (73:5): [True: 0, False: 0]
  |  |  ------------------
  |  |   74|      0|    case 3: set_ctx(set_ctx8); break; \
  |  |  ------------------
  |  |  |  |  777|      0|                rep_macro(edge->filter[0], off, filter[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|      0|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|      0|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  778|      0|                rep_macro(edge->filter[1], off, filter[1]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|      0|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|      0|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  779|      0|                rep_macro(edge->intra, off, 0)
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|      0|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|      0|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (74:5): [True: 0, False: 0]
  |  |  ------------------
  |  |   75|      0|    case 4: set_ctx(set_ctx16); break; \
  |  |  ------------------
  |  |  |  |  777|      0|                rep_macro(edge->filter[0], off, filter[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|      0|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|      0|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|      0|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|      0|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 0]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  778|      0|                rep_macro(edge->filter[1], off, filter[1]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|      0|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|      0|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|      0|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|      0|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 0]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  779|      0|                rep_macro(edge->intra, off, 0)
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|      0|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|      0|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|      0|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|      0|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 0]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (75:5): [True: 0, False: 0]
  |  |  ------------------
  |  |   76|      0|    case 5: set_ctx(set_ctx32); break; \
  |  |  ------------------
  |  |  |  |  777|      0|                rep_macro(edge->filter[0], off, filter[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|      0|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|      0|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|      0|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|      0|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 0]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  778|      0|                rep_macro(edge->filter[1], off, filter[1]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|      0|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|      0|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|      0|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|      0|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 0]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  779|      0|                rep_macro(edge->intra, off, 0)
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|      0|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|      0|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|      0|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|      0|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 0]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (76:5): [True: 0, False: 0]
  |  |  ------------------
  |  |   77|      0|    default: assert(0); \
  |  |  ------------------
  |  |  |  Branch (77:5): [True: 0, False: 0]
  |  |  ------------------
  |  |   78|      0|    }
  ------------------
  |  Branch (780:17): [Folded, False: 0]
  ------------------
  781|      0|#undef set_ctx
  782|      0|            }
  783|       |
  784|      0|            if (IS_INTER_OR_SWITCH(f->frame_hdr)) {
  ------------------
  |  |   36|      0|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  785|      0|                refmvs_block *const r = &t->rt.r[(t->by & 31) + 5 + bh4 - 1][t->bx];
  786|      0|                for (int x = 0; x < bw4; x++) {
  ------------------
  |  Branch (786:33): [True: 0, False: 0]
  ------------------
  787|      0|                    r[x].ref.ref[0] = b->ref[0] + 1;
  788|      0|                    r[x].mv.mv[0] = b->mv[0];
  789|      0|                    r[x].bs = bs;
  790|      0|                }
  791|      0|                refmvs_block *const *rr = &t->rt.r[(t->by & 31) + 5];
  792|      0|                for (int y = 0; y < bh4 - 1; y++) {
  ------------------
  |  Branch (792:33): [True: 0, False: 0]
  ------------------
  793|      0|                    rr[y][t->bx + bw4 - 1].ref.ref[0] = b->ref[0] + 1;
  794|      0|                    rr[y][t->bx + bw4 - 1].mv.mv[0] = b->mv[0];
  795|      0|                    rr[y][t->bx + bw4 - 1].bs = bs;
  796|      0|                }
  797|      0|            }
  798|       |
  799|      0|            if (has_chroma) {
  ------------------
  |  Branch (799:17): [True: 0, False: 0]
  ------------------
  800|      0|                dav1d_memset_pow2[ulog2(cbw4)](&t->a->uvmode[cbx4], DC_PRED);
  801|      0|                dav1d_memset_pow2[ulog2(cbh4)](&t->l.uvmode[cby4], DC_PRED);
  802|      0|            }
  803|      0|        }
  804|      0|        return 0;
  805|      0|    }
  806|       |
  807|  3.91M|    const int cw4 = (w4 + ss_hor) >> ss_hor, ch4 = (h4 + ss_ver) >> ss_ver;
  808|       |
  809|  3.91M|    b->bl = bl;
  810|  3.91M|    b->bp = bp;
  811|  3.91M|    b->bs = bs;
  812|       |
  813|  3.91M|    const Dav1dSegmentationData *seg = NULL;
  814|       |
  815|       |    // segment_id (if seg_feature for skip/ref/gmv is enabled)
  816|  3.91M|    int seg_pred = 0;
  817|  3.91M|    if (f->frame_hdr->segmentation.enabled) {
  ------------------
  |  Branch (817:9): [True: 1.15M, False: 2.76M]
  ------------------
  818|  1.15M|        if (!f->frame_hdr->segmentation.update_map) {
  ------------------
  |  Branch (818:13): [True: 262k, False: 892k]
  ------------------
  819|   262k|            if (f->prev_segmap) {
  ------------------
  |  Branch (819:17): [True: 163k, False: 99.5k]
  ------------------
  820|   163k|                unsigned seg_id = get_prev_frame_segid(f, t->by, t->bx, w4, h4,
  821|   163k|                                                       f->prev_segmap,
  822|   163k|                                                       f->b4_stride);
  823|   163k|                if (seg_id >= 8) return -1;
  ------------------
  |  Branch (823:21): [True: 0, False: 163k]
  ------------------
  824|   163k|                b->seg_id = seg_id;
  825|   163k|            } else {
  826|  99.5k|                b->seg_id = 0;
  827|  99.5k|            }
  828|   262k|            seg = &f->frame_hdr->segmentation.seg_data.d[b->seg_id];
  829|   892k|        } else if (f->frame_hdr->segmentation.seg_data.preskip) {
  ------------------
  |  Branch (829:20): [True: 514k, False: 378k]
  ------------------
  830|   514k|            if (f->frame_hdr->segmentation.temporal &&
  ------------------
  |  Branch (830:17): [True: 42.3k, False: 471k]
  ------------------
  831|  42.3k|                (seg_pred = dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  42.3k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  |  Branch (831:17): [True: 21.3k, False: 21.0k]
  ------------------
  832|  42.3k|                                ts->cdf.m.seg_pred[t->a->seg_pred[bx4] +
  833|  42.3k|                                t->l.seg_pred[by4]])))
  834|  21.3k|            {
  835|       |                // temporal predicted seg_id
  836|  21.3k|                if (f->prev_segmap) {
  ------------------
  |  Branch (836:21): [True: 6.01k, False: 15.3k]
  ------------------
  837|  6.01k|                    unsigned seg_id = get_prev_frame_segid(f, t->by, t->bx,
  838|  6.01k|                                                           w4, h4,
  839|  6.01k|                                                           f->prev_segmap,
  840|  6.01k|                                                           f->b4_stride);
  841|  6.01k|                    if (seg_id >= 8) return -1;
  ------------------
  |  Branch (841:25): [True: 0, False: 6.01k]
  ------------------
  842|  6.01k|                    b->seg_id = seg_id;
  843|  15.3k|                } else {
  844|  15.3k|                    b->seg_id = 0;
  845|  15.3k|                }
  846|   492k|            } else {
  847|   492k|                int seg_ctx;
  848|   492k|                const unsigned pred_seg_id =
  849|   492k|                    get_cur_frame_segid(t->by, t->bx, have_top, have_left,
  850|   492k|                                        &seg_ctx, f->cur_segmap, f->b4_stride);
  851|   492k|                const unsigned diff = dav1d_msac_decode_symbol_adapt8(&ts->msac,
  ------------------
  |  |   48|   492k|#define dav1d_msac_decode_symbol_adapt8  dav1d_msac_decode_symbol_adapt8_sse2
  ------------------
  852|   492k|                                          ts->cdf.m.seg_id[seg_ctx],
  853|   492k|                                          DAV1D_MAX_SEGMENTS - 1);
  ------------------
  |  |   43|   492k|#define DAV1D_MAX_SEGMENTS 8
  ------------------
  854|   492k|                const unsigned last_active_seg_id =
  855|   492k|                    f->frame_hdr->segmentation.seg_data.last_active_segid;
  856|   492k|                b->seg_id = neg_deinterleave(diff, pred_seg_id,
  857|   492k|                                             last_active_seg_id + 1);
  858|   492k|                if (b->seg_id > last_active_seg_id) b->seg_id = 0; // error?
  ------------------
  |  Branch (858:21): [True: 35.9k, False: 456k]
  ------------------
  859|   492k|                if (b->seg_id >= DAV1D_MAX_SEGMENTS) b->seg_id = 0; // error?
  ------------------
  |  |   43|   492k|#define DAV1D_MAX_SEGMENTS 8
  ------------------
  |  Branch (859:21): [True: 0, False: 492k]
  ------------------
  860|   492k|            }
  861|       |
  862|   514k|            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   514k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 514k]
  |  |  ------------------
  |  |   35|   514k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   514k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  863|      0|                printf("Post-segid[preskip;%d]: r=%d\n",
  864|      0|                       b->seg_id, ts->msac.rng);
  865|       |
  866|   514k|            seg = &f->frame_hdr->segmentation.seg_data.d[b->seg_id];
  867|   514k|        }
  868|  2.76M|    } else {
  869|  2.76M|        b->seg_id = 0;
  870|  2.76M|    }
  871|       |
  872|       |    // skip_mode
  873|  3.91M|    if ((!seg || (!seg->globalmv && seg->ref == -1 && !seg->skip)) &&
  ------------------
  |  Branch (873:10): [True: 3.14M, False: 776k]
  |  Branch (873:19): [True: 423k, False: 353k]
  |  Branch (873:37): [True: 166k, False: 256k]
  |  Branch (873:55): [True: 85.7k, False: 81.2k]
  ------------------
  874|  3.22M|        f->frame_hdr->skip_mode_enabled && imin(bw4, bh4) > 1)
  ------------------
  |  Branch (874:9): [True: 72.1k, False: 3.15M]
  |  Branch (874:44): [True: 58.3k, False: 13.8k]
  ------------------
  875|  58.3k|    {
  876|  58.3k|        const int smctx = t->a->skip_mode[bx4] + t->l.skip_mode[by4];
  877|  58.3k|        b->skip_mode = dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  58.3k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  878|  58.3k|                           ts->cdf.m.skip_mode[smctx]);
  879|  58.3k|        if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  58.3k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 58.3k]
  |  |  ------------------
  |  |   35|  58.3k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  58.3k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  880|      0|            printf("Post-skipmode[%d]: r=%d\n", b->skip_mode, ts->msac.rng);
  881|  3.86M|    } else {
  882|  3.86M|        b->skip_mode = 0;
  883|  3.86M|    }
  884|       |
  885|       |    // skip
  886|  3.91M|    if (b->skip_mode || (seg && seg->skip)) {
  ------------------
  |  Branch (886:9): [True: 16.3k, False: 3.90M]
  |  Branch (886:26): [True: 776k, False: 3.12M]
  |  Branch (886:33): [True: 572k, False: 204k]
  ------------------
  887|   588k|        b->skip = 1;
  888|  3.33M|    } else {
  889|  3.33M|        const int sctx = t->a->skip[bx4] + t->l.skip[by4];
  890|  3.33M|        b->skip = dav1d_msac_decode_bool_adapt(&ts->msac, ts->cdf.m.skip[sctx]);
  ------------------
  |  |   52|  3.33M|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  891|  3.33M|        if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  3.33M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 3.33M]
  |  |  ------------------
  |  |   35|  3.33M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  3.33M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  892|      0|            printf("Post-skip[%d]: r=%d\n", b->skip, ts->msac.rng);
  893|  3.33M|    }
  894|       |
  895|       |    // segment_id
  896|  3.91M|    if (f->frame_hdr->segmentation.enabled &&
  ------------------
  |  Branch (896:9): [True: 1.15M, False: 2.76M]
  ------------------
  897|  1.15M|        f->frame_hdr->segmentation.update_map &&
  ------------------
  |  Branch (897:9): [True: 892k, False: 262k]
  ------------------
  898|   892k|        !f->frame_hdr->segmentation.seg_data.preskip)
  ------------------
  |  Branch (898:9): [True: 378k, False: 514k]
  ------------------
  899|   378k|    {
  900|   378k|        if (!b->skip && f->frame_hdr->segmentation.temporal &&
  ------------------
  |  Branch (900:13): [True: 153k, False: 225k]
  |  Branch (900:25): [True: 5.93k, False: 147k]
  ------------------
  901|  5.93k|            (seg_pred = dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  5.93k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  |  Branch (901:13): [True: 3.13k, False: 2.80k]
  ------------------
  902|  5.93k|                            ts->cdf.m.seg_pred[t->a->seg_pred[bx4] +
  903|  5.93k|                            t->l.seg_pred[by4]])))
  904|  3.13k|        {
  905|       |            // temporal predicted seg_id
  906|  3.13k|            if (f->prev_segmap) {
  ------------------
  |  Branch (906:17): [True: 1.47k, False: 1.66k]
  ------------------
  907|  1.47k|                unsigned seg_id = get_prev_frame_segid(f, t->by, t->bx, w4, h4,
  908|  1.47k|                                                       f->prev_segmap,
  909|  1.47k|                                                       f->b4_stride);
  910|  1.47k|                if (seg_id >= 8) return -1;
  ------------------
  |  Branch (910:21): [True: 0, False: 1.47k]
  ------------------
  911|  1.47k|                b->seg_id = seg_id;
  912|  1.66k|            } else {
  913|  1.66k|                b->seg_id = 0;
  914|  1.66k|            }
  915|   375k|        } else {
  916|   375k|            int seg_ctx;
  917|   375k|            const unsigned pred_seg_id =
  918|   375k|                get_cur_frame_segid(t->by, t->bx, have_top, have_left,
  919|   375k|                                    &seg_ctx, f->cur_segmap, f->b4_stride);
  920|   375k|            if (b->skip) {
  ------------------
  |  Branch (920:17): [True: 225k, False: 150k]
  ------------------
  921|   225k|                b->seg_id = pred_seg_id;
  922|   225k|            } else {
  923|   150k|                const unsigned diff = dav1d_msac_decode_symbol_adapt8(&ts->msac,
  ------------------
  |  |   48|   150k|#define dav1d_msac_decode_symbol_adapt8  dav1d_msac_decode_symbol_adapt8_sse2
  ------------------
  924|   150k|                                          ts->cdf.m.seg_id[seg_ctx],
  925|   150k|                                          DAV1D_MAX_SEGMENTS - 1);
  ------------------
  |  |   43|   150k|#define DAV1D_MAX_SEGMENTS 8
  ------------------
  926|   150k|                const unsigned last_active_seg_id =
  927|   150k|                    f->frame_hdr->segmentation.seg_data.last_active_segid;
  928|   150k|                b->seg_id = neg_deinterleave(diff, pred_seg_id,
  929|   150k|                                             last_active_seg_id + 1);
  930|   150k|                if (b->seg_id > last_active_seg_id) b->seg_id = 0; // error?
  ------------------
  |  Branch (930:21): [True: 3.18k, False: 146k]
  ------------------
  931|   150k|            }
  932|   375k|            if (b->seg_id >= DAV1D_MAX_SEGMENTS) b->seg_id = 0; // error?
  ------------------
  |  |   43|   375k|#define DAV1D_MAX_SEGMENTS 8
  ------------------
  |  Branch (932:17): [True: 6.62k, False: 368k]
  ------------------
  933|   375k|        }
  934|       |
  935|   378k|        seg = &f->frame_hdr->segmentation.seg_data.d[b->seg_id];
  936|       |
  937|   378k|        if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   378k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 378k]
  |  |  ------------------
  |  |   35|   378k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   378k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  938|      0|            printf("Post-segid[postskip;%d]: r=%d\n",
  939|      0|                   b->seg_id, ts->msac.rng);
  940|   378k|    }
  941|       |
  942|       |    // cdef index
  943|  3.91M|    if (!b->skip) {
  ------------------
  |  Branch (943:9): [True: 2.07M, False: 1.84M]
  ------------------
  944|  2.07M|        const int idx = f->seq_hdr->sb128 ? ((t->bx & 16) >> 4) +
  ------------------
  |  Branch (944:25): [True: 1.04M, False: 1.02M]
  ------------------
  945|  1.04M|                                           ((t->by & 16) >> 3) : 0;
  946|  2.07M|        if (t->cur_sb_cdef_idx_ptr[idx] == -1) {
  ------------------
  |  Branch (946:13): [True: 336k, False: 1.73M]
  ------------------
  947|   336k|            const int v = dav1d_msac_decode_bools(&ts->msac,
  948|   336k|                              f->frame_hdr->cdef.n_bits);
  949|   336k|            t->cur_sb_cdef_idx_ptr[idx] = v;
  950|   336k|            if (bw4 > 16) t->cur_sb_cdef_idx_ptr[idx + 1] = v;
  ------------------
  |  Branch (950:17): [True: 35.8k, False: 300k]
  ------------------
  951|   336k|            if (bh4 > 16) t->cur_sb_cdef_idx_ptr[idx + 2] = v;
  ------------------
  |  Branch (951:17): [True: 28.4k, False: 308k]
  ------------------
  952|   336k|            if (bw4 == 32 && bh4 == 32) t->cur_sb_cdef_idx_ptr[idx + 3] = v;
  ------------------
  |  Branch (952:17): [True: 35.8k, False: 300k]
  |  Branch (952:30): [True: 25.2k, False: 10.6k]
  ------------------
  953|       |
  954|   336k|            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   336k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 336k]
  |  |  ------------------
  |  |   35|   336k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   336k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  955|      0|                printf("Post-cdef_idx[%d]: r=%d\n",
  956|      0|                        *t->cur_sb_cdef_idx_ptr, ts->msac.rng);
  957|   336k|        }
  958|  2.07M|    }
  959|       |
  960|       |    // delta-q/lf
  961|  3.91M|    if (!((t->bx | t->by) & (31 >> !f->seq_hdr->sb128))) {
  ------------------
  |  Branch (961:9): [True: 567k, False: 3.35M]
  ------------------
  962|   567k|        const int prev_qidx = ts->last_qidx;
  963|   567k|        const int have_delta_q = f->frame_hdr->delta.q.present &&
  ------------------
  |  Branch (963:34): [True: 202k, False: 365k]
  ------------------
  964|   202k|            (bs != (f->seq_hdr->sb128 ? BS_128x128 : BS_64x64) || !b->skip);
  ------------------
  |  Branch (964:14): [True: 122k, False: 79.5k]
  |  Branch (964:21): [True: 18.7k, False: 183k]
  |  Branch (964:67): [True: 11.0k, False: 68.5k]
  ------------------
  965|       |
  966|   567k|        uint32_t prev_delta_lf = ts->last_delta_lf.u32;
  967|       |
  968|   567k|        if (have_delta_q) {
  ------------------
  |  Branch (968:13): [True: 133k, False: 433k]
  ------------------
  969|   133k|            int delta_q = dav1d_msac_decode_symbol_adapt4(&ts->msac,
  ------------------
  |  |   47|   133k|#define dav1d_msac_decode_symbol_adapt4  dav1d_msac_decode_symbol_adapt4_sse2
  ------------------
  970|   133k|                                                          ts->cdf.m.delta_q, 3);
  971|   133k|            if (delta_q == 3) {
  ------------------
  |  Branch (971:17): [True: 30.6k, False: 103k]
  ------------------
  972|  30.6k|                const int n_bits = 1 + dav1d_msac_decode_bools(&ts->msac, 3);
  973|  30.6k|                delta_q = dav1d_msac_decode_bools(&ts->msac, n_bits) +
  974|  30.6k|                          1 + (1 << n_bits);
  975|  30.6k|            }
  976|   133k|            if (delta_q) {
  ------------------
  |  Branch (976:17): [True: 42.4k, False: 91.2k]
  ------------------
  977|  42.4k|                if (dav1d_msac_decode_bool_equi(&ts->msac)) delta_q = -delta_q;
  ------------------
  |  |   53|  42.4k|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
  |  Branch (977:21): [True: 35.0k, False: 7.42k]
  ------------------
  978|  42.4k|                delta_q *= 1 << f->frame_hdr->delta.q.res_log2;
  979|  42.4k|            }
  980|   133k|            ts->last_qidx = iclip(ts->last_qidx + delta_q, 1, 255);
  981|   133k|            if (have_delta_q && DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   133k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 133k]
  |  |  ------------------
  |  |   35|   133k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   133k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  |  Branch (981:17): [True: 133k, False: 0]
  ------------------
  982|      0|                printf("Post-delta_q[%d->%d]: r=%d\n",
  983|      0|                       delta_q, ts->last_qidx, ts->msac.rng);
  984|       |
  985|   133k|            if (f->frame_hdr->delta.lf.present) {
  ------------------
  |  Branch (985:17): [True: 51.3k, False: 82.4k]
  ------------------
  986|  51.3k|                const int n_lfs = f->frame_hdr->delta.lf.multi ?
  ------------------
  |  Branch (986:35): [True: 35.8k, False: 15.4k]
  ------------------
  987|  35.8k|                    f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400 ? 4 : 2 : 1;
  ------------------
  |  Branch (987:21): [True: 28.3k, False: 7.50k]
  ------------------
  988|       |
  989|   195k|                for (int i = 0; i < n_lfs; i++) {
  ------------------
  |  Branch (989:33): [True: 143k, False: 51.3k]
  ------------------
  990|   143k|                    int delta_lf = dav1d_msac_decode_symbol_adapt4(&ts->msac,
  ------------------
  |  |   47|   143k|#define dav1d_msac_decode_symbol_adapt4  dav1d_msac_decode_symbol_adapt4_sse2
  ------------------
  991|   143k|                        ts->cdf.m.delta_lf[i + f->frame_hdr->delta.lf.multi], 3);
  992|   143k|                    if (delta_lf == 3) {
  ------------------
  |  Branch (992:25): [True: 16.7k, False: 127k]
  ------------------
  993|  16.7k|                        const int n_bits = 1 + dav1d_msac_decode_bools(&ts->msac, 3);
  994|  16.7k|                        delta_lf = dav1d_msac_decode_bools(&ts->msac, n_bits) +
  995|  16.7k|                                   1 + (1 << n_bits);
  996|  16.7k|                    }
  997|   143k|                    if (delta_lf) {
  ------------------
  |  Branch (997:25): [True: 35.4k, False: 108k]
  ------------------
  998|  35.4k|                        if (dav1d_msac_decode_bool_equi(&ts->msac))
  ------------------
  |  |   53|  35.4k|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
  |  Branch (998:29): [True: 26.1k, False: 9.29k]
  ------------------
  999|  26.1k|                            delta_lf = -delta_lf;
 1000|  35.4k|                        delta_lf *= 1 << f->frame_hdr->delta.lf.res_log2;
 1001|  35.4k|                    }
 1002|   143k|                    ts->last_delta_lf.i8[i] =
 1003|   143k|                        iclip(ts->last_delta_lf.i8[i] + delta_lf, -63, 63);
 1004|   143k|                    if (have_delta_q && DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   143k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 143k]
  |  |  ------------------
  |  |   35|   143k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   143k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  |  Branch (1004:25): [True: 143k, False: 0]
  ------------------
 1005|      0|                        printf("Post-delta_lf[%d:%d]: r=%d\n", i, delta_lf,
 1006|      0|                               ts->msac.rng);
 1007|   143k|                }
 1008|  51.3k|            }
 1009|   133k|        }
 1010|   567k|        if (ts->last_qidx == f->frame_hdr->quant.yac) {
  ------------------
  |  Branch (1010:13): [True: 464k, False: 103k]
  ------------------
 1011|       |            // assign frame-wide q values to this sb
 1012|   464k|            ts->dq = f->dq;
 1013|   464k|        } else if (ts->last_qidx != prev_qidx) {
  ------------------
  |  Branch (1013:20): [True: 11.3k, False: 91.7k]
  ------------------
 1014|       |            // find sb-specific quant parameters
 1015|  11.3k|            init_quant_tables(f->seq_hdr, f->frame_hdr, ts->last_qidx, ts->dqmem);
 1016|  11.3k|            ts->dq = ts->dqmem;
 1017|  11.3k|        }
 1018|   567k|        if (!ts->last_delta_lf.u32) {
  ------------------
  |  Branch (1018:13): [True: 515k, False: 52.3k]
  ------------------
 1019|       |            // assign frame-wide lf values to this sb
 1020|   515k|            ts->lflvl = f->lf.lvl;
 1021|   515k|        } else if (ts->last_delta_lf.u32 != prev_delta_lf) {
  ------------------
  |  Branch (1021:20): [True: 15.3k, False: 37.0k]
  ------------------
 1022|       |            // find sb-specific lf lvl parameters
 1023|  15.3k|            ts->lflvl = ts->lflvlmem;
 1024|  15.3k|            dav1d_calc_lf_values(ts->lflvlmem, f->frame_hdr, ts->last_delta_lf.i8);
 1025|  15.3k|        }
 1026|   567k|    }
 1027|       |
 1028|  3.91M|    if (b->skip_mode) {
  ------------------
  |  Branch (1028:9): [True: 16.3k, False: 3.90M]
  ------------------
 1029|  16.3k|        b->intra = 0;
 1030|  3.90M|    } else if (IS_INTER_OR_SWITCH(f->frame_hdr)) {
  ------------------
  |  |   36|  3.90M|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 1.20M, False: 2.70M]
  |  |  ------------------
  ------------------
 1031|  1.20M|        if (seg && (seg->ref >= 0 || seg->globalmv)) {
  ------------------
  |  Branch (1031:13): [True: 410k, False: 791k]
  |  Branch (1031:21): [True: 288k, False: 121k]
  |  Branch (1031:38): [True: 50.5k, False: 71.3k]
  ------------------
 1032|   338k|            b->intra = !seg->ref;
 1033|   862k|        } else {
 1034|   862k|            const int ictx = get_intra_ctx(t->a, &t->l, by4, bx4,
 1035|   862k|                                           have_top, have_left);
 1036|   862k|            b->intra = !dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   862k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1037|   862k|                            ts->cdf.m.intra[ictx]);
 1038|   862k|            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   862k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 862k]
  |  |  ------------------
  |  |   35|   862k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   862k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1039|      0|                printf("Post-intra[%d]: r=%d\n", b->intra, ts->msac.rng);
 1040|   862k|        }
 1041|  2.70M|    } else if (f->frame_hdr->allow_intrabc) {
  ------------------
  |  Branch (1041:16): [True: 2.02M, False: 673k]
  ------------------
 1042|  2.02M|        b->intra = !dav1d_msac_decode_bool_adapt(&ts->msac, ts->cdf.m.intrabc);
  ------------------
  |  |   52|  2.02M|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1043|  2.02M|        if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  2.02M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 2.02M]
  |  |  ------------------
  |  |   35|  2.02M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  2.02M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1044|      0|            printf("Post-intrabcflag[%d]: r=%d\n", b->intra, ts->msac.rng);
 1045|  2.02M|    } else {
 1046|   673k|        b->intra = 1;
 1047|   673k|    }
 1048|       |
 1049|       |    // intra/inter-specific stuff
 1050|  3.91M|    if (b->intra) {
  ------------------
  |  Branch (1050:9): [True: 2.21M, False: 1.70M]
  ------------------
 1051|  2.21M|        uint16_t *const ymode_cdf = IS_INTER_OR_SWITCH(f->frame_hdr) ?
  ------------------
  |  |   36|  2.21M|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 119k, False: 2.09M]
  |  |  ------------------
  ------------------
 1052|   119k|            ts->cdf.m.y_mode[dav1d_ymode_size_context[bs]] :
 1053|  2.21M|            ts->cdf.kfym[dav1d_intra_mode_context[t->a->mode[bx4]]]
 1054|  2.09M|                        [dav1d_intra_mode_context[t->l.mode[by4]]];
 1055|  2.21M|        b->y_mode = dav1d_msac_decode_symbol_adapt16(&ts->msac, ymode_cdf,
  ------------------
  |  |   57|  2.21M|#define dav1d_msac_decode_symbol_adapt16(ctx, cdf, symb) ((ctx)->symbol_adapt16(ctx, cdf, symb))
  ------------------
 1056|  2.21M|                                                     N_INTRA_PRED_MODES - 1);
 1057|  2.21M|        if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  2.21M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 2.21M]
  |  |  ------------------
  |  |   35|  2.21M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  2.21M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1058|      0|            printf("Post-ymode[%d]: r=%d\n", b->y_mode, ts->msac.rng);
 1059|       |
 1060|       |        // angle delta
 1061|  2.21M|        if (b_dim[2] + b_dim[3] >= 2 && b->y_mode >= VERT_PRED &&
  ------------------
  |  Branch (1061:13): [True: 1.76M, False: 455k]
  |  Branch (1061:41): [True: 940k, False: 820k]
  ------------------
 1062|   940k|            b->y_mode <= VERT_LEFT_PRED)
  ------------------
  |  Branch (1062:13): [True: 500k, False: 439k]
  ------------------
 1063|   500k|        {
 1064|   500k|            uint16_t *const acdf = ts->cdf.m.angle_delta[b->y_mode - VERT_PRED];
 1065|   500k|            const int angle = dav1d_msac_decode_symbol_adapt8(&ts->msac, acdf, 6);
  ------------------
  |  |   48|   500k|#define dav1d_msac_decode_symbol_adapt8  dav1d_msac_decode_symbol_adapt8_sse2
  ------------------
 1066|   500k|            b->y_angle = angle - 3;
 1067|  1.71M|        } else {
 1068|  1.71M|            b->y_angle = 0;
 1069|  1.71M|        }
 1070|       |
 1071|  2.21M|        if (has_chroma) {
  ------------------
  |  Branch (1071:13): [True: 1.49M, False: 719k]
  ------------------
 1072|  1.49M|            const int cfl_allowed = f->frame_hdr->segmentation.lossless[b->seg_id] ?
  ------------------
  |  Branch (1072:37): [True: 36.1k, False: 1.46M]
  ------------------
 1073|  1.46M|                cbw4 == 1 && cbh4 == 1 : !!(cfl_allowed_mask & (1 << bs));
  ------------------
  |  Branch (1073:17): [True: 16.0k, False: 20.0k]
  |  Branch (1073:30): [True: 9.36k, False: 6.69k]
  ------------------
 1074|  1.49M|            uint16_t *const uvmode_cdf = ts->cdf.m.uv_mode[cfl_allowed][b->y_mode];
 1075|  1.49M|            b->uv_mode = dav1d_msac_decode_symbol_adapt16(&ts->msac, uvmode_cdf,
  ------------------
  |  |   57|  1.49M|#define dav1d_msac_decode_symbol_adapt16(ctx, cdf, symb) ((ctx)->symbol_adapt16(ctx, cdf, symb))
  ------------------
 1076|  1.49M|                             N_UV_INTRA_PRED_MODES - 1 - !cfl_allowed);
 1077|  1.49M|            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  1.49M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 1.49M]
  |  |  ------------------
  |  |   35|  1.49M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  1.49M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1078|      0|                printf("Post-uvmode[%d]: r=%d\n", b->uv_mode, ts->msac.rng);
 1079|       |
 1080|  1.49M|            b->uv_angle = 0;
 1081|  1.49M|            if (b->uv_mode == CFL_PRED) {
  ------------------
  |  Branch (1081:17): [True: 302k, False: 1.19M]
  ------------------
 1082|   302k|#define SIGN(a) (!!(a) + ((a) > 0))
 1083|   302k|                const int sign = dav1d_msac_decode_symbol_adapt8(&ts->msac,
  ------------------
  |  |   48|   302k|#define dav1d_msac_decode_symbol_adapt8  dav1d_msac_decode_symbol_adapt8_sse2
  ------------------
 1084|   302k|                                     ts->cdf.m.cfl_sign, 7) + 1;
 1085|   302k|                const int sign_u = sign * 0x56 >> 8, sign_v = sign - sign_u * 3;
 1086|   302k|                assert(sign_u == sign / 3);
  ------------------
  |  Branch (1086:17): [True: 302k, False: 0]
  ------------------
 1087|   302k|                if (sign_u) {
  ------------------
  |  Branch (1087:21): [True: 280k, False: 22.3k]
  ------------------
 1088|   280k|                    const int ctx = (sign_u == 2) * 3 + sign_v;
 1089|   280k|                    b->cfl_alpha[0] = dav1d_msac_decode_symbol_adapt16(&ts->msac,
  ------------------
  |  |   57|   280k|#define dav1d_msac_decode_symbol_adapt16(ctx, cdf, symb) ((ctx)->symbol_adapt16(ctx, cdf, symb))
  ------------------
 1090|   280k|                                          ts->cdf.m.cfl_alpha[ctx], 15) + 1;
 1091|   280k|                    if (sign_u == 1) b->cfl_alpha[0] = -b->cfl_alpha[0];
  ------------------
  |  Branch (1091:25): [True: 199k, False: 80.9k]
  ------------------
 1092|   280k|                } else {
 1093|  22.3k|                    b->cfl_alpha[0] = 0;
 1094|  22.3k|                }
 1095|   302k|                if (sign_v) {
  ------------------
  |  Branch (1095:21): [True: 200k, False: 101k]
  ------------------
 1096|   200k|                    const int ctx = (sign_v == 2) * 3 + sign_u;
 1097|   200k|                    b->cfl_alpha[1] = dav1d_msac_decode_symbol_adapt16(&ts->msac,
  ------------------
  |  |   57|   200k|#define dav1d_msac_decode_symbol_adapt16(ctx, cdf, symb) ((ctx)->symbol_adapt16(ctx, cdf, symb))
  ------------------
 1098|   200k|                                          ts->cdf.m.cfl_alpha[ctx], 15) + 1;
 1099|   200k|                    if (sign_v == 1) b->cfl_alpha[1] = -b->cfl_alpha[1];
  ------------------
  |  Branch (1099:25): [True: 86.8k, False: 113k]
  ------------------
 1100|   200k|                } else {
 1101|   101k|                    b->cfl_alpha[1] = 0;
 1102|   101k|                }
 1103|   302k|#undef SIGN
 1104|   302k|                if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   302k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 302k]
  |  |  ------------------
  |  |   35|   302k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   302k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1105|      0|                    printf("Post-uvalphas[%d/%d]: r=%d\n",
 1106|      0|                           b->cfl_alpha[0], b->cfl_alpha[1], ts->msac.rng);
 1107|  1.19M|            } else if (b_dim[2] + b_dim[3] >= 2 && b->uv_mode >= VERT_PRED &&
  ------------------
  |  Branch (1107:24): [True: 1.01M, False: 180k]
  |  Branch (1107:52): [True: 615k, False: 399k]
  ------------------
 1108|   615k|                       b->uv_mode <= VERT_LEFT_PRED)
  ------------------
  |  Branch (1108:24): [True: 303k, False: 312k]
  ------------------
 1109|   303k|            {
 1110|   303k|                uint16_t *const acdf = ts->cdf.m.angle_delta[b->uv_mode - VERT_PRED];
 1111|   303k|                const int angle = dav1d_msac_decode_symbol_adapt8(&ts->msac, acdf, 6);
  ------------------
  |  |   48|   303k|#define dav1d_msac_decode_symbol_adapt8  dav1d_msac_decode_symbol_adapt8_sse2
  ------------------
 1112|   303k|                b->uv_angle = angle - 3;
 1113|   303k|            }
 1114|  1.49M|        }
 1115|       |
 1116|  2.21M|        b->pal_sz[0] = b->pal_sz[1] = 0;
 1117|  2.21M|        if (f->frame_hdr->allow_screen_content_tools &&
  ------------------
  |  Branch (1117:13): [True: 1.62M, False: 594k]
  ------------------
 1118|  1.62M|            imax(bw4, bh4) <= 16 && bw4 + bh4 >= 4)
  ------------------
  |  Branch (1118:13): [True: 1.54M, False: 80.7k]
  |  Branch (1118:37): [True: 1.28M, False: 256k]
  ------------------
 1119|  1.28M|        {
 1120|  1.28M|            const int sz_ctx = b_dim[2] + b_dim[3] - 2;
 1121|  1.28M|            if (b->y_mode == DC_PRED) {
  ------------------
  |  Branch (1121:17): [True: 601k, False: 682k]
  ------------------
 1122|   601k|                const int pal_ctx = (t->a->pal_sz[bx4] > 0) + (t->l.pal_sz[by4] > 0);
 1123|   601k|                const int use_y_pal = dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   601k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1124|   601k|                                          ts->cdf.m.pal_y[sz_ctx][pal_ctx]);
 1125|   601k|                if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   601k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 601k]
  |  |  ------------------
  |  |   35|   601k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   601k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1126|      0|                    printf("Post-y_pal[%d]: r=%d\n", use_y_pal, ts->msac.rng);
 1127|   601k|                if (use_y_pal)
  ------------------
  |  Branch (1127:21): [True: 56.4k, False: 545k]
  ------------------
 1128|  56.4k|                    f->bd_fn.read_pal_plane(t, b, 0, sz_ctx, bx4, by4);
 1129|   601k|            }
 1130|       |
 1131|  1.28M|            if (has_chroma && b->uv_mode == DC_PRED) {
  ------------------
  |  Branch (1131:17): [True: 978k, False: 305k]
  |  Branch (1131:31): [True: 293k, False: 685k]
  ------------------
 1132|   293k|                const int pal_ctx = b->pal_sz[0] > 0;
 1133|   293k|                const int use_uv_pal = dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   293k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1134|   293k|                                           ts->cdf.m.pal_uv[pal_ctx]);
 1135|   293k|                if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   293k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 293k]
  |  |  ------------------
  |  |   35|   293k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   293k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1136|      0|                    printf("Post-uv_pal[%d]: r=%d\n", use_uv_pal, ts->msac.rng);
 1137|   293k|                if (use_uv_pal) // see aomedia bug 2183 for why we use luma coordinates
  ------------------
  |  Branch (1137:21): [True: 19.7k, False: 273k]
  ------------------
 1138|  19.7k|                    f->bd_fn.read_pal_uv(t, b, sz_ctx, bx4, by4);
 1139|   293k|            }
 1140|  1.28M|        }
 1141|       |
 1142|  2.21M|        if (b->y_mode == DC_PRED && !b->pal_sz[0] &&
  ------------------
  |  Branch (1142:13): [True: 972k, False: 1.24M]
  |  Branch (1142:37): [True: 916k, False: 56.4k]
  ------------------
 1143|   916k|            imax(b_dim[2], b_dim[3]) <= 3 && f->seq_hdr->filter_intra)
  ------------------
  |  Branch (1143:13): [True: 702k, False: 214k]
  |  Branch (1143:46): [True: 451k, False: 251k]
  ------------------
 1144|   451k|        {
 1145|   451k|            const int is_filter = dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   451k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1146|   451k|                                      ts->cdf.m.use_filter_intra[bs]);
 1147|   451k|            if (is_filter) {
  ------------------
  |  Branch (1147:17): [True: 299k, False: 151k]
  ------------------
 1148|   299k|                b->y_mode = FILTER_PRED;
 1149|   299k|                b->y_angle = dav1d_msac_decode_symbol_adapt8(&ts->msac,
  ------------------
  |  |   48|   299k|#define dav1d_msac_decode_symbol_adapt8  dav1d_msac_decode_symbol_adapt8_sse2
  ------------------
 1150|   299k|                                 ts->cdf.m.filter_intra, 4);
 1151|   299k|            }
 1152|   451k|            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   451k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 451k]
  |  |  ------------------
  |  |   35|   451k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   451k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1153|      0|                printf("Post-filterintramode[%d/%d]: r=%d\n",
 1154|      0|                       b->y_mode, b->y_angle, ts->msac.rng);
 1155|   451k|        }
 1156|       |
 1157|  2.21M|        if (b->pal_sz[0]) {
  ------------------
  |  Branch (1157:13): [True: 56.4k, False: 2.16M]
  ------------------
 1158|  56.4k|            uint8_t *pal_idx;
 1159|  56.4k|            if (t->frame_thread.pass) {
  ------------------
  |  Branch (1159:17): [True: 0, False: 56.4k]
  ------------------
 1160|      0|                const int p = t->frame_thread.pass & 1;
 1161|      0|                assert(ts->frame_thread[p].pal_idx);
  ------------------
  |  Branch (1161:17): [True: 0, False: 0]
  ------------------
 1162|      0|                pal_idx = ts->frame_thread[p].pal_idx;
 1163|      0|                ts->frame_thread[p].pal_idx += bw4 * bh4 * 8;
 1164|      0|            } else
 1165|  56.4k|                pal_idx = t->scratch.pal_idx_y;
 1166|  56.4k|            read_pal_indices(t, pal_idx, b->pal_sz[0], 0, w4, h4, bw4, bh4);
 1167|  56.4k|            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  56.4k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 56.4k]
  |  |  ------------------
  |  |   35|  56.4k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  56.4k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1168|      0|                printf("Post-y-pal-indices: r=%d\n", ts->msac.rng);
 1169|  56.4k|        }
 1170|       |
 1171|  2.21M|        if (has_chroma && b->pal_sz[1]) {
  ------------------
  |  Branch (1171:13): [True: 1.49M, False: 719k]
  |  Branch (1171:27): [True: 19.7k, False: 1.47M]
  ------------------
 1172|  19.7k|            uint8_t *pal_idx;
 1173|  19.7k|            if (t->frame_thread.pass) {
  ------------------
  |  Branch (1173:17): [True: 0, False: 19.7k]
  ------------------
 1174|      0|                const int p = t->frame_thread.pass & 1;
 1175|      0|                assert(ts->frame_thread[p].pal_idx);
  ------------------
  |  Branch (1175:17): [True: 0, False: 0]
  ------------------
 1176|      0|                pal_idx = ts->frame_thread[p].pal_idx;
 1177|      0|                ts->frame_thread[p].pal_idx += cbw4 * cbh4 * 8;
 1178|      0|            } else
 1179|  19.7k|                pal_idx = t->scratch.pal_idx_uv;
 1180|  19.7k|            read_pal_indices(t, pal_idx, b->pal_sz[1], 1, cw4, ch4, cbw4, cbh4);
 1181|  19.7k|            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  19.7k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 19.7k]
  |  |  ------------------
  |  |   35|  19.7k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  19.7k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1182|      0|                printf("Post-uv-pal-indices: r=%d\n", ts->msac.rng);
 1183|  19.7k|        }
 1184|       |
 1185|  2.21M|        const TxfmInfo *t_dim;
 1186|  2.21M|        if (f->frame_hdr->segmentation.lossless[b->seg_id]) {
  ------------------
  |  Branch (1186:13): [True: 45.7k, False: 2.17M]
  ------------------
 1187|  45.7k|            b->tx = b->uvtx = (int) TX_4X4;
 1188|  45.7k|            t_dim = &dav1d_txfm_dimensions[TX_4X4];
 1189|  2.17M|        } else {
 1190|  2.17M|            b->tx = dav1d_max_txfm_size_for_bs[bs][0];
 1191|  2.17M|            b->uvtx = dav1d_max_txfm_size_for_bs[bs][f->cur.p.layout];
 1192|  2.17M|            t_dim = &dav1d_txfm_dimensions[b->tx];
 1193|  2.17M|            if (f->frame_hdr->txfm_mode == DAV1D_TX_SWITCHABLE && t_dim->max > TX_4X4) {
  ------------------
  |  Branch (1193:17): [True: 557k, False: 1.61M]
  |  Branch (1193:67): [True: 435k, False: 121k]
  ------------------
 1194|   435k|                const int tctx = get_tx_ctx(t->a, &t->l, t_dim, by4, bx4);
 1195|   435k|                uint16_t *const tx_cdf = ts->cdf.m.txsz[t_dim->max - 1][tctx];
 1196|   435k|                int depth = dav1d_msac_decode_symbol_adapt4(&ts->msac, tx_cdf,
  ------------------
  |  |   47|   435k|#define dav1d_msac_decode_symbol_adapt4  dav1d_msac_decode_symbol_adapt4_sse2
  ------------------
 1197|   435k|                                imin(t_dim->max, 2));
 1198|       |
 1199|   872k|                while (depth--) {
  ------------------
  |  Branch (1199:24): [True: 436k, False: 435k]
  ------------------
 1200|   436k|                    b->tx = t_dim->sub;
 1201|   436k|                    t_dim = &dav1d_txfm_dimensions[b->tx];
 1202|   436k|                }
 1203|   435k|            }
 1204|  2.17M|            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  2.17M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 2.17M]
  |  |  ------------------
  |  |   35|  2.17M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  2.17M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1205|      0|                printf("Post-tx[%d]: r=%d\n", b->tx, ts->msac.rng);
 1206|  2.17M|        }
 1207|       |
 1208|       |        // reconstruction
 1209|  2.21M|        if (t->frame_thread.pass == 1) {
  ------------------
  |  Branch (1209:13): [True: 0, False: 2.21M]
  ------------------
 1210|      0|            f->bd_fn.read_coef_blocks(t, bs, b);
 1211|  2.21M|        } else {
 1212|  2.21M|            f->bd_fn.recon_b_intra(t, bs, intra_edge_flags, b);
 1213|  2.21M|        }
 1214|       |
 1215|  2.21M|        if (f->frame_hdr->loopfilter.level_y[0] ||
  ------------------
  |  Branch (1215:13): [True: 382k, False: 1.83M]
  ------------------
 1216|  1.83M|            f->frame_hdr->loopfilter.level_y[1])
  ------------------
  |  Branch (1216:13): [True: 131k, False: 1.70M]
  ------------------
 1217|   513k|        {
 1218|   513k|            dav1d_create_lf_mask_intra(t->lf_mask, f->lf.level, f->b4_stride,
 1219|   513k|                                       (const uint8_t (*)[8][2])
 1220|   513k|                                       &ts->lflvl[b->seg_id][0][0][0],
 1221|   513k|                                       t->bx, t->by, f->w4, f->h4, bs,
 1222|   513k|                                       b->tx, b->uvtx, f->cur.p.layout,
 1223|   513k|                                       &t->a->tx_lpf_y[bx4], &t->l.tx_lpf_y[by4],
 1224|   513k|                                       has_chroma ? &t->a->tx_lpf_uv[cbx4] : NULL,
  ------------------
  |  Branch (1224:40): [True: 316k, False: 196k]
  ------------------
 1225|   513k|                                       has_chroma ? &t->l.tx_lpf_uv[cby4] : NULL);
  ------------------
  |  Branch (1225:40): [True: 316k, False: 196k]
  ------------------
 1226|   513k|        }
 1227|       |        // update contexts
 1228|  2.21M|        const enum IntraPredMode y_mode_nofilt =
 1229|  2.21M|            b->y_mode == FILTER_PRED ? DC_PRED : b->y_mode;
  ------------------
  |  Branch (1229:13): [True: 299k, False: 1.91M]
  ------------------
 1230|  2.21M|        BlockContext *edge = t->a;
 1231|  6.64M|        for (int i = 0, off = bx4; i < 2; i++, off = by4, edge = &t->l) {
  ------------------
  |  Branch (1231:36): [True: 4.43M, False: 2.21M]
  ------------------
 1232|  4.43M|            int t_lsz = ((uint8_t *) &t_dim->lw)[i]; // lw then lh
 1233|  4.43M|#define set_ctx(rep_macro) \
 1234|  4.43M|            rep_macro(edge->tx_intra, off, t_lsz); \
 1235|  4.43M|            rep_macro(edge->tx, off, t_lsz); \
 1236|  4.43M|            rep_macro(edge->mode, off, y_mode_nofilt); \
 1237|  4.43M|            rep_macro(edge->pal_sz, off, b->pal_sz[0]); \
 1238|  4.43M|            rep_macro(edge->seg_pred, off, seg_pred); \
 1239|  4.43M|            rep_macro(edge->skip_mode, off, 0); \
 1240|  4.43M|            rep_macro(edge->intra, off, 1); \
 1241|  4.43M|            rep_macro(edge->skip, off, b->skip); \
 1242|       |            /* see aomedia bug 2183 for why we use luma coordinates here */ \
 1243|  4.43M|            rep_macro(t->pal_sz_uv[i], off, (has_chroma ? b->pal_sz[1] : 0)); \
 1244|  4.43M|            if (IS_INTER_OR_SWITCH(f->frame_hdr)) { \
 1245|  4.43M|                rep_macro(edge->comp_type, off, COMP_INTER_NONE); \
 1246|  4.43M|                rep_macro(edge->ref[0], off, ((uint8_t) -1)); \
 1247|  4.43M|                rep_macro(edge->ref[1], off, ((uint8_t) -1)); \
 1248|  4.43M|                rep_macro(edge->filter[0], off, DAV1D_N_SWITCHABLE_FILTERS); \
 1249|  4.43M|                rep_macro(edge->filter[1], off, DAV1D_N_SWITCHABLE_FILTERS); \
 1250|  4.43M|            }
 1251|  4.43M|            case_set(b_dim[2 + i]);
  ------------------
  |  |   70|  4.43M|    switch (var) { \
  |  |   71|   929k|    case 0: set_ctx(set_ctx1); break; \
  |  |  ------------------
  |  |  |  | 1234|   929k|            rep_macro(edge->tx_intra, off, t_lsz); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   929k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   929k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1235|   929k|            rep_macro(edge->tx, off, t_lsz); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   929k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   929k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1236|   929k|            rep_macro(edge->mode, off, y_mode_nofilt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   929k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   929k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1237|   929k|            rep_macro(edge->pal_sz, off, b->pal_sz[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   929k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   929k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1238|   929k|            rep_macro(edge->seg_pred, off, seg_pred); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   929k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   929k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1239|   929k|            rep_macro(edge->skip_mode, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   929k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   929k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1240|   929k|            rep_macro(edge->intra, off, 1); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   929k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   929k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1241|   929k|            rep_macro(edge->skip, off, b->skip); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   929k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   929k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1242|   929k|            /* see aomedia bug 2183 for why we use luma coordinates here */ \
  |  |  |  | 1243|   929k|            rep_macro(t->pal_sz_uv[i], off, (has_chroma ? b->pal_sz[1] : 0)); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   929k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|  1.85M|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (56:43): [True: 529k, False: 400k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1244|   929k|            if (IS_INTER_OR_SWITCH(f->frame_hdr)) { \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|   929k|    ((frame_header)->frame_type & 1)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (36:5): [True: 50.4k, False: 879k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1245|  50.4k|                rep_macro(edge->comp_type, off, COMP_INTER_NONE); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|  50.4k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|  50.4k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1246|  50.4k|                rep_macro(edge->ref[0], off, ((uint8_t) -1)); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|  50.4k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|  50.4k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1247|  50.4k|                rep_macro(edge->ref[1], off, ((uint8_t) -1)); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|  50.4k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|  50.4k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1248|  50.4k|                rep_macro(edge->filter[0], off, DAV1D_N_SWITCHABLE_FILTERS); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|  50.4k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|  50.4k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1249|  50.4k|                rep_macro(edge->filter[1], off, DAV1D_N_SWITCHABLE_FILTERS); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|  50.4k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|  50.4k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1250|  50.4k|            }
  |  |  ------------------
  |  |  |  Branch (71:5): [True: 929k, False: 3.50M]
  |  |  ------------------
  |  |   72|  1.22M|    case 1: set_ctx(set_ctx2); break; \
  |  |  ------------------
  |  |  |  | 1234|  1.22M|            rep_macro(edge->tx_intra, off, t_lsz); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|  1.22M|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  1.22M|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1235|  1.22M|            rep_macro(edge->tx, off, t_lsz); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|  1.22M|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  1.22M|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1236|  1.22M|            rep_macro(edge->mode, off, y_mode_nofilt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|  1.22M|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  1.22M|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1237|  1.22M|            rep_macro(edge->pal_sz, off, b->pal_sz[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|  1.22M|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  1.22M|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1238|  1.22M|            rep_macro(edge->seg_pred, off, seg_pred); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|  1.22M|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  1.22M|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1239|  1.22M|            rep_macro(edge->skip_mode, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|  1.22M|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  1.22M|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1240|  1.22M|            rep_macro(edge->intra, off, 1); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|  1.22M|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  1.22M|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1241|  1.22M|            rep_macro(edge->skip, off, b->skip); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|  1.22M|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  1.22M|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1242|  1.22M|            /* see aomedia bug 2183 for why we use luma coordinates here */ \
  |  |  |  | 1243|  1.22M|            rep_macro(t->pal_sz_uv[i], off, (has_chroma ? b->pal_sz[1] : 0)); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|  1.22M|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  2.45M|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (58:45): [True: 895k, False: 332k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1244|  1.22M|            if (IS_INTER_OR_SWITCH(f->frame_hdr)) { \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  1.22M|    ((frame_header)->frame_type & 1)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (36:5): [True: 91.7k, False: 1.13M]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1245|  91.7k|                rep_macro(edge->comp_type, off, COMP_INTER_NONE); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|  91.7k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  91.7k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1246|  91.7k|                rep_macro(edge->ref[0], off, ((uint8_t) -1)); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|  91.7k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  91.7k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1247|  91.7k|                rep_macro(edge->ref[1], off, ((uint8_t) -1)); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|  91.7k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  91.7k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1248|  91.7k|                rep_macro(edge->filter[0], off, DAV1D_N_SWITCHABLE_FILTERS); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|  91.7k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  91.7k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1249|  91.7k|                rep_macro(edge->filter[1], off, DAV1D_N_SWITCHABLE_FILTERS); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|  91.7k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  91.7k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1250|  91.7k|            }
  |  |  ------------------
  |  |  |  Branch (72:5): [True: 1.22M, False: 3.20M]
  |  |  ------------------
  |  |   73|  1.12M|    case 2: set_ctx(set_ctx4); break; \
  |  |  ------------------
  |  |  |  | 1234|  1.12M|            rep_macro(edge->tx_intra, off, t_lsz); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|  1.12M|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|  1.12M|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1235|  1.12M|            rep_macro(edge->tx, off, t_lsz); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|  1.12M|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|  1.12M|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1236|  1.12M|            rep_macro(edge->mode, off, y_mode_nofilt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|  1.12M|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|  1.12M|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1237|  1.12M|            rep_macro(edge->pal_sz, off, b->pal_sz[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|  1.12M|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|  1.12M|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1238|  1.12M|            rep_macro(edge->seg_pred, off, seg_pred); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|  1.12M|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|  1.12M|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1239|  1.12M|            rep_macro(edge->skip_mode, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|  1.12M|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|  1.12M|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1240|  1.12M|            rep_macro(edge->intra, off, 1); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|  1.12M|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|  1.12M|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1241|  1.12M|            rep_macro(edge->skip, off, b->skip); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|  1.12M|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|  1.12M|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1242|  1.12M|            /* see aomedia bug 2183 for why we use luma coordinates here */ \
  |  |  |  | 1243|  1.12M|            rep_macro(t->pal_sz_uv[i], off, (has_chroma ? b->pal_sz[1] : 0)); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|  1.12M|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|  2.25M|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (60:45): [True: 861k, False: 266k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1244|  1.12M|            if (IS_INTER_OR_SWITCH(f->frame_hdr)) { \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  1.12M|    ((frame_header)->frame_type & 1)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (36:5): [True: 66.0k, False: 1.06M]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1245|  66.0k|                rep_macro(edge->comp_type, off, COMP_INTER_NONE); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|  66.0k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|  66.0k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1246|  66.0k|                rep_macro(edge->ref[0], off, ((uint8_t) -1)); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|  66.0k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|  66.0k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1247|  66.0k|                rep_macro(edge->ref[1], off, ((uint8_t) -1)); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|  66.0k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|  66.0k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1248|  66.0k|                rep_macro(edge->filter[0], off, DAV1D_N_SWITCHABLE_FILTERS); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|  66.0k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|  66.0k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1249|  66.0k|                rep_macro(edge->filter[1], off, DAV1D_N_SWITCHABLE_FILTERS); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|  66.0k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|  66.0k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1250|  66.0k|            }
  |  |  ------------------
  |  |  |  Branch (73:5): [True: 1.12M, False: 3.30M]
  |  |  ------------------
  |  |   74|   581k|    case 3: set_ctx(set_ctx8); break; \
  |  |  ------------------
  |  |  |  | 1234|   581k|            rep_macro(edge->tx_intra, off, t_lsz); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   581k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   581k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1235|   581k|            rep_macro(edge->tx, off, t_lsz); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   581k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   581k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1236|   581k|            rep_macro(edge->mode, off, y_mode_nofilt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   581k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   581k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1237|   581k|            rep_macro(edge->pal_sz, off, b->pal_sz[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   581k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   581k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1238|   581k|            rep_macro(edge->seg_pred, off, seg_pred); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   581k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   581k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1239|   581k|            rep_macro(edge->skip_mode, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   581k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   581k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1240|   581k|            rep_macro(edge->intra, off, 1); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   581k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   581k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1241|   581k|            rep_macro(edge->skip, off, b->skip); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   581k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   581k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1242|   581k|            /* see aomedia bug 2183 for why we use luma coordinates here */ \
  |  |  |  | 1243|   581k|            rep_macro(t->pal_sz_uv[i], off, (has_chroma ? b->pal_sz[1] : 0)); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   581k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|  1.16M|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (62:45): [True: 398k, False: 182k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1244|   581k|            if (IS_INTER_OR_SWITCH(f->frame_hdr)) { \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|   581k|    ((frame_header)->frame_type & 1)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (36:5): [True: 20.1k, False: 561k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1245|  20.1k|                rep_macro(edge->comp_type, off, COMP_INTER_NONE); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|  20.1k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|  20.1k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1246|  20.1k|                rep_macro(edge->ref[0], off, ((uint8_t) -1)); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|  20.1k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|  20.1k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1247|  20.1k|                rep_macro(edge->ref[1], off, ((uint8_t) -1)); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|  20.1k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|  20.1k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1248|  20.1k|                rep_macro(edge->filter[0], off, DAV1D_N_SWITCHABLE_FILTERS); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|  20.1k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|  20.1k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1249|  20.1k|                rep_macro(edge->filter[1], off, DAV1D_N_SWITCHABLE_FILTERS); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|  20.1k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|  20.1k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1250|  20.1k|            }
  |  |  ------------------
  |  |  |  Branch (74:5): [True: 581k, False: 3.85M]
  |  |  ------------------
  |  |   75|   388k|    case 4: set_ctx(set_ctx16); break; \
  |  |  ------------------
  |  |  |  | 1234|   388k|            rep_macro(edge->tx_intra, off, t_lsz); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   388k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   388k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   388k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   388k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 388k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1235|   388k|            rep_macro(edge->tx, off, t_lsz); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   388k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   388k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   388k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   388k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 388k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1236|   388k|            rep_macro(edge->mode, off, y_mode_nofilt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   388k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   388k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   388k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   388k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 388k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1237|   388k|            rep_macro(edge->pal_sz, off, b->pal_sz[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   388k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   388k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   388k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   388k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 388k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1238|   388k|            rep_macro(edge->seg_pred, off, seg_pred); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   388k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   388k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   388k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   388k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 388k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1239|   388k|            rep_macro(edge->skip_mode, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   388k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   388k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   388k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   388k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 388k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1240|   388k|            rep_macro(edge->intra, off, 1); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   388k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   388k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   388k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   388k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 388k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1241|   388k|            rep_macro(edge->skip, off, b->skip); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   388k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   388k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   388k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   388k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 388k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1242|   388k|            /* see aomedia bug 2183 for why we use luma coordinates here */ \
  |  |  |  | 1243|   388k|            rep_macro(t->pal_sz_uv[i], off, (has_chroma ? b->pal_sz[1] : 0)); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   388k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   388k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   777k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (64:29): [True: 203k, False: 184k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   65|   388k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 388k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1244|   388k|            if (IS_INTER_OR_SWITCH(f->frame_hdr)) { \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|   388k|    ((frame_header)->frame_type & 1)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (36:5): [True: 5.09k, False: 383k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1245|  5.09k|                rep_macro(edge->comp_type, off, COMP_INTER_NONE); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|  5.09k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|  5.09k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|  5.09k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|  5.09k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 5.09k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1246|  5.09k|                rep_macro(edge->ref[0], off, ((uint8_t) -1)); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|  5.09k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|  5.09k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|  5.09k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|  5.09k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 5.09k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1247|  5.09k|                rep_macro(edge->ref[1], off, ((uint8_t) -1)); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|  5.09k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|  5.09k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|  5.09k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|  5.09k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 5.09k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1248|  5.09k|                rep_macro(edge->filter[0], off, DAV1D_N_SWITCHABLE_FILTERS); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|  5.09k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|  5.09k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|  5.09k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|  5.09k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 5.09k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1249|  5.09k|                rep_macro(edge->filter[1], off, DAV1D_N_SWITCHABLE_FILTERS); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|  5.09k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|  5.09k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|  5.09k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|  5.09k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 5.09k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1250|  5.09k|            }
  |  |  ------------------
  |  |  |  Branch (75:5): [True: 388k, False: 4.04M]
  |  |  ------------------
  |  |   76|   177k|    case 5: set_ctx(set_ctx32); break; \
  |  |  ------------------
  |  |  |  | 1234|   177k|            rep_macro(edge->tx_intra, off, t_lsz); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|   177k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|   177k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|   177k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|   177k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 177k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1235|   177k|            rep_macro(edge->tx, off, t_lsz); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|   177k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|   177k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|   177k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|   177k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 177k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1236|   177k|            rep_macro(edge->mode, off, y_mode_nofilt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|   177k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|   177k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|   177k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|   177k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 177k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1237|   177k|            rep_macro(edge->pal_sz, off, b->pal_sz[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|   177k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|   177k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|   177k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|   177k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 177k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1238|   177k|            rep_macro(edge->seg_pred, off, seg_pred); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|   177k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|   177k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|   177k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|   177k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 177k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1239|   177k|            rep_macro(edge->skip_mode, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|   177k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|   177k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|   177k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|   177k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 177k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1240|   177k|            rep_macro(edge->intra, off, 1); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|   177k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|   177k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|   177k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|   177k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 177k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1241|   177k|            rep_macro(edge->skip, off, b->skip); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|   177k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|   177k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|   177k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|   177k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 177k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1242|   177k|            /* see aomedia bug 2183 for why we use luma coordinates here */ \
  |  |  |  | 1243|   177k|            rep_macro(t->pal_sz_uv[i], off, (has_chroma ? b->pal_sz[1] : 0)); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|   177k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|   177k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|   355k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (67:29): [True: 106k, False: 71.2k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   68|   177k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 177k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1244|   177k|            if (IS_INTER_OR_SWITCH(f->frame_hdr)) { \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|   177k|    ((frame_header)->frame_type & 1)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (36:5): [True: 5.36k, False: 172k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1245|  5.36k|                rep_macro(edge->comp_type, off, COMP_INTER_NONE); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  5.36k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  5.36k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  5.36k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  5.36k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 5.36k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1246|  5.36k|                rep_macro(edge->ref[0], off, ((uint8_t) -1)); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  5.36k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  5.36k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  5.36k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  5.36k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 5.36k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1247|  5.36k|                rep_macro(edge->ref[1], off, ((uint8_t) -1)); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  5.36k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  5.36k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  5.36k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  5.36k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 5.36k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1248|  5.36k|                rep_macro(edge->filter[0], off, DAV1D_N_SWITCHABLE_FILTERS); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  5.36k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  5.36k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  5.36k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  5.36k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 5.36k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1249|  5.36k|                rep_macro(edge->filter[1], off, DAV1D_N_SWITCHABLE_FILTERS); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  5.36k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  5.36k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  5.36k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  5.36k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 5.36k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1250|  5.36k|            }
  |  |  ------------------
  |  |  |  Branch (76:5): [True: 177k, False: 4.25M]
  |  |  ------------------
  |  |   77|      0|    default: assert(0); \
  |  |  ------------------
  |  |  |  Branch (77:5): [True: 0, False: 4.43M]
  |  |  ------------------
  |  |   78|  4.43M|    }
  ------------------
  |  Branch (1251:13): [Folded, False: 0]
  ------------------
 1252|  4.43M|#undef set_ctx
 1253|  4.43M|        }
 1254|  2.21M|        if (b->pal_sz[0])
  ------------------
  |  Branch (1254:13): [True: 56.4k, False: 2.16M]
  ------------------
 1255|  56.4k|            f->bd_fn.copy_pal_block_y(t, bx4, by4, bw4, bh4);
 1256|  2.21M|        if (has_chroma) {
  ------------------
  |  Branch (1256:13): [True: 1.49M, False: 719k]
  ------------------
 1257|  1.49M|            uint8_t uv_mode = b->uv_mode;
 1258|  1.49M|            dav1d_memset_pow2[ulog2(cbw4)](&t->a->uvmode[cbx4], uv_mode);
 1259|  1.49M|            dav1d_memset_pow2[ulog2(cbh4)](&t->l.uvmode[cby4], uv_mode);
 1260|  1.49M|            if (b->pal_sz[1])
  ------------------
  |  Branch (1260:17): [True: 19.7k, False: 1.47M]
  ------------------
 1261|  19.7k|                f->bd_fn.copy_pal_block_uv(t, bx4, by4, bw4, bh4);
 1262|  1.49M|        }
 1263|  2.21M|        if (IS_INTER_OR_SWITCH(f->frame_hdr) || f->frame_hdr->allow_intrabc)
  ------------------
  |  |   36|  4.43M|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 119k, False: 2.09M]
  |  |  ------------------
  ------------------
  |  Branch (1263:49): [True: 1.42M, False: 673k]
  ------------------
 1264|  1.54M|            splat_intraref(f->c, t, bs, bw4, bh4);
 1265|  2.21M|    } else if (IS_KEY_OR_INTRA(f->frame_hdr)) {
  ------------------
  |  |   43|  1.70M|    (!IS_INTER_OR_SWITCH(frame_header))
  |  |  ------------------
  |  |  |  |   36|  1.70M|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (43:5): [True: 604k, False: 1.09M]
  |  |  ------------------
  ------------------
 1266|       |        // intra block copy
 1267|   604k|        refmvs_candidate mvstack[8];
 1268|   604k|        int n_mvs, ctx;
 1269|   604k|        dav1d_refmvs_find(&t->rt, mvstack, &n_mvs, &ctx,
 1270|   604k|                          (union refmvs_refpair) { .ref = { 0, -1 }},
 1271|   604k|                          bs, intra_edge_flags, t->by, t->bx);
 1272|       |
 1273|   604k|        if (mvstack[0].mv.mv[0].n)
  ------------------
  |  Branch (1273:13): [True: 546k, False: 58.5k]
  ------------------
 1274|   546k|            b->mv[0] = mvstack[0].mv.mv[0];
 1275|  58.5k|        else if (mvstack[1].mv.mv[0].n)
  ------------------
  |  Branch (1275:18): [True: 0, False: 58.5k]
  ------------------
 1276|      0|            b->mv[0] = mvstack[1].mv.mv[0];
 1277|  58.5k|        else {
 1278|  58.5k|            if (t->by - (16 << f->seq_hdr->sb128) < ts->tiling.row_start) {
  ------------------
  |  Branch (1278:17): [True: 57.1k, False: 1.34k]
  ------------------
 1279|  57.1k|                b->mv[0].y = 0;
 1280|  57.1k|                b->mv[0].x = -(512 << f->seq_hdr->sb128) - 2048;
 1281|  57.1k|            } else {
 1282|  1.34k|                b->mv[0].y = -(512 << f->seq_hdr->sb128);
 1283|  1.34k|                b->mv[0].x = 0;
 1284|  1.34k|            }
 1285|  58.5k|        }
 1286|       |
 1287|   604k|        const union mv ref = b->mv[0];
 1288|   604k|        read_mv_residual(ts, &b->mv[0], -1);
 1289|       |
 1290|       |        // clip intrabc motion vector to decoded parts of current tile
 1291|   604k|        int border_left = ts->tiling.col_start * 4;
 1292|   604k|        int border_top  = ts->tiling.row_start * 4;
 1293|   604k|        if (has_chroma) {
  ------------------
  |  Branch (1293:13): [True: 284k, False: 320k]
  ------------------
 1294|   284k|            if (bw4 < 2 &&  ss_hor)
  ------------------
  |  Branch (1294:17): [True: 96.0k, False: 188k]
  |  Branch (1294:29): [True: 11.7k, False: 84.2k]
  ------------------
 1295|  11.7k|                border_left += 4;
 1296|   284k|            if (bh4 < 2 &&  ss_ver)
  ------------------
  |  Branch (1296:17): [True: 69.8k, False: 214k]
  |  Branch (1296:29): [True: 4.71k, False: 65.1k]
  ------------------
 1297|  4.71k|                border_top  += 4;
 1298|   284k|        }
 1299|   604k|        int src_left   = t->bx * 4 + (b->mv[0].x >> 3);
 1300|   604k|        int src_top    = t->by * 4 + (b->mv[0].y >> 3);
 1301|   604k|        int src_right  = src_left + bw4 * 4;
 1302|   604k|        int src_bottom = src_top  + bh4 * 4;
 1303|   604k|        const int border_right = ((ts->tiling.col_end + (bw4 - 1)) & ~(bw4 - 1)) * 4;
 1304|       |
 1305|       |        // check against left or right tile boundary and adjust if necessary
 1306|   604k|        if (src_left < border_left) {
  ------------------
  |  Branch (1306:13): [True: 200k, False: 404k]
  ------------------
 1307|   200k|            src_right += border_left - src_left;
 1308|   200k|            src_left  += border_left - src_left;
 1309|   404k|        } else if (src_right > border_right) {
  ------------------
  |  Branch (1309:20): [True: 189k, False: 214k]
  ------------------
 1310|   189k|            src_left  -= src_right - border_right;
 1311|   189k|            src_right -= src_right - border_right;
 1312|   189k|        }
 1313|       |        // check against top tile boundary and adjust if necessary
 1314|   604k|        if (src_top < border_top) {
  ------------------
  |  Branch (1314:13): [True: 516k, False: 88.2k]
  ------------------
 1315|   516k|            src_bottom += border_top - src_top;
 1316|   516k|            src_top    += border_top - src_top;
 1317|   516k|        }
 1318|       |
 1319|   604k|        const int sbx = (t->bx >> (4 + f->seq_hdr->sb128)) << (6 + f->seq_hdr->sb128);
 1320|   604k|        const int sby = (t->by >> (4 + f->seq_hdr->sb128)) << (6 + f->seq_hdr->sb128);
 1321|   604k|        const int sb_size = 1 << (6 + f->seq_hdr->sb128);
 1322|       |        // check for overlap with current superblock
 1323|   604k|        if (src_bottom > sby && src_right > sbx) {
  ------------------
  |  Branch (1323:13): [True: 587k, False: 17.1k]
  |  Branch (1323:33): [True: 196k, False: 390k]
  ------------------
 1324|   196k|            if (src_top - border_top >= src_bottom - sby) {
  ------------------
  |  Branch (1324:17): [True: 1.03k, False: 195k]
  ------------------
 1325|       |                // if possible move src up into the previous superblock row
 1326|  1.03k|                src_top    -= src_bottom - sby;
 1327|  1.03k|                src_bottom -= src_bottom - sby;
 1328|   195k|            } else if (src_left - border_left >= src_right - sbx) {
  ------------------
  |  Branch (1328:24): [True: 188k, False: 6.96k]
  ------------------
 1329|       |                // if possible move src left into the previous superblock
 1330|   188k|                src_left  -= src_right - sbx;
 1331|   188k|                src_right -= src_right - sbx;
 1332|   188k|            }
 1333|   196k|        }
 1334|       |        // move src up if it is below current superblock row
 1335|   604k|        if (src_bottom > sby + sb_size) {
  ------------------
  |  Branch (1335:13): [True: 3.73k, False: 601k]
  ------------------
 1336|  3.73k|            src_top    -= src_bottom - (sby + sb_size);
 1337|  3.73k|            src_bottom -= src_bottom - (sby + sb_size);
 1338|  3.73k|        }
 1339|       |        // error out if mv still overlaps with the current superblock
 1340|   604k|        if (src_bottom > sby && src_right > sbx)
  ------------------
  |  Branch (1340:13): [True: 586k, False: 18.2k]
  |  Branch (1340:33): [True: 6.96k, False: 579k]
  ------------------
 1341|  6.96k|            return -1;
 1342|       |
 1343|   597k|        b->mv[0].x = (src_left - t->bx * 4) * 8;
 1344|   597k|        b->mv[0].y = (src_top  - t->by * 4) * 8;
 1345|       |
 1346|   597k|        if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   597k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 597k]
  |  |  ------------------
  |  |   35|   597k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   597k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1347|      0|            printf("Post-dmv[%d/%d,ref=%d/%d|%d/%d]: r=%d\n",
 1348|      0|                   b->mv[0].y, b->mv[0].x, ref.y, ref.x,
 1349|      0|                   mvstack[0].mv.mv[0].y, mvstack[0].mv.mv[0].x, ts->msac.rng);
 1350|   597k|        read_vartx_tree(t, b, bs, bx4, by4);
 1351|       |
 1352|       |        // reconstruction
 1353|   597k|        if (t->frame_thread.pass == 1) {
  ------------------
  |  Branch (1353:13): [True: 0, False: 597k]
  ------------------
 1354|      0|            f->bd_fn.read_coef_blocks(t, bs, b);
 1355|      0|            b->filter2d = FILTER_2D_BILINEAR;
 1356|   597k|        } else {
 1357|   597k|            if (f->bd_fn.recon_b_inter(t, bs, b)) return -1;
  ------------------
  |  Branch (1357:17): [True: 0, False: 597k]
  ------------------
 1358|   597k|        }
 1359|       |
 1360|   597k|        splat_intrabc_mv(f->c, t, bs, b, bw4, bh4);
 1361|   597k|        BlockContext *edge = t->a;
 1362|  1.79M|        for (int i = 0, off = bx4; i < 2; i++, off = by4, edge = &t->l) {
  ------------------
  |  Branch (1362:36): [True: 1.19M, False: 597k]
  ------------------
 1363|  1.19M|#define set_ctx(rep_macro) \
 1364|  1.19M|            rep_macro(edge->tx_intra, off, b_dim[2 + i]); \
 1365|  1.19M|            rep_macro(edge->mode, off, DC_PRED); \
 1366|  1.19M|            rep_macro(edge->pal_sz, off, 0); \
 1367|       |            /* see aomedia bug 2183 for why this is outside if (has_chroma) */ \
 1368|  1.19M|            rep_macro(t->pal_sz_uv[i], off, 0); \
 1369|  1.19M|            rep_macro(edge->seg_pred, off, seg_pred); \
 1370|  1.19M|            rep_macro(edge->skip_mode, off, 0); \
 1371|  1.19M|            rep_macro(edge->intra, off, 0); \
 1372|  1.19M|            rep_macro(edge->skip, off, b->skip)
 1373|  1.19M|            case_set(b_dim[2 + i]);
  ------------------
  |  |   70|  1.19M|    switch (var) { \
  |  |   71|   613k|    case 0: set_ctx(set_ctx1); break; \
  |  |  ------------------
  |  |  |  | 1364|   613k|            rep_macro(edge->tx_intra, off, b_dim[2 + i]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   613k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   613k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1365|   613k|            rep_macro(edge->mode, off, DC_PRED); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   613k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   613k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1366|   613k|            rep_macro(edge->pal_sz, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   613k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   613k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1367|   613k|            /* see aomedia bug 2183 for why this is outside if (has_chroma) */ \
  |  |  |  | 1368|   613k|            rep_macro(t->pal_sz_uv[i], off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   613k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   613k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1369|   613k|            rep_macro(edge->seg_pred, off, seg_pred); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   613k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   613k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1370|   613k|            rep_macro(edge->skip_mode, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   613k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   613k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1371|   613k|            rep_macro(edge->intra, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   613k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   613k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1372|   613k|            rep_macro(edge->skip, off, b->skip)
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   613k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   613k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (71:5): [True: 613k, False: 582k]
  |  |  ------------------
  |  |   72|   123k|    case 1: set_ctx(set_ctx2); break; \
  |  |  ------------------
  |  |  |  | 1364|   123k|            rep_macro(edge->tx_intra, off, b_dim[2 + i]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   123k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   123k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1365|   123k|            rep_macro(edge->mode, off, DC_PRED); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   123k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   123k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1366|   123k|            rep_macro(edge->pal_sz, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   123k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   123k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1367|   123k|            /* see aomedia bug 2183 for why this is outside if (has_chroma) */ \
  |  |  |  | 1368|   123k|            rep_macro(t->pal_sz_uv[i], off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   123k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   123k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1369|   123k|            rep_macro(edge->seg_pred, off, seg_pred); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   123k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   123k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1370|   123k|            rep_macro(edge->skip_mode, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   123k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   123k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1371|   123k|            rep_macro(edge->intra, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   123k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   123k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1372|   123k|            rep_macro(edge->skip, off, b->skip)
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   123k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   123k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (72:5): [True: 123k, False: 1.07M]
  |  |  ------------------
  |  |   73|   226k|    case 2: set_ctx(set_ctx4); break; \
  |  |  ------------------
  |  |  |  | 1364|   226k|            rep_macro(edge->tx_intra, off, b_dim[2 + i]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   226k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   226k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1365|   226k|            rep_macro(edge->mode, off, DC_PRED); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   226k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   226k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1366|   226k|            rep_macro(edge->pal_sz, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   226k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   226k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1367|   226k|            /* see aomedia bug 2183 for why this is outside if (has_chroma) */ \
  |  |  |  | 1368|   226k|            rep_macro(t->pal_sz_uv[i], off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   226k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   226k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1369|   226k|            rep_macro(edge->seg_pred, off, seg_pred); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   226k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   226k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1370|   226k|            rep_macro(edge->skip_mode, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   226k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   226k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1371|   226k|            rep_macro(edge->intra, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   226k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   226k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1372|   226k|            rep_macro(edge->skip, off, b->skip)
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   226k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   226k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (73:5): [True: 226k, False: 968k]
  |  |  ------------------
  |  |   74|  80.9k|    case 3: set_ctx(set_ctx8); break; \
  |  |  ------------------
  |  |  |  | 1364|  80.9k|            rep_macro(edge->tx_intra, off, b_dim[2 + i]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|  80.9k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|  80.9k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1365|  80.9k|            rep_macro(edge->mode, off, DC_PRED); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|  80.9k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|  80.9k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1366|  80.9k|            rep_macro(edge->pal_sz, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|  80.9k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|  80.9k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1367|  80.9k|            /* see aomedia bug 2183 for why this is outside if (has_chroma) */ \
  |  |  |  | 1368|  80.9k|            rep_macro(t->pal_sz_uv[i], off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|  80.9k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|  80.9k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1369|  80.9k|            rep_macro(edge->seg_pred, off, seg_pred); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|  80.9k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|  80.9k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1370|  80.9k|            rep_macro(edge->skip_mode, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|  80.9k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|  80.9k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1371|  80.9k|            rep_macro(edge->intra, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|  80.9k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|  80.9k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1372|  80.9k|            rep_macro(edge->skip, off, b->skip)
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|  80.9k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|  80.9k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (74:5): [True: 80.9k, False: 1.11M]
  |  |  ------------------
  |  |   75|   139k|    case 4: set_ctx(set_ctx16); break; \
  |  |  ------------------
  |  |  |  | 1364|   139k|            rep_macro(edge->tx_intra, off, b_dim[2 + i]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   139k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   139k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   139k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   139k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 139k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1365|   139k|            rep_macro(edge->mode, off, DC_PRED); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   139k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   139k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   139k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   139k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 139k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1366|   139k|            rep_macro(edge->pal_sz, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   139k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   139k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   139k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   139k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 139k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1367|   139k|            /* see aomedia bug 2183 for why this is outside if (has_chroma) */ \
  |  |  |  | 1368|   139k|            rep_macro(t->pal_sz_uv[i], off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   139k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   139k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   139k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   139k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 139k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1369|   139k|            rep_macro(edge->seg_pred, off, seg_pred); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   139k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   139k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   139k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   139k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 139k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1370|   139k|            rep_macro(edge->skip_mode, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   139k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   139k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   139k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   139k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 139k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1371|   139k|            rep_macro(edge->intra, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   139k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   139k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   139k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   139k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 139k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1372|   139k|            rep_macro(edge->skip, off, b->skip)
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   139k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   139k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   139k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   139k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 139k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (75:5): [True: 139k, False: 1.05M]
  |  |  ------------------
  |  |   76|  10.6k|    case 5: set_ctx(set_ctx32); break; \
  |  |  ------------------
  |  |  |  | 1364|  10.6k|            rep_macro(edge->tx_intra, off, b_dim[2 + i]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  10.6k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  10.6k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  10.6k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  10.6k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 10.6k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1365|  10.6k|            rep_macro(edge->mode, off, DC_PRED); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  10.6k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  10.6k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  10.6k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  10.6k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 10.6k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1366|  10.6k|            rep_macro(edge->pal_sz, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  10.6k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  10.6k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  10.6k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  10.6k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 10.6k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1367|  10.6k|            /* see aomedia bug 2183 for why this is outside if (has_chroma) */ \
  |  |  |  | 1368|  10.6k|            rep_macro(t->pal_sz_uv[i], off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  10.6k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  10.6k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  10.6k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  10.6k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 10.6k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1369|  10.6k|            rep_macro(edge->seg_pred, off, seg_pred); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  10.6k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  10.6k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  10.6k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  10.6k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 10.6k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1370|  10.6k|            rep_macro(edge->skip_mode, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  10.6k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  10.6k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  10.6k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  10.6k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 10.6k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1371|  10.6k|            rep_macro(edge->intra, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  10.6k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  10.6k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  10.6k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  10.6k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 10.6k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1372|  10.6k|            rep_macro(edge->skip, off, b->skip)
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  10.6k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  10.6k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  10.6k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  10.6k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 10.6k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (76:5): [True: 10.6k, False: 1.18M]
  |  |  ------------------
  |  |   77|      0|    default: assert(0); \
  |  |  ------------------
  |  |  |  Branch (77:5): [True: 0, False: 1.19M]
  |  |  ------------------
  |  |   78|  1.19M|    }
  ------------------
  |  Branch (1373:13): [Folded, False: 0]
  ------------------
 1374|  1.19M|#undef set_ctx
 1375|  1.19M|        }
 1376|   597k|        if (has_chroma) {
  ------------------
  |  Branch (1376:13): [True: 279k, False: 318k]
  ------------------
 1377|   279k|            dav1d_memset_pow2[ulog2(cbw4)](&t->a->uvmode[cbx4], DC_PRED);
 1378|   279k|            dav1d_memset_pow2[ulog2(cbh4)](&t->l.uvmode[cby4], DC_PRED);
 1379|   279k|        }
 1380|  1.09M|    } else {
 1381|       |        // inter-specific mode/mv coding
 1382|  1.09M|        int is_comp, has_subpel_filter;
 1383|       |
 1384|  1.09M|        if (b->skip_mode) {
  ------------------
  |  Branch (1384:13): [True: 16.3k, False: 1.08M]
  ------------------
 1385|  16.3k|            is_comp = 1;
 1386|  1.08M|        } else if ((!seg || (seg->ref == -1 && !seg->globalmv && !seg->skip)) &&
  ------------------
  |  Branch (1386:21): [True: 685k, False: 396k]
  |  Branch (1386:30): [True: 111k, False: 284k]
  |  Branch (1386:48): [True: 61.2k, False: 50.5k]
  |  Branch (1386:66): [True: 49.2k, False: 11.9k]
  ------------------
 1387|   734k|                   f->frame_hdr->switchable_comp_refs && imin(bw4, bh4) > 1)
  ------------------
  |  Branch (1387:20): [True: 488k, False: 246k]
  |  Branch (1387:58): [True: 338k, False: 149k]
  ------------------
 1388|   338k|        {
 1389|   338k|            const int ctx = get_comp_ctx(t->a, &t->l, by4, bx4,
 1390|   338k|                                         have_top, have_left);
 1391|   338k|            is_comp = dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   338k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1392|   338k|                          ts->cdf.m.comp[ctx]);
 1393|   338k|            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   338k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 338k]
  |  |  ------------------
  |  |   35|   338k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   338k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1394|      0|                printf("Post-compflag[%d]: r=%d\n", is_comp, ts->msac.rng);
 1395|   743k|        } else {
 1396|   743k|            is_comp = 0;
 1397|   743k|        }
 1398|       |
 1399|  1.09M|        if (b->skip_mode) {
  ------------------
  |  Branch (1399:13): [True: 16.3k, False: 1.08M]
  ------------------
 1400|  16.3k|            b->ref[0] = f->frame_hdr->skip_mode_refs[0];
 1401|  16.3k|            b->ref[1] = f->frame_hdr->skip_mode_refs[1];
 1402|  16.3k|            b->comp_type = COMP_INTER_AVG;
 1403|  16.3k|            b->inter_mode = NEARESTMV_NEARESTMV;
 1404|  16.3k|            b->drl_idx = NEAREST_DRL;
 1405|  16.3k|            has_subpel_filter = 0;
 1406|       |
 1407|  16.3k|            refmvs_candidate mvstack[8];
 1408|  16.3k|            int n_mvs, ctx;
 1409|  16.3k|            dav1d_refmvs_find(&t->rt, mvstack, &n_mvs, &ctx,
 1410|  16.3k|                              (union refmvs_refpair) { .ref = {
 1411|  16.3k|                                    b->ref[0] + 1, b->ref[1] + 1 }},
 1412|  16.3k|                              bs, intra_edge_flags, t->by, t->bx);
 1413|       |
 1414|  16.3k|            b->mv[0] = mvstack[0].mv.mv[0];
 1415|  16.3k|            b->mv[1] = mvstack[0].mv.mv[1];
 1416|  16.3k|            fix_mv_precision(f->frame_hdr, &b->mv[0]);
 1417|  16.3k|            fix_mv_precision(f->frame_hdr, &b->mv[1]);
 1418|  16.3k|            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  16.3k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 16.3k]
  |  |  ------------------
  |  |   35|  16.3k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  16.3k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1419|      0|                printf("Post-skipmodeblock[mv=1:y=%d,x=%d,2:y=%d,x=%d,refs=%d+%d\n",
 1420|      0|                       b->mv[0].y, b->mv[0].x, b->mv[1].y, b->mv[1].x,
 1421|      0|                       b->ref[0], b->ref[1]);
 1422|  1.08M|        } else if (is_comp) {
  ------------------
  |  Branch (1422:20): [True: 176k, False: 905k]
  ------------------
 1423|   176k|            const int dir_ctx = get_comp_dir_ctx(t->a, &t->l, by4, bx4,
 1424|   176k|                                                 have_top, have_left);
 1425|   176k|            if (dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   176k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  |  Branch (1425:17): [True: 149k, False: 26.8k]
  ------------------
 1426|   176k|                    ts->cdf.m.comp_dir[dir_ctx]))
 1427|   149k|            {
 1428|       |                // bidir - first reference (fw)
 1429|   149k|                const int ctx1 = av1_get_fwd_ref_ctx(t->a, &t->l, by4, bx4,
 1430|   149k|                                                     have_top, have_left);
 1431|   149k|                if (dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   149k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  |  Branch (1431:21): [True: 55.8k, False: 93.6k]
  ------------------
 1432|   149k|                        ts->cdf.m.comp_fwd_ref[0][ctx1]))
 1433|  55.8k|                {
 1434|  55.8k|                    const int ctx2 = av1_get_fwd_ref_2_ctx(t->a, &t->l, by4, bx4,
 1435|  55.8k|                                                           have_top, have_left);
 1436|  55.8k|                    b->ref[0] = 2 + dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  55.8k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1437|  55.8k|                                        ts->cdf.m.comp_fwd_ref[2][ctx2]);
 1438|  93.6k|                } else {
 1439|  93.6k|                    const int ctx2 = av1_get_fwd_ref_1_ctx(t->a, &t->l, by4, bx4,
 1440|  93.6k|                                                           have_top, have_left);
 1441|  93.6k|                    b->ref[0] = dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  93.6k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1442|  93.6k|                                    ts->cdf.m.comp_fwd_ref[1][ctx2]);
 1443|  93.6k|                }
 1444|       |
 1445|       |                // second reference (bw)
 1446|   149k|                const int ctx3 = av1_get_bwd_ref_ctx(t->a, &t->l, by4, bx4,
 1447|   149k|                                                     have_top, have_left);
 1448|   149k|                if (dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   149k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  |  Branch (1448:21): [True: 74.2k, False: 75.3k]
  ------------------
 1449|   149k|                        ts->cdf.m.comp_bwd_ref[0][ctx3]))
 1450|  74.2k|                {
 1451|  74.2k|                    b->ref[1] = 6;
 1452|  75.3k|                } else {
 1453|  75.3k|                    const int ctx4 = av1_get_bwd_ref_1_ctx(t->a, &t->l, by4, bx4,
 1454|  75.3k|                                                           have_top, have_left);
 1455|  75.3k|                    b->ref[1] = 4 + dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  75.3k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1456|  75.3k|                                        ts->cdf.m.comp_bwd_ref[1][ctx4]);
 1457|  75.3k|                }
 1458|   149k|            } else {
 1459|       |                // unidir
 1460|  26.8k|                const int uctx_p = av1_get_uni_p_ctx(t->a, &t->l, by4, bx4,
  ------------------
  |  |  280|  26.8k|#define av1_get_uni_p_ctx av1_get_ref_ctx
  ------------------
 1461|  26.8k|                                                     have_top, have_left);
 1462|  26.8k|                if (dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  26.8k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  |  Branch (1462:21): [True: 5.37k, False: 21.4k]
  ------------------
 1463|  26.8k|                        ts->cdf.m.comp_uni_ref[0][uctx_p]))
 1464|  5.37k|                {
 1465|  5.37k|                    b->ref[0] = 4;
 1466|  5.37k|                    b->ref[1] = 6;
 1467|  21.4k|                } else {
 1468|  21.4k|                    const int uctx_p1 = av1_get_uni_p1_ctx(t->a, &t->l, by4, bx4,
 1469|  21.4k|                                                           have_top, have_left);
 1470|  21.4k|                    b->ref[0] = 0;
 1471|  21.4k|                    b->ref[1] = 1 + dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  21.4k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1472|  21.4k|                                        ts->cdf.m.comp_uni_ref[1][uctx_p1]);
 1473|  21.4k|                    if (b->ref[1] == 2) {
  ------------------
  |  Branch (1473:25): [True: 13.9k, False: 7.54k]
  ------------------
 1474|  13.9k|                        const int uctx_p2 = av1_get_uni_p2_ctx(t->a, &t->l, by4, bx4,
  ------------------
  |  |  281|  13.9k|#define av1_get_uni_p2_ctx av1_get_fwd_ref_2_ctx
  ------------------
 1475|  13.9k|                                                               have_top, have_left);
 1476|  13.9k|                        b->ref[1] += dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  13.9k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1477|  13.9k|                                         ts->cdf.m.comp_uni_ref[2][uctx_p2]);
 1478|  13.9k|                    }
 1479|  21.4k|                }
 1480|  26.8k|            }
 1481|   176k|            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   176k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 176k]
  |  |  ------------------
  |  |   35|   176k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   176k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1482|      0|                printf("Post-refs[%d/%d]: r=%d\n",
 1483|      0|                       b->ref[0], b->ref[1], ts->msac.rng);
 1484|       |
 1485|   176k|            refmvs_candidate mvstack[8];
 1486|   176k|            int n_mvs, ctx;
 1487|   176k|            dav1d_refmvs_find(&t->rt, mvstack, &n_mvs, &ctx,
 1488|   176k|                              (union refmvs_refpair) { .ref = {
 1489|   176k|                                    b->ref[0] + 1, b->ref[1] + 1 }},
 1490|   176k|                              bs, intra_edge_flags, t->by, t->bx);
 1491|       |
 1492|   176k|            b->inter_mode = dav1d_msac_decode_symbol_adapt8(&ts->msac,
  ------------------
  |  |   48|   176k|#define dav1d_msac_decode_symbol_adapt8  dav1d_msac_decode_symbol_adapt8_sse2
  ------------------
 1493|   176k|                                ts->cdf.m.comp_inter_mode[ctx],
 1494|   176k|                                N_COMP_INTER_PRED_MODES - 1);
 1495|   176k|            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   176k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 176k]
  |  |  ------------------
  |  |   35|   176k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   176k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1496|      0|                printf("Post-compintermode[%d,ctx=%d,n_mvs=%d]: r=%d\n",
 1497|      0|                       b->inter_mode, ctx, n_mvs, ts->msac.rng);
 1498|       |
 1499|   176k|            const uint8_t *const im = dav1d_comp_inter_pred_modes[b->inter_mode];
 1500|   176k|            b->drl_idx = NEAREST_DRL;
 1501|   176k|            if (b->inter_mode == NEWMV_NEWMV) {
  ------------------
  |  Branch (1501:17): [True: 37.8k, False: 138k]
  ------------------
 1502|  37.8k|                if (n_mvs > 1) { // NEARER, NEAR or NEARISH
  ------------------
  |  Branch (1502:21): [True: 37.8k, False: 0]
  ------------------
 1503|  37.8k|                    const int drl_ctx_v1 = get_drl_context(mvstack, 0);
 1504|  37.8k|                    b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  37.8k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1505|  37.8k|                                      ts->cdf.m.drl_bit[drl_ctx_v1]);
 1506|  37.8k|                    if (b->drl_idx == NEARER_DRL && n_mvs > 2) {
  ------------------
  |  Branch (1506:25): [True: 25.6k, False: 12.1k]
  |  Branch (1506:53): [True: 7.86k, False: 17.8k]
  ------------------
 1507|  7.86k|                        const int drl_ctx_v2 = get_drl_context(mvstack, 1);
 1508|  7.86k|                        b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  7.86k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1509|  7.86k|                                          ts->cdf.m.drl_bit[drl_ctx_v2]);
 1510|  7.86k|                    }
 1511|  37.8k|                    if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  37.8k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 37.8k]
  |  |  ------------------
  |  |   35|  37.8k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  37.8k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1512|      0|                        printf("Post-drlidx[%d,n_mvs=%d]: r=%d\n",
 1513|      0|                               b->drl_idx, n_mvs, ts->msac.rng);
 1514|  37.8k|                }
 1515|   138k|            } else if (im[0] == NEARMV || im[1] == NEARMV) {
  ------------------
  |  Branch (1515:24): [True: 36.8k, False: 101k]
  |  Branch (1515:43): [True: 5.40k, False: 96.3k]
  ------------------
 1516|  42.2k|                b->drl_idx = NEARER_DRL;
 1517|  42.2k|                if (n_mvs > 2) { // NEAR or NEARISH
  ------------------
  |  Branch (1517:21): [True: 6.63k, False: 35.5k]
  ------------------
 1518|  6.63k|                    const int drl_ctx_v2 = get_drl_context(mvstack, 1);
 1519|  6.63k|                    b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  6.63k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1520|  6.63k|                                      ts->cdf.m.drl_bit[drl_ctx_v2]);
 1521|  6.63k|                    if (b->drl_idx == NEAR_DRL && n_mvs > 3) {
  ------------------
  |  Branch (1521:25): [True: 3.31k, False: 3.32k]
  |  Branch (1521:51): [True: 1.43k, False: 1.87k]
  ------------------
 1522|  1.43k|                        const int drl_ctx_v3 = get_drl_context(mvstack, 2);
 1523|  1.43k|                        b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  1.43k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1524|  1.43k|                                          ts->cdf.m.drl_bit[drl_ctx_v3]);
 1525|  1.43k|                    }
 1526|  6.63k|                    if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  6.63k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 6.63k]
  |  |  ------------------
  |  |   35|  6.63k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  6.63k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1527|      0|                        printf("Post-drlidx[%d,n_mvs=%d]: r=%d\n",
 1528|      0|                               b->drl_idx, n_mvs, ts->msac.rng);
 1529|  6.63k|                }
 1530|  42.2k|            }
 1531|   176k|            assert(b->drl_idx >= NEAREST_DRL && b->drl_idx <= NEARISH_DRL);
  ------------------
  |  Branch (1531:13): [True: 176k, False: 0]
  |  Branch (1531:13): [True: 176k, False: 0]
  ------------------
 1532|       |
 1533|   176k|#define assign_comp_mv(idx) \
 1534|   176k|            switch (im[idx]) { \
 1535|   176k|            case NEARMV: \
 1536|   176k|            case NEARESTMV: \
 1537|   176k|                b->mv[idx] = mvstack[b->drl_idx].mv.mv[idx]; \
 1538|   176k|                fix_mv_precision(f->frame_hdr, &b->mv[idx]); \
 1539|   176k|                break; \
 1540|   176k|            case GLOBALMV: \
 1541|   176k|                has_subpel_filter |= \
 1542|   176k|                    f->frame_hdr->gmv[b->ref[idx]].type == DAV1D_WM_TYPE_TRANSLATION; \
 1543|   176k|                b->mv[idx] = get_gmv_2d(&f->frame_hdr->gmv[b->ref[idx]], \
 1544|   176k|                                        t->bx, t->by, bw4, bh4, f->frame_hdr); \
 1545|   176k|                break; \
 1546|   176k|            case NEWMV: \
 1547|   176k|                b->mv[idx] = mvstack[b->drl_idx].mv.mv[idx]; \
 1548|   176k|                const int mv_prec = f->frame_hdr->hp - f->frame_hdr->force_integer_mv; \
 1549|   176k|                read_mv_residual(ts, &b->mv[idx], mv_prec); \
 1550|   176k|                break; \
 1551|   176k|            }
 1552|   176k|            has_subpel_filter = imin(bw4, bh4) == 1 ||
  ------------------
  |  Branch (1552:33): [True: 0, False: 176k]
  ------------------
 1553|   176k|                                b->inter_mode != GLOBALMV_GLOBALMV;
  ------------------
  |  Branch (1553:33): [True: 160k, False: 15.5k]
  ------------------
 1554|   176k|            assign_comp_mv(0);
  ------------------
  |  | 1534|   176k|            switch (im[idx]) { \
  |  |  ------------------
  |  |  |  Branch (1534:21): [True: 176k, False: 0]
  |  |  ------------------
  |  | 1535|  36.8k|            case NEARMV: \
  |  |  ------------------
  |  |  |  Branch (1535:13): [True: 36.8k, False: 139k]
  |  |  ------------------
  |  | 1536|   107k|            case NEARESTMV: \
  |  |  ------------------
  |  |  |  Branch (1536:13): [True: 71.0k, False: 105k]
  |  |  ------------------
  |  | 1537|   107k|                b->mv[idx] = mvstack[b->drl_idx].mv.mv[idx]; \
  |  | 1538|   107k|                fix_mv_precision(f->frame_hdr, &b->mv[idx]); \
  |  | 1539|   107k|                break; \
  |  | 1540|  36.8k|            case GLOBALMV: \
  |  |  ------------------
  |  |  |  Branch (1540:13): [True: 15.5k, False: 160k]
  |  |  ------------------
  |  | 1541|  15.5k|                has_subpel_filter |= \
  |  | 1542|  15.5k|                    f->frame_hdr->gmv[b->ref[idx]].type == DAV1D_WM_TYPE_TRANSLATION; \
  |  | 1543|  15.5k|                b->mv[idx] = get_gmv_2d(&f->frame_hdr->gmv[b->ref[idx]], \
  |  | 1544|  15.5k|                                        t->bx, t->by, bw4, bh4, f->frame_hdr); \
  |  | 1545|  15.5k|                break; \
  |  | 1546|  52.9k|            case NEWMV: \
  |  |  ------------------
  |  |  |  Branch (1546:13): [True: 52.9k, False: 123k]
  |  |  ------------------
  |  | 1547|  52.9k|                b->mv[idx] = mvstack[b->drl_idx].mv.mv[idx]; \
  |  | 1548|  52.9k|                const int mv_prec = f->frame_hdr->hp - f->frame_hdr->force_integer_mv; \
  |  | 1549|  52.9k|                read_mv_residual(ts, &b->mv[idx], mv_prec); \
  |  | 1550|  52.9k|                break; \
  |  | 1551|   176k|            }
  ------------------
 1555|   176k|            assign_comp_mv(1);
  ------------------
  |  | 1534|   176k|            switch (im[idx]) { \
  |  |  ------------------
  |  |  |  Branch (1534:21): [True: 176k, False: 0]
  |  |  ------------------
  |  | 1535|  36.8k|            case NEARMV: \
  |  |  ------------------
  |  |  |  Branch (1535:13): [True: 36.8k, False: 139k]
  |  |  ------------------
  |  | 1536|   106k|            case NEARESTMV: \
  |  |  ------------------
  |  |  |  Branch (1536:13): [True: 69.9k, False: 106k]
  |  |  ------------------
  |  | 1537|   106k|                b->mv[idx] = mvstack[b->drl_idx].mv.mv[idx]; \
  |  | 1538|   106k|                fix_mv_precision(f->frame_hdr, &b->mv[idx]); \
  |  | 1539|   106k|                break; \
  |  | 1540|  36.8k|            case GLOBALMV: \
  |  |  ------------------
  |  |  |  Branch (1540:13): [True: 15.5k, False: 160k]
  |  |  ------------------
  |  | 1541|  15.5k|                has_subpel_filter |= \
  |  | 1542|  15.5k|                    f->frame_hdr->gmv[b->ref[idx]].type == DAV1D_WM_TYPE_TRANSLATION; \
  |  | 1543|  15.5k|                b->mv[idx] = get_gmv_2d(&f->frame_hdr->gmv[b->ref[idx]], \
  |  | 1544|  15.5k|                                        t->bx, t->by, bw4, bh4, f->frame_hdr); \
  |  | 1545|  15.5k|                break; \
  |  | 1546|  54.0k|            case NEWMV: \
  |  |  ------------------
  |  |  |  Branch (1546:13): [True: 54.0k, False: 122k]
  |  |  ------------------
  |  | 1547|  54.0k|                b->mv[idx] = mvstack[b->drl_idx].mv.mv[idx]; \
  |  | 1548|  54.0k|                const int mv_prec = f->frame_hdr->hp - f->frame_hdr->force_integer_mv; \
  |  | 1549|  54.0k|                read_mv_residual(ts, &b->mv[idx], mv_prec); \
  |  | 1550|  54.0k|                break; \
  |  | 1551|   176k|            }
  ------------------
 1556|   176k|#undef assign_comp_mv
 1557|   176k|            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   176k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 176k]
  |  |  ------------------
  |  |   35|   176k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   176k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1558|      0|                printf("Post-residual_mv[1:y=%d,x=%d,2:y=%d,x=%d]: r=%d\n",
 1559|      0|                       b->mv[0].y, b->mv[0].x, b->mv[1].y, b->mv[1].x,
 1560|      0|                       ts->msac.rng);
 1561|       |
 1562|       |            // jnt_comp vs. seg vs. wedge
 1563|   176k|            int is_segwedge = 0;
 1564|   176k|            if (f->seq_hdr->masked_compound) {
  ------------------
  |  Branch (1564:17): [True: 151k, False: 24.4k]
  ------------------
 1565|   151k|                const int mask_ctx = get_mask_comp_ctx(t->a, &t->l, by4, bx4);
 1566|       |
 1567|   151k|                is_segwedge = dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   151k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1568|   151k|                                  ts->cdf.m.mask_comp[mask_ctx]);
 1569|   151k|                if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   151k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 151k]
  |  |  ------------------
  |  |   35|   151k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   151k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1570|      0|                    printf("Post-segwedge_vs_jntavg[%d,ctx=%d]: r=%d\n",
 1571|      0|                           is_segwedge, mask_ctx, ts->msac.rng);
 1572|   151k|            }
 1573|       |
 1574|   176k|            if (!is_segwedge) {
  ------------------
  |  Branch (1574:17): [True: 126k, False: 50.2k]
  ------------------
 1575|   126k|                if (f->seq_hdr->jnt_comp) {
  ------------------
  |  Branch (1575:21): [True: 89.8k, False: 36.2k]
  ------------------
 1576|  89.8k|                    const int jnt_ctx =
 1577|  89.8k|                        get_jnt_comp_ctx(f->seq_hdr->order_hint_n_bits,
 1578|  89.8k|                                         f->cur.frame_hdr->frame_offset,
 1579|  89.8k|                                         f->refp[b->ref[0]].p.frame_hdr->frame_offset,
 1580|  89.8k|                                         f->refp[b->ref[1]].p.frame_hdr->frame_offset,
 1581|  89.8k|                                         t->a, &t->l, by4, bx4);
 1582|  89.8k|                    b->comp_type = COMP_INTER_WEIGHTED_AVG +
 1583|  89.8k|                                   dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  89.8k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1584|  89.8k|                                       ts->cdf.m.jnt_comp[jnt_ctx]);
 1585|  89.8k|                    if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  89.8k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 89.8k]
  |  |  ------------------
  |  |   35|  89.8k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  89.8k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1586|      0|                        printf("Post-jnt_comp[%d,ctx=%d[ac:%d,ar:%d,lc:%d,lr:%d]]: r=%d\n",
 1587|      0|                               b->comp_type == COMP_INTER_AVG,
 1588|      0|                               jnt_ctx, t->a->comp_type[bx4], t->a->ref[0][bx4],
 1589|      0|                               t->l.comp_type[by4], t->l.ref[0][by4],
 1590|      0|                               ts->msac.rng);
 1591|  89.8k|                } else {
 1592|  36.2k|                    b->comp_type = COMP_INTER_AVG;
 1593|  36.2k|                }
 1594|   126k|            } else {
 1595|  50.2k|                if (wedge_allowed_mask & (1 << bs)) {
  ------------------
  |  Branch (1595:21): [True: 40.8k, False: 9.47k]
  ------------------
 1596|  40.8k|                    const int ctx = dav1d_wedge_ctx_lut[bs];
 1597|  40.8k|                    b->comp_type = COMP_INTER_WEDGE -
 1598|  40.8k|                                   dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  40.8k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1599|  40.8k|                                       ts->cdf.m.wedge_comp[ctx]);
 1600|  40.8k|                    if (b->comp_type == COMP_INTER_WEDGE)
  ------------------
  |  Branch (1600:25): [True: 15.1k, False: 25.6k]
  ------------------
 1601|  15.1k|                        b->wedge_idx = dav1d_msac_decode_symbol_adapt16(&ts->msac,
  ------------------
  |  |   57|  15.1k|#define dav1d_msac_decode_symbol_adapt16(ctx, cdf, symb) ((ctx)->symbol_adapt16(ctx, cdf, symb))
  ------------------
 1602|  40.8k|                                           ts->cdf.m.wedge_idx[ctx], 15);
 1603|  40.8k|                } else {
 1604|  9.47k|                    b->comp_type = COMP_INTER_SEG;
 1605|  9.47k|                }
 1606|  50.2k|                b->mask_sign = dav1d_msac_decode_bool_equi(&ts->msac);
  ------------------
  |  |   53|  50.2k|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
 1607|  50.2k|                if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  50.2k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 50.2k]
  |  |  ------------------
  |  |   35|  50.2k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  50.2k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1608|      0|                    printf("Post-seg/wedge[%d,wedge_idx=%d,sign=%d]: r=%d\n",
 1609|      0|                           b->comp_type == COMP_INTER_WEDGE,
 1610|      0|                           b->wedge_idx, b->mask_sign, ts->msac.rng);
 1611|  50.2k|            }
 1612|   905k|        } else {
 1613|   905k|            b->comp_type = COMP_INTER_NONE;
 1614|       |
 1615|       |            // ref
 1616|   905k|            if (seg && seg->ref > 0) {
  ------------------
  |  Branch (1616:17): [True: 387k, False: 518k]
  |  Branch (1616:24): [True: 284k, False: 102k]
  ------------------
 1617|   284k|                b->ref[0] = seg->ref - 1;
 1618|   620k|            } else if (seg && (seg->globalmv || seg->skip)) {
  ------------------
  |  Branch (1618:24): [True: 102k, False: 518k]
  |  Branch (1618:32): [True: 50.5k, False: 51.7k]
  |  Branch (1618:49): [True: 11.9k, False: 39.8k]
  ------------------
 1619|  62.4k|                b->ref[0] = 0;
 1620|   558k|            } else {
 1621|   558k|                const int ctx1 = av1_get_ref_ctx(t->a, &t->l, by4, bx4,
 1622|   558k|                                                 have_top, have_left);
 1623|   558k|                if (dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   558k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  |  Branch (1623:21): [True: 203k, False: 355k]
  ------------------
 1624|   558k|                                                 ts->cdf.m.ref[0][ctx1]))
 1625|   203k|                {
 1626|   203k|                    const int ctx2 = av1_get_ref_2_ctx(t->a, &t->l, by4, bx4,
  ------------------
  |  |  275|   203k|#define av1_get_ref_2_ctx av1_get_bwd_ref_ctx
  ------------------
 1627|   203k|                                                       have_top, have_left);
 1628|   203k|                    if (dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   203k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  |  Branch (1628:25): [True: 132k, False: 70.2k]
  ------------------
 1629|   203k|                                                     ts->cdf.m.ref[1][ctx2]))
 1630|   132k|                    {
 1631|   132k|                        b->ref[0] = 6;
 1632|   132k|                    } else {
 1633|  70.2k|                        const int ctx3 = av1_get_ref_6_ctx(t->a, &t->l, by4, bx4,
  ------------------
  |  |  279|  70.2k|#define av1_get_ref_6_ctx av1_get_bwd_ref_1_ctx
  ------------------
 1634|  70.2k|                                                           have_top, have_left);
 1635|  70.2k|                        b->ref[0] = 4 + dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  70.2k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1636|  70.2k|                                            ts->cdf.m.ref[5][ctx3]);
 1637|  70.2k|                    }
 1638|   355k|                } else {
 1639|   355k|                    const int ctx2 = av1_get_ref_3_ctx(t->a, &t->l, by4, bx4,
  ------------------
  |  |  276|   355k|#define av1_get_ref_3_ctx av1_get_fwd_ref_ctx
  ------------------
 1640|   355k|                                                       have_top, have_left);
 1641|   355k|                    if (dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   355k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  |  Branch (1641:25): [True: 70.8k, False: 284k]
  ------------------
 1642|   355k|                                                     ts->cdf.m.ref[2][ctx2]))
 1643|  70.8k|                    {
 1644|  70.8k|                        const int ctx3 = av1_get_ref_5_ctx(t->a, &t->l, by4, bx4,
  ------------------
  |  |  278|  70.8k|#define av1_get_ref_5_ctx av1_get_fwd_ref_2_ctx
  ------------------
 1645|  70.8k|                                                           have_top, have_left);
 1646|  70.8k|                        b->ref[0] = 2 + dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  70.8k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1647|  70.8k|                                            ts->cdf.m.ref[4][ctx3]);
 1648|   284k|                    } else {
 1649|   284k|                        const int ctx3 = av1_get_ref_4_ctx(t->a, &t->l, by4, bx4,
  ------------------
  |  |  277|   284k|#define av1_get_ref_4_ctx av1_get_fwd_ref_1_ctx
  ------------------
 1650|   284k|                                                           have_top, have_left);
 1651|   284k|                        b->ref[0] = dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   284k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1652|   284k|                                        ts->cdf.m.ref[3][ctx3]);
 1653|   284k|                    }
 1654|   355k|                }
 1655|   558k|                if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   558k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 558k]
  |  |  ------------------
  |  |   35|   558k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   558k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1656|      0|                    printf("Post-ref[%d]: r=%d\n", b->ref[0], ts->msac.rng);
 1657|   558k|            }
 1658|   905k|            b->ref[1] = -1;
 1659|       |
 1660|   905k|            refmvs_candidate mvstack[8];
 1661|   905k|            int n_mvs, ctx;
 1662|   905k|            dav1d_refmvs_find(&t->rt, mvstack, &n_mvs, &ctx,
 1663|   905k|                              (union refmvs_refpair) { .ref = { b->ref[0] + 1, -1 }},
 1664|   905k|                              bs, intra_edge_flags, t->by, t->bx);
 1665|       |
 1666|       |            // mode parsing and mv derivation from ref_mvs
 1667|   905k|            if ((seg && (seg->skip || seg->globalmv)) ||
  ------------------
  |  Branch (1667:18): [True: 387k, False: 518k]
  |  Branch (1667:26): [True: 321k, False: 65.3k]
  |  Branch (1667:39): [True: 22.4k, False: 42.8k]
  ------------------
 1668|   561k|                dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   561k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  |  Branch (1668:17): [True: 380k, False: 180k]
  ------------------
 1669|   561k|                                             ts->cdf.m.newmv_mode[ctx & 7]))
 1670|   724k|            {
 1671|   724k|                if ((seg && (seg->skip || seg->globalmv)) ||
  ------------------
  |  Branch (1671:22): [True: 372k, False: 352k]
  |  Branch (1671:30): [True: 321k, False: 51.0k]
  |  Branch (1671:43): [True: 22.4k, False: 28.5k]
  ------------------
 1672|   380k|                    !dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   380k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  |  Branch (1672:21): [True: 19.2k, False: 361k]
  ------------------
 1673|   380k|                         ts->cdf.m.globalmv_mode[(ctx >> 3) & 1]))
 1674|   363k|                {
 1675|   363k|                    b->inter_mode = GLOBALMV;
 1676|   363k|                    b->mv[0] = get_gmv_2d(&f->frame_hdr->gmv[b->ref[0]],
 1677|   363k|                                          t->bx, t->by, bw4, bh4, f->frame_hdr);
 1678|   363k|                    has_subpel_filter = imin(bw4, bh4) == 1 ||
  ------------------
  |  Branch (1678:41): [True: 102k, False: 260k]
  ------------------
 1679|   260k|                        f->frame_hdr->gmv[b->ref[0]].type == DAV1D_WM_TYPE_TRANSLATION;
  ------------------
  |  Branch (1679:25): [True: 85.7k, False: 174k]
  ------------------
 1680|   363k|                } else {
 1681|   361k|                    has_subpel_filter = 1;
 1682|   361k|                    if (dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   361k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  |  Branch (1682:25): [True: 166k, False: 195k]
  ------------------
 1683|   361k|                            ts->cdf.m.refmv_mode[(ctx >> 4) & 15]))
 1684|   166k|                    { // NEAREST, NEARER, NEAR or NEARISH
 1685|   166k|                        b->inter_mode = NEARMV;
 1686|   166k|                        b->drl_idx = NEARER_DRL;
 1687|   166k|                        if (n_mvs > 2) { // NEARER, NEAR or NEARISH
  ------------------
  |  Branch (1687:29): [True: 66.8k, False: 99.5k]
  ------------------
 1688|  66.8k|                            const int drl_ctx_v2 = get_drl_context(mvstack, 1);
 1689|  66.8k|                            b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  66.8k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1690|  66.8k|                                              ts->cdf.m.drl_bit[drl_ctx_v2]);
 1691|  66.8k|                            if (b->drl_idx == NEAR_DRL && n_mvs > 3) { // NEAR or NEARISH
  ------------------
  |  Branch (1691:33): [True: 34.8k, False: 31.9k]
  |  Branch (1691:59): [True: 21.0k, False: 13.8k]
  ------------------
 1692|  21.0k|                                const int drl_ctx_v3 =
 1693|  21.0k|                                    get_drl_context(mvstack, 2);
 1694|  21.0k|                                b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  21.0k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1695|  21.0k|                                                  ts->cdf.m.drl_bit[drl_ctx_v3]);
 1696|  21.0k|                            }
 1697|  66.8k|                        }
 1698|   195k|                    } else {
 1699|   195k|                        b->inter_mode = NEARESTMV;
 1700|   195k|                        b->drl_idx = NEAREST_DRL;
 1701|   195k|                    }
 1702|   361k|                    assert(b->drl_idx >= NEAREST_DRL && b->drl_idx <= NEARISH_DRL);
  ------------------
  |  Branch (1702:21): [True: 361k, False: 0]
  |  Branch (1702:21): [True: 361k, False: 0]
  ------------------
 1703|   361k|                    b->mv[0] = mvstack[b->drl_idx].mv.mv[0];
 1704|   361k|                    if (b->drl_idx < NEAR_DRL)
  ------------------
  |  Branch (1704:25): [True: 326k, False: 34.8k]
  ------------------
 1705|   326k|                        fix_mv_precision(f->frame_hdr, &b->mv[0]);
 1706|   361k|                }
 1707|       |
 1708|   724k|                if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   724k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 724k]
  |  |  ------------------
  |  |   35|   724k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   724k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1709|      0|                    printf("Post-intermode[%d,drl=%d,mv=y:%d,x:%d,n_mvs=%d]: r=%d\n",
 1710|      0|                           b->inter_mode, b->drl_idx, b->mv[0].y, b->mv[0].x, n_mvs,
 1711|      0|                           ts->msac.rng);
 1712|   724k|            } else {
 1713|   180k|                has_subpel_filter = 1;
 1714|   180k|                b->inter_mode = NEWMV;
 1715|   180k|                b->drl_idx = NEAREST_DRL;
 1716|   180k|                if (n_mvs > 1) { // NEARER, NEAR or NEARISH
  ------------------
  |  Branch (1716:21): [True: 136k, False: 44.0k]
  ------------------
 1717|   136k|                    const int drl_ctx_v1 = get_drl_context(mvstack, 0);
 1718|   136k|                    b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   136k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1719|   136k|                                      ts->cdf.m.drl_bit[drl_ctx_v1]);
 1720|   136k|                    if (b->drl_idx == NEARER_DRL && n_mvs > 2) { // NEAR or NEARISH
  ------------------
  |  Branch (1720:25): [True: 59.8k, False: 76.9k]
  |  Branch (1720:53): [True: 33.7k, False: 26.0k]
  ------------------
 1721|  33.7k|                        const int drl_ctx_v2 = get_drl_context(mvstack, 1);
 1722|  33.7k|                        b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  33.7k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1723|  33.7k|                                          ts->cdf.m.drl_bit[drl_ctx_v2]);
 1724|  33.7k|                    }
 1725|   136k|                }
 1726|   180k|                assert(b->drl_idx >= NEAREST_DRL && b->drl_idx <= NEARISH_DRL);
  ------------------
  |  Branch (1726:17): [True: 180k, False: 0]
  |  Branch (1726:17): [True: 180k, False: 0]
  ------------------
 1727|   180k|                if (n_mvs > 1) {
  ------------------
  |  Branch (1727:21): [True: 136k, False: 44.0k]
  ------------------
 1728|   136k|                    b->mv[0] = mvstack[b->drl_idx].mv.mv[0];
 1729|   136k|                } else {
 1730|  44.0k|                    assert(!b->drl_idx);
  ------------------
  |  Branch (1730:21): [True: 44.0k, False: 0]
  ------------------
 1731|  44.0k|                    b->mv[0] = mvstack[0].mv.mv[0];
 1732|  44.0k|                    fix_mv_precision(f->frame_hdr, &b->mv[0]);
 1733|  44.0k|                }
 1734|   180k|                if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   180k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 180k]
  |  |  ------------------
  |  |   35|   180k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   180k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1735|      0|                    printf("Post-intermode[%d,drl=%d]: r=%d\n",
 1736|      0|                           b->inter_mode, b->drl_idx, ts->msac.rng);
 1737|   180k|                const int mv_prec = f->frame_hdr->hp - f->frame_hdr->force_integer_mv;
 1738|   180k|                read_mv_residual(ts, &b->mv[0], mv_prec);
 1739|   180k|                if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   180k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 180k]
  |  |  ------------------
  |  |   35|   180k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   180k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1740|      0|                    printf("Post-residualmv[mv=y:%d,x:%d]: r=%d\n",
 1741|      0|                           b->mv[0].y, b->mv[0].x, ts->msac.rng);
 1742|   180k|            }
 1743|       |
 1744|       |            // interintra flags
 1745|   905k|            const int ii_sz_grp = dav1d_ymode_size_context[bs];
 1746|   905k|            if (f->seq_hdr->inter_intra &&
  ------------------
  |  Branch (1746:17): [True: 727k, False: 178k]
  ------------------
 1747|   727k|                interintra_allowed_mask & (1 << bs) &&
  ------------------
  |  Branch (1747:17): [True: 324k, False: 402k]
  ------------------
 1748|   324k|                dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   324k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  |  Branch (1748:17): [True: 48.9k, False: 275k]
  ------------------
 1749|   324k|                                             ts->cdf.m.interintra[ii_sz_grp]))
 1750|  48.9k|            {
 1751|  48.9k|                b->interintra_mode = dav1d_msac_decode_symbol_adapt4(&ts->msac,
  ------------------
  |  |   47|  48.9k|#define dav1d_msac_decode_symbol_adapt4  dav1d_msac_decode_symbol_adapt4_sse2
  ------------------
 1752|  48.9k|                                         ts->cdf.m.interintra_mode[ii_sz_grp],
 1753|  48.9k|                                         N_INTER_INTRA_PRED_MODES - 1);
 1754|  48.9k|                const int wedge_ctx = dav1d_wedge_ctx_lut[bs];
 1755|  48.9k|                b->interintra_type = INTER_INTRA_BLEND +
 1756|  48.9k|                                     dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  48.9k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1757|  48.9k|                                         ts->cdf.m.interintra_wedge[wedge_ctx]);
 1758|  48.9k|                if (b->interintra_type == INTER_INTRA_WEDGE)
  ------------------
  |  Branch (1758:21): [True: 12.6k, False: 36.2k]
  ------------------
 1759|  12.6k|                    b->wedge_idx = dav1d_msac_decode_symbol_adapt16(&ts->msac,
  ------------------
  |  |   57|  12.6k|#define dav1d_msac_decode_symbol_adapt16(ctx, cdf, symb) ((ctx)->symbol_adapt16(ctx, cdf, symb))
  ------------------
 1760|  48.9k|                                       ts->cdf.m.wedge_idx[wedge_ctx], 15);
 1761|   856k|            } else {
 1762|   856k|                b->interintra_type = INTER_INTRA_NONE;
 1763|   856k|            }
 1764|   905k|            if (DEBUG_BLOCK_INFO && f->seq_hdr->inter_intra &&
  ------------------
  |  |   34|   905k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 905k]
  |  |  ------------------
  |  |   35|   905k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   905k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  |  Branch (1764:37): [True: 0, False: 0]
  ------------------
 1765|      0|                interintra_allowed_mask & (1 << bs))
  ------------------
  |  Branch (1765:17): [True: 0, False: 0]
  ------------------
 1766|      0|            {
 1767|      0|                printf("Post-interintra[t=%d,m=%d,w=%d]: r=%d\n",
 1768|      0|                       b->interintra_type, b->interintra_mode,
 1769|      0|                       b->wedge_idx, ts->msac.rng);
 1770|      0|            }
 1771|       |
 1772|       |            // motion variation
 1773|   905k|            if (f->frame_hdr->switchable_motion_mode &&
  ------------------
  |  Branch (1773:17): [True: 837k, False: 67.8k]
  ------------------
 1774|   837k|                b->interintra_type == INTER_INTRA_NONE && imin(bw4, bh4) >= 2 &&
  ------------------
  |  Branch (1774:17): [True: 792k, False: 45.6k]
  |  Branch (1774:59): [True: 493k, False: 298k]
  ------------------
 1775|       |                // is not warped global motion
 1776|   493k|                !(!f->frame_hdr->force_integer_mv && b->inter_mode == GLOBALMV &&
  ------------------
  |  Branch (1776:19): [True: 370k, False: 122k]
  |  Branch (1776:54): [True: 147k, False: 223k]
  ------------------
 1777|   147k|                  f->frame_hdr->gmv[b->ref[0]].type > DAV1D_WM_TYPE_TRANSLATION) &&
  ------------------
  |  Branch (1777:19): [True: 20.8k, False: 126k]
  ------------------
 1778|       |                // has overlappable neighbours
 1779|   472k|                ((have_left && findoddzero(&t->l.intra[by4 + 1], h4 >> 1)) ||
  ------------------
  |  Branch (1779:19): [True: 426k, False: 46.4k]
  |  Branch (1779:32): [True: 410k, False: 15.9k]
  ------------------
 1780|  62.4k|                 (have_top && findoddzero(&t->a->intra[bx4 + 1], w4 >> 1))))
  ------------------
  |  Branch (1780:19): [True: 53.6k, False: 8.80k]
  |  Branch (1780:31): [True: 48.9k, False: 4.64k]
  ------------------
 1781|   459k|            {
 1782|       |                // reaching here means the block allows obmc - check warp by
 1783|       |                // finding matching-ref blocks in top/left edges
 1784|   459k|                uint64_t mask[2] = { 0, 0 };
 1785|   459k|                find_matching_ref(t, intra_edge_flags, bw4, bh4, w4, h4,
 1786|   459k|                                  have_left, have_top, b->ref[0], mask);
 1787|   459k|                const int allow_warp = !f->svc[b->ref[0]][0].scale &&
  ------------------
  |  Branch (1787:40): [True: 319k, False: 139k]
  ------------------
 1788|   319k|                    !f->frame_hdr->force_integer_mv &&
  ------------------
  |  Branch (1788:21): [True: 310k, False: 8.68k]
  ------------------
 1789|   310k|                    f->frame_hdr->warp_motion && (mask[0] | mask[1]);
  ------------------
  |  Branch (1789:21): [True: 245k, False: 64.7k]
  |  Branch (1789:50): [True: 229k, False: 16.2k]
  ------------------
 1790|       |
 1791|   459k|                b->motion_mode = allow_warp ?
  ------------------
  |  Branch (1791:34): [True: 229k, False: 229k]
  ------------------
 1792|   229k|                    dav1d_msac_decode_symbol_adapt4(&ts->msac,
  ------------------
  |  |   47|   229k|#define dav1d_msac_decode_symbol_adapt4  dav1d_msac_decode_symbol_adapt4_sse2
  ------------------
 1793|   229k|                        ts->cdf.m.motion_mode[bs], 2) :
 1794|   459k|                    dav1d_msac_decode_bool_adapt(&ts->msac, ts->cdf.m.obmc[bs]);
  ------------------
  |  |   52|   229k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1795|   459k|                if (b->motion_mode == MM_WARP) {
  ------------------
  |  Branch (1795:21): [True: 78.1k, False: 381k]
  ------------------
 1796|  78.1k|                    has_subpel_filter = 0;
 1797|  78.1k|                    derive_warpmv(t, bw4, bh4, mask, b->mv[0], &t->warpmv);
 1798|  78.1k|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
 1799|  78.1k|                    if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  78.1k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 78.1k]
  |  |  ------------------
  |  |   35|  78.1k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  78.1k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1800|      0|                        printf("[ %c%x %c%x %c%x\n  %c%x %c%x %c%x ]\n"
 1801|      0|                               "alpha=%c%x, beta=%c%x, gamma=%c%x, delta=%c%x, "
 1802|      0|                               "mv=y:%d,x:%d\n",
 1803|      0|                               signabs(t->warpmv.matrix[0]),
  ------------------
  |  | 1798|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (1798:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1804|      0|                               signabs(t->warpmv.matrix[1]),
  ------------------
  |  | 1798|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (1798:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1805|      0|                               signabs(t->warpmv.matrix[2]),
  ------------------
  |  | 1798|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (1798:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1806|      0|                               signabs(t->warpmv.matrix[3]),
  ------------------
  |  | 1798|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (1798:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1807|      0|                               signabs(t->warpmv.matrix[4]),
  ------------------
  |  | 1798|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (1798:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1808|      0|                               signabs(t->warpmv.matrix[5]),
  ------------------
  |  | 1798|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (1798:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1809|      0|                               signabs(t->warpmv.u.p.alpha),
  ------------------
  |  | 1798|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (1798:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1810|      0|                               signabs(t->warpmv.u.p.beta),
  ------------------
  |  | 1798|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (1798:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1811|      0|                               signabs(t->warpmv.u.p.gamma),
  ------------------
  |  | 1798|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (1798:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1812|      0|                               signabs(t->warpmv.u.p.delta),
  ------------------
  |  | 1798|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (1798:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1813|      0|                               b->mv[0].y, b->mv[0].x);
 1814|  78.1k|#undef signabs
 1815|  78.1k|                    if (t->frame_thread.pass) {
  ------------------
  |  Branch (1815:25): [True: 0, False: 78.1k]
  ------------------
 1816|      0|                        if (t->warpmv.type == DAV1D_WM_TYPE_AFFINE) {
  ------------------
  |  Branch (1816:29): [True: 0, False: 0]
  ------------------
 1817|      0|                            b->matrix[0] = t->warpmv.matrix[2] - 0x10000;
 1818|      0|                            b->matrix[1] = t->warpmv.matrix[3];
 1819|      0|                            b->matrix[2] = t->warpmv.matrix[4];
 1820|      0|                            b->matrix[3] = t->warpmv.matrix[5] - 0x10000;
 1821|      0|                        } else {
 1822|      0|                            b->matrix[0] = INT16_MIN;
 1823|      0|                        }
 1824|      0|                    }
 1825|  78.1k|                }
 1826|       |
 1827|   459k|                if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   459k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 459k]
  |  |  ------------------
  |  |   35|   459k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   459k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1828|      0|                    printf("Post-motionmode[%d]: r=%d [mask: 0x%" PRIx64 "/0x%"
 1829|      0|                           PRIx64 "]\n", b->motion_mode, ts->msac.rng, mask[0],
 1830|      0|                            mask[1]);
 1831|   459k|            } else {
 1832|   446k|                b->motion_mode = MM_TRANSLATION;
 1833|   446k|            }
 1834|   905k|        }
 1835|       |
 1836|       |        // subpel filter
 1837|  1.09M|        enum Dav1dFilterMode filter[2];
 1838|  1.09M|        if (f->frame_hdr->subpel_filter_mode == DAV1D_FILTER_SWITCHABLE) {
  ------------------
  |  Branch (1838:13): [True: 430k, False: 667k]
  ------------------
 1839|   430k|            if (has_subpel_filter) {
  ------------------
  |  Branch (1839:17): [True: 280k, False: 149k]
  ------------------
 1840|   280k|                const int comp = b->comp_type != COMP_INTER_NONE;
 1841|   280k|                const int ctx1 = get_filter_ctx(t->a, &t->l, comp, 0, b->ref[0],
 1842|   280k|                                                by4, bx4);
 1843|   280k|                filter[0] = dav1d_msac_decode_symbol_adapt4(&ts->msac,
  ------------------
  |  |   47|   280k|#define dav1d_msac_decode_symbol_adapt4  dav1d_msac_decode_symbol_adapt4_sse2
  ------------------
 1844|   280k|                               ts->cdf.m.filter[0][ctx1],
 1845|   280k|                               DAV1D_N_SWITCHABLE_FILTERS - 1);
 1846|   280k|                if (f->seq_hdr->dual_filter) {
  ------------------
  |  Branch (1846:21): [True: 202k, False: 77.9k]
  ------------------
 1847|   202k|                    const int ctx2 = get_filter_ctx(t->a, &t->l, comp, 1,
 1848|   202k|                                                    b->ref[0], by4, bx4);
 1849|   202k|                    if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   202k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 202k]
  |  |  ------------------
  |  |   35|   202k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   202k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1850|      0|                        printf("Post-subpel_filter1[%d,ctx=%d]: r=%d\n",
 1851|      0|                               filter[0], ctx1, ts->msac.rng);
 1852|   202k|                    filter[1] = dav1d_msac_decode_symbol_adapt4(&ts->msac,
  ------------------
  |  |   47|   202k|#define dav1d_msac_decode_symbol_adapt4  dav1d_msac_decode_symbol_adapt4_sse2
  ------------------
 1853|   202k|                                    ts->cdf.m.filter[1][ctx2],
 1854|   202k|                                    DAV1D_N_SWITCHABLE_FILTERS - 1);
 1855|   202k|                    if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   202k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 202k]
  |  |  ------------------
  |  |   35|   202k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   202k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1856|      0|                        printf("Post-subpel_filter2[%d,ctx=%d]: r=%d\n",
 1857|      0|                               filter[1], ctx2, ts->msac.rng);
 1858|   202k|                } else {
 1859|  77.9k|                    filter[1] = filter[0];
 1860|  77.9k|                    if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  77.9k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 77.9k]
  |  |  ------------------
  |  |   35|  77.9k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  77.9k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1861|      0|                        printf("Post-subpel_filter[%d,ctx=%d]: r=%d\n",
 1862|      0|                               filter[0], ctx1, ts->msac.rng);
 1863|  77.9k|                }
 1864|   280k|            } else {
 1865|   149k|                filter[0] = filter[1] = DAV1D_FILTER_8TAP_REGULAR;
 1866|   149k|            }
 1867|   667k|        } else {
 1868|   667k|            filter[0] = filter[1] = f->frame_hdr->subpel_filter_mode;
 1869|   667k|        }
 1870|  1.09M|        b->filter2d = dav1d_filter_2d[filter[1]][filter[0]];
 1871|       |
 1872|  1.09M|        read_vartx_tree(t, b, bs, bx4, by4);
 1873|       |
 1874|       |        // reconstruction
 1875|  1.09M|        if (t->frame_thread.pass == 1) {
  ------------------
  |  Branch (1875:13): [True: 0, False: 1.09M]
  ------------------
 1876|      0|            f->bd_fn.read_coef_blocks(t, bs, b);
 1877|  1.09M|        } else {
 1878|  1.09M|            if (f->bd_fn.recon_b_inter(t, bs, b)) return -1;
  ------------------
  |  Branch (1878:17): [True: 0, False: 1.09M]
  ------------------
 1879|  1.09M|        }
 1880|       |
 1881|  1.09M|        if (f->frame_hdr->loopfilter.level_y[0] ||
  ------------------
  |  Branch (1881:13): [True: 642k, False: 456k]
  ------------------
 1882|   456k|            f->frame_hdr->loopfilter.level_y[1])
  ------------------
  |  Branch (1882:13): [True: 105k, False: 350k]
  ------------------
 1883|   747k|        {
 1884|   747k|            const int is_globalmv =
 1885|   747k|                b->inter_mode == (is_comp ? GLOBALMV_GLOBALMV : GLOBALMV);
  ------------------
  |  Branch (1885:35): [True: 88.3k, False: 659k]
  ------------------
 1886|   747k|            const uint8_t (*const lf_lvls)[8][2] = (const uint8_t (*)[8][2])
 1887|   747k|                &ts->lflvl[b->seg_id][0][b->ref[0] + 1][!is_globalmv];
 1888|   747k|            const uint16_t tx_split[2] = { b->tx_split0, b->tx_split1 };
 1889|   747k|            enum RectTxfmSize ytx = b->max_ytx, uvtx = b->uvtx;
 1890|   747k|            if (f->frame_hdr->segmentation.lossless[b->seg_id]) {
  ------------------
  |  Branch (1890:17): [True: 4.52k, False: 743k]
  ------------------
 1891|  4.52k|                ytx  = (enum RectTxfmSize) TX_4X4;
 1892|  4.52k|                uvtx = (enum RectTxfmSize) TX_4X4;
 1893|  4.52k|            }
 1894|   747k|            dav1d_create_lf_mask_inter(t->lf_mask, f->lf.level, f->b4_stride, lf_lvls,
 1895|   747k|                                       t->bx, t->by, f->w4, f->h4, b->skip, bs,
 1896|   747k|                                       ytx, tx_split, uvtx, f->cur.p.layout,
 1897|   747k|                                       &t->a->tx_lpf_y[bx4], &t->l.tx_lpf_y[by4],
 1898|   747k|                                       has_chroma ? &t->a->tx_lpf_uv[cbx4] : NULL,
  ------------------
  |  Branch (1898:40): [True: 261k, False: 485k]
  ------------------
 1899|   747k|                                       has_chroma ? &t->l.tx_lpf_uv[cby4] : NULL);
  ------------------
  |  Branch (1899:40): [True: 261k, False: 485k]
  ------------------
 1900|   747k|        }
 1901|       |
 1902|       |        // context updates
 1903|  1.09M|        if (is_comp)
  ------------------
  |  Branch (1903:13): [True: 192k, False: 905k]
  ------------------
 1904|   192k|            splat_tworef_mv(f->c, t, bs, b, bw4, bh4);
 1905|   905k|        else
 1906|   905k|            splat_oneref_mv(f->c, t, bs, b, bw4, bh4);
 1907|  1.09M|        BlockContext *edge = t->a;
 1908|  3.29M|        for (int i = 0, off = bx4; i < 2; i++, off = by4, edge = &t->l) {
  ------------------
  |  Branch (1908:36): [True: 2.19M, False: 1.09M]
  ------------------
 1909|  2.19M|#define set_ctx(rep_macro) \
 1910|  2.19M|            rep_macro(edge->seg_pred, off, seg_pred); \
 1911|  2.19M|            rep_macro(edge->skip_mode, off, b->skip_mode); \
 1912|  2.19M|            rep_macro(edge->intra, off, 0); \
 1913|  2.19M|            rep_macro(edge->skip, off, b->skip); \
 1914|  2.19M|            rep_macro(edge->pal_sz, off, 0); \
 1915|       |            /* see aomedia bug 2183 for why this is outside if (has_chroma) */ \
 1916|  2.19M|            rep_macro(t->pal_sz_uv[i], off, 0); \
 1917|  2.19M|            rep_macro(edge->tx_intra, off, b_dim[2 + i]); \
 1918|  2.19M|            rep_macro(edge->comp_type, off, b->comp_type); \
 1919|  2.19M|            rep_macro(edge->filter[0], off, filter[0]); \
 1920|  2.19M|            rep_macro(edge->filter[1], off, filter[1]); \
 1921|  2.19M|            rep_macro(edge->mode, off, b->inter_mode); \
 1922|  2.19M|            rep_macro(edge->ref[0], off, b->ref[0]); \
 1923|  2.19M|            rep_macro(edge->ref[1], off, ((uint8_t) b->ref[1]))
 1924|  2.19M|            case_set(b_dim[2 + i]);
  ------------------
  |  |   70|  2.19M|    switch (var) { \
  |  |   71|   409k|    case 0: set_ctx(set_ctx1); break; \
  |  |  ------------------
  |  |  |  | 1910|   409k|            rep_macro(edge->seg_pred, off, seg_pred); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   409k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   409k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1911|   409k|            rep_macro(edge->skip_mode, off, b->skip_mode); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   409k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   409k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1912|   409k|            rep_macro(edge->intra, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   409k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   409k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1913|   409k|            rep_macro(edge->skip, off, b->skip); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   409k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   409k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1914|   409k|            rep_macro(edge->pal_sz, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   409k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   409k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1915|   409k|            /* see aomedia bug 2183 for why this is outside if (has_chroma) */ \
  |  |  |  | 1916|   409k|            rep_macro(t->pal_sz_uv[i], off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   409k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   409k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1917|   409k|            rep_macro(edge->tx_intra, off, b_dim[2 + i]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   409k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   409k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1918|   409k|            rep_macro(edge->comp_type, off, b->comp_type); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   409k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   409k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1919|   409k|            rep_macro(edge->filter[0], off, filter[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   409k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   409k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1920|   409k|            rep_macro(edge->filter[1], off, filter[1]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   409k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   409k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1921|   409k|            rep_macro(edge->mode, off, b->inter_mode); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   409k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   409k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1922|   409k|            rep_macro(edge->ref[0], off, b->ref[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   409k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   409k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1923|   409k|            rep_macro(edge->ref[1], off, ((uint8_t) b->ref[1]))
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   409k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   409k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (71:5): [True: 409k, False: 1.78M]
  |  |  ------------------
  |  |   72|   707k|    case 1: set_ctx(set_ctx2); break; \
  |  |  ------------------
  |  |  |  | 1910|   707k|            rep_macro(edge->seg_pred, off, seg_pred); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   707k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   707k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1911|   707k|            rep_macro(edge->skip_mode, off, b->skip_mode); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   707k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   707k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1912|   707k|            rep_macro(edge->intra, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   707k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   707k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1913|   707k|            rep_macro(edge->skip, off, b->skip); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   707k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   707k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1914|   707k|            rep_macro(edge->pal_sz, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   707k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   707k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1915|   707k|            /* see aomedia bug 2183 for why this is outside if (has_chroma) */ \
  |  |  |  | 1916|   707k|            rep_macro(t->pal_sz_uv[i], off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   707k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   707k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1917|   707k|            rep_macro(edge->tx_intra, off, b_dim[2 + i]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   707k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   707k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1918|   707k|            rep_macro(edge->comp_type, off, b->comp_type); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   707k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   707k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1919|   707k|            rep_macro(edge->filter[0], off, filter[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   707k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   707k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1920|   707k|            rep_macro(edge->filter[1], off, filter[1]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   707k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   707k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1921|   707k|            rep_macro(edge->mode, off, b->inter_mode); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   707k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   707k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1922|   707k|            rep_macro(edge->ref[0], off, b->ref[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   707k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   707k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1923|   707k|            rep_macro(edge->ref[1], off, ((uint8_t) b->ref[1]))
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   707k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   707k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (72:5): [True: 707k, False: 1.48M]
  |  |  ------------------
  |  |   73|   567k|    case 2: set_ctx(set_ctx4); break; \
  |  |  ------------------
  |  |  |  | 1910|   567k|            rep_macro(edge->seg_pred, off, seg_pred); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   567k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   567k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1911|   567k|            rep_macro(edge->skip_mode, off, b->skip_mode); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   567k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   567k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1912|   567k|            rep_macro(edge->intra, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   567k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   567k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1913|   567k|            rep_macro(edge->skip, off, b->skip); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   567k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   567k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1914|   567k|            rep_macro(edge->pal_sz, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   567k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   567k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1915|   567k|            /* see aomedia bug 2183 for why this is outside if (has_chroma) */ \
  |  |  |  | 1916|   567k|            rep_macro(t->pal_sz_uv[i], off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   567k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   567k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1917|   567k|            rep_macro(edge->tx_intra, off, b_dim[2 + i]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   567k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   567k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1918|   567k|            rep_macro(edge->comp_type, off, b->comp_type); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   567k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   567k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1919|   567k|            rep_macro(edge->filter[0], off, filter[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   567k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   567k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1920|   567k|            rep_macro(edge->filter[1], off, filter[1]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   567k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   567k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1921|   567k|            rep_macro(edge->mode, off, b->inter_mode); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   567k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   567k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1922|   567k|            rep_macro(edge->ref[0], off, b->ref[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   567k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   567k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1923|   567k|            rep_macro(edge->ref[1], off, ((uint8_t) b->ref[1]))
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   567k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   567k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (73:5): [True: 567k, False: 1.62M]
  |  |  ------------------
  |  |   74|   227k|    case 3: set_ctx(set_ctx8); break; \
  |  |  ------------------
  |  |  |  | 1910|   227k|            rep_macro(edge->seg_pred, off, seg_pred); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   227k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   227k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1911|   227k|            rep_macro(edge->skip_mode, off, b->skip_mode); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   227k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   227k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1912|   227k|            rep_macro(edge->intra, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   227k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   227k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1913|   227k|            rep_macro(edge->skip, off, b->skip); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   227k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   227k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1914|   227k|            rep_macro(edge->pal_sz, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   227k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   227k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1915|   227k|            /* see aomedia bug 2183 for why this is outside if (has_chroma) */ \
  |  |  |  | 1916|   227k|            rep_macro(t->pal_sz_uv[i], off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   227k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   227k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1917|   227k|            rep_macro(edge->tx_intra, off, b_dim[2 + i]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   227k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   227k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1918|   227k|            rep_macro(edge->comp_type, off, b->comp_type); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   227k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   227k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1919|   227k|            rep_macro(edge->filter[0], off, filter[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   227k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   227k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1920|   227k|            rep_macro(edge->filter[1], off, filter[1]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   227k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   227k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1921|   227k|            rep_macro(edge->mode, off, b->inter_mode); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   227k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   227k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1922|   227k|            rep_macro(edge->ref[0], off, b->ref[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   227k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   227k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1923|   227k|            rep_macro(edge->ref[1], off, ((uint8_t) b->ref[1]))
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   227k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   227k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (74:5): [True: 227k, False: 1.96M]
  |  |  ------------------
  |  |   75|   210k|    case 4: set_ctx(set_ctx16); break; \
  |  |  ------------------
  |  |  |  | 1910|   210k|            rep_macro(edge->seg_pred, off, seg_pred); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   210k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   210k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   210k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   210k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 210k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1911|   210k|            rep_macro(edge->skip_mode, off, b->skip_mode); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   210k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   210k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   210k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   210k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 210k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1912|   210k|            rep_macro(edge->intra, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   210k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   210k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   210k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   210k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 210k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1913|   210k|            rep_macro(edge->skip, off, b->skip); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   210k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   210k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   210k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   210k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 210k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1914|   210k|            rep_macro(edge->pal_sz, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   210k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   210k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   210k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   210k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 210k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1915|   210k|            /* see aomedia bug 2183 for why this is outside if (has_chroma) */ \
  |  |  |  | 1916|   210k|            rep_macro(t->pal_sz_uv[i], off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   210k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   210k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   210k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   210k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 210k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1917|   210k|            rep_macro(edge->tx_intra, off, b_dim[2 + i]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   210k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   210k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   210k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   210k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 210k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1918|   210k|            rep_macro(edge->comp_type, off, b->comp_type); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   210k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   210k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   210k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   210k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 210k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1919|   210k|            rep_macro(edge->filter[0], off, filter[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   210k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   210k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   210k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   210k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 210k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1920|   210k|            rep_macro(edge->filter[1], off, filter[1]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   210k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   210k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   210k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   210k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 210k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1921|   210k|            rep_macro(edge->mode, off, b->inter_mode); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   210k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   210k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   210k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   210k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 210k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1922|   210k|            rep_macro(edge->ref[0], off, b->ref[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   210k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   210k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   210k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   210k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 210k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1923|   210k|            rep_macro(edge->ref[1], off, ((uint8_t) b->ref[1]))
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   210k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   210k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   210k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   210k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 210k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (75:5): [True: 210k, False: 1.98M]
  |  |  ------------------
  |  |   76|  74.8k|    case 5: set_ctx(set_ctx32); break; \
  |  |  ------------------
  |  |  |  | 1910|  74.8k|            rep_macro(edge->seg_pred, off, seg_pred); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  74.8k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  74.8k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  74.8k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  74.8k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 74.8k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1911|  74.8k|            rep_macro(edge->skip_mode, off, b->skip_mode); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  74.8k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  74.8k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  74.8k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  74.8k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 74.8k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1912|  74.8k|            rep_macro(edge->intra, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  74.8k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  74.8k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  74.8k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  74.8k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 74.8k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1913|  74.8k|            rep_macro(edge->skip, off, b->skip); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  74.8k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  74.8k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  74.8k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  74.8k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 74.8k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1914|  74.8k|            rep_macro(edge->pal_sz, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  74.8k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  74.8k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  74.8k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  74.8k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 74.8k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1915|  74.8k|            /* see aomedia bug 2183 for why this is outside if (has_chroma) */ \
  |  |  |  | 1916|  74.8k|            rep_macro(t->pal_sz_uv[i], off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  74.8k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  74.8k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  74.8k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  74.8k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 74.8k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1917|  74.8k|            rep_macro(edge->tx_intra, off, b_dim[2 + i]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  74.8k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  74.8k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  74.8k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  74.8k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 74.8k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1918|  74.8k|            rep_macro(edge->comp_type, off, b->comp_type); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  74.8k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  74.8k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  74.8k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  74.8k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 74.8k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1919|  74.8k|            rep_macro(edge->filter[0], off, filter[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  74.8k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  74.8k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  74.8k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  74.8k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 74.8k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1920|  74.8k|            rep_macro(edge->filter[1], off, filter[1]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  74.8k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  74.8k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  74.8k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  74.8k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 74.8k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1921|  74.8k|            rep_macro(edge->mode, off, b->inter_mode); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  74.8k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  74.8k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  74.8k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  74.8k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 74.8k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1922|  74.8k|            rep_macro(edge->ref[0], off, b->ref[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  74.8k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  74.8k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  74.8k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  74.8k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 74.8k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1923|  74.8k|            rep_macro(edge->ref[1], off, ((uint8_t) b->ref[1]))
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  74.8k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  74.8k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  74.8k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  74.8k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 74.8k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (76:5): [True: 74.8k, False: 2.12M]
  |  |  ------------------
  |  |   77|      0|    default: assert(0); \
  |  |  ------------------
  |  |  |  Branch (77:5): [True: 0, False: 2.19M]
  |  |  ------------------
  |  |   78|  2.19M|    }
  ------------------
  |  Branch (1924:13): [Folded, False: 0]
  ------------------
 1925|  2.19M|#undef set_ctx
 1926|  2.19M|        }
 1927|  1.09M|        if (has_chroma) {
  ------------------
  |  Branch (1927:13): [True: 557k, False: 541k]
  ------------------
 1928|   557k|            dav1d_memset_pow2[ulog2(cbw4)](&t->a->uvmode[cbx4], DC_PRED);
 1929|   557k|            dav1d_memset_pow2[ulog2(cbh4)](&t->l.uvmode[cby4], DC_PRED);
 1930|   557k|        }
 1931|  1.09M|    }
 1932|       |
 1933|       |    // update contexts
 1934|  3.91M|    if (f->frame_hdr->segmentation.enabled &&
  ------------------
  |  Branch (1934:9): [True: 1.15M, False: 2.75M]
  ------------------
 1935|  1.15M|        f->frame_hdr->segmentation.update_map)
  ------------------
  |  Branch (1935:9): [True: 892k, False: 262k]
  ------------------
 1936|   892k|    {
 1937|   892k|        uint8_t *seg_ptr = &f->cur_segmap[t->by * f->b4_stride + t->bx];
 1938|   892k|#define set_ctx(rep_macro) \
 1939|   892k|        for (int y = 0; y < bh4; y++) { \
 1940|   892k|            rep_macro(seg_ptr, 0, b->seg_id); \
 1941|   892k|            seg_ptr += f->b4_stride; \
 1942|   892k|        }
 1943|   892k|        case_set(b_dim[2]);
  ------------------
  |  |   70|   892k|    switch (var) { \
  |  |   71|   254k|    case 0: set_ctx(set_ctx1); break; \
  |  |  ------------------
  |  |  |  | 1939|   623k|        for (int y = 0; y < bh4; y++) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (1939:25): [True: 369k, False: 254k]
  |  |  |  |  ------------------
  |  |  |  | 1940|   369k|            rep_macro(seg_ptr, 0, b->seg_id); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   369k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   369k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1941|   369k|            seg_ptr += f->b4_stride; \
  |  |  |  | 1942|   369k|        }
  |  |  ------------------
  |  |  |  Branch (71:5): [True: 254k, False: 638k]
  |  |  ------------------
  |  |   72|   171k|    case 1: set_ctx(set_ctx2); break; \
  |  |  ------------------
  |  |  |  | 1939|   677k|        for (int y = 0; y < bh4; y++) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (1939:25): [True: 505k, False: 171k]
  |  |  |  |  ------------------
  |  |  |  | 1940|   505k|            rep_macro(seg_ptr, 0, b->seg_id); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   505k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   505k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1941|   505k|            seg_ptr += f->b4_stride; \
  |  |  |  | 1942|   505k|        }
  |  |  ------------------
  |  |  |  Branch (72:5): [True: 171k, False: 721k]
  |  |  ------------------
  |  |   73|   190k|    case 2: set_ctx(set_ctx4); break; \
  |  |  ------------------
  |  |  |  | 1939|   845k|        for (int y = 0; y < bh4; y++) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (1939:25): [True: 654k, False: 190k]
  |  |  |  |  ------------------
  |  |  |  | 1940|   654k|            rep_macro(seg_ptr, 0, b->seg_id); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   654k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   654k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1941|   654k|            seg_ptr += f->b4_stride; \
  |  |  |  | 1942|   654k|        }
  |  |  ------------------
  |  |  |  Branch (73:5): [True: 190k, False: 702k]
  |  |  ------------------
  |  |   74|  83.0k|    case 3: set_ctx(set_ctx8); break; \
  |  |  ------------------
  |  |  |  | 1939|   542k|        for (int y = 0; y < bh4; y++) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (1939:25): [True: 459k, False: 83.0k]
  |  |  |  |  ------------------
  |  |  |  | 1940|   459k|            rep_macro(seg_ptr, 0, b->seg_id); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   459k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   459k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1941|   459k|            seg_ptr += f->b4_stride; \
  |  |  |  | 1942|   459k|        }
  |  |  ------------------
  |  |  |  Branch (74:5): [True: 83.0k, False: 809k]
  |  |  ------------------
  |  |   75|   118k|    case 4: set_ctx(set_ctx16); break; \
  |  |  ------------------
  |  |  |  | 1939|  1.94M|        for (int y = 0; y < bh4; y++) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (1939:25): [True: 1.82M, False: 118k]
  |  |  |  |  ------------------
  |  |  |  | 1940|  1.82M|            rep_macro(seg_ptr, 0, b->seg_id); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|  1.82M|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|  1.82M|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|  1.82M|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|  1.82M|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 1.82M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1941|  1.82M|            seg_ptr += f->b4_stride; \
  |  |  |  | 1942|  1.82M|        }
  |  |  ------------------
  |  |  |  Branch (75:5): [True: 118k, False: 774k]
  |  |  ------------------
  |  |   76|  75.0k|    case 5: set_ctx(set_ctx32); break; \
  |  |  ------------------
  |  |  |  | 1939|  2.44M|        for (int y = 0; y < bh4; y++) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (1939:25): [True: 2.37M, False: 75.0k]
  |  |  |  |  ------------------
  |  |  |  | 1940|  2.37M|            rep_macro(seg_ptr, 0, b->seg_id); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  2.37M|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  2.37M|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  2.37M|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  2.37M|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 2.37M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1941|  2.37M|            seg_ptr += f->b4_stride; \
  |  |  |  | 1942|  2.37M|        }
  |  |  ------------------
  |  |  |  Branch (76:5): [True: 75.0k, False: 817k]
  |  |  ------------------
  |  |   77|      0|    default: assert(0); \
  |  |  ------------------
  |  |  |  Branch (77:5): [True: 0, False: 892k]
  |  |  ------------------
  |  |   78|   892k|    }
  ------------------
  |  Branch (1943:9): [Folded, False: 0]
  ------------------
 1944|   892k|#undef set_ctx
 1945|   892k|    }
 1946|  3.91M|    if (!b->skip) {
  ------------------
  |  Branch (1946:9): [True: 2.06M, False: 1.84M]
  ------------------
 1947|  2.06M|        uint16_t (*noskip_mask)[2] = &t->lf_mask->noskip_mask[by4 >> 1];
 1948|  2.06M|        const unsigned mask = (~0U >> (32 - bw4)) << (bx4 & 15);
 1949|  2.06M|        const int bx_idx = (bx4 & 16) >> 4;
 1950|  6.45M|        for (int y = 0; y < bh4; y += 2, noskip_mask++) {
  ------------------
  |  Branch (1950:25): [True: 4.39M, False: 2.06M]
  ------------------
 1951|  4.39M|            (*noskip_mask)[bx_idx] |= mask;
 1952|  4.39M|            if (bw4 == 32) // this should be mask >> 16, but it's 0xffffffff anyway
  ------------------
  |  Branch (1952:17): [True: 488k, False: 3.90M]
  ------------------
 1953|   488k|                (*noskip_mask)[1] |= mask;
 1954|  4.39M|        }
 1955|  2.06M|    }
 1956|       |
 1957|  3.91M|    if (t->frame_thread.pass == 1 && !b->intra && IS_INTER_OR_SWITCH(f->frame_hdr)) {
  ------------------
  |  |   36|      0|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  |  Branch (1957:9): [True: 0, False: 3.91M]
  |  Branch (1957:38): [True: 0, False: 0]
  ------------------
 1958|      0|        const int sby = (t->by - ts->tiling.row_start) >> f->sb_shift;
 1959|      0|        int (*const lowest_px)[2] = ts->lowest_pixel[sby];
 1960|       |
 1961|       |        // keep track of motion vectors for each reference
 1962|      0|        if (b->comp_type == COMP_INTER_NONE) {
  ------------------
  |  Branch (1962:13): [True: 0, False: 0]
  ------------------
 1963|       |            // y
 1964|      0|            if (imin(bw4, bh4) > 1 &&
  ------------------
  |  Branch (1964:17): [True: 0, False: 0]
  ------------------
 1965|      0|                ((b->inter_mode == GLOBALMV && f->gmv_warp_allowed[b->ref[0]]) ||
  ------------------
  |  Branch (1965:19): [True: 0, False: 0]
  |  Branch (1965:48): [True: 0, False: 0]
  ------------------
 1966|      0|                 (b->motion_mode == MM_WARP && t->warpmv.type > DAV1D_WM_TYPE_TRANSLATION)))
  ------------------
  |  Branch (1966:19): [True: 0, False: 0]
  |  Branch (1966:48): [True: 0, False: 0]
  ------------------
 1967|      0|            {
 1968|      0|                affine_lowest_px_luma(t, &lowest_px[b->ref[0]][0], b_dim,
 1969|      0|                                      b->motion_mode == MM_WARP ? &t->warpmv :
  ------------------
  |  Branch (1969:39): [True: 0, False: 0]
  ------------------
 1970|      0|                                      &f->frame_hdr->gmv[b->ref[0]]);
 1971|      0|            } else {
 1972|      0|                mc_lowest_px(&lowest_px[b->ref[0]][0], t->by, bh4, b->mv[0].y,
 1973|      0|                             0, &f->svc[b->ref[0]][1]);
 1974|      0|                if (b->motion_mode == MM_OBMC) {
  ------------------
  |  Branch (1974:21): [True: 0, False: 0]
  ------------------
 1975|      0|                    obmc_lowest_px(t, lowest_px, 0, b_dim, bx4, by4, w4, h4);
 1976|      0|                }
 1977|      0|            }
 1978|       |
 1979|       |            // uv
 1980|      0|            if (has_chroma) {
  ------------------
  |  Branch (1980:17): [True: 0, False: 0]
  ------------------
 1981|       |                // sub8x8 derivation
 1982|      0|                int is_sub8x8 = bw4 == ss_hor || bh4 == ss_ver;
  ------------------
  |  Branch (1982:33): [True: 0, False: 0]
  |  Branch (1982:50): [True: 0, False: 0]
  ------------------
 1983|      0|                refmvs_block *const *r;
 1984|      0|                if (is_sub8x8) {
  ------------------
  |  Branch (1984:21): [True: 0, False: 0]
  ------------------
 1985|      0|                    assert(ss_hor == 1);
  ------------------
  |  Branch (1985:21): [True: 0, False: 0]
  ------------------
 1986|      0|                    r = &t->rt.r[(t->by & 31) + 5];
 1987|      0|                    if (bw4 == 1) is_sub8x8 &= r[0][t->bx - 1].ref.ref[0] > 0;
  ------------------
  |  Branch (1987:25): [True: 0, False: 0]
  ------------------
 1988|      0|                    if (bh4 == ss_ver) is_sub8x8 &= r[-1][t->bx].ref.ref[0] > 0;
  ------------------
  |  Branch (1988:25): [True: 0, False: 0]
  ------------------
 1989|      0|                    if (bw4 == 1 && bh4 == ss_ver)
  ------------------
  |  Branch (1989:25): [True: 0, False: 0]
  |  Branch (1989:37): [True: 0, False: 0]
  ------------------
 1990|      0|                        is_sub8x8 &= r[-1][t->bx - 1].ref.ref[0] > 0;
 1991|      0|                }
 1992|       |
 1993|       |                // chroma prediction
 1994|      0|                if (is_sub8x8) {
  ------------------
  |  Branch (1994:21): [True: 0, False: 0]
  ------------------
 1995|      0|                    assert(ss_hor == 1);
  ------------------
  |  Branch (1995:21): [True: 0, False: 0]
  ------------------
 1996|      0|                    if (bw4 == 1 && bh4 == ss_ver) {
  ------------------
  |  Branch (1996:25): [True: 0, False: 0]
  |  Branch (1996:37): [True: 0, False: 0]
  ------------------
 1997|      0|                        const refmvs_block *const rr = &r[-1][t->bx - 1];
 1998|      0|                        mc_lowest_px(&lowest_px[rr->ref.ref[0] - 1][1],
 1999|      0|                                     t->by - 1, bh4, rr->mv.mv[0].y, ss_ver,
 2000|      0|                                     &f->svc[rr->ref.ref[0] - 1][1]);
 2001|      0|                    }
 2002|      0|                    if (bw4 == 1) {
  ------------------
  |  Branch (2002:25): [True: 0, False: 0]
  ------------------
 2003|      0|                        const refmvs_block *const rr = &r[0][t->bx - 1];
 2004|      0|                        mc_lowest_px(&lowest_px[rr->ref.ref[0] - 1][1],
 2005|      0|                                     t->by, bh4, rr->mv.mv[0].y, ss_ver,
 2006|      0|                                     &f->svc[rr->ref.ref[0] - 1][1]);
 2007|      0|                    }
 2008|      0|                    if (bh4 == ss_ver) {
  ------------------
  |  Branch (2008:25): [True: 0, False: 0]
  ------------------
 2009|      0|                        const refmvs_block *const rr = &r[-1][t->bx];
 2010|      0|                        mc_lowest_px(&lowest_px[rr->ref.ref[0] - 1][1],
 2011|      0|                                     t->by - 1, bh4, rr->mv.mv[0].y, ss_ver,
 2012|      0|                                     &f->svc[rr->ref.ref[0] - 1][1]);
 2013|      0|                    }
 2014|      0|                    mc_lowest_px(&lowest_px[b->ref[0]][1], t->by, bh4,
 2015|      0|                                 b->mv[0].y, ss_ver, &f->svc[b->ref[0]][1]);
 2016|      0|                } else {
 2017|      0|                    if (imin(cbw4, cbh4) > 1 &&
  ------------------
  |  Branch (2017:25): [True: 0, False: 0]
  ------------------
 2018|      0|                        ((b->inter_mode == GLOBALMV && f->gmv_warp_allowed[b->ref[0]]) ||
  ------------------
  |  Branch (2018:27): [True: 0, False: 0]
  |  Branch (2018:56): [True: 0, False: 0]
  ------------------
 2019|      0|                         (b->motion_mode == MM_WARP && t->warpmv.type > DAV1D_WM_TYPE_TRANSLATION)))
  ------------------
  |  Branch (2019:27): [True: 0, False: 0]
  |  Branch (2019:56): [True: 0, False: 0]
  ------------------
 2020|      0|                    {
 2021|      0|                        affine_lowest_px_chroma(t, &lowest_px[b->ref[0]][1], b_dim,
 2022|      0|                                                b->motion_mode == MM_WARP ? &t->warpmv :
  ------------------
  |  Branch (2022:49): [True: 0, False: 0]
  ------------------
 2023|      0|                                                &f->frame_hdr->gmv[b->ref[0]]);
 2024|      0|                    } else {
 2025|      0|                        mc_lowest_px(&lowest_px[b->ref[0]][1],
 2026|      0|                                     t->by & ~ss_ver, bh4 << (bh4 == ss_ver),
 2027|      0|                                     b->mv[0].y, ss_ver, &f->svc[b->ref[0]][1]);
 2028|      0|                        if (b->motion_mode == MM_OBMC) {
  ------------------
  |  Branch (2028:29): [True: 0, False: 0]
  ------------------
 2029|      0|                            obmc_lowest_px(t, lowest_px, 1, b_dim, bx4, by4, w4, h4);
 2030|      0|                        }
 2031|      0|                    }
 2032|      0|                }
 2033|      0|            }
 2034|      0|        } else {
 2035|       |            // y
 2036|      0|            for (int i = 0; i < 2; i++) {
  ------------------
  |  Branch (2036:29): [True: 0, False: 0]
  ------------------
 2037|      0|                if (b->inter_mode == GLOBALMV_GLOBALMV && f->gmv_warp_allowed[b->ref[i]]) {
  ------------------
  |  Branch (2037:21): [True: 0, False: 0]
  |  Branch (2037:59): [True: 0, False: 0]
  ------------------
 2038|      0|                    affine_lowest_px_luma(t, &lowest_px[b->ref[i]][0], b_dim,
 2039|      0|                                          &f->frame_hdr->gmv[b->ref[i]]);
 2040|      0|                } else {
 2041|      0|                    mc_lowest_px(&lowest_px[b->ref[i]][0], t->by, bh4,
 2042|      0|                                 b->mv[i].y, 0, &f->svc[b->ref[i]][1]);
 2043|      0|                }
 2044|      0|            }
 2045|       |
 2046|       |            // uv
 2047|      0|            if (has_chroma) for (int i = 0; i < 2; i++) {
  ------------------
  |  Branch (2047:17): [True: 0, False: 0]
  |  Branch (2047:45): [True: 0, False: 0]
  ------------------
 2048|      0|                if (b->inter_mode == GLOBALMV_GLOBALMV &&
  ------------------
  |  Branch (2048:21): [True: 0, False: 0]
  ------------------
 2049|      0|                    imin(cbw4, cbh4) > 1 && f->gmv_warp_allowed[b->ref[i]])
  ------------------
  |  Branch (2049:21): [True: 0, False: 0]
  |  Branch (2049:45): [True: 0, False: 0]
  ------------------
 2050|      0|                {
 2051|      0|                    affine_lowest_px_chroma(t, &lowest_px[b->ref[i]][1], b_dim,
 2052|      0|                                            &f->frame_hdr->gmv[b->ref[i]]);
 2053|      0|                } else {
 2054|      0|                    mc_lowest_px(&lowest_px[b->ref[i]][1], t->by, bh4,
 2055|      0|                                 b->mv[i].y, ss_ver, &f->svc[b->ref[i]][1]);
 2056|      0|                }
 2057|      0|            }
 2058|      0|        }
 2059|      0|    }
 2060|       |
 2061|  3.91M|    return 0;
 2062|  3.91M|}
decode.c:get_prev_frame_segid:
  499|   170k|{
  500|   170k|    assert(f->frame_hdr->primary_ref_frame != DAV1D_PRIMARY_REF_NONE);
  ------------------
  |  Branch (500:5): [True: 170k, False: 0]
  ------------------
  501|       |
  502|   170k|    unsigned seg_id = 8;
  503|   170k|    ref_seg_map += by * stride + bx;
  504|   187k|    do {
  505|  1.34M|        for (int x = 0; x < w4; x++)
  ------------------
  |  Branch (505:25): [True: 1.15M, False: 187k]
  ------------------
  506|  1.15M|            seg_id = imin(seg_id, ref_seg_map[x]);
  507|   187k|        ref_seg_map += stride;
  508|   187k|    } while (--h4 > 0 && seg_id);
  ------------------
  |  Branch (508:14): [True: 148k, False: 39.2k]
  |  Branch (508:26): [True: 17.3k, False: 131k]
  ------------------
  509|   170k|    assert(seg_id < 8);
  ------------------
  |  Branch (509:5): [True: 170k, False: 0]
  ------------------
  510|       |
  511|   170k|    return seg_id;
  512|   170k|}
decode.c:neg_deinterleave:
  169|   642k|static int neg_deinterleave(int diff, int ref, int max) {
  170|   642k|    if (!ref) return diff;
  ------------------
  |  Branch (170:9): [True: 368k, False: 274k]
  ------------------
  171|   274k|    if (ref >= (max - 1)) return max - diff - 1;
  ------------------
  |  Branch (171:9): [True: 59.4k, False: 215k]
  ------------------
  172|   215k|    if (2 * ref < max) {
  ------------------
  |  Branch (172:9): [True: 133k, False: 82.2k]
  ------------------
  173|   133k|        if (diff <= 2 * ref) {
  ------------------
  |  Branch (173:13): [True: 108k, False: 24.6k]
  ------------------
  174|   108k|            if (diff & 1)
  ------------------
  |  Branch (174:17): [True: 11.7k, False: 96.8k]
  ------------------
  175|  11.7k|                return ref + ((diff + 1) >> 1);
  176|  96.8k|            else
  177|  96.8k|                return ref - (diff >> 1);
  178|   108k|        }
  179|  24.6k|        return diff;
  180|   133k|    } else {
  181|  82.2k|        if (diff <= 2 * (max - ref - 1)) {
  ------------------
  |  Branch (181:13): [True: 69.2k, False: 13.0k]
  ------------------
  182|  69.2k|            if (diff & 1)
  ------------------
  |  Branch (182:17): [True: 9.30k, False: 59.9k]
  ------------------
  183|  9.30k|                return ref + ((diff + 1) >> 1);
  184|  59.9k|            else
  185|  59.9k|                return ref - (diff >> 1);
  186|  69.2k|        }
  187|  13.0k|        return max - (diff + 1);
  188|  82.2k|    }
  189|   215k|}
decode.c:read_pal_indices:
  419|  76.1k|{
  420|  76.1k|    Dav1dTileState *const ts = t->ts;
  421|  76.1k|    const ptrdiff_t stride = bw4 * 4;
  422|  76.1k|    assert(pal_idx);
  ------------------
  |  Branch (422:5): [True: 76.1k, False: 0]
  ------------------
  423|  76.1k|    uint8_t *const pal_tmp = t->scratch.pal_idx_uv;
  424|  76.1k|    pal_tmp[0] = dav1d_msac_decode_uniform(&ts->msac, pal_sz);
  425|  76.1k|    uint16_t (*const color_map_cdf)[8] =
  426|  76.1k|        ts->cdf.m.color_map[pl][pal_sz - 2];
  427|  76.1k|    uint8_t (*const order)[8] = t->scratch.pal_order;
  428|  76.1k|    uint8_t *const ctx = t->scratch.pal_ctx;
  429|  2.10M|    for (int i = 1; i < 4 * (w4 + h4) - 1; i++) {
  ------------------
  |  Branch (429:21): [True: 2.02M, False: 76.1k]
  ------------------
  430|       |        // top/left-to-bottom/right diagonals ("wave-front")
  431|  2.02M|        const int first = imin(i, w4 * 4 - 1);
  432|  2.02M|        const int last = imax(0, i - h4 * 4 + 1);
  433|  2.02M|        order_palette(pal_tmp, stride, i, first, last, order, ctx);
  434|  17.9M|        for (int j = first, m = 0; j >= last; j--, m++) {
  ------------------
  |  Branch (434:36): [True: 15.9M, False: 2.02M]
  ------------------
  435|  15.9M|            const int color_idx = dav1d_msac_decode_symbol_adapt8(&ts->msac,
  ------------------
  |  |   48|  15.9M|#define dav1d_msac_decode_symbol_adapt8  dav1d_msac_decode_symbol_adapt8_sse2
  ------------------
  436|  15.9M|                                      color_map_cdf[ctx[m]], pal_sz - 1);
  437|  15.9M|            pal_tmp[(i - j) * stride + j] = order[m][color_idx];
  438|  15.9M|        }
  439|  2.02M|    }
  440|       |
  441|  76.1k|    t->c->pal_dsp.pal_idx_finish(pal_idx, pal_tmp, bw4 * 4, bh4 * 4,
  442|  76.1k|                                 w4 * 4, h4 * 4);
  443|  76.1k|}
decode.c:order_palette:
  356|  2.02M|{
  357|  2.02M|    int have_top = i > first;
  358|       |
  359|  2.02M|    assert(pal_idx);
  ------------------
  |  Branch (359:5): [True: 2.02M, False: 0]
  ------------------
  360|  2.02M|    pal_idx += first + (i - first) * stride;
  361|  17.9M|    for (int j = first, n = 0; j >= last; have_top = 1, j--, n++, pal_idx += stride - 1) {
  ------------------
  |  Branch (361:32): [True: 15.9M, False: 2.02M]
  ------------------
  362|  15.9M|        const int have_left = j > 0;
  363|       |
  364|  15.9M|        assert(have_left || have_top);
  ------------------
  |  Branch (364:9): [True: 15.1M, False: 786k]
  |  Branch (364:9): [True: 786k, False: 0]
  ------------------
  365|       |
  366|  15.9M|#define add(v_in) do { \
  367|  15.9M|        const int v = v_in; \
  368|  15.9M|        assert((unsigned)v < 8U); \
  369|  15.9M|        order[n][o_idx++] = v; \
  370|  15.9M|        mask |= 1 << v; \
  371|  15.9M|    } while (0)
  372|       |
  373|  15.9M|        unsigned mask = 0;
  374|  15.9M|        int o_idx = 0;
  375|  15.9M|        if (!have_left) {
  ------------------
  |  Branch (375:13): [True: 786k, False: 15.1M]
  ------------------
  376|   786k|            ctx[n] = 0;
  377|   786k|            add(pal_idx[-stride]);
  ------------------
  |  |  366|   786k|#define add(v_in) do { \
  |  |  367|   786k|        const int v = v_in; \
  |  |  368|   786k|        assert((unsigned)v < 8U); \
  |  |  369|   786k|        order[n][o_idx++] = v; \
  |  |  370|   786k|        mask |= 1 << v; \
  |  |  371|   786k|    } while (0)
  |  |  ------------------
  |  |  |  Branch (371:14): [Folded, False: 786k]
  |  |  ------------------
  ------------------
  |  Branch (377:13): [True: 786k, False: 0]
  ------------------
  378|  15.1M|        } else if (!have_top) {
  ------------------
  |  Branch (378:20): [True: 1.23M, False: 13.8M]
  ------------------
  379|  1.23M|            ctx[n] = 0;
  380|  1.23M|            add(pal_idx[-1]);
  ------------------
  |  |  366|  1.23M|#define add(v_in) do { \
  |  |  367|  1.23M|        const int v = v_in; \
  |  |  368|  1.23M|        assert((unsigned)v < 8U); \
  |  |  369|  1.23M|        order[n][o_idx++] = v; \
  |  |  370|  1.23M|        mask |= 1 << v; \
  |  |  371|  1.23M|    } while (0)
  |  |  ------------------
  |  |  |  Branch (371:14): [Folded, False: 1.23M]
  |  |  ------------------
  ------------------
  |  Branch (380:13): [True: 1.23M, False: 0]
  ------------------
  381|  13.8M|        } else {
  382|  13.8M|            const int l = pal_idx[-1], t = pal_idx[-stride], tl = pal_idx[-(stride + 1)];
  383|  13.8M|            const int same_t_l = t == l;
  384|  13.8M|            const int same_t_tl = t == tl;
  385|  13.8M|            const int same_l_tl = l == tl;
  386|  13.8M|            const int same_all = same_t_l & same_t_tl & same_l_tl;
  387|       |
  388|  13.8M|            if (same_all) {
  ------------------
  |  Branch (388:17): [True: 7.58M, False: 6.30M]
  ------------------
  389|  7.58M|                ctx[n] = 4;
  390|  7.58M|                add(t);
  ------------------
  |  |  366|  7.58M|#define add(v_in) do { \
  |  |  367|  7.58M|        const int v = v_in; \
  |  |  368|  7.58M|        assert((unsigned)v < 8U); \
  |  |  369|  7.58M|        order[n][o_idx++] = v; \
  |  |  370|  7.58M|        mask |= 1 << v; \
  |  |  371|  7.58M|    } while (0)
  |  |  ------------------
  |  |  |  Branch (371:14): [Folded, False: 7.58M]
  |  |  ------------------
  ------------------
  |  Branch (390:17): [True: 7.58M, False: 0]
  ------------------
  391|  7.58M|            } else if (same_t_l) {
  ------------------
  |  Branch (391:24): [True: 460k, False: 5.84M]
  ------------------
  392|   460k|                ctx[n] = 3;
  393|   460k|                add(t);
  ------------------
  |  |  366|   460k|#define add(v_in) do { \
  |  |  367|   460k|        const int v = v_in; \
  |  |  368|   460k|        assert((unsigned)v < 8U); \
  |  |  369|   460k|        order[n][o_idx++] = v; \
  |  |  370|   460k|        mask |= 1 << v; \
  |  |  371|   460k|    } while (0)
  |  |  ------------------
  |  |  |  Branch (371:14): [Folded, False: 460k]
  |  |  ------------------
  ------------------
  |  Branch (393:17): [True: 460k, False: 0]
  ------------------
  394|   460k|                add(tl);
  ------------------
  |  |  366|   460k|#define add(v_in) do { \
  |  |  367|   460k|        const int v = v_in; \
  |  |  368|   460k|        assert((unsigned)v < 8U); \
  |  |  369|   460k|        order[n][o_idx++] = v; \
  |  |  370|   460k|        mask |= 1 << v; \
  |  |  371|   460k|    } while (0)
  |  |  ------------------
  |  |  |  Branch (371:14): [Folded, False: 460k]
  |  |  ------------------
  ------------------
  |  Branch (394:17): [True: 460k, False: 0]
  ------------------
  395|  5.84M|            } else if (same_t_tl | same_l_tl) {
  ------------------
  |  Branch (395:24): [True: 4.70M, False: 1.14M]
  ------------------
  396|  4.70M|                ctx[n] = 2;
  397|  4.70M|                add(tl);
  ------------------
  |  |  366|  4.70M|#define add(v_in) do { \
  |  |  367|  4.70M|        const int v = v_in; \
  |  |  368|  4.70M|        assert((unsigned)v < 8U); \
  |  |  369|  4.70M|        order[n][o_idx++] = v; \
  |  |  370|  4.70M|        mask |= 1 << v; \
  |  |  371|  4.70M|    } while (0)
  |  |  ------------------
  |  |  |  Branch (371:14): [Folded, False: 4.70M]
  |  |  ------------------
  ------------------
  |  Branch (397:17): [True: 4.70M, False: 0]
  ------------------
  398|  4.70M|                add(same_t_tl ? l : t);
  ------------------
  |  |  366|  4.70M|#define add(v_in) do { \
  |  |  367|  9.40M|        const int v = v_in; \
  |  |  ------------------
  |  |  |  Branch (367:23): [True: 2.38M, False: 2.31M]
  |  |  ------------------
  |  |  368|  4.70M|        assert((unsigned)v < 8U); \
  |  |  369|  4.70M|        order[n][o_idx++] = v; \
  |  |  370|  4.70M|        mask |= 1 << v; \
  |  |  371|  4.70M|    } while (0)
  |  |  ------------------
  |  |  |  Branch (371:14): [Folded, False: 4.70M]
  |  |  ------------------
  ------------------
  |  Branch (398:17): [True: 4.70M, False: 0]
  ------------------
  399|  4.70M|            } else {
  400|  1.14M|                ctx[n] = 1;
  401|  1.14M|                add(imin(t, l));
  ------------------
  |  |  366|  1.14M|#define add(v_in) do { \
  |  |  367|  1.14M|        const int v = v_in; \
  |  |  368|  1.14M|        assert((unsigned)v < 8U); \
  |  |  369|  1.14M|        order[n][o_idx++] = v; \
  |  |  370|  1.14M|        mask |= 1 << v; \
  |  |  371|  1.14M|    } while (0)
  |  |  ------------------
  |  |  |  Branch (371:14): [Folded, False: 1.14M]
  |  |  ------------------
  ------------------
  |  Branch (401:17): [True: 1.14M, False: 0]
  ------------------
  402|  1.14M|                add(imax(t, l));
  ------------------
  |  |  366|  1.14M|#define add(v_in) do { \
  |  |  367|  1.14M|        const int v = v_in; \
  |  |  368|  1.14M|        assert((unsigned)v < 8U); \
  |  |  369|  1.14M|        order[n][o_idx++] = v; \
  |  |  370|  1.14M|        mask |= 1 << v; \
  |  |  371|  1.14M|    } while (0)
  |  |  ------------------
  |  |  |  Branch (371:14): [Folded, False: 1.14M]
  |  |  ------------------
  ------------------
  |  Branch (402:17): [True: 1.14M, False: 0]
  ------------------
  403|  1.14M|                add(tl);
  ------------------
  |  |  366|  1.14M|#define add(v_in) do { \
  |  |  367|  1.14M|        const int v = v_in; \
  |  |  368|  1.14M|        assert((unsigned)v < 8U); \
  |  |  369|  1.14M|        order[n][o_idx++] = v; \
  |  |  370|  1.14M|        mask |= 1 << v; \
  |  |  371|  1.14M|    } while (0)
  |  |  ------------------
  |  |  |  Branch (371:14): [Folded, False: 1.14M]
  |  |  ------------------
  ------------------
  |  Branch (403:17): [True: 1.14M, False: 0]
  ------------------
  404|  1.14M|            }
  405|  13.8M|        }
  406|   143M|        for (unsigned m = 1, bit = 0; m < 0x100; m <<= 1, bit++)
  ------------------
  |  Branch (406:39): [True: 127M, False: 15.9M]
  ------------------
  407|   127M|            if (!(mask & m))
  ------------------
  |  Branch (407:17): [True: 103M, False: 23.3M]
  ------------------
  408|   103M|                order[n][o_idx++] = bit;
  409|       |        assert(o_idx == 8);
  ------------------
  |  Branch (409:9): [True: 15.9M, False: 0]
  ------------------
  410|  15.9M|#undef add
  411|  15.9M|    }
  412|  2.02M|}
decode.c:splat_intraref:
  566|  1.54M|{
  567|  1.54M|    const refmvs_block ALIGN(tmpl, 16) = (refmvs_block) {
  568|  1.54M|        .ref.ref = { 0, -1 },
  569|  1.54M|        .mv.mv[0].n = INVALID_MV,
  ------------------
  |  |   40|  1.54M|#define INVALID_MV 0x80008000
  ------------------
  570|  1.54M|        .bs = bs,
  571|  1.54M|        .mf = 0,
  572|  1.54M|    };
  573|  1.54M|    c->refmvs_dsp.splat_mv(&t->rt.r[(t->by & 31) + 5], &tmpl, t->bx, bw4, bh4);
  574|  1.54M|}
decode.c:read_mv_residual:
  109|   892k|{
  110|   892k|    MsacContext *const msac = &ts->msac;
  111|   892k|    const enum MVJoint mv_joint =
  112|   892k|        dav1d_msac_decode_symbol_adapt4(msac, ts->cdf.mv.joint, N_MV_JOINTS - 1);
  ------------------
  |  |   47|   892k|#define dav1d_msac_decode_symbol_adapt4  dav1d_msac_decode_symbol_adapt4_sse2
  ------------------
  113|   892k|    if (mv_joint & MV_JOINT_V)
  ------------------
  |  Branch (113:9): [True: 768k, False: 123k]
  ------------------
  114|   768k|        ref_mv->y += read_mv_component_diff(msac, &ts->cdf.mv.comp[0], mv_prec);
  115|   892k|    if (mv_joint & MV_JOINT_H)
  ------------------
  |  Branch (115:9): [True: 759k, False: 133k]
  ------------------
  116|   759k|        ref_mv->x += read_mv_component_diff(msac, &ts->cdf.mv.comp[1], mv_prec);
  117|   892k|}
decode.c:read_mv_component_diff:
   79|  1.52M|{
   80|  1.52M|    const int sign = dav1d_msac_decode_bool_adapt(msac, mv_comp->sign);
  ------------------
  |  |   52|  1.52M|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
   81|  1.52M|    const int cl = dav1d_msac_decode_symbol_adapt16(msac, mv_comp->classes, 10);
  ------------------
  |  |   57|  1.52M|#define dav1d_msac_decode_symbol_adapt16(ctx, cdf, symb) ((ctx)->symbol_adapt16(ctx, cdf, symb))
  ------------------
   82|  1.52M|    int up, fp = 3, hp = 1;
   83|       |
   84|  1.52M|    if (!cl) {
  ------------------
  |  Branch (84:9): [True: 369k, False: 1.15M]
  ------------------
   85|   369k|        up = dav1d_msac_decode_bool_adapt(msac, mv_comp->class0);
  ------------------
  |  |   52|   369k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
   86|   369k|        if (mv_prec >= 0) {  // !force_integer_mv
  ------------------
  |  Branch (86:13): [True: 213k, False: 156k]
  ------------------
   87|   213k|            fp = dav1d_msac_decode_symbol_adapt4(msac, mv_comp->class0_fp[up], 3);
  ------------------
  |  |   47|   213k|#define dav1d_msac_decode_symbol_adapt4  dav1d_msac_decode_symbol_adapt4_sse2
  ------------------
   88|   213k|            if (mv_prec > 0) // allow_high_precision_mv
  ------------------
  |  Branch (88:17): [True: 144k, False: 69.5k]
  ------------------
   89|   144k|                hp = dav1d_msac_decode_bool_adapt(msac, mv_comp->class0_hp);
  ------------------
  |  |   52|   144k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
   90|   213k|        }
   91|  1.15M|    } else {
   92|  1.15M|        up = 1 << cl;
   93|  12.2M|        for (int n = 0; n < cl; n++)
  ------------------
  |  Branch (93:25): [True: 11.1M, False: 1.15M]
  ------------------
   94|  11.1M|            up |= dav1d_msac_decode_bool_adapt(msac, mv_comp->classN[n]) << n;
  ------------------
  |  |   52|  11.1M|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
   95|  1.15M|        if (mv_prec >= 0) {  // !force_integer_mv
  ------------------
  |  Branch (95:13): [True: 76.8k, False: 1.08M]
  ------------------
   96|  76.8k|            fp = dav1d_msac_decode_symbol_adapt4(msac, mv_comp->classN_fp, 3);
  ------------------
  |  |   47|  76.8k|#define dav1d_msac_decode_symbol_adapt4  dav1d_msac_decode_symbol_adapt4_sse2
  ------------------
   97|  76.8k|            if (mv_prec > 0) // allow_high_precision_mv
  ------------------
  |  Branch (97:17): [True: 57.3k, False: 19.4k]
  ------------------
   98|  57.3k|                hp = dav1d_msac_decode_bool_adapt(msac, mv_comp->classN_hp);
  ------------------
  |  |   52|  57.3k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
   99|  76.8k|        }
  100|  1.15M|    }
  101|       |
  102|  1.52M|    const int diff = ((up << 3) | (fp << 1) | hp) + 1;
  103|       |
  104|  1.52M|    return sign ? -diff : diff;
  ------------------
  |  Branch (104:12): [True: 1.29M, False: 233k]
  ------------------
  105|  1.52M|}
decode.c:read_vartx_tree:
  448|  1.69M|{
  449|  1.69M|    const Dav1dFrameContext *const f = t->f;
  450|  1.69M|    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
  451|  1.69M|    const int bw4 = b_dim[0], bh4 = b_dim[1];
  452|       |
  453|       |    // var-tx tree coding
  454|  1.69M|    uint16_t tx_split[2] = { 0 };
  455|  1.69M|    b->max_ytx = dav1d_max_txfm_size_for_bs[bs][0];
  456|  1.69M|    if (!b->skip && (f->frame_hdr->segmentation.lossless[b->seg_id] ||
  ------------------
  |  Branch (456:9): [True: 560k, False: 1.13M]
  |  Branch (456:22): [True: 20.7k, False: 539k]
  ------------------
  457|   539k|                     b->max_ytx == TX_4X4))
  ------------------
  |  Branch (457:22): [True: 27.5k, False: 512k]
  ------------------
  458|  48.3k|    {
  459|  48.3k|        b->max_ytx = b->uvtx = TX_4X4;
  460|  48.3k|        if (f->frame_hdr->txfm_mode == DAV1D_TX_SWITCHABLE) {
  ------------------
  |  Branch (460:13): [True: 11.7k, False: 36.6k]
  ------------------
  461|  11.7k|            dav1d_memset_pow2[b_dim[2]](&t->a->tx[bx4], TX_4X4);
  462|  11.7k|            dav1d_memset_pow2[b_dim[3]](&t->l.tx[by4], TX_4X4);
  463|  11.7k|        }
  464|  1.64M|    } else if (f->frame_hdr->txfm_mode != DAV1D_TX_SWITCHABLE || b->skip) {
  ------------------
  |  Branch (464:16): [True: 1.29M, False: 353k]
  |  Branch (464:66): [True: 182k, False: 171k]
  ------------------
  465|  1.47M|        if (f->frame_hdr->txfm_mode == DAV1D_TX_SWITCHABLE) {
  ------------------
  |  Branch (465:13): [True: 182k, False: 1.29M]
  ------------------
  466|   182k|            dav1d_memset_pow2[b_dim[2]](&t->a->tx[bx4], b_dim[2 + 0]);
  467|   182k|            dav1d_memset_pow2[b_dim[3]](&t->l.tx[by4], b_dim[2 + 1]);
  468|   182k|        }
  469|  1.47M|        b->uvtx = dav1d_max_txfm_size_for_bs[bs][f->cur.p.layout];
  470|  1.47M|    } else {
  471|   171k|        assert(bw4 <= 16 || bh4 <= 16 || b->max_ytx == TX_64X64);
  ------------------
  |  Branch (471:9): [True: 167k, False: 3.23k]
  |  Branch (471:9): [True: 605, False: 2.62k]
  |  Branch (471:9): [True: 2.62k, False: 0]
  ------------------
  472|   171k|        int y, x, y_off, x_off;
  473|   171k|        const TxfmInfo *const ytx = &dav1d_txfm_dimensions[b->max_ytx];
  474|   345k|        for (y = 0, y_off = 0; y < bh4; y += ytx->h, y_off++) {
  ------------------
  |  Branch (474:32): [True: 174k, False: 171k]
  ------------------
  475|   354k|            for (x = 0, x_off = 0; x < bw4; x += ytx->w, x_off++) {
  ------------------
  |  Branch (475:36): [True: 179k, False: 174k]
  ------------------
  476|   179k|                read_tx_tree(t, b->max_ytx, 0, tx_split, x_off, y_off);
  477|       |                // contexts are updated inside read_tx_tree()
  478|   179k|                t->bx += ytx->w;
  479|   179k|            }
  480|   174k|            t->bx -= x;
  481|   174k|            t->by += ytx->h;
  482|   174k|        }
  483|   171k|        t->by -= y;
  484|   171k|        if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   171k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 171k]
  |  |  ------------------
  |  |   35|   171k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   171k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  485|      0|            printf("Post-vartxtree[%x/%x]: r=%d\n",
  486|      0|                   tx_split[0], tx_split[1], t->ts->msac.rng);
  487|   171k|        b->uvtx = dav1d_max_txfm_size_for_bs[bs][f->cur.p.layout];
  488|   171k|    }
  489|  1.69M|    assert(!(tx_split[0] & ~0x33));
  ------------------
  |  Branch (489:5): [True: 1.69M, False: 0]
  ------------------
  490|  1.69M|    b->tx_split0 = (uint8_t)tx_split[0];
  491|  1.69M|    b->tx_split1 = tx_split[1];
  492|  1.69M|}
decode.c:read_tx_tree:
  123|   366k|{
  124|   366k|    const Dav1dFrameContext *const f = t->f;
  125|   366k|    const int bx4 = t->bx & 31, by4 = t->by & 31;
  126|   366k|    const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[from];
  127|   366k|    const int txw = t_dim->lw, txh = t_dim->lh;
  128|   366k|    int is_split;
  129|       |
  130|   366k|    if (depth < 2 && from > (int) TX_4X4) {
  ------------------
  |  Branch (130:9): [True: 302k, False: 64.3k]
  |  Branch (130:22): [True: 302k, False: 0]
  ------------------
  131|   302k|        const int cat = 2 * (TX_64X64 - t_dim->max) - depth;
  132|   302k|        const int a = t->a->tx[bx4] < txw;
  133|   302k|        const int l = t->l.tx[by4] < txh;
  134|       |
  135|   302k|        is_split = dav1d_msac_decode_bool_adapt(&t->ts->msac,
  ------------------
  |  |   52|   302k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  136|   302k|                       t->ts->cdf.m.txpart[cat][a + l]);
  137|   302k|        if (is_split)
  ------------------
  |  Branch (137:13): [True: 85.9k, False: 216k]
  ------------------
  138|  85.9k|            masks[depth] |= 1 << (y_off * 4 + x_off);
  139|   302k|    } else {
  140|  64.3k|        is_split = 0;
  141|  64.3k|    }
  142|       |
  143|   366k|    if (is_split && t_dim->max > TX_8X8) {
  ------------------
  |  Branch (143:9): [True: 85.9k, False: 280k]
  |  Branch (143:21): [True: 65.8k, False: 20.1k]
  ------------------
  144|  65.8k|        const enum RectTxfmSize sub = t_dim->sub;
  145|  65.8k|        const TxfmInfo *const sub_t_dim = &dav1d_txfm_dimensions[sub];
  146|  65.8k|        const int txsw = sub_t_dim->w, txsh = sub_t_dim->h;
  147|       |
  148|  65.8k|        read_tx_tree(t, sub, depth + 1, masks, x_off * 2 + 0, y_off * 2 + 0);
  149|  65.8k|        t->bx += txsw;
  150|  65.8k|        if (txw >= txh && t->bx < f->bw)
  ------------------
  |  Branch (150:13): [True: 50.5k, False: 15.2k]
  |  Branch (150:27): [True: 49.9k, False: 624]
  ------------------
  151|  49.9k|            read_tx_tree(t, sub, depth + 1, masks, x_off * 2 + 1, y_off * 2 + 0);
  152|  65.8k|        t->bx -= txsw;
  153|  65.8k|        t->by += txsh;
  154|  65.8k|        if (txh >= txw && t->by < f->bh) {
  ------------------
  |  Branch (154:13): [True: 44.5k, False: 21.3k]
  |  Branch (154:27): [True: 43.3k, False: 1.19k]
  ------------------
  155|  43.3k|            read_tx_tree(t, sub, depth + 1, masks, x_off * 2 + 0, y_off * 2 + 1);
  156|  43.3k|            t->bx += txsw;
  157|  43.3k|            if (txw >= txh && t->bx < f->bw)
  ------------------
  |  Branch (157:17): [True: 28.0k, False: 15.2k]
  |  Branch (157:31): [True: 27.4k, False: 603]
  ------------------
  158|  27.4k|                read_tx_tree(t, sub, depth + 1, masks,
  159|  27.4k|                             x_off * 2 + 1, y_off * 2 + 1);
  160|  43.3k|            t->bx -= txsw;
  161|  43.3k|        }
  162|  65.8k|        t->by -= txsh;
  163|   300k|    } else {
  164|   300k|        dav1d_memset_pow2[t_dim->lw](&t->a->tx[bx4], is_split ? TX_4X4 : txw);
  ------------------
  |  Branch (164:54): [True: 20.1k, False: 280k]
  ------------------
  165|   300k|        dav1d_memset_pow2[t_dim->lh](&t->l.tx[by4], is_split ? TX_4X4 : txh);
  ------------------
  |  Branch (165:53): [True: 20.1k, False: 280k]
  ------------------
  166|   300k|    }
  167|   366k|}
decode.c:splat_intrabc_mv:
  535|   597k|{
  536|   597k|    const refmvs_block ALIGN(tmpl, 16) = (refmvs_block) {
  537|   597k|        .ref.ref = { 0, -1 },
  538|   597k|        .mv.mv[0] = b->mv[0],
  539|   597k|        .bs = bs,
  540|   597k|        .mf = 0,
  541|   597k|    };
  542|   597k|    c->refmvs_dsp.splat_mv(&t->rt.r[(t->by & 31) + 5], &tmpl, t->bx, bw4, bh4);
  543|   597k|}
decode.c:findoddzero:
  339|   479k|static inline int findoddzero(const uint8_t *buf, int len) {
  340|   526k|    for (int n = 0; n < len; n++)
  ------------------
  |  Branch (340:21): [True: 505k, False: 20.5k]
  ------------------
  341|   505k|        if (!buf[n * 2]) return 1;
  ------------------
  |  Branch (341:13): [True: 459k, False: 46.3k]
  ------------------
  342|  20.5k|    return 0;
  343|   479k|}
decode.c:find_matching_ref:
  197|   459k|{
  198|   459k|    /*const*/ refmvs_block *const *r = &t->rt.r[(t->by & 31) + 5];
  199|   459k|    int count = 0;
  200|   459k|    int have_topleft = have_top && have_left;
  ------------------
  |  Branch (200:24): [True: 419k, False: 39.9k]
  |  Branch (200:36): [True: 380k, False: 38.4k]
  ------------------
  201|   459k|    int have_topright = imax(bw4, bh4) < 32 &&
  ------------------
  |  Branch (201:25): [True: 428k, False: 30.5k]
  ------------------
  202|   428k|                        have_top && t->bx + bw4 < t->ts->tiling.col_end &&
  ------------------
  |  Branch (202:25): [True: 394k, False: 34.0k]
  |  Branch (202:37): [True: 361k, False: 33.0k]
  ------------------
  203|   361k|                        (intra_edge_flags & EDGE_I444_TOP_HAS_RIGHT);
  ------------------
  |  Branch (203:25): [True: 223k, False: 138k]
  ------------------
  204|       |
  205|   459k|#define bs(rp) dav1d_block_dimensions[(rp)->bs]
  206|   459k|#define matches(rp) ((rp)->ref.ref[0] == ref + 1 && (rp)->ref.ref[1] == -1)
  207|       |
  208|   459k|    if (have_top) {
  ------------------
  |  Branch (208:9): [True: 419k, False: 39.9k]
  ------------------
  209|   419k|        const refmvs_block *r2 = &r[-1][t->bx];
  210|   419k|        if (matches(r2)) {
  ------------------
  |  |  206|   419k|#define matches(rp) ((rp)->ref.ref[0] == ref + 1 && (rp)->ref.ref[1] == -1)
  |  |  ------------------
  |  |  |  Branch (206:22): [True: 355k, False: 63.7k]
  |  |  |  Branch (206:53): [True: 327k, False: 28.0k]
  |  |  ------------------
  ------------------
  211|   327k|            masks[0] |= 1;
  212|   327k|            count = 1;
  213|   327k|        }
  214|   419k|        int aw4 = bs(r2)[0];
  ------------------
  |  |  205|   419k|#define bs(rp) dav1d_block_dimensions[(rp)->bs]
  ------------------
  215|   419k|        if (aw4 >= bw4) {
  ------------------
  |  Branch (215:13): [True: 363k, False: 55.9k]
  ------------------
  216|   363k|            const int off = t->bx & (aw4 - 1);
  217|   363k|            if (off) have_topleft = 0;
  ------------------
  |  Branch (217:17): [True: 63.9k, False: 299k]
  ------------------
  218|   363k|            if (aw4 - off > bw4) have_topright = 0;
  ------------------
  |  Branch (218:17): [True: 65.5k, False: 297k]
  ------------------
  219|   363k|        } else {
  220|  55.9k|            unsigned mask = 1 << aw4;
  221|   139k|            for (int x = aw4; x < w4; x += aw4) {
  ------------------
  |  Branch (221:31): [True: 84.1k, False: 55.4k]
  ------------------
  222|  84.1k|                r2 += aw4;
  223|  84.1k|                if (matches(r2)) {
  ------------------
  |  |  206|  84.1k|#define matches(rp) ((rp)->ref.ref[0] == ref + 1 && (rp)->ref.ref[1] == -1)
  |  |  ------------------
  |  |  |  Branch (206:22): [True: 65.4k, False: 18.6k]
  |  |  |  Branch (206:53): [True: 60.7k, False: 4.66k]
  |  |  ------------------
  ------------------
  224|  60.7k|                    masks[0] |= mask;
  225|  60.7k|                    if (++count >= 8) return;
  ------------------
  |  Branch (225:25): [True: 482, False: 60.2k]
  ------------------
  226|  60.7k|                }
  227|  83.6k|                aw4 = bs(r2)[0];
  ------------------
  |  |  205|  83.6k|#define bs(rp) dav1d_block_dimensions[(rp)->bs]
  ------------------
  228|  83.6k|                mask <<= aw4;
  229|  83.6k|            }
  230|  55.9k|        }
  231|   419k|    }
  232|   458k|    if (have_left) {
  ------------------
  |  Branch (232:9): [True: 420k, False: 38.4k]
  ------------------
  233|   420k|        /*const*/ refmvs_block *const *r2 = r;
  234|   420k|        if (matches(&r2[0][t->bx - 1])) {
  ------------------
  |  |  206|   420k|#define matches(rp) ((rp)->ref.ref[0] == ref + 1 && (rp)->ref.ref[1] == -1)
  |  |  ------------------
  |  |  |  Branch (206:22): [True: 353k, False: 66.5k]
  |  |  |  Branch (206:53): [True: 324k, False: 29.0k]
  |  |  ------------------
  ------------------
  235|   324k|            masks[1] |= 1;
  236|   324k|            if (++count >= 8) return;
  ------------------
  |  Branch (236:17): [True: 285, False: 324k]
  ------------------
  237|   324k|        }
  238|   420k|        int lh4 = bs(&r2[0][t->bx - 1])[1];
  ------------------
  |  |  205|   420k|#define bs(rp) dav1d_block_dimensions[(rp)->bs]
  ------------------
  239|   420k|        if (lh4 >= bh4) {
  ------------------
  |  Branch (239:13): [True: 350k, False: 69.4k]
  ------------------
  240|   350k|            if (t->by & (lh4 - 1)) have_topleft = 0;
  ------------------
  |  Branch (240:17): [True: 67.8k, False: 282k]
  ------------------
  241|   350k|        } else {
  242|  69.4k|            unsigned mask = 1 << lh4;
  243|   172k|            for (int y = lh4; y < h4; y += lh4) {
  ------------------
  |  Branch (243:31): [True: 104k, False: 68.1k]
  ------------------
  244|   104k|                r2 += lh4;
  245|   104k|                if (matches(&r2[0][t->bx - 1])) {
  ------------------
  |  |  206|   104k|#define matches(rp) ((rp)->ref.ref[0] == ref + 1 && (rp)->ref.ref[1] == -1)
  |  |  ------------------
  |  |  |  Branch (206:22): [True: 75.0k, False: 29.6k]
  |  |  |  Branch (206:53): [True: 69.6k, False: 5.44k]
  |  |  ------------------
  ------------------
  246|  69.6k|                    masks[1] |= mask;
  247|  69.6k|                    if (++count >= 8) return;
  ------------------
  |  Branch (247:25): [True: 1.29k, False: 68.3k]
  ------------------
  248|  69.6k|                }
  249|   103k|                lh4 = bs(&r2[0][t->bx - 1])[1];
  ------------------
  |  |  205|   103k|#define bs(rp) dav1d_block_dimensions[(rp)->bs]
  ------------------
  250|   103k|                mask <<= lh4;
  251|   103k|            }
  252|  69.4k|        }
  253|   420k|    }
  254|   457k|    if (have_topleft && matches(&r[-1][t->bx - 1])) {
  ------------------
  |  |  206|   247k|#define matches(rp) ((rp)->ref.ref[0] == ref + 1 && (rp)->ref.ref[1] == -1)
  |  |  ------------------
  |  |  |  Branch (206:22): [True: 197k, False: 49.7k]
  |  |  |  Branch (206:53): [True: 179k, False: 17.5k]
  |  |  ------------------
  ------------------
  |  Branch (254:9): [True: 247k, False: 210k]
  ------------------
  255|   179k|        masks[1] |= 1ULL << 32;
  256|   179k|        if (++count >= 8) return;
  ------------------
  |  Branch (256:13): [True: 890, False: 178k]
  ------------------
  257|   179k|    }
  258|   456k|    if (have_topright && matches(&r[-1][t->bx + bw4])) {
  ------------------
  |  |  206|   157k|#define matches(rp) ((rp)->ref.ref[0] == ref + 1 && (rp)->ref.ref[1] == -1)
  |  |  ------------------
  |  |  |  Branch (206:22): [True: 124k, False: 33.1k]
  |  |  |  Branch (206:53): [True: 114k, False: 10.3k]
  |  |  ------------------
  ------------------
  |  Branch (258:9): [True: 157k, False: 298k]
  ------------------
  259|   114k|        masks[0] |= 1ULL << 32;
  260|   114k|    }
  261|   456k|#undef matches
  262|   456k|}
decode.c:derive_warpmv:
  268|  78.1k|{
  269|  78.1k|    int pts[8][2 /* in, out */][2 /* x, y */], np = 0;
  270|  78.1k|    /*const*/ refmvs_block *const *r = &t->rt.r[(t->by & 31) + 5];
  271|       |
  272|  78.1k|#define add_sample(dx, dy, sx, sy, rp) do { \
  273|  78.1k|    pts[np][0][0] = 16 * (2 * dx + sx * bs(rp)[0]) - 8; \
  274|  78.1k|    pts[np][0][1] = 16 * (2 * dy + sy * bs(rp)[1]) - 8; \
  275|  78.1k|    pts[np][1][0] = pts[np][0][0] + (rp)->mv.mv[0].x; \
  276|  78.1k|    pts[np][1][1] = pts[np][0][1] + (rp)->mv.mv[0].y; \
  277|  78.1k|    np++; \
  278|  78.1k|} while (0)
  279|       |
  280|       |    // use masks[] to find the projectable motion vectors in the edges
  281|  78.1k|    if ((unsigned) masks[0] == 1 && !(masks[1] >> 32)) {
  ------------------
  |  Branch (281:9): [True: 58.0k, False: 20.0k]
  |  Branch (281:37): [True: 24.9k, False: 33.0k]
  ------------------
  282|  24.9k|        const int off = t->bx & (bs(&r[-1][t->bx])[0] - 1);
  ------------------
  |  |  205|  24.9k|#define bs(rp) dav1d_block_dimensions[(rp)->bs]
  ------------------
  283|  24.9k|        add_sample(-off, 0, 1, -1, &r[-1][t->bx]);
  ------------------
  |  |  272|  24.9k|#define add_sample(dx, dy, sx, sy, rp) do { \
  |  |  273|  24.9k|    pts[np][0][0] = 16 * (2 * dx + sx * bs(rp)[0]) - 8; \
  |  |  ------------------
  |  |  |  |  205|  24.9k|#define bs(rp) dav1d_block_dimensions[(rp)->bs]
  |  |  ------------------
  |  |  274|  24.9k|    pts[np][0][1] = 16 * (2 * dy + sy * bs(rp)[1]) - 8; \
  |  |  ------------------
  |  |  |  |  205|  24.9k|#define bs(rp) dav1d_block_dimensions[(rp)->bs]
  |  |  ------------------
  |  |  275|  24.9k|    pts[np][1][0] = pts[np][0][0] + (rp)->mv.mv[0].x; \
  |  |  276|  24.9k|    pts[np][1][1] = pts[np][0][1] + (rp)->mv.mv[0].y; \
  |  |  277|  24.9k|    np++; \
  |  |  278|  24.9k|} while (0)
  |  |  ------------------
  |  |  |  Branch (278:10): [Folded, False: 24.9k]
  |  |  ------------------
  ------------------
  284|   105k|    } else for (unsigned off = 0, xmask = (uint32_t) masks[0]; np < 8 && xmask;) { // top
  ------------------
  |  Branch (284:64): [True: 104k, False: 270]
  |  Branch (284:74): [True: 51.9k, False: 52.8k]
  ------------------
  285|  51.9k|        const int tz = ctz(xmask);
  286|  51.9k|        off += tz;
  287|  51.9k|        xmask >>= tz;
  288|  51.9k|        add_sample(off, 0, 1, -1, &r[-1][t->bx + off]);
  ------------------
  |  |  272|  51.9k|#define add_sample(dx, dy, sx, sy, rp) do { \
  |  |  273|  51.9k|    pts[np][0][0] = 16 * (2 * dx + sx * bs(rp)[0]) - 8; \
  |  |  ------------------
  |  |  |  |  205|  51.9k|#define bs(rp) dav1d_block_dimensions[(rp)->bs]
  |  |  ------------------
  |  |  274|  51.9k|    pts[np][0][1] = 16 * (2 * dy + sy * bs(rp)[1]) - 8; \
  |  |  ------------------
  |  |  |  |  205|  51.9k|#define bs(rp) dav1d_block_dimensions[(rp)->bs]
  |  |  ------------------
  |  |  275|  51.9k|    pts[np][1][0] = pts[np][0][0] + (rp)->mv.mv[0].x; \
  |  |  276|  51.9k|    pts[np][1][1] = pts[np][0][1] + (rp)->mv.mv[0].y; \
  |  |  277|  51.9k|    np++; \
  |  |  278|  51.9k|} while (0)
  |  |  ------------------
  |  |  |  Branch (278:10): [Folded, False: 51.9k]
  |  |  ------------------
  ------------------
  289|  51.9k|        xmask &= ~1;
  290|  51.9k|    }
  291|  78.1k|    if (np < 8 && masks[1] == 1) {
  ------------------
  |  Branch (291:9): [True: 77.8k, False: 270]
  |  Branch (291:19): [True: 28.6k, False: 49.2k]
  ------------------
  292|  28.6k|        const int off = t->by & (bs(&r[0][t->bx - 1])[1] - 1);
  ------------------
  |  |  205|  28.6k|#define bs(rp) dav1d_block_dimensions[(rp)->bs]
  ------------------
  293|  28.6k|        add_sample(0, -off, -1, 1, &r[-off][t->bx - 1]);
  ------------------
  |  |  272|  28.6k|#define add_sample(dx, dy, sx, sy, rp) do { \
  |  |  273|  28.6k|    pts[np][0][0] = 16 * (2 * dx + sx * bs(rp)[0]) - 8; \
  |  |  ------------------
  |  |  |  |  205|  28.6k|#define bs(rp) dav1d_block_dimensions[(rp)->bs]
  |  |  ------------------
  |  |  274|  28.6k|    pts[np][0][1] = 16 * (2 * dy + sy * bs(rp)[1]) - 8; \
  |  |  ------------------
  |  |  |  |  205|  28.6k|#define bs(rp) dav1d_block_dimensions[(rp)->bs]
  |  |  ------------------
  |  |  275|  28.6k|    pts[np][1][0] = pts[np][0][0] + (rp)->mv.mv[0].x; \
  |  |  276|  28.6k|    pts[np][1][1] = pts[np][0][1] + (rp)->mv.mv[0].y; \
  |  |  277|  28.6k|    np++; \
  |  |  278|  28.6k|} while (0)
  |  |  ------------------
  |  |  |  Branch (278:10): [Folded, False: 28.6k]
  |  |  ------------------
  ------------------
  294|   105k|    } else for (unsigned off = 0, ymask = (uint32_t) masks[1]; np < 8 && ymask;) { // left
  ------------------
  |  Branch (294:64): [True: 104k, False: 816]
  |  Branch (294:74): [True: 56.1k, False: 48.7k]
  ------------------
  295|  56.1k|        const int tz = ctz(ymask);
  296|  56.1k|        off += tz;
  297|  56.1k|        ymask >>= tz;
  298|  56.1k|        add_sample(0, off, -1, 1, &r[off][t->bx - 1]);
  ------------------
  |  |  272|  56.1k|#define add_sample(dx, dy, sx, sy, rp) do { \
  |  |  273|  56.1k|    pts[np][0][0] = 16 * (2 * dx + sx * bs(rp)[0]) - 8; \
  |  |  ------------------
  |  |  |  |  205|  56.1k|#define bs(rp) dav1d_block_dimensions[(rp)->bs]
  |  |  ------------------
  |  |  274|  56.1k|    pts[np][0][1] = 16 * (2 * dy + sy * bs(rp)[1]) - 8; \
  |  |  ------------------
  |  |  |  |  205|  56.1k|#define bs(rp) dav1d_block_dimensions[(rp)->bs]
  |  |  ------------------
  |  |  275|  56.1k|    pts[np][1][0] = pts[np][0][0] + (rp)->mv.mv[0].x; \
  |  |  276|  56.1k|    pts[np][1][1] = pts[np][0][1] + (rp)->mv.mv[0].y; \
  |  |  277|  56.1k|    np++; \
  |  |  278|  56.1k|} while (0)
  |  |  ------------------
  |  |  |  Branch (278:10): [Folded, False: 56.1k]
  |  |  ------------------
  ------------------
  299|  56.1k|        ymask &= ~1;
  300|  56.1k|    }
  301|  78.1k|    if (np < 8 && masks[1] >> 32) // top/left
  ------------------
  |  Branch (301:9): [True: 77.1k, False: 966]
  |  Branch (301:19): [True: 39.6k, False: 37.5k]
  ------------------
  302|  39.6k|        add_sample(0, 0, -1, -1, &r[-1][t->bx - 1]);
  ------------------
  |  |  272|  39.6k|#define add_sample(dx, dy, sx, sy, rp) do { \
  |  |  273|  39.6k|    pts[np][0][0] = 16 * (2 * dx + sx * bs(rp)[0]) - 8; \
  |  |  ------------------
  |  |  |  |  205|  39.6k|#define bs(rp) dav1d_block_dimensions[(rp)->bs]
  |  |  ------------------
  |  |  274|  39.6k|    pts[np][0][1] = 16 * (2 * dy + sy * bs(rp)[1]) - 8; \
  |  |  ------------------
  |  |  |  |  205|  39.6k|#define bs(rp) dav1d_block_dimensions[(rp)->bs]
  |  |  ------------------
  |  |  275|  39.6k|    pts[np][1][0] = pts[np][0][0] + (rp)->mv.mv[0].x; \
  |  |  276|  39.6k|    pts[np][1][1] = pts[np][0][1] + (rp)->mv.mv[0].y; \
  |  |  277|  39.6k|    np++; \
  |  |  278|  39.6k|} while (0)
  |  |  ------------------
  |  |  |  Branch (278:10): [Folded, False: 39.6k]
  |  |  ------------------
  ------------------
  303|  78.1k|    if (np < 8 && masks[0] >> 32) // top/right
  ------------------
  |  Branch (303:9): [True: 76.8k, False: 1.27k]
  |  Branch (303:19): [True: 25.3k, False: 51.4k]
  ------------------
  304|  25.3k|        add_sample(bw4, 0, 1, -1, &r[-1][t->bx + bw4]);
  ------------------
  |  |  272|  25.3k|#define add_sample(dx, dy, sx, sy, rp) do { \
  |  |  273|  25.3k|    pts[np][0][0] = 16 * (2 * dx + sx * bs(rp)[0]) - 8; \
  |  |  ------------------
  |  |  |  |  205|  25.3k|#define bs(rp) dav1d_block_dimensions[(rp)->bs]
  |  |  ------------------
  |  |  274|  25.3k|    pts[np][0][1] = 16 * (2 * dy + sy * bs(rp)[1]) - 8; \
  |  |  ------------------
  |  |  |  |  205|  25.3k|#define bs(rp) dav1d_block_dimensions[(rp)->bs]
  |  |  ------------------
  |  |  275|  25.3k|    pts[np][1][0] = pts[np][0][0] + (rp)->mv.mv[0].x; \
  |  |  276|  25.3k|    pts[np][1][1] = pts[np][0][1] + (rp)->mv.mv[0].y; \
  |  |  277|  25.3k|    np++; \
  |  |  278|  25.3k|} while (0)
  |  |  ------------------
  |  |  |  Branch (278:10): [Folded, False: 25.3k]
  |  |  ------------------
  ------------------
  305|  78.1k|    assert(np > 0 && np <= 8);
  ------------------
  |  Branch (305:5): [True: 78.1k, False: 0]
  |  Branch (305:5): [True: 78.1k, False: 0]
  ------------------
  306|  78.1k|#undef bs
  307|       |
  308|       |    // select according to motion vector difference against a threshold
  309|  78.1k|    int mvd[8], ret = 0;
  310|  78.1k|    const int thresh = 4 * iclip(imax(bw4, bh4), 4, 28);
  311|   304k|    for (int i = 0; i < np; i++) {
  ------------------
  |  Branch (311:21): [True: 226k, False: 78.1k]
  ------------------
  312|   226k|        mvd[i] = abs(pts[i][1][0] - pts[i][0][0] - mv.x) +
  313|   226k|                 abs(pts[i][1][1] - pts[i][0][1] - mv.y);
  314|   226k|        if (mvd[i] > thresh)
  ------------------
  |  Branch (314:13): [True: 40.2k, False: 186k]
  ------------------
  315|  40.2k|            mvd[i] = -1;
  316|   186k|        else
  317|   186k|            ret++;
  318|   226k|    }
  319|  78.1k|    if (!ret) {
  ------------------
  |  Branch (319:9): [True: 9.66k, False: 68.4k]
  ------------------
  320|  9.66k|        ret = 1;
  321|  75.3k|    } else for (int i = 0, j = np - 1, k = 0; k < np - ret; k++, i++, j--) {
  ------------------
  |  Branch (321:47): [True: 16.5k, False: 58.8k]
  ------------------
  322|  25.6k|        while (mvd[i] != -1) i++;
  ------------------
  |  Branch (322:16): [True: 9.16k, False: 16.5k]
  ------------------
  323|  30.8k|        while (mvd[j] == -1) j--;
  ------------------
  |  Branch (323:16): [True: 14.3k, False: 16.5k]
  ------------------
  324|  16.5k|        assert(i != j);
  ------------------
  |  Branch (324:9): [True: 16.5k, False: 0]
  ------------------
  325|  16.5k|        if (i > j) break;
  ------------------
  |  Branch (325:13): [True: 9.64k, False: 6.86k]
  ------------------
  326|       |        // replace the discarded samples;
  327|  6.86k|        mvd[i] = mvd[j];
  328|  6.86k|        memcpy(pts[i], pts[j], sizeof(*pts));
  329|  6.86k|    }
  330|       |
  331|  78.1k|    if (!dav1d_find_affine_int(pts, ret, bw4, bh4, mv, wmp, t->bx, t->by) &&
  ------------------
  |  Branch (331:9): [True: 75.8k, False: 2.30k]
  ------------------
  332|  75.8k|        !dav1d_get_shear_params(wmp))
  ------------------
  |  Branch (332:9): [True: 72.9k, False: 2.91k]
  ------------------
  333|  72.9k|    {
  334|  72.9k|        wmp->type = DAV1D_WM_TYPE_AFFINE;
  335|  72.9k|    } else
  336|  5.21k|        wmp->type = DAV1D_WM_TYPE_IDENTITY;
  337|  78.1k|}
decode.c:splat_tworef_mv:
  550|   192k|{
  551|   192k|    assert(bw4 >= 2 && bh4 >= 2);
  ------------------
  |  Branch (551:5): [True: 192k, False: 0]
  |  Branch (551:5): [True: 192k, False: 0]
  ------------------
  552|   192k|    const enum CompInterPredMode mode = b->inter_mode;
  553|   192k|    const refmvs_block ALIGN(tmpl, 16) = (refmvs_block) {
  554|   192k|        .ref.ref = { b->ref[0] + 1, b->ref[1] + 1 },
  555|   192k|        .mv.mv = { b->mv[0], b->mv[1] },
  556|   192k|        .bs = bs,
  557|   192k|        .mf = (mode == GLOBALMV_GLOBALMV) | !!((1 << mode) & (0xbc)) * 2,
  558|   192k|    };
  559|   192k|    c->refmvs_dsp.splat_mv(&t->rt.r[(t->by & 31) + 5], &tmpl, t->bx, bw4, bh4);
  560|   192k|}
decode.c:splat_oneref_mv:
  519|   905k|{
  520|   905k|    const enum InterPredMode mode = b->inter_mode;
  521|   905k|    const refmvs_block ALIGN(tmpl, 16) = (refmvs_block) {
  522|   905k|        .ref.ref = { b->ref[0] + 1, b->interintra_type ? 0 : -1 },
  ------------------
  |  Branch (522:37): [True: 48.9k, False: 856k]
  ------------------
  523|   905k|        .mv.mv[0] = b->mv[0],
  524|   905k|        .bs = bs,
  525|   905k|        .mf = (mode == GLOBALMV && imin(bw4, bh4) >= 2) | ((mode == NEWMV) * 2),
  ------------------
  |  Branch (525:16): [True: 363k, False: 542k]
  |  Branch (525:36): [True: 260k, False: 102k]
  ------------------
  526|   905k|    };
  527|   905k|    c->refmvs_dsp.splat_mv(&t->rt.r[(t->by & 31) + 5], &tmpl, t->bx, bw4, bh4);
  528|   905k|}
decode.c:read_restoration_info:
 2514|  98.3k|{
 2515|  98.3k|    const Dav1dFrameContext *const f = t->f;
 2516|  98.3k|    Dav1dTileState *const ts = t->ts;
 2517|       |
 2518|  98.3k|    if (frame_type == DAV1D_RESTORATION_SWITCHABLE) {
  ------------------
  |  Branch (2518:9): [True: 35.4k, False: 62.9k]
  ------------------
 2519|  35.4k|        const int filter = dav1d_msac_decode_symbol_adapt4(&ts->msac,
  ------------------
  |  |   47|  35.4k|#define dav1d_msac_decode_symbol_adapt4  dav1d_msac_decode_symbol_adapt4_sse2
  ------------------
 2520|  35.4k|                               ts->cdf.m.restore_switchable, 2);
 2521|  35.4k|        lr->type = filter + !!filter; /* NONE/WIENER/SGRPROJ */
 2522|  62.9k|    } else {
 2523|  62.9k|        const unsigned type =
 2524|  62.9k|            dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  62.9k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 2525|  62.9k|                frame_type == DAV1D_RESTORATION_WIENER ?
  ------------------
  |  Branch (2525:17): [True: 21.7k, False: 41.2k]
  ------------------
 2526|  41.2k|                ts->cdf.m.restore_wiener : ts->cdf.m.restore_sgrproj);
 2527|  62.9k|        lr->type = type ? frame_type : DAV1D_RESTORATION_NONE;
  ------------------
  |  Branch (2527:20): [True: 31.2k, False: 31.6k]
  ------------------
 2528|  62.9k|    }
 2529|       |
 2530|  98.3k|    if (lr->type == DAV1D_RESTORATION_WIENER) {
  ------------------
  |  Branch (2530:9): [True: 17.3k, False: 80.9k]
  ------------------
 2531|  17.3k|        lr->filter_v[0] = p ? 0 :
  ------------------
  |  Branch (2531:27): [True: 9.25k, False: 8.12k]
  ------------------
 2532|  17.3k|            dav1d_msac_decode_subexp(&ts->msac,
 2533|  8.12k|                ts->lr_ref[p]->filter_v[0] + 5, 16, 1) - 5;
 2534|  17.3k|        lr->filter_v[1] =
 2535|  17.3k|            dav1d_msac_decode_subexp(&ts->msac,
 2536|  17.3k|                ts->lr_ref[p]->filter_v[1] + 23, 32, 2) - 23;
 2537|  17.3k|        lr->filter_v[2] =
 2538|  17.3k|            dav1d_msac_decode_subexp(&ts->msac,
 2539|  17.3k|                ts->lr_ref[p]->filter_v[2] + 17, 64, 3) - 17;
 2540|       |
 2541|  17.3k|        lr->filter_h[0] = p ? 0 :
  ------------------
  |  Branch (2541:27): [True: 9.25k, False: 8.12k]
  ------------------
 2542|  17.3k|            dav1d_msac_decode_subexp(&ts->msac,
 2543|  8.12k|                ts->lr_ref[p]->filter_h[0] + 5, 16, 1) - 5;
 2544|  17.3k|        lr->filter_h[1] =
 2545|  17.3k|            dav1d_msac_decode_subexp(&ts->msac,
 2546|  17.3k|                ts->lr_ref[p]->filter_h[1] + 23, 32, 2) - 23;
 2547|  17.3k|        lr->filter_h[2] =
 2548|  17.3k|            dav1d_msac_decode_subexp(&ts->msac,
 2549|  17.3k|                ts->lr_ref[p]->filter_h[2] + 17, 64, 3) - 17;
 2550|  17.3k|        memcpy(lr->sgr_weights, ts->lr_ref[p]->sgr_weights, sizeof(lr->sgr_weights));
 2551|  17.3k|        ts->lr_ref[p] = lr;
 2552|  17.3k|        if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  17.3k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 17.3k]
  |  |  ------------------
  |  |   35|  17.3k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  17.3k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 2553|      0|            printf("Post-lr_wiener[pl=%d,v[%d,%d,%d],h[%d,%d,%d]]: r=%d\n",
 2554|      0|                   p, lr->filter_v[0], lr->filter_v[1],
 2555|      0|                   lr->filter_v[2], lr->filter_h[0],
 2556|      0|                   lr->filter_h[1], lr->filter_h[2], ts->msac.rng);
 2557|  80.9k|    } else if (lr->type == DAV1D_RESTORATION_SGRPROJ) {
  ------------------
  |  Branch (2557:16): [True: 32.3k, False: 48.6k]
  ------------------
 2558|  32.3k|        const unsigned idx = dav1d_msac_decode_bools(&ts->msac, 4);
 2559|  32.3k|        const uint16_t *const sgr_params = dav1d_sgr_params[idx];
 2560|  32.3k|        lr->type += idx;
 2561|  32.3k|        lr->sgr_weights[0] = sgr_params[0] ? dav1d_msac_decode_subexp(&ts->msac,
  ------------------
  |  Branch (2561:30): [True: 25.1k, False: 7.26k]
  ------------------
 2562|  25.1k|            ts->lr_ref[p]->sgr_weights[0] + 96, 128, 4) - 96 : 0;
 2563|  32.3k|        lr->sgr_weights[1] = sgr_params[1] ? dav1d_msac_decode_subexp(&ts->msac,
  ------------------
  |  Branch (2563:30): [True: 23.6k, False: 8.74k]
  ------------------
 2564|  23.6k|            ts->lr_ref[p]->sgr_weights[1] + 32, 128, 4) - 32 : 95;
 2565|  32.3k|        memcpy(lr->filter_v, ts->lr_ref[p]->filter_v, sizeof(lr->filter_v));
 2566|  32.3k|        memcpy(lr->filter_h, ts->lr_ref[p]->filter_h, sizeof(lr->filter_h));
 2567|  32.3k|        ts->lr_ref[p] = lr;
 2568|  32.3k|        if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  32.3k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 32.3k]
  |  |  ------------------
  |  |   35|  32.3k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  32.3k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 2569|      0|            printf("Post-lr_sgrproj[pl=%d,idx=%d,w[%d,%d]]: r=%d\n",
 2570|      0|                   p, idx, lr->sgr_weights[0],
 2571|      0|                   lr->sgr_weights[1], ts->msac.rng);
 2572|  32.3k|    }
 2573|  98.3k|}
decode.c:init_quant_tables:
   57|  52.5k|{
   58|   181k|    for (int i = 0; i < (frame_hdr->segmentation.enabled ? 8 : 1); i++) {
  ------------------
  |  Branch (58:21): [True: 129k, False: 52.5k]
  |  Branch (58:26): [True: 98.4k, False: 83.2k]
  ------------------
   59|   129k|        const int yac = frame_hdr->segmentation.enabled ?
  ------------------
  |  Branch (59:25): [True: 87.5k, False: 41.6k]
  ------------------
   60|  87.5k|            iclip_u8(qidx + frame_hdr->segmentation.seg_data.d[i].delta_q) : qidx;
   61|   129k|        const int ydc = iclip_u8(yac + frame_hdr->quant.ydc_delta);
   62|   129k|        const int uac = iclip_u8(yac + frame_hdr->quant.uac_delta);
   63|   129k|        const int udc = iclip_u8(yac + frame_hdr->quant.udc_delta);
   64|   129k|        const int vac = iclip_u8(yac + frame_hdr->quant.vac_delta);
   65|   129k|        const int vdc = iclip_u8(yac + frame_hdr->quant.vdc_delta);
   66|       |
   67|   129k|        dq[i][0][0] = dav1d_dq_tbl[seq_hdr->hbd][ydc][0];
   68|   129k|        dq[i][0][1] = dav1d_dq_tbl[seq_hdr->hbd][yac][1];
   69|   129k|        dq[i][1][0] = dav1d_dq_tbl[seq_hdr->hbd][udc][0];
   70|   129k|        dq[i][1][1] = dav1d_dq_tbl[seq_hdr->hbd][uac][1];
   71|   129k|        dq[i][2][0] = dav1d_dq_tbl[seq_hdr->hbd][vdc][0];
   72|   129k|        dq[i][2][1] = dav1d_dq_tbl[seq_hdr->hbd][vac][1];
   73|   129k|    }
   74|  52.5k|}
decode.c:setup_tile:
 2430|  45.9k|{
 2431|  45.9k|    const int col_sb_start = f->frame_hdr->tiling.col_start_sb[tile_col];
 2432|  45.9k|    const int col_sb128_start = col_sb_start >> !f->seq_hdr->sb128;
 2433|  45.9k|    const int col_sb_end = f->frame_hdr->tiling.col_start_sb[tile_col + 1];
 2434|  45.9k|    const int row_sb_start = f->frame_hdr->tiling.row_start_sb[tile_row];
 2435|  45.9k|    const int row_sb_end = f->frame_hdr->tiling.row_start_sb[tile_row + 1];
 2436|  45.9k|    const int sb_shift = f->sb_shift;
 2437|       |
 2438|  45.9k|    const uint8_t *const size_mul = ss_size_mul[f->cur.p.layout];
 2439|   137k|    for (int p = 0; p < 2; p++) {
  ------------------
  |  Branch (2439:21): [True: 91.8k, False: 45.9k]
  ------------------
 2440|  91.8k|        ts->frame_thread[p].pal_idx = f->frame_thread.pal_idx ?
  ------------------
  |  Branch (2440:39): [True: 0, False: 91.8k]
  ------------------
 2441|      0|            &f->frame_thread.pal_idx[(size_t)tile_start_off * size_mul[1] / 8] :
 2442|  91.8k|            NULL;
 2443|  91.8k|        ts->frame_thread[p].cbi = f->frame_thread.cbi ?
  ------------------
  |  Branch (2443:35): [True: 0, False: 91.8k]
  ------------------
 2444|      0|            &f->frame_thread.cbi[(size_t)tile_start_off * size_mul[0] / 64] :
 2445|  91.8k|            NULL;
 2446|  91.8k|        ts->frame_thread[p].cf = f->frame_thread.cf ?
  ------------------
  |  Branch (2446:34): [True: 0, False: 91.8k]
  ------------------
 2447|      0|            (uint8_t*)f->frame_thread.cf +
 2448|      0|                (((size_t)tile_start_off * size_mul[0]) >> !f->seq_hdr->hbd) :
 2449|  91.8k|            NULL;
 2450|  91.8k|    }
 2451|       |
 2452|  45.9k|    dav1d_cdf_thread_copy(&ts->cdf, &f->in_cdf);
 2453|  45.9k|    ts->last_qidx = f->frame_hdr->quant.yac;
 2454|  45.9k|    ts->last_delta_lf.u32 = 0;
 2455|       |
 2456|  45.9k|    dav1d_msac_init(&ts->msac, data, sz, f->frame_hdr->disable_cdf_update);
 2457|       |
 2458|  45.9k|    ts->tiling.row = tile_row;
 2459|  45.9k|    ts->tiling.col = tile_col;
 2460|  45.9k|    ts->tiling.col_start = col_sb_start << sb_shift;
 2461|  45.9k|    ts->tiling.col_end = imin(col_sb_end << sb_shift, f->bw);
 2462|  45.9k|    ts->tiling.row_start = row_sb_start << sb_shift;
 2463|  45.9k|    ts->tiling.row_end = imin(row_sb_end << sb_shift, f->bh);
 2464|       |
 2465|       |    // Reference Restoration Unit (used for exp coding)
 2466|  45.9k|    int sb_idx, unit_idx;
 2467|  45.9k|    if (f->frame_hdr->width[0] != f->frame_hdr->width[1]) {
  ------------------
  |  Branch (2467:9): [True: 3.97k, False: 41.9k]
  ------------------
 2468|       |        // vertical components only
 2469|  3.97k|        sb_idx = (ts->tiling.row_start >> 5) * f->sr_sb128w;
 2470|  3.97k|        unit_idx = (ts->tiling.row_start & 16) >> 3;
 2471|  41.9k|    } else {
 2472|  41.9k|        sb_idx = (ts->tiling.row_start >> 5) * f->sb128w + col_sb128_start;
 2473|  41.9k|        unit_idx = ((ts->tiling.row_start & 16) >> 3) +
 2474|  41.9k|                   ((ts->tiling.col_start & 16) >> 4);
 2475|  41.9k|    }
 2476|   183k|    for (int p = 0; p < 3; p++) {
  ------------------
  |  Branch (2476:21): [True: 137k, False: 45.9k]
  ------------------
 2477|   137k|        if (!((f->lf.restore_planes >> p) & 1U))
  ------------------
  |  Branch (2477:13): [True: 117k, False: 20.3k]
  ------------------
 2478|   117k|            continue;
 2479|       |
 2480|  20.3k|        if (f->frame_hdr->width[0] != f->frame_hdr->width[1]) {
  ------------------
  |  Branch (2480:13): [True: 5.31k, False: 15.0k]
  ------------------
 2481|  5.31k|            const int ss_hor = p && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
  ------------------
  |  Branch (2481:32): [True: 3.00k, False: 2.30k]
  |  Branch (2481:37): [True: 2.27k, False: 737]
  ------------------
 2482|  5.31k|            const int d = f->frame_hdr->super_res.width_scale_denominator;
 2483|  5.31k|            const int unit_size_log2 = f->frame_hdr->restoration.unit_size[!!p];
 2484|  5.31k|            const int rnd = (8 << unit_size_log2) - 1, shift = unit_size_log2 + 3;
 2485|  5.31k|            const int x = ((4 * ts->tiling.col_start * d >> ss_hor) + rnd) >> shift;
 2486|  5.31k|            const int px_x = x << (unit_size_log2 + ss_hor);
 2487|  5.31k|            const int u_idx = unit_idx + ((px_x & 64) >> 6);
 2488|  5.31k|            const int sb128x = px_x >> 7;
 2489|  5.31k|            if (sb128x >= f->sr_sb128w) continue;
  ------------------
  |  Branch (2489:17): [True: 251, False: 5.06k]
  ------------------
 2490|  5.06k|            ts->lr_ref[p] = &f->lf.lr_mask[sb_idx + sb128x].lr[p][u_idx];
 2491|  15.0k|        } else {
 2492|  15.0k|            ts->lr_ref[p] = &f->lf.lr_mask[sb_idx].lr[p][unit_idx];
 2493|  15.0k|        }
 2494|       |
 2495|  20.0k|        ts->lr_ref[p]->filter_v[0] = 3;
 2496|  20.0k|        ts->lr_ref[p]->filter_v[1] = -7;
 2497|  20.0k|        ts->lr_ref[p]->filter_v[2] = 15;
 2498|  20.0k|        ts->lr_ref[p]->filter_h[0] = 3;
 2499|  20.0k|        ts->lr_ref[p]->filter_h[1] = -7;
 2500|  20.0k|        ts->lr_ref[p]->filter_h[2] = 15;
 2501|  20.0k|        ts->lr_ref[p]->sgr_weights[0] = -32;
 2502|  20.0k|        ts->lr_ref[p]->sgr_weights[1] = 31;
 2503|  20.0k|    }
 2504|       |
 2505|  45.9k|    if (f->c->n_tc > 1) {
  ------------------
  |  Branch (2505:9): [True: 0, False: 45.9k]
  ------------------
 2506|      0|        for (int p = 0; p < 2; p++)
  ------------------
  |  Branch (2506:25): [True: 0, False: 0]
  ------------------
 2507|      0|            atomic_init(&ts->progress[p], row_sb_start);
 2508|      0|    }
 2509|  45.9k|}
decode.c:get_upscale_x0:
 3321|  7.20k|static int get_upscale_x0(const int in_w, const int out_w, const int step) {
 3322|  7.20k|    const int err = out_w * step - (in_w << 14);
 3323|  7.20k|    const int x0 = (-((out_w - in_w) << 13) + (out_w >> 1)) / out_w + 128 - (err / 2);
 3324|  7.20k|    return x0 & 0x3fff;
 3325|  7.20k|}

obu.c:get_poc_diff:
  239|   243k|{
  240|   243k|    if (!order_hint_n_bits) return 0;
  ------------------
  |  Branch (240:9): [True: 0, False: 243k]
  ------------------
  241|   243k|    const int mask = 1 << (order_hint_n_bits - 1);
  242|   243k|    const int diff = poc0 - poc1;
  243|   243k|    return (diff & (mask - 1)) - (diff & mask);
  244|   243k|}
refmvs.c:get_gmv_2d:
  482|  1.29M|{
  483|  1.29M|    switch (gmv->type) {
  484|   161k|    case DAV1D_WM_TYPE_ROT_ZOOM:
  ------------------
  |  Branch (484:5): [True: 161k, False: 1.12M]
  ------------------
  485|   161k|        assert(gmv->matrix[5] ==  gmv->matrix[2]);
  ------------------
  |  Branch (485:9): [True: 161k, False: 0]
  ------------------
  486|   161k|        assert(gmv->matrix[4] == -gmv->matrix[3]);
  ------------------
  |  Branch (486:9): [True: 161k, False: 0]
  ------------------
  487|       |        // fall-through
  488|   161k|    default:
  ------------------
  |  Branch (488:5): [True: 0, False: 1.29M]
  ------------------
  489|   195k|    case DAV1D_WM_TYPE_AFFINE: {
  ------------------
  |  Branch (489:5): [True: 33.5k, False: 1.25M]
  ------------------
  490|   195k|        const int x = bx4 * 4 + bw4 * 2 - 1;
  491|   195k|        const int y = by4 * 4 + bh4 * 2 - 1;
  492|   195k|        const int xc = (gmv->matrix[2] - (1 << 16)) * x +
  493|   195k|                       gmv->matrix[3] * y + gmv->matrix[0];
  494|   195k|        const int yc = (gmv->matrix[5] - (1 << 16)) * y +
  495|   195k|                       gmv->matrix[4] * x + gmv->matrix[1];
  496|   195k|        const int shift = 16 - (3 - !hdr->hp);
  497|   195k|        const int round = (1 << shift) >> 1;
  498|   195k|        mv res = (mv) {
  499|   195k|            .y = apply_sign(((abs(yc) + round) >> shift) << !hdr->hp, yc),
  500|   195k|            .x = apply_sign(((abs(xc) + round) >> shift) << !hdr->hp, xc),
  501|   195k|        };
  502|   195k|        if (hdr->force_integer_mv)
  ------------------
  |  Branch (502:13): [True: 59.6k, False: 135k]
  ------------------
  503|  59.6k|            fix_int_mv_precision(&res);
  504|   195k|        return res;
  505|   161k|    }
  506|   147k|    case DAV1D_WM_TYPE_TRANSLATION: {
  ------------------
  |  Branch (506:5): [True: 147k, False: 1.14M]
  ------------------
  507|   147k|        mv res = (mv) {
  508|   147k|            .y = gmv->matrix[0] >> 13,
  509|   147k|            .x = gmv->matrix[1] >> 13,
  510|   147k|        };
  511|   147k|        if (hdr->force_integer_mv)
  ------------------
  |  Branch (511:13): [True: 18.2k, False: 128k]
  ------------------
  512|  18.2k|            fix_int_mv_precision(&res);
  513|   147k|        return res;
  514|   161k|    }
  515|   948k|    case DAV1D_WM_TYPE_IDENTITY:
  ------------------
  |  Branch (515:5): [True: 948k, False: 342k]
  ------------------
  516|   948k|        return (mv) { .x = 0, .y = 0 };
  517|  1.29M|    }
  518|  1.29M|}
refmvs.c:fix_int_mv_precision:
  462|   102k|static inline void fix_int_mv_precision(mv *const mv) {
  463|   102k|    mv->x = (mv->x - (mv->x >> 15) + 3) & ~7U;
  464|   102k|    mv->y = (mv->y - (mv->y >> 15) + 3) & ~7U;
  465|   102k|}
refmvs.c:fix_mv_precision:
  469|   614k|{
  470|   614k|    if (hdr->force_integer_mv) {
  ------------------
  |  Branch (470:9): [True: 24.8k, False: 589k]
  ------------------
  471|  24.8k|        fix_int_mv_precision(mv);
  472|   589k|    } else if (!hdr->hp) {
  ------------------
  |  Branch (472:16): [True: 30.3k, False: 559k]
  ------------------
  473|  30.3k|        mv->x = (mv->x - (mv->x >> 15)) & ~1U;
  474|  30.3k|        mv->y = (mv->y - (mv->y >> 15)) & ~1U;
  475|  30.3k|    }
  476|   614k|}
refmvs.c:get_poc_diff:
  239|   513k|{
  240|   513k|    if (!order_hint_n_bits) return 0;
  ------------------
  |  Branch (240:9): [True: 216k, False: 297k]
  ------------------
  241|   297k|    const int mask = 1 << (order_hint_n_bits - 1);
  242|   297k|    const int diff = poc0 - poc1;
  243|   297k|    return (diff & (mask - 1)) - (diff & mask);
  244|   513k|}
decode.c:get_partition_ctx:
   87|  2.74M|{
   88|  2.74M|    return ((a->partition[xb8] >> (4 - bl)) & 1) +
   89|  2.74M|          (((l->partition[yb8] >> (4 - bl)) & 1) << 1);
   90|  2.74M|}
decode.c:get_cur_frame_segid:
  445|   868k|{
  446|   868k|    cur_seg_map += bx + by * stride;
  447|   868k|    if (have_left && have_top) {
  ------------------
  |  Branch (447:9): [True: 761k, False: 106k]
  |  Branch (447:22): [True: 571k, False: 189k]
  ------------------
  448|   571k|        const int l = cur_seg_map[-1];
  449|   571k|        const int a = cur_seg_map[-stride];
  450|   571k|        const int al = cur_seg_map[-(stride + 1)];
  451|       |
  452|   571k|        if (l == a && al == l) *seg_ctx = 2;
  ------------------
  |  Branch (452:13): [True: 361k, False: 210k]
  |  Branch (452:23): [True: 348k, False: 13.0k]
  ------------------
  453|   223k|        else if (l == a || al == l || a == al) *seg_ctx = 1;
  ------------------
  |  Branch (453:18): [True: 13.0k, False: 210k]
  |  Branch (453:28): [True: 92.0k, False: 118k]
  |  Branch (453:39): [True: 74.2k, False: 43.8k]
  ------------------
  454|  43.8k|        else *seg_ctx = 0;
  455|   571k|        return a == al ? a : l;
  ------------------
  |  Branch (455:16): [True: 422k, False: 148k]
  ------------------
  456|   571k|    } else {
  457|   296k|        *seg_ctx = 0;
  458|   296k|        return have_left ? cur_seg_map[-1] : have_top ? cur_seg_map[-stride] : 0;
  ------------------
  |  Branch (458:16): [True: 189k, False: 106k]
  |  Branch (458:46): [True: 103k, False: 3.43k]
  ------------------
  459|   296k|    }
  460|   868k|}
decode.c:get_intra_ctx:
   63|   862k|{
   64|   862k|    if (have_left) {
  ------------------
  |  Branch (64:9): [True: 828k, False: 33.6k]
  ------------------
   65|   828k|        if (have_top) {
  ------------------
  |  Branch (65:13): [True: 743k, False: 85.3k]
  ------------------
   66|   743k|            const int ctx = l->intra[yb4] + a->intra[xb4];
   67|   743k|            return ctx + (ctx == 2);
   68|   743k|        } else
   69|  85.3k|            return l->intra[yb4] * 2;
   70|   828k|    } else {
   71|  33.6k|        return have_top ? a->intra[xb4] * 2 : 0;
  ------------------
  |  Branch (71:16): [True: 22.8k, False: 10.7k]
  ------------------
   72|  33.6k|    }
   73|   862k|}
decode.c:get_tx_ctx:
   79|   435k|{
   80|   435k|    return (l->tx_intra[yb4] >= max_tx->lh) + (a->tx_intra[xb4] >= max_tx->lw);
   81|   435k|}
decode.c:get_comp_ctx:
  160|   338k|{
  161|   338k|    if (have_top) {
  ------------------
  |  Branch (161:9): [True: 295k, False: 43.1k]
  ------------------
  162|   295k|        if (have_left) {
  ------------------
  |  Branch (162:13): [True: 283k, False: 12.1k]
  ------------------
  163|   283k|            if (a->comp_type[xb4]) {
  ------------------
  |  Branch (163:17): [True: 119k, False: 163k]
  ------------------
  164|   119k|                if (l->comp_type[yb4]) {
  ------------------
  |  Branch (164:21): [True: 82.9k, False: 36.8k]
  ------------------
  165|  82.9k|                    return 4;
  166|  82.9k|                } else {
  167|       |                    // 4U means intra (-1) or bwd (>= 4)
  168|  36.8k|                    return 2 + ((unsigned)l->ref[0][yb4] >= 4U);
  169|  36.8k|                }
  170|   163k|            } else if (l->comp_type[yb4]) {
  ------------------
  |  Branch (170:24): [True: 44.2k, False: 119k]
  ------------------
  171|       |                // 4U means intra (-1) or bwd (>= 4)
  172|  44.2k|                return 2 + ((unsigned)a->ref[0][xb4] >= 4U);
  173|   119k|            } else {
  174|   119k|                return (l->ref[0][yb4] >= 4) ^ (a->ref[0][xb4] >= 4);
  175|   119k|            }
  176|   283k|        } else {
  177|  12.1k|            return a->comp_type[xb4] ? 3 : a->ref[0][xb4] >= 4;
  ------------------
  |  Branch (177:20): [True: 4.89k, False: 7.26k]
  ------------------
  178|  12.1k|        }
  179|   295k|    } else if (have_left) {
  ------------------
  |  Branch (179:16): [True: 39.1k, False: 3.90k]
  ------------------
  180|  39.1k|        return l->comp_type[yb4] ? 3 : l->ref[0][yb4] >= 4;
  ------------------
  |  Branch (180:16): [True: 18.1k, False: 21.0k]
  ------------------
  181|  39.1k|    } else {
  182|  3.90k|        return 1;
  183|  3.90k|    }
  184|   338k|}
decode.c:fix_mv_precision:
  469|   617k|{
  470|   617k|    if (hdr->force_integer_mv) {
  ------------------
  |  Branch (470:9): [True: 141k, False: 476k]
  ------------------
  471|   141k|        fix_int_mv_precision(mv);
  472|   476k|    } else if (!hdr->hp) {
  ------------------
  |  Branch (472:16): [True: 116k, False: 359k]
  ------------------
  473|   116k|        mv->x = (mv->x - (mv->x >> 15)) & ~1U;
  474|   116k|        mv->y = (mv->y - (mv->y >> 15)) & ~1U;
  475|   116k|    }
  476|   617k|}
decode.c:fix_int_mv_precision:
  462|   172k|static inline void fix_int_mv_precision(mv *const mv) {
  463|   172k|    mv->x = (mv->x - (mv->x >> 15) + 3) & ~7U;
  464|   172k|    mv->y = (mv->y - (mv->y >> 15) + 3) & ~7U;
  465|   172k|}
decode.c:get_comp_dir_ctx:
  190|   176k|{
  191|   176k|#define has_uni_comp(edge, off) \
  192|   176k|    ((edge->ref[0][off] < 4) == (edge->ref[1][off] < 4))
  193|       |
  194|   176k|    if (have_top && have_left) {
  ------------------
  |  Branch (194:9): [True: 154k, False: 22.2k]
  |  Branch (194:21): [True: 148k, False: 5.54k]
  ------------------
  195|   148k|        const int a_intra = a->intra[xb4], l_intra = l->intra[yb4];
  196|       |
  197|   148k|        if (a_intra && l_intra) return 2;
  ------------------
  |  Branch (197:13): [True: 6.86k, False: 141k]
  |  Branch (197:24): [True: 1.59k, False: 5.26k]
  ------------------
  198|   147k|        if (a_intra || l_intra) {
  ------------------
  |  Branch (198:13): [True: 5.26k, False: 141k]
  |  Branch (198:24): [True: 4.19k, False: 137k]
  ------------------
  199|  9.45k|            const BlockContext *const edge = a_intra ? l : a;
  ------------------
  |  Branch (199:46): [True: 5.26k, False: 4.19k]
  ------------------
  200|  9.45k|            const int off = a_intra ? yb4 : xb4;
  ------------------
  |  Branch (200:29): [True: 5.26k, False: 4.19k]
  ------------------
  201|       |
  202|  9.45k|            if (edge->comp_type[off] == COMP_INTER_NONE) return 2;
  ------------------
  |  Branch (202:17): [True: 2.68k, False: 6.77k]
  ------------------
  203|  6.77k|            return 1 + 2 * has_uni_comp(edge, off);
  ------------------
  |  |  192|  6.77k|    ((edge->ref[0][off] < 4) == (edge->ref[1][off] < 4))
  ------------------
  204|  9.45k|        }
  205|       |
  206|   137k|        const int a_comp = a->comp_type[xb4] != COMP_INTER_NONE;
  207|   137k|        const int l_comp = l->comp_type[yb4] != COMP_INTER_NONE;
  208|   137k|        const int a_ref0 = a->ref[0][xb4], l_ref0 = l->ref[0][yb4];
  209|       |
  210|   137k|        if (!a_comp && !l_comp) {
  ------------------
  |  Branch (210:13): [True: 39.2k, False: 98.3k]
  |  Branch (210:24): [True: 13.6k, False: 25.5k]
  ------------------
  211|  13.6k|            return 1 + 2 * ((a_ref0 >= 4) == (l_ref0 >= 4));
  212|   123k|        } else if (!a_comp || !l_comp) {
  ------------------
  |  Branch (212:20): [True: 25.5k, False: 98.3k]
  |  Branch (212:31): [True: 20.7k, False: 77.5k]
  ------------------
  213|  46.3k|            const BlockContext *const edge = a_comp ? a : l;
  ------------------
  |  Branch (213:46): [True: 20.7k, False: 25.5k]
  ------------------
  214|  46.3k|            const int off = a_comp ? xb4 : yb4;
  ------------------
  |  Branch (214:29): [True: 20.7k, False: 25.5k]
  ------------------
  215|       |
  216|  46.3k|            if (!has_uni_comp(edge, off)) return 1;
  ------------------
  |  |  192|  46.3k|    ((edge->ref[0][off] < 4) == (edge->ref[1][off] < 4))
  ------------------
  |  Branch (216:17): [True: 38.3k, False: 7.99k]
  ------------------
  217|  7.99k|            return 3 + ((a_ref0 >= 4) == (l_ref0 >= 4));
  218|  77.5k|        } else {
  219|  77.5k|            const int a_uni = has_uni_comp(a, xb4), l_uni = has_uni_comp(l, yb4);
  ------------------
  |  |  192|  77.5k|    ((edge->ref[0][off] < 4) == (edge->ref[1][off] < 4))
  ------------------
                          const int a_uni = has_uni_comp(a, xb4), l_uni = has_uni_comp(l, yb4);
  ------------------
  |  |  192|  77.5k|    ((edge->ref[0][off] < 4) == (edge->ref[1][off] < 4))
  ------------------
  220|       |
  221|  77.5k|            if (!a_uni && !l_uni) return 0;
  ------------------
  |  Branch (221:17): [True: 66.4k, False: 11.0k]
  |  Branch (221:27): [True: 60.9k, False: 5.54k]
  ------------------
  222|  16.5k|            if (!a_uni || !l_uni) return 2;
  ------------------
  |  Branch (222:17): [True: 5.54k, False: 11.0k]
  |  Branch (222:27): [True: 6.23k, False: 4.79k]
  ------------------
  223|  4.79k|            return 3 + ((a_ref0 == 4) == (l_ref0 == 4));
  224|  16.5k|        }
  225|   137k|    } else if (have_top || have_left) {
  ------------------
  |  Branch (225:16): [True: 5.54k, False: 22.2k]
  |  Branch (225:28): [True: 21.4k, False: 750]
  ------------------
  226|  27.0k|        const BlockContext *const edge = have_left ? l : a;
  ------------------
  |  Branch (226:42): [True: 21.4k, False: 5.54k]
  ------------------
  227|  27.0k|        const int off = have_left ? yb4 : xb4;
  ------------------
  |  Branch (227:25): [True: 21.4k, False: 5.54k]
  ------------------
  228|       |
  229|  27.0k|        if (edge->intra[off]) return 2;
  ------------------
  |  Branch (229:13): [True: 1.17k, False: 25.8k]
  ------------------
  230|  25.8k|        if (edge->comp_type[off] == COMP_INTER_NONE) return 2;
  ------------------
  |  Branch (230:13): [True: 6.94k, False: 18.9k]
  ------------------
  231|  18.9k|        return 4 * has_uni_comp(edge, off);
  ------------------
  |  |  192|  18.9k|    ((edge->ref[0][off] < 4) == (edge->ref[1][off] < 4))
  ------------------
  232|  25.8k|    } else {
  233|    750|        return 2;
  234|    750|    }
  235|   176k|}
decode.c:av1_get_fwd_ref_ctx:
  307|   504k|{
  308|   504k|    int cnt[4] = { 0 };
  309|       |
  310|   504k|    if (have_top && !a->intra[xb4]) {
  ------------------
  |  Branch (310:9): [True: 457k, False: 47.6k]
  |  Branch (310:21): [True: 428k, False: 28.3k]
  ------------------
  311|   428k|        if (a->ref[0][xb4] < 4) cnt[a->ref[0][xb4]]++;
  ------------------
  |  Branch (311:13): [True: 393k, False: 35.1k]
  ------------------
  312|   428k|        if (a->comp_type[xb4] && a->ref[1][xb4] < 4) cnt[a->ref[1][xb4]]++;
  ------------------
  |  Branch (312:13): [True: 120k, False: 308k]
  |  Branch (312:34): [True: 14.3k, False: 106k]
  ------------------
  313|   428k|    }
  314|       |
  315|   504k|    if (have_left && !l->intra[yb4]) {
  ------------------
  |  Branch (315:9): [True: 489k, False: 14.8k]
  |  Branch (315:22): [True: 460k, False: 29.3k]
  ------------------
  316|   460k|        if (l->ref[0][yb4] < 4) cnt[l->ref[0][yb4]]++;
  ------------------
  |  Branch (316:13): [True: 428k, False: 32.2k]
  ------------------
  317|   460k|        if (l->comp_type[yb4] && l->ref[1][yb4] < 4) cnt[l->ref[1][yb4]]++;
  ------------------
  |  Branch (317:13): [True: 143k, False: 317k]
  |  Branch (317:34): [True: 15.0k, False: 128k]
  ------------------
  318|   460k|    }
  319|       |
  320|   504k|    cnt[0] += cnt[1];
  321|   504k|    cnt[2] += cnt[3];
  322|       |
  323|   504k|    return cnt[0] == cnt[2] ? 1 : cnt[0] < cnt[2] ? 0 : 2;
  ------------------
  |  Branch (323:12): [True: 84.7k, False: 419k]
  |  Branch (323:35): [True: 91.5k, False: 328k]
  ------------------
  324|   504k|}
decode.c:av1_get_fwd_ref_2_ctx:
  350|   140k|{
  351|   140k|    int cnt[2] = { 0 };
  352|       |
  353|   140k|    if (have_top && !a->intra[xb4]) {
  ------------------
  |  Branch (353:9): [True: 119k, False: 21.1k]
  |  Branch (353:21): [True: 110k, False: 9.16k]
  ------------------
  354|   110k|        if ((a->ref[0][xb4] ^ 2U) < 2) cnt[a->ref[0][xb4] - 2]++;
  ------------------
  |  Branch (354:13): [True: 63.0k, False: 47.3k]
  ------------------
  355|   110k|        if (a->comp_type[xb4] && (a->ref[1][xb4] ^ 2U) < 2) cnt[a->ref[1][xb4] - 2]++;
  ------------------
  |  Branch (355:13): [True: 43.5k, False: 66.8k]
  |  Branch (355:34): [True: 6.42k, False: 37.1k]
  ------------------
  356|   110k|    }
  357|       |
  358|   140k|    if (have_left && !l->intra[yb4]) {
  ------------------
  |  Branch (358:9): [True: 135k, False: 5.58k]
  |  Branch (358:22): [True: 125k, False: 9.15k]
  ------------------
  359|   125k|        if ((l->ref[0][yb4] ^ 2U) < 2) cnt[l->ref[0][yb4] - 2]++;
  ------------------
  |  Branch (359:13): [True: 79.8k, False: 46.0k]
  ------------------
  360|   125k|        if (l->comp_type[yb4] && (l->ref[1][yb4] ^ 2U) < 2) cnt[l->ref[1][yb4] - 2]++;
  ------------------
  |  Branch (360:13): [True: 58.5k, False: 67.3k]
  |  Branch (360:34): [True: 7.80k, False: 50.7k]
  ------------------
  361|   125k|    }
  362|       |
  363|   140k|    return cnt[0] == cnt[1] ? 1 : cnt[0] < cnt[1] ? 0 : 2;
  ------------------
  |  Branch (363:12): [True: 38.2k, False: 102k]
  |  Branch (363:35): [True: 73.7k, False: 28.7k]
  ------------------
  364|   140k|}
decode.c:av1_get_fwd_ref_1_ctx:
  330|   377k|{
  331|   377k|    int cnt[2] = { 0 };
  332|       |
  333|   377k|    if (have_top && !a->intra[xb4]) {
  ------------------
  |  Branch (333:9): [True: 350k, False: 27.5k]
  |  Branch (333:21): [True: 330k, False: 19.8k]
  ------------------
  334|   330k|        if (a->ref[0][xb4] < 2) cnt[a->ref[0][xb4]]++;
  ------------------
  |  Branch (334:13): [True: 283k, False: 47.0k]
  ------------------
  335|   330k|        if (a->comp_type[xb4] && a->ref[1][xb4] < 2) cnt[a->ref[1][xb4]]++;
  ------------------
  |  Branch (335:13): [True: 85.2k, False: 245k]
  |  Branch (335:34): [True: 5.34k, False: 79.8k]
  ------------------
  336|   330k|    }
  337|       |
  338|   377k|    if (have_left && !l->intra[yb4]) {
  ------------------
  |  Branch (338:9): [True: 367k, False: 10.2k]
  |  Branch (338:22): [True: 347k, False: 20.5k]
  ------------------
  339|   347k|        if (l->ref[0][yb4] < 2) cnt[l->ref[0][yb4]]++;
  ------------------
  |  Branch (339:13): [True: 299k, False: 47.8k]
  ------------------
  340|   347k|        if (l->comp_type[yb4] && l->ref[1][yb4] < 2) cnt[l->ref[1][yb4]]++;
  ------------------
  |  Branch (340:13): [True: 93.3k, False: 253k]
  |  Branch (340:34): [True: 5.14k, False: 88.1k]
  ------------------
  341|   347k|    }
  342|       |
  343|   377k|    return cnt[0] == cnt[1] ? 1 : cnt[0] < cnt[1] ? 0 : 2;
  ------------------
  |  Branch (343:12): [True: 47.1k, False: 330k]
  |  Branch (343:35): [True: 25.8k, False: 304k]
  ------------------
  344|   377k|}
decode.c:av1_get_bwd_ref_ctx:
  370|   352k|{
  371|   352k|    int cnt[3] = { 0 };
  372|       |
  373|   352k|    if (have_top && !a->intra[xb4]) {
  ------------------
  |  Branch (373:9): [True: 297k, False: 54.8k]
  |  Branch (373:21): [True: 280k, False: 17.7k]
  ------------------
  374|   280k|        if (a->ref[0][xb4] >= 4) cnt[a->ref[0][xb4] - 4]++;
  ------------------
  |  Branch (374:13): [True: 136k, False: 143k]
  ------------------
  375|   280k|        if (a->comp_type[xb4] && a->ref[1][xb4] >= 4) cnt[a->ref[1][xb4] - 4]++;
  ------------------
  |  Branch (375:13): [True: 105k, False: 174k]
  |  Branch (375:34): [True: 98.2k, False: 7.24k]
  ------------------
  376|   280k|    }
  377|       |
  378|   352k|    if (have_left && !l->intra[yb4]) {
  ------------------
  |  Branch (378:9): [True: 340k, False: 12.6k]
  |  Branch (378:22): [True: 320k, False: 19.2k]
  ------------------
  379|   320k|        if (l->ref[0][yb4] >= 4) cnt[l->ref[0][yb4] - 4]++;
  ------------------
  |  Branch (379:13): [True: 154k, False: 166k]
  ------------------
  380|   320k|        if (l->comp_type[yb4] && l->ref[1][yb4] >= 4) cnt[l->ref[1][yb4] - 4]++;
  ------------------
  |  Branch (380:13): [True: 127k, False: 193k]
  |  Branch (380:34): [True: 120k, False: 6.65k]
  ------------------
  381|   320k|    }
  382|       |
  383|   352k|    cnt[1] += cnt[0];
  384|       |
  385|   352k|    return cnt[2] == cnt[1] ? 1 : cnt[1] < cnt[2] ? 0 : 2;
  ------------------
  |  Branch (385:12): [True: 60.6k, False: 292k]
  |  Branch (385:35): [True: 177k, False: 114k]
  ------------------
  386|   352k|}
decode.c:av1_get_bwd_ref_1_ctx:
  392|   145k|{
  393|   145k|    int cnt[3] = { 0 };
  394|       |
  395|   145k|    if (have_top && !a->intra[xb4]) {
  ------------------
  |  Branch (395:9): [True: 132k, False: 13.4k]
  |  Branch (395:21): [True: 122k, False: 9.25k]
  ------------------
  396|   122k|        if (a->ref[0][xb4] >= 4) cnt[a->ref[0][xb4] - 4]++;
  ------------------
  |  Branch (396:13): [True: 47.9k, False: 74.9k]
  ------------------
  397|   122k|        if (a->comp_type[xb4] && a->ref[1][xb4] >= 4) cnt[a->ref[1][xb4] - 4]++;
  ------------------
  |  Branch (397:13): [True: 55.8k, False: 67.0k]
  |  Branch (397:34): [True: 51.4k, False: 4.39k]
  ------------------
  398|   122k|    }
  399|       |
  400|   145k|    if (have_left && !l->intra[yb4]) {
  ------------------
  |  Branch (400:9): [True: 141k, False: 4.03k]
  |  Branch (400:22): [True: 131k, False: 10.1k]
  ------------------
  401|   131k|        if (l->ref[0][yb4] >= 4) cnt[l->ref[0][yb4] - 4]++;
  ------------------
  |  Branch (401:13): [True: 46.3k, False: 85.0k]
  ------------------
  402|   131k|        if (l->comp_type[yb4] && l->ref[1][yb4] >= 4) cnt[l->ref[1][yb4] - 4]++;
  ------------------
  |  Branch (402:13): [True: 65.0k, False: 66.3k]
  |  Branch (402:34): [True: 61.2k, False: 3.80k]
  ------------------
  403|   131k|    }
  404|       |
  405|   145k|    return cnt[0] == cnt[1] ? 1 : cnt[0] < cnt[1] ? 0 : 2;
  ------------------
  |  Branch (405:12): [True: 30.5k, False: 115k]
  |  Branch (405:35): [True: 38.2k, False: 76.7k]
  ------------------
  406|   145k|}
decode.c:av1_get_ref_ctx:
  287|   585k|{
  288|   585k|    int cnt[2] = { 0 };
  289|       |
  290|   585k|    if (have_top && !a->intra[xb4]) {
  ------------------
  |  Branch (290:9): [True: 519k, False: 65.8k]
  |  Branch (290:21): [True: 482k, False: 36.4k]
  ------------------
  291|   482k|        cnt[a->ref[0][xb4] >= 4]++;
  292|   482k|        if (a->comp_type[xb4]) cnt[a->ref[1][xb4] >= 4]++;
  ------------------
  |  Branch (292:13): [True: 60.7k, False: 422k]
  ------------------
  293|   482k|    }
  294|       |
  295|   585k|    if (have_left && !l->intra[yb4]) {
  ------------------
  |  Branch (295:9): [True: 564k, False: 20.4k]
  |  Branch (295:22): [True: 526k, False: 38.3k]
  ------------------
  296|   526k|        cnt[l->ref[0][yb4] >= 4]++;
  297|   526k|        if (l->comp_type[yb4]) cnt[l->ref[1][yb4] >= 4]++;
  ------------------
  |  Branch (297:13): [True: 73.4k, False: 452k]
  ------------------
  298|   526k|    }
  299|       |
  300|   585k|    return cnt[0] == cnt[1] ? 1 : cnt[0] < cnt[1] ? 0 : 2;
  ------------------
  |  Branch (300:12): [True: 82.0k, False: 503k]
  |  Branch (300:35): [True: 170k, False: 332k]
  ------------------
  301|   585k|}
decode.c:av1_get_uni_p1_ctx:
  412|  21.4k|{
  413|  21.4k|    int cnt[3] = { 0 };
  414|       |
  415|  21.4k|    if (have_top && !a->intra[xb4]) {
  ------------------
  |  Branch (415:9): [True: 19.7k, False: 1.73k]
  |  Branch (415:21): [True: 18.5k, False: 1.18k]
  ------------------
  416|  18.5k|        if (a->ref[0][xb4] - 1U < 3) cnt[a->ref[0][xb4] - 1]++;
  ------------------
  |  Branch (416:13): [True: 4.01k, False: 14.5k]
  ------------------
  417|  18.5k|        if (a->comp_type[xb4] && a->ref[1][xb4] - 1U < 3) cnt[a->ref[1][xb4] - 1]++;
  ------------------
  |  Branch (417:13): [True: 12.0k, False: 6.44k]
  |  Branch (417:34): [True: 7.04k, False: 5.03k]
  ------------------
  418|  18.5k|    }
  419|       |
  420|  21.4k|    if (have_left && !l->intra[yb4]) {
  ------------------
  |  Branch (420:9): [True: 20.2k, False: 1.15k]
  |  Branch (420:22): [True: 19.2k, False: 1.08k]
  ------------------
  421|  19.2k|        if (l->ref[0][yb4] - 1U < 3) cnt[l->ref[0][yb4] - 1]++;
  ------------------
  |  Branch (421:13): [True: 4.31k, False: 14.9k]
  ------------------
  422|  19.2k|        if (l->comp_type[yb4] && l->ref[1][yb4] - 1U < 3) cnt[l->ref[1][yb4] - 1]++;
  ------------------
  |  Branch (422:13): [True: 13.4k, False: 5.75k]
  |  Branch (422:34): [True: 7.63k, False: 5.82k]
  ------------------
  423|  19.2k|    }
  424|       |
  425|  21.4k|    cnt[1] += cnt[2];
  426|       |
  427|  21.4k|    return cnt[0] == cnt[1] ? 1 : cnt[0] < cnt[1] ? 0 : 2;
  ------------------
  |  Branch (427:12): [True: 6.91k, False: 14.5k]
  |  Branch (427:35): [True: 9.81k, False: 4.72k]
  ------------------
  428|  21.4k|}
decode.c:get_drl_context:
  432|   312k|{
  433|   312k|    if (ref_mv_stack[ref_idx].weight >= 640)
  ------------------
  |  Branch (433:9): [True: 249k, False: 62.4k]
  ------------------
  434|   249k|        return ref_mv_stack[ref_idx + 1].weight < 640;
  435|       |
  436|  62.4k|    return ref_mv_stack[ref_idx + 1].weight < 640 ? 2 : 0;
  ------------------
  |  Branch (436:12): [True: 62.4k, False: 0]
  ------------------
  437|   312k|}
decode.c:get_gmv_2d:
  482|   394k|{
  483|   394k|    switch (gmv->type) {
  484|  44.4k|    case DAV1D_WM_TYPE_ROT_ZOOM:
  ------------------
  |  Branch (484:5): [True: 44.4k, False: 350k]
  ------------------
  485|  44.4k|        assert(gmv->matrix[5] ==  gmv->matrix[2]);
  ------------------
  |  Branch (485:9): [True: 44.4k, False: 0]
  ------------------
  486|  44.4k|        assert(gmv->matrix[4] == -gmv->matrix[3]);
  ------------------
  |  Branch (486:9): [True: 44.4k, False: 0]
  ------------------
  487|       |        // fall-through
  488|  44.4k|    default:
  ------------------
  |  Branch (488:5): [True: 0, False: 394k]
  ------------------
  489|  59.2k|    case DAV1D_WM_TYPE_AFFINE: {
  ------------------
  |  Branch (489:5): [True: 14.7k, False: 379k]
  ------------------
  490|  59.2k|        const int x = bx4 * 4 + bw4 * 2 - 1;
  491|  59.2k|        const int y = by4 * 4 + bh4 * 2 - 1;
  492|  59.2k|        const int xc = (gmv->matrix[2] - (1 << 16)) * x +
  493|  59.2k|                       gmv->matrix[3] * y + gmv->matrix[0];
  494|  59.2k|        const int yc = (gmv->matrix[5] - (1 << 16)) * y +
  495|  59.2k|                       gmv->matrix[4] * x + gmv->matrix[1];
  496|  59.2k|        const int shift = 16 - (3 - !hdr->hp);
  497|  59.2k|        const int round = (1 << shift) >> 1;
  498|  59.2k|        mv res = (mv) {
  499|  59.2k|            .y = apply_sign(((abs(yc) + round) >> shift) << !hdr->hp, yc),
  500|  59.2k|            .x = apply_sign(((abs(xc) + round) >> shift) << !hdr->hp, xc),
  501|  59.2k|        };
  502|  59.2k|        if (hdr->force_integer_mv)
  ------------------
  |  Branch (502:13): [True: 24.1k, False: 35.0k]
  ------------------
  503|  24.1k|            fix_int_mv_precision(&res);
  504|  59.2k|        return res;
  505|  44.4k|    }
  506|   123k|    case DAV1D_WM_TYPE_TRANSLATION: {
  ------------------
  |  Branch (506:5): [True: 123k, False: 271k]
  ------------------
  507|   123k|        mv res = (mv) {
  508|   123k|            .y = gmv->matrix[0] >> 13,
  509|   123k|            .x = gmv->matrix[1] >> 13,
  510|   123k|        };
  511|   123k|        if (hdr->force_integer_mv)
  ------------------
  |  Branch (511:13): [True: 6.90k, False: 116k]
  ------------------
  512|  6.90k|            fix_int_mv_precision(&res);
  513|   123k|        return res;
  514|  44.4k|    }
  515|   211k|    case DAV1D_WM_TYPE_IDENTITY:
  ------------------
  |  Branch (515:5): [True: 211k, False: 182k]
  ------------------
  516|   211k|        return (mv) { .x = 0, .y = 0 };
  517|   394k|    }
  518|   394k|}
decode.c:get_mask_comp_ctx:
  266|   151k|{
  267|   151k|    const int a_ctx = a->comp_type[xb4] >= COMP_INTER_SEG ? 1 :
  ------------------
  |  Branch (267:23): [True: 25.4k, False: 126k]
  ------------------
  268|   151k|                      a->ref[0][xb4] == 6 ? 3 : 0;
  ------------------
  |  Branch (268:23): [True: 5.96k, False: 120k]
  ------------------
  269|   151k|    const int l_ctx = l->comp_type[yb4] >= COMP_INTER_SEG ? 1 :
  ------------------
  |  Branch (269:23): [True: 36.8k, False: 115k]
  ------------------
  270|   151k|                      l->ref[0][yb4] == 6 ? 3 : 0;
  ------------------
  |  Branch (270:23): [True: 5.20k, False: 109k]
  ------------------
  271|       |
  272|   151k|    return imin(a_ctx + l_ctx, 5);
  273|   151k|}
decode.c:get_jnt_comp_ctx:
  251|  89.8k|{
  252|  89.8k|    const int d0 = abs(get_poc_diff(order_hint_n_bits, ref0poc, poc));
  253|  89.8k|    const int d1 = abs(get_poc_diff(order_hint_n_bits, poc, ref1poc));
  254|  89.8k|    const int offset = d0 == d1;
  255|  89.8k|    const int a_ctx = a->comp_type[xb4] >= COMP_INTER_AVG ||
  ------------------
  |  Branch (255:23): [True: 42.9k, False: 46.9k]
  ------------------
  256|  46.9k|                      a->ref[0][xb4] == 6;
  ------------------
  |  Branch (256:23): [True: 3.12k, False: 43.7k]
  ------------------
  257|  89.8k|    const int l_ctx = l->comp_type[yb4] >= COMP_INTER_AVG ||
  ------------------
  |  Branch (257:23): [True: 44.1k, False: 45.7k]
  ------------------
  258|  45.7k|                      l->ref[0][yb4] == 6;
  ------------------
  |  Branch (258:23): [True: 3.51k, False: 42.2k]
  ------------------
  259|       |
  260|  89.8k|    return 3 * offset + a_ctx + l_ctx;
  261|  89.8k|}
decode.c:get_filter_ctx:
  139|   483k|{
  140|   483k|    const int a_filter = (a->ref[0][xb4] == ref || a->ref[1][xb4] == ref) ?
  ------------------
  |  Branch (140:27): [True: 321k, False: 162k]
  |  Branch (140:52): [True: 11.6k, False: 150k]
  ------------------
  141|   333k|                         a->filter[dir][xb4] : DAV1D_N_SWITCHABLE_FILTERS;
  142|   483k|    const int l_filter = (l->ref[0][yb4] == ref || l->ref[1][yb4] == ref) ?
  ------------------
  |  Branch (142:27): [True: 359k, False: 124k]
  |  Branch (142:52): [True: 12.6k, False: 111k]
  ------------------
  143|   372k|                         l->filter[dir][yb4] : DAV1D_N_SWITCHABLE_FILTERS;
  144|       |
  145|   483k|    if (a_filter == l_filter) {
  ------------------
  |  Branch (145:9): [True: 304k, False: 179k]
  ------------------
  146|   304k|        return comp * 4 + a_filter;
  147|   304k|    } else if (a_filter == DAV1D_N_SWITCHABLE_FILTERS) {
  ------------------
  |  Branch (147:16): [True: 98.0k, False: 81.2k]
  ------------------
  148|  98.0k|        return comp * 4 + l_filter;
  149|  98.0k|    } else if (l_filter == DAV1D_N_SWITCHABLE_FILTERS) {
  ------------------
  |  Branch (149:16): [True: 59.2k, False: 22.0k]
  ------------------
  150|  59.2k|        return comp * 4 + a_filter;
  151|  59.2k|    } else {
  152|  22.0k|        return comp * 4 + DAV1D_N_SWITCHABLE_FILTERS;
  153|  22.0k|    }
  154|   483k|}
decode.c:gather_top_partition_prob:
  106|   489k|{
  107|       |    // Exploit the fact that cdfs for PARTITION_V, PARTITION_SPLIT and
  108|       |    // PARTITION_T_TOP_SPLIT are neighbors.
  109|   489k|    unsigned out = in[PARTITION_V - 1] - in[PARTITION_T_TOP_SPLIT];
  110|       |    // Exploit the facts that cdfs for PARTITION_T_LEFT_SPLIT and
  111|       |    // PARTITION_T_RIGHT_SPLIT are neighbors, the probability for
  112|       |    // PARTITION_V4 is always zero, and the probability for
  113|       |    // PARTITION_T_RIGHT_SPLIT is zero in 128x128 blocks.
  114|   489k|    out += in[PARTITION_T_LEFT_SPLIT - 1];
  115|   489k|    if (bl != BL_128X128)
  ------------------
  |  Branch (115:9): [True: 450k, False: 39.2k]
  ------------------
  116|   450k|        out += in[PARTITION_V4 - 1] - in[PARTITION_T_RIGHT_SPLIT];
  117|   489k|    return out;
  118|   489k|}
decode.c:gather_left_partition_prob:
   94|  55.3k|{
   95|  55.3k|    unsigned out = in[PARTITION_H - 1] - in[PARTITION_H];
   96|       |    // Exploit the fact that cdfs for PARTITION_SPLIT, PARTITION_T_TOP_SPLIT,
   97|       |    // PARTITION_T_BOTTOM_SPLIT and PARTITION_T_LEFT_SPLIT are neighbors.
   98|  55.3k|    out += in[PARTITION_SPLIT - 1] - in[PARTITION_T_LEFT_SPLIT];
   99|  55.3k|    if (bl != BL_128X128)
  ------------------
  |  Branch (99:9): [True: 43.6k, False: 11.6k]
  ------------------
  100|  43.6k|        out += in[PARTITION_H4 - 1] - in[PARTITION_H4];
  101|  55.3k|    return out;
  102|  55.3k|}
decode.c:get_poc_diff:
  239|   579k|{
  240|   579k|    if (!order_hint_n_bits) return 0;
  ------------------
  |  Branch (240:9): [True: 35.2k, False: 543k]
  ------------------
  241|   543k|    const int mask = 1 << (order_hint_n_bits - 1);
  242|   543k|    const int diff = poc0 - poc1;
  243|   543k|    return (diff & (mask - 1)) - (diff & mask);
  244|   579k|}
recon_tmpl.c:get_uv_inter_txtp:
  122|   247k|{
  123|   247k|    if (uvt_dim->max == TX_32X32)
  ------------------
  |  Branch (123:9): [True: 63.2k, False: 184k]
  ------------------
  124|  63.2k|        return ytxtp == IDTX ? IDTX : DCT_DCT;
  ------------------
  |  Branch (124:16): [True: 2.61k, False: 60.5k]
  ------------------
  125|   184k|    if (uvt_dim->min == TX_16X16 &&
  ------------------
  |  Branch (125:9): [True: 20.9k, False: 163k]
  ------------------
  126|  20.9k|        ((1 << ytxtp) & ((1 << H_FLIPADST) | (1 << V_FLIPADST) |
  ------------------
  |  Branch (126:9): [True: 671, False: 20.2k]
  ------------------
  127|  20.9k|                         (1 << H_ADST) | (1 << V_ADST))))
  128|    671|    {
  129|    671|        return DCT_DCT;
  130|    671|    }
  131|       |
  132|   183k|    return ytxtp;
  133|   184k|}

dav1d_prep_grain_8bpc:
  105|  1.58k|{
  106|  1.58k|    const Dav1dFilmGrainData *const data = &out->frame_hdr->film_grain.data;
  107|       |#if BITDEPTH != 8
  108|       |    const int bitdepth_max = (1 << out->p.bpc) - 1;
  109|       |#endif
  110|       |
  111|       |    // Generate grain LUTs as needed
  112|  1.58k|    dsp->generate_grain_y(grain_lut[0], data HIGHBD_TAIL_SUFFIX); // always needed
  113|  1.58k|    if (data->num_uv_points[0] || data->chroma_scaling_from_luma)
  ------------------
  |  Branch (113:9): [True: 687, False: 894]
  |  Branch (113:35): [True: 261, False: 633]
  ------------------
  114|    948|        dsp->generate_grain_uv[in->p.layout - 1](grain_lut[1], grain_lut[0],
  115|    948|                                                 data, 0 HIGHBD_TAIL_SUFFIX);
  116|  1.58k|    if (data->num_uv_points[1] || data->chroma_scaling_from_luma)
  ------------------
  |  Branch (116:9): [True: 740, False: 841]
  |  Branch (116:35): [True: 261, False: 580]
  ------------------
  117|  1.00k|        dsp->generate_grain_uv[in->p.layout - 1](grain_lut[2], grain_lut[0],
  118|  1.00k|                                                 data, 1 HIGHBD_TAIL_SUFFIX);
  119|       |
  120|       |    // Generate scaling LUTs as needed
  121|  1.58k|    if (data->num_y_points || data->chroma_scaling_from_luma)
  ------------------
  |  Branch (121:9): [True: 826, False: 755]
  |  Branch (121:31): [True: 208, False: 547]
  ------------------
  122|  1.03k|        generate_scaling(in->p.bpc, data->y_points, data->num_y_points, scaling[0]);
  123|  1.58k|    if (data->num_uv_points[0])
  ------------------
  |  Branch (123:9): [True: 687, False: 894]
  ------------------
  124|    687|        generate_scaling(in->p.bpc, data->uv_points[0], data->num_uv_points[0], scaling[1]);
  125|  1.58k|    if (data->num_uv_points[1])
  ------------------
  |  Branch (125:9): [True: 740, False: 841]
  ------------------
  126|    740|        generate_scaling(in->p.bpc, data->uv_points[1], data->num_uv_points[1], scaling[2]);
  127|       |
  128|       |    // Copy over the non-modified planes
  129|  1.58k|    assert(out->stride[0] == in->stride[0]);
  ------------------
  |  Branch (129:5): [True: 1.58k, False: 0]
  ------------------
  130|  1.58k|    if (!data->num_y_points) {
  ------------------
  |  Branch (130:9): [True: 755, False: 826]
  ------------------
  131|    755|        const ptrdiff_t stride = out->stride[0];
  132|    755|        const ptrdiff_t sz = out->p.h * stride;
  133|    755|        if (sz < 0)
  ------------------
  |  Branch (133:13): [True: 0, False: 755]
  ------------------
  134|      0|            memcpy((uint8_t*) out->data[0] + sz - stride,
  135|      0|                   (uint8_t*) in->data[0] + sz - stride, -sz);
  136|    755|        else
  137|    755|            memcpy(out->data[0], in->data[0], sz);
  138|    755|    }
  139|       |
  140|  1.58k|    if (in->p.layout != DAV1D_PIXEL_LAYOUT_I400 && !data->chroma_scaling_from_luma) {
  ------------------
  |  Branch (140:9): [True: 1.27k, False: 311]
  |  Branch (140:52): [True: 1.00k, False: 261]
  ------------------
  141|  1.00k|        assert(out->stride[1] == in->stride[1]);
  ------------------
  |  Branch (141:9): [True: 1.00k, False: 0]
  ------------------
  142|  1.00k|        const int ss_ver = in->p.layout == DAV1D_PIXEL_LAYOUT_I420;
  143|  1.00k|        const ptrdiff_t stride = out->stride[1];
  144|  1.00k|        const ptrdiff_t sz = ((out->p.h + ss_ver) >> ss_ver) * stride;
  145|  1.00k|        if (sz < 0) {
  ------------------
  |  Branch (145:13): [True: 0, False: 1.00k]
  ------------------
  146|      0|            if (!data->num_uv_points[0])
  ------------------
  |  Branch (146:17): [True: 0, False: 0]
  ------------------
  147|      0|                memcpy((uint8_t*) out->data[1] + sz - stride,
  148|      0|                       (uint8_t*) in->data[1] + sz - stride, -sz);
  149|      0|            if (!data->num_uv_points[1])
  ------------------
  |  Branch (149:17): [True: 0, False: 0]
  ------------------
  150|      0|                memcpy((uint8_t*) out->data[2] + sz - stride,
  151|      0|                       (uint8_t*) in->data[2] + sz - stride, -sz);
  152|  1.00k|        } else {
  153|  1.00k|            if (!data->num_uv_points[0])
  ------------------
  |  Branch (153:17): [True: 322, False: 687]
  ------------------
  154|    322|                memcpy(out->data[1], in->data[1], sz);
  155|  1.00k|            if (!data->num_uv_points[1])
  ------------------
  |  Branch (155:17): [True: 269, False: 740]
  ------------------
  156|    269|                memcpy(out->data[2], in->data[2], sz);
  157|  1.00k|        }
  158|  1.00k|    }
  159|  1.58k|}
dav1d_apply_grain_row_8bpc:
  167|  6.90k|{
  168|       |    // Synthesize grain for the affected planes
  169|  6.90k|    const Dav1dFilmGrainData *const data = &out->frame_hdr->film_grain.data;
  170|  6.90k|    const int ss_y = in->p.layout == DAV1D_PIXEL_LAYOUT_I420;
  171|  6.90k|    const int ss_x = in->p.layout != DAV1D_PIXEL_LAYOUT_I444;
  172|  6.90k|    const int cpw = (out->p.w + ss_x) >> ss_x;
  173|  6.90k|    const int is_id = out->seq_hdr->mtrx == DAV1D_MC_IDENTITY;
  174|  6.90k|    pixel *const luma_src =
  175|  6.90k|        ((pixel *) in->data[0]) + row * FG_BLOCK_SIZE * PXSTRIDE(in->stride[0]);
  ------------------
  |  |   37|  6.90k|#define FG_BLOCK_SIZE 32
  ------------------
                      ((pixel *) in->data[0]) + row * FG_BLOCK_SIZE * PXSTRIDE(in->stride[0]);
  ------------------
  |  |   53|  6.90k|#define PXSTRIDE(x) (x)
  ------------------
  176|       |#if BITDEPTH != 8
  177|       |    const int bitdepth_max = (1 << out->p.bpc) - 1;
  178|       |#endif
  179|       |
  180|  6.90k|    if (data->num_y_points) {
  ------------------
  |  Branch (180:9): [True: 1.88k, False: 5.02k]
  ------------------
  181|  1.88k|        const int bh = imin(out->p.h - row * FG_BLOCK_SIZE, FG_BLOCK_SIZE);
  ------------------
  |  |   37|  1.88k|#define FG_BLOCK_SIZE 32
  ------------------
                      const int bh = imin(out->p.h - row * FG_BLOCK_SIZE, FG_BLOCK_SIZE);
  ------------------
  |  |   37|  1.88k|#define FG_BLOCK_SIZE 32
  ------------------
  182|  1.88k|        dsp->fgy_32x32xn(((pixel *) out->data[0]) + row * FG_BLOCK_SIZE * PXSTRIDE(out->stride[0]),
  ------------------
  |  |   37|  1.88k|#define FG_BLOCK_SIZE 32
  ------------------
                      dsp->fgy_32x32xn(((pixel *) out->data[0]) + row * FG_BLOCK_SIZE * PXSTRIDE(out->stride[0]),
  ------------------
  |  |   53|  1.88k|#define PXSTRIDE(x) (x)
  ------------------
  183|  1.88k|                         luma_src, out->stride[0], data,
  184|  1.88k|                         out->p.w, scaling[0], grain_lut[0], bh, row HIGHBD_TAIL_SUFFIX);
  185|  1.88k|    }
  186|       |
  187|  6.90k|    if (!data->num_uv_points[0] && !data->num_uv_points[1] &&
  ------------------
  |  Branch (187:9): [True: 2.56k, False: 4.34k]
  |  Branch (187:36): [True: 1.33k, False: 1.22k]
  ------------------
  188|  1.33k|        !data->chroma_scaling_from_luma)
  ------------------
  |  Branch (188:9): [True: 713, False: 619]
  ------------------
  189|    713|    {
  190|    713|        return;
  191|    713|    }
  192|       |
  193|  6.19k|    const int bh = (imin(out->p.h - row * FG_BLOCK_SIZE, FG_BLOCK_SIZE) + ss_y) >> ss_y;
  ------------------
  |  |   37|  6.19k|#define FG_BLOCK_SIZE 32
  ------------------
                  const int bh = (imin(out->p.h - row * FG_BLOCK_SIZE, FG_BLOCK_SIZE) + ss_y) >> ss_y;
  ------------------
  |  |   37|  6.19k|#define FG_BLOCK_SIZE 32
  ------------------
  194|       |
  195|       |    // extend padding pixels
  196|  6.19k|    if (out->p.w & ss_x) {
  ------------------
  |  Branch (196:9): [True: 3.79k, False: 2.40k]
  ------------------
  197|  3.79k|        pixel *ptr = luma_src;
  198|   120k|        for (int y = 0; y < bh; y++) {
  ------------------
  |  Branch (198:25): [True: 116k, False: 3.79k]
  ------------------
  199|   116k|            ptr[out->p.w] = ptr[out->p.w - 1];
  200|   116k|            ptr += PXSTRIDE(in->stride[0]) << ss_y;
  ------------------
  |  |   53|   116k|#define PXSTRIDE(x) (x)
  ------------------
  201|   116k|        }
  202|  3.79k|    }
  203|       |
  204|  6.19k|    const ptrdiff_t uv_off = row * FG_BLOCK_SIZE * PXSTRIDE(out->stride[1]) >> ss_y;
  ------------------
  |  |   37|  6.19k|#define FG_BLOCK_SIZE 32
  ------------------
                  const ptrdiff_t uv_off = row * FG_BLOCK_SIZE * PXSTRIDE(out->stride[1]) >> ss_y;
  ------------------
  |  |   53|  6.19k|#define PXSTRIDE(x) (x)
  ------------------
  205|  6.19k|    if (data->chroma_scaling_from_luma) {
  ------------------
  |  Branch (205:9): [True: 619, False: 5.57k]
  ------------------
  206|  1.85k|        for (int pl = 0; pl < 2; pl++)
  ------------------
  |  Branch (206:26): [True: 1.23k, False: 619]
  ------------------
  207|  1.23k|            dsp->fguv_32x32xn[in->p.layout - 1](((pixel *) out->data[1 + pl]) + uv_off,
  208|  1.23k|                                                ((const pixel *) in->data[1 + pl]) + uv_off,
  209|  1.23k|                                                in->stride[1], data, cpw,
  210|  1.23k|                                                scaling[0], grain_lut[1 + pl],
  211|  1.23k|                                                bh, row, luma_src, in->stride[0],
  212|  1.23k|                                                pl, is_id HIGHBD_TAIL_SUFFIX);
  213|  5.57k|    } else {
  214|  16.7k|        for (int pl = 0; pl < 2; pl++)
  ------------------
  |  Branch (214:26): [True: 11.1k, False: 5.57k]
  ------------------
  215|  11.1k|            if (data->num_uv_points[pl])
  ------------------
  |  Branch (215:17): [True: 6.51k, False: 4.63k]
  ------------------
  216|  6.51k|                dsp->fguv_32x32xn[in->p.layout - 1](((pixel *) out->data[1 + pl]) + uv_off,
  217|  6.51k|                                                    ((const pixel *) in->data[1 + pl]) + uv_off,
  218|  6.51k|                                                    in->stride[1], data, cpw,
  219|  6.51k|                                                    scaling[1 + pl], grain_lut[1 + pl],
  220|  6.51k|                                                    bh, row, luma_src, in->stride[0],
  221|  6.51k|                                                    pl, is_id HIGHBD_TAIL_SUFFIX);
  222|  5.57k|    }
  223|  6.19k|}
dav1d_apply_grain_8bpc:
  228|  1.58k|{
  229|  1.58k|    ALIGN_STK_16(entry, grain_lut, 3,[GRAIN_HEIGHT + 1][GRAIN_WIDTH]);
  ------------------
  |  |  100|  1.58k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  1.58k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
  230|  1.58k|#if ARCH_X86_64 && BITDEPTH == 8
  231|  1.58k|    ALIGN_STK_64(uint8_t, scaling, 3,[SCALING_SIZE]);
  ------------------
  |  |   96|  1.58k|    ALIGN(type var[sz1d]sznd, ALIGN_64_VAL)
  |  |  ------------------
  |  |  |  |   86|  1.58k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
  232|       |#else
  233|       |    uint8_t scaling[3][SCALING_SIZE];
  234|       |#endif
  235|  1.58k|    const int rows = (out->p.h + FG_BLOCK_SIZE - 1) / FG_BLOCK_SIZE;
  ------------------
  |  |   37|  1.58k|#define FG_BLOCK_SIZE 32
  ------------------
                  const int rows = (out->p.h + FG_BLOCK_SIZE - 1) / FG_BLOCK_SIZE;
  ------------------
  |  |   37|  1.58k|#define FG_BLOCK_SIZE 32
  ------------------
  236|       |
  237|  1.58k|    bitfn(dav1d_prep_grain)(dsp, out, in, scaling, grain_lut);
  ------------------
  |  |   51|  1.58k|#define bitfn(x) x##_8bpc
  ------------------
  238|  8.48k|    for (int row = 0; row < rows; row++)
  ------------------
  |  Branch (238:23): [True: 6.90k, False: 1.58k]
  ------------------
  239|  6.90k|        bitfn(dav1d_apply_grain_row)(dsp, out, in, scaling, grain_lut, row);
  ------------------
  |  |   51|  6.90k|#define bitfn(x) x##_8bpc
  ------------------
  240|  1.58k|}
fg_apply_tmpl.c:generate_scaling:
   44|  2.46k|{
   45|  2.46k|#if BITDEPTH == 8
   46|  2.46k|    const int shift_x = 0;
   47|  2.46k|    const int scaling_size = SCALING_SIZE;
  ------------------
  |  |   39|  2.46k|#define SCALING_SIZE 256
  ------------------
   48|       |#else
   49|       |    assert(bitdepth > 8);
   50|       |    const int shift_x = bitdepth - 8;
   51|       |    const int scaling_size = 1 << bitdepth;
   52|       |#endif
   53|       |
   54|  2.46k|    if (num == 0) {
  ------------------
  |  Branch (54:9): [True: 208, False: 2.25k]
  ------------------
   55|    208|        memset(scaling, 0, scaling_size);
   56|    208|        return;
   57|    208|    }
   58|       |
   59|       |    // Fill up the preceding entries with the initial value
   60|  2.25k|    memset(scaling, points[0][1], points[0][0] << shift_x);
   61|       |
   62|       |    // Linearly interpolate the values in the middle
   63|  4.34k|    for (int i = 0; i < num - 1; i++) {
  ------------------
  |  Branch (63:21): [True: 2.08k, False: 2.25k]
  ------------------
   64|  2.08k|        const int bx = points[i][0];
   65|  2.08k|        const int by = points[i][1];
   66|  2.08k|        const int ex = points[i+1][0];
   67|  2.08k|        const int ey = points[i+1][1];
   68|  2.08k|        const int dx = ex - bx;
   69|  2.08k|        const int dy = ey - by;
   70|  2.08k|        assert(dx > 0);
  ------------------
  |  Branch (70:9): [True: 2.08k, False: 0]
  ------------------
   71|  2.08k|        const int delta = dy * ((0x10000 + (dx >> 1)) / dx);
   72|   161k|        for (int x = 0, d = 0x8000; x < dx; x++) {
  ------------------
  |  Branch (72:37): [True: 159k, False: 2.08k]
  ------------------
   73|   159k|            scaling[(bx + x) << shift_x] = by + (d >> 16);
   74|   159k|            d += delta;
   75|   159k|        }
   76|  2.08k|    }
   77|       |
   78|       |    // Fill up the remaining entries with the final value
   79|  2.25k|    const int n = points[num - 1][0] << shift_x;
   80|  2.25k|    memset(&scaling[n], points[num - 1][1], scaling_size - n);
   81|       |
   82|       |#if BITDEPTH != 8
   83|       |    const int pad = 1 << shift_x, rnd = pad >> 1;
   84|       |    for (int i = 0; i < num - 1; i++) {
   85|       |        const int bx = points[i][0] << shift_x;
   86|       |        const int ex = points[i+1][0] << shift_x;
   87|       |        const int dx = ex - bx;
   88|       |        for (int x = 0; x < dx; x += pad) {
   89|       |            const int range = scaling[bx + x + pad] - scaling[bx + x];
   90|       |            for (int n = 1, r = rnd; n < pad; n++) {
   91|       |                r += range;
   92|       |                scaling[bx + x + n] = scaling[bx + x] + (r >> shift_x);
   93|       |            }
   94|       |        }
   95|       |    }
   96|       |#endif
   97|  2.25k|}
dav1d_prep_grain_16bpc:
  105|  3.43k|{
  106|  3.43k|    const Dav1dFilmGrainData *const data = &out->frame_hdr->film_grain.data;
  107|  3.43k|#if BITDEPTH != 8
  108|  3.43k|    const int bitdepth_max = (1 << out->p.bpc) - 1;
  109|  3.43k|#endif
  110|       |
  111|       |    // Generate grain LUTs as needed
  112|  3.43k|    dsp->generate_grain_y(grain_lut[0], data HIGHBD_TAIL_SUFFIX); // always needed
  ------------------
  |  |   74|  3.43k|#define HIGHBD_TAIL_SUFFIX , bitdepth_max
  ------------------
  113|  3.43k|    if (data->num_uv_points[0] || data->chroma_scaling_from_luma)
  ------------------
  |  Branch (113:9): [True: 586, False: 2.84k]
  |  Branch (113:35): [True: 412, False: 2.43k]
  ------------------
  114|    998|        dsp->generate_grain_uv[in->p.layout - 1](grain_lut[1], grain_lut[0],
  115|    998|                                                 data, 0 HIGHBD_TAIL_SUFFIX);
  ------------------
  |  |   74|    998|#define HIGHBD_TAIL_SUFFIX , bitdepth_max
  ------------------
  116|  3.43k|    if (data->num_uv_points[1] || data->chroma_scaling_from_luma)
  ------------------
  |  Branch (116:9): [True: 754, False: 2.67k]
  |  Branch (116:35): [True: 412, False: 2.26k]
  ------------------
  117|  1.16k|        dsp->generate_grain_uv[in->p.layout - 1](grain_lut[2], grain_lut[0],
  118|  1.16k|                                                 data, 1 HIGHBD_TAIL_SUFFIX);
  ------------------
  |  |   74|  1.16k|#define HIGHBD_TAIL_SUFFIX , bitdepth_max
  ------------------
  119|       |
  120|       |    // Generate scaling LUTs as needed
  121|  3.43k|    if (data->num_y_points || data->chroma_scaling_from_luma)
  ------------------
  |  Branch (121:9): [True: 2.58k, False: 841]
  |  Branch (121:31): [True: 393, False: 448]
  ------------------
  122|  2.98k|        generate_scaling(in->p.bpc, data->y_points, data->num_y_points, scaling[0]);
  123|  3.43k|    if (data->num_uv_points[0])
  ------------------
  |  Branch (123:9): [True: 586, False: 2.84k]
  ------------------
  124|    586|        generate_scaling(in->p.bpc, data->uv_points[0], data->num_uv_points[0], scaling[1]);
  125|  3.43k|    if (data->num_uv_points[1])
  ------------------
  |  Branch (125:9): [True: 754, False: 2.67k]
  ------------------
  126|    754|        generate_scaling(in->p.bpc, data->uv_points[1], data->num_uv_points[1], scaling[2]);
  127|       |
  128|       |    // Copy over the non-modified planes
  129|  3.43k|    assert(out->stride[0] == in->stride[0]);
  ------------------
  |  Branch (129:5): [True: 3.43k, False: 0]
  ------------------
  130|  3.43k|    if (!data->num_y_points) {
  ------------------
  |  Branch (130:9): [True: 841, False: 2.58k]
  ------------------
  131|    841|        const ptrdiff_t stride = out->stride[0];
  132|    841|        const ptrdiff_t sz = out->p.h * stride;
  133|    841|        if (sz < 0)
  ------------------
  |  Branch (133:13): [True: 0, False: 841]
  ------------------
  134|      0|            memcpy((uint8_t*) out->data[0] + sz - stride,
  135|      0|                   (uint8_t*) in->data[0] + sz - stride, -sz);
  136|    841|        else
  137|    841|            memcpy(out->data[0], in->data[0], sz);
  138|    841|    }
  139|       |
  140|  3.43k|    if (in->p.layout != DAV1D_PIXEL_LAYOUT_I400 && !data->chroma_scaling_from_luma) {
  ------------------
  |  Branch (140:9): [True: 1.43k, False: 1.99k]
  |  Branch (140:52): [True: 1.02k, False: 412]
  ------------------
  141|  1.02k|        assert(out->stride[1] == in->stride[1]);
  ------------------
  |  Branch (141:9): [True: 1.02k, False: 0]
  ------------------
  142|  1.02k|        const int ss_ver = in->p.layout == DAV1D_PIXEL_LAYOUT_I420;
  143|  1.02k|        const ptrdiff_t stride = out->stride[1];
  144|  1.02k|        const ptrdiff_t sz = ((out->p.h + ss_ver) >> ss_ver) * stride;
  145|  1.02k|        if (sz < 0) {
  ------------------
  |  Branch (145:13): [True: 0, False: 1.02k]
  ------------------
  146|      0|            if (!data->num_uv_points[0])
  ------------------
  |  Branch (146:17): [True: 0, False: 0]
  ------------------
  147|      0|                memcpy((uint8_t*) out->data[1] + sz - stride,
  148|      0|                       (uint8_t*) in->data[1] + sz - stride, -sz);
  149|      0|            if (!data->num_uv_points[1])
  ------------------
  |  Branch (149:17): [True: 0, False: 0]
  ------------------
  150|      0|                memcpy((uint8_t*) out->data[2] + sz - stride,
  151|      0|                       (uint8_t*) in->data[2] + sz - stride, -sz);
  152|  1.02k|        } else {
  153|  1.02k|            if (!data->num_uv_points[0])
  ------------------
  |  Branch (153:17): [True: 438, False: 586]
  ------------------
  154|    438|                memcpy(out->data[1], in->data[1], sz);
  155|  1.02k|            if (!data->num_uv_points[1])
  ------------------
  |  Branch (155:17): [True: 270, False: 754]
  ------------------
  156|    270|                memcpy(out->data[2], in->data[2], sz);
  157|  1.02k|        }
  158|  1.02k|    }
  159|  3.43k|}
dav1d_apply_grain_row_16bpc:
  167|  7.27k|{
  168|       |    // Synthesize grain for the affected planes
  169|  7.27k|    const Dav1dFilmGrainData *const data = &out->frame_hdr->film_grain.data;
  170|  7.27k|    const int ss_y = in->p.layout == DAV1D_PIXEL_LAYOUT_I420;
  171|  7.27k|    const int ss_x = in->p.layout != DAV1D_PIXEL_LAYOUT_I444;
  172|  7.27k|    const int cpw = (out->p.w + ss_x) >> ss_x;
  173|  7.27k|    const int is_id = out->seq_hdr->mtrx == DAV1D_MC_IDENTITY;
  174|  7.27k|    pixel *const luma_src =
  175|  7.27k|        ((pixel *) in->data[0]) + row * FG_BLOCK_SIZE * PXSTRIDE(in->stride[0]);
  ------------------
  |  |   37|  7.27k|#define FG_BLOCK_SIZE 32
  ------------------
  176|  7.27k|#if BITDEPTH != 8
  177|  7.27k|    const int bitdepth_max = (1 << out->p.bpc) - 1;
  178|  7.27k|#endif
  179|       |
  180|  7.27k|    if (data->num_y_points) {
  ------------------
  |  Branch (180:9): [True: 4.46k, False: 2.80k]
  ------------------
  181|  4.46k|        const int bh = imin(out->p.h - row * FG_BLOCK_SIZE, FG_BLOCK_SIZE);
  ------------------
  |  |   37|  4.46k|#define FG_BLOCK_SIZE 32
  ------------------
                      const int bh = imin(out->p.h - row * FG_BLOCK_SIZE, FG_BLOCK_SIZE);
  ------------------
  |  |   37|  4.46k|#define FG_BLOCK_SIZE 32
  ------------------
  182|  4.46k|        dsp->fgy_32x32xn(((pixel *) out->data[0]) + row * FG_BLOCK_SIZE * PXSTRIDE(out->stride[0]),
  ------------------
  |  |   37|  4.46k|#define FG_BLOCK_SIZE 32
  ------------------
  183|  4.46k|                         luma_src, out->stride[0], data,
  184|  4.46k|                         out->p.w, scaling[0], grain_lut[0], bh, row HIGHBD_TAIL_SUFFIX);
  ------------------
  |  |   74|  4.46k|#define HIGHBD_TAIL_SUFFIX , bitdepth_max
  ------------------
  185|  4.46k|    }
  186|       |
  187|  7.27k|    if (!data->num_uv_points[0] && !data->num_uv_points[1] &&
  ------------------
  |  Branch (187:9): [True: 5.61k, False: 1.65k]
  |  Branch (187:36): [True: 4.70k, False: 907]
  ------------------
  188|  4.70k|        !data->chroma_scaling_from_luma)
  ------------------
  |  Branch (188:9): [True: 3.90k, False: 802]
  ------------------
  189|  3.90k|    {
  190|  3.90k|        return;
  191|  3.90k|    }
  192|       |
  193|  3.36k|    const int bh = (imin(out->p.h - row * FG_BLOCK_SIZE, FG_BLOCK_SIZE) + ss_y) >> ss_y;
  ------------------
  |  |   37|  3.36k|#define FG_BLOCK_SIZE 32
  ------------------
                  const int bh = (imin(out->p.h - row * FG_BLOCK_SIZE, FG_BLOCK_SIZE) + ss_y) >> ss_y;
  ------------------
  |  |   37|  3.36k|#define FG_BLOCK_SIZE 32
  ------------------
  194|       |
  195|       |    // extend padding pixels
  196|  3.36k|    if (out->p.w & ss_x) {
  ------------------
  |  Branch (196:9): [True: 1.21k, False: 2.14k]
  ------------------
  197|  1.21k|        pixel *ptr = luma_src;
  198|  28.2k|        for (int y = 0; y < bh; y++) {
  ------------------
  |  Branch (198:25): [True: 27.0k, False: 1.21k]
  ------------------
  199|  27.0k|            ptr[out->p.w] = ptr[out->p.w - 1];
  200|  27.0k|            ptr += PXSTRIDE(in->stride[0]) << ss_y;
  201|  27.0k|        }
  202|  1.21k|    }
  203|       |
  204|  3.36k|    const ptrdiff_t uv_off = row * FG_BLOCK_SIZE * PXSTRIDE(out->stride[1]) >> ss_y;
  ------------------
  |  |   37|  3.36k|#define FG_BLOCK_SIZE 32
  ------------------
  205|  3.36k|    if (data->chroma_scaling_from_luma) {
  ------------------
  |  Branch (205:9): [True: 802, False: 2.56k]
  ------------------
  206|  2.40k|        for (int pl = 0; pl < 2; pl++)
  ------------------
  |  Branch (206:26): [True: 1.60k, False: 802]
  ------------------
  207|  1.60k|            dsp->fguv_32x32xn[in->p.layout - 1](((pixel *) out->data[1 + pl]) + uv_off,
  208|  1.60k|                                                ((const pixel *) in->data[1 + pl]) + uv_off,
  209|  1.60k|                                                in->stride[1], data, cpw,
  210|  1.60k|                                                scaling[0], grain_lut[1 + pl],
  211|  1.60k|                                                bh, row, luma_src, in->stride[0],
  212|  1.60k|                                                pl, is_id HIGHBD_TAIL_SUFFIX);
  ------------------
  |  |   74|  1.60k|#define HIGHBD_TAIL_SUFFIX , bitdepth_max
  ------------------
  213|  2.56k|    } else {
  214|  7.68k|        for (int pl = 0; pl < 2; pl++)
  ------------------
  |  Branch (214:26): [True: 5.12k, False: 2.56k]
  ------------------
  215|  5.12k|            if (data->num_uv_points[pl])
  ------------------
  |  Branch (215:17): [True: 3.12k, False: 1.99k]
  ------------------
  216|  3.12k|                dsp->fguv_32x32xn[in->p.layout - 1](((pixel *) out->data[1 + pl]) + uv_off,
  217|  3.12k|                                                    ((const pixel *) in->data[1 + pl]) + uv_off,
  218|  3.12k|                                                    in->stride[1], data, cpw,
  219|  3.12k|                                                    scaling[1 + pl], grain_lut[1 + pl],
  220|  3.12k|                                                    bh, row, luma_src, in->stride[0],
  221|  3.12k|                                                    pl, is_id HIGHBD_TAIL_SUFFIX);
  ------------------
  |  |   74|  3.12k|#define HIGHBD_TAIL_SUFFIX , bitdepth_max
  ------------------
  222|  2.56k|    }
  223|  3.36k|}
dav1d_apply_grain_16bpc:
  228|  3.43k|{
  229|  3.43k|    ALIGN_STK_16(entry, grain_lut, 3,[GRAIN_HEIGHT + 1][GRAIN_WIDTH]);
  ------------------
  |  |  100|  3.43k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  3.43k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
  230|       |#if ARCH_X86_64 && BITDEPTH == 8
  231|       |    ALIGN_STK_64(uint8_t, scaling, 3,[SCALING_SIZE]);
  232|       |#else
  233|  3.43k|    uint8_t scaling[3][SCALING_SIZE];
  234|  3.43k|#endif
  235|  3.43k|    const int rows = (out->p.h + FG_BLOCK_SIZE - 1) / FG_BLOCK_SIZE;
  ------------------
  |  |   37|  3.43k|#define FG_BLOCK_SIZE 32
  ------------------
                  const int rows = (out->p.h + FG_BLOCK_SIZE - 1) / FG_BLOCK_SIZE;
  ------------------
  |  |   37|  3.43k|#define FG_BLOCK_SIZE 32
  ------------------
  236|       |
  237|  3.43k|    bitfn(dav1d_prep_grain)(dsp, out, in, scaling, grain_lut);
  ------------------
  |  |   77|  3.43k|#define bitfn(x) x##_16bpc
  ------------------
  238|  10.7k|    for (int row = 0; row < rows; row++)
  ------------------
  |  Branch (238:23): [True: 7.27k, False: 3.43k]
  ------------------
  239|  7.27k|        bitfn(dav1d_apply_grain_row)(dsp, out, in, scaling, grain_lut, row);
  ------------------
  |  |   77|  7.27k|#define bitfn(x) x##_16bpc
  ------------------
  240|  3.43k|}

dav1d_film_grain_dsp_init_8bpc:
  423|  3.47k|COLD void bitfn(dav1d_film_grain_dsp_init)(Dav1dFilmGrainDSPContext *const c) {
  424|  3.47k|    c->generate_grain_y = generate_grain_y_c;
  425|  3.47k|    c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I420 - 1] = generate_grain_uv_420_c;
  426|  3.47k|    c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I422 - 1] = generate_grain_uv_422_c;
  427|  3.47k|    c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I444 - 1] = generate_grain_uv_444_c;
  428|       |
  429|  3.47k|    c->fgy_32x32xn = fgy_32x32xn_c;
  430|  3.47k|    c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I420 - 1] = fguv_32x32xn_420_c;
  431|  3.47k|    c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I422 - 1] = fguv_32x32xn_422_c;
  432|  3.47k|    c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I444 - 1] = fguv_32x32xn_444_c;
  433|       |
  434|  3.47k|#if HAVE_ASM
  435|       |#if ARCH_AARCH64 || ARCH_ARM
  436|       |    film_grain_dsp_init_arm(c);
  437|       |#elif ARCH_X86
  438|       |    film_grain_dsp_init_x86(c);
  439|  3.47k|#endif
  440|  3.47k|#endif
  441|  3.47k|}
dav1d_film_grain_dsp_init_16bpc:
  423|  4.68k|COLD void bitfn(dav1d_film_grain_dsp_init)(Dav1dFilmGrainDSPContext *const c) {
  424|  4.68k|    c->generate_grain_y = generate_grain_y_c;
  425|  4.68k|    c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I420 - 1] = generate_grain_uv_420_c;
  426|  4.68k|    c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I422 - 1] = generate_grain_uv_422_c;
  427|  4.68k|    c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I444 - 1] = generate_grain_uv_444_c;
  428|       |
  429|  4.68k|    c->fgy_32x32xn = fgy_32x32xn_c;
  430|  4.68k|    c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I420 - 1] = fguv_32x32xn_420_c;
  431|  4.68k|    c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I422 - 1] = fguv_32x32xn_422_c;
  432|  4.68k|    c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I444 - 1] = fguv_32x32xn_444_c;
  433|       |
  434|  4.68k|#if HAVE_ASM
  435|       |#if ARCH_AARCH64 || ARCH_ARM
  436|       |    film_grain_dsp_init_arm(c);
  437|       |#elif ARCH_X86
  438|       |    film_grain_dsp_init_x86(c);
  439|  4.68k|#endif
  440|  4.68k|#endif
  441|  4.68k|}

dav1d_init_get_bits:
   38|   120k|{
   39|   120k|    assert(sz);
  ------------------
  |  Branch (39:5): [True: 120k, False: 0]
  ------------------
   40|   120k|    c->ptr = c->ptr_start = data;
   41|   120k|    c->ptr_end = &c->ptr_start[sz];
   42|   120k|    c->state = 0;
   43|   120k|    c->bits_left = 0;
   44|   120k|    c->error = 0;
   45|   120k|}
dav1d_get_bit:
   47|  2.93M|unsigned dav1d_get_bit(GetBits *const c) {
   48|  2.93M|    if (!c->bits_left) {
  ------------------
  |  Branch (48:9): [True: 443k, False: 2.49M]
  ------------------
   49|   443k|        if (c->ptr >= c->ptr_end) {
  ------------------
  |  Branch (49:13): [True: 5.39k, False: 438k]
  ------------------
   50|  5.39k|            c->error = 1;
   51|   438k|        } else {
   52|   438k|            const unsigned state = *c->ptr++;
   53|   438k|            c->bits_left = 7;
   54|   438k|            c->state = (uint64_t) state << 57;
   55|   438k|            return state >> 7;
   56|   438k|        }
   57|   443k|    }
   58|       |
   59|  2.50M|    const uint64_t state = c->state;
   60|  2.50M|    c->bits_left--;
   61|  2.50M|    c->state = state << 1;
   62|  2.50M|    return (unsigned) (state >> 63);
   63|  2.93M|}
dav1d_get_uleb128:
   95|  58.5k|unsigned dav1d_get_uleb128(GetBits *const c) {
   96|  58.5k|    uint64_t val = 0;
   97|  58.5k|    unsigned i = 0, more;
   98|       |
   99|  61.0k|    do {
  100|  61.0k|        const int v = dav1d_get_bits(c, 8);
  101|  61.0k|        more = v & 0x80;
  102|  61.0k|        val |= ((uint64_t) (v & 0x7F)) << i;
  103|  61.0k|        i += 7;
  104|  61.0k|    } while (more && i < 56);
  ------------------
  |  Branch (104:14): [True: 2.54k, False: 58.4k]
  |  Branch (104:22): [True: 2.48k, False: 59]
  ------------------
  105|       |
  106|  58.5k|    if (val > UINT32_MAX || more) {
  ------------------
  |  Branch (106:9): [True: 225, False: 58.3k]
  |  Branch (106:29): [True: 36, False: 58.2k]
  ------------------
  107|    261|        c->error = 1;
  108|    261|        return 0;
  109|    261|    }
  110|       |
  111|  58.2k|    return (unsigned) val;
  112|  58.5k|}
dav1d_get_uniform:
  114|   110k|unsigned dav1d_get_uniform(GetBits *const c, const unsigned max) {
  115|       |    // Output in range [0..max-1]
  116|       |    // max must be > 1, or else nothing is read from the bitstream
  117|   110k|    assert(max > 1);
  ------------------
  |  Branch (117:5): [True: 110k, False: 0]
  ------------------
  118|   110k|    const int l = ulog2(max) + 1;
  119|   110k|    assert(l > 1);
  ------------------
  |  Branch (119:5): [True: 110k, False: 0]
  ------------------
  120|   110k|    const unsigned m = (1U << l) - max;
  121|   110k|    const unsigned v = dav1d_get_bits(c, l - 1);
  122|   110k|    return v < m ? v : (v << 1) - m + dav1d_get_bit(c);
  ------------------
  |  Branch (122:12): [True: 104k, False: 5.89k]
  ------------------
  123|   110k|}
dav1d_get_vlc:
  125|  1.14k|unsigned dav1d_get_vlc(GetBits *const c) {
  126|  1.14k|    if (dav1d_get_bit(c))
  ------------------
  |  Branch (126:9): [True: 465, False: 676]
  ------------------
  127|    465|        return 0;
  128|       |
  129|    676|    int n_bits = 0;
  130|  9.72k|    do {
  131|  9.72k|        if (++n_bits == 32)
  ------------------
  |  Branch (131:13): [True: 73, False: 9.64k]
  ------------------
  132|     73|            return UINT32_MAX;
  133|  9.72k|    } while (!dav1d_get_bit(c));
  ------------------
  |  Branch (133:14): [True: 9.04k, False: 603]
  ------------------
  134|       |
  135|    603|    return ((1U << n_bits) - 1) + dav1d_get_bits(c, n_bits);
  136|    676|}
dav1d_get_bits_subexp:
  162|  40.7k|int dav1d_get_bits_subexp(GetBits *const c, const int ref, const unsigned n) {
  163|  40.7k|    return (int) get_bits_subexp_u(c, ref + (1 << n), 2 << n) - (1 << n);
  164|  40.7k|}
getbits.c:refill:
   65|   589k|static inline void refill(GetBits *const c, const int n) {
   66|   589k|    assert(c->bits_left >= 0 && c->bits_left < 32);
  ------------------
  |  Branch (66:5): [True: 589k, False: 0]
  |  Branch (66:5): [True: 589k, False: 0]
  ------------------
   67|   589k|    unsigned state = 0;
   68|   645k|    do {
   69|   645k|        if (c->ptr >= c->ptr_end) {
  ------------------
  |  Branch (69:13): [True: 18.1k, False: 627k]
  ------------------
   70|  18.1k|            c->error = 1;
   71|  18.1k|            if (state) break;
  ------------------
  |  Branch (71:17): [True: 2.22k, False: 15.8k]
  ------------------
   72|  15.8k|            return;
   73|  18.1k|        }
   74|   627k|        state = (state << 8) | *c->ptr++;
   75|   627k|        c->bits_left += 8;
   76|   627k|    } while (n > c->bits_left);
  ------------------
  |  Branch (76:14): [True: 55.8k, False: 571k]
  ------------------
   77|   574k|    c->state |= (uint64_t) state << (64 - c->bits_left);
   78|   574k|}
getbits.c:get_bits_subexp_u:
  140|  40.7k|{
  141|  40.7k|    unsigned v = 0;
  142|       |
  143|   115k|    for (int i = 0;; i++) {
  144|   115k|        const int b = i ? 3 + i - 1 : 3;
  ------------------
  |  Branch (144:23): [True: 74.2k, False: 40.7k]
  ------------------
  145|       |
  146|   115k|        if (n < v + 3 * (1 << b)) {
  ------------------
  |  Branch (146:13): [True: 5.09k, False: 109k]
  ------------------
  147|  5.09k|            v += dav1d_get_uniform(c, n - v + 1);
  148|  5.09k|            break;
  149|  5.09k|        }
  150|       |
  151|   109k|        if (!dav1d_get_bit(c)) {
  ------------------
  |  Branch (151:13): [True: 35.6k, False: 74.2k]
  ------------------
  152|  35.6k|            v += dav1d_get_bits(c, b);
  153|  35.6k|            break;
  154|  35.6k|        }
  155|       |
  156|  74.2k|        v += 1 << b;
  157|  74.2k|    }
  158|       |
  159|  40.7k|    return ref * 2 <= n ? inv_recenter(ref, v) : n - inv_recenter(n - ref, v);
  ------------------
  |  Branch (159:12): [True: 35.6k, False: 5.13k]
  ------------------
  160|  40.7k|}

obu.c:dav1d_bytealign_get_bits:
   52|   115k|static inline void dav1d_bytealign_get_bits(GetBits *c) {
   53|       |    // bits_left is never more than 7, because it is only incremented
   54|       |    // by refill(), called by dav1d_get_bits and that never reads more
   55|       |    // than 7 bits more than it needs.
   56|       |    //
   57|       |    // If this wasn't true, we would need to work out how many bits to
   58|       |    // discard (bits_left % 8), subtract that from bits_left and then
   59|       |    // shift state right by that amount.
   60|   115k|    assert(c->bits_left <= 7);
  ------------------
  |  Branch (60:5): [True: 115k, False: 0]
  ------------------
   61|       |
   62|   115k|    c->bits_left = 0;
   63|   115k|    c->state = 0;
   64|   115k|}

dav1d_init_intra_edge_tree:
  126|      1|COLD void dav1d_init_intra_edge_tree(void) {
  127|       |    // This function is guaranteed to be called only once
  128|      1|    struct ModeSelMem mem;
  129|       |
  130|      1|    mem.nwc[BL_128X128] = &nodes.branch_sb128[1];
  131|      1|    mem.nwc[BL_64X64] = &nodes.branch_sb128[1 + 4];
  132|      1|    mem.nwc[BL_32X32] = &nodes.branch_sb128[1 + 4 + 16];
  133|      1|    mem.nt = nodes.tip_sb128;
  134|      1|    init_mode_node(nodes.branch_sb128, BL_128X128, &mem, 1, 0);
  135|      1|    assert(mem.nwc[BL_128X128] == &nodes.branch_sb128[1 + 4]);
  ------------------
  |  Branch (135:5): [True: 1, False: 0]
  ------------------
  136|      1|    assert(mem.nwc[BL_64X64] == &nodes.branch_sb128[1 + 4 + 16]);
  ------------------
  |  Branch (136:5): [True: 1, False: 0]
  ------------------
  137|      1|    assert(mem.nwc[BL_32X32] == &nodes.branch_sb128[1 + 4 + 16 + 64]);
  ------------------
  |  Branch (137:5): [True: 1, False: 0]
  ------------------
  138|      1|    assert(mem.nt == &nodes.tip_sb128[256]);
  ------------------
  |  Branch (138:5): [True: 1, False: 0]
  ------------------
  139|       |
  140|      1|    mem.nwc[BL_128X128] = NULL;
  141|      1|    mem.nwc[BL_64X64] = &nodes.branch_sb64[1];
  142|      1|    mem.nwc[BL_32X32] = &nodes.branch_sb64[1 + 4];
  143|      1|    mem.nt = nodes.tip_sb64;
  144|      1|    init_mode_node(nodes.branch_sb64, BL_64X64, &mem, 1, 0);
  145|      1|    assert(mem.nwc[BL_64X64] == &nodes.branch_sb64[1 + 4]);
  ------------------
  |  Branch (145:5): [True: 1, False: 0]
  ------------------
  146|      1|    assert(mem.nwc[BL_32X32] == &nodes.branch_sb64[1 + 4 + 16]);
  ------------------
  |  Branch (146:5): [True: 1, False: 0]
  ------------------
  147|      1|    assert(mem.nt == &nodes.tip_sb64[64]);
  ------------------
  |  Branch (147:5): [True: 1, False: 0]
  ------------------
  148|      1|}
intra_edge.c:init_mode_node:
  101|    106|{
  102|    106|    init_edges(&nwc->node, bl,
  103|    106|               (top_has_right ? EDGE_ALL_TOP_HAS_RIGHT : 0) |
  ------------------
  |  Branch (103:17): [True: 73, False: 33]
  ------------------
  104|    106|               (left_has_bottom ? EDGE_ALL_LEFT_HAS_BOTTOM : 0));
  ------------------
  |  Branch (104:17): [True: 33, False: 73]
  ------------------
  105|    106|    if (bl == BL_16X16) {
  ------------------
  |  Branch (105:9): [True: 80, False: 26]
  ------------------
  106|    400|        for (int n = 0; n < 4; n++) {
  ------------------
  |  Branch (106:25): [True: 320, False: 80]
  ------------------
  107|    320|            EdgeTip *const nt = mem->nt++;
  108|    320|            nwc->split_offset[n] = PTR_OFFSET(nwc, nt);
  ------------------
  |  |   94|    320|#define PTR_OFFSET(a, b) ((uint16_t)((uintptr_t)(b) - (uintptr_t)(a)))
  ------------------
  109|    320|            init_edges(&nt->node, bl + 1,
  110|    320|                       ((n == 3 || (n == 1 && !top_has_right)) ? 0 :
  ------------------
  |  Branch (110:26): [True: 80, False: 240]
  |  Branch (110:37): [True: 80, False: 160]
  |  Branch (110:47): [True: 26, False: 54]
  ------------------
  111|    320|                        EDGE_ALL_TOP_HAS_RIGHT) |
  112|    320|                       (!(n == 0 || (n == 2 && left_has_bottom)) ? 0 :
  ------------------
  |  Branch (112:27): [True: 80, False: 240]
  |  Branch (112:38): [True: 80, False: 160]
  |  Branch (112:48): [True: 26, False: 54]
  ------------------
  113|    320|                        EDGE_ALL_LEFT_HAS_BOTTOM));
  114|    320|        }
  115|     80|    } else {
  116|    130|        for (int n = 0; n < 4; n++) {
  ------------------
  |  Branch (116:25): [True: 104, False: 26]
  ------------------
  117|    104|            EdgeBranch *const nwc_child = mem->nwc[bl]++;
  118|    104|            nwc->split_offset[n] = PTR_OFFSET(nwc, nwc_child);
  ------------------
  |  |   94|    104|#define PTR_OFFSET(a, b) ((uint16_t)((uintptr_t)(b) - (uintptr_t)(a)))
  ------------------
  119|    104|            init_mode_node(nwc_child, bl + 1, mem,
  120|    104|                           !(n == 3 || (n == 1 && !top_has_right)),
  ------------------
  |  Branch (120:30): [True: 26, False: 78]
  |  Branch (120:41): [True: 26, False: 52]
  |  Branch (120:51): [True: 7, False: 19]
  ------------------
  121|    104|                           n == 0 || (n == 2 && left_has_bottom));
  ------------------
  |  Branch (121:28): [True: 26, False: 78]
  |  Branch (121:39): [True: 26, False: 52]
  |  Branch (121:49): [True: 7, False: 19]
  ------------------
  122|    104|        }
  123|     26|    }
  124|    106|}
intra_edge.c:init_edges:
   58|    426|{
   59|    426|    node->o = edge_flags;
   60|    426|    node->h[0] = edge_flags | EDGE_ALL_LEFT_HAS_BOTTOM;
   61|    426|    node->v[0] = edge_flags | EDGE_ALL_TOP_HAS_RIGHT;
   62|       |
   63|    426|    if (bl == BL_8X8) {
  ------------------
  |  Branch (63:9): [True: 320, False: 106]
  ------------------
   64|    320|        EdgeTip *const nt = (EdgeTip *) node;
   65|       |
   66|    320|        node->h[1] = edge_flags & (EDGE_ALL_LEFT_HAS_BOTTOM |
   67|    320|                                   EDGE_I420_TOP_HAS_RIGHT);
   68|    320|        node->v[1] = edge_flags & (EDGE_ALL_TOP_HAS_RIGHT |
   69|    320|                                   EDGE_I420_LEFT_HAS_BOTTOM |
   70|    320|                                   EDGE_I422_LEFT_HAS_BOTTOM);
   71|       |
   72|    320|        nt->split[0] = (edge_flags & EDGE_ALL_TOP_HAS_RIGHT) |
   73|    320|                       EDGE_I422_LEFT_HAS_BOTTOM;
   74|    320|        nt->split[1] = edge_flags | EDGE_I444_TOP_HAS_RIGHT;
   75|    320|        nt->split[2] = edge_flags & (EDGE_I420_TOP_HAS_RIGHT |
   76|    320|                                     EDGE_I420_LEFT_HAS_BOTTOM |
   77|    320|                                     EDGE_I422_LEFT_HAS_BOTTOM);
   78|    320|    } else {
   79|    106|        EdgeBranch *const nwc = (EdgeBranch *) node;
   80|       |
   81|    106|        node->h[1] = edge_flags & EDGE_ALL_LEFT_HAS_BOTTOM;
   82|    106|        node->v[1] = edge_flags & EDGE_ALL_TOP_HAS_RIGHT;
   83|       |
   84|    106|        nwc->h4 = EDGE_ALL_LEFT_HAS_BOTTOM;
   85|    106|        nwc->v4 = EDGE_ALL_TOP_HAS_RIGHT;
   86|    106|        if (bl == BL_16X16) {
  ------------------
  |  Branch (86:13): [True: 80, False: 26]
  ------------------
   87|     80|            nwc->h4 |= edge_flags & EDGE_I420_TOP_HAS_RIGHT;
   88|     80|            nwc->v4 |= edge_flags & (EDGE_I420_LEFT_HAS_BOTTOM |
   89|     80|                                     EDGE_I422_LEFT_HAS_BOTTOM);
   90|     80|        }
   91|    106|    }
   92|    426|}

recon_tmpl.c:sm_flag:
   95|  4.94M|static inline int sm_flag(const BlockContext *const b, const int idx) {
   96|  4.94M|    if (!b->intra[idx]) return 0;
  ------------------
  |  Branch (96:9): [True: 228k, False: 4.72M]
  ------------------
   97|  4.72M|    const enum IntraPredMode m = b->mode[idx];
   98|  4.72M|    return (m == SMOOTH_PRED || m == SMOOTH_H_PRED ||
  ------------------
  |  Branch (98:13): [True: 311k, False: 4.40M]
  |  Branch (98:33): [True: 106k, False: 4.30M]
  ------------------
   99|  4.30M|            m == SMOOTH_V_PRED) ? ANGLE_SMOOTH_EDGE_FLAG : 0;
  ------------------
  |  |   93|   496k|#define ANGLE_SMOOTH_EDGE_FLAG      512
  ------------------
  |  Branch (99:13): [True: 78.2k, False: 4.22M]
  ------------------
  100|  4.94M|}
recon_tmpl.c:sm_uv_flag:
  102|  3.30M|static inline int sm_uv_flag(const BlockContext *const b, const int idx) {
  103|  3.30M|    const enum IntraPredMode m = b->uvmode[idx];
  104|  3.30M|    return (m == SMOOTH_PRED || m == SMOOTH_H_PRED ||
  ------------------
  |  Branch (104:13): [True: 227k, False: 3.07M]
  |  Branch (104:33): [True: 90.5k, False: 2.98M]
  ------------------
  105|  2.98M|            m == SMOOTH_V_PRED) ? ANGLE_SMOOTH_EDGE_FLAG : 0;
  ------------------
  |  |   93|   365k|#define ANGLE_SMOOTH_EDGE_FLAG      512
  ------------------
  |  Branch (105:13): [True: 47.3k, False: 2.94M]
  ------------------
  106|  3.30M|}

dav1d_prepare_intra_edges_8bpc:
   86|  5.84M|{
   87|  5.84M|    const int bitdepth = bitdepth_from_max(bitdepth_max);
  ------------------
  |  |   58|  5.84M|#define bitdepth_from_max(x) 8
  ------------------
   88|  5.84M|    assert(y < h && x < w);
  ------------------
  |  Branch (88:5): [True: 5.84M, False: 0]
  |  Branch (88:5): [True: 5.84M, False: 0]
  ------------------
   89|       |
   90|  5.84M|    switch (mode) {
   91|   155k|    case VERT_PRED:
  ------------------
  |  Branch (91:5): [True: 155k, False: 5.69M]
  ------------------
   92|   448k|    case HOR_PRED:
  ------------------
  |  Branch (92:5): [True: 293k, False: 5.55M]
  ------------------
   93|   504k|    case DIAG_DOWN_LEFT_PRED:
  ------------------
  |  Branch (93:5): [True: 55.6k, False: 5.79M]
  ------------------
   94|   561k|    case DIAG_DOWN_RIGHT_PRED:
  ------------------
  |  Branch (94:5): [True: 57.3k, False: 5.79M]
  ------------------
   95|   617k|    case VERT_RIGHT_PRED:
  ------------------
  |  Branch (95:5): [True: 55.6k, False: 5.79M]
  ------------------
   96|   687k|    case HOR_DOWN_PRED:
  ------------------
  |  Branch (96:5): [True: 70.3k, False: 5.77M]
  ------------------
   97|   808k|    case HOR_UP_PRED:
  ------------------
  |  Branch (97:5): [True: 121k, False: 5.72M]
  ------------------
   98|   892k|    case VERT_LEFT_PRED: {
  ------------------
  |  Branch (98:5): [True: 83.3k, False: 5.76M]
  ------------------
   99|   892k|        *angle = av1_mode_to_angle_map[mode - VERT_PRED] + 3 * *angle;
  100|       |
  101|   892k|        if (*angle <= 90)
  ------------------
  |  Branch (101:13): [True: 260k, False: 632k]
  ------------------
  102|   260k|            mode = *angle < 90 && have_top ? Z1_PRED : VERT_PRED;
  ------------------
  |  Branch (102:20): [True: 167k, False: 92.3k]
  |  Branch (102:35): [True: 144k, False: 23.1k]
  ------------------
  103|   632k|        else if (*angle < 180)
  ------------------
  |  Branch (103:18): [True: 275k, False: 356k]
  ------------------
  104|   275k|            mode = Z2_PRED;
  105|   356k|        else
  106|   356k|            mode = *angle > 180 && have_left ? Z3_PRED : HOR_PRED;
  ------------------
  |  Branch (106:20): [True: 181k, False: 175k]
  |  Branch (106:36): [True: 178k, False: 2.82k]
  ------------------
  107|   892k|        break;
  108|   808k|    }
  109|  3.55M|    case DC_PRED:
  ------------------
  |  Branch (109:5): [True: 3.55M, False: 2.29M]
  ------------------
  110|  4.30M|    case PAETH_PRED:
  ------------------
  |  Branch (110:5): [True: 744k, False: 5.10M]
  ------------------
  111|  4.30M|        mode = av1_mode_conv[mode][have_left][have_top];
  112|  4.30M|        break;
  113|   656k|    default:
  ------------------
  |  Branch (113:5): [True: 656k, False: 5.19M]
  ------------------
  114|   656k|        break;
  115|  5.84M|    }
  116|       |
  117|  5.84M|    const pixel *dst_top;
  118|  5.84M|    if (have_top &&
  ------------------
  |  Branch (118:9): [True: 5.14M, False: 701k]
  ------------------
  119|  5.14M|        (av1_intra_prediction_edges[mode].needs_top ||
  ------------------
  |  Branch (119:10): [True: 4.84M, False: 302k]
  ------------------
  120|   302k|         av1_intra_prediction_edges[mode].needs_topleft ||
  ------------------
  |  Branch (120:10): [True: 146k, False: 155k]
  ------------------
  121|   155k|         (av1_intra_prediction_edges[mode].needs_left && !have_left)))
  ------------------
  |  Branch (121:11): [True: 155k, False: 0]
  |  Branch (121:58): [True: 4.92k, False: 150k]
  ------------------
  122|  4.99M|    {
  123|  4.99M|        if (prefilter_toplevel_sb_edge) {
  ------------------
  |  Branch (123:13): [True: 322k, False: 4.67M]
  ------------------
  124|   322k|            dst_top = &prefilter_toplevel_sb_edge[x * 4];
  125|  4.67M|        } else {
  126|  4.67M|            dst_top = &dst[-PXSTRIDE(stride)];
  ------------------
  |  |   53|  4.67M|#define PXSTRIDE(x) (x)
  ------------------
  127|  4.67M|        }
  128|  4.99M|    }
  129|       |
  130|  5.84M|    if (av1_intra_prediction_edges[mode].needs_left) {
  ------------------
  |  Branch (130:9): [True: 5.29M, False: 556k]
  ------------------
  131|  5.29M|        const int sz = th << 2;
  132|  5.29M|        pixel *const left = &topleft_out[-sz];
  133|       |
  134|  5.29M|        if (have_left) {
  ------------------
  |  Branch (134:13): [True: 5.26M, False: 24.8k]
  ------------------
  135|  5.26M|            const int px_have = imin(sz, (h - y) << 2);
  136|       |
  137|  62.1M|            for (int i = 0; i < px_have; i++)
  ------------------
  |  Branch (137:29): [True: 56.9M, False: 5.26M]
  ------------------
  138|  56.9M|                left[sz - 1 - i] = dst[PXSTRIDE(stride) * i - 1];
  ------------------
  |  |   53|  56.9M|#define PXSTRIDE(x) (x)
  ------------------
  139|  5.26M|            if (px_have < sz)
  ------------------
  |  Branch (139:17): [True: 130k, False: 5.13M]
  ------------------
  140|   130k|                pixel_set(left, left[sz - px_have], sz - px_have);
  ------------------
  |  |   48|   130k|#define pixel_set memset
  ------------------
  141|  5.26M|        } else {
  142|  24.8k|            pixel_set(left, have_top ? *dst_top : ((1 << bitdepth) >> 1) + 1, sz);
  ------------------
  |  |   48|  24.8k|#define pixel_set memset
  ------------------
  |  Branch (142:29): [True: 17.5k, False: 7.35k]
  ------------------
  143|  24.8k|        }
  144|       |
  145|  5.29M|        if (av1_intra_prediction_edges[mode].needs_bottomleft) {
  ------------------
  |  Branch (145:13): [True: 178k, False: 5.11M]
  ------------------
  146|   178k|            const int have_bottomleft = (!have_left || y + th >= h) ? 0 :
  ------------------
  |  Branch (146:42): [True: 0, False: 178k]
  |  Branch (146:56): [True: 27.4k, False: 150k]
  ------------------
  147|   178k|                                        (edge_flags & EDGE_I444_LEFT_HAS_BOTTOM);
  148|       |
  149|   178k|            if (have_bottomleft) {
  ------------------
  |  Branch (149:17): [True: 54.9k, False: 123k]
  ------------------
  150|  54.9k|                const int px_have = imin(sz, (h - y - th) << 2);
  151|       |
  152|   621k|                for (int i = 0; i < px_have; i++)
  ------------------
  |  Branch (152:33): [True: 566k, False: 54.9k]
  ------------------
  153|   566k|                    left[-(i + 1)] = dst[(sz + i) * PXSTRIDE(stride) - 1];
  ------------------
  |  |   53|   566k|#define PXSTRIDE(x) (x)
  ------------------
  154|  54.9k|                if (px_have < sz)
  ------------------
  |  Branch (154:21): [True: 1.41k, False: 53.5k]
  ------------------
  155|  1.41k|                    pixel_set(left - sz, left[-px_have], sz - px_have);
  ------------------
  |  |   48|  1.41k|#define pixel_set memset
  ------------------
  156|   123k|            } else {
  157|   123k|                pixel_set(left - sz, left[0], sz);
  ------------------
  |  |   48|   123k|#define pixel_set memset
  ------------------
  158|   123k|            }
  159|   178k|        }
  160|  5.29M|    }
  161|       |
  162|  5.84M|    if (av1_intra_prediction_edges[mode].needs_top) {
  ------------------
  |  Branch (162:9): [True: 5.02M, False: 822k]
  ------------------
  163|  5.02M|        const int sz = tw << 2;
  164|  5.02M|        pixel *const top = &topleft_out[1];
  165|       |
  166|  5.02M|        if (have_top) {
  ------------------
  |  Branch (166:13): [True: 4.84M, False: 181k]
  ------------------
  167|  4.84M|            const int px_have = imin(sz, (w - x) << 2);
  168|  4.84M|            pixel_copy(top, dst_top, px_have);
  ------------------
  |  |   47|  4.84M|#define pixel_copy memcpy
  ------------------
  169|  4.84M|            if (px_have < sz)
  ------------------
  |  Branch (169:17): [True: 132k, False: 4.71M]
  ------------------
  170|   132k|                pixel_set(top + px_have, top[px_have - 1], sz - px_have);
  ------------------
  |  |   48|   132k|#define pixel_set memset
  ------------------
  171|  4.84M|        } else {
  172|   181k|            pixel_set(top, have_left ? dst[-1] : ((1 << bitdepth) >> 1) - 1, sz);
  ------------------
  |  |   48|   181k|#define pixel_set memset
  ------------------
  |  Branch (172:28): [True: 174k, False: 7.31k]
  ------------------
  173|   181k|        }
  174|       |
  175|  5.02M|        if (av1_intra_prediction_edges[mode].needs_topright) {
  ------------------
  |  Branch (175:13): [True: 144k, False: 4.88M]
  ------------------
  176|   144k|            const int have_topright = (!have_top || x + tw >= w) ? 0 :
  ------------------
  |  Branch (176:40): [True: 0, False: 144k]
  |  Branch (176:53): [True: 3.80k, False: 140k]
  ------------------
  177|   144k|                                      (edge_flags & EDGE_I444_TOP_HAS_RIGHT);
  178|       |
  179|   144k|            if (have_topright) {
  ------------------
  |  Branch (179:17): [True: 92.1k, False: 52.5k]
  ------------------
  180|  92.1k|                const int px_have = imin(sz, (w - x - tw) << 2);
  181|       |
  182|  92.1k|                pixel_copy(top + sz, &dst_top[sz], px_have);
  ------------------
  |  |   47|  92.1k|#define pixel_copy memcpy
  ------------------
  183|  92.1k|                if (px_have < sz)
  ------------------
  |  Branch (183:21): [True: 540, False: 91.5k]
  ------------------
  184|    540|                    pixel_set(top + sz + px_have, top[sz + px_have - 1],
  ------------------
  |  |   48|    540|#define pixel_set memset
  ------------------
  185|    540|                              sz - px_have);
  186|  92.1k|            } else {
  187|  52.5k|                pixel_set(top + sz, top[sz - 1], sz);
  ------------------
  |  |   48|  52.5k|#define pixel_set memset
  ------------------
  188|  52.5k|            }
  189|   144k|        }
  190|  5.02M|    }
  191|       |
  192|  5.84M|    if (av1_intra_prediction_edges[mode].needs_topleft) {
  ------------------
  |  Branch (192:9): [True: 1.35M, False: 4.49M]
  ------------------
  193|  1.35M|        if (have_left)
  ------------------
  |  Branch (193:13): [True: 1.34M, False: 10.4k]
  ------------------
  194|  1.34M|            *topleft_out = have_top ? dst_top[-1] : dst[-1];
  ------------------
  |  Branch (194:28): [True: 1.24M, False: 100k]
  ------------------
  195|  10.4k|        else
  196|  10.4k|            *topleft_out = have_top ? *dst_top : (1 << bitdepth) >> 1;
  ------------------
  |  Branch (196:28): [True: 7.99k, False: 2.46k]
  ------------------
  197|       |
  198|  1.35M|        if (mode == Z2_PRED && tw + th >= 6 && filter_edge)
  ------------------
  |  Branch (198:13): [True: 275k, False: 1.08M]
  |  Branch (198:32): [True: 115k, False: 159k]
  |  Branch (198:48): [True: 20.4k, False: 95.5k]
  ------------------
  199|  20.4k|            *topleft_out = ((topleft_out[-1] + topleft_out[1]) * 5 +
  200|  20.4k|                            topleft_out[0] * 6 + 8) >> 4;
  201|  1.35M|    }
  202|       |
  203|  5.84M|    return mode;
  204|  5.84M|}
dav1d_prepare_intra_edges_16bpc:
   86|  6.82M|{
   87|  6.82M|    const int bitdepth = bitdepth_from_max(bitdepth_max);
  ------------------
  |  |   75|  6.82M|#define bitdepth_from_max(bitdepth_max) (32 - clz(bitdepth_max))
  ------------------
   88|  6.82M|    assert(y < h && x < w);
  ------------------
  |  Branch (88:5): [True: 6.82M, False: 0]
  |  Branch (88:5): [True: 6.82M, False: 0]
  ------------------
   89|       |
   90|  6.82M|    switch (mode) {
   91|   211k|    case VERT_PRED:
  ------------------
  |  Branch (91:5): [True: 211k, False: 6.61M]
  ------------------
   92|   610k|    case HOR_PRED:
  ------------------
  |  Branch (92:5): [True: 399k, False: 6.42M]
  ------------------
   93|   687k|    case DIAG_DOWN_LEFT_PRED:
  ------------------
  |  Branch (93:5): [True: 76.4k, False: 6.74M]
  ------------------
   94|   761k|    case DIAG_DOWN_RIGHT_PRED:
  ------------------
  |  Branch (94:5): [True: 74.1k, False: 6.75M]
  ------------------
   95|   828k|    case VERT_RIGHT_PRED:
  ------------------
  |  Branch (95:5): [True: 66.5k, False: 6.75M]
  ------------------
   96|   954k|    case HOR_DOWN_PRED:
  ------------------
  |  Branch (96:5): [True: 126k, False: 6.69M]
  ------------------
   97|  1.14M|    case HOR_UP_PRED:
  ------------------
  |  Branch (97:5): [True: 187k, False: 6.63M]
  ------------------
   98|  1.24M|    case VERT_LEFT_PRED: {
  ------------------
  |  Branch (98:5): [True: 99.7k, False: 6.72M]
  ------------------
   99|  1.24M|        *angle = av1_mode_to_angle_map[mode - VERT_PRED] + 3 * *angle;
  100|       |
  101|  1.24M|        if (*angle <= 90)
  ------------------
  |  Branch (101:13): [True: 335k, False: 906k]
  ------------------
  102|   335k|            mode = *angle < 90 && have_top ? Z1_PRED : VERT_PRED;
  ------------------
  |  Branch (102:20): [True: 213k, False: 121k]
  |  Branch (102:35): [True: 163k, False: 50.3k]
  ------------------
  103|   906k|        else if (*angle < 180)
  ------------------
  |  Branch (103:18): [True: 404k, False: 501k]
  ------------------
  104|   404k|            mode = Z2_PRED;
  105|   501k|        else
  106|   501k|            mode = *angle > 180 && have_left ? Z3_PRED : HOR_PRED;
  ------------------
  |  Branch (106:20): [True: 277k, False: 224k]
  |  Branch (106:36): [True: 268k, False: 8.94k]
  ------------------
  107|  1.24M|        break;
  108|  1.14M|    }
  109|  3.87M|    case DC_PRED:
  ------------------
  |  Branch (109:5): [True: 3.87M, False: 2.94M]
  ------------------
  110|  4.68M|    case PAETH_PRED:
  ------------------
  |  Branch (110:5): [True: 811k, False: 6.01M]
  ------------------
  111|  4.68M|        mode = av1_mode_conv[mode][have_left][have_top];
  112|  4.68M|        break;
  113|   896k|    default:
  ------------------
  |  Branch (113:5): [True: 896k, False: 5.93M]
  ------------------
  114|   896k|        break;
  115|  6.82M|    }
  116|       |
  117|  6.82M|    const pixel *dst_top;
  118|  6.82M|    if (have_top &&
  ------------------
  |  Branch (118:9): [True: 5.42M, False: 1.40M]
  ------------------
  119|  5.42M|        (av1_intra_prediction_edges[mode].needs_top ||
  ------------------
  |  Branch (119:10): [True: 5.05M, False: 368k]
  ------------------
  120|   368k|         av1_intra_prediction_edges[mode].needs_topleft ||
  ------------------
  |  Branch (120:10): [True: 188k, False: 180k]
  ------------------
  121|   180k|         (av1_intra_prediction_edges[mode].needs_left && !have_left)))
  ------------------
  |  Branch (121:11): [True: 180k, False: 0]
  |  Branch (121:58): [True: 10.0k, False: 169k]
  ------------------
  122|  5.25M|    {
  123|  5.25M|        if (prefilter_toplevel_sb_edge) {
  ------------------
  |  Branch (123:13): [True: 259k, False: 4.99M]
  ------------------
  124|   259k|            dst_top = &prefilter_toplevel_sb_edge[x * 4];
  125|  4.99M|        } else {
  126|  4.99M|            dst_top = &dst[-PXSTRIDE(stride)];
  127|  4.99M|        }
  128|  5.25M|    }
  129|       |
  130|  6.82M|    if (av1_intra_prediction_edges[mode].needs_left) {
  ------------------
  |  Branch (130:9): [True: 6.19M, False: 634k]
  ------------------
  131|  6.19M|        const int sz = th << 2;
  132|  6.19M|        pixel *const left = &topleft_out[-sz];
  133|       |
  134|  6.19M|        if (have_left) {
  ------------------
  |  Branch (134:13): [True: 6.15M, False: 36.9k]
  ------------------
  135|  6.15M|            const int px_have = imin(sz, (h - y) << 2);
  136|       |
  137|  75.0M|            for (int i = 0; i < px_have; i++)
  ------------------
  |  Branch (137:29): [True: 68.9M, False: 6.15M]
  ------------------
  138|  68.9M|                left[sz - 1 - i] = dst[PXSTRIDE(stride) * i - 1];
  139|  6.15M|            if (px_have < sz)
  ------------------
  |  Branch (139:17): [True: 271k, False: 5.88M]
  ------------------
  140|   271k|                pixel_set(left, left[sz - px_have], sz - px_have);
  141|  6.15M|        } else {
  142|  36.9k|            pixel_set(left, have_top ? *dst_top : ((1 << bitdepth) >> 1) + 1, sz);
  ------------------
  |  Branch (142:29): [True: 30.0k, False: 6.96k]
  ------------------
  143|  36.9k|        }
  144|       |
  145|  6.19M|        if (av1_intra_prediction_edges[mode].needs_bottomleft) {
  ------------------
  |  Branch (145:13): [True: 268k, False: 5.92M]
  ------------------
  146|   268k|            const int have_bottomleft = (!have_left || y + th >= h) ? 0 :
  ------------------
  |  Branch (146:42): [True: 0, False: 268k]
  |  Branch (146:56): [True: 75.7k, False: 192k]
  ------------------
  147|   268k|                                        (edge_flags & EDGE_I444_LEFT_HAS_BOTTOM);
  148|       |
  149|   268k|            if (have_bottomleft) {
  ------------------
  |  Branch (149:17): [True: 83.1k, False: 185k]
  ------------------
  150|  83.1k|                const int px_have = imin(sz, (h - y - th) << 2);
  151|       |
  152|   949k|                for (int i = 0; i < px_have; i++)
  ------------------
  |  Branch (152:33): [True: 866k, False: 83.1k]
  ------------------
  153|   866k|                    left[-(i + 1)] = dst[(sz + i) * PXSTRIDE(stride) - 1];
  154|  83.1k|                if (px_have < sz)
  ------------------
  |  Branch (154:21): [True: 8.61k, False: 74.5k]
  ------------------
  155|  8.61k|                    pixel_set(left - sz, left[-px_have], sz - px_have);
  156|   185k|            } else {
  157|   185k|                pixel_set(left - sz, left[0], sz);
  158|   185k|            }
  159|   268k|        }
  160|  6.19M|    }
  161|       |
  162|  6.82M|    if (av1_intra_prediction_edges[mode].needs_top) {
  ------------------
  |  Branch (162:9): [True: 5.50M, False: 1.32M]
  ------------------
  163|  5.50M|        const int sz = tw << 2;
  164|  5.50M|        pixel *const top = &topleft_out[1];
  165|       |
  166|  5.50M|        if (have_top) {
  ------------------
  |  Branch (166:13): [True: 5.05M, False: 449k]
  ------------------
  167|  5.05M|            const int px_have = imin(sz, (w - x) << 2);
  168|  5.05M|            pixel_copy(top, dst_top, px_have);
  ------------------
  |  |   65|  5.05M|#define pixel_copy(a, b, c) memcpy(a, b, (c) << 1)
  ------------------
  169|  5.05M|            if (px_have < sz)
  ------------------
  |  Branch (169:17): [True: 119k, False: 4.93M]
  ------------------
  170|   119k|                pixel_set(top + px_have, top[px_have - 1], sz - px_have);
  171|  5.05M|        } else {
  172|   449k|            pixel_set(top, have_left ? dst[-1] : ((1 << bitdepth) >> 1) - 1, sz);
  ------------------
  |  Branch (172:28): [True: 442k, False: 6.51k]
  ------------------
  173|   449k|        }
  174|       |
  175|  5.50M|        if (av1_intra_prediction_edges[mode].needs_topright) {
  ------------------
  |  Branch (175:13): [True: 163k, False: 5.34M]
  ------------------
  176|   163k|            const int have_topright = (!have_top || x + tw >= w) ? 0 :
  ------------------
  |  Branch (176:40): [True: 0, False: 163k]
  |  Branch (176:53): [True: 4.77k, False: 158k]
  ------------------
  177|   163k|                                      (edge_flags & EDGE_I444_TOP_HAS_RIGHT);
  178|       |
  179|   163k|            if (have_topright) {
  ------------------
  |  Branch (179:17): [True: 94.6k, False: 68.8k]
  ------------------
  180|  94.6k|                const int px_have = imin(sz, (w - x - tw) << 2);
  181|       |
  182|  94.6k|                pixel_copy(top + sz, &dst_top[sz], px_have);
  ------------------
  |  |   65|  94.6k|#define pixel_copy(a, b, c) memcpy(a, b, (c) << 1)
  ------------------
  183|  94.6k|                if (px_have < sz)
  ------------------
  |  Branch (183:21): [True: 886, False: 93.7k]
  ------------------
  184|    886|                    pixel_set(top + sz + px_have, top[sz + px_have - 1],
  185|    886|                              sz - px_have);
  186|  94.6k|            } else {
  187|  68.8k|                pixel_set(top + sz, top[sz - 1], sz);
  188|  68.8k|            }
  189|   163k|        }
  190|  5.50M|    }
  191|       |
  192|  6.82M|    if (av1_intra_prediction_edges[mode].needs_topleft) {
  ------------------
  |  Branch (192:9): [True: 1.64M, False: 5.18M]
  ------------------
  193|  1.64M|        if (have_left)
  ------------------
  |  Branch (193:13): [True: 1.62M, False: 17.5k]
  ------------------
  194|  1.62M|            *topleft_out = have_top ? dst_top[-1] : dst[-1];
  ------------------
  |  Branch (194:28): [True: 1.37M, False: 249k]
  ------------------
  195|  17.5k|        else
  196|  17.5k|            *topleft_out = have_top ? *dst_top : (1 << bitdepth) >> 1;
  ------------------
  |  Branch (196:28): [True: 15.9k, False: 1.56k]
  ------------------
  197|       |
  198|  1.64M|        if (mode == Z2_PRED && tw + th >= 6 && filter_edge)
  ------------------
  |  Branch (198:13): [True: 404k, False: 1.23M]
  |  Branch (198:32): [True: 195k, False: 209k]
  |  Branch (198:48): [True: 47.8k, False: 147k]
  ------------------
  199|  47.8k|            *topleft_out = ((topleft_out[-1] + topleft_out[1]) * 5 +
  200|  47.8k|                            topleft_out[0] * 6 + 8) >> 4;
  201|  1.64M|    }
  202|       |
  203|  6.82M|    return mode;
  204|  6.82M|}

dav1d_intra_pred_dsp_init_8bpc:
  744|  3.47k|COLD void bitfn(dav1d_intra_pred_dsp_init)(Dav1dIntraPredDSPContext *const c) {
  745|  3.47k|    c->intra_pred[DC_PRED      ] = ipred_dc_c;
  746|  3.47k|    c->intra_pred[DC_128_PRED  ] = ipred_dc_128_c;
  747|  3.47k|    c->intra_pred[TOP_DC_PRED  ] = ipred_dc_top_c;
  748|  3.47k|    c->intra_pred[LEFT_DC_PRED ] = ipred_dc_left_c;
  749|  3.47k|    c->intra_pred[HOR_PRED     ] = ipred_h_c;
  750|  3.47k|    c->intra_pred[VERT_PRED    ] = ipred_v_c;
  751|  3.47k|    c->intra_pred[PAETH_PRED   ] = ipred_paeth_c;
  752|  3.47k|    c->intra_pred[SMOOTH_PRED  ] = ipred_smooth_c;
  753|  3.47k|    c->intra_pred[SMOOTH_V_PRED] = ipred_smooth_v_c;
  754|  3.47k|    c->intra_pred[SMOOTH_H_PRED] = ipred_smooth_h_c;
  755|  3.47k|    c->intra_pred[Z1_PRED      ] = ipred_z1_c;
  756|  3.47k|    c->intra_pred[Z2_PRED      ] = ipred_z2_c;
  757|  3.47k|    c->intra_pred[Z3_PRED      ] = ipred_z3_c;
  758|  3.47k|    c->intra_pred[FILTER_PRED  ] = ipred_filter_c;
  759|       |
  760|  3.47k|    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1] = cfl_ac_420_c;
  761|  3.47k|    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1] = cfl_ac_422_c;
  762|  3.47k|    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1] = cfl_ac_444_c;
  763|       |
  764|  3.47k|    c->cfl_pred[DC_PRED     ] = ipred_cfl_c;
  765|  3.47k|    c->cfl_pred[DC_128_PRED ] = ipred_cfl_128_c;
  766|  3.47k|    c->cfl_pred[TOP_DC_PRED ] = ipred_cfl_top_c;
  767|  3.47k|    c->cfl_pred[LEFT_DC_PRED] = ipred_cfl_left_c;
  768|       |
  769|  3.47k|    c->pal_pred = pal_pred_c;
  770|       |
  771|  3.47k|#if HAVE_ASM
  772|       |#if ARCH_AARCH64 || ARCH_ARM
  773|       |    intra_pred_dsp_init_arm(c);
  774|       |#elif ARCH_RISCV
  775|       |    intra_pred_dsp_init_riscv(c);
  776|       |#elif ARCH_X86
  777|       |    intra_pred_dsp_init_x86(c);
  778|       |#elif ARCH_LOONGARCH64
  779|       |    intra_pred_dsp_init_loongarch(c);
  780|       |#endif
  781|  3.47k|#endif
  782|  3.47k|}
dav1d_intra_pred_dsp_init_16bpc:
  744|  4.68k|COLD void bitfn(dav1d_intra_pred_dsp_init)(Dav1dIntraPredDSPContext *const c) {
  745|  4.68k|    c->intra_pred[DC_PRED      ] = ipred_dc_c;
  746|  4.68k|    c->intra_pred[DC_128_PRED  ] = ipred_dc_128_c;
  747|  4.68k|    c->intra_pred[TOP_DC_PRED  ] = ipred_dc_top_c;
  748|  4.68k|    c->intra_pred[LEFT_DC_PRED ] = ipred_dc_left_c;
  749|  4.68k|    c->intra_pred[HOR_PRED     ] = ipred_h_c;
  750|  4.68k|    c->intra_pred[VERT_PRED    ] = ipred_v_c;
  751|  4.68k|    c->intra_pred[PAETH_PRED   ] = ipred_paeth_c;
  752|  4.68k|    c->intra_pred[SMOOTH_PRED  ] = ipred_smooth_c;
  753|  4.68k|    c->intra_pred[SMOOTH_V_PRED] = ipred_smooth_v_c;
  754|  4.68k|    c->intra_pred[SMOOTH_H_PRED] = ipred_smooth_h_c;
  755|  4.68k|    c->intra_pred[Z1_PRED      ] = ipred_z1_c;
  756|  4.68k|    c->intra_pred[Z2_PRED      ] = ipred_z2_c;
  757|  4.68k|    c->intra_pred[Z3_PRED      ] = ipred_z3_c;
  758|  4.68k|    c->intra_pred[FILTER_PRED  ] = ipred_filter_c;
  759|       |
  760|  4.68k|    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1] = cfl_ac_420_c;
  761|  4.68k|    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1] = cfl_ac_422_c;
  762|  4.68k|    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1] = cfl_ac_444_c;
  763|       |
  764|  4.68k|    c->cfl_pred[DC_PRED     ] = ipred_cfl_c;
  765|  4.68k|    c->cfl_pred[DC_128_PRED ] = ipred_cfl_128_c;
  766|  4.68k|    c->cfl_pred[TOP_DC_PRED ] = ipred_cfl_top_c;
  767|  4.68k|    c->cfl_pred[LEFT_DC_PRED] = ipred_cfl_left_c;
  768|       |
  769|  4.68k|    c->pal_pred = pal_pred_c;
  770|       |
  771|  4.68k|#if HAVE_ASM
  772|       |#if ARCH_AARCH64 || ARCH_ARM
  773|       |    intra_pred_dsp_init_arm(c);
  774|       |#elif ARCH_RISCV
  775|       |    intra_pred_dsp_init_riscv(c);
  776|       |#elif ARCH_X86
  777|       |    intra_pred_dsp_init_x86(c);
  778|       |#elif ARCH_LOONGARCH64
  779|       |    intra_pred_dsp_init_loongarch(c);
  780|       |#endif
  781|  4.68k|#endif
  782|  4.68k|}

itx_1d.c:inv_dct4_1d_internal_c:
   68|  5.03M|{
   69|  5.03M|    assert(stride > 0);
  ------------------
  |  Branch (69:5): [True: 5.03M, False: 0]
  ------------------
   70|  5.03M|    const int in0 = c[0 * stride], in1 = c[1 * stride];
   71|       |
   72|  5.03M|    int t0, t1, t2, t3;
   73|  5.03M|    if (tx64) {
  ------------------
  |  Branch (73:9): [True: 1.47M, False: 3.56M]
  ------------------
   74|  1.47M|        t0 = t1 = (in0 * 181 + 128) >> 8;
   75|  1.47M|        t2 = (in1 * 1567 + 2048) >> 12;
   76|  1.47M|        t3 = (in1 * 3784 + 2048) >> 12;
   77|  3.56M|    } else {
   78|  3.56M|        const int in2 = c[2 * stride], in3 = c[3 * stride];
   79|       |
   80|  3.56M|        t0 = ((in0 + in2) * 181 + 128) >> 8;
   81|  3.56M|        t1 = ((in0 - in2) * 181 + 128) >> 8;
   82|  3.56M|        t2 = ((in1 *  1567         - in3 * (3784 - 4096) + 2048) >> 12) - in3;
   83|  3.56M|        t3 = ((in1 * (3784 - 4096) + in3 *  1567         + 2048) >> 12) + in1;
   84|  3.56M|    }
   85|       |
   86|  5.03M|    c[0 * stride] = CLIP(t0 + t3);
  ------------------
  |  |   37|  5.03M|#define CLIP(a) iclip(a, min, max)
  ------------------
   87|  5.03M|    c[1 * stride] = CLIP(t1 + t2);
  ------------------
  |  |   37|  5.03M|#define CLIP(a) iclip(a, min, max)
  ------------------
   88|  5.03M|    c[2 * stride] = CLIP(t1 - t2);
  ------------------
  |  |   37|  5.03M|#define CLIP(a) iclip(a, min, max)
  ------------------
   89|  5.03M|    c[3 * stride] = CLIP(t0 - t3);
  ------------------
  |  |   37|  5.03M|#define CLIP(a) iclip(a, min, max)
  ------------------
   90|  5.03M|}
itx_1d.c:inv_dct8_1d_internal_c:
  101|  5.03M|{
  102|  5.03M|    assert(stride > 0);
  ------------------
  |  Branch (102:5): [True: 5.03M, False: 0]
  ------------------
  103|  5.03M|    inv_dct4_1d_internal_c(c, stride << 1, min, max, tx64);
  104|       |
  105|  5.03M|    const int in1 = c[1 * stride], in3 = c[3 * stride];
  106|       |
  107|  5.03M|    int t4a, t5a, t6a, t7a;
  108|  5.03M|    if (tx64) {
  ------------------
  |  Branch (108:9): [True: 1.47M, False: 3.56M]
  ------------------
  109|  1.47M|        t4a = (in1 *   799 + 2048) >> 12;
  110|  1.47M|        t5a = (in3 * -2276 + 2048) >> 12;
  111|  1.47M|        t6a = (in3 *  3406 + 2048) >> 12;
  112|  1.47M|        t7a = (in1 *  4017 + 2048) >> 12;
  113|  3.56M|    } else {
  114|  3.56M|        const int in5 = c[5 * stride], in7 = c[7 * stride];
  115|       |
  116|  3.56M|        t4a = ((in1 *   799         - in7 * (4017 - 4096) + 2048) >> 12) - in7;
  117|  3.56M|        t5a =  (in5 *  1703         - in3 *  1138         + 1024) >> 11;
  118|  3.56M|        t6a =  (in5 *  1138         + in3 *  1703         + 1024) >> 11;
  119|  3.56M|        t7a = ((in1 * (4017 - 4096) + in7 *  799          + 2048) >> 12) + in1;
  120|  3.56M|    }
  121|       |
  122|  5.03M|    const int t4  = CLIP(t4a + t5a);
  ------------------
  |  |   37|  5.03M|#define CLIP(a) iclip(a, min, max)
  ------------------
  123|  5.03M|              t5a = CLIP(t4a - t5a);
  ------------------
  |  |   37|  5.03M|#define CLIP(a) iclip(a, min, max)
  ------------------
  124|  5.03M|    const int t7  = CLIP(t7a + t6a);
  ------------------
  |  |   37|  5.03M|#define CLIP(a) iclip(a, min, max)
  ------------------
  125|  5.03M|              t6a = CLIP(t7a - t6a);
  ------------------
  |  |   37|  5.03M|#define CLIP(a) iclip(a, min, max)
  ------------------
  126|       |
  127|  5.03M|    const int t5  = ((t6a - t5a) * 181 + 128) >> 8;
  128|  5.03M|    const int t6  = ((t6a + t5a) * 181 + 128) >> 8;
  129|       |
  130|  5.03M|    const int t0 = c[0 * stride];
  131|  5.03M|    const int t1 = c[2 * stride];
  132|  5.03M|    const int t2 = c[4 * stride];
  133|  5.03M|    const int t3 = c[6 * stride];
  134|       |
  135|  5.03M|    c[0 * stride] = CLIP(t0 + t7);
  ------------------
  |  |   37|  5.03M|#define CLIP(a) iclip(a, min, max)
  ------------------
  136|  5.03M|    c[1 * stride] = CLIP(t1 + t6);
  ------------------
  |  |   37|  5.03M|#define CLIP(a) iclip(a, min, max)
  ------------------
  137|  5.03M|    c[2 * stride] = CLIP(t2 + t5);
  ------------------
  |  |   37|  5.03M|#define CLIP(a) iclip(a, min, max)
  ------------------
  138|  5.03M|    c[3 * stride] = CLIP(t3 + t4);
  ------------------
  |  |   37|  5.03M|#define CLIP(a) iclip(a, min, max)
  ------------------
  139|  5.03M|    c[4 * stride] = CLIP(t3 - t4);
  ------------------
  |  |   37|  5.03M|#define CLIP(a) iclip(a, min, max)
  ------------------
  140|  5.03M|    c[5 * stride] = CLIP(t2 - t5);
  ------------------
  |  |   37|  5.03M|#define CLIP(a) iclip(a, min, max)
  ------------------
  141|  5.03M|    c[6 * stride] = CLIP(t1 - t6);
  ------------------
  |  |   37|  5.03M|#define CLIP(a) iclip(a, min, max)
  ------------------
  142|  5.03M|    c[7 * stride] = CLIP(t0 - t7);
  ------------------
  |  |   37|  5.03M|#define CLIP(a) iclip(a, min, max)
  ------------------
  143|  5.03M|}
itx_1d.c:inv_dct16_1d_c:
  242|  1.18M|{
  243|  1.18M|    inv_dct16_1d_internal_c(c, stride, min, max, 0);
  244|  1.18M|}
itx_1d.c:inv_dct16_1d_internal_c:
  154|  5.03M|{
  155|  5.03M|    assert(stride > 0);
  ------------------
  |  Branch (155:5): [True: 5.03M, False: 0]
  ------------------
  156|  5.03M|    inv_dct8_1d_internal_c(c, stride << 1, min, max, tx64);
  157|       |
  158|  5.03M|    const int in1 = c[1 * stride], in3 = c[3 * stride];
  159|  5.03M|    const int in5 = c[5 * stride], in7 = c[7 * stride];
  160|       |
  161|  5.03M|    int t8a, t9a, t10a, t11a, t12a, t13a, t14a, t15a;
  162|  5.03M|    if (tx64) {
  ------------------
  |  Branch (162:9): [True: 1.47M, False: 3.56M]
  ------------------
  163|  1.47M|        t8a  = (in1 *   401 + 2048) >> 12;
  164|  1.47M|        t9a  = (in7 * -2598 + 2048) >> 12;
  165|  1.47M|        t10a = (in5 *  1931 + 2048) >> 12;
  166|  1.47M|        t11a = (in3 * -1189 + 2048) >> 12;
  167|  1.47M|        t12a = (in3 *  3920 + 2048) >> 12;
  168|  1.47M|        t13a = (in5 *  3612 + 2048) >> 12;
  169|  1.47M|        t14a = (in7 *  3166 + 2048) >> 12;
  170|  1.47M|        t15a = (in1 *  4076 + 2048) >> 12;
  171|  3.56M|    } else {
  172|  3.56M|        const int in9  = c[ 9 * stride], in11 = c[11 * stride];
  173|  3.56M|        const int in13 = c[13 * stride], in15 = c[15 * stride];
  174|       |
  175|  3.56M|        t8a  = ((in1  *   401         - in15 * (4076 - 4096) + 2048) >> 12) - in15;
  176|  3.56M|        t9a  =  (in9  *  1583         - in7  *  1299         + 1024) >> 11;
  177|  3.56M|        t10a = ((in5  *  1931         - in11 * (3612 - 4096) + 2048) >> 12) - in11;
  178|  3.56M|        t11a = ((in13 * (3920 - 4096) - in3  *  1189         + 2048) >> 12) + in13;
  179|  3.56M|        t12a = ((in13 *  1189         + in3  * (3920 - 4096) + 2048) >> 12) + in3;
  180|  3.56M|        t13a = ((in5  * (3612 - 4096) + in11 *  1931         + 2048) >> 12) + in5;
  181|  3.56M|        t14a =  (in9  *  1299         + in7  *  1583         + 1024) >> 11;
  182|  3.56M|        t15a = ((in1  * (4076 - 4096) + in15 *   401         + 2048) >> 12) + in1;
  183|  3.56M|    }
  184|       |
  185|  5.03M|    int t8  = CLIP(t8a  + t9a);
  ------------------
  |  |   37|  5.03M|#define CLIP(a) iclip(a, min, max)
  ------------------
  186|  5.03M|    int t9  = CLIP(t8a  - t9a);
  ------------------
  |  |   37|  5.03M|#define CLIP(a) iclip(a, min, max)
  ------------------
  187|  5.03M|    int t10 = CLIP(t11a - t10a);
  ------------------
  |  |   37|  5.03M|#define CLIP(a) iclip(a, min, max)
  ------------------
  188|  5.03M|    int t11 = CLIP(t11a + t10a);
  ------------------
  |  |   37|  5.03M|#define CLIP(a) iclip(a, min, max)
  ------------------
  189|  5.03M|    int t12 = CLIP(t12a + t13a);
  ------------------
  |  |   37|  5.03M|#define CLIP(a) iclip(a, min, max)
  ------------------
  190|  5.03M|    int t13 = CLIP(t12a - t13a);
  ------------------
  |  |   37|  5.03M|#define CLIP(a) iclip(a, min, max)
  ------------------
  191|  5.03M|    int t14 = CLIP(t15a - t14a);
  ------------------
  |  |   37|  5.03M|#define CLIP(a) iclip(a, min, max)
  ------------------
  192|  5.03M|    int t15 = CLIP(t15a + t14a);
  ------------------
  |  |   37|  5.03M|#define CLIP(a) iclip(a, min, max)
  ------------------
  193|       |
  194|  5.03M|    t9a  = ((  t14 *  1567         - t9  * (3784 - 4096)  + 2048) >> 12) - t9;
  195|  5.03M|    t14a = ((  t14 * (3784 - 4096) + t9  *  1567          + 2048) >> 12) + t14;
  196|  5.03M|    t10a = ((-(t13 * (3784 - 4096) + t10 *  1567)         + 2048) >> 12) - t13;
  197|  5.03M|    t13a = ((  t13 *  1567         - t10 * (3784 - 4096)  + 2048) >> 12) - t10;
  198|       |
  199|  5.03M|    t8a  = CLIP(t8   + t11);
  ------------------
  |  |   37|  5.03M|#define CLIP(a) iclip(a, min, max)
  ------------------
  200|  5.03M|    t9   = CLIP(t9a  + t10a);
  ------------------
  |  |   37|  5.03M|#define CLIP(a) iclip(a, min, max)
  ------------------
  201|  5.03M|    t10  = CLIP(t9a  - t10a);
  ------------------
  |  |   37|  5.03M|#define CLIP(a) iclip(a, min, max)
  ------------------
  202|  5.03M|    t11a = CLIP(t8   - t11);
  ------------------
  |  |   37|  5.03M|#define CLIP(a) iclip(a, min, max)
  ------------------
  203|  5.03M|    t12a = CLIP(t15  - t12);
  ------------------
  |  |   37|  5.03M|#define CLIP(a) iclip(a, min, max)
  ------------------
  204|  5.03M|    t13  = CLIP(t14a - t13a);
  ------------------
  |  |   37|  5.03M|#define CLIP(a) iclip(a, min, max)
  ------------------
  205|  5.03M|    t14  = CLIP(t14a + t13a);
  ------------------
  |  |   37|  5.03M|#define CLIP(a) iclip(a, min, max)
  ------------------
  206|  5.03M|    t15a = CLIP(t15  + t12);
  ------------------
  |  |   37|  5.03M|#define CLIP(a) iclip(a, min, max)
  ------------------
  207|       |
  208|  5.03M|    t10a = ((t13  - t10)  * 181 + 128) >> 8;
  209|  5.03M|    t13a = ((t13  + t10)  * 181 + 128) >> 8;
  210|  5.03M|    t11  = ((t12a - t11a) * 181 + 128) >> 8;
  211|  5.03M|    t12  = ((t12a + t11a) * 181 + 128) >> 8;
  212|       |
  213|  5.03M|    const int t0 = c[ 0 * stride];
  214|  5.03M|    const int t1 = c[ 2 * stride];
  215|  5.03M|    const int t2 = c[ 4 * stride];
  216|  5.03M|    const int t3 = c[ 6 * stride];
  217|  5.03M|    const int t4 = c[ 8 * stride];
  218|  5.03M|    const int t5 = c[10 * stride];
  219|  5.03M|    const int t6 = c[12 * stride];
  220|  5.03M|    const int t7 = c[14 * stride];
  221|       |
  222|  5.03M|    c[ 0 * stride] = CLIP(t0 + t15a);
  ------------------
  |  |   37|  5.03M|#define CLIP(a) iclip(a, min, max)
  ------------------
  223|  5.03M|    c[ 1 * stride] = CLIP(t1 + t14);
  ------------------
  |  |   37|  5.03M|#define CLIP(a) iclip(a, min, max)
  ------------------
  224|  5.03M|    c[ 2 * stride] = CLIP(t2 + t13a);
  ------------------
  |  |   37|  5.03M|#define CLIP(a) iclip(a, min, max)
  ------------------
  225|  5.03M|    c[ 3 * stride] = CLIP(t3 + t12);
  ------------------
  |  |   37|  5.03M|#define CLIP(a) iclip(a, min, max)
  ------------------
  226|  5.03M|    c[ 4 * stride] = CLIP(t4 + t11);
  ------------------
  |  |   37|  5.03M|#define CLIP(a) iclip(a, min, max)
  ------------------
  227|  5.03M|    c[ 5 * stride] = CLIP(t5 + t10a);
  ------------------
  |  |   37|  5.03M|#define CLIP(a) iclip(a, min, max)
  ------------------
  228|  5.03M|    c[ 6 * stride] = CLIP(t6 + t9);
  ------------------
  |  |   37|  5.03M|#define CLIP(a) iclip(a, min, max)
  ------------------
  229|  5.03M|    c[ 7 * stride] = CLIP(t7 + t8a);
  ------------------
  |  |   37|  5.03M|#define CLIP(a) iclip(a, min, max)
  ------------------
  230|  5.03M|    c[ 8 * stride] = CLIP(t7 - t8a);
  ------------------
  |  |   37|  5.03M|#define CLIP(a) iclip(a, min, max)
  ------------------
  231|  5.03M|    c[ 9 * stride] = CLIP(t6 - t9);
  ------------------
  |  |   37|  5.03M|#define CLIP(a) iclip(a, min, max)
  ------------------
  232|  5.03M|    c[10 * stride] = CLIP(t5 - t10a);
  ------------------
  |  |   37|  5.03M|#define CLIP(a) iclip(a, min, max)
  ------------------
  233|  5.03M|    c[11 * stride] = CLIP(t4 - t11);
  ------------------
  |  |   37|  5.03M|#define CLIP(a) iclip(a, min, max)
  ------------------
  234|  5.03M|    c[12 * stride] = CLIP(t3 - t12);
  ------------------
  |  |   37|  5.03M|#define CLIP(a) iclip(a, min, max)
  ------------------
  235|  5.03M|    c[13 * stride] = CLIP(t2 - t13a);
  ------------------
  |  |   37|  5.03M|#define CLIP(a) iclip(a, min, max)
  ------------------
  236|  5.03M|    c[14 * stride] = CLIP(t1 - t14);
  ------------------
  |  |   37|  5.03M|#define CLIP(a) iclip(a, min, max)
  ------------------
  237|  5.03M|    c[15 * stride] = CLIP(t0 - t15a);
  ------------------
  |  |   37|  5.03M|#define CLIP(a) iclip(a, min, max)
  ------------------
  238|  5.03M|}
itx_1d.c:inv_dct32_1d_c:
  432|  2.37M|{
  433|  2.37M|    inv_dct32_1d_internal_c(c, stride, min, max, 0);
  434|  2.37M|}
itx_1d.c:inv_dct32_1d_internal_c:
  249|  3.84M|{
  250|  3.84M|    assert(stride > 0);
  ------------------
  |  Branch (250:5): [True: 3.84M, False: 0]
  ------------------
  251|  3.84M|    inv_dct16_1d_internal_c(c, stride << 1, min, max, tx64);
  252|       |
  253|  3.84M|    const int in1  = c[ 1 * stride], in3  = c[ 3 * stride];
  254|  3.84M|    const int in5  = c[ 5 * stride], in7  = c[ 7 * stride];
  255|  3.84M|    const int in9  = c[ 9 * stride], in11 = c[11 * stride];
  256|  3.84M|    const int in13 = c[13 * stride], in15 = c[15 * stride];
  257|       |
  258|  3.84M|    int t16a, t17a, t18a, t19a, t20a, t21a, t22a, t23a;
  259|  3.84M|    int t24a, t25a, t26a, t27a, t28a, t29a, t30a, t31a;
  260|  3.84M|    if (tx64) {
  ------------------
  |  Branch (260:9): [True: 1.47M, False: 2.37M]
  ------------------
  261|  1.47M|        t16a = (in1  *   201 + 2048) >> 12;
  262|  1.47M|        t17a = (in15 * -2751 + 2048) >> 12;
  263|  1.47M|        t18a = (in9  *  1751 + 2048) >> 12;
  264|  1.47M|        t19a = (in7  * -1380 + 2048) >> 12;
  265|  1.47M|        t20a = (in5  *   995 + 2048) >> 12;
  266|  1.47M|        t21a = (in11 * -2106 + 2048) >> 12;
  267|  1.47M|        t22a = (in13 *  2440 + 2048) >> 12;
  268|  1.47M|        t23a = (in3  *  -601 + 2048) >> 12;
  269|  1.47M|        t24a = (in3  *  4052 + 2048) >> 12;
  270|  1.47M|        t25a = (in13 *  3290 + 2048) >> 12;
  271|  1.47M|        t26a = (in11 *  3513 + 2048) >> 12;
  272|  1.47M|        t27a = (in5  *  3973 + 2048) >> 12;
  273|  1.47M|        t28a = (in7  *  3857 + 2048) >> 12;
  274|  1.47M|        t29a = (in9  *  3703 + 2048) >> 12;
  275|  1.47M|        t30a = (in15 *  3035 + 2048) >> 12;
  276|  1.47M|        t31a = (in1  *  4091 + 2048) >> 12;
  277|  2.37M|    } else {
  278|  2.37M|        const int in17 = c[17 * stride], in19 = c[19 * stride];
  279|  2.37M|        const int in21 = c[21 * stride], in23 = c[23 * stride];
  280|  2.37M|        const int in25 = c[25 * stride], in27 = c[27 * stride];
  281|  2.37M|        const int in29 = c[29 * stride], in31 = c[31 * stride];
  282|       |
  283|  2.37M|        t16a = ((in1  *   201         - in31 * (4091 - 4096) + 2048) >> 12) - in31;
  284|  2.37M|        t17a = ((in17 * (3035 - 4096) - in15 *  2751         + 2048) >> 12) + in17;
  285|  2.37M|        t18a = ((in9  *  1751         - in23 * (3703 - 4096) + 2048) >> 12) - in23;
  286|  2.37M|        t19a = ((in25 * (3857 - 4096) - in7  *  1380         + 2048) >> 12) + in25;
  287|  2.37M|        t20a = ((in5  *   995         - in27 * (3973 - 4096) + 2048) >> 12) - in27;
  288|  2.37M|        t21a = ((in21 * (3513 - 4096) - in11 *  2106         + 2048) >> 12) + in21;
  289|  2.37M|        t22a =  (in13 *  1220         - in19 *  1645         + 1024) >> 11;
  290|  2.37M|        t23a = ((in29 * (4052 - 4096) - in3  *   601         + 2048) >> 12) + in29;
  291|  2.37M|        t24a = ((in29 *   601         + in3  * (4052 - 4096) + 2048) >> 12) + in3;
  292|  2.37M|        t25a =  (in13 *  1645         + in19 *  1220         + 1024) >> 11;
  293|  2.37M|        t26a = ((in21 *  2106         + in11 * (3513 - 4096) + 2048) >> 12) + in11;
  294|  2.37M|        t27a = ((in5  * (3973 - 4096) + in27 *   995         + 2048) >> 12) + in5;
  295|  2.37M|        t28a = ((in25 *  1380         + in7  * (3857 - 4096) + 2048) >> 12) + in7;
  296|  2.37M|        t29a = ((in9  * (3703 - 4096) + in23 *  1751         + 2048) >> 12) + in9;
  297|  2.37M|        t30a = ((in17 *  2751         + in15 * (3035 - 4096) + 2048) >> 12) + in15;
  298|  2.37M|        t31a = ((in1  * (4091 - 4096) + in31 *   201         + 2048) >> 12) + in1;
  299|  2.37M|    }
  300|       |
  301|  3.84M|    int t16 = CLIP(t16a + t17a);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  302|  3.84M|    int t17 = CLIP(t16a - t17a);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  303|  3.84M|    int t18 = CLIP(t19a - t18a);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  304|  3.84M|    int t19 = CLIP(t19a + t18a);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  305|  3.84M|    int t20 = CLIP(t20a + t21a);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  306|  3.84M|    int t21 = CLIP(t20a - t21a);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  307|  3.84M|    int t22 = CLIP(t23a - t22a);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  308|  3.84M|    int t23 = CLIP(t23a + t22a);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  309|  3.84M|    int t24 = CLIP(t24a + t25a);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  310|  3.84M|    int t25 = CLIP(t24a - t25a);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  311|  3.84M|    int t26 = CLIP(t27a - t26a);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  312|  3.84M|    int t27 = CLIP(t27a + t26a);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  313|  3.84M|    int t28 = CLIP(t28a + t29a);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  314|  3.84M|    int t29 = CLIP(t28a - t29a);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  315|  3.84M|    int t30 = CLIP(t31a - t30a);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  316|  3.84M|    int t31 = CLIP(t31a + t30a);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  317|       |
  318|  3.84M|    t17a = ((  t30 *   799         - t17 * (4017 - 4096)  + 2048) >> 12) - t17;
  319|  3.84M|    t30a = ((  t30 * (4017 - 4096) + t17 *   799          + 2048) >> 12) + t30;
  320|  3.84M|    t18a = ((-(t29 * (4017 - 4096) + t18 *   799)         + 2048) >> 12) - t29;
  321|  3.84M|    t29a = ((  t29 *   799         - t18 * (4017 - 4096)  + 2048) >> 12) - t18;
  322|  3.84M|    t21a =  (  t26 *  1703         - t21 *  1138          + 1024) >> 11;
  323|  3.84M|    t26a =  (  t26 *  1138         + t21 *  1703          + 1024) >> 11;
  324|  3.84M|    t22a =  (-(t25 *  1138         + t22 *  1703        ) + 1024) >> 11;
  325|  3.84M|    t25a =  (  t25 *  1703         - t22 *  1138          + 1024) >> 11;
  326|       |
  327|  3.84M|    t16a = CLIP(t16  + t19);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  328|  3.84M|    t17  = CLIP(t17a + t18a);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  329|  3.84M|    t18  = CLIP(t17a - t18a);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  330|  3.84M|    t19a = CLIP(t16  - t19);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  331|  3.84M|    t20a = CLIP(t23  - t20);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  332|  3.84M|    t21  = CLIP(t22a - t21a);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  333|  3.84M|    t22  = CLIP(t22a + t21a);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  334|  3.84M|    t23a = CLIP(t23  + t20);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  335|  3.84M|    t24a = CLIP(t24  + t27);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  336|  3.84M|    t25  = CLIP(t25a + t26a);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  337|  3.84M|    t26  = CLIP(t25a - t26a);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  338|  3.84M|    t27a = CLIP(t24  - t27);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  339|  3.84M|    t28a = CLIP(t31  - t28);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  340|  3.84M|    t29  = CLIP(t30a - t29a);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  341|  3.84M|    t30  = CLIP(t30a + t29a);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  342|  3.84M|    t31a = CLIP(t31  + t28);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  343|       |
  344|  3.84M|    t18a = ((  t29  *  1567         - t18  * (3784 - 4096)  + 2048) >> 12) - t18;
  345|  3.84M|    t29a = ((  t29  * (3784 - 4096) + t18  *  1567          + 2048) >> 12) + t29;
  346|  3.84M|    t19  = ((  t28a *  1567         - t19a * (3784 - 4096)  + 2048) >> 12) - t19a;
  347|  3.84M|    t28  = ((  t28a * (3784 - 4096) + t19a *  1567          + 2048) >> 12) + t28a;
  348|  3.84M|    t20  = ((-(t27a * (3784 - 4096) + t20a *  1567)         + 2048) >> 12) - t27a;
  349|  3.84M|    t27  = ((  t27a *  1567         - t20a * (3784 - 4096)  + 2048) >> 12) - t20a;
  350|  3.84M|    t21a = ((-(t26  * (3784 - 4096) + t21  *  1567)         + 2048) >> 12) - t26;
  351|  3.84M|    t26a = ((  t26  *  1567         - t21  * (3784 - 4096)  + 2048) >> 12) - t21;
  352|       |
  353|  3.84M|    t16  = CLIP(t16a + t23a);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  354|  3.84M|    t17a = CLIP(t17  + t22);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  355|  3.84M|    t18  = CLIP(t18a + t21a);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  356|  3.84M|    t19a = CLIP(t19  + t20);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  357|  3.84M|    t20a = CLIP(t19  - t20);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  358|  3.84M|    t21  = CLIP(t18a - t21a);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  359|  3.84M|    t22a = CLIP(t17  - t22);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  360|  3.84M|    t23  = CLIP(t16a - t23a);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  361|  3.84M|    t24  = CLIP(t31a - t24a);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  362|  3.84M|    t25a = CLIP(t30  - t25);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  363|  3.84M|    t26  = CLIP(t29a - t26a);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  364|  3.84M|    t27a = CLIP(t28  - t27);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  365|  3.84M|    t28a = CLIP(t28  + t27);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  366|  3.84M|    t29  = CLIP(t29a + t26a);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  367|  3.84M|    t30a = CLIP(t30  + t25);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  368|  3.84M|    t31  = CLIP(t31a + t24a);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  369|       |
  370|  3.84M|    t20  = ((t27a - t20a) * 181 + 128) >> 8;
  371|  3.84M|    t27  = ((t27a + t20a) * 181 + 128) >> 8;
  372|  3.84M|    t21a = ((t26  - t21 ) * 181 + 128) >> 8;
  373|  3.84M|    t26a = ((t26  + t21 ) * 181 + 128) >> 8;
  374|  3.84M|    t22  = ((t25a - t22a) * 181 + 128) >> 8;
  375|  3.84M|    t25  = ((t25a + t22a) * 181 + 128) >> 8;
  376|  3.84M|    t23a = ((t24  - t23 ) * 181 + 128) >> 8;
  377|  3.84M|    t24a = ((t24  + t23 ) * 181 + 128) >> 8;
  378|       |
  379|  3.84M|    const int t0  = c[ 0 * stride];
  380|  3.84M|    const int t1  = c[ 2 * stride];
  381|  3.84M|    const int t2  = c[ 4 * stride];
  382|  3.84M|    const int t3  = c[ 6 * stride];
  383|  3.84M|    const int t4  = c[ 8 * stride];
  384|  3.84M|    const int t5  = c[10 * stride];
  385|  3.84M|    const int t6  = c[12 * stride];
  386|  3.84M|    const int t7  = c[14 * stride];
  387|  3.84M|    const int t8  = c[16 * stride];
  388|  3.84M|    const int t9  = c[18 * stride];
  389|  3.84M|    const int t10 = c[20 * stride];
  390|  3.84M|    const int t11 = c[22 * stride];
  391|  3.84M|    const int t12 = c[24 * stride];
  392|  3.84M|    const int t13 = c[26 * stride];
  393|  3.84M|    const int t14 = c[28 * stride];
  394|  3.84M|    const int t15 = c[30 * stride];
  395|       |
  396|  3.84M|    c[ 0 * stride] = CLIP(t0  + t31);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  397|  3.84M|    c[ 1 * stride] = CLIP(t1  + t30a);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  398|  3.84M|    c[ 2 * stride] = CLIP(t2  + t29);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  399|  3.84M|    c[ 3 * stride] = CLIP(t3  + t28a);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  400|  3.84M|    c[ 4 * stride] = CLIP(t4  + t27);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  401|  3.84M|    c[ 5 * stride] = CLIP(t5  + t26a);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  402|  3.84M|    c[ 6 * stride] = CLIP(t6  + t25);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  403|  3.84M|    c[ 7 * stride] = CLIP(t7  + t24a);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  404|  3.84M|    c[ 8 * stride] = CLIP(t8  + t23a);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  405|  3.84M|    c[ 9 * stride] = CLIP(t9  + t22);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  406|  3.84M|    c[10 * stride] = CLIP(t10 + t21a);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  407|  3.84M|    c[11 * stride] = CLIP(t11 + t20);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  408|  3.84M|    c[12 * stride] = CLIP(t12 + t19a);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  409|  3.84M|    c[13 * stride] = CLIP(t13 + t18);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  410|  3.84M|    c[14 * stride] = CLIP(t14 + t17a);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  411|  3.84M|    c[15 * stride] = CLIP(t15 + t16);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  412|  3.84M|    c[16 * stride] = CLIP(t15 - t16);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  413|  3.84M|    c[17 * stride] = CLIP(t14 - t17a);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  414|  3.84M|    c[18 * stride] = CLIP(t13 - t18);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  415|  3.84M|    c[19 * stride] = CLIP(t12 - t19a);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  416|  3.84M|    c[20 * stride] = CLIP(t11 - t20);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  417|  3.84M|    c[21 * stride] = CLIP(t10 - t21a);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  418|  3.84M|    c[22 * stride] = CLIP(t9  - t22);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  419|  3.84M|    c[23 * stride] = CLIP(t8  - t23a);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  420|  3.84M|    c[24 * stride] = CLIP(t7  - t24a);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  421|  3.84M|    c[25 * stride] = CLIP(t6  - t25);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  422|  3.84M|    c[26 * stride] = CLIP(t5  - t26a);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  423|  3.84M|    c[27 * stride] = CLIP(t4  - t27);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  424|  3.84M|    c[28 * stride] = CLIP(t3  - t28a);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  425|  3.84M|    c[29 * stride] = CLIP(t2  - t29);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  426|  3.84M|    c[30 * stride] = CLIP(t1  - t30a);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  427|  3.84M|    c[31 * stride] = CLIP(t0  - t31);
  ------------------
  |  |   37|  3.84M|#define CLIP(a) iclip(a, min, max)
  ------------------
  428|  3.84M|}
itx_1d.c:inv_dct64_1d_c:
  438|  1.47M|{
  439|  1.47M|    assert(stride > 0);
  ------------------
  |  Branch (439:5): [True: 1.47M, False: 0]
  ------------------
  440|  1.47M|    inv_dct32_1d_internal_c(c, stride << 1, min, max, 1);
  441|       |
  442|  1.47M|    const int in1  = c[ 1 * stride], in3  = c[ 3 * stride];
  443|  1.47M|    const int in5  = c[ 5 * stride], in7  = c[ 7 * stride];
  444|  1.47M|    const int in9  = c[ 9 * stride], in11 = c[11 * stride];
  445|  1.47M|    const int in13 = c[13 * stride], in15 = c[15 * stride];
  446|  1.47M|    const int in17 = c[17 * stride], in19 = c[19 * stride];
  447|  1.47M|    const int in21 = c[21 * stride], in23 = c[23 * stride];
  448|  1.47M|    const int in25 = c[25 * stride], in27 = c[27 * stride];
  449|  1.47M|    const int in29 = c[29 * stride], in31 = c[31 * stride];
  450|       |
  451|  1.47M|    int t32a = (in1  *   101 + 2048) >> 12;
  452|  1.47M|    int t33a = (in31 * -2824 + 2048) >> 12;
  453|  1.47M|    int t34a = (in17 *  1660 + 2048) >> 12;
  454|  1.47M|    int t35a = (in15 * -1474 + 2048) >> 12;
  455|  1.47M|    int t36a = (in9  *   897 + 2048) >> 12;
  456|  1.47M|    int t37a = (in23 * -2191 + 2048) >> 12;
  457|  1.47M|    int t38a = (in25 *  2359 + 2048) >> 12;
  458|  1.47M|    int t39a = (in7  *  -700 + 2048) >> 12;
  459|  1.47M|    int t40a = (in5  *   501 + 2048) >> 12;
  460|  1.47M|    int t41a = (in27 * -2520 + 2048) >> 12;
  461|  1.47M|    int t42a = (in21 *  2019 + 2048) >> 12;
  462|  1.47M|    int t43a = (in11 * -1092 + 2048) >> 12;
  463|  1.47M|    int t44a = (in13 *  1285 + 2048) >> 12;
  464|  1.47M|    int t45a = (in19 * -1842 + 2048) >> 12;
  465|  1.47M|    int t46a = (in29 *  2675 + 2048) >> 12;
  466|  1.47M|    int t47a = (in3  *  -301 + 2048) >> 12;
  467|  1.47M|    int t48a = (in3  *  4085 + 2048) >> 12;
  468|  1.47M|    int t49a = (in29 *  3102 + 2048) >> 12;
  469|  1.47M|    int t50a = (in19 *  3659 + 2048) >> 12;
  470|  1.47M|    int t51a = (in13 *  3889 + 2048) >> 12;
  471|  1.47M|    int t52a = (in11 *  3948 + 2048) >> 12;
  472|  1.47M|    int t53a = (in21 *  3564 + 2048) >> 12;
  473|  1.47M|    int t54a = (in27 *  3229 + 2048) >> 12;
  474|  1.47M|    int t55a = (in5  *  4065 + 2048) >> 12;
  475|  1.47M|    int t56a = (in7  *  4036 + 2048) >> 12;
  476|  1.47M|    int t57a = (in25 *  3349 + 2048) >> 12;
  477|  1.47M|    int t58a = (in23 *  3461 + 2048) >> 12;
  478|  1.47M|    int t59a = (in9  *  3996 + 2048) >> 12;
  479|  1.47M|    int t60a = (in15 *  3822 + 2048) >> 12;
  480|  1.47M|    int t61a = (in17 *  3745 + 2048) >> 12;
  481|  1.47M|    int t62a = (in31 *  2967 + 2048) >> 12;
  482|  1.47M|    int t63a = (in1  *  4095 + 2048) >> 12;
  483|       |
  484|  1.47M|    int t32 = CLIP(t32a + t33a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  485|  1.47M|    int t33 = CLIP(t32a - t33a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  486|  1.47M|    int t34 = CLIP(t35a - t34a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  487|  1.47M|    int t35 = CLIP(t35a + t34a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  488|  1.47M|    int t36 = CLIP(t36a + t37a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  489|  1.47M|    int t37 = CLIP(t36a - t37a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  490|  1.47M|    int t38 = CLIP(t39a - t38a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  491|  1.47M|    int t39 = CLIP(t39a + t38a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  492|  1.47M|    int t40 = CLIP(t40a + t41a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  493|  1.47M|    int t41 = CLIP(t40a - t41a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  494|  1.47M|    int t42 = CLIP(t43a - t42a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  495|  1.47M|    int t43 = CLIP(t43a + t42a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  496|  1.47M|    int t44 = CLIP(t44a + t45a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  497|  1.47M|    int t45 = CLIP(t44a - t45a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  498|  1.47M|    int t46 = CLIP(t47a - t46a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  499|  1.47M|    int t47 = CLIP(t47a + t46a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  500|  1.47M|    int t48 = CLIP(t48a + t49a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  501|  1.47M|    int t49 = CLIP(t48a - t49a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  502|  1.47M|    int t50 = CLIP(t51a - t50a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  503|  1.47M|    int t51 = CLIP(t51a + t50a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  504|  1.47M|    int t52 = CLIP(t52a + t53a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  505|  1.47M|    int t53 = CLIP(t52a - t53a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  506|  1.47M|    int t54 = CLIP(t55a - t54a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  507|  1.47M|    int t55 = CLIP(t55a + t54a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  508|  1.47M|    int t56 = CLIP(t56a + t57a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  509|  1.47M|    int t57 = CLIP(t56a - t57a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  510|  1.47M|    int t58 = CLIP(t59a - t58a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  511|  1.47M|    int t59 = CLIP(t59a + t58a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  512|  1.47M|    int t60 = CLIP(t60a + t61a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  513|  1.47M|    int t61 = CLIP(t60a - t61a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  514|  1.47M|    int t62 = CLIP(t63a - t62a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  515|  1.47M|    int t63 = CLIP(t63a + t62a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  516|       |
  517|  1.47M|    t33a = ((t33 * (4096 - 4076) + t62 *   401         + 2048) >> 12) - t33;
  518|  1.47M|    t34a = ((t34 *  -401         + t61 * (4096 - 4076) + 2048) >> 12) - t61;
  519|  1.47M|    t37a =  (t37 * -1299         + t58 *  1583         + 1024) >> 11;
  520|  1.47M|    t38a =  (t38 * -1583         + t57 * -1299         + 1024) >> 11;
  521|  1.47M|    t41a = ((t41 * (4096 - 3612) + t54 *  1931         + 2048) >> 12) - t41;
  522|  1.47M|    t42a = ((t42 * -1931         + t53 * (4096 - 3612) + 2048) >> 12) - t53;
  523|  1.47M|    t45a = ((t45 * -1189         + t50 * (3920 - 4096) + 2048) >> 12) + t50;
  524|  1.47M|    t46a = ((t46 * (4096 - 3920) + t49 * -1189         + 2048) >> 12) - t46;
  525|  1.47M|    t49a = ((t46 * -1189         + t49 * (3920 - 4096) + 2048) >> 12) + t49;
  526|  1.47M|    t50a = ((t45 * (3920 - 4096) + t50 *  1189         + 2048) >> 12) + t45;
  527|  1.47M|    t53a = ((t42 * (4096 - 3612) + t53 *  1931         + 2048) >> 12) - t42;
  528|  1.47M|    t54a = ((t41 *  1931         + t54 * (3612 - 4096) + 2048) >> 12) + t54;
  529|  1.47M|    t57a =  (t38 * -1299         + t57 *  1583         + 1024) >> 11;
  530|  1.47M|    t58a =  (t37 *  1583         + t58 *  1299         + 1024) >> 11;
  531|  1.47M|    t61a = ((t34 * (4096 - 4076) + t61 *   401         + 2048) >> 12) - t34;
  532|  1.47M|    t62a = ((t33 *   401         + t62 * (4076 - 4096) + 2048) >> 12) + t62;
  533|       |
  534|  1.47M|    t32a = CLIP(t32  + t35);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  535|  1.47M|    t33  = CLIP(t33a + t34a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  536|  1.47M|    t34  = CLIP(t33a - t34a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  537|  1.47M|    t35a = CLIP(t32  - t35);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  538|  1.47M|    t36a = CLIP(t39  - t36);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  539|  1.47M|    t37  = CLIP(t38a - t37a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  540|  1.47M|    t38  = CLIP(t38a + t37a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  541|  1.47M|    t39a = CLIP(t39  + t36);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  542|  1.47M|    t40a = CLIP(t40  + t43);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  543|  1.47M|    t41  = CLIP(t41a + t42a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  544|  1.47M|    t42  = CLIP(t41a - t42a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  545|  1.47M|    t43a = CLIP(t40  - t43);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  546|  1.47M|    t44a = CLIP(t47  - t44);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  547|  1.47M|    t45  = CLIP(t46a - t45a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  548|  1.47M|    t46  = CLIP(t46a + t45a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  549|  1.47M|    t47a = CLIP(t47  + t44);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  550|  1.47M|    t48a = CLIP(t48  + t51);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  551|  1.47M|    t49  = CLIP(t49a + t50a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  552|  1.47M|    t50  = CLIP(t49a - t50a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  553|  1.47M|    t51a = CLIP(t48  - t51);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  554|  1.47M|    t52a = CLIP(t55  - t52);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  555|  1.47M|    t53  = CLIP(t54a - t53a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  556|  1.47M|    t54  = CLIP(t54a + t53a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  557|  1.47M|    t55a = CLIP(t55  + t52);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  558|  1.47M|    t56a = CLIP(t56  + t59);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  559|  1.47M|    t57  = CLIP(t57a + t58a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  560|  1.47M|    t58  = CLIP(t57a - t58a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  561|  1.47M|    t59a = CLIP(t56  - t59);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  562|  1.47M|    t60a = CLIP(t63  - t60);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  563|  1.47M|    t61  = CLIP(t62a - t61a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  564|  1.47M|    t62  = CLIP(t62a + t61a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  565|  1.47M|    t63a = CLIP(t63  + t60);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  566|       |
  567|  1.47M|    t34a = ((t34  * (4096 - 4017) + t61  *   799         + 2048) >> 12) - t34;
  568|  1.47M|    t35  = ((t35a * (4096 - 4017) + t60a *   799         + 2048) >> 12) - t35a;
  569|  1.47M|    t36  = ((t36a *  -799         + t59a * (4096 - 4017) + 2048) >> 12) - t59a;
  570|  1.47M|    t37a = ((t37  *  -799         + t58  * (4096 - 4017) + 2048) >> 12) - t58;
  571|  1.47M|    t42a =  (t42  * -1138         + t53  *  1703         + 1024) >> 11;
  572|  1.47M|    t43  =  (t43a * -1138         + t52a *  1703         + 1024) >> 11;
  573|  1.47M|    t44  =  (t44a * -1703         + t51a * -1138         + 1024) >> 11;
  574|  1.47M|    t45a =  (t45  * -1703         + t50  * -1138         + 1024) >> 11;
  575|  1.47M|    t50a =  (t45  * -1138         + t50  *  1703         + 1024) >> 11;
  576|  1.47M|    t51  =  (t44a * -1138         + t51a *  1703         + 1024) >> 11;
  577|  1.47M|    t52  =  (t43a *  1703         + t52a *  1138         + 1024) >> 11;
  578|  1.47M|    t53a =  (t42  *  1703         + t53  *  1138         + 1024) >> 11;
  579|  1.47M|    t58a = ((t37  * (4096 - 4017) + t58  *   799         + 2048) >> 12) - t37;
  580|  1.47M|    t59  = ((t36a * (4096 - 4017) + t59a *   799         + 2048) >> 12) - t36a;
  581|  1.47M|    t60  = ((t35a *   799         + t60a * (4017 - 4096) + 2048) >> 12) + t60a;
  582|  1.47M|    t61a = ((t34  *   799         + t61  * (4017 - 4096) + 2048) >> 12) + t61;
  583|       |
  584|  1.47M|    t32  = CLIP(t32a + t39a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  585|  1.47M|    t33a = CLIP(t33  + t38);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  586|  1.47M|    t34  = CLIP(t34a + t37a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  587|  1.47M|    t35a = CLIP(t35  + t36);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  588|  1.47M|    t36a = CLIP(t35  - t36);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  589|  1.47M|    t37  = CLIP(t34a - t37a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  590|  1.47M|    t38a = CLIP(t33  - t38);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  591|  1.47M|    t39  = CLIP(t32a - t39a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  592|  1.47M|    t40  = CLIP(t47a - t40a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  593|  1.47M|    t41a = CLIP(t46  - t41);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  594|  1.47M|    t42  = CLIP(t45a - t42a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  595|  1.47M|    t43a = CLIP(t44  - t43);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  596|  1.47M|    t44a = CLIP(t44  + t43);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  597|  1.47M|    t45  = CLIP(t45a + t42a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  598|  1.47M|    t46a = CLIP(t46  + t41);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  599|  1.47M|    t47  = CLIP(t47a + t40a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  600|  1.47M|    t48  = CLIP(t48a + t55a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  601|  1.47M|    t49a = CLIP(t49  + t54);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  602|  1.47M|    t50  = CLIP(t50a + t53a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  603|  1.47M|    t51a = CLIP(t51  + t52);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  604|  1.47M|    t52a = CLIP(t51  - t52);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  605|  1.47M|    t53  = CLIP(t50a - t53a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  606|  1.47M|    t54a = CLIP(t49  - t54);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  607|  1.47M|    t55  = CLIP(t48a - t55a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  608|  1.47M|    t56  = CLIP(t63a - t56a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  609|  1.47M|    t57a = CLIP(t62  - t57);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  610|  1.47M|    t58  = CLIP(t61a - t58a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  611|  1.47M|    t59a = CLIP(t60  - t59);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  612|  1.47M|    t60a = CLIP(t60  + t59);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  613|  1.47M|    t61  = CLIP(t61a + t58a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  614|  1.47M|    t62a = CLIP(t62  + t57);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  615|  1.47M|    t63  = CLIP(t63a + t56a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  616|       |
  617|  1.47M|    t36  = ((t36a * (4096 - 3784) + t59a *  1567         + 2048) >> 12) - t36a;
  618|  1.47M|    t37a = ((t37  * (4096 - 3784) + t58  *  1567         + 2048) >> 12) - t37;
  619|  1.47M|    t38  = ((t38a * (4096 - 3784) + t57a *  1567         + 2048) >> 12) - t38a;
  620|  1.47M|    t39a = ((t39  * (4096 - 3784) + t56  *  1567         + 2048) >> 12) - t39;
  621|  1.47M|    t40a = ((t40  * -1567         + t55  * (4096 - 3784) + 2048) >> 12) - t55;
  622|  1.47M|    t41  = ((t41a * -1567         + t54a * (4096 - 3784) + 2048) >> 12) - t54a;
  623|  1.47M|    t42a = ((t42  * -1567         + t53  * (4096 - 3784) + 2048) >> 12) - t53;
  624|  1.47M|    t43  = ((t43a * -1567         + t52a * (4096 - 3784) + 2048) >> 12) - t52a;
  625|  1.47M|    t52  = ((t43a * (4096 - 3784) + t52a *  1567         + 2048) >> 12) - t43a;
  626|  1.47M|    t53a = ((t42  * (4096 - 3784) + t53  *  1567         + 2048) >> 12) - t42;
  627|  1.47M|    t54  = ((t41a * (4096 - 3784) + t54a *  1567         + 2048) >> 12) - t41a;
  628|  1.47M|    t55a = ((t40  * (4096 - 3784) + t55  *  1567         + 2048) >> 12) - t40;
  629|  1.47M|    t56a = ((t39  *  1567         + t56  * (3784 - 4096) + 2048) >> 12) + t56;
  630|  1.47M|    t57  = ((t38a *  1567         + t57a * (3784 - 4096) + 2048) >> 12) + t57a;
  631|  1.47M|    t58a = ((t37  *  1567         + t58  * (3784 - 4096) + 2048) >> 12) + t58;
  632|  1.47M|    t59  = ((t36a *  1567         + t59a * (3784 - 4096) + 2048) >> 12) + t59a;
  633|       |
  634|  1.47M|    t32a = CLIP(t32  + t47);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  635|  1.47M|    t33  = CLIP(t33a + t46a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  636|  1.47M|    t34a = CLIP(t34  + t45);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  637|  1.47M|    t35  = CLIP(t35a + t44a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  638|  1.47M|    t36a = CLIP(t36  + t43);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  639|  1.47M|    t37  = CLIP(t37a + t42a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  640|  1.47M|    t38a = CLIP(t38  + t41);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  641|  1.47M|    t39  = CLIP(t39a + t40a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  642|  1.47M|    t40  = CLIP(t39a - t40a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  643|  1.47M|    t41a = CLIP(t38  - t41);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  644|  1.47M|    t42  = CLIP(t37a - t42a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  645|  1.47M|    t43a = CLIP(t36  - t43);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  646|  1.47M|    t44  = CLIP(t35a - t44a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  647|  1.47M|    t45a = CLIP(t34  - t45);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  648|  1.47M|    t46  = CLIP(t33a - t46a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  649|  1.47M|    t47a = CLIP(t32  - t47);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  650|  1.47M|    t48a = CLIP(t63  - t48);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  651|  1.47M|    t49  = CLIP(t62a - t49a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  652|  1.47M|    t50a = CLIP(t61  - t50);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  653|  1.47M|    t51  = CLIP(t60a - t51a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  654|  1.47M|    t52a = CLIP(t59  - t52);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  655|  1.47M|    t53  = CLIP(t58a - t53a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  656|  1.47M|    t54a = CLIP(t57  - t54);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  657|  1.47M|    t55  = CLIP(t56a - t55a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  658|  1.47M|    t56  = CLIP(t56a + t55a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  659|  1.47M|    t57a = CLIP(t57  + t54);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  660|  1.47M|    t58  = CLIP(t58a + t53a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  661|  1.47M|    t59a = CLIP(t59  + t52);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  662|  1.47M|    t60  = CLIP(t60a + t51a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  663|  1.47M|    t61a = CLIP(t61  + t50);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  664|  1.47M|    t62  = CLIP(t62a + t49a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  665|  1.47M|    t63a = CLIP(t63  + t48);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  666|       |
  667|  1.47M|    t40a = ((t55  - t40 ) * 181 + 128) >> 8;
  668|  1.47M|    t41  = ((t54a - t41a) * 181 + 128) >> 8;
  669|  1.47M|    t42a = ((t53  - t42 ) * 181 + 128) >> 8;
  670|  1.47M|    t43  = ((t52a - t43a) * 181 + 128) >> 8;
  671|  1.47M|    t44a = ((t51  - t44 ) * 181 + 128) >> 8;
  672|  1.47M|    t45  = ((t50a - t45a) * 181 + 128) >> 8;
  673|  1.47M|    t46a = ((t49  - t46 ) * 181 + 128) >> 8;
  674|  1.47M|    t47  = ((t48a - t47a) * 181 + 128) >> 8;
  675|  1.47M|    t48  = ((t47a + t48a) * 181 + 128) >> 8;
  676|  1.47M|    t49a = ((t46  + t49 ) * 181 + 128) >> 8;
  677|  1.47M|    t50  = ((t45a + t50a) * 181 + 128) >> 8;
  678|  1.47M|    t51a = ((t44  + t51 ) * 181 + 128) >> 8;
  679|  1.47M|    t52  = ((t43a + t52a) * 181 + 128) >> 8;
  680|  1.47M|    t53a = ((t42  + t53 ) * 181 + 128) >> 8;
  681|  1.47M|    t54  = ((t41a + t54a) * 181 + 128) >> 8;
  682|  1.47M|    t55a = ((t40  + t55 ) * 181 + 128) >> 8;
  683|       |
  684|  1.47M|    const int t0  = c[ 0 * stride];
  685|  1.47M|    const int t1  = c[ 2 * stride];
  686|  1.47M|    const int t2  = c[ 4 * stride];
  687|  1.47M|    const int t3  = c[ 6 * stride];
  688|  1.47M|    const int t4  = c[ 8 * stride];
  689|  1.47M|    const int t5  = c[10 * stride];
  690|  1.47M|    const int t6  = c[12 * stride];
  691|  1.47M|    const int t7  = c[14 * stride];
  692|  1.47M|    const int t8  = c[16 * stride];
  693|  1.47M|    const int t9  = c[18 * stride];
  694|  1.47M|    const int t10 = c[20 * stride];
  695|  1.47M|    const int t11 = c[22 * stride];
  696|  1.47M|    const int t12 = c[24 * stride];
  697|  1.47M|    const int t13 = c[26 * stride];
  698|  1.47M|    const int t14 = c[28 * stride];
  699|  1.47M|    const int t15 = c[30 * stride];
  700|  1.47M|    const int t16 = c[32 * stride];
  701|  1.47M|    const int t17 = c[34 * stride];
  702|  1.47M|    const int t18 = c[36 * stride];
  703|  1.47M|    const int t19 = c[38 * stride];
  704|  1.47M|    const int t20 = c[40 * stride];
  705|  1.47M|    const int t21 = c[42 * stride];
  706|  1.47M|    const int t22 = c[44 * stride];
  707|  1.47M|    const int t23 = c[46 * stride];
  708|  1.47M|    const int t24 = c[48 * stride];
  709|  1.47M|    const int t25 = c[50 * stride];
  710|  1.47M|    const int t26 = c[52 * stride];
  711|  1.47M|    const int t27 = c[54 * stride];
  712|  1.47M|    const int t28 = c[56 * stride];
  713|  1.47M|    const int t29 = c[58 * stride];
  714|  1.47M|    const int t30 = c[60 * stride];
  715|  1.47M|    const int t31 = c[62 * stride];
  716|       |
  717|  1.47M|    c[ 0 * stride] = CLIP(t0  + t63a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  718|  1.47M|    c[ 1 * stride] = CLIP(t1  + t62);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  719|  1.47M|    c[ 2 * stride] = CLIP(t2  + t61a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  720|  1.47M|    c[ 3 * stride] = CLIP(t3  + t60);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  721|  1.47M|    c[ 4 * stride] = CLIP(t4  + t59a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  722|  1.47M|    c[ 5 * stride] = CLIP(t5  + t58);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  723|  1.47M|    c[ 6 * stride] = CLIP(t6  + t57a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  724|  1.47M|    c[ 7 * stride] = CLIP(t7  + t56);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  725|  1.47M|    c[ 8 * stride] = CLIP(t8  + t55a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  726|  1.47M|    c[ 9 * stride] = CLIP(t9  + t54);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  727|  1.47M|    c[10 * stride] = CLIP(t10 + t53a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  728|  1.47M|    c[11 * stride] = CLIP(t11 + t52);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  729|  1.47M|    c[12 * stride] = CLIP(t12 + t51a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  730|  1.47M|    c[13 * stride] = CLIP(t13 + t50);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  731|  1.47M|    c[14 * stride] = CLIP(t14 + t49a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  732|  1.47M|    c[15 * stride] = CLIP(t15 + t48);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  733|  1.47M|    c[16 * stride] = CLIP(t16 + t47);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  734|  1.47M|    c[17 * stride] = CLIP(t17 + t46a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  735|  1.47M|    c[18 * stride] = CLIP(t18 + t45);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  736|  1.47M|    c[19 * stride] = CLIP(t19 + t44a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  737|  1.47M|    c[20 * stride] = CLIP(t20 + t43);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  738|  1.47M|    c[21 * stride] = CLIP(t21 + t42a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  739|  1.47M|    c[22 * stride] = CLIP(t22 + t41);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  740|  1.47M|    c[23 * stride] = CLIP(t23 + t40a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  741|  1.47M|    c[24 * stride] = CLIP(t24 + t39);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  742|  1.47M|    c[25 * stride] = CLIP(t25 + t38a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  743|  1.47M|    c[26 * stride] = CLIP(t26 + t37);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  744|  1.47M|    c[27 * stride] = CLIP(t27 + t36a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  745|  1.47M|    c[28 * stride] = CLIP(t28 + t35);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  746|  1.47M|    c[29 * stride] = CLIP(t29 + t34a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  747|  1.47M|    c[30 * stride] = CLIP(t30 + t33);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  748|  1.47M|    c[31 * stride] = CLIP(t31 + t32a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  749|  1.47M|    c[32 * stride] = CLIP(t31 - t32a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  750|  1.47M|    c[33 * stride] = CLIP(t30 - t33);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  751|  1.47M|    c[34 * stride] = CLIP(t29 - t34a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  752|  1.47M|    c[35 * stride] = CLIP(t28 - t35);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  753|  1.47M|    c[36 * stride] = CLIP(t27 - t36a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  754|  1.47M|    c[37 * stride] = CLIP(t26 - t37);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  755|  1.47M|    c[38 * stride] = CLIP(t25 - t38a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  756|  1.47M|    c[39 * stride] = CLIP(t24 - t39);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  757|  1.47M|    c[40 * stride] = CLIP(t23 - t40a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  758|  1.47M|    c[41 * stride] = CLIP(t22 - t41);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  759|  1.47M|    c[42 * stride] = CLIP(t21 - t42a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  760|  1.47M|    c[43 * stride] = CLIP(t20 - t43);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  761|  1.47M|    c[44 * stride] = CLIP(t19 - t44a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  762|  1.47M|    c[45 * stride] = CLIP(t18 - t45);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  763|  1.47M|    c[46 * stride] = CLIP(t17 - t46a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  764|  1.47M|    c[47 * stride] = CLIP(t16 - t47);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  765|  1.47M|    c[48 * stride] = CLIP(t15 - t48);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  766|  1.47M|    c[49 * stride] = CLIP(t14 - t49a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  767|  1.47M|    c[50 * stride] = CLIP(t13 - t50);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  768|  1.47M|    c[51 * stride] = CLIP(t12 - t51a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  769|  1.47M|    c[52 * stride] = CLIP(t11 - t52);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  770|  1.47M|    c[53 * stride] = CLIP(t10 - t53a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  771|  1.47M|    c[54 * stride] = CLIP(t9  - t54);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  772|  1.47M|    c[55 * stride] = CLIP(t8  - t55a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  773|  1.47M|    c[56 * stride] = CLIP(t7  - t56);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  774|  1.47M|    c[57 * stride] = CLIP(t6  - t57a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  775|  1.47M|    c[58 * stride] = CLIP(t5  - t58);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  776|  1.47M|    c[59 * stride] = CLIP(t4  - t59a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  777|  1.47M|    c[60 * stride] = CLIP(t3  - t60);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  778|  1.47M|    c[61 * stride] = CLIP(t2  - t61a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  779|  1.47M|    c[62 * stride] = CLIP(t1  - t62);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  780|  1.47M|    c[63 * stride] = CLIP(t0  - t63a);
  ------------------
  |  |   37|  1.47M|#define CLIP(a) iclip(a, min, max)
  ------------------
  781|  1.47M|}

dav1d_itx_dsp_init_8bpc:
  220|  3.47k|COLD void bitfn(dav1d_itx_dsp_init)(Dav1dInvTxfmDSPContext *const c, int bpc) {
  221|  3.47k|#define assign_itx_all_fn64(w, h, pfx) \
  222|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  223|  3.47k|        inv_txfm_add_dct_dct_##w##x##h##_c
  224|       |
  225|  3.47k|#define assign_itx_all_fn32(w, h, pfx) \
  226|  3.47k|    assign_itx_all_fn64(w, h, pfx); \
  227|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  228|  3.47k|        inv_txfm_add_identity_identity_##w##x##h##_c
  229|       |
  230|  3.47k|#define assign_itx_all_fn16(w, h, pfx) \
  231|  3.47k|    assign_itx_all_fn32(w, h, pfx); \
  232|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  233|  3.47k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  234|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  235|  3.47k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  236|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  237|  3.47k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  238|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  239|  3.47k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  240|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  241|  3.47k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  242|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  243|  3.47k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  244|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  245|  3.47k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  246|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  247|  3.47k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  248|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  249|  3.47k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  250|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  251|  3.47k|        inv_txfm_add_identity_dct_##w##x##h##_c
  252|       |
  253|  3.47k|#define assign_itx_all_fn84(w, h, pfx) \
  254|  3.47k|    assign_itx_all_fn16(w, h, pfx); \
  255|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][H_FLIPADST] = \
  256|  3.47k|        inv_txfm_add_flipadst_identity_##w##x##h##_c; \
  257|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][V_FLIPADST] = \
  258|  3.47k|        inv_txfm_add_identity_flipadst_##w##x##h##_c; \
  259|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][H_ADST] = \
  260|  3.47k|        inv_txfm_add_adst_identity_##w##x##h##_c; \
  261|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][V_ADST] = \
  262|  3.47k|        inv_txfm_add_identity_adst_##w##x##h##_c; \
  263|  3.47k|
  264|  3.47k|#if !(HAVE_ASM && TRIM_DSP_FUNCTIONS && ( \
  265|  3.47k|  ARCH_AARCH64 || \
  266|  3.47k|  (ARCH_ARM && (defined(__ARM_NEON) || defined(__APPLE__) || defined(_WIN32))) \
  267|  3.47k|))
  268|  3.47k|    c->itxfm_add[TX_4X4][WHT_WHT] = inv_txfm_add_wht_wht_4x4_c;
  269|  3.47k|#endif
  270|  3.47k|    assign_itx_all_fn84( 4,  4, );
  ------------------
  |  |  254|  3.47k|    assign_itx_all_fn16(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  231|  3.47k|    assign_itx_all_fn32(w, h, pfx); \
  |  |  |  |  ------------------
  |  |  |  |  |  |  226|  3.47k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  222|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  |  |  |  |  223|  3.47k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  227|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  |  |  |  |  228|  3.47k|        inv_txfm_add_identity_identity_##w##x##h##_c
  |  |  |  |  ------------------
  |  |  |  |  232|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  |  |  |  |  233|  3.47k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  |  |  |  |  234|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  |  |  |  |  235|  3.47k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  |  |  |  |  236|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  |  |  |  |  237|  3.47k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  |  |  |  |  238|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  |  |  |  |  239|  3.47k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  |  |  |  |  240|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  |  |  |  |  241|  3.47k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  |  |  |  |  242|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  |  |  |  |  243|  3.47k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  |  |  |  |  244|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  |  |  |  |  245|  3.47k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  |  |  |  |  246|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  |  |  |  |  247|  3.47k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  |  |  |  |  248|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  |  |  |  |  249|  3.47k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  |  |  |  |  250|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  |  |  |  |  251|  3.47k|        inv_txfm_add_identity_dct_##w##x##h##_c
  |  |  ------------------
  |  |  255|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][H_FLIPADST] = \
  |  |  256|  3.47k|        inv_txfm_add_flipadst_identity_##w##x##h##_c; \
  |  |  257|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][V_FLIPADST] = \
  |  |  258|  3.47k|        inv_txfm_add_identity_flipadst_##w##x##h##_c; \
  |  |  259|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][H_ADST] = \
  |  |  260|  3.47k|        inv_txfm_add_adst_identity_##w##x##h##_c; \
  |  |  261|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][V_ADST] = \
  |  |  262|  3.47k|        inv_txfm_add_identity_adst_##w##x##h##_c; \
  ------------------
  271|  3.47k|    assign_itx_all_fn84( 4,  8, R);
  ------------------
  |  |  254|  3.47k|    assign_itx_all_fn16(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  231|  3.47k|    assign_itx_all_fn32(w, h, pfx); \
  |  |  |  |  ------------------
  |  |  |  |  |  |  226|  3.47k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  222|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  |  |  |  |  223|  3.47k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  227|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  |  |  |  |  228|  3.47k|        inv_txfm_add_identity_identity_##w##x##h##_c
  |  |  |  |  ------------------
  |  |  |  |  232|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  |  |  |  |  233|  3.47k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  |  |  |  |  234|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  |  |  |  |  235|  3.47k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  |  |  |  |  236|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  |  |  |  |  237|  3.47k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  |  |  |  |  238|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  |  |  |  |  239|  3.47k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  |  |  |  |  240|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  |  |  |  |  241|  3.47k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  |  |  |  |  242|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  |  |  |  |  243|  3.47k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  |  |  |  |  244|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  |  |  |  |  245|  3.47k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  |  |  |  |  246|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  |  |  |  |  247|  3.47k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  |  |  |  |  248|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  |  |  |  |  249|  3.47k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  |  |  |  |  250|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  |  |  |  |  251|  3.47k|        inv_txfm_add_identity_dct_##w##x##h##_c
  |  |  ------------------
  |  |  255|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][H_FLIPADST] = \
  |  |  256|  3.47k|        inv_txfm_add_flipadst_identity_##w##x##h##_c; \
  |  |  257|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][V_FLIPADST] = \
  |  |  258|  3.47k|        inv_txfm_add_identity_flipadst_##w##x##h##_c; \
  |  |  259|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][H_ADST] = \
  |  |  260|  3.47k|        inv_txfm_add_adst_identity_##w##x##h##_c; \
  |  |  261|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][V_ADST] = \
  |  |  262|  3.47k|        inv_txfm_add_identity_adst_##w##x##h##_c; \
  ------------------
  272|  3.47k|    assign_itx_all_fn84( 4, 16, R);
  ------------------
  |  |  254|  3.47k|    assign_itx_all_fn16(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  231|  3.47k|    assign_itx_all_fn32(w, h, pfx); \
  |  |  |  |  ------------------
  |  |  |  |  |  |  226|  3.47k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  222|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  |  |  |  |  223|  3.47k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  227|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  |  |  |  |  228|  3.47k|        inv_txfm_add_identity_identity_##w##x##h##_c
  |  |  |  |  ------------------
  |  |  |  |  232|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  |  |  |  |  233|  3.47k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  |  |  |  |  234|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  |  |  |  |  235|  3.47k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  |  |  |  |  236|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  |  |  |  |  237|  3.47k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  |  |  |  |  238|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  |  |  |  |  239|  3.47k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  |  |  |  |  240|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  |  |  |  |  241|  3.47k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  |  |  |  |  242|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  |  |  |  |  243|  3.47k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  |  |  |  |  244|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  |  |  |  |  245|  3.47k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  |  |  |  |  246|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  |  |  |  |  247|  3.47k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  |  |  |  |  248|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  |  |  |  |  249|  3.47k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  |  |  |  |  250|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  |  |  |  |  251|  3.47k|        inv_txfm_add_identity_dct_##w##x##h##_c
  |  |  ------------------
  |  |  255|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][H_FLIPADST] = \
  |  |  256|  3.47k|        inv_txfm_add_flipadst_identity_##w##x##h##_c; \
  |  |  257|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][V_FLIPADST] = \
  |  |  258|  3.47k|        inv_txfm_add_identity_flipadst_##w##x##h##_c; \
  |  |  259|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][H_ADST] = \
  |  |  260|  3.47k|        inv_txfm_add_adst_identity_##w##x##h##_c; \
  |  |  261|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][V_ADST] = \
  |  |  262|  3.47k|        inv_txfm_add_identity_adst_##w##x##h##_c; \
  ------------------
  273|  3.47k|    assign_itx_all_fn84( 8,  4, R);
  ------------------
  |  |  254|  3.47k|    assign_itx_all_fn16(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  231|  3.47k|    assign_itx_all_fn32(w, h, pfx); \
  |  |  |  |  ------------------
  |  |  |  |  |  |  226|  3.47k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  222|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  |  |  |  |  223|  3.47k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  227|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  |  |  |  |  228|  3.47k|        inv_txfm_add_identity_identity_##w##x##h##_c
  |  |  |  |  ------------------
  |  |  |  |  232|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  |  |  |  |  233|  3.47k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  |  |  |  |  234|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  |  |  |  |  235|  3.47k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  |  |  |  |  236|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  |  |  |  |  237|  3.47k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  |  |  |  |  238|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  |  |  |  |  239|  3.47k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  |  |  |  |  240|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  |  |  |  |  241|  3.47k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  |  |  |  |  242|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  |  |  |  |  243|  3.47k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  |  |  |  |  244|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  |  |  |  |  245|  3.47k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  |  |  |  |  246|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  |  |  |  |  247|  3.47k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  |  |  |  |  248|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  |  |  |  |  249|  3.47k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  |  |  |  |  250|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  |  |  |  |  251|  3.47k|        inv_txfm_add_identity_dct_##w##x##h##_c
  |  |  ------------------
  |  |  255|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][H_FLIPADST] = \
  |  |  256|  3.47k|        inv_txfm_add_flipadst_identity_##w##x##h##_c; \
  |  |  257|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][V_FLIPADST] = \
  |  |  258|  3.47k|        inv_txfm_add_identity_flipadst_##w##x##h##_c; \
  |  |  259|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][H_ADST] = \
  |  |  260|  3.47k|        inv_txfm_add_adst_identity_##w##x##h##_c; \
  |  |  261|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][V_ADST] = \
  |  |  262|  3.47k|        inv_txfm_add_identity_adst_##w##x##h##_c; \
  ------------------
  274|  3.47k|    assign_itx_all_fn84( 8,  8, );
  ------------------
  |  |  254|  3.47k|    assign_itx_all_fn16(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  231|  3.47k|    assign_itx_all_fn32(w, h, pfx); \
  |  |  |  |  ------------------
  |  |  |  |  |  |  226|  3.47k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  222|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  |  |  |  |  223|  3.47k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  227|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  |  |  |  |  228|  3.47k|        inv_txfm_add_identity_identity_##w##x##h##_c
  |  |  |  |  ------------------
  |  |  |  |  232|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  |  |  |  |  233|  3.47k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  |  |  |  |  234|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  |  |  |  |  235|  3.47k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  |  |  |  |  236|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  |  |  |  |  237|  3.47k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  |  |  |  |  238|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  |  |  |  |  239|  3.47k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  |  |  |  |  240|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  |  |  |  |  241|  3.47k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  |  |  |  |  242|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  |  |  |  |  243|  3.47k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  |  |  |  |  244|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  |  |  |  |  245|  3.47k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  |  |  |  |  246|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  |  |  |  |  247|  3.47k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  |  |  |  |  248|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  |  |  |  |  249|  3.47k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  |  |  |  |  250|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  |  |  |  |  251|  3.47k|        inv_txfm_add_identity_dct_##w##x##h##_c
  |  |  ------------------
  |  |  255|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][H_FLIPADST] = \
  |  |  256|  3.47k|        inv_txfm_add_flipadst_identity_##w##x##h##_c; \
  |  |  257|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][V_FLIPADST] = \
  |  |  258|  3.47k|        inv_txfm_add_identity_flipadst_##w##x##h##_c; \
  |  |  259|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][H_ADST] = \
  |  |  260|  3.47k|        inv_txfm_add_adst_identity_##w##x##h##_c; \
  |  |  261|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][V_ADST] = \
  |  |  262|  3.47k|        inv_txfm_add_identity_adst_##w##x##h##_c; \
  ------------------
  275|  3.47k|    assign_itx_all_fn84( 8, 16, R);
  ------------------
  |  |  254|  3.47k|    assign_itx_all_fn16(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  231|  3.47k|    assign_itx_all_fn32(w, h, pfx); \
  |  |  |  |  ------------------
  |  |  |  |  |  |  226|  3.47k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  222|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  |  |  |  |  223|  3.47k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  227|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  |  |  |  |  228|  3.47k|        inv_txfm_add_identity_identity_##w##x##h##_c
  |  |  |  |  ------------------
  |  |  |  |  232|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  |  |  |  |  233|  3.47k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  |  |  |  |  234|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  |  |  |  |  235|  3.47k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  |  |  |  |  236|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  |  |  |  |  237|  3.47k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  |  |  |  |  238|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  |  |  |  |  239|  3.47k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  |  |  |  |  240|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  |  |  |  |  241|  3.47k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  |  |  |  |  242|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  |  |  |  |  243|  3.47k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  |  |  |  |  244|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  |  |  |  |  245|  3.47k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  |  |  |  |  246|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  |  |  |  |  247|  3.47k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  |  |  |  |  248|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  |  |  |  |  249|  3.47k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  |  |  |  |  250|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  |  |  |  |  251|  3.47k|        inv_txfm_add_identity_dct_##w##x##h##_c
  |  |  ------------------
  |  |  255|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][H_FLIPADST] = \
  |  |  256|  3.47k|        inv_txfm_add_flipadst_identity_##w##x##h##_c; \
  |  |  257|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][V_FLIPADST] = \
  |  |  258|  3.47k|        inv_txfm_add_identity_flipadst_##w##x##h##_c; \
  |  |  259|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][H_ADST] = \
  |  |  260|  3.47k|        inv_txfm_add_adst_identity_##w##x##h##_c; \
  |  |  261|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][V_ADST] = \
  |  |  262|  3.47k|        inv_txfm_add_identity_adst_##w##x##h##_c; \
  ------------------
  276|  3.47k|    assign_itx_all_fn32( 8, 32, R);
  ------------------
  |  |  226|  3.47k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  222|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  223|  3.47k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  ------------------
  |  |  227|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  228|  3.47k|        inv_txfm_add_identity_identity_##w##x##h##_c
  ------------------
  277|  3.47k|    assign_itx_all_fn84(16,  4, R);
  ------------------
  |  |  254|  3.47k|    assign_itx_all_fn16(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  231|  3.47k|    assign_itx_all_fn32(w, h, pfx); \
  |  |  |  |  ------------------
  |  |  |  |  |  |  226|  3.47k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  222|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  |  |  |  |  223|  3.47k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  227|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  |  |  |  |  228|  3.47k|        inv_txfm_add_identity_identity_##w##x##h##_c
  |  |  |  |  ------------------
  |  |  |  |  232|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  |  |  |  |  233|  3.47k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  |  |  |  |  234|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  |  |  |  |  235|  3.47k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  |  |  |  |  236|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  |  |  |  |  237|  3.47k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  |  |  |  |  238|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  |  |  |  |  239|  3.47k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  |  |  |  |  240|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  |  |  |  |  241|  3.47k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  |  |  |  |  242|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  |  |  |  |  243|  3.47k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  |  |  |  |  244|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  |  |  |  |  245|  3.47k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  |  |  |  |  246|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  |  |  |  |  247|  3.47k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  |  |  |  |  248|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  |  |  |  |  249|  3.47k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  |  |  |  |  250|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  |  |  |  |  251|  3.47k|        inv_txfm_add_identity_dct_##w##x##h##_c
  |  |  ------------------
  |  |  255|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][H_FLIPADST] = \
  |  |  256|  3.47k|        inv_txfm_add_flipadst_identity_##w##x##h##_c; \
  |  |  257|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][V_FLIPADST] = \
  |  |  258|  3.47k|        inv_txfm_add_identity_flipadst_##w##x##h##_c; \
  |  |  259|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][H_ADST] = \
  |  |  260|  3.47k|        inv_txfm_add_adst_identity_##w##x##h##_c; \
  |  |  261|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][V_ADST] = \
  |  |  262|  3.47k|        inv_txfm_add_identity_adst_##w##x##h##_c; \
  ------------------
  278|  3.47k|    assign_itx_all_fn84(16,  8, R);
  ------------------
  |  |  254|  3.47k|    assign_itx_all_fn16(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  231|  3.47k|    assign_itx_all_fn32(w, h, pfx); \
  |  |  |  |  ------------------
  |  |  |  |  |  |  226|  3.47k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  222|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  |  |  |  |  223|  3.47k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  227|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  |  |  |  |  228|  3.47k|        inv_txfm_add_identity_identity_##w##x##h##_c
  |  |  |  |  ------------------
  |  |  |  |  232|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  |  |  |  |  233|  3.47k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  |  |  |  |  234|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  |  |  |  |  235|  3.47k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  |  |  |  |  236|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  |  |  |  |  237|  3.47k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  |  |  |  |  238|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  |  |  |  |  239|  3.47k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  |  |  |  |  240|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  |  |  |  |  241|  3.47k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  |  |  |  |  242|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  |  |  |  |  243|  3.47k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  |  |  |  |  244|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  |  |  |  |  245|  3.47k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  |  |  |  |  246|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  |  |  |  |  247|  3.47k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  |  |  |  |  248|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  |  |  |  |  249|  3.47k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  |  |  |  |  250|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  |  |  |  |  251|  3.47k|        inv_txfm_add_identity_dct_##w##x##h##_c
  |  |  ------------------
  |  |  255|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][H_FLIPADST] = \
  |  |  256|  3.47k|        inv_txfm_add_flipadst_identity_##w##x##h##_c; \
  |  |  257|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][V_FLIPADST] = \
  |  |  258|  3.47k|        inv_txfm_add_identity_flipadst_##w##x##h##_c; \
  |  |  259|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][H_ADST] = \
  |  |  260|  3.47k|        inv_txfm_add_adst_identity_##w##x##h##_c; \
  |  |  261|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][V_ADST] = \
  |  |  262|  3.47k|        inv_txfm_add_identity_adst_##w##x##h##_c; \
  ------------------
  279|  3.47k|    assign_itx_all_fn16(16, 16, );
  ------------------
  |  |  231|  3.47k|    assign_itx_all_fn32(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  226|  3.47k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  |  |  ------------------
  |  |  |  |  |  |  222|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  |  |  223|  3.47k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  |  |  ------------------
  |  |  |  |  227|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  |  |  228|  3.47k|        inv_txfm_add_identity_identity_##w##x##h##_c
  |  |  ------------------
  |  |  232|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  |  |  233|  3.47k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  |  |  234|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  |  |  235|  3.47k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  |  |  236|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  |  |  237|  3.47k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  |  |  238|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  |  |  239|  3.47k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  |  |  240|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  |  |  241|  3.47k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  |  |  242|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  |  |  243|  3.47k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  |  |  244|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  |  |  245|  3.47k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  |  |  246|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  |  |  247|  3.47k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  |  |  248|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  |  |  249|  3.47k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  |  |  250|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  |  |  251|  3.47k|        inv_txfm_add_identity_dct_##w##x##h##_c
  ------------------
  280|  3.47k|    assign_itx_all_fn32(16, 32, R);
  ------------------
  |  |  226|  3.47k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  222|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  223|  3.47k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  ------------------
  |  |  227|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  228|  3.47k|        inv_txfm_add_identity_identity_##w##x##h##_c
  ------------------
  281|  3.47k|    assign_itx_all_fn64(16, 64, R);
  ------------------
  |  |  222|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  223|  3.47k|        inv_txfm_add_dct_dct_##w##x##h##_c
  ------------------
  282|  3.47k|    assign_itx_all_fn32(32,  8, R);
  ------------------
  |  |  226|  3.47k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  222|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  223|  3.47k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  ------------------
  |  |  227|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  228|  3.47k|        inv_txfm_add_identity_identity_##w##x##h##_c
  ------------------
  283|  3.47k|    assign_itx_all_fn32(32, 16, R);
  ------------------
  |  |  226|  3.47k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  222|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  223|  3.47k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  ------------------
  |  |  227|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  228|  3.47k|        inv_txfm_add_identity_identity_##w##x##h##_c
  ------------------
  284|  3.47k|    assign_itx_all_fn32(32, 32, );
  ------------------
  |  |  226|  3.47k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  222|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  223|  3.47k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  ------------------
  |  |  227|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  228|  3.47k|        inv_txfm_add_identity_identity_##w##x##h##_c
  ------------------
  285|  3.47k|    assign_itx_all_fn64(32, 64, R);
  ------------------
  |  |  222|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  223|  3.47k|        inv_txfm_add_dct_dct_##w##x##h##_c
  ------------------
  286|  3.47k|    assign_itx_all_fn64(64, 16, R);
  ------------------
  |  |  222|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  223|  3.47k|        inv_txfm_add_dct_dct_##w##x##h##_c
  ------------------
  287|  3.47k|    assign_itx_all_fn64(64, 32, R);
  ------------------
  |  |  222|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  223|  3.47k|        inv_txfm_add_dct_dct_##w##x##h##_c
  ------------------
  288|  3.47k|    assign_itx_all_fn64(64, 64, );
  ------------------
  |  |  222|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  223|  3.47k|        inv_txfm_add_dct_dct_##w##x##h##_c
  ------------------
  289|       |
  290|  3.47k|    int all_simd = 0;
  291|  3.47k|#if HAVE_ASM
  292|       |#if ARCH_AARCH64 || ARCH_ARM
  293|       |    itx_dsp_init_arm(c, bpc, &all_simd);
  294|       |#endif
  295|       |#if ARCH_LOONGARCH64
  296|       |    itx_dsp_init_loongarch(c, bpc);
  297|       |#endif
  298|       |#if ARCH_PPC64LE
  299|       |    itx_dsp_init_ppc(c, bpc);
  300|       |#endif
  301|       |#if ARCH_RISCV
  302|       |    itx_dsp_init_riscv(c, bpc);
  303|       |#endif
  304|  3.47k|#if ARCH_X86
  305|  3.47k|    itx_dsp_init_x86(c, bpc, &all_simd);
  306|  3.47k|#endif
  307|  3.47k|#endif
  308|       |
  309|  3.47k|    if (!all_simd)
  ------------------
  |  Branch (309:9): [True: 0, False: 3.47k]
  ------------------
  310|      0|        dav1d_init_last_nonzero_col_from_eob_tables();
  311|  3.47k|}
itx_tmpl.c:inv_txfm_add_c:
   47|   144k|{
   48|   144k|    const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[tx];
   49|   144k|    const int w = 4 * t_dim->w, h = 4 * t_dim->h;
   50|   144k|    const int has_dconly = txtp == DCT_DCT;
   51|   144k|    assert(w >= 4 && w <= 64);
  ------------------
  |  Branch (51:5): [True: 144k, False: 0]
  |  Branch (51:5): [True: 144k, False: 0]
  ------------------
   52|   144k|    assert(h >= 4 && h <= 64);
  ------------------
  |  Branch (52:5): [True: 144k, False: 0]
  |  Branch (52:5): [True: 144k, False: 0]
  ------------------
   53|   144k|    assert(eob >= 0);
  ------------------
  |  Branch (53:5): [True: 144k, False: 0]
  ------------------
   54|       |
   55|   144k|    const int is_rect2 = w * 2 == h || h * 2 == w;
  ------------------
  |  Branch (55:26): [True: 18.5k, False: 125k]
  |  Branch (55:40): [True: 52.6k, False: 73.3k]
  ------------------
   56|   144k|    const int rnd = (1 << shift) >> 1;
   57|       |
   58|   144k|    if (eob < has_dconly) {
  ------------------
  |  Branch (58:9): [True: 40.3k, False: 104k]
  ------------------
   59|  40.3k|        int dc = coeff[0];
   60|  40.3k|        coeff[0] = 0;
   61|  40.3k|        if (is_rect2)
  ------------------
  |  Branch (61:13): [True: 19.0k, False: 21.2k]
  ------------------
   62|  19.0k|            dc = (dc * 181 + 128) >> 8;
   63|  40.3k|        dc = (dc * 181 + 128) >> 8;
   64|  40.3k|        dc = (dc + rnd) >> shift;
   65|  40.3k|        dc = (dc * 181 + 128 + 2048) >> 12;
   66|  1.38M|        for (int y = 0; y < h; y++, dst += PXSTRIDE(stride))
  ------------------
  |  |   53|  1.34M|#define PXSTRIDE(x) (x)
  ------------------
  |  Branch (66:25): [True: 1.34M, False: 40.3k]
  ------------------
   67|  59.4M|            for (int x = 0; x < w; x++)
  ------------------
  |  Branch (67:29): [True: 58.1M, False: 1.34M]
  ------------------
   68|  58.1M|                dst[x] = iclip_pixel(dst[x] + dc);
  ------------------
  |  |   49|  58.1M|#define iclip_pixel iclip_u8
  ------------------
   69|  40.3k|        return;
   70|  40.3k|    }
   71|       |
   72|   104k|    const uint8_t *const txtps = dav1d_tx1d_types[txtp];
   73|   104k|    const itx_1d_fn first_1d_fn = dav1d_tx1d_fns[t_dim->lw][txtps[0]];
   74|   104k|    const itx_1d_fn second_1d_fn = dav1d_tx1d_fns[t_dim->lh][txtps[1]];
   75|   104k|    const int sh = imin(h, 32), sw = imin(w, 32);
   76|   104k|#if BITDEPTH == 8
   77|   104k|    const int row_clip_min = INT16_MIN;
   78|   104k|    const int col_clip_min = INT16_MIN;
   79|       |#else
   80|       |    const int row_clip_min = (int) ((unsigned) ~bitdepth_max << 7);
   81|       |    const int col_clip_min = (int) ((unsigned) ~bitdepth_max << 5);
   82|       |#endif
   83|   104k|    const int row_clip_max = ~row_clip_min;
   84|   104k|    const int col_clip_max = ~col_clip_min;
   85|       |
   86|   104k|    int32_t tmp[64 * 64], *c = tmp;
   87|   104k|    int last_nonzero_col; // in first 1d itx
   88|   104k|    if (txtps[1] == IDENTITY && txtps[0] != IDENTITY) {
  ------------------
  |  Branch (88:9): [True: 0, False: 104k]
  |  Branch (88:33): [True: 0, False: 0]
  ------------------
   89|      0|        last_nonzero_col = imin(sh - 1, eob);
   90|   104k|    } else if (txtps[0] == IDENTITY && txtps[1] != IDENTITY) {
  ------------------
  |  Branch (90:16): [True: 0, False: 104k]
  |  Branch (90:40): [True: 0, False: 0]
  ------------------
   91|      0|        last_nonzero_col = eob >> (t_dim->lw + 2);
   92|   104k|    } else {
   93|   104k|        last_nonzero_col = dav1d_last_nonzero_col_from_eob[tx][eob];
   94|   104k|    }
   95|   104k|    assert(last_nonzero_col < sh);
  ------------------
  |  Branch (95:5): [True: 104k, False: 0]
  ------------------
   96|  1.03M|    for (int y = 0; y <= last_nonzero_col; y++, c += w) {
  ------------------
  |  Branch (96:21): [True: 933k, False: 104k]
  ------------------
   97|   933k|        if (is_rect2)
  ------------------
  |  Branch (97:13): [True: 383k, False: 550k]
  ------------------
   98|  11.4M|            for (int x = 0; x < sw; x++)
  ------------------
  |  Branch (98:29): [True: 11.0M, False: 383k]
  ------------------
   99|  11.0M|                c[x] = (coeff[y + x * sh] * 181 + 128) >> 8;
  100|   550k|        else
  101|  17.9M|            for (int x = 0; x < sw; x++)
  ------------------
  |  Branch (101:29): [True: 17.4M, False: 550k]
  ------------------
  102|  17.4M|                c[x] = coeff[y + x * sh];
  103|   933k|        first_1d_fn(c, 1, row_clip_min, row_clip_max);
  104|   933k|    }
  105|   104k|    if (last_nonzero_col + 1 < sh)
  ------------------
  |  Branch (105:9): [True: 94.0k, False: 10.0k]
  ------------------
  106|  94.0k|        memset(c, 0, sizeof(*c) * (sh - last_nonzero_col - 1) * w);
  107|       |
  108|   104k|    memset(coeff, 0, sizeof(*coeff) * sw * sh);
  109|   113M|    for (int i = 0; i < w * sh; i++)
  ------------------
  |  Branch (109:21): [True: 113M, False: 104k]
  ------------------
  110|   113M|        tmp[i] = iclip((tmp[i] + rnd) >> shift, col_clip_min, col_clip_max);
  111|       |
  112|  4.20M|    for (int x = 0; x < w; x++)
  ------------------
  |  Branch (112:21): [True: 4.09M, False: 104k]
  ------------------
  113|  4.09M|        second_1d_fn(&tmp[x], w, col_clip_min, col_clip_max);
  114|       |
  115|   104k|    c = tmp;
  116|  3.58M|    for (int y = 0; y < h; y++, dst += PXSTRIDE(stride))
  ------------------
  |  |   53|  3.47M|#define PXSTRIDE(x) (x)
  ------------------
  |  Branch (116:21): [True: 3.47M, False: 104k]
  ------------------
  117|   153M|        for (int x = 0; x < w; x++)
  ------------------
  |  Branch (117:25): [True: 149M, False: 3.47M]
  ------------------
  118|   149M|            dst[x] = iclip_pixel(dst[x] + ((*c++ + 8) >> 4));
  ------------------
  |  |   49|   149M|#define iclip_pixel iclip_u8
  ------------------
  119|   104k|}
itx_tmpl.c:inv_txfm_add_dct_dct_16x32_c:
  127|  15.9k|                                               HIGHBD_DECL_SUFFIX) \
  128|  15.9k|{ \
  129|  15.9k|    inv_txfm_add_c(dst, stride, coeff, eob, pfx##TX_##w##X##h, shift, type \
  130|  15.9k|                   HIGHBD_TAIL_SUFFIX); \
  131|  15.9k|}
itx_tmpl.c:inv_txfm_add_dct_dct_16x64_c:
  127|  2.01k|                                               HIGHBD_DECL_SUFFIX) \
  128|  2.01k|{ \
  129|  2.01k|    inv_txfm_add_c(dst, stride, coeff, eob, pfx##TX_##w##X##h, shift, type \
  130|  2.01k|                   HIGHBD_TAIL_SUFFIX); \
  131|  2.01k|}
itx_tmpl.c:inv_txfm_add_dct_dct_32x16_c:
  127|  34.9k|                                               HIGHBD_DECL_SUFFIX) \
  128|  34.9k|{ \
  129|  34.9k|    inv_txfm_add_c(dst, stride, coeff, eob, pfx##TX_##w##X##h, shift, type \
  130|  34.9k|                   HIGHBD_TAIL_SUFFIX); \
  131|  34.9k|}
itx_tmpl.c:inv_txfm_add_dct_dct_32x32_c:
  127|  45.9k|                                               HIGHBD_DECL_SUFFIX) \
  128|  45.9k|{ \
  129|  45.9k|    inv_txfm_add_c(dst, stride, coeff, eob, pfx##TX_##w##X##h, shift, type \
  130|  45.9k|                   HIGHBD_TAIL_SUFFIX); \
  131|  45.9k|}
itx_tmpl.c:inv_txfm_add_dct_dct_32x64_c:
  127|  2.59k|                                               HIGHBD_DECL_SUFFIX) \
  128|  2.59k|{ \
  129|  2.59k|    inv_txfm_add_c(dst, stride, coeff, eob, pfx##TX_##w##X##h, shift, type \
  130|  2.59k|                   HIGHBD_TAIL_SUFFIX); \
  131|  2.59k|}
itx_tmpl.c:inv_txfm_add_dct_dct_64x16_c:
  127|  4.22k|                                               HIGHBD_DECL_SUFFIX) \
  128|  4.22k|{ \
  129|  4.22k|    inv_txfm_add_c(dst, stride, coeff, eob, pfx##TX_##w##X##h, shift, type \
  130|  4.22k|                   HIGHBD_TAIL_SUFFIX); \
  131|  4.22k|}
itx_tmpl.c:inv_txfm_add_dct_dct_64x32_c:
  127|  17.6k|                                               HIGHBD_DECL_SUFFIX) \
  128|  17.6k|{ \
  129|  17.6k|    inv_txfm_add_c(dst, stride, coeff, eob, pfx##TX_##w##X##h, shift, type \
  130|  17.6k|                   HIGHBD_TAIL_SUFFIX); \
  131|  17.6k|}
itx_tmpl.c:inv_txfm_add_dct_dct_64x64_c:
  127|  21.1k|                                               HIGHBD_DECL_SUFFIX) \
  128|  21.1k|{ \
  129|  21.1k|    inv_txfm_add_c(dst, stride, coeff, eob, pfx##TX_##w##X##h, shift, type \
  130|  21.1k|                   HIGHBD_TAIL_SUFFIX); \
  131|  21.1k|}
dav1d_itx_dsp_init_16bpc:
  220|  4.68k|COLD void bitfn(dav1d_itx_dsp_init)(Dav1dInvTxfmDSPContext *const c, int bpc) {
  221|  4.68k|#define assign_itx_all_fn64(w, h, pfx) \
  222|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  223|  4.68k|        inv_txfm_add_dct_dct_##w##x##h##_c
  224|       |
  225|  4.68k|#define assign_itx_all_fn32(w, h, pfx) \
  226|  4.68k|    assign_itx_all_fn64(w, h, pfx); \
  227|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  228|  4.68k|        inv_txfm_add_identity_identity_##w##x##h##_c
  229|       |
  230|  4.68k|#define assign_itx_all_fn16(w, h, pfx) \
  231|  4.68k|    assign_itx_all_fn32(w, h, pfx); \
  232|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  233|  4.68k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  234|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  235|  4.68k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  236|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  237|  4.68k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  238|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  239|  4.68k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  240|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  241|  4.68k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  242|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  243|  4.68k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  244|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  245|  4.68k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  246|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  247|  4.68k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  248|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  249|  4.68k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  250|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  251|  4.68k|        inv_txfm_add_identity_dct_##w##x##h##_c
  252|       |
  253|  4.68k|#define assign_itx_all_fn84(w, h, pfx) \
  254|  4.68k|    assign_itx_all_fn16(w, h, pfx); \
  255|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][H_FLIPADST] = \
  256|  4.68k|        inv_txfm_add_flipadst_identity_##w##x##h##_c; \
  257|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][V_FLIPADST] = \
  258|  4.68k|        inv_txfm_add_identity_flipadst_##w##x##h##_c; \
  259|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][H_ADST] = \
  260|  4.68k|        inv_txfm_add_adst_identity_##w##x##h##_c; \
  261|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][V_ADST] = \
  262|  4.68k|        inv_txfm_add_identity_adst_##w##x##h##_c; \
  263|  4.68k|
  264|  4.68k|#if !(HAVE_ASM && TRIM_DSP_FUNCTIONS && ( \
  265|  4.68k|  ARCH_AARCH64 || \
  266|  4.68k|  (ARCH_ARM && (defined(__ARM_NEON) || defined(__APPLE__) || defined(_WIN32))) \
  267|  4.68k|))
  268|  4.68k|    c->itxfm_add[TX_4X4][WHT_WHT] = inv_txfm_add_wht_wht_4x4_c;
  269|  4.68k|#endif
  270|  4.68k|    assign_itx_all_fn84( 4,  4, );
  ------------------
  |  |  254|  4.68k|    assign_itx_all_fn16(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  231|  4.68k|    assign_itx_all_fn32(w, h, pfx); \
  |  |  |  |  ------------------
  |  |  |  |  |  |  226|  4.68k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  222|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  |  |  |  |  223|  4.68k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  227|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  |  |  |  |  228|  4.68k|        inv_txfm_add_identity_identity_##w##x##h##_c
  |  |  |  |  ------------------
  |  |  |  |  232|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  |  |  |  |  233|  4.68k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  |  |  |  |  234|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  |  |  |  |  235|  4.68k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  |  |  |  |  236|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  |  |  |  |  237|  4.68k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  |  |  |  |  238|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  |  |  |  |  239|  4.68k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  |  |  |  |  240|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  |  |  |  |  241|  4.68k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  |  |  |  |  242|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  |  |  |  |  243|  4.68k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  |  |  |  |  244|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  |  |  |  |  245|  4.68k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  |  |  |  |  246|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  |  |  |  |  247|  4.68k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  |  |  |  |  248|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  |  |  |  |  249|  4.68k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  |  |  |  |  250|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  |  |  |  |  251|  4.68k|        inv_txfm_add_identity_dct_##w##x##h##_c
  |  |  ------------------
  |  |  255|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][H_FLIPADST] = \
  |  |  256|  4.68k|        inv_txfm_add_flipadst_identity_##w##x##h##_c; \
  |  |  257|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][V_FLIPADST] = \
  |  |  258|  4.68k|        inv_txfm_add_identity_flipadst_##w##x##h##_c; \
  |  |  259|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][H_ADST] = \
  |  |  260|  4.68k|        inv_txfm_add_adst_identity_##w##x##h##_c; \
  |  |  261|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][V_ADST] = \
  |  |  262|  4.68k|        inv_txfm_add_identity_adst_##w##x##h##_c; \
  ------------------
  271|  4.68k|    assign_itx_all_fn84( 4,  8, R);
  ------------------
  |  |  254|  4.68k|    assign_itx_all_fn16(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  231|  4.68k|    assign_itx_all_fn32(w, h, pfx); \
  |  |  |  |  ------------------
  |  |  |  |  |  |  226|  4.68k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  222|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  |  |  |  |  223|  4.68k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  227|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  |  |  |  |  228|  4.68k|        inv_txfm_add_identity_identity_##w##x##h##_c
  |  |  |  |  ------------------
  |  |  |  |  232|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  |  |  |  |  233|  4.68k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  |  |  |  |  234|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  |  |  |  |  235|  4.68k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  |  |  |  |  236|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  |  |  |  |  237|  4.68k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  |  |  |  |  238|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  |  |  |  |  239|  4.68k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  |  |  |  |  240|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  |  |  |  |  241|  4.68k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  |  |  |  |  242|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  |  |  |  |  243|  4.68k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  |  |  |  |  244|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  |  |  |  |  245|  4.68k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  |  |  |  |  246|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  |  |  |  |  247|  4.68k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  |  |  |  |  248|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  |  |  |  |  249|  4.68k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  |  |  |  |  250|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  |  |  |  |  251|  4.68k|        inv_txfm_add_identity_dct_##w##x##h##_c
  |  |  ------------------
  |  |  255|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][H_FLIPADST] = \
  |  |  256|  4.68k|        inv_txfm_add_flipadst_identity_##w##x##h##_c; \
  |  |  257|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][V_FLIPADST] = \
  |  |  258|  4.68k|        inv_txfm_add_identity_flipadst_##w##x##h##_c; \
  |  |  259|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][H_ADST] = \
  |  |  260|  4.68k|        inv_txfm_add_adst_identity_##w##x##h##_c; \
  |  |  261|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][V_ADST] = \
  |  |  262|  4.68k|        inv_txfm_add_identity_adst_##w##x##h##_c; \
  ------------------
  272|  4.68k|    assign_itx_all_fn84( 4, 16, R);
  ------------------
  |  |  254|  4.68k|    assign_itx_all_fn16(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  231|  4.68k|    assign_itx_all_fn32(w, h, pfx); \
  |  |  |  |  ------------------
  |  |  |  |  |  |  226|  4.68k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  222|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  |  |  |  |  223|  4.68k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  227|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  |  |  |  |  228|  4.68k|        inv_txfm_add_identity_identity_##w##x##h##_c
  |  |  |  |  ------------------
  |  |  |  |  232|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  |  |  |  |  233|  4.68k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  |  |  |  |  234|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  |  |  |  |  235|  4.68k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  |  |  |  |  236|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  |  |  |  |  237|  4.68k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  |  |  |  |  238|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  |  |  |  |  239|  4.68k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  |  |  |  |  240|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  |  |  |  |  241|  4.68k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  |  |  |  |  242|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  |  |  |  |  243|  4.68k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  |  |  |  |  244|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  |  |  |  |  245|  4.68k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  |  |  |  |  246|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  |  |  |  |  247|  4.68k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  |  |  |  |  248|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  |  |  |  |  249|  4.68k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  |  |  |  |  250|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  |  |  |  |  251|  4.68k|        inv_txfm_add_identity_dct_##w##x##h##_c
  |  |  ------------------
  |  |  255|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][H_FLIPADST] = \
  |  |  256|  4.68k|        inv_txfm_add_flipadst_identity_##w##x##h##_c; \
  |  |  257|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][V_FLIPADST] = \
  |  |  258|  4.68k|        inv_txfm_add_identity_flipadst_##w##x##h##_c; \
  |  |  259|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][H_ADST] = \
  |  |  260|  4.68k|        inv_txfm_add_adst_identity_##w##x##h##_c; \
  |  |  261|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][V_ADST] = \
  |  |  262|  4.68k|        inv_txfm_add_identity_adst_##w##x##h##_c; \
  ------------------
  273|  4.68k|    assign_itx_all_fn84( 8,  4, R);
  ------------------
  |  |  254|  4.68k|    assign_itx_all_fn16(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  231|  4.68k|    assign_itx_all_fn32(w, h, pfx); \
  |  |  |  |  ------------------
  |  |  |  |  |  |  226|  4.68k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  222|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  |  |  |  |  223|  4.68k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  227|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  |  |  |  |  228|  4.68k|        inv_txfm_add_identity_identity_##w##x##h##_c
  |  |  |  |  ------------------
  |  |  |  |  232|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  |  |  |  |  233|  4.68k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  |  |  |  |  234|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  |  |  |  |  235|  4.68k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  |  |  |  |  236|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  |  |  |  |  237|  4.68k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  |  |  |  |  238|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  |  |  |  |  239|  4.68k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  |  |  |  |  240|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  |  |  |  |  241|  4.68k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  |  |  |  |  242|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  |  |  |  |  243|  4.68k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  |  |  |  |  244|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  |  |  |  |  245|  4.68k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  |  |  |  |  246|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  |  |  |  |  247|  4.68k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  |  |  |  |  248|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  |  |  |  |  249|  4.68k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  |  |  |  |  250|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  |  |  |  |  251|  4.68k|        inv_txfm_add_identity_dct_##w##x##h##_c
  |  |  ------------------
  |  |  255|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][H_FLIPADST] = \
  |  |  256|  4.68k|        inv_txfm_add_flipadst_identity_##w##x##h##_c; \
  |  |  257|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][V_FLIPADST] = \
  |  |  258|  4.68k|        inv_txfm_add_identity_flipadst_##w##x##h##_c; \
  |  |  259|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][H_ADST] = \
  |  |  260|  4.68k|        inv_txfm_add_adst_identity_##w##x##h##_c; \
  |  |  261|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][V_ADST] = \
  |  |  262|  4.68k|        inv_txfm_add_identity_adst_##w##x##h##_c; \
  ------------------
  274|  4.68k|    assign_itx_all_fn84( 8,  8, );
  ------------------
  |  |  254|  4.68k|    assign_itx_all_fn16(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  231|  4.68k|    assign_itx_all_fn32(w, h, pfx); \
  |  |  |  |  ------------------
  |  |  |  |  |  |  226|  4.68k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  222|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  |  |  |  |  223|  4.68k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  227|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  |  |  |  |  228|  4.68k|        inv_txfm_add_identity_identity_##w##x##h##_c
  |  |  |  |  ------------------
  |  |  |  |  232|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  |  |  |  |  233|  4.68k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  |  |  |  |  234|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  |  |  |  |  235|  4.68k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  |  |  |  |  236|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  |  |  |  |  237|  4.68k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  |  |  |  |  238|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  |  |  |  |  239|  4.68k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  |  |  |  |  240|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  |  |  |  |  241|  4.68k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  |  |  |  |  242|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  |  |  |  |  243|  4.68k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  |  |  |  |  244|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  |  |  |  |  245|  4.68k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  |  |  |  |  246|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  |  |  |  |  247|  4.68k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  |  |  |  |  248|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  |  |  |  |  249|  4.68k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  |  |  |  |  250|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  |  |  |  |  251|  4.68k|        inv_txfm_add_identity_dct_##w##x##h##_c
  |  |  ------------------
  |  |  255|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][H_FLIPADST] = \
  |  |  256|  4.68k|        inv_txfm_add_flipadst_identity_##w##x##h##_c; \
  |  |  257|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][V_FLIPADST] = \
  |  |  258|  4.68k|        inv_txfm_add_identity_flipadst_##w##x##h##_c; \
  |  |  259|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][H_ADST] = \
  |  |  260|  4.68k|        inv_txfm_add_adst_identity_##w##x##h##_c; \
  |  |  261|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][V_ADST] = \
  |  |  262|  4.68k|        inv_txfm_add_identity_adst_##w##x##h##_c; \
  ------------------
  275|  4.68k|    assign_itx_all_fn84( 8, 16, R);
  ------------------
  |  |  254|  4.68k|    assign_itx_all_fn16(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  231|  4.68k|    assign_itx_all_fn32(w, h, pfx); \
  |  |  |  |  ------------------
  |  |  |  |  |  |  226|  4.68k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  222|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  |  |  |  |  223|  4.68k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  227|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  |  |  |  |  228|  4.68k|        inv_txfm_add_identity_identity_##w##x##h##_c
  |  |  |  |  ------------------
  |  |  |  |  232|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  |  |  |  |  233|  4.68k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  |  |  |  |  234|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  |  |  |  |  235|  4.68k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  |  |  |  |  236|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  |  |  |  |  237|  4.68k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  |  |  |  |  238|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  |  |  |  |  239|  4.68k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  |  |  |  |  240|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  |  |  |  |  241|  4.68k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  |  |  |  |  242|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  |  |  |  |  243|  4.68k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  |  |  |  |  244|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  |  |  |  |  245|  4.68k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  |  |  |  |  246|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  |  |  |  |  247|  4.68k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  |  |  |  |  248|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  |  |  |  |  249|  4.68k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  |  |  |  |  250|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  |  |  |  |  251|  4.68k|        inv_txfm_add_identity_dct_##w##x##h##_c
  |  |  ------------------
  |  |  255|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][H_FLIPADST] = \
  |  |  256|  4.68k|        inv_txfm_add_flipadst_identity_##w##x##h##_c; \
  |  |  257|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][V_FLIPADST] = \
  |  |  258|  4.68k|        inv_txfm_add_identity_flipadst_##w##x##h##_c; \
  |  |  259|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][H_ADST] = \
  |  |  260|  4.68k|        inv_txfm_add_adst_identity_##w##x##h##_c; \
  |  |  261|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][V_ADST] = \
  |  |  262|  4.68k|        inv_txfm_add_identity_adst_##w##x##h##_c; \
  ------------------
  276|  4.68k|    assign_itx_all_fn32( 8, 32, R);
  ------------------
  |  |  226|  4.68k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  222|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  223|  4.68k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  ------------------
  |  |  227|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  228|  4.68k|        inv_txfm_add_identity_identity_##w##x##h##_c
  ------------------
  277|  4.68k|    assign_itx_all_fn84(16,  4, R);
  ------------------
  |  |  254|  4.68k|    assign_itx_all_fn16(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  231|  4.68k|    assign_itx_all_fn32(w, h, pfx); \
  |  |  |  |  ------------------
  |  |  |  |  |  |  226|  4.68k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  222|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  |  |  |  |  223|  4.68k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  227|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  |  |  |  |  228|  4.68k|        inv_txfm_add_identity_identity_##w##x##h##_c
  |  |  |  |  ------------------
  |  |  |  |  232|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  |  |  |  |  233|  4.68k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  |  |  |  |  234|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  |  |  |  |  235|  4.68k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  |  |  |  |  236|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  |  |  |  |  237|  4.68k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  |  |  |  |  238|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  |  |  |  |  239|  4.68k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  |  |  |  |  240|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  |  |  |  |  241|  4.68k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  |  |  |  |  242|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  |  |  |  |  243|  4.68k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  |  |  |  |  244|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  |  |  |  |  245|  4.68k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  |  |  |  |  246|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  |  |  |  |  247|  4.68k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  |  |  |  |  248|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  |  |  |  |  249|  4.68k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  |  |  |  |  250|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  |  |  |  |  251|  4.68k|        inv_txfm_add_identity_dct_##w##x##h##_c
  |  |  ------------------
  |  |  255|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][H_FLIPADST] = \
  |  |  256|  4.68k|        inv_txfm_add_flipadst_identity_##w##x##h##_c; \
  |  |  257|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][V_FLIPADST] = \
  |  |  258|  4.68k|        inv_txfm_add_identity_flipadst_##w##x##h##_c; \
  |  |  259|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][H_ADST] = \
  |  |  260|  4.68k|        inv_txfm_add_adst_identity_##w##x##h##_c; \
  |  |  261|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][V_ADST] = \
  |  |  262|  4.68k|        inv_txfm_add_identity_adst_##w##x##h##_c; \
  ------------------
  278|  4.68k|    assign_itx_all_fn84(16,  8, R);
  ------------------
  |  |  254|  4.68k|    assign_itx_all_fn16(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  231|  4.68k|    assign_itx_all_fn32(w, h, pfx); \
  |  |  |  |  ------------------
  |  |  |  |  |  |  226|  4.68k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  222|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  |  |  |  |  223|  4.68k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  227|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  |  |  |  |  228|  4.68k|        inv_txfm_add_identity_identity_##w##x##h##_c
  |  |  |  |  ------------------
  |  |  |  |  232|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  |  |  |  |  233|  4.68k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  |  |  |  |  234|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  |  |  |  |  235|  4.68k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  |  |  |  |  236|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  |  |  |  |  237|  4.68k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  |  |  |  |  238|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  |  |  |  |  239|  4.68k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  |  |  |  |  240|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  |  |  |  |  241|  4.68k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  |  |  |  |  242|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  |  |  |  |  243|  4.68k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  |  |  |  |  244|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  |  |  |  |  245|  4.68k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  |  |  |  |  246|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  |  |  |  |  247|  4.68k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  |  |  |  |  248|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  |  |  |  |  249|  4.68k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  |  |  |  |  250|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  |  |  |  |  251|  4.68k|        inv_txfm_add_identity_dct_##w##x##h##_c
  |  |  ------------------
  |  |  255|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][H_FLIPADST] = \
  |  |  256|  4.68k|        inv_txfm_add_flipadst_identity_##w##x##h##_c; \
  |  |  257|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][V_FLIPADST] = \
  |  |  258|  4.68k|        inv_txfm_add_identity_flipadst_##w##x##h##_c; \
  |  |  259|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][H_ADST] = \
  |  |  260|  4.68k|        inv_txfm_add_adst_identity_##w##x##h##_c; \
  |  |  261|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][V_ADST] = \
  |  |  262|  4.68k|        inv_txfm_add_identity_adst_##w##x##h##_c; \
  ------------------
  279|  4.68k|    assign_itx_all_fn16(16, 16, );
  ------------------
  |  |  231|  4.68k|    assign_itx_all_fn32(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  226|  4.68k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  |  |  ------------------
  |  |  |  |  |  |  222|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  |  |  223|  4.68k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  |  |  ------------------
  |  |  |  |  227|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  |  |  228|  4.68k|        inv_txfm_add_identity_identity_##w##x##h##_c
  |  |  ------------------
  |  |  232|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  |  |  233|  4.68k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  |  |  234|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  |  |  235|  4.68k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  |  |  236|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  |  |  237|  4.68k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  |  |  238|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  |  |  239|  4.68k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  |  |  240|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  |  |  241|  4.68k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  |  |  242|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  |  |  243|  4.68k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  |  |  244|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  |  |  245|  4.68k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  |  |  246|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  |  |  247|  4.68k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  |  |  248|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  |  |  249|  4.68k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  |  |  250|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  |  |  251|  4.68k|        inv_txfm_add_identity_dct_##w##x##h##_c
  ------------------
  280|  4.68k|    assign_itx_all_fn32(16, 32, R);
  ------------------
  |  |  226|  4.68k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  222|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  223|  4.68k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  ------------------
  |  |  227|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  228|  4.68k|        inv_txfm_add_identity_identity_##w##x##h##_c
  ------------------
  281|  4.68k|    assign_itx_all_fn64(16, 64, R);
  ------------------
  |  |  222|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  223|  4.68k|        inv_txfm_add_dct_dct_##w##x##h##_c
  ------------------
  282|  4.68k|    assign_itx_all_fn32(32,  8, R);
  ------------------
  |  |  226|  4.68k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  222|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  223|  4.68k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  ------------------
  |  |  227|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  228|  4.68k|        inv_txfm_add_identity_identity_##w##x##h##_c
  ------------------
  283|  4.68k|    assign_itx_all_fn32(32, 16, R);
  ------------------
  |  |  226|  4.68k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  222|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  223|  4.68k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  ------------------
  |  |  227|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  228|  4.68k|        inv_txfm_add_identity_identity_##w##x##h##_c
  ------------------
  284|  4.68k|    assign_itx_all_fn32(32, 32, );
  ------------------
  |  |  226|  4.68k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  222|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  223|  4.68k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  ------------------
  |  |  227|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  228|  4.68k|        inv_txfm_add_identity_identity_##w##x##h##_c
  ------------------
  285|  4.68k|    assign_itx_all_fn64(32, 64, R);
  ------------------
  |  |  222|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  223|  4.68k|        inv_txfm_add_dct_dct_##w##x##h##_c
  ------------------
  286|  4.68k|    assign_itx_all_fn64(64, 16, R);
  ------------------
  |  |  222|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  223|  4.68k|        inv_txfm_add_dct_dct_##w##x##h##_c
  ------------------
  287|  4.68k|    assign_itx_all_fn64(64, 32, R);
  ------------------
  |  |  222|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  223|  4.68k|        inv_txfm_add_dct_dct_##w##x##h##_c
  ------------------
  288|  4.68k|    assign_itx_all_fn64(64, 64, );
  ------------------
  |  |  222|  4.68k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  223|  4.68k|        inv_txfm_add_dct_dct_##w##x##h##_c
  ------------------
  289|       |
  290|  4.68k|    int all_simd = 0;
  291|  4.68k|#if HAVE_ASM
  292|       |#if ARCH_AARCH64 || ARCH_ARM
  293|       |    itx_dsp_init_arm(c, bpc, &all_simd);
  294|       |#endif
  295|       |#if ARCH_LOONGARCH64
  296|       |    itx_dsp_init_loongarch(c, bpc);
  297|       |#endif
  298|       |#if ARCH_PPC64LE
  299|       |    itx_dsp_init_ppc(c, bpc);
  300|       |#endif
  301|       |#if ARCH_RISCV
  302|       |    itx_dsp_init_riscv(c, bpc);
  303|       |#endif
  304|  4.68k|#if ARCH_X86
  305|  4.68k|    itx_dsp_init_x86(c, bpc, &all_simd);
  306|  4.68k|#endif
  307|  4.68k|#endif
  308|       |
  309|  4.68k|    if (!all_simd)
  ------------------
  |  Branch (309:9): [True: 2.36k, False: 2.31k]
  ------------------
  310|  2.36k|        dav1d_init_last_nonzero_col_from_eob_tables();
  311|  4.68k|}

dav1d_copy_lpf_8bpc:
  106|  43.9k|{
  107|  43.9k|    const int have_tt = f->c->n_tc > 1;
  108|  43.9k|    const int resize = f->frame_hdr->width[0] != f->frame_hdr->width[1];
  109|  43.9k|    const int offset = 8 * !!sby;
  110|  43.9k|    const ptrdiff_t *const src_stride = f->cur.stride;
  111|  43.9k|    const ptrdiff_t *const lr_stride = f->sr_cur.p.stride;
  112|  43.9k|    const int tt_off = have_tt * sby * (4 << f->seq_hdr->sb128);
  113|  43.9k|    pixel *const dst[3] = {
  114|  43.9k|        f->lf.lr_lpf_line[0] + tt_off * PXSTRIDE(lr_stride[0]),
  ------------------
  |  |   53|  43.9k|#define PXSTRIDE(x) (x)
  ------------------
  115|  43.9k|        f->lf.lr_lpf_line[1] + tt_off * PXSTRIDE(lr_stride[1]),
  ------------------
  |  |   53|  43.9k|#define PXSTRIDE(x) (x)
  ------------------
  116|  43.9k|        f->lf.lr_lpf_line[2] + tt_off * PXSTRIDE(lr_stride[1])
  ------------------
  |  |   53|  43.9k|#define PXSTRIDE(x) (x)
  ------------------
  117|  43.9k|    };
  118|       |
  119|       |    // TODO Also check block level restore type to reduce copying.
  120|  43.9k|    const int restore_planes = f->lf.restore_planes;
  121|       |
  122|  43.9k|    if (f->seq_hdr->cdef || restore_planes & LR_RESTORE_Y) {
  ------------------
  |  Branch (122:9): [True: 32.6k, False: 11.2k]
  |  Branch (122:29): [True: 8.45k, False: 2.77k]
  ------------------
  123|  41.1k|        const int h = f->cur.p.h;
  124|  41.1k|        const int w = f->bw << 2;
  125|  41.1k|        const int row_h = imin((sby + 1) << (6 + f->seq_hdr->sb128), h - 1);
  126|  41.1k|        const int y_stripe = (sby << (6 + f->seq_hdr->sb128)) - offset;
  127|  41.1k|        if (restore_planes & LR_RESTORE_Y || !resize)
  ------------------
  |  Branch (127:13): [True: 16.4k, False: 24.6k]
  |  Branch (127:46): [True: 23.9k, False: 723]
  ------------------
  128|  40.4k|            backup_lpf(f, dst[0], lr_stride[0],
  129|  40.4k|                       src[0] - offset * PXSTRIDE(src_stride[0]), src_stride[0],
  ------------------
  |  |   53|  40.4k|#define PXSTRIDE(x) (x)
  ------------------
  130|  40.4k|                       0, f->seq_hdr->sb128, y_stripe, row_h, w, h, 0, 1);
  131|  41.1k|        if (have_tt && resize) {
  ------------------
  |  Branch (131:13): [True: 0, False: 41.1k]
  |  Branch (131:24): [True: 0, False: 0]
  ------------------
  132|      0|            const ptrdiff_t cdef_off_y = sby * 4 * PXSTRIDE(src_stride[0]);
  ------------------
  |  |   53|      0|#define PXSTRIDE(x) (x)
  ------------------
  133|      0|            backup_lpf(f, f->lf.cdef_lpf_line[0] + cdef_off_y, src_stride[0],
  134|      0|                       src[0] - offset * PXSTRIDE(src_stride[0]), src_stride[0],
  ------------------
  |  |   53|      0|#define PXSTRIDE(x) (x)
  ------------------
  135|      0|                       0, f->seq_hdr->sb128, y_stripe, row_h, w, h, 0, 0);
  136|      0|        }
  137|  41.1k|    }
  138|  43.9k|    if ((f->seq_hdr->cdef || restore_planes & (LR_RESTORE_U | LR_RESTORE_V)) &&
  ------------------
  |  Branch (138:10): [True: 32.6k, False: 11.2k]
  |  Branch (138:30): [True: 3.77k, False: 7.46k]
  ------------------
  139|  36.4k|        f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400)
  ------------------
  |  Branch (139:9): [True: 12.8k, False: 23.6k]
  ------------------
  140|  12.8k|    {
  141|  12.8k|        const int ss_ver = f->sr_cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
  142|  12.8k|        const int ss_hor = f->sr_cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
  143|  12.8k|        const int h = (f->cur.p.h + ss_ver) >> ss_ver;
  144|  12.8k|        const int w = f->bw << (2 - ss_hor);
  145|  12.8k|        const int row_h = imin((sby + 1) << ((6 - ss_ver) + f->seq_hdr->sb128), h - 1);
  146|  12.8k|        const int offset_uv = offset >> ss_ver;
  147|  12.8k|        const int y_stripe = (sby << ((6 - ss_ver) + f->seq_hdr->sb128)) - offset_uv;
  148|  12.8k|        const ptrdiff_t cdef_off_uv = sby * 4 * PXSTRIDE(src_stride[1]);
  ------------------
  |  |   53|  12.8k|#define PXSTRIDE(x) (x)
  ------------------
  149|  12.8k|        if (f->seq_hdr->cdef || restore_planes & LR_RESTORE_U) {
  ------------------
  |  Branch (149:13): [True: 9.04k, False: 3.77k]
  |  Branch (149:33): [True: 660, False: 3.11k]
  ------------------
  150|  9.70k|            if (restore_planes & LR_RESTORE_U || !resize)
  ------------------
  |  Branch (150:17): [True: 1.95k, False: 7.75k]
  |  Branch (150:50): [True: 7.20k, False: 550]
  ------------------
  151|  9.15k|                backup_lpf(f, dst[1], lr_stride[1],
  152|  9.15k|                           src[1] - offset_uv * PXSTRIDE(src_stride[1]),
  ------------------
  |  |   53|  9.15k|#define PXSTRIDE(x) (x)
  ------------------
  153|  9.15k|                           src_stride[1], ss_ver, f->seq_hdr->sb128, y_stripe,
  154|  9.15k|                           row_h, w, h, ss_hor, 1);
  155|  9.70k|            if (have_tt && resize)
  ------------------
  |  Branch (155:17): [True: 0, False: 9.70k]
  |  Branch (155:28): [True: 0, False: 0]
  ------------------
  156|      0|                backup_lpf(f, f->lf.cdef_lpf_line[1] + cdef_off_uv, src_stride[1],
  157|      0|                           src[1] - offset_uv * PXSTRIDE(src_stride[1]),
  ------------------
  |  |   53|      0|#define PXSTRIDE(x) (x)
  ------------------
  158|      0|                           src_stride[1], ss_ver, f->seq_hdr->sb128, y_stripe,
  159|      0|                           row_h, w, h, ss_hor, 0);
  160|  9.70k|        }
  161|  12.8k|        if (f->seq_hdr->cdef || restore_planes & LR_RESTORE_V) {
  ------------------
  |  Branch (161:13): [True: 9.04k, False: 3.77k]
  |  Branch (161:33): [True: 3.50k, False: 266]
  ------------------
  162|  12.5k|            if (restore_planes & LR_RESTORE_V || !resize)
  ------------------
  |  Branch (162:17): [True: 5.16k, False: 7.38k]
  |  Branch (162:50): [True: 6.47k, False: 905]
  ------------------
  163|  11.6k|                backup_lpf(f, dst[2], lr_stride[1],
  164|  11.6k|                           src[2] - offset_uv * PXSTRIDE(src_stride[1]),
  ------------------
  |  |   53|  11.6k|#define PXSTRIDE(x) (x)
  ------------------
  165|  11.6k|                           src_stride[1], ss_ver, f->seq_hdr->sb128, y_stripe,
  166|  11.6k|                           row_h, w, h, ss_hor, 1);
  167|  12.5k|            if (have_tt && resize)
  ------------------
  |  Branch (167:17): [True: 0, False: 12.5k]
  |  Branch (167:28): [True: 0, False: 0]
  ------------------
  168|      0|                backup_lpf(f, f->lf.cdef_lpf_line[2] + cdef_off_uv, src_stride[1],
  169|      0|                           src[2] - offset_uv * PXSTRIDE(src_stride[1]),
  ------------------
  |  |   53|      0|#define PXSTRIDE(x) (x)
  ------------------
  170|      0|                           src_stride[1], ss_ver, f->seq_hdr->sb128, y_stripe,
  171|      0|                           row_h, w, h, ss_hor, 0);
  172|  12.5k|        }
  173|  12.8k|    }
  174|  43.9k|}
dav1d_loopfilter_sbrow_cols_8bpc:
  316|  32.3k|{
  317|  32.3k|    int x, have_left;
  318|       |    // Don't filter outside the frame
  319|  32.3k|    const int is_sb64 = !f->seq_hdr->sb128;
  320|  32.3k|    const int starty4 = (sby & is_sb64) << 4;
  321|  32.3k|    const int sbsz = 32 >> is_sb64;
  322|  32.3k|    const int sbl2 = 5 - is_sb64;
  323|  32.3k|    const int halign = (f->bh + 31) & ~31;
  324|  32.3k|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
  325|  32.3k|    const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
  326|  32.3k|    const int vmask = 16 >> ss_ver, hmask = 16 >> ss_hor;
  327|  32.3k|    const unsigned vmax = 1U << vmask, hmax = 1U << hmask;
  328|  32.3k|    const unsigned endy4 = starty4 + imin(f->h4 - sby * sbsz, sbsz);
  329|  32.3k|    const unsigned uv_endy4 = (endy4 + ss_ver) >> ss_ver;
  330|       |
  331|       |    // fix lpf strength at tile col boundaries
  332|  32.3k|    const uint8_t *lpf_y = &f->lf.tx_lpf_right_edge[0][sby << sbl2];
  333|  32.3k|    const uint8_t *lpf_uv = &f->lf.tx_lpf_right_edge[1][sby << (sbl2 - ss_ver)];
  334|  33.4k|    for (int tile_col = 1;; tile_col++) {
  335|  33.4k|        x = f->frame_hdr->tiling.col_start_sb[tile_col];
  336|  33.4k|        if ((x << sbl2) >= f->bw) break;
  ------------------
  |  Branch (336:13): [True: 32.3k, False: 1.09k]
  ------------------
  337|  1.09k|        const int bx4 = x & is_sb64 ? 16 : 0, cbx4 = bx4 >> ss_hor;
  ------------------
  |  Branch (337:25): [True: 457, False: 637]
  ------------------
  338|  1.09k|        x >>= is_sb64;
  339|       |
  340|  1.09k|        uint16_t (*const y_hmask)[2] = lflvl[x].filter_y[0][bx4];
  341|  26.9k|        for (unsigned y = starty4, mask = 1 << y; y < endy4; y++, mask <<= 1) {
  ------------------
  |  Branch (341:51): [True: 25.8k, False: 1.09k]
  ------------------
  342|  25.8k|            const int sidx = mask >= 0x10000U;
  343|  25.8k|            const unsigned smask = mask >> (sidx << 4);
  344|  25.8k|            const int idx = 2 * !!(y_hmask[2][sidx] & smask) +
  345|  25.8k|                                !!(y_hmask[1][sidx] & smask);
  346|  25.8k|            y_hmask[2][sidx] &= ~smask;
  347|  25.8k|            y_hmask[1][sidx] &= ~smask;
  348|  25.8k|            y_hmask[0][sidx] &= ~smask;
  349|  25.8k|            y_hmask[imin(idx, lpf_y[y - starty4])][sidx] |= smask;
  350|  25.8k|        }
  351|       |
  352|  1.09k|        if (f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400) {
  ------------------
  |  Branch (352:13): [True: 322, False: 772]
  ------------------
  353|    322|            uint16_t (*const uv_hmask)[2] = lflvl[x].filter_uv[0][cbx4];
  354|  4.08k|            for (unsigned y = starty4 >> ss_ver, uv_mask = 1 << y; y < uv_endy4;
  ------------------
  |  Branch (354:68): [True: 3.76k, False: 322]
  ------------------
  355|  3.76k|                 y++, uv_mask <<= 1)
  356|  3.76k|            {
  357|  3.76k|                const int sidx = uv_mask >= vmax;
  358|  3.76k|                const unsigned smask = uv_mask >> (sidx << (4 - ss_ver));
  359|  3.76k|                const int idx = !!(uv_hmask[1][sidx] & smask);
  360|  3.76k|                uv_hmask[1][sidx] &= ~smask;
  361|  3.76k|                uv_hmask[0][sidx] &= ~smask;
  362|  3.76k|                uv_hmask[imin(idx, lpf_uv[y - (starty4 >> ss_ver)])][sidx] |= smask;
  363|  3.76k|            }
  364|    322|        }
  365|  1.09k|        lpf_y  += halign;
  366|  1.09k|        lpf_uv += halign >> ss_ver;
  367|  1.09k|    }
  368|       |
  369|       |    // fix lpf strength at tile row boundaries
  370|  32.3k|    if (start_of_tile_row) {
  ------------------
  |  Branch (370:9): [True: 452, False: 31.9k]
  ------------------
  371|    452|        const BlockContext *a;
  372|    452|        for (x = 0, a = &f->a[f->sb128w * (start_of_tile_row - 1)];
  373|  1.86k|             x < f->sb128w; x++, a++)
  ------------------
  |  Branch (373:14): [True: 1.41k, False: 452]
  ------------------
  374|  1.41k|        {
  375|  1.41k|            uint16_t (*const y_vmask)[2] = lflvl[x].filter_y[1][starty4];
  376|  1.41k|            const unsigned w = imin(32, f->w4 - (x << 5));
  377|  36.8k|            for (unsigned mask = 1, i = 0; i < w; mask <<= 1, i++) {
  ------------------
  |  Branch (377:44): [True: 35.4k, False: 1.41k]
  ------------------
  378|  35.4k|                const int sidx = mask >= 0x10000U;
  379|  35.4k|                const unsigned smask = mask >> (sidx << 4);
  380|  35.4k|                const int idx = 2 * !!(y_vmask[2][sidx] & smask) +
  381|  35.4k|                                    !!(y_vmask[1][sidx] & smask);
  382|  35.4k|                y_vmask[2][sidx] &= ~smask;
  383|  35.4k|                y_vmask[1][sidx] &= ~smask;
  384|  35.4k|                y_vmask[0][sidx] &= ~smask;
  385|  35.4k|                y_vmask[imin(idx, a->tx_lpf_y[i])][sidx] |= smask;
  386|  35.4k|            }
  387|       |
  388|  1.41k|            if (f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400) {
  ------------------
  |  Branch (388:17): [True: 1.15k, False: 253]
  ------------------
  389|  1.15k|                const unsigned cw = (w + ss_hor) >> ss_hor;
  390|  1.15k|                uint16_t (*const uv_vmask)[2] = lflvl[x].filter_uv[1][starty4 >> ss_ver];
  391|  27.7k|                for (unsigned uv_mask = 1, i = 0; i < cw; uv_mask <<= 1, i++) {
  ------------------
  |  Branch (391:51): [True: 26.6k, False: 1.15k]
  ------------------
  392|  26.6k|                    const int sidx = uv_mask >= hmax;
  393|  26.6k|                    const unsigned smask = uv_mask >> (sidx << (4 - ss_hor));
  394|  26.6k|                    const int idx = !!(uv_vmask[1][sidx] & smask);
  395|  26.6k|                    uv_vmask[1][sidx] &= ~smask;
  396|  26.6k|                    uv_vmask[0][sidx] &= ~smask;
  397|  26.6k|                    uv_vmask[imin(idx, a->tx_lpf_uv[i])][sidx] |= smask;
  398|  26.6k|                }
  399|  1.15k|            }
  400|  1.41k|        }
  401|    452|    }
  402|       |
  403|  32.3k|    pixel *ptr;
  404|  32.3k|    uint8_t (*level_ptr)[4] = f->lf.level + f->b4_stride * sby * sbsz;
  405|  78.5k|    for (ptr = p[0], have_left = 0, x = 0; x < f->sb128w;
  ------------------
  |  Branch (405:44): [True: 46.1k, False: 32.3k]
  ------------------
  406|  46.1k|         x++, have_left = 1, ptr += 128, level_ptr += 32)
  407|  46.1k|    {
  408|  46.1k|        filter_plane_cols_y(f, have_left, level_ptr, f->b4_stride,
  409|  46.1k|                            lflvl[x].filter_y[0], ptr, f->cur.stride[0],
  410|  46.1k|                            imin(32, f->w4 - x * 32), starty4, endy4);
  411|  46.1k|    }
  412|       |
  413|  32.3k|    if (!f->frame_hdr->loopfilter.level_u && !f->frame_hdr->loopfilter.level_v)
  ------------------
  |  Branch (413:9): [True: 23.4k, False: 8.91k]
  |  Branch (413:46): [True: 20.1k, False: 3.37k]
  ------------------
  414|  20.1k|        return;
  415|       |
  416|  12.2k|    ptrdiff_t uv_off;
  417|  12.2k|    level_ptr = f->lf.level + f->b4_stride * (sby * sbsz >> ss_ver);
  418|  32.2k|    for (uv_off = 0, have_left = 0, x = 0; x < f->sb128w;
  ------------------
  |  Branch (418:44): [True: 19.9k, False: 12.2k]
  ------------------
  419|  19.9k|         x++, have_left = 1, uv_off += 128 >> ss_hor, level_ptr += 32 >> ss_hor)
  420|  19.9k|    {
  421|  19.9k|        filter_plane_cols_uv(f, have_left, level_ptr, f->b4_stride,
  422|  19.9k|                             lflvl[x].filter_uv[0],
  423|  19.9k|                             &p[1][uv_off], &p[2][uv_off], f->cur.stride[1],
  424|  19.9k|                             (imin(32, f->w4 - x * 32) + ss_hor) >> ss_hor,
  425|  19.9k|                             starty4 >> ss_ver, uv_endy4, ss_ver);
  426|  19.9k|    }
  427|  12.2k|}
dav1d_loopfilter_sbrow_rows_8bpc:
  432|  32.3k|{
  433|  32.3k|    int x;
  434|       |    // Don't filter outside the frame
  435|  32.3k|    const int have_top = sby > 0;
  436|  32.3k|    const int is_sb64 = !f->seq_hdr->sb128;
  437|  32.3k|    const int starty4 = (sby & is_sb64) << 4;
  438|  32.3k|    const int sbsz = 32 >> is_sb64;
  439|  32.3k|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
  440|  32.3k|    const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
  441|  32.3k|    const unsigned endy4 = starty4 + imin(f->h4 - sby * sbsz, sbsz);
  442|  32.3k|    const unsigned uv_endy4 = (endy4 + ss_ver) >> ss_ver;
  443|       |
  444|  32.3k|    pixel *ptr;
  445|  32.3k|    uint8_t (*level_ptr)[4] = f->lf.level + f->b4_stride * sby * sbsz;
  446|  78.5k|    for (ptr = p[0], x = 0; x < f->sb128w; x++, ptr += 128, level_ptr += 32) {
  ------------------
  |  Branch (446:29): [True: 46.1k, False: 32.3k]
  ------------------
  447|  46.1k|        filter_plane_rows_y(f, have_top, level_ptr, f->b4_stride,
  448|  46.1k|                            lflvl[x].filter_y[1], ptr, f->cur.stride[0],
  449|  46.1k|                            imin(32, f->w4 - x * 32), starty4, endy4);
  450|  46.1k|    }
  451|       |
  452|  32.3k|    if (!f->frame_hdr->loopfilter.level_u && !f->frame_hdr->loopfilter.level_v)
  ------------------
  |  Branch (452:9): [True: 23.4k, False: 8.91k]
  |  Branch (452:46): [True: 20.1k, False: 3.37k]
  ------------------
  453|  20.1k|        return;
  454|       |
  455|  12.2k|    ptrdiff_t uv_off;
  456|  12.2k|    level_ptr = f->lf.level + f->b4_stride * (sby * sbsz >> ss_ver);
  457|  32.2k|    for (uv_off = 0, x = 0; x < f->sb128w;
  ------------------
  |  Branch (457:29): [True: 19.9k, False: 12.2k]
  ------------------
  458|  19.9k|         x++, uv_off += 128 >> ss_hor, level_ptr += 32 >> ss_hor)
  459|  19.9k|    {
  460|  19.9k|        filter_plane_rows_uv(f, have_top, level_ptr, f->b4_stride,
  461|  19.9k|                             lflvl[x].filter_uv[1],
  462|  19.9k|                             &p[1][uv_off], &p[2][uv_off], f->cur.stride[1],
  463|  19.9k|                             (imin(32, f->w4 - x * 32) + ss_hor) >> ss_hor,
  464|  19.9k|                             starty4 >> ss_ver, uv_endy4, ss_hor);
  465|  19.9k|    }
  466|  12.2k|}
lf_apply_tmpl.c:backup_lpf:
   47|   110k|{
   48|   110k|    const int cdef_backup = !lr_backup;
   49|   110k|    const int dst_w = f->frame_hdr->super_res.enabled ?
  ------------------
  |  Branch (49:23): [True: 12.3k, False: 97.9k]
  ------------------
   50|  97.9k|                      (f->frame_hdr->width[1] + ss_hor) >> ss_hor : src_w;
   51|       |
   52|       |    // The first stripe of the frame is shorter by 8 luma pixel rows.
   53|   110k|    int stripe_h = ((64 << (cdef_backup & sb128)) - 8 * !row) >> ss_ver;
   54|   110k|    src += (stripe_h - 2) * PXSTRIDE(src_stride);
  ------------------
  |  |   53|   110k|#define PXSTRIDE(x) (x)
  ------------------
   55|       |
   56|   110k|    if (f->c->n_tc == 1) {
  ------------------
  |  Branch (56:9): [True: 110k, False: 0]
  ------------------
   57|   110k|        if (row) {
  ------------------
  |  Branch (57:13): [True: 90.2k, False: 20.0k]
  ------------------
   58|  90.2k|            const int top = 4 << sb128;
   59|       |            // Copy the top part of the stored loop filtered pixels from the
   60|       |            // previous sb row needed above the first stripe of this sb row.
   61|  90.2k|            pixel_copy(&dst[PXSTRIDE(dst_stride) *  0],
  ------------------
  |  |   47|  90.2k|#define pixel_copy memcpy
  ------------------
                          pixel_copy(&dst[PXSTRIDE(dst_stride) *  0],
  ------------------
  |  |   53|  90.2k|#define PXSTRIDE(x) (x)
  ------------------
   62|  90.2k|                       &dst[PXSTRIDE(dst_stride) *  top],      dst_w);
  ------------------
  |  |   53|  90.2k|#define PXSTRIDE(x) (x)
  ------------------
   63|  90.2k|            pixel_copy(&dst[PXSTRIDE(dst_stride) *  1],
  ------------------
  |  |   47|  90.2k|#define pixel_copy memcpy
  ------------------
                          pixel_copy(&dst[PXSTRIDE(dst_stride) *  1],
  ------------------
  |  |   53|  90.2k|#define PXSTRIDE(x) (x)
  ------------------
   64|  90.2k|                       &dst[PXSTRIDE(dst_stride) * (top + 1)], dst_w);
  ------------------
  |  |   53|  90.2k|#define PXSTRIDE(x) (x)
  ------------------
   65|  90.2k|            pixel_copy(&dst[PXSTRIDE(dst_stride) *  2],
  ------------------
  |  |   47|  90.2k|#define pixel_copy memcpy
  ------------------
                          pixel_copy(&dst[PXSTRIDE(dst_stride) *  2],
  ------------------
  |  |   53|  90.2k|#define PXSTRIDE(x) (x)
  ------------------
   66|  90.2k|                       &dst[PXSTRIDE(dst_stride) * (top + 2)], dst_w);
  ------------------
  |  |   53|  90.2k|#define PXSTRIDE(x) (x)
  ------------------
   67|  90.2k|            pixel_copy(&dst[PXSTRIDE(dst_stride) *  3],
  ------------------
  |  |   47|  90.2k|#define pixel_copy memcpy
  ------------------
                          pixel_copy(&dst[PXSTRIDE(dst_stride) *  3],
  ------------------
  |  |   53|  90.2k|#define PXSTRIDE(x) (x)
  ------------------
   68|  90.2k|                       &dst[PXSTRIDE(dst_stride) * (top + 3)], dst_w);
  ------------------
  |  |   53|  90.2k|#define PXSTRIDE(x) (x)
  ------------------
   69|  90.2k|        }
   70|   110k|        dst += 4 * PXSTRIDE(dst_stride);
  ------------------
  |  |   53|   110k|#define PXSTRIDE(x) (x)
  ------------------
   71|   110k|    }
   72|       |
   73|   110k|    if (lr_backup && (f->frame_hdr->width[0] != f->frame_hdr->width[1])) {
  ------------------
  |  Branch (73:9): [True: 110k, False: 0]
  |  Branch (73:22): [True: 9.40k, False: 100k]
  ------------------
   74|  22.8k|        while (row + stripe_h <= row_h) {
  ------------------
  |  Branch (74:16): [True: 13.4k, False: 9.40k]
  ------------------
   75|  13.4k|            const int n_lines = 4 - (row + stripe_h + 1 == h);
   76|  13.4k|            f->dsp->mc.resize(dst, dst_stride, src, src_stride,
   77|  13.4k|                              dst_w, n_lines, src_w, f->resize_step[ss_hor],
   78|  13.4k|                              f->resize_start[ss_hor] HIGHBD_CALL_SUFFIX);
   79|  13.4k|            row += stripe_h; // unmodified stripe_h for the 1st stripe
   80|  13.4k|            stripe_h = 64 >> ss_ver;
   81|  13.4k|            src += stripe_h * PXSTRIDE(src_stride);
  ------------------
  |  |   53|  13.4k|#define PXSTRIDE(x) (x)
  ------------------
   82|  13.4k|            dst += n_lines * PXSTRIDE(dst_stride);
  ------------------
  |  |   53|  13.4k|#define PXSTRIDE(x) (x)
  ------------------
   83|  13.4k|            if (n_lines == 3) {
  ------------------
  |  Branch (83:17): [True: 1.02k, False: 12.3k]
  ------------------
   84|  1.02k|                pixel_copy(dst, &dst[-PXSTRIDE(dst_stride)], dst_w);
  ------------------
  |  |   47|  1.02k|#define pixel_copy memcpy
  ------------------
                              pixel_copy(dst, &dst[-PXSTRIDE(dst_stride)], dst_w);
  ------------------
  |  |   53|  1.02k|#define PXSTRIDE(x) (x)
  ------------------
   85|  1.02k|                dst += PXSTRIDE(dst_stride);
  ------------------
  |  |   53|  1.02k|#define PXSTRIDE(x) (x)
  ------------------
   86|  1.02k|            }
   87|  13.4k|        }
   88|   100k|    } else {
   89|   219k|        while (row + stripe_h <= row_h) {
  ------------------
  |  Branch (89:16): [True: 118k, False: 100k]
  ------------------
   90|   118k|            const int n_lines = 4 - (row + stripe_h + 1 == h);
   91|   593k|            for (int i = 0; i < 4; i++) {
  ------------------
  |  Branch (91:29): [True: 474k, False: 118k]
  ------------------
   92|   474k|                pixel_copy(dst, i == n_lines ? &dst[-PXSTRIDE(dst_stride)] :
  ------------------
  |  |   47|   474k|#define pixel_copy memcpy
  ------------------
                              pixel_copy(dst, i == n_lines ? &dst[-PXSTRIDE(dst_stride)] :
  ------------------
  |  |   53|    786|#define PXSTRIDE(x) (x)
  ------------------
  |  Branch (92:33): [True: 786, False: 473k]
  ------------------
   93|   474k|                                               src, src_w);
   94|   474k|                dst += PXSTRIDE(dst_stride);
  ------------------
  |  |   53|   474k|#define PXSTRIDE(x) (x)
  ------------------
   95|   474k|                src += PXSTRIDE(src_stride);
  ------------------
  |  |   53|   474k|#define PXSTRIDE(x) (x)
  ------------------
   96|   474k|            }
   97|   118k|            row += stripe_h; // unmodified stripe_h for the 1st stripe
   98|   118k|            stripe_h = 64 >> ss_ver;
   99|   118k|            src += (stripe_h - 4) * PXSTRIDE(src_stride);
  ------------------
  |  |   53|   118k|#define PXSTRIDE(x) (x)
  ------------------
  100|   118k|        }
  101|   100k|    }
  102|   110k|}
lf_apply_tmpl.c:filter_plane_cols_y:
  184|  89.5k|{
  185|  89.5k|    const Dav1dDSPContext *const dsp = f->dsp;
  186|       |
  187|       |    // filter edges between columns (e.g. block1 | block2)
  188|  2.34M|    for (int x = 0; x < w; x++) {
  ------------------
  |  Branch (188:21): [True: 2.25M, False: 89.5k]
  ------------------
  189|  2.25M|        if (!have_left && !x) continue;
  ------------------
  |  Branch (189:13): [True: 1.22M, False: 1.03M]
  |  Branch (189:27): [True: 56.3k, False: 1.16M]
  ------------------
  190|  2.19M|        uint32_t hmask[4];
  191|  2.19M|        if (!starty4) {
  ------------------
  |  Branch (191:13): [True: 1.90M, False: 296k]
  ------------------
  192|  1.90M|            hmask[0] = mask[x][0][0];
  193|  1.90M|            hmask[1] = mask[x][1][0];
  194|  1.90M|            hmask[2] = mask[x][2][0];
  195|  1.90M|            if (endy4 > 16) {
  ------------------
  |  Branch (195:17): [True: 1.48M, False: 411k]
  ------------------
  196|  1.48M|                hmask[0] |= (unsigned) mask[x][0][1] << 16;
  197|  1.48M|                hmask[1] |= (unsigned) mask[x][1][1] << 16;
  198|  1.48M|                hmask[2] |= (unsigned) mask[x][2][1] << 16;
  199|  1.48M|            }
  200|  1.90M|        } else {
  201|   296k|            hmask[0] = mask[x][0][1];
  202|   296k|            hmask[1] = mask[x][1][1];
  203|   296k|            hmask[2] = mask[x][2][1];
  204|   296k|        }
  205|  2.19M|        hmask[3] = 0;
  206|  2.19M|        dsp->lf.loop_filter_sb[0][0](&dst[x * 4], ls, hmask,
  207|  2.19M|                                     (const uint8_t(*)[4]) &lvl[x][0], b4_stride,
  208|  2.19M|                                     &f->lf.lim_lut, endy4 - starty4 HIGHBD_CALL_SUFFIX);
  209|  2.19M|    }
  210|  89.5k|}
lf_apply_tmpl.c:filter_plane_cols_uv:
  251|  33.1k|{
  252|  33.1k|    const Dav1dDSPContext *const dsp = f->dsp;
  253|       |
  254|       |    // filter edges between columns (e.g. block1 | block2)
  255|   635k|    for (int x = 0; x < w; x++) {
  ------------------
  |  Branch (255:21): [True: 602k, False: 33.1k]
  ------------------
  256|   602k|        if (!have_left && !x) continue;
  ------------------
  |  Branch (256:13): [True: 312k, False: 290k]
  |  Branch (256:27): [True: 20.4k, False: 292k]
  ------------------
  257|   582k|        uint32_t hmask[3];
  258|   582k|        if (!starty4) {
  ------------------
  |  Branch (258:13): [True: 533k, False: 48.4k]
  ------------------
  259|   533k|            hmask[0] = mask[x][0][0];
  260|   533k|            hmask[1] = mask[x][1][0];
  261|   533k|            if (endy4 > (16 >> ss_ver)) {
  ------------------
  |  Branch (261:17): [True: 435k, False: 98.5k]
  ------------------
  262|   435k|                hmask[0] |= (unsigned) mask[x][0][1] << (16 >> ss_ver);
  263|   435k|                hmask[1] |= (unsigned) mask[x][1][1] << (16 >> ss_ver);
  264|   435k|            }
  265|   533k|        } else {
  266|  48.4k|            hmask[0] = mask[x][0][1];
  267|  48.4k|            hmask[1] = mask[x][1][1];
  268|  48.4k|        }
  269|   582k|        hmask[2] = 0;
  270|   582k|        dsp->lf.loop_filter_sb[1][0](&u[x * 4], ls, hmask,
  271|   582k|                                     (const uint8_t(*)[4]) &lvl[x][2], b4_stride,
  272|   582k|                                     &f->lf.lim_lut, endy4 - starty4 HIGHBD_CALL_SUFFIX);
  273|   582k|        dsp->lf.loop_filter_sb[1][0](&v[x * 4], ls, hmask,
  274|   582k|                                     (const uint8_t(*)[4]) &lvl[x][3], b4_stride,
  275|   582k|                                     &f->lf.lim_lut, endy4 - starty4 HIGHBD_CALL_SUFFIX);
  276|   582k|    }
  277|  33.1k|}
lf_apply_tmpl.c:filter_plane_rows_y:
  220|  89.5k|{
  221|  89.5k|    const Dav1dDSPContext *const dsp = f->dsp;
  222|       |
  223|       |    //                                 block1
  224|       |    // filter edges between rows (e.g. ------)
  225|       |    //                                 block2
  226|  2.15M|    for (int y = starty4; y < endy4;
  ------------------
  |  Branch (226:27): [True: 2.06M, False: 89.5k]
  ------------------
  227|  2.06M|         y++, dst += 4 * PXSTRIDE(ls), lvl += b4_stride)
  ------------------
  |  |   53|  2.06M|#define PXSTRIDE(x) (x)
  ------------------
  228|  2.06M|    {
  229|  2.06M|        if (!have_top && !y) continue;
  ------------------
  |  Branch (229:13): [True: 323k, False: 1.74M]
  |  Branch (229:26): [True: 21.3k, False: 302k]
  ------------------
  230|  2.04M|        const uint32_t vmask[4] = {
  231|  2.04M|            mask[y][0][0] | ((unsigned) mask[y][0][1] << 16),
  232|  2.04M|            mask[y][1][0] | ((unsigned) mask[y][1][1] << 16),
  233|  2.04M|            mask[y][2][0] | ((unsigned) mask[y][2][1] << 16),
  234|  2.04M|            0,
  235|  2.04M|        };
  236|  2.04M|        dsp->lf.loop_filter_sb[0][1](dst, ls, vmask,
  237|  2.04M|                                     (const uint8_t(*)[4]) &lvl[0][1], b4_stride,
  238|  2.04M|                                     &f->lf.lim_lut, w HIGHBD_CALL_SUFFIX);
  239|  2.04M|    }
  240|  89.5k|}
lf_apply_tmpl.c:filter_plane_rows_uv:
  288|  33.1k|{
  289|  33.1k|    const Dav1dDSPContext *const dsp = f->dsp;
  290|  33.1k|    ptrdiff_t off_l = 0;
  291|       |
  292|       |    //                                 block1
  293|       |    // filter edges between rows (e.g. ------)
  294|       |    //                                 block2
  295|   638k|    for (int y = starty4; y < endy4;
  ------------------
  |  Branch (295:27): [True: 605k, False: 33.1k]
  ------------------
  296|   605k|         y++, off_l += 4 * PXSTRIDE(ls), lvl += b4_stride)
  ------------------
  |  |   53|   605k|#define PXSTRIDE(x) (x)
  ------------------
  297|   605k|    {
  298|   605k|        if (!have_top && !y) continue;
  ------------------
  |  Branch (298:13): [True: 130k, False: 474k]
  |  Branch (298:26): [True: 6.51k, False: 124k]
  ------------------
  299|   598k|        const uint32_t vmask[3] = {
  300|   598k|            mask[y][0][0] | ((unsigned) mask[y][0][1] << (16 >> ss_hor)),
  301|   598k|            mask[y][1][0] | ((unsigned) mask[y][1][1] << (16 >> ss_hor)),
  302|   598k|            0,
  303|   598k|        };
  304|   598k|        dsp->lf.loop_filter_sb[1][1](&u[off_l], ls, vmask,
  305|   598k|                                     (const uint8_t(*)[4]) &lvl[0][2], b4_stride,
  306|   598k|                                     &f->lf.lim_lut, w HIGHBD_CALL_SUFFIX);
  307|   598k|        dsp->lf.loop_filter_sb[1][1](&v[off_l], ls, vmask,
  308|   598k|                                     (const uint8_t(*)[4]) &lvl[0][3], b4_stride,
  309|   598k|                                     &f->lf.lim_lut, w HIGHBD_CALL_SUFFIX);
  310|   598k|    }
  311|  33.1k|}
dav1d_copy_lpf_16bpc:
  106|  30.9k|{
  107|  30.9k|    const int have_tt = f->c->n_tc > 1;
  108|  30.9k|    const int resize = f->frame_hdr->width[0] != f->frame_hdr->width[1];
  109|  30.9k|    const int offset = 8 * !!sby;
  110|  30.9k|    const ptrdiff_t *const src_stride = f->cur.stride;
  111|  30.9k|    const ptrdiff_t *const lr_stride = f->sr_cur.p.stride;
  112|  30.9k|    const int tt_off = have_tt * sby * (4 << f->seq_hdr->sb128);
  113|  30.9k|    pixel *const dst[3] = {
  114|  30.9k|        f->lf.lr_lpf_line[0] + tt_off * PXSTRIDE(lr_stride[0]),
  115|  30.9k|        f->lf.lr_lpf_line[1] + tt_off * PXSTRIDE(lr_stride[1]),
  116|  30.9k|        f->lf.lr_lpf_line[2] + tt_off * PXSTRIDE(lr_stride[1])
  117|  30.9k|    };
  118|       |
  119|       |    // TODO Also check block level restore type to reduce copying.
  120|  30.9k|    const int restore_planes = f->lf.restore_planes;
  121|       |
  122|  30.9k|    if (f->seq_hdr->cdef || restore_planes & LR_RESTORE_Y) {
  ------------------
  |  Branch (122:9): [True: 26.0k, False: 4.95k]
  |  Branch (122:29): [True: 4.12k, False: 830]
  ------------------
  123|  30.1k|        const int h = f->cur.p.h;
  124|  30.1k|        const int w = f->bw << 2;
  125|  30.1k|        const int row_h = imin((sby + 1) << (6 + f->seq_hdr->sb128), h - 1);
  126|  30.1k|        const int y_stripe = (sby << (6 + f->seq_hdr->sb128)) - offset;
  127|  30.1k|        if (restore_planes & LR_RESTORE_Y || !resize)
  ------------------
  |  Branch (127:13): [True: 12.5k, False: 17.5k]
  |  Branch (127:46): [True: 16.7k, False: 791]
  ------------------
  128|  29.3k|            backup_lpf(f, dst[0], lr_stride[0],
  129|  29.3k|                       src[0] - offset * PXSTRIDE(src_stride[0]), src_stride[0],
  130|  29.3k|                       0, f->seq_hdr->sb128, y_stripe, row_h, w, h, 0, 1);
  131|  30.1k|        if (have_tt && resize) {
  ------------------
  |  Branch (131:13): [True: 0, False: 30.1k]
  |  Branch (131:24): [True: 0, False: 0]
  ------------------
  132|      0|            const ptrdiff_t cdef_off_y = sby * 4 * PXSTRIDE(src_stride[0]);
  133|      0|            backup_lpf(f, f->lf.cdef_lpf_line[0] + cdef_off_y, src_stride[0],
  134|      0|                       src[0] - offset * PXSTRIDE(src_stride[0]), src_stride[0],
  135|      0|                       0, f->seq_hdr->sb128, y_stripe, row_h, w, h, 0, 0);
  136|      0|        }
  137|  30.1k|    }
  138|  30.9k|    if ((f->seq_hdr->cdef || restore_planes & (LR_RESTORE_U | LR_RESTORE_V)) &&
  ------------------
  |  Branch (138:10): [True: 26.0k, False: 4.95k]
  |  Branch (138:30): [True: 1.82k, False: 3.12k]
  ------------------
  139|  27.8k|        f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400)
  ------------------
  |  Branch (139:9): [True: 11.3k, False: 16.5k]
  ------------------
  140|  11.3k|    {
  141|  11.3k|        const int ss_ver = f->sr_cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
  142|  11.3k|        const int ss_hor = f->sr_cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
  143|  11.3k|        const int h = (f->cur.p.h + ss_ver) >> ss_ver;
  144|  11.3k|        const int w = f->bw << (2 - ss_hor);
  145|  11.3k|        const int row_h = imin((sby + 1) << ((6 - ss_ver) + f->seq_hdr->sb128), h - 1);
  146|  11.3k|        const int offset_uv = offset >> ss_ver;
  147|  11.3k|        const int y_stripe = (sby << ((6 - ss_ver) + f->seq_hdr->sb128)) - offset_uv;
  148|  11.3k|        const ptrdiff_t cdef_off_uv = sby * 4 * PXSTRIDE(src_stride[1]);
  149|  11.3k|        if (f->seq_hdr->cdef || restore_planes & LR_RESTORE_U) {
  ------------------
  |  Branch (149:13): [True: 9.50k, False: 1.82k]
  |  Branch (149:33): [True: 1.31k, False: 514]
  ------------------
  150|  10.8k|            if (restore_planes & LR_RESTORE_U || !resize)
  ------------------
  |  Branch (150:17): [True: 2.83k, False: 7.98k]
  |  Branch (150:50): [True: 7.31k, False: 667]
  ------------------
  151|  10.1k|                backup_lpf(f, dst[1], lr_stride[1],
  152|  10.1k|                           src[1] - offset_uv * PXSTRIDE(src_stride[1]),
  153|  10.1k|                           src_stride[1], ss_ver, f->seq_hdr->sb128, y_stripe,
  154|  10.1k|                           row_h, w, h, ss_hor, 1);
  155|  10.8k|            if (have_tt && resize)
  ------------------
  |  Branch (155:17): [True: 0, False: 10.8k]
  |  Branch (155:28): [True: 0, False: 0]
  ------------------
  156|      0|                backup_lpf(f, f->lf.cdef_lpf_line[1] + cdef_off_uv, src_stride[1],
  157|      0|                           src[1] - offset_uv * PXSTRIDE(src_stride[1]),
  158|      0|                           src_stride[1], ss_ver, f->seq_hdr->sb128, y_stripe,
  159|      0|                           row_h, w, h, ss_hor, 0);
  160|  10.8k|        }
  161|  11.3k|        if (f->seq_hdr->cdef || restore_planes & LR_RESTORE_V) {
  ------------------
  |  Branch (161:13): [True: 9.50k, False: 1.82k]
  |  Branch (161:33): [True: 1.21k, False: 613]
  ------------------
  162|  10.7k|            if (restore_planes & LR_RESTORE_V || !resize)
  ------------------
  |  Branch (162:17): [True: 2.66k, False: 8.05k]
  |  Branch (162:50): [True: 6.87k, False: 1.17k]
  ------------------
  163|  9.54k|                backup_lpf(f, dst[2], lr_stride[1],
  164|  9.54k|                           src[2] - offset_uv * PXSTRIDE(src_stride[1]),
  165|  9.54k|                           src_stride[1], ss_ver, f->seq_hdr->sb128, y_stripe,
  166|  9.54k|                           row_h, w, h, ss_hor, 1);
  167|  10.7k|            if (have_tt && resize)
  ------------------
  |  Branch (167:17): [True: 0, False: 10.7k]
  |  Branch (167:28): [True: 0, False: 0]
  ------------------
  168|      0|                backup_lpf(f, f->lf.cdef_lpf_line[2] + cdef_off_uv, src_stride[1],
  169|      0|                           src[2] - offset_uv * PXSTRIDE(src_stride[1]),
  170|      0|                           src_stride[1], ss_ver, f->seq_hdr->sb128, y_stripe,
  171|      0|                           row_h, w, h, ss_hor, 0);
  172|  10.7k|        }
  173|  11.3k|    }
  174|  30.9k|}
dav1d_loopfilter_sbrow_cols_16bpc:
  316|  24.0k|{
  317|  24.0k|    int x, have_left;
  318|       |    // Don't filter outside the frame
  319|  24.0k|    const int is_sb64 = !f->seq_hdr->sb128;
  320|  24.0k|    const int starty4 = (sby & is_sb64) << 4;
  321|  24.0k|    const int sbsz = 32 >> is_sb64;
  322|  24.0k|    const int sbl2 = 5 - is_sb64;
  323|  24.0k|    const int halign = (f->bh + 31) & ~31;
  324|  24.0k|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
  325|  24.0k|    const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
  326|  24.0k|    const int vmask = 16 >> ss_ver, hmask = 16 >> ss_hor;
  327|  24.0k|    const unsigned vmax = 1U << vmask, hmax = 1U << hmask;
  328|  24.0k|    const unsigned endy4 = starty4 + imin(f->h4 - sby * sbsz, sbsz);
  329|  24.0k|    const unsigned uv_endy4 = (endy4 + ss_ver) >> ss_ver;
  330|       |
  331|       |    // fix lpf strength at tile col boundaries
  332|  24.0k|    const uint8_t *lpf_y = &f->lf.tx_lpf_right_edge[0][sby << sbl2];
  333|  24.0k|    const uint8_t *lpf_uv = &f->lf.tx_lpf_right_edge[1][sby << (sbl2 - ss_ver)];
  334|  25.1k|    for (int tile_col = 1;; tile_col++) {
  335|  25.1k|        x = f->frame_hdr->tiling.col_start_sb[tile_col];
  336|  25.1k|        if ((x << sbl2) >= f->bw) break;
  ------------------
  |  Branch (336:13): [True: 24.0k, False: 1.12k]
  ------------------
  337|  1.12k|        const int bx4 = x & is_sb64 ? 16 : 0, cbx4 = bx4 >> ss_hor;
  ------------------
  |  Branch (337:25): [True: 540, False: 582]
  ------------------
  338|  1.12k|        x >>= is_sb64;
  339|       |
  340|  1.12k|        uint16_t (*const y_hmask)[2] = lflvl[x].filter_y[0][bx4];
  341|  27.3k|        for (unsigned y = starty4, mask = 1 << y; y < endy4; y++, mask <<= 1) {
  ------------------
  |  Branch (341:51): [True: 26.2k, False: 1.12k]
  ------------------
  342|  26.2k|            const int sidx = mask >= 0x10000U;
  343|  26.2k|            const unsigned smask = mask >> (sidx << 4);
  344|  26.2k|            const int idx = 2 * !!(y_hmask[2][sidx] & smask) +
  345|  26.2k|                                !!(y_hmask[1][sidx] & smask);
  346|  26.2k|            y_hmask[2][sidx] &= ~smask;
  347|  26.2k|            y_hmask[1][sidx] &= ~smask;
  348|  26.2k|            y_hmask[0][sidx] &= ~smask;
  349|  26.2k|            y_hmask[imin(idx, lpf_y[y - starty4])][sidx] |= smask;
  350|  26.2k|        }
  351|       |
  352|  1.12k|        if (f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400) {
  ------------------
  |  Branch (352:13): [True: 409, False: 713]
  ------------------
  353|    409|            uint16_t (*const uv_hmask)[2] = lflvl[x].filter_uv[0][cbx4];
  354|  4.28k|            for (unsigned y = starty4 >> ss_ver, uv_mask = 1 << y; y < uv_endy4;
  ------------------
  |  Branch (354:68): [True: 3.87k, False: 409]
  ------------------
  355|  3.87k|                 y++, uv_mask <<= 1)
  356|  3.87k|            {
  357|  3.87k|                const int sidx = uv_mask >= vmax;
  358|  3.87k|                const unsigned smask = uv_mask >> (sidx << (4 - ss_ver));
  359|  3.87k|                const int idx = !!(uv_hmask[1][sidx] & smask);
  360|  3.87k|                uv_hmask[1][sidx] &= ~smask;
  361|  3.87k|                uv_hmask[0][sidx] &= ~smask;
  362|  3.87k|                uv_hmask[imin(idx, lpf_uv[y - (starty4 >> ss_ver)])][sidx] |= smask;
  363|  3.87k|            }
  364|    409|        }
  365|  1.12k|        lpf_y  += halign;
  366|  1.12k|        lpf_uv += halign >> ss_ver;
  367|  1.12k|    }
  368|       |
  369|       |    // fix lpf strength at tile row boundaries
  370|  24.0k|    if (start_of_tile_row) {
  ------------------
  |  Branch (370:9): [True: 450, False: 23.5k]
  ------------------
  371|    450|        const BlockContext *a;
  372|    450|        for (x = 0, a = &f->a[f->sb128w * (start_of_tile_row - 1)];
  373|  2.07k|             x < f->sb128w; x++, a++)
  ------------------
  |  Branch (373:14): [True: 1.62k, False: 450]
  ------------------
  374|  1.62k|        {
  375|  1.62k|            uint16_t (*const y_vmask)[2] = lflvl[x].filter_y[1][starty4];
  376|  1.62k|            const unsigned w = imin(32, f->w4 - (x << 5));
  377|  43.7k|            for (unsigned mask = 1, i = 0; i < w; mask <<= 1, i++) {
  ------------------
  |  Branch (377:44): [True: 42.1k, False: 1.62k]
  ------------------
  378|  42.1k|                const int sidx = mask >= 0x10000U;
  379|  42.1k|                const unsigned smask = mask >> (sidx << 4);
  380|  42.1k|                const int idx = 2 * !!(y_vmask[2][sidx] & smask) +
  381|  42.1k|                                    !!(y_vmask[1][sidx] & smask);
  382|  42.1k|                y_vmask[2][sidx] &= ~smask;
  383|  42.1k|                y_vmask[1][sidx] &= ~smask;
  384|  42.1k|                y_vmask[0][sidx] &= ~smask;
  385|  42.1k|                y_vmask[imin(idx, a->tx_lpf_y[i])][sidx] |= smask;
  386|  42.1k|            }
  387|       |
  388|  1.62k|            if (f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400) {
  ------------------
  |  Branch (388:17): [True: 1.32k, False: 298]
  ------------------
  389|  1.32k|                const unsigned cw = (w + ss_hor) >> ss_hor;
  390|  1.32k|                uint16_t (*const uv_vmask)[2] = lflvl[x].filter_uv[1][starty4 >> ss_ver];
  391|  34.0k|                for (unsigned uv_mask = 1, i = 0; i < cw; uv_mask <<= 1, i++) {
  ------------------
  |  Branch (391:51): [True: 32.7k, False: 1.32k]
  ------------------
  392|  32.7k|                    const int sidx = uv_mask >= hmax;
  393|  32.7k|                    const unsigned smask = uv_mask >> (sidx << (4 - ss_hor));
  394|  32.7k|                    const int idx = !!(uv_vmask[1][sidx] & smask);
  395|  32.7k|                    uv_vmask[1][sidx] &= ~smask;
  396|  32.7k|                    uv_vmask[0][sidx] &= ~smask;
  397|  32.7k|                    uv_vmask[imin(idx, a->tx_lpf_uv[i])][sidx] |= smask;
  398|  32.7k|                }
  399|  1.32k|            }
  400|  1.62k|        }
  401|    450|    }
  402|       |
  403|  24.0k|    pixel *ptr;
  404|  24.0k|    uint8_t (*level_ptr)[4] = f->lf.level + f->b4_stride * sby * sbsz;
  405|  67.3k|    for (ptr = p[0], have_left = 0, x = 0; x < f->sb128w;
  ------------------
  |  Branch (405:44): [True: 43.3k, False: 24.0k]
  ------------------
  406|  43.3k|         x++, have_left = 1, ptr += 128, level_ptr += 32)
  407|  43.3k|    {
  408|  43.3k|        filter_plane_cols_y(f, have_left, level_ptr, f->b4_stride,
  409|  43.3k|                            lflvl[x].filter_y[0], ptr, f->cur.stride[0],
  410|  43.3k|                            imin(32, f->w4 - x * 32), starty4, endy4);
  411|  43.3k|    }
  412|       |
  413|  24.0k|    if (!f->frame_hdr->loopfilter.level_u && !f->frame_hdr->loopfilter.level_v)
  ------------------
  |  Branch (413:9): [True: 19.8k, False: 4.12k]
  |  Branch (413:46): [True: 15.7k, False: 4.08k]
  ------------------
  414|  15.7k|        return;
  415|       |
  416|  8.20k|    ptrdiff_t uv_off;
  417|  8.20k|    level_ptr = f->lf.level + f->b4_stride * (sby * sbsz >> ss_ver);
  418|  21.3k|    for (uv_off = 0, have_left = 0, x = 0; x < f->sb128w;
  ------------------
  |  Branch (418:44): [True: 13.1k, False: 8.20k]
  ------------------
  419|  13.1k|         x++, have_left = 1, uv_off += 128 >> ss_hor, level_ptr += 32 >> ss_hor)
  420|  13.1k|    {
  421|  13.1k|        filter_plane_cols_uv(f, have_left, level_ptr, f->b4_stride,
  422|  13.1k|                             lflvl[x].filter_uv[0],
  423|  13.1k|                             &p[1][uv_off], &p[2][uv_off], f->cur.stride[1],
  424|  13.1k|                             (imin(32, f->w4 - x * 32) + ss_hor) >> ss_hor,
  425|  13.1k|                             starty4 >> ss_ver, uv_endy4, ss_ver);
  426|  13.1k|    }
  427|  8.20k|}
dav1d_loopfilter_sbrow_rows_16bpc:
  432|  24.0k|{
  433|  24.0k|    int x;
  434|       |    // Don't filter outside the frame
  435|  24.0k|    const int have_top = sby > 0;
  436|  24.0k|    const int is_sb64 = !f->seq_hdr->sb128;
  437|  24.0k|    const int starty4 = (sby & is_sb64) << 4;
  438|  24.0k|    const int sbsz = 32 >> is_sb64;
  439|  24.0k|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
  440|  24.0k|    const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
  441|  24.0k|    const unsigned endy4 = starty4 + imin(f->h4 - sby * sbsz, sbsz);
  442|  24.0k|    const unsigned uv_endy4 = (endy4 + ss_ver) >> ss_ver;
  443|       |
  444|  24.0k|    pixel *ptr;
  445|  24.0k|    uint8_t (*level_ptr)[4] = f->lf.level + f->b4_stride * sby * sbsz;
  446|  67.3k|    for (ptr = p[0], x = 0; x < f->sb128w; x++, ptr += 128, level_ptr += 32) {
  ------------------
  |  Branch (446:29): [True: 43.3k, False: 24.0k]
  ------------------
  447|  43.3k|        filter_plane_rows_y(f, have_top, level_ptr, f->b4_stride,
  448|  43.3k|                            lflvl[x].filter_y[1], ptr, f->cur.stride[0],
  449|  43.3k|                            imin(32, f->w4 - x * 32), starty4, endy4);
  450|  43.3k|    }
  451|       |
  452|  24.0k|    if (!f->frame_hdr->loopfilter.level_u && !f->frame_hdr->loopfilter.level_v)
  ------------------
  |  Branch (452:9): [True: 19.8k, False: 4.12k]
  |  Branch (452:46): [True: 15.7k, False: 4.08k]
  ------------------
  453|  15.7k|        return;
  454|       |
  455|  8.20k|    ptrdiff_t uv_off;
  456|  8.20k|    level_ptr = f->lf.level + f->b4_stride * (sby * sbsz >> ss_ver);
  457|  21.3k|    for (uv_off = 0, x = 0; x < f->sb128w;
  ------------------
  |  Branch (457:29): [True: 13.1k, False: 8.20k]
  ------------------
  458|  13.1k|         x++, uv_off += 128 >> ss_hor, level_ptr += 32 >> ss_hor)
  459|  13.1k|    {
  460|  13.1k|        filter_plane_rows_uv(f, have_top, level_ptr, f->b4_stride,
  461|  13.1k|                             lflvl[x].filter_uv[1],
  462|  13.1k|                             &p[1][uv_off], &p[2][uv_off], f->cur.stride[1],
  463|  13.1k|                             (imin(32, f->w4 - x * 32) + ss_hor) >> ss_hor,
  464|  13.1k|                             starty4 >> ss_ver, uv_endy4, ss_hor);
  465|  13.1k|    }
  466|  8.20k|}

dav1d_create_lf_mask_intra:
  271|   513k|{
  272|   513k|    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
  273|   513k|    const int bw4 = imin(iw - bx, b_dim[0]);
  274|   513k|    const int bh4 = imin(ih - by, b_dim[1]);
  275|   513k|    const int bx4 = bx & 31;
  276|   513k|    const int by4 = by & 31;
  277|   513k|    assert(bw4 >= 0 && bh4 >= 0);
  ------------------
  |  Branch (277:5): [True: 513k, False: 0]
  |  Branch (277:5): [True: 513k, False: 0]
  ------------------
  278|       |
  279|   513k|    if (bw4 && bh4) {
  ------------------
  |  Branch (279:9): [True: 511k, False: 1.82k]
  |  Branch (279:16): [True: 487k, False: 23.7k]
  ------------------
  280|   487k|        uint8_t (*level_cache_ptr)[4] = level_cache + by * b4_stride + bx;
  281|  2.94M|        for (int y = 0; y < bh4; y++) {
  ------------------
  |  Branch (281:25): [True: 2.46M, False: 487k]
  ------------------
  282|  26.4M|            for (int x = 0; x < bw4; x++) {
  ------------------
  |  Branch (282:29): [True: 24.0M, False: 2.46M]
  ------------------
  283|  24.0M|                level_cache_ptr[x][0] = filter_level[0][0][0];
  284|  24.0M|                level_cache_ptr[x][1] = filter_level[1][0][0];
  285|  24.0M|            }
  286|  2.46M|            level_cache_ptr += b4_stride;
  287|  2.46M|        }
  288|       |
  289|   487k|        mask_edges_intra(lflvl->filter_y, by4, bx4, bw4, bh4, ytx, ay, ly);
  290|   487k|    }
  291|       |
  292|   513k|    if (!auv) return;
  ------------------
  |  Branch (292:9): [True: 196k, False: 316k]
  ------------------
  293|       |
  294|   316k|    const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420;
  295|   316k|    const int ss_hor = layout != DAV1D_PIXEL_LAYOUT_I444;
  296|   316k|    const int cbw4 = imin(((iw + ss_hor) >> ss_hor) - (bx >> ss_hor),
  297|   316k|                          (b_dim[0] + ss_hor) >> ss_hor);
  298|   316k|    const int cbh4 = imin(((ih + ss_ver) >> ss_ver) - (by >> ss_ver),
  299|   316k|                          (b_dim[1] + ss_ver) >> ss_ver);
  300|   316k|    assert(cbw4 >= 0 && cbh4 >= 0);
  ------------------
  |  Branch (300:5): [True: 316k, False: 0]
  |  Branch (300:5): [True: 316k, False: 0]
  ------------------
  301|       |
  302|   316k|    if (!cbw4 || !cbh4) return;
  ------------------
  |  Branch (302:9): [True: 795, False: 315k]
  |  Branch (302:18): [True: 11.2k, False: 304k]
  ------------------
  303|       |
  304|   304k|    const int cbx4 = bx4 >> ss_hor;
  305|   304k|    const int cby4 = by4 >> ss_ver;
  306|       |
  307|   304k|    uint8_t (*level_cache_ptr)[4] =
  308|   304k|        level_cache + (by >> ss_ver) * b4_stride + (bx >> ss_hor);
  309|  1.68M|    for (int y = 0; y < cbh4; y++) {
  ------------------
  |  Branch (309:21): [True: 1.37M, False: 304k]
  ------------------
  310|  10.9M|        for (int x = 0; x < cbw4; x++) {
  ------------------
  |  Branch (310:25): [True: 9.55M, False: 1.37M]
  ------------------
  311|  9.55M|            level_cache_ptr[x][2] = filter_level[2][0][0];
  312|  9.55M|            level_cache_ptr[x][3] = filter_level[3][0][0];
  313|  9.55M|        }
  314|  1.37M|        level_cache_ptr += b4_stride;
  315|  1.37M|    }
  316|       |
  317|   304k|    mask_edges_chroma(lflvl->filter_uv, cby4, cbx4, cbw4, cbh4, 0, uvtx,
  318|   304k|                      auv, luv, ss_hor, ss_ver);
  319|   304k|}
dav1d_create_lf_mask_inter:
  334|   747k|{
  335|   747k|    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
  336|   747k|    const int bw4 = imin(iw - bx, b_dim[0]);
  337|   747k|    const int bh4 = imin(ih - by, b_dim[1]);
  338|   747k|    const int bx4 = bx & 31;
  339|   747k|    const int by4 = by & 31;
  340|   747k|    assert(bw4 >= 0 && bh4 >= 0);
  ------------------
  |  Branch (340:5): [True: 747k, False: 0]
  |  Branch (340:5): [True: 747k, False: 0]
  ------------------
  341|       |
  342|   747k|    if (bw4 && bh4) {
  ------------------
  |  Branch (342:9): [True: 745k, False: 2.63k]
  |  Branch (342:16): [True: 741k, False: 3.19k]
  ------------------
  343|   741k|        uint8_t (*level_cache_ptr)[4] = level_cache + by * b4_stride + bx;
  344|  5.17M|        for (int y = 0; y < bh4; y++) {
  ------------------
  |  Branch (344:25): [True: 4.43M, False: 741k]
  ------------------
  345|  59.2M|            for (int x = 0; x < bw4; x++) {
  ------------------
  |  Branch (345:29): [True: 54.8M, False: 4.43M]
  ------------------
  346|  54.8M|                level_cache_ptr[x][0] = filter_level[0][0][0];
  347|  54.8M|                level_cache_ptr[x][1] = filter_level[1][0][0];
  348|  54.8M|            }
  349|  4.43M|            level_cache_ptr += b4_stride;
  350|  4.43M|        }
  351|       |
  352|   741k|        mask_edges_inter(lflvl->filter_y, by4, bx4, bw4, bh4, skip,
  353|   741k|                         max_ytx, tx_masks, ay, ly);
  354|   741k|    }
  355|       |
  356|   747k|    if (!auv) return;
  ------------------
  |  Branch (356:9): [True: 485k, False: 261k]
  ------------------
  357|       |
  358|   261k|    const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420;
  359|   261k|    const int ss_hor = layout != DAV1D_PIXEL_LAYOUT_I444;
  360|   261k|    const int cbw4 = imin(((iw + ss_hor) >> ss_hor) - (bx >> ss_hor),
  361|   261k|                          (b_dim[0] + ss_hor) >> ss_hor);
  362|   261k|    const int cbh4 = imin(((ih + ss_ver) >> ss_ver) - (by >> ss_ver),
  363|   261k|                          (b_dim[1] + ss_ver) >> ss_ver);
  364|   261k|    assert(cbw4 >= 0 && cbh4 >= 0);
  ------------------
  |  Branch (364:5): [True: 261k, False: 0]
  |  Branch (364:5): [True: 261k, False: 0]
  ------------------
  365|       |
  366|   261k|    if (!cbw4 || !cbh4) return;
  ------------------
  |  Branch (366:9): [True: 734, False: 261k]
  |  Branch (366:18): [True: 1.41k, False: 259k]
  ------------------
  367|       |
  368|   259k|    const int cbx4 = bx4 >> ss_hor;
  369|   259k|    const int cby4 = by4 >> ss_ver;
  370|       |
  371|   259k|    uint8_t (*level_cache_ptr)[4] =
  372|   259k|        level_cache + (by >> ss_ver) * b4_stride + (bx >> ss_hor);
  373|  1.37M|    for (int y = 0; y < cbh4; y++) {
  ------------------
  |  Branch (373:21): [True: 1.11M, False: 259k]
  ------------------
  374|  12.3M|        for (int x = 0; x < cbw4; x++) {
  ------------------
  |  Branch (374:25): [True: 11.2M, False: 1.11M]
  ------------------
  375|  11.2M|            level_cache_ptr[x][2] = filter_level[2][0][0];
  376|  11.2M|            level_cache_ptr[x][3] = filter_level[3][0][0];
  377|  11.2M|        }
  378|  1.11M|        level_cache_ptr += b4_stride;
  379|  1.11M|    }
  380|       |
  381|   259k|    mask_edges_chroma(lflvl->filter_uv, cby4, cbx4, cbw4, cbh4, skip, uvtx,
  382|   259k|                      auv, luv, ss_hor, ss_ver);
  383|   259k|}
dav1d_calc_eih:
  385|  14.7k|void dav1d_calc_eih(Av1FilterLUT *const lim_lut, const int filter_sharpness) {
  386|       |    // set E/I/H values from loopfilter level
  387|  14.7k|    const int sharp = filter_sharpness;
  388|   958k|    for (int level = 0; level < 64; level++) {
  ------------------
  |  Branch (388:25): [True: 943k, False: 14.7k]
  ------------------
  389|   943k|        int limit = level;
  390|       |
  391|   943k|        if (sharp > 0) {
  ------------------
  |  Branch (391:13): [True: 468k, False: 475k]
  ------------------
  392|   468k|            limit >>= (sharp + 3) >> 2;
  393|   468k|            limit = imin(limit, 9 - sharp);
  394|   468k|        }
  395|   943k|        limit = imax(limit, 1);
  396|       |
  397|   943k|        lim_lut->i[level] = limit;
  398|   943k|        lim_lut->e[level] = 2 * (level + 2) + limit;
  399|   943k|    }
  400|  14.7k|    lim_lut->sharp[0] = (sharp + 3) >> 2;
  401|  14.7k|    lim_lut->sharp[1] = sharp ? 9 - sharp : 0xff;
  ------------------
  |  Branch (401:25): [True: 7.31k, False: 7.42k]
  ------------------
  402|  14.7k|}
dav1d_calc_lf_values:
  441|  56.5k|{
  442|  56.5k|    const int n_seg = hdr->segmentation.enabled ? 8 : 1;
  ------------------
  |  Branch (442:23): [True: 10.5k, False: 45.9k]
  ------------------
  443|       |
  444|  56.5k|    if (!hdr->loopfilter.level_y[0] && !hdr->loopfilter.level_y[1]) {
  ------------------
  |  Branch (444:9): [True: 38.0k, False: 18.5k]
  |  Branch (444:40): [True: 26.1k, False: 11.8k]
  ------------------
  445|  26.1k|        memset(lflvl_values, 0, sizeof(*lflvl_values) * n_seg);
  446|  26.1k|        return;
  447|  26.1k|    }
  448|       |
  449|  30.3k|    const Dav1dLoopfilterModeRefDeltas *const mr_deltas =
  450|  30.3k|        hdr->loopfilter.mode_ref_delta_enabled ?
  ------------------
  |  Branch (450:9): [True: 21.1k, False: 9.23k]
  ------------------
  451|  30.3k|        &hdr->loopfilter.mode_ref_deltas : NULL;
  452|   118k|    for (int s = 0; s < n_seg; s++) {
  ------------------
  |  Branch (452:21): [True: 88.3k, False: 30.3k]
  ------------------
  453|  88.3k|        const Dav1dSegmentationData *const segd =
  454|  88.3k|            hdr->segmentation.enabled ? &hdr->segmentation.seg_data.d[s] : NULL;
  ------------------
  |  Branch (454:13): [True: 66.2k, False: 22.1k]
  ------------------
  455|       |
  456|  88.3k|        calc_lf_value(lflvl_values[s][0], hdr->loopfilter.level_y[0],
  457|  88.3k|                      lf_delta[0], segd ? segd->delta_lf_y_v : 0, mr_deltas);
  ------------------
  |  Branch (457:36): [True: 66.2k, False: 22.1k]
  ------------------
  458|  88.3k|        calc_lf_value(lflvl_values[s][1], hdr->loopfilter.level_y[1],
  459|  88.3k|                      lf_delta[hdr->delta.lf.multi ? 1 : 0],
  ------------------
  |  Branch (459:32): [True: 29.7k, False: 58.6k]
  ------------------
  460|  88.3k|                      segd ? segd->delta_lf_y_h : 0, mr_deltas);
  ------------------
  |  Branch (460:23): [True: 66.2k, False: 22.1k]
  ------------------
  461|  88.3k|        calc_lf_value_chroma(lflvl_values[s][2], hdr->loopfilter.level_u,
  462|  88.3k|                             lf_delta[hdr->delta.lf.multi ? 2 : 0],
  ------------------
  |  Branch (462:39): [True: 29.7k, False: 58.6k]
  ------------------
  463|  88.3k|                             segd ? segd->delta_lf_u : 0, mr_deltas);
  ------------------
  |  Branch (463:30): [True: 66.2k, False: 22.1k]
  ------------------
  464|  88.3k|        calc_lf_value_chroma(lflvl_values[s][3], hdr->loopfilter.level_v,
  465|  88.3k|                             lf_delta[hdr->delta.lf.multi ? 3 : 0],
  ------------------
  |  Branch (465:39): [True: 29.7k, False: 58.6k]
  ------------------
  466|  88.3k|                             segd ? segd->delta_lf_v : 0, mr_deltas);
  ------------------
  |  Branch (466:30): [True: 66.2k, False: 22.1k]
  ------------------
  467|  88.3k|    }
  468|  30.3k|}
lf_mask.c:mask_edges_intra:
  152|   487k|{
  153|   487k|    const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[tx];
  154|   487k|    const int twl4 = t_dim->lw, thl4 = t_dim->lh;
  155|   487k|    const int twl4c = imin(2, twl4), thl4c = imin(2, thl4);
  156|   487k|    int y, x;
  157|       |
  158|       |    // left block edge
  159|   487k|    unsigned mask = 1U << by4;
  160|  2.94M|    for (y = 0; y < h4; y++, mask <<= 1) {
  ------------------
  |  Branch (160:17): [True: 2.46M, False: 487k]
  ------------------
  161|  2.46M|        const int sidx = mask >= 0x10000;
  162|  2.46M|        const unsigned smask = mask >> (sidx << 4);
  163|  2.46M|        masks[0][bx4][imin(twl4c, l[y])][sidx] |= smask;
  164|  2.46M|    }
  165|       |
  166|       |    // top block edge
  167|  3.12M|    for (x = 0, mask = 1U << bx4; x < w4; x++, mask <<= 1) {
  ------------------
  |  Branch (167:35): [True: 2.64M, False: 487k]
  ------------------
  168|  2.64M|        const int sidx = mask >= 0x10000;
  169|  2.64M|        const unsigned smask = mask >> (sidx << 4);
  170|  2.64M|        masks[1][by4][imin(thl4c, a[x])][sidx] |= smask;
  171|  2.64M|    }
  172|       |
  173|       |    // inner (tx) left|right edges
  174|   487k|    const int hstep = t_dim->w;
  175|   487k|    unsigned t = 1U << by4;
  176|   487k|    unsigned inner = (unsigned) ((((uint64_t) t) << h4) - t);
  177|   487k|    unsigned inner1 = inner & 0xffff, inner2 = inner >> 16;
  178|   641k|    for (x = hstep; x < w4; x += hstep) {
  ------------------
  |  Branch (178:21): [True: 154k, False: 487k]
  ------------------
  179|   154k|        if (inner1) masks[0][bx4 + x][twl4c][0] |= inner1;
  ------------------
  |  Branch (179:13): [True: 134k, False: 20.1k]
  ------------------
  180|   154k|        if (inner2) masks[0][bx4 + x][twl4c][1] |= inner2;
  ------------------
  |  Branch (180:13): [True: 43.2k, False: 110k]
  ------------------
  181|   154k|    }
  182|       |
  183|       |    //            top
  184|       |    // inner (tx) --- edges
  185|       |    //           bottom
  186|   487k|    const int vstep = t_dim->h;
  187|   487k|    t = 1U << bx4;
  188|   487k|    inner = (unsigned) ((((uint64_t) t) << w4) - t);
  189|   487k|    inner1 = inner & 0xffff;
  190|   487k|    inner2 = inner >> 16;
  191|   663k|    for (y = vstep; y < h4; y += vstep) {
  ------------------
  |  Branch (191:21): [True: 175k, False: 487k]
  ------------------
  192|   175k|        if (inner1) masks[1][by4 + y][thl4c][0] |= inner1;
  ------------------
  |  Branch (192:13): [True: 100k, False: 75.4k]
  ------------------
  193|   175k|        if (inner2) masks[1][by4 + y][thl4c][1] |= inner2;
  ------------------
  |  Branch (193:13): [True: 92.8k, False: 82.8k]
  ------------------
  194|   175k|    }
  195|       |
  196|   487k|    dav1d_memset_likely_pow2(a, thl4c, w4);
  197|   487k|    dav1d_memset_likely_pow2(l, twl4c, h4);
  198|   487k|}
lf_mask.c:mask_edges_chroma:
  207|   563k|{
  208|   563k|    const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[tx];
  209|   563k|    const int twl4 = t_dim->lw, thl4 = t_dim->lh;
  210|   563k|    const int twl4c = !!twl4, thl4c = !!thl4;
  211|   563k|    int y, x;
  212|   563k|    const int vbits = 4 - ss_ver, hbits = 4 - ss_hor;
  213|   563k|    const int vmask = 16 >> ss_ver, hmask = 16 >> ss_hor;
  214|   563k|    const unsigned vmax = 1 << vmask, hmax = 1 << hmask;
  215|       |
  216|       |    // left block edge
  217|   563k|    unsigned mask = 1U << cby4;
  218|  3.05M|    for (y = 0; y < ch4; y++, mask <<= 1) {
  ------------------
  |  Branch (218:17): [True: 2.49M, False: 563k]
  ------------------
  219|  2.49M|        const int sidx = mask >= vmax;
  220|  2.49M|        const unsigned smask = mask >> (sidx << vbits);
  221|  2.49M|        masks[0][cbx4][imin(twl4c, l[y])][sidx] |= smask;
  222|  2.49M|    }
  223|       |
  224|       |    // top block edge
  225|  3.11M|    for (x = 0, mask = 1U << cbx4; x < cw4; x++, mask <<= 1) {
  ------------------
  |  Branch (225:36): [True: 2.54M, False: 563k]
  ------------------
  226|  2.54M|        const int sidx = mask >= hmax;
  227|  2.54M|        const unsigned smask = mask >> (sidx << hbits);
  228|  2.54M|        masks[1][cby4][imin(thl4c, a[x])][sidx] |= smask;
  229|  2.54M|    }
  230|       |
  231|   563k|    if (!skip_inter) {
  ------------------
  |  Branch (231:9): [True: 438k, False: 125k]
  ------------------
  232|       |        // inner (tx) left|right edges
  233|   438k|        const int hstep = t_dim->w;
  234|   438k|        unsigned t = 1U << cby4;
  235|   438k|        unsigned inner = (unsigned) ((((uint64_t) t) << ch4) - t);
  236|   438k|        unsigned inner1 = inner & ((1 << vmask) - 1), inner2 = inner >> vmask;
  237|   473k|        for (x = hstep; x < cw4; x += hstep) {
  ------------------
  |  Branch (237:25): [True: 35.6k, False: 438k]
  ------------------
  238|  35.6k|            if (inner1) masks[0][cbx4 + x][twl4c][0] |= inner1;
  ------------------
  |  Branch (238:17): [True: 33.8k, False: 1.77k]
  ------------------
  239|  35.6k|            if (inner2) masks[0][cbx4 + x][twl4c][1] |= inner2;
  ------------------
  |  Branch (239:17): [True: 16.8k, False: 18.7k]
  ------------------
  240|  35.6k|        }
  241|       |
  242|       |        //            top
  243|       |        // inner (tx) --- edges
  244|       |        //           bottom
  245|   438k|        const int vstep = t_dim->h;
  246|   438k|        t = 1U << cbx4;
  247|   438k|        inner = (unsigned) ((((uint64_t) t) << cw4) - t);
  248|   438k|        inner1 = inner & ((1 << hmask) - 1), inner2 = inner >> hmask;
  249|   509k|        for (y = vstep; y < ch4; y += vstep) {
  ------------------
  |  Branch (249:25): [True: 71.5k, False: 438k]
  ------------------
  250|  71.5k|            if (inner1) masks[1][cby4 + y][thl4c][0] |= inner1;
  ------------------
  |  Branch (250:17): [True: 47.4k, False: 24.1k]
  ------------------
  251|  71.5k|            if (inner2) masks[1][cby4 + y][thl4c][1] |= inner2;
  ------------------
  |  Branch (251:17): [True: 41.3k, False: 30.2k]
  ------------------
  252|  71.5k|        }
  253|   438k|    }
  254|       |
  255|   563k|    dav1d_memset_likely_pow2(a, thl4c, cw4);
  256|   563k|    dav1d_memset_likely_pow2(l, twl4c, ch4);
  257|   563k|}
lf_mask.c:mask_edges_inter:
   85|   741k|{
   86|   741k|    const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[max_tx];
   87|   741k|    int y, x;
   88|       |
   89|   741k|    ALIGN_STK_16(uint8_t, txa, 2 /* edge */, [2 /* txsz, step */][32 /* y */][32 /* x */]);
  ------------------
  |  |  100|   741k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|   741k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
   90|  1.53M|    for (int y_off = 0, y = 0; y < h4; y += t_dim->h, y_off++)
  ------------------
  |  Branch (90:32): [True: 795k, False: 741k]
  ------------------
   91|  1.79M|        for (int x_off = 0, x = 0; x < w4; x += t_dim->w, x_off++)
  ------------------
  |  Branch (91:36): [True: 1.00M, False: 795k]
  ------------------
   92|  1.00M|            decomp_tx((uint8_t(*)[2][32][32]) &txa[0][0][y][x],
   93|  1.00M|                      max_tx, 0, y_off, x_off, tx_masks);
   94|       |
   95|       |    // left block edge
   96|   741k|    unsigned mask = 1U << by4;
   97|  5.17M|    for (y = 0; y < h4; y++, mask <<= 1) {
  ------------------
  |  Branch (97:17): [True: 4.43M, False: 741k]
  ------------------
   98|  4.43M|        const int sidx = mask >= 0x10000;
   99|  4.43M|        const unsigned smask = mask >> (sidx << 4);
  100|  4.43M|        masks[0][bx4][imin(txa[0][0][y][0], l[y])][sidx] |= smask;
  101|  4.43M|    }
  102|       |
  103|       |    // top block edge
  104|  4.83M|    for (x = 0, mask = 1U << bx4; x < w4; x++, mask <<= 1) {
  ------------------
  |  Branch (104:35): [True: 4.09M, False: 741k]
  ------------------
  105|  4.09M|        const int sidx = mask >= 0x10000;
  106|  4.09M|        const unsigned smask = mask >> (sidx << 4);
  107|  4.09M|        masks[1][by4][imin(txa[1][0][0][x], a[x])][sidx] |= smask;
  108|  4.09M|    }
  109|       |
  110|   741k|    if (!skip) {
  ------------------
  |  Branch (110:9): [True: 251k, False: 490k]
  ------------------
  111|       |        // inner (tx) left|right edges
  112|  1.25M|        for (y = 0, mask = 1U << by4; y < h4; y++, mask <<= 1) {
  ------------------
  |  Branch (112:39): [True: 1.00M, False: 251k]
  ------------------
  113|  1.00M|            const int sidx = mask >= 0x10000U;
  114|  1.00M|            const unsigned smask = mask >> (sidx << 4);
  115|  1.00M|            int ltx = txa[0][0][y][0];
  116|  1.00M|            int step = txa[0][1][y][0];
  117|  1.35M|            for (x = step; x < w4; x += step) {
  ------------------
  |  Branch (117:28): [True: 347k, False: 1.00M]
  ------------------
  118|   347k|                const int rtx = txa[0][0][y][x];
  119|   347k|                masks[0][bx4 + x][imin(rtx, ltx)][sidx] |= smask;
  120|   347k|                ltx = rtx;
  121|   347k|                step = txa[0][1][y][x];
  122|   347k|            }
  123|  1.00M|        }
  124|       |
  125|       |        //            top
  126|       |        // inner (tx) --- edges
  127|       |        //           bottom
  128|  1.31M|        for (x = 0, mask = 1U << bx4; x < w4; x++, mask <<= 1) {
  ------------------
  |  Branch (128:39): [True: 1.06M, False: 251k]
  ------------------
  129|  1.06M|            const int sidx = mask >= 0x10000U;
  130|  1.06M|            const unsigned smask = mask >> (sidx << 4);
  131|  1.06M|            int ttx = txa[1][0][0][x];
  132|  1.06M|            int step = txa[1][1][0][x];
  133|  1.40M|            for (y = step; y < h4; y += step) {
  ------------------
  |  Branch (133:28): [True: 342k, False: 1.06M]
  ------------------
  134|   342k|                const int btx = txa[1][0][y][x];
  135|   342k|                masks[1][by4 + y][imin(ttx, btx)][sidx] |= smask;
  136|   342k|                ttx = btx;
  137|   342k|                step = txa[1][1][y][x];
  138|   342k|            }
  139|  1.06M|        }
  140|   251k|    }
  141|       |
  142|  5.17M|    for (y = 0; y < h4; y++)
  ------------------
  |  Branch (142:17): [True: 4.43M, False: 741k]
  ------------------
  143|  4.43M|        l[y] = txa[0][0][y][w4 - 1];
  144|   741k|    memcpy(a, txa[1][0][h4 - 1], w4);
  145|   741k|}
lf_mask.c:decomp_tx:
   44|  1.21M|{
   45|  1.21M|    const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[from];
   46|  1.21M|    const int is_split = (from == (int) TX_4X4 || depth > 1) ? 0 :
  ------------------
  |  Branch (46:27): [True: 280k, False: 936k]
  |  Branch (46:51): [True: 56.6k, False: 880k]
  ------------------
   47|  1.21M|        (tx_masks[depth] >> (y_off * 4 + x_off)) & 1;
   48|       |
   49|  1.21M|    if (is_split) {
  ------------------
  |  Branch (49:9): [True: 71.3k, False: 1.14M]
  ------------------
   50|  71.3k|        const enum RectTxfmSize sub = t_dim->sub;
   51|  71.3k|        const int htw4 = t_dim->w >> 1, hth4 = t_dim->h >> 1;
   52|       |
   53|  71.3k|        decomp_tx(txa, sub, depth + 1, y_off * 2 + 0, x_off * 2 + 0, tx_masks);
   54|  71.3k|        if (t_dim->w >= t_dim->h)
  ------------------
  |  Branch (54:13): [True: 56.1k, False: 15.1k]
  ------------------
   55|  56.1k|            decomp_tx((uint8_t(*)[2][32][32]) &txa[0][0][0][htw4],
   56|  56.1k|                      sub, depth + 1, y_off * 2 + 0, x_off * 2 + 1, tx_masks);
   57|  71.3k|        if (t_dim->h >= t_dim->w) {
  ------------------
  |  Branch (57:13): [True: 51.5k, False: 19.7k]
  ------------------
   58|  51.5k|            decomp_tx((uint8_t(*)[2][32][32]) &txa[0][0][hth4][0],
   59|  51.5k|                      sub, depth + 1, y_off * 2 + 1, x_off * 2 + 0, tx_masks);
   60|  51.5k|            if (t_dim->w >= t_dim->h)
  ------------------
  |  Branch (60:17): [True: 36.4k, False: 15.1k]
  ------------------
   61|  36.4k|                decomp_tx((uint8_t(*)[2][32][32]) &txa[0][0][hth4][htw4],
   62|  36.4k|                          sub, depth + 1, y_off * 2 + 1, x_off * 2 + 1, tx_masks);
   63|  51.5k|        }
   64|  1.14M|    } else {
   65|  1.14M|        const int lw = imin(2, t_dim->lw), lh = imin(2, t_dim->lh);
   66|       |
   67|  1.14M|#define set_ctx(rep_macro) \
   68|  1.14M|        for (int y = 0; y < t_dim->h; y++) { \
   69|  1.14M|            rep_macro(txa[0][0][y], 0, lw); \
   70|  1.14M|            rep_macro(txa[1][0][y], 0, lh); \
   71|  1.14M|            txa[0][1][y][0] = t_dim->w; \
   72|  1.14M|        }
   73|  1.14M|        case_set_upto16(t_dim->lw);
  ------------------
  |  |   80|  1.14M|    switch (var) { \
  |  |   81|   350k|    case 0: set_ctx(set_ctx1); break; \
  |  |  ------------------
  |  |  |  |   68|   832k|        for (int y = 0; y < t_dim->h; y++) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (68:25): [True: 481k, False: 350k]
  |  |  |  |  ------------------
  |  |  |  |   69|   481k|            rep_macro(txa[0][0][y], 0, lw); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   81|   481k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   481k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   70|   481k|            rep_macro(txa[1][0][y], 0, lh); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   81|   481k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   481k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   71|   481k|            txa[0][1][y][0] = t_dim->w; \
  |  |  |  |   72|   481k|        }
  |  |  ------------------
  |  |  |  Branch (81:5): [True: 350k, False: 795k]
  |  |  ------------------
  |  |   82|   291k|    case 1: set_ctx(set_ctx2); break; \
  |  |  ------------------
  |  |  |  |   68|  1.07M|        for (int y = 0; y < t_dim->h; y++) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (68:25): [True: 782k, False: 291k]
  |  |  |  |  ------------------
  |  |  |  |   69|   782k|            rep_macro(txa[0][0][y], 0, lw); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   82|   782k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   782k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   70|   782k|            rep_macro(txa[1][0][y], 0, lh); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   82|   782k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   782k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   71|   782k|            txa[0][1][y][0] = t_dim->w; \
  |  |  |  |   72|   782k|        }
  |  |  ------------------
  |  |  |  Branch (82:5): [True: 291k, False: 854k]
  |  |  ------------------
  |  |   83|   238k|    case 2: set_ctx(set_ctx4); break; \
  |  |  ------------------
  |  |  |  |   68|  1.20M|        for (int y = 0; y < t_dim->h; y++) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (68:25): [True: 964k, False: 238k]
  |  |  |  |  ------------------
  |  |  |  |   69|   964k|            rep_macro(txa[0][0][y], 0, lw); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   83|   964k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   964k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   70|   964k|            rep_macro(txa[1][0][y], 0, lh); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   83|   964k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   964k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   71|   964k|            txa[0][1][y][0] = t_dim->w; \
  |  |  |  |   72|   964k|        }
  |  |  ------------------
  |  |  |  Branch (83:5): [True: 238k, False: 907k]
  |  |  ------------------
  |  |   84|  69.0k|    case 3: set_ctx(set_ctx8); break; \
  |  |  ------------------
  |  |  |  |   68|   498k|        for (int y = 0; y < t_dim->h; y++) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (68:25): [True: 429k, False: 69.0k]
  |  |  |  |  ------------------
  |  |  |  |   69|   429k|            rep_macro(txa[0][0][y], 0, lw); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|   429k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   429k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   70|   429k|            rep_macro(txa[1][0][y], 0, lh); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|   429k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   429k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   71|   429k|            txa[0][1][y][0] = t_dim->w; \
  |  |  |  |   72|   429k|        }
  |  |  ------------------
  |  |  |  Branch (84:5): [True: 69.0k, False: 1.07M]
  |  |  ------------------
  |  |   85|   196k|    case 4: set_ctx(set_ctx16); break; \
  |  |  ------------------
  |  |  |  |   68|  3.25M|        for (int y = 0; y < t_dim->h; y++) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (68:25): [True: 3.06M, False: 196k]
  |  |  |  |  ------------------
  |  |  |  |   69|  3.06M|            rep_macro(txa[0][0][y], 0, lw); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  3.06M|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|  3.06M|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|  3.06M|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|  3.06M|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 3.06M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   70|  3.06M|            rep_macro(txa[1][0][y], 0, lh); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  3.06M|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|  3.06M|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|  3.06M|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|  3.06M|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 3.06M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   71|  3.06M|            txa[0][1][y][0] = t_dim->w; \
  |  |  |  |   72|  3.06M|        }
  |  |  ------------------
  |  |  |  Branch (85:5): [True: 196k, False: 949k]
  |  |  ------------------
  |  |   86|      0|    default: assert(0); \
  |  |  ------------------
  |  |  |  Branch (86:5): [True: 0, False: 1.14M]
  |  |  ------------------
  |  |   87|  1.14M|    }
  ------------------
  |  Branch (73:9): [Folded, False: 0]
  ------------------
   74|  1.14M|#undef set_ctx
   75|  1.14M|        dav1d_memset_pow2[t_dim->lw](txa[1][1][0], t_dim->h);
   76|  1.14M|    }
   77|  1.21M|}
lf_mask.c:calc_lf_value:
  408|   231k|{
  409|   231k|    const int base = iclip(iclip(base_lvl + lf_delta, 0, 63) + seg_delta, 0, 63);
  410|       |
  411|   231k|    if (!mr_delta) {
  ------------------
  |  Branch (411:9): [True: 76.8k, False: 155k]
  ------------------
  412|  76.8k|        memset(lflvl_values, base, sizeof(*lflvl_values) * 8);
  413|   155k|    } else {
  414|   155k|        const int sh = base >= 32;
  415|   155k|        lflvl_values[0][0] = lflvl_values[0][1] =
  416|   155k|            iclip(base + (mr_delta->ref_delta[0] * (1 << sh)), 0, 63);
  417|  1.24M|        for (int r = 1; r < 8; r++) {
  ------------------
  |  Branch (417:25): [True: 1.08M, False: 155k]
  ------------------
  418|  3.25M|            for (int m = 0; m < 2; m++) {
  ------------------
  |  Branch (418:29): [True: 2.17M, False: 1.08M]
  ------------------
  419|  2.17M|                const int delta =
  420|  2.17M|                    mr_delta->mode_delta[m] + mr_delta->ref_delta[r];
  421|  2.17M|                lflvl_values[r][m] = iclip(base + (delta * (1 << sh)), 0, 63);
  422|  2.17M|            }
  423|  1.08M|        }
  424|   155k|    }
  425|   231k|}
lf_mask.c:calc_lf_value_chroma:
  431|   176k|{
  432|   176k|    if (!base_lvl)
  ------------------
  |  Branch (432:9): [True: 121k, False: 55.2k]
  ------------------
  433|   121k|        memset(lflvl_values, 0, sizeof(*lflvl_values) * 8);
  434|  55.2k|    else
  435|  55.2k|        calc_lf_value(lflvl_values, base_lvl, lf_delta, seg_delta, mr_delta);
  436|   176k|}

dav1d_version:
   61|  9.70k|COLD const char *dav1d_version(void) {
   62|  9.70k|    return DAV1D_VERSION;
  ------------------
  |  |    2|  9.70k|#define DAV1D_VERSION "62501cc"
  ------------------
   63|  9.70k|}
dav1d_default_settings:
   71|  9.69k|COLD void dav1d_default_settings(Dav1dSettings *const s) {
   72|  9.69k|    s->n_threads = 0;
   73|  9.69k|    s->max_frame_delay = 0;
   74|  9.69k|    s->apply_grain = 1;
   75|  9.69k|    s->allocator.cookie = NULL;
   76|  9.69k|    s->allocator.alloc_picture_callback = dav1d_default_picture_alloc;
   77|  9.69k|    s->allocator.release_picture_callback = dav1d_default_picture_release;
   78|  9.69k|    s->logger.cookie = NULL;
   79|       |    s->logger.callback = dav1d_log_default_callback;
  ------------------
  |  |   43|  9.69k|#define dav1d_log_default_callback NULL
  ------------------
   80|  9.69k|    s->operating_point = 0;
   81|  9.69k|    s->all_layers = 1; // just until the tests are adjusted
   82|  9.69k|    s->frame_size_limit = 0;
   83|  9.69k|    s->strict_std_compliance = 0;
   84|  9.69k|    s->output_invisible_frames = 0;
   85|  9.69k|    s->inloop_filters = DAV1D_INLOOPFILTER_ALL;
   86|  9.69k|    s->decode_frame_type = DAV1D_DECODEFRAMETYPE_ALL;
   87|  9.69k|}
dav1d_open:
  140|  9.69k|COLD int dav1d_open(Dav1dContext **const c_out, const Dav1dSettings *const s) {
  141|  9.69k|    static pthread_once_t initted = PTHREAD_ONCE_INIT;
  142|  9.69k|    pthread_once(&initted, init_internal);
  143|       |
  144|  9.69k|    validate_input_or_ret(c_out != NULL, DAV1D_ERR(EINVAL));
  ------------------
  |  |   52|  9.69k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:9): [True: 0, False: 9.69k]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
  145|  9.69k|    validate_input_or_ret(s != NULL, DAV1D_ERR(EINVAL));
  ------------------
  |  |   52|  9.69k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:9): [True: 0, False: 9.69k]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
  146|  9.69k|    validate_input_or_ret(s->n_threads >= 0 &&
  ------------------
  |  |   52|  19.3k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:11): [True: 9.69k, False: 0]
  |  |  |  Branch (52:11): [True: 9.69k, False: 0]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
  147|  9.69k|                          s->n_threads <= DAV1D_MAX_THREADS, DAV1D_ERR(EINVAL));
  148|  9.69k|    validate_input_or_ret(s->max_frame_delay >= 0 &&
  ------------------
  |  |   52|  19.3k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:11): [True: 9.69k, False: 0]
  |  |  |  Branch (52:11): [True: 9.69k, False: 0]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
  149|  9.69k|                          s->max_frame_delay <= DAV1D_MAX_FRAME_DELAY, DAV1D_ERR(EINVAL));
  150|  9.69k|    validate_input_or_ret(s->allocator.alloc_picture_callback != NULL,
  ------------------
  |  |   52|  9.69k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:9): [True: 0, False: 9.69k]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
  151|  9.69k|                          DAV1D_ERR(EINVAL));
  152|  9.69k|    validate_input_or_ret(s->allocator.release_picture_callback != NULL,
  ------------------
  |  |   52|  9.69k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:9): [True: 0, False: 9.69k]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
  153|  9.69k|                          DAV1D_ERR(EINVAL));
  154|  9.69k|    validate_input_or_ret(s->operating_point >= 0 &&
  ------------------
  |  |   52|  19.3k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:11): [True: 9.69k, False: 0]
  |  |  |  Branch (52:11): [True: 9.69k, False: 0]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
  155|  9.69k|                          s->operating_point <= 31, DAV1D_ERR(EINVAL));
  156|  9.69k|    validate_input_or_ret(s->decode_frame_type >= DAV1D_DECODEFRAMETYPE_ALL &&
  ------------------
  |  |   52|  19.3k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:11): [True: 9.69k, False: 0]
  |  |  |  Branch (52:11): [True: 9.69k, False: 0]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
  157|  9.69k|                          s->decode_frame_type <= DAV1D_DECODEFRAMETYPE_KEY, DAV1D_ERR(EINVAL));
  158|       |
  159|  9.69k|    pthread_attr_t thread_attr;
  160|  9.69k|    if (pthread_attr_init(&thread_attr)) return DAV1D_ERR(ENOMEM);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  |  Branch (160:9): [True: 0, False: 9.69k]
  ------------------
  161|  9.69k|    size_t stack_size = 1024 * 1024 + get_stack_size_internal(&thread_attr);
  162|       |
  163|  9.69k|    pthread_attr_setstacksize(&thread_attr, stack_size);
  164|       |
  165|  9.69k|    Dav1dContext *const c = *c_out = dav1d_alloc_aligned(ALLOC_COMMON_CTX, sizeof(*c), 64);
  ------------------
  |  |  134|  9.69k|#define dav1d_alloc_aligned(type, sz, align) dav1d_alloc_aligned_internal(sz, align)
  ------------------
  166|  9.69k|    if (!c) goto error;
  ------------------
  |  Branch (166:9): [True: 0, False: 9.69k]
  ------------------
  167|  9.69k|    memset(c, 0, sizeof(*c));
  168|       |
  169|  9.69k|    c->allocator = s->allocator;
  170|  9.69k|    c->logger = s->logger;
  171|  9.69k|    c->apply_grain = s->apply_grain;
  172|  9.69k|    c->operating_point = s->operating_point;
  173|  9.69k|    c->all_layers = s->all_layers;
  174|  9.69k|    c->frame_size_limit = s->frame_size_limit;
  175|  9.69k|    c->strict_std_compliance = s->strict_std_compliance;
  176|  9.69k|    c->output_invisible_frames = s->output_invisible_frames;
  177|  9.69k|    c->inloop_filters = s->inloop_filters;
  178|  9.69k|    c->decode_frame_type = s->decode_frame_type;
  179|       |
  180|  9.69k|    dav1d_data_props_set_defaults(&c->cached_error_props);
  181|       |
  182|  9.69k|    if (dav1d_mem_pool_init(ALLOC_OBU_HDR, &c->seq_hdr_pool) ||
  ------------------
  |  |  131|  19.3k|#define dav1d_mem_pool_init(type, pool) dav1d_mem_pool_init(pool)
  |  |  ------------------
  |  |  |  Branch (131:41): [True: 0, False: 9.69k]
  |  |  ------------------
  ------------------
  183|  9.69k|        dav1d_mem_pool_init(ALLOC_OBU_HDR, &c->frame_hdr_pool) ||
  ------------------
  |  |  131|  19.3k|#define dav1d_mem_pool_init(type, pool) dav1d_mem_pool_init(pool)
  |  |  ------------------
  |  |  |  Branch (131:41): [True: 0, False: 9.69k]
  |  |  ------------------
  ------------------
  184|  9.69k|        dav1d_mem_pool_init(ALLOC_SEGMAP, &c->segmap_pool) ||
  ------------------
  |  |  131|  19.3k|#define dav1d_mem_pool_init(type, pool) dav1d_mem_pool_init(pool)
  |  |  ------------------
  |  |  |  Branch (131:41): [True: 0, False: 9.69k]
  |  |  ------------------
  ------------------
  185|  9.69k|        dav1d_mem_pool_init(ALLOC_REFMVS, &c->refmvs_pool) ||
  ------------------
  |  |  131|  19.3k|#define dav1d_mem_pool_init(type, pool) dav1d_mem_pool_init(pool)
  |  |  ------------------
  |  |  |  Branch (131:41): [True: 0, False: 9.69k]
  |  |  ------------------
  ------------------
  186|  9.69k|        dav1d_mem_pool_init(ALLOC_PIC_CTX, &c->pic_ctx_pool) ||
  ------------------
  |  |  131|  19.3k|#define dav1d_mem_pool_init(type, pool) dav1d_mem_pool_init(pool)
  |  |  ------------------
  |  |  |  Branch (131:41): [True: 0, False: 9.69k]
  |  |  ------------------
  ------------------
  187|  9.69k|        dav1d_mem_pool_init(ALLOC_CDF, &c->cdf_pool))
  ------------------
  |  |  131|  9.69k|#define dav1d_mem_pool_init(type, pool) dav1d_mem_pool_init(pool)
  |  |  ------------------
  |  |  |  Branch (131:41): [True: 0, False: 9.69k]
  |  |  ------------------
  ------------------
  188|      0|    {
  189|      0|        goto error;
  190|      0|    }
  191|       |
  192|  9.69k|    if (c->allocator.alloc_picture_callback   == dav1d_default_picture_alloc &&
  ------------------
  |  Branch (192:9): [True: 9.69k, False: 0]
  ------------------
  193|  9.69k|        c->allocator.release_picture_callback == dav1d_default_picture_release)
  ------------------
  |  Branch (193:9): [True: 9.69k, False: 0]
  ------------------
  194|  9.69k|    {
  195|  9.69k|        if (c->allocator.cookie) goto error;
  ------------------
  |  Branch (195:13): [True: 0, False: 9.69k]
  ------------------
  196|  9.69k|        if (dav1d_mem_pool_init(ALLOC_PIC, &c->picture_pool)) goto error;
  ------------------
  |  |  131|  9.69k|#define dav1d_mem_pool_init(type, pool) dav1d_mem_pool_init(pool)
  |  |  ------------------
  |  |  |  Branch (131:41): [True: 0, False: 9.69k]
  |  |  ------------------
  ------------------
  197|  9.69k|        c->allocator.cookie = c->picture_pool;
  198|  9.69k|    } else if (c->allocator.alloc_picture_callback   == dav1d_default_picture_alloc ||
  ------------------
  |  Branch (198:16): [True: 0, False: 0]
  ------------------
  199|      0|               c->allocator.release_picture_callback == dav1d_default_picture_release)
  ------------------
  |  Branch (199:16): [True: 0, False: 0]
  ------------------
  200|      0|    {
  201|      0|        goto error;
  202|      0|    }
  203|       |
  204|       |    /* On 32-bit systems extremely large frame sizes can cause overflows in
  205|       |     * dav1d_decode_frame() malloc size calculations. Prevent that from occuring
  206|       |     * by enforcing a maximum frame size limit, chosen to roughly correspond to
  207|       |     * the largest size possible to decode without exhausting virtual memory. */
  208|  9.69k|    if (sizeof(size_t) < 8 && s->frame_size_limit - 1 >= 8192 * 8192) {
  ------------------
  |  Branch (208:9): [Folded, False: 9.69k]
  |  Branch (208:31): [True: 0, False: 0]
  ------------------
  209|      0|        c->frame_size_limit = 8192 * 8192;
  210|      0|        if (s->frame_size_limit)
  ------------------
  |  Branch (210:13): [True: 0, False: 0]
  ------------------
  211|      0|            dav1d_log(c, "Frame size limit reduced from %u to %u.\n",
  ------------------
  |  |   44|      0|#define dav1d_log(...) do { } while(0)
  |  |  ------------------
  |  |  |  Branch (44:37): [Folded, False: 0]
  |  |  ------------------
  ------------------
  212|      0|                      s->frame_size_limit, c->frame_size_limit);
  213|      0|    }
  214|       |
  215|  9.69k|    c->flush = &c->flush_mem;
  216|  9.69k|    atomic_init(c->flush, 0);
  217|       |
  218|  9.69k|    get_num_threads(c, s, &c->n_tc, &c->n_fc);
  219|       |
  220|  9.69k|    c->fc = dav1d_alloc_aligned(ALLOC_THREAD_CTX, sizeof(*c->fc) * c->n_fc, 32);
  ------------------
  |  |  134|  9.69k|#define dav1d_alloc_aligned(type, sz, align) dav1d_alloc_aligned_internal(sz, align)
  ------------------
  221|  9.69k|    if (!c->fc) goto error;
  ------------------
  |  Branch (221:9): [True: 0, False: 9.69k]
  ------------------
  222|  9.69k|    memset(c->fc, 0, sizeof(*c->fc) * c->n_fc);
  223|       |
  224|  9.69k|    c->tc = dav1d_alloc_aligned(ALLOC_THREAD_CTX, sizeof(*c->tc) * c->n_tc, 64);
  ------------------
  |  |  134|  9.69k|#define dav1d_alloc_aligned(type, sz, align) dav1d_alloc_aligned_internal(sz, align)
  ------------------
  225|  9.69k|    if (!c->tc) goto error;
  ------------------
  |  Branch (225:9): [True: 0, False: 9.69k]
  ------------------
  226|  9.69k|    memset(c->tc, 0, sizeof(*c->tc) * c->n_tc);
  227|  9.69k|    if (c->n_tc > 1) {
  ------------------
  |  Branch (227:9): [True: 0, False: 9.69k]
  ------------------
  228|      0|        if (pthread_mutex_init(&c->task_thread.lock, NULL)) goto error;
  ------------------
  |  Branch (228:13): [True: 0, False: 0]
  ------------------
  229|      0|        if (pthread_cond_init(&c->task_thread.cond, NULL)) {
  ------------------
  |  Branch (229:13): [True: 0, False: 0]
  ------------------
  230|      0|            pthread_mutex_destroy(&c->task_thread.lock);
  231|      0|            goto error;
  232|      0|        }
  233|      0|        if (pthread_cond_init(&c->task_thread.delayed_fg.cond, NULL)) {
  ------------------
  |  Branch (233:13): [True: 0, False: 0]
  ------------------
  234|      0|            pthread_cond_destroy(&c->task_thread.cond);
  235|      0|            pthread_mutex_destroy(&c->task_thread.lock);
  236|      0|            goto error;
  237|      0|        }
  238|      0|        c->task_thread.cur = c->n_fc;
  239|      0|        atomic_init(&c->task_thread.reset_task_cur, UINT_MAX);
  240|      0|        atomic_init(&c->task_thread.cond_signaled, 0);
  241|      0|        c->task_thread.inited = 1;
  242|      0|    }
  243|       |
  244|  9.69k|    if (c->n_fc > 1) {
  ------------------
  |  Branch (244:9): [True: 0, False: 9.69k]
  ------------------
  245|      0|        const size_t out_delayed_sz = sizeof(*c->frame_thread.out_delayed) * c->n_fc;
  246|      0|        c->frame_thread.out_delayed =
  247|      0|            dav1d_malloc(ALLOC_THREAD_CTX, out_delayed_sz);
  ------------------
  |  |  132|      0|#define dav1d_malloc(type, sz) malloc(sz)
  ------------------
  248|      0|        if (!c->frame_thread.out_delayed) goto error;
  ------------------
  |  Branch (248:13): [True: 0, False: 0]
  ------------------
  249|      0|        memset(c->frame_thread.out_delayed, 0, out_delayed_sz);
  250|      0|    }
  251|  19.3k|    for (unsigned n = 0; n < c->n_fc; n++) {
  ------------------
  |  Branch (251:26): [True: 9.69k, False: 9.69k]
  ------------------
  252|  9.69k|        Dav1dFrameContext *const f = &c->fc[n];
  253|  9.69k|        if (c->n_tc > 1) {
  ------------------
  |  Branch (253:13): [True: 0, False: 9.69k]
  ------------------
  254|      0|            if (pthread_mutex_init(&f->task_thread.lock, NULL)) goto error;
  ------------------
  |  Branch (254:17): [True: 0, False: 0]
  ------------------
  255|      0|            if (pthread_cond_init(&f->task_thread.cond, NULL)) {
  ------------------
  |  Branch (255:17): [True: 0, False: 0]
  ------------------
  256|      0|                pthread_mutex_destroy(&f->task_thread.lock);
  257|      0|                goto error;
  258|      0|            }
  259|      0|            if (pthread_mutex_init(&f->task_thread.pending_tasks.lock, NULL)) {
  ------------------
  |  Branch (259:17): [True: 0, False: 0]
  ------------------
  260|      0|                pthread_cond_destroy(&f->task_thread.cond);
  261|      0|                pthread_mutex_destroy(&f->task_thread.lock);
  262|      0|                goto error;
  263|      0|            }
  264|      0|        }
  265|  9.69k|        f->c = c;
  266|  9.69k|        f->task_thread.ttd = &c->task_thread;
  267|  9.69k|        f->lf.last_sharpness = -1;
  268|  9.69k|    }
  269|       |
  270|  19.3k|    for (unsigned m = 0; m < c->n_tc; m++) {
  ------------------
  |  Branch (270:26): [True: 9.69k, False: 9.69k]
  ------------------
  271|  9.69k|        Dav1dTaskContext *const t = &c->tc[m];
  272|  9.69k|        t->f = &c->fc[0];
  273|  9.69k|        t->task_thread.ttd = &c->task_thread;
  274|  9.69k|        t->c = c;
  275|  9.69k|        memset(t->cf_16bpc, 0, sizeof(t->cf_16bpc));
  276|  9.69k|        if (c->n_tc > 1) {
  ------------------
  |  Branch (276:13): [True: 0, False: 9.69k]
  ------------------
  277|      0|            if (pthread_mutex_init(&t->task_thread.td.lock, NULL)) goto error;
  ------------------
  |  Branch (277:17): [True: 0, False: 0]
  ------------------
  278|      0|            if (pthread_cond_init(&t->task_thread.td.cond, NULL)) {
  ------------------
  |  Branch (278:17): [True: 0, False: 0]
  ------------------
  279|      0|                pthread_mutex_destroy(&t->task_thread.td.lock);
  280|      0|                goto error;
  281|      0|            }
  282|      0|            if (pthread_create(&t->task_thread.td.thread, &thread_attr, dav1d_worker_task, t)) {
  ------------------
  |  Branch (282:17): [True: 0, False: 0]
  ------------------
  283|      0|                pthread_cond_destroy(&t->task_thread.td.cond);
  284|      0|                pthread_mutex_destroy(&t->task_thread.td.lock);
  285|      0|                goto error;
  286|      0|            }
  287|      0|            t->task_thread.td.inited = 1;
  288|      0|        }
  289|  9.69k|    }
  290|  9.69k|    dav1d_pal_dsp_init(&c->pal_dsp);
  291|  9.69k|    dav1d_refmvs_dsp_init(&c->refmvs_dsp);
  292|       |
  293|  9.69k|    pthread_attr_destroy(&thread_attr);
  294|       |
  295|  9.69k|    return 0;
  296|       |
  297|      0|error:
  298|      0|    if (c) close_internal(c_out, 0);
  ------------------
  |  Branch (298:9): [True: 0, False: 0]
  ------------------
  299|      0|    pthread_attr_destroy(&thread_attr);
  300|      0|    return DAV1D_ERR(ENOMEM);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  301|  9.69k|}
dav1d_send_data:
  439|  77.3k|{
  440|  77.3k|    validate_input_or_ret(c != NULL, DAV1D_ERR(EINVAL));
  ------------------
  |  |   52|  77.3k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:9): [True: 0, False: 77.3k]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
  441|  77.3k|    validate_input_or_ret(in != NULL, DAV1D_ERR(EINVAL));
  ------------------
  |  |   52|  77.3k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:9): [True: 0, False: 77.3k]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
  442|       |
  443|  77.3k|    if (in->data) {
  ------------------
  |  Branch (443:9): [True: 77.3k, False: 0]
  ------------------
  444|  77.3k|        validate_input_or_ret(in->sz > 0 && in->sz <= SIZE_MAX / 2, DAV1D_ERR(EINVAL));
  ------------------
  |  |   52|   154k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:11): [True: 77.3k, False: 0]
  |  |  |  Branch (52:11): [True: 77.3k, False: 0]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
  445|  77.3k|        c->drain = 0;
  446|  77.3k|    }
  447|  77.3k|    if (c->in.data)
  ------------------
  |  Branch (447:9): [True: 3.58k, False: 73.8k]
  ------------------
  448|  3.58k|        return DAV1D_ERR(EAGAIN);
  ------------------
  |  |   58|  3.58k|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  449|  73.8k|    dav1d_data_ref(&c->in, in);
  450|       |
  451|  73.8k|    int res = gen_picture(c);
  452|  73.8k|    if (!res)
  ------------------
  |  Branch (452:9): [True: 23.3k, False: 50.5k]
  ------------------
  453|  23.3k|        dav1d_data_unref_internal(in);
  454|       |
  455|  73.8k|    return res;
  456|  77.3k|}
dav1d_get_picture:
  459|  38.0k|{
  460|  38.0k|    validate_input_or_ret(c != NULL, DAV1D_ERR(EINVAL));
  ------------------
  |  |   52|  38.0k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:9): [True: 0, False: 38.0k]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
  461|  38.0k|    validate_input_or_ret(out != NULL, DAV1D_ERR(EINVAL));
  ------------------
  |  |   52|  38.0k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:9): [True: 0, False: 38.0k]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
  462|       |
  463|  38.0k|    const int drain = c->drain;
  464|  38.0k|    c->drain = 1;
  465|       |
  466|  38.0k|    int res = gen_picture(c);
  467|  38.0k|    if (res < 0)
  ------------------
  |  Branch (467:9): [True: 1.81k, False: 36.2k]
  ------------------
  468|  1.81k|        return res;
  469|       |
  470|  36.2k|    if (c->cached_error) {
  ------------------
  |  Branch (470:9): [True: 0, False: 36.2k]
  ------------------
  471|      0|        const int res = c->cached_error;
  472|      0|        c->cached_error = 0;
  473|      0|        return res;
  474|      0|    }
  475|       |
  476|  36.2k|    if (output_picture_ready(c, c->n_fc == 1))
  ------------------
  |  Branch (476:9): [True: 20.9k, False: 15.2k]
  ------------------
  477|  20.9k|        return output_image(c, out);
  478|       |
  479|  15.2k|    if (c->n_fc > 1 && drain)
  ------------------
  |  Branch (479:9): [True: 0, False: 15.2k]
  |  Branch (479:24): [True: 0, False: 0]
  ------------------
  480|      0|        return drain_picture(c, out);
  481|       |
  482|  15.2k|    return DAV1D_ERR(EAGAIN);
  ------------------
  |  |   58|  15.2k|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  483|  15.2k|}
dav1d_apply_grain:
  487|  5.01k|{
  488|  5.01k|    validate_input_or_ret(c != NULL, DAV1D_ERR(EINVAL));
  ------------------
  |  |   52|  5.01k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:9): [True: 0, False: 5.01k]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
  489|  5.01k|    validate_input_or_ret(out != NULL, DAV1D_ERR(EINVAL));
  ------------------
  |  |   52|  5.01k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:9): [True: 0, False: 5.01k]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
  490|  5.01k|    validate_input_or_ret(in != NULL, DAV1D_ERR(EINVAL));
  ------------------
  |  |   52|  5.01k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:9): [True: 0, False: 5.01k]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
  491|       |
  492|  5.01k|    if (!has_grain(in)) {
  ------------------
  |  Branch (492:9): [True: 0, False: 5.01k]
  ------------------
  493|      0|        dav1d_picture_ref(out, in);
  494|      0|        return 0;
  495|      0|    }
  496|       |
  497|  5.01k|    int res = dav1d_picture_alloc_copy(c, out, in->p.w, in);
  498|  5.01k|    if (res < 0) goto error;
  ------------------
  |  Branch (498:9): [True: 0, False: 5.01k]
  ------------------
  499|       |
  500|  5.01k|    if (c->n_tc > 1) {
  ------------------
  |  Branch (500:9): [True: 0, False: 5.01k]
  ------------------
  501|      0|        dav1d_task_delayed_fg(c, out, in);
  502|  5.01k|    } else {
  503|  5.01k|        switch (out->p.bpc) {
  504|      0|#if CONFIG_8BPC
  505|  1.58k|        case 8:
  ------------------
  |  Branch (505:9): [True: 1.58k, False: 3.43k]
  ------------------
  506|  1.58k|            dav1d_apply_grain_8bpc(&c->dsp[0].fg, out, in);
  507|  1.58k|            break;
  508|      0|#endif
  509|      0|#if CONFIG_16BPC
  510|  2.73k|        case 10:
  ------------------
  |  Branch (510:9): [True: 2.73k, False: 2.27k]
  ------------------
  511|  3.43k|        case 12:
  ------------------
  |  Branch (511:9): [True: 692, False: 4.31k]
  ------------------
  512|  3.43k|            dav1d_apply_grain_16bpc(&c->dsp[(out->p.bpc >> 1) - 4].fg, out, in);
  513|  3.43k|            break;
  514|      0|#endif
  515|      0|        default: abort();
  ------------------
  |  Branch (515:9): [True: 0, False: 5.01k]
  ------------------
  516|  5.01k|        }
  517|  5.01k|    }
  518|       |
  519|  5.01k|    return 0;
  520|       |
  521|      0|error:
  522|      0|    dav1d_picture_unref_internal(out);
  523|      0|    return res;
  524|  5.01k|}
dav1d_flush:
  526|  9.69k|void dav1d_flush(Dav1dContext *const c) {
  527|  9.69k|    dav1d_data_unref_internal(&c->in);
  528|  9.69k|    if (c->out.p.frame_hdr)
  ------------------
  |  Branch (528:9): [True: 0, False: 9.69k]
  ------------------
  529|      0|        dav1d_thread_picture_unref(&c->out);
  530|  9.69k|    if (c->cache.p.frame_hdr)
  ------------------
  |  Branch (530:9): [True: 0, False: 9.69k]
  ------------------
  531|      0|        dav1d_thread_picture_unref(&c->cache);
  532|       |
  533|  9.69k|    c->drain = 0;
  534|  9.69k|    c->cached_error = 0;
  535|       |
  536|  87.2k|    for (int i = 0; i < 8; i++) {
  ------------------
  |  Branch (536:21): [True: 77.5k, False: 9.69k]
  ------------------
  537|  77.5k|        if (c->refs[i].p.p.frame_hdr)
  ------------------
  |  Branch (537:13): [True: 21.7k, False: 55.8k]
  ------------------
  538|  21.7k|            dav1d_thread_picture_unref(&c->refs[i].p);
  539|  77.5k|        dav1d_ref_dec(&c->refs[i].segmap);
  540|  77.5k|        dav1d_ref_dec(&c->refs[i].refmvs);
  541|  77.5k|        dav1d_cdf_thread_unref(&c->cdf[i]);
  542|  77.5k|    }
  543|  9.69k|    c->frame_hdr = NULL;
  544|  9.69k|    c->seq_hdr = NULL;
  545|  9.69k|    dav1d_ref_dec(&c->seq_hdr_ref);
  546|       |
  547|  9.69k|    c->mastering_display = NULL;
  548|  9.69k|    c->content_light = NULL;
  549|  9.69k|    c->itut_t35 = NULL;
  550|  9.69k|    c->n_itut_t35 = 0;
  551|  9.69k|    dav1d_ref_dec(&c->mastering_display_ref);
  552|  9.69k|    dav1d_ref_dec(&c->content_light_ref);
  553|  9.69k|    dav1d_ref_dec(&c->itut_t35_ref);
  554|       |
  555|  9.69k|    dav1d_data_props_unref_internal(&c->cached_error_props);
  556|       |
  557|  9.69k|    if (c->n_fc == 1 && c->n_tc == 1) return;
  ------------------
  |  Branch (557:9): [True: 9.69k, False: 0]
  |  Branch (557:25): [True: 9.69k, False: 0]
  ------------------
  558|  9.69k|    atomic_store(c->flush, 1);
  559|       |
  560|      0|    if (c->n_tc > 1) {
  ------------------
  |  Branch (560:9): [True: 0, False: 0]
  ------------------
  561|      0|        pthread_mutex_lock(&c->task_thread.lock);
  562|       |        // stop running tasks in worker threads
  563|      0|        for (unsigned i = 0; i < c->n_tc; i++) {
  ------------------
  |  Branch (563:30): [True: 0, False: 0]
  ------------------
  564|      0|            Dav1dTaskContext *const tc = &c->tc[i];
  565|      0|            while (!tc->task_thread.flushed) {
  ------------------
  |  Branch (565:20): [True: 0, False: 0]
  ------------------
  566|      0|                pthread_cond_wait(&tc->task_thread.td.cond, &c->task_thread.lock);
  567|      0|            }
  568|      0|        }
  569|      0|        for (unsigned i = 0; i < c->n_fc; i++) {
  ------------------
  |  Branch (569:30): [True: 0, False: 0]
  ------------------
  570|      0|            c->fc[i].task_thread.task_head = NULL;
  571|      0|            c->fc[i].task_thread.task_tail = NULL;
  572|      0|            c->fc[i].task_thread.task_cur_prev = NULL;
  573|      0|            c->fc[i].task_thread.pending_tasks.head = NULL;
  574|      0|            c->fc[i].task_thread.pending_tasks.tail = NULL;
  575|      0|            atomic_init(&c->fc[i].task_thread.pending_tasks.merge, 0);
  576|      0|        }
  577|      0|        atomic_init(&c->task_thread.first, 0);
  578|      0|        c->task_thread.cur = c->n_fc;
  579|      0|        atomic_store(&c->task_thread.reset_task_cur, UINT_MAX);
  580|      0|        atomic_store(&c->task_thread.cond_signaled, 0);
  581|      0|        pthread_mutex_unlock(&c->task_thread.lock);
  582|      0|    }
  583|       |
  584|      0|    if (c->n_fc > 1) {
  ------------------
  |  Branch (584:9): [True: 0, False: 0]
  ------------------
  585|      0|        for (unsigned n = 0, next = c->frame_thread.next; n < c->n_fc; n++, next++) {
  ------------------
  |  Branch (585:59): [True: 0, False: 0]
  ------------------
  586|      0|            if (next == c->n_fc) next = 0;
  ------------------
  |  Branch (586:17): [True: 0, False: 0]
  ------------------
  587|      0|            Dav1dFrameContext *const f = &c->fc[next];
  588|      0|            dav1d_decode_frame_exit(f, -1);
  589|      0|            f->n_tile_data = 0;
  590|      0|            f->task_thread.retval = 0;
  591|      0|            f->task_thread.error = 0;
  592|      0|            Dav1dThreadPicture *out_delayed = &c->frame_thread.out_delayed[next];
  593|      0|            if (out_delayed->p.frame_hdr) {
  ------------------
  |  Branch (593:17): [True: 0, False: 0]
  ------------------
  594|      0|                dav1d_thread_picture_unref(out_delayed);
  595|      0|            }
  596|      0|        }
  597|      0|        c->frame_thread.next = 0;
  598|      0|    }
  599|       |    atomic_store(c->flush, 0);
  600|      0|}
dav1d_close:
  602|  9.69k|COLD void dav1d_close(Dav1dContext **const c_out) {
  603|  9.69k|    validate_input(c_out != NULL);
  ------------------
  |  |   59|  9.69k|#define validate_input(x) validate_input_or_ret(x, )
  |  |  ------------------
  |  |  |  |   52|  9.69k|    if (!(x)) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (52:9): [True: 0, False: 9.69k]
  |  |  |  |  ------------------
  |  |  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  |  |  ------------------
  |  |  |  |   54|      0|                    #x, __func__); \
  |  |  |  |   55|      0|        debug_abort(); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   39|      0|#define debug_abort abort
  |  |  |  |  ------------------
  |  |  |  |   56|      0|        return r; \
  |  |  |  |   57|      0|    }
  |  |  ------------------
  ------------------
  604|       |#if TRACK_HEAP_ALLOCATIONS
  605|       |    dav1d_log_alloc_stats(*c_out);
  606|       |#endif
  607|  9.69k|    close_internal(c_out, 1);
  608|  9.69k|}
dav1d_picture_unref:
  727|  20.9k|void dav1d_picture_unref(Dav1dPicture *const p) {
  728|  20.9k|    dav1d_picture_unref_internal(p);
  729|  20.9k|}
dav1d_data_create:
  731|  73.9k|uint8_t *dav1d_data_create(Dav1dData *const buf, const size_t sz) {
  732|  73.9k|    return dav1d_data_create_internal(buf, sz);
  733|  73.9k|}
dav1d_data_unref:
  756|  50.6k|void dav1d_data_unref(Dav1dData *const buf) {
  757|  50.6k|    dav1d_data_unref_internal(buf);
  758|  50.6k|}
lib.c:get_num_threads:
  111|  9.69k|{
  112|       |    /* ceil(sqrt(n)) */
  113|  9.69k|    static const uint8_t fc_lut[49] = {
  114|  9.69k|        1,                                     /*     1 */
  115|  9.69k|        2, 2, 2,                               /*  2- 4 */
  116|  9.69k|        3, 3, 3, 3, 3,                         /*  5- 9 */
  117|  9.69k|        4, 4, 4, 4, 4, 4, 4,                   /* 10-16 */
  118|  9.69k|        5, 5, 5, 5, 5, 5, 5, 5, 5,             /* 17-25 */
  119|  9.69k|        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,       /* 26-36 */
  120|  9.69k|        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, /* 37-49 */
  121|  9.69k|    };
  122|  9.69k|    *n_tc = s->n_threads ? s->n_threads :
  ------------------
  |  Branch (122:13): [True: 9.69k, False: 0]
  ------------------
  123|  9.69k|        iclip(dav1d_num_logical_processors(c), 1, DAV1D_MAX_THREADS);
  ------------------
  |  |   46|      0|#define DAV1D_MAX_THREADS 256
  ------------------
  124|  9.69k|    *n_fc = s->max_frame_delay ? umin(s->max_frame_delay, *n_tc) :
  ------------------
  |  Branch (124:13): [True: 9.69k, False: 0]
  ------------------
  125|  9.69k|            *n_tc < 50 ? fc_lut[*n_tc - 1] : 8; // min(8, ceil(sqrt(n)))
  ------------------
  |  Branch (125:13): [True: 0, False: 0]
  ------------------
  126|  9.69k|}
lib.c:init_internal:
   53|      1|static COLD void init_internal(void) {
   54|      1|    dav1d_init_cpu();
   55|      1|    dav1d_init_ii_wedge_masks();
   56|      1|    dav1d_init_intra_edge_tree();
   57|      1|    dav1d_init_qm_tables();
   58|      1|    dav1d_init_thread();
  ------------------
  |  |  144|      1|#define dav1d_init_thread() do {} while (0)
  |  |  ------------------
  |  |  |  Branch (144:42): [Folded, False: 1]
  |  |  ------------------
  ------------------
   59|      1|}
lib.c:get_stack_size_internal:
   93|  9.69k|static COLD size_t get_stack_size_internal(const pthread_attr_t *const thread_attr) {
   94|       |    /* glibc has an issue where the size of the TLS is subtracted from the stack
   95|       |     * size instead of allocated separately. As a result the specified stack
   96|       |     * size may be insufficient when used in an application with large amounts
   97|       |     * of TLS data. The following is a workaround to compensate for that.
   98|       |     * See https://sourceware.org/bugzilla/show_bug.cgi?id=11787 */
   99|  9.69k|    size_t (*const get_minstack)(const pthread_attr_t*) =
  100|  9.69k|        dlsym(RTLD_DEFAULT, "__pthread_get_minstack");
  101|  9.69k|    if (get_minstack)
  ------------------
  |  Branch (101:9): [True: 9.69k, False: 0]
  ------------------
  102|  9.69k|        return get_minstack(thread_attr) - PTHREAD_STACK_MIN;
  103|      0|    return 0;
  104|  9.69k|}
lib.c:gen_picture:
  413|   111k|{
  414|   111k|    Dav1dData *const in = &c->in;
  415|       |
  416|   111k|    if (output_picture_ready(c, 0))
  ------------------
  |  Branch (416:9): [True: 16.9k, False: 94.9k]
  ------------------
  417|  16.9k|        return 0;
  418|       |
  419|   129k|    while (in->sz > 0) {
  ------------------
  |  Branch (419:12): [True: 107k, False: 21.6k]
  ------------------
  420|   107k|        const ptrdiff_t res = dav1d_parse_obus(c, in);
  421|   107k|        if (res < 0) {
  ------------------
  |  Branch (421:13): [True: 52.3k, False: 55.4k]
  ------------------
  422|  52.3k|            dav1d_data_unref_internal(in);
  423|  55.4k|        } else {
  424|  55.4k|            assert((size_t)res <= in->sz);
  ------------------
  |  Branch (424:13): [True: 55.4k, False: 0]
  ------------------
  425|  55.4k|            in->sz -= res;
  426|  55.4k|            in->data += res;
  427|  55.4k|            if (!in->sz) dav1d_data_unref_internal(in);
  ------------------
  |  Branch (427:17): [True: 21.4k, False: 33.9k]
  ------------------
  428|  55.4k|        }
  429|   107k|        if (output_picture_ready(c, 0))
  ------------------
  |  Branch (429:13): [True: 20.9k, False: 86.7k]
  ------------------
  430|  20.9k|            break;
  431|  86.7k|        if (res < 0)
  ------------------
  |  Branch (431:13): [True: 52.3k, False: 34.4k]
  ------------------
  432|  52.3k|            return (int)res;
  433|  86.7k|    }
  434|       |
  435|  42.6k|    return 0;
  436|  94.9k|}
lib.c:output_picture_ready:
  332|   255k|static int output_picture_ready(Dav1dContext *const c, const int drain) {
  333|   255k|    if (c->cached_error) return 1;
  ------------------
  |  Branch (333:9): [True: 0, False: 255k]
  ------------------
  334|   255k|    if (!c->all_layers && c->max_spatial_id) {
  ------------------
  |  Branch (334:9): [True: 0, False: 255k]
  |  Branch (334:27): [True: 0, False: 0]
  ------------------
  335|      0|        if (c->out.p.data[0] && c->cache.p.data[0]) {
  ------------------
  |  Branch (335:13): [True: 0, False: 0]
  |  Branch (335:33): [True: 0, False: 0]
  ------------------
  336|      0|            if (c->max_spatial_id == c->cache.p.frame_hdr->spatial_id ||
  ------------------
  |  Branch (336:17): [True: 0, False: 0]
  ------------------
  337|      0|                c->out.flags & PICTURE_FLAG_NEW_TEMPORAL_UNIT)
  ------------------
  |  Branch (337:17): [True: 0, False: 0]
  ------------------
  338|      0|                return 1;
  339|      0|            dav1d_thread_picture_unref(&c->cache);
  340|      0|            dav1d_thread_picture_move_ref(&c->cache, &c->out);
  341|      0|            return 0;
  342|      0|        } else if (c->cache.p.data[0] && drain) {
  ------------------
  |  Branch (342:20): [True: 0, False: 0]
  |  Branch (342:42): [True: 0, False: 0]
  ------------------
  343|      0|            return 1;
  344|      0|        } else if (c->out.p.data[0]) {
  ------------------
  |  Branch (344:20): [True: 0, False: 0]
  ------------------
  345|      0|            dav1d_thread_picture_move_ref(&c->cache, &c->out);
  346|      0|            return 0;
  347|      0|        }
  348|      0|    }
  349|       |
  350|   255k|    return !!c->out.p.data[0];
  351|   255k|}
lib.c:output_image:
  312|  20.9k|{
  313|  20.9k|    int res = 0;
  314|       |
  315|  20.9k|    Dav1dThreadPicture *const in = (c->all_layers || !c->max_spatial_id)
  ------------------
  |  Branch (315:37): [True: 20.9k, False: 0]
  |  Branch (315:54): [True: 0, False: 0]
  ------------------
  316|  20.9k|                                   ? &c->out : &c->cache;
  317|  20.9k|    if (!c->apply_grain || !has_grain(&in->p)) {
  ------------------
  |  Branch (317:9): [True: 0, False: 20.9k]
  |  Branch (317:28): [True: 15.9k, False: 5.01k]
  ------------------
  318|  15.9k|        dav1d_picture_move_ref(out, &in->p);
  319|  15.9k|        dav1d_thread_picture_unref(in);
  320|  15.9k|        goto end;
  321|  15.9k|    }
  322|       |
  323|  5.01k|    res = dav1d_apply_grain(c, out, &in->p);
  324|  5.01k|    dav1d_thread_picture_unref(in);
  325|  20.9k|end:
  326|  20.9k|    if (!c->all_layers && c->max_spatial_id && c->out.p.data[0]) {
  ------------------
  |  Branch (326:9): [True: 0, False: 20.9k]
  |  Branch (326:27): [True: 0, False: 0]
  |  Branch (326:48): [True: 0, False: 0]
  ------------------
  327|      0|        dav1d_thread_picture_move_ref(in, &c->out);
  328|      0|    }
  329|  20.9k|    return res;
  330|  5.01k|}
lib.c:has_grain:
  304|  25.9k|{
  305|  25.9k|    const Dav1dFilmGrainData *fgdata = &pic->frame_hdr->film_grain.data;
  306|  25.9k|    return fgdata->num_y_points || fgdata->num_uv_points[0] ||
  ------------------
  |  Branch (306:12): [True: 6.83k, False: 19.1k]
  |  Branch (306:36): [True: 1.10k, False: 18.0k]
  ------------------
  307|  18.0k|           fgdata->num_uv_points[1] || (fgdata->clip_to_restricted_range &&
  ------------------
  |  Branch (307:12): [True: 890, False: 17.1k]
  |  Branch (307:41): [True: 1.45k, False: 15.7k]
  ------------------
  308|  1.45k|                                        fgdata->chroma_scaling_from_luma);
  ------------------
  |  Branch (308:41): [True: 1.20k, False: 252]
  ------------------
  309|  25.9k|}
lib.c:close_internal:
  610|  9.69k|static COLD void close_internal(Dav1dContext **const c_out, int flush) {
  611|  9.69k|    Dav1dContext *const c = *c_out;
  612|  9.69k|    if (!c) return;
  ------------------
  |  Branch (612:9): [True: 0, False: 9.69k]
  ------------------
  613|       |
  614|  9.69k|    if (flush) dav1d_flush(c);
  ------------------
  |  Branch (614:9): [True: 9.69k, False: 0]
  ------------------
  615|       |
  616|  9.69k|    if (c->tc) {
  ------------------
  |  Branch (616:9): [True: 9.69k, False: 0]
  ------------------
  617|  9.69k|        struct TaskThreadData *ttd = &c->task_thread;
  618|  9.69k|        if (ttd->inited) {
  ------------------
  |  Branch (618:13): [True: 0, False: 9.69k]
  ------------------
  619|      0|            pthread_mutex_lock(&ttd->lock);
  620|      0|            for (unsigned n = 0; n < c->n_tc && c->tc[n].task_thread.td.inited; n++)
  ------------------
  |  Branch (620:34): [True: 0, False: 0]
  |  Branch (620:49): [True: 0, False: 0]
  ------------------
  621|      0|                c->tc[n].task_thread.die = 1;
  622|      0|            pthread_cond_broadcast(&ttd->cond);
  623|      0|            pthread_mutex_unlock(&ttd->lock);
  624|      0|            for (unsigned n = 0; n < c->n_tc; n++) {
  ------------------
  |  Branch (624:34): [True: 0, False: 0]
  ------------------
  625|      0|                Dav1dTaskContext *const pf = &c->tc[n];
  626|      0|                if (!pf->task_thread.td.inited) break;
  ------------------
  |  Branch (626:21): [True: 0, False: 0]
  ------------------
  627|      0|                pthread_join(pf->task_thread.td.thread, NULL);
  628|      0|                pthread_cond_destroy(&pf->task_thread.td.cond);
  629|      0|                pthread_mutex_destroy(&pf->task_thread.td.lock);
  630|      0|            }
  631|      0|            pthread_cond_destroy(&ttd->delayed_fg.cond);
  632|      0|            pthread_cond_destroy(&ttd->cond);
  633|      0|            pthread_mutex_destroy(&ttd->lock);
  634|      0|        }
  635|  9.69k|        dav1d_free_aligned(c->tc);
  ------------------
  |  |  136|  9.69k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
  636|  9.69k|    }
  637|       |
  638|  19.3k|    for (unsigned n = 0; c->fc && n < c->n_fc; n++) {
  ------------------
  |  Branch (638:26): [True: 19.3k, False: 0]
  |  Branch (638:35): [True: 9.69k, False: 9.69k]
  ------------------
  639|  9.69k|        Dav1dFrameContext *const f = &c->fc[n];
  640|       |
  641|       |        // clean-up threading stuff
  642|  9.69k|        if (c->n_fc > 1) {
  ------------------
  |  Branch (642:13): [True: 0, False: 9.69k]
  ------------------
  643|      0|            dav1d_free(f->tile_thread.lowest_pixel_mem);
  ------------------
  |  |  135|      0|#define dav1d_free(ptr) free(ptr)
  ------------------
  644|      0|            dav1d_free(f->frame_thread.b);
  ------------------
  |  |  135|      0|#define dav1d_free(ptr) free(ptr)
  ------------------
  645|      0|            dav1d_free_aligned(f->frame_thread.cbi);
  ------------------
  |  |  136|      0|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
  646|      0|            dav1d_free_aligned(f->frame_thread.pal_idx);
  ------------------
  |  |  136|      0|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
  647|      0|            dav1d_free_aligned(f->frame_thread.cf);
  ------------------
  |  |  136|      0|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
  648|      0|            dav1d_free(f->frame_thread.tile_start_off);
  ------------------
  |  |  135|      0|#define dav1d_free(ptr) free(ptr)
  ------------------
  649|      0|            dav1d_free_aligned(f->frame_thread.pal);
  ------------------
  |  |  136|      0|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
  650|      0|        }
  651|  9.69k|        if (c->n_tc > 1) {
  ------------------
  |  Branch (651:13): [True: 0, False: 9.69k]
  ------------------
  652|      0|            pthread_mutex_destroy(&f->task_thread.pending_tasks.lock);
  653|      0|            pthread_cond_destroy(&f->task_thread.cond);
  654|      0|            pthread_mutex_destroy(&f->task_thread.lock);
  655|      0|        }
  656|  9.69k|        dav1d_free(f->frame_thread.frame_progress);
  ------------------
  |  |  135|  9.69k|#define dav1d_free(ptr) free(ptr)
  ------------------
  657|  9.69k|        dav1d_free(f->task_thread.tasks);
  ------------------
  |  |  135|  9.69k|#define dav1d_free(ptr) free(ptr)
  ------------------
  658|  9.69k|        dav1d_free(f->task_thread.tile_tasks[0]);
  ------------------
  |  |  135|  9.69k|#define dav1d_free(ptr) free(ptr)
  ------------------
  659|  9.69k|        dav1d_free_aligned(f->ts);
  ------------------
  |  |  136|  9.69k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
  660|  9.69k|        dav1d_free_aligned(f->ipred_edge[0]);
  ------------------
  |  |  136|  9.69k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
  661|  9.69k|        dav1d_free(f->a);
  ------------------
  |  |  135|  9.69k|#define dav1d_free(ptr) free(ptr)
  ------------------
  662|  9.69k|        dav1d_free(f->tile);
  ------------------
  |  |  135|  9.69k|#define dav1d_free(ptr) free(ptr)
  ------------------
  663|  9.69k|        dav1d_free(f->lf.mask);
  ------------------
  |  |  135|  9.69k|#define dav1d_free(ptr) free(ptr)
  ------------------
  664|  9.69k|        dav1d_free(f->lf.level);
  ------------------
  |  |  135|  9.69k|#define dav1d_free(ptr) free(ptr)
  ------------------
  665|  9.69k|        dav1d_free(f->lf.lr_mask);
  ------------------
  |  |  135|  9.69k|#define dav1d_free(ptr) free(ptr)
  ------------------
  666|  9.69k|        dav1d_free(f->lf.tx_lpf_right_edge[0]);
  ------------------
  |  |  135|  9.69k|#define dav1d_free(ptr) free(ptr)
  ------------------
  667|  9.69k|        dav1d_free(f->lf.start_of_tile_row);
  ------------------
  |  |  135|  9.69k|#define dav1d_free(ptr) free(ptr)
  ------------------
  668|  9.69k|        dav1d_free_aligned(f->rf.r);
  ------------------
  |  |  136|  9.69k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
  669|  9.69k|        dav1d_free_aligned(f->lf.cdef_line_buf);
  ------------------
  |  |  136|  9.69k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
  670|  9.69k|        dav1d_free_aligned(f->lf.lr_line_buf);
  ------------------
  |  |  136|  9.69k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
  671|  9.69k|    }
  672|  9.69k|    dav1d_free_aligned(c->fc);
  ------------------
  |  |  136|  9.69k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
  673|  9.69k|    if (c->n_fc > 1 && c->frame_thread.out_delayed) {
  ------------------
  |  Branch (673:9): [True: 0, False: 9.69k]
  |  Branch (673:24): [True: 0, False: 0]
  ------------------
  674|      0|        for (unsigned n = 0; n < c->n_fc; n++)
  ------------------
  |  Branch (674:30): [True: 0, False: 0]
  ------------------
  675|      0|            if (c->frame_thread.out_delayed[n].p.frame_hdr)
  ------------------
  |  Branch (675:17): [True: 0, False: 0]
  ------------------
  676|      0|                dav1d_thread_picture_unref(&c->frame_thread.out_delayed[n]);
  677|      0|        dav1d_free(c->frame_thread.out_delayed);
  ------------------
  |  |  135|      0|#define dav1d_free(ptr) free(ptr)
  ------------------
  678|      0|    }
  679|  9.83k|    for (int n = 0; n < c->n_tile_data; n++)
  ------------------
  |  Branch (679:21): [True: 140, False: 9.69k]
  ------------------
  680|    140|        dav1d_data_unref_internal(&c->tile[n].data);
  681|  9.69k|    dav1d_free(c->tile);
  ------------------
  |  |  135|  9.69k|#define dav1d_free(ptr) free(ptr)
  ------------------
  682|  87.2k|    for (int n = 0; n < 8; n++) {
  ------------------
  |  Branch (682:21): [True: 77.5k, False: 9.69k]
  ------------------
  683|  77.5k|        dav1d_cdf_thread_unref(&c->cdf[n]);
  684|  77.5k|        if (c->refs[n].p.p.frame_hdr)
  ------------------
  |  Branch (684:13): [True: 0, False: 77.5k]
  ------------------
  685|      0|            dav1d_thread_picture_unref(&c->refs[n].p);
  686|  77.5k|        dav1d_ref_dec(&c->refs[n].refmvs);
  687|  77.5k|        dav1d_ref_dec(&c->refs[n].segmap);
  688|  77.5k|    }
  689|  9.69k|    dav1d_ref_dec(&c->seq_hdr_ref);
  690|  9.69k|    dav1d_ref_dec(&c->frame_hdr_ref);
  691|       |
  692|  9.69k|    dav1d_ref_dec(&c->mastering_display_ref);
  693|  9.69k|    dav1d_ref_dec(&c->content_light_ref);
  694|  9.69k|    dav1d_ref_dec(&c->itut_t35_ref);
  695|       |
  696|  9.69k|    dav1d_mem_pool_end(c->seq_hdr_pool);
  697|  9.69k|    dav1d_mem_pool_end(c->frame_hdr_pool);
  698|  9.69k|    dav1d_mem_pool_end(c->segmap_pool);
  699|  9.69k|    dav1d_mem_pool_end(c->refmvs_pool);
  700|  9.69k|    dav1d_mem_pool_end(c->cdf_pool);
  701|  9.69k|    dav1d_mem_pool_end(c->picture_pool);
  702|  9.69k|    dav1d_mem_pool_end(c->pic_ctx_pool);
  703|       |
  704|  9.69k|    dav1d_freep_aligned(c_out);
  705|  9.69k|}

dav1d_loop_filter_dsp_init_8bpc:
  259|  3.47k|COLD void bitfn(dav1d_loop_filter_dsp_init)(Dav1dLoopFilterDSPContext *const c) {
  260|  3.47k|    c->loop_filter_sb[0][0] = loop_filter_h_sb128y_c;
  261|  3.47k|    c->loop_filter_sb[0][1] = loop_filter_v_sb128y_c;
  262|  3.47k|    c->loop_filter_sb[1][0] = loop_filter_h_sb128uv_c;
  263|  3.47k|    c->loop_filter_sb[1][1] = loop_filter_v_sb128uv_c;
  264|       |
  265|  3.47k|#if HAVE_ASM
  266|       |#if ARCH_AARCH64 || ARCH_ARM
  267|       |    loop_filter_dsp_init_arm(c);
  268|       |#elif ARCH_LOONGARCH64
  269|       |    loop_filter_dsp_init_loongarch(c);
  270|       |#elif ARCH_PPC64LE
  271|       |    loop_filter_dsp_init_ppc(c);
  272|       |#elif ARCH_X86
  273|       |    loop_filter_dsp_init_x86(c);
  274|  3.47k|#endif
  275|  3.47k|#endif
  276|  3.47k|}
dav1d_loop_filter_dsp_init_16bpc:
  259|  4.68k|COLD void bitfn(dav1d_loop_filter_dsp_init)(Dav1dLoopFilterDSPContext *const c) {
  260|  4.68k|    c->loop_filter_sb[0][0] = loop_filter_h_sb128y_c;
  261|  4.68k|    c->loop_filter_sb[0][1] = loop_filter_v_sb128y_c;
  262|  4.68k|    c->loop_filter_sb[1][0] = loop_filter_h_sb128uv_c;
  263|  4.68k|    c->loop_filter_sb[1][1] = loop_filter_v_sb128uv_c;
  264|       |
  265|  4.68k|#if HAVE_ASM
  266|       |#if ARCH_AARCH64 || ARCH_ARM
  267|       |    loop_filter_dsp_init_arm(c);
  268|       |#elif ARCH_LOONGARCH64
  269|       |    loop_filter_dsp_init_loongarch(c);
  270|       |#elif ARCH_PPC64LE
  271|       |    loop_filter_dsp_init_ppc(c);
  272|       |#elif ARCH_X86
  273|       |    loop_filter_dsp_init_x86(c);
  274|  4.68k|#endif
  275|  4.68k|#endif
  276|  4.68k|}

dav1d_loop_restoration_dsp_init_8bpc:
 1367|  3.47k|{
 1368|  3.47k|    c->wiener[0] = c->wiener[1] = wiener_c;
 1369|  3.47k|    c->sgr[0] = sgr_5x5_c;
 1370|  3.47k|    c->sgr[1] = sgr_3x3_c;
 1371|  3.47k|    c->sgr[2] = sgr_mix_c;
 1372|       |
 1373|  3.47k|#if HAVE_ASM
 1374|       |#if ARCH_AARCH64 || ARCH_ARM
 1375|       |    loop_restoration_dsp_init_arm(c, bpc);
 1376|       |#elif ARCH_LOONGARCH64
 1377|       |    loop_restoration_dsp_init_loongarch(c, bpc);
 1378|       |#elif ARCH_PPC64LE
 1379|       |    loop_restoration_dsp_init_ppc(c, bpc);
 1380|       |#elif ARCH_X86
 1381|       |    loop_restoration_dsp_init_x86(c, bpc);
 1382|  3.47k|#endif
 1383|  3.47k|#endif
 1384|  3.47k|}
looprestoration_tmpl.c:sgr_5x5_c:
  830|  5.02k|{
  831|  5.02k|    ALIGN_STK_16(int32_t, sumsq_buf, BUF_STRIDE * 5 + 16,);
  ------------------
  |  |  100|  5.02k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  5.02k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
  832|  5.02k|    ALIGN_STK_16(coef, sum_buf, BUF_STRIDE * 5 + 16,);
  ------------------
  |  |  100|  5.02k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  5.02k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
  833|  5.02k|    int32_t *sumsq_ptrs[5], *sumsq_rows[5];
  834|  5.02k|    coef *sum_ptrs[5], *sum_rows[5];
  835|  30.1k|    for (int i = 0; i < 5; i++) {
  ------------------
  |  Branch (835:21): [True: 25.1k, False: 5.02k]
  ------------------
  836|  25.1k|        sumsq_rows[i] = &sumsq_buf[i * BUF_STRIDE];
  ------------------
  |  |  685|  25.1k|#define BUF_STRIDE (384 + 16)
  ------------------
  837|  25.1k|        sum_rows[i] = &sum_buf[i * BUF_STRIDE];
  ------------------
  |  |  685|  25.1k|#define BUF_STRIDE (384 + 16)
  ------------------
  838|  25.1k|    }
  839|       |
  840|  5.02k|    ALIGN_STK_16(int32_t, A_buf, BUF_STRIDE * 2 + 16,);
  ------------------
  |  |  100|  5.02k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  5.02k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
  841|  5.02k|    ALIGN_STK_16(coef, B_buf, BUF_STRIDE * 2 + 16,);
  ------------------
  |  |  100|  5.02k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  5.02k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
  842|  5.02k|    int32_t *A_ptrs[2];
  843|  5.02k|    coef *B_ptrs[2];
  844|  15.0k|    for (int i = 0; i < 2; i++) {
  ------------------
  |  Branch (844:21): [True: 10.0k, False: 5.02k]
  ------------------
  845|  10.0k|        A_ptrs[i] = &A_buf[i * BUF_STRIDE];
  ------------------
  |  |  685|  10.0k|#define BUF_STRIDE (384 + 16)
  ------------------
  846|  10.0k|        B_ptrs[i] = &B_buf[i * BUF_STRIDE];
  ------------------
  |  |  685|  10.0k|#define BUF_STRIDE (384 + 16)
  ------------------
  847|  10.0k|    }
  848|  5.02k|    const pixel *src = dst;
  849|  5.02k|    const pixel *lpf_bottom = lpf + 6*PXSTRIDE(stride);
  ------------------
  |  |   53|  5.02k|#define PXSTRIDE(x) (x)
  ------------------
  850|       |
  851|  5.02k|    if (edges & LR_HAVE_TOP) {
  ------------------
  |  Branch (851:9): [True: 2.38k, False: 2.64k]
  ------------------
  852|  2.38k|        sumsq_ptrs[0] = sumsq_rows[0];
  853|  2.38k|        sumsq_ptrs[1] = sumsq_rows[0];
  854|  2.38k|        sumsq_ptrs[2] = sumsq_rows[1];
  855|  2.38k|        sumsq_ptrs[3] = sumsq_rows[2];
  856|  2.38k|        sumsq_ptrs[4] = sumsq_rows[3];
  857|  2.38k|        sum_ptrs[0] = sum_rows[0];
  858|  2.38k|        sum_ptrs[1] = sum_rows[0];
  859|  2.38k|        sum_ptrs[2] = sum_rows[1];
  860|  2.38k|        sum_ptrs[3] = sum_rows[2];
  861|  2.38k|        sum_ptrs[4] = sum_rows[3];
  862|       |
  863|  2.38k|        sgr_box5_row_h(sumsq_rows[0], sum_rows[0], NULL, lpf, w, edges);
  864|  2.38k|        lpf += PXSTRIDE(stride);
  ------------------
  |  |   53|  2.38k|#define PXSTRIDE(x) (x)
  ------------------
  865|  2.38k|        sgr_box5_row_h(sumsq_rows[1], sum_rows[1], NULL, lpf, w, edges);
  866|       |
  867|  2.38k|        sgr_box5_row_h(sumsq_rows[2], sum_rows[2], left, src, w, edges);
  868|  2.38k|        left++;
  869|  2.38k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|  2.38k|#define PXSTRIDE(x) (x)
  ------------------
  870|       |
  871|  2.38k|        if (--h <= 0)
  ------------------
  |  Branch (871:13): [True: 289, False: 2.09k]
  ------------------
  872|    289|            goto vert_1;
  873|       |
  874|  2.09k|        sgr_box5_row_h(sumsq_rows[3], sum_rows[3], left, src, w, edges);
  875|  2.09k|        left++;
  876|  2.09k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|  2.09k|#define PXSTRIDE(x) (x)
  ------------------
  877|  2.09k|        sgr_box5_vert(sumsq_ptrs, sum_ptrs, A_ptrs[1], B_ptrs[1],
  878|  2.09k|                      w, params->sgr.s0, BITDEPTH_MAX);
  ------------------
  |  |   59|  2.09k|#define BITDEPTH_MAX 0xff
  ------------------
  879|  2.09k|        rotate(A_ptrs, B_ptrs, 2);
  880|       |
  881|  2.09k|        if (--h <= 0)
  ------------------
  |  Branch (881:13): [True: 276, False: 1.81k]
  ------------------
  882|    276|            goto vert_2;
  883|       |
  884|       |        // ptrs are rotated by 2; both [3] and [4] now point at rows[0]; set
  885|       |        // one of them to point at the previously unused rows[4].
  886|  1.81k|        sumsq_ptrs[3] = sumsq_rows[4];
  887|  1.81k|        sum_ptrs[3] = sum_rows[4];
  888|  2.64k|    } else {
  889|  2.64k|        sumsq_ptrs[0] = sumsq_rows[0];
  890|  2.64k|        sumsq_ptrs[1] = sumsq_rows[0];
  891|  2.64k|        sumsq_ptrs[2] = sumsq_rows[0];
  892|  2.64k|        sumsq_ptrs[3] = sumsq_rows[0];
  893|  2.64k|        sumsq_ptrs[4] = sumsq_rows[0];
  894|  2.64k|        sum_ptrs[0] = sum_rows[0];
  895|  2.64k|        sum_ptrs[1] = sum_rows[0];
  896|  2.64k|        sum_ptrs[2] = sum_rows[0];
  897|  2.64k|        sum_ptrs[3] = sum_rows[0];
  898|  2.64k|        sum_ptrs[4] = sum_rows[0];
  899|       |
  900|  2.64k|        sgr_box5_row_h(sumsq_rows[0], sum_rows[0], left, src, w, edges);
  901|  2.64k|        left++;
  902|  2.64k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|  2.64k|#define PXSTRIDE(x) (x)
  ------------------
  903|       |
  904|  2.64k|        if (--h <= 0)
  ------------------
  |  Branch (904:13): [True: 293, False: 2.35k]
  ------------------
  905|    293|            goto vert_1;
  906|       |
  907|  2.35k|        sumsq_ptrs[4] = sumsq_rows[1];
  908|  2.35k|        sum_ptrs[4] = sum_rows[1];
  909|       |
  910|  2.35k|        sgr_box5_row_h(sumsq_rows[1], sum_rows[1], left, src, w, edges);
  911|  2.35k|        left++;
  912|  2.35k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|  2.35k|#define PXSTRIDE(x) (x)
  ------------------
  913|       |
  914|  2.35k|        sgr_box5_vert(sumsq_ptrs, sum_ptrs, A_ptrs[1], B_ptrs[1],
  915|  2.35k|                      w, params->sgr.s0, BITDEPTH_MAX);
  ------------------
  |  |   59|  2.35k|#define BITDEPTH_MAX 0xff
  ------------------
  916|  2.35k|        rotate(A_ptrs, B_ptrs, 2);
  917|       |
  918|  2.35k|        if (--h <= 0)
  ------------------
  |  Branch (918:13): [True: 439, False: 1.91k]
  ------------------
  919|    439|            goto vert_2;
  920|       |
  921|  1.91k|        sumsq_ptrs[3] = sumsq_rows[2];
  922|  1.91k|        sumsq_ptrs[4] = sumsq_rows[3];
  923|  1.91k|        sum_ptrs[3] = sum_rows[2];
  924|  1.91k|        sum_ptrs[4] = sum_rows[3];
  925|       |
  926|  1.91k|        sgr_box5_row_h(sumsq_rows[2], sum_rows[2], left, src, w, edges);
  927|  1.91k|        left++;
  928|  1.91k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|  1.91k|#define PXSTRIDE(x) (x)
  ------------------
  929|       |
  930|  1.91k|        if (--h <= 0)
  ------------------
  |  Branch (930:13): [True: 263, False: 1.65k]
  ------------------
  931|    263|            goto odd;
  932|       |
  933|  1.65k|        sgr_box5_row_h(sumsq_rows[3], sum_rows[3], left, src, w, edges);
  934|  1.65k|        left++;
  935|  1.65k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|  1.65k|#define PXSTRIDE(x) (x)
  ------------------
  936|       |
  937|  1.65k|        sgr_box5_vert(sumsq_ptrs, sum_ptrs, A_ptrs[1], B_ptrs[1],
  938|  1.65k|                      w, params->sgr.s0, BITDEPTH_MAX);
  ------------------
  |  |   59|  1.65k|#define BITDEPTH_MAX 0xff
  ------------------
  939|  1.65k|        sgr_finish2(&dst, stride, A_ptrs, B_ptrs,
  940|  1.65k|                    w, 2, params->sgr.w0 HIGHBD_TAIL_SUFFIX);
  941|       |
  942|  1.65k|        if (--h <= 0)
  ------------------
  |  Branch (942:13): [True: 285, False: 1.36k]
  ------------------
  943|    285|            goto vert_2;
  944|       |
  945|       |        // ptrs are rotated by 2; both [3] and [4] now point at rows[0]; set
  946|       |        // one of them to point at the previously unused rows[4].
  947|  1.36k|        sumsq_ptrs[3] = sumsq_rows[4];
  948|  1.36k|        sum_ptrs[3] = sum_rows[4];
  949|  1.36k|    }
  950|       |
  951|  73.7k|    do {
  952|  73.7k|        sgr_box5_row_h(sumsq_ptrs[3], sum_ptrs[3], left, src, w, edges);
  953|  73.7k|        left++;
  954|  73.7k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|  73.7k|#define PXSTRIDE(x) (x)
  ------------------
  955|       |
  956|  73.7k|        if (--h <= 0)
  ------------------
  |  Branch (956:13): [True: 313, False: 73.4k]
  ------------------
  957|    313|            goto odd;
  958|       |
  959|  73.4k|        sgr_box5_row_h(sumsq_ptrs[4], sum_ptrs[4], left, src, w, edges);
  960|  73.4k|        left++;
  961|  73.4k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|  73.4k|#define PXSTRIDE(x) (x)
  ------------------
  962|       |
  963|  73.4k|        sgr_box5_vert(sumsq_ptrs, sum_ptrs, A_ptrs[1], B_ptrs[1],
  964|  73.4k|                      w, params->sgr.s0, BITDEPTH_MAX);
  ------------------
  |  |   59|  73.4k|#define BITDEPTH_MAX 0xff
  ------------------
  965|  73.4k|        sgr_finish2(&dst, stride, A_ptrs, B_ptrs,
  966|  73.4k|                    w, 2, params->sgr.w0 HIGHBD_TAIL_SUFFIX);
  967|  73.4k|    } while (--h > 0);
  ------------------
  |  Branch (967:14): [True: 70.5k, False: 2.86k]
  ------------------
  968|       |
  969|  2.86k|    if (!(edges & LR_HAVE_BOTTOM))
  ------------------
  |  Branch (969:9): [True: 418, False: 2.45k]
  ------------------
  970|    418|        goto vert_2;
  971|       |
  972|  2.45k|    sgr_box5_row_h(sumsq_ptrs[3], sum_ptrs[3], NULL, lpf_bottom, w, edges);
  973|  2.45k|    lpf_bottom += PXSTRIDE(stride);
  ------------------
  |  |   53|  2.45k|#define PXSTRIDE(x) (x)
  ------------------
  974|  2.45k|    sgr_box5_row_h(sumsq_ptrs[4], sum_ptrs[4], NULL, lpf_bottom, w, edges);
  975|       |
  976|  3.86k|output_2:
  977|  3.86k|    sgr_box5_vert(sumsq_ptrs, sum_ptrs, A_ptrs[1], B_ptrs[1],
  978|  3.86k|                  w, params->sgr.s0, BITDEPTH_MAX);
  ------------------
  |  |   59|  3.86k|#define BITDEPTH_MAX 0xff
  ------------------
  979|  3.86k|    sgr_finish2(&dst, stride, A_ptrs, B_ptrs,
  980|  3.86k|                w, 2, params->sgr.w0 HIGHBD_TAIL_SUFFIX);
  981|  3.86k|    return;
  982|       |
  983|  1.41k|vert_2:
  984|       |    // Duplicate the last row twice more
  985|  1.41k|    sumsq_ptrs[3] = sumsq_ptrs[2];
  986|  1.41k|    sumsq_ptrs[4] = sumsq_ptrs[2];
  987|  1.41k|    sum_ptrs[3] = sum_ptrs[2];
  988|  1.41k|    sum_ptrs[4] = sum_ptrs[2];
  989|  1.41k|    goto output_2;
  990|       |
  991|    576|odd:
  992|       |    // Copy the last row as padding once
  993|    576|    sumsq_ptrs[4] = sumsq_ptrs[3];
  994|    576|    sum_ptrs[4] = sum_ptrs[3];
  995|       |
  996|    576|    sgr_box5_vert(sumsq_ptrs, sum_ptrs, A_ptrs[1], B_ptrs[1],
  997|    576|                  w, params->sgr.s0, BITDEPTH_MAX);
  ------------------
  |  |   59|    576|#define BITDEPTH_MAX 0xff
  ------------------
  998|    576|    sgr_finish2(&dst, stride, A_ptrs, B_ptrs,
  999|    576|                w, 2, params->sgr.w0 HIGHBD_TAIL_SUFFIX);
 1000|       |
 1001|  1.15k|output_1:
 1002|       |    // Duplicate the last row twice more
 1003|  1.15k|    sumsq_ptrs[3] = sumsq_ptrs[2];
 1004|  1.15k|    sumsq_ptrs[4] = sumsq_ptrs[2];
 1005|  1.15k|    sum_ptrs[3] = sum_ptrs[2];
 1006|  1.15k|    sum_ptrs[4] = sum_ptrs[2];
 1007|       |
 1008|  1.15k|    sgr_box5_vert(sumsq_ptrs, sum_ptrs, A_ptrs[1], B_ptrs[1],
 1009|  1.15k|                  w, params->sgr.s0, BITDEPTH_MAX);
  ------------------
  |  |   59|  1.15k|#define BITDEPTH_MAX 0xff
  ------------------
 1010|       |    // Output only one row
 1011|  1.15k|    sgr_finish2(&dst, stride, A_ptrs, B_ptrs,
 1012|  1.15k|                w, 1, params->sgr.w0 HIGHBD_TAIL_SUFFIX);
 1013|  1.15k|    return;
 1014|       |
 1015|    582|vert_1:
 1016|       |    // Copy the last row as padding once
 1017|    582|    sumsq_ptrs[4] = sumsq_ptrs[3];
 1018|    582|    sum_ptrs[4] = sum_ptrs[3];
 1019|       |
 1020|    582|    sgr_box5_vert(sumsq_ptrs, sum_ptrs, A_ptrs[1], B_ptrs[1],
 1021|    582|                  w, params->sgr.s0, BITDEPTH_MAX);
  ------------------
  |  |   59|    582|#define BITDEPTH_MAX 0xff
  ------------------
 1022|    582|    rotate(A_ptrs, B_ptrs, 2);
 1023|       |
 1024|    582|    goto output_1;
 1025|    576|}
looprestoration_tmpl.c:sgr_box5_row_h:
  441|   339k|{
  442|   339k|    sumsq++;
  443|   339k|    sum++;
  444|   339k|    int a = edges & LR_HAVE_LEFT ? (left ? left[0][1] : src[-3]) : src[0];
  ------------------
  |  Branch (444:13): [True: 278k, False: 60.8k]
  |  Branch (444:37): [True: 262k, False: 15.6k]
  ------------------
  445|   339k|    int b = edges & LR_HAVE_LEFT ? (left ? left[0][2] : src[-2]) : src[0];
  ------------------
  |  Branch (445:13): [True: 278k, False: 60.8k]
  |  Branch (445:37): [True: 262k, False: 15.6k]
  ------------------
  446|   339k|    int c = edges & LR_HAVE_LEFT ? (left ? left[0][3] : src[-1]) : src[0];
  ------------------
  |  Branch (446:13): [True: 278k, False: 60.8k]
  |  Branch (446:37): [True: 262k, False: 15.6k]
  ------------------
  447|   339k|    int d = src[0];
  448|  44.4M|    for (int x = -1; x < w + 1; x++) {
  ------------------
  |  Branch (448:22): [True: 44.1M, False: 339k]
  ------------------
  449|  44.1M|        int e = (x + 2 < w || (edges & LR_HAVE_RIGHT)) ? src[x + 2] : src[w - 1];
  ------------------
  |  Branch (449:18): [True: 43.1M, False: 1.01M]
  |  Branch (449:31): [True: 827k, False: 190k]
  ------------------
  450|  44.1M|        sum[x] = a + b + c + d + e;
  451|  44.1M|        sumsq[x] = a * a + b * b + c * c + d * d + e * e;
  452|  44.1M|        a = b;
  453|  44.1M|        b = c;
  454|  44.1M|        c = d;
  455|  44.1M|        d = e;
  456|  44.1M|    }
  457|   339k|}
looprestoration_tmpl.c:sgr_box5_vert:
  537|   171k|{
  538|   171k|    sgr_box5_row_v(sumsq, sum, sumsq_out, sum_out, w);
  539|   171k|    sgr_calc_row_ab(sumsq_out, sum_out, w, s, bitdepth_max, 25, 164);
  540|   171k|    rotate5_x2(sumsq, sum);
  541|   171k|}
looprestoration_tmpl.c:sgr_box5_row_v:
  488|   171k|{
  489|  22.2M|    for (int x = 0; x < w + 2; x++) {
  ------------------
  |  Branch (489:21): [True: 22.0M, False: 171k]
  ------------------
  490|  22.0M|        int sq_a = sumsq[0][x];
  491|  22.0M|        int sq_b = sumsq[1][x];
  492|  22.0M|        int sq_c = sumsq[2][x];
  493|  22.0M|        int sq_d = sumsq[3][x];
  494|  22.0M|        int sq_e = sumsq[4][x];
  495|  22.0M|        int s_a = sum[0][x];
  496|  22.0M|        int s_b = sum[1][x];
  497|  22.0M|        int s_c = sum[2][x];
  498|  22.0M|        int s_d = sum[3][x];
  499|  22.0M|        int s_e = sum[4][x];
  500|  22.0M|        sumsq_out[x] = sq_a + sq_b + sq_c + sq_d + sq_e;
  501|  22.0M|        sum_out[x] = s_a + s_b + s_c + s_d + s_e;
  502|  22.0M|    }
  503|   171k|}
looprestoration_tmpl.c:sgr_calc_row_ab:
  507|   433k|{
  508|   433k|    const int bitdepth_min_8 = bitdepth_from_max(bitdepth_max) - 8;
  ------------------
  |  |   58|   433k|#define bitdepth_from_max(x) 8
  ------------------
  509|  51.4M|    for (int i = 0; i < w + 2; i++) {
  ------------------
  |  Branch (509:21): [True: 51.0M, False: 433k]
  ------------------
  510|  51.0M|        const int a =
  511|  51.0M|            (AA[i] + ((1 << (2 * bitdepth_min_8)) >> 1)) >> (2 * bitdepth_min_8);
  512|  51.0M|        const int b =
  513|  51.0M|            (BB[i] + ((1 << bitdepth_min_8) >> 1)) >> bitdepth_min_8;
  514|       |
  515|  51.0M|        const unsigned p = imax(a * n - b * b, 0);
  516|  51.0M|        const unsigned z = (p * s + (1 << 19)) >> 20;
  517|  51.0M|        const unsigned x = dav1d_sgr_x_by_x[umin(z, 255)];
  518|       |
  519|       |        // This is where we invert A and B, so that B is of size coef.
  520|  51.0M|        AA[i] = (x * BB[i] * sgr_one_by_x + (1 << 11)) >> 12;
  521|  51.0M|        BB[i] = x;
  522|  51.0M|    }
  523|   433k|}
looprestoration_tmpl.c:rotate5_x2:
  402|   171k|{
  403|   171k|    int32_t *tmp32[2];
  404|   171k|    coef *tmpc[2];
  405|   515k|    for (int i = 0; i < 2; i++) {
  ------------------
  |  Branch (405:21): [True: 343k, False: 171k]
  ------------------
  406|   343k|        tmp32[i] = sumsq_ptrs[i];
  407|   343k|        tmpc[i] = sum_ptrs[i];
  408|   343k|    }
  409|   686k|    for (int i = 0; i < 3; i++) {
  ------------------
  |  Branch (409:21): [True: 515k, False: 171k]
  ------------------
  410|   515k|        sumsq_ptrs[i] = sumsq_ptrs[i + 2];
  411|   515k|        sum_ptrs[i] = sum_ptrs[i + 2];
  412|   515k|    }
  413|   515k|    for (int i = 0; i < 2; i++) {
  ------------------
  |  Branch (413:21): [True: 343k, False: 171k]
  ------------------
  414|   343k|        sumsq_ptrs[3 + i] = tmp32[i];
  415|   343k|        sum_ptrs[3 + i] = tmpc[i];
  416|   343k|    }
  417|   171k|}
looprestoration_tmpl.c:rotate:
  390|   696k|{
  391|   696k|    int32_t *tmp32 = sumsq_ptrs[0];
  392|   696k|    coef *tmpc = sum_ptrs[0];
  393|  2.09M|    for (int i = 0; i < n - 1; i++) {
  ------------------
  |  Branch (393:21): [True: 1.39M, False: 696k]
  ------------------
  394|  1.39M|        sumsq_ptrs[i] = sumsq_ptrs[i + 1];
  395|  1.39M|        sum_ptrs[i] = sum_ptrs[i + 1];
  396|  1.39M|    }
  397|   696k|    sumsq_ptrs[n - 1] = tmp32;
  398|   696k|    sum_ptrs[n - 1] = tmpc;
  399|   696k|}
looprestoration_tmpl.c:sgr_finish2:
  645|  80.6k|{
  646|  80.6k|    ALIGN_STK_16(coef, tmp, 2*FILTER_OUT_STRIDE,);
  ------------------
  |  |  100|  80.6k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  80.6k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
  647|       |
  648|  80.6k|    sgr_finish_filter2(tmp, *dst, stride, A_ptrs, B_ptrs, w, h);
  649|  80.6k|    sgr_weighted_row1(*dst, tmp, w, w1 HIGHBD_TAIL_SUFFIX);
  650|  80.6k|    *dst += PXSTRIDE(stride);
  ------------------
  |  |   53|  80.6k|#define PXSTRIDE(x) (x)
  ------------------
  651|  80.6k|    if (h > 1) {
  ------------------
  |  Branch (651:9): [True: 79.5k, False: 1.15k]
  ------------------
  652|  79.5k|        sgr_weighted_row1(*dst, tmp + FILTER_OUT_STRIDE, w, w1 HIGHBD_TAIL_SUFFIX);
  ------------------
  |  |  572|  79.5k|#define FILTER_OUT_STRIDE (384)
  ------------------
  653|  79.5k|        *dst += PXSTRIDE(stride);
  ------------------
  |  |   53|  79.5k|#define PXSTRIDE(x) (x)
  ------------------
  654|  79.5k|    }
  655|  80.6k|    rotate(A_ptrs, B_ptrs, 2);
  656|  80.6k|}
looprestoration_tmpl.c:sgr_finish_filter2:
  579|   161k|{
  580|   161k|#define SIX_NEIGHBORS(P, i)\
  581|   161k|    ((P[0][i]     + P[1][i]) * 6 +   \
  582|   161k|     (P[0][i - 1] + P[1][i - 1] +    \
  583|   161k|      P[0][i + 1] + P[1][i + 1]) * 5)
  584|  20.7M|    for (int i = 0; i < w; i++) {
  ------------------
  |  Branch (584:21): [True: 20.5M, False: 161k]
  ------------------
  585|  20.5M|        const int a = SIX_NEIGHBORS(B_ptrs, i + 1);
  ------------------
  |  |  581|  20.5M|    ((P[0][i]     + P[1][i]) * 6 +   \
  |  |  582|  20.5M|     (P[0][i - 1] + P[1][i - 1] +    \
  |  |  583|  20.5M|      P[0][i + 1] + P[1][i + 1]) * 5)
  ------------------
  586|  20.5M|        const int b = SIX_NEIGHBORS(A_ptrs, i + 1);
  ------------------
  |  |  581|  20.5M|    ((P[0][i]     + P[1][i]) * 6 +   \
  |  |  582|  20.5M|     (P[0][i - 1] + P[1][i - 1] +    \
  |  |  583|  20.5M|      P[0][i + 1] + P[1][i + 1]) * 5)
  ------------------
  587|  20.5M|        tmp[i] = (b - a * src[i] + (1 << 8)) >> 9;
  588|  20.5M|    }
  589|   161k|    if (h <= 1)
  ------------------
  |  Branch (589:9): [True: 2.64k, False: 158k]
  ------------------
  590|  2.64k|        return;
  591|   158k|    tmp += FILTER_OUT_STRIDE;
  ------------------
  |  |  572|   158k|#define FILTER_OUT_STRIDE (384)
  ------------------
  592|   158k|    src += PXSTRIDE(src_stride);
  ------------------
  |  |   53|   158k|#define PXSTRIDE(x) (x)
  ------------------
  593|   158k|    const int32_t *A = &A_ptrs[1][1];
  594|   158k|    const coef *B = &B_ptrs[1][1];
  595|  20.4M|    for (int i = 0; i < w; i++) {
  ------------------
  |  Branch (595:21): [True: 20.3M, False: 158k]
  ------------------
  596|  20.3M|        const int a = B[i] * 6 + (B[i - 1] + B[i + 1]) * 5;
  597|  20.3M|        const int b = A[i] * 6 + (A[i - 1] + A[i + 1]) * 5;
  598|  20.3M|        tmp[i] = (b - a * src[i] + (1 << 7)) >> 8;
  599|  20.3M|    }
  600|   158k|#undef SIX_NEIGHBORS
  601|   158k|}
looprestoration_tmpl.c:sgr_weighted_row1:
  605|   245k|{
  606|  33.8M|    for (int i = 0; i < w; i++) {
  ------------------
  |  Branch (606:21): [True: 33.5M, False: 245k]
  ------------------
  607|  33.5M|        const int v = w1 * t1[i];
  608|  33.5M|        dst[i] = iclip_pixel(dst[i] + ((v + (1 << 10)) >> 11));
  ------------------
  |  |   49|  33.5M|#define iclip_pixel iclip_u8
  ------------------
  609|  33.5M|    }
  610|   245k|}
looprestoration_tmpl.c:sgr_3x3_c:
  684|  2.84k|{
  685|  2.84k|#define BUF_STRIDE (384 + 16)
  686|  2.84k|    ALIGN_STK_16(int32_t, sumsq_buf, BUF_STRIDE * 3 + 16,);
  ------------------
  |  |  100|  2.84k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  2.84k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
  687|  2.84k|    ALIGN_STK_16(coef, sum_buf, BUF_STRIDE * 3 + 16,);
  ------------------
  |  |  100|  2.84k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  2.84k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
  688|  2.84k|    int32_t *sumsq_ptrs[3], *sumsq_rows[3];
  689|  2.84k|    coef *sum_ptrs[3], *sum_rows[3];
  690|  11.3k|    for (int i = 0; i < 3; i++) {
  ------------------
  |  Branch (690:21): [True: 8.52k, False: 2.84k]
  ------------------
  691|  8.52k|        sumsq_rows[i] = &sumsq_buf[i * BUF_STRIDE];
  ------------------
  |  |  685|  8.52k|#define BUF_STRIDE (384 + 16)
  ------------------
  692|  8.52k|        sum_rows[i] = &sum_buf[i * BUF_STRIDE];
  ------------------
  |  |  685|  8.52k|#define BUF_STRIDE (384 + 16)
  ------------------
  693|  8.52k|    }
  694|       |
  695|  2.84k|    ALIGN_STK_16(int32_t, A_buf, BUF_STRIDE * 3 + 16,);
  ------------------
  |  |  100|  2.84k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  2.84k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
  696|  2.84k|    ALIGN_STK_16(coef, B_buf, BUF_STRIDE * 3 + 16,);
  ------------------
  |  |  100|  2.84k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  2.84k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
  697|  2.84k|    int32_t *A_ptrs[3];
  698|  2.84k|    coef *B_ptrs[3];
  699|  11.3k|    for (int i = 0; i < 3; i++) {
  ------------------
  |  Branch (699:21): [True: 8.52k, False: 2.84k]
  ------------------
  700|  8.52k|        A_ptrs[i] = &A_buf[i * BUF_STRIDE];
  ------------------
  |  |  685|  8.52k|#define BUF_STRIDE (384 + 16)
  ------------------
  701|  8.52k|        B_ptrs[i] = &B_buf[i * BUF_STRIDE];
  ------------------
  |  |  685|  8.52k|#define BUF_STRIDE (384 + 16)
  ------------------
  702|  8.52k|    }
  703|  2.84k|    const pixel *src = dst;
  704|  2.84k|    const pixel *lpf_bottom = lpf + 6*PXSTRIDE(stride);
  ------------------
  |  |   53|  2.84k|#define PXSTRIDE(x) (x)
  ------------------
  705|       |
  706|  2.84k|    if (edges & LR_HAVE_TOP) {
  ------------------
  |  Branch (706:9): [True: 1.34k, False: 1.49k]
  ------------------
  707|  1.34k|        sumsq_ptrs[0] = sumsq_rows[0];
  708|  1.34k|        sumsq_ptrs[1] = sumsq_rows[1];
  709|  1.34k|        sumsq_ptrs[2] = sumsq_rows[2];
  710|  1.34k|        sum_ptrs[0] = sum_rows[0];
  711|  1.34k|        sum_ptrs[1] = sum_rows[1];
  712|  1.34k|        sum_ptrs[2] = sum_rows[2];
  713|       |
  714|  1.34k|        sgr_box3_row_h(sumsq_rows[0], sum_rows[0], NULL, lpf, w, edges);
  715|  1.34k|        lpf += PXSTRIDE(stride);
  ------------------
  |  |   53|  1.34k|#define PXSTRIDE(x) (x)
  ------------------
  716|  1.34k|        sgr_box3_row_h(sumsq_rows[1], sum_rows[1], NULL, lpf, w, edges);
  717|       |
  718|  1.34k|        sgr_box3_hv(sumsq_ptrs, sum_ptrs, A_ptrs[2], B_ptrs[2],
  719|  1.34k|                    left, src, w, params->sgr.s1, edges, BITDEPTH_MAX);
  ------------------
  |  |   59|  1.34k|#define BITDEPTH_MAX 0xff
  ------------------
  720|  1.34k|        left++;
  721|  1.34k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|  1.34k|#define PXSTRIDE(x) (x)
  ------------------
  722|  1.34k|        rotate(A_ptrs, B_ptrs, 3);
  723|       |
  724|  1.34k|        if (--h <= 0)
  ------------------
  |  Branch (724:13): [True: 369, False: 977]
  ------------------
  725|    369|            goto vert_1;
  726|       |
  727|    977|        sgr_box3_hv(sumsq_ptrs, sum_ptrs, A_ptrs[2], B_ptrs[2],
  728|    977|                    left, src, w, params->sgr.s1, edges, BITDEPTH_MAX);
  ------------------
  |  |   59|    977|#define BITDEPTH_MAX 0xff
  ------------------
  729|    977|        left++;
  730|    977|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|    977|#define PXSTRIDE(x) (x)
  ------------------
  731|    977|        rotate(A_ptrs, B_ptrs, 3);
  732|       |
  733|    977|        if (--h <= 0)
  ------------------
  |  Branch (733:13): [True: 140, False: 837]
  ------------------
  734|    140|            goto vert_2;
  735|  1.49k|    } else {
  736|  1.49k|        sumsq_ptrs[0] = sumsq_rows[0];
  737|  1.49k|        sumsq_ptrs[1] = sumsq_rows[0];
  738|  1.49k|        sumsq_ptrs[2] = sumsq_rows[0];
  739|  1.49k|        sum_ptrs[0] = sum_rows[0];
  740|  1.49k|        sum_ptrs[1] = sum_rows[0];
  741|  1.49k|        sum_ptrs[2] = sum_rows[0];
  742|       |
  743|  1.49k|        sgr_box3_row_h(sumsq_rows[0], sum_rows[0], left, src, w, edges);
  744|  1.49k|        left++;
  745|  1.49k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|  1.49k|#define PXSTRIDE(x) (x)
  ------------------
  746|       |
  747|  1.49k|        sgr_box3_vert(sumsq_ptrs, sum_ptrs, A_ptrs[2], B_ptrs[2],
  748|  1.49k|                      w, params->sgr.s1, BITDEPTH_MAX);
  ------------------
  |  |   59|  1.49k|#define BITDEPTH_MAX 0xff
  ------------------
  749|  1.49k|        rotate(A_ptrs, B_ptrs, 3);
  750|       |
  751|  1.49k|        if (--h <= 0)
  ------------------
  |  Branch (751:13): [True: 149, False: 1.34k]
  ------------------
  752|    149|            goto vert_1;
  753|       |
  754|  1.34k|        sumsq_ptrs[2] = sumsq_rows[1];
  755|  1.34k|        sum_ptrs[2] = sum_rows[1];
  756|       |
  757|  1.34k|        sgr_box3_hv(sumsq_ptrs, sum_ptrs, A_ptrs[2], B_ptrs[2],
  758|  1.34k|                    left, src, w, params->sgr.s1, edges, BITDEPTH_MAX);
  ------------------
  |  |   59|  1.34k|#define BITDEPTH_MAX 0xff
  ------------------
  759|  1.34k|        left++;
  760|  1.34k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|  1.34k|#define PXSTRIDE(x) (x)
  ------------------
  761|  1.34k|        rotate(A_ptrs, B_ptrs, 3);
  762|       |
  763|  1.34k|        if (--h <= 0)
  ------------------
  |  Branch (763:13): [True: 321, False: 1.02k]
  ------------------
  764|    321|            goto vert_2;
  765|       |
  766|  1.02k|        sumsq_ptrs[2] = sumsq_rows[2];
  767|  1.02k|        sum_ptrs[2] = sum_rows[2];
  768|  1.02k|    }
  769|       |
  770|  80.5k|    do {
  771|  80.5k|        sgr_box3_hv(sumsq_ptrs, sum_ptrs, A_ptrs[2], B_ptrs[2],
  772|  80.5k|                    left, src, w, params->sgr.s1, edges, BITDEPTH_MAX);
  ------------------
  |  |   59|  80.5k|#define BITDEPTH_MAX 0xff
  ------------------
  773|  80.5k|        left++;
  774|  80.5k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|  80.5k|#define PXSTRIDE(x) (x)
  ------------------
  775|       |
  776|  80.5k|        sgr_finish1(&dst, stride, A_ptrs, B_ptrs,
  777|  80.5k|                    w, params->sgr.w1 HIGHBD_TAIL_SUFFIX);
  778|  80.5k|    } while (--h > 0);
  ------------------
  |  Branch (778:14): [True: 78.7k, False: 1.86k]
  ------------------
  779|       |
  780|  1.86k|    if (!(edges & LR_HAVE_BOTTOM))
  ------------------
  |  Branch (780:9): [True: 485, False: 1.37k]
  ------------------
  781|    485|        goto vert_2;
  782|       |
  783|  1.37k|    sgr_box3_hv(sumsq_ptrs, sum_ptrs, A_ptrs[2], B_ptrs[2],
  784|  1.37k|                NULL, lpf_bottom, w, params->sgr.s1, edges, BITDEPTH_MAX);
  ------------------
  |  |   59|  1.37k|#define BITDEPTH_MAX 0xff
  ------------------
  785|  1.37k|    lpf_bottom += PXSTRIDE(stride);
  ------------------
  |  |   53|  1.37k|#define PXSTRIDE(x) (x)
  ------------------
  786|       |
  787|  1.37k|    sgr_finish1(&dst, stride, A_ptrs, B_ptrs,
  788|  1.37k|                w, params->sgr.w1 HIGHBD_TAIL_SUFFIX);
  789|       |
  790|  1.37k|    sgr_box3_hv(sumsq_ptrs, sum_ptrs, A_ptrs[2], B_ptrs[2],
  791|  1.37k|                NULL, lpf_bottom, w, params->sgr.s1, edges, BITDEPTH_MAX);
  ------------------
  |  |   59|  1.37k|#define BITDEPTH_MAX 0xff
  ------------------
  792|       |
  793|  1.37k|    sgr_finish1(&dst, stride, A_ptrs, B_ptrs,
  794|  1.37k|                w, params->sgr.w1 HIGHBD_TAIL_SUFFIX);
  795|  1.37k|    return;
  796|       |
  797|    946|vert_2:
  798|    946|    sumsq_ptrs[2] = sumsq_ptrs[1];
  799|    946|    sum_ptrs[2] = sum_ptrs[1];
  800|    946|    sgr_box3_vert(sumsq_ptrs, sum_ptrs, A_ptrs[2], B_ptrs[2],
  801|    946|                  w, params->sgr.s1, BITDEPTH_MAX);
  ------------------
  |  |   59|    946|#define BITDEPTH_MAX 0xff
  ------------------
  802|       |
  803|    946|    sgr_finish1(&dst, stride, A_ptrs, B_ptrs,
  804|    946|                w, params->sgr.w1 HIGHBD_TAIL_SUFFIX);
  805|       |
  806|  1.46k|output_1:
  807|  1.46k|    sumsq_ptrs[2] = sumsq_ptrs[1];
  808|  1.46k|    sum_ptrs[2] = sum_ptrs[1];
  809|  1.46k|    sgr_box3_vert(sumsq_ptrs, sum_ptrs, A_ptrs[2], B_ptrs[2],
  810|  1.46k|                  w, params->sgr.s1, BITDEPTH_MAX);
  ------------------
  |  |   59|  1.46k|#define BITDEPTH_MAX 0xff
  ------------------
  811|       |
  812|  1.46k|    sgr_finish1(&dst, stride, A_ptrs, B_ptrs,
  813|  1.46k|                w, params->sgr.w1 HIGHBD_TAIL_SUFFIX);
  814|  1.46k|    return;
  815|       |
  816|    518|vert_1:
  817|    518|    sumsq_ptrs[2] = sumsq_ptrs[1];
  818|    518|    sum_ptrs[2] = sum_ptrs[1];
  819|    518|    sgr_box3_vert(sumsq_ptrs, sum_ptrs, A_ptrs[2], B_ptrs[2],
  820|    518|                  w, params->sgr.s1, BITDEPTH_MAX);
  ------------------
  |  |   59|    518|#define BITDEPTH_MAX 0xff
  ------------------
  821|    518|    rotate(A_ptrs, B_ptrs, 3);
  822|    518|    goto output_1;
  823|    946|}
looprestoration_tmpl.c:sgr_box3_row_h:
  423|   260k|{
  424|   260k|    sumsq++;
  425|   260k|    sum++;
  426|   260k|    int a = edges & LR_HAVE_LEFT ? (left ? left[0][2] : src[-2]) : src[0];
  ------------------
  |  Branch (426:13): [True: 222k, False: 37.7k]
  |  Branch (426:37): [True: 210k, False: 12.7k]
  ------------------
  427|   260k|    int b = edges & LR_HAVE_LEFT ? (left ? left[0][3] : src[-1]) : src[0];
  ------------------
  |  Branch (427:13): [True: 222k, False: 37.7k]
  |  Branch (427:37): [True: 210k, False: 12.7k]
  ------------------
  428|  29.3M|    for (int x = -1; x < w + 1; x++) {
  ------------------
  |  Branch (428:22): [True: 29.0M, False: 260k]
  ------------------
  429|  29.0M|        int c = (x + 1 < w || (edges & LR_HAVE_RIGHT)) ? src[x + 1] : src[w - 1];
  ------------------
  |  Branch (429:18): [True: 28.5M, False: 521k]
  |  Branch (429:31): [True: 426k, False: 94.2k]
  ------------------
  430|  29.0M|        sum[x] = a + b + c;
  431|  29.0M|        sumsq[x] = a * a + b * b + c * c;
  432|  29.0M|        a = b;
  433|  29.0M|        b = c;
  434|  29.0M|    }
  435|   260k|}
looprestoration_tmpl.c:sgr_box3_hv:
  550|  86.9k|{
  551|  86.9k|    sgr_box3_row_h(sumsq[2], sum[2], left, src, w, edges);
  552|  86.9k|    sgr_box3_vert(sumsq, sum, AA, BB, w, s, bitdepth_max);
  553|  86.9k|}
looprestoration_tmpl.c:sgr_box3_vert:
  528|   261k|{
  529|   261k|    sgr_box3_row_v(sumsq, sum, sumsq_out, sum_out, w);
  530|   261k|    sgr_calc_row_ab(sumsq_out, sum_out, w, s, bitdepth_max, 9, 455);
  531|   261k|    rotate(sumsq, sum, 3);
  532|   261k|}
looprestoration_tmpl.c:sgr_box3_row_v:
  472|   261k|{
  473|  29.2M|    for (int x = 0; x < w + 2; x++) {
  ------------------
  |  Branch (473:21): [True: 28.9M, False: 261k]
  ------------------
  474|  28.9M|        int sq_a = sumsq[0][x];
  475|  28.9M|        int sq_b = sumsq[1][x];
  476|  28.9M|        int sq_c = sumsq[2][x];
  477|  28.9M|        int s_a = sum[0][x];
  478|  28.9M|        int s_b = sum[1][x];
  479|  28.9M|        int s_c = sum[2][x];
  480|  28.9M|        sumsq_out[x] = sq_a + sq_b + sq_c;
  481|  28.9M|        sum_out[x] = s_a + s_b + s_c;
  482|  28.9M|    }
  483|   261k|}
looprestoration_tmpl.c:sgr_finish1:
  631|  85.7k|{
  632|       |    // Only one single row, no stride needed
  633|  85.7k|    ALIGN_STK_16(coef, tmp, 384,);
  ------------------
  |  |  100|  85.7k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  85.7k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
  634|       |
  635|  85.7k|    sgr_finish_filter_row1(tmp, *dst, A_ptrs, B_ptrs, w);
  636|  85.7k|    sgr_weighted_row1(*dst, tmp, w, w1 HIGHBD_TAIL_SUFFIX);
  637|  85.7k|    *dst += PXSTRIDE(stride);
  ------------------
  |  |   53|  85.7k|#define PXSTRIDE(x) (x)
  ------------------
  638|  85.7k|    rotate(A_ptrs, B_ptrs, 3);
  639|  85.7k|}
looprestoration_tmpl.c:sgr_finish_filter_row1:
  559|   244k|{
  560|   244k|#define EIGHT_NEIGHBORS(P, i)\
  561|   244k|    ((P[1][i] + P[1][i - 1] + P[1][i + 1] + P[0][i] + P[2][i]) * 4 + \
  562|   244k|     (P[0][i - 1] + P[2][i - 1] +                           \
  563|   244k|      P[0][i + 1] + P[2][i + 1]) * 3)
  564|  27.0M|    for (int i = 0; i < w; i++) {
  ------------------
  |  Branch (564:21): [True: 26.8M, False: 244k]
  ------------------
  565|  26.8M|        const int a = EIGHT_NEIGHBORS(B_ptrs, i + 1);
  ------------------
  |  |  561|  26.8M|    ((P[1][i] + P[1][i - 1] + P[1][i + 1] + P[0][i] + P[2][i]) * 4 + \
  |  |  562|  26.8M|     (P[0][i - 1] + P[2][i - 1] +                           \
  |  |  563|  26.8M|      P[0][i + 1] + P[2][i + 1]) * 3)
  ------------------
  566|  26.8M|        const int b = EIGHT_NEIGHBORS(A_ptrs, i + 1);
  ------------------
  |  |  561|  26.8M|    ((P[1][i] + P[1][i - 1] + P[1][i + 1] + P[0][i] + P[2][i]) * 4 + \
  |  |  562|  26.8M|     (P[0][i - 1] + P[2][i - 1] +                           \
  |  |  563|  26.8M|      P[0][i + 1] + P[2][i + 1]) * 3)
  ------------------
  567|  26.8M|        tmp[i] = (b - a * src[i] + (1 << 8)) >> 9;
  568|  26.8M|    }
  569|   244k|#undef EIGHT_NEIGHBORS
  570|   244k|}
looprestoration_tmpl.c:sgr_mix_c:
 1032|  5.61k|{
 1033|  5.61k|    ALIGN_STK_16(int32_t, sumsq5_buf, BUF_STRIDE * 5 + 16,);
  ------------------
  |  |  100|  5.61k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  5.61k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
 1034|  5.61k|    ALIGN_STK_16(coef, sum5_buf, BUF_STRIDE * 5 + 16,);
  ------------------
  |  |  100|  5.61k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  5.61k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
 1035|  5.61k|    int32_t *sumsq5_ptrs[5], *sumsq5_rows[5];
 1036|  5.61k|    coef *sum5_ptrs[5], *sum5_rows[5];
 1037|  33.6k|    for (int i = 0; i < 5; i++) {
  ------------------
  |  Branch (1037:21): [True: 28.0k, False: 5.61k]
  ------------------
 1038|  28.0k|        sumsq5_rows[i] = &sumsq5_buf[i * BUF_STRIDE];
  ------------------
  |  |  685|  28.0k|#define BUF_STRIDE (384 + 16)
  ------------------
 1039|  28.0k|        sum5_rows[i] = &sum5_buf[i * BUF_STRIDE];
  ------------------
  |  |  685|  28.0k|#define BUF_STRIDE (384 + 16)
  ------------------
 1040|  28.0k|    }
 1041|  5.61k|    ALIGN_STK_16(int32_t, sumsq3_buf, BUF_STRIDE * 3 + 16,);
  ------------------
  |  |  100|  5.61k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  5.61k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
 1042|  5.61k|    ALIGN_STK_16(coef, sum3_buf, BUF_STRIDE * 3 + 16,);
  ------------------
  |  |  100|  5.61k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  5.61k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
 1043|  5.61k|    int32_t *sumsq3_ptrs[3], *sumsq3_rows[3];
 1044|  5.61k|    coef *sum3_ptrs[3], *sum3_rows[3];
 1045|  22.4k|    for (int i = 0; i < 3; i++) {
  ------------------
  |  Branch (1045:21): [True: 16.8k, False: 5.61k]
  ------------------
 1046|  16.8k|        sumsq3_rows[i] = &sumsq3_buf[i * BUF_STRIDE];
  ------------------
  |  |  685|  16.8k|#define BUF_STRIDE (384 + 16)
  ------------------
 1047|  16.8k|        sum3_rows[i] = &sum3_buf[i * BUF_STRIDE];
  ------------------
  |  |  685|  16.8k|#define BUF_STRIDE (384 + 16)
  ------------------
 1048|  16.8k|    }
 1049|       |
 1050|  5.61k|    ALIGN_STK_16(int32_t, A5_buf, BUF_STRIDE * 2 + 16,);
  ------------------
  |  |  100|  5.61k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  5.61k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
 1051|  5.61k|    ALIGN_STK_16(coef, B5_buf, BUF_STRIDE * 2 + 16,);
  ------------------
  |  |  100|  5.61k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  5.61k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
 1052|  5.61k|    int32_t *A5_ptrs[2];
 1053|  5.61k|    coef *B5_ptrs[2];
 1054|  16.8k|    for (int i = 0; i < 2; i++) {
  ------------------
  |  Branch (1054:21): [True: 11.2k, False: 5.61k]
  ------------------
 1055|  11.2k|        A5_ptrs[i] = &A5_buf[i * BUF_STRIDE];
  ------------------
  |  |  685|  11.2k|#define BUF_STRIDE (384 + 16)
  ------------------
 1056|  11.2k|        B5_ptrs[i] = &B5_buf[i * BUF_STRIDE];
  ------------------
  |  |  685|  11.2k|#define BUF_STRIDE (384 + 16)
  ------------------
 1057|  11.2k|    }
 1058|  5.61k|    ALIGN_STK_16(int32_t, A3_buf, BUF_STRIDE * 4 + 16,);
  ------------------
  |  |  100|  5.61k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  5.61k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
 1059|  5.61k|    ALIGN_STK_16(coef, B3_buf, BUF_STRIDE * 4 + 16,);
  ------------------
  |  |  100|  5.61k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  5.61k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
 1060|  5.61k|    int32_t *A3_ptrs[4];
 1061|  5.61k|    coef *B3_ptrs[4];
 1062|  28.0k|    for (int i = 0; i < 4; i++) {
  ------------------
  |  Branch (1062:21): [True: 22.4k, False: 5.61k]
  ------------------
 1063|  22.4k|        A3_ptrs[i] = &A3_buf[i * BUF_STRIDE];
  ------------------
  |  |  685|  22.4k|#define BUF_STRIDE (384 + 16)
  ------------------
 1064|  22.4k|        B3_ptrs[i] = &B3_buf[i * BUF_STRIDE];
  ------------------
  |  |  685|  22.4k|#define BUF_STRIDE (384 + 16)
  ------------------
 1065|  22.4k|    }
 1066|  5.61k|    const pixel *src = dst;
 1067|  5.61k|    const pixel *lpf_bottom = lpf + 6*PXSTRIDE(stride);
  ------------------
  |  |   53|  5.61k|#define PXSTRIDE(x) (x)
  ------------------
 1068|       |
 1069|  5.61k|    if (edges & LR_HAVE_TOP) {
  ------------------
  |  Branch (1069:9): [True: 2.48k, False: 3.13k]
  ------------------
 1070|  2.48k|        sumsq5_ptrs[0] = sumsq5_rows[0];
 1071|  2.48k|        sumsq5_ptrs[1] = sumsq5_rows[0];
 1072|  2.48k|        sumsq5_ptrs[2] = sumsq5_rows[1];
 1073|  2.48k|        sumsq5_ptrs[3] = sumsq5_rows[2];
 1074|  2.48k|        sumsq5_ptrs[4] = sumsq5_rows[3];
 1075|  2.48k|        sum5_ptrs[0] = sum5_rows[0];
 1076|  2.48k|        sum5_ptrs[1] = sum5_rows[0];
 1077|  2.48k|        sum5_ptrs[2] = sum5_rows[1];
 1078|  2.48k|        sum5_ptrs[3] = sum5_rows[2];
 1079|  2.48k|        sum5_ptrs[4] = sum5_rows[3];
 1080|       |
 1081|  2.48k|        sumsq3_ptrs[0] = sumsq3_rows[0];
 1082|  2.48k|        sumsq3_ptrs[1] = sumsq3_rows[1];
 1083|  2.48k|        sumsq3_ptrs[2] = sumsq3_rows[2];
 1084|  2.48k|        sum3_ptrs[0] = sum3_rows[0];
 1085|  2.48k|        sum3_ptrs[1] = sum3_rows[1];
 1086|  2.48k|        sum3_ptrs[2] = sum3_rows[2];
 1087|       |
 1088|  2.48k|        sgr_box35_row_h(sumsq3_rows[0], sum3_rows[0],
 1089|  2.48k|                        sumsq5_rows[0], sum5_rows[0],
 1090|  2.48k|                        NULL, lpf, w, edges);
 1091|  2.48k|        lpf += PXSTRIDE(stride);
  ------------------
  |  |   53|  2.48k|#define PXSTRIDE(x) (x)
  ------------------
 1092|  2.48k|        sgr_box35_row_h(sumsq3_rows[1], sum3_rows[1],
 1093|  2.48k|                        sumsq5_rows[1], sum5_rows[1],
 1094|  2.48k|                        NULL, lpf, w, edges);
 1095|       |
 1096|  2.48k|        sgr_box35_row_h(sumsq3_rows[2], sum3_rows[2],
 1097|  2.48k|                        sumsq5_rows[2], sum5_rows[2],
 1098|  2.48k|                        left, src, w, edges);
 1099|  2.48k|        left++;
 1100|  2.48k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|  2.48k|#define PXSTRIDE(x) (x)
  ------------------
 1101|       |
 1102|  2.48k|        sgr_box3_vert(sumsq3_ptrs, sum3_ptrs, A3_ptrs[3], B3_ptrs[3],
 1103|  2.48k|                      w, params->sgr.s1, BITDEPTH_MAX);
  ------------------
  |  |   59|  2.48k|#define BITDEPTH_MAX 0xff
  ------------------
 1104|  2.48k|        rotate(A3_ptrs, B3_ptrs, 4);
 1105|       |
 1106|  2.48k|        if (--h <= 0)
  ------------------
  |  Branch (1106:13): [True: 537, False: 1.94k]
  ------------------
 1107|    537|            goto vert_1;
 1108|       |
 1109|  1.94k|        sgr_box35_row_h(sumsq3_ptrs[2], sum3_ptrs[2],
 1110|  1.94k|                        sumsq5_rows[3], sum5_rows[3],
 1111|  1.94k|                        left, src, w, edges);
 1112|  1.94k|        left++;
 1113|  1.94k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|  1.94k|#define PXSTRIDE(x) (x)
  ------------------
 1114|  1.94k|        sgr_box5_vert(sumsq5_ptrs, sum5_ptrs, A5_ptrs[1], B5_ptrs[1],
 1115|  1.94k|                      w, params->sgr.s0, BITDEPTH_MAX);
  ------------------
  |  |   59|  1.94k|#define BITDEPTH_MAX 0xff
  ------------------
 1116|  1.94k|        rotate(A5_ptrs, B5_ptrs, 2);
 1117|  1.94k|        sgr_box3_vert(sumsq3_ptrs, sum3_ptrs, A3_ptrs[3], B3_ptrs[3],
 1118|  1.94k|                      w, params->sgr.s1, BITDEPTH_MAX);
  ------------------
  |  |   59|  1.94k|#define BITDEPTH_MAX 0xff
  ------------------
 1119|  1.94k|        rotate(A3_ptrs, B3_ptrs, 4);
 1120|       |
 1121|  1.94k|        if (--h <= 0)
  ------------------
  |  Branch (1121:13): [True: 282, False: 1.66k]
  ------------------
 1122|    282|            goto vert_2;
 1123|       |
 1124|       |        // ptrs are rotated by 2; both [3] and [4] now point at rows[0]; set
 1125|       |        // one of them to point at the previously unused rows[4].
 1126|  1.66k|        sumsq5_ptrs[3] = sumsq5_rows[4];
 1127|  1.66k|        sum5_ptrs[3] = sum5_rows[4];
 1128|  3.13k|    } else {
 1129|  3.13k|        sumsq5_ptrs[0] = sumsq5_rows[0];
 1130|  3.13k|        sumsq5_ptrs[1] = sumsq5_rows[0];
 1131|  3.13k|        sumsq5_ptrs[2] = sumsq5_rows[0];
 1132|  3.13k|        sumsq5_ptrs[3] = sumsq5_rows[0];
 1133|  3.13k|        sumsq5_ptrs[4] = sumsq5_rows[0];
 1134|  3.13k|        sum5_ptrs[0] = sum5_rows[0];
 1135|  3.13k|        sum5_ptrs[1] = sum5_rows[0];
 1136|  3.13k|        sum5_ptrs[2] = sum5_rows[0];
 1137|  3.13k|        sum5_ptrs[3] = sum5_rows[0];
 1138|  3.13k|        sum5_ptrs[4] = sum5_rows[0];
 1139|       |
 1140|  3.13k|        sumsq3_ptrs[0] = sumsq3_rows[0];
 1141|  3.13k|        sumsq3_ptrs[1] = sumsq3_rows[0];
 1142|  3.13k|        sumsq3_ptrs[2] = sumsq3_rows[0];
 1143|  3.13k|        sum3_ptrs[0] = sum3_rows[0];
 1144|  3.13k|        sum3_ptrs[1] = sum3_rows[0];
 1145|  3.13k|        sum3_ptrs[2] = sum3_rows[0];
 1146|       |
 1147|  3.13k|        sgr_box35_row_h(sumsq3_rows[0], sum3_rows[0],
 1148|  3.13k|                        sumsq5_rows[0], sum5_rows[0],
 1149|  3.13k|                        left, src, w, edges);
 1150|  3.13k|        left++;
 1151|  3.13k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|  3.13k|#define PXSTRIDE(x) (x)
  ------------------
 1152|       |
 1153|  3.13k|        sgr_box3_vert(sumsq3_ptrs, sum3_ptrs, A3_ptrs[3], B3_ptrs[3],
 1154|  3.13k|                      w, params->sgr.s1, BITDEPTH_MAX);
  ------------------
  |  |   59|  3.13k|#define BITDEPTH_MAX 0xff
  ------------------
 1155|  3.13k|        rotate(A3_ptrs, B3_ptrs, 4);
 1156|       |
 1157|  3.13k|        if (--h <= 0)
  ------------------
  |  Branch (1157:13): [True: 356, False: 2.77k]
  ------------------
 1158|    356|            goto vert_1;
 1159|       |
 1160|  2.77k|        sumsq5_ptrs[4] = sumsq5_rows[1];
 1161|  2.77k|        sum5_ptrs[4] = sum5_rows[1];
 1162|       |
 1163|  2.77k|        sumsq3_ptrs[2] = sumsq3_rows[1];
 1164|  2.77k|        sum3_ptrs[2] = sum3_rows[1];
 1165|       |
 1166|  2.77k|        sgr_box35_row_h(sumsq3_rows[1], sum3_rows[1],
 1167|  2.77k|                        sumsq5_rows[1], sum5_rows[1],
 1168|  2.77k|                        left, src, w, edges);
 1169|  2.77k|        left++;
 1170|  2.77k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|  2.77k|#define PXSTRIDE(x) (x)
  ------------------
 1171|       |
 1172|  2.77k|        sgr_box5_vert(sumsq5_ptrs, sum5_ptrs, A5_ptrs[1], B5_ptrs[1],
 1173|  2.77k|                      w, params->sgr.s0, BITDEPTH_MAX);
  ------------------
  |  |   59|  2.77k|#define BITDEPTH_MAX 0xff
  ------------------
 1174|  2.77k|        rotate(A5_ptrs, B5_ptrs, 2);
 1175|  2.77k|        sgr_box3_vert(sumsq3_ptrs, sum3_ptrs, A3_ptrs[3], B3_ptrs[3],
 1176|  2.77k|                      w, params->sgr.s1, BITDEPTH_MAX);
  ------------------
  |  |   59|  2.77k|#define BITDEPTH_MAX 0xff
  ------------------
 1177|  2.77k|        rotate(A3_ptrs, B3_ptrs, 4);
 1178|       |
 1179|  2.77k|        if (--h <= 0)
  ------------------
  |  Branch (1179:13): [True: 698, False: 2.07k]
  ------------------
 1180|    698|            goto vert_2;
 1181|       |
 1182|  2.07k|        sumsq5_ptrs[3] = sumsq5_rows[2];
 1183|  2.07k|        sumsq5_ptrs[4] = sumsq5_rows[3];
 1184|  2.07k|        sum5_ptrs[3] = sum5_rows[2];
 1185|  2.07k|        sum5_ptrs[4] = sum5_rows[3];
 1186|       |
 1187|  2.07k|        sumsq3_ptrs[2] = sumsq3_rows[2];
 1188|  2.07k|        sum3_ptrs[2] = sum3_rows[2];
 1189|       |
 1190|  2.07k|        sgr_box35_row_h(sumsq3_rows[2], sum3_rows[2],
 1191|  2.07k|                        sumsq5_rows[2], sum5_rows[2],
 1192|  2.07k|                        left, src, w, edges);
 1193|  2.07k|        left++;
 1194|  2.07k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|  2.07k|#define PXSTRIDE(x) (x)
  ------------------
 1195|       |
 1196|  2.07k|        sgr_box3_vert(sumsq3_ptrs, sum3_ptrs, A3_ptrs[3], B3_ptrs[3],
 1197|  2.07k|                      w, params->sgr.s1, BITDEPTH_MAX);
  ------------------
  |  |   59|  2.07k|#define BITDEPTH_MAX 0xff
  ------------------
 1198|  2.07k|        rotate(A3_ptrs, B3_ptrs, 4);
 1199|       |
 1200|  2.07k|        if (--h <= 0)
  ------------------
  |  Branch (1200:13): [True: 317, False: 1.76k]
  ------------------
 1201|    317|            goto odd;
 1202|       |
 1203|  1.76k|        sgr_box35_row_h(sumsq3_ptrs[2], sum3_ptrs[2],
 1204|  1.76k|                        sumsq5_rows[3], sum5_rows[3],
 1205|  1.76k|                        left, src, w, edges);
 1206|  1.76k|        left++;
 1207|  1.76k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|  1.76k|#define PXSTRIDE(x) (x)
  ------------------
 1208|       |
 1209|  1.76k|        sgr_box5_vert(sumsq5_ptrs, sum5_ptrs, A5_ptrs[1], B5_ptrs[1],
 1210|  1.76k|                      w, params->sgr.s0, BITDEPTH_MAX);
  ------------------
  |  |   59|  1.76k|#define BITDEPTH_MAX 0xff
  ------------------
 1211|  1.76k|        sgr_box3_vert(sumsq3_ptrs, sum3_ptrs, A3_ptrs[3], B3_ptrs[3],
 1212|  1.76k|                      w, params->sgr.s1, BITDEPTH_MAX);
  ------------------
  |  |   59|  1.76k|#define BITDEPTH_MAX 0xff
  ------------------
 1213|  1.76k|        sgr_finish_mix(&dst, stride, A5_ptrs, B5_ptrs, A3_ptrs, B3_ptrs,
 1214|  1.76k|                       w, 2, params->sgr.w0, params->sgr.w1
 1215|  1.76k|                       HIGHBD_TAIL_SUFFIX);
 1216|       |
 1217|  1.76k|        if (--h <= 0)
  ------------------
  |  Branch (1217:13): [True: 290, False: 1.47k]
  ------------------
 1218|    290|            goto vert_2;
 1219|       |
 1220|       |        // ptrs are rotated by 2; both [3] and [4] now point at rows[0]; set
 1221|       |        // one of them to point at the previously unused rows[4].
 1222|  1.47k|        sumsq5_ptrs[3] = sumsq5_rows[4];
 1223|  1.47k|        sum5_ptrs[3] = sum5_rows[4];
 1224|  1.47k|    }
 1225|       |
 1226|  72.6k|    do {
 1227|  72.6k|        sgr_box35_row_h(sumsq3_ptrs[2], sum3_ptrs[2],
 1228|  72.6k|                        sumsq5_ptrs[3], sum5_ptrs[3],
 1229|  72.6k|                        left, src, w, edges);
 1230|  72.6k|        left++;
 1231|  72.6k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|  72.6k|#define PXSTRIDE(x) (x)
  ------------------
 1232|       |
 1233|  72.6k|        sgr_box3_vert(sumsq3_ptrs, sum3_ptrs, A3_ptrs[3], B3_ptrs[3],
 1234|  72.6k|                      w, params->sgr.s1, BITDEPTH_MAX);
  ------------------
  |  |   59|  72.6k|#define BITDEPTH_MAX 0xff
  ------------------
 1235|  72.6k|        rotate(A3_ptrs, B3_ptrs, 4);
 1236|       |
 1237|  72.6k|        if (--h <= 0)
  ------------------
  |  Branch (1237:13): [True: 280, False: 72.4k]
  ------------------
 1238|    280|            goto odd;
 1239|       |
 1240|  72.4k|        sgr_box35_row_h(sumsq3_ptrs[2], sum3_ptrs[2],
 1241|  72.4k|                        sumsq5_ptrs[4], sum5_ptrs[4],
 1242|  72.4k|                        left, src, w, edges);
 1243|  72.4k|        left++;
 1244|  72.4k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|  72.4k|#define PXSTRIDE(x) (x)
  ------------------
 1245|       |
 1246|  72.4k|        sgr_box5_vert(sumsq5_ptrs, sum5_ptrs, A5_ptrs[1], B5_ptrs[1],
 1247|  72.4k|                      w, params->sgr.s0, BITDEPTH_MAX);
  ------------------
  |  |   59|  72.4k|#define BITDEPTH_MAX 0xff
  ------------------
 1248|  72.4k|        sgr_box3_vert(sumsq3_ptrs, sum3_ptrs, A3_ptrs[3], B3_ptrs[3],
 1249|  72.4k|                      w, params->sgr.s1, BITDEPTH_MAX);
  ------------------
  |  |   59|  72.4k|#define BITDEPTH_MAX 0xff
  ------------------
 1250|  72.4k|        sgr_finish_mix(&dst, stride, A5_ptrs, B5_ptrs, A3_ptrs, B3_ptrs,
 1251|  72.4k|                       w, 2, params->sgr.w0, params->sgr.w1
 1252|  72.4k|                       HIGHBD_TAIL_SUFFIX);
 1253|  72.4k|    } while (--h > 0);
  ------------------
  |  Branch (1253:14): [True: 69.5k, False: 2.85k]
  ------------------
 1254|       |
 1255|  2.85k|    if (!(edges & LR_HAVE_BOTTOM))
  ------------------
  |  Branch (1255:9): [True: 293, False: 2.56k]
  ------------------
 1256|    293|        goto vert_2;
 1257|       |
 1258|  2.56k|    sgr_box35_row_h(sumsq3_ptrs[2], sum3_ptrs[2],
 1259|  2.56k|                    sumsq5_ptrs[3], sum5_ptrs[3],
 1260|  2.56k|                    NULL, lpf_bottom, w, edges);
 1261|  2.56k|    lpf_bottom += PXSTRIDE(stride);
  ------------------
  |  |   53|  2.56k|#define PXSTRIDE(x) (x)
  ------------------
 1262|  2.56k|    sgr_box3_vert(sumsq3_ptrs, sum3_ptrs, A3_ptrs[3], B3_ptrs[3],
 1263|  2.56k|                  w, params->sgr.s1, BITDEPTH_MAX);
  ------------------
  |  |   59|  2.56k|#define BITDEPTH_MAX 0xff
  ------------------
 1264|  2.56k|    rotate(A3_ptrs, B3_ptrs, 4);
 1265|       |
 1266|  2.56k|    sgr_box35_row_h(sumsq3_ptrs[2], sum3_ptrs[2],
 1267|  2.56k|                    sumsq5_ptrs[4], sum5_ptrs[4],
 1268|  2.56k|                    NULL, lpf_bottom, w, edges);
 1269|       |
 1270|  4.12k|output_2:
 1271|  4.12k|    sgr_box5_vert(sumsq5_ptrs, sum5_ptrs, A5_ptrs[1], B5_ptrs[1],
 1272|  4.12k|                  w, params->sgr.s0, BITDEPTH_MAX);
  ------------------
  |  |   59|  4.12k|#define BITDEPTH_MAX 0xff
  ------------------
 1273|  4.12k|    sgr_box3_vert(sumsq3_ptrs, sum3_ptrs, A3_ptrs[3], B3_ptrs[3],
 1274|  4.12k|                  w, params->sgr.s1, BITDEPTH_MAX);
  ------------------
  |  |   59|  4.12k|#define BITDEPTH_MAX 0xff
  ------------------
 1275|  4.12k|    sgr_finish_mix(&dst, stride, A5_ptrs, B5_ptrs, A3_ptrs, B3_ptrs,
 1276|  4.12k|                   w, 2, params->sgr.w0, params->sgr.w1
 1277|  4.12k|                   HIGHBD_TAIL_SUFFIX);
 1278|  4.12k|    return;
 1279|       |
 1280|  1.56k|vert_2:
 1281|       |    // Duplicate the last row twice more
 1282|  1.56k|    sumsq5_ptrs[3] = sumsq5_ptrs[2];
 1283|  1.56k|    sumsq5_ptrs[4] = sumsq5_ptrs[2];
 1284|  1.56k|    sum5_ptrs[3] = sum5_ptrs[2];
 1285|  1.56k|    sum5_ptrs[4] = sum5_ptrs[2];
 1286|       |
 1287|  1.56k|    sumsq3_ptrs[2] = sumsq3_ptrs[1];
 1288|  1.56k|    sum3_ptrs[2] = sum3_ptrs[1];
 1289|  1.56k|    sgr_box3_vert(sumsq3_ptrs, sum3_ptrs, A3_ptrs[3], B3_ptrs[3],
 1290|  1.56k|                  w, params->sgr.s1, BITDEPTH_MAX);
  ------------------
  |  |   59|  1.56k|#define BITDEPTH_MAX 0xff
  ------------------
 1291|  1.56k|    rotate(A3_ptrs, B3_ptrs, 4);
 1292|       |
 1293|  1.56k|    sumsq3_ptrs[2] = sumsq3_ptrs[1];
 1294|  1.56k|    sum3_ptrs[2] = sum3_ptrs[1];
 1295|       |
 1296|  1.56k|    goto output_2;
 1297|       |
 1298|    597|odd:
 1299|       |    // Copy the last row as padding once
 1300|    597|    sumsq5_ptrs[4] = sumsq5_ptrs[3];
 1301|    597|    sum5_ptrs[4] = sum5_ptrs[3];
 1302|       |
 1303|    597|    sumsq3_ptrs[2] = sumsq3_ptrs[1];
 1304|    597|    sum3_ptrs[2] = sum3_ptrs[1];
 1305|       |
 1306|    597|    sgr_box5_vert(sumsq5_ptrs, sum5_ptrs, A5_ptrs[1], B5_ptrs[1],
 1307|    597|                  w, params->sgr.s0, BITDEPTH_MAX);
  ------------------
  |  |   59|    597|#define BITDEPTH_MAX 0xff
  ------------------
 1308|    597|    sgr_box3_vert(sumsq3_ptrs, sum3_ptrs, A3_ptrs[3], B3_ptrs[3],
 1309|    597|                  w, params->sgr.s1, BITDEPTH_MAX);
  ------------------
  |  |   59|    597|#define BITDEPTH_MAX 0xff
  ------------------
 1310|    597|    sgr_finish_mix(&dst, stride, A5_ptrs, B5_ptrs, A3_ptrs, B3_ptrs,
 1311|    597|                   w, 2, params->sgr.w0, params->sgr.w1
 1312|    597|                   HIGHBD_TAIL_SUFFIX);
 1313|       |
 1314|  1.49k|output_1:
 1315|       |    // Duplicate the last row twice more
 1316|  1.49k|    sumsq5_ptrs[3] = sumsq5_ptrs[2];
 1317|  1.49k|    sumsq5_ptrs[4] = sumsq5_ptrs[2];
 1318|  1.49k|    sum5_ptrs[3] = sum5_ptrs[2];
 1319|  1.49k|    sum5_ptrs[4] = sum5_ptrs[2];
 1320|       |
 1321|  1.49k|    sumsq3_ptrs[2] = sumsq3_ptrs[1];
 1322|  1.49k|    sum3_ptrs[2] = sum3_ptrs[1];
 1323|       |
 1324|  1.49k|    sgr_box5_vert(sumsq5_ptrs, sum5_ptrs, A5_ptrs[1], B5_ptrs[1],
 1325|  1.49k|                  w, params->sgr.s0, BITDEPTH_MAX);
  ------------------
  |  |   59|  1.49k|#define BITDEPTH_MAX 0xff
  ------------------
 1326|  1.49k|    sgr_box3_vert(sumsq3_ptrs, sum3_ptrs, A3_ptrs[3], B3_ptrs[3],
 1327|  1.49k|                  w, params->sgr.s1, BITDEPTH_MAX);
  ------------------
  |  |   59|  1.49k|#define BITDEPTH_MAX 0xff
  ------------------
 1328|  1.49k|    rotate(A3_ptrs, B3_ptrs, 4);
 1329|       |    // Output only one row
 1330|  1.49k|    sgr_finish_mix(&dst, stride, A5_ptrs, B5_ptrs, A3_ptrs, B3_ptrs,
 1331|  1.49k|                   w, 1, params->sgr.w0, params->sgr.w1
 1332|  1.49k|                   HIGHBD_TAIL_SUFFIX);
 1333|  1.49k|    return;
 1334|       |
 1335|    893|vert_1:
 1336|       |    // Copy the last row as padding once
 1337|    893|    sumsq5_ptrs[4] = sumsq5_ptrs[3];
 1338|    893|    sum5_ptrs[4] = sum5_ptrs[3];
 1339|       |
 1340|    893|    sumsq3_ptrs[2] = sumsq3_ptrs[1];
 1341|    893|    sum3_ptrs[2] = sum3_ptrs[1];
 1342|       |
 1343|    893|    sgr_box5_vert(sumsq5_ptrs, sum5_ptrs, A5_ptrs[1], B5_ptrs[1],
 1344|    893|                  w, params->sgr.s0, BITDEPTH_MAX);
  ------------------
  |  |   59|    893|#define BITDEPTH_MAX 0xff
  ------------------
 1345|    893|    rotate(A5_ptrs, B5_ptrs, 2);
 1346|    893|    sgr_box3_vert(sumsq3_ptrs, sum3_ptrs, A3_ptrs[3], B3_ptrs[3],
 1347|    893|                  w, params->sgr.s1, BITDEPTH_MAX);
  ------------------
  |  |   59|    893|#define BITDEPTH_MAX 0xff
  ------------------
 1348|    893|    rotate(A3_ptrs, B3_ptrs, 4);
 1349|       |
 1350|    893|    goto output_1;
 1351|    597|}
looprestoration_tmpl.c:sgr_box35_row_h:
  464|   169k|{
  465|   169k|    sgr_box3_row_h(sumsq3, sum3, left, src, w, edges);
  466|   169k|    sgr_box5_row_h(sumsq5, sum5, left, src, w, edges);
  467|   169k|}
looprestoration_tmpl.c:sgr_finish_mix:
  663|  80.3k|{
  664|  80.3k|    ALIGN_STK_16(coef, tmp5, 2*FILTER_OUT_STRIDE,);
  ------------------
  |  |  100|  80.3k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  80.3k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
  665|  80.3k|    ALIGN_STK_16(coef, tmp3, 2*FILTER_OUT_STRIDE,);
  ------------------
  |  |  100|  80.3k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  80.3k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
  666|       |
  667|  80.3k|    sgr_finish_filter2(tmp5, *dst, stride, A5_ptrs, B5_ptrs, w, h);
  668|  80.3k|    sgr_finish_filter_row1(tmp3, *dst, A3_ptrs, B3_ptrs, w);
  669|  80.3k|    if (h > 1)
  ------------------
  |  Branch (669:9): [True: 78.8k, False: 1.49k]
  ------------------
  670|  78.8k|        sgr_finish_filter_row1(tmp3 + FILTER_OUT_STRIDE, *dst + PXSTRIDE(stride),
  ------------------
  |  |  572|  78.8k|#define FILTER_OUT_STRIDE (384)
  ------------------
                      sgr_finish_filter_row1(tmp3 + FILTER_OUT_STRIDE, *dst + PXSTRIDE(stride),
  ------------------
  |  |   53|  78.8k|#define PXSTRIDE(x) (x)
  ------------------
  671|  78.8k|                               &A3_ptrs[1], &B3_ptrs[1], w);
  672|  80.3k|    sgr_weighted2(*dst, stride, tmp5, tmp3, w, h, w0, w1 HIGHBD_TAIL_SUFFIX);
  673|  80.3k|    *dst += h*PXSTRIDE(stride);
  ------------------
  |  |   53|  80.3k|#define PXSTRIDE(x) (x)
  ------------------
  674|  80.3k|    rotate(A5_ptrs, B5_ptrs, 2);
  675|  80.3k|    rotate(A3_ptrs, B3_ptrs, 4);
  676|  80.3k|}
looprestoration_tmpl.c:sgr_weighted2:
  616|  80.3k|{
  617|   239k|    for (int j = 0; j < h; j++) {
  ------------------
  |  Branch (617:21): [True: 159k, False: 80.3k]
  ------------------
  618|  17.2M|        for (int i = 0; i < w; i++) {
  ------------------
  |  Branch (618:25): [True: 17.0M, False: 159k]
  ------------------
  619|  17.0M|            const int v = w0 * t1[i] + w1 * t2[i];
  620|  17.0M|            dst[i] = iclip_pixel(dst[i] + ((v + (1 << 10)) >> 11));
  ------------------
  |  |   49|  17.0M|#define iclip_pixel iclip_u8
  ------------------
  621|  17.0M|        }
  622|   159k|        dst += PXSTRIDE(dst_stride);
  ------------------
  |  |   53|   159k|#define PXSTRIDE(x) (x)
  ------------------
  623|   159k|        t1 += FILTER_OUT_STRIDE;
  ------------------
  |  |  572|   159k|#define FILTER_OUT_STRIDE (384)
  ------------------
  624|   159k|        t2 += FILTER_OUT_STRIDE;
  ------------------
  |  |  572|   159k|#define FILTER_OUT_STRIDE (384)
  ------------------
  625|   159k|    }
  626|  80.3k|}
dav1d_loop_restoration_dsp_init_16bpc:
 1367|  4.68k|{
 1368|  4.68k|    c->wiener[0] = c->wiener[1] = wiener_c;
 1369|  4.68k|    c->sgr[0] = sgr_5x5_c;
 1370|  4.68k|    c->sgr[1] = sgr_3x3_c;
 1371|  4.68k|    c->sgr[2] = sgr_mix_c;
 1372|       |
 1373|  4.68k|#if HAVE_ASM
 1374|       |#if ARCH_AARCH64 || ARCH_ARM
 1375|       |    loop_restoration_dsp_init_arm(c, bpc);
 1376|       |#elif ARCH_LOONGARCH64
 1377|       |    loop_restoration_dsp_init_loongarch(c, bpc);
 1378|       |#elif ARCH_PPC64LE
 1379|       |    loop_restoration_dsp_init_ppc(c, bpc);
 1380|       |#elif ARCH_X86
 1381|       |    loop_restoration_dsp_init_x86(c, bpc);
 1382|  4.68k|#endif
 1383|  4.68k|#endif
 1384|  4.68k|}

dav1d_lr_sbrow_8bpc:
  170|  20.1k|{
  171|  20.1k|    const int offset_y = 8 * !!sby;
  172|  20.1k|    const ptrdiff_t *const dst_stride = f->sr_cur.p.stride;
  173|  20.1k|    const int restore_planes = f->lf.restore_planes;
  174|  20.1k|    const int not_last = sby + 1 < f->sbh;
  175|       |
  176|  20.1k|    if (restore_planes & LR_RESTORE_Y) {
  ------------------
  |  Branch (176:9): [True: 16.4k, False: 3.70k]
  ------------------
  177|  16.4k|        const int h = f->sr_cur.p.p.h;
  178|  16.4k|        const int w = f->sr_cur.p.p.w;
  179|  16.4k|        const int next_row_y = (sby + 1) << (6 + f->seq_hdr->sb128);
  180|  16.4k|        const int row_h = imin(next_row_y - 8 * not_last, h);
  181|  16.4k|        const int y_stripe = (sby << (6 + f->seq_hdr->sb128)) - offset_y;
  182|  16.4k|        lr_sbrow(f, dst[0] - offset_y * PXSTRIDE(dst_stride[0]), y_stripe, w,
  ------------------
  |  |   53|  16.4k|#define PXSTRIDE(x) (x)
  ------------------
  183|  16.4k|                 h, row_h, 0);
  184|  16.4k|    }
  185|  20.1k|    if (restore_planes & (LR_RESTORE_U | LR_RESTORE_V)) {
  ------------------
  |  Branch (185:9): [True: 6.11k, False: 14.0k]
  ------------------
  186|  6.11k|        const int ss_ver = f->sr_cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
  187|  6.11k|        const int ss_hor = f->sr_cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
  188|  6.11k|        const int h = (f->sr_cur.p.p.h + ss_ver) >> ss_ver;
  189|  6.11k|        const int w = (f->sr_cur.p.p.w + ss_hor) >> ss_hor;
  190|  6.11k|        const int next_row_y = (sby + 1) << ((6 - ss_ver) + f->seq_hdr->sb128);
  191|  6.11k|        const int row_h = imin(next_row_y - (8 >> ss_ver) * not_last, h);
  192|  6.11k|        const int offset_uv = offset_y >> ss_ver;
  193|  6.11k|        const int y_stripe = (sby << ((6 - ss_ver) + f->seq_hdr->sb128)) - offset_uv;
  194|  6.11k|        if (restore_planes & LR_RESTORE_U)
  ------------------
  |  Branch (194:13): [True: 1.95k, False: 4.16k]
  ------------------
  195|  1.95k|            lr_sbrow(f, dst[1] - offset_uv * PXSTRIDE(dst_stride[1]), y_stripe,
  ------------------
  |  |   53|  1.95k|#define PXSTRIDE(x) (x)
  ------------------
  196|  1.95k|                     w, h, row_h, 1);
  197|       |
  198|  6.11k|        if (restore_planes & LR_RESTORE_V)
  ------------------
  |  Branch (198:13): [True: 5.16k, False: 947]
  ------------------
  199|  5.16k|            lr_sbrow(f, dst[2] - offset_uv * PXSTRIDE(dst_stride[1]), y_stripe,
  ------------------
  |  |   53|  5.16k|#define PXSTRIDE(x) (x)
  ------------------
  200|  5.16k|                     w, h, row_h, 2);
  201|  6.11k|    }
  202|  20.1k|}
lr_apply_tmpl.c:lr_sbrow:
  109|  41.6k|{
  110|  41.6k|    const int chroma = !!plane;
  111|  41.6k|    const int ss_ver = chroma & (f->sr_cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420);
  112|  41.6k|    const int ss_hor = chroma & (f->sr_cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444);
  113|  41.6k|    const ptrdiff_t p_stride = f->sr_cur.p.stride[chroma];
  114|       |
  115|  41.6k|    const int unit_size_log2 = f->frame_hdr->restoration.unit_size[!!plane];
  116|  41.6k|    const int unit_size = 1 << unit_size_log2;
  117|  41.6k|    const int half_unit_size = unit_size >> 1;
  118|  41.6k|    const int max_unit_size = unit_size + half_unit_size;
  119|       |
  120|       |    // Y coordinate of the sbrow (y is 8 luma pixel rows above row_y)
  121|  41.6k|    const int row_y = y + ((8 >> ss_ver) * !!y);
  122|       |
  123|       |    // FIXME This is an ugly hack to lookup the proper AV1Filter unit for
  124|       |    // chroma planes. Question: For Multithreaded decoding, is it better
  125|       |    // to store the chroma LR information with collocated Luma information?
  126|       |    // In other words. For a chroma restoration unit locate at 128,128 and
  127|       |    // with a 4:2:0 chroma subsampling, do we store the filter information at
  128|       |    // the AV1Filter unit located at (128,128) or (256,256)
  129|       |    // TODO Support chroma subsampling.
  130|  41.6k|    const int shift_hor = 7 - ss_hor;
  131|       |
  132|       |    /* maximum sbrow height is 128 + 8 rows offset */
  133|  41.6k|    ALIGN_STK_16(pixel, pre_lr_border, 2, [128 + 8][4]);
  ------------------
  |  |  100|  41.6k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  41.6k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
  134|  41.6k|    const Av1RestorationUnit *lr[2];
  135|       |
  136|  41.6k|    enum LrEdgeFlags edges = (y > 0 ? LR_HAVE_TOP : 0) | LR_HAVE_RIGHT;
  ------------------
  |  Branch (136:31): [True: 28.2k, False: 13.4k]
  ------------------
  137|       |
  138|  41.6k|    int aligned_unit_pos = row_y & ~(unit_size - 1);
  139|  41.6k|    if (aligned_unit_pos && aligned_unit_pos + half_unit_size > h)
  ------------------
  |  Branch (139:9): [True: 26.6k, False: 15.0k]
  |  Branch (139:29): [True: 557, False: 26.1k]
  ------------------
  140|    557|        aligned_unit_pos -= unit_size;
  141|  41.6k|    aligned_unit_pos <<= ss_ver;
  142|  41.6k|    const int sb_idx = (aligned_unit_pos >> 7) * f->sr_sb128w;
  143|  41.6k|    const int unit_idx = ((aligned_unit_pos >> 6) & 1) << 1;
  144|  41.6k|    lr[0] = &f->lf.lr_mask[sb_idx].lr[plane][unit_idx];
  145|  41.6k|    int restore = lr[0]->type != DAV1D_RESTORATION_NONE;
  146|  41.6k|    int x = 0, bit = 0;
  147|  63.4k|    for (; x + max_unit_size <= w; p += unit_size, edges |= LR_HAVE_LEFT, bit ^= 1) {
  ------------------
  |  Branch (147:12): [True: 21.8k, False: 41.6k]
  ------------------
  148|  21.8k|        const int next_x = x + unit_size;
  149|  21.8k|        const int next_u_idx = unit_idx + ((next_x >> (shift_hor - 1)) & 1);
  150|  21.8k|        lr[!bit] =
  151|  21.8k|            &f->lf.lr_mask[sb_idx + (next_x >> shift_hor)].lr[plane][next_u_idx];
  152|  21.8k|        const int restore_next = lr[!bit]->type != DAV1D_RESTORATION_NONE;
  153|  21.8k|        if (restore_next)
  ------------------
  |  Branch (153:13): [True: 12.1k, False: 9.68k]
  ------------------
  154|  12.1k|            backup4xU(pre_lr_border[bit], p + unit_size - 4, p_stride, row_h - y);
  155|  21.8k|        if (restore)
  ------------------
  |  Branch (155:13): [True: 12.0k, False: 9.77k]
  ------------------
  156|  12.0k|            lr_stripe(f, p, pre_lr_border[!bit], x, y, plane, unit_size, row_h,
  157|  12.0k|                      lr[bit], edges);
  158|  21.8k|        x = next_x;
  159|  21.8k|        restore = restore_next;
  160|  21.8k|    }
  161|  41.6k|    if (restore) {
  ------------------
  |  Branch (161:9): [True: 6.19k, False: 35.4k]
  ------------------
  162|  6.19k|        edges &= ~LR_HAVE_RIGHT;
  163|  6.19k|        const int unit_w = w - x;
  164|  6.19k|        lr_stripe(f, p, pre_lr_border[!bit], x, y, plane, unit_w, row_h, lr[bit], edges);
  165|  6.19k|    }
  166|  41.6k|}
lr_apply_tmpl.c:backup4xU:
  102|  12.1k|{
  103|   713k|    for (; u > 0; u--, dst++, src += PXSTRIDE(src_stride))
  ------------------
  |  |   53|   701k|#define PXSTRIDE(x) (x)
  ------------------
  |  Branch (103:12): [True: 701k, False: 12.1k]
  ------------------
  104|   701k|        pixel_copy(dst, src, 4);
  ------------------
  |  |   47|   701k|#define pixel_copy memcpy
  ------------------
  105|  12.1k|}
lr_apply_tmpl.c:lr_stripe:
   40|  18.2k|{
   41|  18.2k|    const Dav1dDSPContext *const dsp = f->dsp;
   42|  18.2k|    const int chroma = !!plane;
   43|  18.2k|    const int ss_ver = chroma & (f->sr_cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420);
   44|  18.2k|    const ptrdiff_t stride = f->sr_cur.p.stride[chroma];
   45|  18.2k|    const int sby = (y + (y ? 8 << ss_ver : 0)) >> (6 - ss_ver + f->seq_hdr->sb128);
  ------------------
  |  Branch (45:27): [True: 7.78k, False: 10.4k]
  ------------------
   46|  18.2k|    const int have_tt = f->c->n_tc > 1;
   47|  18.2k|    const pixel *lpf = f->lf.lr_lpf_line[plane] +
   48|  18.2k|        have_tt * (sby * (4 << f->seq_hdr->sb128) - 4) * PXSTRIDE(stride) + x;
  ------------------
  |  |   53|  18.2k|#define PXSTRIDE(x) (x)
  ------------------
   49|       |
   50|       |    // The first stripe of the frame is shorter by 8 luma pixel rows.
   51|  18.2k|    int stripe_h = imin((64 - 8 * !y) >> ss_ver, row_h - y);
   52|       |
   53|  18.2k|    looprestorationfilter_fn lr_fn;
   54|  18.2k|    LooprestorationParams params;
   55|  18.2k|    if (lr->type == DAV1D_RESTORATION_WIENER) {
  ------------------
  |  Branch (55:9): [True: 4.45k, False: 13.7k]
  ------------------
   56|  4.45k|        int16_t (*const filter)[8] = params.filter;
   57|  4.45k|        filter[0][0] = filter[0][6] = lr->filter_h[0];
   58|  4.45k|        filter[0][1] = filter[0][5] = lr->filter_h[1];
   59|  4.45k|        filter[0][2] = filter[0][4] = lr->filter_h[2];
   60|  4.45k|        filter[0][3] = -(filter[0][0] + filter[0][1] + filter[0][2]) * 2;
   61|       |#if BITDEPTH != 8
   62|       |        /* For 8-bit SIMD it's beneficial to handle the +128 separately
   63|       |         * in order to avoid overflows. */
   64|       |        filter[0][3] += 128;
   65|       |#endif
   66|       |
   67|  4.45k|        filter[1][0] = filter[1][6] = lr->filter_v[0];
   68|  4.45k|        filter[1][1] = filter[1][5] = lr->filter_v[1];
   69|  4.45k|        filter[1][2] = filter[1][4] = lr->filter_v[2];
   70|  4.45k|        filter[1][3] = 128 - (filter[1][0] + filter[1][1] + filter[1][2]) * 2;
   71|       |
   72|  4.45k|        lr_fn = dsp->lr.wiener[!(filter[0][0] | filter[1][0])];
   73|  13.7k|    } else {
   74|  13.7k|        assert(lr->type >= DAV1D_RESTORATION_SGRPROJ);
  ------------------
  |  Branch (74:9): [True: 13.7k, False: 0]
  ------------------
   75|  13.7k|        const int sgr_idx = lr->type - DAV1D_RESTORATION_SGRPROJ;
   76|  13.7k|        const uint16_t *const sgr_params = dav1d_sgr_params[sgr_idx];
   77|  13.7k|        params.sgr.s0 = sgr_params[0];
   78|  13.7k|        params.sgr.s1 = sgr_params[1];
   79|  13.7k|        params.sgr.w0 = lr->sgr_weights[0];
   80|  13.7k|        params.sgr.w1 = 128 - (lr->sgr_weights[0] + lr->sgr_weights[1]);
   81|       |
   82|  13.7k|        lr_fn = dsp->lr.sgr[!!sgr_params[0] + !!sgr_params[1] * 2 - 1];
   83|  13.7k|    }
   84|       |
   85|  28.2k|    while (y + stripe_h <= row_h) {
  ------------------
  |  Branch (85:12): [True: 28.2k, False: 0]
  ------------------
   86|       |        // Change the HAVE_BOTTOM bit in edges to (sby + 1 != f->sbh || y + stripe_h != row_h)
   87|  28.2k|        edges ^= (-(sby + 1 != f->sbh || y + stripe_h != row_h) ^ edges) & LR_HAVE_BOTTOM;
  ------------------
  |  Branch (87:21): [True: 13.2k, False: 14.9k]
  |  Branch (87:42): [True: 5.03k, False: 9.95k]
  ------------------
   88|  28.2k|        lr_fn(p, stride, left, lpf, unit_w, stripe_h, &params, edges HIGHBD_CALL_SUFFIX);
   89|       |
   90|  28.2k|        left += stripe_h;
   91|  28.2k|        y += stripe_h;
   92|  28.2k|        p += stripe_h * PXSTRIDE(stride);
  ------------------
  |  |   53|  28.2k|#define PXSTRIDE(x) (x)
  ------------------
   93|  28.2k|        edges |= LR_HAVE_TOP;
   94|  28.2k|        stripe_h = imin(64 >> ss_ver, row_h - y);
   95|  28.2k|        if (stripe_h == 0) break;
  ------------------
  |  Branch (95:13): [True: 18.2k, False: 10.0k]
  ------------------
   96|  10.0k|        lpf += 4 * PXSTRIDE(stride);
  ------------------
  |  |   53|  10.0k|#define PXSTRIDE(x) (x)
  ------------------
   97|  10.0k|    }
   98|  18.2k|}
dav1d_lr_sbrow_16bpc:
  170|  13.9k|{
  171|  13.9k|    const int offset_y = 8 * !!sby;
  172|  13.9k|    const ptrdiff_t *const dst_stride = f->sr_cur.p.stride;
  173|  13.9k|    const int restore_planes = f->lf.restore_planes;
  174|  13.9k|    const int not_last = sby + 1 < f->sbh;
  175|       |
  176|  13.9k|    if (restore_planes & LR_RESTORE_Y) {
  ------------------
  |  Branch (176:9): [True: 12.5k, False: 1.38k]
  ------------------
  177|  12.5k|        const int h = f->sr_cur.p.p.h;
  178|  12.5k|        const int w = f->sr_cur.p.p.w;
  179|  12.5k|        const int next_row_y = (sby + 1) << (6 + f->seq_hdr->sb128);
  180|  12.5k|        const int row_h = imin(next_row_y - 8 * not_last, h);
  181|  12.5k|        const int y_stripe = (sby << (6 + f->seq_hdr->sb128)) - offset_y;
  182|  12.5k|        lr_sbrow(f, dst[0] - offset_y * PXSTRIDE(dst_stride[0]), y_stripe, w,
  183|  12.5k|                 h, row_h, 0);
  184|  12.5k|    }
  185|  13.9k|    if (restore_planes & (LR_RESTORE_U | LR_RESTORE_V)) {
  ------------------
  |  Branch (185:9): [True: 3.85k, False: 10.1k]
  ------------------
  186|  3.85k|        const int ss_ver = f->sr_cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
  187|  3.85k|        const int ss_hor = f->sr_cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
  188|  3.85k|        const int h = (f->sr_cur.p.p.h + ss_ver) >> ss_ver;
  189|  3.85k|        const int w = (f->sr_cur.p.p.w + ss_hor) >> ss_hor;
  190|  3.85k|        const int next_row_y = (sby + 1) << ((6 - ss_ver) + f->seq_hdr->sb128);
  191|  3.85k|        const int row_h = imin(next_row_y - (8 >> ss_ver) * not_last, h);
  192|  3.85k|        const int offset_uv = offset_y >> ss_ver;
  193|  3.85k|        const int y_stripe = (sby << ((6 - ss_ver) + f->seq_hdr->sb128)) - offset_uv;
  194|  3.85k|        if (restore_planes & LR_RESTORE_U)
  ------------------
  |  Branch (194:13): [True: 2.83k, False: 1.01k]
  ------------------
  195|  2.83k|            lr_sbrow(f, dst[1] - offset_uv * PXSTRIDE(dst_stride[1]), y_stripe,
  196|  2.83k|                     w, h, row_h, 1);
  197|       |
  198|  3.85k|        if (restore_planes & LR_RESTORE_V)
  ------------------
  |  Branch (198:13): [True: 2.66k, False: 1.18k]
  ------------------
  199|  2.66k|            lr_sbrow(f, dst[2] - offset_uv * PXSTRIDE(dst_stride[1]), y_stripe,
  200|  2.66k|                     w, h, row_h, 2);
  201|  3.85k|    }
  202|  13.9k|}

dav1d_mc_dsp_init_8bpc:
  960|  3.47k|COLD void bitfn(dav1d_mc_dsp_init)(Dav1dMCDSPContext *const c) {
  961|  3.47k|#define init_mc_fns(type, name) do { \
  962|  3.47k|    c->mc        [type] = put_##name##_c; \
  963|  3.47k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  964|  3.47k|    c->mct       [type] = prep_##name##_c; \
  965|  3.47k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  966|  3.47k|} while (0)
  967|       |
  968|  3.47k|    init_mc_fns(FILTER_2D_8TAP_REGULAR,        8tap_regular);
  ------------------
  |  |  961|  3.47k|#define init_mc_fns(type, name) do { \
  |  |  962|  3.47k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  3.47k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  3.47k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  3.47k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  3.47k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 3.47k]
  |  |  ------------------
  ------------------
  969|  3.47k|    init_mc_fns(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth);
  ------------------
  |  |  961|  3.47k|#define init_mc_fns(type, name) do { \
  |  |  962|  3.47k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  3.47k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  3.47k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  3.47k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  3.47k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 3.47k]
  |  |  ------------------
  ------------------
  970|  3.47k|    init_mc_fns(FILTER_2D_8TAP_REGULAR_SHARP,  8tap_regular_sharp);
  ------------------
  |  |  961|  3.47k|#define init_mc_fns(type, name) do { \
  |  |  962|  3.47k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  3.47k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  3.47k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  3.47k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  3.47k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 3.47k]
  |  |  ------------------
  ------------------
  971|  3.47k|    init_mc_fns(FILTER_2D_8TAP_SHARP_REGULAR,  8tap_sharp_regular);
  ------------------
  |  |  961|  3.47k|#define init_mc_fns(type, name) do { \
  |  |  962|  3.47k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  3.47k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  3.47k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  3.47k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  3.47k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 3.47k]
  |  |  ------------------
  ------------------
  972|  3.47k|    init_mc_fns(FILTER_2D_8TAP_SHARP_SMOOTH,   8tap_sharp_smooth);
  ------------------
  |  |  961|  3.47k|#define init_mc_fns(type, name) do { \
  |  |  962|  3.47k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  3.47k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  3.47k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  3.47k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  3.47k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 3.47k]
  |  |  ------------------
  ------------------
  973|  3.47k|    init_mc_fns(FILTER_2D_8TAP_SHARP,          8tap_sharp);
  ------------------
  |  |  961|  3.47k|#define init_mc_fns(type, name) do { \
  |  |  962|  3.47k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  3.47k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  3.47k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  3.47k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  3.47k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 3.47k]
  |  |  ------------------
  ------------------
  974|  3.47k|    init_mc_fns(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular);
  ------------------
  |  |  961|  3.47k|#define init_mc_fns(type, name) do { \
  |  |  962|  3.47k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  3.47k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  3.47k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  3.47k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  3.47k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 3.47k]
  |  |  ------------------
  ------------------
  975|  3.47k|    init_mc_fns(FILTER_2D_8TAP_SMOOTH,         8tap_smooth);
  ------------------
  |  |  961|  3.47k|#define init_mc_fns(type, name) do { \
  |  |  962|  3.47k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  3.47k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  3.47k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  3.47k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  3.47k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 3.47k]
  |  |  ------------------
  ------------------
  976|  3.47k|    init_mc_fns(FILTER_2D_8TAP_SMOOTH_SHARP,   8tap_smooth_sharp);
  ------------------
  |  |  961|  3.47k|#define init_mc_fns(type, name) do { \
  |  |  962|  3.47k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  3.47k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  3.47k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  3.47k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  3.47k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 3.47k]
  |  |  ------------------
  ------------------
  977|  3.47k|    init_mc_fns(FILTER_2D_BILINEAR,            bilin);
  ------------------
  |  |  961|  3.47k|#define init_mc_fns(type, name) do { \
  |  |  962|  3.47k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  3.47k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  3.47k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  3.47k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  3.47k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 3.47k]
  |  |  ------------------
  ------------------
  978|       |
  979|  3.47k|    c->avg      = avg_c;
  980|  3.47k|    c->w_avg    = w_avg_c;
  981|  3.47k|    c->mask     = mask_c;
  982|  3.47k|    c->blend    = blend_c;
  983|  3.47k|    c->blend_v  = blend_v_c;
  984|  3.47k|    c->blend_h  = blend_h_c;
  985|  3.47k|    c->w_mask[0] = w_mask_444_c;
  986|  3.47k|    c->w_mask[1] = w_mask_422_c;
  987|  3.47k|    c->w_mask[2] = w_mask_420_c;
  988|  3.47k|    c->warp8x8  = warp_affine_8x8_c;
  989|  3.47k|    c->warp8x8t = warp_affine_8x8t_c;
  990|  3.47k|    c->emu_edge = emu_edge_c;
  991|  3.47k|    c->resize   = resize_c;
  992|       |
  993|  3.47k|#if HAVE_ASM
  994|       |#if ARCH_AARCH64 || ARCH_ARM
  995|       |    mc_dsp_init_arm(c);
  996|       |#elif ARCH_LOONGARCH64
  997|       |    mc_dsp_init_loongarch(c);
  998|       |#elif ARCH_PPC64LE
  999|       |    mc_dsp_init_ppc(c);
 1000|       |#elif ARCH_RISCV
 1001|       |    mc_dsp_init_riscv(c);
 1002|       |#elif ARCH_X86
 1003|       |    mc_dsp_init_x86(c);
 1004|  3.47k|#endif
 1005|  3.47k|#endif
 1006|  3.47k|}
dav1d_mc_dsp_init_16bpc:
  960|  4.68k|COLD void bitfn(dav1d_mc_dsp_init)(Dav1dMCDSPContext *const c) {
  961|  4.68k|#define init_mc_fns(type, name) do { \
  962|  4.68k|    c->mc        [type] = put_##name##_c; \
  963|  4.68k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  964|  4.68k|    c->mct       [type] = prep_##name##_c; \
  965|  4.68k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  966|  4.68k|} while (0)
  967|       |
  968|  4.68k|    init_mc_fns(FILTER_2D_8TAP_REGULAR,        8tap_regular);
  ------------------
  |  |  961|  4.68k|#define init_mc_fns(type, name) do { \
  |  |  962|  4.68k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  4.68k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  4.68k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  4.68k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  4.68k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 4.68k]
  |  |  ------------------
  ------------------
  969|  4.68k|    init_mc_fns(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth);
  ------------------
  |  |  961|  4.68k|#define init_mc_fns(type, name) do { \
  |  |  962|  4.68k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  4.68k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  4.68k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  4.68k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  4.68k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 4.68k]
  |  |  ------------------
  ------------------
  970|  4.68k|    init_mc_fns(FILTER_2D_8TAP_REGULAR_SHARP,  8tap_regular_sharp);
  ------------------
  |  |  961|  4.68k|#define init_mc_fns(type, name) do { \
  |  |  962|  4.68k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  4.68k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  4.68k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  4.68k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  4.68k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 4.68k]
  |  |  ------------------
  ------------------
  971|  4.68k|    init_mc_fns(FILTER_2D_8TAP_SHARP_REGULAR,  8tap_sharp_regular);
  ------------------
  |  |  961|  4.68k|#define init_mc_fns(type, name) do { \
  |  |  962|  4.68k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  4.68k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  4.68k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  4.68k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  4.68k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 4.68k]
  |  |  ------------------
  ------------------
  972|  4.68k|    init_mc_fns(FILTER_2D_8TAP_SHARP_SMOOTH,   8tap_sharp_smooth);
  ------------------
  |  |  961|  4.68k|#define init_mc_fns(type, name) do { \
  |  |  962|  4.68k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  4.68k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  4.68k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  4.68k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  4.68k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 4.68k]
  |  |  ------------------
  ------------------
  973|  4.68k|    init_mc_fns(FILTER_2D_8TAP_SHARP,          8tap_sharp);
  ------------------
  |  |  961|  4.68k|#define init_mc_fns(type, name) do { \
  |  |  962|  4.68k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  4.68k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  4.68k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  4.68k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  4.68k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 4.68k]
  |  |  ------------------
  ------------------
  974|  4.68k|    init_mc_fns(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular);
  ------------------
  |  |  961|  4.68k|#define init_mc_fns(type, name) do { \
  |  |  962|  4.68k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  4.68k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  4.68k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  4.68k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  4.68k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 4.68k]
  |  |  ------------------
  ------------------
  975|  4.68k|    init_mc_fns(FILTER_2D_8TAP_SMOOTH,         8tap_smooth);
  ------------------
  |  |  961|  4.68k|#define init_mc_fns(type, name) do { \
  |  |  962|  4.68k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  4.68k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  4.68k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  4.68k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  4.68k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 4.68k]
  |  |  ------------------
  ------------------
  976|  4.68k|    init_mc_fns(FILTER_2D_8TAP_SMOOTH_SHARP,   8tap_smooth_sharp);
  ------------------
  |  |  961|  4.68k|#define init_mc_fns(type, name) do { \
  |  |  962|  4.68k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  4.68k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  4.68k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  4.68k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  4.68k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 4.68k]
  |  |  ------------------
  ------------------
  977|  4.68k|    init_mc_fns(FILTER_2D_BILINEAR,            bilin);
  ------------------
  |  |  961|  4.68k|#define init_mc_fns(type, name) do { \
  |  |  962|  4.68k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  4.68k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  4.68k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  4.68k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  4.68k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 4.68k]
  |  |  ------------------
  ------------------
  978|       |
  979|  4.68k|    c->avg      = avg_c;
  980|  4.68k|    c->w_avg    = w_avg_c;
  981|  4.68k|    c->mask     = mask_c;
  982|  4.68k|    c->blend    = blend_c;
  983|  4.68k|    c->blend_v  = blend_v_c;
  984|  4.68k|    c->blend_h  = blend_h_c;
  985|  4.68k|    c->w_mask[0] = w_mask_444_c;
  986|  4.68k|    c->w_mask[1] = w_mask_422_c;
  987|  4.68k|    c->w_mask[2] = w_mask_420_c;
  988|  4.68k|    c->warp8x8  = warp_affine_8x8_c;
  989|  4.68k|    c->warp8x8t = warp_affine_8x8t_c;
  990|  4.68k|    c->emu_edge = emu_edge_c;
  991|  4.68k|    c->resize   = resize_c;
  992|       |
  993|  4.68k|#if HAVE_ASM
  994|       |#if ARCH_AARCH64 || ARCH_ARM
  995|       |    mc_dsp_init_arm(c);
  996|       |#elif ARCH_LOONGARCH64
  997|       |    mc_dsp_init_loongarch(c);
  998|       |#elif ARCH_PPC64LE
  999|       |    mc_dsp_init_ppc(c);
 1000|       |#elif ARCH_RISCV
 1001|       |    mc_dsp_init_riscv(c);
 1002|       |#elif ARCH_X86
 1003|       |    mc_dsp_init_x86(c);
 1004|  4.68k|#endif
 1005|  4.68k|#endif
 1006|  4.68k|}

dav1d_mem_pool_push:
  224|   210k|void dav1d_mem_pool_push(Dav1dMemPool *const pool, void *const ptr) {
  225|   210k|    pthread_mutex_lock(&pool->lock);
  226|   210k|    Dav1dMemPoolBuffer *const buf = (Dav1dMemPoolBuffer*)((uintptr_t)ptr - 64);
  227|   210k|    const int ref_cnt = --pool->ref_cnt;
  228|   210k|    if (!pool->end) {
  ------------------
  |  Branch (228:9): [True: 209k, False: 960]
  ------------------
  229|   209k|        buf->next = pool->buf;
  230|   209k|        pool->buf = buf;
  231|   209k|        pthread_mutex_unlock(&pool->lock);
  232|   209k|        assert(ref_cnt > 0);
  ------------------
  |  Branch (232:9): [True: 209k, False: 0]
  ------------------
  233|   209k|    } else {
  234|    960|        pthread_mutex_unlock(&pool->lock);
  235|    960|        dav1d_free_aligned(buf);
  ------------------
  |  |  136|    960|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
  236|    960|        if (!ref_cnt) mem_pool_destroy(pool);
  ------------------
  |  Branch (236:13): [True: 960, False: 0]
  ------------------
  237|    960|    }
  238|   210k|}
dav1d_mem_pool_pop:
  240|   210k|void *dav1d_mem_pool_pop(Dav1dMemPool *const pool, const size_t size) {
  241|   210k|    pthread_mutex_lock(&pool->lock);
  242|   210k|    Dav1dMemPoolBuffer *buf = pool->buf;
  243|   210k|    pool->ref_cnt++;
  244|       |
  245|   210k|    if (buf) {
  ------------------
  |  Branch (245:9): [True: 150k, False: 60.4k]
  ------------------
  246|   150k|        pool->buf = buf->next;
  247|   150k|        pthread_mutex_unlock(&pool->lock);
  248|   150k|        if (buf->size != size) {
  ------------------
  |  Branch (248:13): [True: 2.80k, False: 147k]
  ------------------
  249|       |            /* Reallocate if the size has changed */
  250|  2.80k|            dav1d_free_aligned(buf);
  ------------------
  |  |  136|  2.80k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
  251|  2.80k|            goto alloc;
  252|  2.80k|        }
  253|       |#if TRACK_HEAP_ALLOCATIONS
  254|       |        dav1d_track_reuse(pool->type);
  255|       |#endif
  256|   150k|    } else {
  257|  60.4k|        pthread_mutex_unlock(&pool->lock);
  258|  63.3k|alloc:
  259|  63.3k|        buf = dav1d_alloc_aligned(pool->type, size + 64, 64);
  ------------------
  |  |  134|  63.3k|#define dav1d_alloc_aligned(type, sz, align) dav1d_alloc_aligned_internal(sz, align)
  ------------------
  260|  63.3k|        if (!buf) {
  ------------------
  |  Branch (260:13): [True: 0, False: 63.3k]
  ------------------
  261|      0|            pthread_mutex_lock(&pool->lock);
  262|      0|            const int ref_cnt = --pool->ref_cnt;
  263|      0|            pthread_mutex_unlock(&pool->lock);
  264|      0|            if (!ref_cnt) mem_pool_destroy(pool);
  ------------------
  |  Branch (264:17): [True: 0, False: 0]
  ------------------
  265|      0|            return NULL;
  266|      0|        }
  267|  63.3k|        buf->size = size;
  268|  63.3k|    }
  269|       |
  270|   210k|    return (void*)((uintptr_t)buf + 64);
  271|   210k|}
dav1d_mem_pool_init:
  275|  67.8k|{
  276|  67.8k|    Dav1dMemPool *const pool = dav1d_malloc(ALLOC_COMMON_CTX,
  ------------------
  |  |  132|  67.8k|#define dav1d_malloc(type, sz) malloc(sz)
  ------------------
  277|  67.8k|                                            sizeof(Dav1dMemPool));
  278|  67.8k|    if (pool) {
  ------------------
  |  Branch (278:9): [True: 67.8k, False: 0]
  ------------------
  279|  67.8k|        if (!pthread_mutex_init(&pool->lock, NULL)) {
  ------------------
  |  Branch (279:13): [True: 67.8k, False: 0]
  ------------------
  280|  67.8k|            pool->buf = NULL;
  281|  67.8k|            pool->ref_cnt = 1;
  282|  67.8k|            pool->end = 0;
  283|       |#if TRACK_HEAP_ALLOCATIONS
  284|       |            pool->type = type;
  285|       |#endif
  286|  67.8k|            *ppool = pool;
  287|  67.8k|            return 0;
  288|  67.8k|        }
  289|      0|        dav1d_free(pool);
  ------------------
  |  |  135|      0|#define dav1d_free(ptr) free(ptr)
  ------------------
  290|      0|    }
  291|      0|    *ppool = NULL;
  292|      0|    return DAV1D_ERR(ENOMEM);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  293|  67.8k|}
dav1d_mem_pool_end:
  295|  67.8k|COLD void dav1d_mem_pool_end(Dav1dMemPool *const pool) {
  296|  67.8k|    if (pool) {
  ------------------
  |  Branch (296:9): [True: 67.8k, False: 0]
  ------------------
  297|  67.8k|        pthread_mutex_lock(&pool->lock);
  298|  67.8k|        Dav1dMemPoolBuffer *buf = pool->buf;
  299|  67.8k|        const int ref_cnt = --pool->ref_cnt;
  300|  67.8k|        pool->buf = NULL;
  301|  67.8k|        pool->end = 1;
  302|  67.8k|        pthread_mutex_unlock(&pool->lock);
  303|       |
  304|   127k|        while (buf) {
  ------------------
  |  Branch (304:16): [True: 59.5k, False: 67.8k]
  ------------------
  305|  59.5k|            void *const ptr = buf;
  306|  59.5k|            buf = buf->next;
  307|  59.5k|            dav1d_free_aligned(ptr);
  ------------------
  |  |  136|  59.5k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
  308|  59.5k|        }
  309|  67.8k|        if (!ref_cnt) mem_pool_destroy(pool);
  ------------------
  |  Branch (309:13): [True: 66.9k, False: 960]
  ------------------
  310|  67.8k|    }
  311|  67.8k|}
mem.c:mem_pool_destroy:
  219|  67.8k|static COLD void mem_pool_destroy(Dav1dMemPool *const pool) {
  220|  67.8k|    pthread_mutex_destroy(&pool->lock);
  221|  67.8k|    dav1d_free(pool);
  ------------------
  |  |  135|  67.8k|#define dav1d_free(ptr) free(ptr)
  ------------------
  222|  67.8k|}

lib.c:dav1d_alloc_aligned_internal:
   89|  29.0k|static inline void *dav1d_alloc_aligned_internal(const size_t sz, const size_t align) {
   90|  29.0k|    assert(!(align & (align - 1)));
  ------------------
  |  Branch (90:5): [True: 29.0k, False: 0]
  ------------------
   91|       |#ifdef _WIN32
   92|       |    return _aligned_malloc(sz, align);
   93|       |#elif HAVE_POSIX_MEMALIGN
   94|  29.0k|    void *ptr;
   95|  29.0k|    if (posix_memalign(&ptr, align, sz)) return NULL;
  ------------------
  |  Branch (95:9): [True: 0, False: 29.0k]
  ------------------
   96|  29.0k|    return ptr;
   97|       |#elif HAVE_MEMALIGN
   98|       |    return memalign(align, sz);
   99|       |#elif HAVE_ALIGNED_ALLOC
  100|       |    // The C11 standard specifies that the size parameter
  101|       |    // must be an integral multiple of alignment.
  102|       |    return aligned_alloc(align, ROUND_UP(sz, align));
  103|       |#else
  104|       |    void *const buf = malloc(sz + align + sizeof(void *));
  105|       |    if (!buf) return NULL;
  106|       |
  107|       |    void *const ptr = (void *)(((uintptr_t)buf + sizeof(void *) + align - 1) & ~(align - 1));
  108|       |    ((void **)ptr)[-1] = buf;
  109|       |    return ptr;
  110|       |#endif
  111|  29.0k|}
lib.c:dav1d_free_aligned_internal:
  113|  77.5k|static inline void dav1d_free_aligned_internal(void *ptr) {
  114|       |#ifdef _WIN32
  115|       |    _aligned_free(ptr);
  116|       |#elif HAVE_POSIX_MEMALIGN || HAVE_MEMALIGN || HAVE_ALIGNED_ALLOC
  117|       |    free(ptr);
  118|       |#else
  119|       |    if (ptr) free(((void **)ptr)[-1]);
  120|       |#endif
  121|  77.5k|}
lib.c:dav1d_freep_aligned:
  144|  9.69k|static inline void dav1d_freep_aligned(void *ptr) {
  145|  9.69k|    void **mem = (void **) ptr;
  146|  9.69k|    if (*mem) {
  ------------------
  |  Branch (146:9): [True: 9.69k, False: 0]
  ------------------
  147|  9.69k|        dav1d_free_aligned(*mem);
  ------------------
  |  |  136|  9.69k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
  148|       |        *mem = NULL;
  149|  9.69k|    }
  150|  9.69k|}
mem.c:dav1d_free_aligned_internal:
  113|  63.3k|static inline void dav1d_free_aligned_internal(void *ptr) {
  114|       |#ifdef _WIN32
  115|       |    _aligned_free(ptr);
  116|       |#elif HAVE_POSIX_MEMALIGN || HAVE_MEMALIGN || HAVE_ALIGNED_ALLOC
  117|       |    free(ptr);
  118|       |#else
  119|       |    if (ptr) free(((void **)ptr)[-1]);
  120|       |#endif
  121|  63.3k|}
mem.c:dav1d_alloc_aligned_internal:
   89|  63.3k|static inline void *dav1d_alloc_aligned_internal(const size_t sz, const size_t align) {
   90|  63.3k|    assert(!(align & (align - 1)));
  ------------------
  |  Branch (90:5): [True: 63.3k, False: 0]
  ------------------
   91|       |#ifdef _WIN32
   92|       |    return _aligned_malloc(sz, align);
   93|       |#elif HAVE_POSIX_MEMALIGN
   94|  63.3k|    void *ptr;
   95|  63.3k|    if (posix_memalign(&ptr, align, sz)) return NULL;
  ------------------
  |  Branch (95:9): [True: 0, False: 63.3k]
  ------------------
   96|  63.3k|    return ptr;
   97|       |#elif HAVE_MEMALIGN
   98|       |    return memalign(align, sz);
   99|       |#elif HAVE_ALIGNED_ALLOC
  100|       |    // The C11 standard specifies that the size parameter
  101|       |    // must be an integral multiple of alignment.
  102|       |    return aligned_alloc(align, ROUND_UP(sz, align));
  103|       |#else
  104|       |    void *const buf = malloc(sz + align + sizeof(void *));
  105|       |    if (!buf) return NULL;
  106|       |
  107|       |    void *const ptr = (void *)(((uintptr_t)buf + sizeof(void *) + align - 1) & ~(align - 1));
  108|       |    ((void **)ptr)[-1] = buf;
  109|       |    return ptr;
  110|       |#endif
  111|  63.3k|}
ref.c:dav1d_alloc_aligned_internal:
   89|  75.1k|static inline void *dav1d_alloc_aligned_internal(const size_t sz, const size_t align) {
   90|  75.1k|    assert(!(align & (align - 1)));
  ------------------
  |  Branch (90:5): [True: 75.1k, False: 0]
  ------------------
   91|       |#ifdef _WIN32
   92|       |    return _aligned_malloc(sz, align);
   93|       |#elif HAVE_POSIX_MEMALIGN
   94|  75.1k|    void *ptr;
   95|  75.1k|    if (posix_memalign(&ptr, align, sz)) return NULL;
  ------------------
  |  Branch (95:9): [True: 0, False: 75.1k]
  ------------------
   96|  75.1k|    return ptr;
   97|       |#elif HAVE_MEMALIGN
   98|       |    return memalign(align, sz);
   99|       |#elif HAVE_ALIGNED_ALLOC
  100|       |    // The C11 standard specifies that the size parameter
  101|       |    // must be an integral multiple of alignment.
  102|       |    return aligned_alloc(align, ROUND_UP(sz, align));
  103|       |#else
  104|       |    void *const buf = malloc(sz + align + sizeof(void *));
  105|       |    if (!buf) return NULL;
  106|       |
  107|       |    void *const ptr = (void *)(((uintptr_t)buf + sizeof(void *) + align - 1) & ~(align - 1));
  108|       |    ((void **)ptr)[-1] = buf;
  109|       |    return ptr;
  110|       |#endif
  111|  75.1k|}
ref.c:dav1d_free_aligned_internal:
  113|  75.1k|static inline void dav1d_free_aligned_internal(void *ptr) {
  114|       |#ifdef _WIN32
  115|       |    _aligned_free(ptr);
  116|       |#elif HAVE_POSIX_MEMALIGN || HAVE_MEMALIGN || HAVE_ALIGNED_ALLOC
  117|       |    free(ptr);
  118|       |#else
  119|       |    if (ptr) free(((void **)ptr)[-1]);
  120|       |#endif
  121|  75.1k|}
refmvs.c:dav1d_free_aligned_internal:
  113|  6.60k|static inline void dav1d_free_aligned_internal(void *ptr) {
  114|       |#ifdef _WIN32
  115|       |    _aligned_free(ptr);
  116|       |#elif HAVE_POSIX_MEMALIGN || HAVE_MEMALIGN || HAVE_ALIGNED_ALLOC
  117|       |    free(ptr);
  118|       |#else
  119|       |    if (ptr) free(((void **)ptr)[-1]);
  120|       |#endif
  121|  6.60k|}
refmvs.c:dav1d_alloc_aligned_internal:
   89|  6.60k|static inline void *dav1d_alloc_aligned_internal(const size_t sz, const size_t align) {
   90|  6.60k|    assert(!(align & (align - 1)));
  ------------------
  |  Branch (90:5): [True: 6.60k, False: 0]
  ------------------
   91|       |#ifdef _WIN32
   92|       |    return _aligned_malloc(sz, align);
   93|       |#elif HAVE_POSIX_MEMALIGN
   94|  6.60k|    void *ptr;
   95|  6.60k|    if (posix_memalign(&ptr, align, sz)) return NULL;
  ------------------
  |  Branch (95:9): [True: 0, False: 6.60k]
  ------------------
   96|  6.60k|    return ptr;
   97|       |#elif HAVE_MEMALIGN
   98|       |    return memalign(align, sz);
   99|       |#elif HAVE_ALIGNED_ALLOC
  100|       |    // The C11 standard specifies that the size parameter
  101|       |    // must be an integral multiple of alignment.
  102|       |    return aligned_alloc(align, ROUND_UP(sz, align));
  103|       |#else
  104|       |    void *const buf = malloc(sz + align + sizeof(void *));
  105|       |    if (!buf) return NULL;
  106|       |
  107|       |    void *const ptr = (void *)(((uintptr_t)buf + sizeof(void *) + align - 1) & ~(align - 1));
  108|       |    ((void **)ptr)[-1] = buf;
  109|       |    return ptr;
  110|       |#endif
  111|  6.60k|}
decode.c:dav1d_free_aligned_internal:
  113|  38.1k|static inline void dav1d_free_aligned_internal(void *ptr) {
  114|       |#ifdef _WIN32
  115|       |    _aligned_free(ptr);
  116|       |#elif HAVE_POSIX_MEMALIGN || HAVE_MEMALIGN || HAVE_ALIGNED_ALLOC
  117|       |    free(ptr);
  118|       |#else
  119|       |    if (ptr) free(((void **)ptr)[-1]);
  120|       |#endif
  121|  38.1k|}
decode.c:dav1d_alloc_aligned_internal:
   89|  38.1k|static inline void *dav1d_alloc_aligned_internal(const size_t sz, const size_t align) {
   90|  38.1k|    assert(!(align & (align - 1)));
  ------------------
  |  Branch (90:5): [True: 38.1k, False: 0]
  ------------------
   91|       |#ifdef _WIN32
   92|       |    return _aligned_malloc(sz, align);
   93|       |#elif HAVE_POSIX_MEMALIGN
   94|  38.1k|    void *ptr;
   95|  38.1k|    if (posix_memalign(&ptr, align, sz)) return NULL;
  ------------------
  |  Branch (95:9): [True: 0, False: 38.1k]
  ------------------
   96|  38.1k|    return ptr;
   97|       |#elif HAVE_MEMALIGN
   98|       |    return memalign(align, sz);
   99|       |#elif HAVE_ALIGNED_ALLOC
  100|       |    // The C11 standard specifies that the size parameter
  101|       |    // must be an integral multiple of alignment.
  102|       |    return aligned_alloc(align, ROUND_UP(sz, align));
  103|       |#else
  104|       |    void *const buf = malloc(sz + align + sizeof(void *));
  105|       |    if (!buf) return NULL;
  106|       |
  107|       |    void *const ptr = (void *)(((uintptr_t)buf + sizeof(void *) + align - 1) & ~(align - 1));
  108|       |    ((void **)ptr)[-1] = buf;
  109|       |    return ptr;
  110|       |#endif
  111|  38.1k|}

dav1d_msac_decode_subexp:
   62|   134k|{
   63|   134k|    assert(n >> k == 8);
  ------------------
  |  Branch (63:5): [True: 134k, False: 0]
  ------------------
   64|       |
   65|   134k|    unsigned a = 0;
   66|   134k|    if (dav1d_msac_decode_bool_equi(s)) {
  ------------------
  |  |   53|   134k|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
  |  Branch (66:9): [True: 73.7k, False: 60.6k]
  ------------------
   67|  73.7k|        if (dav1d_msac_decode_bool_equi(s))
  ------------------
  |  |   53|  73.7k|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
  |  Branch (67:13): [True: 44.0k, False: 29.7k]
  ------------------
   68|  44.0k|            k += dav1d_msac_decode_bool_equi(s) + 1;
  ------------------
  |  |   53|  44.0k|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
   69|  73.7k|        a = 1 << k;
   70|  73.7k|    }
   71|   134k|    const unsigned v = dav1d_msac_decode_bools(s, k) + a;
   72|   134k|    return ref * 2 <= n ? inv_recenter(ref, v) :
  ------------------
  |  Branch (72:12): [True: 76.5k, False: 57.9k]
  ------------------
   73|   134k|                          n - 1 - inv_recenter(n - 1 - ref, v);
   74|   134k|}
dav1d_msac_init:
  206|  45.9k|{
  207|  45.9k|    s->buf_pos = data;
  208|  45.9k|    s->buf_end = data + sz;
  209|  45.9k|    s->dif = 0;
  210|  45.9k|    s->rng = 0x8000;
  211|  45.9k|    s->cnt = -15;
  212|  45.9k|    s->allow_update_cdf = !disable_cdf_update_flag;
  213|  45.9k|    ctx_refill(s);
  214|       |
  215|  45.9k|#if ARCH_X86_64 && HAVE_ASM
  216|  45.9k|    s->symbol_adapt16 = dav1d_msac_decode_symbol_adapt_c;
  217|       |
  218|  45.9k|    msac_init_x86(s);
  219|  45.9k|#endif
  220|  45.9k|}
msac.c:ctx_refill:
   41|  45.9k|static inline void ctx_refill(MsacContext *const s) {
   42|  45.9k|    const uint8_t *buf_pos = s->buf_pos;
   43|  45.9k|    const uint8_t *buf_end = s->buf_end;
   44|  45.9k|    int c = EC_WIN_SIZE - s->cnt - 24;
  ------------------
  |  |   39|  45.9k|#define EC_WIN_SIZE (sizeof(ec_win) << 3)
  ------------------
   45|  45.9k|    ec_win dif = s->dif;
   46|   155k|    do {
   47|   155k|        if (buf_pos >= buf_end) {
  ------------------
  |  Branch (47:13): [True: 39.3k, False: 115k]
  ------------------
   48|       |            // set remaining bits to 1;
   49|  39.3k|            dif |= ~(~(ec_win)0xff << c);
   50|  39.3k|            break;
   51|  39.3k|        }
   52|   115k|        dif |= (ec_win)(*buf_pos++ ^ 0xff) << c;
   53|   115k|        c -= 8;
   54|   115k|    } while (c >= 0);
  ------------------
  |  Branch (54:14): [True: 109k, False: 6.58k]
  ------------------
   55|  45.9k|    s->dif = dif;
   56|  45.9k|    s->cnt = EC_WIN_SIZE - c - 24;
  ------------------
  |  |   39|  45.9k|#define EC_WIN_SIZE (sizeof(ec_win) << 3)
  ------------------
   57|  45.9k|    s->buf_pos = buf_pos;
   58|  45.9k|}

decode.c:dav1d_msac_decode_bools:
   94|   539k|static inline unsigned dav1d_msac_decode_bools(MsacContext *const s, unsigned n) {
   95|   539k|    unsigned v = 0;
   96|  1.36M|    while (n--)
  ------------------
  |  Branch (96:12): [True: 820k, False: 539k]
  ------------------
   97|   820k|        v = (v << 1) | dav1d_msac_decode_bool_equi(s);
  ------------------
  |  |   53|   820k|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
   98|   539k|    return v;
   99|   539k|}
decode.c:dav1d_msac_decode_uniform:
  101|  76.1k|static inline int dav1d_msac_decode_uniform(MsacContext *const s, const unsigned n) {
  102|  76.1k|    assert(n > 0);
  ------------------
  |  Branch (102:5): [True: 76.1k, False: 0]
  ------------------
  103|  76.1k|    const int l = ulog2(n) + 1;
  104|  76.1k|    assert(l > 1);
  ------------------
  |  Branch (104:5): [True: 76.1k, False: 0]
  ------------------
  105|  76.1k|    const unsigned m = (1 << l) - n;
  106|  76.1k|    const unsigned v = dav1d_msac_decode_bools(s, l - 1);
  107|  76.1k|    return v < m ? v : (v << 1) - m + dav1d_msac_decode_bool_equi(s);
  ------------------
  |  |   53|  19.7k|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
  |  Branch (107:12): [True: 56.3k, False: 19.7k]
  ------------------
  108|  76.1k|}
msac.c:dav1d_msac_decode_bools:
   94|   134k|static inline unsigned dav1d_msac_decode_bools(MsacContext *const s, unsigned n) {
   95|   134k|    unsigned v = 0;
   96|   593k|    while (n--)
  ------------------
  |  Branch (96:12): [True: 458k, False: 134k]
  ------------------
   97|   458k|        v = (v << 1) | dav1d_msac_decode_bool_equi(s);
  ------------------
  |  |   53|   458k|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
   98|   134k|    return v;
   99|   134k|}
recon_tmpl.c:dav1d_msac_decode_bools:
   94|  3.01M|static inline unsigned dav1d_msac_decode_bools(MsacContext *const s, unsigned n) {
   95|  3.01M|    unsigned v = 0;
   96|  11.6M|    while (n--)
  ------------------
  |  Branch (96:12): [True: 8.59M, False: 3.01M]
  ------------------
   97|  8.59M|        v = (v << 1) | dav1d_msac_decode_bool_equi(s);
  ------------------
  |  |   53|  8.59M|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
   98|  3.01M|    return v;
   99|  3.01M|}

dav1d_parse_sequence_header:
  304|  12.7k|{
  305|  12.7k|    validate_input_or_ret(out != NULL, DAV1D_ERR(EINVAL));
  ------------------
  |  |   52|  12.7k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:9): [True: 0, False: 12.7k]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
  306|  12.7k|    validate_input_or_ret(ptr != NULL, DAV1D_ERR(EINVAL));
  ------------------
  |  |   52|  12.7k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:9): [True: 0, False: 12.7k]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
  307|  12.7k|    validate_input_or_ret(sz > 0 && sz <= SIZE_MAX / 2, DAV1D_ERR(EINVAL));
  ------------------
  |  |   52|  25.5k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:11): [True: 12.7k, False: 0]
  |  |  |  Branch (52:11): [True: 12.7k, False: 0]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
  308|       |
  309|  12.7k|    GetBits gb;
  310|  12.7k|    dav1d_init_get_bits(&gb, ptr, sz);
  311|  12.7k|    int res = DAV1D_ERR(ENOENT);
  ------------------
  |  |   58|  12.7k|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  312|       |
  313|  26.9k|    do {
  314|  26.9k|        dav1d_get_bit(&gb); // obu_forbidden_bit
  315|  26.9k|        const enum Dav1dObuType type = dav1d_get_bits(&gb, 4);
  316|  26.9k|        const int has_extension = dav1d_get_bit(&gb);
  317|  26.9k|        const int has_length_field = dav1d_get_bit(&gb);
  318|  26.9k|        dav1d_get_bits(&gb, 1 + 8 * has_extension); // ignore
  319|       |
  320|  26.9k|        const uint8_t *obu_end = gb.ptr_end;
  321|  26.9k|        if (has_length_field) {
  ------------------
  |  Branch (321:13): [True: 16.2k, False: 10.6k]
  ------------------
  322|  16.2k|            const size_t len = dav1d_get_uleb128(&gb);
  323|  16.2k|            if (len > (size_t)(obu_end - gb.ptr)) return DAV1D_ERR(EINVAL);
  ------------------
  |  |   58|    248|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  |  Branch (323:17): [True: 248, False: 16.0k]
  ------------------
  324|  16.0k|            obu_end = gb.ptr + len;
  325|  16.0k|        }
  326|       |
  327|  26.6k|        if (type == DAV1D_OBU_SEQ_HDR) {
  ------------------
  |  Branch (327:13): [True: 11.9k, False: 14.7k]
  ------------------
  328|  11.9k|            if ((res = parse_seq_hdr(out, &gb, 0)) < 0) return res;
  ------------------
  |  Branch (328:17): [True: 2.09k, False: 9.84k]
  ------------------
  329|  9.84k|            if (gb.ptr > obu_end) return DAV1D_ERR(EINVAL);
  ------------------
  |  |   58|     88|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  |  Branch (329:17): [True: 88, False: 9.75k]
  ------------------
  330|  9.75k|            dav1d_bytealign_get_bits(&gb);
  331|  9.75k|        }
  332|       |
  333|  24.4k|        if (gb.error) return DAV1D_ERR(EINVAL);
  ------------------
  |  |   58|    713|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  |  Branch (333:13): [True: 713, False: 23.7k]
  ------------------
  334|  24.4k|        assert(gb.state == 0 && gb.bits_left == 0);
  ------------------
  |  Branch (334:9): [True: 23.7k, False: 0]
  |  Branch (334:9): [True: 23.7k, False: 0]
  ------------------
  335|  23.7k|        gb.ptr = obu_end;
  336|  23.7k|    } while (gb.ptr < gb.ptr_end);
  ------------------
  |  Branch (336:14): [True: 14.1k, False: 9.62k]
  ------------------
  337|       |
  338|  9.62k|    return res;
  339|  12.7k|}
dav1d_parse_obus:
 1169|   107k|ptrdiff_t dav1d_parse_obus(Dav1dContext *const c, Dav1dData *const in) {
 1170|   107k|    GetBits gb;
 1171|   107k|    int res;
 1172|       |
 1173|   107k|    dav1d_init_get_bits(&gb, in->data, in->sz);
 1174|       |
 1175|       |    // obu header
 1176|   107k|    const int obu_forbidden_bit = dav1d_get_bit(&gb);
 1177|   107k|    if (c->strict_std_compliance && obu_forbidden_bit) goto error;
  ------------------
  |  Branch (1177:9): [True: 0, False: 107k]
  |  Branch (1177:37): [True: 0, False: 0]
  ------------------
 1178|   107k|    const enum Dav1dObuType type = dav1d_get_bits(&gb, 4);
 1179|   107k|    const int has_extension = dav1d_get_bit(&gb);
 1180|   107k|    const int has_length_field = dav1d_get_bit(&gb);
 1181|   107k|    dav1d_get_bit(&gb); // reserved
 1182|       |
 1183|   107k|    int temporal_id = 0, spatial_id = 0;
 1184|   107k|    if (has_extension) {
  ------------------
  |  Branch (1184:9): [True: 4.92k, False: 102k]
  ------------------
 1185|  4.92k|        temporal_id = dav1d_get_bits(&gb, 3);
 1186|  4.92k|        spatial_id = dav1d_get_bits(&gb, 2);
 1187|  4.92k|        dav1d_get_bits(&gb, 3); // reserved
 1188|  4.92k|    }
 1189|       |
 1190|   107k|    if (has_length_field) {
  ------------------
  |  Branch (1190:9): [True: 38.9k, False: 68.7k]
  ------------------
 1191|  38.9k|        const size_t len = dav1d_get_uleb128(&gb);
 1192|  38.9k|        if (len > (size_t)(gb.ptr_end - gb.ptr)) goto error;
  ------------------
  |  Branch (1192:13): [True: 927, False: 38.0k]
  ------------------
 1193|  38.0k|        gb.ptr_end = gb.ptr + len;
 1194|  38.0k|    }
 1195|   106k|    if (gb.error) goto error;
  ------------------
  |  Branch (1195:9): [True: 333, False: 106k]
  ------------------
 1196|       |
 1197|       |    // We must have read a whole number of bytes at this point (1 byte
 1198|       |    // for the header and whole bytes at a time when reading the
 1199|       |    // leb128 length field).
 1200|   106k|    assert(gb.bits_left == 0);
  ------------------
  |  Branch (1200:5): [True: 106k, False: 0]
  ------------------
 1201|       |
 1202|       |    // skip obu not belonging to the selected temporal/spatial layer
 1203|   106k|    if (type != DAV1D_OBU_SEQ_HDR && type != DAV1D_OBU_TD &&
  ------------------
  |  Branch (1203:9): [True: 86.3k, False: 20.0k]
  |  Branch (1203:38): [True: 82.9k, False: 3.41k]
  ------------------
 1204|  82.9k|        has_extension && c->operating_point_idc != 0)
  ------------------
  |  Branch (1204:9): [True: 4.14k, False: 78.8k]
  |  Branch (1204:26): [True: 1.08k, False: 3.06k]
  ------------------
 1205|  1.08k|    {
 1206|  1.08k|        const int in_temporal_layer = (c->operating_point_idc >> temporal_id) & 1;
 1207|  1.08k|        const int in_spatial_layer = (c->operating_point_idc >> (spatial_id + 8)) & 1;
 1208|  1.08k|        if (!in_temporal_layer || !in_spatial_layer)
  ------------------
  |  Branch (1208:13): [True: 309, False: 771]
  |  Branch (1208:35): [True: 229, False: 542]
  ------------------
 1209|    538|            return gb.ptr_end - gb.ptr_start;
 1210|  1.08k|    }
 1211|       |
 1212|   105k|    switch (type) {
 1213|  20.0k|    case DAV1D_OBU_SEQ_HDR: {
  ------------------
  |  Branch (1213:5): [True: 20.0k, False: 85.8k]
  ------------------
 1214|  20.0k|        Dav1dRef *ref = dav1d_ref_create_using_pool(c->seq_hdr_pool,
 1215|  20.0k|                                                    sizeof(Dav1dSequenceHeader));
 1216|  20.0k|        if (!ref) return DAV1D_ERR(ENOMEM);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  |  Branch (1216:13): [True: 0, False: 20.0k]
  ------------------
 1217|  20.0k|        Dav1dSequenceHeader *seq_hdr = ref->data;
 1218|  20.0k|        if ((res = parse_seq_hdr(seq_hdr, &gb, c->strict_std_compliance)) < 0) {
  ------------------
  |  Branch (1218:13): [True: 1.45k, False: 18.6k]
  ------------------
 1219|  1.45k|            dav1d_log(c, "Error parsing sequence header\n");
  ------------------
  |  |   44|  1.45k|#define dav1d_log(...) do { } while(0)
  |  |  ------------------
  |  |  |  Branch (44:37): [Folded, False: 1.45k]
  |  |  ------------------
  ------------------
 1220|  1.45k|            dav1d_ref_dec(&ref);
 1221|  1.45k|            goto error;
 1222|  1.45k|        }
 1223|       |
 1224|  18.6k|        const int op_idx =
 1225|  18.6k|            c->operating_point < seq_hdr->num_operating_points ? c->operating_point : 0;
  ------------------
  |  Branch (1225:13): [True: 18.6k, False: 0]
  ------------------
 1226|  18.6k|        c->operating_point_idc = seq_hdr->operating_points[op_idx].idc;
 1227|  18.6k|        const unsigned spatial_mask = c->operating_point_idc >> 8;
 1228|  18.6k|        c->max_spatial_id = spatial_mask ? ulog2(spatial_mask) : 0;
  ------------------
  |  Branch (1228:29): [True: 5.23k, False: 13.3k]
  ------------------
 1229|       |
 1230|       |        // If we have read a sequence header which is different from
 1231|       |        // the old one, this is a new video sequence and can't use any
 1232|       |        // previous state. Free that state.
 1233|       |
 1234|  18.6k|        if (!c->seq_hdr) {
  ------------------
  |  Branch (1234:13): [True: 9.32k, False: 9.29k]
  ------------------
 1235|  9.32k|            c->frame_hdr = NULL;
 1236|  9.32k|            c->frame_flags |= PICTURE_FLAG_NEW_SEQUENCE;
 1237|       |        // see 7.5, operating_parameter_info is allowed to change in
 1238|       |        // sequence headers of a single sequence
 1239|  9.32k|        } else if (memcmp(seq_hdr, c->seq_hdr, offsetof(Dav1dSequenceHeader, operating_parameter_info))) {
  ------------------
  |  Branch (1239:20): [True: 4.11k, False: 5.17k]
  ------------------
 1240|  4.11k|            c->frame_hdr = NULL;
 1241|  4.11k|            c->mastering_display = NULL;
 1242|  4.11k|            c->content_light = NULL;
 1243|  4.11k|            dav1d_ref_dec(&c->mastering_display_ref);
 1244|  4.11k|            dav1d_ref_dec(&c->content_light_ref);
 1245|  37.0k|            for (int i = 0; i < 8; i++) {
  ------------------
  |  Branch (1245:29): [True: 32.9k, False: 4.11k]
  ------------------
 1246|  32.9k|                if (c->refs[i].p.p.frame_hdr)
  ------------------
  |  Branch (1246:21): [True: 1.45k, False: 31.4k]
  ------------------
 1247|  1.45k|                    dav1d_thread_picture_unref(&c->refs[i].p);
 1248|  32.9k|                dav1d_ref_dec(&c->refs[i].segmap);
 1249|  32.9k|                dav1d_ref_dec(&c->refs[i].refmvs);
 1250|  32.9k|                dav1d_cdf_thread_unref(&c->cdf[i]);
 1251|  32.9k|            }
 1252|  4.11k|            c->frame_flags |= PICTURE_FLAG_NEW_SEQUENCE;
 1253|       |        // If operating_parameter_info changed, signal it
 1254|  5.17k|        } else if (memcmp(seq_hdr->operating_parameter_info, c->seq_hdr->operating_parameter_info,
  ------------------
  |  Branch (1254:20): [True: 250, False: 4.92k]
  ------------------
 1255|  5.17k|                          sizeof(seq_hdr->operating_parameter_info)))
 1256|    250|        {
 1257|    250|            c->frame_flags |= PICTURE_FLAG_NEW_OP_PARAMS_INFO;
 1258|    250|        }
 1259|  18.6k|        dav1d_ref_dec(&c->seq_hdr_ref);
 1260|  18.6k|        c->seq_hdr_ref = ref;
 1261|  18.6k|        c->seq_hdr = seq_hdr;
 1262|  18.6k|        break;
 1263|  20.0k|    }
 1264|  1.39k|    case DAV1D_OBU_REDUNDANT_FRAME_HDR:
  ------------------
  |  Branch (1264:5): [True: 1.39k, False: 104k]
  ------------------
 1265|  1.39k|        if (c->frame_hdr) break;
  ------------------
  |  Branch (1265:13): [True: 615, False: 777]
  ------------------
 1266|       |        // fall-through
 1267|  57.4k|    case DAV1D_OBU_FRAME:
  ------------------
  |  Branch (1267:5): [True: 56.6k, False: 49.2k]
  ------------------
 1268|  73.4k|    case DAV1D_OBU_FRAME_HDR:
  ------------------
  |  Branch (1268:5): [True: 16.0k, False: 89.9k]
  ------------------
 1269|  73.4k|        if (!c->seq_hdr) goto error;
  ------------------
  |  Branch (1269:13): [True: 207, False: 73.2k]
  ------------------
 1270|  73.2k|        if (!c->frame_hdr_ref) {
  ------------------
  |  Branch (1270:13): [True: 45.7k, False: 27.5k]
  ------------------
 1271|  45.7k|            c->frame_hdr_ref = dav1d_ref_create_using_pool(c->frame_hdr_pool,
 1272|  45.7k|                                                           sizeof(Dav1dFrameHeader));
 1273|  45.7k|            if (!c->frame_hdr_ref) return DAV1D_ERR(ENOMEM);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  |  Branch (1273:17): [True: 0, False: 45.7k]
  ------------------
 1274|  45.7k|        }
 1275|  73.2k|#ifndef NDEBUG
 1276|       |        // ensure that the reference is writable
 1277|  73.2k|        assert(dav1d_ref_is_writable(c->frame_hdr_ref));
  ------------------
  |  Branch (1277:9): [True: 73.2k, False: 0]
  ------------------
 1278|  73.2k|#endif
 1279|  73.2k|        c->frame_hdr = c->frame_hdr_ref->data;
 1280|  73.2k|        memset(c->frame_hdr, 0, sizeof(*c->frame_hdr));
 1281|  73.2k|        c->frame_hdr->temporal_id = temporal_id;
 1282|  73.2k|        c->frame_hdr->spatial_id = spatial_id;
 1283|  73.2k|        if ((res = parse_frame_hdr(c, &gb)) < 0) {
  ------------------
  |  Branch (1283:13): [True: 5.41k, False: 67.8k]
  ------------------
 1284|  5.41k|            c->frame_hdr = NULL;
 1285|  5.41k|            goto error;
 1286|  5.41k|        }
 1287|  71.5k|        for (int n = 0; n < c->n_tile_data; n++)
  ------------------
  |  Branch (1287:25): [True: 3.69k, False: 67.8k]
  ------------------
 1288|  3.69k|            dav1d_data_unref_internal(&c->tile[n].data);
 1289|  67.8k|        c->n_tile_data = 0;
 1290|  67.8k|        c->n_tiles = 0;
 1291|  67.8k|        if (type != DAV1D_OBU_FRAME) {
  ------------------
  |  Branch (1291:13): [True: 15.1k, False: 52.7k]
  ------------------
 1292|       |            // This is actually a frame header OBU so read the
 1293|       |            // trailing bit and check for overrun.
 1294|  15.1k|            if (check_trailing_bits(&gb, c->strict_std_compliance) < 0) {
  ------------------
  |  Branch (1294:17): [True: 6.38k, False: 8.71k]
  ------------------
 1295|  6.38k|                c->frame_hdr = NULL;
 1296|  6.38k|                goto error;
 1297|  6.38k|            }
 1298|  15.1k|        }
 1299|       |
 1300|  61.4k|        if (c->frame_size_limit && (int64_t)c->frame_hdr->width[1] *
  ------------------
  |  Branch (1300:13): [True: 61.4k, False: 0]
  |  Branch (1300:36): [True: 357, False: 61.0k]
  ------------------
 1301|  61.4k|            c->frame_hdr->height > c->frame_size_limit)
 1302|    357|        {
 1303|    357|            dav1d_log(c, "Frame size %dx%d exceeds limit %u\n", c->frame_hdr->width[1],
  ------------------
  |  |   44|    357|#define dav1d_log(...) do { } while(0)
  |  |  ------------------
  |  |  |  Branch (44:37): [Folded, False: 357]
  |  |  ------------------
  ------------------
 1304|    357|                      c->frame_hdr->height, c->frame_size_limit);
 1305|    357|            c->frame_hdr = NULL;
 1306|    357|            return DAV1D_ERR(ERANGE);
  ------------------
  |  |   58|    357|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
 1307|    357|        }
 1308|       |
 1309|  61.0k|        if (type != DAV1D_OBU_FRAME)
  ------------------
  |  Branch (1309:13): [True: 8.71k, False: 52.3k]
  ------------------
 1310|  8.71k|            break;
 1311|       |        // OBU_FRAMEs shouldn't be signaled with show_existing_frame
 1312|  52.3k|        if (c->frame_hdr->show_existing_frame) {
  ------------------
  |  Branch (1312:13): [True: 214, False: 52.1k]
  ------------------
 1313|    214|            c->frame_hdr = NULL;
 1314|    214|            goto error;
 1315|    214|        }
 1316|       |
 1317|       |        // This is the frame header at the start of a frame OBU.
 1318|       |        // There's no trailing bit at the end to skip, but we do need
 1319|       |        // to align to the next byte.
 1320|  52.1k|        dav1d_bytealign_get_bits(&gb);
 1321|       |        // fall-through
 1322|  54.3k|    case DAV1D_OBU_TILE_GRP: {
  ------------------
  |  Branch (1322:5): [True: 2.14k, False: 103k]
  ------------------
 1323|  54.3k|        if (!c->frame_hdr) goto error;
  ------------------
  |  Branch (1323:13): [True: 483, False: 53.8k]
  ------------------
 1324|  53.8k|        if (c->n_tile_data_alloc < c->n_tile_data + 1) {
  ------------------
  |  Branch (1324:13): [True: 8.60k, False: 45.2k]
  ------------------
 1325|  8.60k|            if ((c->n_tile_data + 1) > INT_MAX / (int)sizeof(*c->tile)) goto error;
  ------------------
  |  Branch (1325:17): [True: 0, False: 8.60k]
  ------------------
 1326|  8.60k|            struct Dav1dTileGroup *tile = dav1d_realloc(ALLOC_TILE, c->tile,
  ------------------
  |  |  133|  8.60k|#define dav1d_realloc(type, ptr, sz) realloc(ptr, sz)
  ------------------
 1327|  8.60k|                                                        (c->n_tile_data + 1) * sizeof(*c->tile));
 1328|  8.60k|            if (!tile) goto error;
  ------------------
  |  Branch (1328:17): [True: 0, False: 8.60k]
  ------------------
 1329|  8.60k|            c->tile = tile;
 1330|  8.60k|            memset(c->tile + c->n_tile_data, 0, sizeof(*c->tile));
 1331|  8.60k|            c->n_tile_data_alloc = c->n_tile_data + 1;
 1332|  8.60k|        }
 1333|  53.8k|        parse_tile_hdr(c, &gb);
 1334|       |        // Align to the next byte boundary and check for overrun.
 1335|  53.8k|        dav1d_bytealign_get_bits(&gb);
 1336|  53.8k|        if (gb.error) goto error;
  ------------------
  |  Branch (1336:13): [True: 7.56k, False: 46.2k]
  ------------------
 1337|       |
 1338|  46.2k|        dav1d_data_ref(&c->tile[c->n_tile_data].data, in);
 1339|  46.2k|        c->tile[c->n_tile_data].data.data = gb.ptr;
 1340|  46.2k|        c->tile[c->n_tile_data].data.sz = (size_t)(gb.ptr_end - gb.ptr);
 1341|       |        // ensure tile groups are in order and sane, see 6.10.1
 1342|  46.2k|        if (c->tile[c->n_tile_data].start > c->tile[c->n_tile_data].end ||
  ------------------
  |  Branch (1342:13): [True: 253, False: 46.0k]
  ------------------
 1343|  46.0k|            c->tile[c->n_tile_data].start != c->n_tiles)
  ------------------
  |  Branch (1343:13): [True: 434, False: 45.5k]
  ------------------
 1344|    687|        {
 1345|  1.65k|            for (int i = 0; i <= c->n_tile_data; i++)
  ------------------
  |  Branch (1345:29): [True: 965, False: 687]
  ------------------
 1346|    965|                dav1d_data_unref_internal(&c->tile[i].data);
 1347|    687|            c->n_tile_data = 0;
 1348|    687|            c->n_tiles = 0;
 1349|    687|            goto error;
 1350|    687|        }
 1351|  45.5k|        c->n_tiles += 1 + c->tile[c->n_tile_data].end -
 1352|  45.5k|                          c->tile[c->n_tile_data].start;
 1353|  45.5k|        c->n_tile_data++;
 1354|  45.5k|        break;
 1355|  46.2k|    }
 1356|  3.31k|    case DAV1D_OBU_METADATA: {
  ------------------
  |  Branch (1356:5): [True: 3.31k, False: 102k]
  ------------------
 1357|  3.31k|#define DEBUG_OBU_METADATA 0
 1358|       |#if DEBUG_OBU_METADATA
 1359|       |        const uint8_t *const init_ptr = gb.ptr;
 1360|       |#endif
 1361|       |        // obu metadta type field
 1362|  3.31k|        const enum ObuMetaType meta_type = dav1d_get_uleb128(&gb);
 1363|  3.31k|        if (gb.error) goto error;
  ------------------
  |  Branch (1363:13): [True: 244, False: 3.07k]
  ------------------
 1364|       |
 1365|  3.07k|        switch (meta_type) {
 1366|    590|        case OBU_META_HDR_CLL: {
  ------------------
  |  Branch (1366:9): [True: 590, False: 2.48k]
  ------------------
 1367|    590|            Dav1dRef *ref = dav1d_ref_create(ALLOC_OBU_META,
  ------------------
  |  |   49|    590|#define dav1d_ref_create(type, size) dav1d_ref_create(size)
  ------------------
 1368|    590|                                             sizeof(Dav1dContentLightLevel));
 1369|    590|            if (!ref) return DAV1D_ERR(ENOMEM);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  |  Branch (1369:17): [True: 0, False: 590]
  ------------------
 1370|    590|            Dav1dContentLightLevel *const content_light = ref->data;
 1371|       |
 1372|    590|            content_light->max_content_light_level = dav1d_get_bits(&gb, 16);
 1373|       |#if DEBUG_OBU_METADATA
 1374|       |            printf("CLLOBU: max-content-light-level: %d [off=%td]\n",
 1375|       |                   content_light->max_content_light_level,
 1376|       |                   (gb.ptr - init_ptr) * 8 - gb.bits_left);
 1377|       |#endif
 1378|    590|            content_light->max_frame_average_light_level = dav1d_get_bits(&gb, 16);
 1379|       |#if DEBUG_OBU_METADATA
 1380|       |            printf("CLLOBU: max-frame-average-light-level: %d [off=%td]\n",
 1381|       |                   content_light->max_frame_average_light_level,
 1382|       |                   (gb.ptr - init_ptr) * 8 - gb.bits_left);
 1383|       |#endif
 1384|       |
 1385|    590|            if (check_trailing_bits(&gb, c->strict_std_compliance) < 0) {
  ------------------
  |  Branch (1385:17): [True: 286, False: 304]
  ------------------
 1386|    286|                dav1d_ref_dec(&ref);
 1387|    286|                goto error;
 1388|    286|            }
 1389|       |
 1390|    304|            dav1d_ref_dec(&c->content_light_ref);
 1391|    304|            c->content_light = content_light;
 1392|    304|            c->content_light_ref = ref;
 1393|    304|            break;
 1394|    590|        }
 1395|    565|        case OBU_META_HDR_MDCV: {
  ------------------
  |  Branch (1395:9): [True: 565, False: 2.50k]
  ------------------
 1396|    565|            Dav1dRef *ref = dav1d_ref_create(ALLOC_OBU_META,
  ------------------
  |  |   49|    565|#define dav1d_ref_create(type, size) dav1d_ref_create(size)
  ------------------
 1397|    565|                                             sizeof(Dav1dMasteringDisplay));
 1398|    565|            if (!ref) return DAV1D_ERR(ENOMEM);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  |  Branch (1398:17): [True: 0, False: 565]
  ------------------
 1399|    565|            Dav1dMasteringDisplay *const mastering_display = ref->data;
 1400|       |
 1401|  2.26k|            for (int i = 0; i < 3; i++) {
  ------------------
  |  Branch (1401:29): [True: 1.69k, False: 565]
  ------------------
 1402|  1.69k|                mastering_display->primaries[i][0] = dav1d_get_bits(&gb, 16);
 1403|  1.69k|                mastering_display->primaries[i][1] = dav1d_get_bits(&gb, 16);
 1404|       |#if DEBUG_OBU_METADATA
 1405|       |                printf("MDCVOBU: primaries[%d]: (%d, %d) [off=%td]\n", i,
 1406|       |                       mastering_display->primaries[i][0],
 1407|       |                       mastering_display->primaries[i][1],
 1408|       |                       (gb.ptr - init_ptr) * 8 - gb.bits_left);
 1409|       |#endif
 1410|  1.69k|            }
 1411|    565|            mastering_display->white_point[0] = dav1d_get_bits(&gb, 16);
 1412|       |#if DEBUG_OBU_METADATA
 1413|       |            printf("MDCVOBU: white-point-x: %d [off=%td]\n",
 1414|       |                   mastering_display->white_point[0],
 1415|       |                   (gb.ptr - init_ptr) * 8 - gb.bits_left);
 1416|       |#endif
 1417|    565|            mastering_display->white_point[1] = dav1d_get_bits(&gb, 16);
 1418|       |#if DEBUG_OBU_METADATA
 1419|       |            printf("MDCVOBU: white-point-y: %d [off=%td]\n",
 1420|       |                   mastering_display->white_point[1],
 1421|       |                   (gb.ptr - init_ptr) * 8 - gb.bits_left);
 1422|       |#endif
 1423|    565|            mastering_display->max_luminance = dav1d_get_bits(&gb, 32);
 1424|       |#if DEBUG_OBU_METADATA
 1425|       |            printf("MDCVOBU: max-luminance: %d [off=%td]\n",
 1426|       |                   mastering_display->max_luminance,
 1427|       |                   (gb.ptr - init_ptr) * 8 - gb.bits_left);
 1428|       |#endif
 1429|    565|            mastering_display->min_luminance = dav1d_get_bits(&gb, 32);
 1430|       |#if DEBUG_OBU_METADATA
 1431|       |            printf("MDCVOBU: min-luminance: %d [off=%td]\n",
 1432|       |                   mastering_display->min_luminance,
 1433|       |                   (gb.ptr - init_ptr) * 8 - gb.bits_left);
 1434|       |#endif
 1435|    565|            if (check_trailing_bits(&gb, c->strict_std_compliance) < 0) {
  ------------------
  |  Branch (1435:17): [True: 241, False: 324]
  ------------------
 1436|    241|                dav1d_ref_dec(&ref);
 1437|    241|                goto error;
 1438|    241|            }
 1439|       |
 1440|    324|            dav1d_ref_dec(&c->mastering_display_ref);
 1441|    324|            c->mastering_display = mastering_display;
 1442|    324|            c->mastering_display_ref = ref;
 1443|    324|            break;
 1444|    565|        }
 1445|  1.50k|        case OBU_META_ITUT_T35: {
  ------------------
  |  Branch (1445:9): [True: 1.50k, False: 1.56k]
  ------------------
 1446|  1.50k|            ptrdiff_t payload_size = gb.ptr_end - gb.ptr;
 1447|       |            // Don't take into account all the trailing bits for payload_size
 1448|  1.73k|            while (payload_size > 0 && !gb.ptr[payload_size - 1])
  ------------------
  |  Branch (1448:20): [True: 1.47k, False: 254]
  |  Branch (1448:40): [True: 228, False: 1.25k]
  ------------------
 1449|    228|                payload_size--; // trailing_zero_bit x 8
 1450|  1.50k|            payload_size--; // trailing_one_bit + trailing_zero_bit x 7
 1451|       |
 1452|  1.50k|            int country_code_extension_byte = 0;
 1453|  1.50k|            const int country_code = dav1d_get_bits(&gb, 8);
 1454|  1.50k|            payload_size--;
 1455|  1.50k|            if (country_code == 0xFF) {
  ------------------
  |  Branch (1455:17): [True: 423, False: 1.08k]
  ------------------
 1456|    423|                country_code_extension_byte = dav1d_get_bits(&gb, 8);
 1457|    423|                payload_size--;
 1458|    423|            }
 1459|       |
 1460|  1.50k|            if (payload_size <= 0 || gb.ptr[payload_size] != 0x80) {
  ------------------
  |  Branch (1460:17): [True: 258, False: 1.24k]
  |  Branch (1460:38): [True: 127, False: 1.12k]
  ------------------
 1461|    385|                dav1d_log(c, "Malformed ITU-T T.35 metadata message format\n");
  ------------------
  |  |   44|    385|#define dav1d_log(...) do { } while(0)
  |  |  ------------------
  |  |  |  Branch (44:37): [Folded, False: 385]
  |  |  ------------------
  ------------------
 1462|    385|                break;
 1463|    385|            }
 1464|       |
 1465|  1.12k|            if ((c->n_itut_t35 + 1) > INT_MAX / (int)sizeof(*c->itut_t35)) goto error;
  ------------------
  |  Branch (1465:17): [True: 0, False: 1.12k]
  ------------------
 1466|  1.12k|            struct Dav1dITUTT35 *itut_t35 = dav1d_realloc(ALLOC_OBU_META, c->itut_t35,
  ------------------
  |  |  133|  1.12k|#define dav1d_realloc(type, ptr, sz) realloc(ptr, sz)
  ------------------
 1467|  1.12k|                                                          (c->n_itut_t35 + 1) * sizeof(*c->itut_t35));
 1468|  1.12k|            if (!itut_t35) goto error;
  ------------------
  |  Branch (1468:17): [True: 0, False: 1.12k]
  ------------------
 1469|  1.12k|            c->itut_t35 = itut_t35;
 1470|  1.12k|            memset(c->itut_t35 + c->n_itut_t35, 0, sizeof(*c->itut_t35));
 1471|       |
 1472|  1.12k|            struct itut_t35_ctx_context *itut_t35_ctx;
 1473|  1.12k|            if (!c->n_itut_t35) {
  ------------------
  |  Branch (1473:17): [True: 575, False: 545]
  ------------------
 1474|    575|                assert(!c->itut_t35_ref);
  ------------------
  |  Branch (1474:17): [True: 575, False: 0]
  ------------------
 1475|    575|                itut_t35_ctx = dav1d_malloc(ALLOC_OBU_META, sizeof(struct itut_t35_ctx_context));
  ------------------
  |  |  132|    575|#define dav1d_malloc(type, sz) malloc(sz)
  ------------------
 1476|    575|                if (!itut_t35_ctx) goto error;
  ------------------
  |  Branch (1476:21): [True: 0, False: 575]
  ------------------
 1477|    575|                c->itut_t35_ref = dav1d_ref_init(&itut_t35_ctx->ref, c->itut_t35,
 1478|    575|                                                 dav1d_picture_free_itut_t35, itut_t35_ctx, 0);
 1479|    575|            } else {
 1480|    545|                assert(c->itut_t35_ref && atomic_load(&c->itut_t35_ref->ref_cnt) == 1);
  ------------------
  |  Branch (1480:17): [True: 545, False: 0]
  |  Branch (1480:17): [True: 545, False: 0]
  ------------------
 1481|    545|                itut_t35_ctx = c->itut_t35_ref->user_data;
 1482|    545|                c->itut_t35_ref->const_data = (uint8_t *)c->itut_t35;
 1483|    545|            }
 1484|  1.12k|            itut_t35_ctx->itut_t35 = c->itut_t35;
 1485|  1.12k|            itut_t35_ctx->n_itut_t35 = c->n_itut_t35 + 1;
 1486|       |
 1487|  1.12k|            Dav1dITUTT35 *const itut_t35_metadata = &c->itut_t35[c->n_itut_t35];
 1488|  1.12k|            itut_t35_metadata->payload = dav1d_malloc(ALLOC_OBU_META, payload_size);
  ------------------
  |  |  132|  1.12k|#define dav1d_malloc(type, sz) malloc(sz)
  ------------------
 1489|  1.12k|            if (!itut_t35_metadata->payload) goto error;
  ------------------
  |  Branch (1489:17): [True: 0, False: 1.12k]
  ------------------
 1490|       |
 1491|  1.12k|            itut_t35_metadata->country_code = country_code;
 1492|  1.12k|            itut_t35_metadata->country_code_extension_byte = country_code_extension_byte;
 1493|  1.12k|            itut_t35_metadata->payload_size = payload_size;
 1494|       |
 1495|       |            // We know that we've read a whole number of bytes and that the
 1496|       |            // payload is within the OBU boundaries, so just use memcpy()
 1497|  1.12k|            assert(gb.bits_left == 0);
  ------------------
  |  Branch (1497:13): [True: 1.12k, False: 0]
  ------------------
 1498|  1.12k|            memcpy(itut_t35_metadata->payload, gb.ptr, payload_size);
 1499|       |
 1500|  1.12k|            c->n_itut_t35++;
 1501|  1.12k|            break;
 1502|  1.12k|        }
 1503|      0|        case OBU_META_SCALABILITY:
  ------------------
  |  Branch (1503:9): [True: 0, False: 3.07k]
  ------------------
 1504|      1|        case OBU_META_TIMECODE:
  ------------------
  |  Branch (1504:9): [True: 1, False: 3.07k]
  ------------------
 1505|       |            // ignore metadata OBUs we don't care about
 1506|      1|            break;
 1507|    412|        default:
  ------------------
  |  Branch (1507:9): [True: 412, False: 2.66k]
  ------------------
 1508|       |            // print a warning but don't fail for unknown types
 1509|    412|            if (meta_type > 31) // Types 6 to 31 are "Unregistered user private", so ignore them.
  ------------------
  |  Branch (1509:17): [True: 210, False: 202]
  ------------------
 1510|    210|                dav1d_log(c, "Unknown Metadata OBU type %d\n", meta_type);
  ------------------
  |  |   44|    210|#define dav1d_log(...) do { } while(0)
  |  |  ------------------
  |  |  |  Branch (44:37): [Folded, False: 210]
  |  |  ------------------
  ------------------
 1511|    412|            break;
 1512|  3.07k|        }
 1513|       |
 1514|  2.54k|        break;
 1515|  3.07k|    }
 1516|  3.41k|    case DAV1D_OBU_TD:
  ------------------
  |  Branch (1516:5): [True: 3.41k, False: 102k]
  ------------------
 1517|  3.41k|        c->frame_flags |= PICTURE_FLAG_NEW_TEMPORAL_UNIT;
 1518|  3.41k|        break;
 1519|    260|    case DAV1D_OBU_PADDING:
  ------------------
  |  Branch (1519:5): [True: 260, False: 105k]
  ------------------
 1520|       |        // ignore OBUs we don't care about
 1521|    260|        break;
 1522|  2.63k|    default:
  ------------------
  |  Branch (1522:5): [True: 2.63k, False: 103k]
  ------------------
 1523|       |        // print a warning but don't fail for unknown types
 1524|  2.63k|        dav1d_log(c, "Unknown OBU type %d of size %td\n", type, gb.ptr_end - gb.ptr);
  ------------------
  |  |   44|  2.63k|#define dav1d_log(...) do { } while(0)
  |  |  ------------------
  |  |  |  Branch (44:37): [Folded, False: 2.63k]
  |  |  ------------------
  ------------------
 1525|  2.63k|        break;
 1526|   105k|    }
 1527|       |
 1528|  82.3k|    if (c->seq_hdr && c->frame_hdr) {
  ------------------
  |  Branch (1528:9): [True: 81.3k, False: 979]
  |  Branch (1528:23): [True: 56.4k, False: 24.9k]
  ------------------
 1529|  56.4k|        if (c->frame_hdr->show_existing_frame) {
  ------------------
  |  Branch (1529:13): [True: 6.03k, False: 50.3k]
  ------------------
 1530|  6.03k|            if (!c->refs[c->frame_hdr->existing_frame_idx].p.p.frame_hdr) goto error;
  ------------------
  |  Branch (1530:17): [True: 492, False: 5.54k]
  ------------------
 1531|  5.54k|            switch (c->refs[c->frame_hdr->existing_frame_idx].p.p.frame_hdr->frame_type) {
 1532|    217|            case DAV1D_FRAME_TYPE_INTER:
  ------------------
  |  Branch (1532:13): [True: 217, False: 5.32k]
  ------------------
 1533|    441|            case DAV1D_FRAME_TYPE_SWITCH:
  ------------------
  |  Branch (1533:13): [True: 224, False: 5.31k]
  ------------------
 1534|    441|                if (c->decode_frame_type > DAV1D_DECODEFRAMETYPE_REFERENCE)
  ------------------
  |  Branch (1534:21): [True: 0, False: 441]
  ------------------
 1535|      0|                    goto skip;
 1536|    441|                break;
 1537|    441|            case DAV1D_FRAME_TYPE_INTRA:
  ------------------
  |  Branch (1537:13): [True: 209, False: 5.33k]
  ------------------
 1538|    209|                if (c->decode_frame_type > DAV1D_DECODEFRAMETYPE_INTRA)
  ------------------
  |  Branch (1538:21): [True: 0, False: 209]
  ------------------
 1539|      0|                    goto skip;
 1540|       |                // fall-through
 1541|  5.09k|            default:
  ------------------
  |  Branch (1541:13): [True: 4.89k, False: 650]
  ------------------
 1542|  5.09k|                break;
 1543|  5.54k|            }
 1544|  5.54k|            if (!c->refs[c->frame_hdr->existing_frame_idx].p.p.data[0]) goto error;
  ------------------
  |  Branch (1544:17): [True: 0, False: 5.54k]
  ------------------
 1545|  5.54k|            if (c->strict_std_compliance &&
  ------------------
  |  Branch (1545:17): [True: 0, False: 5.54k]
  ------------------
 1546|      0|                !c->refs[c->frame_hdr->existing_frame_idx].p.showable)
  ------------------
  |  Branch (1546:17): [True: 0, False: 0]
  ------------------
 1547|      0|            {
 1548|      0|                goto error;
 1549|      0|            }
 1550|  5.54k|            if (c->n_fc == 1) {
  ------------------
  |  Branch (1550:17): [True: 5.54k, False: 0]
  ------------------
 1551|  5.54k|                dav1d_thread_picture_ref(&c->out,
 1552|  5.54k|                                         &c->refs[c->frame_hdr->existing_frame_idx].p);
 1553|  5.54k|                dav1d_picture_copy_props(&c->out.p,
 1554|  5.54k|                                         c->content_light, c->content_light_ref,
 1555|  5.54k|                                         c->mastering_display, c->mastering_display_ref,
 1556|  5.54k|                                         c->itut_t35, c->itut_t35_ref, c->n_itut_t35,
 1557|  5.54k|                                         &in->m);
 1558|       |                // Must be removed from the context after being attached to the frame
 1559|  5.54k|                dav1d_ref_dec(&c->itut_t35_ref);
 1560|  5.54k|                c->itut_t35 = NULL;
 1561|  5.54k|                c->n_itut_t35 = 0;
 1562|  5.54k|                c->event_flags |= dav1d_picture_get_event_flags(&c->refs[c->frame_hdr->existing_frame_idx].p);
 1563|  5.54k|            } else {
 1564|      0|                pthread_mutex_lock(&c->task_thread.lock);
 1565|       |                // need to append this to the frame output queue
 1566|      0|                const unsigned next = c->frame_thread.next++;
 1567|      0|                if (c->frame_thread.next == c->n_fc)
  ------------------
  |  Branch (1567:21): [True: 0, False: 0]
  ------------------
 1568|      0|                    c->frame_thread.next = 0;
 1569|       |
 1570|      0|                Dav1dFrameContext *const f = &c->fc[next];
 1571|      0|                while (f->n_tile_data > 0)
  ------------------
  |  Branch (1571:24): [True: 0, False: 0]
  ------------------
 1572|      0|                    pthread_cond_wait(&f->task_thread.cond,
 1573|      0|                                      &f->task_thread.ttd->lock);
 1574|      0|                Dav1dThreadPicture *const out_delayed =
 1575|      0|                    &c->frame_thread.out_delayed[next];
 1576|      0|                if (out_delayed->p.data[0] || atomic_load(&f->task_thread.error)) {
  ------------------
  |  Branch (1576:21): [True: 0, False: 0]
  |  Branch (1576:47): [True: 0, False: 0]
  ------------------
 1577|      0|                    unsigned first = atomic_load(&c->task_thread.first);
 1578|      0|                    if (first + 1U < c->n_fc)
  ------------------
  |  Branch (1578:25): [True: 0, False: 0]
  ------------------
 1579|      0|                        atomic_fetch_add(&c->task_thread.first, 1U);
 1580|      0|                    else
 1581|      0|                        atomic_store(&c->task_thread.first, 0);
 1582|      0|                    atomic_compare_exchange_strong(&c->task_thread.reset_task_cur,
 1583|      0|                                                   &first, UINT_MAX);
 1584|      0|                    if (c->task_thread.cur && c->task_thread.cur < c->n_fc)
  ------------------
  |  Branch (1584:25): [True: 0, False: 0]
  |  Branch (1584:47): [True: 0, False: 0]
  ------------------
 1585|      0|                        c->task_thread.cur--;
 1586|      0|                }
 1587|      0|                const int error = f->task_thread.retval;
 1588|      0|                if (error) {
  ------------------
  |  Branch (1588:21): [True: 0, False: 0]
  ------------------
 1589|      0|                    c->cached_error = error;
 1590|      0|                    f->task_thread.retval = 0;
 1591|      0|                    dav1d_data_props_copy(&c->cached_error_props, &out_delayed->p.m);
 1592|      0|                    dav1d_thread_picture_unref(out_delayed);
 1593|      0|                } else if (out_delayed->p.data[0]) {
  ------------------
  |  Branch (1593:28): [True: 0, False: 0]
  ------------------
 1594|      0|                    const unsigned progress = atomic_load_explicit(&out_delayed->progress[1],
 1595|      0|                                                                   memory_order_relaxed);
 1596|      0|                    if ((out_delayed->visible || c->output_invisible_frames) &&
  ------------------
  |  Branch (1596:26): [True: 0, False: 0]
  |  Branch (1596:50): [True: 0, False: 0]
  ------------------
 1597|      0|                        progress != FRAME_ERROR)
  ------------------
  |  |   35|      0|#define FRAME_ERROR (UINT_MAX - 1)
  ------------------
  |  Branch (1597:25): [True: 0, False: 0]
  ------------------
 1598|      0|                    {
 1599|      0|                        dav1d_thread_picture_ref(&c->out, out_delayed);
 1600|      0|                        c->event_flags |= dav1d_picture_get_event_flags(out_delayed);
 1601|      0|                    }
 1602|      0|                    dav1d_thread_picture_unref(out_delayed);
 1603|      0|                }
 1604|      0|                dav1d_thread_picture_ref(out_delayed,
 1605|      0|                                         &c->refs[c->frame_hdr->existing_frame_idx].p);
 1606|      0|                out_delayed->visible = 1;
 1607|      0|                dav1d_picture_copy_props(&out_delayed->p,
 1608|      0|                                         c->content_light, c->content_light_ref,
 1609|      0|                                         c->mastering_display, c->mastering_display_ref,
 1610|      0|                                         c->itut_t35, c->itut_t35_ref, c->n_itut_t35,
 1611|      0|                                         &in->m);
 1612|       |                // Must be removed from the context after being attached to the frame
 1613|      0|                dav1d_ref_dec(&c->itut_t35_ref);
 1614|      0|                c->itut_t35 = NULL;
 1615|      0|                c->n_itut_t35 = 0;
 1616|       |
 1617|      0|                pthread_mutex_unlock(&c->task_thread.lock);
 1618|      0|            }
 1619|  5.54k|            if (c->refs[c->frame_hdr->existing_frame_idx].p.p.frame_hdr->frame_type == DAV1D_FRAME_TYPE_KEY) {
  ------------------
  |  Branch (1619:17): [True: 4.89k, False: 650]
  ------------------
 1620|  4.89k|                const int r = c->frame_hdr->existing_frame_idx;
 1621|  4.89k|                c->refs[r].p.showable = 0;
 1622|  44.0k|                for (int i = 0; i < 8; i++) {
  ------------------
  |  Branch (1622:33): [True: 39.1k, False: 4.89k]
  ------------------
 1623|  39.1k|                    if (i == r) continue;
  ------------------
  |  Branch (1623:25): [True: 4.89k, False: 34.2k]
  ------------------
 1624|       |
 1625|  34.2k|                    if (c->refs[i].p.p.frame_hdr)
  ------------------
  |  Branch (1625:25): [True: 33.9k, False: 280]
  ------------------
 1626|  33.9k|                        dav1d_thread_picture_unref(&c->refs[i].p);
 1627|  34.2k|                    dav1d_thread_picture_ref(&c->refs[i].p, &c->refs[r].p);
 1628|       |
 1629|  34.2k|                    dav1d_cdf_thread_unref(&c->cdf[i]);
 1630|  34.2k|                    dav1d_cdf_thread_ref(&c->cdf[i], &c->cdf[r]);
 1631|       |
 1632|  34.2k|                    dav1d_ref_dec(&c->refs[i].segmap);
 1633|  34.2k|                    c->refs[i].segmap = c->refs[r].segmap;
 1634|  34.2k|                    if (c->refs[r].segmap)
  ------------------
  |  Branch (1634:25): [True: 2.53k, False: 31.6k]
  ------------------
 1635|  2.53k|                        dav1d_ref_inc(c->refs[r].segmap);
 1636|  34.2k|                    dav1d_ref_dec(&c->refs[i].refmvs);
 1637|  34.2k|                }
 1638|  4.89k|            }
 1639|  5.54k|            c->frame_hdr = NULL;
 1640|  50.3k|        } else if (c->n_tiles == c->frame_hdr->tiling.cols * c->frame_hdr->tiling.rows) {
  ------------------
  |  Branch (1640:20): [True: 44.2k, False: 6.14k]
  ------------------
 1641|  44.2k|            switch (c->frame_hdr->frame_type) {
 1642|  14.8k|            case DAV1D_FRAME_TYPE_INTER:
  ------------------
  |  Branch (1642:13): [True: 14.8k, False: 29.4k]
  ------------------
 1643|  15.1k|            case DAV1D_FRAME_TYPE_SWITCH:
  ------------------
  |  Branch (1643:13): [True: 279, False: 43.9k]
  ------------------
 1644|  15.1k|                if (c->decode_frame_type > DAV1D_DECODEFRAMETYPE_REFERENCE ||
  ------------------
  |  Branch (1644:21): [True: 0, False: 15.1k]
  ------------------
 1645|  15.1k|                    (c->decode_frame_type == DAV1D_DECODEFRAMETYPE_REFERENCE &&
  ------------------
  |  Branch (1645:22): [True: 0, False: 15.1k]
  ------------------
 1646|      0|                     !c->frame_hdr->refresh_frame_flags))
  ------------------
  |  Branch (1646:22): [True: 0, False: 0]
  ------------------
 1647|      0|                    goto skip;
 1648|  15.1k|                break;
 1649|  15.1k|            case DAV1D_FRAME_TYPE_INTRA:
  ------------------
  |  Branch (1649:13): [True: 575, False: 43.6k]
  ------------------
 1650|    575|                if (c->decode_frame_type > DAV1D_DECODEFRAMETYPE_INTRA ||
  ------------------
  |  Branch (1650:21): [True: 0, False: 575]
  ------------------
 1651|    575|                    (c->decode_frame_type == DAV1D_DECODEFRAMETYPE_REFERENCE &&
  ------------------
  |  Branch (1651:22): [True: 0, False: 575]
  ------------------
 1652|      0|                     !c->frame_hdr->refresh_frame_flags))
  ------------------
  |  Branch (1652:22): [True: 0, False: 0]
  ------------------
 1653|      0|                    goto skip;
 1654|       |                // fall-through
 1655|  29.1k|            default:
  ------------------
  |  Branch (1655:13): [True: 28.5k, False: 15.7k]
  ------------------
 1656|  29.1k|                break;
 1657|  44.2k|            }
 1658|  44.2k|            if (!c->n_tile_data)
  ------------------
  |  Branch (1658:17): [True: 0, False: 44.2k]
  ------------------
 1659|      0|                goto error;
 1660|  44.2k|            if ((res = dav1d_submit_frame(c)) < 0)
  ------------------
  |  Branch (1660:17): [True: 27.0k, False: 17.2k]
  ------------------
 1661|  27.0k|                return res;
 1662|  44.2k|            assert(!c->n_tile_data);
  ------------------
  |  Branch (1662:13): [True: 17.2k, False: 0]
  ------------------
 1663|  17.2k|            c->frame_hdr = NULL;
 1664|  17.2k|            c->n_tiles = 0;
 1665|  17.2k|        }
 1666|  56.4k|    }
 1667|       |
 1668|  54.8k|    return gb.ptr_end - gb.ptr_start;
 1669|       |
 1670|      0|skip:
 1671|       |    // update refs with only the headers in case we skip the frame
 1672|      0|    for (int i = 0; i < 8; i++) {
  ------------------
  |  Branch (1672:21): [True: 0, False: 0]
  ------------------
 1673|      0|        if (c->frame_hdr->refresh_frame_flags & (1 << i)) {
  ------------------
  |  Branch (1673:13): [True: 0, False: 0]
  ------------------
 1674|      0|            dav1d_thread_picture_unref(&c->refs[i].p);
 1675|      0|            c->refs[i].p.p.frame_hdr = c->frame_hdr;
 1676|      0|            c->refs[i].p.p.seq_hdr = c->seq_hdr;
 1677|      0|            c->refs[i].p.p.frame_hdr_ref = c->frame_hdr_ref;
 1678|      0|            c->refs[i].p.p.seq_hdr_ref = c->seq_hdr_ref;
 1679|      0|            dav1d_ref_inc(c->frame_hdr_ref);
 1680|      0|            dav1d_ref_inc(c->seq_hdr_ref);
 1681|      0|        }
 1682|      0|    }
 1683|       |
 1684|      0|    dav1d_ref_dec(&c->frame_hdr_ref);
 1685|      0|    c->frame_hdr = NULL;
 1686|      0|    c->n_tiles = 0;
 1687|       |
 1688|      0|    return gb.ptr_end - gb.ptr_start;
 1689|       |
 1690|  24.9k|error:
 1691|  24.9k|    dav1d_data_props_copy(&c->cached_error_props, &in->m);
 1692|  24.9k|    dav1d_log(c, gb.error ? "Overrun in OBU bit buffer\n" :
  ------------------
  |  |   44|  24.9k|#define dav1d_log(...) do { } while(0)
  |  |  ------------------
  |  |  |  Branch (44:37): [Folded, False: 24.9k]
  |  |  ------------------
  ------------------
 1693|  24.9k|                            "Error parsing OBU data\n");
 1694|  24.9k|    return DAV1D_ERR(EINVAL);
  ------------------
  |  |   58|  24.9k|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
 1695|  82.3k|}
obu.c:parse_seq_hdr:
   75|  32.0k|{
   76|  32.0k|#define DEBUG_SEQ_HDR 0
   77|       |
   78|       |#if DEBUG_SEQ_HDR
   79|       |    const unsigned init_bit_pos = dav1d_get_bits_pos(gb);
   80|       |#endif
   81|       |
   82|  32.0k|    memset(hdr, 0, sizeof(*hdr));
   83|  32.0k|    hdr->profile = dav1d_get_bits(gb, 3);
   84|  32.0k|    if (hdr->profile > 2) goto error;
  ------------------
  |  Branch (84:9): [True: 742, False: 31.2k]
  ------------------
   85|       |#if DEBUG_SEQ_HDR
   86|       |    printf("SEQHDR: post-profile: off=%u\n",
   87|       |           dav1d_get_bits_pos(gb) - init_bit_pos);
   88|       |#endif
   89|       |
   90|  31.2k|    hdr->still_picture = dav1d_get_bit(gb);
   91|  31.2k|    hdr->reduced_still_picture_header = dav1d_get_bit(gb);
   92|  31.2k|    if (hdr->reduced_still_picture_header && !hdr->still_picture) goto error;
  ------------------
  |  Branch (92:9): [True: 19.4k, False: 11.7k]
  |  Branch (92:46): [True: 203, False: 19.2k]
  ------------------
   93|       |#if DEBUG_SEQ_HDR
   94|       |    printf("SEQHDR: post-stillpicture_flags: off=%u\n",
   95|       |           dav1d_get_bits_pos(gb) - init_bit_pos);
   96|       |#endif
   97|       |
   98|  31.0k|    if (hdr->reduced_still_picture_header) {
  ------------------
  |  Branch (98:9): [True: 19.2k, False: 11.7k]
  ------------------
   99|  19.2k|        hdr->num_operating_points = 1;
  100|  19.2k|        hdr->operating_points[0].major_level = dav1d_get_bits(gb, 3);
  101|  19.2k|        hdr->operating_points[0].minor_level = dav1d_get_bits(gb, 2);
  102|  19.2k|        hdr->operating_points[0].initial_display_delay = 10;
  103|  19.2k|    } else {
  104|  11.7k|        hdr->timing_info_present = dav1d_get_bit(gb);
  105|  11.7k|        if (hdr->timing_info_present) {
  ------------------
  |  Branch (105:13): [True: 2.01k, False: 9.77k]
  ------------------
  106|  2.01k|            hdr->num_units_in_tick = dav1d_get_bits(gb, 32);
  107|  2.01k|            hdr->time_scale = dav1d_get_bits(gb, 32);
  108|  2.01k|            if (strict_std_compliance && (!hdr->num_units_in_tick || !hdr->time_scale))
  ------------------
  |  Branch (108:17): [True: 0, False: 2.01k]
  |  Branch (108:43): [True: 0, False: 0]
  |  Branch (108:70): [True: 0, False: 0]
  ------------------
  109|      0|                goto error;
  110|  2.01k|            hdr->equal_picture_interval = dav1d_get_bit(gb);
  111|  2.01k|            if (hdr->equal_picture_interval) {
  ------------------
  |  Branch (111:17): [True: 1.14k, False: 870]
  ------------------
  112|  1.14k|                const unsigned num_ticks_per_picture = dav1d_get_vlc(gb);
  113|  1.14k|                if (num_ticks_per_picture == UINT32_MAX)
  ------------------
  |  Branch (113:21): [True: 73, False: 1.06k]
  ------------------
  114|     73|                    goto error;
  115|  1.06k|                hdr->num_ticks_per_picture = num_ticks_per_picture + 1;
  116|  1.06k|            }
  117|       |
  118|  1.93k|            hdr->decoder_model_info_present = dav1d_get_bit(gb);
  119|  1.93k|            if (hdr->decoder_model_info_present) {
  ------------------
  |  Branch (119:17): [True: 1.11k, False: 824]
  ------------------
  120|  1.11k|                hdr->encoder_decoder_buffer_delay_length = dav1d_get_bits(gb, 5) + 1;
  121|  1.11k|                hdr->num_units_in_decoding_tick = dav1d_get_bits(gb, 32);
  122|  1.11k|                if (strict_std_compliance && !hdr->num_units_in_decoding_tick)
  ------------------
  |  Branch (122:21): [True: 0, False: 1.11k]
  |  Branch (122:46): [True: 0, False: 0]
  ------------------
  123|      0|                    goto error;
  124|  1.11k|                hdr->buffer_removal_delay_length = dav1d_get_bits(gb, 5) + 1;
  125|  1.11k|                hdr->frame_presentation_delay_length = dav1d_get_bits(gb, 5) + 1;
  126|  1.11k|            }
  127|  1.93k|        }
  128|       |#if DEBUG_SEQ_HDR
  129|       |        printf("SEQHDR: post-timinginfo: off=%u\n",
  130|       |               dav1d_get_bits_pos(gb) - init_bit_pos);
  131|       |#endif
  132|       |
  133|  11.7k|        hdr->display_model_info_present = dav1d_get_bit(gb);
  134|  11.7k|        hdr->num_operating_points = dav1d_get_bits(gb, 5) + 1;
  135|  29.3k|        for (int i = 0; i < hdr->num_operating_points; i++) {
  ------------------
  |  Branch (135:25): [True: 17.9k, False: 11.4k]
  ------------------
  136|  17.9k|            struct Dav1dSequenceHeaderOperatingPoint *const op =
  137|  17.9k|                &hdr->operating_points[i];
  138|  17.9k|            op->idc = dav1d_get_bits(gb, 12);
  139|  17.9k|            if (op->idc && (!(op->idc & 0xff) || !(op->idc & 0xf00)))
  ------------------
  |  Branch (139:17): [True: 11.8k, False: 6.07k]
  |  Branch (139:29): [True: 109, False: 11.7k]
  |  Branch (139:50): [True: 182, False: 11.5k]
  ------------------
  140|    291|                goto error;
  141|  17.6k|            op->major_level = 2 + dav1d_get_bits(gb, 3);
  142|  17.6k|            op->minor_level = dav1d_get_bits(gb, 2);
  143|  17.6k|            if (op->major_level > 3)
  ------------------
  |  Branch (143:17): [True: 4.80k, False: 12.8k]
  ------------------
  144|  4.80k|                op->tier = dav1d_get_bit(gb);
  145|  17.6k|            if (hdr->decoder_model_info_present) {
  ------------------
  |  Branch (145:17): [True: 5.72k, False: 11.8k]
  ------------------
  146|  5.72k|                op->decoder_model_param_present = dav1d_get_bit(gb);
  147|  5.72k|                if (op->decoder_model_param_present) {
  ------------------
  |  Branch (147:21): [True: 2.68k, False: 3.04k]
  ------------------
  148|  2.68k|                    struct Dav1dSequenceHeaderOperatingParameterInfo *const opi =
  149|  2.68k|                        &hdr->operating_parameter_info[i];
  150|  2.68k|                    opi->decoder_buffer_delay =
  151|  2.68k|                        dav1d_get_bits(gb, hdr->encoder_decoder_buffer_delay_length);
  152|  2.68k|                    opi->encoder_buffer_delay =
  153|  2.68k|                        dav1d_get_bits(gb, hdr->encoder_decoder_buffer_delay_length);
  154|  2.68k|                    opi->low_delay_mode = dav1d_get_bit(gb);
  155|  2.68k|                }
  156|  5.72k|            }
  157|  17.6k|            if (hdr->display_model_info_present)
  ------------------
  |  Branch (157:17): [True: 5.42k, False: 12.1k]
  ------------------
  158|  5.42k|                op->display_model_param_present = dav1d_get_bit(gb);
  159|  17.6k|            op->initial_display_delay =
  160|  17.6k|                op->display_model_param_present ? dav1d_get_bits(gb, 4) + 1 : 10;
  ------------------
  |  Branch (160:17): [True: 1.95k, False: 15.6k]
  ------------------
  161|  17.6k|        }
  162|       |#if DEBUG_SEQ_HDR
  163|       |        printf("SEQHDR: post-operating-points: off=%u\n",
  164|       |               dav1d_get_bits_pos(gb) - init_bit_pos);
  165|       |#endif
  166|  11.7k|    }
  167|       |
  168|  30.7k|    hdr->width_n_bits = dav1d_get_bits(gb, 4) + 1;
  169|  30.7k|    hdr->height_n_bits = dav1d_get_bits(gb, 4) + 1;
  170|  30.7k|    hdr->max_width = dav1d_get_bits(gb, hdr->width_n_bits) + 1;
  171|  30.7k|    hdr->max_height = dav1d_get_bits(gb, hdr->height_n_bits) + 1;
  172|       |#if DEBUG_SEQ_HDR
  173|       |    printf("SEQHDR: post-size: off=%u\n",
  174|       |           dav1d_get_bits_pos(gb) - init_bit_pos);
  175|       |#endif
  176|  30.7k|    if (!hdr->reduced_still_picture_header) {
  ------------------
  |  Branch (176:9): [True: 11.4k, False: 19.2k]
  ------------------
  177|  11.4k|        hdr->frame_id_numbers_present = dav1d_get_bit(gb);
  178|  11.4k|        if (hdr->frame_id_numbers_present) {
  ------------------
  |  Branch (178:13): [True: 1.18k, False: 10.2k]
  ------------------
  179|  1.18k|            hdr->delta_frame_id_n_bits = dav1d_get_bits(gb, 4) + 2;
  180|  1.18k|            hdr->frame_id_n_bits = dav1d_get_bits(gb, 3) + hdr->delta_frame_id_n_bits + 1;
  181|  1.18k|        }
  182|  11.4k|    }
  183|       |#if DEBUG_SEQ_HDR
  184|       |    printf("SEQHDR: post-frame-id-numbers-present: off=%u\n",
  185|       |           dav1d_get_bits_pos(gb) - init_bit_pos);
  186|       |#endif
  187|       |
  188|  30.7k|    hdr->sb128 = dav1d_get_bit(gb);
  189|  30.7k|    hdr->filter_intra = dav1d_get_bit(gb);
  190|  30.7k|    hdr->intra_edge_filter = dav1d_get_bit(gb);
  191|  30.7k|    if (hdr->reduced_still_picture_header) {
  ------------------
  |  Branch (191:9): [True: 19.2k, False: 11.4k]
  ------------------
  192|  19.2k|        hdr->screen_content_tools = DAV1D_ADAPTIVE;
  193|  19.2k|        hdr->force_integer_mv = DAV1D_ADAPTIVE;
  194|  19.2k|    } else {
  195|  11.4k|        hdr->inter_intra = dav1d_get_bit(gb);
  196|  11.4k|        hdr->masked_compound = dav1d_get_bit(gb);
  197|  11.4k|        hdr->warped_motion = dav1d_get_bit(gb);
  198|  11.4k|        hdr->dual_filter = dav1d_get_bit(gb);
  199|  11.4k|        hdr->order_hint = dav1d_get_bit(gb);
  200|  11.4k|        if (hdr->order_hint) {
  ------------------
  |  Branch (200:13): [True: 7.41k, False: 4.00k]
  ------------------
  201|  7.41k|            hdr->jnt_comp = dav1d_get_bit(gb);
  202|  7.41k|            hdr->ref_frame_mvs = dav1d_get_bit(gb);
  203|  7.41k|        }
  204|  11.4k|        hdr->screen_content_tools = dav1d_get_bit(gb) ? DAV1D_ADAPTIVE : dav1d_get_bit(gb);
  ------------------
  |  Branch (204:37): [True: 4.53k, False: 6.89k]
  ------------------
  205|       |    #if DEBUG_SEQ_HDR
  206|       |        printf("SEQHDR: post-screentools: off=%u\n",
  207|       |               dav1d_get_bits_pos(gb) - init_bit_pos);
  208|       |    #endif
  209|  11.4k|        hdr->force_integer_mv = hdr->screen_content_tools ?
  ------------------
  |  Branch (209:33): [True: 8.06k, False: 3.35k]
  ------------------
  210|  8.06k|                                dav1d_get_bit(gb) ? DAV1D_ADAPTIVE : dav1d_get_bit(gb) : 2;
  ------------------
  |  Branch (210:33): [True: 2.31k, False: 5.74k]
  ------------------
  211|  11.4k|        if (hdr->order_hint)
  ------------------
  |  Branch (211:13): [True: 7.41k, False: 4.00k]
  ------------------
  212|  7.41k|            hdr->order_hint_n_bits = dav1d_get_bits(gb, 3) + 1;
  213|  11.4k|    }
  214|  30.7k|    hdr->super_res = dav1d_get_bit(gb);
  215|  30.7k|    hdr->cdef = dav1d_get_bit(gb);
  216|  30.7k|    hdr->restoration = dav1d_get_bit(gb);
  217|       |#if DEBUG_SEQ_HDR
  218|       |    printf("SEQHDR: post-featurebits: off=%u\n",
  219|       |           dav1d_get_bits_pos(gb) - init_bit_pos);
  220|       |#endif
  221|       |
  222|  30.7k|    hdr->hbd = dav1d_get_bit(gb);
  223|  30.7k|    if (hdr->profile == 2 && hdr->hbd)
  ------------------
  |  Branch (223:9): [True: 13.3k, False: 17.3k]
  |  Branch (223:30): [True: 9.08k, False: 4.28k]
  ------------------
  224|  9.08k|        hdr->hbd += dav1d_get_bit(gb);
  225|  30.7k|    if (hdr->profile != 1)
  ------------------
  |  Branch (225:9): [True: 23.2k, False: 7.42k]
  ------------------
  226|  23.2k|        hdr->monochrome = dav1d_get_bit(gb);
  227|  30.7k|    hdr->color_description_present = dav1d_get_bit(gb);
  228|  30.7k|    if (hdr->color_description_present) {
  ------------------
  |  Branch (228:9): [True: 2.69k, False: 28.0k]
  ------------------
  229|  2.69k|        hdr->pri = dav1d_get_bits(gb, 8);
  230|  2.69k|        hdr->trc = dav1d_get_bits(gb, 8);
  231|  2.69k|        hdr->mtrx = dav1d_get_bits(gb, 8);
  232|  28.0k|    } else {
  233|  28.0k|        hdr->pri = DAV1D_COLOR_PRI_UNKNOWN;
  234|  28.0k|        hdr->trc = DAV1D_TRC_UNKNOWN;
  235|  28.0k|        hdr->mtrx = DAV1D_MC_UNKNOWN;
  236|  28.0k|    }
  237|  30.7k|    if (hdr->monochrome) {
  ------------------
  |  Branch (237:9): [True: 11.6k, False: 19.0k]
  ------------------
  238|  11.6k|        hdr->color_range = dav1d_get_bit(gb);
  239|  11.6k|        hdr->layout = DAV1D_PIXEL_LAYOUT_I400;
  240|  11.6k|        hdr->ss_hor = hdr->ss_ver = 1;
  241|  11.6k|        hdr->chr = DAV1D_CHR_UNKNOWN;
  242|  19.0k|    } else if (hdr->pri == DAV1D_COLOR_PRI_BT709 &&
  ------------------
  |  Branch (242:16): [True: 1.55k, False: 17.4k]
  ------------------
  243|  1.55k|               hdr->trc == DAV1D_TRC_SRGB &&
  ------------------
  |  Branch (243:16): [True: 1.10k, False: 442]
  ------------------
  244|  1.10k|               hdr->mtrx == DAV1D_MC_IDENTITY)
  ------------------
  |  Branch (244:16): [True: 860, False: 249]
  ------------------
  245|    860|    {
  246|    860|        hdr->layout = DAV1D_PIXEL_LAYOUT_I444;
  247|    860|        hdr->color_range = 1;
  248|    860|        if (hdr->profile != 1 && !(hdr->profile == 2 && hdr->hbd == 2))
  ------------------
  |  Branch (248:13): [True: 664, False: 196]
  |  Branch (248:36): [True: 463, False: 201]
  |  Branch (248:57): [True: 267, False: 196]
  ------------------
  249|    397|            goto error;
  250|  18.1k|    } else {
  251|  18.1k|        hdr->color_range = dav1d_get_bit(gb);
  252|  18.1k|        switch (hdr->profile) {
  ------------------
  |  Branch (252:17): [True: 18.1k, False: 0]
  ------------------
  253|  5.30k|        case 0: hdr->layout = DAV1D_PIXEL_LAYOUT_I420;
  ------------------
  |  Branch (253:9): [True: 5.30k, False: 12.8k]
  ------------------
  254|  5.30k|                hdr->ss_hor = hdr->ss_ver = 1;
  255|  5.30k|                break;
  256|  7.22k|        case 1: hdr->layout = DAV1D_PIXEL_LAYOUT_I444;
  ------------------
  |  Branch (256:9): [True: 7.22k, False: 10.9k]
  ------------------
  257|  7.22k|                break;
  258|  5.62k|        case 2:
  ------------------
  |  Branch (258:9): [True: 5.62k, False: 12.5k]
  ------------------
  259|  5.62k|            if (hdr->hbd == 2) {
  ------------------
  |  Branch (259:17): [True: 2.44k, False: 3.17k]
  ------------------
  260|  2.44k|                hdr->ss_hor = dav1d_get_bit(gb);
  261|  2.44k|                if (hdr->ss_hor)
  ------------------
  |  Branch (261:21): [True: 924, False: 1.52k]
  ------------------
  262|    924|                    hdr->ss_ver = dav1d_get_bit(gb);
  263|  2.44k|            } else
  264|  3.17k|                hdr->ss_hor = 1;
  265|  5.62k|            hdr->layout = hdr->ss_hor ?
  ------------------
  |  Branch (265:27): [True: 4.10k, False: 1.52k]
  ------------------
  266|  4.10k|                          hdr->ss_ver ? DAV1D_PIXEL_LAYOUT_I420 :
  ------------------
  |  Branch (266:27): [True: 473, False: 3.62k]
  ------------------
  267|  4.10k|                                        DAV1D_PIXEL_LAYOUT_I422 :
  268|  5.62k|                                        DAV1D_PIXEL_LAYOUT_I444;
  269|  5.62k|            break;
  270|  18.1k|        }
  271|  18.1k|        hdr->chr = (hdr->ss_hor & hdr->ss_ver) ?
  ------------------
  |  Branch (271:20): [True: 5.77k, False: 12.3k]
  ------------------
  272|  12.3k|                   dav1d_get_bits(gb, 2) : DAV1D_CHR_UNKNOWN;
  273|  18.1k|    }
  274|  30.3k|    if (strict_std_compliance &&
  ------------------
  |  Branch (274:9): [True: 0, False: 30.3k]
  ------------------
  275|      0|        hdr->mtrx == DAV1D_MC_IDENTITY && hdr->layout != DAV1D_PIXEL_LAYOUT_I444)
  ------------------
  |  Branch (275:9): [True: 0, False: 0]
  |  Branch (275:43): [True: 0, False: 0]
  ------------------
  276|      0|    {
  277|      0|        goto error;
  278|      0|    }
  279|  30.3k|    if (!hdr->monochrome)
  ------------------
  |  Branch (279:9): [True: 18.6k, False: 11.6k]
  ------------------
  280|  18.6k|        hdr->separate_uv_delta_q = dav1d_get_bit(gb);
  281|       |#if DEBUG_SEQ_HDR
  282|       |    printf("SEQHDR: post-colorinfo: off=%u\n",
  283|       |           dav1d_get_bits_pos(gb) - init_bit_pos);
  284|       |#endif
  285|       |
  286|  30.3k|    hdr->film_grain_present = dav1d_get_bit(gb);
  287|       |#if DEBUG_SEQ_HDR
  288|       |    printf("SEQHDR: post-filmgrain: off=%u\n",
  289|       |           dav1d_get_bits_pos(gb) - init_bit_pos);
  290|       |#endif
  291|       |
  292|       |    // We needn't bother flushing the OBU here: we'll check we didn't
  293|       |    // overrun in the caller and will then discard gb, so there's no
  294|       |    // point in setting its position properly.
  295|       |
  296|  30.3k|    return check_trailing_bits(gb, strict_std_compliance);
  297|       |
  298|  1.70k|error:
  299|  1.70k|    return DAV1D_ERR(EINVAL);
  ------------------
  |  |   58|  1.70k|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  300|  30.3k|}
obu.c:parse_frame_hdr:
  409|  73.2k|static int parse_frame_hdr(Dav1dContext *const c, GetBits *const gb) {
  410|  73.2k|#define DEBUG_FRAME_HDR 0
  411|       |
  412|       |#if DEBUG_FRAME_HDR
  413|       |    const uint8_t *const init_ptr = gb->ptr;
  414|       |#endif
  415|  73.2k|    const Dav1dSequenceHeader *const seqhdr = c->seq_hdr;
  416|  73.2k|    Dav1dFrameHeader *const hdr = c->frame_hdr;
  417|       |
  418|  73.2k|    if (!seqhdr->reduced_still_picture_header)
  ------------------
  |  Branch (418:9): [True: 41.2k, False: 32.0k]
  ------------------
  419|  41.2k|        hdr->show_existing_frame = dav1d_get_bit(gb);
  420|       |#if DEBUG_FRAME_HDR
  421|       |    printf("HDR: post-show_existing_frame: off=%td\n",
  422|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
  423|       |#endif
  424|  73.2k|    if (hdr->show_existing_frame) {
  ------------------
  |  Branch (424:9): [True: 6.56k, False: 66.6k]
  ------------------
  425|  6.56k|        hdr->existing_frame_idx = dav1d_get_bits(gb, 3);
  426|  6.56k|        if (seqhdr->decoder_model_info_present && !seqhdr->equal_picture_interval)
  ------------------
  |  Branch (426:13): [True: 547, False: 6.02k]
  |  Branch (426:51): [True: 199, False: 348]
  ------------------
  427|    199|            hdr->frame_presentation_delay = dav1d_get_bits(gb, seqhdr->frame_presentation_delay_length);
  428|  6.56k|        if (seqhdr->frame_id_numbers_present) {
  ------------------
  |  Branch (428:13): [True: 590, False: 5.97k]
  ------------------
  429|    590|            hdr->frame_id = dav1d_get_bits(gb, seqhdr->frame_id_n_bits);
  430|    590|            Dav1dFrameHeader *const ref_frame_hdr = c->refs[hdr->existing_frame_idx].p.p.frame_hdr;
  431|    590|            if (!ref_frame_hdr || ref_frame_hdr->frame_id != hdr->frame_id) goto error;
  ------------------
  |  Branch (431:17): [True: 252, False: 338]
  |  Branch (431:35): [True: 71, False: 267]
  ------------------
  432|    590|        }
  433|  6.24k|        return 0;
  434|  6.56k|    }
  435|       |
  436|  66.6k|    if (seqhdr->reduced_still_picture_header) {
  ------------------
  |  Branch (436:9): [True: 32.0k, False: 34.6k]
  ------------------
  437|  32.0k|        hdr->frame_type = DAV1D_FRAME_TYPE_KEY;
  438|  32.0k|        hdr->show_frame = 1;
  439|  34.6k|    } else {
  440|  34.6k|        hdr->frame_type = dav1d_get_bits(gb, 2);
  441|  34.6k|        hdr->show_frame = dav1d_get_bit(gb);
  442|  34.6k|    }
  443|  66.6k|    if (hdr->show_frame) {
  ------------------
  |  Branch (443:9): [True: 59.3k, False: 7.34k]
  ------------------
  444|  59.3k|        if (seqhdr->decoder_model_info_present && !seqhdr->equal_picture_interval)
  ------------------
  |  Branch (444:13): [True: 1.45k, False: 57.8k]
  |  Branch (444:51): [True: 1.11k, False: 336]
  ------------------
  445|  1.11k|            hdr->frame_presentation_delay = dav1d_get_bits(gb, seqhdr->frame_presentation_delay_length);
  446|  59.3k|        hdr->showable_frame = hdr->frame_type != DAV1D_FRAME_TYPE_KEY;
  447|  59.3k|    } else
  448|  7.34k|        hdr->showable_frame = dav1d_get_bit(gb);
  449|  66.6k|    hdr->error_resilient_mode =
  450|  66.6k|        (hdr->frame_type == DAV1D_FRAME_TYPE_KEY && hdr->show_frame) ||
  ------------------
  |  Branch (450:10): [True: 41.4k, False: 25.2k]
  |  Branch (450:53): [True: 39.6k, False: 1.85k]
  ------------------
  451|  27.0k|        hdr->frame_type == DAV1D_FRAME_TYPE_SWITCH ||
  ------------------
  |  Branch (451:9): [True: 2.58k, False: 24.4k]
  ------------------
  452|  24.4k|        seqhdr->reduced_still_picture_header || dav1d_get_bit(gb);
  ------------------
  |  Branch (452:9): [True: 0, False: 24.4k]
  |  Branch (452:49): [True: 1.63k, False: 22.8k]
  ------------------
  453|       |#if DEBUG_FRAME_HDR
  454|       |    printf("HDR: post-frametype_bits: off=%td\n",
  455|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
  456|       |#endif
  457|  66.6k|    hdr->disable_cdf_update = dav1d_get_bit(gb);
  458|  66.6k|    hdr->allow_screen_content_tools = seqhdr->screen_content_tools == DAV1D_ADAPTIVE ?
  ------------------
  |  Branch (458:39): [True: 46.8k, False: 19.8k]
  ------------------
  459|  46.8k|                                      dav1d_get_bit(gb) : seqhdr->screen_content_tools;
  460|  66.6k|    if (hdr->allow_screen_content_tools)
  ------------------
  |  Branch (460:9): [True: 42.4k, False: 24.2k]
  ------------------
  461|  42.4k|        hdr->force_integer_mv = seqhdr->force_integer_mv == DAV1D_ADAPTIVE ?
  ------------------
  |  Branch (461:33): [True: 27.8k, False: 14.5k]
  ------------------
  462|  27.8k|                                dav1d_get_bit(gb) : seqhdr->force_integer_mv;
  463|       |
  464|  66.6k|    if (IS_KEY_OR_INTRA(hdr))
  ------------------
  |  |   43|  66.6k|    (!IS_INTER_OR_SWITCH(frame_header))
  |  |  ------------------
  |  |  |  |   36|  66.6k|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (43:5): [True: 42.9k, False: 23.7k]
  |  |  ------------------
  ------------------
  465|  42.9k|        hdr->force_integer_mv = 1;
  466|       |
  467|  66.6k|    if (seqhdr->frame_id_numbers_present)
  ------------------
  |  Branch (467:9): [True: 1.87k, False: 64.8k]
  ------------------
  468|  1.87k|        hdr->frame_id = dav1d_get_bits(gb, seqhdr->frame_id_n_bits);
  469|       |
  470|  66.6k|    if (!seqhdr->reduced_still_picture_header)
  ------------------
  |  Branch (470:9): [True: 34.6k, False: 32.0k]
  ------------------
  471|  34.6k|        hdr->frame_size_override = hdr->frame_type == DAV1D_FRAME_TYPE_SWITCH ? 1 : dav1d_get_bit(gb);
  ------------------
  |  Branch (471:36): [True: 2.58k, False: 32.0k]
  ------------------
  472|       |#if DEBUG_FRAME_HDR
  473|       |    printf("HDR: post-frame_size_override_flag: off=%td\n",
  474|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
  475|       |#endif
  476|  66.6k|    if (seqhdr->order_hint)
  ------------------
  |  Branch (476:9): [True: 24.0k, False: 42.6k]
  ------------------
  477|  24.0k|        hdr->frame_offset = dav1d_get_bits(gb, seqhdr->order_hint_n_bits);
  478|  66.6k|    hdr->primary_ref_frame = !hdr->error_resilient_mode && IS_INTER_OR_SWITCH(hdr) ?
  ------------------
  |  |   36|  22.8k|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 20.6k, False: 2.26k]
  |  |  ------------------
  ------------------
  |  Branch (478:30): [True: 22.8k, False: 43.8k]
  ------------------
  479|  46.0k|                             dav1d_get_bits(gb, 3) : DAV1D_PRIMARY_REF_NONE;
  ------------------
  |  |   45|   112k|#define DAV1D_PRIMARY_REF_NONE 7
  ------------------
  480|       |
  481|  66.6k|    if (seqhdr->decoder_model_info_present) {
  ------------------
  |  Branch (481:9): [True: 1.46k, False: 65.2k]
  ------------------
  482|  1.46k|        hdr->buffer_removal_time_present = dav1d_get_bit(gb);
  483|  1.46k|        if (hdr->buffer_removal_time_present) {
  ------------------
  |  Branch (483:13): [True: 884, False: 579]
  ------------------
  484|  5.71k|            for (int i = 0; i < c->seq_hdr->num_operating_points; i++) {
  ------------------
  |  Branch (484:29): [True: 4.83k, False: 884]
  ------------------
  485|  4.83k|                const struct Dav1dSequenceHeaderOperatingPoint *const seqop = &seqhdr->operating_points[i];
  486|  4.83k|                struct Dav1dFrameHeaderOperatingPoint *const op = &hdr->operating_points[i];
  487|  4.83k|                if (seqop->decoder_model_param_present) {
  ------------------
  |  Branch (487:21): [True: 3.82k, False: 1.00k]
  ------------------
  488|  3.82k|                    int in_temporal_layer = (seqop->idc >> hdr->temporal_id) & 1;
  489|  3.82k|                    int in_spatial_layer  = (seqop->idc >> (hdr->spatial_id + 8)) & 1;
  490|  3.82k|                    if (!seqop->idc || (in_temporal_layer && in_spatial_layer))
  ------------------
  |  Branch (490:25): [True: 258, False: 3.56k]
  |  Branch (490:41): [True: 2.95k, False: 608]
  |  Branch (490:62): [True: 2.17k, False: 785]
  ------------------
  491|  2.43k|                        op->buffer_removal_time = dav1d_get_bits(gb, seqhdr->buffer_removal_delay_length);
  492|  3.82k|                }
  493|  4.83k|            }
  494|    884|        }
  495|  1.46k|    }
  496|       |
  497|  66.6k|    if (IS_KEY_OR_INTRA(hdr)) {
  ------------------
  |  |   43|  66.6k|    (!IS_INTER_OR_SWITCH(frame_header))
  |  |  ------------------
  |  |  |  |   36|  66.6k|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (43:5): [True: 42.9k, False: 23.7k]
  |  |  ------------------
  ------------------
  498|  42.9k|        hdr->refresh_frame_flags = (hdr->frame_type == DAV1D_FRAME_TYPE_KEY &&
  ------------------
  |  Branch (498:37): [True: 41.4k, False: 1.46k]
  ------------------
  499|  41.4k|                                    hdr->show_frame) ? 0xff : dav1d_get_bits(gb, 8);
  ------------------
  |  Branch (499:37): [True: 39.6k, False: 1.85k]
  ------------------
  500|  42.9k|        if (hdr->refresh_frame_flags != 0xff && hdr->error_resilient_mode && seqhdr->order_hint)
  ------------------
  |  Branch (500:13): [True: 3.20k, False: 39.7k]
  |  Branch (500:49): [True: 986, False: 2.22k]
  |  Branch (500:78): [True: 745, False: 241]
  ------------------
  501|  6.70k|            for (int i = 0; i < 8; i++)
  ------------------
  |  Branch (501:29): [True: 5.96k, False: 745]
  ------------------
  502|  5.96k|                dav1d_get_bits(gb, seqhdr->order_hint_n_bits);
  503|  42.9k|        if (c->strict_std_compliance &&
  ------------------
  |  Branch (503:13): [True: 0, False: 42.9k]
  ------------------
  504|      0|            hdr->frame_type == DAV1D_FRAME_TYPE_INTRA && hdr->refresh_frame_flags == 0xff)
  ------------------
  |  Branch (504:13): [True: 0, False: 0]
  |  Branch (504:58): [True: 0, False: 0]
  ------------------
  505|      0|        {
  506|      0|            goto error;
  507|      0|        }
  508|  42.9k|        if (read_frame_size(c, gb, 0) < 0) goto error;
  ------------------
  |  Branch (508:13): [True: 0, False: 42.9k]
  ------------------
  509|  42.9k|        if (hdr->allow_screen_content_tools && !hdr->super_res.enabled)
  ------------------
  |  Branch (509:13): [True: 27.0k, False: 15.9k]
  |  Branch (509:48): [True: 25.6k, False: 1.34k]
  ------------------
  510|  25.6k|            hdr->allow_intrabc = dav1d_get_bit(gb);
  511|  42.9k|    } else {
  512|  23.7k|        hdr->refresh_frame_flags = hdr->frame_type == DAV1D_FRAME_TYPE_SWITCH ? 0xff :
  ------------------
  |  Branch (512:36): [True: 2.58k, False: 21.1k]
  ------------------
  513|  23.7k|                                   dav1d_get_bits(gb, 8);
  514|  23.7k|        if (hdr->error_resilient_mode && seqhdr->order_hint)
  ------------------
  |  Branch (514:13): [True: 3.15k, False: 20.6k]
  |  Branch (514:42): [True: 2.29k, False: 861]
  ------------------
  515|  20.6k|            for (int i = 0; i < 8; i++)
  ------------------
  |  Branch (515:29): [True: 18.3k, False: 2.29k]
  ------------------
  516|  18.3k|                dav1d_get_bits(gb, seqhdr->order_hint_n_bits);
  517|  23.7k|        if (seqhdr->order_hint) {
  ------------------
  |  Branch (517:13): [True: 18.4k, False: 5.33k]
  ------------------
  518|  18.4k|            hdr->frame_ref_short_signaling = dav1d_get_bit(gb);
  519|  18.4k|            if (hdr->frame_ref_short_signaling) {
  ------------------
  |  Branch (519:17): [True: 12.8k, False: 5.62k]
  ------------------
  520|  12.8k|                hdr->refidx[0] = dav1d_get_bits(gb, 3);
  521|  12.8k|                hdr->refidx[1] = hdr->refidx[2] = -1;
  522|  12.8k|                hdr->refidx[3] = dav1d_get_bits(gb, 3);
  523|       |
  524|       |                /* +1 allows for unconditional stores, as unused
  525|       |                 * values can be dumped into frame_offset[-1]. */
  526|  12.8k|                int frame_offset_mem[8+1];
  527|  12.8k|                int *const frame_offset = &frame_offset_mem[1];
  528|  12.8k|                int earliest_ref = -1;
  529|   112k|                for (int i = 0, earliest_offset = INT_MAX; i < 8; i++) {
  ------------------
  |  Branch (529:60): [True: 100k, False: 12.2k]
  ------------------
  530|   100k|                    const Dav1dFrameHeader *const refhdr = c->refs[i].p.p.frame_hdr;
  531|   100k|                    if (!refhdr) goto error;
  ------------------
  |  Branch (531:25): [True: 564, False: 99.5k]
  ------------------
  532|  99.5k|                    const int diff = get_poc_diff(seqhdr->order_hint_n_bits,
  533|  99.5k|                                                  refhdr->frame_offset,
  534|  99.5k|                                                  hdr->frame_offset);
  535|  99.5k|                    frame_offset[i] = diff;
  536|  99.5k|                    if (diff < earliest_offset) {
  ------------------
  |  Branch (536:25): [True: 15.3k, False: 84.2k]
  ------------------
  537|  15.3k|                        earliest_offset = diff;
  538|  15.3k|                        earliest_ref = i;
  539|  15.3k|                    }
  540|  99.5k|                }
  541|  12.2k|                frame_offset[hdr->refidx[0]] = INT_MIN; // = reference frame is used
  542|  12.2k|                frame_offset[hdr->refidx[3]] = INT_MIN;
  543|  12.2k|                assert(earliest_ref >= 0);
  ------------------
  |  Branch (543:17): [True: 12.2k, False: 0]
  ------------------
  544|       |
  545|  12.2k|                int refidx = -1;
  546|   110k|                for (int i = 0, latest_offset = 0; i < 8; i++) {
  ------------------
  |  Branch (546:52): [True: 97.8k, False: 12.2k]
  ------------------
  547|  97.8k|                    const int hint = frame_offset[i];
  548|  97.8k|                    if (hint >= latest_offset) {
  ------------------
  |  Branch (548:25): [True: 52.7k, False: 45.1k]
  ------------------
  549|  52.7k|                        latest_offset = hint;
  550|  52.7k|                        refidx = i;
  551|  52.7k|                    }
  552|  97.8k|                }
  553|  12.2k|                frame_offset[refidx] = INT_MIN;
  554|  12.2k|                hdr->refidx[6] = refidx;
  555|       |
  556|  36.7k|                for (int i = 4; i < 6; i++) {
  ------------------
  |  Branch (556:33): [True: 24.4k, False: 12.2k]
  ------------------
  557|       |                    /* Unsigned compares to handle negative values. */
  558|  24.4k|                    unsigned earliest_offset = UINT8_MAX;
  559|  24.4k|                    refidx = -1;
  560|   220k|                    for (int j = 0; j < 8; j++) {
  ------------------
  |  Branch (560:37): [True: 195k, False: 24.4k]
  ------------------
  561|   195k|                        const unsigned hint = frame_offset[j];
  562|   195k|                        if (hint < earliest_offset) {
  ------------------
  |  Branch (562:29): [True: 20.5k, False: 175k]
  ------------------
  563|  20.5k|                            earliest_offset = hint;
  564|  20.5k|                            refidx = j;
  565|  20.5k|                        }
  566|   195k|                    }
  567|  24.4k|                    frame_offset[refidx] = INT_MIN;
  568|  24.4k|                    hdr->refidx[i] = refidx;
  569|  24.4k|                }
  570|       |
  571|  85.6k|                for (int i = 1; i < 7; i++) {
  ------------------
  |  Branch (571:33): [True: 73.4k, False: 12.2k]
  ------------------
  572|  73.4k|                    refidx = hdr->refidx[i];
  573|  73.4k|                    if (refidx < 0) {
  ------------------
  |  Branch (573:25): [True: 31.5k, False: 41.8k]
  ------------------
  574|  31.5k|                        unsigned latest_offset = ~UINT8_MAX;
  575|   283k|                        for (int j = 0; j < 8; j++) {
  ------------------
  |  Branch (575:41): [True: 252k, False: 31.5k]
  ------------------
  576|   252k|                            const unsigned hint = frame_offset[j];
  577|   252k|                            if (hint >= latest_offset) {
  ------------------
  |  Branch (577:33): [True: 49.4k, False: 202k]
  ------------------
  578|  49.4k|                                latest_offset = hint;
  579|  49.4k|                                refidx = j;
  580|  49.4k|                            }
  581|   252k|                        }
  582|  31.5k|                        frame_offset[refidx] = INT_MIN;
  583|  31.5k|                        hdr->refidx[i] = refidx >= 0 ? refidx : earliest_ref;
  ------------------
  |  Branch (583:42): [True: 14.6k, False: 16.9k]
  ------------------
  584|  31.5k|                    }
  585|  73.4k|                }
  586|  12.2k|            }
  587|  18.4k|        }
  588|   181k|        for (int i = 0; i < 7; i++) {
  ------------------
  |  Branch (588:25): [True: 158k, False: 22.5k]
  ------------------
  589|   158k|            if (!hdr->frame_ref_short_signaling)
  ------------------
  |  Branch (589:17): [True: 73.0k, False: 85.5k]
  ------------------
  590|  73.0k|                hdr->refidx[i] = dav1d_get_bits(gb, 3);
  591|   158k|            if (seqhdr->frame_id_numbers_present) {
  ------------------
  |  Branch (591:17): [True: 840, False: 157k]
  ------------------
  592|    840|                const unsigned delta_ref_frame_id = dav1d_get_bits(gb, seqhdr->delta_frame_id_n_bits) + 1;
  593|    840|                const unsigned ref_frame_id = (hdr->frame_id + (1 << seqhdr->frame_id_n_bits) - delta_ref_frame_id) & ((1 << seqhdr->frame_id_n_bits) - 1);
  594|    840|                Dav1dFrameHeader *const ref_frame_hdr = c->refs[hdr->refidx[i]].p.p.frame_hdr;
  595|    840|                if (!ref_frame_hdr || ref_frame_hdr->frame_id != ref_frame_id) goto error;
  ------------------
  |  Branch (595:21): [True: 337, False: 503]
  |  Branch (595:39): [True: 301, False: 202]
  ------------------
  596|    840|            }
  597|   158k|        }
  598|  22.5k|        const int use_ref = !hdr->error_resilient_mode &&
  ------------------
  |  Branch (598:29): [True: 20.2k, False: 2.29k]
  ------------------
  599|  20.2k|                            hdr->frame_size_override;
  ------------------
  |  Branch (599:29): [True: 12.0k, False: 8.20k]
  ------------------
  600|  22.5k|        if (read_frame_size(c, gb, use_ref) < 0) goto error;
  ------------------
  |  Branch (600:13): [True: 202, False: 22.3k]
  ------------------
  601|  22.3k|        if (!hdr->force_integer_mv)
  ------------------
  |  Branch (601:13): [True: 12.9k, False: 9.35k]
  ------------------
  602|  12.9k|            hdr->hp = dav1d_get_bit(gb);
  603|  22.3k|        hdr->subpel_filter_mode = dav1d_get_bit(gb) ? DAV1D_FILTER_SWITCHABLE :
  ------------------
  |  Branch (603:35): [True: 4.04k, False: 18.3k]
  ------------------
  604|  22.3k|                                                      dav1d_get_bits(gb, 2);
  605|  22.3k|        hdr->switchable_motion_mode = dav1d_get_bit(gb);
  606|  22.3k|        if (!hdr->error_resilient_mode && seqhdr->ref_frame_mvs &&
  ------------------
  |  Branch (606:13): [True: 20.0k, False: 2.29k]
  |  Branch (606:43): [True: 13.4k, False: 6.63k]
  ------------------
  607|  13.4k|            seqhdr->order_hint && IS_INTER_OR_SWITCH(hdr))
  ------------------
  |  |   36|  13.4k|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 13.4k, False: 0]
  |  |  ------------------
  ------------------
  |  Branch (607:13): [True: 13.4k, False: 0]
  ------------------
  608|  13.4k|        {
  609|  13.4k|            hdr->use_ref_frame_mvs = dav1d_get_bit(gb);
  610|  13.4k|        }
  611|  22.3k|    }
  612|       |#if DEBUG_FRAME_HDR
  613|       |    printf("HDR: post-frametype-specific-bits: off=%td\n",
  614|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
  615|       |#endif
  616|       |
  617|  65.2k|    if (!seqhdr->reduced_still_picture_header && !hdr->disable_cdf_update)
  ------------------
  |  Branch (617:9): [True: 33.2k, False: 32.0k]
  |  Branch (617:50): [True: 27.1k, False: 6.10k]
  ------------------
  618|  27.1k|        hdr->refresh_context = !dav1d_get_bit(gb);
  619|       |#if DEBUG_FRAME_HDR
  620|       |    printf("HDR: post-refresh_context: off=%td\n",
  621|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
  622|       |#endif
  623|       |
  624|       |    // tile data
  625|  65.2k|    hdr->tiling.uniform = dav1d_get_bit(gb);
  626|  65.2k|    const int sbsz_min1 = (64 << seqhdr->sb128) - 1;
  627|  65.2k|    const int sbsz_log2 = 6 + seqhdr->sb128;
  628|  65.2k|    const int sbw = (hdr->width[0] + sbsz_min1) >> sbsz_log2;
  629|  65.2k|    const int sbh = (hdr->height + sbsz_min1) >> sbsz_log2;
  630|  65.2k|    const int max_tile_width_sb = 4096 >> sbsz_log2;
  631|  65.2k|    const int max_tile_area_sb = 4096 * 2304 >> (2 * sbsz_log2);
  632|  65.2k|    hdr->tiling.min_log2_cols = tile_log2(max_tile_width_sb, sbw);
  633|  65.2k|    hdr->tiling.max_log2_cols = tile_log2(1, imin(sbw, DAV1D_MAX_TILE_COLS));
  ------------------
  |  |   41|  65.2k|#define DAV1D_MAX_TILE_COLS 64
  ------------------
  634|  65.2k|    hdr->tiling.max_log2_rows = tile_log2(1, imin(sbh, DAV1D_MAX_TILE_ROWS));
  ------------------
  |  |   42|  65.2k|#define DAV1D_MAX_TILE_ROWS 64
  ------------------
  635|  65.2k|    const int min_log2_tiles = imax(tile_log2(max_tile_area_sb, sbw * sbh),
  636|  65.2k|                              hdr->tiling.min_log2_cols);
  637|  65.2k|    if (hdr->tiling.uniform) {
  ------------------
  |  Branch (637:9): [True: 38.9k, False: 26.3k]
  ------------------
  638|  38.9k|        for (hdr->tiling.log2_cols = hdr->tiling.min_log2_cols;
  639|  40.6k|             hdr->tiling.log2_cols < hdr->tiling.max_log2_cols && dav1d_get_bit(gb);
  ------------------
  |  Branch (639:14): [True: 18.1k, False: 22.5k]
  |  Branch (639:67): [True: 1.68k, False: 16.4k]
  ------------------
  640|  38.9k|             hdr->tiling.log2_cols++) ;
  641|  38.9k|        const int tile_w = 1 + ((sbw - 1) >> hdr->tiling.log2_cols);
  642|  38.9k|        hdr->tiling.cols = 0;
  643|  92.9k|        for (int sbx = 0; sbx < sbw; sbx += tile_w, hdr->tiling.cols++)
  ------------------
  |  Branch (643:27): [True: 53.9k, False: 38.9k]
  ------------------
  644|  53.9k|            hdr->tiling.col_start_sb[hdr->tiling.cols] = sbx;
  645|  38.9k|        hdr->tiling.min_log2_rows =
  646|  38.9k|            imax(min_log2_tiles - hdr->tiling.log2_cols, 0);
  647|       |
  648|  38.9k|        for (hdr->tiling.log2_rows = hdr->tiling.min_log2_rows;
  649|  40.3k|             hdr->tiling.log2_rows < hdr->tiling.max_log2_rows && dav1d_get_bit(gb);
  ------------------
  |  Branch (649:14): [True: 10.1k, False: 30.1k]
  |  Branch (649:67): [True: 1.36k, False: 8.81k]
  ------------------
  650|  38.9k|             hdr->tiling.log2_rows++) ;
  651|  38.9k|        const int tile_h = 1 + ((sbh - 1) >> hdr->tiling.log2_rows);
  652|  38.9k|        hdr->tiling.rows = 0;
  653|  84.8k|        for (int sby = 0; sby < sbh; sby += tile_h, hdr->tiling.rows++)
  ------------------
  |  Branch (653:27): [True: 45.9k, False: 38.9k]
  ------------------
  654|  45.9k|            hdr->tiling.row_start_sb[hdr->tiling.rows] = sby;
  655|  38.9k|    } else {
  656|  26.3k|        hdr->tiling.cols = 0;
  657|  26.3k|        int widest_tile = 0, max_tile_area_sb = sbw * sbh;
  658|   103k|        for (int sbx = 0; sbx < sbw && hdr->tiling.cols < DAV1D_MAX_TILE_COLS; hdr->tiling.cols++) {
  ------------------
  |  |   41|  77.3k|#define DAV1D_MAX_TILE_COLS 64
  ------------------
  |  Branch (658:27): [True: 77.3k, False: 25.8k]
  |  Branch (658:40): [True: 76.8k, False: 503]
  ------------------
  659|  76.8k|            const int tile_width_sb = imin(sbw - sbx, max_tile_width_sb);
  660|  76.8k|            const int tile_w = (tile_width_sb > 1) ? 1 + dav1d_get_uniform(gb, tile_width_sb) : 1;
  ------------------
  |  Branch (660:32): [True: 52.5k, False: 24.2k]
  ------------------
  661|  76.8k|            hdr->tiling.col_start_sb[hdr->tiling.cols] = sbx;
  662|  76.8k|            sbx += tile_w;
  663|  76.8k|            widest_tile = imax(widest_tile, tile_w);
  664|  76.8k|        }
  665|  26.3k|        hdr->tiling.log2_cols = tile_log2(1, hdr->tiling.cols);
  666|  26.3k|        if (min_log2_tiles) max_tile_area_sb >>= min_log2_tiles + 1;
  ------------------
  |  Branch (666:13): [True: 541, False: 25.7k]
  ------------------
  667|  26.3k|        const int max_tile_height_sb = imax(max_tile_area_sb / widest_tile, 1);
  668|       |
  669|  26.3k|        hdr->tiling.rows = 0;
  670|   104k|        for (int sby = 0; sby < sbh && hdr->tiling.rows < DAV1D_MAX_TILE_ROWS; hdr->tiling.rows++) {
  ------------------
  |  |   42|  78.5k|#define DAV1D_MAX_TILE_ROWS 64
  ------------------
  |  Branch (670:27): [True: 78.5k, False: 25.6k]
  |  Branch (670:40): [True: 77.9k, False: 687]
  ------------------
  671|  77.9k|            const int tile_height_sb = imin(sbh - sby, max_tile_height_sb);
  672|  77.9k|            const int tile_h = (tile_height_sb > 1) ? 1 + dav1d_get_uniform(gb, tile_height_sb) : 1;
  ------------------
  |  Branch (672:32): [True: 52.7k, False: 25.1k]
  ------------------
  673|  77.9k|            hdr->tiling.row_start_sb[hdr->tiling.rows] = sby;
  674|  77.9k|            sby += tile_h;
  675|  77.9k|        }
  676|  26.3k|        hdr->tiling.log2_rows = tile_log2(1, hdr->tiling.rows);
  677|  26.3k|    }
  678|  65.2k|    hdr->tiling.col_start_sb[hdr->tiling.cols] = sbw;
  679|  65.2k|    hdr->tiling.row_start_sb[hdr->tiling.rows] = sbh;
  680|  65.2k|    if (hdr->tiling.log2_cols || hdr->tiling.log2_rows) {
  ------------------
  |  Branch (680:9): [True: 6.52k, False: 58.7k]
  |  Branch (680:34): [True: 1.92k, False: 56.8k]
  ------------------
  681|  8.45k|        hdr->tiling.update = dav1d_get_bits(gb, hdr->tiling.log2_cols + hdr->tiling.log2_rows);
  682|  8.45k|        if (hdr->tiling.update >= hdr->tiling.cols * hdr->tiling.rows)
  ------------------
  |  Branch (682:13): [True: 493, False: 7.95k]
  ------------------
  683|    493|            goto error;
  684|  7.95k|        hdr->tiling.n_bytes = dav1d_get_bits(gb, 2) + 1;
  685|  7.95k|    }
  686|       |#if DEBUG_FRAME_HDR
  687|       |    printf("HDR: post-tiling: off=%td\n",
  688|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
  689|       |#endif
  690|       |
  691|       |    // quant data
  692|  64.7k|    hdr->quant.yac = dav1d_get_bits(gb, 8);
  693|  64.7k|    if (dav1d_get_bit(gb))
  ------------------
  |  Branch (693:9): [True: 8.02k, False: 56.7k]
  ------------------
  694|  8.02k|        hdr->quant.ydc_delta = dav1d_get_sbits(gb, 7);
  695|  64.7k|    if (!seqhdr->monochrome) {
  ------------------
  |  Branch (695:9): [True: 39.0k, False: 25.6k]
  ------------------
  696|       |        // If the sequence header says that delta_q might be different
  697|       |        // for U, V, we must check whether it actually is for this
  698|       |        // frame.
  699|  39.0k|        const int diff_uv_delta = seqhdr->separate_uv_delta_q ? dav1d_get_bit(gb) : 0;
  ------------------
  |  Branch (699:35): [True: 10.0k, False: 29.0k]
  ------------------
  700|  39.0k|        if (dav1d_get_bit(gb))
  ------------------
  |  Branch (700:13): [True: 4.22k, False: 34.8k]
  ------------------
  701|  4.22k|            hdr->quant.udc_delta = dav1d_get_sbits(gb, 7);
  702|  39.0k|        if (dav1d_get_bit(gb))
  ------------------
  |  Branch (702:13): [True: 3.76k, False: 35.3k]
  ------------------
  703|  3.76k|            hdr->quant.uac_delta = dav1d_get_sbits(gb, 7);
  704|  39.0k|        if (diff_uv_delta) {
  ------------------
  |  Branch (704:13): [True: 3.31k, False: 35.7k]
  ------------------
  705|  3.31k|            if (dav1d_get_bit(gb))
  ------------------
  |  Branch (705:17): [True: 2.06k, False: 1.25k]
  ------------------
  706|  2.06k|                hdr->quant.vdc_delta = dav1d_get_sbits(gb, 7);
  707|  3.31k|            if (dav1d_get_bit(gb))
  ------------------
  |  Branch (707:17): [True: 1.24k, False: 2.07k]
  ------------------
  708|  1.24k|                hdr->quant.vac_delta = dav1d_get_sbits(gb, 7);
  709|  35.7k|        } else {
  710|  35.7k|            hdr->quant.vdc_delta = hdr->quant.udc_delta;
  711|  35.7k|            hdr->quant.vac_delta = hdr->quant.uac_delta;
  712|  35.7k|        }
  713|  39.0k|    }
  714|       |#if DEBUG_FRAME_HDR
  715|       |    printf("HDR: post-quant: off=%td\n",
  716|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
  717|       |#endif
  718|  64.7k|    hdr->quant.qm = dav1d_get_bit(gb);
  719|  64.7k|    if (hdr->quant.qm) {
  ------------------
  |  Branch (719:9): [True: 8.38k, False: 56.4k]
  ------------------
  720|  8.38k|        hdr->quant.qm_y = dav1d_get_bits(gb, 4);
  721|  8.38k|        hdr->quant.qm_u = dav1d_get_bits(gb, 4);
  722|  8.38k|        hdr->quant.qm_v = seqhdr->separate_uv_delta_q ? dav1d_get_bits(gb, 4) :
  ------------------
  |  Branch (722:27): [True: 1.62k, False: 6.76k]
  ------------------
  723|  8.38k|                                                        hdr->quant.qm_u;
  724|  8.38k|    }
  725|       |#if DEBUG_FRAME_HDR
  726|       |    printf("HDR: post-qm: off=%td\n",
  727|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
  728|       |#endif
  729|       |
  730|       |    // segmentation data
  731|  64.7k|    hdr->segmentation.enabled = dav1d_get_bit(gb);
  732|  64.7k|    if (hdr->segmentation.enabled) {
  ------------------
  |  Branch (732:9): [True: 10.9k, False: 53.8k]
  ------------------
  733|  10.9k|        if (hdr->primary_ref_frame == DAV1D_PRIMARY_REF_NONE) {
  ------------------
  |  |   45|  10.9k|#define DAV1D_PRIMARY_REF_NONE 7
  ------------------
  |  Branch (733:13): [True: 2.96k, False: 7.93k]
  ------------------
  734|  2.96k|            hdr->segmentation.update_map = 1;
  735|  2.96k|            hdr->segmentation.update_data = 1;
  736|  7.93k|        } else {
  737|  7.93k|            hdr->segmentation.update_map = dav1d_get_bit(gb);
  738|  7.93k|            if (hdr->segmentation.update_map)
  ------------------
  |  Branch (738:17): [True: 1.19k, False: 6.74k]
  ------------------
  739|  1.19k|                hdr->segmentation.temporal = dav1d_get_bit(gb);
  740|  7.93k|            hdr->segmentation.update_data = dav1d_get_bit(gb);
  741|  7.93k|        }
  742|       |
  743|  10.9k|        if (hdr->segmentation.update_data) {
  ------------------
  |  Branch (743:13): [True: 3.86k, False: 7.03k]
  ------------------
  744|  3.86k|            hdr->segmentation.seg_data.last_active_segid = -1;
  745|  34.7k|            for (int i = 0; i < DAV1D_MAX_SEGMENTS; i++) {
  ------------------
  |  |   43|  34.7k|#define DAV1D_MAX_SEGMENTS 8
  ------------------
  |  Branch (745:29): [True: 30.9k, False: 3.86k]
  ------------------
  746|  30.9k|                Dav1dSegmentationData *const seg =
  747|  30.9k|                    &hdr->segmentation.seg_data.d[i];
  748|  30.9k|                if (dav1d_get_bit(gb)) {
  ------------------
  |  Branch (748:21): [True: 5.43k, False: 25.4k]
  ------------------
  749|  5.43k|                    seg->delta_q = dav1d_get_sbits(gb, 9);
  750|  5.43k|                    hdr->segmentation.seg_data.last_active_segid = i;
  751|  5.43k|                }
  752|  30.9k|                if (dav1d_get_bit(gb)) {
  ------------------
  |  Branch (752:21): [True: 3.41k, False: 27.5k]
  ------------------
  753|  3.41k|                    seg->delta_lf_y_v = dav1d_get_sbits(gb, 7);
  754|  3.41k|                    hdr->segmentation.seg_data.last_active_segid = i;
  755|  3.41k|                }
  756|  30.9k|                if (dav1d_get_bit(gb)) {
  ------------------
  |  Branch (756:21): [True: 4.53k, False: 26.3k]
  ------------------
  757|  4.53k|                    seg->delta_lf_y_h = dav1d_get_sbits(gb, 7);
  758|  4.53k|                    hdr->segmentation.seg_data.last_active_segid = i;
  759|  4.53k|                }
  760|  30.9k|                if (dav1d_get_bit(gb)) {
  ------------------
  |  Branch (760:21): [True: 4.10k, False: 26.8k]
  ------------------
  761|  4.10k|                    seg->delta_lf_u = dav1d_get_sbits(gb, 7);
  762|  4.10k|                    hdr->segmentation.seg_data.last_active_segid = i;
  763|  4.10k|                }
  764|  30.9k|                if (dav1d_get_bit(gb)) {
  ------------------
  |  Branch (764:21): [True: 3.15k, False: 27.7k]
  ------------------
  765|  3.15k|                    seg->delta_lf_v = dav1d_get_sbits(gb, 7);
  766|  3.15k|                    hdr->segmentation.seg_data.last_active_segid = i;
  767|  3.15k|                }
  768|  30.9k|                if (dav1d_get_bit(gb)) {
  ------------------
  |  Branch (768:21): [True: 3.35k, False: 27.5k]
  ------------------
  769|  3.35k|                    seg->ref = dav1d_get_bits(gb, 3);
  770|  3.35k|                    hdr->segmentation.seg_data.last_active_segid = i;
  771|  3.35k|                    hdr->segmentation.seg_data.preskip = 1;
  772|  27.5k|                } else {
  773|  27.5k|                    seg->ref = -1;
  774|  27.5k|                }
  775|  30.9k|                if ((seg->skip = dav1d_get_bit(gb))) {
  ------------------
  |  Branch (775:21): [True: 4.35k, False: 26.5k]
  ------------------
  776|  4.35k|                    hdr->segmentation.seg_data.last_active_segid = i;
  777|  4.35k|                    hdr->segmentation.seg_data.preskip = 1;
  778|  4.35k|                }
  779|  30.9k|                if ((seg->globalmv = dav1d_get_bit(gb))) {
  ------------------
  |  Branch (779:21): [True: 3.85k, False: 27.0k]
  ------------------
  780|  3.85k|                    hdr->segmentation.seg_data.last_active_segid = i;
  781|  3.85k|                    hdr->segmentation.seg_data.preskip = 1;
  782|  3.85k|                }
  783|  30.9k|            }
  784|  7.03k|        } else {
  785|       |            // segmentation.update_data was false so we should copy
  786|       |            // segmentation data from the reference frame.
  787|  7.03k|            assert(hdr->primary_ref_frame != DAV1D_PRIMARY_REF_NONE);
  ------------------
  |  Branch (787:13): [True: 7.03k, False: 0]
  ------------------
  788|  7.03k|            const int pri_ref = hdr->refidx[hdr->primary_ref_frame];
  789|  7.03k|            if (!c->refs[pri_ref].p.p.frame_hdr) goto error;
  ------------------
  |  Branch (789:17): [True: 236, False: 6.80k]
  ------------------
  790|  6.80k|            hdr->segmentation.seg_data =
  791|  6.80k|                c->refs[pri_ref].p.p.frame_hdr->segmentation.seg_data;
  792|  6.80k|        }
  793|  53.8k|    } else {
  794|   484k|        for (int i = 0; i < DAV1D_MAX_SEGMENTS; i++)
  ------------------
  |  |   43|   484k|#define DAV1D_MAX_SEGMENTS 8
  ------------------
  |  Branch (794:25): [True: 431k, False: 53.8k]
  ------------------
  795|   431k|            hdr->segmentation.seg_data.d[i].ref = -1;
  796|  53.8k|    }
  797|       |#if DEBUG_FRAME_HDR
  798|       |    printf("HDR: post-segmentation: off=%td\n",
  799|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
  800|       |#endif
  801|       |
  802|       |    // delta q
  803|  64.5k|    if (hdr->quant.yac) {
  ------------------
  |  Branch (803:9): [True: 47.5k, False: 16.9k]
  ------------------
  804|  47.5k|        hdr->delta.q.present = dav1d_get_bit(gb);
  805|  47.5k|        if (hdr->delta.q.present) {
  ------------------
  |  Branch (805:13): [True: 7.33k, False: 40.2k]
  ------------------
  806|  7.33k|            hdr->delta.q.res_log2 = dav1d_get_bits(gb, 2);
  807|  7.33k|            if (!hdr->allow_intrabc) {
  ------------------
  |  Branch (807:17): [True: 4.81k, False: 2.51k]
  ------------------
  808|  4.81k|                hdr->delta.lf.present = dav1d_get_bit(gb);
  809|  4.81k|                if (hdr->delta.lf.present) {
  ------------------
  |  Branch (809:21): [True: 1.87k, False: 2.94k]
  ------------------
  810|  1.87k|                    hdr->delta.lf.res_log2 = dav1d_get_bits(gb, 2);
  811|  1.87k|                    hdr->delta.lf.multi = dav1d_get_bit(gb);
  812|  1.87k|                }
  813|  4.81k|            }
  814|  7.33k|        }
  815|  47.5k|    }
  816|       |#if DEBUG_FRAME_HDR
  817|       |    printf("HDR: post-delta_q_lf_flags: off=%td\n",
  818|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
  819|       |#endif
  820|       |
  821|       |    // derive lossless flags
  822|  64.5k|    const int delta_lossless = !hdr->quant.ydc_delta && !hdr->quant.udc_delta &&
  ------------------
  |  Branch (822:32): [True: 57.1k, False: 7.36k]
  |  Branch (822:57): [True: 55.7k, False: 1.48k]
  ------------------
  823|  55.7k|        !hdr->quant.uac_delta && !hdr->quant.vdc_delta && !hdr->quant.vac_delta;
  ------------------
  |  Branch (823:9): [True: 54.8k, False: 899]
  |  Branch (823:34): [True: 54.1k, False: 653]
  |  Branch (823:59): [True: 54.0k, False: 60]
  ------------------
  824|  64.5k|    hdr->all_lossless = 1;
  825|   580k|    for (int i = 0; i < DAV1D_MAX_SEGMENTS; i++) {
  ------------------
  |  |   43|   580k|#define DAV1D_MAX_SEGMENTS 8
  ------------------
  |  Branch (825:21): [True: 516k, False: 64.5k]
  ------------------
  826|   516k|        hdr->segmentation.qidx[i] = hdr->segmentation.enabled ?
  ------------------
  |  Branch (826:37): [True: 85.3k, False: 431k]
  ------------------
  827|  85.3k|            iclip_u8(hdr->quant.yac + hdr->segmentation.seg_data.d[i].delta_q) :
  828|   516k|            hdr->quant.yac;
  829|   516k|        hdr->segmentation.lossless[i] =
  830|   516k|            !hdr->segmentation.qidx[i] && delta_lossless;
  ------------------
  |  Branch (830:13): [True: 137k, False: 379k]
  |  Branch (830:43): [True: 134k, False: 3.16k]
  ------------------
  831|   516k|        hdr->all_lossless &= hdr->segmentation.lossless[i];
  832|   516k|    }
  833|       |
  834|       |    // loopfilter
  835|  64.5k|    if (hdr->all_lossless || hdr->allow_intrabc) {
  ------------------
  |  Branch (835:9): [True: 16.5k, False: 48.0k]
  |  Branch (835:30): [True: 19.9k, False: 28.0k]
  ------------------
  836|  36.4k|        hdr->loopfilter.mode_ref_delta_enabled = 1;
  837|  36.4k|        hdr->loopfilter.mode_ref_delta_update = 1;
  838|  36.4k|        hdr->loopfilter.mode_ref_deltas = default_mode_ref_deltas;
  839|  36.4k|    } else {
  840|  28.0k|        hdr->loopfilter.level_y[0] = dav1d_get_bits(gb, 6);
  841|  28.0k|        hdr->loopfilter.level_y[1] = dav1d_get_bits(gb, 6);
  842|  28.0k|        if (!seqhdr->monochrome &&
  ------------------
  |  Branch (842:13): [True: 10.5k, False: 17.5k]
  ------------------
  843|  10.5k|            (hdr->loopfilter.level_y[0] || hdr->loopfilter.level_y[1]))
  ------------------
  |  Branch (843:14): [True: 3.86k, False: 6.69k]
  |  Branch (843:44): [True: 1.20k, False: 5.49k]
  ------------------
  844|  5.07k|        {
  845|  5.07k|            hdr->loopfilter.level_u = dav1d_get_bits(gb, 6);
  846|  5.07k|            hdr->loopfilter.level_v = dav1d_get_bits(gb, 6);
  847|  5.07k|        }
  848|  28.0k|        hdr->loopfilter.sharpness = dav1d_get_bits(gb, 3);
  849|       |
  850|  28.0k|        if (hdr->primary_ref_frame == DAV1D_PRIMARY_REF_NONE) {
  ------------------
  |  |   45|  28.0k|#define DAV1D_PRIMARY_REF_NONE 7
  ------------------
  |  Branch (850:13): [True: 14.1k, False: 13.8k]
  ------------------
  851|  14.1k|            hdr->loopfilter.mode_ref_deltas = default_mode_ref_deltas;
  852|  14.1k|        } else {
  853|  13.8k|            const int ref = hdr->refidx[hdr->primary_ref_frame];
  854|  13.8k|            if (!c->refs[ref].p.p.frame_hdr) goto error;
  ------------------
  |  Branch (854:17): [True: 392, False: 13.4k]
  ------------------
  855|  13.4k|            hdr->loopfilter.mode_ref_deltas =
  856|  13.4k|                c->refs[ref].p.p.frame_hdr->loopfilter.mode_ref_deltas;
  857|  13.4k|        }
  858|  27.6k|        hdr->loopfilter.mode_ref_delta_enabled = dav1d_get_bit(gb);
  859|  27.6k|        if (hdr->loopfilter.mode_ref_delta_enabled) {
  ------------------
  |  Branch (859:13): [True: 15.2k, False: 12.4k]
  ------------------
  860|  15.2k|            hdr->loopfilter.mode_ref_delta_update = dav1d_get_bit(gb);
  861|  15.2k|            if (hdr->loopfilter.mode_ref_delta_update) {
  ------------------
  |  Branch (861:17): [True: 1.59k, False: 13.6k]
  ------------------
  862|  14.3k|                for (int i = 0; i < 8; i++)
  ------------------
  |  Branch (862:33): [True: 12.7k, False: 1.59k]
  ------------------
  863|  12.7k|                    if (dav1d_get_bit(gb))
  ------------------
  |  Branch (863:25): [True: 3.45k, False: 9.30k]
  ------------------
  864|  3.45k|                        hdr->loopfilter.mode_ref_deltas.ref_delta[i] =
  865|  3.45k|                            dav1d_get_sbits(gb, 7);
  866|  4.78k|                for (int i = 0; i < 2; i++)
  ------------------
  |  Branch (866:33): [True: 3.19k, False: 1.59k]
  ------------------
  867|  3.19k|                    if (dav1d_get_bit(gb))
  ------------------
  |  Branch (867:25): [True: 642, False: 2.54k]
  ------------------
  868|    642|                        hdr->loopfilter.mode_ref_deltas.mode_delta[i] =
  869|    642|                            dav1d_get_sbits(gb, 7);
  870|  1.59k|            }
  871|  15.2k|        }
  872|  27.6k|    }
  873|       |#if DEBUG_FRAME_HDR
  874|       |    printf("HDR: post-lpf: off=%td\n",
  875|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
  876|       |#endif
  877|       |
  878|       |    // cdef
  879|  64.1k|    if (!hdr->all_lossless && seqhdr->cdef && !hdr->allow_intrabc) {
  ------------------
  |  Branch (879:9): [True: 47.6k, False: 16.5k]
  |  Branch (879:31): [True: 17.4k, False: 30.2k]
  |  Branch (879:47): [True: 7.53k, False: 9.89k]
  ------------------
  880|  7.53k|        hdr->cdef.damping = dav1d_get_bits(gb, 2) + 3;
  881|  7.53k|        hdr->cdef.n_bits = dav1d_get_bits(gb, 2);
  882|  18.3k|        for (int i = 0; i < (1 << hdr->cdef.n_bits); i++) {
  ------------------
  |  Branch (882:25): [True: 10.8k, False: 7.53k]
  ------------------
  883|  10.8k|            hdr->cdef.y_strength[i] = dav1d_get_bits(gb, 6);
  884|  10.8k|            if (!seqhdr->monochrome)
  ------------------
  |  Branch (884:17): [True: 7.81k, False: 2.98k]
  ------------------
  885|  7.81k|                hdr->cdef.uv_strength[i] = dav1d_get_bits(gb, 6);
  886|  10.8k|        }
  887|  7.53k|    }
  888|       |#if DEBUG_FRAME_HDR
  889|       |    printf("HDR: post-cdef: off=%td\n",
  890|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
  891|       |#endif
  892|       |
  893|       |    // restoration
  894|  64.1k|    if ((!hdr->all_lossless || hdr->super_res.enabled) &&
  ------------------
  |  Branch (894:10): [True: 47.6k, False: 16.5k]
  |  Branch (894:32): [True: 4.45k, False: 12.0k]
  ------------------
  895|  52.0k|        seqhdr->restoration && !hdr->allow_intrabc)
  ------------------
  |  Branch (895:9): [True: 34.5k, False: 17.5k]
  |  Branch (895:32): [True: 25.3k, False: 9.15k]
  ------------------
  896|  25.3k|    {
  897|  25.3k|        hdr->restoration.type[0] = dav1d_get_bits(gb, 2);
  898|  25.3k|        if (!seqhdr->monochrome) {
  ------------------
  |  Branch (898:13): [True: 9.71k, False: 15.6k]
  ------------------
  899|  9.71k|            hdr->restoration.type[1] = dav1d_get_bits(gb, 2);
  900|  9.71k|            hdr->restoration.type[2] = dav1d_get_bits(gb, 2);
  901|  9.71k|        }
  902|       |
  903|  25.3k|        if (hdr->restoration.type[0] || hdr->restoration.type[1] ||
  ------------------
  |  Branch (903:13): [True: 18.1k, False: 7.25k]
  |  Branch (903:41): [True: 490, False: 6.76k]
  ------------------
  904|  6.76k|            hdr->restoration.type[2])
  ------------------
  |  Branch (904:13): [True: 494, False: 6.26k]
  ------------------
  905|  19.0k|        {
  906|       |            // Log2 of the restoration unit size.
  907|  19.0k|            hdr->restoration.unit_size[0] = 6 + seqhdr->sb128;
  908|  19.0k|            if (dav1d_get_bit(gb)) {
  ------------------
  |  Branch (908:17): [True: 12.5k, False: 6.56k]
  ------------------
  909|  12.5k|                hdr->restoration.unit_size[0]++;
  910|  12.5k|                if (!seqhdr->sb128)
  ------------------
  |  Branch (910:21): [True: 2.28k, False: 10.2k]
  ------------------
  911|  2.28k|                    hdr->restoration.unit_size[0] += dav1d_get_bit(gb);
  912|  12.5k|            }
  913|  19.0k|            hdr->restoration.unit_size[1] = hdr->restoration.unit_size[0];
  914|  19.0k|            if ((hdr->restoration.type[1] || hdr->restoration.type[2]) &&
  ------------------
  |  Branch (914:18): [True: 3.12k, False: 15.9k]
  |  Branch (914:46): [True: 2.27k, False: 13.6k]
  ------------------
  915|  5.40k|                seqhdr->ss_hor == 1 && seqhdr->ss_ver == 1)
  ------------------
  |  Branch (915:17): [True: 3.39k, False: 2.01k]
  |  Branch (915:40): [True: 1.32k, False: 2.06k]
  ------------------
  916|  1.32k|            {
  917|  1.32k|                hdr->restoration.unit_size[1] -= dav1d_get_bit(gb);
  918|  1.32k|            }
  919|  19.0k|        } else {
  920|  6.26k|            hdr->restoration.unit_size[0] = 8;
  921|  6.26k|        }
  922|  25.3k|    }
  923|       |#if DEBUG_FRAME_HDR
  924|       |    printf("HDR: post-restoration: off=%td\n",
  925|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
  926|       |#endif
  927|       |
  928|  64.1k|    if (!hdr->all_lossless)
  ------------------
  |  Branch (928:9): [True: 47.6k, False: 16.5k]
  ------------------
  929|  47.6k|        hdr->txfm_mode = dav1d_get_bit(gb) ? DAV1D_TX_SWITCHABLE : DAV1D_TX_LARGEST;
  ------------------
  |  Branch (929:26): [True: 8.29k, False: 39.3k]
  ------------------
  930|       |#if DEBUG_FRAME_HDR
  931|       |    printf("HDR: post-txfmmode: off=%td\n",
  932|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
  933|       |#endif
  934|  64.1k|    if (IS_INTER_OR_SWITCH(hdr))
  ------------------
  |  |   36|  64.1k|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 21.7k, False: 42.4k]
  |  |  ------------------
  ------------------
  935|  21.7k|        hdr->switchable_comp_refs = dav1d_get_bit(gb);
  936|       |#if DEBUG_FRAME_HDR
  937|       |    printf("HDR: post-refmode: off=%td\n",
  938|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
  939|       |#endif
  940|  64.1k|    if (hdr->switchable_comp_refs && IS_INTER_OR_SWITCH(hdr) && seqhdr->order_hint) {
  ------------------
  |  |   36|  77.2k|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 13.1k, False: 0]
  |  |  ------------------
  ------------------
  |  Branch (940:9): [True: 13.1k, False: 51.0k]
  |  Branch (940:65): [True: 11.7k, False: 1.37k]
  ------------------
  941|  11.7k|        const int poc = hdr->frame_offset;
  942|  11.7k|        int off_before = -1, off_after = -1;
  943|  11.7k|        int off_before_idx, off_after_idx;
  944|  92.1k|        for (int i = 0; i < 7; i++) {
  ------------------
  |  Branch (944:25): [True: 80.6k, False: 11.4k]
  ------------------
  945|  80.6k|            if (!c->refs[hdr->refidx[i]].p.p.frame_hdr) goto error;
  ------------------
  |  Branch (945:17): [True: 301, False: 80.3k]
  ------------------
  946|  80.3k|            const int refpoc = c->refs[hdr->refidx[i]].p.p.frame_hdr->frame_offset;
  947|       |
  948|  80.3k|            const int diff = get_poc_diff(seqhdr->order_hint_n_bits, refpoc, poc);
  949|  80.3k|            if (diff > 0) {
  ------------------
  |  Branch (949:17): [True: 16.8k, False: 63.5k]
  ------------------
  950|  16.8k|                if (off_after < 0 || get_poc_diff(seqhdr->order_hint_n_bits,
  ------------------
  |  Branch (950:21): [True: 6.40k, False: 10.4k]
  |  Branch (950:38): [True: 658, False: 9.79k]
  ------------------
  951|  10.4k|                                                  off_after, refpoc) > 0)
  952|  7.05k|                {
  953|  7.05k|                    off_after = refpoc;
  954|  7.05k|                    off_after_idx = i;
  955|  7.05k|                }
  956|  63.5k|            } else if (diff < 0 && (off_before < 0 ||
  ------------------
  |  Branch (956:24): [True: 25.0k, False: 38.4k]
  |  Branch (956:37): [True: 5.30k, False: 19.7k]
  ------------------
  957|  19.7k|                                    get_poc_diff(seqhdr->order_hint_n_bits,
  ------------------
  |  Branch (957:37): [True: 1.00k, False: 18.7k]
  ------------------
  958|  19.7k|                                                 refpoc, off_before) > 0))
  959|  6.31k|            {
  960|  6.31k|                off_before = refpoc;
  961|  6.31k|                off_before_idx = i;
  962|  6.31k|            }
  963|  80.3k|        }
  964|       |
  965|  11.4k|        if ((off_before | off_after) >= 0) {
  ------------------
  |  Branch (965:13): [True: 1.52k, False: 9.91k]
  ------------------
  966|  1.52k|            hdr->skip_mode_refs[0] = imin(off_before_idx, off_after_idx);
  967|  1.52k|            hdr->skip_mode_refs[1] = imax(off_before_idx, off_after_idx);
  968|  1.52k|            hdr->skip_mode_allowed = 1;
  969|  9.91k|        } else if (off_before >= 0) {
  ------------------
  |  Branch (969:20): [True: 3.72k, False: 6.18k]
  ------------------
  970|  3.72k|            int off_before2 = -1;
  971|  3.72k|            int off_before2_idx;
  972|  29.8k|            for (int i = 0; i < 7; i++) {
  ------------------
  |  Branch (972:29): [True: 26.0k, False: 3.72k]
  ------------------
  973|  26.0k|                if (!c->refs[hdr->refidx[i]].p.p.frame_hdr) goto error;
  ------------------
  |  Branch (973:21): [True: 0, False: 26.0k]
  ------------------
  974|  26.0k|                const int refpoc = c->refs[hdr->refidx[i]].p.p.frame_hdr->frame_offset;
  975|  26.0k|                if (get_poc_diff(seqhdr->order_hint_n_bits,
  ------------------
  |  Branch (975:21): [True: 9.15k, False: 16.9k]
  ------------------
  976|  26.0k|                                 refpoc, off_before) < 0) {
  977|  9.15k|                    if (off_before2 < 0 || get_poc_diff(seqhdr->order_hint_n_bits,
  ------------------
  |  Branch (977:25): [True: 2.01k, False: 7.13k]
  |  Branch (977:44): [True: 411, False: 6.72k]
  ------------------
  978|  7.13k|                                                        refpoc, off_before2) > 0)
  979|  2.42k|                    {
  980|  2.42k|                        off_before2 = refpoc;
  981|  2.42k|                        off_before2_idx = i;
  982|  2.42k|                    }
  983|  9.15k|                }
  984|  26.0k|            }
  985|       |
  986|  3.72k|            if (off_before2 >= 0) {
  ------------------
  |  Branch (986:17): [True: 2.01k, False: 1.71k]
  ------------------
  987|  2.01k|                hdr->skip_mode_refs[0] = imin(off_before_idx, off_before2_idx);
  988|  2.01k|                hdr->skip_mode_refs[1] = imax(off_before_idx, off_before2_idx);
  989|  2.01k|                hdr->skip_mode_allowed = 1;
  990|  2.01k|            }
  991|  3.72k|        }
  992|  11.4k|    }
  993|  63.8k|    if (hdr->skip_mode_allowed)
  ------------------
  |  Branch (993:9): [True: 3.54k, False: 60.3k]
  ------------------
  994|  3.54k|        hdr->skip_mode_enabled = dav1d_get_bit(gb);
  995|       |#if DEBUG_FRAME_HDR
  996|       |    printf("HDR: post-extskip: off=%td\n",
  997|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
  998|       |#endif
  999|  63.8k|    if (!hdr->error_resilient_mode && IS_INTER_OR_SWITCH(hdr) && seqhdr->warped_motion)
  ------------------
  |  |   36|  85.2k|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 19.1k, False: 2.26k]
  |  |  ------------------
  ------------------
  |  Branch (999:9): [True: 21.3k, False: 42.4k]
  |  Branch (999:66): [True: 14.0k, False: 5.11k]
  ------------------
 1000|  14.0k|        hdr->warp_motion = dav1d_get_bit(gb);
 1001|       |#if DEBUG_FRAME_HDR
 1002|       |    printf("HDR: post-warpmotionbit: off=%td\n",
 1003|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
 1004|       |#endif
 1005|  63.8k|    hdr->reduced_txtp_set = dav1d_get_bit(gb);
 1006|       |#if DEBUG_FRAME_HDR
 1007|       |    printf("HDR: post-reducedtxtpset: off=%td\n",
 1008|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
 1009|       |#endif
 1010|       |
 1011|   510k|    for (int i = 0; i < 7; i++)
  ------------------
  |  Branch (1011:21): [True: 447k, False: 63.8k]
  ------------------
 1012|   447k|        hdr->gmv[i] = dav1d_default_wm_params;
 1013|       |
 1014|  63.8k|    if (IS_INTER_OR_SWITCH(hdr)) {
  ------------------
  |  |   36|  63.8k|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 21.4k, False: 42.4k]
  |  |  ------------------
  ------------------
 1015|   170k|        for (int i = 0; i < 7; i++) {
  ------------------
  |  Branch (1015:25): [True: 149k, False: 21.2k]
  ------------------
 1016|   149k|            hdr->gmv[i].type = !dav1d_get_bit(gb) ? DAV1D_WM_TYPE_IDENTITY :
  ------------------
  |  Branch (1016:32): [True: 139k, False: 10.0k]
  ------------------
 1017|   149k|                                dav1d_get_bit(gb) ? DAV1D_WM_TYPE_ROT_ZOOM :
  ------------------
  |  Branch (1017:33): [True: 5.91k, False: 4.09k]
  ------------------
 1018|  10.0k|                                dav1d_get_bit(gb) ? DAV1D_WM_TYPE_TRANSLATION :
  ------------------
  |  Branch (1018:33): [True: 1.56k, False: 2.52k]
  ------------------
 1019|  4.09k|                                                    DAV1D_WM_TYPE_AFFINE;
 1020|       |
 1021|   149k|            if (hdr->gmv[i].type == DAV1D_WM_TYPE_IDENTITY) continue;
  ------------------
  |  Branch (1021:17): [True: 139k, False: 10.0k]
  ------------------
 1022|       |
 1023|  10.0k|            const Dav1dWarpedMotionParams *ref_gmv;
 1024|  10.0k|            if (hdr->primary_ref_frame == DAV1D_PRIMARY_REF_NONE) {
  ------------------
  |  |   45|  10.0k|#define DAV1D_PRIMARY_REF_NONE 7
  ------------------
  |  Branch (1024:17): [True: 882, False: 9.12k]
  ------------------
 1025|    882|                ref_gmv = &dav1d_default_wm_params;
 1026|  9.12k|            } else {
 1027|  9.12k|                const int pri_ref = hdr->refidx[hdr->primary_ref_frame];
 1028|  9.12k|                if (!c->refs[pri_ref].p.p.frame_hdr) goto error;
  ------------------
  |  Branch (1028:21): [True: 201, False: 8.92k]
  ------------------
 1029|  8.92k|                ref_gmv = &c->refs[pri_ref].p.p.frame_hdr->gmv[i];
 1030|  8.92k|            }
 1031|  9.80k|            int32_t *const mat = hdr->gmv[i].matrix;
 1032|  9.80k|            const int32_t *const ref_mat = ref_gmv->matrix;
 1033|  9.80k|            int bits, shift;
 1034|       |
 1035|  9.80k|            if (hdr->gmv[i].type >= DAV1D_WM_TYPE_ROT_ZOOM) {
  ------------------
  |  Branch (1035:17): [True: 8.24k, False: 1.56k]
  ------------------
 1036|  8.24k|                mat[2] = (1 << 16) + 2 *
 1037|  8.24k|                    dav1d_get_bits_subexp(gb, (ref_mat[2] - (1 << 16)) >> 1, 12);
 1038|  8.24k|                mat[3] = 2 * dav1d_get_bits_subexp(gb, ref_mat[3] >> 1, 12);
 1039|       |
 1040|  8.24k|                bits = 12;
 1041|  8.24k|                shift = 10;
 1042|  8.24k|            } else {
 1043|  1.56k|                bits = 9 - !hdr->hp;
 1044|  1.56k|                shift = 13 + !hdr->hp;
 1045|  1.56k|            }
 1046|       |
 1047|  9.80k|            if (hdr->gmv[i].type == DAV1D_WM_TYPE_AFFINE) {
  ------------------
  |  Branch (1047:17): [True: 2.34k, False: 7.45k]
  ------------------
 1048|  2.34k|                mat[4] = 2 * dav1d_get_bits_subexp(gb, ref_mat[4] >> 1, 12);
 1049|  2.34k|                mat[5] = (1 << 16) + 2 *
 1050|  2.34k|                    dav1d_get_bits_subexp(gb, (ref_mat[5] - (1 << 16)) >> 1, 12);
 1051|  7.45k|            } else {
 1052|  7.45k|                mat[4] = -mat[3];
 1053|  7.45k|                mat[5] = mat[2];
 1054|  7.45k|            }
 1055|       |
 1056|  9.80k|            mat[0] = dav1d_get_bits_subexp(gb, ref_mat[0] >> shift, bits) * (1 << shift);
 1057|  9.80k|            mat[1] = dav1d_get_bits_subexp(gb, ref_mat[1] >> shift, bits) * (1 << shift);
 1058|  9.80k|        }
 1059|  21.4k|    }
 1060|       |#if DEBUG_FRAME_HDR
 1061|       |    printf("HDR: post-gmv: off=%td\n",
 1062|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
 1063|       |#endif
 1064|       |
 1065|  63.6k|    if (seqhdr->film_grain_present && (hdr->show_frame || hdr->showable_frame)) {
  ------------------
  |  Branch (1065:9): [True: 19.2k, False: 44.3k]
  |  Branch (1065:40): [True: 15.7k, False: 3.58k]
  |  Branch (1065:59): [True: 2.57k, False: 1.01k]
  ------------------
 1066|  18.2k|        hdr->film_grain.present = dav1d_get_bit(gb);
 1067|  18.2k|        if (hdr->film_grain.present) {
  ------------------
  |  Branch (1067:13): [True: 6.47k, False: 11.8k]
  ------------------
 1068|  6.47k|            const unsigned seed = dav1d_get_bits(gb, 16);
 1069|  6.47k|            hdr->film_grain.update = hdr->frame_type != DAV1D_FRAME_TYPE_INTER || dav1d_get_bit(gb);
  ------------------
  |  Branch (1069:38): [True: 4.66k, False: 1.81k]
  |  Branch (1069:83): [True: 122, False: 1.68k]
  ------------------
 1070|  6.47k|            if (!hdr->film_grain.update) {
  ------------------
  |  Branch (1070:17): [True: 1.68k, False: 4.78k]
  ------------------
 1071|  1.68k|                const int refidx = dav1d_get_bits(gb, 3);
 1072|  1.68k|                int i;
 1073|  6.71k|                for (i = 0; i < 7; i++)
  ------------------
  |  Branch (1073:29): [True: 6.44k, False: 267]
  ------------------
 1074|  6.44k|                    if (hdr->refidx[i] == refidx)
  ------------------
  |  Branch (1074:25): [True: 1.42k, False: 5.02k]
  ------------------
 1075|  1.42k|                        break;
 1076|  1.68k|                if (i == 7 || !c->refs[refidx].p.p.frame_hdr) goto error;
  ------------------
  |  Branch (1076:21): [True: 267, False: 1.42k]
  |  Branch (1076:31): [True: 206, False: 1.21k]
  ------------------
 1077|  1.21k|                hdr->film_grain.data = c->refs[refidx].p.p.frame_hdr->film_grain.data;
 1078|  1.21k|                hdr->film_grain.data.seed = seed;
 1079|  4.78k|            } else {
 1080|  4.78k|                Dav1dFilmGrainData *const fgd = &hdr->film_grain.data;
 1081|  4.78k|                fgd->seed = seed;
 1082|       |
 1083|  4.78k|                fgd->num_y_points = dav1d_get_bits(gb, 4);
 1084|  4.78k|                if (fgd->num_y_points > 14) goto error;
  ------------------
  |  Branch (1084:21): [True: 234, False: 4.55k]
  ------------------
 1085|  7.62k|                for (int i = 0; i < fgd->num_y_points; i++) {
  ------------------
  |  Branch (1085:33): [True: 3.50k, False: 4.12k]
  ------------------
 1086|  3.50k|                    fgd->y_points[i][0] = dav1d_get_bits(gb, 8);
 1087|  3.50k|                    if (i && fgd->y_points[i - 1][0] >= fgd->y_points[i][0])
  ------------------
  |  Branch (1087:25): [True: 1.10k, False: 2.39k]
  |  Branch (1087:30): [True: 424, False: 679]
  ------------------
 1088|    424|                        goto error;
 1089|  3.07k|                    fgd->y_points[i][1] = dav1d_get_bits(gb, 8);
 1090|  3.07k|                }
 1091|       |
 1092|  4.12k|                if (!seqhdr->monochrome)
  ------------------
  |  Branch (1092:21): [True: 3.35k, False: 771]
  ------------------
 1093|  3.35k|                    fgd->chroma_scaling_from_luma = dav1d_get_bit(gb);
 1094|  4.12k|                if (seqhdr->monochrome || fgd->chroma_scaling_from_luma ||
  ------------------
  |  Branch (1094:21): [True: 771, False: 3.35k]
  |  Branch (1094:43): [True: 1.10k, False: 2.25k]
  ------------------
 1095|  2.25k|                    (seqhdr->ss_ver == 1 && seqhdr->ss_hor == 1 && !fgd->num_y_points))
  ------------------
  |  Branch (1095:22): [True: 655, False: 1.60k]
  |  Branch (1095:45): [True: 655, False: 0]
  |  Branch (1095:68): [True: 89, False: 566]
  ------------------
 1096|  1.96k|                {
 1097|  1.96k|                    fgd->num_uv_points[0] = fgd->num_uv_points[1] = 0;
 1098|  5.42k|                } else for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1098:41): [True: 3.99k, False: 1.42k]
  ------------------
 1099|  3.99k|                    fgd->num_uv_points[pl] = dav1d_get_bits(gb, 4);
 1100|  3.99k|                    if (fgd->num_uv_points[pl] > 10) goto error;
  ------------------
  |  Branch (1100:25): [True: 250, False: 3.74k]
  ------------------
 1101|  6.46k|                    for (int i = 0; i < fgd->num_uv_points[pl]; i++) {
  ------------------
  |  Branch (1101:37): [True: 3.21k, False: 3.25k]
  ------------------
 1102|  3.21k|                        fgd->uv_points[pl][i][0] = dav1d_get_bits(gb, 8);
 1103|  3.21k|                        if (i && fgd->uv_points[pl][i - 1][0] >= fgd->uv_points[pl][i][0])
  ------------------
  |  Branch (1103:29): [True: 1.68k, False: 1.52k]
  |  Branch (1103:34): [True: 490, False: 1.19k]
  ------------------
 1104|    490|                            goto error;
 1105|  2.72k|                        fgd->uv_points[pl][i][1] = dav1d_get_bits(gb, 8);
 1106|  2.72k|                    }
 1107|  3.74k|                }
 1108|       |
 1109|  3.38k|                if (seqhdr->ss_hor == 1 && seqhdr->ss_ver == 1 &&
  ------------------
  |  Branch (1109:21): [True: 2.85k, False: 532]
  |  Branch (1109:44): [True: 1.37k, False: 1.48k]
  ------------------
 1110|  1.37k|                    !!fgd->num_uv_points[0] != !!fgd->num_uv_points[1])
  ------------------
  |  Branch (1110:21): [True: 195, False: 1.17k]
  ------------------
 1111|    195|                {
 1112|    195|                    goto error;
 1113|    195|                }
 1114|       |
 1115|  3.19k|                fgd->scaling_shift = dav1d_get_bits(gb, 2) + 8;
 1116|  3.19k|                fgd->ar_coeff_lag = dav1d_get_bits(gb, 2);
 1117|  3.19k|                const int num_y_pos = 2 * fgd->ar_coeff_lag * (fgd->ar_coeff_lag + 1);
 1118|  3.19k|                if (fgd->num_y_points)
  ------------------
  |  Branch (1118:21): [True: 1.69k, False: 1.50k]
  ------------------
 1119|  12.4k|                    for (int i = 0; i < num_y_pos; i++)
  ------------------
  |  Branch (1119:37): [True: 10.7k, False: 1.69k]
  ------------------
 1120|  10.7k|                        fgd->ar_coeffs_y[i] = dav1d_get_bits(gb, 8) - 128;
 1121|  9.57k|                for (int pl = 0; pl < 2; pl++)
  ------------------
  |  Branch (1121:34): [True: 6.38k, False: 3.19k]
  ------------------
 1122|  6.38k|                    if (fgd->num_uv_points[pl] || fgd->chroma_scaling_from_luma) {
  ------------------
  |  Branch (1122:25): [True: 765, False: 5.61k]
  |  Branch (1122:51): [True: 2.20k, False: 3.41k]
  ------------------
 1123|  2.96k|                        const int num_uv_pos = num_y_pos + !!fgd->num_y_points;
 1124|  32.0k|                        for (int i = 0; i < num_uv_pos; i++)
  ------------------
  |  Branch (1124:41): [True: 29.0k, False: 2.96k]
  ------------------
 1125|  29.0k|                            fgd->ar_coeffs_uv[pl][i] = dav1d_get_bits(gb, 8) - 128;
 1126|  2.96k|                        if (!fgd->num_y_points)
  ------------------
  |  Branch (1126:29): [True: 1.45k, False: 1.50k]
  ------------------
 1127|  1.45k|                            fgd->ar_coeffs_uv[pl][num_uv_pos] = 0;
 1128|  2.96k|                    }
 1129|  3.19k|                fgd->ar_coeff_shift = dav1d_get_bits(gb, 2) + 6;
 1130|  3.19k|                fgd->grain_scale_shift = dav1d_get_bits(gb, 2);
 1131|  9.57k|                for (int pl = 0; pl < 2; pl++)
  ------------------
  |  Branch (1131:34): [True: 6.38k, False: 3.19k]
  ------------------
 1132|  6.38k|                    if (fgd->num_uv_points[pl]) {
  ------------------
  |  Branch (1132:25): [True: 765, False: 5.61k]
  ------------------
 1133|    765|                        fgd->uv_mult[pl] = dav1d_get_bits(gb, 8) - 128;
 1134|    765|                        fgd->uv_luma_mult[pl] = dav1d_get_bits(gb, 8) - 128;
 1135|    765|                        fgd->uv_offset[pl] = dav1d_get_bits(gb, 9) - 256;
 1136|    765|                    }
 1137|  3.19k|                fgd->overlap_flag = dav1d_get_bit(gb);
 1138|  3.19k|                fgd->clip_to_restricted_range = dav1d_get_bit(gb);
 1139|  3.19k|            }
 1140|  6.47k|        }
 1141|  18.2k|    }
 1142|       |#if DEBUG_FRAME_HDR
 1143|       |    printf("HDR: post-filmgrain: off=%td\n",
 1144|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
 1145|       |#endif
 1146|       |
 1147|  61.5k|    return 0;
 1148|       |
 1149|  5.41k|error:
 1150|  5.41k|    dav1d_log(c, "Error parsing frame header\n");
  ------------------
  |  |   44|  5.41k|#define dav1d_log(...) do { } while(0)
  |  |  ------------------
  |  |  |  Branch (44:37): [Folded, False: 5.41k]
  |  |  ------------------
  ------------------
 1151|  5.41k|    return DAV1D_ERR(EINVAL);
  ------------------
  |  |   58|  5.41k|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
 1152|  63.6k|}
obu.c:read_frame_size:
  343|  65.4k|{
  344|  65.4k|    const Dav1dSequenceHeader *const seqhdr = c->seq_hdr;
  345|  65.4k|    Dav1dFrameHeader *const hdr = c->frame_hdr;
  346|       |
  347|  65.4k|    if (use_ref) {
  ------------------
  |  Branch (347:9): [True: 12.0k, False: 53.4k]
  ------------------
  348|  26.9k|        for (int i = 0; i < 7; i++) {
  ------------------
  |  Branch (348:25): [True: 25.8k, False: 1.15k]
  ------------------
  349|  25.8k|            if (dav1d_get_bit(gb)) {
  ------------------
  |  Branch (349:17): [True: 10.9k, False: 14.9k]
  ------------------
  350|  10.9k|                const Dav1dThreadPicture *const ref =
  351|  10.9k|                    &c->refs[c->frame_hdr->refidx[i]].p;
  352|  10.9k|                if (!ref->p.frame_hdr) return -1;
  ------------------
  |  Branch (352:21): [True: 202, False: 10.7k]
  ------------------
  353|  10.7k|                hdr->width[1] = ref->p.frame_hdr->width[1];
  354|  10.7k|                hdr->height = ref->p.frame_hdr->height;
  355|  10.7k|                hdr->render_width = ref->p.frame_hdr->render_width;
  356|  10.7k|                hdr->render_height = ref->p.frame_hdr->render_height;
  357|  10.7k|                hdr->super_res.enabled = seqhdr->super_res && dav1d_get_bit(gb);
  ------------------
  |  Branch (357:42): [True: 1.45k, False: 9.25k]
  |  Branch (357:63): [True: 964, False: 487]
  ------------------
  358|  10.7k|                if (hdr->super_res.enabled) {
  ------------------
  |  Branch (358:21): [True: 964, False: 9.73k]
  ------------------
  359|    964|                    const int d = hdr->super_res.width_scale_denominator =
  360|    964|                        9 + dav1d_get_bits(gb, 3);
  361|    964|                    hdr->width[0] = imax((hdr->width[1] * 8 + (d >> 1)) / d,
  362|    964|                                         imin(16, hdr->width[1]));
  363|  9.73k|                } else {
  364|  9.73k|                    hdr->super_res.width_scale_denominator = 8;
  365|  9.73k|                    hdr->width[0] = hdr->width[1];
  366|  9.73k|                }
  367|  10.7k|                return 0;
  368|  10.9k|            }
  369|  25.8k|        }
  370|  12.0k|    }
  371|       |
  372|  54.5k|    if (hdr->frame_size_override) {
  ------------------
  |  Branch (372:9): [True: 8.05k, False: 46.5k]
  ------------------
  373|  8.05k|        hdr->width[1] = dav1d_get_bits(gb, seqhdr->width_n_bits) + 1;
  374|  8.05k|        hdr->height = dav1d_get_bits(gb, seqhdr->height_n_bits) + 1;
  375|  46.5k|    } else {
  376|  46.5k|        hdr->width[1] = seqhdr->max_width;
  377|  46.5k|        hdr->height = seqhdr->max_height;
  378|  46.5k|    }
  379|  54.5k|    hdr->super_res.enabled = seqhdr->super_res && dav1d_get_bit(gb);
  ------------------
  |  Branch (379:30): [True: 29.9k, False: 24.6k]
  |  Branch (379:51): [True: 7.19k, False: 22.7k]
  ------------------
  380|  54.5k|    if (hdr->super_res.enabled) {
  ------------------
  |  Branch (380:9): [True: 7.19k, False: 47.3k]
  ------------------
  381|  7.19k|        const int d = hdr->super_res.width_scale_denominator = 9 + dav1d_get_bits(gb, 3);
  382|  7.19k|        hdr->width[0] = imax((hdr->width[1] * 8 + (d >> 1)) / d, imin(16, hdr->width[1]));
  383|  47.3k|    } else {
  384|  47.3k|        hdr->super_res.width_scale_denominator = 8;
  385|  47.3k|        hdr->width[0] = hdr->width[1];
  386|  47.3k|    }
  387|  54.5k|    hdr->have_render_size = dav1d_get_bit(gb);
  388|  54.5k|    if (hdr->have_render_size) {
  ------------------
  |  Branch (388:9): [True: 2.14k, False: 52.4k]
  ------------------
  389|  2.14k|        hdr->render_width = dav1d_get_bits(gb, 16) + 1;
  390|  2.14k|        hdr->render_height = dav1d_get_bits(gb, 16) + 1;
  391|  52.4k|    } else {
  392|  52.4k|        hdr->render_width = hdr->width[1];
  393|  52.4k|        hdr->render_height = hdr->height;
  394|  52.4k|    }
  395|  54.5k|    return 0;
  396|  65.4k|}
obu.c:tile_log2:
  398|   313k|static inline int tile_log2(const int sz, const int tgt) {
  399|   313k|    int k;
  400|   454k|    for (k = 0; (sz << k) < tgt; k++) ;
  ------------------
  |  Branch (400:17): [True: 140k, False: 313k]
  ------------------
  401|   313k|    return k;
  402|   313k|}
obu.c:check_trailing_bits:
   50|  46.5k|{
   51|  46.5k|    const int trailing_one_bit = dav1d_get_bit(gb);
   52|       |
   53|  46.5k|    if (gb->error)
  ------------------
  |  Branch (53:9): [True: 8.75k, False: 37.8k]
  ------------------
   54|  8.75k|        return DAV1D_ERR(EINVAL);
  ------------------
  |  |   58|  8.75k|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
   55|       |
   56|  37.8k|    if (!strict_std_compliance)
  ------------------
  |  Branch (56:9): [True: 37.8k, False: 0]
  ------------------
   57|  37.8k|        return 0;
   58|       |
   59|      0|    if (!trailing_one_bit || gb->state)
  ------------------
  |  Branch (59:9): [True: 0, False: 0]
  |  Branch (59:30): [True: 0, False: 0]
  ------------------
   60|      0|        return DAV1D_ERR(EINVAL);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
   61|       |
   62|      0|    ptrdiff_t size = gb->ptr_end - gb->ptr;
   63|      0|    while (size > 0 && gb->ptr[size - 1] == 0)
  ------------------
  |  Branch (63:12): [True: 0, False: 0]
  |  Branch (63:24): [True: 0, False: 0]
  ------------------
   64|      0|        size--;
   65|       |
   66|      0|    if (size)
  ------------------
  |  Branch (66:9): [True: 0, False: 0]
  ------------------
   67|      0|        return DAV1D_ERR(EINVAL);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
   68|       |
   69|      0|    return 0;
   70|      0|}
obu.c:parse_tile_hdr:
 1154|  53.8k|static void parse_tile_hdr(Dav1dContext *const c, GetBits *const gb) {
 1155|  53.8k|    const int n_tiles = c->frame_hdr->tiling.cols * c->frame_hdr->tiling.rows;
 1156|  53.8k|    const int have_tile_pos = n_tiles > 1 ? dav1d_get_bit(gb) : 0;
  ------------------
  |  Branch (1156:31): [True: 6.35k, False: 47.4k]
  ------------------
 1157|       |
 1158|  53.8k|    if (have_tile_pos) {
  ------------------
  |  Branch (1158:9): [True: 2.44k, False: 51.3k]
  ------------------
 1159|  2.44k|        const int n_bits = c->frame_hdr->tiling.log2_cols +
 1160|  2.44k|                           c->frame_hdr->tiling.log2_rows;
 1161|  2.44k|        c->tile[c->n_tile_data].start = dav1d_get_bits(gb, n_bits);
 1162|  2.44k|        c->tile[c->n_tile_data].end = dav1d_get_bits(gb, n_bits);
 1163|  51.3k|    } else {
 1164|  51.3k|        c->tile[c->n_tile_data].start = 0;
 1165|  51.3k|        c->tile[c->n_tile_data].end = n_tiles - 1;
 1166|  51.3k|    }
 1167|  53.8k|}

dav1d_pal_dsp_init:
   71|  9.69k|COLD void dav1d_pal_dsp_init(Dav1dPalDSPContext *const c) {
   72|  9.69k|    c->pal_idx_finish = pal_idx_finish_c;
   73|       |
   74|  9.69k|#if HAVE_ASM
   75|       |#if ARCH_RISCV
   76|       |    pal_dsp_init_riscv(c);
   77|       |#elif ARCH_X86
   78|       |    pal_dsp_init_x86(c);
   79|  9.69k|#endif
   80|  9.69k|#endif
   81|  9.69k|}

dav1d_default_picture_alloc:
   46|  49.8k|int dav1d_default_picture_alloc(Dav1dPicture *const p, void *const cookie) {
   47|  49.8k|    const int hbd = p->p.bpc > 8;
   48|  49.8k|    const int aligned_w = (p->p.w + 127) & ~127;
   49|  49.8k|    const int aligned_h = (p->p.h + 127) & ~127;
   50|  49.8k|    const int has_chroma = p->p.layout != DAV1D_PIXEL_LAYOUT_I400;
   51|  49.8k|    const int ss_ver = p->p.layout == DAV1D_PIXEL_LAYOUT_I420;
   52|  49.8k|    const int ss_hor = p->p.layout != DAV1D_PIXEL_LAYOUT_I444;
   53|  49.8k|    ptrdiff_t y_stride = aligned_w << hbd;
   54|  49.8k|    ptrdiff_t uv_stride = has_chroma ? y_stride >> ss_hor : 0;
  ------------------
  |  Branch (54:27): [True: 27.4k, False: 22.4k]
  ------------------
   55|       |    /* Due to how mapping of addresses to sets works in most L1 and L2 cache
   56|       |     * implementations, strides of multiples of certain power-of-two numbers
   57|       |     * may cause multiple rows of the same superblock to map to the same set,
   58|       |     * causing evictions of previous rows resulting in a reduction in cache
   59|       |     * hit rate. Avoid that by slightly padding the stride when necessary. */
   60|  49.8k|    if (!(y_stride & 1023))
  ------------------
  |  Branch (60:9): [True: 4.65k, False: 45.1k]
  ------------------
   61|  4.65k|        y_stride += DAV1D_PICTURE_ALIGNMENT;
  ------------------
  |  |   44|  4.65k|#define DAV1D_PICTURE_ALIGNMENT 64
  ------------------
   62|  49.8k|    if (!(uv_stride & 1023) && has_chroma)
  ------------------
  |  Branch (62:9): [True: 25.5k, False: 24.3k]
  |  Branch (62:32): [True: 3.05k, False: 22.4k]
  ------------------
   63|  3.05k|        uv_stride += DAV1D_PICTURE_ALIGNMENT;
  ------------------
  |  |   44|  3.05k|#define DAV1D_PICTURE_ALIGNMENT 64
  ------------------
   64|  49.8k|    p->stride[0] = y_stride;
   65|  49.8k|    p->stride[1] = uv_stride;
   66|  49.8k|    const size_t y_sz = y_stride * aligned_h;
   67|  49.8k|    const size_t uv_sz = uv_stride * (aligned_h >> ss_ver);
   68|  49.8k|    const size_t pic_size = y_sz + 2 * uv_sz;
   69|       |
   70|  49.8k|    uint8_t *const buf = dav1d_mem_pool_pop(cookie, pic_size + DAV1D_PICTURE_ALIGNMENT);
  ------------------
  |  |   44|  49.8k|#define DAV1D_PICTURE_ALIGNMENT 64
  ------------------
   71|  49.8k|    if (!buf) return DAV1D_ERR(ENOMEM);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  |  Branch (71:9): [True: 0, False: 49.8k]
  ------------------
   72|  49.8k|    p->allocator_data = buf;
   73|  49.8k|    p->data[0] = buf;
   74|  49.8k|    p->data[1] = has_chroma ? buf + y_sz : NULL;
  ------------------
  |  Branch (74:18): [True: 27.4k, False: 22.4k]
  ------------------
   75|  49.8k|    p->data[2] = has_chroma ? buf + y_sz + uv_sz : NULL;
  ------------------
  |  Branch (75:18): [True: 27.4k, False: 22.4k]
  ------------------
   76|       |
   77|  49.8k|    return 0;
   78|  49.8k|}
dav1d_default_picture_release:
   80|  49.8k|void dav1d_default_picture_release(Dav1dPicture *const p, void *const cookie) {
   81|  49.8k|    dav1d_mem_pool_push(cookie, p->allocator_data);
   82|  49.8k|}
dav1d_picture_free_itut_t35:
   99|    575|void dav1d_picture_free_itut_t35(const uint8_t *const data, void *const user_data) {
  100|    575|    struct itut_t35_ctx_context *itut_t35_ctx = user_data;
  101|       |
  102|  1.69k|    for (size_t i = 0; i < itut_t35_ctx->n_itut_t35; i++)
  ------------------
  |  Branch (102:24): [True: 1.12k, False: 575]
  ------------------
  103|  1.12k|        dav1d_free(itut_t35_ctx->itut_t35[i].payload);
  ------------------
  |  |  135|  1.12k|#define dav1d_free(ptr) free(ptr)
  ------------------
  104|    575|    dav1d_free(itut_t35_ctx->itut_t35);
  ------------------
  |  |  135|    575|#define dav1d_free(ptr) free(ptr)
  ------------------
  105|    575|    dav1d_free(itut_t35_ctx);
  ------------------
  |  |  135|    575|#define dav1d_free(ptr) free(ptr)
  ------------------
  106|    575|}
dav1d_picture_copy_props:
  164|  52.0k|{
  165|  52.0k|    dav1d_data_props_copy(&p->m, props);
  166|       |
  167|  52.0k|    dav1d_ref_dec(&p->content_light_ref);
  168|  52.0k|    p->content_light_ref = content_light_ref;
  169|  52.0k|    p->content_light = content_light;
  170|  52.0k|    if (content_light_ref) dav1d_ref_inc(content_light_ref);
  ------------------
  |  Branch (170:9): [True: 2.61k, False: 49.3k]
  ------------------
  171|       |
  172|  52.0k|    dav1d_ref_dec(&p->mastering_display_ref);
  173|  52.0k|    p->mastering_display_ref = mastering_display_ref;
  174|  52.0k|    p->mastering_display = mastering_display;
  175|  52.0k|    if (mastering_display_ref) dav1d_ref_inc(mastering_display_ref);
  ------------------
  |  Branch (175:9): [True: 1.35k, False: 50.6k]
  ------------------
  176|       |
  177|  52.0k|    dav1d_ref_dec(&p->itut_t35_ref);
  178|  52.0k|    p->itut_t35_ref = itut_t35_ref;
  179|  52.0k|    p->itut_t35 = itut_t35;
  180|  52.0k|    p->n_itut_t35 = n_itut_t35;
  181|  52.0k|    if (itut_t35_ref) dav1d_ref_inc(itut_t35_ref);
  ------------------
  |  Branch (181:9): [True: 922, False: 51.0k]
  ------------------
  182|  52.0k|}
dav1d_thread_picture_alloc:
  186|  41.2k|{
  187|  41.2k|    Dav1dThreadPicture *const p = &f->sr_cur;
  188|       |
  189|  41.2k|    const int res = picture_alloc(c, &p->p, f->frame_hdr->width[1], f->frame_hdr->height,
  190|  41.2k|                                  f->seq_hdr, f->seq_hdr_ref,
  191|  41.2k|                                  f->frame_hdr, f->frame_hdr_ref,
  192|  41.2k|                                  bpc, &f->tile[0].data.m, &c->allocator,
  193|  41.2k|                                  (void **) &p->progress);
  194|  41.2k|    if (res) return res;
  ------------------
  |  Branch (194:9): [True: 0, False: 41.2k]
  ------------------
  195|       |
  196|       |    // Don't clear these flags from c->frame_flags if the frame is not going to be output.
  197|       |    // This way they will be added to the next visible frame too.
  198|  41.2k|    const int flags_mask = ((f->frame_hdr->show_frame || c->output_invisible_frames) &&
  ------------------
  |  Branch (198:30): [True: 37.8k, False: 3.37k]
  |  Branch (198:58): [True: 0, False: 3.37k]
  ------------------
  199|  37.8k|                            c->max_spatial_id == f->frame_hdr->spatial_id)
  ------------------
  |  Branch (199:29): [True: 32.6k, False: 5.22k]
  ------------------
  200|  41.2k|                           ? 0 : (PICTURE_FLAG_NEW_SEQUENCE | PICTURE_FLAG_NEW_OP_PARAMS_INFO);
  201|  41.2k|    p->flags = c->frame_flags;
  202|  41.2k|    c->frame_flags &= flags_mask;
  203|       |
  204|  41.2k|    p->visible = f->frame_hdr->show_frame;
  205|  41.2k|    p->showable = f->frame_hdr->showable_frame;
  206|       |
  207|  41.2k|    if (p->visible) {
  ------------------
  |  Branch (207:9): [True: 37.8k, False: 3.37k]
  ------------------
  208|       |        // Only add HDR10+ and T35 metadata when show frame flag is enabled
  209|  37.8k|        dav1d_picture_copy_props(&p->p, c->content_light, c->content_light_ref,
  210|  37.8k|                                 c->mastering_display, c->mastering_display_ref,
  211|  37.8k|                                 c->itut_t35, c->itut_t35_ref, c->n_itut_t35,
  212|  37.8k|                                 &f->tile[0].data.m);
  213|       |
  214|       |        // Must be removed from the context after being attached to the frame
  215|  37.8k|        dav1d_ref_dec(&c->itut_t35_ref);
  216|  37.8k|        c->itut_t35 = NULL;
  217|  37.8k|        c->n_itut_t35 = 0;
  218|  37.8k|    } else {
  219|  3.37k|        dav1d_data_props_copy(&p->p.m, &f->tile[0].data.m);
  220|  3.37k|    }
  221|       |
  222|  41.2k|    if (c->n_fc > 1) {
  ------------------
  |  Branch (222:9): [True: 0, False: 41.2k]
  ------------------
  223|      0|        atomic_init(&p->progress[0], 0);
  224|       |        atomic_init(&p->progress[1], 0);
  225|      0|    }
  226|  41.2k|    return res;
  227|  41.2k|}
dav1d_picture_alloc_copy:
  231|  8.61k|{
  232|  8.61k|    struct pic_ctx_context *const pic_ctx = (struct pic_ctx_context*)src->ref->const_data;
  233|  8.61k|    const int res = picture_alloc(c, dst, w, src->p.h,
  234|  8.61k|                                  src->seq_hdr, src->seq_hdr_ref,
  235|  8.61k|                                  src->frame_hdr, src->frame_hdr_ref,
  236|  8.61k|                                  src->p.bpc, &src->m, &pic_ctx->allocator,
  237|  8.61k|                                  NULL);
  238|  8.61k|    if (res) return res;
  ------------------
  |  Branch (238:9): [True: 0, False: 8.61k]
  ------------------
  239|       |
  240|  8.61k|    dav1d_picture_copy_props(dst, src->content_light, src->content_light_ref,
  241|  8.61k|                             src->mastering_display, src->mastering_display_ref,
  242|  8.61k|                             src->itut_t35, src->itut_t35_ref, src->n_itut_t35,
  243|  8.61k|                             &src->m);
  244|       |
  245|  8.61k|    return 0;
  246|  8.61k|}
dav1d_picture_ref:
  248|   473k|void dav1d_picture_ref(Dav1dPicture *const dst, const Dav1dPicture *const src) {
  249|   473k|    assert(dst != NULL);
  ------------------
  |  Branch (249:5): [True: 473k, False: 0]
  ------------------
  250|   473k|    assert(dst->data[0] == NULL);
  ------------------
  |  Branch (250:5): [True: 473k, False: 0]
  ------------------
  251|   473k|    assert(src != NULL);
  ------------------
  |  Branch (251:5): [True: 473k, False: 0]
  ------------------
  252|       |
  253|   473k|    if (src->ref) {
  ------------------
  |  Branch (253:9): [True: 473k, False: 0]
  ------------------
  254|   473k|        assert(src->data[0] != NULL);
  ------------------
  |  Branch (254:9): [True: 473k, False: 0]
  ------------------
  255|   473k|        dav1d_ref_inc(src->ref);
  256|   473k|    }
  257|   473k|    if (src->frame_hdr_ref) dav1d_ref_inc(src->frame_hdr_ref);
  ------------------
  |  Branch (257:9): [True: 473k, False: 0]
  ------------------
  258|   473k|    if (src->seq_hdr_ref) dav1d_ref_inc(src->seq_hdr_ref);
  ------------------
  |  Branch (258:9): [True: 473k, False: 0]
  ------------------
  259|   473k|    if (src->m.user_data.ref) dav1d_ref_inc(src->m.user_data.ref);
  ------------------
  |  Branch (259:9): [True: 0, False: 473k]
  ------------------
  260|   473k|    if (src->content_light_ref) dav1d_ref_inc(src->content_light_ref);
  ------------------
  |  Branch (260:9): [True: 11.3k, False: 461k]
  ------------------
  261|   473k|    if (src->mastering_display_ref) dav1d_ref_inc(src->mastering_display_ref);
  ------------------
  |  Branch (261:9): [True: 6.28k, False: 466k]
  ------------------
  262|   473k|    if (src->itut_t35_ref) dav1d_ref_inc(src->itut_t35_ref);
  ------------------
  |  Branch (262:9): [True: 2.43k, False: 470k]
  ------------------
  263|   473k|    *dst = *src;
  264|   473k|}
dav1d_picture_move_ref:
  266|  15.9k|void dav1d_picture_move_ref(Dav1dPicture *const dst, Dav1dPicture *const src) {
  267|  15.9k|    assert(dst != NULL);
  ------------------
  |  Branch (267:5): [True: 15.9k, False: 0]
  ------------------
  268|  15.9k|    assert(dst->data[0] == NULL);
  ------------------
  |  Branch (268:5): [True: 15.9k, False: 0]
  ------------------
  269|  15.9k|    assert(src != NULL);
  ------------------
  |  Branch (269:5): [True: 15.9k, False: 0]
  ------------------
  270|       |
  271|  15.9k|    if (src->ref)
  ------------------
  |  Branch (271:9): [True: 15.9k, False: 0]
  ------------------
  272|  15.9k|        assert(src->data[0] != NULL);
  ------------------
  |  Branch (272:9): [True: 15.9k, False: 0]
  ------------------
  273|       |
  274|  15.9k|    *dst = *src;
  275|  15.9k|    memset(src, 0, sizeof(*src));
  276|  15.9k|}
dav1d_thread_picture_ref:
  280|   435k|{
  281|   435k|    dav1d_picture_ref(&dst->p, &src->p);
  282|   435k|    dst->visible = src->visible;
  283|   435k|    dst->showable = src->showable;
  284|   435k|    dst->progress = src->progress;
  285|   435k|    dst->flags = src->flags;
  286|   435k|}
dav1d_picture_unref_internal:
  299|   621k|void dav1d_picture_unref_internal(Dav1dPicture *const p) {
  300|   621k|    validate_input(p != NULL);
  ------------------
  |  |   59|   621k|#define validate_input(x) validate_input_or_ret(x, )
  |  |  ------------------
  |  |  |  |   52|   621k|    if (!(x)) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (52:9): [True: 0, False: 621k]
  |  |  |  |  ------------------
  |  |  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  |  |  ------------------
  |  |  |  |   54|      0|                    #x, __func__); \
  |  |  |  |   55|      0|        debug_abort(); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   39|      0|#define debug_abort abort
  |  |  |  |  ------------------
  |  |  |  |   56|      0|        return r; \
  |  |  |  |   57|      0|    }
  |  |  ------------------
  ------------------
  301|       |
  302|   621k|    if (p->ref) {
  ------------------
  |  Branch (302:9): [True: 523k, False: 98.6k]
  ------------------
  303|   523k|        validate_input(p->data[0] != NULL);
  ------------------
  |  |   59|   523k|#define validate_input(x) validate_input_or_ret(x, )
  |  |  ------------------
  |  |  |  |   52|   523k|    if (!(x)) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (52:9): [True: 0, False: 523k]
  |  |  |  |  ------------------
  |  |  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  |  |  ------------------
  |  |  |  |   54|      0|                    #x, __func__); \
  |  |  |  |   55|      0|        debug_abort(); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   39|      0|#define debug_abort abort
  |  |  |  |  ------------------
  |  |  |  |   56|      0|        return r; \
  |  |  |  |   57|      0|    }
  |  |  ------------------
  ------------------
  304|   523k|        dav1d_ref_dec(&p->ref);
  305|   523k|    }
  306|   621k|    dav1d_ref_dec(&p->seq_hdr_ref);
  307|   621k|    dav1d_ref_dec(&p->frame_hdr_ref);
  308|   621k|    dav1d_ref_dec(&p->m.user_data.ref);
  309|   621k|    dav1d_ref_dec(&p->content_light_ref);
  310|   621k|    dav1d_ref_dec(&p->mastering_display_ref);
  311|   621k|    dav1d_ref_dec(&p->itut_t35_ref);
  312|   621k|    memset(p, 0, sizeof(*p));
  313|   621k|    dav1d_data_props_set_defaults(&p->m);
  314|   621k|}
dav1d_thread_picture_unref:
  316|   532k|void dav1d_thread_picture_unref(Dav1dThreadPicture *const p) {
  317|   532k|    dav1d_picture_unref_internal(&p->p);
  318|       |
  319|       |    p->progress = NULL;
  320|   532k|}
dav1d_picture_get_event_flags:
  322|  43.4k|enum Dav1dEventFlags dav1d_picture_get_event_flags(const Dav1dThreadPicture *const p) {
  323|  43.4k|    if (!p->flags)
  ------------------
  |  Branch (323:9): [True: 24.1k, False: 19.2k]
  ------------------
  324|  24.1k|        return 0;
  325|       |
  326|  19.2k|    enum Dav1dEventFlags flags = 0;
  327|  19.2k|    if (p->flags & PICTURE_FLAG_NEW_SEQUENCE)
  ------------------
  |  Branch (327:9): [True: 16.7k, False: 2.50k]
  ------------------
  328|  16.7k|       flags |= DAV1D_EVENT_FLAG_NEW_SEQUENCE;
  329|  19.2k|    if (p->flags & PICTURE_FLAG_NEW_OP_PARAMS_INFO)
  ------------------
  |  Branch (329:9): [True: 236, False: 19.0k]
  ------------------
  330|    236|       flags |= DAV1D_EVENT_FLAG_NEW_OP_PARAMS_INFO;
  331|       |
  332|  19.2k|    return flags;
  333|  43.4k|}
picture.c:picture_alloc:
  117|  49.8k|{
  118|  49.8k|    if (p->data[0]) {
  ------------------
  |  Branch (118:9): [True: 0, False: 49.8k]
  ------------------
  119|      0|        dav1d_log(c, "Picture already allocated!\n");
  ------------------
  |  |   44|      0|#define dav1d_log(...) do { } while(0)
  |  |  ------------------
  |  |  |  Branch (44:37): [Folded, False: 0]
  |  |  ------------------
  ------------------
  120|      0|        return -1;
  121|      0|    }
  122|  49.8k|    assert(bpc > 0 && bpc <= 16);
  ------------------
  |  Branch (122:5): [True: 49.8k, False: 0]
  |  Branch (122:5): [True: 49.8k, False: 0]
  ------------------
  123|       |
  124|  49.8k|    size_t extra = c->n_fc > 1 ? sizeof(atomic_int) * 2 : 0;
  ------------------
  |  Branch (124:20): [True: 0, False: 49.8k]
  ------------------
  125|  49.8k|    struct pic_ctx_context *pic_ctx = dav1d_mem_pool_pop(c->pic_ctx_pool, extra +
  126|  49.8k|                                                         sizeof(struct pic_ctx_context));
  127|  49.8k|    if (!pic_ctx)
  ------------------
  |  Branch (127:9): [True: 0, False: 49.8k]
  ------------------
  128|      0|        return DAV1D_ERR(ENOMEM);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  129|       |
  130|  49.8k|    p->p.w = w;
  131|  49.8k|    p->p.h = h;
  132|  49.8k|    p->seq_hdr = seq_hdr;
  133|  49.8k|    p->frame_hdr = frame_hdr;
  134|  49.8k|    p->p.layout = seq_hdr->layout;
  135|  49.8k|    p->p.bpc = bpc;
  136|  49.8k|    dav1d_data_props_set_defaults(&p->m);
  137|  49.8k|    const int res = p_allocator->alloc_picture_callback(p, p_allocator->cookie);
  138|  49.8k|    if (res < 0) {
  ------------------
  |  Branch (138:9): [True: 0, False: 49.8k]
  ------------------
  139|      0|        dav1d_mem_pool_push(c->pic_ctx_pool, pic_ctx);
  140|      0|        return res;
  141|      0|    }
  142|       |
  143|  49.8k|    pic_ctx->allocator = *p_allocator;
  144|  49.8k|    pic_ctx->pic = *p;
  145|  49.8k|    p->ref = dav1d_ref_init(&pic_ctx->ref, pic_ctx, free_buffer, c->pic_ctx_pool, 0);
  146|       |
  147|  49.8k|    p->seq_hdr_ref = seq_hdr_ref;
  148|  49.8k|    if (seq_hdr_ref) dav1d_ref_inc(seq_hdr_ref);
  ------------------
  |  Branch (148:9): [True: 49.8k, False: 0]
  ------------------
  149|       |
  150|  49.8k|    p->frame_hdr_ref = frame_hdr_ref;
  151|  49.8k|    if (frame_hdr_ref) dav1d_ref_inc(frame_hdr_ref);
  ------------------
  |  Branch (151:9): [True: 49.8k, False: 0]
  ------------------
  152|       |
  153|  49.8k|    if (extra && extra_ptr)
  ------------------
  |  Branch (153:9): [True: 0, False: 49.8k]
  |  Branch (153:18): [True: 0, False: 0]
  ------------------
  154|      0|        *extra_ptr = &pic_ctx->extra_data;
  155|       |
  156|  49.8k|    return 0;
  157|  49.8k|}
picture.c:free_buffer:
   91|  49.8k|static void free_buffer(const uint8_t *const data, void *const user_data) {
   92|  49.8k|    struct pic_ctx_context *pic_ctx = (struct pic_ctx_context*)data;
   93|       |
   94|  49.8k|    pic_ctx->allocator.release_picture_callback(&pic_ctx->pic,
   95|  49.8k|                                                pic_ctx->allocator.cookie);
   96|  49.8k|    dav1d_mem_pool_push(user_data, pic_ctx);
   97|  49.8k|}

dav1d_init_qm_tables:
 1648|      1|COLD void dav1d_init_qm_tables(void) {
 1649|       |    // This function is guaranteed to be called only once
 1650|       |
 1651|     16|    for (int i = 0; i < 15; i++)
  ------------------
  |  Branch (1651:21): [True: 15, False: 1]
  ------------------
 1652|     45|        for (int j = 0; j < 2; j++) {
  ------------------
  |  Branch (1652:25): [True: 30, False: 15]
  ------------------
 1653|       |            // note that the w/h in the assignment is inverted, this is on purpose
 1654|       |            // because we store coefficients transposed
 1655|     30|            dav1d_qm_tbl[i][j][RTX_4X8  ] = qm_tbl_8x4[i][j];
 1656|     30|            dav1d_qm_tbl[i][j][RTX_8X4  ] = qm_tbl_4x8[i][j];
 1657|     30|            dav1d_qm_tbl[i][j][RTX_4X16 ] = qm_tbl_16x4[i][j];
 1658|     30|            dav1d_qm_tbl[i][j][RTX_16X4 ] = qm_tbl_4x16[i][j];
 1659|     30|            dav1d_qm_tbl[i][j][RTX_8X16 ] = qm_tbl_16x8[i][j];
 1660|     30|            dav1d_qm_tbl[i][j][RTX_16X8 ] = qm_tbl_8x16[i][j];
 1661|     30|            dav1d_qm_tbl[i][j][RTX_8X32 ] = qm_tbl_32x8[i][j];
 1662|     30|            dav1d_qm_tbl[i][j][RTX_32X8 ] = qm_tbl_8x32[i][j];
 1663|     30|            dav1d_qm_tbl[i][j][RTX_16X32] = qm_tbl_32x16[i][j];
 1664|     30|            dav1d_qm_tbl[i][j][RTX_32X16] = qm_tbl_16x32[i][j];
 1665|       |
 1666|     30|            dav1d_qm_tbl[i][j][ TX_4X4  ] = qm_tbl_4x4[i][j];
 1667|     30|            dav1d_qm_tbl[i][j][ TX_8X8  ] = qm_tbl_8x8[i][j];
 1668|     30|            dav1d_qm_tbl[i][j][ TX_16X16] = qm_tbl_16x16[i][j];
 1669|     30|            dav1d_qm_tbl[i][j][ TX_32X32] = qm_tbl_32x32[i][j];
 1670|       |
 1671|     30|            untriangle(qm_tbl_32x32[i][j], qm_tbl_32x32_t[i][j], 32);
 1672|     30|            subsample(qm_tbl_4x4[i][j],   &qm_tbl_32x32[i][j][32*3+3], 32, 8, 8);
 1673|     30|            subsample(qm_tbl_8x4[i][j],   &qm_tbl_32x16[i][j][32*1+1], 16, 4, 4);
 1674|     30|            subsample(qm_tbl_8x8[i][j],   &qm_tbl_32x32[i][j][32*1+1], 32, 4, 4);
 1675|     30|            subsample(qm_tbl_16x4[i][j],  &qm_tbl_32x16[i][j][32*1+0], 16, 2, 4);
 1676|     30|            subsample(qm_tbl_16x8[i][j],  &qm_tbl_32x16[i][j][32*0+0], 16, 2, 2);
 1677|     30|            subsample(qm_tbl_16x16[i][j], &qm_tbl_32x32[i][j][32*0+0], 32, 2, 2);
 1678|     30|            subsample(qm_tbl_32x8[i][j],  &qm_tbl_32x16[i][j][32*0+0], 16, 1, 2);
 1679|     30|            transpose(qm_tbl_4x8[i][j], qm_tbl_8x4[i][j], 8, 4);
 1680|     30|            transpose(qm_tbl_4x16[i][j], qm_tbl_16x4[i][j], 16, 4);
 1681|     30|            transpose(qm_tbl_8x16[i][j], qm_tbl_16x8[i][j], 16, 8);
 1682|     30|            transpose(qm_tbl_8x32[i][j], qm_tbl_32x8[i][j], 32, 8);
 1683|     30|            transpose(qm_tbl_16x32[i][j], qm_tbl_32x16[i][j], 32, 16);
 1684|       |
 1685|     30|            dav1d_qm_tbl[i][j][ TX_64X64] = dav1d_qm_tbl[i][j][ TX_32X32];
 1686|     30|            dav1d_qm_tbl[i][j][RTX_64X32] = dav1d_qm_tbl[i][j][ TX_32X32];
 1687|     30|            dav1d_qm_tbl[i][j][RTX_64X16] = dav1d_qm_tbl[i][j][RTX_32X16];
 1688|     30|            dav1d_qm_tbl[i][j][RTX_32X64] = dav1d_qm_tbl[i][j][ TX_32X32];
 1689|     30|            dav1d_qm_tbl[i][j][RTX_16X64] = dav1d_qm_tbl[i][j][RTX_16X32];
 1690|     30|        }
 1691|       |
 1692|       |    // dav1d_qm_tbl[15][*][*] == NULL
 1693|      1|}
qm.c:untriangle:
 1635|     30|static void untriangle(uint8_t *dst, const uint8_t *src, const int sz) {
 1636|    990|    for (int y = 0; y < sz; y++) {
  ------------------
  |  Branch (1636:21): [True: 960, False: 30]
  ------------------
 1637|    960|        memcpy(dst, src, y + 1);
 1638|    960|        const uint8_t *src_ptr = &src[y];
 1639|  15.8k|        for (int x = y + 1; x < sz; x++) {
  ------------------
  |  Branch (1639:29): [True: 14.8k, False: 960]
  ------------------
 1640|  14.8k|            src_ptr += x;
 1641|  14.8k|            dst[x] = *src_ptr;
 1642|  14.8k|        }
 1643|    960|        dst += sz;
 1644|    960|        src += y + 1;
 1645|    960|    }
 1646|     30|}
qm.c:subsample:
 1621|    210|{
 1622|  1.77k|    for (int y = 0; y < h; y += vstep)
  ------------------
  |  Branch (1622:21): [True: 1.56k, False: 210]
  ------------------
 1623|  26.0k|        for (int x = 0; x < 32; x += hstep)
  ------------------
  |  Branch (1623:25): [True: 24.4k, False: 1.56k]
  ------------------
 1624|  24.4k|            *dst++ = src[y * 32 + x];
 1625|    210|}
qm.c:transpose:
 1629|    150|{
 1630|  1.35k|    for (int y = 0, y_off = 0; y < h; y++, y_off += w)
  ------------------
  |  Branch (1630:32): [True: 1.20k, False: 150]
  ------------------
 1631|  30.9k|        for (int x = 0, x_off = 0; x < w; x++, x_off += h)
  ------------------
  |  Branch (1631:36): [True: 29.7k, False: 1.20k]
  ------------------
 1632|  29.7k|            dst[x_off + y] = src[y_off + x];
 1633|    150|}

dav1d_recon_b_intra_8bpc:
 1179|   772k|{
 1180|   772k|    Dav1dTileState *const ts = t->ts;
 1181|   772k|    const Dav1dFrameContext *const f = t->f;
 1182|   772k|    const Dav1dDSPContext *const dsp = f->dsp;
 1183|   772k|    const int bx4 = t->bx & 31, by4 = t->by & 31;
 1184|   772k|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
 1185|   772k|    const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
 1186|   772k|    const int cbx4 = bx4 >> ss_hor, cby4 = by4 >> ss_ver;
 1187|   772k|    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
 1188|   772k|    const int bw4 = b_dim[0], bh4 = b_dim[1];
 1189|   772k|    const int w4 = imin(bw4, f->bw - t->bx), h4 = imin(bh4, f->bh - t->by);
 1190|   772k|    const int cw4 = (w4 + ss_hor) >> ss_hor, ch4 = (h4 + ss_ver) >> ss_ver;
 1191|   772k|    const int has_chroma = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400 &&
  ------------------
  |  Branch (1191:28): [True: 657k, False: 114k]
  ------------------
 1192|   657k|                           (bw4 > ss_hor || t->bx & 1) &&
  ------------------
  |  Branch (1192:29): [True: 619k, False: 37.9k]
  |  Branch (1192:45): [True: 18.8k, False: 19.0k]
  ------------------
 1193|   638k|                           (bh4 > ss_ver || t->by & 1);
  ------------------
  |  Branch (1193:29): [True: 609k, False: 29.5k]
  |  Branch (1193:45): [True: 14.6k, False: 14.8k]
  ------------------
 1194|   772k|    const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[b->tx];
 1195|   772k|    const TxfmInfo *const uv_t_dim = &dav1d_txfm_dimensions[b->uvtx];
 1196|       |
 1197|       |    // coefficient coding
 1198|   772k|    pixel *const edge = bitfn(t->scratch.edge) + 128;
  ------------------
  |  |   51|   772k|#define bitfn(x) x##_8bpc
  ------------------
 1199|   772k|    const int cbw4 = (bw4 + ss_hor) >> ss_hor, cbh4 = (bh4 + ss_ver) >> ss_ver;
 1200|       |
 1201|   772k|    const int intra_edge_filter_flag = f->seq_hdr->intra_edge_filter << 10;
 1202|       |
 1203|  1.58M|    for (int init_y = 0; init_y < h4; init_y += 16) {
  ------------------
  |  Branch (1203:26): [True: 807k, False: 772k]
  ------------------
 1204|   807k|        const int sub_h4 = imin(h4, 16 + init_y);
 1205|   807k|        const int sub_ch4 = imin(ch4, (init_y + 16) >> ss_ver);
 1206|  1.68M|        for (int init_x = 0; init_x < w4; init_x += 16) {
  ------------------
  |  Branch (1206:30): [True: 876k, False: 807k]
  ------------------
 1207|   876k|            if (b->pal_sz[0]) {
  ------------------
  |  Branch (1207:17): [True: 17.3k, False: 859k]
  ------------------
 1208|  17.3k|                pixel *dst = ((pixel *) f->cur.data[0]) +
 1209|  17.3k|                             4 * (t->by * PXSTRIDE(f->cur.stride[0]) + t->bx);
  ------------------
  |  |   53|  17.3k|#define PXSTRIDE(x) (x)
  ------------------
 1210|  17.3k|                const uint8_t *pal_idx;
 1211|  17.3k|                if (t->frame_thread.pass) {
  ------------------
  |  Branch (1211:21): [True: 0, False: 17.3k]
  ------------------
 1212|      0|                    const int p = t->frame_thread.pass & 1;
 1213|      0|                    assert(ts->frame_thread[p].pal_idx);
  ------------------
  |  Branch (1213:21): [True: 0, False: 0]
  ------------------
 1214|      0|                    pal_idx = ts->frame_thread[p].pal_idx;
 1215|      0|                    ts->frame_thread[p].pal_idx += bw4 * bh4 * 8;
 1216|  17.3k|                } else {
 1217|  17.3k|                    pal_idx = t->scratch.pal_idx_y;
 1218|  17.3k|                }
 1219|  17.3k|                const pixel *const pal = t->frame_thread.pass ?
  ------------------
  |  Branch (1219:42): [True: 0, False: 17.3k]
  ------------------
 1220|      0|                    f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
 1221|      0|                                        ((t->bx >> 1) + (t->by & 1))][0] :
 1222|  17.3k|                    bytefn(t->scratch.pal)[0];
  ------------------
  |  |   87|  17.3k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  17.3k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 1223|  17.3k|                f->dsp->ipred.pal_pred(dst, f->cur.stride[0], pal,
 1224|  17.3k|                                       pal_idx, bw4 * 4, bh4 * 4);
 1225|  17.3k|                if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   34|  17.3k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 17.3k]
  |  |  ------------------
  |  |   35|  17.3k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  17.3k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                              if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1226|      0|                    hex_dump(dst, PXSTRIDE(f->cur.stride[0]),
  ------------------
  |  |   53|      0|#define PXSTRIDE(x) (x)
  ------------------
 1227|      0|                             bw4 * 4, bh4 * 4, "y-pal-pred");
 1228|  17.3k|            }
 1229|       |
 1230|   876k|            const int intra_flags = (sm_flag(t->a, bx4) |
 1231|   876k|                                     sm_flag(&t->l, by4) |
 1232|   876k|                                     intra_edge_filter_flag);
 1233|   876k|            const int sb_has_tr = init_x + 16 < w4 ? 1 : init_y ? 0 :
  ------------------
  |  Branch (1233:35): [True: 68.6k, False: 807k]
  |  Branch (1233:58): [True: 35.7k, False: 772k]
  ------------------
 1234|   807k|                              intra_edge_flags & EDGE_I444_TOP_HAS_RIGHT;
 1235|   876k|            const int sb_has_bl = init_x ? 0 : init_y + 16 < h4 ? 1 :
  ------------------
  |  Branch (1235:35): [True: 68.6k, False: 807k]
  |  Branch (1235:48): [True: 35.7k, False: 772k]
  ------------------
 1236|   807k|                              intra_edge_flags & EDGE_I444_LEFT_HAS_BOTTOM;
 1237|   876k|            int y, x;
 1238|   876k|            const int sub_w4 = imin(w4, init_x + 16);
 1239|  2.06M|            for (y = init_y, t->by += init_y; y < sub_h4;
  ------------------
  |  Branch (1239:47): [True: 1.19M, False: 876k]
  ------------------
 1240|  1.19M|                 y += t_dim->h, t->by += t_dim->h)
 1241|  1.19M|            {
 1242|  1.19M|                pixel *dst = ((pixel *) f->cur.data[0]) +
 1243|  1.19M|                               4 * (t->by * PXSTRIDE(f->cur.stride[0]) +
  ------------------
  |  |   53|  1.19M|#define PXSTRIDE(x) (x)
  ------------------
 1244|  1.19M|                                    t->bx + init_x);
 1245|  3.92M|                for (x = init_x, t->bx += init_x; x < sub_w4;
  ------------------
  |  Branch (1245:51): [True: 2.73M, False: 1.19M]
  ------------------
 1246|  2.73M|                     x += t_dim->w, t->bx += t_dim->w)
 1247|  2.73M|                {
 1248|  2.73M|                    if (b->pal_sz[0]) goto skip_y_pred;
  ------------------
  |  Branch (1248:25): [True: 24.1k, False: 2.70M]
  ------------------
 1249|       |
 1250|  2.70M|                    int angle = b->y_angle;
 1251|  2.70M|                    const enum EdgeFlags edge_flags =
 1252|  2.70M|                        (((y > init_y || !sb_has_tr) && (x + t_dim->w >= sub_w4)) ?
  ------------------
  |  Branch (1252:28): [True: 1.58M, False: 1.12M]
  |  Branch (1252:42): [True: 377k, False: 746k]
  |  Branch (1252:57): [True: 598k, False: 1.36M]
  ------------------
 1253|  2.11M|                             0 : EDGE_I444_TOP_HAS_RIGHT) |
 1254|  2.70M|                        ((x > init_x || (!sb_has_bl && y + t_dim->h >= sub_h4)) ?
  ------------------
  |  Branch (1254:27): [True: 1.53M, False: 1.17M]
  |  Branch (1254:42): [True: 757k, False: 414k]
  |  Branch (1254:56): [True: 517k, False: 239k]
  ------------------
 1255|  2.05M|                             0 : EDGE_I444_LEFT_HAS_BOTTOM);
 1256|  2.70M|                    const pixel *top_sb_edge = NULL;
 1257|  2.70M|                    if (!(t->by & (f->sb_step - 1))) {
  ------------------
  |  Branch (1257:25): [True: 431k, False: 2.27M]
  ------------------
 1258|   431k|                        top_sb_edge = f->ipred_edge[0];
 1259|   431k|                        const int sby = t->by >> f->sb_shift;
 1260|   431k|                        top_sb_edge += f->sb128w * 128 * (sby - 1);
 1261|   431k|                    }
 1262|  2.70M|                    const enum IntraPredMode m =
 1263|  2.70M|                        bytefn(dav1d_prepare_intra_edges)(t->bx,
  ------------------
  |  |   87|  2.70M|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  2.70M|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 1264|  2.70M|                                                          t->bx > ts->tiling.col_start,
 1265|  2.70M|                                                          t->by,
 1266|  2.70M|                                                          t->by > ts->tiling.row_start,
 1267|  2.70M|                                                          ts->tiling.col_end,
 1268|  2.70M|                                                          ts->tiling.row_end,
 1269|  2.70M|                                                          edge_flags, dst,
 1270|  2.70M|                                                          f->cur.stride[0], top_sb_edge,
 1271|  2.70M|                                                          b->y_mode, &angle,
 1272|  2.70M|                                                          t_dim->w, t_dim->h,
 1273|  2.70M|                                                          f->seq_hdr->intra_edge_filter,
 1274|  2.70M|                                                          edge HIGHBD_CALL_SUFFIX);
 1275|  2.70M|                    dsp->ipred.intra_pred[m](dst, f->cur.stride[0], edge,
 1276|  2.70M|                                             t_dim->w * 4, t_dim->h * 4,
 1277|  2.70M|                                             angle | intra_flags,
 1278|  2.70M|                                             4 * f->bw - 4 * t->bx,
 1279|  2.70M|                                             4 * f->bh - 4 * t->by
 1280|  2.70M|                                             HIGHBD_CALL_SUFFIX);
 1281|       |
 1282|  2.70M|                    if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   34|  2.70M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 2.70M]
  |  |  ------------------
  |  |   35|  2.70M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  2.70M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                                  if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1283|      0|                        hex_dump(edge - t_dim->h * 4, t_dim->h * 4,
 1284|      0|                                 t_dim->h * 4, 2, "l");
 1285|      0|                        hex_dump(edge, 0, 1, 1, "tl");
 1286|      0|                        hex_dump(edge + 1, t_dim->w * 4,
 1287|      0|                                 t_dim->w * 4, 2, "t");
 1288|      0|                        hex_dump(dst, f->cur.stride[0],
 1289|      0|                                 t_dim->w * 4, t_dim->h * 4, "y-intra-pred");
 1290|      0|                    }
 1291|       |
 1292|  2.73M|                skip_y_pred: {}
 1293|  2.73M|                    if (!b->skip) {
  ------------------
  |  Branch (1293:25): [True: 1.04M, False: 1.69M]
  ------------------
 1294|  1.04M|                        coef *cf;
 1295|  1.04M|                        int eob;
 1296|  1.04M|                        enum TxfmType txtp;
 1297|  1.04M|                        if (t->frame_thread.pass) {
  ------------------
  |  Branch (1297:29): [True: 0, False: 1.04M]
  ------------------
 1298|      0|                            const int p = t->frame_thread.pass & 1;
 1299|      0|                            const int cbi = *ts->frame_thread[p].cbi++;
 1300|      0|                            cf = ts->frame_thread[p].cf;
 1301|      0|                            ts->frame_thread[p].cf += imin(t_dim->w, 8) * imin(t_dim->h, 8) * 16;
 1302|      0|                            eob  = cbi >> 5;
 1303|      0|                            txtp = cbi & 0x1f;
 1304|  1.04M|                        } else {
 1305|  1.04M|                            uint8_t cf_ctx;
 1306|  1.04M|                            cf = bitfn(t->cf);
  ------------------
  |  |   51|  1.04M|#define bitfn(x) x##_8bpc
  ------------------
 1307|  1.04M|                            eob = decode_coefs(t, &t->a->lcoef[bx4 + x],
 1308|  1.04M|                                               &t->l.lcoef[by4 + y], b->tx, bs,
 1309|  1.04M|                                               b, 1, 0, cf, &txtp, &cf_ctx);
 1310|  1.04M|                            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  1.04M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 1.04M]
  |  |  ------------------
  |  |   35|  1.04M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  1.04M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1311|      0|                                printf("Post-y-cf-blk[tx=%d,txtp=%d,eob=%d]: r=%d\n",
 1312|      0|                                       b->tx, txtp, eob, ts->msac.rng);
 1313|  1.04M|                            dav1d_memset_likely_pow2(&t->a->lcoef[bx4 + x], cf_ctx, imin(t_dim->w, f->bw - t->bx));
 1314|  1.04M|                            dav1d_memset_likely_pow2(&t->l.lcoef[by4 + y], cf_ctx, imin(t_dim->h, f->bh - t->by));
 1315|  1.04M|                        }
 1316|  1.04M|                        if (eob >= 0) {
  ------------------
  |  Branch (1316:29): [True: 680k, False: 361k]
  ------------------
 1317|   680k|                            if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   34|   680k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 680k]
  |  |  ------------------
  |  |   35|   680k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   680k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                                          if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1318|      0|                                coef_dump(cf, imin(t_dim->h, 8) * 4,
 1319|      0|                                          imin(t_dim->w, 8) * 4, 3, "dq");
 1320|   680k|                            dsp->itx.itxfm_add[b->tx]
 1321|   680k|                                              [txtp](dst,
 1322|   680k|                                                     f->cur.stride[0],
 1323|   680k|                                                     cf, eob HIGHBD_CALL_SUFFIX);
 1324|   680k|                            if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   34|   680k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 680k]
  |  |  ------------------
  |  |   35|   680k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   680k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                                          if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1325|      0|                                hex_dump(dst, f->cur.stride[0],
 1326|      0|                                         t_dim->w * 4, t_dim->h * 4, "recon");
 1327|   680k|                        }
 1328|  1.69M|                    } else if (!t->frame_thread.pass) {
  ------------------
  |  Branch (1328:32): [True: 1.69M, False: 0]
  ------------------
 1329|  1.69M|                        dav1d_memset_pow2[t_dim->lw](&t->a->lcoef[bx4 + x], 0x40);
 1330|  1.69M|                        dav1d_memset_pow2[t_dim->lh](&t->l.lcoef[by4 + y], 0x40);
 1331|  1.69M|                    }
 1332|  2.73M|                    dst += 4 * t_dim->w;
 1333|  2.73M|                }
 1334|  1.19M|                t->bx -= x;
 1335|  1.19M|            }
 1336|   876k|            t->by -= y;
 1337|       |
 1338|   876k|            if (!has_chroma) continue;
  ------------------
  |  Branch (1338:17): [True: 170k, False: 706k]
  ------------------
 1339|       |
 1340|   706k|            const ptrdiff_t stride = f->cur.stride[1];
 1341|       |
 1342|   706k|            if (b->uv_mode == CFL_PRED) {
  ------------------
  |  Branch (1342:17): [True: 116k, False: 589k]
  ------------------
 1343|   116k|                assert(!init_x && !init_y);
  ------------------
  |  Branch (1343:17): [True: 116k, False: 0]
  |  Branch (1343:17): [True: 116k, False: 0]
  ------------------
 1344|       |
 1345|   116k|                int16_t *const ac = t->scratch.ac;
 1346|   116k|                pixel *y_src = ((pixel *) f->cur.data[0]) + 4 * (t->bx & ~ss_hor) +
 1347|   116k|                                 4 * (t->by & ~ss_ver) * PXSTRIDE(f->cur.stride[0]);
  ------------------
  |  |   53|   116k|#define PXSTRIDE(x) (x)
  ------------------
 1348|   116k|                const ptrdiff_t uv_off = 4 * ((t->bx >> ss_hor) +
 1349|   116k|                                              (t->by >> ss_ver) * PXSTRIDE(stride));
  ------------------
  |  |   53|   116k|#define PXSTRIDE(x) (x)
  ------------------
 1350|   116k|                pixel *const uv_dst[2] = { ((pixel *) f->cur.data[1]) + uv_off,
 1351|   116k|                                           ((pixel *) f->cur.data[2]) + uv_off };
 1352|       |
 1353|   116k|                const int furthest_r =
 1354|   116k|                    ((cw4 << ss_hor) + t_dim->w - 1) & ~(t_dim->w - 1);
 1355|   116k|                const int furthest_b =
 1356|   116k|                    ((ch4 << ss_ver) + t_dim->h - 1) & ~(t_dim->h - 1);
 1357|   116k|                dsp->ipred.cfl_ac[f->cur.p.layout - 1](ac, y_src, f->cur.stride[0],
 1358|   116k|                                                         cbw4 - (furthest_r >> ss_hor),
 1359|   116k|                                                         cbh4 - (furthest_b >> ss_ver),
 1360|   116k|                                                         cbw4 * 4, cbh4 * 4);
 1361|   350k|                for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1361:34): [True: 233k, False: 116k]
  ------------------
 1362|   233k|                    if (!b->cfl_alpha[pl]) continue;
  ------------------
  |  Branch (1362:25): [True: 45.8k, False: 187k]
  ------------------
 1363|   187k|                    int angle = 0;
 1364|   187k|                    const pixel *top_sb_edge = NULL;
 1365|   187k|                    if (!((t->by & ~ss_ver) & (f->sb_step - 1))) {
  ------------------
  |  Branch (1365:25): [True: 52.5k, False: 135k]
  ------------------
 1366|  52.5k|                        top_sb_edge = f->ipred_edge[pl + 1];
 1367|  52.5k|                        const int sby = t->by >> f->sb_shift;
 1368|  52.5k|                        top_sb_edge += f->sb128w * 128 * (sby - 1);
 1369|  52.5k|                    }
 1370|   187k|                    const int xpos = t->bx >> ss_hor, ypos = t->by >> ss_ver;
 1371|   187k|                    const int xstart = ts->tiling.col_start >> ss_hor;
 1372|   187k|                    const int ystart = ts->tiling.row_start >> ss_ver;
 1373|   187k|                    const enum IntraPredMode m =
 1374|   187k|                        bytefn(dav1d_prepare_intra_edges)(xpos, xpos > xstart,
  ------------------
  |  |   87|   187k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|   187k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 1375|   187k|                                                          ypos, ypos > ystart,
 1376|   187k|                                                          ts->tiling.col_end >> ss_hor,
 1377|   187k|                                                          ts->tiling.row_end >> ss_ver,
 1378|   187k|                                                          0, uv_dst[pl], stride,
 1379|   187k|                                                          top_sb_edge, DC_PRED, &angle,
 1380|   187k|                                                          uv_t_dim->w, uv_t_dim->h, 0,
 1381|   187k|                                                          edge HIGHBD_CALL_SUFFIX);
 1382|   187k|                    dsp->ipred.cfl_pred[m](uv_dst[pl], stride, edge,
 1383|   187k|                                           uv_t_dim->w * 4,
 1384|   187k|                                           uv_t_dim->h * 4,
 1385|   187k|                                           ac, b->cfl_alpha[pl]
 1386|   187k|                                           HIGHBD_CALL_SUFFIX);
 1387|   187k|                }
 1388|   116k|                if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   34|   116k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 116k]
  |  |  ------------------
  |  |   35|   116k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   116k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                              if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1389|      0|                    ac_dump(ac, 4*cbw4, 4*cbh4, "ac");
 1390|      0|                    hex_dump(uv_dst[0], stride, cbw4 * 4, cbh4 * 4, "u-cfl-pred");
 1391|      0|                    hex_dump(uv_dst[1], stride, cbw4 * 4, cbh4 * 4, "v-cfl-pred");
 1392|      0|                }
 1393|   589k|            } else if (b->pal_sz[1]) {
  ------------------
  |  Branch (1393:24): [True: 9.68k, False: 579k]
  ------------------
 1394|  9.68k|                const ptrdiff_t uv_dstoff = 4 * ((t->bx >> ss_hor) +
 1395|  9.68k|                                              (t->by >> ss_ver) * PXSTRIDE(f->cur.stride[1]));
  ------------------
  |  |   53|  9.68k|#define PXSTRIDE(x) (x)
  ------------------
 1396|  9.68k|                const pixel (*pal)[8];
 1397|  9.68k|                const uint8_t *pal_idx;
 1398|  9.68k|                if (t->frame_thread.pass) {
  ------------------
  |  Branch (1398:21): [True: 0, False: 9.68k]
  ------------------
 1399|      0|                    const int p = t->frame_thread.pass & 1;
 1400|      0|                    assert(ts->frame_thread[p].pal_idx);
  ------------------
  |  Branch (1400:21): [True: 0, False: 0]
  ------------------
 1401|      0|                    pal = f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
 1402|      0|                                              ((t->bx >> 1) + (t->by & 1))];
 1403|      0|                    pal_idx = ts->frame_thread[p].pal_idx;
 1404|      0|                    ts->frame_thread[p].pal_idx += cbw4 * cbh4 * 8;
 1405|  9.68k|                } else {
 1406|  9.68k|                    pal = bytefn(t->scratch.pal);
  ------------------
  |  |   87|  9.68k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  9.68k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 1407|  9.68k|                    pal_idx = t->scratch.pal_idx_uv;
 1408|  9.68k|                }
 1409|       |
 1410|  9.68k|                f->dsp->ipred.pal_pred(((pixel *) f->cur.data[1]) + uv_dstoff,
 1411|  9.68k|                                       f->cur.stride[1], pal[1],
 1412|  9.68k|                                       pal_idx, cbw4 * 4, cbh4 * 4);
 1413|  9.68k|                f->dsp->ipred.pal_pred(((pixel *) f->cur.data[2]) + uv_dstoff,
 1414|  9.68k|                                       f->cur.stride[1], pal[2],
 1415|  9.68k|                                       pal_idx, cbw4 * 4, cbh4 * 4);
 1416|  9.68k|                if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   34|  9.68k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 9.68k]
  |  |  ------------------
  |  |   35|  9.68k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  9.68k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                              if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1417|      0|                    hex_dump(((pixel *) f->cur.data[1]) + uv_dstoff,
 1418|      0|                             PXSTRIDE(f->cur.stride[1]),
  ------------------
  |  |   53|      0|#define PXSTRIDE(x) (x)
  ------------------
 1419|      0|                             cbw4 * 4, cbh4 * 4, "u-pal-pred");
 1420|      0|                    hex_dump(((pixel *) f->cur.data[2]) + uv_dstoff,
 1421|      0|                             PXSTRIDE(f->cur.stride[1]),
  ------------------
  |  |   53|      0|#define PXSTRIDE(x) (x)
  ------------------
 1422|      0|                             cbw4 * 4, cbh4 * 4, "v-pal-pred");
 1423|      0|                }
 1424|  9.68k|            }
 1425|       |
 1426|   706k|            const int sm_uv_fl = sm_uv_flag(t->a, cbx4) |
 1427|   706k|                                 sm_uv_flag(&t->l, cby4);
 1428|   706k|            const int uv_sb_has_tr =
 1429|   706k|                ((init_x + 16) >> ss_hor) < cw4 ? 1 : init_y ? 0 :
  ------------------
  |  Branch (1429:17): [True: 54.2k, False: 652k]
  |  Branch (1429:55): [True: 28.1k, False: 623k]
  ------------------
 1430|   652k|                intra_edge_flags & (EDGE_I420_TOP_HAS_RIGHT >> (f->cur.p.layout - 1));
 1431|   706k|            const int uv_sb_has_bl =
 1432|   706k|                init_x ? 0 : ((init_y + 16) >> ss_ver) < ch4 ? 1 :
  ------------------
  |  Branch (1432:17): [True: 54.2k, False: 652k]
  |  Branch (1432:30): [True: 28.1k, False: 623k]
  ------------------
 1433|   652k|                intra_edge_flags & (EDGE_I420_LEFT_HAS_BOTTOM >> (f->cur.p.layout - 1));
 1434|   706k|            const int sub_cw4 = imin(cw4, (init_x + 16) >> ss_hor);
 1435|  2.11M|            for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1435:30): [True: 1.41M, False: 706k]
  ------------------
 1436|  3.16M|                for (y = init_y >> ss_ver, t->by += init_y; y < sub_ch4;
  ------------------
  |  Branch (1436:61): [True: 1.75M, False: 1.41M]
  ------------------
 1437|  1.75M|                     y += uv_t_dim->h, t->by += uv_t_dim->h << ss_ver)
 1438|  1.75M|                {
 1439|  1.75M|                    pixel *dst = ((pixel *) f->cur.data[1 + pl]) +
 1440|  1.75M|                                   4 * ((t->by >> ss_ver) * PXSTRIDE(stride) +
  ------------------
  |  |   53|  1.75M|#define PXSTRIDE(x) (x)
  ------------------
 1441|  1.75M|                                        ((t->bx + init_x) >> ss_hor));
 1442|  4.84M|                    for (x = init_x >> ss_hor, t->bx += init_x; x < sub_cw4;
  ------------------
  |  Branch (1442:65): [True: 3.09M, False: 1.75M]
  ------------------
 1443|  3.09M|                         x += uv_t_dim->w, t->bx += uv_t_dim->w << ss_hor)
 1444|  3.09M|                    {
 1445|  3.09M|                        if ((b->uv_mode == CFL_PRED && b->cfl_alpha[pl]) ||
  ------------------
  |  Branch (1445:30): [True: 233k, False: 2.86M]
  |  Branch (1445:56): [True: 187k, False: 45.8k]
  ------------------
 1446|  2.90M|                            b->pal_sz[1])
  ------------------
  |  Branch (1446:29): [True: 33.9k, False: 2.87M]
  ------------------
 1447|   221k|                        {
 1448|   221k|                            goto skip_uv_pred;
 1449|   221k|                        }
 1450|       |
 1451|  2.87M|                        int angle = b->uv_angle;
 1452|       |                        // this probably looks weird because we're using
 1453|       |                        // luma flags in a chroma loop, but that's because
 1454|       |                        // prepare_intra_edges() expects luma flags as input
 1455|  2.87M|                        const enum EdgeFlags edge_flags =
 1456|  2.87M|                            (((y > (init_y >> ss_ver) || !uv_sb_has_tr) &&
  ------------------
  |  Branch (1456:32): [True: 1.45M, False: 1.41M]
  |  Branch (1456:58): [True: 463k, False: 953k]
  ------------------
 1457|  1.92M|                              (x + uv_t_dim->w >= sub_cw4)) ?
  ------------------
  |  Branch (1457:31): [True: 755k, False: 1.16M]
  ------------------
 1458|  2.11M|                                 0 : EDGE_I444_TOP_HAS_RIGHT) |
 1459|  2.87M|                            ((x > (init_x >> ss_hor) ||
  ------------------
  |  Branch (1459:31): [True: 1.33M, False: 1.54M]
  ------------------
 1460|  1.54M|                              (!uv_sb_has_bl && y + uv_t_dim->h >= sub_ch4)) ?
  ------------------
  |  Branch (1460:32): [True: 980k, False: 559k]
  |  Branch (1460:49): [True: 713k, False: 266k]
  ------------------
 1461|  2.04M|                                 0 : EDGE_I444_LEFT_HAS_BOTTOM);
 1462|  2.87M|                        const pixel *top_sb_edge = NULL;
 1463|  2.87M|                        if (!((t->by & ~ss_ver) & (f->sb_step - 1))) {
  ------------------
  |  Branch (1463:29): [True: 536k, False: 2.33M]
  ------------------
 1464|   536k|                            top_sb_edge = f->ipred_edge[1 + pl];
 1465|   536k|                            const int sby = t->by >> f->sb_shift;
 1466|   536k|                            top_sb_edge += f->sb128w * 128 * (sby - 1);
 1467|   536k|                        }
 1468|  2.87M|                        const enum IntraPredMode uv_mode =
 1469|  2.87M|                             b->uv_mode == CFL_PRED ? DC_PRED : b->uv_mode;
  ------------------
  |  Branch (1469:30): [True: 45.8k, False: 2.82M]
  ------------------
 1470|  2.87M|                        const int xpos = t->bx >> ss_hor, ypos = t->by >> ss_ver;
 1471|  2.87M|                        const int xstart = ts->tiling.col_start >> ss_hor;
 1472|  2.87M|                        const int ystart = ts->tiling.row_start >> ss_ver;
 1473|  2.87M|                        const enum IntraPredMode m =
 1474|  2.87M|                            bytefn(dav1d_prepare_intra_edges)(xpos, xpos > xstart,
  ------------------
  |  |   87|  2.87M|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  2.87M|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 1475|  2.87M|                                                              ypos, ypos > ystart,
 1476|  2.87M|                                                              ts->tiling.col_end >> ss_hor,
 1477|  2.87M|                                                              ts->tiling.row_end >> ss_ver,
 1478|  2.87M|                                                              edge_flags, dst, stride,
 1479|  2.87M|                                                              top_sb_edge, uv_mode,
 1480|  2.87M|                                                              &angle, uv_t_dim->w,
 1481|  2.87M|                                                              uv_t_dim->h,
 1482|  2.87M|                                                              f->seq_hdr->intra_edge_filter,
 1483|  2.87M|                                                              edge HIGHBD_CALL_SUFFIX);
 1484|  2.87M|                        angle |= intra_edge_filter_flag;
 1485|  2.87M|                        dsp->ipred.intra_pred[m](dst, stride, edge,
 1486|  2.87M|                                                 uv_t_dim->w * 4,
 1487|  2.87M|                                                 uv_t_dim->h * 4,
 1488|  2.87M|                                                 angle | sm_uv_fl,
 1489|  2.87M|                                                 (4 * f->bw + ss_hor -
 1490|  2.87M|                                                  4 * (t->bx & ~ss_hor)) >> ss_hor,
 1491|  2.87M|                                                 (4 * f->bh + ss_ver -
 1492|  2.87M|                                                  4 * (t->by & ~ss_ver)) >> ss_ver
 1493|  2.87M|                                                 HIGHBD_CALL_SUFFIX);
 1494|  2.87M|                        if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   34|  2.87M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 2.87M]
  |  |  ------------------
  |  |   35|  2.87M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  2.87M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                                      if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1495|      0|                            hex_dump(edge - uv_t_dim->h * 4, uv_t_dim->h * 4,
 1496|      0|                                     uv_t_dim->h * 4, 2, "l");
 1497|      0|                            hex_dump(edge, 0, 1, 1, "tl");
 1498|      0|                            hex_dump(edge + 1, uv_t_dim->w * 4,
 1499|      0|                                     uv_t_dim->w * 4, 2, "t");
 1500|      0|                            hex_dump(dst, stride, uv_t_dim->w * 4,
 1501|      0|                                     uv_t_dim->h * 4, pl ? "v-intra-pred" : "u-intra-pred");
  ------------------
  |  Branch (1501:55): [True: 0, False: 0]
  ------------------
 1502|      0|                        }
 1503|       |
 1504|  3.09M|                    skip_uv_pred: {}
 1505|  3.09M|                        if (!b->skip) {
  ------------------
  |  Branch (1505:29): [True: 1.18M, False: 1.90M]
  ------------------
 1506|  1.18M|                            enum TxfmType txtp;
 1507|  1.18M|                            int eob;
 1508|  1.18M|                            coef *cf;
 1509|  1.18M|                            if (t->frame_thread.pass) {
  ------------------
  |  Branch (1509:33): [True: 0, False: 1.18M]
  ------------------
 1510|      0|                                const int p = t->frame_thread.pass & 1;
 1511|      0|                                const int cbi = *ts->frame_thread[p].cbi++;
 1512|      0|                                cf = ts->frame_thread[p].cf;
 1513|      0|                                ts->frame_thread[p].cf += uv_t_dim->w * uv_t_dim->h * 16;
 1514|      0|                                eob  = cbi >> 5;
 1515|      0|                                txtp = cbi & 0x1f;
 1516|  1.18M|                            } else {
 1517|  1.18M|                                uint8_t cf_ctx;
 1518|  1.18M|                                cf = bitfn(t->cf);
  ------------------
  |  |   51|  1.18M|#define bitfn(x) x##_8bpc
  ------------------
 1519|  1.18M|                                eob = decode_coefs(t, &t->a->ccoef[pl][cbx4 + x],
 1520|  1.18M|                                                   &t->l.ccoef[pl][cby4 + y],
 1521|  1.18M|                                                   b->uvtx, bs, b, 1, 1 + pl, cf,
 1522|  1.18M|                                                   &txtp, &cf_ctx);
 1523|  1.18M|                                if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  1.18M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 1.18M]
  |  |  ------------------
  |  |   35|  1.18M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  1.18M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1524|      0|                                    printf("Post-uv-cf-blk[pl=%d,tx=%d,"
 1525|      0|                                           "txtp=%d,eob=%d]: r=%d [x=%d,cbx4=%d]\n",
 1526|      0|                                           pl, b->uvtx, txtp, eob, ts->msac.rng, x, cbx4);
 1527|  1.18M|                                int ctw = imin(uv_t_dim->w, (f->bw - t->bx + ss_hor) >> ss_hor);
 1528|  1.18M|                                int cth = imin(uv_t_dim->h, (f->bh - t->by + ss_ver) >> ss_ver);
 1529|  1.18M|                                dav1d_memset_likely_pow2(&t->a->ccoef[pl][cbx4 + x], cf_ctx, ctw);
 1530|  1.18M|                                dav1d_memset_likely_pow2(&t->l.ccoef[pl][cby4 + y], cf_ctx, cth);
 1531|  1.18M|                            }
 1532|  1.18M|                            if (eob >= 0) {
  ------------------
  |  Branch (1532:33): [True: 342k, False: 845k]
  ------------------
 1533|   342k|                                if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   34|   342k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 342k]
  |  |  ------------------
  |  |   35|   342k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   342k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                                              if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1534|      0|                                    coef_dump(cf, uv_t_dim->h * 4,
 1535|      0|                                              uv_t_dim->w * 4, 3, "dq");
 1536|   342k|                                dsp->itx.itxfm_add[b->uvtx]
 1537|   342k|                                                  [txtp](dst, stride,
 1538|   342k|                                                         cf, eob HIGHBD_CALL_SUFFIX);
 1539|   342k|                                if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   34|   342k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 342k]
  |  |  ------------------
  |  |   35|   342k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   342k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                                              if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1540|      0|                                    hex_dump(dst, stride, uv_t_dim->w * 4,
 1541|      0|                                             uv_t_dim->h * 4, "recon");
 1542|   342k|                            }
 1543|  1.90M|                        } else if (!t->frame_thread.pass) {
  ------------------
  |  Branch (1543:36): [True: 1.90M, False: 0]
  ------------------
 1544|  1.90M|                            dav1d_memset_pow2[uv_t_dim->lw](&t->a->ccoef[pl][cbx4 + x], 0x40);
 1545|  1.90M|                            dav1d_memset_pow2[uv_t_dim->lh](&t->l.ccoef[pl][cby4 + y], 0x40);
 1546|  1.90M|                        }
 1547|  3.09M|                        dst += uv_t_dim->w * 4;
 1548|  3.09M|                    }
 1549|  1.75M|                    t->bx -= x << ss_hor;
 1550|  1.75M|                }
 1551|  1.41M|                t->by -= y << ss_ver;
 1552|  1.41M|            }
 1553|   706k|        }
 1554|   807k|    }
 1555|   772k|}
dav1d_recon_b_inter_8bpc:
 1559|   706k|{
 1560|   706k|    Dav1dTileState *const ts = t->ts;
 1561|   706k|    const Dav1dFrameContext *const f = t->f;
 1562|   706k|    const Dav1dDSPContext *const dsp = f->dsp;
 1563|   706k|    const int bx4 = t->bx & 31, by4 = t->by & 31;
 1564|   706k|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
 1565|   706k|    const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
 1566|   706k|    const int cbx4 = bx4 >> ss_hor, cby4 = by4 >> ss_ver;
 1567|   706k|    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
 1568|   706k|    const int bw4 = b_dim[0], bh4 = b_dim[1];
 1569|   706k|    const int w4 = imin(bw4, f->bw - t->bx), h4 = imin(bh4, f->bh - t->by);
 1570|   706k|    const int has_chroma = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400 &&
  ------------------
  |  Branch (1570:28): [True: 478k, False: 227k]
  ------------------
 1571|   478k|                           (bw4 > ss_hor || t->bx & 1) &&
  ------------------
  |  Branch (1571:29): [True: 430k, False: 48.1k]
  |  Branch (1571:45): [True: 24.0k, False: 24.1k]
  ------------------
 1572|   454k|                           (bh4 > ss_ver || t->by & 1);
  ------------------
  |  Branch (1572:29): [True: 426k, False: 27.7k]
  |  Branch (1572:45): [True: 13.9k, False: 13.7k]
  ------------------
 1573|   706k|    const int chr_layout_idx = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I400 ? 0 :
  ------------------
  |  Branch (1573:32): [True: 227k, False: 478k]
  ------------------
 1574|   706k|                               DAV1D_PIXEL_LAYOUT_I444 - f->cur.p.layout;
 1575|   706k|    int res;
 1576|       |
 1577|       |    // prediction
 1578|   706k|    const int cbh4 = (bh4 + ss_ver) >> ss_ver, cbw4 = (bw4 + ss_hor) >> ss_hor;
 1579|   706k|    pixel *dst = ((pixel *) f->cur.data[0]) +
 1580|   706k|        4 * (t->by * PXSTRIDE(f->cur.stride[0]) + t->bx);
  ------------------
  |  |   53|   706k|#define PXSTRIDE(x) (x)
  ------------------
 1581|   706k|    const ptrdiff_t uvdstoff =
 1582|   706k|        4 * ((t->bx >> ss_hor) + (t->by >> ss_ver) * PXSTRIDE(f->cur.stride[1]));
  ------------------
  |  |   53|   706k|#define PXSTRIDE(x) (x)
  ------------------
 1583|   706k|    if (IS_KEY_OR_INTRA(f->frame_hdr)) {
  ------------------
  |  |   43|   706k|    (!IS_INTER_OR_SWITCH(frame_header))
  |  |  ------------------
  |  |  |  |   36|   706k|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (43:5): [True: 142k, False: 564k]
  |  |  ------------------
  ------------------
 1584|       |        // intrabc
 1585|   142k|        assert(!f->frame_hdr->super_res.enabled);
  ------------------
  |  Branch (1585:9): [True: 142k, False: 0]
  ------------------
 1586|   142k|        res = mc(t, dst, NULL, f->cur.stride[0], bw4, bh4, t->bx, t->by, 0,
 1587|   142k|                 b->mv[0], &f->sr_cur, 0 /* unused */, FILTER_2D_BILINEAR);
 1588|   142k|        if (res) return res;
  ------------------
  |  Branch (1588:13): [True: 0, False: 142k]
  ------------------
 1589|   340k|        if (has_chroma) for (int pl = 1; pl < 3; pl++) {
  ------------------
  |  Branch (1589:13): [True: 113k, False: 28.6k]
  |  Branch (1589:42): [True: 227k, False: 113k]
  ------------------
 1590|   227k|            res = mc(t, ((pixel *)f->cur.data[pl]) + uvdstoff, NULL, f->cur.stride[1],
 1591|   227k|                     bw4 << (bw4 == ss_hor), bh4 << (bh4 == ss_ver),
 1592|   227k|                     t->bx & ~ss_hor, t->by & ~ss_ver, pl, b->mv[0],
 1593|   227k|                     &f->sr_cur, 0 /* unused */, FILTER_2D_BILINEAR);
 1594|   227k|            if (res) return res;
  ------------------
  |  Branch (1594:17): [True: 0, False: 227k]
  ------------------
 1595|   227k|        }
 1596|   564k|    } else if (b->comp_type == COMP_INTER_NONE) {
  ------------------
  |  Branch (1596:16): [True: 460k, False: 104k]
  ------------------
 1597|   460k|        const Dav1dThreadPicture *const refp = &f->refp[b->ref[0]];
 1598|   460k|        const enum Filter2d filter_2d = b->filter2d;
 1599|       |
 1600|   460k|        if (imin(bw4, bh4) > 1 &&
  ------------------
  |  Branch (1600:13): [True: 290k, False: 169k]
  ------------------
 1601|   290k|            ((b->inter_mode == GLOBALMV && f->gmv_warp_allowed[b->ref[0]]) ||
  ------------------
  |  Branch (1601:15): [True: 82.3k, False: 208k]
  |  Branch (1601:44): [True: 5.98k, False: 76.3k]
  ------------------
 1602|   284k|             (b->motion_mode == MM_WARP && t->warpmv.type > DAV1D_WM_TYPE_TRANSLATION)))
  ------------------
  |  Branch (1602:15): [True: 32.9k, False: 251k]
  |  Branch (1602:44): [True: 29.7k, False: 3.23k]
  ------------------
 1603|  35.7k|        {
 1604|  35.7k|            res = warp_affine(t, dst, NULL, f->cur.stride[0], b_dim, 0, refp,
 1605|  35.7k|                              b->motion_mode == MM_WARP ? &t->warpmv :
  ------------------
  |  Branch (1605:31): [True: 29.7k, False: 5.98k]
  ------------------
 1606|  35.7k|                                  &f->frame_hdr->gmv[b->ref[0]]);
 1607|  35.7k|            if (res) return res;
  ------------------
  |  Branch (1607:17): [True: 0, False: 35.7k]
  ------------------
 1608|   424k|        } else {
 1609|   424k|            res = mc(t, dst, NULL, f->cur.stride[0],
 1610|   424k|                     bw4, bh4, t->bx, t->by, 0, b->mv[0], refp, b->ref[0], filter_2d);
 1611|   424k|            if (res) return res;
  ------------------
  |  Branch (1611:17): [True: 0, False: 424k]
  ------------------
 1612|   424k|            if (b->motion_mode == MM_OBMC) {
  ------------------
  |  Branch (1612:17): [True: 92.6k, False: 331k]
  ------------------
 1613|  92.6k|                res = obmc(t, dst, f->cur.stride[0], b_dim, 0, bx4, by4, w4, h4);
 1614|  92.6k|                if (res) return res;
  ------------------
  |  Branch (1614:21): [True: 0, False: 92.6k]
  ------------------
 1615|  92.6k|            }
 1616|   424k|        }
 1617|   460k|        if (b->interintra_type) {
  ------------------
  |  Branch (1617:13): [True: 29.0k, False: 431k]
  ------------------
 1618|  29.0k|            pixel *const tl_edge = bitfn(t->scratch.edge) + 32;
  ------------------
  |  |   51|  29.0k|#define bitfn(x) x##_8bpc
  ------------------
 1619|  29.0k|            enum IntraPredMode m = b->interintra_mode == II_SMOOTH_PRED ?
  ------------------
  |  Branch (1619:36): [True: 4.60k, False: 24.4k]
  ------------------
 1620|  24.4k|                                   SMOOTH_PRED : b->interintra_mode;
 1621|  29.0k|            pixel *const tmp = bitfn(t->scratch.interintra);
  ------------------
  |  |   51|  29.0k|#define bitfn(x) x##_8bpc
  ------------------
 1622|  29.0k|            int angle = 0;
 1623|  29.0k|            const pixel *top_sb_edge = NULL;
 1624|  29.0k|            if (!(t->by & (f->sb_step - 1))) {
  ------------------
  |  Branch (1624:17): [True: 4.24k, False: 24.7k]
  ------------------
 1625|  4.24k|                top_sb_edge = f->ipred_edge[0];
 1626|  4.24k|                const int sby = t->by >> f->sb_shift;
 1627|  4.24k|                top_sb_edge += f->sb128w * 128 * (sby - 1);
 1628|  4.24k|            }
 1629|  29.0k|            m = bytefn(dav1d_prepare_intra_edges)(t->bx, t->bx > ts->tiling.col_start,
  ------------------
  |  |   87|  29.0k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  29.0k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 1630|  29.0k|                                                  t->by, t->by > ts->tiling.row_start,
 1631|  29.0k|                                                  ts->tiling.col_end, ts->tiling.row_end,
 1632|  29.0k|                                                  0, dst, f->cur.stride[0], top_sb_edge,
 1633|  29.0k|                                                  m, &angle, bw4, bh4, 0, tl_edge
 1634|  29.0k|                                                  HIGHBD_CALL_SUFFIX);
 1635|  29.0k|            dsp->ipred.intra_pred[m](tmp, 4 * bw4 * sizeof(pixel),
 1636|  29.0k|                                     tl_edge, bw4 * 4, bh4 * 4, 0, 0, 0
 1637|  29.0k|                                     HIGHBD_CALL_SUFFIX);
 1638|  29.0k|            dsp->mc.blend(dst, f->cur.stride[0], tmp,
 1639|  29.0k|                          bw4 * 4, bh4 * 4, II_MASK(0, bs, b));
  ------------------
  |  |   83|  29.0k|    ((const uint8_t*)((uintptr_t)&dav1d_masks + \
  |  |   84|  29.0k|    (size_t)((b)->interintra_type == INTER_INTRA_BLEND ? \
  |  |  ------------------
  |  |  |  Branch (84:14): [True: 21.6k, False: 7.34k]
  |  |  ------------------
  |  |   85|  29.0k|    dav1d_masks.offsets[c][(bs)-BS_32x32].ii[(b)->interintra_mode] : \
  |  |   86|  29.0k|    dav1d_masks.offsets[c][(bs)-BS_32x32].wedge[0][(b)->wedge_idx]) * 8))
  ------------------
 1640|  29.0k|        }
 1641|       |
 1642|   460k|        if (!has_chroma) goto skip_inter_chroma_pred;
  ------------------
  |  Branch (1642:13): [True: 190k, False: 269k]
  ------------------
 1643|       |
 1644|       |        // sub8x8 derivation
 1645|   269k|        int is_sub8x8 = bw4 == ss_hor || bh4 == ss_ver;
  ------------------
  |  Branch (1645:25): [True: 11.9k, False: 257k]
  |  Branch (1645:42): [True: 8.11k, False: 249k]
  ------------------
 1646|   269k|        refmvs_block *const *r;
 1647|   269k|        if (is_sub8x8) {
  ------------------
  |  Branch (1647:13): [True: 20.1k, False: 249k]
  ------------------
 1648|  20.1k|            assert(ss_hor == 1);
  ------------------
  |  Branch (1648:13): [True: 20.1k, False: 0]
  ------------------
 1649|  20.1k|            r = &t->rt.r[(t->by & 31) + 5];
 1650|  20.1k|            if (bw4 == 1) is_sub8x8 &= r[0][t->bx - 1].ref.ref[0] > 0;
  ------------------
  |  Branch (1650:17): [True: 11.9k, False: 8.11k]
  ------------------
 1651|  20.1k|            if (bh4 == ss_ver) is_sub8x8 &= r[-1][t->bx].ref.ref[0] > 0;
  ------------------
  |  Branch (1651:17): [True: 11.4k, False: 8.60k]
  ------------------
 1652|  20.1k|            if (bw4 == 1 && bh4 == ss_ver)
  ------------------
  |  Branch (1652:17): [True: 11.9k, False: 8.11k]
  |  Branch (1652:29): [True: 3.37k, False: 8.60k]
  ------------------
 1653|  3.37k|                is_sub8x8 &= r[-1][t->bx - 1].ref.ref[0] > 0;
 1654|  20.1k|        }
 1655|       |
 1656|       |        // chroma prediction
 1657|   269k|        if (is_sub8x8) {
  ------------------
  |  Branch (1657:13): [True: 18.0k, False: 251k]
  ------------------
 1658|  18.0k|            assert(ss_hor == 1);
  ------------------
  |  Branch (1658:13): [True: 18.0k, False: 0]
  ------------------
 1659|  18.0k|            ptrdiff_t h_off = 0, v_off = 0;
 1660|  18.0k|            if (bw4 == 1 && bh4 == ss_ver) {
  ------------------
  |  Branch (1660:17): [True: 10.7k, False: 7.30k]
  |  Branch (1660:29): [True: 2.86k, False: 7.86k]
  ------------------
 1661|  8.59k|                for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1661:34): [True: 5.72k, False: 2.86k]
  ------------------
 1662|  5.72k|                    res = mc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff,
 1663|  5.72k|                             NULL, f->cur.stride[1],
 1664|  5.72k|                             bw4, bh4, t->bx - 1, t->by - 1, 1 + pl,
 1665|  5.72k|                             r[-1][t->bx - 1].mv.mv[0],
 1666|  5.72k|                             &f->refp[r[-1][t->bx - 1].ref.ref[0] - 1],
 1667|  5.72k|                             r[-1][t->bx - 1].ref.ref[0] - 1,
 1668|  5.72k|                             t->frame_thread.pass != 2 ? t->tl_4x4_filter :
  ------------------
  |  Branch (1668:30): [True: 5.72k, False: 0]
  ------------------
 1669|  5.72k|                                 f->frame_thread.b[((t->by - 1) * f->b4_stride) + t->bx - 1].filter2d);
 1670|  5.72k|                    if (res) return res;
  ------------------
  |  Branch (1670:25): [True: 0, False: 5.72k]
  ------------------
 1671|  5.72k|                }
 1672|  2.86k|                v_off = 2 * PXSTRIDE(f->cur.stride[1]);
  ------------------
  |  |   53|  2.86k|#define PXSTRIDE(x) (x)
  ------------------
 1673|  2.86k|                h_off = 2;
 1674|  2.86k|            }
 1675|  18.0k|            if (bw4 == 1) {
  ------------------
  |  Branch (1675:17): [True: 10.7k, False: 7.30k]
  ------------------
 1676|  10.7k|                const enum Filter2d left_filter_2d =
 1677|  10.7k|                    dav1d_filter_2d[t->l.filter[1][by4]][t->l.filter[0][by4]];
 1678|  32.1k|                for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1678:34): [True: 21.4k, False: 10.7k]
  ------------------
 1679|  21.4k|                    res = mc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff + v_off, NULL,
 1680|  21.4k|                             f->cur.stride[1], bw4, bh4, t->bx - 1,
 1681|  21.4k|                             t->by, 1 + pl, r[0][t->bx - 1].mv.mv[0],
 1682|  21.4k|                             &f->refp[r[0][t->bx - 1].ref.ref[0] - 1],
 1683|  21.4k|                             r[0][t->bx - 1].ref.ref[0] - 1,
 1684|  21.4k|                             t->frame_thread.pass != 2 ? left_filter_2d :
  ------------------
  |  Branch (1684:30): [True: 21.4k, False: 0]
  ------------------
 1685|  21.4k|                                 f->frame_thread.b[(t->by * f->b4_stride) + t->bx - 1].filter2d);
 1686|  21.4k|                    if (res) return res;
  ------------------
  |  Branch (1686:25): [True: 0, False: 21.4k]
  ------------------
 1687|  21.4k|                }
 1688|  10.7k|                h_off = 2;
 1689|  10.7k|            }
 1690|  18.0k|            if (bh4 == ss_ver) {
  ------------------
  |  Branch (1690:17): [True: 10.1k, False: 7.86k]
  ------------------
 1691|  10.1k|                const enum Filter2d top_filter_2d =
 1692|  10.1k|                    dav1d_filter_2d[t->a->filter[1][bx4]][t->a->filter[0][bx4]];
 1693|  30.5k|                for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1693:34): [True: 20.3k, False: 10.1k]
  ------------------
 1694|  20.3k|                    res = mc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff + h_off, NULL,
 1695|  20.3k|                             f->cur.stride[1], bw4, bh4, t->bx, t->by - 1,
 1696|  20.3k|                             1 + pl, r[-1][t->bx].mv.mv[0],
 1697|  20.3k|                             &f->refp[r[-1][t->bx].ref.ref[0] - 1],
 1698|  20.3k|                             r[-1][t->bx].ref.ref[0] - 1,
 1699|  20.3k|                             t->frame_thread.pass != 2 ? top_filter_2d :
  ------------------
  |  Branch (1699:30): [True: 20.3k, False: 0]
  ------------------
 1700|  20.3k|                                 f->frame_thread.b[((t->by - 1) * f->b4_stride) + t->bx].filter2d);
 1701|  20.3k|                    if (res) return res;
  ------------------
  |  Branch (1701:25): [True: 0, False: 20.3k]
  ------------------
 1702|  20.3k|                }
 1703|  10.1k|                v_off = 2 * PXSTRIDE(f->cur.stride[1]);
  ------------------
  |  |   53|  10.1k|#define PXSTRIDE(x) (x)
  ------------------
 1704|  10.1k|            }
 1705|  54.1k|            for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1705:30): [True: 36.0k, False: 18.0k]
  ------------------
 1706|  36.0k|                res = mc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff + h_off + v_off, NULL, f->cur.stride[1],
 1707|  36.0k|                         bw4, bh4, t->bx, t->by, 1 + pl, b->mv[0],
 1708|  36.0k|                         refp, b->ref[0], filter_2d);
 1709|  36.0k|                if (res) return res;
  ------------------
  |  Branch (1709:21): [True: 0, False: 36.0k]
  ------------------
 1710|  36.0k|            }
 1711|   251k|        } else {
 1712|   251k|            if (imin(cbw4, cbh4) > 1 &&
  ------------------
  |  Branch (1712:17): [True: 140k, False: 110k]
  ------------------
 1713|   140k|                ((b->inter_mode == GLOBALMV && f->gmv_warp_allowed[b->ref[0]]) ||
  ------------------
  |  Branch (1713:19): [True: 25.1k, False: 115k]
  |  Branch (1713:48): [True: 1.38k, False: 23.7k]
  ------------------
 1714|   139k|                 (b->motion_mode == MM_WARP && t->warpmv.type > DAV1D_WM_TYPE_TRANSLATION)))
  ------------------
  |  Branch (1714:19): [True: 13.4k, False: 125k]
  |  Branch (1714:48): [True: 12.6k, False: 839]
  ------------------
 1715|  13.9k|            {
 1716|  41.9k|                for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1716:34): [True: 27.9k, False: 13.9k]
  ------------------
 1717|  27.9k|                    res = warp_affine(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff, NULL,
 1718|  27.9k|                                      f->cur.stride[1], b_dim, 1 + pl, refp,
 1719|  27.9k|                                      b->motion_mode == MM_WARP ? &t->warpmv :
  ------------------
  |  Branch (1719:39): [True: 25.2k, False: 2.76k]
  ------------------
 1720|  27.9k|                                          &f->frame_hdr->gmv[b->ref[0]]);
 1721|  27.9k|                    if (res) return res;
  ------------------
  |  Branch (1721:25): [True: 0, False: 27.9k]
  ------------------
 1722|  27.9k|                }
 1723|   237k|            } else {
 1724|   711k|                for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1724:34): [True: 474k, False: 237k]
  ------------------
 1725|   474k|                    res = mc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff,
 1726|   474k|                             NULL, f->cur.stride[1],
 1727|   474k|                             bw4 << (bw4 == ss_hor), bh4 << (bh4 == ss_ver),
 1728|   474k|                             t->bx & ~ss_hor, t->by & ~ss_ver,
 1729|   474k|                             1 + pl, b->mv[0], refp, b->ref[0], filter_2d);
 1730|   474k|                    if (res) return res;
  ------------------
  |  Branch (1730:25): [True: 0, False: 474k]
  ------------------
 1731|   474k|                    if (b->motion_mode == MM_OBMC) {
  ------------------
  |  Branch (1731:25): [True: 131k, False: 342k]
  ------------------
 1732|   131k|                        res = obmc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff,
 1733|   131k|                                   f->cur.stride[1], b_dim, 1 + pl, bx4, by4, w4, h4);
 1734|   131k|                        if (res) return res;
  ------------------
  |  Branch (1734:29): [True: 0, False: 131k]
  ------------------
 1735|   131k|                    }
 1736|   474k|                }
 1737|   237k|            }
 1738|   251k|            if (b->interintra_type) {
  ------------------
  |  Branch (1738:17): [True: 24.0k, False: 227k]
  ------------------
 1739|       |                // FIXME for 8x32 with 4:2:2 subsampling, this probably does
 1740|       |                // the wrong thing since it will select 4x16, not 4x32, as a
 1741|       |                // transform size...
 1742|  24.0k|                const uint8_t *const ii_mask = II_MASK(chr_layout_idx, bs, b);
  ------------------
  |  |   83|  24.0k|    ((const uint8_t*)((uintptr_t)&dav1d_masks + \
  |  |   84|  24.0k|    (size_t)((b)->interintra_type == INTER_INTRA_BLEND ? \
  |  |  ------------------
  |  |  |  Branch (84:14): [True: 17.9k, False: 6.11k]
  |  |  ------------------
  |  |   85|  24.0k|    dav1d_masks.offsets[c][(bs)-BS_32x32].ii[(b)->interintra_mode] : \
  |  |   86|  24.0k|    dav1d_masks.offsets[c][(bs)-BS_32x32].wedge[0][(b)->wedge_idx]) * 8))
  ------------------
 1743|       |
 1744|  72.0k|                for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1744:34): [True: 48.0k, False: 24.0k]
  ------------------
 1745|  48.0k|                    pixel *const tmp = bitfn(t->scratch.interintra);
  ------------------
  |  |   51|  48.0k|#define bitfn(x) x##_8bpc
  ------------------
 1746|  48.0k|                    pixel *const tl_edge = bitfn(t->scratch.edge) + 32;
  ------------------
  |  |   51|  48.0k|#define bitfn(x) x##_8bpc
  ------------------
 1747|  48.0k|                    enum IntraPredMode m =
 1748|  48.0k|                        b->interintra_mode == II_SMOOTH_PRED ?
  ------------------
  |  Branch (1748:25): [True: 7.36k, False: 40.6k]
  ------------------
 1749|  40.6k|                        SMOOTH_PRED : b->interintra_mode;
 1750|  48.0k|                    int angle = 0;
 1751|  48.0k|                    pixel *const uvdst = ((pixel *) f->cur.data[1 + pl]) + uvdstoff;
 1752|  48.0k|                    const pixel *top_sb_edge = NULL;
 1753|  48.0k|                    if (!(t->by & (f->sb_step - 1))) {
  ------------------
  |  Branch (1753:25): [True: 7.53k, False: 40.5k]
  ------------------
 1754|  7.53k|                        top_sb_edge = f->ipred_edge[pl + 1];
 1755|  7.53k|                        const int sby = t->by >> f->sb_shift;
 1756|  7.53k|                        top_sb_edge += f->sb128w * 128 * (sby - 1);
 1757|  7.53k|                    }
 1758|  48.0k|                    m = bytefn(dav1d_prepare_intra_edges)(t->bx >> ss_hor,
  ------------------
  |  |   87|  48.0k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  48.0k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 1759|  48.0k|                                                          (t->bx >> ss_hor) >
 1760|  48.0k|                                                              (ts->tiling.col_start >> ss_hor),
 1761|  48.0k|                                                          t->by >> ss_ver,
 1762|  48.0k|                                                          (t->by >> ss_ver) >
 1763|  48.0k|                                                              (ts->tiling.row_start >> ss_ver),
 1764|  48.0k|                                                          ts->tiling.col_end >> ss_hor,
 1765|  48.0k|                                                          ts->tiling.row_end >> ss_ver,
 1766|  48.0k|                                                          0, uvdst, f->cur.stride[1],
 1767|  48.0k|                                                          top_sb_edge, m,
 1768|  48.0k|                                                          &angle, cbw4, cbh4, 0, tl_edge
 1769|  48.0k|                                                          HIGHBD_CALL_SUFFIX);
 1770|  48.0k|                    dsp->ipred.intra_pred[m](tmp, cbw4 * 4 * sizeof(pixel),
 1771|  48.0k|                                             tl_edge, cbw4 * 4, cbh4 * 4, 0, 0, 0
 1772|  48.0k|                                             HIGHBD_CALL_SUFFIX);
 1773|  48.0k|                    dsp->mc.blend(uvdst, f->cur.stride[1], tmp,
 1774|  48.0k|                                  cbw4 * 4, cbh4 * 4, ii_mask);
 1775|  48.0k|                }
 1776|  24.0k|            }
 1777|   251k|        }
 1778|       |
 1779|   460k|    skip_inter_chroma_pred: {}
 1780|   460k|        t->tl_4x4_filter = filter_2d;
 1781|   460k|    } else {
 1782|   104k|        const enum Filter2d filter_2d = b->filter2d;
 1783|       |        // Maximum super block size is 128x128
 1784|   104k|        int16_t (*tmp)[128 * 128] = t->scratch.compinter;
 1785|   104k|        int jnt_weight;
 1786|   104k|        uint8_t *const seg_mask = t->scratch.seg_mask;
 1787|   104k|        const uint8_t *mask;
 1788|       |
 1789|   313k|        for (int i = 0; i < 2; i++) {
  ------------------
  |  Branch (1789:25): [True: 208k, False: 104k]
  ------------------
 1790|   208k|            const Dav1dThreadPicture *const refp = &f->refp[b->ref[i]];
 1791|       |
 1792|   208k|            if (b->inter_mode == GLOBALMV_GLOBALMV && f->gmv_warp_allowed[b->ref[i]]) {
  ------------------
  |  Branch (1792:17): [True: 16.6k, False: 192k]
  |  Branch (1792:55): [True: 1.39k, False: 15.2k]
  ------------------
 1793|  1.39k|                res = warp_affine(t, NULL, tmp[i], bw4 * 4, b_dim, 0, refp,
 1794|  1.39k|                                  &f->frame_hdr->gmv[b->ref[i]]);
 1795|  1.39k|                if (res) return res;
  ------------------
  |  Branch (1795:21): [True: 0, False: 1.39k]
  ------------------
 1796|   207k|            } else {
 1797|   207k|                res = mc(t, NULL, tmp[i], 0, bw4, bh4, t->bx, t->by, 0,
 1798|   207k|                         b->mv[i], refp, b->ref[i], filter_2d);
 1799|   207k|                if (res) return res;
  ------------------
  |  Branch (1799:21): [True: 0, False: 207k]
  ------------------
 1800|   207k|            }
 1801|   208k|        }
 1802|   104k|        switch (b->comp_type) {
  ------------------
  |  Branch (1802:17): [True: 104k, False: 0]
  ------------------
 1803|  66.1k|        case COMP_INTER_AVG:
  ------------------
  |  Branch (1803:9): [True: 66.1k, False: 38.3k]
  ------------------
 1804|  66.1k|            dsp->mc.avg(dst, f->cur.stride[0], tmp[0], tmp[1],
 1805|  66.1k|                        bw4 * 4, bh4 * 4 HIGHBD_CALL_SUFFIX);
 1806|  66.1k|            break;
 1807|  15.3k|        case COMP_INTER_WEIGHTED_AVG:
  ------------------
  |  Branch (1807:9): [True: 15.3k, False: 89.1k]
  ------------------
 1808|  15.3k|            jnt_weight = f->jnt_weights[b->ref[0]][b->ref[1]];
 1809|  15.3k|            dsp->mc.w_avg(dst, f->cur.stride[0], tmp[0], tmp[1],
 1810|  15.3k|                          bw4 * 4, bh4 * 4, jnt_weight HIGHBD_CALL_SUFFIX);
 1811|  15.3k|            break;
 1812|  15.7k|        case COMP_INTER_SEG:
  ------------------
  |  Branch (1812:9): [True: 15.7k, False: 88.7k]
  ------------------
 1813|  15.7k|            dsp->mc.w_mask[chr_layout_idx](dst, f->cur.stride[0],
 1814|  15.7k|                                           tmp[b->mask_sign], tmp[!b->mask_sign],
 1815|  15.7k|                                           bw4 * 4, bh4 * 4, seg_mask,
 1816|  15.7k|                                           b->mask_sign HIGHBD_CALL_SUFFIX);
 1817|  15.7k|            mask = seg_mask;
 1818|  15.7k|            break;
 1819|  7.24k|        case COMP_INTER_WEDGE:
  ------------------
  |  Branch (1819:9): [True: 7.24k, False: 97.2k]
  ------------------
 1820|  7.24k|            mask = WEDGE_MASK(0, bs, 0, b->wedge_idx);
  ------------------
  |  |   89|  7.24k|    ((const uint8_t*)((uintptr_t)&dav1d_masks + \
  |  |   90|  7.24k|    (size_t)dav1d_masks.offsets[c][(bs)-BS_32x32].wedge[sign][idx] * 8))
  ------------------
 1821|  7.24k|            dsp->mc.mask(dst, f->cur.stride[0],
 1822|  7.24k|                         tmp[b->mask_sign], tmp[!b->mask_sign],
 1823|  7.24k|                         bw4 * 4, bh4 * 4, mask HIGHBD_CALL_SUFFIX);
 1824|  7.24k|            if (has_chroma)
  ------------------
  |  Branch (1824:17): [True: 4.58k, False: 2.66k]
  ------------------
 1825|  4.58k|                mask = WEDGE_MASK(chr_layout_idx, bs, b->mask_sign, b->wedge_idx);
  ------------------
  |  |   89|  4.58k|    ((const uint8_t*)((uintptr_t)&dav1d_masks + \
  |  |   90|  4.58k|    (size_t)dav1d_masks.offsets[c][(bs)-BS_32x32].wedge[sign][idx] * 8))
  ------------------
 1826|  7.24k|            break;
 1827|   104k|        }
 1828|       |
 1829|       |        // chroma
 1830|   174k|        if (has_chroma) for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1830:13): [True: 58.1k, False: 46.3k]
  |  Branch (1830:42): [True: 116k, False: 58.1k]
  ------------------
 1831|   348k|            for (int i = 0; i < 2; i++) {
  ------------------
  |  Branch (1831:29): [True: 232k, False: 116k]
  ------------------
 1832|   232k|                const Dav1dThreadPicture *const refp = &f->refp[b->ref[i]];
 1833|   232k|                if (b->inter_mode == GLOBALMV_GLOBALMV &&
  ------------------
  |  Branch (1833:21): [True: 21.6k, False: 210k]
  ------------------
 1834|  21.6k|                    imin(cbw4, cbh4) > 1 && f->gmv_warp_allowed[b->ref[i]])
  ------------------
  |  Branch (1834:21): [True: 20.1k, False: 1.50k]
  |  Branch (1834:45): [True: 1.19k, False: 18.9k]
  ------------------
 1835|  1.19k|                {
 1836|  1.19k|                    res = warp_affine(t, NULL, tmp[i], bw4 * 4 >> ss_hor,
 1837|  1.19k|                                      b_dim, 1 + pl,
 1838|  1.19k|                                      refp, &f->frame_hdr->gmv[b->ref[i]]);
 1839|  1.19k|                    if (res) return res;
  ------------------
  |  Branch (1839:25): [True: 0, False: 1.19k]
  ------------------
 1840|   231k|                } else {
 1841|   231k|                    res = mc(t, NULL, tmp[i], 0, bw4, bh4, t->bx, t->by,
 1842|   231k|                             1 + pl, b->mv[i], refp, b->ref[i], filter_2d);
 1843|   231k|                    if (res) return res;
  ------------------
  |  Branch (1843:25): [True: 0, False: 231k]
  ------------------
 1844|   231k|                }
 1845|   232k|            }
 1846|   116k|            pixel *const uvdst = ((pixel *) f->cur.data[1 + pl]) + uvdstoff;
 1847|   116k|            switch (b->comp_type) {
  ------------------
  |  Branch (1847:21): [True: 116k, False: 0]
  ------------------
 1848|  67.1k|            case COMP_INTER_AVG:
  ------------------
  |  Branch (1848:13): [True: 67.1k, False: 49.1k]
  ------------------
 1849|  67.1k|                dsp->mc.avg(uvdst, f->cur.stride[1], tmp[0], tmp[1],
 1850|  67.1k|                            bw4 * 4 >> ss_hor, bh4 * 4 >> ss_ver
 1851|  67.1k|                            HIGHBD_CALL_SUFFIX);
 1852|  67.1k|                break;
 1853|  22.7k|            case COMP_INTER_WEIGHTED_AVG:
  ------------------
  |  Branch (1853:13): [True: 22.7k, False: 93.5k]
  ------------------
 1854|  22.7k|                dsp->mc.w_avg(uvdst, f->cur.stride[1], tmp[0], tmp[1],
 1855|  22.7k|                              bw4 * 4 >> ss_hor, bh4 * 4 >> ss_ver, jnt_weight
 1856|  22.7k|                              HIGHBD_CALL_SUFFIX);
 1857|  22.7k|                break;
 1858|  9.17k|            case COMP_INTER_WEDGE:
  ------------------
  |  Branch (1858:13): [True: 9.17k, False: 107k]
  ------------------
 1859|  26.3k|            case COMP_INTER_SEG:
  ------------------
  |  Branch (1859:13): [True: 17.2k, False: 99.0k]
  ------------------
 1860|  26.3k|                dsp->mc.mask(uvdst, f->cur.stride[1],
 1861|  26.3k|                             tmp[b->mask_sign], tmp[!b->mask_sign],
 1862|  26.3k|                             bw4 * 4 >> ss_hor, bh4 * 4 >> ss_ver, mask
 1863|  26.3k|                             HIGHBD_CALL_SUFFIX);
 1864|  26.3k|                break;
 1865|   116k|            }
 1866|   116k|        }
 1867|   104k|    }
 1868|       |
 1869|   706k|    if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   34|   706k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 706k]
  |  |  ------------------
  |  |   35|   706k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   706k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                  if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1870|      0|        hex_dump(dst, f->cur.stride[0], b_dim[0] * 4, b_dim[1] * 4, "y-pred");
 1871|      0|        if (has_chroma) {
  ------------------
  |  Branch (1871:13): [True: 0, False: 0]
  ------------------
 1872|      0|            hex_dump(&((pixel *) f->cur.data[1])[uvdstoff], f->cur.stride[1],
 1873|      0|                     cbw4 * 4, cbh4 * 4, "u-pred");
 1874|      0|            hex_dump(&((pixel *) f->cur.data[2])[uvdstoff], f->cur.stride[1],
 1875|      0|                     cbw4 * 4, cbh4 * 4, "v-pred");
 1876|      0|        }
 1877|      0|    }
 1878|       |
 1879|   706k|    const int cw4 = (w4 + ss_hor) >> ss_hor, ch4 = (h4 + ss_ver) >> ss_ver;
 1880|       |
 1881|   706k|    if (b->skip) {
  ------------------
  |  Branch (1881:9): [True: 395k, False: 311k]
  ------------------
 1882|       |        // reset coef contexts
 1883|   395k|        BlockContext *const a = t->a;
 1884|   395k|        dav1d_memset_pow2[b_dim[2]](&a->lcoef[bx4], 0x40);
 1885|   395k|        dav1d_memset_pow2[b_dim[3]](&t->l.lcoef[by4], 0x40);
 1886|   395k|        if (has_chroma) {
  ------------------
  |  Branch (1886:13): [True: 212k, False: 182k]
  ------------------
 1887|   212k|            dav1d_memset_pow2_fn memset_cw = dav1d_memset_pow2[ulog2(cbw4)];
 1888|   212k|            dav1d_memset_pow2_fn memset_ch = dav1d_memset_pow2[ulog2(cbh4)];
 1889|   212k|            memset_cw(&a->ccoef[0][cbx4], 0x40);
 1890|   212k|            memset_cw(&a->ccoef[1][cbx4], 0x40);
 1891|   212k|            memset_ch(&t->l.ccoef[0][cby4], 0x40);
 1892|   212k|            memset_ch(&t->l.ccoef[1][cby4], 0x40);
 1893|   212k|        }
 1894|   395k|        return 0;
 1895|   395k|    }
 1896|       |
 1897|   311k|    const TxfmInfo *const uvtx = &dav1d_txfm_dimensions[b->uvtx];
 1898|   311k|    const TxfmInfo *const ytx = &dav1d_txfm_dimensions[b->max_ytx];
 1899|   311k|    const uint16_t tx_split[2] = { b->tx_split0, b->tx_split1 };
 1900|       |
 1901|   626k|    for (int init_y = 0; init_y < bh4; init_y += 16) {
  ------------------
  |  Branch (1901:26): [True: 315k, False: 311k]
  ------------------
 1902|   637k|        for (int init_x = 0; init_x < bw4; init_x += 16) {
  ------------------
  |  Branch (1902:30): [True: 322k, False: 315k]
  ------------------
 1903|       |            // coefficient coding & inverse transforms
 1904|   322k|            int y_off = !!init_y, y;
 1905|   322k|            dst += PXSTRIDE(f->cur.stride[0]) * 4 * init_y;
  ------------------
  |  |   53|   322k|#define PXSTRIDE(x) (x)
  ------------------
 1906|   673k|            for (y = init_y, t->by += init_y; y < imin(h4, init_y + 16);
  ------------------
  |  Branch (1906:47): [True: 350k, False: 322k]
  ------------------
 1907|   350k|                 y += ytx->h, y_off++)
 1908|   350k|            {
 1909|   350k|                int x, x_off = !!init_x;
 1910|   857k|                for (x = init_x, t->bx += init_x; x < imin(w4, init_x + 16);
  ------------------
  |  Branch (1910:51): [True: 506k, False: 350k]
  ------------------
 1911|   506k|                     x += ytx->w, x_off++)
 1912|   506k|                {
 1913|   506k|                    read_coef_tree(t, bs, b, b->max_ytx, 0, tx_split,
 1914|   506k|                                   x_off, y_off, &dst[x * 4]);
 1915|   506k|                    t->bx += ytx->w;
 1916|   506k|                }
 1917|   350k|                dst += PXSTRIDE(f->cur.stride[0]) * 4 * ytx->h;
  ------------------
  |  |   53|   350k|#define PXSTRIDE(x) (x)
  ------------------
 1918|   350k|                t->bx -= x;
 1919|   350k|                t->by += ytx->h;
 1920|   350k|            }
 1921|   322k|            dst -= PXSTRIDE(f->cur.stride[0]) * 4 * y;
  ------------------
  |  |   53|   322k|#define PXSTRIDE(x) (x)
  ------------------
 1922|   322k|            t->by -= y;
 1923|       |
 1924|       |            // chroma coefs and inverse transform
 1925|   702k|            if (has_chroma) for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1925:17): [True: 234k, False: 88.3k]
  |  Branch (1925:46): [True: 468k, False: 234k]
  ------------------
 1926|   468k|                pixel *uvdst = ((pixel *) f->cur.data[1 + pl]) + uvdstoff +
 1927|   468k|                    (PXSTRIDE(f->cur.stride[1]) * init_y * 4 >> ss_ver);
  ------------------
  |  |   53|   468k|#define PXSTRIDE(x) (x)
  ------------------
 1928|   468k|                for (y = init_y >> ss_ver, t->by += init_y;
 1929|   969k|                     y < imin(ch4, (init_y + 16) >> ss_ver); y += uvtx->h)
  ------------------
  |  Branch (1929:22): [True: 500k, False: 468k]
  ------------------
 1930|   500k|                {
 1931|   500k|                    int x;
 1932|   500k|                    for (x = init_x >> ss_hor, t->bx += init_x;
 1933|  1.10M|                         x < imin(cw4, (init_x + 16) >> ss_hor); x += uvtx->w)
  ------------------
  |  Branch (1933:26): [True: 605k, False: 500k]
  ------------------
 1934|   605k|                    {
 1935|   605k|                        coef *cf;
 1936|   605k|                        int eob;
 1937|   605k|                        enum TxfmType txtp;
 1938|   605k|                        if (t->frame_thread.pass) {
  ------------------
  |  Branch (1938:29): [True: 0, False: 605k]
  ------------------
 1939|      0|                            const int p = t->frame_thread.pass & 1;
 1940|      0|                            const int cbi = *ts->frame_thread[p].cbi++;
 1941|      0|                            cf = ts->frame_thread[p].cf;
 1942|      0|                            ts->frame_thread[p].cf += uvtx->w * uvtx->h * 16;
 1943|      0|                            eob  = cbi >> 5;
 1944|      0|                            txtp = cbi & 0x1f;
 1945|   605k|                        } else {
 1946|   605k|                            uint8_t cf_ctx;
 1947|   605k|                            cf = bitfn(t->cf);
  ------------------
  |  |   51|   605k|#define bitfn(x) x##_8bpc
  ------------------
 1948|   605k|                            txtp = t->scratch.txtp_map[(by4 + (y << ss_ver)) * 32 +
 1949|   605k|                                                        bx4 + (x << ss_hor)];
 1950|   605k|                            eob = decode_coefs(t, &t->a->ccoef[pl][cbx4 + x],
 1951|   605k|                                               &t->l.ccoef[pl][cby4 + y],
 1952|   605k|                                               b->uvtx, bs, b, 0, 1 + pl,
 1953|   605k|                                               cf, &txtp, &cf_ctx);
 1954|   605k|                            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   605k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 605k]
  |  |  ------------------
  |  |   35|   605k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   605k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1955|      0|                                printf("Post-uv-cf-blk[pl=%d,tx=%d,"
 1956|      0|                                       "txtp=%d,eob=%d]: r=%d\n",
 1957|      0|                                       pl, b->uvtx, txtp, eob, ts->msac.rng);
 1958|   605k|                            int ctw = imin(uvtx->w, (f->bw - t->bx + ss_hor) >> ss_hor);
 1959|   605k|                            int cth = imin(uvtx->h, (f->bh - t->by + ss_ver) >> ss_ver);
 1960|   605k|                            dav1d_memset_likely_pow2(&t->a->ccoef[pl][cbx4 + x], cf_ctx, ctw);
 1961|   605k|                            dav1d_memset_likely_pow2(&t->l.ccoef[pl][cby4 + y], cf_ctx, cth);
 1962|   605k|                        }
 1963|   605k|                        if (eob >= 0) {
  ------------------
  |  Branch (1963:29): [True: 197k, False: 407k]
  ------------------
 1964|   197k|                            if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   34|   197k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 197k]
  |  |  ------------------
  |  |   35|   197k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   197k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                                          if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1965|      0|                                coef_dump(cf, uvtx->h * 4, uvtx->w * 4, 3, "dq");
 1966|   197k|                            dsp->itx.itxfm_add[b->uvtx]
 1967|   197k|                                              [txtp](&uvdst[4 * x],
 1968|   197k|                                                     f->cur.stride[1],
 1969|   197k|                                                     cf, eob HIGHBD_CALL_SUFFIX);
 1970|   197k|                            if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   34|   197k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 197k]
  |  |  ------------------
  |  |   35|   197k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   197k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                                          if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1971|      0|                                hex_dump(&uvdst[4 * x], f->cur.stride[1],
 1972|      0|                                         uvtx->w * 4, uvtx->h * 4, "recon");
 1973|   197k|                        }
 1974|   605k|                        t->bx += uvtx->w << ss_hor;
 1975|   605k|                    }
 1976|   500k|                    uvdst += PXSTRIDE(f->cur.stride[1]) * 4 * uvtx->h;
  ------------------
  |  |   53|   500k|#define PXSTRIDE(x) (x)
  ------------------
 1977|   500k|                    t->bx -= x << ss_hor;
 1978|   500k|                    t->by += uvtx->h << ss_ver;
 1979|   500k|                }
 1980|   468k|                t->by -= y << ss_ver;
 1981|   468k|            }
 1982|   322k|        }
 1983|   315k|    }
 1984|   311k|    return 0;
 1985|   706k|}
dav1d_filter_sbrow_deblock_cols_8bpc:
 1987|  72.1k|void bytefn(dav1d_filter_sbrow_deblock_cols)(Dav1dFrameContext *const f, const int sby) {
 1988|  72.1k|    if (!(f->c->inloop_filters & DAV1D_INLOOPFILTER_DEBLOCK) ||
  ------------------
  |  Branch (1988:9): [True: 0, False: 72.1k]
  ------------------
 1989|  72.1k|        (!f->frame_hdr->loopfilter.level_y[0] && !f->frame_hdr->loopfilter.level_y[1]))
  ------------------
  |  Branch (1989:10): [True: 44.8k, False: 27.2k]
  |  Branch (1989:50): [True: 39.7k, False: 5.12k]
  ------------------
 1990|  39.7k|    {
 1991|  39.7k|        return;
 1992|  39.7k|    }
 1993|  32.3k|    const int y = sby * f->sb_step * 4;
 1994|  32.3k|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
 1995|  32.3k|    pixel *const p[3] = {
 1996|  32.3k|        f->lf.p[0] + y * PXSTRIDE(f->cur.stride[0]),
  ------------------
  |  |   53|  32.3k|#define PXSTRIDE(x) (x)
  ------------------
 1997|  32.3k|        f->lf.p[1] + (y * PXSTRIDE(f->cur.stride[1]) >> ss_ver),
  ------------------
  |  |   53|  32.3k|#define PXSTRIDE(x) (x)
  ------------------
 1998|  32.3k|        f->lf.p[2] + (y * PXSTRIDE(f->cur.stride[1]) >> ss_ver)
  ------------------
  |  |   53|  32.3k|#define PXSTRIDE(x) (x)
  ------------------
 1999|  32.3k|    };
 2000|  32.3k|    Av1Filter *mask = f->lf.mask + (sby >> !f->seq_hdr->sb128) * f->sb128w;
 2001|  32.3k|    bytefn(dav1d_loopfilter_sbrow_cols)(f, p, mask, sby,
  ------------------
  |  |   87|  32.3k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  32.3k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2002|  32.3k|                                        f->lf.start_of_tile_row[sby]);
 2003|  32.3k|}
dav1d_filter_sbrow_deblock_rows_8bpc:
 2005|  72.1k|void bytefn(dav1d_filter_sbrow_deblock_rows)(Dav1dFrameContext *const f, const int sby) {
 2006|  72.1k|    const int y = sby * f->sb_step * 4;
 2007|  72.1k|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
 2008|  72.1k|    pixel *const p[3] = {
 2009|  72.1k|        f->lf.p[0] + y * PXSTRIDE(f->cur.stride[0]),
  ------------------
  |  |   53|  72.1k|#define PXSTRIDE(x) (x)
  ------------------
 2010|  72.1k|        f->lf.p[1] + (y * PXSTRIDE(f->cur.stride[1]) >> ss_ver),
  ------------------
  |  |   53|  72.1k|#define PXSTRIDE(x) (x)
  ------------------
 2011|  72.1k|        f->lf.p[2] + (y * PXSTRIDE(f->cur.stride[1]) >> ss_ver)
  ------------------
  |  |   53|  72.1k|#define PXSTRIDE(x) (x)
  ------------------
 2012|  72.1k|    };
 2013|  72.1k|    Av1Filter *mask = f->lf.mask + (sby >> !f->seq_hdr->sb128) * f->sb128w;
 2014|  72.1k|    if (f->c->inloop_filters & DAV1D_INLOOPFILTER_DEBLOCK &&
  ------------------
  |  Branch (2014:9): [True: 72.1k, False: 0]
  ------------------
 2015|  72.1k|        (f->frame_hdr->loopfilter.level_y[0] || f->frame_hdr->loopfilter.level_y[1]))
  ------------------
  |  Branch (2015:10): [True: 27.2k, False: 44.8k]
  |  Branch (2015:49): [True: 5.12k, False: 39.7k]
  ------------------
 2016|  32.3k|    {
 2017|  32.3k|        bytefn(dav1d_loopfilter_sbrow_rows)(f, p, mask, sby);
  ------------------
  |  |   87|  32.3k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  32.3k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2018|  32.3k|    }
 2019|  72.1k|    if (f->seq_hdr->cdef || f->lf.restore_planes) {
  ------------------
  |  Branch (2019:9): [True: 32.6k, False: 39.4k]
  |  Branch (2019:29): [True: 11.2k, False: 28.2k]
  ------------------
 2020|       |        // Store loop filtered pixels required by CDEF / LR
 2021|  43.9k|        bytefn(dav1d_copy_lpf)(f, p, sby);
  ------------------
  |  |   87|  43.9k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  43.9k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2022|  43.9k|    }
 2023|  72.1k|}
dav1d_filter_sbrow_cdef_8bpc:
 2025|  32.6k|void bytefn(dav1d_filter_sbrow_cdef)(Dav1dTaskContext *const tc, const int sby) {
 2026|  32.6k|    const Dav1dFrameContext *const f = tc->f;
 2027|  32.6k|    if (!(f->c->inloop_filters & DAV1D_INLOOPFILTER_CDEF)) return;
  ------------------
  |  Branch (2027:9): [True: 0, False: 32.6k]
  ------------------
 2028|  32.6k|    const int sbsz = f->sb_step;
 2029|  32.6k|    const int y = sby * sbsz * 4;
 2030|  32.6k|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
 2031|  32.6k|    pixel *const p[3] = {
 2032|  32.6k|        f->lf.p[0] + y * PXSTRIDE(f->cur.stride[0]),
  ------------------
  |  |   53|  32.6k|#define PXSTRIDE(x) (x)
  ------------------
 2033|  32.6k|        f->lf.p[1] + (y * PXSTRIDE(f->cur.stride[1]) >> ss_ver),
  ------------------
  |  |   53|  32.6k|#define PXSTRIDE(x) (x)
  ------------------
 2034|  32.6k|        f->lf.p[2] + (y * PXSTRIDE(f->cur.stride[1]) >> ss_ver)
  ------------------
  |  |   53|  32.6k|#define PXSTRIDE(x) (x)
  ------------------
 2035|  32.6k|    };
 2036|  32.6k|    Av1Filter *prev_mask = f->lf.mask + ((sby - 1) >> !f->seq_hdr->sb128) * f->sb128w;
 2037|  32.6k|    Av1Filter *mask = f->lf.mask + (sby >> !f->seq_hdr->sb128) * f->sb128w;
 2038|  32.6k|    const int start = sby * sbsz;
 2039|  32.6k|    if (sby) {
  ------------------
  |  Branch (2039:9): [True: 30.2k, False: 2.44k]
  ------------------
 2040|  30.2k|        const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
 2041|  30.2k|        pixel *p_up[3] = {
 2042|  30.2k|            p[0] - 8 * PXSTRIDE(f->cur.stride[0]),
  ------------------
  |  |   53|  30.2k|#define PXSTRIDE(x) (x)
  ------------------
 2043|  30.2k|            p[1] - (8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver),
  ------------------
  |  |   53|  30.2k|#define PXSTRIDE(x) (x)
  ------------------
 2044|  30.2k|            p[2] - (8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver),
  ------------------
  |  |   53|  30.2k|#define PXSTRIDE(x) (x)
  ------------------
 2045|  30.2k|        };
 2046|  30.2k|        bytefn(dav1d_cdef_brow)(tc, p_up, prev_mask, start - 2, start, 1, sby);
  ------------------
  |  |   87|  30.2k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  30.2k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2047|  30.2k|    }
 2048|  32.6k|    const int n_blks = sbsz - 2 * (sby + 1 < f->sbh);
 2049|  32.6k|    const int end = imin(start + n_blks, f->bh);
 2050|  32.6k|    bytefn(dav1d_cdef_brow)(tc, p, mask, start, end, 0, sby);
  ------------------
  |  |   87|  32.6k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  32.6k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2051|  32.6k|}
dav1d_filter_sbrow_resize_8bpc:
 2053|  3.29k|void bytefn(dav1d_filter_sbrow_resize)(Dav1dFrameContext *const f, const int sby) {
 2054|  3.29k|    const int sbsz = f->sb_step;
 2055|  3.29k|    const int y = sby * sbsz * 4;
 2056|  3.29k|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
 2057|  3.29k|    const pixel *const p[3] = {
 2058|  3.29k|        f->lf.p[0] + y * PXSTRIDE(f->cur.stride[0]),
  ------------------
  |  |   53|  3.29k|#define PXSTRIDE(x) (x)
  ------------------
 2059|  3.29k|        f->lf.p[1] + (y * PXSTRIDE(f->cur.stride[1]) >> ss_ver),
  ------------------
  |  |   53|  3.29k|#define PXSTRIDE(x) (x)
  ------------------
 2060|  3.29k|        f->lf.p[2] + (y * PXSTRIDE(f->cur.stride[1]) >> ss_ver)
  ------------------
  |  |   53|  3.29k|#define PXSTRIDE(x) (x)
  ------------------
 2061|  3.29k|    };
 2062|  3.29k|    pixel *const sr_p[3] = {
 2063|  3.29k|        f->lf.sr_p[0] + y * PXSTRIDE(f->sr_cur.p.stride[0]),
  ------------------
  |  |   53|  3.29k|#define PXSTRIDE(x) (x)
  ------------------
 2064|  3.29k|        f->lf.sr_p[1] + (y * PXSTRIDE(f->sr_cur.p.stride[1]) >> ss_ver),
  ------------------
  |  |   53|  3.29k|#define PXSTRIDE(x) (x)
  ------------------
 2065|  3.29k|        f->lf.sr_p[2] + (y * PXSTRIDE(f->sr_cur.p.stride[1]) >> ss_ver)
  ------------------
  |  |   53|  3.29k|#define PXSTRIDE(x) (x)
  ------------------
 2066|  3.29k|    };
 2067|  3.29k|    const int has_chroma = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400;
 2068|  12.2k|    for (int pl = 0; pl < 1 + 2 * has_chroma; pl++) {
  ------------------
  |  Branch (2068:22): [True: 8.91k, False: 3.29k]
  ------------------
 2069|  8.91k|        const int ss_ver = pl && f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
  ------------------
  |  Branch (2069:28): [True: 5.62k, False: 3.29k]
  |  Branch (2069:34): [True: 1.73k, False: 3.89k]
  ------------------
 2070|  8.91k|        const int h_start = 8 * !!sby >> ss_ver;
 2071|  8.91k|        const ptrdiff_t dst_stride = f->sr_cur.p.stride[!!pl];
 2072|  8.91k|        pixel *dst = sr_p[pl] - h_start * PXSTRIDE(dst_stride);
  ------------------
  |  |   53|  8.91k|#define PXSTRIDE(x) (x)
  ------------------
 2073|  8.91k|        const ptrdiff_t src_stride = f->cur.stride[!!pl];
 2074|  8.91k|        const pixel *src = p[pl] - h_start * PXSTRIDE(src_stride);
  ------------------
  |  |   53|  8.91k|#define PXSTRIDE(x) (x)
  ------------------
 2075|  8.91k|        const int h_end = 4 * (sbsz - 2 * (sby + 1 < f->sbh)) >> ss_ver;
 2076|  8.91k|        const int ss_hor = pl && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
  ------------------
  |  Branch (2076:28): [True: 5.62k, False: 3.29k]
  |  Branch (2076:34): [True: 2.82k, False: 2.80k]
  ------------------
 2077|  8.91k|        const int dst_w = (f->sr_cur.p.p.w + ss_hor) >> ss_hor;
 2078|  8.91k|        const int src_w = (4 * f->bw + ss_hor) >> ss_hor;
 2079|  8.91k|        const int img_h = (f->cur.p.h - sbsz * 4 * sby + ss_ver) >> ss_ver;
 2080|       |
 2081|  8.91k|        f->dsp->mc.resize(dst, dst_stride, src, src_stride, dst_w,
 2082|  8.91k|                          imin(img_h, h_end) + h_start, src_w,
 2083|  8.91k|                          f->resize_step[!!pl], f->resize_start[!!pl]
 2084|  8.91k|                          HIGHBD_CALL_SUFFIX);
 2085|  8.91k|    }
 2086|  3.29k|}
dav1d_filter_sbrow_lr_8bpc:
 2088|  20.1k|void bytefn(dav1d_filter_sbrow_lr)(Dav1dFrameContext *const f, const int sby) {
 2089|  20.1k|    if (!(f->c->inloop_filters & DAV1D_INLOOPFILTER_RESTORATION)) return;
  ------------------
  |  Branch (2089:9): [True: 0, False: 20.1k]
  ------------------
 2090|  20.1k|    const int y = sby * f->sb_step * 4;
 2091|  20.1k|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
 2092|  20.1k|    pixel *const sr_p[3] = {
 2093|  20.1k|        f->lf.sr_p[0] + y * PXSTRIDE(f->sr_cur.p.stride[0]),
  ------------------
  |  |   53|  20.1k|#define PXSTRIDE(x) (x)
  ------------------
 2094|  20.1k|        f->lf.sr_p[1] + (y * PXSTRIDE(f->sr_cur.p.stride[1]) >> ss_ver),
  ------------------
  |  |   53|  20.1k|#define PXSTRIDE(x) (x)
  ------------------
 2095|  20.1k|        f->lf.sr_p[2] + (y * PXSTRIDE(f->sr_cur.p.stride[1]) >> ss_ver)
  ------------------
  |  |   53|  20.1k|#define PXSTRIDE(x) (x)
  ------------------
 2096|  20.1k|    };
 2097|  20.1k|    bytefn(dav1d_lr_sbrow)(f, sr_p, sby);
  ------------------
  |  |   87|  20.1k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  20.1k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2098|  20.1k|}
dav1d_filter_sbrow_8bpc:
 2100|  72.1k|void bytefn(dav1d_filter_sbrow)(Dav1dFrameContext *const f, const int sby) {
 2101|  72.1k|    bytefn(dav1d_filter_sbrow_deblock_cols)(f, sby);
  ------------------
  |  |   87|  72.1k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  72.1k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2102|  72.1k|    bytefn(dav1d_filter_sbrow_deblock_rows)(f, sby);
  ------------------
  |  |   87|  72.1k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  72.1k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2103|  72.1k|    if (f->seq_hdr->cdef)
  ------------------
  |  Branch (2103:9): [True: 32.6k, False: 39.4k]
  ------------------
 2104|  32.6k|        bytefn(dav1d_filter_sbrow_cdef)(f->c->tc, sby);
  ------------------
  |  |   87|  32.6k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  32.6k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2105|  72.1k|    if (f->frame_hdr->width[0] != f->frame_hdr->width[1])
  ------------------
  |  Branch (2105:9): [True: 3.29k, False: 68.8k]
  ------------------
 2106|  3.29k|        bytefn(dav1d_filter_sbrow_resize)(f, sby);
  ------------------
  |  |   87|  3.29k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  3.29k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2107|  72.1k|    if (f->lf.restore_planes)
  ------------------
  |  Branch (2107:9): [True: 20.1k, False: 51.9k]
  ------------------
 2108|  20.1k|        bytefn(dav1d_filter_sbrow_lr)(f, sby);
  ------------------
  |  |   87|  20.1k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  20.1k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2109|  72.1k|}
dav1d_backup_ipred_edge_8bpc:
 2111|  78.2k|void bytefn(dav1d_backup_ipred_edge)(Dav1dTaskContext *const t) {
 2112|  78.2k|    const Dav1dFrameContext *const f = t->f;
 2113|  78.2k|    Dav1dTileState *const ts = t->ts;
 2114|  78.2k|    const int sby = t->by >> f->sb_shift;
 2115|  78.2k|    const int sby_off = f->sb128w * 128 * sby;
 2116|  78.2k|    const int x_off = ts->tiling.col_start;
 2117|       |
 2118|  78.2k|    const pixel *const y =
 2119|  78.2k|        ((const pixel *) f->cur.data[0]) + x_off * 4 +
 2120|  78.2k|                    ((t->by + f->sb_step) * 4 - 1) * PXSTRIDE(f->cur.stride[0]);
  ------------------
  |  |   53|  78.2k|#define PXSTRIDE(x) (x)
  ------------------
 2121|  78.2k|    pixel_copy(&f->ipred_edge[0][sby_off + x_off * 4], y,
  ------------------
  |  |   47|  78.2k|#define pixel_copy memcpy
  ------------------
 2122|  78.2k|               4 * (ts->tiling.col_end - x_off));
 2123|       |
 2124|  78.2k|    if (f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400) {
  ------------------
  |  Branch (2124:9): [True: 37.0k, False: 41.2k]
  ------------------
 2125|  37.0k|        const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
 2126|  37.0k|        const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
 2127|       |
 2128|  37.0k|        const ptrdiff_t uv_off = (x_off * 4 >> ss_hor) +
 2129|  37.0k|            (((t->by + f->sb_step) * 4 >> ss_ver) - 1) * PXSTRIDE(f->cur.stride[1]);
  ------------------
  |  |   53|  37.0k|#define PXSTRIDE(x) (x)
  ------------------
 2130|   111k|        for (int pl = 1; pl <= 2; pl++)
  ------------------
  |  Branch (2130:26): [True: 74.0k, False: 37.0k]
  ------------------
 2131|  74.0k|            pixel_copy(&f->ipred_edge[pl][sby_off + (x_off * 4 >> ss_hor)],
  ------------------
  |  |   47|  74.0k|#define pixel_copy memcpy
  ------------------
 2132|  74.0k|                       &((const pixel *) f->cur.data[pl])[uv_off],
 2133|  74.0k|                       4 * (ts->tiling.col_end - x_off) >> ss_hor);
 2134|  37.0k|    }
 2135|  78.2k|}
dav1d_copy_pal_block_y_8bpc:
 2141|  17.3k|{
 2142|  17.3k|    const Dav1dFrameContext *const f = t->f;
 2143|  17.3k|    pixel *const pal = t->frame_thread.pass ?
  ------------------
  |  Branch (2143:24): [True: 0, False: 17.3k]
  ------------------
 2144|      0|        f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
 2145|      0|                            ((t->bx >> 1) + (t->by & 1))][0] :
 2146|  17.3k|        bytefn(t->scratch.pal)[0];
  ------------------
  |  |   87|  17.3k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  17.3k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2147|  82.9k|    for (int x = 0; x < bw4; x++)
  ------------------
  |  Branch (2147:21): [True: 65.6k, False: 17.3k]
  ------------------
 2148|  65.6k|        memcpy(bytefn(t->al_pal)[0][bx4 + x][0], pal, 8 * sizeof(pixel));
  ------------------
  |  |   87|  65.6k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  65.6k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2149|  71.3k|    for (int y = 0; y < bh4; y++)
  ------------------
  |  Branch (2149:21): [True: 54.0k, False: 17.3k]
  ------------------
 2150|  54.0k|        memcpy(bytefn(t->al_pal)[1][by4 + y][0], pal, 8 * sizeof(pixel));
  ------------------
  |  |   87|  54.0k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  54.0k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2151|  17.3k|}
dav1d_copy_pal_block_uv_8bpc:
 2157|  9.68k|{
 2158|  9.68k|    const Dav1dFrameContext *const f = t->f;
 2159|  9.68k|    const pixel (*const pal)[8] = t->frame_thread.pass ?
  ------------------
  |  Branch (2159:35): [True: 0, False: 9.68k]
  ------------------
 2160|      0|        f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
 2161|      0|                            ((t->bx >> 1) + (t->by & 1))] :
 2162|  9.68k|        bytefn(t->scratch.pal);
  ------------------
  |  |   87|  9.68k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  9.68k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2163|       |    // see aomedia bug 2183 for why we use luma coordinates here
 2164|  29.0k|    for (int pl = 1; pl <= 2; pl++) {
  ------------------
  |  Branch (2164:22): [True: 19.3k, False: 9.68k]
  ------------------
 2165|  93.4k|        for (int x = 0; x < bw4; x++)
  ------------------
  |  Branch (2165:25): [True: 74.0k, False: 19.3k]
  ------------------
 2166|  74.0k|            memcpy(bytefn(t->al_pal)[0][bx4 + x][pl], pal[pl], 8 * sizeof(pixel));
  ------------------
  |  |   87|  74.0k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  74.0k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2167|  87.0k|        for (int y = 0; y < bh4; y++)
  ------------------
  |  Branch (2167:25): [True: 67.7k, False: 19.3k]
  ------------------
 2168|  67.7k|            memcpy(bytefn(t->al_pal)[1][by4 + y][pl], pal[pl], 8 * sizeof(pixel));
  ------------------
  |  |   87|  67.7k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  67.7k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2169|  19.3k|    }
 2170|  9.68k|}
dav1d_read_pal_plane_8bpc:
 2175|  26.9k|{
 2176|  26.9k|    Dav1dTileState *const ts = t->ts;
 2177|  26.9k|    const Dav1dFrameContext *const f = t->f;
 2178|  26.9k|    const int pal_sz = b->pal_sz[pl] = dav1d_msac_decode_symbol_adapt8(&ts->msac,
  ------------------
  |  |   48|  26.9k|#define dav1d_msac_decode_symbol_adapt8  dav1d_msac_decode_symbol_adapt8_sse2
  ------------------
 2179|  26.9k|                                           ts->cdf.m.pal_sz[pl][sz_ctx], 6) + 2;
 2180|  26.9k|    pixel cache[16], used_cache[8];
 2181|  26.9k|    int l_cache = pl ? t->pal_sz_uv[1][by4] : t->l.pal_sz[by4];
  ------------------
  |  Branch (2181:19): [True: 9.68k, False: 17.3k]
  ------------------
 2182|  26.9k|    int n_cache = 0;
 2183|       |    // don't reuse above palette outside SB64 boundaries
 2184|  26.9k|    int a_cache = by4 & 15 ? pl ? t->pal_sz_uv[0][bx4] : t->a->pal_sz[bx4] : 0;
  ------------------
  |  Branch (2184:19): [True: 22.4k, False: 4.54k]
  |  Branch (2184:30): [True: 7.80k, False: 14.6k]
  ------------------
 2185|  26.9k|    const pixel *l = bytefn(t->al_pal)[1][by4][pl];
  ------------------
  |  |   87|  26.9k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  26.9k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2186|  26.9k|    const pixel *a = bytefn(t->al_pal)[0][bx4][pl];
  ------------------
  |  |   87|  26.9k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  26.9k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2187|       |
 2188|       |    // fill/sort cache
 2189|  57.1k|    while (l_cache && a_cache) {
  ------------------
  |  Branch (2189:12): [True: 39.0k, False: 18.1k]
  |  Branch (2189:23): [True: 30.1k, False: 8.85k]
  ------------------
 2190|  30.1k|        if (*l < *a) {
  ------------------
  |  Branch (2190:13): [True: 10.9k, False: 19.2k]
  ------------------
 2191|  10.9k|            if (!n_cache || cache[n_cache - 1] != *l)
  ------------------
  |  Branch (2191:17): [True: 1.90k, False: 9.05k]
  |  Branch (2191:29): [True: 8.83k, False: 214]
  ------------------
 2192|  10.7k|                cache[n_cache++] = *l;
 2193|  10.9k|            l++;
 2194|  10.9k|            l_cache--;
 2195|  19.2k|        } else {
 2196|  19.2k|            if (*a == *l) {
  ------------------
  |  Branch (2196:17): [True: 7.55k, False: 11.6k]
  ------------------
 2197|  7.55k|                l++;
 2198|  7.55k|                l_cache--;
 2199|  7.55k|            }
 2200|  19.2k|            if (!n_cache || cache[n_cache - 1] != *a)
  ------------------
  |  Branch (2200:17): [True: 3.15k, False: 16.0k]
  |  Branch (2200:29): [True: 15.1k, False: 898]
  ------------------
 2201|  18.3k|                cache[n_cache++] = *a;
 2202|  19.2k|            a++;
 2203|  19.2k|            a_cache--;
 2204|  19.2k|        }
 2205|  30.1k|    }
 2206|  26.9k|    if (l_cache) {
  ------------------
  |  Branch (2206:9): [True: 8.85k, False: 18.1k]
  ------------------
 2207|  35.6k|        do {
 2208|  35.6k|            if (!n_cache || cache[n_cache - 1] != *l)
  ------------------
  |  Branch (2208:17): [True: 6.59k, False: 29.0k]
  |  Branch (2208:29): [True: 22.6k, False: 6.34k]
  ------------------
 2209|  29.2k|                cache[n_cache++] = *l;
 2210|  35.6k|            l++;
 2211|  35.6k|        } while (--l_cache > 0);
  ------------------
  |  Branch (2211:18): [True: 26.7k, False: 8.85k]
  ------------------
 2212|  18.1k|    } else if (a_cache) {
  ------------------
  |  Branch (2212:16): [True: 6.49k, False: 11.6k]
  ------------------
 2213|  27.4k|        do {
 2214|  27.4k|            if (!n_cache || cache[n_cache - 1] != *a)
  ------------------
  |  Branch (2214:17): [True: 4.51k, False: 22.9k]
  |  Branch (2214:29): [True: 17.1k, False: 5.82k]
  ------------------
 2215|  21.6k|                cache[n_cache++] = *a;
 2216|  27.4k|            a++;
 2217|  27.4k|        } while (--a_cache > 0);
  ------------------
  |  Branch (2217:18): [True: 20.9k, False: 6.49k]
  ------------------
 2218|  6.49k|    }
 2219|       |
 2220|       |    // find reused cache entries
 2221|  26.9k|    int i = 0;
 2222|  97.2k|    for (int n = 0; n < n_cache && i < pal_sz; n++)
  ------------------
  |  Branch (2222:21): [True: 73.4k, False: 23.8k]
  |  Branch (2222:36): [True: 70.2k, False: 3.14k]
  ------------------
 2223|  70.2k|        if (dav1d_msac_decode_bool_equi(&ts->msac))
  ------------------
  |  |   53|  70.2k|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
  |  Branch (2223:13): [True: 35.3k, False: 34.9k]
  ------------------
 2224|  35.3k|            used_cache[i++] = cache[n];
 2225|  26.9k|    const int n_used_cache = i;
 2226|       |
 2227|       |    // parse new entries
 2228|  26.9k|    pixel *const pal = t->frame_thread.pass ?
  ------------------
  |  Branch (2228:24): [True: 0, False: 26.9k]
  ------------------
 2229|      0|        f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
 2230|      0|                            ((t->bx >> 1) + (t->by & 1))][pl] :
 2231|  26.9k|        bytefn(t->scratch.pal)[pl];
  ------------------
  |  |   87|  26.9k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  26.9k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2232|  26.9k|    if (i < pal_sz) {
  ------------------
  |  Branch (2232:9): [True: 22.5k, False: 4.45k]
  ------------------
 2233|  22.5k|        const int bpc = BITDEPTH == 8 ? 8 : f->cur.p.bpc;
  ------------------
  |  Branch (2233:25): [True: 22.5k, Folded]
  ------------------
 2234|  22.5k|        int prev = pal[i++] = dav1d_msac_decode_bools(&ts->msac, bpc);
 2235|       |
 2236|  22.5k|        if (i < pal_sz) {
  ------------------
  |  Branch (2236:13): [True: 19.8k, False: 2.65k]
  ------------------
 2237|  19.8k|            int bits = bpc - 3 + dav1d_msac_decode_bools(&ts->msac, 2);
 2238|  19.8k|            const int max = (1 << bpc) - 1;
 2239|       |
 2240|  44.9k|            do {
 2241|  44.9k|                const int delta = dav1d_msac_decode_bools(&ts->msac, bits);
 2242|  44.9k|                prev = pal[i++] = imin(prev + delta + !pl, max);
 2243|  44.9k|                if (prev + !pl >= max) {
  ------------------
  |  Branch (2243:21): [True: 9.05k, False: 35.8k]
  ------------------
 2244|  25.6k|                    for (; i < pal_sz; i++)
  ------------------
  |  Branch (2244:28): [True: 16.6k, False: 9.05k]
  ------------------
 2245|  16.6k|                        pal[i] = max;
 2246|  9.05k|                    break;
 2247|  9.05k|                }
 2248|  35.8k|                bits = imin(bits, 1 + ulog2(max - prev - !pl));
 2249|  35.8k|            } while (i < pal_sz);
  ------------------
  |  Branch (2249:22): [True: 25.0k, False: 10.8k]
  ------------------
 2250|  19.8k|        }
 2251|       |
 2252|       |        // merge cache+new entries
 2253|  22.5k|        int n = 0, m = n_used_cache;
 2254|   129k|        for (i = 0; i < pal_sz; i++) {
  ------------------
  |  Branch (2254:21): [True: 106k, False: 22.5k]
  ------------------
 2255|   106k|            if (n < n_used_cache && (m >= pal_sz || used_cache[n] <= pal[m])) {
  ------------------
  |  Branch (2255:17): [True: 37.7k, False: 69.0k]
  |  Branch (2255:38): [True: 8.10k, False: 29.6k]
  |  Branch (2255:53): [True: 14.5k, False: 15.0k]
  ------------------
 2256|  22.6k|                pal[i] = used_cache[n++];
 2257|  84.0k|            } else {
 2258|  84.0k|                assert(m < pal_sz);
  ------------------
  |  Branch (2258:17): [True: 84.0k, False: 0]
  ------------------
 2259|  84.0k|                pal[i] = pal[m++];
 2260|  84.0k|            }
 2261|   106k|        }
 2262|  22.5k|    } else {
 2263|  4.45k|        memcpy(pal, used_cache, n_used_cache * sizeof(*used_cache));
 2264|  4.45k|    }
 2265|       |
 2266|  26.9k|    if (DEBUG_BLOCK_INFO) {
  ------------------
  |  |   34|  26.9k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 26.9k]
  |  |  ------------------
  |  |   35|  26.9k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  26.9k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 2267|      0|        printf("Post-pal[pl=%d,sz=%d,cache_size=%d,used_cache=%d]: r=%d, cache=",
 2268|      0|               pl, pal_sz, n_cache, n_used_cache, ts->msac.rng);
 2269|      0|        for (int n = 0; n < n_cache; n++)
  ------------------
  |  Branch (2269:25): [True: 0, False: 0]
  ------------------
 2270|      0|            printf("%c%02x", n ? ' ' : '[', cache[n]);
  ------------------
  |  Branch (2270:30): [True: 0, False: 0]
  ------------------
 2271|      0|        printf("%s, pal=", n_cache ? "]" : "[]");
  ------------------
  |  Branch (2271:28): [True: 0, False: 0]
  ------------------
 2272|      0|        for (int n = 0; n < pal_sz; n++)
  ------------------
  |  Branch (2272:25): [True: 0, False: 0]
  ------------------
 2273|      0|            printf("%c%02x", n ? ' ' : '[', pal[n]);
  ------------------
  |  Branch (2273:30): [True: 0, False: 0]
  ------------------
 2274|      0|        printf("]\n");
 2275|      0|    }
 2276|  26.9k|}
dav1d_read_pal_uv_8bpc:
 2280|  9.68k|{
 2281|  9.68k|    bytefn(dav1d_read_pal_plane)(t, b, 1, sz_ctx, bx4, by4);
  ------------------
  |  |   87|  9.68k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  9.68k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2282|       |
 2283|       |    // V pal coding
 2284|  9.68k|    Dav1dTileState *const ts = t->ts;
 2285|  9.68k|    const Dav1dFrameContext *const f = t->f;
 2286|  9.68k|    pixel *const pal = t->frame_thread.pass ?
  ------------------
  |  Branch (2286:24): [True: 0, False: 9.68k]
  ------------------
 2287|      0|        f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
 2288|      0|                            ((t->bx >> 1) + (t->by & 1))][2] :
 2289|  9.68k|        bytefn(t->scratch.pal)[2];
  ------------------
  |  |   87|  9.68k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  9.68k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2290|  9.68k|    const int bpc = BITDEPTH == 8 ? 8 : f->cur.p.bpc;
  ------------------
  |  Branch (2290:21): [True: 9.68k, Folded]
  ------------------
 2291|  9.68k|    if (dav1d_msac_decode_bool_equi(&ts->msac)) {
  ------------------
  |  |   53|  9.68k|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
  |  Branch (2291:9): [True: 5.14k, False: 4.53k]
  ------------------
 2292|  5.14k|        const int bits = bpc - 4 + dav1d_msac_decode_bools(&ts->msac, 2);
 2293|  5.14k|        int prev = pal[0] = dav1d_msac_decode_bools(&ts->msac, bpc);
 2294|  5.14k|        const int max = (1 << bpc) - 1;
 2295|  20.2k|        for (int i = 1; i < b->pal_sz[1]; i++) {
  ------------------
  |  Branch (2295:25): [True: 15.0k, False: 5.14k]
  ------------------
 2296|  15.0k|            int delta = dav1d_msac_decode_bools(&ts->msac, bits);
 2297|  15.0k|            if (delta && dav1d_msac_decode_bool_equi(&ts->msac)) delta = -delta;
  ------------------
  |  |   53|  14.5k|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
  |  Branch (2297:17): [True: 14.5k, False: 543]
  |  Branch (2297:26): [True: 6.78k, False: 7.74k]
  ------------------
 2298|  15.0k|            prev = pal[i] = (prev + delta) & max;
 2299|  15.0k|        }
 2300|  5.14k|    } else {
 2301|  21.8k|        for (int i = 0; i < b->pal_sz[1]; i++)
  ------------------
  |  Branch (2301:25): [True: 17.2k, False: 4.53k]
  ------------------
 2302|  17.2k|            pal[i] = dav1d_msac_decode_bools(&ts->msac, bpc);
 2303|  4.53k|    }
 2304|  9.68k|    if (DEBUG_BLOCK_INFO) {
  ------------------
  |  |   34|  9.68k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 9.68k]
  |  |  ------------------
  |  |   35|  9.68k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  9.68k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 2305|      0|        printf("Post-pal[pl=2]: r=%d ", ts->msac.rng);
 2306|      0|        for (int n = 0; n < b->pal_sz[1]; n++)
  ------------------
  |  Branch (2306:25): [True: 0, False: 0]
  ------------------
 2307|      0|            printf("%c%02x", n ? ' ' : '[', pal[n]);
  ------------------
  |  Branch (2307:30): [True: 0, False: 0]
  ------------------
 2308|      0|        printf("]\n");
 2309|      0|    }
 2310|  9.68k|}
recon_tmpl.c:read_coef_tree:
  736|  1.16M|{
  737|  1.16M|    const Dav1dFrameContext *const f = t->f;
  738|  1.16M|    Dav1dTileState *const ts = t->ts;
  739|  1.16M|    const Dav1dDSPContext *const dsp = f->dsp;
  740|  1.16M|    const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[ytx];
  741|  1.16M|    const int txw = t_dim->w, txh = t_dim->h;
  742|       |
  743|       |    /* y_off can be larger than 3 since lossless blocks use TX_4X4 but can't
  744|       |     * be splitted. Aviods an undefined left shift. */
  745|  1.16M|    if (depth < 2 && tx_split[depth] &&
  ------------------
  |  Branch (745:9): [True: 1.07M, False: 97.0k]
  |  Branch (745:22): [True: 110k, False: 959k]
  ------------------
  746|   110k|        tx_split[depth] & (1 << (y_off * 4 + x_off)))
  ------------------
  |  Branch (746:9): [True: 85.9k, False: 24.9k]
  ------------------
  747|  85.9k|    {
  748|  85.9k|        const enum RectTxfmSize sub = t_dim->sub;
  749|  85.9k|        const TxfmInfo *const sub_t_dim = &dav1d_txfm_dimensions[sub];
  750|  85.9k|        const int txsw = sub_t_dim->w, txsh = sub_t_dim->h;
  751|       |
  752|  85.9k|        read_coef_tree(t, bs, b, sub, depth + 1, tx_split,
  753|  85.9k|                       x_off * 2 + 0, y_off * 2 + 0, dst);
  754|  85.9k|        t->bx += txsw;
  755|  85.9k|        if (txw >= txh && t->bx < f->bw)
  ------------------
  |  Branch (755:13): [True: 68.0k, False: 17.8k]
  |  Branch (755:27): [True: 67.4k, False: 624]
  ------------------
  756|  67.4k|            read_coef_tree(t, bs, b, sub, depth + 1, tx_split, x_off * 2 + 1,
  757|  67.4k|                           y_off * 2 + 0, dst ? &dst[4 * txsw] : NULL);
  ------------------
  |  Branch (757:43): [True: 67.4k, False: 0]
  ------------------
  758|  85.9k|        t->bx -= txsw;
  759|  85.9k|        t->by += txsh;
  760|  85.9k|        if (txh >= txw && t->by < f->bh) {
  ------------------
  |  Branch (760:13): [True: 61.2k, False: 24.7k]
  |  Branch (760:27): [True: 60.0k, False: 1.19k]
  ------------------
  761|  60.0k|            if (dst)
  ------------------
  |  Branch (761:17): [True: 60.0k, False: 0]
  ------------------
  762|  60.0k|                dst += 4 * txsh * PXSTRIDE(f->cur.stride[0]);
  ------------------
  |  |   53|  60.0k|#define PXSTRIDE(x) (x)
  ------------------
  763|  60.0k|            read_coef_tree(t, bs, b, sub, depth + 1, tx_split,
  764|  60.0k|                           x_off * 2 + 0, y_off * 2 + 1, dst);
  765|  60.0k|            t->bx += txsw;
  766|  60.0k|            if (txw >= txh && t->bx < f->bw)
  ------------------
  |  Branch (766:17): [True: 42.1k, False: 17.8k]
  |  Branch (766:31): [True: 41.5k, False: 603]
  ------------------
  767|  41.5k|                read_coef_tree(t, bs, b, sub, depth + 1, tx_split, x_off * 2 + 1,
  768|  41.5k|                               y_off * 2 + 1, dst ? &dst[4 * txsw] : NULL);
  ------------------
  |  Branch (768:47): [True: 41.5k, False: 0]
  ------------------
  769|  60.0k|            t->bx -= txsw;
  770|  60.0k|        }
  771|  85.9k|        t->by -= txsh;
  772|  1.08M|    } else {
  773|  1.08M|        const int bx4 = t->bx & 31, by4 = t->by & 31;
  774|  1.08M|        enum TxfmType txtp;
  775|  1.08M|        uint8_t cf_ctx;
  776|  1.08M|        int eob;
  777|  1.08M|        coef *cf;
  778|       |
  779|  1.08M|        if (t->frame_thread.pass) {
  ------------------
  |  Branch (779:13): [True: 0, False: 1.08M]
  ------------------
  780|      0|            const int p = t->frame_thread.pass & 1;
  781|      0|            assert(ts->frame_thread[p].cf);
  ------------------
  |  Branch (781:13): [True: 0, False: 0]
  ------------------
  782|      0|            cf = ts->frame_thread[p].cf;
  783|      0|            ts->frame_thread[p].cf += imin(t_dim->w, 8) * imin(t_dim->h, 8) * 16;
  784|  1.08M|        } else {
  785|  1.08M|            cf = bitfn(t->cf);
  ------------------
  |  |   51|  1.08M|#define bitfn(x) x##_8bpc
  ------------------
  786|  1.08M|        }
  787|  1.08M|        if (t->frame_thread.pass != 2) {
  ------------------
  |  Branch (787:13): [True: 1.08M, False: 0]
  ------------------
  788|  1.08M|            eob = decode_coefs(t, &t->a->lcoef[bx4], &t->l.lcoef[by4],
  789|  1.08M|                               ytx, bs, b, 0, 0, cf, &txtp, &cf_ctx);
  790|  1.08M|            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  1.08M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 1.08M]
  |  |  ------------------
  |  |   35|  1.08M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  1.08M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  791|      0|                printf("Post-y-cf-blk[tx=%d,txtp=%d,eob=%d]: r=%d\n",
  792|      0|                       ytx, txtp, eob, ts->msac.rng);
  793|  1.08M|            dav1d_memset_likely_pow2(&t->a->lcoef[bx4], cf_ctx, imin(txw, f->bw - t->bx));
  794|  1.08M|            dav1d_memset_likely_pow2(&t->l.lcoef[by4], cf_ctx, imin(txh, f->bh - t->by));
  795|  1.08M|#define set_ctx(rep_macro) \
  796|  1.08M|            for (int y = 0; y < txh; y++) { \
  797|  1.08M|                rep_macro(txtp_map, 0, txtp); \
  798|  1.08M|                txtp_map += 32; \
  799|  1.08M|            }
  800|  1.08M|            uint8_t *txtp_map = &t->scratch.txtp_map[by4 * 32 + bx4];
  801|  1.08M|            case_set_upto16(t_dim->lw);
  ------------------
  |  |   80|  1.08M|    switch (var) { \
  |  |   81|   495k|    case 0: set_ctx(set_ctx1); break; \
  |  |  ------------------
  |  |  |  |  796|  1.06M|            for (int y = 0; y < txh; y++) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (796:29): [True: 572k, False: 495k]
  |  |  |  |  ------------------
  |  |  |  |  797|   572k|                rep_macro(txtp_map, 0, txtp); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   81|   572k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   572k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  798|   572k|                txtp_map += 32; \
  |  |  |  |  799|   572k|            }
  |  |  ------------------
  |  |  |  Branch (81:5): [True: 495k, False: 586k]
  |  |  ------------------
  |  |   82|   278k|    case 1: set_ctx(set_ctx2); break; \
  |  |  ------------------
  |  |  |  |  796|   932k|            for (int y = 0; y < txh; y++) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (796:29): [True: 654k, False: 278k]
  |  |  |  |  ------------------
  |  |  |  |  797|   654k|                rep_macro(txtp_map, 0, txtp); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   82|   654k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   654k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  798|   654k|                txtp_map += 32; \
  |  |  |  |  799|   654k|            }
  |  |  ------------------
  |  |  |  Branch (82:5): [True: 278k, False: 803k]
  |  |  ------------------
  |  |   83|   200k|    case 2: set_ctx(set_ctx4); break; \
  |  |  ------------------
  |  |  |  |  796|   833k|            for (int y = 0; y < txh; y++) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (796:29): [True: 633k, False: 200k]
  |  |  |  |  ------------------
  |  |  |  |  797|   633k|                rep_macro(txtp_map, 0, txtp); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   83|   633k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   633k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  798|   633k|                txtp_map += 32; \
  |  |  |  |  799|   633k|            }
  |  |  ------------------
  |  |  |  Branch (83:5): [True: 200k, False: 881k]
  |  |  ------------------
  |  |   84|  64.7k|    case 3: set_ctx(set_ctx8); break; \
  |  |  ------------------
  |  |  |  |  796|   420k|            for (int y = 0; y < txh; y++) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (796:29): [True: 355k, False: 64.7k]
  |  |  |  |  ------------------
  |  |  |  |  797|   355k|                rep_macro(txtp_map, 0, txtp); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|   355k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   355k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  798|   355k|                txtp_map += 32; \
  |  |  |  |  799|   355k|            }
  |  |  ------------------
  |  |  |  Branch (84:5): [True: 64.7k, False: 1.01M]
  |  |  ------------------
  |  |   85|  42.9k|    case 4: set_ctx(set_ctx16); break; \
  |  |  ------------------
  |  |  |  |  796|   638k|            for (int y = 0; y < txh; y++) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (796:29): [True: 595k, False: 42.9k]
  |  |  |  |  ------------------
  |  |  |  |  797|   595k|                rep_macro(txtp_map, 0, txtp); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   85|   595k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   595k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   595k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   595k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 595k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  798|   595k|                txtp_map += 32; \
  |  |  |  |  799|   595k|            }
  |  |  ------------------
  |  |  |  Branch (85:5): [True: 42.9k, False: 1.03M]
  |  |  ------------------
  |  |   86|      0|    default: assert(0); \
  |  |  ------------------
  |  |  |  Branch (86:5): [True: 0, False: 1.08M]
  |  |  ------------------
  |  |   87|  1.08M|    }
  ------------------
  |  Branch (801:13): [Folded, False: 0]
  ------------------
  802|  1.08M|#undef set_ctx
  803|  1.08M|            if (t->frame_thread.pass == 1)
  ------------------
  |  Branch (803:17): [True: 0, False: 1.08M]
  ------------------
  804|      0|                *ts->frame_thread[1].cbi++ = eob * (1 << 5) + txtp;
  805|  1.08M|        } else {
  806|      0|            const int cbi = *ts->frame_thread[0].cbi++;
  807|      0|            eob  = cbi >> 5;
  808|      0|            txtp = cbi & 0x1f;
  809|      0|        }
  810|  1.08M|        if (!(t->frame_thread.pass & 1)) {
  ------------------
  |  Branch (810:13): [True: 1.08M, False: 0]
  ------------------
  811|  1.08M|            assert(dst);
  ------------------
  |  Branch (811:13): [True: 1.08M, False: 0]
  ------------------
  812|  1.08M|            if (eob >= 0) {
  ------------------
  |  Branch (812:17): [True: 776k, False: 305k]
  ------------------
  813|   776k|                if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   34|   776k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 776k]
  |  |  ------------------
  |  |   35|   776k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   776k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                              if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
  814|      0|                    coef_dump(cf, imin(t_dim->h, 8) * 4, imin(t_dim->w, 8) * 4, 3, "dq");
  815|   776k|                dsp->itx.itxfm_add[ytx][txtp](dst, f->cur.stride[0], cf, eob
  816|   776k|                                              HIGHBD_CALL_SUFFIX);
  817|   776k|                if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   34|   776k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 776k]
  |  |  ------------------
  |  |   35|   776k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   776k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                              if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
  818|      0|                    hex_dump(dst, f->cur.stride[0], t_dim->w * 4, t_dim->h * 4, "recon");
  819|   776k|            }
  820|  1.08M|        }
  821|  1.08M|    }
  822|  1.16M|}
recon_tmpl.c:decode_coefs:
  327|  7.55M|{
  328|  7.55M|    Dav1dTileState *const ts = t->ts;
  329|  7.55M|    const int chroma = !!plane;
  330|  7.55M|    const Dav1dFrameContext *const f = t->f;
  331|  7.55M|    const int lossless = f->frame_hdr->segmentation.lossless[b->seg_id];
  332|  7.55M|    const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[tx];
  333|  7.55M|    const int dbg = DEBUG_BLOCK_INFO && plane && 0;
  ------------------
  |  |   34|  7.55M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 7.55M]
  |  |  ------------------
  |  |   35|  7.55M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  7.55M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  |  Branch (333:41): [True: 0, False: 0]
  |  Branch (333:50): [Folded, False: 0]
  ------------------
  334|       |
  335|  7.55M|    if (dbg)
  ------------------
  |  Branch (335:9): [Folded, False: 7.55M]
  ------------------
  336|      0|        printf("Start: r=%d\n", ts->msac.rng);
  337|       |
  338|       |    // does this block have any non-zero coefficients
  339|  7.55M|    const int sctx = get_skip_ctx(t_dim, bs, a, l, chroma, f->cur.p.layout);
  340|  7.55M|    const int all_skip = dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  7.55M|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  341|  7.55M|                             ts->cdf.coef.skip[t_dim->ctx][sctx]);
  342|  7.55M|    if (dbg)
  ------------------
  |  Branch (342:9): [Folded, False: 7.55M]
  ------------------
  343|      0|        printf("Post-non-zero[%d][%d][%d]: r=%d\n",
  344|      0|               t_dim->ctx, sctx, all_skip, ts->msac.rng);
  345|  7.55M|    if (all_skip) {
  ------------------
  |  Branch (345:9): [True: 3.83M, False: 3.71M]
  ------------------
  346|  3.83M|        *res_ctx = 0x40;
  347|  3.83M|        *txtp = lossless * WHT_WHT; /* lossless ? WHT_WHT : DCT_DCT */
  348|  3.83M|        return -1;
  349|  3.83M|    }
  350|       |
  351|       |    // transform type (chroma: derived, luma: explicitly coded)
  352|  3.71M|    if (lossless) {
  ------------------
  |  Branch (352:9): [True: 590k, False: 3.12M]
  ------------------
  353|   590k|        assert(t_dim->max == TX_4X4);
  ------------------
  |  Branch (353:9): [True: 590k, False: 0]
  ------------------
  354|   590k|        *txtp = WHT_WHT;
  355|  3.12M|    } else if (t_dim->max + intra >= TX_64X64) {
  ------------------
  |  Branch (355:16): [True: 699k, False: 2.42M]
  ------------------
  356|   699k|        *txtp = DCT_DCT;
  357|  2.42M|    } else if (chroma) {
  ------------------
  |  Branch (357:16): [True: 626k, False: 1.80M]
  ------------------
  358|       |        // inferred from either the luma txtp (inter) or a LUT (intra)
  359|   626k|        *txtp = intra ? dav1d_txtp_from_uvmode[b->uv_mode] :
  ------------------
  |  Branch (359:17): [True: 378k, False: 247k]
  ------------------
  360|   626k|                        get_uv_inter_txtp(t_dim, *txtp);
  361|  1.80M|    } else if (!f->frame_hdr->segmentation.qidx[b->seg_id]) {
  ------------------
  |  Branch (361:16): [True: 8.46k, False: 1.79M]
  ------------------
  362|       |        // In libaom, lossless is checked by a literal qidx == 0, but not all
  363|       |        // such blocks are actually lossless. The remainder gets an implicit
  364|       |        // transform type (for luma)
  365|  8.46k|        *txtp = DCT_DCT;
  366|  1.79M|    } else {
  367|  1.79M|        unsigned idx;
  368|  1.79M|        if (intra) {
  ------------------
  |  Branch (368:13): [True: 1.21M, False: 579k]
  ------------------
  369|  1.21M|            const enum IntraPredMode y_mode_nofilt = b->y_mode == FILTER_PRED ?
  ------------------
  |  Branch (369:54): [True: 240k, False: 972k]
  ------------------
  370|   972k|                dav1d_filter_mode_to_y_mode[b->y_angle] : b->y_mode;
  371|  1.21M|            if (f->frame_hdr->reduced_txtp_set || t_dim->min == TX_16X16) {
  ------------------
  |  Branch (371:17): [True: 228k, False: 984k]
  |  Branch (371:51): [True: 153k, False: 831k]
  ------------------
  372|   381k|                idx = dav1d_msac_decode_symbol_adapt8(&ts->msac,
  ------------------
  |  |   48|   381k|#define dav1d_msac_decode_symbol_adapt8  dav1d_msac_decode_symbol_adapt8_sse2
  ------------------
  373|   381k|                          ts->cdf.m.txtp_intra2[t_dim->min][y_mode_nofilt], 4);
  374|   381k|                *txtp = dav1d_tx_types_per_set[idx + 0];
  375|   831k|            } else {
  376|   831k|                idx = dav1d_msac_decode_symbol_adapt8(&ts->msac,
  ------------------
  |  |   48|   831k|#define dav1d_msac_decode_symbol_adapt8  dav1d_msac_decode_symbol_adapt8_sse2
  ------------------
  377|   831k|                          ts->cdf.m.txtp_intra1[t_dim->min][y_mode_nofilt], 6);
  378|   831k|                *txtp = dav1d_tx_types_per_set[idx + 5];
  379|   831k|            }
  380|  1.21M|            if (dbg)
  ------------------
  |  Branch (380:17): [Folded, False: 1.21M]
  ------------------
  381|      0|                printf("Post-txtp-intra[%d->%d][%d][%d->%d]: r=%d\n",
  382|      0|                       tx, t_dim->min, y_mode_nofilt, idx, *txtp, ts->msac.rng);
  383|  1.21M|        } else {
  384|   579k|            if (f->frame_hdr->reduced_txtp_set || t_dim->max == TX_32X32) {
  ------------------
  |  Branch (384:17): [True: 85.7k, False: 493k]
  |  Branch (384:51): [True: 62.6k, False: 430k]
  ------------------
  385|   148k|                idx = dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   148k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  386|   148k|                          ts->cdf.m.txtp_inter3[t_dim->min]);
  387|   148k|                *txtp = (idx - 1) & IDTX; /* idx ? DCT_DCT : IDTX */
  388|   430k|            } else if (t_dim->min == TX_16X16) {
  ------------------
  |  Branch (388:24): [True: 62.5k, False: 368k]
  ------------------
  389|  62.5k|                idx = dav1d_msac_decode_symbol_adapt16(&ts->msac,
  ------------------
  |  |   57|  62.5k|#define dav1d_msac_decode_symbol_adapt16(ctx, cdf, symb) ((ctx)->symbol_adapt16(ctx, cdf, symb))
  ------------------
  390|  62.5k|                          ts->cdf.m.txtp_inter2, 11);
  391|  62.5k|                *txtp = dav1d_tx_types_per_set[idx + 12];
  392|   368k|            } else {
  393|   368k|                idx = dav1d_msac_decode_symbol_adapt16(&ts->msac,
  ------------------
  |  |   57|   368k|#define dav1d_msac_decode_symbol_adapt16(ctx, cdf, symb) ((ctx)->symbol_adapt16(ctx, cdf, symb))
  ------------------
  394|   368k|                          ts->cdf.m.txtp_inter1[t_dim->min], 15);
  395|   368k|                *txtp = dav1d_tx_types_per_set[idx + 24];
  396|   368k|            }
  397|   579k|            if (dbg)
  ------------------
  |  Branch (397:17): [Folded, False: 579k]
  ------------------
  398|      0|                printf("Post-txtp-inter[%d->%d][%d->%d]: r=%d\n",
  399|      0|                       tx, t_dim->min, idx, *txtp, ts->msac.rng);
  400|   579k|        }
  401|  1.79M|    }
  402|       |
  403|       |    // find end-of-block (eob)
  404|  3.71M|    int eob;
  405|  3.71M|    const int slw = imin(t_dim->lw, TX_32X32), slh = imin(t_dim->lh, TX_32X32);
  406|  3.71M|    const int tx2dszctx = slw + slh;
  407|  3.71M|    const enum TxClass tx_class = dav1d_tx_type_class[*txtp];
  408|  3.71M|    const int is_1d = tx_class != TX_CLASS_2D;
  409|  3.71M|    switch (tx2dszctx) {
  ------------------
  |  Branch (409:13): [True: 3.71M, False: 0]
  ------------------
  410|      0|#define case_sz(sz, bin, ns, is_1d) \
  411|      0|    case sz: { \
  412|      0|        uint16_t *const eob_bin_cdf = ts->cdf.coef.eob_bin_##bin[chroma]is_1d; \
  413|      0|        eob = dav1d_msac_decode_symbol_adapt##ns(&ts->msac, eob_bin_cdf, 4 + sz); \
  414|      0|        break; \
  415|      0|    }
  416|   965k|    case_sz(0,   16,  8, [is_1d]);
  ------------------
  |  |  411|   965k|    case sz: { \
  |  |  ------------------
  |  |  |  Branch (411:5): [True: 965k, False: 2.75M]
  |  |  ------------------
  |  |  412|   965k|        uint16_t *const eob_bin_cdf = ts->cdf.coef.eob_bin_##bin[chroma]is_1d; \
  |  |  413|   965k|        eob = dav1d_msac_decode_symbol_adapt##ns(&ts->msac, eob_bin_cdf, 4 + sz); \
  |  |  ------------------
  |  |  |  |   48|   965k|#define dav1d_msac_decode_symbol_adapt8  dav1d_msac_decode_symbol_adapt8_sse2
  |  |  ------------------
  |  |  414|   965k|        break; \
  |  |  415|   965k|    }
  ------------------
  417|   334k|    case_sz(1,   32,  8, [is_1d]);
  ------------------
  |  |  411|   334k|    case sz: { \
  |  |  ------------------
  |  |  |  Branch (411:5): [True: 334k, False: 3.38M]
  |  |  ------------------
  |  |  412|   334k|        uint16_t *const eob_bin_cdf = ts->cdf.coef.eob_bin_##bin[chroma]is_1d; \
  |  |  413|   334k|        eob = dav1d_msac_decode_symbol_adapt##ns(&ts->msac, eob_bin_cdf, 4 + sz); \
  |  |  ------------------
  |  |  |  |   48|   334k|#define dav1d_msac_decode_symbol_adapt8  dav1d_msac_decode_symbol_adapt8_sse2
  |  |  ------------------
  |  |  414|   334k|        break; \
  |  |  415|   334k|    }
  ------------------
  418|   816k|    case_sz(2,   64,  8, [is_1d]);
  ------------------
  |  |  411|   816k|    case sz: { \
  |  |  ------------------
  |  |  |  Branch (411:5): [True: 816k, False: 2.90M]
  |  |  ------------------
  |  |  412|   816k|        uint16_t *const eob_bin_cdf = ts->cdf.coef.eob_bin_##bin[chroma]is_1d; \
  |  |  413|   816k|        eob = dav1d_msac_decode_symbol_adapt##ns(&ts->msac, eob_bin_cdf, 4 + sz); \
  |  |  ------------------
  |  |  |  |   48|   816k|#define dav1d_msac_decode_symbol_adapt8  dav1d_msac_decode_symbol_adapt8_sse2
  |  |  ------------------
  |  |  414|   816k|        break; \
  |  |  415|   816k|    }
  ------------------
  419|   431k|    case_sz(3,  128,  8, [is_1d]);
  ------------------
  |  |  411|   431k|    case sz: { \
  |  |  ------------------
  |  |  |  Branch (411:5): [True: 431k, False: 3.28M]
  |  |  ------------------
  |  |  412|   431k|        uint16_t *const eob_bin_cdf = ts->cdf.coef.eob_bin_##bin[chroma]is_1d; \
  |  |  413|   431k|        eob = dav1d_msac_decode_symbol_adapt##ns(&ts->msac, eob_bin_cdf, 4 + sz); \
  |  |  ------------------
  |  |  |  |   48|   431k|#define dav1d_msac_decode_symbol_adapt8  dav1d_msac_decode_symbol_adapt8_sse2
  |  |  ------------------
  |  |  414|   431k|        break; \
  |  |  415|   431k|    }
  ------------------
  420|   493k|    case_sz(4,  256, 16, [is_1d]);
  ------------------
  |  |  411|   493k|    case sz: { \
  |  |  ------------------
  |  |  |  Branch (411:5): [True: 493k, False: 3.22M]
  |  |  ------------------
  |  |  412|   493k|        uint16_t *const eob_bin_cdf = ts->cdf.coef.eob_bin_##bin[chroma]is_1d; \
  |  |  413|   493k|        eob = dav1d_msac_decode_symbol_adapt##ns(&ts->msac, eob_bin_cdf, 4 + sz); \
  |  |  ------------------
  |  |  |  |   57|   493k|#define dav1d_msac_decode_symbol_adapt16(ctx, cdf, symb) ((ctx)->symbol_adapt16(ctx, cdf, symb))
  |  |  ------------------
  |  |  414|   493k|        break; \
  |  |  415|   493k|    }
  ------------------
  421|   221k|    case_sz(5,  512, 16,        );
  ------------------
  |  |  411|   221k|    case sz: { \
  |  |  ------------------
  |  |  |  Branch (411:5): [True: 221k, False: 3.49M]
  |  |  ------------------
  |  |  412|   221k|        uint16_t *const eob_bin_cdf = ts->cdf.coef.eob_bin_##bin[chroma]is_1d; \
  |  |  413|   221k|        eob = dav1d_msac_decode_symbol_adapt##ns(&ts->msac, eob_bin_cdf, 4 + sz); \
  |  |  ------------------
  |  |  |  |   57|   221k|#define dav1d_msac_decode_symbol_adapt16(ctx, cdf, symb) ((ctx)->symbol_adapt16(ctx, cdf, symb))
  |  |  ------------------
  |  |  414|   221k|        break; \
  |  |  415|   221k|    }
  ------------------
  422|   454k|    case_sz(6, 1024, 16,        );
  ------------------
  |  |  411|   454k|    case sz: { \
  |  |  ------------------
  |  |  |  Branch (411:5): [True: 454k, False: 3.26M]
  |  |  ------------------
  |  |  412|   454k|        uint16_t *const eob_bin_cdf = ts->cdf.coef.eob_bin_##bin[chroma]is_1d; \
  |  |  413|   454k|        eob = dav1d_msac_decode_symbol_adapt##ns(&ts->msac, eob_bin_cdf, 4 + sz); \
  |  |  ------------------
  |  |  |  |   57|   454k|#define dav1d_msac_decode_symbol_adapt16(ctx, cdf, symb) ((ctx)->symbol_adapt16(ctx, cdf, symb))
  |  |  ------------------
  |  |  414|   454k|        break; \
  |  |  415|   454k|    }
  ------------------
  423|  3.71M|#undef case_sz
  424|  3.71M|    }
  425|  3.71M|    if (dbg)
  ------------------
  |  Branch (425:9): [Folded, False: 3.71M]
  ------------------
  426|      0|        printf("Post-eob_bin_%d[%d][%d][%d]: r=%d\n",
  427|      0|               16 << tx2dszctx, chroma, is_1d, eob, ts->msac.rng);
  428|  3.71M|    if (eob > 1) {
  ------------------
  |  Branch (428:9): [True: 2.65M, False: 1.05M]
  ------------------
  429|  2.65M|        const int eob_bin = eob - 2;
  430|  2.65M|        uint16_t *const eob_hi_bit_cdf =
  431|  2.65M|            ts->cdf.coef.eob_hi_bit[t_dim->ctx][chroma][eob_bin];
  432|  2.65M|        const int eob_hi_bit = dav1d_msac_decode_bool_adapt(&ts->msac, eob_hi_bit_cdf);
  ------------------
  |  |   52|  2.65M|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  433|  2.65M|        if (dbg)
  ------------------
  |  Branch (433:13): [Folded, False: 2.65M]
  ------------------
  434|      0|            printf("Post-eob_hi_bit[%d][%d][%d][%d]: r=%d\n",
  435|      0|                   t_dim->ctx, chroma, eob_bin, eob_hi_bit, ts->msac.rng);
  436|  2.65M|        eob = ((eob_hi_bit | 2) << eob_bin) | dav1d_msac_decode_bools(&ts->msac, eob_bin);
  437|  2.65M|        if (dbg)
  ------------------
  |  Branch (437:13): [Folded, False: 2.65M]
  ------------------
  438|      0|            printf("Post-eob[%d]: r=%d\n", eob, ts->msac.rng);
  439|  2.65M|    }
  440|  3.71M|    assert(eob >= 0);
  ------------------
  |  Branch (440:5): [True: 3.71M, False: 0]
  ------------------
  441|       |
  442|       |    // base tokens
  443|  3.71M|    uint16_t (*const eob_cdf)[4] = ts->cdf.coef.eob_base_tok[t_dim->ctx][chroma];
  444|  3.71M|    uint16_t (*const hi_cdf)[4] = ts->cdf.coef.br_tok[imin(t_dim->ctx, 3)][chroma];
  445|  3.71M|    unsigned rc, dc_tok;
  446|       |
  447|  3.71M|    if (eob) {
  ------------------
  |  Branch (447:9): [True: 2.80M, False: 915k]
  ------------------
  448|  2.80M|        uint16_t (*const lo_cdf)[4] = ts->cdf.coef.base_tok[t_dim->ctx][chroma];
  449|  2.80M|        uint8_t *const levels = t->scratch.levels; // bits 0-5: tok, 6-7: lo_tok
  450|       |
  451|       |        /* eob */
  452|  2.80M|        unsigned ctx = 1 + (eob > 2 << tx2dszctx) + (eob > 4 << tx2dszctx);
  453|  2.80M|        int eob_tok = dav1d_msac_decode_symbol_adapt4(&ts->msac, eob_cdf[ctx], 2);
  ------------------
  |  |   47|  2.80M|#define dav1d_msac_decode_symbol_adapt4  dav1d_msac_decode_symbol_adapt4_sse2
  ------------------
  454|  2.80M|        int tok = eob_tok + 1;
  455|  2.80M|        int level_tok = tok * 0x41;
  456|  2.80M|        unsigned mag;
  457|       |
  458|  2.80M|#define DECODE_COEFS_CLASS(tx_class) \
  459|  2.80M|        unsigned x, y; \
  460|  2.80M|        uint8_t *level; \
  461|  2.80M|        if (tx_class == TX_CLASS_2D) \
  462|  2.80M|            rc = scan[eob], x = rc >> shift, y = rc & mask; \
  463|  2.80M|        else if (tx_class == TX_CLASS_H) \
  464|       |            /* Transposing reduces the stride and padding requirements */ \
  465|  2.80M|            x = eob & mask, y = eob >> shift, rc = eob; \
  466|  2.80M|        else /* tx_class == TX_CLASS_V */ \
  467|  2.80M|            x = eob & mask, y = eob >> shift, rc = (x << shift2) | y; \
  468|  2.80M|        if (dbg) \
  469|  2.80M|            printf("Post-lo_tok[%d][%d][%d][%d=%d=%d]: r=%d\n", \
  470|  2.80M|                   t_dim->ctx, chroma, ctx, eob, rc, tok, ts->msac.rng); \
  471|  2.80M|        if (eob_tok == 2) { \
  472|  2.80M|            ctx = (tx_class == TX_CLASS_2D ? (x | y) > 1 : y != 0) ? 14 : 7; \
  473|  2.80M|            tok = dav1d_msac_decode_hi_tok(&ts->msac, hi_cdf[ctx]); \
  474|  2.80M|            level_tok = tok + (3 << 6); \
  475|  2.80M|            if (dbg) \
  476|  2.80M|                printf("Post-hi_tok[%d][%d][%d][%d=%d=%d]: r=%d\n", \
  477|  2.80M|                       imin(t_dim->ctx, 3), chroma, ctx, eob, rc, tok, \
  478|  2.80M|                       ts->msac.rng); \
  479|  2.80M|        } \
  480|  2.80M|        cf[rc] = tok << 11; \
  481|  2.80M|        if (tx_class == TX_CLASS_2D) \
  482|  2.80M|            level = levels + rc; \
  483|  2.80M|        else \
  484|  2.80M|            level = levels + x * stride + y; \
  485|  2.80M|        *level = (uint8_t) level_tok; \
  486|  2.80M|        for (int i = eob - 1; i > 0; i--) { /* ac */ \
  487|  2.80M|            unsigned rc_i; \
  488|  2.80M|            if (tx_class == TX_CLASS_2D) \
  489|  2.80M|                rc_i = scan[i], x = rc_i >> shift, y = rc_i & mask; \
  490|  2.80M|            else if (tx_class == TX_CLASS_H) \
  491|  2.80M|                x = i & mask, y = i >> shift, rc_i = i; \
  492|  2.80M|            else /* tx_class == TX_CLASS_V */ \
  493|  2.80M|                x = i & mask, y = i >> shift, rc_i = (x << shift2) | y; \
  494|  2.80M|            assert(x < 32 && y < 32); \
  495|  2.80M|            if (tx_class == TX_CLASS_2D) \
  496|  2.80M|                level = levels + rc_i; \
  497|  2.80M|            else \
  498|  2.80M|                level = levels + x * stride + y; \
  499|  2.80M|            ctx = get_lo_ctx(level, tx_class, &mag, lo_ctx_offsets, x, y, stride); \
  500|  2.80M|            if (tx_class == TX_CLASS_2D) \
  501|  2.80M|                y |= x; \
  502|  2.80M|            tok = dav1d_msac_decode_symbol_adapt4(&ts->msac, lo_cdf[ctx], 3); \
  503|  2.80M|            if (dbg) \
  504|  2.80M|                printf("Post-lo_tok[%d][%d][%d][%d=%d=%d]: r=%d\n", \
  505|  2.80M|                       t_dim->ctx, chroma, ctx, i, rc_i, tok, ts->msac.rng); \
  506|  2.80M|            if (tok == 3) { \
  507|  2.80M|                mag &= 63; \
  508|  2.80M|                ctx = (y > (tx_class == TX_CLASS_2D) ? 14 : 7) + \
  509|  2.80M|                      (mag > 12 ? 6 : (mag + 1) >> 1); \
  510|  2.80M|                tok = dav1d_msac_decode_hi_tok(&ts->msac, hi_cdf[ctx]); \
  511|  2.80M|                if (dbg) \
  512|  2.80M|                    printf("Post-hi_tok[%d][%d][%d][%d=%d=%d]: r=%d\n", \
  513|  2.80M|                           imin(t_dim->ctx, 3), chroma, ctx, i, rc_i, tok, \
  514|  2.80M|                           ts->msac.rng); \
  515|  2.80M|                *level = (uint8_t) (tok + (3 << 6)); \
  516|  2.80M|                cf[rc_i] = (tok << 11) | rc; \
  517|  2.80M|                rc = rc_i; \
  518|  2.80M|            } else { \
  519|       |                /* 0x1 for tok, 0x7ff as bitmask for rc, 0x41 for level_tok */ \
  520|  2.80M|                tok *= 0x17ff41; \
  521|  2.80M|                *level = (uint8_t) tok; \
  522|       |                /* tok ? (tok << 11) | rc : 0 */ \
  523|  2.80M|                tok = (tok >> 9) & (rc + ~0x7ffu); \
  524|  2.80M|                if (tok) rc = rc_i; \
  525|  2.80M|                cf[rc_i] = tok; \
  526|  2.80M|            } \
  527|  2.80M|        } \
  528|       |        /* dc */ \
  529|  2.80M|        ctx = (tx_class == TX_CLASS_2D) ? 0 : \
  530|  2.80M|            get_lo_ctx(levels, tx_class, &mag, lo_ctx_offsets, 0, 0, stride); \
  531|  2.80M|        dc_tok = dav1d_msac_decode_symbol_adapt4(&ts->msac, lo_cdf[ctx], 3); \
  532|  2.80M|        if (dbg) \
  533|  2.80M|            printf("Post-dc_lo_tok[%d][%d][%d][%d]: r=%d\n", \
  534|  2.80M|                   t_dim->ctx, chroma, ctx, dc_tok, ts->msac.rng); \
  535|  2.80M|        if (dc_tok == 3) { \
  536|  2.80M|            if (tx_class == TX_CLASS_2D) \
  537|  2.80M|                mag = levels[0 * stride + 1] + levels[1 * stride + 0] + \
  538|  2.80M|                      levels[1 * stride + 1]; \
  539|  2.80M|            mag &= 63; \
  540|  2.80M|            ctx = mag > 12 ? 6 : (mag + 1) >> 1; \
  541|  2.80M|            dc_tok = dav1d_msac_decode_hi_tok(&ts->msac, hi_cdf[ctx]); \
  542|  2.80M|            if (dbg) \
  543|  2.80M|                printf("Post-dc_hi_tok[%d][%d][0][%d]: r=%d\n", \
  544|  2.80M|                       imin(t_dim->ctx, 3), chroma, dc_tok, ts->msac.rng); \
  545|  2.80M|        } \
  546|  2.80M|        break
  547|       |
  548|  2.80M|        const uint16_t *scan;
  549|  2.80M|        switch (tx_class) {
  550|  2.54M|        case TX_CLASS_2D: {
  ------------------
  |  Branch (550:9): [True: 2.54M, False: 259k]
  ------------------
  551|  2.54M|            const unsigned nonsquare_tx = tx >= RTX_4X8;
  552|  2.54M|            const uint8_t (*const lo_ctx_offsets)[5] =
  553|  2.54M|                dav1d_lo_ctx_offsets[nonsquare_tx + (tx & nonsquare_tx)];
  554|  2.54M|            scan = dav1d_scans[tx];
  555|  2.54M|            const ptrdiff_t stride = 4 << slh;
  556|  2.54M|            const unsigned shift = slh + 2, shift2 = 0;
  557|  2.54M|            const unsigned mask = (4 << slh) - 1;
  558|  2.54M|            memset(levels, 0, stride * ((4 << slw) + 2));
  559|  2.54M|            DECODE_COEFS_CLASS(TX_CLASS_2D);
  ------------------
  |  |  459|  2.54M|        unsigned x, y; \
  |  |  460|  2.54M|        uint8_t *level; \
  |  |  461|  2.54M|        if (tx_class == TX_CLASS_2D) \
  |  |  ------------------
  |  |  |  Branch (461:13): [True: 2.54M, Folded]
  |  |  ------------------
  |  |  462|  2.54M|            rc = scan[eob], x = rc >> shift, y = rc & mask; \
  |  |  463|  2.54M|        else if (tx_class == TX_CLASS_H) \
  |  |  ------------------
  |  |  |  Branch (463:18): [Folded, False: 0]
  |  |  ------------------
  |  |  464|      0|            /* Transposing reduces the stride and padding requirements */ \
  |  |  465|      0|            x = eob & mask, y = eob >> shift, rc = eob; \
  |  |  466|      0|        else /* tx_class == TX_CLASS_V */ \
  |  |  467|      0|            x = eob & mask, y = eob >> shift, rc = (x << shift2) | y; \
  |  |  468|  2.54M|        if (dbg) \
  |  |  ------------------
  |  |  |  Branch (468:13): [Folded, False: 2.54M]
  |  |  ------------------
  |  |  469|  2.54M|            printf("Post-lo_tok[%d][%d][%d][%d=%d=%d]: r=%d\n", \
  |  |  470|      0|                   t_dim->ctx, chroma, ctx, eob, rc, tok, ts->msac.rng); \
  |  |  471|  2.54M|        if (eob_tok == 2) { \
  |  |  ------------------
  |  |  |  Branch (471:13): [True: 57.5k, False: 2.48M]
  |  |  ------------------
  |  |  472|  57.5k|            ctx = (tx_class == TX_CLASS_2D ? (x | y) > 1 : y != 0) ? 14 : 7; \
  |  |  ------------------
  |  |  |  Branch (472:19): [True: 53.2k, False: 4.29k]
  |  |  |  Branch (472:20): [True: 57.5k, Folded]
  |  |  ------------------
  |  |  473|  57.5k|            tok = dav1d_msac_decode_hi_tok(&ts->msac, hi_cdf[ctx]); \
  |  |  ------------------
  |  |  |  |   49|  57.5k|#define dav1d_msac_decode_hi_tok         dav1d_msac_decode_hi_tok_sse2
  |  |  ------------------
  |  |  474|  57.5k|            level_tok = tok + (3 << 6); \
  |  |  475|  57.5k|            if (dbg) \
  |  |  ------------------
  |  |  |  Branch (475:17): [Folded, False: 57.5k]
  |  |  ------------------
  |  |  476|  57.5k|                printf("Post-hi_tok[%d][%d][%d][%d=%d=%d]: r=%d\n", \
  |  |  477|      0|                       imin(t_dim->ctx, 3), chroma, ctx, eob, rc, tok, \
  |  |  478|      0|                       ts->msac.rng); \
  |  |  479|  57.5k|        } \
  |  |  480|  2.54M|        cf[rc] = tok << 11; \
  |  |  481|  2.54M|        if (tx_class == TX_CLASS_2D) \
  |  |  ------------------
  |  |  |  Branch (481:13): [True: 2.54M, Folded]
  |  |  ------------------
  |  |  482|  2.54M|            level = levels + rc; \
  |  |  483|  2.54M|        else \
  |  |  484|  2.54M|            level = levels + x * stride + y; \
  |  |  485|  2.54M|        *level = (uint8_t) level_tok; \
  |  |  486|  77.4M|        for (int i = eob - 1; i > 0; i--) { /* ac */ \
  |  |  ------------------
  |  |  |  Branch (486:31): [True: 74.9M, False: 2.54M]
  |  |  ------------------
  |  |  487|  74.9M|            unsigned rc_i; \
  |  |  488|  74.9M|            if (tx_class == TX_CLASS_2D) \
  |  |  ------------------
  |  |  |  Branch (488:17): [True: 74.9M, Folded]
  |  |  ------------------
  |  |  489|  74.9M|                rc_i = scan[i], x = rc_i >> shift, y = rc_i & mask; \
  |  |  490|  74.9M|            else if (tx_class == TX_CLASS_H) \
  |  |  ------------------
  |  |  |  Branch (490:22): [Folded, False: 0]
  |  |  ------------------
  |  |  491|      0|                x = i & mask, y = i >> shift, rc_i = i; \
  |  |  492|      0|            else /* tx_class == TX_CLASS_V */ \
  |  |  493|      0|                x = i & mask, y = i >> shift, rc_i = (x << shift2) | y; \
  |  |  494|  74.9M|            assert(x < 32 && y < 32); \
  |  |  495|  74.9M|            if (tx_class == TX_CLASS_2D) \
  |  |  ------------------
  |  |  |  Branch (495:17): [True: 74.9M, Folded]
  |  |  ------------------
  |  |  496|  74.9M|                level = levels + rc_i; \
  |  |  497|  74.9M|            else \
  |  |  498|  74.9M|                level = levels + x * stride + y; \
  |  |  499|  74.9M|            ctx = get_lo_ctx(level, tx_class, &mag, lo_ctx_offsets, x, y, stride); \
  |  |  500|  74.9M|            if (tx_class == TX_CLASS_2D) \
  |  |  ------------------
  |  |  |  Branch (500:17): [True: 74.9M, Folded]
  |  |  ------------------
  |  |  501|  74.9M|                y |= x; \
  |  |  502|  74.9M|            tok = dav1d_msac_decode_symbol_adapt4(&ts->msac, lo_cdf[ctx], 3); \
  |  |  ------------------
  |  |  |  |   47|  74.9M|#define dav1d_msac_decode_symbol_adapt4  dav1d_msac_decode_symbol_adapt4_sse2
  |  |  ------------------
  |  |  503|  74.9M|            if (dbg) \
  |  |  ------------------
  |  |  |  Branch (503:17): [Folded, False: 74.9M]
  |  |  ------------------
  |  |  504|  74.9M|                printf("Post-lo_tok[%d][%d][%d][%d=%d=%d]: r=%d\n", \
  |  |  505|      0|                       t_dim->ctx, chroma, ctx, i, rc_i, tok, ts->msac.rng); \
  |  |  506|  74.9M|            if (tok == 3) { \
  |  |  ------------------
  |  |  |  Branch (506:17): [True: 6.43M, False: 68.5M]
  |  |  ------------------
  |  |  507|  6.43M|                mag &= 63; \
  |  |  508|  6.43M|                ctx = (y > (tx_class == TX_CLASS_2D) ? 14 : 7) + \
  |  |  ------------------
  |  |  |  Branch (508:24): [True: 5.36M, False: 1.07M]
  |  |  ------------------
  |  |  509|  6.43M|                      (mag > 12 ? 6 : (mag + 1) >> 1); \
  |  |  ------------------
  |  |  |  Branch (509:24): [True: 1.58M, False: 4.84M]
  |  |  ------------------
  |  |  510|  6.43M|                tok = dav1d_msac_decode_hi_tok(&ts->msac, hi_cdf[ctx]); \
  |  |  ------------------
  |  |  |  |   49|  6.43M|#define dav1d_msac_decode_hi_tok         dav1d_msac_decode_hi_tok_sse2
  |  |  ------------------
  |  |  511|  6.43M|                if (dbg) \
  |  |  ------------------
  |  |  |  Branch (511:21): [Folded, False: 6.43M]
  |  |  ------------------
  |  |  512|  6.43M|                    printf("Post-hi_tok[%d][%d][%d][%d=%d=%d]: r=%d\n", \
  |  |  513|      0|                           imin(t_dim->ctx, 3), chroma, ctx, i, rc_i, tok, \
  |  |  514|      0|                           ts->msac.rng); \
  |  |  515|  6.43M|                *level = (uint8_t) (tok + (3 << 6)); \
  |  |  516|  6.43M|                cf[rc_i] = (tok << 11) | rc; \
  |  |  517|  6.43M|                rc = rc_i; \
  |  |  518|  68.5M|            } else { \
  |  |  519|  68.5M|                /* 0x1 for tok, 0x7ff as bitmask for rc, 0x41 for level_tok */ \
  |  |  520|  68.5M|                tok *= 0x17ff41; \
  |  |  521|  68.5M|                *level = (uint8_t) tok; \
  |  |  522|  68.5M|                /* tok ? (tok << 11) | rc : 0 */ \
  |  |  523|  68.5M|                tok = (tok >> 9) & (rc + ~0x7ffu); \
  |  |  524|  68.5M|                if (tok) rc = rc_i; \
  |  |  ------------------
  |  |  |  Branch (524:21): [True: 19.7M, False: 48.7M]
  |  |  ------------------
  |  |  525|  68.5M|                cf[rc_i] = tok; \
  |  |  526|  68.5M|            } \
  |  |  527|  74.9M|        } \
  |  |  528|  2.54M|        /* dc */ \
  |  |  529|  2.54M|        ctx = (tx_class == TX_CLASS_2D) ? 0 : \
  |  |  ------------------
  |  |  |  Branch (529:15): [True: 2.54M, Folded]
  |  |  ------------------
  |  |  530|  2.54M|            get_lo_ctx(levels, tx_class, &mag, lo_ctx_offsets, 0, 0, stride); \
  |  |  531|  2.54M|        dc_tok = dav1d_msac_decode_symbol_adapt4(&ts->msac, lo_cdf[ctx], 3); \
  |  |  ------------------
  |  |  |  |   47|  2.54M|#define dav1d_msac_decode_symbol_adapt4  dav1d_msac_decode_symbol_adapt4_sse2
  |  |  ------------------
  |  |  532|  2.54M|        if (dbg) \
  |  |  ------------------
  |  |  |  Branch (532:13): [Folded, False: 2.54M]
  |  |  ------------------
  |  |  533|  2.54M|            printf("Post-dc_lo_tok[%d][%d][%d][%d]: r=%d\n", \
  |  |  534|      0|                   t_dim->ctx, chroma, ctx, dc_tok, ts->msac.rng); \
  |  |  535|  2.54M|        if (dc_tok == 3) { \
  |  |  ------------------
  |  |  |  Branch (535:13): [True: 973k, False: 1.56M]
  |  |  ------------------
  |  |  536|   973k|            if (tx_class == TX_CLASS_2D) \
  |  |  ------------------
  |  |  |  Branch (536:17): [True: 973k, Folded]
  |  |  ------------------
  |  |  537|   973k|                mag = levels[0 * stride + 1] + levels[1 * stride + 0] + \
  |  |  538|   973k|                      levels[1 * stride + 1]; \
  |  |  539|   973k|            mag &= 63; \
  |  |  540|   973k|            ctx = mag > 12 ? 6 : (mag + 1) >> 1; \
  |  |  ------------------
  |  |  |  Branch (540:19): [True: 123k, False: 849k]
  |  |  ------------------
  |  |  541|   973k|            dc_tok = dav1d_msac_decode_hi_tok(&ts->msac, hi_cdf[ctx]); \
  |  |  ------------------
  |  |  |  |   49|   973k|#define dav1d_msac_decode_hi_tok         dav1d_msac_decode_hi_tok_sse2
  |  |  ------------------
  |  |  542|   973k|            if (dbg) \
  |  |  ------------------
  |  |  |  Branch (542:17): [Folded, False: 973k]
  |  |  ------------------
  |  |  543|   973k|                printf("Post-dc_hi_tok[%d][%d][0][%d]: r=%d\n", \
  |  |  544|      0|                       imin(t_dim->ctx, 3), chroma, dc_tok, ts->msac.rng); \
  |  |  545|   973k|        } \
  |  |  546|  2.54M|        break
  ------------------
  |  Branch (559:13): [True: 74.9M, False: 0]
  |  Branch (559:13): [True: 74.9M, False: 0]
  ------------------
  560|  2.54M|        }
  561|   172k|        case TX_CLASS_H: {
  ------------------
  |  Branch (561:9): [True: 172k, False: 2.62M]
  ------------------
  562|   172k|            const uint8_t (*const lo_ctx_offsets)[5] = NULL;
  563|   172k|            const ptrdiff_t stride = 16;
  564|   172k|            const unsigned shift = slh + 2, shift2 = 0;
  565|   172k|            const unsigned mask = (4 << slh) - 1;
  566|   172k|            memset(levels, 0, stride * ((4 << slh) + 2));
  567|   172k|            DECODE_COEFS_CLASS(TX_CLASS_H);
  ------------------
  |  |  459|   172k|        unsigned x, y; \
  |  |  460|   172k|        uint8_t *level; \
  |  |  461|   172k|        if (tx_class == TX_CLASS_2D) \
  |  |  ------------------
  |  |  |  Branch (461:13): [Folded, False: 172k]
  |  |  ------------------
  |  |  462|   172k|            rc = scan[eob], x = rc >> shift, y = rc & mask; \
  |  |  463|   172k|        else if (tx_class == TX_CLASS_H) \
  |  |  ------------------
  |  |  |  Branch (463:18): [True: 172k, Folded]
  |  |  ------------------
  |  |  464|   172k|            /* Transposing reduces the stride and padding requirements */ \
  |  |  465|   172k|            x = eob & mask, y = eob >> shift, rc = eob; \
  |  |  466|   172k|        else /* tx_class == TX_CLASS_V */ \
  |  |  467|   172k|            x = eob & mask, y = eob >> shift, rc = (x << shift2) | y; \
  |  |  468|   172k|        if (dbg) \
  |  |  ------------------
  |  |  |  Branch (468:13): [Folded, False: 172k]
  |  |  ------------------
  |  |  469|   172k|            printf("Post-lo_tok[%d][%d][%d][%d=%d=%d]: r=%d\n", \
  |  |  470|      0|                   t_dim->ctx, chroma, ctx, eob, rc, tok, ts->msac.rng); \
  |  |  471|   172k|        if (eob_tok == 2) { \
  |  |  ------------------
  |  |  |  Branch (471:13): [True: 4.37k, False: 168k]
  |  |  ------------------
  |  |  472|  4.37k|            ctx = (tx_class == TX_CLASS_2D ? (x | y) > 1 : y != 0) ? 14 : 7; \
  |  |  ------------------
  |  |  |  Branch (472:19): [True: 3.61k, False: 752]
  |  |  |  Branch (472:20): [Folded, False: 4.37k]
  |  |  ------------------
  |  |  473|  4.37k|            tok = dav1d_msac_decode_hi_tok(&ts->msac, hi_cdf[ctx]); \
  |  |  ------------------
  |  |  |  |   49|  4.37k|#define dav1d_msac_decode_hi_tok         dav1d_msac_decode_hi_tok_sse2
  |  |  ------------------
  |  |  474|  4.37k|            level_tok = tok + (3 << 6); \
  |  |  475|  4.37k|            if (dbg) \
  |  |  ------------------
  |  |  |  Branch (475:17): [Folded, False: 4.37k]
  |  |  ------------------
  |  |  476|  4.37k|                printf("Post-hi_tok[%d][%d][%d][%d=%d=%d]: r=%d\n", \
  |  |  477|      0|                       imin(t_dim->ctx, 3), chroma, ctx, eob, rc, tok, \
  |  |  478|      0|                       ts->msac.rng); \
  |  |  479|  4.37k|        } \
  |  |  480|   172k|        cf[rc] = tok << 11; \
  |  |  481|   172k|        if (tx_class == TX_CLASS_2D) \
  |  |  ------------------
  |  |  |  Branch (481:13): [Folded, False: 172k]
  |  |  ------------------
  |  |  482|   172k|            level = levels + rc; \
  |  |  483|   172k|        else \
  |  |  484|   172k|            level = levels + x * stride + y; \
  |  |  485|   172k|        *level = (uint8_t) level_tok; \
  |  |  486|  3.55M|        for (int i = eob - 1; i > 0; i--) { /* ac */ \
  |  |  ------------------
  |  |  |  Branch (486:31): [True: 3.38M, False: 172k]
  |  |  ------------------
  |  |  487|  3.38M|            unsigned rc_i; \
  |  |  488|  3.38M|            if (tx_class == TX_CLASS_2D) \
  |  |  ------------------
  |  |  |  Branch (488:17): [Folded, False: 3.38M]
  |  |  ------------------
  |  |  489|  3.38M|                rc_i = scan[i], x = rc_i >> shift, y = rc_i & mask; \
  |  |  490|  3.38M|            else if (tx_class == TX_CLASS_H) \
  |  |  ------------------
  |  |  |  Branch (490:22): [True: 3.38M, Folded]
  |  |  ------------------
  |  |  491|  3.38M|                x = i & mask, y = i >> shift, rc_i = i; \
  |  |  492|  3.38M|            else /* tx_class == TX_CLASS_V */ \
  |  |  493|  3.38M|                x = i & mask, y = i >> shift, rc_i = (x << shift2) | y; \
  |  |  494|  3.38M|            assert(x < 32 && y < 32); \
  |  |  495|  3.38M|            if (tx_class == TX_CLASS_2D) \
  |  |  ------------------
  |  |  |  Branch (495:17): [Folded, False: 3.38M]
  |  |  ------------------
  |  |  496|  3.38M|                level = levels + rc_i; \
  |  |  497|  3.38M|            else \
  |  |  498|  3.38M|                level = levels + x * stride + y; \
  |  |  499|  3.38M|            ctx = get_lo_ctx(level, tx_class, &mag, lo_ctx_offsets, x, y, stride); \
  |  |  500|  3.38M|            if (tx_class == TX_CLASS_2D) \
  |  |  ------------------
  |  |  |  Branch (500:17): [Folded, False: 3.38M]
  |  |  ------------------
  |  |  501|  3.38M|                y |= x; \
  |  |  502|  3.38M|            tok = dav1d_msac_decode_symbol_adapt4(&ts->msac, lo_cdf[ctx], 3); \
  |  |  ------------------
  |  |  |  |   47|  3.38M|#define dav1d_msac_decode_symbol_adapt4  dav1d_msac_decode_symbol_adapt4_sse2
  |  |  ------------------
  |  |  503|  3.38M|            if (dbg) \
  |  |  ------------------
  |  |  |  Branch (503:17): [Folded, False: 3.38M]
  |  |  ------------------
  |  |  504|  3.38M|                printf("Post-lo_tok[%d][%d][%d][%d=%d=%d]: r=%d\n", \
  |  |  505|      0|                       t_dim->ctx, chroma, ctx, i, rc_i, tok, ts->msac.rng); \
  |  |  506|  3.38M|            if (tok == 3) { \
  |  |  ------------------
  |  |  |  Branch (506:17): [True: 194k, False: 3.19M]
  |  |  ------------------
  |  |  507|   194k|                mag &= 63; \
  |  |  508|   194k|                ctx = (y > (tx_class == TX_CLASS_2D) ? 14 : 7) + \
  |  |  ------------------
  |  |  |  Branch (508:24): [True: 120k, False: 74.2k]
  |  |  ------------------
  |  |  509|   194k|                      (mag > 12 ? 6 : (mag + 1) >> 1); \
  |  |  ------------------
  |  |  |  Branch (509:24): [True: 27.2k, False: 167k]
  |  |  ------------------
  |  |  510|   194k|                tok = dav1d_msac_decode_hi_tok(&ts->msac, hi_cdf[ctx]); \
  |  |  ------------------
  |  |  |  |   49|   194k|#define dav1d_msac_decode_hi_tok         dav1d_msac_decode_hi_tok_sse2
  |  |  ------------------
  |  |  511|   194k|                if (dbg) \
  |  |  ------------------
  |  |  |  Branch (511:21): [Folded, False: 194k]
  |  |  ------------------
  |  |  512|   194k|                    printf("Post-hi_tok[%d][%d][%d][%d=%d=%d]: r=%d\n", \
  |  |  513|      0|                           imin(t_dim->ctx, 3), chroma, ctx, i, rc_i, tok, \
  |  |  514|      0|                           ts->msac.rng); \
  |  |  515|   194k|                *level = (uint8_t) (tok + (3 << 6)); \
  |  |  516|   194k|                cf[rc_i] = (tok << 11) | rc; \
  |  |  517|   194k|                rc = rc_i; \
  |  |  518|  3.19M|            } else { \
  |  |  519|  3.19M|                /* 0x1 for tok, 0x7ff as bitmask for rc, 0x41 for level_tok */ \
  |  |  520|  3.19M|                tok *= 0x17ff41; \
  |  |  521|  3.19M|                *level = (uint8_t) tok; \
  |  |  522|  3.19M|                /* tok ? (tok << 11) | rc : 0 */ \
  |  |  523|  3.19M|                tok = (tok >> 9) & (rc + ~0x7ffu); \
  |  |  524|  3.19M|                if (tok) rc = rc_i; \
  |  |  ------------------
  |  |  |  Branch (524:21): [True: 847k, False: 2.34M]
  |  |  ------------------
  |  |  525|  3.19M|                cf[rc_i] = tok; \
  |  |  526|  3.19M|            } \
  |  |  527|  3.38M|        } \
  |  |  528|   172k|        /* dc */ \
  |  |  529|   172k|        ctx = (tx_class == TX_CLASS_2D) ? 0 : \
  |  |  ------------------
  |  |  |  Branch (529:15): [Folded, False: 172k]
  |  |  ------------------
  |  |  530|   172k|            get_lo_ctx(levels, tx_class, &mag, lo_ctx_offsets, 0, 0, stride); \
  |  |  531|   172k|        dc_tok = dav1d_msac_decode_symbol_adapt4(&ts->msac, lo_cdf[ctx], 3); \
  |  |  ------------------
  |  |  |  |   47|   172k|#define dav1d_msac_decode_symbol_adapt4  dav1d_msac_decode_symbol_adapt4_sse2
  |  |  ------------------
  |  |  532|   172k|        if (dbg) \
  |  |  ------------------
  |  |  |  Branch (532:13): [Folded, False: 172k]
  |  |  ------------------
  |  |  533|   172k|            printf("Post-dc_lo_tok[%d][%d][%d][%d]: r=%d\n", \
  |  |  534|      0|                   t_dim->ctx, chroma, ctx, dc_tok, ts->msac.rng); \
  |  |  535|   172k|        if (dc_tok == 3) { \
  |  |  ------------------
  |  |  |  Branch (535:13): [True: 24.6k, False: 148k]
  |  |  ------------------
  |  |  536|  24.6k|            if (tx_class == TX_CLASS_2D) \
  |  |  ------------------
  |  |  |  Branch (536:17): [Folded, False: 24.6k]
  |  |  ------------------
  |  |  537|  24.6k|                mag = levels[0 * stride + 1] + levels[1 * stride + 0] + \
  |  |  538|      0|                      levels[1 * stride + 1]; \
  |  |  539|  24.6k|            mag &= 63; \
  |  |  540|  24.6k|            ctx = mag > 12 ? 6 : (mag + 1) >> 1; \
  |  |  ------------------
  |  |  |  Branch (540:19): [True: 4.94k, False: 19.7k]
  |  |  ------------------
  |  |  541|  24.6k|            dc_tok = dav1d_msac_decode_hi_tok(&ts->msac, hi_cdf[ctx]); \
  |  |  ------------------
  |  |  |  |   49|  24.6k|#define dav1d_msac_decode_hi_tok         dav1d_msac_decode_hi_tok_sse2
  |  |  ------------------
  |  |  542|  24.6k|            if (dbg) \
  |  |  ------------------
  |  |  |  Branch (542:17): [Folded, False: 24.6k]
  |  |  ------------------
  |  |  543|  24.6k|                printf("Post-dc_hi_tok[%d][%d][0][%d]: r=%d\n", \
  |  |  544|      0|                       imin(t_dim->ctx, 3), chroma, dc_tok, ts->msac.rng); \
  |  |  545|  24.6k|        } \
  |  |  546|   172k|        break
  ------------------
  |  Branch (567:13): [True: 3.38M, False: 0]
  |  Branch (567:13): [True: 3.38M, False: 0]
  ------------------
  568|   172k|        }
  569|  86.4k|        case TX_CLASS_V: {
  ------------------
  |  Branch (569:9): [True: 86.4k, False: 2.71M]
  ------------------
  570|  86.4k|            const uint8_t (*const lo_ctx_offsets)[5] = NULL;
  571|  86.4k|            const ptrdiff_t stride = 16;
  572|  86.4k|            const unsigned shift = slw + 2, shift2 = slh + 2;
  573|  86.4k|            const unsigned mask = (4 << slw) - 1;
  574|  86.4k|            memset(levels, 0, stride * ((4 << slw) + 2));
  575|  86.4k|            DECODE_COEFS_CLASS(TX_CLASS_V);
  ------------------
  |  |  459|  86.4k|        unsigned x, y; \
  |  |  460|  86.4k|        uint8_t *level; \
  |  |  461|  86.4k|        if (tx_class == TX_CLASS_2D) \
  |  |  ------------------
  |  |  |  Branch (461:13): [Folded, False: 86.4k]
  |  |  ------------------
  |  |  462|  86.4k|            rc = scan[eob], x = rc >> shift, y = rc & mask; \
  |  |  463|  86.4k|        else if (tx_class == TX_CLASS_H) \
  |  |  ------------------
  |  |  |  Branch (463:18): [Folded, False: 86.4k]
  |  |  ------------------
  |  |  464|  86.4k|            /* Transposing reduces the stride and padding requirements */ \
  |  |  465|  86.4k|            x = eob & mask, y = eob >> shift, rc = eob; \
  |  |  466|  86.4k|        else /* tx_class == TX_CLASS_V */ \
  |  |  467|  86.4k|            x = eob & mask, y = eob >> shift, rc = (x << shift2) | y; \
  |  |  468|  86.4k|        if (dbg) \
  |  |  ------------------
  |  |  |  Branch (468:13): [Folded, False: 86.4k]
  |  |  ------------------
  |  |  469|  86.4k|            printf("Post-lo_tok[%d][%d][%d][%d=%d=%d]: r=%d\n", \
  |  |  470|      0|                   t_dim->ctx, chroma, ctx, eob, rc, tok, ts->msac.rng); \
  |  |  471|  86.4k|        if (eob_tok == 2) { \
  |  |  ------------------
  |  |  |  Branch (471:13): [True: 2.80k, False: 83.5k]
  |  |  ------------------
  |  |  472|  2.80k|            ctx = (tx_class == TX_CLASS_2D ? (x | y) > 1 : y != 0) ? 14 : 7; \
  |  |  ------------------
  |  |  |  Branch (472:19): [True: 2.10k, False: 697]
  |  |  |  Branch (472:20): [Folded, False: 2.80k]
  |  |  ------------------
  |  |  473|  2.80k|            tok = dav1d_msac_decode_hi_tok(&ts->msac, hi_cdf[ctx]); \
  |  |  ------------------
  |  |  |  |   49|  2.80k|#define dav1d_msac_decode_hi_tok         dav1d_msac_decode_hi_tok_sse2
  |  |  ------------------
  |  |  474|  2.80k|            level_tok = tok + (3 << 6); \
  |  |  475|  2.80k|            if (dbg) \
  |  |  ------------------
  |  |  |  Branch (475:17): [Folded, False: 2.80k]
  |  |  ------------------
  |  |  476|  2.80k|                printf("Post-hi_tok[%d][%d][%d][%d=%d=%d]: r=%d\n", \
  |  |  477|      0|                       imin(t_dim->ctx, 3), chroma, ctx, eob, rc, tok, \
  |  |  478|      0|                       ts->msac.rng); \
  |  |  479|  2.80k|        } \
  |  |  480|  86.4k|        cf[rc] = tok << 11; \
  |  |  481|  86.4k|        if (tx_class == TX_CLASS_2D) \
  |  |  ------------------
  |  |  |  Branch (481:13): [Folded, False: 86.4k]
  |  |  ------------------
  |  |  482|  86.4k|            level = levels + rc; \
  |  |  483|  86.4k|        else \
  |  |  484|  86.4k|            level = levels + x * stride + y; \
  |  |  485|  86.4k|        *level = (uint8_t) level_tok; \
  |  |  486|  1.71M|        for (int i = eob - 1; i > 0; i--) { /* ac */ \
  |  |  ------------------
  |  |  |  Branch (486:31): [True: 1.62M, False: 86.4k]
  |  |  ------------------
  |  |  487|  1.62M|            unsigned rc_i; \
  |  |  488|  1.62M|            if (tx_class == TX_CLASS_2D) \
  |  |  ------------------
  |  |  |  Branch (488:17): [Folded, False: 1.62M]
  |  |  ------------------
  |  |  489|  1.62M|                rc_i = scan[i], x = rc_i >> shift, y = rc_i & mask; \
  |  |  490|  1.62M|            else if (tx_class == TX_CLASS_H) \
  |  |  ------------------
  |  |  |  Branch (490:22): [Folded, False: 1.62M]
  |  |  ------------------
  |  |  491|  1.62M|                x = i & mask, y = i >> shift, rc_i = i; \
  |  |  492|  1.62M|            else /* tx_class == TX_CLASS_V */ \
  |  |  493|  1.62M|                x = i & mask, y = i >> shift, rc_i = (x << shift2) | y; \
  |  |  494|  1.62M|            assert(x < 32 && y < 32); \
  |  |  495|  1.62M|            if (tx_class == TX_CLASS_2D) \
  |  |  ------------------
  |  |  |  Branch (495:17): [Folded, False: 1.62M]
  |  |  ------------------
  |  |  496|  1.62M|                level = levels + rc_i; \
  |  |  497|  1.62M|            else \
  |  |  498|  1.62M|                level = levels + x * stride + y; \
  |  |  499|  1.62M|            ctx = get_lo_ctx(level, tx_class, &mag, lo_ctx_offsets, x, y, stride); \
  |  |  500|  1.62M|            if (tx_class == TX_CLASS_2D) \
  |  |  ------------------
  |  |  |  Branch (500:17): [Folded, False: 1.62M]
  |  |  ------------------
  |  |  501|  1.62M|                y |= x; \
  |  |  502|  1.62M|            tok = dav1d_msac_decode_symbol_adapt4(&ts->msac, lo_cdf[ctx], 3); \
  |  |  ------------------
  |  |  |  |   47|  1.62M|#define dav1d_msac_decode_symbol_adapt4  dav1d_msac_decode_symbol_adapt4_sse2
  |  |  ------------------
  |  |  503|  1.62M|            if (dbg) \
  |  |  ------------------
  |  |  |  Branch (503:17): [Folded, False: 1.62M]
  |  |  ------------------
  |  |  504|  1.62M|                printf("Post-lo_tok[%d][%d][%d][%d=%d=%d]: r=%d\n", \
  |  |  505|      0|                       t_dim->ctx, chroma, ctx, i, rc_i, tok, ts->msac.rng); \
  |  |  506|  1.62M|            if (tok == 3) { \
  |  |  ------------------
  |  |  |  Branch (506:17): [True: 61.7k, False: 1.56M]
  |  |  ------------------
  |  |  507|  61.7k|                mag &= 63; \
  |  |  508|  61.7k|                ctx = (y > (tx_class == TX_CLASS_2D) ? 14 : 7) + \
  |  |  ------------------
  |  |  |  Branch (508:24): [True: 35.9k, False: 25.8k]
  |  |  ------------------
  |  |  509|  61.7k|                      (mag > 12 ? 6 : (mag + 1) >> 1); \
  |  |  ------------------
  |  |  |  Branch (509:24): [True: 7.10k, False: 54.6k]
  |  |  ------------------
  |  |  510|  61.7k|                tok = dav1d_msac_decode_hi_tok(&ts->msac, hi_cdf[ctx]); \
  |  |  ------------------
  |  |  |  |   49|  61.7k|#define dav1d_msac_decode_hi_tok         dav1d_msac_decode_hi_tok_sse2
  |  |  ------------------
  |  |  511|  61.7k|                if (dbg) \
  |  |  ------------------
  |  |  |  Branch (511:21): [Folded, False: 61.7k]
  |  |  ------------------
  |  |  512|  61.7k|                    printf("Post-hi_tok[%d][%d][%d][%d=%d=%d]: r=%d\n", \
  |  |  513|      0|                           imin(t_dim->ctx, 3), chroma, ctx, i, rc_i, tok, \
  |  |  514|      0|                           ts->msac.rng); \
  |  |  515|  61.7k|                *level = (uint8_t) (tok + (3 << 6)); \
  |  |  516|  61.7k|                cf[rc_i] = (tok << 11) | rc; \
  |  |  517|  61.7k|                rc = rc_i; \
  |  |  518|  1.56M|            } else { \
  |  |  519|  1.56M|                /* 0x1 for tok, 0x7ff as bitmask for rc, 0x41 for level_tok */ \
  |  |  520|  1.56M|                tok *= 0x17ff41; \
  |  |  521|  1.56M|                *level = (uint8_t) tok; \
  |  |  522|  1.56M|                /* tok ? (tok << 11) | rc : 0 */ \
  |  |  523|  1.56M|                tok = (tok >> 9) & (rc + ~0x7ffu); \
  |  |  524|  1.56M|                if (tok) rc = rc_i; \
  |  |  ------------------
  |  |  |  Branch (524:21): [True: 371k, False: 1.19M]
  |  |  ------------------
  |  |  525|  1.56M|                cf[rc_i] = tok; \
  |  |  526|  1.56M|            } \
  |  |  527|  1.62M|        } \
  |  |  528|  86.4k|        /* dc */ \
  |  |  529|  86.4k|        ctx = (tx_class == TX_CLASS_2D) ? 0 : \
  |  |  ------------------
  |  |  |  Branch (529:15): [Folded, False: 86.4k]
  |  |  ------------------
  |  |  530|  86.4k|            get_lo_ctx(levels, tx_class, &mag, lo_ctx_offsets, 0, 0, stride); \
  |  |  531|  86.4k|        dc_tok = dav1d_msac_decode_symbol_adapt4(&ts->msac, lo_cdf[ctx], 3); \
  |  |  ------------------
  |  |  |  |   47|  86.4k|#define dav1d_msac_decode_symbol_adapt4  dav1d_msac_decode_symbol_adapt4_sse2
  |  |  ------------------
  |  |  532|  86.4k|        if (dbg) \
  |  |  ------------------
  |  |  |  Branch (532:13): [Folded, False: 86.4k]
  |  |  ------------------
  |  |  533|  86.4k|            printf("Post-dc_lo_tok[%d][%d][%d][%d]: r=%d\n", \
  |  |  534|      0|                   t_dim->ctx, chroma, ctx, dc_tok, ts->msac.rng); \
  |  |  535|  86.4k|        if (dc_tok == 3) { \
  |  |  ------------------
  |  |  |  Branch (535:13): [True: 8.37k, False: 78.0k]
  |  |  ------------------
  |  |  536|  8.37k|            if (tx_class == TX_CLASS_2D) \
  |  |  ------------------
  |  |  |  Branch (536:17): [Folded, False: 8.37k]
  |  |  ------------------
  |  |  537|  8.37k|                mag = levels[0 * stride + 1] + levels[1 * stride + 0] + \
  |  |  538|      0|                      levels[1 * stride + 1]; \
  |  |  539|  8.37k|            mag &= 63; \
  |  |  540|  8.37k|            ctx = mag > 12 ? 6 : (mag + 1) >> 1; \
  |  |  ------------------
  |  |  |  Branch (540:19): [True: 1.60k, False: 6.76k]
  |  |  ------------------
  |  |  541|  8.37k|            dc_tok = dav1d_msac_decode_hi_tok(&ts->msac, hi_cdf[ctx]); \
  |  |  ------------------
  |  |  |  |   49|  8.37k|#define dav1d_msac_decode_hi_tok         dav1d_msac_decode_hi_tok_sse2
  |  |  ------------------
  |  |  542|  8.37k|            if (dbg) \
  |  |  ------------------
  |  |  |  Branch (542:17): [Folded, False: 8.37k]
  |  |  ------------------
  |  |  543|  8.37k|                printf("Post-dc_hi_tok[%d][%d][0][%d]: r=%d\n", \
  |  |  544|      0|                       imin(t_dim->ctx, 3), chroma, dc_tok, ts->msac.rng); \
  |  |  545|  8.37k|        } \
  |  |  546|  86.4k|        break
  ------------------
  |  Branch (575:13): [True: 1.62M, False: 0]
  |  Branch (575:13): [True: 1.62M, False: 0]
  ------------------
  576|  86.4k|        }
  577|      0|#undef DECODE_COEFS_CLASS
  578|      0|        default: assert(0);
  ------------------
  |  Branch (578:9): [True: 0, False: 2.80M]
  |  Branch (578:18): [Folded, False: 0]
  ------------------
  579|  2.80M|        }
  580|  2.80M|    } else { // dc-only
  581|   915k|        int tok_br = dav1d_msac_decode_symbol_adapt4(&ts->msac, eob_cdf[0], 2);
  ------------------
  |  |   47|   915k|#define dav1d_msac_decode_symbol_adapt4  dav1d_msac_decode_symbol_adapt4_sse2
  ------------------
  582|   915k|        dc_tok = 1 + tok_br;
  583|   915k|        if (dbg)
  ------------------
  |  Branch (583:13): [Folded, False: 915k]
  ------------------
  584|      0|            printf("Post-dc_lo_tok[%d][%d][%d][%d]: r=%d\n",
  585|      0|                   t_dim->ctx, chroma, 0, dc_tok, ts->msac.rng);
  586|   915k|        if (tok_br == 2) {
  ------------------
  |  Branch (586:13): [True: 62.0k, False: 853k]
  ------------------
  587|  62.0k|            dc_tok = dav1d_msac_decode_hi_tok(&ts->msac, hi_cdf[0]);
  ------------------
  |  |   49|  62.0k|#define dav1d_msac_decode_hi_tok         dav1d_msac_decode_hi_tok_sse2
  ------------------
  588|  62.0k|            if (dbg)
  ------------------
  |  Branch (588:17): [Folded, False: 62.0k]
  ------------------
  589|      0|                printf("Post-dc_hi_tok[%d][%d][0][%d]: r=%d\n",
  590|      0|                       imin(t_dim->ctx, 3), chroma, dc_tok, ts->msac.rng);
  591|  62.0k|        }
  592|   915k|        rc = 0;
  593|   915k|    }
  594|       |
  595|       |    // residual and sign
  596|  3.71M|    const uint16_t *const dq_tbl = ts->dq[b->seg_id][plane];
  597|  3.71M|    const uint8_t *const qm_tbl = *txtp < IDTX ? f->qm[tx][plane] : NULL;
  ------------------
  |  Branch (597:35): [True: 2.65M, False: 1.05M]
  ------------------
  598|  3.71M|    const int dq_shift = imax(0, t_dim->ctx - 2);
  599|  3.71M|    const int cf_max = ~(~127U << (BITDEPTH == 8 ? 8 : f->cur.p.bpc));
  ------------------
  |  Branch (599:36): [True: 1.67M, Folded]
  ------------------
  600|  3.71M|    unsigned cul_level, dc_sign_level;
  601|       |
  602|  3.71M|    if (!dc_tok) {
  ------------------
  |  Branch (602:9): [True: 703k, False: 3.01M]
  ------------------
  603|   703k|        cul_level = 0;
  604|   703k|        dc_sign_level = 1 << 6;
  605|   703k|        if (qm_tbl) goto ac_qm;
  ------------------
  |  Branch (605:13): [True: 78.6k, False: 624k]
  ------------------
  606|   624k|        goto ac_noqm;
  607|   703k|    }
  608|       |
  609|  3.01M|    const int dc_sign_ctx = get_dc_sign_ctx(tx, a, l);
  610|  3.01M|    uint16_t *const dc_sign_cdf = ts->cdf.coef.dc_sign[chroma][dc_sign_ctx];
  611|  3.01M|    const int dc_sign = dav1d_msac_decode_bool_adapt(&ts->msac, dc_sign_cdf);
  ------------------
  |  |   52|  3.01M|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  612|  3.01M|    if (dbg)
  ------------------
  |  Branch (612:9): [Folded, False: 3.01M]
  ------------------
  613|      0|        printf("Post-dc_sign[%d][%d][%d]: r=%d\n",
  614|      0|               chroma, dc_sign_ctx, dc_sign, ts->msac.rng);
  615|       |
  616|  3.01M|    int dc_dq = dq_tbl[0];
  617|  3.01M|    dc_sign_level = (dc_sign - 1) & (2 << 6);
  618|       |
  619|  3.01M|    if (qm_tbl) {
  ------------------
  |  Branch (619:9): [True: 400k, False: 2.61M]
  ------------------
  620|   400k|        dc_dq = (dc_dq * qm_tbl[0] + 16) >> 5;
  621|       |
  622|   400k|        if (dc_tok == 15) {
  ------------------
  |  Branch (622:13): [True: 22.8k, False: 377k]
  ------------------
  623|  22.8k|            dc_tok = read_golomb(&ts->msac) + 15;
  624|  22.8k|            if (dbg)
  ------------------
  |  Branch (624:17): [Folded, False: 22.8k]
  ------------------
  625|      0|                printf("Post-dc_residual[%d->%d]: r=%d\n",
  626|      0|                       dc_tok - 15, dc_tok, ts->msac.rng);
  627|       |
  628|  22.8k|            dc_tok &= 0xfffff;
  629|  22.8k|            dc_dq = (dc_dq * dc_tok) & 0xffffff;
  630|   377k|        } else {
  631|   377k|            dc_dq *= dc_tok;
  632|   377k|            assert(dc_dq <= 0xffffff);
  ------------------
  |  Branch (632:13): [True: 377k, False: 0]
  ------------------
  633|   377k|        }
  634|   400k|        cul_level = dc_tok;
  635|   400k|        dc_dq >>= dq_shift;
  636|   400k|        dc_dq = umin(dc_dq, cf_max + dc_sign);
  637|   400k|        cf[0] = (coef) (dc_sign ? -dc_dq : dc_dq);
  ------------------
  |  Branch (637:25): [True: 184k, False: 216k]
  ------------------
  638|       |
  639|   702k|        if (rc) ac_qm: {
  ------------------
  |  Branch (639:13): [True: 311k, False: 88.7k]
  ------------------
  640|   702k|            const unsigned ac_dq = dq_tbl[1];
  641|  8.05M|            do {
  642|  8.05M|                const int sign = dav1d_msac_decode_bool_equi(&ts->msac);
  ------------------
  |  |   53|  8.05M|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
  643|  8.05M|                if (dbg)
  ------------------
  |  Branch (643:21): [Folded, False: 8.05M]
  ------------------
  644|      0|                    printf("Post-sign[%d=%d]: r=%d\n", rc, sign, ts->msac.rng);
  645|  8.05M|                const unsigned rc_tok = cf[rc];
  646|  8.05M|                unsigned tok, dq = (ac_dq * qm_tbl[rc] + 16) >> 5;
  647|  8.05M|                int dq_sat;
  648|       |
  649|  8.05M|                if (rc_tok >= (15 << 11)) {
  ------------------
  |  Branch (649:21): [True: 510k, False: 7.53M]
  ------------------
  650|   510k|                    tok = read_golomb(&ts->msac) + 15;
  651|   510k|                    if (dbg)
  ------------------
  |  Branch (651:25): [Folded, False: 510k]
  ------------------
  652|      0|                        printf("Post-residual[%d=%d->%d]: r=%d\n",
  653|      0|                               rc, tok - 15, tok, ts->msac.rng);
  654|       |
  655|   510k|                    tok &= 0xfffff;
  656|   510k|                    dq = (dq * tok) & 0xffffff;
  657|  7.53M|                } else {
  658|  7.53M|                    tok = rc_tok >> 11;
  659|  7.53M|                    dq *= tok;
  660|  7.53M|                    assert(dq <= 0xffffff);
  ------------------
  |  Branch (660:21): [True: 7.53M, False: 0]
  ------------------
  661|  7.53M|                }
  662|  8.05M|                cul_level += tok;
  663|  8.05M|                dq >>= dq_shift;
  664|  8.05M|                dq_sat = umin(dq, cf_max + sign);
  665|  8.05M|                cf[rc] = (coef) (sign ? -dq_sat : dq_sat);
  ------------------
  |  Branch (665:34): [True: 4.13M, False: 3.91M]
  ------------------
  666|       |
  667|  8.05M|                rc = rc_tok & 0x3ff;
  668|  8.05M|            } while (rc);
  ------------------
  |  Branch (668:22): [True: 7.65M, False: 390k]
  ------------------
  669|   702k|        }
  670|  2.61M|    } else {
  671|       |        // non-qmatrix is the common case and allows for additional optimizations
  672|  2.61M|        if (dc_tok == 15) {
  ------------------
  |  Branch (672:13): [True: 94.6k, False: 2.51M]
  ------------------
  673|  94.6k|            dc_tok = read_golomb(&ts->msac) + 15;
  674|  94.6k|            if (dbg)
  ------------------
  |  Branch (674:17): [Folded, False: 94.6k]
  ------------------
  675|      0|                printf("Post-dc_residual[%d->%d]: r=%d\n",
  676|      0|                       dc_tok - 15, dc_tok, ts->msac.rng);
  677|       |
  678|  94.6k|            dc_tok &= 0xfffff;
  679|  94.6k|            dc_dq = ((dc_dq * dc_tok) & 0xffffff) >> dq_shift;
  680|  94.6k|            dc_dq = umin(dc_dq, cf_max + dc_sign);
  681|  2.51M|        } else {
  682|  2.51M|            dc_dq = ((dc_dq * dc_tok) >> dq_shift);
  683|  2.51M|            assert(dc_dq <= cf_max);
  ------------------
  |  Branch (683:13): [True: 2.51M, False: 0]
  ------------------
  684|  2.51M|        }
  685|  2.61M|        cul_level = dc_tok;
  686|  2.61M|        cf[0] = (coef) (dc_sign ? -dc_dq : dc_dq);
  ------------------
  |  Branch (686:25): [True: 1.31M, False: 1.29M]
  ------------------
  687|       |
  688|  4.19M|        if (rc) ac_noqm: {
  ------------------
  |  Branch (688:13): [True: 1.78M, False: 826k]
  ------------------
  689|  4.19M|            const unsigned ac_dq = dq_tbl[1];
  690|  22.3M|            do {
  691|  22.3M|                const int sign = dav1d_msac_decode_bool_equi(&ts->msac);
  ------------------
  |  |   53|  22.3M|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
  692|  22.3M|                if (dbg)
  ------------------
  |  Branch (692:21): [Folded, False: 22.3M]
  ------------------
  693|      0|                    printf("Post-sign[%d=%d]: r=%d\n", rc, sign, ts->msac.rng);
  694|  22.3M|                const unsigned rc_tok = cf[rc];
  695|  22.3M|                unsigned tok;
  696|  22.3M|                int dq;
  697|       |
  698|       |                // residual
  699|  22.3M|                if (rc_tok >= (15 << 11)) {
  ------------------
  |  Branch (699:21): [True: 522k, False: 21.8M]
  ------------------
  700|   522k|                    tok = read_golomb(&ts->msac) + 15;
  701|   522k|                    if (dbg)
  ------------------
  |  Branch (701:25): [Folded, False: 522k]
  ------------------
  702|      0|                        printf("Post-residual[%d=%d->%d]: r=%d\n",
  703|      0|                               rc, tok - 15, tok, ts->msac.rng);
  704|       |
  705|       |                    // coefficient parsing, see 5.11.39
  706|   522k|                    tok &= 0xfffff;
  707|       |
  708|       |                    // dequant, see 7.12.3
  709|   522k|                    dq = ((ac_dq * tok) & 0xffffff) >> dq_shift;
  710|   522k|                    dq = umin(dq, cf_max + sign);
  711|  21.8M|                } else {
  712|       |                    // cannot exceed cf_max, so we can avoid the clipping
  713|  21.8M|                    tok = rc_tok >> 11;
  714|  21.8M|                    dq = ((ac_dq * tok) >> dq_shift);
  715|  21.8M|                    assert(dq <= cf_max);
  ------------------
  |  Branch (715:21): [True: 21.8M, False: 0]
  ------------------
  716|  21.8M|                }
  717|  22.3M|                cul_level += tok;
  718|  22.3M|                cf[rc] = (coef) (sign ? -dq : dq);
  ------------------
  |  Branch (718:34): [True: 11.3M, False: 11.0M]
  ------------------
  719|       |
  720|  22.3M|                rc = rc_tok & 0x3ff; // next non-zero rc, zero if eob
  721|  22.3M|            } while (rc);
  ------------------
  |  Branch (721:22): [True: 19.9M, False: 2.41M]
  ------------------
  722|  4.19M|        }
  723|  2.61M|    }
  724|       |
  725|       |    // context
  726|  3.71M|    *res_ctx = umin(cul_level, 63) | dc_sign_level;
  727|       |
  728|  3.71M|    return eob;
  729|  3.01M|}
recon_tmpl.c:get_skip_ctx:
   65|  7.55M|{
   66|  7.55M|    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
   67|       |
   68|  7.55M|    if (chroma) {
  ------------------
  |  Branch (68:9): [True: 4.04M, False: 3.50M]
  ------------------
   69|  4.04M|        const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420;
   70|  4.04M|        const int ss_hor = layout != DAV1D_PIXEL_LAYOUT_I444;
   71|  4.04M|        const int not_one_blk = b_dim[2] - (!!b_dim[2] && ss_hor) > t_dim->lw ||
  ------------------
  |  Branch (71:33): [True: 1.21M, False: 2.82M]
  |  Branch (71:45): [True: 3.66M, False: 377k]
  |  Branch (71:59): [True: 682k, False: 2.98M]
  ------------------
   72|  2.82M|                                b_dim[3] - (!!b_dim[3] && ss_ver) > t_dim->lh;
  ------------------
  |  Branch (72:33): [True: 81.0k, False: 2.74M]
  |  Branch (72:45): [True: 2.20M, False: 623k]
  |  Branch (72:59): [True: 367k, False: 1.83M]
  ------------------
   73|  4.04M|        unsigned ca, cl;
   74|       |
   75|  4.04M|#define MERGE_CTX(dir, type, no_val) \
   76|  4.04M|        c##dir = *(const type *) dir != no_val; \
   77|  4.04M|        break
   78|       |
   79|  4.04M|        switch (t_dim->lw) {
   80|       |        /* For some reason the MSVC CRT _wassert() function is not flagged as
   81|       |         * __declspec(noreturn), so when using those headers the compiler will
   82|       |         * expect execution to continue after an assertion has been triggered
   83|       |         * and will therefore complain about the use of uninitialized variables
   84|       |         * when compiled in debug mode if we put the default case at the end. */
   85|      0|        default: assert(0); /* fall-through */
  ------------------
  |  Branch (85:9): [True: 0, False: 4.04M]
  |  Branch (85:18): [Folded, False: 0]
  ------------------
   86|   992k|        case TX_4X4:   MERGE_CTX(a, uint8_t,  0x40);
  ------------------
  |  |   76|   992k|        c##dir = *(const type *) dir != no_val; \
  |  |   77|   992k|        break
  ------------------
  |  Branch (86:9): [True: 992k, False: 3.05M]
  ------------------
   87|   903k|        case TX_8X8:   MERGE_CTX(a, uint16_t, 0x4040);
  ------------------
  |  |   76|   903k|        c##dir = *(const type *) dir != no_val; \
  |  |   77|   903k|        break
  ------------------
  |  Branch (87:9): [True: 903k, False: 3.14M]
  ------------------
   88|   938k|        case TX_16X16: MERGE_CTX(a, uint32_t, 0x40404040U);
  ------------------
  |  |   76|   938k|        c##dir = *(const type *) dir != no_val; \
  |  |   77|   938k|        break
  ------------------
  |  Branch (88:9): [True: 938k, False: 3.10M]
  ------------------
   89|  1.21M|        case TX_32X32: MERGE_CTX(a, uint64_t, 0x4040404040404040ULL);
  ------------------
  |  |   76|  1.21M|        c##dir = *(const type *) dir != no_val; \
  |  |   77|  1.21M|        break
  ------------------
  |  Branch (89:9): [True: 1.21M, False: 2.83M]
  ------------------
   90|  4.04M|        }
   91|  4.04M|        switch (t_dim->lh) {
   92|      0|        default: assert(0); /* fall-through */
  ------------------
  |  Branch (92:9): [True: 0, False: 4.04M]
  |  Branch (92:18): [Folded, False: 0]
  ------------------
   93|  1.27M|        case TX_4X4:   MERGE_CTX(l, uint8_t,  0x40);
  ------------------
  |  |   76|  1.27M|        c##dir = *(const type *) dir != no_val; \
  |  |   77|  1.27M|        break
  ------------------
  |  Branch (93:9): [True: 1.27M, False: 2.77M]
  ------------------
   94|  1.00M|        case TX_8X8:   MERGE_CTX(l, uint16_t, 0x4040);
  ------------------
  |  |   76|  1.00M|        c##dir = *(const type *) dir != no_val; \
  |  |   77|  1.00M|        break
  ------------------
  |  Branch (94:9): [True: 1.00M, False: 3.04M]
  ------------------
   95|   749k|        case TX_16X16: MERGE_CTX(l, uint32_t, 0x40404040U);
  ------------------
  |  |   76|   749k|        c##dir = *(const type *) dir != no_val; \
  |  |   77|   749k|        break
  ------------------
  |  Branch (95:9): [True: 749k, False: 3.29M]
  ------------------
   96|  1.02M|        case TX_32X32: MERGE_CTX(l, uint64_t, 0x4040404040404040ULL);
  ------------------
  |  |   76|  1.02M|        c##dir = *(const type *) dir != no_val; \
  |  |   77|  1.02M|        break
  ------------------
  |  Branch (96:9): [True: 1.02M, False: 3.02M]
  ------------------
   97|  4.04M|        }
   98|  4.04M|#undef MERGE_CTX
   99|       |
  100|  4.04M|        return 7 + not_one_blk * 3 + ca + cl;
  101|  4.04M|    } else if (b_dim[2] == t_dim->lw && b_dim[3] == t_dim->lh) {
  ------------------
  |  Branch (101:16): [True: 1.86M, False: 1.63M]
  |  Branch (101:41): [True: 1.78M, False: 88.4k]
  ------------------
  102|  1.78M|        return 0;
  103|  1.78M|    } else {
  104|  1.72M|        unsigned la, ll;
  105|       |
  106|  1.72M|#define MERGE_CTX(dir, type, tx) \
  107|  1.72M|        if (tx == TX_64X64) { \
  108|  1.72M|            uint64_t tmp = *(const uint64_t *) dir; \
  109|  1.72M|            tmp |= *(const uint64_t *) &dir[8]; \
  110|  1.72M|            l##dir = (unsigned) (tmp >> 32) | (unsigned) tmp; \
  111|  1.72M|        } else \
  112|  1.72M|            l##dir = *(const type *) dir; \
  113|  1.72M|        if (tx == TX_32X32) l##dir |= *(const type *) &dir[sizeof(type)]; \
  114|  1.72M|        if (tx >= TX_16X16) l##dir |= l##dir >> 16; \
  115|  1.72M|        if (tx >= TX_8X8)   l##dir |= l##dir >> 8; \
  116|  1.72M|        break
  117|       |
  118|  1.72M|        switch (t_dim->lw) {
  119|      0|        default: assert(0); /* fall-through */
  ------------------
  |  Branch (119:9): [True: 0, False: 1.72M]
  |  Branch (119:18): [Folded, False: 0]
  ------------------
  120|   883k|        case TX_4X4:   MERGE_CTX(a, uint8_t,  TX_4X4);
  ------------------
  |  |  107|   883k|        if (tx == TX_64X64) { \
  |  |  ------------------
  |  |  |  Branch (107:13): [Folded, False: 883k]
  |  |  ------------------
  |  |  108|      0|            uint64_t tmp = *(const uint64_t *) dir; \
  |  |  109|      0|            tmp |= *(const uint64_t *) &dir[8]; \
  |  |  110|      0|            l##dir = (unsigned) (tmp >> 32) | (unsigned) tmp; \
  |  |  111|      0|        } else \
  |  |  112|   883k|            l##dir = *(const type *) dir; \
  |  |  113|   883k|        if (tx == TX_32X32) l##dir |= *(const type *) &dir[sizeof(type)]; \
  |  |  ------------------
  |  |  |  Branch (113:13): [Folded, False: 883k]
  |  |  ------------------
  |  |  114|   883k|        if (tx >= TX_16X16) l##dir |= l##dir >> 16; \
  |  |  ------------------
  |  |  |  Branch (114:13): [Folded, False: 883k]
  |  |  ------------------
  |  |  115|   883k|        if (tx >= TX_8X8)   l##dir |= l##dir >> 8; \
  |  |  ------------------
  |  |  |  Branch (115:13): [Folded, False: 883k]
  |  |  ------------------
  |  |  116|   883k|        break
  ------------------
  |  Branch (120:9): [True: 883k, False: 842k]
  ------------------
  121|   452k|        case TX_8X8:   MERGE_CTX(a, uint16_t, TX_8X8);
  ------------------
  |  |  107|   452k|        if (tx == TX_64X64) { \
  |  |  ------------------
  |  |  |  Branch (107:13): [Folded, False: 452k]
  |  |  ------------------
  |  |  108|      0|            uint64_t tmp = *(const uint64_t *) dir; \
  |  |  109|      0|            tmp |= *(const uint64_t *) &dir[8]; \
  |  |  110|      0|            l##dir = (unsigned) (tmp >> 32) | (unsigned) tmp; \
  |  |  111|      0|        } else \
  |  |  112|   452k|            l##dir = *(const type *) dir; \
  |  |  113|   452k|        if (tx == TX_32X32) l##dir |= *(const type *) &dir[sizeof(type)]; \
  |  |  ------------------
  |  |  |  Branch (113:13): [Folded, False: 452k]
  |  |  ------------------
  |  |  114|   452k|        if (tx >= TX_16X16) l##dir |= l##dir >> 16; \
  |  |  ------------------
  |  |  |  Branch (114:13): [Folded, False: 452k]
  |  |  ------------------
  |  |  115|   452k|        if (tx >= TX_8X8)   l##dir |= l##dir >> 8; \
  |  |  ------------------
  |  |  |  Branch (115:13): [True: 452k, Folded]
  |  |  ------------------
  |  |  116|   452k|        break
  ------------------
  |  Branch (121:9): [True: 452k, False: 1.27M]
  ------------------
  122|   244k|        case TX_16X16: MERGE_CTX(a, uint32_t, TX_16X16);
  ------------------
  |  |  107|   244k|        if (tx == TX_64X64) { \
  |  |  ------------------
  |  |  |  Branch (107:13): [Folded, False: 244k]
  |  |  ------------------
  |  |  108|      0|            uint64_t tmp = *(const uint64_t *) dir; \
  |  |  109|      0|            tmp |= *(const uint64_t *) &dir[8]; \
  |  |  110|      0|            l##dir = (unsigned) (tmp >> 32) | (unsigned) tmp; \
  |  |  111|      0|        } else \
  |  |  112|   244k|            l##dir = *(const type *) dir; \
  |  |  113|   244k|        if (tx == TX_32X32) l##dir |= *(const type *) &dir[sizeof(type)]; \
  |  |  ------------------
  |  |  |  Branch (113:13): [Folded, False: 244k]
  |  |  ------------------
  |  |  114|   244k|        if (tx >= TX_16X16) l##dir |= l##dir >> 16; \
  |  |  ------------------
  |  |  |  Branch (114:13): [True: 244k, Folded]
  |  |  ------------------
  |  |  115|   244k|        if (tx >= TX_8X8)   l##dir |= l##dir >> 8; \
  |  |  ------------------
  |  |  |  Branch (115:13): [True: 244k, Folded]
  |  |  ------------------
  |  |  116|   244k|        break
  ------------------
  |  Branch (122:9): [True: 244k, False: 1.48M]
  ------------------
  123|  26.8k|        case TX_32X32: MERGE_CTX(a, uint32_t, TX_32X32);
  ------------------
  |  |  107|  26.8k|        if (tx == TX_64X64) { \
  |  |  ------------------
  |  |  |  Branch (107:13): [Folded, False: 26.8k]
  |  |  ------------------
  |  |  108|      0|            uint64_t tmp = *(const uint64_t *) dir; \
  |  |  109|      0|            tmp |= *(const uint64_t *) &dir[8]; \
  |  |  110|      0|            l##dir = (unsigned) (tmp >> 32) | (unsigned) tmp; \
  |  |  111|      0|        } else \
  |  |  112|  26.8k|            l##dir = *(const type *) dir; \
  |  |  113|  26.8k|        if (tx == TX_32X32) l##dir |= *(const type *) &dir[sizeof(type)]; \
  |  |  ------------------
  |  |  |  Branch (113:13): [True: 26.8k, Folded]
  |  |  ------------------
  |  |  114|  26.8k|        if (tx >= TX_16X16) l##dir |= l##dir >> 16; \
  |  |  ------------------
  |  |  |  Branch (114:13): [True: 26.8k, Folded]
  |  |  ------------------
  |  |  115|  26.8k|        if (tx >= TX_8X8)   l##dir |= l##dir >> 8; \
  |  |  ------------------
  |  |  |  Branch (115:13): [True: 26.8k, Folded]
  |  |  ------------------
  |  |  116|  26.8k|        break
  ------------------
  |  Branch (123:9): [True: 26.8k, False: 1.69M]
  ------------------
  124|   119k|        case TX_64X64: MERGE_CTX(a, uint32_t, TX_64X64);
  ------------------
  |  |  107|   119k|        if (tx == TX_64X64) { \
  |  |  ------------------
  |  |  |  Branch (107:13): [True: 119k, Folded]
  |  |  ------------------
  |  |  108|   119k|            uint64_t tmp = *(const uint64_t *) dir; \
  |  |  109|   119k|            tmp |= *(const uint64_t *) &dir[8]; \
  |  |  110|   119k|            l##dir = (unsigned) (tmp >> 32) | (unsigned) tmp; \
  |  |  111|   119k|        } else \
  |  |  112|   119k|            l##dir = *(const type *) dir; \
  |  |  113|   119k|        if (tx == TX_32X32) l##dir |= *(const type *) &dir[sizeof(type)]; \
  |  |  ------------------
  |  |  |  Branch (113:13): [Folded, False: 119k]
  |  |  ------------------
  |  |  114|   119k|        if (tx >= TX_16X16) l##dir |= l##dir >> 16; \
  |  |  ------------------
  |  |  |  Branch (114:13): [True: 119k, Folded]
  |  |  ------------------
  |  |  115|   119k|        if (tx >= TX_8X8)   l##dir |= l##dir >> 8; \
  |  |  ------------------
  |  |  |  Branch (115:13): [True: 119k, Folded]
  |  |  ------------------
  |  |  116|   119k|        break
  ------------------
  |  Branch (124:9): [True: 119k, False: 1.60M]
  ------------------
  125|  1.72M|        }
  126|  1.72M|        switch (t_dim->lh) {
  127|      0|        default: assert(0); /* fall-through */
  ------------------
  |  Branch (127:9): [True: 0, False: 1.72M]
  |  Branch (127:18): [Folded, False: 0]
  ------------------
  128|   892k|        case TX_4X4:   MERGE_CTX(l, uint8_t,  TX_4X4);
  ------------------
  |  |  107|   892k|        if (tx == TX_64X64) { \
  |  |  ------------------
  |  |  |  Branch (107:13): [Folded, False: 892k]
  |  |  ------------------
  |  |  108|      0|            uint64_t tmp = *(const uint64_t *) dir; \
  |  |  109|      0|            tmp |= *(const uint64_t *) &dir[8]; \
  |  |  110|      0|            l##dir = (unsigned) (tmp >> 32) | (unsigned) tmp; \
  |  |  111|      0|        } else \
  |  |  112|   892k|            l##dir = *(const type *) dir; \
  |  |  113|   892k|        if (tx == TX_32X32) l##dir |= *(const type *) &dir[sizeof(type)]; \
  |  |  ------------------
  |  |  |  Branch (113:13): [Folded, False: 892k]
  |  |  ------------------
  |  |  114|   892k|        if (tx >= TX_16X16) l##dir |= l##dir >> 16; \
  |  |  ------------------
  |  |  |  Branch (114:13): [Folded, False: 892k]
  |  |  ------------------
  |  |  115|   892k|        if (tx >= TX_8X8)   l##dir |= l##dir >> 8; \
  |  |  ------------------
  |  |  |  Branch (115:13): [Folded, False: 892k]
  |  |  ------------------
  |  |  116|   892k|        break
  ------------------
  |  Branch (128:9): [True: 892k, False: 833k]
  ------------------
  129|   447k|        case TX_8X8:   MERGE_CTX(l, uint16_t, TX_8X8);
  ------------------
  |  |  107|   447k|        if (tx == TX_64X64) { \
  |  |  ------------------
  |  |  |  Branch (107:13): [Folded, False: 447k]
  |  |  ------------------
  |  |  108|      0|            uint64_t tmp = *(const uint64_t *) dir; \
  |  |  109|      0|            tmp |= *(const uint64_t *) &dir[8]; \
  |  |  110|      0|            l##dir = (unsigned) (tmp >> 32) | (unsigned) tmp; \
  |  |  111|      0|        } else \
  |  |  112|   447k|            l##dir = *(const type *) dir; \
  |  |  113|   447k|        if (tx == TX_32X32) l##dir |= *(const type *) &dir[sizeof(type)]; \
  |  |  ------------------
  |  |  |  Branch (113:13): [Folded, False: 447k]
  |  |  ------------------
  |  |  114|   447k|        if (tx >= TX_16X16) l##dir |= l##dir >> 16; \
  |  |  ------------------
  |  |  |  Branch (114:13): [Folded, False: 447k]
  |  |  ------------------
  |  |  115|   447k|        if (tx >= TX_8X8)   l##dir |= l##dir >> 8; \
  |  |  ------------------
  |  |  |  Branch (115:13): [True: 447k, Folded]
  |  |  ------------------
  |  |  116|   447k|        break
  ------------------
  |  Branch (129:9): [True: 447k, False: 1.27M]
  ------------------
  130|   239k|        case TX_16X16: MERGE_CTX(l, uint32_t, TX_16X16);
  ------------------
  |  |  107|   239k|        if (tx == TX_64X64) { \
  |  |  ------------------
  |  |  |  Branch (107:13): [Folded, False: 239k]
  |  |  ------------------
  |  |  108|      0|            uint64_t tmp = *(const uint64_t *) dir; \
  |  |  109|      0|            tmp |= *(const uint64_t *) &dir[8]; \
  |  |  110|      0|            l##dir = (unsigned) (tmp >> 32) | (unsigned) tmp; \
  |  |  111|      0|        } else \
  |  |  112|   239k|            l##dir = *(const type *) dir; \
  |  |  113|   239k|        if (tx == TX_32X32) l##dir |= *(const type *) &dir[sizeof(type)]; \
  |  |  ------------------
  |  |  |  Branch (113:13): [Folded, False: 239k]
  |  |  ------------------
  |  |  114|   239k|        if (tx >= TX_16X16) l##dir |= l##dir >> 16; \
  |  |  ------------------
  |  |  |  Branch (114:13): [True: 239k, Folded]
  |  |  ------------------
  |  |  115|   239k|        if (tx >= TX_8X8)   l##dir |= l##dir >> 8; \
  |  |  ------------------
  |  |  |  Branch (115:13): [True: 239k, Folded]
  |  |  ------------------
  |  |  116|   239k|        break
  ------------------
  |  Branch (130:9): [True: 239k, False: 1.48M]
  ------------------
  131|  26.7k|        case TX_32X32: MERGE_CTX(l, uint32_t, TX_32X32);
  ------------------
  |  |  107|  26.7k|        if (tx == TX_64X64) { \
  |  |  ------------------
  |  |  |  Branch (107:13): [Folded, False: 26.7k]
  |  |  ------------------
  |  |  108|      0|            uint64_t tmp = *(const uint64_t *) dir; \
  |  |  109|      0|            tmp |= *(const uint64_t *) &dir[8]; \
  |  |  110|      0|            l##dir = (unsigned) (tmp >> 32) | (unsigned) tmp; \
  |  |  111|      0|        } else \
  |  |  112|  26.7k|            l##dir = *(const type *) dir; \
  |  |  113|  26.7k|        if (tx == TX_32X32) l##dir |= *(const type *) &dir[sizeof(type)]; \
  |  |  ------------------
  |  |  |  Branch (113:13): [True: 26.7k, Folded]
  |  |  ------------------
  |  |  114|  26.7k|        if (tx >= TX_16X16) l##dir |= l##dir >> 16; \
  |  |  ------------------
  |  |  |  Branch (114:13): [True: 26.7k, Folded]
  |  |  ------------------
  |  |  115|  26.7k|        if (tx >= TX_8X8)   l##dir |= l##dir >> 8; \
  |  |  ------------------
  |  |  |  Branch (115:13): [True: 26.7k, Folded]
  |  |  ------------------
  |  |  116|  26.7k|        break
  ------------------
  |  Branch (131:9): [True: 26.7k, False: 1.69M]
  ------------------
  132|   119k|        case TX_64X64: MERGE_CTX(l, uint32_t, TX_64X64);
  ------------------
  |  |  107|   119k|        if (tx == TX_64X64) { \
  |  |  ------------------
  |  |  |  Branch (107:13): [True: 119k, Folded]
  |  |  ------------------
  |  |  108|   119k|            uint64_t tmp = *(const uint64_t *) dir; \
  |  |  109|   119k|            tmp |= *(const uint64_t *) &dir[8]; \
  |  |  110|   119k|            l##dir = (unsigned) (tmp >> 32) | (unsigned) tmp; \
  |  |  111|   119k|        } else \
  |  |  112|   119k|            l##dir = *(const type *) dir; \
  |  |  113|   119k|        if (tx == TX_32X32) l##dir |= *(const type *) &dir[sizeof(type)]; \
  |  |  ------------------
  |  |  |  Branch (113:13): [Folded, False: 119k]
  |  |  ------------------
  |  |  114|   119k|        if (tx >= TX_16X16) l##dir |= l##dir >> 16; \
  |  |  ------------------
  |  |  |  Branch (114:13): [True: 119k, Folded]
  |  |  ------------------
  |  |  115|   119k|        if (tx >= TX_8X8)   l##dir |= l##dir >> 8; \
  |  |  ------------------
  |  |  |  Branch (115:13): [True: 119k, Folded]
  |  |  ------------------
  |  |  116|   119k|        break
  ------------------
  |  Branch (132:9): [True: 119k, False: 1.60M]
  ------------------
  133|  1.72M|        }
  134|  1.72M|#undef MERGE_CTX
  135|       |
  136|  1.72M|        return dav1d_skip_ctx[umin(la & 0x3F, 4)][umin(ll & 0x3F, 4)];
  137|  1.72M|    }
  138|  7.55M|}
recon_tmpl.c:get_lo_ctx:
  304|  80.2M|{
  305|  80.2M|    unsigned mag = levels[0 * stride + 1] + levels[1 * stride + 0];
  306|  80.2M|    unsigned offset;
  307|  80.2M|    if (tx_class == TX_CLASS_2D) {
  ------------------
  |  Branch (307:9): [True: 74.9M, False: 5.27M]
  ------------------
  308|  74.9M|        mag += levels[1 * stride + 1];
  309|  74.9M|        *hi_mag = mag;
  310|  74.9M|        mag += levels[0 * stride + 2] + levels[2 * stride + 0];
  311|  74.9M|        offset = ctx_offsets[umin(y, 4)][umin(x, 4)];
  312|  74.9M|    } else {
  313|  5.27M|        mag += levels[0 * stride + 2];
  314|  5.27M|        *hi_mag = mag;
  315|  5.27M|        mag += levels[0 * stride + 3] + levels[0 * stride + 4];
  316|  5.27M|        offset = 26 + (y > 1 ? 10 : y * 5);
  ------------------
  |  Branch (316:24): [True: 2.44M, False: 2.83M]
  ------------------
  317|  5.27M|    }
  318|  80.2M|    return offset + (mag > 512 ? 4 : (mag + 64) >> 7);
  ------------------
  |  Branch (318:22): [True: 5.72M, False: 74.4M]
  ------------------
  319|  80.2M|}
recon_tmpl.c:get_dc_sign_ctx:
  143|  3.01M|{
  144|  3.01M|    uint64_t mask = 0xC0C0C0C0C0C0C0C0ULL, mul = 0x0101010101010101ULL;
  145|  3.01M|    int s;
  146|       |
  147|  3.01M|#if ARCH_X86_64 && defined(__GNUC__)
  148|       |    /* Coerce compilers into producing better code. For some reason
  149|       |     * every x86-64 compiler is awful at handling 64-bit constants. */
  150|  3.01M|    __asm__("" : "+r"(mask), "+r"(mul));
  151|  3.01M|#endif
  152|       |
  153|  3.01M|    switch(tx) {
  154|      0|    default: assert(0); /* fall-through */
  ------------------
  |  Branch (154:5): [True: 0, False: 3.01M]
  |  Branch (154:14): [Folded, False: 0]
  ------------------
  155|   806k|    case TX_4X4: {
  ------------------
  |  Branch (155:5): [True: 806k, False: 2.20M]
  ------------------
  156|   806k|        int t = *(const uint8_t *) a >> 6;
  157|   806k|        t    += *(const uint8_t *) l >> 6;
  158|   806k|        s = t - 1 - 1;
  159|   806k|        break;
  160|      0|    }
  161|   399k|    case TX_8X8: {
  ------------------
  |  Branch (161:5): [True: 399k, False: 2.61M]
  ------------------
  162|   399k|        uint32_t t = *(const uint16_t *) a & (uint32_t) mask;
  163|   399k|        t         += *(const uint16_t *) l & (uint32_t) mask;
  164|   399k|        t *= 0x04040404U;
  165|   399k|        s = (int) (t >> 24) - 2 - 2;
  166|   399k|        break;
  167|      0|    }
  168|   266k|    case TX_16X16: {
  ------------------
  |  Branch (168:5): [True: 266k, False: 2.74M]
  ------------------
  169|   266k|        uint32_t t = (*(const uint32_t *) a & (uint32_t) mask) >> 6;
  170|   266k|        t         += (*(const uint32_t *) l & (uint32_t) mask) >> 6;
  171|   266k|        t *= (uint32_t) mul;
  172|   266k|        s = (int) (t >> 24) - 4 - 4;
  173|   266k|        break;
  174|      0|    }
  175|   262k|    case TX_32X32: {
  ------------------
  |  Branch (175:5): [True: 262k, False: 2.75M]
  ------------------
  176|   262k|        uint64_t t = (*(const uint64_t *) a & mask) >> 6;
  177|   262k|        t         += (*(const uint64_t *) l & mask) >> 6;
  178|   262k|        t *= mul;
  179|   262k|        s = (int) (t >> 56) - 8 - 8;
  180|   262k|        break;
  181|      0|    }
  182|   104k|    case TX_64X64: {
  ------------------
  |  Branch (182:5): [True: 104k, False: 2.90M]
  ------------------
  183|   104k|        uint64_t t = (*(const uint64_t *) &a[0] & mask) >> 6;
  184|   104k|        t         += (*(const uint64_t *) &a[8] & mask) >> 6;
  185|   104k|        t         += (*(const uint64_t *) &l[0] & mask) >> 6;
  186|   104k|        t         += (*(const uint64_t *) &l[8] & mask) >> 6;
  187|   104k|        t *= mul;
  188|   104k|        s = (int) (t >> 56) - 16 - 16;
  189|   104k|        break;
  190|      0|    }
  191|  94.0k|    case RTX_4X8: {
  ------------------
  |  Branch (191:5): [True: 94.0k, False: 2.91M]
  ------------------
  192|  94.0k|        uint32_t t = *(const uint8_t  *) a & (uint32_t) mask;
  193|  94.0k|        t         += *(const uint16_t *) l & (uint32_t) mask;
  194|  94.0k|        t *= 0x04040404U;
  195|  94.0k|        s = (int) (t >> 24) - 1 - 2;
  196|  94.0k|        break;
  197|      0|    }
  198|   159k|    case RTX_8X4: {
  ------------------
  |  Branch (198:5): [True: 159k, False: 2.85M]
  ------------------
  199|   159k|        uint32_t t = *(const uint16_t *) a & (uint32_t) mask;
  200|   159k|        t         += *(const uint8_t  *) l & (uint32_t) mask;
  201|   159k|        t *= 0x04040404U;
  202|   159k|        s = (int) (t >> 24) - 2 - 1;
  203|   159k|        break;
  204|      0|    }
  205|   114k|    case RTX_8X16: {
  ------------------
  |  Branch (205:5): [True: 114k, False: 2.89M]
  ------------------
  206|   114k|        uint32_t t = *(const uint16_t *) a & (uint32_t) mask;
  207|   114k|        t         += *(const uint32_t *) l & (uint32_t) mask;
  208|   114k|        t = (t >> 6) * (uint32_t) mul;
  209|   114k|        s = (int) (t >> 24) - 2 - 4;
  210|   114k|        break;
  211|      0|    }
  212|   215k|    case RTX_16X8: {
  ------------------
  |  Branch (212:5): [True: 215k, False: 2.79M]
  ------------------
  213|   215k|        uint32_t t = *(const uint32_t *) a & (uint32_t) mask;
  214|   215k|        t         += *(const uint16_t *) l & (uint32_t) mask;
  215|   215k|        t = (t >> 6) * (uint32_t) mul;
  216|   215k|        s = (int) (t >> 24) - 4 - 2;
  217|   215k|        break;
  218|      0|    }
  219|  52.2k|    case RTX_16X32: {
  ------------------
  |  Branch (219:5): [True: 52.2k, False: 2.96M]
  ------------------
  220|  52.2k|        uint64_t t = *(const uint32_t *) a & (uint32_t) mask;
  221|  52.2k|        t         += *(const uint64_t *) l & mask;
  222|  52.2k|        t = (t >> 6) * mul;
  223|  52.2k|        s = (int) (t >> 56) - 4 - 8;
  224|  52.2k|        break;
  225|      0|    }
  226|   110k|    case RTX_32X16: {
  ------------------
  |  Branch (226:5): [True: 110k, False: 2.90M]
  ------------------
  227|   110k|        uint64_t t = *(const uint64_t *) a & mask;
  228|   110k|        t         += *(const uint32_t *) l & (uint32_t) mask;
  229|   110k|        t = (t >> 6) * mul;
  230|   110k|        s = (int) (t >> 56) - 8 - 4;
  231|   110k|        break;
  232|      0|    }
  233|  7.45k|    case RTX_32X64: {
  ------------------
  |  Branch (233:5): [True: 7.45k, False: 3.00M]
  ------------------
  234|  7.45k|        uint64_t t = (*(const uint64_t *) &a[0] & mask) >> 6;
  235|  7.45k|        t         += (*(const uint64_t *) &l[0] & mask) >> 6;
  236|  7.45k|        t         += (*(const uint64_t *) &l[8] & mask) >> 6;
  237|  7.45k|        t *= mul;
  238|  7.45k|        s = (int) (t >> 56) - 8 - 16;
  239|  7.45k|        break;
  240|      0|    }
  241|  33.6k|    case RTX_64X32: {
  ------------------
  |  Branch (241:5): [True: 33.6k, False: 2.97M]
  ------------------
  242|  33.6k|        uint64_t t = (*(const uint64_t *) &a[0] & mask) >> 6;
  243|  33.6k|        t         += (*(const uint64_t *) &a[8] & mask) >> 6;
  244|  33.6k|        t         += (*(const uint64_t *) &l[0] & mask) >> 6;
  245|  33.6k|        t *= mul;
  246|  33.6k|        s = (int) (t >> 56) - 16 - 8;
  247|  33.6k|        break;
  248|      0|    }
  249|  64.7k|    case RTX_4X16: {
  ------------------
  |  Branch (249:5): [True: 64.7k, False: 2.94M]
  ------------------
  250|  64.7k|        uint32_t t = *(const uint8_t  *) a & (uint32_t) mask;
  251|  64.7k|        t         += *(const uint32_t *) l & (uint32_t) mask;
  252|  64.7k|        t = (t >> 6) * (uint32_t) mul;
  253|  64.7k|        s = (int) (t >> 24) - 1 - 4;
  254|  64.7k|        break;
  255|      0|    }
  256|   161k|    case RTX_16X4: {
  ------------------
  |  Branch (256:5): [True: 161k, False: 2.85M]
  ------------------
  257|   161k|        uint32_t t = *(const uint32_t *) a & (uint32_t) mask;
  258|   161k|        t         += *(const uint8_t  *) l & (uint32_t) mask;
  259|   161k|        t = (t >> 6) * (uint32_t) mul;
  260|   161k|        s = (int) (t >> 24) - 4 - 1;
  261|   161k|        break;
  262|      0|    }
  263|  40.4k|    case RTX_8X32: {
  ------------------
  |  Branch (263:5): [True: 40.4k, False: 2.97M]
  ------------------
  264|  40.4k|        uint64_t t = *(const uint16_t *) a & (uint32_t) mask;
  265|  40.4k|        t         += *(const uint64_t *) l & mask;
  266|  40.4k|        t = (t >> 6) * mul;
  267|  40.4k|        s = (int) (t >> 56) - 2 - 8;
  268|  40.4k|        break;
  269|      0|    }
  270|  91.1k|    case RTX_32X8: {
  ------------------
  |  Branch (270:5): [True: 91.1k, False: 2.92M]
  ------------------
  271|  91.1k|        uint64_t t = *(const uint64_t *) a & mask;
  272|  91.1k|        t         += *(const uint16_t *) l & (uint32_t) mask;
  273|  91.1k|        t = (t >> 6) * mul;
  274|  91.1k|        s = (int) (t >> 56) - 8 - 2;
  275|  91.1k|        break;
  276|      0|    }
  277|  12.8k|    case RTX_16X64: {
  ------------------
  |  Branch (277:5): [True: 12.8k, False: 3.00M]
  ------------------
  278|  12.8k|        uint64_t t = *(const uint32_t *) a & (uint32_t) mask;
  279|  12.8k|        t         += *(const uint64_t *) &l[0] & mask;
  280|  12.8k|        t = (t >> 6) + ((*(const uint64_t *) &l[8] & mask) >> 6);
  281|  12.8k|        t *= mul;
  282|  12.8k|        s = (int) (t >> 56) - 4 - 16;
  283|  12.8k|        break;
  284|      0|    }
  285|  14.8k|    case RTX_64X16: {
  ------------------
  |  Branch (285:5): [True: 14.8k, False: 2.99M]
  ------------------
  286|  14.8k|        uint64_t t = *(const uint64_t *) &a[0] & mask;
  287|  14.8k|        t         += *(const uint32_t *) l & (uint32_t) mask;
  288|  14.8k|        t = (t >> 6) + ((*(const uint64_t *) &a[8] & mask) >> 6);
  289|  14.8k|        t *= mul;
  290|  14.8k|        s = (int) (t >> 56) - 16 - 4;
  291|  14.8k|        break;
  292|      0|    }
  293|  3.01M|    }
  294|       |
  295|  3.01M|    return (s != 0) + (s > 0);
  296|  3.01M|}
recon_tmpl.c:read_golomb:
   49|  1.15M|static inline unsigned read_golomb(MsacContext *const msac) {
   50|  1.15M|    int len = 0;
   51|  1.15M|    unsigned val = 1;
   52|       |
   53|  2.07M|    while (!dav1d_msac_decode_bool_equi(msac) && len < 32) len++;
  ------------------
  |  |   53|  2.07M|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
  |  Branch (53:12): [True: 927k, False: 1.15M]
  |  Branch (53:50): [True: 927k, False: 25]
  ------------------
   54|  2.07M|    while (len--) val = (val << 1) + dav1d_msac_decode_bool_equi(msac);
  ------------------
  |  |   53|   927k|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
  |  Branch (54:12): [True: 927k, False: 1.15M]
  ------------------
   55|       |
   56|  1.15M|    return val - 1;
   57|  1.15M|}
recon_tmpl.c:mc:
  944|  4.46M|{
  945|  4.46M|    assert((dst8 != NULL) ^ (dst16 != NULL));
  ------------------
  |  Branch (945:5): [True: 4.46M, False: 0]
  ------------------
  946|  4.46M|    const Dav1dFrameContext *const f = t->f;
  947|  4.46M|    const int ss_ver = !!pl && f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
  ------------------
  |  Branch (947:24): [True: 2.29M, False: 2.16M]
  |  Branch (947:32): [True: 510k, False: 1.78M]
  ------------------
  948|  4.46M|    const int ss_hor = !!pl && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
  ------------------
  |  Branch (948:24): [True: 2.29M, False: 2.16M]
  |  Branch (948:32): [True: 523k, False: 1.77M]
  ------------------
  949|  4.46M|    const int h_mul = 4 >> ss_hor, v_mul = 4 >> ss_ver;
  950|  4.46M|    const int mvx = mv.x, mvy = mv.y;
  951|  4.46M|    const int mx = mvx & (15 >> !ss_hor), my = mvy & (15 >> !ss_ver);
  952|  4.46M|    ptrdiff_t ref_stride = refp->p.stride[!!pl];
  953|  4.46M|    const pixel *ref;
  954|       |
  955|  4.46M|    if (refp->p.p.w == f->cur.p.w && refp->p.p.h == f->cur.p.h) {
  ------------------
  |  Branch (955:9): [True: 3.29M, False: 1.17M]
  |  Branch (955:38): [True: 3.21M, False: 79.3k]
  ------------------
  956|  3.21M|        const int dx = bx * h_mul + (mvx >> (3 + ss_hor));
  957|  3.21M|        const int dy = by * v_mul + (mvy >> (3 + ss_ver));
  958|  3.21M|        int w, h;
  959|       |
  960|  3.21M|        if (refp->p.data[0] != f->cur.data[0]) { // i.e. not for intrabc
  ------------------
  |  Branch (960:13): [True: 2.05M, False: 1.15M]
  ------------------
  961|  2.05M|            w = (f->cur.p.w + ss_hor) >> ss_hor;
  962|  2.05M|            h = (f->cur.p.h + ss_ver) >> ss_ver;
  963|  2.05M|        } else {
  964|  1.15M|            w = f->bw * 4 >> ss_hor;
  965|  1.15M|            h = f->bh * 4 >> ss_ver;
  966|  1.15M|        }
  967|  3.21M|        if (dx < !!mx * 3 || dy < !!my * 3 ||
  ------------------
  |  Branch (967:13): [True: 52.8k, False: 3.15M]
  |  Branch (967:30): [True: 65.6k, False: 3.09M]
  ------------------
  968|  3.09M|            dx + bw4 * h_mul + !!mx * 4 > w ||
  ------------------
  |  Branch (968:13): [True: 126k, False: 2.96M]
  ------------------
  969|  2.96M|            dy + bh4 * v_mul + !!my * 4 > h)
  ------------------
  |  Branch (969:13): [True: 341k, False: 2.62M]
  ------------------
  970|   586k|        {
  971|   586k|            pixel *const emu_edge_buf = bitfn(t->scratch.emu_edge);
  ------------------
  |  |   51|   586k|#define bitfn(x) x##_8bpc
  ------------------
  972|   586k|            f->dsp->mc.emu_edge(bw4 * h_mul + !!mx * 7, bh4 * v_mul + !!my * 7,
  973|   586k|                                w, h, dx - !!mx * 3, dy - !!my * 3,
  974|   586k|                                emu_edge_buf, 192 * sizeof(pixel),
  975|   586k|                                refp->p.data[pl], ref_stride);
  976|   586k|            ref = &emu_edge_buf[192 * !!my * 3 + !!mx * 3];
  977|   586k|            ref_stride = 192 * sizeof(pixel);
  978|  2.62M|        } else {
  979|  2.62M|            ref = ((pixel *) refp->p.data[pl]) + PXSTRIDE(ref_stride) * dy + dx;
  ------------------
  |  |   53|  2.62M|#define PXSTRIDE(x) (x)
  ------------------
  980|  2.62M|        }
  981|       |
  982|  3.21M|        if (dst8 != NULL) {
  ------------------
  |  Branch (982:13): [True: 2.67M, False: 532k]
  ------------------
  983|  2.67M|            f->dsp->mc.mc[filter_2d](dst8, dst_stride, ref, ref_stride, bw4 * h_mul,
  984|  2.67M|                                     bh4 * v_mul, mx << !ss_hor, my << !ss_ver
  985|  2.67M|                                     HIGHBD_CALL_SUFFIX);
  986|  2.67M|        } else {
  987|   532k|            f->dsp->mc.mct[filter_2d](dst16, ref, ref_stride, bw4 * h_mul,
  988|   532k|                                      bh4 * v_mul, mx << !ss_hor, my << !ss_ver
  989|   532k|                                      HIGHBD_CALL_SUFFIX);
  990|   532k|        }
  991|  3.21M|    } else {
  992|  1.25M|        assert(refp != &f->sr_cur);
  ------------------
  |  Branch (992:9): [True: 1.25M, False: 0]
  ------------------
  993|       |
  994|  1.25M|        const int orig_pos_y = (by * v_mul << 4) + mvy * (1 << !ss_ver);
  995|  1.25M|        const int orig_pos_x = (bx * h_mul << 4) + mvx * (1 << !ss_hor);
  996|  1.25M|#define scale_mv(res, val, scale) do { \
  997|  1.25M|            const int64_t tmp = (int64_t)(val) * scale + (scale - 0x4000) * 8; \
  998|  1.25M|            res = apply_sign64((int) ((llabs(tmp) + 128) >> 8), tmp) + 32;     \
  999|  1.25M|        } while (0)
 1000|  1.25M|        int pos_y, pos_x;
 1001|  1.25M|        scale_mv(pos_x, orig_pos_x, f->svc[refidx][0].scale);
  ------------------
  |  |  996|  1.25M|#define scale_mv(res, val, scale) do { \
  |  |  997|  1.25M|            const int64_t tmp = (int64_t)(val) * scale + (scale - 0x4000) * 8; \
  |  |  998|  1.25M|            res = apply_sign64((int) ((llabs(tmp) + 128) >> 8), tmp) + 32;     \
  |  |  999|  1.25M|        } while (0)
  |  |  ------------------
  |  |  |  Branch (999:18): [Folded, False: 1.25M]
  |  |  ------------------
  ------------------
 1002|  1.25M|        scale_mv(pos_y, orig_pos_y, f->svc[refidx][1].scale);
  ------------------
  |  |  996|  1.25M|#define scale_mv(res, val, scale) do { \
  |  |  997|  1.25M|            const int64_t tmp = (int64_t)(val) * scale + (scale - 0x4000) * 8; \
  |  |  998|  1.25M|            res = apply_sign64((int) ((llabs(tmp) + 128) >> 8), tmp) + 32;     \
  |  |  999|  1.25M|        } while (0)
  |  |  ------------------
  |  |  |  Branch (999:18): [Folded, False: 1.25M]
  |  |  ------------------
  ------------------
 1003|  1.25M|#undef scale_mv
 1004|  1.25M|        const int left = pos_x >> 10;
 1005|  1.25M|        const int top = pos_y >> 10;
 1006|  1.25M|        const int right =
 1007|  1.25M|            ((pos_x + (bw4 * h_mul - 1) * f->svc[refidx][0].step) >> 10) + 1;
 1008|  1.25M|        const int bottom =
 1009|  1.25M|            ((pos_y + (bh4 * v_mul - 1) * f->svc[refidx][1].step) >> 10) + 1;
 1010|       |
 1011|  1.25M|        if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  1.25M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 1.25M]
  |  |  ------------------
  |  |   35|  1.25M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  1.25M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1012|      0|            printf("Off %dx%d [%d,%d,%d], size %dx%d [%d,%d]\n",
 1013|      0|                   left, top, orig_pos_x, f->svc[refidx][0].scale, refidx,
 1014|      0|                   right-left, bottom-top,
 1015|      0|                   f->svc[refidx][0].step, f->svc[refidx][1].step);
 1016|       |
 1017|  1.25M|        const int w = (refp->p.p.w + ss_hor) >> ss_hor;
 1018|  1.25M|        const int h = (refp->p.p.h + ss_ver) >> ss_ver;
 1019|  1.25M|        if (left < 3 || top < 3 || right + 4 > w || bottom + 4 > h) {
  ------------------
  |  Branch (1019:13): [True: 108k, False: 1.14M]
  |  Branch (1019:25): [True: 210k, False: 935k]
  |  Branch (1019:36): [True: 71.0k, False: 864k]
  |  Branch (1019:53): [True: 69.9k, False: 794k]
  ------------------
 1020|   459k|            pixel *const emu_edge_buf = bitfn(t->scratch.emu_edge);
  ------------------
  |  |   51|   459k|#define bitfn(x) x##_8bpc
  ------------------
 1021|   459k|            f->dsp->mc.emu_edge(right - left + 7, bottom - top + 7,
 1022|   459k|                                w, h, left - 3, top - 3,
 1023|   459k|                                emu_edge_buf, 320 * sizeof(pixel),
 1024|   459k|                                refp->p.data[pl], ref_stride);
 1025|   459k|            ref = &emu_edge_buf[320 * 3 + 3];
 1026|   459k|            ref_stride = 320 * sizeof(pixel);
 1027|   459k|            if (DEBUG_BLOCK_INFO) printf("Emu\n");
  ------------------
  |  |   34|   459k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 459k]
  |  |  ------------------
  |  |   35|   459k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   459k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1028|   794k|        } else {
 1029|   794k|            ref = ((pixel *) refp->p.data[pl]) + PXSTRIDE(ref_stride) * top + left;
  ------------------
  |  |   53|   794k|#define PXSTRIDE(x) (x)
  ------------------
 1030|   794k|        }
 1031|       |
 1032|  1.25M|        if (dst8 != NULL) {
  ------------------
  |  Branch (1032:13): [True: 918k, False: 334k]
  ------------------
 1033|   918k|            f->dsp->mc.mc_scaled[filter_2d](dst8, dst_stride, ref, ref_stride,
 1034|   918k|                                            bw4 * h_mul, bh4 * v_mul,
 1035|   918k|                                            pos_x & 0x3ff, pos_y & 0x3ff,
 1036|   918k|                                            f->svc[refidx][0].step,
 1037|   918k|                                            f->svc[refidx][1].step
 1038|   918k|                                            HIGHBD_CALL_SUFFIX);
 1039|   918k|        } else {
 1040|   334k|            f->dsp->mc.mct_scaled[filter_2d](dst16, ref, ref_stride,
 1041|   334k|                                             bw4 * h_mul, bh4 * v_mul,
 1042|   334k|                                             pos_x & 0x3ff, pos_y & 0x3ff,
 1043|   334k|                                             f->svc[refidx][0].step,
 1044|   334k|                                             f->svc[refidx][1].step
 1045|   334k|                                             HIGHBD_CALL_SUFFIX);
 1046|   334k|        }
 1047|  1.25M|    }
 1048|       |
 1049|  4.46M|    return 0;
 1050|  4.46M|}
recon_tmpl.c:warp_affine:
 1120|   138k|{
 1121|   138k|    assert((dst8 != NULL) ^ (dst16 != NULL));
  ------------------
  |  Branch (1121:5): [True: 138k, False: 0]
  ------------------
 1122|   138k|    const Dav1dFrameContext *const f = t->f;
 1123|   138k|    const Dav1dDSPContext *const dsp = f->dsp;
 1124|   138k|    const int ss_ver = !!pl && f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
  ------------------
  |  Branch (1124:24): [True: 46.5k, False: 92.2k]
  |  Branch (1124:32): [True: 20.3k, False: 26.2k]
  ------------------
 1125|   138k|    const int ss_hor = !!pl && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
  ------------------
  |  Branch (1125:24): [True: 46.5k, False: 92.2k]
  |  Branch (1125:32): [True: 20.3k, False: 26.2k]
  ------------------
 1126|   138k|    const int h_mul = 4 >> ss_hor, v_mul = 4 >> ss_ver;
 1127|   138k|    assert(!((b_dim[0] * h_mul) & 7) && !((b_dim[1] * v_mul) & 7));
  ------------------
  |  Branch (1127:5): [True: 138k, False: 0]
  |  Branch (1127:5): [True: 138k, False: 0]
  ------------------
 1128|   138k|    const int32_t *const mat = wmp->matrix;
 1129|   138k|    const int width = (refp->p.p.w + ss_hor) >> ss_hor;
 1130|   138k|    const int height = (refp->p.p.h + ss_ver) >> ss_ver;
 1131|       |
 1132|   933k|    for (int y = 0; y < b_dim[1] * v_mul; y += 8) {
  ------------------
  |  Branch (1132:21): [True: 794k, False: 138k]
  ------------------
 1133|   794k|        const int src_y = t->by * 4 + ((y + 4) << ss_ver);
 1134|   794k|        const int64_t mat3_y = (int64_t) mat[3] * src_y + mat[0];
 1135|   794k|        const int64_t mat5_y = (int64_t) mat[5] * src_y + mat[1];
 1136|  6.42M|        for (int x = 0; x < b_dim[0] * h_mul; x += 8) {
  ------------------
  |  Branch (1136:25): [True: 5.63M, False: 794k]
  ------------------
 1137|       |            // calculate transformation relative to center of 8x8 block in
 1138|       |            // luma pixel units
 1139|  5.63M|            const int src_x = t->bx * 4 + ((x + 4) << ss_hor);
 1140|  5.63M|            const int64_t mvx = ((int64_t) mat[2] * src_x + mat3_y) >> ss_hor;
 1141|  5.63M|            const int64_t mvy = ((int64_t) mat[4] * src_x + mat5_y) >> ss_ver;
 1142|       |
 1143|  5.63M|            const int dx = (int) (mvx >> 16) - 4;
 1144|  5.63M|            const int mx = (((int) mvx & 0xffff) - wmp->u.p.alpha * 4 -
 1145|  5.63M|                                                   wmp->u.p.beta  * 7) & ~0x3f;
 1146|  5.63M|            const int dy = (int) (mvy >> 16) - 4;
 1147|  5.63M|            const int my = (((int) mvy & 0xffff) - wmp->u.p.gamma * 4 -
 1148|  5.63M|                                                   wmp->u.p.delta * 4) & ~0x3f;
 1149|       |
 1150|  5.63M|            const pixel *ref_ptr;
 1151|  5.63M|            ptrdiff_t ref_stride = refp->p.stride[!!pl];
 1152|       |
 1153|  5.63M|            if (dx < 3 || dx + 8 + 4 > width || dy < 3 || dy + 8 + 4 > height) {
  ------------------
  |  Branch (1153:17): [True: 597k, False: 5.03M]
  |  Branch (1153:27): [True: 1.35M, False: 3.67M]
  |  Branch (1153:49): [True: 80.7k, False: 3.59M]
  |  Branch (1153:59): [True: 139k, False: 3.45M]
  ------------------
 1154|  2.17M|                pixel *const emu_edge_buf = bitfn(t->scratch.emu_edge);
  ------------------
  |  |   51|  2.17M|#define bitfn(x) x##_8bpc
  ------------------
 1155|  2.17M|                f->dsp->mc.emu_edge(15, 15, width, height, dx - 3, dy - 3,
 1156|  2.17M|                                    emu_edge_buf, 32 * sizeof(pixel),
 1157|  2.17M|                                    refp->p.data[pl], ref_stride);
 1158|  2.17M|                ref_ptr = &emu_edge_buf[32 * 3 + 3];
 1159|  2.17M|                ref_stride = 32 * sizeof(pixel);
 1160|  3.45M|            } else {
 1161|  3.45M|                ref_ptr = ((pixel *) refp->p.data[pl]) + PXSTRIDE(ref_stride) * dy + dx;
  ------------------
  |  |   53|  3.45M|#define PXSTRIDE(x) (x)
  ------------------
 1162|  3.45M|            }
 1163|  5.63M|            if (dst16 != NULL)
  ------------------
  |  Branch (1163:17): [True: 66.7k, False: 5.56M]
  ------------------
 1164|  66.7k|                dsp->mc.warp8x8t(&dst16[x], dstride, ref_ptr, ref_stride,
 1165|  66.7k|                                 wmp->u.abcd, mx, my HIGHBD_CALL_SUFFIX);
 1166|  5.56M|            else
 1167|  5.56M|                dsp->mc.warp8x8(&dst8[x], dstride, ref_ptr, ref_stride,
 1168|  5.56M|                                wmp->u.abcd, mx, my HIGHBD_CALL_SUFFIX);
 1169|  5.63M|        }
 1170|   794k|        if (dst8) dst8  += 8 * PXSTRIDE(dstride);
  ------------------
  |  |   53|   780k|#define PXSTRIDE(x) (x)
  ------------------
  |  Branch (1170:13): [True: 780k, False: 13.6k]
  ------------------
 1171|  13.6k|        else      dst16 += 8 * dstride;
 1172|   794k|    }
 1173|   138k|    return 0;
 1174|   138k|}
recon_tmpl.c:obmc:
 1056|   379k|{
 1057|   379k|    assert(!(t->bx & 1) && !(t->by & 1));
  ------------------
  |  Branch (1057:5): [True: 379k, False: 0]
  |  Branch (1057:5): [True: 379k, False: 0]
  ------------------
 1058|   379k|    const Dav1dFrameContext *const f = t->f;
 1059|   379k|    /*const*/ refmvs_block **r = &t->rt.r[(t->by & 31) + 5];
 1060|   379k|    pixel *const lap = bitfn(t->scratch.lap);
  ------------------
  |  |   51|   379k|#define bitfn(x) x##_8bpc
  ------------------
 1061|   379k|    const int ss_ver = !!pl && f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
  ------------------
  |  Branch (1061:24): [True: 198k, False: 181k]
  |  Branch (1061:32): [True: 55.0k, False: 142k]
  ------------------
 1062|   379k|    const int ss_hor = !!pl && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
  ------------------
  |  Branch (1062:24): [True: 198k, False: 181k]
  |  Branch (1062:32): [True: 55.1k, False: 142k]
  ------------------
 1063|   379k|    const int h_mul = 4 >> ss_hor, v_mul = 4 >> ss_ver;
 1064|   379k|    int res;
 1065|       |
 1066|   379k|    if (t->by > t->ts->tiling.row_start &&
  ------------------
  |  Branch (1066:9): [True: 351k, False: 28.0k]
  ------------------
 1067|   351k|        (!pl || b_dim[0] * h_mul + b_dim[1] * v_mul >= 16))
  ------------------
  |  Branch (1067:10): [True: 168k, False: 182k]
  |  Branch (1067:17): [True: 147k, False: 34.9k]
  ------------------
 1068|   316k|    {
 1069|   669k|        for (int i = 0, x = 0; x < w4 && i < imin(b_dim[2], 4); ) {
  ------------------
  |  Branch (1069:32): [True: 353k, False: 315k]
  |  Branch (1069:42): [True: 352k, False: 898]
  ------------------
 1070|       |            // only odd blocks are considered for overlap handling, hence +1
 1071|   352k|            const refmvs_block *const a_r = &r[-1][t->bx + x + 1];
 1072|   352k|            const uint8_t *const a_b_dim = dav1d_block_dimensions[a_r->bs];
 1073|   352k|            const int step4 = iclip(a_b_dim[0], 2, 16);
 1074|       |
 1075|   352k|            if (a_r->ref.ref[0] > 0) {
  ------------------
  |  Branch (1075:17): [True: 342k, False: 10.2k]
  ------------------
 1076|   342k|                const int ow4 = imin(step4, b_dim[0]);
 1077|   342k|                const int oh4 = imin(b_dim[1], 16) >> 1;
 1078|   342k|                res = mc(t, lap, NULL, ow4 * h_mul * sizeof(pixel), ow4, (oh4 * 3 + 3) >> 2,
 1079|   342k|                         t->bx + x, t->by, pl, a_r->mv.mv[0],
 1080|   342k|                         &f->refp[a_r->ref.ref[0] - 1], a_r->ref.ref[0] - 1,
 1081|   342k|                         dav1d_filter_2d[t->a->filter[1][bx4 + x + 1]][t->a->filter[0][bx4 + x + 1]]);
 1082|   342k|                if (res) return res;
  ------------------
  |  Branch (1082:21): [True: 0, False: 342k]
  ------------------
 1083|   342k|                f->dsp->mc.blend_h(&dst[x * h_mul], dst_stride, lap,
 1084|   342k|                                   h_mul * ow4, v_mul * oh4);
 1085|   342k|                i++;
 1086|   342k|            }
 1087|   352k|            x += step4;
 1088|   352k|        }
 1089|   316k|    }
 1090|       |
 1091|   379k|    if (t->bx > t->ts->tiling.col_start)
  ------------------
  |  Branch (1091:9): [True: 365k, False: 14.1k]
  ------------------
 1092|   769k|        for (int i = 0, y = 0; y < h4 && i < imin(b_dim[3], 4); ) {
  ------------------
  |  Branch (1092:32): [True: 406k, False: 363k]
  |  Branch (1092:42): [True: 404k, False: 1.81k]
  ------------------
 1093|       |            // only odd blocks are considered for overlap handling, hence +1
 1094|   404k|            const refmvs_block *const l_r = &r[y + 1][t->bx - 1];
 1095|   404k|            const uint8_t *const l_b_dim = dav1d_block_dimensions[l_r->bs];
 1096|   404k|            const int step4 = iclip(l_b_dim[1], 2, 16);
 1097|       |
 1098|   404k|            if (l_r->ref.ref[0] > 0) {
  ------------------
  |  Branch (1098:17): [True: 387k, False: 16.9k]
  ------------------
 1099|   387k|                const int ow4 = imin(b_dim[0], 16) >> 1;
 1100|   387k|                const int oh4 = imin(step4, b_dim[1]);
 1101|   387k|                res = mc(t, lap, NULL, h_mul * ow4 * sizeof(pixel), ow4, oh4,
 1102|   387k|                         t->bx, t->by + y, pl, l_r->mv.mv[0],
 1103|   387k|                         &f->refp[l_r->ref.ref[0] - 1], l_r->ref.ref[0] - 1,
 1104|   387k|                         dav1d_filter_2d[t->l.filter[1][by4 + y + 1]][t->l.filter[0][by4 + y + 1]]);
 1105|   387k|                if (res) return res;
  ------------------
  |  Branch (1105:21): [True: 0, False: 387k]
  ------------------
 1106|   387k|                f->dsp->mc.blend_v(&dst[y * v_mul * PXSTRIDE(dst_stride)],
  ------------------
  |  |   53|   387k|#define PXSTRIDE(x) (x)
  ------------------
 1107|   387k|                                   dst_stride, lap, h_mul * ow4, v_mul * oh4);
 1108|   387k|                i++;
 1109|   387k|            }
 1110|   404k|            y += step4;
 1111|   404k|        }
 1112|   379k|    return 0;
 1113|   379k|}
dav1d_recon_b_intra_16bpc:
 1179|  1.44M|{
 1180|  1.44M|    Dav1dTileState *const ts = t->ts;
 1181|  1.44M|    const Dav1dFrameContext *const f = t->f;
 1182|  1.44M|    const Dav1dDSPContext *const dsp = f->dsp;
 1183|  1.44M|    const int bx4 = t->bx & 31, by4 = t->by & 31;
 1184|  1.44M|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
 1185|  1.44M|    const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
 1186|  1.44M|    const int cbx4 = bx4 >> ss_hor, cby4 = by4 >> ss_ver;
 1187|  1.44M|    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
 1188|  1.44M|    const int bw4 = b_dim[0], bh4 = b_dim[1];
 1189|  1.44M|    const int w4 = imin(bw4, f->bw - t->bx), h4 = imin(bh4, f->bh - t->by);
 1190|  1.44M|    const int cw4 = (w4 + ss_hor) >> ss_hor, ch4 = (h4 + ss_ver) >> ss_ver;
 1191|  1.44M|    const int has_chroma = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400 &&
  ------------------
  |  Branch (1191:28): [True: 906k, False: 538k]
  ------------------
 1192|   906k|                           (bw4 > ss_hor || t->bx & 1) &&
  ------------------
  |  Branch (1192:29): [True: 871k, False: 34.5k]
  |  Branch (1192:45): [True: 17.1k, False: 17.3k]
  ------------------
 1193|   888k|                           (bh4 > ss_ver || t->by & 1);
  ------------------
  |  Branch (1193:29): [True: 858k, False: 30.1k]
  |  Branch (1193:45): [True: 15.0k, False: 15.0k]
  ------------------
 1194|  1.44M|    const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[b->tx];
 1195|  1.44M|    const TxfmInfo *const uv_t_dim = &dav1d_txfm_dimensions[b->uvtx];
 1196|       |
 1197|       |    // coefficient coding
 1198|  1.44M|    pixel *const edge = bitfn(t->scratch.edge) + 128;
  ------------------
  |  |   77|  1.44M|#define bitfn(x) x##_16bpc
  ------------------
 1199|  1.44M|    const int cbw4 = (bw4 + ss_hor) >> ss_hor, cbh4 = (bh4 + ss_ver) >> ss_ver;
 1200|       |
 1201|  1.44M|    const int intra_edge_filter_flag = f->seq_hdr->intra_edge_filter << 10;
 1202|       |
 1203|  2.93M|    for (int init_y = 0; init_y < h4; init_y += 16) {
  ------------------
  |  Branch (1203:26): [True: 1.49M, False: 1.44M]
  ------------------
 1204|  1.49M|        const int sub_h4 = imin(h4, 16 + init_y);
 1205|  1.49M|        const int sub_ch4 = imin(ch4, (init_y + 16) >> ss_ver);
 1206|  3.09M|        for (int init_x = 0; init_x < w4; init_x += 16) {
  ------------------
  |  Branch (1206:30): [True: 1.59M, False: 1.49M]
  ------------------
 1207|  1.59M|            if (b->pal_sz[0]) {
  ------------------
  |  Branch (1207:17): [True: 39.1k, False: 1.55M]
  ------------------
 1208|  39.1k|                pixel *dst = ((pixel *) f->cur.data[0]) +
 1209|  39.1k|                             4 * (t->by * PXSTRIDE(f->cur.stride[0]) + t->bx);
 1210|  39.1k|                const uint8_t *pal_idx;
 1211|  39.1k|                if (t->frame_thread.pass) {
  ------------------
  |  Branch (1211:21): [True: 0, False: 39.1k]
  ------------------
 1212|      0|                    const int p = t->frame_thread.pass & 1;
 1213|      0|                    assert(ts->frame_thread[p].pal_idx);
  ------------------
  |  Branch (1213:21): [True: 0, False: 0]
  ------------------
 1214|      0|                    pal_idx = ts->frame_thread[p].pal_idx;
 1215|      0|                    ts->frame_thread[p].pal_idx += bw4 * bh4 * 8;
 1216|  39.1k|                } else {
 1217|  39.1k|                    pal_idx = t->scratch.pal_idx_y;
 1218|  39.1k|                }
 1219|  39.1k|                const pixel *const pal = t->frame_thread.pass ?
  ------------------
  |  Branch (1219:42): [True: 0, False: 39.1k]
  ------------------
 1220|      0|                    f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
 1221|      0|                                        ((t->bx >> 1) + (t->by & 1))][0] :
 1222|  39.1k|                    bytefn(t->scratch.pal)[0];
  ------------------
  |  |   87|  39.1k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  39.1k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 1223|  39.1k|                f->dsp->ipred.pal_pred(dst, f->cur.stride[0], pal,
 1224|  39.1k|                                       pal_idx, bw4 * 4, bh4 * 4);
 1225|  39.1k|                if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   34|  39.1k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 39.1k]
  |  |  ------------------
  |  |   35|  39.1k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  39.1k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                              if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1226|      0|                    hex_dump(dst, PXSTRIDE(f->cur.stride[0]),
 1227|      0|                             bw4 * 4, bh4 * 4, "y-pal-pred");
 1228|  39.1k|            }
 1229|       |
 1230|  1.59M|            const int intra_flags = (sm_flag(t->a, bx4) |
 1231|  1.59M|                                     sm_flag(&t->l, by4) |
 1232|  1.59M|                                     intra_edge_filter_flag);
 1233|  1.59M|            const int sb_has_tr = init_x + 16 < w4 ? 1 : init_y ? 0 :
  ------------------
  |  Branch (1233:35): [True: 102k, False: 1.49M]
  |  Branch (1233:58): [True: 50.8k, False: 1.44M]
  ------------------
 1234|  1.49M|                              intra_edge_flags & EDGE_I444_TOP_HAS_RIGHT;
 1235|  1.59M|            const int sb_has_bl = init_x ? 0 : init_y + 16 < h4 ? 1 :
  ------------------
  |  Branch (1235:35): [True: 102k, False: 1.49M]
  |  Branch (1235:48): [True: 50.8k, False: 1.44M]
  ------------------
 1236|  1.49M|                              intra_edge_flags & EDGE_I444_LEFT_HAS_BOTTOM;
 1237|  1.59M|            int y, x;
 1238|  1.59M|            const int sub_w4 = imin(w4, init_x + 16);
 1239|  3.54M|            for (y = init_y, t->by += init_y; y < sub_h4;
  ------------------
  |  Branch (1239:47): [True: 1.94M, False: 1.59M]
  ------------------
 1240|  1.94M|                 y += t_dim->h, t->by += t_dim->h)
 1241|  1.94M|            {
 1242|  1.94M|                pixel *dst = ((pixel *) f->cur.data[0]) +
 1243|  1.94M|                               4 * (t->by * PXSTRIDE(f->cur.stride[0]) +
 1244|  1.94M|                                    t->bx + init_x);
 1245|  5.44M|                for (x = init_x, t->bx += init_x; x < sub_w4;
  ------------------
  |  Branch (1245:51): [True: 3.49M, False: 1.94M]
  ------------------
 1246|  3.49M|                     x += t_dim->w, t->bx += t_dim->w)
 1247|  3.49M|                {
 1248|  3.49M|                    if (b->pal_sz[0]) goto skip_y_pred;
  ------------------
  |  Branch (1248:25): [True: 49.4k, False: 3.44M]
  ------------------
 1249|       |
 1250|  3.44M|                    int angle = b->y_angle;
 1251|  3.44M|                    const enum EdgeFlags edge_flags =
 1252|  3.44M|                        (((y > init_y || !sb_has_tr) && (x + t_dim->w >= sub_w4)) ?
  ------------------
  |  Branch (1252:28): [True: 1.63M, False: 1.81M]
  |  Branch (1252:42): [True: 531k, False: 1.28M]
  |  Branch (1252:57): [True: 834k, False: 1.33M]
  ------------------
 1253|  2.61M|                             0 : EDGE_I444_TOP_HAS_RIGHT) |
 1254|  3.44M|                        ((x > init_x || (!sb_has_bl && y + t_dim->h >= sub_h4)) ?
  ------------------
  |  Branch (1254:27): [True: 1.54M, False: 1.90M]
  |  Branch (1254:42): [True: 1.10M, False: 799k]
  |  Branch (1254:56): [True: 839k, False: 267k]
  ------------------
 1255|  2.38M|                             0 : EDGE_I444_LEFT_HAS_BOTTOM);
 1256|  3.44M|                    const pixel *top_sb_edge = NULL;
 1257|  3.44M|                    if (!(t->by & (f->sb_step - 1))) {
  ------------------
  |  Branch (1257:25): [True: 821k, False: 2.62M]
  ------------------
 1258|   821k|                        top_sb_edge = f->ipred_edge[0];
 1259|   821k|                        const int sby = t->by >> f->sb_shift;
 1260|   821k|                        top_sb_edge += f->sb128w * 128 * (sby - 1);
 1261|   821k|                    }
 1262|  3.44M|                    const enum IntraPredMode m =
 1263|  3.44M|                        bytefn(dav1d_prepare_intra_edges)(t->bx,
  ------------------
  |  |   87|  3.44M|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  3.44M|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 1264|  3.44M|                                                          t->bx > ts->tiling.col_start,
 1265|  3.44M|                                                          t->by,
 1266|  3.44M|                                                          t->by > ts->tiling.row_start,
 1267|  3.44M|                                                          ts->tiling.col_end,
 1268|  3.44M|                                                          ts->tiling.row_end,
 1269|  3.44M|                                                          edge_flags, dst,
 1270|  3.44M|                                                          f->cur.stride[0], top_sb_edge,
 1271|  3.44M|                                                          b->y_mode, &angle,
 1272|  3.44M|                                                          t_dim->w, t_dim->h,
 1273|  3.44M|                                                          f->seq_hdr->intra_edge_filter,
 1274|  3.44M|                                                          edge HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  3.44M|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1275|  3.44M|                    dsp->ipred.intra_pred[m](dst, f->cur.stride[0], edge,
 1276|  3.44M|                                             t_dim->w * 4, t_dim->h * 4,
 1277|  3.44M|                                             angle | intra_flags,
 1278|  3.44M|                                             4 * f->bw - 4 * t->bx,
 1279|  3.44M|                                             4 * f->bh - 4 * t->by
 1280|  3.44M|                                             HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  3.44M|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1281|       |
 1282|  3.44M|                    if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   34|  3.44M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 3.44M]
  |  |  ------------------
  |  |   35|  3.44M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  3.44M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                                  if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1283|      0|                        hex_dump(edge - t_dim->h * 4, t_dim->h * 4,
 1284|      0|                                 t_dim->h * 4, 2, "l");
 1285|      0|                        hex_dump(edge, 0, 1, 1, "tl");
 1286|      0|                        hex_dump(edge + 1, t_dim->w * 4,
 1287|      0|                                 t_dim->w * 4, 2, "t");
 1288|      0|                        hex_dump(dst, f->cur.stride[0],
 1289|      0|                                 t_dim->w * 4, t_dim->h * 4, "y-intra-pred");
 1290|      0|                    }
 1291|       |
 1292|  3.49M|                skip_y_pred: {}
 1293|  3.49M|                    if (!b->skip) {
  ------------------
  |  Branch (1293:25): [True: 1.38M, False: 2.11M]
  ------------------
 1294|  1.38M|                        coef *cf;
 1295|  1.38M|                        int eob;
 1296|  1.38M|                        enum TxfmType txtp;
 1297|  1.38M|                        if (t->frame_thread.pass) {
  ------------------
  |  Branch (1297:29): [True: 0, False: 1.38M]
  ------------------
 1298|      0|                            const int p = t->frame_thread.pass & 1;
 1299|      0|                            const int cbi = *ts->frame_thread[p].cbi++;
 1300|      0|                            cf = ts->frame_thread[p].cf;
 1301|      0|                            ts->frame_thread[p].cf += imin(t_dim->w, 8) * imin(t_dim->h, 8) * 16;
 1302|      0|                            eob  = cbi >> 5;
 1303|      0|                            txtp = cbi & 0x1f;
 1304|  1.38M|                        } else {
 1305|  1.38M|                            uint8_t cf_ctx;
 1306|  1.38M|                            cf = bitfn(t->cf);
  ------------------
  |  |   77|  1.38M|#define bitfn(x) x##_16bpc
  ------------------
 1307|  1.38M|                            eob = decode_coefs(t, &t->a->lcoef[bx4 + x],
 1308|  1.38M|                                               &t->l.lcoef[by4 + y], b->tx, bs,
 1309|  1.38M|                                               b, 1, 0, cf, &txtp, &cf_ctx);
 1310|  1.38M|                            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  1.38M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 1.38M]
  |  |  ------------------
  |  |   35|  1.38M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  1.38M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1311|      0|                                printf("Post-y-cf-blk[tx=%d,txtp=%d,eob=%d]: r=%d\n",
 1312|      0|                                       b->tx, txtp, eob, ts->msac.rng);
 1313|  1.38M|                            dav1d_memset_likely_pow2(&t->a->lcoef[bx4 + x], cf_ctx, imin(t_dim->w, f->bw - t->bx));
 1314|  1.38M|                            dav1d_memset_likely_pow2(&t->l.lcoef[by4 + y], cf_ctx, imin(t_dim->h, f->bh - t->by));
 1315|  1.38M|                        }
 1316|  1.38M|                        if (eob >= 0) {
  ------------------
  |  Branch (1316:29): [True: 1.08M, False: 292k]
  ------------------
 1317|  1.08M|                            if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   34|  1.08M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 1.08M]
  |  |  ------------------
  |  |   35|  1.08M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  1.08M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                                          if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1318|      0|                                coef_dump(cf, imin(t_dim->h, 8) * 4,
 1319|      0|                                          imin(t_dim->w, 8) * 4, 3, "dq");
 1320|  1.08M|                            dsp->itx.itxfm_add[b->tx]
 1321|  1.08M|                                              [txtp](dst,
 1322|  1.08M|                                                     f->cur.stride[0],
 1323|  1.08M|                                                     cf, eob HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  1.08M|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1324|  1.08M|                            if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   34|  1.08M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 1.08M]
  |  |  ------------------
  |  |   35|  1.08M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  1.08M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                                          if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1325|      0|                                hex_dump(dst, f->cur.stride[0],
 1326|      0|                                         t_dim->w * 4, t_dim->h * 4, "recon");
 1327|  1.08M|                        }
 1328|  2.11M|                    } else if (!t->frame_thread.pass) {
  ------------------
  |  Branch (1328:32): [True: 2.11M, False: 0]
  ------------------
 1329|  2.11M|                        dav1d_memset_pow2[t_dim->lw](&t->a->lcoef[bx4 + x], 0x40);
 1330|  2.11M|                        dav1d_memset_pow2[t_dim->lh](&t->l.lcoef[by4 + y], 0x40);
 1331|  2.11M|                    }
 1332|  3.49M|                    dst += 4 * t_dim->w;
 1333|  3.49M|                }
 1334|  1.94M|                t->bx -= x;
 1335|  1.94M|            }
 1336|  1.59M|            t->by -= y;
 1337|       |
 1338|  1.59M|            if (!has_chroma) continue;
  ------------------
  |  Branch (1338:17): [True: 651k, False: 946k]
  ------------------
 1339|       |
 1340|   946k|            const ptrdiff_t stride = f->cur.stride[1];
 1341|       |
 1342|   946k|            if (b->uv_mode == CFL_PRED) {
  ------------------
  |  Branch (1342:17): [True: 185k, False: 761k]
  ------------------
 1343|   185k|                assert(!init_x && !init_y);
  ------------------
  |  Branch (1343:17): [True: 185k, False: 0]
  |  Branch (1343:17): [True: 185k, False: 0]
  ------------------
 1344|       |
 1345|   185k|                int16_t *const ac = t->scratch.ac;
 1346|   185k|                pixel *y_src = ((pixel *) f->cur.data[0]) + 4 * (t->bx & ~ss_hor) +
 1347|   185k|                                 4 * (t->by & ~ss_ver) * PXSTRIDE(f->cur.stride[0]);
 1348|   185k|                const ptrdiff_t uv_off = 4 * ((t->bx >> ss_hor) +
 1349|   185k|                                              (t->by >> ss_ver) * PXSTRIDE(stride));
 1350|   185k|                pixel *const uv_dst[2] = { ((pixel *) f->cur.data[1]) + uv_off,
 1351|   185k|                                           ((pixel *) f->cur.data[2]) + uv_off };
 1352|       |
 1353|   185k|                const int furthest_r =
 1354|   185k|                    ((cw4 << ss_hor) + t_dim->w - 1) & ~(t_dim->w - 1);
 1355|   185k|                const int furthest_b =
 1356|   185k|                    ((ch4 << ss_ver) + t_dim->h - 1) & ~(t_dim->h - 1);
 1357|   185k|                dsp->ipred.cfl_ac[f->cur.p.layout - 1](ac, y_src, f->cur.stride[0],
 1358|   185k|                                                         cbw4 - (furthest_r >> ss_hor),
 1359|   185k|                                                         cbh4 - (furthest_b >> ss_ver),
 1360|   185k|                                                         cbw4 * 4, cbh4 * 4);
 1361|   556k|                for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1361:34): [True: 371k, False: 185k]
  ------------------
 1362|   371k|                    if (!b->cfl_alpha[pl]) continue;
  ------------------
  |  Branch (1362:25): [True: 78.3k, False: 292k]
  ------------------
 1363|   292k|                    int angle = 0;
 1364|   292k|                    const pixel *top_sb_edge = NULL;
 1365|   292k|                    if (!((t->by & ~ss_ver) & (f->sb_step - 1))) {
  ------------------
  |  Branch (1365:25): [True: 97.2k, False: 195k]
  ------------------
 1366|  97.2k|                        top_sb_edge = f->ipred_edge[pl + 1];
 1367|  97.2k|                        const int sby = t->by >> f->sb_shift;
 1368|  97.2k|                        top_sb_edge += f->sb128w * 128 * (sby - 1);
 1369|  97.2k|                    }
 1370|   292k|                    const int xpos = t->bx >> ss_hor, ypos = t->by >> ss_ver;
 1371|   292k|                    const int xstart = ts->tiling.col_start >> ss_hor;
 1372|   292k|                    const int ystart = ts->tiling.row_start >> ss_ver;
 1373|   292k|                    const enum IntraPredMode m =
 1374|   292k|                        bytefn(dav1d_prepare_intra_edges)(xpos, xpos > xstart,
  ------------------
  |  |   87|   292k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|   292k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 1375|   292k|                                                          ypos, ypos > ystart,
 1376|   292k|                                                          ts->tiling.col_end >> ss_hor,
 1377|   292k|                                                          ts->tiling.row_end >> ss_ver,
 1378|   292k|                                                          0, uv_dst[pl], stride,
 1379|   292k|                                                          top_sb_edge, DC_PRED, &angle,
 1380|   292k|                                                          uv_t_dim->w, uv_t_dim->h, 0,
 1381|   292k|                                                          edge HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|   292k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1382|   292k|                    dsp->ipred.cfl_pred[m](uv_dst[pl], stride, edge,
 1383|   292k|                                           uv_t_dim->w * 4,
 1384|   292k|                                           uv_t_dim->h * 4,
 1385|   292k|                                           ac, b->cfl_alpha[pl]
 1386|   292k|                                           HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|   292k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1387|   292k|                }
 1388|   185k|                if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   34|   185k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 185k]
  |  |  ------------------
  |  |   35|   185k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   185k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                              if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1389|      0|                    ac_dump(ac, 4*cbw4, 4*cbh4, "ac");
 1390|      0|                    hex_dump(uv_dst[0], stride, cbw4 * 4, cbh4 * 4, "u-cfl-pred");
 1391|      0|                    hex_dump(uv_dst[1], stride, cbw4 * 4, cbh4 * 4, "v-cfl-pred");
 1392|      0|                }
 1393|   761k|            } else if (b->pal_sz[1]) {
  ------------------
  |  Branch (1393:24): [True: 10.0k, False: 751k]
  ------------------
 1394|  10.0k|                const ptrdiff_t uv_dstoff = 4 * ((t->bx >> ss_hor) +
 1395|  10.0k|                                              (t->by >> ss_ver) * PXSTRIDE(f->cur.stride[1]));
 1396|  10.0k|                const pixel (*pal)[8];
 1397|  10.0k|                const uint8_t *pal_idx;
 1398|  10.0k|                if (t->frame_thread.pass) {
  ------------------
  |  Branch (1398:21): [True: 0, False: 10.0k]
  ------------------
 1399|      0|                    const int p = t->frame_thread.pass & 1;
 1400|      0|                    assert(ts->frame_thread[p].pal_idx);
  ------------------
  |  Branch (1400:21): [True: 0, False: 0]
  ------------------
 1401|      0|                    pal = f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
 1402|      0|                                              ((t->bx >> 1) + (t->by & 1))];
 1403|      0|                    pal_idx = ts->frame_thread[p].pal_idx;
 1404|      0|                    ts->frame_thread[p].pal_idx += cbw4 * cbh4 * 8;
 1405|  10.0k|                } else {
 1406|  10.0k|                    pal = bytefn(t->scratch.pal);
  ------------------
  |  |   87|  10.0k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  10.0k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 1407|  10.0k|                    pal_idx = t->scratch.pal_idx_uv;
 1408|  10.0k|                }
 1409|       |
 1410|  10.0k|                f->dsp->ipred.pal_pred(((pixel *) f->cur.data[1]) + uv_dstoff,
 1411|  10.0k|                                       f->cur.stride[1], pal[1],
 1412|  10.0k|                                       pal_idx, cbw4 * 4, cbh4 * 4);
 1413|  10.0k|                f->dsp->ipred.pal_pred(((pixel *) f->cur.data[2]) + uv_dstoff,
 1414|  10.0k|                                       f->cur.stride[1], pal[2],
 1415|  10.0k|                                       pal_idx, cbw4 * 4, cbh4 * 4);
 1416|  10.0k|                if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   34|  10.0k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 10.0k]
  |  |  ------------------
  |  |   35|  10.0k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  10.0k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                              if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1417|      0|                    hex_dump(((pixel *) f->cur.data[1]) + uv_dstoff,
 1418|      0|                             PXSTRIDE(f->cur.stride[1]),
 1419|      0|                             cbw4 * 4, cbh4 * 4, "u-pal-pred");
 1420|      0|                    hex_dump(((pixel *) f->cur.data[2]) + uv_dstoff,
 1421|      0|                             PXSTRIDE(f->cur.stride[1]),
 1422|      0|                             cbw4 * 4, cbh4 * 4, "v-pal-pred");
 1423|      0|                }
 1424|  10.0k|            }
 1425|       |
 1426|   946k|            const int sm_uv_fl = sm_uv_flag(t->a, cbx4) |
 1427|   946k|                                 sm_uv_flag(&t->l, cby4);
 1428|   946k|            const int uv_sb_has_tr =
 1429|   946k|                ((init_x + 16) >> ss_hor) < cw4 ? 1 : init_y ? 0 :
  ------------------
  |  Branch (1429:17): [True: 49.0k, False: 897k]
  |  Branch (1429:55): [True: 24.1k, False: 873k]
  ------------------
 1430|   897k|                intra_edge_flags & (EDGE_I420_TOP_HAS_RIGHT >> (f->cur.p.layout - 1));
 1431|   946k|            const int uv_sb_has_bl =
 1432|   946k|                init_x ? 0 : ((init_y + 16) >> ss_ver) < ch4 ? 1 :
  ------------------
  |  Branch (1432:17): [True: 49.0k, False: 897k]
  |  Branch (1432:30): [True: 24.1k, False: 873k]
  ------------------
 1433|   897k|                intra_edge_flags & (EDGE_I420_LEFT_HAS_BOTTOM >> (f->cur.p.layout - 1));
 1434|   946k|            const int sub_cw4 = imin(cw4, (init_x + 16) >> ss_hor);
 1435|  2.84M|            for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1435:30): [True: 1.89M, False: 946k]
  ------------------
 1436|  4.12M|                for (y = init_y >> ss_ver, t->by += init_y; y < sub_ch4;
  ------------------
  |  Branch (1436:61): [True: 2.23M, False: 1.89M]
  ------------------
 1437|  2.23M|                     y += uv_t_dim->h, t->by += uv_t_dim->h << ss_ver)
 1438|  2.23M|                {
 1439|  2.23M|                    pixel *dst = ((pixel *) f->cur.data[1 + pl]) +
 1440|  2.23M|                                   4 * ((t->by >> ss_ver) * PXSTRIDE(stride) +
 1441|  2.23M|                                        ((t->bx + init_x) >> ss_hor));
 1442|  5.60M|                    for (x = init_x >> ss_hor, t->bx += init_x; x < sub_cw4;
  ------------------
  |  Branch (1442:65): [True: 3.37M, False: 2.23M]
  ------------------
 1443|  3.37M|                         x += uv_t_dim->w, t->bx += uv_t_dim->w << ss_hor)
 1444|  3.37M|                    {
 1445|  3.37M|                        if ((b->uv_mode == CFL_PRED && b->cfl_alpha[pl]) ||
  ------------------
  |  Branch (1445:30): [True: 371k, False: 3.00M]
  |  Branch (1445:56): [True: 292k, False: 78.3k]
  ------------------
 1446|  3.07M|                            b->pal_sz[1])
  ------------------
  |  Branch (1446:29): [True: 34.7k, False: 3.04M]
  ------------------
 1447|   327k|                        {
 1448|   327k|                            goto skip_uv_pred;
 1449|   327k|                        }
 1450|       |
 1451|  3.04M|                        int angle = b->uv_angle;
 1452|       |                        // this probably looks weird because we're using
 1453|       |                        // luma flags in a chroma loop, but that's because
 1454|       |                        // prepare_intra_edges() expects luma flags as input
 1455|  3.04M|                        const enum EdgeFlags edge_flags =
 1456|  3.04M|                            (((y > (init_y >> ss_ver) || !uv_sb_has_tr) &&
  ------------------
  |  Branch (1456:32): [True: 1.20M, False: 1.83M]
  |  Branch (1456:58): [True: 585k, False: 1.25M]
  ------------------
 1457|  1.79M|                              (x + uv_t_dim->w >= sub_cw4)) ?
  ------------------
  |  Branch (1457:31): [True: 878k, False: 914k]
  ------------------
 1458|  2.16M|                                 0 : EDGE_I444_TOP_HAS_RIGHT) |
 1459|  3.04M|                            ((x > (init_x >> ss_hor) ||
  ------------------
  |  Branch (1459:31): [True: 1.12M, False: 1.91M]
  ------------------
 1460|  1.91M|                              (!uv_sb_has_bl && y + uv_t_dim->h >= sub_ch4)) ?
  ------------------
  |  Branch (1460:32): [True: 1.08M, False: 833k]
  |  Branch (1460:49): [True: 819k, False: 260k]
  ------------------
 1461|  1.94M|                                 0 : EDGE_I444_LEFT_HAS_BOTTOM);
 1462|  3.04M|                        const pixel *top_sb_edge = NULL;
 1463|  3.04M|                        if (!((t->by & ~ss_ver) & (f->sb_step - 1))) {
  ------------------
  |  Branch (1463:29): [True: 739k, False: 2.30M]
  ------------------
 1464|   739k|                            top_sb_edge = f->ipred_edge[1 + pl];
 1465|   739k|                            const int sby = t->by >> f->sb_shift;
 1466|   739k|                            top_sb_edge += f->sb128w * 128 * (sby - 1);
 1467|   739k|                        }
 1468|  3.04M|                        const enum IntraPredMode uv_mode =
 1469|  3.04M|                             b->uv_mode == CFL_PRED ? DC_PRED : b->uv_mode;
  ------------------
  |  Branch (1469:30): [True: 78.3k, False: 2.96M]
  ------------------
 1470|  3.04M|                        const int xpos = t->bx >> ss_hor, ypos = t->by >> ss_ver;
 1471|  3.04M|                        const int xstart = ts->tiling.col_start >> ss_hor;
 1472|  3.04M|                        const int ystart = ts->tiling.row_start >> ss_ver;
 1473|  3.04M|                        const enum IntraPredMode m =
 1474|  3.04M|                            bytefn(dav1d_prepare_intra_edges)(xpos, xpos > xstart,
  ------------------
  |  |   87|  3.04M|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  3.04M|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 1475|  3.04M|                                                              ypos, ypos > ystart,
 1476|  3.04M|                                                              ts->tiling.col_end >> ss_hor,
 1477|  3.04M|                                                              ts->tiling.row_end >> ss_ver,
 1478|  3.04M|                                                              edge_flags, dst, stride,
 1479|  3.04M|                                                              top_sb_edge, uv_mode,
 1480|  3.04M|                                                              &angle, uv_t_dim->w,
 1481|  3.04M|                                                              uv_t_dim->h,
 1482|  3.04M|                                                              f->seq_hdr->intra_edge_filter,
 1483|  3.04M|                                                              edge HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  3.04M|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1484|  3.04M|                        angle |= intra_edge_filter_flag;
 1485|  3.04M|                        dsp->ipred.intra_pred[m](dst, stride, edge,
 1486|  3.04M|                                                 uv_t_dim->w * 4,
 1487|  3.04M|                                                 uv_t_dim->h * 4,
 1488|  3.04M|                                                 angle | sm_uv_fl,
 1489|  3.04M|                                                 (4 * f->bw + ss_hor -
 1490|  3.04M|                                                  4 * (t->bx & ~ss_hor)) >> ss_hor,
 1491|  3.04M|                                                 (4 * f->bh + ss_ver -
 1492|  3.04M|                                                  4 * (t->by & ~ss_ver)) >> ss_ver
 1493|  3.04M|                                                 HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  3.04M|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1494|  3.04M|                        if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   34|  3.04M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 3.04M]
  |  |  ------------------
  |  |   35|  3.04M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  3.04M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                                      if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1495|      0|                            hex_dump(edge - uv_t_dim->h * 4, uv_t_dim->h * 4,
 1496|      0|                                     uv_t_dim->h * 4, 2, "l");
 1497|      0|                            hex_dump(edge, 0, 1, 1, "tl");
 1498|      0|                            hex_dump(edge + 1, uv_t_dim->w * 4,
 1499|      0|                                     uv_t_dim->w * 4, 2, "t");
 1500|      0|                            hex_dump(dst, stride, uv_t_dim->w * 4,
 1501|      0|                                     uv_t_dim->h * 4, pl ? "v-intra-pred" : "u-intra-pred");
  ------------------
  |  Branch (1501:55): [True: 0, False: 0]
  ------------------
 1502|      0|                        }
 1503|       |
 1504|  3.37M|                    skip_uv_pred: {}
 1505|  3.37M|                        if (!b->skip) {
  ------------------
  |  Branch (1505:29): [True: 1.68M, False: 1.68M]
  ------------------
 1506|  1.68M|                            enum TxfmType txtp;
 1507|  1.68M|                            int eob;
 1508|  1.68M|                            coef *cf;
 1509|  1.68M|                            if (t->frame_thread.pass) {
  ------------------
  |  Branch (1509:33): [True: 0, False: 1.68M]
  ------------------
 1510|      0|                                const int p = t->frame_thread.pass & 1;
 1511|      0|                                const int cbi = *ts->frame_thread[p].cbi++;
 1512|      0|                                cf = ts->frame_thread[p].cf;
 1513|      0|                                ts->frame_thread[p].cf += uv_t_dim->w * uv_t_dim->h * 16;
 1514|      0|                                eob  = cbi >> 5;
 1515|      0|                                txtp = cbi & 0x1f;
 1516|  1.68M|                            } else {
 1517|  1.68M|                                uint8_t cf_ctx;
 1518|  1.68M|                                cf = bitfn(t->cf);
  ------------------
  |  |   77|  1.68M|#define bitfn(x) x##_16bpc
  ------------------
 1519|  1.68M|                                eob = decode_coefs(t, &t->a->ccoef[pl][cbx4 + x],
 1520|  1.68M|                                                   &t->l.ccoef[pl][cby4 + y],
 1521|  1.68M|                                                   b->uvtx, bs, b, 1, 1 + pl, cf,
 1522|  1.68M|                                                   &txtp, &cf_ctx);
 1523|  1.68M|                                if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  1.68M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 1.68M]
  |  |  ------------------
  |  |   35|  1.68M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  1.68M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1524|      0|                                    printf("Post-uv-cf-blk[pl=%d,tx=%d,"
 1525|      0|                                           "txtp=%d,eob=%d]: r=%d [x=%d,cbx4=%d]\n",
 1526|      0|                                           pl, b->uvtx, txtp, eob, ts->msac.rng, x, cbx4);
 1527|  1.68M|                                int ctw = imin(uv_t_dim->w, (f->bw - t->bx + ss_hor) >> ss_hor);
 1528|  1.68M|                                int cth = imin(uv_t_dim->h, (f->bh - t->by + ss_ver) >> ss_ver);
 1529|  1.68M|                                dav1d_memset_likely_pow2(&t->a->ccoef[pl][cbx4 + x], cf_ctx, ctw);
 1530|  1.68M|                                dav1d_memset_likely_pow2(&t->l.ccoef[pl][cby4 + y], cf_ctx, cth);
 1531|  1.68M|                            }
 1532|  1.68M|                            if (eob >= 0) {
  ------------------
  |  Branch (1532:33): [True: 453k, False: 1.22M]
  ------------------
 1533|   453k|                                if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   34|   453k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 453k]
  |  |  ------------------
  |  |   35|   453k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   453k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                                              if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1534|      0|                                    coef_dump(cf, uv_t_dim->h * 4,
 1535|      0|                                              uv_t_dim->w * 4, 3, "dq");
 1536|   453k|                                dsp->itx.itxfm_add[b->uvtx]
 1537|   453k|                                                  [txtp](dst, stride,
 1538|   453k|                                                         cf, eob HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|   453k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1539|   453k|                                if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   34|   453k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 453k]
  |  |  ------------------
  |  |   35|   453k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   453k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                                              if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1540|      0|                                    hex_dump(dst, stride, uv_t_dim->w * 4,
 1541|      0|                                             uv_t_dim->h * 4, "recon");
 1542|   453k|                            }
 1543|  1.68M|                        } else if (!t->frame_thread.pass) {
  ------------------
  |  Branch (1543:36): [True: 1.68M, False: 0]
  ------------------
 1544|  1.68M|                            dav1d_memset_pow2[uv_t_dim->lw](&t->a->ccoef[pl][cbx4 + x], 0x40);
 1545|  1.68M|                            dav1d_memset_pow2[uv_t_dim->lh](&t->l.ccoef[pl][cby4 + y], 0x40);
 1546|  1.68M|                        }
 1547|  3.37M|                        dst += uv_t_dim->w * 4;
 1548|  3.37M|                    }
 1549|  2.23M|                    t->bx -= x << ss_hor;
 1550|  2.23M|                }
 1551|  1.89M|                t->by -= y << ss_ver;
 1552|  1.89M|            }
 1553|   946k|        }
 1554|  1.49M|    }
 1555|  1.44M|}
dav1d_recon_b_inter_16bpc:
 1559|   989k|{
 1560|   989k|    Dav1dTileState *const ts = t->ts;
 1561|   989k|    const Dav1dFrameContext *const f = t->f;
 1562|   989k|    const Dav1dDSPContext *const dsp = f->dsp;
 1563|   989k|    const int bx4 = t->bx & 31, by4 = t->by & 31;
 1564|   989k|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
 1565|   989k|    const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
 1566|   989k|    const int cbx4 = bx4 >> ss_hor, cby4 = by4 >> ss_ver;
 1567|   989k|    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
 1568|   989k|    const int bw4 = b_dim[0], bh4 = b_dim[1];
 1569|   989k|    const int w4 = imin(bw4, f->bw - t->bx), h4 = imin(bh4, f->bh - t->by);
 1570|   989k|    const int has_chroma = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400 &&
  ------------------
  |  Branch (1570:28): [True: 415k, False: 573k]
  ------------------
 1571|   415k|                           (bw4 > ss_hor || t->bx & 1) &&
  ------------------
  |  Branch (1571:29): [True: 387k, False: 27.8k]
  |  Branch (1571:45): [True: 13.9k, False: 13.8k]
  ------------------
 1572|   401k|                           (bh4 > ss_ver || t->by & 1);
  ------------------
  |  Branch (1572:29): [True: 388k, False: 13.5k]
  |  Branch (1572:45): [True: 6.81k, False: 6.77k]
  ------------------
 1573|   989k|    const int chr_layout_idx = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I400 ? 0 :
  ------------------
  |  Branch (1573:32): [True: 573k, False: 415k]
  ------------------
 1574|   989k|                               DAV1D_PIXEL_LAYOUT_I444 - f->cur.p.layout;
 1575|   989k|    int res;
 1576|       |
 1577|       |    // prediction
 1578|   989k|    const int cbh4 = (bh4 + ss_ver) >> ss_ver, cbw4 = (bw4 + ss_hor) >> ss_hor;
 1579|   989k|    pixel *dst = ((pixel *) f->cur.data[0]) +
 1580|   989k|        4 * (t->by * PXSTRIDE(f->cur.stride[0]) + t->bx);
 1581|   989k|    const ptrdiff_t uvdstoff =
 1582|   989k|        4 * ((t->bx >> ss_hor) + (t->by >> ss_ver) * PXSTRIDE(f->cur.stride[1]));
 1583|   989k|    if (IS_KEY_OR_INTRA(f->frame_hdr)) {
  ------------------
  |  |   43|   989k|    (!IS_INTER_OR_SWITCH(frame_header))
  |  |  ------------------
  |  |  |  |   36|   989k|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (43:5): [True: 455k, False: 533k]
  |  |  ------------------
  ------------------
 1584|       |        // intrabc
 1585|   455k|        assert(!f->frame_hdr->super_res.enabled);
  ------------------
  |  Branch (1585:9): [True: 455k, False: 0]
  ------------------
 1586|   455k|        res = mc(t, dst, NULL, f->cur.stride[0], bw4, bh4, t->bx, t->by, 0,
 1587|   455k|                 b->mv[0], &f->sr_cur, 0 /* unused */, FILTER_2D_BILINEAR);
 1588|   455k|        if (res) return res;
  ------------------
  |  Branch (1588:13): [True: 0, False: 455k]
  ------------------
 1589|   496k|        if (has_chroma) for (int pl = 1; pl < 3; pl++) {
  ------------------
  |  Branch (1589:13): [True: 165k, False: 290k]
  |  Branch (1589:42): [True: 330k, False: 165k]
  ------------------
 1590|   330k|            res = mc(t, ((pixel *)f->cur.data[pl]) + uvdstoff, NULL, f->cur.stride[1],
 1591|   330k|                     bw4 << (bw4 == ss_hor), bh4 << (bh4 == ss_ver),
 1592|   330k|                     t->bx & ~ss_hor, t->by & ~ss_ver, pl, b->mv[0],
 1593|   330k|                     &f->sr_cur, 0 /* unused */, FILTER_2D_BILINEAR);
 1594|   330k|            if (res) return res;
  ------------------
  |  Branch (1594:17): [True: 0, False: 330k]
  ------------------
 1595|   330k|        }
 1596|   533k|    } else if (b->comp_type == COMP_INTER_NONE) {
  ------------------
  |  Branch (1596:16): [True: 445k, False: 88.3k]
  ------------------
 1597|   445k|        const Dav1dThreadPicture *const refp = &f->refp[b->ref[0]];
 1598|   445k|        const enum Filter2d filter_2d = b->filter2d;
 1599|       |
 1600|   445k|        if (imin(bw4, bh4) > 1 &&
  ------------------
  |  Branch (1600:13): [True: 287k, False: 157k]
  ------------------
 1601|   287k|            ((b->inter_mode == GLOBALMV && f->gmv_warp_allowed[b->ref[0]]) ||
  ------------------
  |  Branch (1601:15): [True: 178k, False: 109k]
  |  Branch (1601:44): [True: 11.4k, False: 166k]
  ------------------
 1602|   276k|             (b->motion_mode == MM_WARP && t->warpmv.type > DAV1D_WM_TYPE_TRANSLATION)))
  ------------------
  |  Branch (1602:15): [True: 45.1k, False: 230k]
  |  Branch (1602:44): [True: 43.1k, False: 1.98k]
  ------------------
 1603|  54.6k|        {
 1604|  54.6k|            res = warp_affine(t, dst, NULL, f->cur.stride[0], b_dim, 0, refp,
 1605|  54.6k|                              b->motion_mode == MM_WARP ? &t->warpmv :
  ------------------
  |  Branch (1605:31): [True: 43.1k, False: 11.4k]
  ------------------
 1606|  54.6k|                                  &f->frame_hdr->gmv[b->ref[0]]);
 1607|  54.6k|            if (res) return res;
  ------------------
  |  Branch (1607:17): [True: 0, False: 54.6k]
  ------------------
 1608|   390k|        } else {
 1609|   390k|            res = mc(t, dst, NULL, f->cur.stride[0],
 1610|   390k|                     bw4, bh4, t->bx, t->by, 0, b->mv[0], refp, b->ref[0], filter_2d);
 1611|   390k|            if (res) return res;
  ------------------
  |  Branch (1611:17): [True: 0, False: 390k]
  ------------------
 1612|   390k|            if (b->motion_mode == MM_OBMC) {
  ------------------
  |  Branch (1612:17): [True: 88.8k, False: 301k]
  ------------------
 1613|  88.8k|                res = obmc(t, dst, f->cur.stride[0], b_dim, 0, bx4, by4, w4, h4);
 1614|  88.8k|                if (res) return res;
  ------------------
  |  Branch (1614:21): [True: 0, False: 88.8k]
  ------------------
 1615|  88.8k|            }
 1616|   390k|        }
 1617|   445k|        if (b->interintra_type) {
  ------------------
  |  Branch (1617:13): [True: 19.9k, False: 425k]
  ------------------
 1618|  19.9k|            pixel *const tl_edge = bitfn(t->scratch.edge) + 32;
  ------------------
  |  |   77|  19.9k|#define bitfn(x) x##_16bpc
  ------------------
 1619|  19.9k|            enum IntraPredMode m = b->interintra_mode == II_SMOOTH_PRED ?
  ------------------
  |  Branch (1619:36): [True: 3.71k, False: 16.2k]
  ------------------
 1620|  16.2k|                                   SMOOTH_PRED : b->interintra_mode;
 1621|  19.9k|            pixel *const tmp = bitfn(t->scratch.interintra);
  ------------------
  |  |   77|  19.9k|#define bitfn(x) x##_16bpc
  ------------------
 1622|  19.9k|            int angle = 0;
 1623|  19.9k|            const pixel *top_sb_edge = NULL;
 1624|  19.9k|            if (!(t->by & (f->sb_step - 1))) {
  ------------------
  |  Branch (1624:17): [True: 2.73k, False: 17.2k]
  ------------------
 1625|  2.73k|                top_sb_edge = f->ipred_edge[0];
 1626|  2.73k|                const int sby = t->by >> f->sb_shift;
 1627|  2.73k|                top_sb_edge += f->sb128w * 128 * (sby - 1);
 1628|  2.73k|            }
 1629|  19.9k|            m = bytefn(dav1d_prepare_intra_edges)(t->bx, t->bx > ts->tiling.col_start,
  ------------------
  |  |   87|  19.9k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  19.9k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 1630|  19.9k|                                                  t->by, t->by > ts->tiling.row_start,
 1631|  19.9k|                                                  ts->tiling.col_end, ts->tiling.row_end,
 1632|  19.9k|                                                  0, dst, f->cur.stride[0], top_sb_edge,
 1633|  19.9k|                                                  m, &angle, bw4, bh4, 0, tl_edge
 1634|  19.9k|                                                  HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  19.9k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1635|  19.9k|            dsp->ipred.intra_pred[m](tmp, 4 * bw4 * sizeof(pixel),
 1636|  19.9k|                                     tl_edge, bw4 * 4, bh4 * 4, 0, 0, 0
 1637|  19.9k|                                     HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  19.9k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1638|  19.9k|            dsp->mc.blend(dst, f->cur.stride[0], tmp,
 1639|  19.9k|                          bw4 * 4, bh4 * 4, II_MASK(0, bs, b));
  ------------------
  |  |   83|  19.9k|    ((const uint8_t*)((uintptr_t)&dav1d_masks + \
  |  |   84|  19.9k|    (size_t)((b)->interintra_type == INTER_INTRA_BLEND ? \
  |  |  ------------------
  |  |  |  Branch (84:14): [True: 14.6k, False: 5.34k]
  |  |  ------------------
  |  |   85|  19.9k|    dav1d_masks.offsets[c][(bs)-BS_32x32].ii[(b)->interintra_mode] : \
  |  |   86|  19.9k|    dav1d_masks.offsets[c][(bs)-BS_32x32].wedge[0][(b)->wedge_idx]) * 8))
  ------------------
 1640|  19.9k|        }
 1641|       |
 1642|   445k|        if (!has_chroma) goto skip_inter_chroma_pred;
  ------------------
  |  Branch (1642:13): [True: 278k, False: 166k]
  ------------------
 1643|       |
 1644|       |        // sub8x8 derivation
 1645|   166k|        int is_sub8x8 = bw4 == ss_hor || bh4 == ss_ver;
  ------------------
  |  Branch (1645:25): [True: 4.63k, False: 162k]
  |  Branch (1645:42): [True: 2.02k, False: 159k]
  ------------------
 1646|   166k|        refmvs_block *const *r;
 1647|   166k|        if (is_sub8x8) {
  ------------------
  |  Branch (1647:13): [True: 6.65k, False: 159k]
  ------------------
 1648|  6.65k|            assert(ss_hor == 1);
  ------------------
  |  Branch (1648:13): [True: 6.65k, False: 0]
  ------------------
 1649|  6.65k|            r = &t->rt.r[(t->by & 31) + 5];
 1650|  6.65k|            if (bw4 == 1) is_sub8x8 &= r[0][t->bx - 1].ref.ref[0] > 0;
  ------------------
  |  Branch (1650:17): [True: 4.63k, False: 2.02k]
  ------------------
 1651|  6.65k|            if (bh4 == ss_ver) is_sub8x8 &= r[-1][t->bx].ref.ref[0] > 0;
  ------------------
  |  Branch (1651:17): [True: 4.56k, False: 2.09k]
  ------------------
 1652|  6.65k|            if (bw4 == 1 && bh4 == ss_ver)
  ------------------
  |  Branch (1652:17): [True: 4.63k, False: 2.02k]
  |  Branch (1652:29): [True: 2.53k, False: 2.09k]
  ------------------
 1653|  2.53k|                is_sub8x8 &= r[-1][t->bx - 1].ref.ref[0] > 0;
 1654|  6.65k|        }
 1655|       |
 1656|       |        // chroma prediction
 1657|   166k|        if (is_sub8x8) {
  ------------------
  |  Branch (1657:13): [True: 6.48k, False: 160k]
  ------------------
 1658|  6.48k|            assert(ss_hor == 1);
  ------------------
  |  Branch (1658:13): [True: 6.48k, False: 0]
  ------------------
 1659|  6.48k|            ptrdiff_t h_off = 0, v_off = 0;
 1660|  6.48k|            if (bw4 == 1 && bh4 == ss_ver) {
  ------------------
  |  Branch (1660:17): [True: 4.51k, False: 1.96k]
  |  Branch (1660:29): [True: 2.46k, False: 2.04k]
  ------------------
 1661|  7.40k|                for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1661:34): [True: 4.93k, False: 2.46k]
  ------------------
 1662|  4.93k|                    res = mc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff,
 1663|  4.93k|                             NULL, f->cur.stride[1],
 1664|  4.93k|                             bw4, bh4, t->bx - 1, t->by - 1, 1 + pl,
 1665|  4.93k|                             r[-1][t->bx - 1].mv.mv[0],
 1666|  4.93k|                             &f->refp[r[-1][t->bx - 1].ref.ref[0] - 1],
 1667|  4.93k|                             r[-1][t->bx - 1].ref.ref[0] - 1,
 1668|  4.93k|                             t->frame_thread.pass != 2 ? t->tl_4x4_filter :
  ------------------
  |  Branch (1668:30): [True: 4.93k, False: 0]
  ------------------
 1669|  4.93k|                                 f->frame_thread.b[((t->by - 1) * f->b4_stride) + t->bx - 1].filter2d);
 1670|  4.93k|                    if (res) return res;
  ------------------
  |  Branch (1670:25): [True: 0, False: 4.93k]
  ------------------
 1671|  4.93k|                }
 1672|  2.46k|                v_off = 2 * PXSTRIDE(f->cur.stride[1]);
 1673|  2.46k|                h_off = 2;
 1674|  2.46k|            }
 1675|  6.48k|            if (bw4 == 1) {
  ------------------
  |  Branch (1675:17): [True: 4.51k, False: 1.96k]
  ------------------
 1676|  4.51k|                const enum Filter2d left_filter_2d =
 1677|  4.51k|                    dav1d_filter_2d[t->l.filter[1][by4]][t->l.filter[0][by4]];
 1678|  13.5k|                for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1678:34): [True: 9.03k, False: 4.51k]
  ------------------
 1679|  9.03k|                    res = mc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff + v_off, NULL,
 1680|  9.03k|                             f->cur.stride[1], bw4, bh4, t->bx - 1,
 1681|  9.03k|                             t->by, 1 + pl, r[0][t->bx - 1].mv.mv[0],
 1682|  9.03k|                             &f->refp[r[0][t->bx - 1].ref.ref[0] - 1],
 1683|  9.03k|                             r[0][t->bx - 1].ref.ref[0] - 1,
 1684|  9.03k|                             t->frame_thread.pass != 2 ? left_filter_2d :
  ------------------
  |  Branch (1684:30): [True: 9.03k, False: 0]
  ------------------
 1685|  9.03k|                                 f->frame_thread.b[(t->by * f->b4_stride) + t->bx - 1].filter2d);
 1686|  9.03k|                    if (res) return res;
  ------------------
  |  Branch (1686:25): [True: 0, False: 9.03k]
  ------------------
 1687|  9.03k|                }
 1688|  4.51k|                h_off = 2;
 1689|  4.51k|            }
 1690|  6.48k|            if (bh4 == ss_ver) {
  ------------------
  |  Branch (1690:17): [True: 4.43k, False: 2.04k]
  ------------------
 1691|  4.43k|                const enum Filter2d top_filter_2d =
 1692|  4.43k|                    dav1d_filter_2d[t->a->filter[1][bx4]][t->a->filter[0][bx4]];
 1693|  13.2k|                for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1693:34): [True: 8.86k, False: 4.43k]
  ------------------
 1694|  8.86k|                    res = mc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff + h_off, NULL,
 1695|  8.86k|                             f->cur.stride[1], bw4, bh4, t->bx, t->by - 1,
 1696|  8.86k|                             1 + pl, r[-1][t->bx].mv.mv[0],
 1697|  8.86k|                             &f->refp[r[-1][t->bx].ref.ref[0] - 1],
 1698|  8.86k|                             r[-1][t->bx].ref.ref[0] - 1,
 1699|  8.86k|                             t->frame_thread.pass != 2 ? top_filter_2d :
  ------------------
  |  Branch (1699:30): [True: 8.86k, False: 0]
  ------------------
 1700|  8.86k|                                 f->frame_thread.b[((t->by - 1) * f->b4_stride) + t->bx].filter2d);
 1701|  8.86k|                    if (res) return res;
  ------------------
  |  Branch (1701:25): [True: 0, False: 8.86k]
  ------------------
 1702|  8.86k|                }
 1703|  4.43k|                v_off = 2 * PXSTRIDE(f->cur.stride[1]);
 1704|  4.43k|            }
 1705|  19.4k|            for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1705:30): [True: 12.9k, False: 6.48k]
  ------------------
 1706|  12.9k|                res = mc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff + h_off + v_off, NULL, f->cur.stride[1],
 1707|  12.9k|                         bw4, bh4, t->bx, t->by, 1 + pl, b->mv[0],
 1708|  12.9k|                         refp, b->ref[0], filter_2d);
 1709|  12.9k|                if (res) return res;
  ------------------
  |  Branch (1709:21): [True: 0, False: 12.9k]
  ------------------
 1710|  12.9k|            }
 1711|   160k|        } else {
 1712|   160k|            if (imin(cbw4, cbh4) > 1 &&
  ------------------
  |  Branch (1712:17): [True: 96.1k, False: 63.9k]
  ------------------
 1713|  96.1k|                ((b->inter_mode == GLOBALMV && f->gmv_warp_allowed[b->ref[0]]) ||
  ------------------
  |  Branch (1713:19): [True: 40.5k, False: 55.6k]
  |  Branch (1713:48): [True: 5.10k, False: 35.4k]
  ------------------
 1714|  91.0k|                 (b->motion_mode == MM_WARP && t->warpmv.type > DAV1D_WM_TYPE_TRANSLATION)))
  ------------------
  |  Branch (1714:19): [True: 4.57k, False: 86.4k]
  |  Branch (1714:48): [True: 3.41k, False: 1.16k]
  ------------------
 1715|  8.51k|            {
 1716|  25.5k|                for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1716:34): [True: 17.0k, False: 8.51k]
  ------------------
 1717|  17.0k|                    res = warp_affine(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff, NULL,
 1718|  17.0k|                                      f->cur.stride[1], b_dim, 1 + pl, refp,
 1719|  17.0k|                                      b->motion_mode == MM_WARP ? &t->warpmv :
  ------------------
  |  Branch (1719:39): [True: 6.82k, False: 10.2k]
  ------------------
 1720|  17.0k|                                          &f->frame_hdr->gmv[b->ref[0]]);
 1721|  17.0k|                    if (res) return res;
  ------------------
  |  Branch (1721:25): [True: 0, False: 17.0k]
  ------------------
 1722|  17.0k|                }
 1723|   151k|            } else {
 1724|   454k|                for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1724:34): [True: 303k, False: 151k]
  ------------------
 1725|   303k|                    res = mc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff,
 1726|   303k|                             NULL, f->cur.stride[1],
 1727|   303k|                             bw4 << (bw4 == ss_hor), bh4 << (bh4 == ss_ver),
 1728|   303k|                             t->bx & ~ss_hor, t->by & ~ss_ver,
 1729|   303k|                             1 + pl, b->mv[0], refp, b->ref[0], filter_2d);
 1730|   303k|                    if (res) return res;
  ------------------
  |  Branch (1730:25): [True: 0, False: 303k]
  ------------------
 1731|   303k|                    if (b->motion_mode == MM_OBMC) {
  ------------------
  |  Branch (1731:25): [True: 66.1k, False: 237k]
  ------------------
 1732|  66.1k|                        res = obmc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff,
 1733|  66.1k|                                   f->cur.stride[1], b_dim, 1 + pl, bx4, by4, w4, h4);
 1734|  66.1k|                        if (res) return res;
  ------------------
  |  Branch (1734:29): [True: 0, False: 66.1k]
  ------------------
 1735|  66.1k|                    }
 1736|   303k|                }
 1737|   151k|            }
 1738|   160k|            if (b->interintra_type) {
  ------------------
  |  Branch (1738:17): [True: 9.96k, False: 150k]
  ------------------
 1739|       |                // FIXME for 8x32 with 4:2:2 subsampling, this probably does
 1740|       |                // the wrong thing since it will select 4x16, not 4x32, as a
 1741|       |                // transform size...
 1742|  9.96k|                const uint8_t *const ii_mask = II_MASK(chr_layout_idx, bs, b);
  ------------------
  |  |   83|  9.96k|    ((const uint8_t*)((uintptr_t)&dav1d_masks + \
  |  |   84|  9.96k|    (size_t)((b)->interintra_type == INTER_INTRA_BLEND ? \
  |  |  ------------------
  |  |  |  Branch (84:14): [True: 6.97k, False: 2.98k]
  |  |  ------------------
  |  |   85|  9.96k|    dav1d_masks.offsets[c][(bs)-BS_32x32].ii[(b)->interintra_mode] : \
  |  |   86|  9.96k|    dav1d_masks.offsets[c][(bs)-BS_32x32].wedge[0][(b)->wedge_idx]) * 8))
  ------------------
 1743|       |
 1744|  29.8k|                for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1744:34): [True: 19.9k, False: 9.96k]
  ------------------
 1745|  19.9k|                    pixel *const tmp = bitfn(t->scratch.interintra);
  ------------------
  |  |   77|  19.9k|#define bitfn(x) x##_16bpc
  ------------------
 1746|  19.9k|                    pixel *const tl_edge = bitfn(t->scratch.edge) + 32;
  ------------------
  |  |   77|  19.9k|#define bitfn(x) x##_16bpc
  ------------------
 1747|  19.9k|                    enum IntraPredMode m =
 1748|  19.9k|                        b->interintra_mode == II_SMOOTH_PRED ?
  ------------------
  |  Branch (1748:25): [True: 3.82k, False: 16.0k]
  ------------------
 1749|  16.0k|                        SMOOTH_PRED : b->interintra_mode;
 1750|  19.9k|                    int angle = 0;
 1751|  19.9k|                    pixel *const uvdst = ((pixel *) f->cur.data[1 + pl]) + uvdstoff;
 1752|  19.9k|                    const pixel *top_sb_edge = NULL;
 1753|  19.9k|                    if (!(t->by & (f->sb_step - 1))) {
  ------------------
  |  Branch (1753:25): [True: 3.40k, False: 16.5k]
  ------------------
 1754|  3.40k|                        top_sb_edge = f->ipred_edge[pl + 1];
 1755|  3.40k|                        const int sby = t->by >> f->sb_shift;
 1756|  3.40k|                        top_sb_edge += f->sb128w * 128 * (sby - 1);
 1757|  3.40k|                    }
 1758|  19.9k|                    m = bytefn(dav1d_prepare_intra_edges)(t->bx >> ss_hor,
  ------------------
  |  |   87|  19.9k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  19.9k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 1759|  19.9k|                                                          (t->bx >> ss_hor) >
 1760|  19.9k|                                                              (ts->tiling.col_start >> ss_hor),
 1761|  19.9k|                                                          t->by >> ss_ver,
 1762|  19.9k|                                                          (t->by >> ss_ver) >
 1763|  19.9k|                                                              (ts->tiling.row_start >> ss_ver),
 1764|  19.9k|                                                          ts->tiling.col_end >> ss_hor,
 1765|  19.9k|                                                          ts->tiling.row_end >> ss_ver,
 1766|  19.9k|                                                          0, uvdst, f->cur.stride[1],
 1767|  19.9k|                                                          top_sb_edge, m,
 1768|  19.9k|                                                          &angle, cbw4, cbh4, 0, tl_edge
 1769|  19.9k|                                                          HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  19.9k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1770|  19.9k|                    dsp->ipred.intra_pred[m](tmp, cbw4 * 4 * sizeof(pixel),
 1771|  19.9k|                                             tl_edge, cbw4 * 4, cbh4 * 4, 0, 0, 0
 1772|  19.9k|                                             HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  19.9k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1773|  19.9k|                    dsp->mc.blend(uvdst, f->cur.stride[1], tmp,
 1774|  19.9k|                                  cbw4 * 4, cbh4 * 4, ii_mask);
 1775|  19.9k|                }
 1776|  9.96k|            }
 1777|   160k|        }
 1778|       |
 1779|   445k|    skip_inter_chroma_pred: {}
 1780|   445k|        t->tl_4x4_filter = filter_2d;
 1781|   445k|    } else {
 1782|  88.3k|        const enum Filter2d filter_2d = b->filter2d;
 1783|       |        // Maximum super block size is 128x128
 1784|  88.3k|        int16_t (*tmp)[128 * 128] = t->scratch.compinter;
 1785|  88.3k|        int jnt_weight;
 1786|  88.3k|        uint8_t *const seg_mask = t->scratch.seg_mask;
 1787|  88.3k|        const uint8_t *mask;
 1788|       |
 1789|   264k|        for (int i = 0; i < 2; i++) {
  ------------------
  |  Branch (1789:25): [True: 176k, False: 88.3k]
  ------------------
 1790|   176k|            const Dav1dThreadPicture *const refp = &f->refp[b->ref[i]];
 1791|       |
 1792|   176k|            if (b->inter_mode == GLOBALMV_GLOBALMV && f->gmv_warp_allowed[b->ref[i]]) {
  ------------------
  |  Branch (1792:17): [True: 14.5k, False: 162k]
  |  Branch (1792:55): [True: 503, False: 14.0k]
  ------------------
 1793|    503|                res = warp_affine(t, NULL, tmp[i], bw4 * 4, b_dim, 0, refp,
 1794|    503|                                  &f->frame_hdr->gmv[b->ref[i]]);
 1795|    503|                if (res) return res;
  ------------------
  |  Branch (1795:21): [True: 0, False: 503]
  ------------------
 1796|   176k|            } else {
 1797|   176k|                res = mc(t, NULL, tmp[i], 0, bw4, bh4, t->bx, t->by, 0,
 1798|   176k|                         b->mv[i], refp, b->ref[i], filter_2d);
 1799|   176k|                if (res) return res;
  ------------------
  |  Branch (1799:21): [True: 0, False: 176k]
  ------------------
 1800|   176k|            }
 1801|   176k|        }
 1802|  88.3k|        switch (b->comp_type) {
  ------------------
  |  Branch (1802:17): [True: 88.3k, False: 0]
  ------------------
 1803|  47.8k|        case COMP_INTER_AVG:
  ------------------
  |  Branch (1803:9): [True: 47.8k, False: 40.5k]
  ------------------
 1804|  47.8k|            dsp->mc.avg(dst, f->cur.stride[0], tmp[0], tmp[1],
 1805|  47.8k|                        bw4 * 4, bh4 * 4 HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  47.8k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1806|  47.8k|            break;
 1807|  13.2k|        case COMP_INTER_WEIGHTED_AVG:
  ------------------
  |  Branch (1807:9): [True: 13.2k, False: 75.1k]
  ------------------
 1808|  13.2k|            jnt_weight = f->jnt_weights[b->ref[0]][b->ref[1]];
 1809|  13.2k|            dsp->mc.w_avg(dst, f->cur.stride[0], tmp[0], tmp[1],
 1810|  13.2k|                          bw4 * 4, bh4 * 4, jnt_weight HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  13.2k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1811|  13.2k|            break;
 1812|  19.3k|        case COMP_INTER_SEG:
  ------------------
  |  Branch (1812:9): [True: 19.3k, False: 68.9k]
  ------------------
 1813|  19.3k|            dsp->mc.w_mask[chr_layout_idx](dst, f->cur.stride[0],
 1814|  19.3k|                                           tmp[b->mask_sign], tmp[!b->mask_sign],
 1815|  19.3k|                                           bw4 * 4, bh4 * 4, seg_mask,
 1816|  19.3k|                                           b->mask_sign HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  19.3k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1817|  19.3k|            mask = seg_mask;
 1818|  19.3k|            break;
 1819|  7.89k|        case COMP_INTER_WEDGE:
  ------------------
  |  Branch (1819:9): [True: 7.89k, False: 80.4k]
  ------------------
 1820|  7.89k|            mask = WEDGE_MASK(0, bs, 0, b->wedge_idx);
  ------------------
  |  |   89|  7.89k|    ((const uint8_t*)((uintptr_t)&dav1d_masks + \
  |  |   90|  7.89k|    (size_t)dav1d_masks.offsets[c][(bs)-BS_32x32].wedge[sign][idx] * 8))
  ------------------
 1821|  7.89k|            dsp->mc.mask(dst, f->cur.stride[0],
 1822|  7.89k|                         tmp[b->mask_sign], tmp[!b->mask_sign],
 1823|  7.89k|                         bw4 * 4, bh4 * 4, mask HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  7.89k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1824|  7.89k|            if (has_chroma)
  ------------------
  |  Branch (1824:17): [True: 6.03k, False: 1.85k]
  ------------------
 1825|  6.03k|                mask = WEDGE_MASK(chr_layout_idx, bs, b->mask_sign, b->wedge_idx);
  ------------------
  |  |   89|  6.03k|    ((const uint8_t*)((uintptr_t)&dav1d_masks + \
  |  |   90|  6.03k|    (size_t)dav1d_masks.offsets[c][(bs)-BS_32x32].wedge[sign][idx] * 8))
  ------------------
 1826|  7.89k|            break;
 1827|  88.3k|        }
 1828|       |
 1829|       |        // chroma
 1830|   189k|        if (has_chroma) for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1830:13): [True: 63.1k, False: 25.2k]
  |  Branch (1830:42): [True: 126k, False: 63.1k]
  ------------------
 1831|   378k|            for (int i = 0; i < 2; i++) {
  ------------------
  |  Branch (1831:29): [True: 252k, False: 126k]
  ------------------
 1832|   252k|                const Dav1dThreadPicture *const refp = &f->refp[b->ref[i]];
 1833|   252k|                if (b->inter_mode == GLOBALMV_GLOBALMV &&
  ------------------
  |  Branch (1833:21): [True: 22.4k, False: 230k]
  ------------------
 1834|  22.4k|                    imin(cbw4, cbh4) > 1 && f->gmv_warp_allowed[b->ref[i]])
  ------------------
  |  Branch (1834:21): [True: 21.6k, False: 836]
  |  Branch (1834:45): [True: 362, False: 21.2k]
  ------------------
 1835|    362|                {
 1836|    362|                    res = warp_affine(t, NULL, tmp[i], bw4 * 4 >> ss_hor,
 1837|    362|                                      b_dim, 1 + pl,
 1838|    362|                                      refp, &f->frame_hdr->gmv[b->ref[i]]);
 1839|    362|                    if (res) return res;
  ------------------
  |  Branch (1839:25): [True: 0, False: 362]
  ------------------
 1840|   252k|                } else {
 1841|   252k|                    res = mc(t, NULL, tmp[i], 0, bw4, bh4, t->bx, t->by,
 1842|   252k|                             1 + pl, b->mv[i], refp, b->ref[i], filter_2d);
 1843|   252k|                    if (res) return res;
  ------------------
  |  Branch (1843:25): [True: 0, False: 252k]
  ------------------
 1844|   252k|                }
 1845|   252k|            }
 1846|   126k|            pixel *const uvdst = ((pixel *) f->cur.data[1 + pl]) + uvdstoff;
 1847|   126k|            switch (b->comp_type) {
  ------------------
  |  Branch (1847:21): [True: 126k, False: 0]
  ------------------
 1848|  65.8k|            case COMP_INTER_AVG:
  ------------------
  |  Branch (1848:13): [True: 65.8k, False: 60.3k]
  ------------------
 1849|  65.8k|                dsp->mc.avg(uvdst, f->cur.stride[1], tmp[0], tmp[1],
 1850|  65.8k|                            bw4 * 4 >> ss_hor, bh4 * 4 >> ss_ver
 1851|  65.8k|                            HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  65.8k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1852|  65.8k|                break;
 1853|  24.7k|            case COMP_INTER_WEIGHTED_AVG:
  ------------------
  |  Branch (1853:13): [True: 24.7k, False: 101k]
  ------------------
 1854|  24.7k|                dsp->mc.w_avg(uvdst, f->cur.stride[1], tmp[0], tmp[1],
 1855|  24.7k|                              bw4 * 4 >> ss_hor, bh4 * 4 >> ss_ver, jnt_weight
 1856|  24.7k|                              HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  24.7k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1857|  24.7k|                break;
 1858|  12.0k|            case COMP_INTER_WEDGE:
  ------------------
  |  Branch (1858:13): [True: 12.0k, False: 114k]
  ------------------
 1859|  35.6k|            case COMP_INTER_SEG:
  ------------------
  |  Branch (1859:13): [True: 23.5k, False: 102k]
  ------------------
 1860|  35.6k|                dsp->mc.mask(uvdst, f->cur.stride[1],
 1861|  35.6k|                             tmp[b->mask_sign], tmp[!b->mask_sign],
 1862|  35.6k|                             bw4 * 4 >> ss_hor, bh4 * 4 >> ss_ver, mask
 1863|  35.6k|                             HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  35.6k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1864|  35.6k|                break;
 1865|   126k|            }
 1866|   126k|        }
 1867|  88.3k|    }
 1868|       |
 1869|   989k|    if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   34|   989k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 989k]
  |  |  ------------------
  |  |   35|   989k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   989k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                  if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1870|      0|        hex_dump(dst, f->cur.stride[0], b_dim[0] * 4, b_dim[1] * 4, "y-pred");
 1871|      0|        if (has_chroma) {
  ------------------
  |  Branch (1871:13): [True: 0, False: 0]
  ------------------
 1872|      0|            hex_dump(&((pixel *) f->cur.data[1])[uvdstoff], f->cur.stride[1],
 1873|      0|                     cbw4 * 4, cbh4 * 4, "u-pred");
 1874|      0|            hex_dump(&((pixel *) f->cur.data[2])[uvdstoff], f->cur.stride[1],
 1875|      0|                     cbw4 * 4, cbh4 * 4, "v-pred");
 1876|      0|        }
 1877|      0|    }
 1878|       |
 1879|   989k|    const int cw4 = (w4 + ss_hor) >> ss_hor, ch4 = (h4 + ss_ver) >> ss_ver;
 1880|       |
 1881|   989k|    if (b->skip) {
  ------------------
  |  Branch (1881:9): [True: 740k, False: 249k]
  ------------------
 1882|       |        // reset coef contexts
 1883|   740k|        BlockContext *const a = t->a;
 1884|   740k|        dav1d_memset_pow2[b_dim[2]](&a->lcoef[bx4], 0x40);
 1885|   740k|        dav1d_memset_pow2[b_dim[3]](&t->l.lcoef[by4], 0x40);
 1886|   740k|        if (has_chroma) {
  ------------------
  |  Branch (1886:13): [True: 215k, False: 524k]
  ------------------
 1887|   215k|            dav1d_memset_pow2_fn memset_cw = dav1d_memset_pow2[ulog2(cbw4)];
 1888|   215k|            dav1d_memset_pow2_fn memset_ch = dav1d_memset_pow2[ulog2(cbh4)];
 1889|   215k|            memset_cw(&a->ccoef[0][cbx4], 0x40);
 1890|   215k|            memset_cw(&a->ccoef[1][cbx4], 0x40);
 1891|   215k|            memset_ch(&t->l.ccoef[0][cby4], 0x40);
 1892|   215k|            memset_ch(&t->l.ccoef[1][cby4], 0x40);
 1893|   215k|        }
 1894|   740k|        return 0;
 1895|   740k|    }
 1896|       |
 1897|   249k|    const TxfmInfo *const uvtx = &dav1d_txfm_dimensions[b->uvtx];
 1898|   249k|    const TxfmInfo *const ytx = &dav1d_txfm_dimensions[b->max_ytx];
 1899|   249k|    const uint16_t tx_split[2] = { b->tx_split0, b->tx_split1 };
 1900|       |
 1901|   500k|    for (int init_y = 0; init_y < bh4; init_y += 16) {
  ------------------
  |  Branch (1901:26): [True: 251k, False: 249k]
  ------------------
 1902|   509k|        for (int init_x = 0; init_x < bw4; init_x += 16) {
  ------------------
  |  Branch (1902:30): [True: 257k, False: 251k]
  ------------------
 1903|       |            // coefficient coding & inverse transforms
 1904|   257k|            int y_off = !!init_y, y;
 1905|   257k|            dst += PXSTRIDE(f->cur.stride[0]) * 4 * init_y;
 1906|   535k|            for (y = init_y, t->by += init_y; y < imin(h4, init_y + 16);
  ------------------
  |  Branch (1906:47): [True: 277k, False: 257k]
  ------------------
 1907|   277k|                 y += ytx->h, y_off++)
 1908|   277k|            {
 1909|   277k|                int x, x_off = !!init_x;
 1910|   684k|                for (x = init_x, t->bx += init_x; x < imin(w4, init_x + 16);
  ------------------
  |  Branch (1910:51): [True: 406k, False: 277k]
  ------------------
 1911|   406k|                     x += ytx->w, x_off++)
 1912|   406k|                {
 1913|   406k|                    read_coef_tree(t, bs, b, b->max_ytx, 0, tx_split,
 1914|   406k|                                   x_off, y_off, &dst[x * 4]);
 1915|   406k|                    t->bx += ytx->w;
 1916|   406k|                }
 1917|   277k|                dst += PXSTRIDE(f->cur.stride[0]) * 4 * ytx->h;
 1918|   277k|                t->bx -= x;
 1919|   277k|                t->by += ytx->h;
 1920|   277k|            }
 1921|   257k|            dst -= PXSTRIDE(f->cur.stride[0]) * 4 * y;
 1922|   257k|            t->by -= y;
 1923|       |
 1924|       |            // chroma coefs and inverse transform
 1925|   554k|            if (has_chroma) for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1925:17): [True: 184k, False: 73.0k]
  |  Branch (1925:46): [True: 369k, False: 184k]
  ------------------
 1926|   369k|                pixel *uvdst = ((pixel *) f->cur.data[1 + pl]) + uvdstoff +
 1927|   369k|                    (PXSTRIDE(f->cur.stride[1]) * init_y * 4 >> ss_ver);
 1928|   369k|                for (y = init_y >> ss_ver, t->by += init_y;
 1929|   777k|                     y < imin(ch4, (init_y + 16) >> ss_ver); y += uvtx->h)
  ------------------
  |  Branch (1929:22): [True: 408k, False: 369k]
  ------------------
 1930|   408k|                {
 1931|   408k|                    int x;
 1932|   408k|                    for (x = init_x >> ss_hor, t->bx += init_x;
 1933|   979k|                         x < imin(cw4, (init_x + 16) >> ss_hor); x += uvtx->w)
  ------------------
  |  Branch (1933:26): [True: 571k, False: 408k]
  ------------------
 1934|   571k|                    {
 1935|   571k|                        coef *cf;
 1936|   571k|                        int eob;
 1937|   571k|                        enum TxfmType txtp;
 1938|   571k|                        if (t->frame_thread.pass) {
  ------------------
  |  Branch (1938:29): [True: 0, False: 571k]
  ------------------
 1939|      0|                            const int p = t->frame_thread.pass & 1;
 1940|      0|                            const int cbi = *ts->frame_thread[p].cbi++;
 1941|      0|                            cf = ts->frame_thread[p].cf;
 1942|      0|                            ts->frame_thread[p].cf += uvtx->w * uvtx->h * 16;
 1943|      0|                            eob  = cbi >> 5;
 1944|      0|                            txtp = cbi & 0x1f;
 1945|   571k|                        } else {
 1946|   571k|                            uint8_t cf_ctx;
 1947|   571k|                            cf = bitfn(t->cf);
  ------------------
  |  |   77|   571k|#define bitfn(x) x##_16bpc
  ------------------
 1948|   571k|                            txtp = t->scratch.txtp_map[(by4 + (y << ss_ver)) * 32 +
 1949|   571k|                                                        bx4 + (x << ss_hor)];
 1950|   571k|                            eob = decode_coefs(t, &t->a->ccoef[pl][cbx4 + x],
 1951|   571k|                                               &t->l.ccoef[pl][cby4 + y],
 1952|   571k|                                               b->uvtx, bs, b, 0, 1 + pl,
 1953|   571k|                                               cf, &txtp, &cf_ctx);
 1954|   571k|                            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   571k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 571k]
  |  |  ------------------
  |  |   35|   571k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   571k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1955|      0|                                printf("Post-uv-cf-blk[pl=%d,tx=%d,"
 1956|      0|                                       "txtp=%d,eob=%d]: r=%d\n",
 1957|      0|                                       pl, b->uvtx, txtp, eob, ts->msac.rng);
 1958|   571k|                            int ctw = imin(uvtx->w, (f->bw - t->bx + ss_hor) >> ss_hor);
 1959|   571k|                            int cth = imin(uvtx->h, (f->bh - t->by + ss_ver) >> ss_ver);
 1960|   571k|                            dav1d_memset_likely_pow2(&t->a->ccoef[pl][cbx4 + x], cf_ctx, ctw);
 1961|   571k|                            dav1d_memset_likely_pow2(&t->l.ccoef[pl][cby4 + y], cf_ctx, cth);
 1962|   571k|                        }
 1963|   571k|                        if (eob >= 0) {
  ------------------
  |  Branch (1963:29): [True: 175k, False: 396k]
  ------------------
 1964|   175k|                            if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   34|   175k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 175k]
  |  |  ------------------
  |  |   35|   175k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   175k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                                          if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1965|      0|                                coef_dump(cf, uvtx->h * 4, uvtx->w * 4, 3, "dq");
 1966|   175k|                            dsp->itx.itxfm_add[b->uvtx]
 1967|   175k|                                              [txtp](&uvdst[4 * x],
 1968|   175k|                                                     f->cur.stride[1],
 1969|   175k|                                                     cf, eob HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|   175k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1970|   175k|                            if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   34|   175k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 175k]
  |  |  ------------------
  |  |   35|   175k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   175k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                                          if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1971|      0|                                hex_dump(&uvdst[4 * x], f->cur.stride[1],
 1972|      0|                                         uvtx->w * 4, uvtx->h * 4, "recon");
 1973|   175k|                        }
 1974|   571k|                        t->bx += uvtx->w << ss_hor;
 1975|   571k|                    }
 1976|   408k|                    uvdst += PXSTRIDE(f->cur.stride[1]) * 4 * uvtx->h;
 1977|   408k|                    t->bx -= x << ss_hor;
 1978|   408k|                    t->by += uvtx->h << ss_ver;
 1979|   408k|                }
 1980|   369k|                t->by -= y << ss_ver;
 1981|   369k|            }
 1982|   257k|        }
 1983|   251k|    }
 1984|   249k|    return 0;
 1985|   989k|}
dav1d_filter_sbrow_deblock_cols_16bpc:
 1987|  61.6k|void bytefn(dav1d_filter_sbrow_deblock_cols)(Dav1dFrameContext *const f, const int sby) {
 1988|  61.6k|    if (!(f->c->inloop_filters & DAV1D_INLOOPFILTER_DEBLOCK) ||
  ------------------
  |  Branch (1988:9): [True: 0, False: 61.6k]
  ------------------
 1989|  61.6k|        (!f->frame_hdr->loopfilter.level_y[0] && !f->frame_hdr->loopfilter.level_y[1]))
  ------------------
  |  Branch (1989:10): [True: 43.2k, False: 18.4k]
  |  Branch (1989:50): [True: 37.6k, False: 5.56k]
  ------------------
 1990|  37.6k|    {
 1991|  37.6k|        return;
 1992|  37.6k|    }
 1993|  24.0k|    const int y = sby * f->sb_step * 4;
 1994|  24.0k|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
 1995|  24.0k|    pixel *const p[3] = {
 1996|  24.0k|        f->lf.p[0] + y * PXSTRIDE(f->cur.stride[0]),
 1997|  24.0k|        f->lf.p[1] + (y * PXSTRIDE(f->cur.stride[1]) >> ss_ver),
 1998|  24.0k|        f->lf.p[2] + (y * PXSTRIDE(f->cur.stride[1]) >> ss_ver)
 1999|  24.0k|    };
 2000|  24.0k|    Av1Filter *mask = f->lf.mask + (sby >> !f->seq_hdr->sb128) * f->sb128w;
 2001|  24.0k|    bytefn(dav1d_loopfilter_sbrow_cols)(f, p, mask, sby,
  ------------------
  |  |   87|  24.0k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  24.0k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2002|  24.0k|                                        f->lf.start_of_tile_row[sby]);
 2003|  24.0k|}
dav1d_filter_sbrow_deblock_rows_16bpc:
 2005|  61.6k|void bytefn(dav1d_filter_sbrow_deblock_rows)(Dav1dFrameContext *const f, const int sby) {
 2006|  61.6k|    const int y = sby * f->sb_step * 4;
 2007|  61.6k|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
 2008|  61.6k|    pixel *const p[3] = {
 2009|  61.6k|        f->lf.p[0] + y * PXSTRIDE(f->cur.stride[0]),
 2010|  61.6k|        f->lf.p[1] + (y * PXSTRIDE(f->cur.stride[1]) >> ss_ver),
 2011|  61.6k|        f->lf.p[2] + (y * PXSTRIDE(f->cur.stride[1]) >> ss_ver)
 2012|  61.6k|    };
 2013|  61.6k|    Av1Filter *mask = f->lf.mask + (sby >> !f->seq_hdr->sb128) * f->sb128w;
 2014|  61.6k|    if (f->c->inloop_filters & DAV1D_INLOOPFILTER_DEBLOCK &&
  ------------------
  |  Branch (2014:9): [True: 61.6k, False: 0]
  ------------------
 2015|  61.6k|        (f->frame_hdr->loopfilter.level_y[0] || f->frame_hdr->loopfilter.level_y[1]))
  ------------------
  |  Branch (2015:10): [True: 18.4k, False: 43.2k]
  |  Branch (2015:49): [True: 5.56k, False: 37.6k]
  ------------------
 2016|  24.0k|    {
 2017|  24.0k|        bytefn(dav1d_loopfilter_sbrow_rows)(f, p, mask, sby);
  ------------------
  |  |   87|  24.0k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  24.0k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2018|  24.0k|    }
 2019|  61.6k|    if (f->seq_hdr->cdef || f->lf.restore_planes) {
  ------------------
  |  Branch (2019:9): [True: 26.0k, False: 35.6k]
  |  Branch (2019:29): [True: 4.95k, False: 30.6k]
  ------------------
 2020|       |        // Store loop filtered pixels required by CDEF / LR
 2021|  30.9k|        bytefn(dav1d_copy_lpf)(f, p, sby);
  ------------------
  |  |   87|  30.9k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  30.9k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2022|  30.9k|    }
 2023|  61.6k|}
dav1d_filter_sbrow_cdef_16bpc:
 2025|  26.0k|void bytefn(dav1d_filter_sbrow_cdef)(Dav1dTaskContext *const tc, const int sby) {
 2026|  26.0k|    const Dav1dFrameContext *const f = tc->f;
 2027|  26.0k|    if (!(f->c->inloop_filters & DAV1D_INLOOPFILTER_CDEF)) return;
  ------------------
  |  Branch (2027:9): [True: 0, False: 26.0k]
  ------------------
 2028|  26.0k|    const int sbsz = f->sb_step;
 2029|  26.0k|    const int y = sby * sbsz * 4;
 2030|  26.0k|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
 2031|  26.0k|    pixel *const p[3] = {
 2032|  26.0k|        f->lf.p[0] + y * PXSTRIDE(f->cur.stride[0]),
 2033|  26.0k|        f->lf.p[1] + (y * PXSTRIDE(f->cur.stride[1]) >> ss_ver),
 2034|  26.0k|        f->lf.p[2] + (y * PXSTRIDE(f->cur.stride[1]) >> ss_ver)
 2035|  26.0k|    };
 2036|  26.0k|    Av1Filter *prev_mask = f->lf.mask + ((sby - 1) >> !f->seq_hdr->sb128) * f->sb128w;
 2037|  26.0k|    Av1Filter *mask = f->lf.mask + (sby >> !f->seq_hdr->sb128) * f->sb128w;
 2038|  26.0k|    const int start = sby * sbsz;
 2039|  26.0k|    if (sby) {
  ------------------
  |  Branch (2039:9): [True: 23.6k, False: 2.40k]
  ------------------
 2040|  23.6k|        const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
 2041|  23.6k|        pixel *p_up[3] = {
 2042|  23.6k|            p[0] - 8 * PXSTRIDE(f->cur.stride[0]),
 2043|  23.6k|            p[1] - (8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver),
 2044|  23.6k|            p[2] - (8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver),
 2045|  23.6k|        };
 2046|  23.6k|        bytefn(dav1d_cdef_brow)(tc, p_up, prev_mask, start - 2, start, 1, sby);
  ------------------
  |  |   87|  23.6k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  23.6k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2047|  23.6k|    }
 2048|  26.0k|    const int n_blks = sbsz - 2 * (sby + 1 < f->sbh);
 2049|  26.0k|    const int end = imin(start + n_blks, f->bh);
 2050|  26.0k|    bytefn(dav1d_cdef_brow)(tc, p, mask, start, end, 0, sby);
  ------------------
  |  |   87|  26.0k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  26.0k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2051|  26.0k|}
dav1d_filter_sbrow_resize_16bpc:
 2053|  4.16k|void bytefn(dav1d_filter_sbrow_resize)(Dav1dFrameContext *const f, const int sby) {
 2054|  4.16k|    const int sbsz = f->sb_step;
 2055|  4.16k|    const int y = sby * sbsz * 4;
 2056|  4.16k|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
 2057|  4.16k|    const pixel *const p[3] = {
 2058|  4.16k|        f->lf.p[0] + y * PXSTRIDE(f->cur.stride[0]),
 2059|  4.16k|        f->lf.p[1] + (y * PXSTRIDE(f->cur.stride[1]) >> ss_ver),
 2060|  4.16k|        f->lf.p[2] + (y * PXSTRIDE(f->cur.stride[1]) >> ss_ver)
 2061|  4.16k|    };
 2062|  4.16k|    pixel *const sr_p[3] = {
 2063|  4.16k|        f->lf.sr_p[0] + y * PXSTRIDE(f->sr_cur.p.stride[0]),
 2064|  4.16k|        f->lf.sr_p[1] + (y * PXSTRIDE(f->sr_cur.p.stride[1]) >> ss_ver),
 2065|  4.16k|        f->lf.sr_p[2] + (y * PXSTRIDE(f->sr_cur.p.stride[1]) >> ss_ver)
 2066|  4.16k|    };
 2067|  4.16k|    const int has_chroma = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400;
 2068|  15.2k|    for (int pl = 0; pl < 1 + 2 * has_chroma; pl++) {
  ------------------
  |  Branch (2068:22): [True: 11.0k, False: 4.16k]
  ------------------
 2069|  11.0k|        const int ss_ver = pl && f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
  ------------------
  |  Branch (2069:28): [True: 6.91k, False: 4.16k]
  |  Branch (2069:34): [True: 3.59k, False: 3.32k]
  ------------------
 2070|  11.0k|        const int h_start = 8 * !!sby >> ss_ver;
 2071|  11.0k|        const ptrdiff_t dst_stride = f->sr_cur.p.stride[!!pl];
 2072|  11.0k|        pixel *dst = sr_p[pl] - h_start * PXSTRIDE(dst_stride);
 2073|  11.0k|        const ptrdiff_t src_stride = f->cur.stride[!!pl];
 2074|  11.0k|        const pixel *src = p[pl] - h_start * PXSTRIDE(src_stride);
 2075|  11.0k|        const int h_end = 4 * (sbsz - 2 * (sby + 1 < f->sbh)) >> ss_ver;
 2076|  11.0k|        const int ss_hor = pl && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
  ------------------
  |  Branch (2076:28): [True: 6.91k, False: 4.16k]
  |  Branch (2076:34): [True: 4.86k, False: 2.04k]
  ------------------
 2077|  11.0k|        const int dst_w = (f->sr_cur.p.p.w + ss_hor) >> ss_hor;
 2078|  11.0k|        const int src_w = (4 * f->bw + ss_hor) >> ss_hor;
 2079|  11.0k|        const int img_h = (f->cur.p.h - sbsz * 4 * sby + ss_ver) >> ss_ver;
 2080|       |
 2081|  11.0k|        f->dsp->mc.resize(dst, dst_stride, src, src_stride, dst_w,
 2082|  11.0k|                          imin(img_h, h_end) + h_start, src_w,
 2083|  11.0k|                          f->resize_step[!!pl], f->resize_start[!!pl]
 2084|  11.0k|                          HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  11.0k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 2085|  11.0k|    }
 2086|  4.16k|}
dav1d_filter_sbrow_lr_16bpc:
 2088|  13.9k|void bytefn(dav1d_filter_sbrow_lr)(Dav1dFrameContext *const f, const int sby) {
 2089|  13.9k|    if (!(f->c->inloop_filters & DAV1D_INLOOPFILTER_RESTORATION)) return;
  ------------------
  |  Branch (2089:9): [True: 0, False: 13.9k]
  ------------------
 2090|  13.9k|    const int y = sby * f->sb_step * 4;
 2091|  13.9k|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
 2092|  13.9k|    pixel *const sr_p[3] = {
 2093|  13.9k|        f->lf.sr_p[0] + y * PXSTRIDE(f->sr_cur.p.stride[0]),
 2094|  13.9k|        f->lf.sr_p[1] + (y * PXSTRIDE(f->sr_cur.p.stride[1]) >> ss_ver),
 2095|  13.9k|        f->lf.sr_p[2] + (y * PXSTRIDE(f->sr_cur.p.stride[1]) >> ss_ver)
 2096|  13.9k|    };
 2097|  13.9k|    bytefn(dav1d_lr_sbrow)(f, sr_p, sby);
  ------------------
  |  |   87|  13.9k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  13.9k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2098|  13.9k|}
dav1d_filter_sbrow_16bpc:
 2100|  61.6k|void bytefn(dav1d_filter_sbrow)(Dav1dFrameContext *const f, const int sby) {
 2101|  61.6k|    bytefn(dav1d_filter_sbrow_deblock_cols)(f, sby);
  ------------------
  |  |   87|  61.6k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  61.6k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2102|  61.6k|    bytefn(dav1d_filter_sbrow_deblock_rows)(f, sby);
  ------------------
  |  |   87|  61.6k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  61.6k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2103|  61.6k|    if (f->seq_hdr->cdef)
  ------------------
  |  Branch (2103:9): [True: 26.0k, False: 35.6k]
  ------------------
 2104|  26.0k|        bytefn(dav1d_filter_sbrow_cdef)(f->c->tc, sby);
  ------------------
  |  |   87|  26.0k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  26.0k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2105|  61.6k|    if (f->frame_hdr->width[0] != f->frame_hdr->width[1])
  ------------------
  |  Branch (2105:9): [True: 4.16k, False: 57.4k]
  ------------------
 2106|  4.16k|        bytefn(dav1d_filter_sbrow_resize)(f, sby);
  ------------------
  |  |   87|  4.16k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  4.16k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2107|  61.6k|    if (f->lf.restore_planes)
  ------------------
  |  Branch (2107:9): [True: 13.9k, False: 47.6k]
  ------------------
 2108|  13.9k|        bytefn(dav1d_filter_sbrow_lr)(f, sby);
  ------------------
  |  |   87|  13.9k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  13.9k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2109|  61.6k|}
dav1d_backup_ipred_edge_16bpc:
 2111|  72.1k|void bytefn(dav1d_backup_ipred_edge)(Dav1dTaskContext *const t) {
 2112|  72.1k|    const Dav1dFrameContext *const f = t->f;
 2113|  72.1k|    Dav1dTileState *const ts = t->ts;
 2114|  72.1k|    const int sby = t->by >> f->sb_shift;
 2115|  72.1k|    const int sby_off = f->sb128w * 128 * sby;
 2116|  72.1k|    const int x_off = ts->tiling.col_start;
 2117|       |
 2118|  72.1k|    const pixel *const y =
 2119|  72.1k|        ((const pixel *) f->cur.data[0]) + x_off * 4 +
 2120|  72.1k|                    ((t->by + f->sb_step) * 4 - 1) * PXSTRIDE(f->cur.stride[0]);
 2121|  72.1k|    pixel_copy(&f->ipred_edge[0][sby_off + x_off * 4], y,
  ------------------
  |  |   65|  72.1k|#define pixel_copy(a, b, c) memcpy(a, b, (c) << 1)
  ------------------
 2122|  72.1k|               4 * (ts->tiling.col_end - x_off));
 2123|       |
 2124|  72.1k|    if (f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400) {
  ------------------
  |  Branch (2124:9): [True: 36.4k, False: 35.6k]
  ------------------
 2125|  36.4k|        const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
 2126|  36.4k|        const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
 2127|       |
 2128|  36.4k|        const ptrdiff_t uv_off = (x_off * 4 >> ss_hor) +
 2129|  36.4k|            (((t->by + f->sb_step) * 4 >> ss_ver) - 1) * PXSTRIDE(f->cur.stride[1]);
 2130|   109k|        for (int pl = 1; pl <= 2; pl++)
  ------------------
  |  Branch (2130:26): [True: 72.8k, False: 36.4k]
  ------------------
 2131|  72.8k|            pixel_copy(&f->ipred_edge[pl][sby_off + (x_off * 4 >> ss_hor)],
  ------------------
  |  |   65|  72.8k|#define pixel_copy(a, b, c) memcpy(a, b, (c) << 1)
  ------------------
 2132|  36.4k|                       &((const pixel *) f->cur.data[pl])[uv_off],
 2133|  36.4k|                       4 * (ts->tiling.col_end - x_off) >> ss_hor);
 2134|  36.4k|    }
 2135|  72.1k|}
dav1d_copy_pal_block_y_16bpc:
 2141|  39.1k|{
 2142|  39.1k|    const Dav1dFrameContext *const f = t->f;
 2143|  39.1k|    pixel *const pal = t->frame_thread.pass ?
  ------------------
  |  Branch (2143:24): [True: 0, False: 39.1k]
  ------------------
 2144|      0|        f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
 2145|      0|                            ((t->bx >> 1) + (t->by & 1))][0] :
 2146|  39.1k|        bytefn(t->scratch.pal)[0];
  ------------------
  |  |   87|  39.1k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  39.1k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2147|   223k|    for (int x = 0; x < bw4; x++)
  ------------------
  |  Branch (2147:21): [True: 184k, False: 39.1k]
  ------------------
 2148|   184k|        memcpy(bytefn(t->al_pal)[0][bx4 + x][0], pal, 8 * sizeof(pixel));
  ------------------
  |  |   87|   184k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|   184k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2149|   156k|    for (int y = 0; y < bh4; y++)
  ------------------
  |  Branch (2149:21): [True: 117k, False: 39.1k]
  ------------------
 2150|   117k|        memcpy(bytefn(t->al_pal)[1][by4 + y][0], pal, 8 * sizeof(pixel));
  ------------------
  |  |   87|   117k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|   117k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2151|  39.1k|}
dav1d_copy_pal_block_uv_16bpc:
 2157|  10.0k|{
 2158|  10.0k|    const Dav1dFrameContext *const f = t->f;
 2159|  10.0k|    const pixel (*const pal)[8] = t->frame_thread.pass ?
  ------------------
  |  Branch (2159:35): [True: 0, False: 10.0k]
  ------------------
 2160|      0|        f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
 2161|      0|                            ((t->bx >> 1) + (t->by & 1))] :
 2162|  10.0k|        bytefn(t->scratch.pal);
  ------------------
  |  |   87|  10.0k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  10.0k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2163|       |    // see aomedia bug 2183 for why we use luma coordinates here
 2164|  30.0k|    for (int pl = 1; pl <= 2; pl++) {
  ------------------
  |  Branch (2164:22): [True: 20.0k, False: 10.0k]
  ------------------
 2165|   118k|        for (int x = 0; x < bw4; x++)
  ------------------
  |  Branch (2165:25): [True: 98.6k, False: 20.0k]
  ------------------
 2166|  98.6k|            memcpy(bytefn(t->al_pal)[0][bx4 + x][pl], pal[pl], 8 * sizeof(pixel));
  ------------------
  |  |   87|  98.6k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  98.6k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2167|  95.3k|        for (int y = 0; y < bh4; y++)
  ------------------
  |  Branch (2167:25): [True: 75.3k, False: 20.0k]
  ------------------
 2168|  75.3k|            memcpy(bytefn(t->al_pal)[1][by4 + y][pl], pal[pl], 8 * sizeof(pixel));
  ------------------
  |  |   87|  75.3k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  75.3k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2169|  20.0k|    }
 2170|  10.0k|}
dav1d_read_pal_plane_16bpc:
 2175|  49.1k|{
 2176|  49.1k|    Dav1dTileState *const ts = t->ts;
 2177|  49.1k|    const Dav1dFrameContext *const f = t->f;
 2178|  49.1k|    const int pal_sz = b->pal_sz[pl] = dav1d_msac_decode_symbol_adapt8(&ts->msac,
  ------------------
  |  |   48|  49.1k|#define dav1d_msac_decode_symbol_adapt8  dav1d_msac_decode_symbol_adapt8_sse2
  ------------------
 2179|  49.1k|                                           ts->cdf.m.pal_sz[pl][sz_ctx], 6) + 2;
 2180|  49.1k|    pixel cache[16], used_cache[8];
 2181|  49.1k|    int l_cache = pl ? t->pal_sz_uv[1][by4] : t->l.pal_sz[by4];
  ------------------
  |  Branch (2181:19): [True: 10.0k, False: 39.1k]
  ------------------
 2182|  49.1k|    int n_cache = 0;
 2183|       |    // don't reuse above palette outside SB64 boundaries
 2184|  49.1k|    int a_cache = by4 & 15 ? pl ? t->pal_sz_uv[0][bx4] : t->a->pal_sz[bx4] : 0;
  ------------------
  |  Branch (2184:19): [True: 35.4k, False: 13.6k]
  |  Branch (2184:30): [True: 7.62k, False: 27.8k]
  ------------------
 2185|  49.1k|    const pixel *l = bytefn(t->al_pal)[1][by4][pl];
  ------------------
  |  |   87|  49.1k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  49.1k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2186|  49.1k|    const pixel *a = bytefn(t->al_pal)[0][bx4][pl];
  ------------------
  |  |   87|  49.1k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  49.1k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2187|       |
 2188|       |    // fill/sort cache
 2189|  81.4k|    while (l_cache && a_cache) {
  ------------------
  |  Branch (2189:12): [True: 48.0k, False: 33.3k]
  |  Branch (2189:23): [True: 32.2k, False: 15.7k]
  ------------------
 2190|  32.2k|        if (*l < *a) {
  ------------------
  |  Branch (2190:13): [True: 15.4k, False: 16.8k]
  ------------------
 2191|  15.4k|            if (!n_cache || cache[n_cache - 1] != *l)
  ------------------
  |  Branch (2191:17): [True: 3.32k, False: 12.1k]
  |  Branch (2191:29): [True: 11.5k, False: 562]
  ------------------
 2192|  14.8k|                cache[n_cache++] = *l;
 2193|  15.4k|            l++;
 2194|  15.4k|            l_cache--;
 2195|  16.8k|        } else {
 2196|  16.8k|            if (*a == *l) {
  ------------------
  |  Branch (2196:17): [True: 5.54k, False: 11.2k]
  ------------------
 2197|  5.54k|                l++;
 2198|  5.54k|                l_cache--;
 2199|  5.54k|            }
 2200|  16.8k|            if (!n_cache || cache[n_cache - 1] != *a)
  ------------------
  |  Branch (2200:17): [True: 2.37k, False: 14.4k]
  |  Branch (2200:29): [True: 14.3k, False: 114]
  ------------------
 2201|  16.7k|                cache[n_cache++] = *a;
 2202|  16.8k|            a++;
 2203|  16.8k|            a_cache--;
 2204|  16.8k|        }
 2205|  32.2k|    }
 2206|  49.1k|    if (l_cache) {
  ------------------
  |  Branch (2206:9): [True: 15.7k, False: 33.3k]
  ------------------
 2207|  70.6k|        do {
 2208|  70.6k|            if (!n_cache || cache[n_cache - 1] != *l)
  ------------------
  |  Branch (2208:17): [True: 13.7k, False: 56.9k]
  |  Branch (2208:29): [True: 47.5k, False: 9.35k]
  ------------------
 2209|  61.2k|                cache[n_cache++] = *l;
 2210|  70.6k|            l++;
 2211|  70.6k|        } while (--l_cache > 0);
  ------------------
  |  Branch (2211:18): [True: 54.8k, False: 15.7k]
  ------------------
 2212|  33.3k|    } else if (a_cache) {
  ------------------
  |  Branch (2212:16): [True: 10.5k, False: 22.8k]
  ------------------
 2213|  47.7k|        do {
 2214|  47.7k|            if (!n_cache || cache[n_cache - 1] != *a)
  ------------------
  |  Branch (2214:17): [True: 7.79k, False: 39.9k]
  |  Branch (2214:29): [True: 34.0k, False: 5.94k]
  ------------------
 2215|  41.8k|                cache[n_cache++] = *a;
 2216|  47.7k|            a++;
 2217|  47.7k|        } while (--a_cache > 0);
  ------------------
  |  Branch (2217:18): [True: 37.2k, False: 10.5k]
  ------------------
 2218|  10.5k|    }
 2219|       |
 2220|       |    // find reused cache entries
 2221|  49.1k|    int i = 0;
 2222|   164k|    for (int n = 0; n < n_cache && i < pal_sz; n++)
  ------------------
  |  Branch (2222:21): [True: 122k, False: 42.8k]
  |  Branch (2222:36): [True: 115k, False: 6.25k]
  ------------------
 2223|   115k|        if (dav1d_msac_decode_bool_equi(&ts->msac))
  ------------------
  |  |   53|   115k|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
  |  Branch (2223:13): [True: 53.1k, False: 62.6k]
  ------------------
 2224|  53.1k|            used_cache[i++] = cache[n];
 2225|  49.1k|    const int n_used_cache = i;
 2226|       |
 2227|       |    // parse new entries
 2228|  49.1k|    pixel *const pal = t->frame_thread.pass ?
  ------------------
  |  Branch (2228:24): [True: 0, False: 49.1k]
  ------------------
 2229|      0|        f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
 2230|      0|                            ((t->bx >> 1) + (t->by & 1))][pl] :
 2231|  49.1k|        bytefn(t->scratch.pal)[pl];
  ------------------
  |  |   87|  49.1k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  49.1k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2232|  49.1k|    if (i < pal_sz) {
  ------------------
  |  Branch (2232:9): [True: 41.0k, False: 8.08k]
  ------------------
 2233|  41.0k|        const int bpc = BITDEPTH == 8 ? 8 : f->cur.p.bpc;
  ------------------
  |  Branch (2233:25): [Folded, False: 41.0k]
  ------------------
 2234|  41.0k|        int prev = pal[i++] = dav1d_msac_decode_bools(&ts->msac, bpc);
 2235|       |
 2236|  41.0k|        if (i < pal_sz) {
  ------------------
  |  Branch (2236:13): [True: 37.1k, False: 3.85k]
  ------------------
 2237|  37.1k|            int bits = bpc - 3 + dav1d_msac_decode_bools(&ts->msac, 2);
 2238|  37.1k|            const int max = (1 << bpc) - 1;
 2239|       |
 2240|  97.3k|            do {
 2241|  97.3k|                const int delta = dav1d_msac_decode_bools(&ts->msac, bits);
 2242|  97.3k|                prev = pal[i++] = imin(prev + delta + !pl, max);
 2243|  97.3k|                if (prev + !pl >= max) {
  ------------------
  |  Branch (2243:21): [True: 14.3k, False: 83.0k]
  ------------------
 2244|  39.6k|                    for (; i < pal_sz; i++)
  ------------------
  |  Branch (2244:28): [True: 25.3k, False: 14.3k]
  ------------------
 2245|  25.3k|                        pal[i] = max;
 2246|  14.3k|                    break;
 2247|  14.3k|                }
 2248|  83.0k|                bits = imin(bits, 1 + ulog2(max - prev - !pl));
 2249|  83.0k|            } while (i < pal_sz);
  ------------------
  |  Branch (2249:22): [True: 60.1k, False: 22.8k]
  ------------------
 2250|  37.1k|        }
 2251|       |
 2252|       |        // merge cache+new entries
 2253|  41.0k|        int n = 0, m = n_used_cache;
 2254|   236k|        for (i = 0; i < pal_sz; i++) {
  ------------------
  |  Branch (2254:21): [True: 195k, False: 41.0k]
  ------------------
 2255|   195k|            if (n < n_used_cache && (m >= pal_sz || used_cache[n] <= pal[m])) {
  ------------------
  |  Branch (2255:17): [True: 55.0k, False: 140k]
  |  Branch (2255:38): [True: 11.2k, False: 43.7k]
  |  Branch (2255:53): [True: 20.6k, False: 23.0k]
  ------------------
 2256|  31.9k|                pal[i] = used_cache[n++];
 2257|   163k|            } else {
 2258|   163k|                assert(m < pal_sz);
  ------------------
  |  Branch (2258:17): [True: 163k, False: 0]
  ------------------
 2259|   163k|                pal[i] = pal[m++];
 2260|   163k|            }
 2261|   195k|        }
 2262|  41.0k|    } else {
 2263|  8.08k|        memcpy(pal, used_cache, n_used_cache * sizeof(*used_cache));
 2264|  8.08k|    }
 2265|       |
 2266|  49.1k|    if (DEBUG_BLOCK_INFO) {
  ------------------
  |  |   34|  49.1k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 49.1k]
  |  |  ------------------
  |  |   35|  49.1k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  49.1k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 2267|      0|        printf("Post-pal[pl=%d,sz=%d,cache_size=%d,used_cache=%d]: r=%d, cache=",
 2268|      0|               pl, pal_sz, n_cache, n_used_cache, ts->msac.rng);
 2269|      0|        for (int n = 0; n < n_cache; n++)
  ------------------
  |  Branch (2269:25): [True: 0, False: 0]
  ------------------
 2270|      0|            printf("%c%02x", n ? ' ' : '[', cache[n]);
  ------------------
  |  Branch (2270:30): [True: 0, False: 0]
  ------------------
 2271|      0|        printf("%s, pal=", n_cache ? "]" : "[]");
  ------------------
  |  Branch (2271:28): [True: 0, False: 0]
  ------------------
 2272|      0|        for (int n = 0; n < pal_sz; n++)
  ------------------
  |  Branch (2272:25): [True: 0, False: 0]
  ------------------
 2273|      0|            printf("%c%02x", n ? ' ' : '[', pal[n]);
  ------------------
  |  Branch (2273:30): [True: 0, False: 0]
  ------------------
 2274|      0|        printf("]\n");
 2275|      0|    }
 2276|  49.1k|}
dav1d_read_pal_uv_16bpc:
 2280|  10.0k|{
 2281|  10.0k|    bytefn(dav1d_read_pal_plane)(t, b, 1, sz_ctx, bx4, by4);
  ------------------
  |  |   87|  10.0k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  10.0k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2282|       |
 2283|       |    // V pal coding
 2284|  10.0k|    Dav1dTileState *const ts = t->ts;
 2285|  10.0k|    const Dav1dFrameContext *const f = t->f;
 2286|  10.0k|    pixel *const pal = t->frame_thread.pass ?
  ------------------
  |  Branch (2286:24): [True: 0, False: 10.0k]
  ------------------
 2287|      0|        f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
 2288|      0|                            ((t->bx >> 1) + (t->by & 1))][2] :
 2289|  10.0k|        bytefn(t->scratch.pal)[2];
  ------------------
  |  |   87|  10.0k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  10.0k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2290|  10.0k|    const int bpc = BITDEPTH == 8 ? 8 : f->cur.p.bpc;
  ------------------
  |  Branch (2290:21): [Folded, False: 10.0k]
  ------------------
 2291|  10.0k|    if (dav1d_msac_decode_bool_equi(&ts->msac)) {
  ------------------
  |  |   53|  10.0k|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
  |  Branch (2291:9): [True: 4.75k, False: 5.26k]
  ------------------
 2292|  4.75k|        const int bits = bpc - 4 + dav1d_msac_decode_bools(&ts->msac, 2);
 2293|  4.75k|        int prev = pal[0] = dav1d_msac_decode_bools(&ts->msac, bpc);
 2294|  4.75k|        const int max = (1 << bpc) - 1;
 2295|  23.0k|        for (int i = 1; i < b->pal_sz[1]; i++) {
  ------------------
  |  Branch (2295:25): [True: 18.2k, False: 4.75k]
  ------------------
 2296|  18.2k|            int delta = dav1d_msac_decode_bools(&ts->msac, bits);
 2297|  18.2k|            if (delta && dav1d_msac_decode_bool_equi(&ts->msac)) delta = -delta;
  ------------------
  |  |   53|  18.0k|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
  |  Branch (2297:17): [True: 18.0k, False: 215]
  |  Branch (2297:26): [True: 9.11k, False: 8.93k]
  ------------------
 2298|  18.2k|            prev = pal[i] = (prev + delta) & max;
 2299|  18.2k|        }
 2300|  5.26k|    } else {
 2301|  27.0k|        for (int i = 0; i < b->pal_sz[1]; i++)
  ------------------
  |  Branch (2301:25): [True: 21.7k, False: 5.26k]
  ------------------
 2302|  21.7k|            pal[i] = dav1d_msac_decode_bools(&ts->msac, bpc);
 2303|  5.26k|    }
 2304|  10.0k|    if (DEBUG_BLOCK_INFO) {
  ------------------
  |  |   34|  10.0k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 10.0k]
  |  |  ------------------
  |  |   35|  10.0k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  10.0k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 2305|      0|        printf("Post-pal[pl=2]: r=%d ", ts->msac.rng);
 2306|      0|        for (int n = 0; n < b->pal_sz[1]; n++)
  ------------------
  |  Branch (2306:25): [True: 0, False: 0]
  ------------------
 2307|      0|            printf("%c%02x", n ? ' ' : '[', pal[n]);
  ------------------
  |  Branch (2307:30): [True: 0, False: 0]
  ------------------
 2308|      0|        printf("]\n");
 2309|      0|    }
 2310|  10.0k|}

dav1d_ref_create:
   37|  75.1k|Dav1dRef *dav1d_ref_create(const enum AllocationType type, size_t size) {
   38|  75.1k|    size = (size + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
   39|       |
   40|  75.1k|    uint8_t *const data = dav1d_alloc_aligned(type, size + sizeof(Dav1dRef), 64);
  ------------------
  |  |  134|  75.1k|#define dav1d_alloc_aligned(type, sz, align) dav1d_alloc_aligned_internal(sz, align)
  ------------------
   41|  75.1k|    if (!data) return NULL;
  ------------------
  |  Branch (41:9): [True: 0, False: 75.1k]
  ------------------
   42|       |
   43|  75.1k|    Dav1dRef *const res = (Dav1dRef*)(data + size);
   44|  75.1k|    res->const_data = res->user_data = res->data = data;
   45|  75.1k|    atomic_init(&res->ref_cnt, 1);
   46|  75.1k|    res->free_ref = 0;
   47|  75.1k|    res->free_callback = default_free_callback;
   48|       |
   49|  75.1k|    return res;
   50|  75.1k|}
dav1d_ref_create_using_pool:
   56|   110k|Dav1dRef *dav1d_ref_create_using_pool(Dav1dMemPool *const pool, size_t size) {
   57|   110k|    void *const buf = dav1d_mem_pool_pop(pool, size);
   58|   110k|    if (!buf) return NULL;
  ------------------
  |  Branch (58:9): [True: 0, False: 110k]
  ------------------
   59|       |
   60|       |    /* Store Dav1dRef inside the Dav1dMemPoolBuffer alignment padding */
   61|   110k|    assert(sizeof(Dav1dMemPoolBuffer) + sizeof(Dav1dRef) <= 64);
  ------------------
  |  Branch (61:5): [True: 110k, Folded]
  ------------------
   62|   110k|    Dav1dRef *const res = &((Dav1dRef*)buf)[-1];
   63|   110k|    res->data = buf;
   64|   110k|    res->const_data = pool;
   65|   110k|    atomic_init(&res->ref_cnt, 1);
   66|   110k|    res->free_ref = 0;
   67|   110k|    res->free_callback = pool_free_callback;
   68|   110k|    res->user_data = buf;
   69|       |
   70|   110k|    return res;
   71|   110k|}
dav1d_ref_dec:
   73|  7.88M|void dav1d_ref_dec(Dav1dRef **const pref) {
   74|  7.88M|    assert(pref != NULL);
  ------------------
  |  Branch (74:5): [True: 7.88M, False: 0]
  ------------------
   75|       |
   76|  7.88M|    Dav1dRef *const ref = *pref;
   77|  7.88M|    if (!ref) return;
  ------------------
  |  Branch (77:9): [True: 5.72M, False: 2.15M]
  ------------------
   78|       |
   79|  2.15M|    *pref = NULL;
   80|  2.15M|    if (atomic_fetch_sub(&ref->ref_cnt, 1) == 1) {
  ------------------
  |  Branch (80:9): [True: 236k, False: 1.91M]
  ------------------
   81|   236k|        const int free_ref = ref->free_ref;
   82|   236k|        ref->free_callback(ref->const_data, ref->user_data);
   83|   236k|        if (free_ref) dav1d_free(ref);
  ------------------
  |  |  135|      0|#define dav1d_free(ptr) free(ptr)
  ------------------
  |  Branch (83:13): [True: 0, False: 236k]
  ------------------
   84|   236k|    }
   85|  2.15M|}
ref.c:default_free_callback:
   32|  75.1k|static void default_free_callback(const uint8_t *const data, void *const user_data) {
   33|  75.1k|    assert(data == user_data);
  ------------------
  |  Branch (33:5): [True: 75.1k, False: 0]
  ------------------
   34|  75.1k|    dav1d_free_aligned(user_data);
  ------------------
  |  |  136|  75.1k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
   35|  75.1k|}
ref.c:pool_free_callback:
   52|   110k|static void pool_free_callback(const uint8_t *const data, void *const user_data) {
   53|   110k|    dav1d_mem_pool_push((Dav1dMemPool*)data, user_data);
   54|   110k|}

obu.c:dav1d_ref_is_writable:
   73|  73.2k|static inline int dav1d_ref_is_writable(Dav1dRef *const ref) {
   74|  73.2k|    return atomic_load(&ref->ref_cnt) == 1 && ref->data;
  ------------------
  |  Branch (74:12): [True: 73.2k, False: 0]
  |  Branch (74:47): [True: 73.2k, False: 0]
  ------------------
   75|  73.2k|}
obu.c:dav1d_ref_init:
   59|    575|{
   60|    575|    ref->data = NULL;
   61|    575|    ref->const_data = ptr;
   62|       |    atomic_init(&ref->ref_cnt, 1);
   63|    575|    ref->free_ref = free_ref;
   64|    575|    ref->free_callback = free_callback;
   65|    575|    ref->user_data = user_data;
   66|    575|    return ref;
   67|    575|}
obu.c:dav1d_ref_inc:
   69|  2.53k|static inline void dav1d_ref_inc(Dav1dRef *const ref) {
   70|       |    atomic_fetch_add_explicit(&ref->ref_cnt, 1, memory_order_relaxed);
   71|  2.53k|}
picture.c:dav1d_ref_inc:
   69|  1.54M|static inline void dav1d_ref_inc(Dav1dRef *const ref) {
   70|       |    atomic_fetch_add_explicit(&ref->ref_cnt, 1, memory_order_relaxed);
   71|  1.54M|}
picture.c:dav1d_ref_init:
   59|  49.8k|{
   60|  49.8k|    ref->data = NULL;
   61|  49.8k|    ref->const_data = ptr;
   62|       |    atomic_init(&ref->ref_cnt, 1);
   63|  49.8k|    ref->free_ref = free_ref;
   64|  49.8k|    ref->free_callback = free_callback;
   65|  49.8k|    ref->user_data = user_data;
   66|  49.8k|    return ref;
   67|  49.8k|}
cdf.c:dav1d_ref_inc:
   69|  80.7k|static inline void dav1d_ref_inc(Dav1dRef *const ref) {
   70|       |    atomic_fetch_add_explicit(&ref->ref_cnt, 1, memory_order_relaxed);
   71|  80.7k|}
data.c:dav1d_ref_inc:
   69|   120k|static inline void dav1d_ref_inc(Dav1dRef *const ref) {
   70|       |    atomic_fetch_add_explicit(&ref->ref_cnt, 1, memory_order_relaxed);
   71|   120k|}
decode.c:dav1d_ref_inc:
   69|   171k|static inline void dav1d_ref_inc(Dav1dRef *const ref) {
   70|       |    atomic_fetch_add_explicit(&ref->ref_cnt, 1, memory_order_relaxed);
   71|   171k|}

dav1d_refmvs_find:
  354|  1.70M|{
  355|  1.70M|    const refmvs_frame *const rf = rt->rf;
  356|  1.70M|    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
  357|  1.70M|    const int bw4 = b_dim[0], w4 = imin(imin(bw4, 16), rt->tile_col.end - bx4);
  358|  1.70M|    const int bh4 = b_dim[1], h4 = imin(imin(bh4, 16), rt->tile_row.end - by4);
  359|  1.70M|    mv gmv[2], tgmv[2];
  360|       |
  361|  1.70M|    *cnt = 0;
  362|  1.70M|    assert(ref.ref[0] >=  0 && ref.ref[0] <= 8 &&
  ------------------
  |  Branch (362:5): [True: 1.70M, False: 0]
  |  Branch (362:5): [True: 1.70M, False: 0]
  |  Branch (362:5): [True: 1.70M, False: 0]
  |  Branch (362:5): [True: 1.70M, False: 0]
  ------------------
  363|  1.70M|           ref.ref[1] >= -1 && ref.ref[1] <= 8);
  364|  1.70M|    if (ref.ref[0] > 0) {
  ------------------
  |  Branch (364:9): [True: 1.09M, False: 604k]
  ------------------
  365|  1.09M|        tgmv[0] = get_gmv_2d(&rf->frm_hdr->gmv[ref.ref[0] - 1],
  366|  1.09M|                             bx4, by4, bw4, bh4, rf->frm_hdr);
  367|  1.09M|        gmv[0] = rf->frm_hdr->gmv[ref.ref[0] - 1].type > DAV1D_WM_TYPE_TRANSLATION ?
  ------------------
  |  Branch (367:18): [True: 173k, False: 924k]
  ------------------
  368|   924k|                 tgmv[0] : (mv) { .n = INVALID_MV };
  ------------------
  |  |   40|   924k|#define INVALID_MV 0x80008000
  ------------------
  369|  1.09M|    } else {
  370|   604k|        tgmv[0] = (mv) { .n = 0 };
  371|   604k|        gmv[0] = (mv) { .n = INVALID_MV };
  ------------------
  |  |   40|   604k|#define INVALID_MV 0x80008000
  ------------------
  372|   604k|    }
  373|  1.70M|    if (ref.ref[1] > 0) {
  ------------------
  |  Branch (373:9): [True: 192k, False: 1.51M]
  ------------------
  374|   192k|        tgmv[1] = get_gmv_2d(&rf->frm_hdr->gmv[ref.ref[1] - 1],
  375|   192k|                             bx4, by4, bw4, bh4, rf->frm_hdr);
  376|   192k|        gmv[1] = rf->frm_hdr->gmv[ref.ref[1] - 1].type > DAV1D_WM_TYPE_TRANSLATION ?
  ------------------
  |  Branch (376:18): [True: 22.0k, False: 170k]
  ------------------
  377|   170k|                 tgmv[1] : (mv) { .n = INVALID_MV };
  ------------------
  |  |   40|   170k|#define INVALID_MV 0x80008000
  ------------------
  378|   192k|    }
  379|       |
  380|       |    // top
  381|  1.70M|    int have_newmv = 0, have_col_mvs = 0, have_row_mvs = 0;
  382|  1.70M|    unsigned max_rows = 0, n_rows = ~0;
  383|  1.70M|    const refmvs_block *b_top;
  384|  1.70M|    if (by4 > rt->tile_row.start) {
  ------------------
  |  Branch (384:9): [True: 1.23M, False: 472k]
  ------------------
  385|  1.23M|        max_rows = imin((by4 - rt->tile_row.start + 1) >> 1, 2 + (bh4 > 1));
  386|  1.23M|        b_top = &rt->r[(by4 & 31) - 1 + 5][bx4];
  387|  1.23M|        n_rows = scan_row(mvstack, cnt, ref, gmv, b_top,
  388|  1.23M|                          bw4, w4, max_rows, bw4 >= 16 ? 4 : 1,
  ------------------
  |  Branch (388:46): [True: 116k, False: 1.11M]
  ------------------
  389|  1.23M|                          &have_newmv, &have_row_mvs);
  390|  1.23M|    }
  391|       |
  392|       |    // left
  393|  1.70M|    unsigned max_cols = 0, n_cols = ~0U;
  394|  1.70M|    refmvs_block *const *b_left;
  395|  1.70M|    if (bx4 > rt->tile_col.start) {
  ------------------
  |  Branch (395:9): [True: 1.62M, False: 78.5k]
  ------------------
  396|  1.62M|        max_cols = imin((bx4 - rt->tile_col.start + 1) >> 1, 2 + (bw4 > 1));
  397|  1.62M|        b_left = &rt->r[(by4 & 31) + 5];
  398|  1.62M|        n_cols = scan_col(mvstack, cnt, ref, gmv, b_left,
  399|  1.62M|                          bh4, h4, bx4 - 1, max_cols, bh4 >= 16 ? 4 : 1,
  ------------------
  |  Branch (399:55): [True: 223k, False: 1.40M]
  ------------------
  400|  1.62M|                          &have_newmv, &have_col_mvs);
  401|  1.62M|    }
  402|       |
  403|       |    // top/right
  404|  1.70M|    if (n_rows != ~0U && edge_flags & EDGE_I444_TOP_HAS_RIGHT &&
  ------------------
  |  Branch (404:9): [True: 1.23M, False: 472k]
  |  Branch (404:26): [True: 723k, False: 507k]
  ------------------
  405|   723k|        imax(bw4, bh4) <= 16 && bw4 + bx4 < rt->tile_col.end)
  ------------------
  |  Branch (405:9): [True: 690k, False: 32.2k]
  |  Branch (405:33): [True: 647k, False: 43.9k]
  ------------------
  406|   647k|    {
  407|   647k|        add_spatial_candidate(mvstack, cnt, 4, &b_top[bw4], ref, gmv,
  408|   647k|                              &have_newmv, &have_row_mvs);
  409|   647k|    }
  410|       |
  411|  1.70M|    const int nearest_match = have_col_mvs + have_row_mvs;
  412|  1.70M|    const int nearest_cnt = *cnt;
  413|  3.77M|    for (int n = 0; n < nearest_cnt; n++)
  ------------------
  |  Branch (413:21): [True: 2.07M, False: 1.70M]
  ------------------
  414|  2.07M|        mvstack[n].weight += 640;
  415|       |
  416|       |    // temporal
  417|  1.70M|    int globalmv_ctx = rf->frm_hdr->use_ref_frame_mvs;
  418|  1.70M|    if (rf->use_ref_frame_mvs) {
  ------------------
  |  Branch (418:9): [True: 264k, False: 1.43M]
  ------------------
  419|   264k|        const ptrdiff_t stride = rf->rp_stride;
  420|   264k|        const int by8 = by4 >> 1, bx8 = bx4 >> 1;
  421|   264k|        const refmvs_temporal_block *const rbi = &rt->rp_proj[(by8 & 15) * stride + bx8];
  422|   264k|        const refmvs_temporal_block *rb = rbi;
  423|   264k|        const int step_h = bw4 >= 16 ? 2 : 1, step_v = bh4 >= 16 ? 2 : 1;
  ------------------
  |  Branch (423:28): [True: 12.3k, False: 251k]
  |  Branch (423:56): [True: 17.0k, False: 246k]
  ------------------
  424|   264k|        const int w8 = imin((w4 + 1) >> 1, 8), h8 = imin((h4 + 1) >> 1, 8);
  425|   725k|        for (int y = 0; y < h8; y += step_v) {
  ------------------
  |  Branch (425:25): [True: 461k, False: 264k]
  ------------------
  426|  1.42M|            for (int x = 0; x < w8; x+= step_h) {
  ------------------
  |  Branch (426:29): [True: 962k, False: 461k]
  ------------------
  427|   962k|                add_temporal_candidate(rf, mvstack, cnt, &rb[x], ref,
  428|   962k|                                       !(x | y) ? &globalmv_ctx : NULL, tgmv);
  ------------------
  |  Branch (428:40): [True: 264k, False: 698k]
  ------------------
  429|   962k|            }
  430|   461k|            rb += stride * step_v;
  431|   461k|        }
  432|   264k|        if (imin(bw4, bh4) >= 2 && imax(bw4, bh4) < 16) {
  ------------------
  |  Branch (432:13): [True: 188k, False: 76.0k]
  |  Branch (432:36): [True: 169k, False: 18.7k]
  ------------------
  433|   169k|            const int bh8 = bh4 >> 1, bw8 = bw4 >> 1;
  434|   169k|            rb = &rbi[bh8 * stride];
  435|   169k|            const int has_bottom = by8 + bh8 < imin(rt->tile_row.end >> 1,
  436|   169k|                                                    (by8 & ~7) + 8);
  437|   169k|            if (has_bottom && bx8 - 1 >= imax(rt->tile_col.start >> 1, bx8 & ~7)) {
  ------------------
  |  Branch (437:17): [True: 123k, False: 45.3k]
  |  Branch (437:31): [True: 95.1k, False: 28.6k]
  ------------------
  438|  95.1k|                add_temporal_candidate(rf, mvstack, cnt, &rb[-1], ref,
  439|  95.1k|                                       NULL, NULL);
  440|  95.1k|            }
  441|   169k|            if (bx8 + bw8 < imin(rt->tile_col.end >> 1, (bx8 & ~7) + 8)) {
  ------------------
  |  Branch (441:17): [True: 125k, False: 43.4k]
  ------------------
  442|   125k|                if (has_bottom) {
  ------------------
  |  Branch (442:21): [True: 94.2k, False: 31.5k]
  ------------------
  443|  94.2k|                    add_temporal_candidate(rf, mvstack, cnt, &rb[bw8], ref,
  444|  94.2k|                                           NULL, NULL);
  445|  94.2k|                }
  446|   125k|                if (by8 + bh8 - 1 < imin(rt->tile_row.end >> 1, (by8 & ~7) + 8)) {
  ------------------
  |  Branch (446:21): [True: 125k, False: 820]
  ------------------
  447|   125k|                    add_temporal_candidate(rf, mvstack, cnt, &rb[bw8 - stride],
  448|   125k|                                           ref, NULL, NULL);
  449|   125k|                }
  450|   125k|            }
  451|   169k|        }
  452|   264k|    }
  453|  1.70M|    assert(*cnt <= 8);
  ------------------
  |  Branch (453:5): [True: 1.70M, False: 0]
  ------------------
  454|       |
  455|       |    // top/left (which, confusingly, is part of "secondary" references)
  456|  1.70M|    int have_dummy_newmv_match;
  457|  1.70M|    if ((n_rows | n_cols) != ~0U) {
  ------------------
  |  Branch (457:9): [True: 1.16M, False: 539k]
  ------------------
  458|  1.16M|        add_spatial_candidate(mvstack, cnt, 4, &b_top[-1], ref, gmv,
  459|  1.16M|                              &have_dummy_newmv_match, &have_row_mvs);
  460|  1.16M|    }
  461|       |
  462|       |    // "secondary" (non-direct neighbour) top & left edges
  463|       |    // what is different about secondary is that everything is now in 8x8 resolution
  464|  5.10M|    for (int n = 2; n <= 3; n++) {
  ------------------
  |  Branch (464:21): [True: 3.40M, False: 1.70M]
  ------------------
  465|  3.40M|        if ((unsigned) n > n_rows && (unsigned) n <= max_rows) {
  ------------------
  |  Branch (465:13): [True: 1.74M, False: 1.66M]
  |  Branch (465:38): [True: 1.18M, False: 557k]
  ------------------
  466|  1.18M|            n_rows += scan_row(mvstack, cnt, ref, gmv,
  467|  1.18M|                               &rt->r[(((by4 & 31) - 2 * n + 1) | 1) + 5][bx4 | 1],
  468|  1.18M|                               bw4, w4, 1 + max_rows - n, bw4 >= 16 ? 4 : 2,
  ------------------
  |  Branch (468:59): [True: 17.8k, False: 1.16M]
  ------------------
  469|  1.18M|                               &have_dummy_newmv_match, &have_row_mvs);
  470|  1.18M|        }
  471|       |
  472|  3.40M|        if ((unsigned) n > n_cols && (unsigned) n <= max_cols) {
  ------------------
  |  Branch (472:13): [True: 2.42M, False: 983k]
  |  Branch (472:38): [True: 1.86M, False: 556k]
  ------------------
  473|  1.86M|            n_cols += scan_col(mvstack, cnt, ref, gmv, &rt->r[((by4 & 31) | 1) + 5],
  474|  1.86M|                               bh4, h4, (bx4 - n * 2 + 1) | 1,
  475|  1.86M|                               1 + max_cols - n, bh4 >= 16 ? 4 : 2,
  ------------------
  |  Branch (475:50): [True: 160k, False: 1.70M]
  ------------------
  476|  1.86M|                               &have_dummy_newmv_match, &have_col_mvs);
  477|  1.86M|        }
  478|  3.40M|    }
  479|  1.70M|    assert(*cnt <= 8);
  ------------------
  |  Branch (479:5): [True: 1.70M, False: 0]
  ------------------
  480|       |
  481|  1.70M|    const int ref_match_count = have_col_mvs + have_row_mvs;
  482|       |
  483|       |    // context build-up
  484|  1.70M|    int refmv_ctx, newmv_ctx;
  485|  1.70M|    switch (nearest_match) {
  ------------------
  |  Branch (485:13): [True: 1.70M, False: 0]
  ------------------
  486|   214k|    case 0:
  ------------------
  |  Branch (486:5): [True: 214k, False: 1.48M]
  ------------------
  487|   214k|        refmv_ctx = imin(2, ref_match_count);
  488|   214k|        newmv_ctx = ref_match_count > 0;
  489|   214k|        break;
  490|   711k|    case 1:
  ------------------
  |  Branch (490:5): [True: 711k, False: 991k]
  ------------------
  491|   711k|        refmv_ctx = imin(ref_match_count * 3, 4);
  492|   711k|        newmv_ctx = 3 - have_newmv;
  493|   711k|        break;
  494|   776k|    case 2:
  ------------------
  |  Branch (494:5): [True: 776k, False: 926k]
  ------------------
  495|   776k|        refmv_ctx = 5;
  496|   776k|        newmv_ctx = 5 - have_newmv;
  497|   776k|        break;
  498|  1.70M|    }
  499|       |
  500|       |    // sorting (nearest, then "secondary")
  501|  1.70M|    int len = nearest_cnt;
  502|  3.45M|    while (len) {
  ------------------
  |  Branch (502:12): [True: 1.75M, False: 1.70M]
  ------------------
  503|  1.75M|        int last = 0;
  504|  2.44M|        for (int n = 1; n < len; n++) {
  ------------------
  |  Branch (504:25): [True: 689k, False: 1.75M]
  ------------------
  505|   689k|            if (mvstack[n - 1].weight < mvstack[n].weight) {
  ------------------
  |  Branch (505:17): [True: 292k, False: 397k]
  ------------------
  506|   292k|#define EXCHANGE(a, b) do { refmvs_candidate tmp = a; a = b; b = tmp; } while (0)
  507|   292k|                EXCHANGE(mvstack[n - 1], mvstack[n]);
  ------------------
  |  |  506|   292k|#define EXCHANGE(a, b) do { refmvs_candidate tmp = a; a = b; b = tmp; } while (0)
  |  |  ------------------
  |  |  |  Branch (506:80): [Folded, False: 292k]
  |  |  ------------------
  ------------------
  508|   292k|                last = n;
  509|   292k|            }
  510|   689k|        }
  511|  1.75M|        len = last;
  512|  1.75M|    }
  513|  1.70M|    len = *cnt;
  514|  2.68M|    while (len > nearest_cnt) {
  ------------------
  |  Branch (514:12): [True: 982k, False: 1.70M]
  ------------------
  515|   982k|        int last = nearest_cnt;
  516|  1.59M|        for (int n = nearest_cnt + 1; n < len; n++) {
  ------------------
  |  Branch (516:39): [True: 611k, False: 982k]
  ------------------
  517|   611k|            if (mvstack[n - 1].weight < mvstack[n].weight) {
  ------------------
  |  Branch (517:17): [True: 173k, False: 438k]
  ------------------
  518|   173k|                EXCHANGE(mvstack[n - 1], mvstack[n]);
  ------------------
  |  |  506|   173k|#define EXCHANGE(a, b) do { refmvs_candidate tmp = a; a = b; b = tmp; } while (0)
  |  |  ------------------
  |  |  |  Branch (506:80): [Folded, False: 173k]
  |  |  ------------------
  ------------------
  519|   173k|#undef EXCHANGE
  520|   173k|                last = n;
  521|   173k|            }
  522|   611k|        }
  523|   982k|        len = last;
  524|   982k|    }
  525|       |
  526|  1.70M|    if (ref.ref[1] > 0) {
  ------------------
  |  Branch (526:9): [True: 192k, False: 1.51M]
  ------------------
  527|   192k|        if (*cnt < 2) {
  ------------------
  |  Branch (527:13): [True: 129k, False: 63.3k]
  ------------------
  528|   129k|            const int sign0 = rf->sign_bias[ref.ref[0] - 1];
  529|   129k|            const int sign1 = rf->sign_bias[ref.ref[1] - 1];
  530|   129k|            const int sz4 = imin(w4, h4);
  531|   129k|            refmvs_candidate *const same = &mvstack[*cnt];
  532|   129k|            int same_count[4] = { 0 };
  533|       |
  534|       |            // non-self references in top
  535|   233k|            if (n_rows != ~0U) for (int x = 0; x < sz4;) {
  ------------------
  |  Branch (535:17): [True: 111k, False: 17.7k]
  |  Branch (535:48): [True: 121k, False: 111k]
  ------------------
  536|   121k|                const refmvs_block *const cand_b = &b_top[x];
  537|   121k|                add_compound_extended_candidate(same, same_count, cand_b,
  538|   121k|                                                sign0, sign1, ref, rf->sign_bias);
  539|   121k|                x += dav1d_block_dimensions[cand_b->bs][0];
  540|   121k|            }
  541|       |
  542|       |            // non-self references in left
  543|   264k|            if (n_cols != ~0U) for (int y = 0; y < sz4;) {
  ------------------
  |  Branch (543:17): [True: 122k, False: 6.85k]
  |  Branch (543:48): [True: 142k, False: 122k]
  ------------------
  544|   142k|                const refmvs_block *const cand_b = &b_left[y][bx4 - 1];
  545|   142k|                add_compound_extended_candidate(same, same_count, cand_b,
  546|   142k|                                                sign0, sign1, ref, rf->sign_bias);
  547|   142k|                y += dav1d_block_dimensions[cand_b->bs][1];
  548|   142k|            }
  549|       |
  550|   129k|            refmvs_candidate *const diff = &same[2];
  551|   129k|            const int *const diff_count = &same_count[2];
  552|       |
  553|       |            // merge together
  554|   388k|            for (int n = 0; n < 2; n++) {
  ------------------
  |  Branch (554:29): [True: 258k, False: 129k]
  ------------------
  555|   258k|                int m = same_count[n];
  556|       |
  557|   258k|                if (m >= 2) continue;
  ------------------
  |  Branch (557:21): [True: 82.7k, False: 176k]
  ------------------
  558|       |
  559|   176k|                const int l = diff_count[n];
  560|   176k|                if (l) {
  ------------------
  |  Branch (560:21): [True: 162k, False: 13.6k]
  ------------------
  561|   162k|                    same[m].mv.mv[n] = diff[0].mv.mv[n];
  562|   162k|                    if (++m == 2) continue;
  ------------------
  |  Branch (562:25): [True: 106k, False: 56.2k]
  ------------------
  563|  56.2k|                    if (l == 2) {
  ------------------
  |  Branch (563:25): [True: 46.6k, False: 9.63k]
  ------------------
  564|  46.6k|                        same[1].mv.mv[n] = diff[1].mv.mv[n];
  565|  46.6k|                        continue;
  566|  46.6k|                    }
  567|  56.2k|                }
  568|  30.9k|                do {
  569|  30.9k|                    same[m].mv.mv[n] = tgmv[n];
  570|  30.9k|                } while (++m < 2);
  ------------------
  |  Branch (570:26): [True: 7.67k, False: 23.2k]
  ------------------
  571|  23.2k|            }
  572|       |
  573|       |            // if the first extended was the same as the non-extended one,
  574|       |            // then replace it with the second extended one
  575|   129k|            int n = *cnt;
  576|   129k|            if (n == 1 && mvstack[0].mv.n == same[0].mv.n)
  ------------------
  |  Branch (576:17): [True: 74.7k, False: 54.7k]
  |  Branch (576:27): [True: 53.4k, False: 21.2k]
  ------------------
  577|  53.4k|                mvstack[1].mv = mvstack[2].mv;
  578|   184k|            do {
  579|   184k|                mvstack[n].weight = 2;
  580|   184k|            } while (++n < 2);
  ------------------
  |  Branch (580:22): [True: 54.7k, False: 129k]
  ------------------
  581|   129k|            *cnt = 2;
  582|   129k|        }
  583|       |
  584|       |        // clamping
  585|   192k|        const int left = -(bx4 + bw4 + 4) * 4 * 8;
  586|   192k|        const int right = (rf->iw4 - bx4 + 4) * 4 * 8;
  587|   192k|        const int top = -(by4 + bh4 + 4) * 4 * 8;
  588|   192k|        const int bottom = (rf->ih4 - by4 + 4) * 4 * 8;
  589|       |
  590|   192k|        const int n_refmvs = *cnt;
  591|   192k|        int n = 0;
  592|   431k|        do {
  593|   431k|            mvstack[n].mv.mv[0].x = iclip(mvstack[n].mv.mv[0].x, left, right);
  594|   431k|            mvstack[n].mv.mv[0].y = iclip(mvstack[n].mv.mv[0].y, top, bottom);
  595|   431k|            mvstack[n].mv.mv[1].x = iclip(mvstack[n].mv.mv[1].x, left, right);
  596|   431k|            mvstack[n].mv.mv[1].y = iclip(mvstack[n].mv.mv[1].y, top, bottom);
  597|   431k|        } while (++n < n_refmvs);
  ------------------
  |  Branch (597:18): [True: 238k, False: 192k]
  ------------------
  598|       |
  599|   192k|        switch (refmv_ctx >> 1) {
  ------------------
  |  Branch (599:17): [True: 192k, False: 0]
  ------------------
  600|  67.8k|        case 0:
  ------------------
  |  Branch (600:9): [True: 67.8k, False: 124k]
  ------------------
  601|  67.8k|            *ctx = imin(newmv_ctx, 1);
  602|  67.8k|            break;
  603|  66.7k|        case 1:
  ------------------
  |  Branch (603:9): [True: 66.7k, False: 125k]
  ------------------
  604|  66.7k|            *ctx = 1 + imin(newmv_ctx, 3);
  605|  66.7k|            break;
  606|  58.1k|        case 2:
  ------------------
  |  Branch (606:9): [True: 58.1k, False: 134k]
  ------------------
  607|  58.1k|            *ctx = iclip(3 + newmv_ctx, 4, 7);
  608|  58.1k|            break;
  609|   192k|        }
  610|       |
  611|   192k|        return;
  612|  1.51M|    } else if (*cnt < 2 && ref.ref[0] > 0) {
  ------------------
  |  Branch (612:16): [True: 645k, False: 865k]
  |  Branch (612:28): [True: 545k, False: 100k]
  ------------------
  613|   545k|        const int sign = rf->sign_bias[ref.ref[0] - 1];
  614|   545k|        const int sz4 = imin(w4, h4);
  615|       |
  616|       |        // non-self references in top
  617|   990k|        if (n_rows != ~0U) for (int x = 0; x < sz4 && *cnt < 2;) {
  ------------------
  |  Branch (617:13): [True: 477k, False: 67.3k]
  |  Branch (617:44): [True: 514k, False: 475k]
  |  Branch (617:55): [True: 512k, False: 2.18k]
  ------------------
  618|   512k|            const refmvs_block *const cand_b = &b_top[x];
  619|   512k|            add_single_extended_candidate(mvstack, cnt, cand_b, sign, rf->sign_bias);
  620|   512k|            x += dav1d_block_dimensions[cand_b->bs][0];
  621|   512k|        }
  622|       |
  623|       |        // non-self references in left
  624|   955k|        if (n_cols != ~0U) for (int y = 0; y < sz4 && *cnt < 2;) {
  ------------------
  |  Branch (624:13): [True: 482k, False: 63.0k]
  |  Branch (624:44): [True: 520k, False: 435k]
  |  Branch (624:55): [True: 473k, False: 46.7k]
  ------------------
  625|   473k|            const refmvs_block *const cand_b = &b_left[y][bx4 - 1];
  626|   473k|            add_single_extended_candidate(mvstack, cnt, cand_b, sign, rf->sign_bias);
  627|   473k|            y += dav1d_block_dimensions[cand_b->bs][1];
  628|   473k|        }
  629|   545k|    }
  630|  1.70M|    assert(*cnt <= 8);
  ------------------
  |  Branch (630:5): [True: 1.51M, False: 0]
  ------------------
  631|       |
  632|       |    // clamping
  633|  1.51M|    int n_refmvs = *cnt;
  634|  1.51M|    if (n_refmvs) {
  ------------------
  |  Branch (634:9): [True: 1.43M, False: 75.0k]
  ------------------
  635|  1.43M|        const int left = -(bx4 + bw4 + 4) * 4 * 8;
  636|  1.43M|        const int right = (rf->iw4 - bx4 + 4) * 4 * 8;
  637|  1.43M|        const int top = -(by4 + bh4 + 4) * 4 * 8;
  638|  1.43M|        const int bottom = (rf->ih4 - by4 + 4) * 4 * 8;
  639|       |
  640|  1.43M|        int n = 0;
  641|  3.30M|        do {
  642|  3.30M|            mvstack[n].mv.mv[0].x = iclip(mvstack[n].mv.mv[0].x, left, right);
  643|  3.30M|            mvstack[n].mv.mv[0].y = iclip(mvstack[n].mv.mv[0].y, top, bottom);
  644|  3.30M|        } while (++n < n_refmvs);
  ------------------
  |  Branch (644:18): [True: 1.87M, False: 1.43M]
  ------------------
  645|  1.43M|    }
  646|       |
  647|  2.13M|    for (int n = *cnt; n < 2; n++)
  ------------------
  |  Branch (647:24): [True: 624k, False: 1.51M]
  ------------------
  648|   624k|        mvstack[n].mv.mv[0] = tgmv[0];
  649|       |
  650|  1.51M|    *ctx = (refmv_ctx << 4) | (globalmv_ctx << 3) | newmv_ctx;
  651|  1.51M|}
dav1d_refmvs_tile_sbrow_init:
  657|   130k|{
  658|   130k|    if (rf->n_tile_threads == 1) tile_row_idx = 0;
  ------------------
  |  Branch (658:9): [True: 130k, False: 0]
  ------------------
  659|   130k|    rt->rp_proj = &rf->rp_proj[16 * rf->rp_stride * tile_row_idx];
  660|   130k|    const ptrdiff_t r_stride = rf->rp_stride * 2;
  661|   130k|    const ptrdiff_t pass_off = (rf->n_frame_threads > 1 && pass == 2) ?
  ------------------
  |  Branch (661:33): [True: 0, False: 130k]
  |  Branch (661:60): [True: 0, False: 0]
  ------------------
  662|   130k|        35 * 2 * rf->n_blocks : 0;
  663|   130k|    refmvs_block *r = &rf->r[35 * r_stride * tile_row_idx + pass_off];
  664|   130k|    const int sbsz = rf->sbsz;
  665|   130k|    const int off = (sbsz * sby) & 16;
  666|  3.47M|    for (int i = 0; i < sbsz; i++, r += r_stride)
  ------------------
  |  Branch (666:21): [True: 3.34M, False: 130k]
  ------------------
  667|  3.34M|        rt->r[off + 5 + i] = r;
  668|   130k|    rt->r[off + 0] = r;
  669|   130k|    r += r_stride;
  670|   130k|    rt->r[off + 1] = NULL;
  671|   130k|    rt->r[off + 2] = r;
  672|   130k|    r += r_stride;
  673|   130k|    rt->r[off + 3] = NULL;
  674|   130k|    rt->r[off + 4] = r;
  675|   130k|    if (sby & 1) {
  ------------------
  |  Branch (675:9): [True: 51.8k, False: 78.6k]
  ------------------
  676|  51.8k|#define EXCHANGE(a, b) do { void *const tmp = a; a = b; b = tmp; } while (0)
  677|  51.8k|        EXCHANGE(rt->r[off + 0], rt->r[off + sbsz + 0]);
  ------------------
  |  |  676|  51.8k|#define EXCHANGE(a, b) do { void *const tmp = a; a = b; b = tmp; } while (0)
  |  |  ------------------
  |  |  |  Branch (676:75): [Folded, False: 51.8k]
  |  |  ------------------
  ------------------
  678|  51.8k|        EXCHANGE(rt->r[off + 2], rt->r[off + sbsz + 2]);
  ------------------
  |  |  676|  51.8k|#define EXCHANGE(a, b) do { void *const tmp = a; a = b; b = tmp; } while (0)
  |  |  ------------------
  |  |  |  Branch (676:75): [Folded, False: 51.8k]
  |  |  ------------------
  ------------------
  679|  51.8k|        EXCHANGE(rt->r[off + 4], rt->r[off + sbsz + 4]);
  ------------------
  |  |  676|  51.8k|#define EXCHANGE(a, b) do { void *const tmp = a; a = b; b = tmp; } while (0)
  |  |  ------------------
  |  |  |  Branch (676:75): [Folded, False: 51.8k]
  |  |  ------------------
  ------------------
  680|  51.8k|#undef EXCHANGE
  681|  51.8k|    }
  682|       |
  683|   130k|    rt->rf = rf;
  684|   130k|    rt->tile_row.start = tile_row_start4;
  685|   130k|    rt->tile_row.end = imin(tile_row_end4, rf->ih4);
  686|   130k|    rt->tile_col.start = tile_col_start4;
  687|   130k|    rt->tile_col.end = imin(tile_col_end4, rf->iw4);
  688|   130k|}
dav1d_refmvs_init_frame:
  807|  28.3k|{
  808|  28.3k|    const int rp_stride = ((frm_hdr->width[0] + 127) & ~127) >> 3;
  809|  28.3k|    const int n_tile_rows = n_tile_threads > 1 ? frm_hdr->tiling.rows : 1;
  ------------------
  |  Branch (809:29): [True: 0, False: 28.3k]
  ------------------
  810|  28.3k|    const int n_blocks = rp_stride * n_tile_rows;
  811|       |
  812|  28.3k|    rf->sbsz = 16 << seq_hdr->sb128;
  813|  28.3k|    rf->frm_hdr = frm_hdr;
  814|  28.3k|    rf->iw8 = (frm_hdr->width[0] + 7) >> 3;
  815|  28.3k|    rf->ih8 = (frm_hdr->height + 7) >> 3;
  816|  28.3k|    rf->iw4 = rf->iw8 << 1;
  817|  28.3k|    rf->ih4 = rf->ih8 << 1;
  818|  28.3k|    rf->rp = rp;
  819|  28.3k|    rf->rp_stride = rp_stride;
  820|  28.3k|    rf->n_tile_threads = n_tile_threads;
  821|  28.3k|    rf->n_frame_threads = n_frame_threads;
  822|       |
  823|  28.3k|    if (n_blocks != rf->n_blocks) {
  ------------------
  |  Branch (823:9): [True: 6.60k, False: 21.7k]
  ------------------
  824|  6.60k|        const size_t r_sz = sizeof(*rf->r) * 35 * 2 * n_blocks * (1 + (n_frame_threads > 1));
  825|  6.60k|        const size_t rp_proj_sz = sizeof(*rf->rp_proj) * 16 * n_blocks;
  826|       |        /* Note that sizeof(*rf->r) == 12, but it's accessed using 16-byte unaligned
  827|       |         * loads in save_tmvs() asm which can overread 4 bytes into rp_proj. */
  828|  6.60k|        dav1d_free_aligned(rf->r);
  ------------------
  |  |  136|  6.60k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
  829|  6.60k|        rf->r = dav1d_alloc_aligned(ALLOC_REFMVS, r_sz + rp_proj_sz, 64);
  ------------------
  |  |  134|  6.60k|#define dav1d_alloc_aligned(type, sz, align) dav1d_alloc_aligned_internal(sz, align)
  ------------------
  830|  6.60k|        if (!rf->r) {
  ------------------
  |  Branch (830:13): [True: 0, False: 6.60k]
  ------------------
  831|      0|            rf->n_blocks = 0;
  832|      0|            return DAV1D_ERR(ENOMEM);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  833|      0|        }
  834|       |
  835|  6.60k|        rf->rp_proj = (refmvs_temporal_block*)((uintptr_t)rf->r + r_sz);
  836|  6.60k|        rf->n_blocks = n_blocks;
  837|  6.60k|    }
  838|       |
  839|  28.3k|    const int poc = frm_hdr->frame_offset;
  840|   226k|    for (int i = 0; i < 7; i++) {
  ------------------
  |  Branch (840:21): [True: 198k, False: 28.3k]
  ------------------
  841|   198k|        const int poc_diff = get_poc_diff(seq_hdr->order_hint_n_bits,
  842|   198k|                                          ref_poc[i], poc);
  843|   198k|        rf->sign_bias[i] = poc_diff > 0;
  844|   198k|        rf->mfmv_sign[i] = poc_diff < 0;
  845|   198k|        rf->pocdiff[i] = iclip(get_poc_diff(seq_hdr->order_hint_n_bits,
  846|   198k|                                            poc, ref_poc[i]), -31, 31);
  847|   198k|    }
  848|       |
  849|       |    // temporal MV setup
  850|  28.3k|    rf->n_mfmvs = 0;
  851|  28.3k|    rf->rp_ref = rp_ref;
  852|  28.3k|    if (frm_hdr->use_ref_frame_mvs && seq_hdr->order_hint_n_bits) {
  ------------------
  |  Branch (852:9): [True: 7.78k, False: 20.5k]
  |  Branch (852:39): [True: 7.78k, False: 0]
  ------------------
  853|  7.78k|        int total = 2;
  854|  7.78k|        if (rp_ref[0] && ref_ref_poc[0][6] != ref_poc[3] /* alt-of-last != gold */) {
  ------------------
  |  Branch (854:13): [True: 5.06k, False: 2.72k]
  |  Branch (854:26): [True: 3.69k, False: 1.37k]
  ------------------
  855|  3.69k|            rf->mfmv_ref[rf->n_mfmvs++] = 0; // last
  856|  3.69k|            total = 3;
  857|  3.69k|        }
  858|  7.78k|        if (rp_ref[4] && get_poc_diff(seq_hdr->order_hint_n_bits, ref_poc[4],
  ------------------
  |  Branch (858:13): [True: 4.54k, False: 3.24k]
  |  Branch (858:26): [True: 1.05k, False: 3.49k]
  ------------------
  859|  4.54k|                                      frm_hdr->frame_offset) > 0)
  860|  1.05k|        {
  861|  1.05k|            rf->mfmv_ref[rf->n_mfmvs++] = 4; // bwd
  862|  1.05k|        }
  863|  7.78k|        if (rp_ref[5] && get_poc_diff(seq_hdr->order_hint_n_bits, ref_poc[5],
  ------------------
  |  Branch (863:13): [True: 5.26k, False: 2.52k]
  |  Branch (863:26): [True: 1.02k, False: 4.24k]
  ------------------
  864|  5.26k|                                      frm_hdr->frame_offset) > 0)
  865|  1.02k|        {
  866|  1.02k|            rf->mfmv_ref[rf->n_mfmvs++] = 5; // altref2
  867|  1.02k|        }
  868|  7.78k|        if (rf->n_mfmvs < total && rp_ref[6] &&
  ------------------
  |  Branch (868:13): [True: 6.97k, False: 813]
  |  Branch (868:36): [True: 4.45k, False: 2.52k]
  ------------------
  869|  4.45k|            get_poc_diff(seq_hdr->order_hint_n_bits, ref_poc[6],
  ------------------
  |  Branch (869:13): [True: 3.35k, False: 1.09k]
  ------------------
  870|  4.45k|                         frm_hdr->frame_offset) > 0)
  871|  3.35k|        {
  872|  3.35k|            rf->mfmv_ref[rf->n_mfmvs++] = 6; // altref
  873|  3.35k|        }
  874|  7.78k|        if (rf->n_mfmvs < total && rp_ref[1])
  ------------------
  |  Branch (874:13): [True: 6.56k, False: 1.22k]
  |  Branch (874:36): [True: 4.01k, False: 2.55k]
  ------------------
  875|  4.01k|            rf->mfmv_ref[rf->n_mfmvs++] = 1; // last2
  876|       |
  877|  20.9k|        for (int n = 0; n < rf->n_mfmvs; n++) {
  ------------------
  |  Branch (877:25): [True: 13.1k, False: 7.78k]
  ------------------
  878|  13.1k|            const int rpoc = ref_poc[rf->mfmv_ref[n]];
  879|  13.1k|            const int diff1 = get_poc_diff(seq_hdr->order_hint_n_bits,
  880|  13.1k|                                           rpoc, frm_hdr->frame_offset);
  881|  13.1k|            if (abs(diff1) > 31) {
  ------------------
  |  Branch (881:17): [True: 418, False: 12.7k]
  ------------------
  882|    418|                rf->mfmv_ref2cur[n] = INVALID_REF2CUR;
  ------------------
  |  |   41|    418|#define INVALID_REF2CUR (-32)
  ------------------
  883|  12.7k|            } else {
  884|  12.7k|                rf->mfmv_ref2cur[n] = rf->mfmv_ref[n] < 4 ? -diff1 : diff1;
  ------------------
  |  Branch (884:39): [True: 7.56k, False: 5.15k]
  ------------------
  885|   101k|                for (int m = 0; m < 7; m++) {
  ------------------
  |  Branch (885:33): [True: 89.0k, False: 12.7k]
  ------------------
  886|  89.0k|                    const int rrpoc = ref_ref_poc[rf->mfmv_ref[n]][m];
  887|  89.0k|                    const int diff2 = get_poc_diff(seq_hdr->order_hint_n_bits,
  888|  89.0k|                                                   rpoc, rrpoc);
  889|       |                    // unsigned comparison also catches the < 0 case
  890|  89.0k|                    rf->mfmv_ref2ref[n][m] = (unsigned) diff2 > 31U ? 0 : diff2;
  ------------------
  |  Branch (890:46): [True: 26.3k, False: 62.6k]
  ------------------
  891|  89.0k|                }
  892|  12.7k|            }
  893|  13.1k|        }
  894|  7.78k|    }
  895|  28.3k|    rf->use_ref_frame_mvs = rf->n_mfmvs > 0;
  896|       |
  897|  28.3k|    return 0;
  898|  28.3k|}
dav1d_refmvs_dsp_init:
  921|  9.69k|{
  922|  9.69k|    c->load_tmvs = load_tmvs_c;
  923|  9.69k|    c->save_tmvs = save_tmvs_c;
  924|  9.69k|    c->splat_mv = splat_mv_c;
  925|       |
  926|  9.69k|#if HAVE_ASM
  927|       |#if ARCH_AARCH64 || ARCH_ARM
  928|       |    refmvs_dsp_init_arm(c);
  929|       |#elif ARCH_LOONGARCH64
  930|       |    refmvs_dsp_init_loongarch(c);
  931|       |#elif ARCH_X86
  932|       |    refmvs_dsp_init_x86(c);
  933|  9.69k|#endif
  934|  9.69k|#endif
  935|  9.69k|}
refmvs.c:scan_row:
  102|  2.41M|{
  103|  2.41M|    const refmvs_block *cand_b = b;
  104|  2.41M|    const enum BlockSize first_cand_bs = cand_b->bs;
  105|  2.41M|    const uint8_t *const first_cand_b_dim = dav1d_block_dimensions[first_cand_bs];
  106|  2.41M|    int cand_bw4 = first_cand_b_dim[0];
  107|  2.41M|    int len = imax(step, imin(bw4, cand_bw4));
  108|       |
  109|  2.41M|    if (bw4 <= cand_bw4) {
  ------------------
  |  Branch (109:9): [True: 2.09M, False: 314k]
  ------------------
  110|       |        // FIXME weight can be higher for odd blocks (bx4 & 1), but then the
  111|       |        // position of the first block has to be odd already, i.e. not just
  112|       |        // for row_offset=-3/-5
  113|       |        // FIXME why can this not be cand_bw4?
  114|  2.09M|        const int weight = bw4 == 1 ? 2 :
  ------------------
  |  Branch (114:28): [True: 638k, False: 1.46M]
  ------------------
  115|  2.09M|                           imax(2, imin(2 * max_rows, first_cand_b_dim[1]));
  116|  2.09M|        add_spatial_candidate(mvstack, cnt, len * weight, cand_b, ref, gmv,
  117|  2.09M|                              have_newmv_match, have_refmv_match);
  118|  2.09M|        return weight >> 1;
  119|  2.09M|    }
  120|       |
  121|   652k|    for (int x = 0;;) {
  122|       |        // FIXME if we overhang above, we could fill a bitmask so we don't have
  123|       |        // to repeat the add_spatial_candidate() for the next row, but just increase
  124|       |        // the weight here
  125|   652k|        add_spatial_candidate(mvstack, cnt, len * 2, cand_b, ref, gmv,
  126|   652k|                              have_newmv_match, have_refmv_match);
  127|   652k|        x += len;
  128|   652k|        if (x >= w4) return 1;
  ------------------
  |  Branch (128:13): [True: 314k, False: 337k]
  ------------------
  129|   337k|        cand_b = &b[x];
  130|   337k|        cand_bw4 = dav1d_block_dimensions[cand_b->bs][0];
  131|   337k|        assert(cand_bw4 < bw4);
  ------------------
  |  Branch (131:9): [True: 337k, False: 0]
  ------------------
  132|   337k|        len = imax(step, cand_bw4);
  133|   337k|    }
  134|   314k|}
refmvs.c:scan_col:
  141|  3.49M|{
  142|  3.49M|    const refmvs_block *cand_b = &b[0][bx4];
  143|  3.49M|    const enum BlockSize first_cand_bs = cand_b->bs;
  144|  3.49M|    const uint8_t *const first_cand_b_dim = dav1d_block_dimensions[first_cand_bs];
  145|  3.49M|    int cand_bh4 = first_cand_b_dim[1];
  146|  3.49M|    int len = imax(step, imin(bh4, cand_bh4));
  147|       |
  148|  3.49M|    if (bh4 <= cand_bh4) {
  ------------------
  |  Branch (148:9): [True: 3.05M, False: 433k]
  ------------------
  149|       |        // FIXME weight can be higher for odd blocks (by4 & 1), but then the
  150|       |        // position of the first block has to be odd already, i.e. not just
  151|       |        // for col_offset=-3/-5
  152|       |        // FIXME why can this not be cand_bh4?
  153|  3.05M|        const int weight = bh4 == 1 ? 2 :
  ------------------
  |  Branch (153:28): [True: 1.12M, False: 1.92M]
  ------------------
  154|  3.05M|                           imax(2, imin(2 * max_cols, first_cand_b_dim[0]));
  155|  3.05M|        add_spatial_candidate(mvstack, cnt, len * weight, cand_b, ref, gmv,
  156|  3.05M|                            have_newmv_match, have_refmv_match);
  157|  3.05M|        return weight >> 1;
  158|  3.05M|    }
  159|       |
  160|   848k|    for (int y = 0;;) {
  161|       |        // FIXME if we overhang above, we could fill a bitmask so we don't have
  162|       |        // to repeat the add_spatial_candidate() for the next row, but just increase
  163|       |        // the weight here
  164|   848k|        add_spatial_candidate(mvstack, cnt, len * 2, cand_b, ref, gmv,
  165|   848k|                              have_newmv_match, have_refmv_match);
  166|   848k|        y += len;
  167|   848k|        if (y >= h4) return 1;
  ------------------
  |  Branch (167:13): [True: 433k, False: 415k]
  ------------------
  168|   415k|        cand_b = &b[y][bx4];
  169|   415k|        cand_bh4 = dav1d_block_dimensions[cand_b->bs][1];
  170|   415k|        assert(cand_bh4 < bh4);
  ------------------
  |  Branch (170:9): [True: 415k, False: 0]
  ------------------
  171|   415k|        len = imax(step, cand_bh4);
  172|   415k|    }
  173|   433k|}
refmvs.c:add_spatial_candidate:
   46|  8.46M|{
   47|  8.46M|    if (b->mv.mv[0].n == INVALID_MV) return; // intra block, no intrabc
  ------------------
  |  |   40|  8.46M|#define INVALID_MV 0x80008000
  ------------------
  |  Branch (47:9): [True: 808k, False: 7.66M]
  ------------------
   48|       |
   49|  7.66M|    if (ref.ref[1] == -1) {
  ------------------
  |  Branch (49:9): [True: 6.55M, False: 1.10M]
  ------------------
   50|  8.39M|        for (int n = 0; n < 2; n++) {
  ------------------
  |  Branch (50:25): [True: 7.53M, False: 858k]
  ------------------
   51|  7.53M|            if (b->ref.ref[n] == ref.ref[0]) {
  ------------------
  |  Branch (51:17): [True: 5.69M, False: 1.84M]
  ------------------
   52|  5.69M|                const mv cand_mv = ((b->mf & 1) && gmv[0].n != INVALID_MV) ?
  ------------------
  |  |   40|  1.49M|#define INVALID_MV 0x80008000
  ------------------
  |  Branch (52:37): [True: 1.49M, False: 4.19M]
  |  Branch (52:52): [True: 185k, False: 1.31M]
  ------------------
   53|  5.50M|                                   gmv[0] : b->mv.mv[n];
   54|       |
   55|  5.69M|                *have_refmv_match = 1;
   56|  5.69M|                *have_newmv_match |= b->mf >> 1;
   57|       |
   58|  5.69M|                const int last = *cnt;
   59|  9.48M|                for (int m = 0; m < last; m++)
  ------------------
  |  Branch (59:33): [True: 6.42M, False: 3.05M]
  ------------------
   60|  6.42M|                    if (mvstack[m].mv.mv[0].n == cand_mv.n) {
  ------------------
  |  Branch (60:25): [True: 2.64M, False: 3.78M]
  ------------------
   61|  2.64M|                        mvstack[m].weight += weight;
   62|  2.64M|                        return;
   63|  2.64M|                    }
   64|       |
   65|  3.05M|                if (last < 8) {
  ------------------
  |  Branch (65:21): [True: 3.04M, False: 8.19k]
  ------------------
   66|  3.04M|                    mvstack[last].mv.mv[0] = cand_mv;
   67|  3.04M|                    mvstack[last].weight = weight;
   68|  3.04M|                    *cnt = last + 1;
   69|  3.04M|                }
   70|  3.05M|                return;
   71|  5.69M|            }
   72|  7.53M|        }
   73|  6.55M|    } else if (b->ref.pair == ref.pair) {
  ------------------
  |  Branch (73:16): [True: 390k, False: 717k]
  ------------------
   74|   390k|        const refmvs_mvpair cand_mv = { .mv = {
   75|   390k|            [0] = ((b->mf & 1) && gmv[0].n != INVALID_MV) ? gmv[0] : b->mv.mv[0],
  ------------------
  |  |   40|  20.2k|#define INVALID_MV 0x80008000
  ------------------
  |  Branch (75:20): [True: 20.2k, False: 370k]
  |  Branch (75:35): [True: 4.35k, False: 15.8k]
  ------------------
   76|   390k|            [1] = ((b->mf & 1) && gmv[1].n != INVALID_MV) ? gmv[1] : b->mv.mv[1],
  ------------------
  |  |   40|  20.2k|#define INVALID_MV 0x80008000
  ------------------
  |  Branch (76:20): [True: 20.2k, False: 370k]
  |  Branch (76:35): [True: 2.98k, False: 17.2k]
  ------------------
   77|   390k|        }};
   78|       |
   79|   390k|        *have_refmv_match = 1;
   80|   390k|        *have_newmv_match |= b->mf >> 1;
   81|       |
   82|   390k|        const int last = *cnt;
   83|   581k|        for (int n = 0; n < last; n++)
  ------------------
  |  Branch (83:25): [True: 353k, False: 227k]
  ------------------
   84|   353k|            if (mvstack[n].mv.n == cand_mv.n) {
  ------------------
  |  Branch (84:17): [True: 162k, False: 191k]
  ------------------
   85|   162k|                mvstack[n].weight += weight;
   86|   162k|                return;
   87|   162k|            }
   88|       |
   89|   227k|        if (last < 8) {
  ------------------
  |  Branch (89:13): [True: 227k, False: 358]
  ------------------
   90|   227k|            mvstack[last].mv = cand_mv;
   91|   227k|            mvstack[last].weight = weight;
   92|   227k|            *cnt = last + 1;
   93|   227k|        }
   94|   227k|    }
   95|  7.66M|}
refmvs.c:add_temporal_candidate:
  198|  1.27M|{
  199|  1.27M|    if (rb->mv.n == INVALID_MV) return;
  ------------------
  |  |   40|  1.27M|#define INVALID_MV 0x80008000
  ------------------
  |  Branch (199:9): [True: 768k, False: 507k]
  ------------------
  200|       |
  201|   507k|    union mv mv = mv_projection(rb->mv, rf->pocdiff[ref.ref[0] - 1], rb->ref);
  202|   507k|    fix_mv_precision(rf->frm_hdr, &mv);
  203|       |
  204|   507k|    const int last = *cnt;
  205|   507k|    if (ref.ref[1] == -1) {
  ------------------
  |  Branch (205:9): [True: 400k, False: 107k]
  ------------------
  206|   400k|        if (globalmv_ctx)
  ------------------
  |  Branch (206:13): [True: 78.3k, False: 322k]
  ------------------
  207|  78.3k|            *globalmv_ctx = (abs(mv.x - gmv[0].x) | abs(mv.y - gmv[0].y)) >= 16;
  208|       |
  209|  1.11M|        for (int n = 0; n < last; n++)
  ------------------
  |  Branch (209:25): [True: 986k, False: 128k]
  ------------------
  210|   986k|            if (mvstack[n].mv.mv[0].n == mv.n) {
  ------------------
  |  Branch (210:17): [True: 272k, False: 713k]
  ------------------
  211|   272k|                mvstack[n].weight += 2;
  212|   272k|                return;
  213|   272k|            }
  214|   128k|        if (last < 8) {
  ------------------
  |  Branch (214:13): [True: 128k, False: 713]
  ------------------
  215|   128k|            mvstack[last].mv.mv[0] = mv;
  216|   128k|            mvstack[last].weight = 2;
  217|   128k|            *cnt = last + 1;
  218|   128k|        }
  219|   128k|    } else {
  220|   107k|        refmvs_mvpair mvp = { .mv = {
  221|   107k|            [0] = mv,
  222|   107k|            [1] = mv_projection(rb->mv, rf->pocdiff[ref.ref[1] - 1], rb->ref),
  223|   107k|        }};
  224|   107k|        fix_mv_precision(rf->frm_hdr, &mvp.mv[1]);
  225|       |
  226|   186k|        for (int n = 0; n < last; n++)
  ------------------
  |  Branch (226:25): [True: 166k, False: 19.7k]
  ------------------
  227|   166k|            if (mvstack[n].mv.n == mvp.n) {
  ------------------
  |  Branch (227:17): [True: 87.2k, False: 79.0k]
  ------------------
  228|  87.2k|                mvstack[n].weight += 2;
  229|  87.2k|                return;
  230|  87.2k|            }
  231|  19.7k|        if (last < 8) {
  ------------------
  |  Branch (231:13): [True: 19.4k, False: 303]
  ------------------
  232|  19.4k|            mvstack[last].mv = mvp;
  233|  19.4k|            mvstack[last].weight = 2;
  234|  19.4k|            *cnt = last + 1;
  235|  19.4k|        }
  236|  19.7k|    }
  237|   507k|}
refmvs.c:mv_projection:
  175|   614k|static inline union mv mv_projection(const union mv mv, const int num, const int den) {
  176|   614k|    static const uint16_t div_mult[32] = {
  177|   614k|           0, 16384, 8192, 5461, 4096, 3276, 2730, 2340,
  178|   614k|        2048,  1820, 1638, 1489, 1365, 1260, 1170, 1092,
  179|   614k|        1024,   963,  910,  862,  819,  780,  744,  712,
  180|   614k|         682,   655,  630,  606,  585,  564,  546,  528
  181|   614k|    };
  182|   614k|    assert(den > 0 && den < 32);
  ------------------
  |  Branch (182:5): [True: 614k, False: 0]
  |  Branch (182:5): [True: 614k, False: 0]
  ------------------
  183|   614k|    assert(num > -32 && num < 32);
  ------------------
  |  Branch (183:5): [True: 614k, False: 0]
  |  Branch (183:5): [True: 614k, False: 0]
  ------------------
  184|   614k|    const int frac = num * div_mult[den];
  185|   614k|    const int y = mv.y * frac, x = mv.x * frac;
  186|       |    // Round and clip according to AV1 spec section 7.9.3
  187|   614k|    return (union mv) { // 0x3fff == (1 << 14) - 1
  188|   614k|        .y = iclip((y + 8192 + (y >> 31)) >> 14, -0x3fff, 0x3fff),
  189|   614k|        .x = iclip((x + 8192 + (x >> 31)) >> 14, -0x3fff, 0x3fff)
  190|   614k|    };
  191|   614k|}
refmvs.c:add_compound_extended_candidate:
  245|   263k|{
  246|   263k|    refmvs_candidate *const diff = &same[2];
  247|   263k|    int *const diff_count = &same_count[2];
  248|       |
  249|   673k|    for (int n = 0; n < 2; n++) {
  ------------------
  |  Branch (249:21): [True: 513k, False: 160k]
  ------------------
  250|   513k|        const int cand_ref = cand_b->ref.ref[n];
  251|       |
  252|   513k|        if (cand_ref <= 0) break;
  ------------------
  |  Branch (252:13): [True: 103k, False: 409k]
  ------------------
  253|       |
  254|   409k|        mv cand_mv = cand_b->mv.mv[n];
  255|   409k|        if (cand_ref == ref.ref[0]) {
  ------------------
  |  Branch (255:13): [True: 154k, False: 254k]
  ------------------
  256|   154k|            if (same_count[0] < 2)
  ------------------
  |  Branch (256:17): [True: 147k, False: 7.55k]
  ------------------
  257|   147k|                same[same_count[0]++].mv.mv[0] = cand_mv;
  258|   154k|            if (diff_count[1] < 2) {
  ------------------
  |  Branch (258:17): [True: 128k, False: 25.9k]
  ------------------
  259|   128k|                if (sign1 ^ sign_bias[cand_ref - 1]) {
  ------------------
  |  Branch (259:21): [True: 12.0k, False: 116k]
  ------------------
  260|  12.0k|                    cand_mv.y = -cand_mv.y;
  261|  12.0k|                    cand_mv.x = -cand_mv.x;
  262|  12.0k|                }
  263|   128k|                diff[diff_count[1]++].mv.mv[1] = cand_mv;
  264|   128k|            }
  265|   254k|        } else if (cand_ref == ref.ref[1]) {
  ------------------
  |  Branch (265:20): [True: 134k, False: 120k]
  ------------------
  266|   134k|            if (same_count[1] < 2)
  ------------------
  |  Branch (266:17): [True: 130k, False: 4.04k]
  ------------------
  267|   130k|                same[same_count[1]++].mv.mv[1] = cand_mv;
  268|   134k|            if (diff_count[0] < 2) {
  ------------------
  |  Branch (268:17): [True: 110k, False: 24.3k]
  ------------------
  269|   110k|                if (sign0 ^ sign_bias[cand_ref - 1]) {
  ------------------
  |  Branch (269:21): [True: 8.38k, False: 101k]
  ------------------
  270|  8.38k|                    cand_mv.y = -cand_mv.y;
  271|  8.38k|                    cand_mv.x = -cand_mv.x;
  272|  8.38k|                }
  273|   110k|                diff[diff_count[0]++].mv.mv[0] = cand_mv;
  274|   110k|            }
  275|   134k|        } else {
  276|   120k|            mv i_cand_mv = (union mv) {
  277|   120k|                .x = -cand_mv.x,
  278|   120k|                .y = -cand_mv.y
  279|   120k|            };
  280|       |
  281|   120k|            if (diff_count[0] < 2) {
  ------------------
  |  Branch (281:17): [True: 91.7k, False: 28.7k]
  ------------------
  282|  91.7k|                diff[diff_count[0]++].mv.mv[0] =
  283|  91.7k|                    sign0 ^ sign_bias[cand_ref - 1] ?
  ------------------
  |  Branch (283:21): [True: 10.2k, False: 81.4k]
  ------------------
  284|  81.4k|                    i_cand_mv : cand_mv;
  285|  91.7k|            }
  286|       |
  287|   120k|            if (diff_count[1] < 2) {
  ------------------
  |  Branch (287:17): [True: 83.1k, False: 37.3k]
  ------------------
  288|  83.1k|                diff[diff_count[1]++].mv.mv[1] =
  289|  83.1k|                    sign1 ^ sign_bias[cand_ref - 1] ?
  ------------------
  |  Branch (289:21): [True: 8.21k, False: 74.9k]
  ------------------
  290|  74.9k|                    i_cand_mv : cand_mv;
  291|  83.1k|            }
  292|   120k|        }
  293|   409k|    }
  294|   263k|}
refmvs.c:add_single_extended_candidate:
  299|   986k|{
  300|  1.95M|    for (int n = 0; n < 2; n++) {
  ------------------
  |  Branch (300:21): [True: 1.92M, False: 35.8k]
  ------------------
  301|  1.92M|        const int cand_ref = cand_b->ref.ref[n];
  302|       |
  303|  1.92M|        if (cand_ref <= 0) break;
  ------------------
  |  Branch (303:13): [True: 950k, False: 970k]
  ------------------
  304|       |        // we need to continue even if cand_ref == ref.ref[0], since
  305|       |        // the candidate could have been added as a globalmv variant,
  306|       |        // which changes the value
  307|       |        // FIXME if scan_{row,col}() returned a mask for the nearest
  308|       |        // edge, we could skip the appropriate ones here
  309|       |
  310|   970k|        mv cand_mv = cand_b->mv.mv[n];
  311|   970k|        if (sign ^ sign_bias[cand_ref - 1]) {
  ------------------
  |  Branch (311:13): [True: 12.2k, False: 957k]
  ------------------
  312|  12.2k|            cand_mv.y = -cand_mv.y;
  313|  12.2k|            cand_mv.x = -cand_mv.x;
  314|  12.2k|        }
  315|       |
  316|   970k|        int m;
  317|   970k|        const int last = *cnt;
  318|  1.07M|        for (m = 0; m < last; m++)
  ------------------
  |  Branch (318:21): [True: 939k, False: 135k]
  ------------------
  319|   939k|            if (cand_mv.n == mvstack[m].mv.mv[0].n)
  ------------------
  |  Branch (319:17): [True: 834k, False: 105k]
  ------------------
  320|   834k|                break;
  321|   970k|        if (m == last) {
  ------------------
  |  Branch (321:13): [True: 135k, False: 834k]
  ------------------
  322|   135k|            mvstack[m].mv.mv[0] = cand_mv;
  323|   135k|            mvstack[m].weight = 2; // "minimal"
  324|   135k|            *cnt = last + 1;
  325|   135k|        }
  326|   970k|    }
  327|   986k|}

decode.c:dav1d_refmvs_save_tmvs:
  145|  48.9k|{
  146|  48.9k|    const refmvs_frame *const rf = rt->rf;
  147|       |
  148|  48.9k|    assert(row_start8 >= 0);
  ------------------
  |  Branch (148:5): [True: 48.9k, False: 0]
  ------------------
  149|  48.9k|    assert((unsigned) (row_end8 - row_start8) <= 16U);
  ------------------
  |  Branch (149:5): [True: 48.9k, False: 0]
  ------------------
  150|  48.9k|    row_end8 = imin(row_end8, rf->ih8);
  151|  48.9k|    col_end8 = imin(col_end8, rf->iw8);
  152|       |
  153|  48.9k|    const ptrdiff_t stride = rf->rp_stride;
  154|  48.9k|    const uint8_t *const ref_sign = rf->mfmv_sign;
  155|  48.9k|    refmvs_temporal_block *rp = &rf->rp[row_start8 * stride];
  156|       |
  157|  48.9k|    dsp->save_tmvs(rp, stride, rt->r + 6, ref_sign,
  158|  48.9k|                   col_end8, row_end8, col_start8, row_start8);
  159|  48.9k|}

dav1d_init_last_nonzero_col_from_eob_tables:
  350|  2.36k|COLD void dav1d_init_last_nonzero_col_from_eob_tables(void) {
  351|       |    static pthread_once_t initted = PTHREAD_ONCE_INIT;
  352|  2.36k|    pthread_once(&initted, init_internal);
  353|  2.36k|}
scan.c:init_internal:
  333|      1|static COLD void init_internal(void) {
  334|      1|    init_tbl(last_nonzero_col_from_eob_4x4,   scan_4x4,    4,  4);
  335|      1|    init_tbl(last_nonzero_col_from_eob_8x8,   scan_8x8,    8,  8);
  336|      1|    init_tbl(last_nonzero_col_from_eob_16x16, scan_16x16, 16, 16);
  337|      1|    init_tbl(last_nonzero_col_from_eob_32x32, scan_32x32, 32, 32);
  338|      1|    init_tbl(last_nonzero_col_from_eob_4x8,   scan_4x8,    4,  8);
  339|      1|    init_tbl(last_nonzero_col_from_eob_8x4,   scan_8x4,    8,  4);
  340|      1|    init_tbl(last_nonzero_col_from_eob_8x16,  scan_8x16,   8, 16);
  341|      1|    init_tbl(last_nonzero_col_from_eob_16x8,  scan_16x8,  16,  8);
  342|      1|    init_tbl(last_nonzero_col_from_eob_16x32, scan_16x32, 16, 32);
  343|      1|    init_tbl(last_nonzero_col_from_eob_32x16, scan_32x16, 32, 16);
  344|      1|    init_tbl(last_nonzero_col_from_eob_4x16,  scan_4x16,   4, 16);
  345|      1|    init_tbl(last_nonzero_col_from_eob_16x4,  scan_16x4,  16,  4);
  346|      1|    init_tbl(last_nonzero_col_from_eob_8x32,  scan_8x32,   8, 32);
  347|      1|    init_tbl(last_nonzero_col_from_eob_32x8,  scan_32x8,  32,  8);
  348|      1|}
scan.c:init_tbl:
  321|     14|{
  322|     14|    int max_col = 0;
  323|    218|    for (int y = 0, n = 0; y < h; y++) {
  ------------------
  |  Branch (323:28): [True: 204, False: 14]
  ------------------
  324|  3.54k|        for (int x = 0; x < w; x++, n++) {
  ------------------
  |  Branch (324:25): [True: 3.34k, False: 204]
  ------------------
  325|  3.34k|            const int rc = scan[n];
  326|  3.34k|            const int rcx = rc & (h - 1);
  327|  3.34k|            max_col = imax(max_col, rcx);
  328|  3.34k|            last_nonzero_col_from_eob[n] = max_col;
  329|  3.34k|        }
  330|    204|    }
  331|     14|}

dav1d_get_shear_params:
   80|  81.4k|int dav1d_get_shear_params(Dav1dWarpedMotionParams *const wm) {
   81|  81.4k|    const int32_t *const mat = wm->matrix;
   82|       |
   83|  81.4k|    if (mat[2] <= 0) return 1;
  ------------------
  |  Branch (83:9): [True: 0, False: 81.4k]
  ------------------
   84|       |
   85|  81.4k|    wm->u.p.alpha = iclip_wmp(mat[2] - 0x10000);
   86|  81.4k|    wm->u.p.beta = iclip_wmp(mat[3]);
   87|       |
   88|  81.4k|    int shift;
   89|  81.4k|    const int y = apply_sign(resolve_divisor_32(abs(mat[2]), &shift), mat[2]);
   90|  81.4k|    const int64_t v1 = ((int64_t) mat[4] * 0x10000) * y;
   91|  81.4k|    const int rnd = (1 << shift) >> 1;
   92|  81.4k|    wm->u.p.gamma = iclip_wmp(apply_sign64((int) ((llabs(v1) + rnd) >> shift), v1));
   93|  81.4k|    const int64_t v2 = ((int64_t) mat[3] * mat[4]) * y;
   94|  81.4k|    wm->u.p.delta = iclip_wmp(mat[5] -
   95|  81.4k|                          apply_sign64((int) ((llabs(v2) + rnd) >> shift), v2) -
   96|  81.4k|                          0x10000);
   97|       |
   98|  81.4k|    return (4 * abs(wm->u.p.alpha) + 7 * abs(wm->u.p.beta) >= 0x10000) ||
  ------------------
  |  Branch (98:12): [True: 3.29k, False: 78.1k]
  ------------------
   99|  78.1k|           (4 * abs(wm->u.p.gamma) + 4 * abs(wm->u.p.delta) >= 0x10000);
  ------------------
  |  Branch (99:12): [True: 633, False: 77.4k]
  ------------------
  100|  81.4k|}
dav1d_find_affine_int:
  153|  78.1k|{
  154|  78.1k|    int32_t *const mat = wm->matrix;
  155|  78.1k|    int a[2][2] = { { 0, 0 }, { 0, 0 } };
  156|  78.1k|    int bx[2] = { 0, 0 };
  157|  78.1k|    int by[2] = { 0, 0 };
  158|  78.1k|    const int rsuy = 2 * bh4 - 1;
  159|  78.1k|    const int rsux = 2 * bw4 - 1;
  160|  78.1k|    const int suy = rsuy * 8;
  161|  78.1k|    const int sux = rsux * 8;
  162|  78.1k|    const int duy = suy + mv.y;
  163|  78.1k|    const int dux = sux + mv.x;
  164|  78.1k|    const int isuy = by4 * 4 + rsuy;
  165|  78.1k|    const int isux = bx4 * 4 + rsux;
  166|       |
  167|   274k|    for (int i = 0; i < np; i++) {
  ------------------
  |  Branch (167:21): [True: 196k, False: 78.1k]
  ------------------
  168|   196k|        const int dx = pts[i][1][0] - dux;
  169|   196k|        const int dy = pts[i][1][1] - duy;
  170|   196k|        const int sx = pts[i][0][0] - sux;
  171|   196k|        const int sy = pts[i][0][1] - suy;
  172|   196k|        if (abs(sx - dx) < 256 && abs(sy - dy) < 256) {
  ------------------
  |  Branch (172:13): [True: 194k, False: 1.14k]
  |  Branch (172:35): [True: 193k, False: 1.15k]
  ------------------
  173|   193k|            a[0][0] += ((sx * sx) >> 2) + sx * 2 + 8;
  174|   193k|            a[0][1] += ((sx * sy) >> 2) + sx + sy + 4;
  175|   193k|            a[1][1] += ((sy * sy) >> 2) + sy * 2 + 8;
  176|   193k|            bx[0] += ((sx * dx) >> 2) + sx + dx + 8;
  177|   193k|            bx[1] += ((sy * dx) >> 2) + sy + dx + 4;
  178|   193k|            by[0] += ((sx * dy) >> 2) + sx + dy + 4;
  179|   193k|            by[1] += ((sy * dy) >> 2) + sy + dy + 8;
  180|   193k|        }
  181|   196k|    }
  182|       |
  183|       |    // compute determinant of a
  184|  78.1k|    const int64_t det = (int64_t) a[0][0] * a[1][1] - (int64_t) a[0][1] * a[0][1];
  185|  78.1k|    if (det == 0) return 1;
  ------------------
  |  Branch (185:9): [True: 2.30k, False: 75.8k]
  ------------------
  186|  75.8k|    int shift, idet = apply_sign64(resolve_divisor_64(llabs(det), &shift), det);
  187|  75.8k|    shift -= 16;
  188|  75.8k|    if (shift < 0) {
  ------------------
  |  Branch (188:9): [True: 0, False: 75.8k]
  ------------------
  189|      0|        idet <<= -shift;
  190|      0|        shift = 0;
  191|      0|    }
  192|       |
  193|       |    // solve the least-squares
  194|  75.8k|    mat[2] = get_mult_shift_diag((int64_t) a[1][1] * bx[0] -
  195|  75.8k|                                 (int64_t) a[0][1] * bx[1], idet, shift);
  196|  75.8k|    mat[3] = get_mult_shift_ndiag((int64_t) a[0][0] * bx[1] -
  197|  75.8k|                                  (int64_t) a[0][1] * bx[0], idet, shift);
  198|  75.8k|    mat[4] = get_mult_shift_ndiag((int64_t) a[1][1] * by[0] -
  199|  75.8k|                                  (int64_t) a[0][1] * by[1], idet, shift);
  200|  75.8k|    mat[5] = get_mult_shift_diag((int64_t) a[0][0] * by[1] -
  201|  75.8k|                                 (int64_t) a[0][1] * by[0], idet, shift);
  202|       |
  203|  75.8k|    mat[0] = iclip(mv.x * 0x2000 - (isux * (mat[2] - 0x10000) + isuy * mat[3]),
  204|  75.8k|                   -0x800000, 0x7fffff);
  205|  75.8k|    mat[1] = iclip(mv.y * 0x2000 - (isux * mat[4] + isuy * (mat[5] - 0x10000)),
  206|  75.8k|                   -0x800000, 0x7fffff);
  207|       |
  208|  75.8k|    return 0;
  209|  78.1k|}
warpmv.c:iclip_wmp:
   63|   325k|static inline int iclip_wmp(const int v) {
   64|   325k|    const int cv = iclip(v, INT16_MIN, INT16_MAX);
   65|       |
   66|   325k|    return apply_sign((abs(cv) + 32) >> 6, cv) * (1 << 6);
   67|   325k|}
warpmv.c:resolve_divisor_32:
   69|  81.4k|static inline int resolve_divisor_32(const unsigned d, int *const shift) {
   70|  81.4k|    *shift = ulog2(d);
   71|  81.4k|    const int e = d - (1 << *shift);
   72|  81.4k|    const int f = *shift > 8 ? (e + (1 << (*shift - 9))) >> (*shift - 8) :
  ------------------
  |  Branch (72:19): [True: 81.4k, False: 0]
  ------------------
   73|  81.4k|                               e << (8 - *shift);
   74|  81.4k|    assert(f <= 256);
  ------------------
  |  Branch (74:5): [True: 81.4k, False: 0]
  ------------------
   75|  81.4k|    *shift += 14;
   76|       |    // Use f as lookup into the precomputed table of multipliers
   77|  81.4k|    return div_lut[f];
   78|  81.4k|}
warpmv.c:resolve_divisor_64:
  102|  75.8k|static int resolve_divisor_64(const uint64_t d, int *const shift) {
  103|  75.8k|    *shift = u64log2(d);
  104|  75.8k|    const int64_t e = d - (1LL << *shift);
  105|  75.8k|    const int64_t f = *shift > 8 ? (e + (1LL << (*shift - 9))) >> (*shift - 8) :
  ------------------
  |  Branch (105:23): [True: 75.8k, False: 0]
  ------------------
  106|  75.8k|                                   e << (8 - *shift);
  107|  75.8k|    assert(f <= 256);
  ------------------
  |  Branch (107:5): [True: 75.8k, False: 0]
  ------------------
  108|  75.8k|    *shift += 14;
  109|       |    // Use f as lookup into the precomputed table of multipliers
  110|  75.8k|    return div_lut[f];
  111|  75.8k|}
warpmv.c:get_mult_shift_diag:
  125|   151k|{
  126|   151k|    const int64_t v1 = px * idet;
  127|   151k|    const int v2 = apply_sign64((int) ((llabs(v1) +
  128|   151k|                                        ((1LL << shift) >> 1)) >> shift),
  129|   151k|                                v1);
  130|   151k|    return iclip(v2, 0xe001, 0x11fff);
  131|   151k|}
warpmv.c:get_mult_shift_ndiag:
  115|   151k|{
  116|   151k|    const int64_t v1 = px * idet;
  117|   151k|    const int v2 = apply_sign64((int) ((llabs(v1) +
  118|   151k|                                        ((1LL << shift) >> 1)) >> shift),
  119|   151k|                                v1);
  120|   151k|    return iclip(v2, -0x1fff, 0x1fff);
  121|   151k|}

dav1d_init_ii_wedge_masks:
  207|      1|COLD void dav1d_init_ii_wedge_masks(void) {
  208|       |    // This function is guaranteed to be called only once
  209|       |
  210|      1|    enum WedgeMasterLineType {
  211|      1|        WEDGE_MASTER_LINE_ODD,
  212|      1|        WEDGE_MASTER_LINE_EVEN,
  213|      1|        WEDGE_MASTER_LINE_VERT,
  214|      1|        N_WEDGE_MASTER_LINES,
  215|      1|    };
  216|      1|    static const uint8_t wedge_master_border[N_WEDGE_MASTER_LINES][8] = {
  217|      1|        [WEDGE_MASTER_LINE_ODD]  = {  1,  2,  6, 18, 37, 53, 60, 63 },
  218|      1|        [WEDGE_MASTER_LINE_EVEN] = {  1,  4, 11, 27, 46, 58, 62, 63 },
  219|      1|        [WEDGE_MASTER_LINE_VERT] = {  0,  2,  7, 21, 43, 57, 62, 64 },
  220|      1|    };
  221|      1|    uint8_t master[6][64 * 64];
  222|       |
  223|       |    // create master templates
  224|     65|    for (int y = 0, off = 0; y < 64; y++, off += 64)
  ------------------
  |  Branch (224:30): [True: 64, False: 1]
  ------------------
  225|     64|        insert_border(&master[WEDGE_VERTICAL][off],
  226|     64|                      wedge_master_border[WEDGE_MASTER_LINE_VERT], 32);
  227|     33|    for (int y = 0, off = 0, ctr = 48; y < 64; y += 2, off += 128, ctr--)
  ------------------
  |  Branch (227:40): [True: 32, False: 1]
  ------------------
  228|     32|    {
  229|     32|        insert_border(&master[WEDGE_OBLIQUE63][off],
  230|     32|                      wedge_master_border[WEDGE_MASTER_LINE_EVEN], ctr);
  231|     32|        insert_border(&master[WEDGE_OBLIQUE63][off + 64],
  232|     32|                      wedge_master_border[WEDGE_MASTER_LINE_ODD], ctr - 1);
  233|     32|    }
  234|       |
  235|      1|    transpose(master[WEDGE_OBLIQUE27], master[WEDGE_OBLIQUE63]);
  236|      1|    transpose(master[WEDGE_HORIZONTAL], master[WEDGE_VERTICAL]);
  237|      1|    hflip(master[WEDGE_OBLIQUE117], master[WEDGE_OBLIQUE63]);
  238|      1|    hflip(master[WEDGE_OBLIQUE153], master[WEDGE_OBLIQUE27]);
  239|       |
  240|      1|#define fill(w, h, sz_422, sz_420, hvsw, signs) \
  241|      1|    fill2d_16x2(w, h, BS_##w##x##h - BS_32x32, \
  242|      1|                master, wedge_codebook_16_##hvsw, \
  243|      1|                dav1d_masks.wedge_444_##w##x##h, \
  244|      1|                dav1d_masks.wedge_422_##sz_422, \
  245|      1|                dav1d_masks.wedge_420_##sz_420, signs)
  246|       |
  247|      1|    fill(32, 32, 16x32, 16x16, heqw, 0x7bfb);
  ------------------
  |  |  241|      1|    fill2d_16x2(w, h, BS_##w##x##h - BS_32x32, \
  |  |  242|      1|                master, wedge_codebook_16_##hvsw, \
  |  |  243|      1|                dav1d_masks.wedge_444_##w##x##h, \
  |  |  244|      1|                dav1d_masks.wedge_422_##sz_422, \
  |  |  245|      1|                dav1d_masks.wedge_420_##sz_420, signs)
  ------------------
  248|      1|    fill(32, 16, 16x16, 16x8,  hltw, 0x7beb);
  ------------------
  |  |  241|      1|    fill2d_16x2(w, h, BS_##w##x##h - BS_32x32, \
  |  |  242|      1|                master, wedge_codebook_16_##hvsw, \
  |  |  243|      1|                dav1d_masks.wedge_444_##w##x##h, \
  |  |  244|      1|                dav1d_masks.wedge_422_##sz_422, \
  |  |  245|      1|                dav1d_masks.wedge_420_##sz_420, signs)
  ------------------
  249|      1|    fill(32,  8, 16x8,  16x4,  hltw, 0x6beb);
  ------------------
  |  |  241|      1|    fill2d_16x2(w, h, BS_##w##x##h - BS_32x32, \
  |  |  242|      1|                master, wedge_codebook_16_##hvsw, \
  |  |  243|      1|                dav1d_masks.wedge_444_##w##x##h, \
  |  |  244|      1|                dav1d_masks.wedge_422_##sz_422, \
  |  |  245|      1|                dav1d_masks.wedge_420_##sz_420, signs)
  ------------------
  250|      1|    fill(16, 32,  8x32,  8x16, hgtw, 0x7beb);
  ------------------
  |  |  241|      1|    fill2d_16x2(w, h, BS_##w##x##h - BS_32x32, \
  |  |  242|      1|                master, wedge_codebook_16_##hvsw, \
  |  |  243|      1|                dav1d_masks.wedge_444_##w##x##h, \
  |  |  244|      1|                dav1d_masks.wedge_422_##sz_422, \
  |  |  245|      1|                dav1d_masks.wedge_420_##sz_420, signs)
  ------------------
  251|      1|    fill(16, 16,  8x16,  8x8,  heqw, 0x7bfb);
  ------------------
  |  |  241|      1|    fill2d_16x2(w, h, BS_##w##x##h - BS_32x32, \
  |  |  242|      1|                master, wedge_codebook_16_##hvsw, \
  |  |  243|      1|                dav1d_masks.wedge_444_##w##x##h, \
  |  |  244|      1|                dav1d_masks.wedge_422_##sz_422, \
  |  |  245|      1|                dav1d_masks.wedge_420_##sz_420, signs)
  ------------------
  252|      1|    fill(16,  8,  8x8,   8x4,  hltw, 0x7beb);
  ------------------
  |  |  241|      1|    fill2d_16x2(w, h, BS_##w##x##h - BS_32x32, \
  |  |  242|      1|                master, wedge_codebook_16_##hvsw, \
  |  |  243|      1|                dav1d_masks.wedge_444_##w##x##h, \
  |  |  244|      1|                dav1d_masks.wedge_422_##sz_422, \
  |  |  245|      1|                dav1d_masks.wedge_420_##sz_420, signs)
  ------------------
  253|      1|    fill( 8, 32,  4x32,  4x16, hgtw, 0x7aeb);
  ------------------
  |  |  241|      1|    fill2d_16x2(w, h, BS_##w##x##h - BS_32x32, \
  |  |  242|      1|                master, wedge_codebook_16_##hvsw, \
  |  |  243|      1|                dav1d_masks.wedge_444_##w##x##h, \
  |  |  244|      1|                dav1d_masks.wedge_422_##sz_422, \
  |  |  245|      1|                dav1d_masks.wedge_420_##sz_420, signs)
  ------------------
  254|      1|    fill( 8, 16,  4x16,  4x8,  hgtw, 0x7beb);
  ------------------
  |  |  241|      1|    fill2d_16x2(w, h, BS_##w##x##h - BS_32x32, \
  |  |  242|      1|                master, wedge_codebook_16_##hvsw, \
  |  |  243|      1|                dav1d_masks.wedge_444_##w##x##h, \
  |  |  244|      1|                dav1d_masks.wedge_422_##sz_422, \
  |  |  245|      1|                dav1d_masks.wedge_420_##sz_420, signs)
  ------------------
  255|      1|    fill( 8,  8,  4x8,   4x4,  heqw, 0x7bfb);
  ------------------
  |  |  241|      1|    fill2d_16x2(w, h, BS_##w##x##h - BS_32x32, \
  |  |  242|      1|                master, wedge_codebook_16_##hvsw, \
  |  |  243|      1|                dav1d_masks.wedge_444_##w##x##h, \
  |  |  244|      1|                dav1d_masks.wedge_422_##sz_422, \
  |  |  245|      1|                dav1d_masks.wedge_420_##sz_420, signs)
  ------------------
  256|      1|#undef fill
  257|       |
  258|      1|    memset(dav1d_masks.ii_dc, 32, 32 * 32);
  259|      4|    for (int c = 0; c < 3; c++) {
  ------------------
  |  Branch (259:21): [True: 3, False: 1]
  ------------------
  260|      3|        dav1d_masks.offsets[c][BS_32x32-BS_32x32].ii[II_DC_PRED] =
  261|      3|        dav1d_masks.offsets[c][BS_32x16-BS_32x32].ii[II_DC_PRED] =
  262|      3|        dav1d_masks.offsets[c][BS_16x32-BS_32x32].ii[II_DC_PRED] =
  263|      3|        dav1d_masks.offsets[c][BS_16x16-BS_32x32].ii[II_DC_PRED] =
  264|      3|        dav1d_masks.offsets[c][BS_16x8 -BS_32x32].ii[II_DC_PRED] =
  265|      3|        dav1d_masks.offsets[c][BS_8x16 -BS_32x32].ii[II_DC_PRED] =
  266|      3|        dav1d_masks.offsets[c][BS_8x8  -BS_32x32].ii[II_DC_PRED] =
  267|      3|            MASK_OFFSET(dav1d_masks.ii_dc);
  ------------------
  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  ------------------
  268|      3|    }
  269|       |
  270|      1|#define BUILD_NONDC_II_MASKS(w, h, step) \
  271|      1|    build_nondc_ii_masks(dav1d_masks.ii_nondc_##w##x##h, w, h, step)
  272|       |
  273|      1|#define ASSIGN_NONDC_II_OFFSET(bs, w444, h444, w422, h422, w420, h420) \
  274|      1|    dav1d_masks.offsets[0][bs-BS_32x32].ii[p + 1] = \
  275|      1|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w444##x##h444[p*w444*h444]); \
  276|      1|    dav1d_masks.offsets[1][bs-BS_32x32].ii[p + 1] = \
  277|      1|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w422##x##h422[p*w422*h422]); \
  278|      1|    dav1d_masks.offsets[2][bs-BS_32x32].ii[p + 1] = \
  279|      1|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w420##x##h420[p*w420*h420])
  280|       |
  281|      1|    BUILD_NONDC_II_MASKS(32, 32, 1);
  ------------------
  |  |  271|      1|    build_nondc_ii_masks(dav1d_masks.ii_nondc_##w##x##h, w, h, step)
  ------------------
  282|      1|    BUILD_NONDC_II_MASKS(16, 32, 1);
  ------------------
  |  |  271|      1|    build_nondc_ii_masks(dav1d_masks.ii_nondc_##w##x##h, w, h, step)
  ------------------
  283|      1|    BUILD_NONDC_II_MASKS(16, 16, 2);
  ------------------
  |  |  271|      1|    build_nondc_ii_masks(dav1d_masks.ii_nondc_##w##x##h, w, h, step)
  ------------------
  284|      1|    BUILD_NONDC_II_MASKS( 8, 32, 1);
  ------------------
  |  |  271|      1|    build_nondc_ii_masks(dav1d_masks.ii_nondc_##w##x##h, w, h, step)
  ------------------
  285|      1|    BUILD_NONDC_II_MASKS( 8, 16, 2);
  ------------------
  |  |  271|      1|    build_nondc_ii_masks(dav1d_masks.ii_nondc_##w##x##h, w, h, step)
  ------------------
  286|      1|    BUILD_NONDC_II_MASKS( 8,  8, 4);
  ------------------
  |  |  271|      1|    build_nondc_ii_masks(dav1d_masks.ii_nondc_##w##x##h, w, h, step)
  ------------------
  287|      1|    BUILD_NONDC_II_MASKS( 4, 16, 2);
  ------------------
  |  |  271|      1|    build_nondc_ii_masks(dav1d_masks.ii_nondc_##w##x##h, w, h, step)
  ------------------
  288|      1|    BUILD_NONDC_II_MASKS( 4,  8, 4);
  ------------------
  |  |  271|      1|    build_nondc_ii_masks(dav1d_masks.ii_nondc_##w##x##h, w, h, step)
  ------------------
  289|      1|    BUILD_NONDC_II_MASKS( 4,  4, 8);
  ------------------
  |  |  271|      1|    build_nondc_ii_masks(dav1d_masks.ii_nondc_##w##x##h, w, h, step)
  ------------------
  290|      4|    for (int p = 0; p < 3; p++) {
  ------------------
  |  Branch (290:21): [True: 3, False: 1]
  ------------------
  291|      3|        ASSIGN_NONDC_II_OFFSET(BS_32x32, 32, 32, 16, 32, 16, 16);
  ------------------
  |  |  274|      3|    dav1d_masks.offsets[0][bs-BS_32x32].ii[p + 1] = \
  |  |  275|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w444##x##h444[p*w444*h444]); \
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  |  |  276|      3|    dav1d_masks.offsets[1][bs-BS_32x32].ii[p + 1] = \
  |  |  277|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w422##x##h422[p*w422*h422]); \
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  |  |  278|      3|    dav1d_masks.offsets[2][bs-BS_32x32].ii[p + 1] = \
  |  |  279|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w420##x##h420[p*w420*h420])
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  ------------------
  292|      3|        ASSIGN_NONDC_II_OFFSET(BS_32x16, 32, 32, 16, 16, 16, 16);
  ------------------
  |  |  274|      3|    dav1d_masks.offsets[0][bs-BS_32x32].ii[p + 1] = \
  |  |  275|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w444##x##h444[p*w444*h444]); \
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  |  |  276|      3|    dav1d_masks.offsets[1][bs-BS_32x32].ii[p + 1] = \
  |  |  277|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w422##x##h422[p*w422*h422]); \
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  |  |  278|      3|    dav1d_masks.offsets[2][bs-BS_32x32].ii[p + 1] = \
  |  |  279|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w420##x##h420[p*w420*h420])
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  ------------------
  293|      3|        ASSIGN_NONDC_II_OFFSET(BS_16x32, 16, 32,  8, 32,  8, 16);
  ------------------
  |  |  274|      3|    dav1d_masks.offsets[0][bs-BS_32x32].ii[p + 1] = \
  |  |  275|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w444##x##h444[p*w444*h444]); \
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  |  |  276|      3|    dav1d_masks.offsets[1][bs-BS_32x32].ii[p + 1] = \
  |  |  277|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w422##x##h422[p*w422*h422]); \
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  |  |  278|      3|    dav1d_masks.offsets[2][bs-BS_32x32].ii[p + 1] = \
  |  |  279|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w420##x##h420[p*w420*h420])
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  ------------------
  294|      3|        ASSIGN_NONDC_II_OFFSET(BS_16x16, 16, 16,  8, 16,  8,  8);
  ------------------
  |  |  274|      3|    dav1d_masks.offsets[0][bs-BS_32x32].ii[p + 1] = \
  |  |  275|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w444##x##h444[p*w444*h444]); \
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  |  |  276|      3|    dav1d_masks.offsets[1][bs-BS_32x32].ii[p + 1] = \
  |  |  277|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w422##x##h422[p*w422*h422]); \
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  |  |  278|      3|    dav1d_masks.offsets[2][bs-BS_32x32].ii[p + 1] = \
  |  |  279|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w420##x##h420[p*w420*h420])
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  ------------------
  295|      3|        ASSIGN_NONDC_II_OFFSET(BS_16x8,  16, 16,  8,  8,  8,  8);
  ------------------
  |  |  274|      3|    dav1d_masks.offsets[0][bs-BS_32x32].ii[p + 1] = \
  |  |  275|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w444##x##h444[p*w444*h444]); \
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  |  |  276|      3|    dav1d_masks.offsets[1][bs-BS_32x32].ii[p + 1] = \
  |  |  277|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w422##x##h422[p*w422*h422]); \
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  |  |  278|      3|    dav1d_masks.offsets[2][bs-BS_32x32].ii[p + 1] = \
  |  |  279|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w420##x##h420[p*w420*h420])
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  ------------------
  296|      3|        ASSIGN_NONDC_II_OFFSET(BS_8x16,   8, 16,  4, 16,  4,  8);
  ------------------
  |  |  274|      3|    dav1d_masks.offsets[0][bs-BS_32x32].ii[p + 1] = \
  |  |  275|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w444##x##h444[p*w444*h444]); \
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  |  |  276|      3|    dav1d_masks.offsets[1][bs-BS_32x32].ii[p + 1] = \
  |  |  277|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w422##x##h422[p*w422*h422]); \
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  |  |  278|      3|    dav1d_masks.offsets[2][bs-BS_32x32].ii[p + 1] = \
  |  |  279|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w420##x##h420[p*w420*h420])
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  ------------------
  297|      3|        ASSIGN_NONDC_II_OFFSET(BS_8x8,    8,  8,  4,  8,  4,  4);
  ------------------
  |  |  274|      3|    dav1d_masks.offsets[0][bs-BS_32x32].ii[p + 1] = \
  |  |  275|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w444##x##h444[p*w444*h444]); \
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  |  |  276|      3|    dav1d_masks.offsets[1][bs-BS_32x32].ii[p + 1] = \
  |  |  277|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w422##x##h422[p*w422*h422]); \
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  |  |  278|      3|    dav1d_masks.offsets[2][bs-BS_32x32].ii[p + 1] = \
  |  |  279|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w420##x##h420[p*w420*h420])
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  ------------------
  298|      3|    }
  299|      1|}
wedge.c:insert_border:
   90|    128|{
   91|    128|    if (ctr > 4) memset(dst, 0, ctr - 4);
  ------------------
  |  Branch (91:9): [True: 128, False: 0]
  ------------------
   92|    128|    memcpy(dst + imax(ctr, 4) - 4, src + imax(4 - ctr, 0), imin(64 - ctr, 8));
   93|    128|    if (ctr < 64 - 4)
  ------------------
  |  Branch (93:9): [True: 128, False: 0]
  ------------------
   94|    128|        memset(dst + ctr + 4, 64, 64 - 4 - ctr);
   95|    128|}
wedge.c:transpose:
   97|      2|static void transpose(uint8_t *const dst, const uint8_t *const src) {
   98|    130|    for (int y = 0, y_off = 0; y < 64; y++, y_off += 64)
  ------------------
  |  Branch (98:32): [True: 128, False: 2]
  ------------------
   99|  8.32k|        for (int x = 0, x_off = 0; x < 64; x++, x_off += 64)
  ------------------
  |  Branch (99:36): [True: 8.19k, False: 128]
  ------------------
  100|  8.19k|            dst[x_off + y] = src[y_off + x];
  101|      2|}
wedge.c:hflip:
  103|      2|static void hflip(uint8_t *const dst, const uint8_t *const src) {
  104|    130|    for (int y = 0, y_off = 0; y < 64; y++, y_off += 64)
  ------------------
  |  Branch (104:32): [True: 128, False: 2]
  ------------------
  105|  8.32k|        for (int x = 0; x < 64; x++)
  ------------------
  |  Branch (105:25): [True: 8.19k, False: 128]
  ------------------
  106|  8.19k|            dst[y_off + 64 - 1 - x] = src[y_off + x];
  107|      2|}
wedge.c:fill2d_16x2:
  153|      9|{
  154|      9|    const int n_stride_444 = (w * h);
  155|      9|    const int n_stride_422 = n_stride_444 >> 1;
  156|      9|    const int n_stride_420 = n_stride_444 >> 2;
  157|      9|    const int sign_stride_422 = 16 * n_stride_422;
  158|      9|    const int sign_stride_420 = 16 * n_stride_420;
  159|       |
  160|       |    // assign pointer offsets in lookup table
  161|    153|    for (int n = 0; n < 16; n++) {
  ------------------
  |  Branch (161:21): [True: 144, False: 9]
  ------------------
  162|    144|        const int sign = signs & 1;
  163|       |
  164|    144|        copy2d(masks_444, master[cb[n].direction], sign, w, h,
  165|    144|               32 - (w * cb[n].x_offset >> 3), 32 - (h * cb[n].y_offset >> 3));
  166|       |
  167|       |        // not using !sign is intentional here, since 444 does not require
  168|       |        // any rounding since no chroma subsampling is applied.
  169|    144|        dav1d_masks.offsets[0][bs].wedge[0][n] =
  170|    144|        dav1d_masks.offsets[0][bs].wedge[1][n] = MASK_OFFSET(masks_444);
  ------------------
  |  |  129|    144|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  ------------------
  171|       |
  172|    144|        dav1d_masks.offsets[1][bs].wedge[0][n] =
  173|    144|            init_chroma(&masks_422[ sign * sign_stride_422], masks_444, 0, w, h, 0);
  174|    144|        dav1d_masks.offsets[1][bs].wedge[1][n] =
  175|    144|            init_chroma(&masks_422[!sign * sign_stride_422], masks_444, 1, w, h, 0);
  176|    144|        dav1d_masks.offsets[2][bs].wedge[0][n] =
  177|    144|            init_chroma(&masks_420[ sign * sign_stride_420], masks_444, 0, w, h, 1);
  178|    144|        dav1d_masks.offsets[2][bs].wedge[1][n] =
  179|    144|            init_chroma(&masks_420[!sign * sign_stride_420], masks_444, 1, w, h, 1);
  180|       |
  181|    144|        signs >>= 1;
  182|    144|        masks_444 += n_stride_444;
  183|    144|        masks_422 += n_stride_422;
  184|    144|        masks_420 += n_stride_420;
  185|    144|    }
  186|      9|}
wedge.c:copy2d:
  111|    144|{
  112|    144|    src += y_off * 64 + x_off;
  113|    144|    if (sign) {
  ------------------
  |  Branch (113:9): [True: 109, False: 35]
  ------------------
  114|  2.14k|        for (int y = 0; y < h; y++) {
  ------------------
  |  Branch (114:25): [True: 2.03k, False: 109]
  ------------------
  115|  40.4k|            for (int x = 0; x < w; x++)
  ------------------
  |  Branch (115:29): [True: 38.4k, False: 2.03k]
  ------------------
  116|  38.4k|                dst[x] = 64 - src[x];
  117|  2.03k|            src += 64;
  118|  2.03k|            dst += w;
  119|  2.03k|        }
  120|    109|    } else {
  121|    691|        for (int y = 0; y < h; y++) {
  ------------------
  |  Branch (121:25): [True: 656, False: 35]
  ------------------
  122|    656|            memcpy(dst, src, w);
  123|    656|            src += 64;
  124|    656|            dst += w;
  125|    656|        }
  126|     35|    }
  127|    144|}
wedge.c:init_chroma:
  134|    576|{
  135|    576|    const uint16_t offset = MASK_OFFSET(chroma);
  ------------------
  |  |  129|    576|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  ------------------
  136|  8.64k|    for (int y = 0; y < h; y += 1 + ss_ver) {
  ------------------
  |  Branch (136:21): [True: 8.06k, False: 576]
  ------------------
  137|  83.3k|        for (int x = 0; x < w; x += 2) {
  ------------------
  |  Branch (137:25): [True: 75.2k, False: 8.06k]
  ------------------
  138|  75.2k|            int sum = luma[x] + luma[x + 1] + 1;
  139|  75.2k|            if (ss_ver) sum += luma[w + x] + luma[w + x + 1] + 1;
  ------------------
  |  Branch (139:17): [True: 25.0k, False: 50.1k]
  ------------------
  140|  75.2k|            chroma[x >> 1] = (sum - sign) >> (1 + ss_ver);
  141|  75.2k|        }
  142|  8.06k|        luma += w << ss_ver;
  143|  8.06k|        chroma += w >> 1;
  144|  8.06k|    }
  145|    576|    return offset;
  146|    576|}
wedge.c:build_nondc_ii_masks:
  190|      9|{
  191|      9|    static const uint8_t ii_weights_1d[32] = {
  192|      9|        60, 52, 45, 39, 34, 30, 26, 22, 19, 17, 15, 13, 11, 10,  8,  7,
  193|      9|         6,  6,  5,  4,  4,  3,  3,  2,  2,  2,  2,  1,  1,  1,  1,  1,
  194|      9|    };
  195|       |
  196|      9|    uint8_t *const mask_h  = &mask_v[w * h];
  197|      9|    uint8_t *const mask_sm = &mask_h[w * h];
  198|    173|    for (int y = 0, off = 0; y < h; y++, off += w) {
  ------------------
  |  Branch (198:30): [True: 164, False: 9]
  ------------------
  199|    164|        memset(&mask_v[off], ii_weights_1d[y * step], w);
  200|  2.51k|        for (int x = 0; x < w; x++) {
  ------------------
  |  Branch (200:25): [True: 2.35k, False: 164]
  ------------------
  201|  2.35k|            mask_sm[off + x] = ii_weights_1d[imin(x, y) * step];
  202|  2.35k|            mask_h[off + x] = ii_weights_1d[x * step];
  203|  2.35k|        }
  204|    164|    }
  205|      9|}

cdef_tmpl.c:cdef_dsp_init_x86:
   46|  3.47k|static ALWAYS_INLINE void cdef_dsp_init_x86(Dav1dCdefDSPContext *const c) {
   47|  3.47k|    const unsigned flags = dav1d_get_cpu_flags();
   48|       |
   49|  3.47k|#if BITDEPTH == 8
   50|  3.47k|    if (!(flags & DAV1D_X86_CPU_FLAG_SSE2)) return;
  ------------------
  |  Branch (50:9): [True: 0, False: 3.47k]
  ------------------
   51|       |
   52|  3.47k|    c->fb[0] = BF(dav1d_cdef_filter_8x8, sse2);
  ------------------
  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   53|  3.47k|    c->fb[1] = BF(dav1d_cdef_filter_4x8, sse2);
  ------------------
  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   54|  3.47k|    c->fb[2] = BF(dav1d_cdef_filter_4x4, sse2);
  ------------------
  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   55|  3.47k|#endif
   56|       |
   57|  3.47k|    if (!(flags & DAV1D_X86_CPU_FLAG_SSSE3)) return;
  ------------------
  |  Branch (57:9): [True: 0, False: 3.47k]
  ------------------
   58|       |
   59|  3.47k|    c->dir = BF(dav1d_cdef_dir, ssse3);
  ------------------
  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   60|  3.47k|    c->fb[0] = BF(dav1d_cdef_filter_8x8, ssse3);
  ------------------
  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   61|  3.47k|    c->fb[1] = BF(dav1d_cdef_filter_4x8, ssse3);
  ------------------
  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   62|  3.47k|    c->fb[2] = BF(dav1d_cdef_filter_4x4, ssse3);
  ------------------
  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   63|       |
   64|  3.47k|    if (!(flags & DAV1D_X86_CPU_FLAG_SSE41)) return;
  ------------------
  |  Branch (64:9): [True: 0, False: 3.47k]
  ------------------
   65|       |
   66|  3.47k|    c->dir = BF(dav1d_cdef_dir, sse4);
  ------------------
  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   67|  3.47k|#if BITDEPTH == 8
   68|  3.47k|    c->fb[0] = BF(dav1d_cdef_filter_8x8, sse4);
  ------------------
  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   69|  3.47k|    c->fb[1] = BF(dav1d_cdef_filter_4x8, sse4);
  ------------------
  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   70|  3.47k|    c->fb[2] = BF(dav1d_cdef_filter_4x4, sse4);
  ------------------
  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   71|  3.47k|#endif
   72|       |
   73|  3.47k|#if ARCH_X86_64
   74|  3.47k|    if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return;
  ------------------
  |  Branch (74:9): [True: 0, False: 3.47k]
  ------------------
   75|       |
   76|  3.47k|    c->dir = BF(dav1d_cdef_dir, avx2);
  ------------------
  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   77|  3.47k|    c->fb[0] = BF(dav1d_cdef_filter_8x8, avx2);
  ------------------
  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   78|  3.47k|    c->fb[1] = BF(dav1d_cdef_filter_4x8, avx2);
  ------------------
  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   79|  3.47k|    c->fb[2] = BF(dav1d_cdef_filter_4x4, avx2);
  ------------------
  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   80|       |
   81|  3.47k|    if (!(flags & DAV1D_X86_CPU_FLAG_AVX512ICL)) return;
  ------------------
  |  Branch (81:9): [True: 3.47k, False: 0]
  ------------------
   82|       |
   83|      0|    c->fb[0] = BF(dav1d_cdef_filter_8x8, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   84|      0|    c->fb[1] = BF(dav1d_cdef_filter_4x8, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   85|      0|    c->fb[2] = BF(dav1d_cdef_filter_4x4, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   86|      0|#endif
   87|      0|}

dav1d_get_cpu_flags_x86:
   47|      1|COLD unsigned dav1d_get_cpu_flags_x86(void) {
   48|      1|    union {
   49|      1|        CpuidRegisters r;
   50|      1|        struct {
   51|      1|            uint32_t max_leaf;
   52|      1|            char vendor[12];
   53|      1|        };
   54|      1|    } cpu;
   55|      1|    dav1d_cpu_cpuid(&cpu.r, 0, 0);
   56|      1|    unsigned flags = dav1d_get_default_cpu_flags();
   57|       |
   58|      1|    if (cpu.max_leaf >= 1) {
  ------------------
  |  Branch (58:9): [True: 1, False: 0]
  ------------------
   59|      1|        CpuidRegisters r;
   60|      1|        dav1d_cpu_cpuid(&r, 1, 0);
   61|      1|        const unsigned family = ((r.eax >> 8) & 0x0f) + ((r.eax >> 20) & 0xff);
   62|       |
   63|      1|        if (X(r.edx, 0x06008000)) /* CMOV/SSE/SSE2 */ {
  ------------------
  |  |   45|      1|#define X(reg, mask) (((reg) & (mask)) == (mask))
  |  |  ------------------
  |  |  |  Branch (45:22): [True: 1, False: 0]
  |  |  ------------------
  ------------------
   64|      1|            flags |= DAV1D_X86_CPU_FLAG_SSE2;
   65|      1|            if (X(r.ecx, 0x00000201)) /* SSE3/SSSE3 */ {
  ------------------
  |  |   45|      1|#define X(reg, mask) (((reg) & (mask)) == (mask))
  |  |  ------------------
  |  |  |  Branch (45:22): [True: 1, False: 0]
  |  |  ------------------
  ------------------
   66|      1|                flags |= DAV1D_X86_CPU_FLAG_SSSE3;
   67|      1|                if (X(r.ecx, 0x00080000)) /* SSE4.1 */
  ------------------
  |  |   45|      1|#define X(reg, mask) (((reg) & (mask)) == (mask))
  |  |  ------------------
  |  |  |  Branch (45:22): [True: 1, False: 0]
  |  |  ------------------
  ------------------
   68|      1|                    flags |= DAV1D_X86_CPU_FLAG_SSE41;
   69|      1|            }
   70|      1|        }
   71|      1|#if ARCH_X86_64
   72|       |        /* We only support >128-bit SIMD on x86-64. */
   73|      1|        if (X(r.ecx, 0x18000000)) /* OSXSAVE/AVX */ {
  ------------------
  |  |   45|      1|#define X(reg, mask) (((reg) & (mask)) == (mask))
  |  |  ------------------
  |  |  |  Branch (45:22): [True: 1, False: 0]
  |  |  ------------------
  ------------------
   74|      1|            const uint64_t xcr0 = dav1d_cpu_xgetbv(0);
   75|      1|            if (X(xcr0, 0x00000006)) /* XMM/YMM */ {
  ------------------
  |  |   45|      1|#define X(reg, mask) (((reg) & (mask)) == (mask))
  |  |  ------------------
  |  |  |  Branch (45:22): [True: 1, False: 0]
  |  |  ------------------
  ------------------
   76|      1|                if (cpu.max_leaf >= 7) {
  ------------------
  |  Branch (76:21): [True: 1, False: 0]
  ------------------
   77|      1|                    dav1d_cpu_cpuid(&r, 7, 0);
   78|      1|                    if (X(r.ebx, 0x00000128)) /* BMI1/BMI2/AVX2 */ {
  ------------------
  |  |   45|      1|#define X(reg, mask) (((reg) & (mask)) == (mask))
  |  |  ------------------
  |  |  |  Branch (45:22): [True: 1, False: 0]
  |  |  ------------------
  ------------------
   79|      1|                        flags |= DAV1D_X86_CPU_FLAG_AVX2;
   80|      1|                        if (X(xcr0, 0x000000e0)) /* ZMM/OPMASK */ {
  ------------------
  |  |   45|      1|#define X(reg, mask) (((reg) & (mask)) == (mask))
  |  |  ------------------
  |  |  |  Branch (45:22): [True: 0, False: 1]
  |  |  ------------------
  ------------------
   81|      0|                            if (X(r.ebx, 0xd0230000) && X(r.ecx, 0x00005f42))
  ------------------
  |  |   45|      0|#define X(reg, mask) (((reg) & (mask)) == (mask))
  |  |  ------------------
  |  |  |  Branch (45:22): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                                          if (X(r.ebx, 0xd0230000) && X(r.ecx, 0x00005f42))
  ------------------
  |  |   45|      0|#define X(reg, mask) (((reg) & (mask)) == (mask))
  |  |  ------------------
  |  |  |  Branch (45:22): [True: 0, False: 0]
  |  |  ------------------
  ------------------
   82|      0|                                flags |= DAV1D_X86_CPU_FLAG_AVX512ICL;
   83|      0|                        }
   84|      1|                    }
   85|      1|                }
   86|      1|            }
   87|      1|        }
   88|      1|#endif
   89|      1|        if (!memcmp(cpu.vendor, "AuthenticAMD", sizeof(cpu.vendor))) {
  ------------------
  |  Branch (89:13): [True: 1, False: 0]
  ------------------
   90|      1|            if ((flags & DAV1D_X86_CPU_FLAG_AVX2) && family <= 0x19) {
  ------------------
  |  Branch (90:17): [True: 1, False: 0]
  |  Branch (90:54): [True: 1, False: 0]
  ------------------
   91|       |                /* Excavator, Zen, Zen+, Zen 2, Zen 3, Zen 3+, Zen 4 */
   92|      1|                flags |= DAV1D_X86_CPU_FLAG_SLOW_GATHER;
   93|      1|            }
   94|      1|        }
   95|      1|    }
   96|       |
   97|      1|    return flags;
   98|      1|}

filmgrain_tmpl.c:film_grain_dsp_init_x86:
   45|  8.15k|static ALWAYS_INLINE void film_grain_dsp_init_x86(Dav1dFilmGrainDSPContext *const c) {
   46|  8.15k|    const unsigned flags = dav1d_get_cpu_flags();
   47|       |
   48|  8.15k|    if (!(flags & DAV1D_X86_CPU_FLAG_SSSE3)) return;
  ------------------
  |  Branch (48:9): [True: 0, False: 8.15k]
  ------------------
   49|       |
   50|  8.15k|    c->generate_grain_y = BF(dav1d_generate_grain_y, ssse3);
  ------------------
  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   51|  8.15k|    c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I420 - 1] = BF(dav1d_generate_grain_uv_420, ssse3);
  ------------------
  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   52|  8.15k|    c->fgy_32x32xn = BF(dav1d_fgy_32x32xn, ssse3);
  ------------------
  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   53|  8.15k|    c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I420 - 1] = BF(dav1d_fguv_32x32xn_i420, ssse3);
  ------------------
  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   54|  8.15k|    c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I422 - 1] = BF(dav1d_generate_grain_uv_422, ssse3);
  ------------------
  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   55|  8.15k|    c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I444 - 1] = BF(dav1d_generate_grain_uv_444, ssse3);
  ------------------
  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   56|  8.15k|    c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I422 - 1] = BF(dav1d_fguv_32x32xn_i422, ssse3);
  ------------------
  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   57|  8.15k|    c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I444 - 1] = BF(dav1d_fguv_32x32xn_i444, ssse3);
  ------------------
  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   58|       |
   59|  8.15k|#if ARCH_X86_64
   60|  8.15k|    if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return;
  ------------------
  |  Branch (60:9): [True: 0, False: 8.15k]
  ------------------
   61|       |
   62|  8.15k|    c->generate_grain_y = BF(dav1d_generate_grain_y, avx2);
  ------------------
  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   63|  8.15k|    c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I420 - 1] = BF(dav1d_generate_grain_uv_420, avx2);
  ------------------
  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   64|  8.15k|    c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I422 - 1] = BF(dav1d_generate_grain_uv_422, avx2);
  ------------------
  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   65|  8.15k|    c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I444 - 1] = BF(dav1d_generate_grain_uv_444, avx2);
  ------------------
  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   66|       |
   67|  8.15k|    if (!(flags & DAV1D_X86_CPU_FLAG_SLOW_GATHER)) {
  ------------------
  |  Branch (67:9): [True: 0, False: 8.15k]
  ------------------
   68|      0|        c->fgy_32x32xn = BF(dav1d_fgy_32x32xn, avx2);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   69|      0|        c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I420 - 1] = BF(dav1d_fguv_32x32xn_i420, avx2);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   70|      0|        c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I422 - 1] = BF(dav1d_fguv_32x32xn_i422, avx2);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   71|      0|        c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I444 - 1] = BF(dav1d_fguv_32x32xn_i444, avx2);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   72|      0|    }
   73|       |
   74|  8.15k|    if (!(flags & DAV1D_X86_CPU_FLAG_AVX512ICL)) return;
  ------------------
  |  Branch (74:9): [True: 8.15k, False: 0]
  ------------------
   75|       |
   76|      0|    if (BITDEPTH == 8 || !(flags & DAV1D_X86_CPU_FLAG_SLOW_GATHER)) {
  ------------------
  |  Branch (76:9): [True: 0, Folded]
  |  Branch (76:26): [True: 0, False: 0]
  ------------------
   77|      0|        c->fgy_32x32xn = BF(dav1d_fgy_32x32xn, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   78|      0|        c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I420 - 1] = BF(dav1d_fguv_32x32xn_i420, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   79|      0|        c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I422 - 1] = BF(dav1d_fguv_32x32xn_i422, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   80|      0|        c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I444 - 1] = BF(dav1d_fguv_32x32xn_i444, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   81|      0|    }
   82|      0|#endif
   83|      0|}

ipred_tmpl.c:intra_pred_dsp_init_x86:
   71|  8.15k|static ALWAYS_INLINE void intra_pred_dsp_init_x86(Dav1dIntraPredDSPContext *const c) {
   72|  8.15k|    const unsigned flags = dav1d_get_cpu_flags();
   73|       |
   74|  8.15k|    if (!(flags & DAV1D_X86_CPU_FLAG_SSSE3)) return;
  ------------------
  |  Branch (74:9): [True: 0, False: 8.15k]
  ------------------
   75|       |
   76|  8.15k|    init_angular_ipred_fn(DC_PRED,       ipred_dc,       ssse3);
  ------------------
  |  |   39|  8.15k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.15k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   77|  8.15k|    init_angular_ipred_fn(DC_128_PRED,   ipred_dc_128,   ssse3);
  ------------------
  |  |   39|  8.15k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.15k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   78|  8.15k|    init_angular_ipred_fn(TOP_DC_PRED,   ipred_dc_top,   ssse3);
  ------------------
  |  |   39|  8.15k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.15k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   79|  8.15k|    init_angular_ipred_fn(LEFT_DC_PRED,  ipred_dc_left,  ssse3);
  ------------------
  |  |   39|  8.15k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.15k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   80|  8.15k|    init_angular_ipred_fn(HOR_PRED,      ipred_h,        ssse3);
  ------------------
  |  |   39|  8.15k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.15k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   81|  8.15k|    init_angular_ipred_fn(VERT_PRED,     ipred_v,        ssse3);
  ------------------
  |  |   39|  8.15k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.15k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   82|  8.15k|    init_angular_ipred_fn(PAETH_PRED,    ipred_paeth,    ssse3);
  ------------------
  |  |   39|  8.15k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.15k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   83|  8.15k|    init_angular_ipred_fn(SMOOTH_PRED,   ipred_smooth,   ssse3);
  ------------------
  |  |   39|  8.15k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.15k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   84|  8.15k|    init_angular_ipred_fn(SMOOTH_H_PRED, ipred_smooth_h, ssse3);
  ------------------
  |  |   39|  8.15k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.15k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   85|  8.15k|    init_angular_ipred_fn(SMOOTH_V_PRED, ipred_smooth_v, ssse3);
  ------------------
  |  |   39|  8.15k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.15k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   86|  8.15k|    init_angular_ipred_fn(Z1_PRED,       ipred_z1,       ssse3);
  ------------------
  |  |   39|  8.15k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.15k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   87|  8.15k|    init_angular_ipred_fn(Z2_PRED,       ipred_z2,       ssse3);
  ------------------
  |  |   39|  8.15k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.15k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   88|  8.15k|    init_angular_ipred_fn(Z3_PRED,       ipred_z3,       ssse3);
  ------------------
  |  |   39|  8.15k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.15k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   89|  8.15k|    init_angular_ipred_fn(FILTER_PRED,   ipred_filter,   ssse3);
  ------------------
  |  |   39|  8.15k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.15k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   90|       |
   91|  8.15k|    init_cfl_pred_fn(DC_PRED,      ipred_cfl,      ssse3);
  ------------------
  |  |   41|  8.15k|    init_fn(cfl_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.15k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   92|  8.15k|    init_cfl_pred_fn(DC_128_PRED,  ipred_cfl_128,  ssse3);
  ------------------
  |  |   41|  8.15k|    init_fn(cfl_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.15k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   93|  8.15k|    init_cfl_pred_fn(TOP_DC_PRED,  ipred_cfl_top,  ssse3);
  ------------------
  |  |   41|  8.15k|    init_fn(cfl_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.15k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   94|  8.15k|    init_cfl_pred_fn(LEFT_DC_PRED, ipred_cfl_left, ssse3);
  ------------------
  |  |   41|  8.15k|    init_fn(cfl_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.15k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   95|       |
   96|  8.15k|    init_cfl_ac_fn(DAV1D_PIXEL_LAYOUT_I420 - 1, ipred_cfl_ac_420, ssse3);
  ------------------
  |  |   43|  8.15k|    init_fn(cfl_ac, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.15k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   97|  8.15k|    init_cfl_ac_fn(DAV1D_PIXEL_LAYOUT_I422 - 1, ipred_cfl_ac_422, ssse3);
  ------------------
  |  |   43|  8.15k|    init_fn(cfl_ac, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.15k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   98|  8.15k|    init_cfl_ac_fn(DAV1D_PIXEL_LAYOUT_I444 - 1, ipred_cfl_ac_444, ssse3);
  ------------------
  |  |   43|  8.15k|    init_fn(cfl_ac, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.15k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   99|       |
  100|  8.15k|    c->pal_pred = BF(dav1d_pal_pred, ssse3);
  ------------------
  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  101|       |
  102|  8.15k|#if ARCH_X86_64
  103|  8.15k|    if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return;
  ------------------
  |  Branch (103:9): [True: 0, False: 8.15k]
  ------------------
  104|       |
  105|  8.15k|    init_angular_ipred_fn(DC_PRED,       ipred_dc,       avx2);
  ------------------
  |  |   39|  8.15k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.15k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  106|  8.15k|    init_angular_ipred_fn(DC_128_PRED,   ipred_dc_128,   avx2);
  ------------------
  |  |   39|  8.15k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.15k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  107|  8.15k|    init_angular_ipred_fn(TOP_DC_PRED,   ipred_dc_top,   avx2);
  ------------------
  |  |   39|  8.15k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.15k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  108|  8.15k|    init_angular_ipred_fn(LEFT_DC_PRED,  ipred_dc_left,  avx2);
  ------------------
  |  |   39|  8.15k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.15k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  109|  8.15k|    init_angular_ipred_fn(HOR_PRED,      ipred_h,        avx2);
  ------------------
  |  |   39|  8.15k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.15k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  110|  8.15k|    init_angular_ipred_fn(VERT_PRED,     ipred_v,        avx2);
  ------------------
  |  |   39|  8.15k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.15k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  111|  8.15k|    init_angular_ipred_fn(PAETH_PRED,    ipred_paeth,    avx2);
  ------------------
  |  |   39|  8.15k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.15k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  112|  8.15k|    init_angular_ipred_fn(SMOOTH_PRED,   ipred_smooth,   avx2);
  ------------------
  |  |   39|  8.15k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.15k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  113|  8.15k|    init_angular_ipred_fn(SMOOTH_H_PRED, ipred_smooth_h, avx2);
  ------------------
  |  |   39|  8.15k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.15k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  114|  8.15k|    init_angular_ipred_fn(SMOOTH_V_PRED, ipred_smooth_v, avx2);
  ------------------
  |  |   39|  8.15k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.15k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  115|  8.15k|    init_angular_ipred_fn(Z1_PRED,       ipred_z1,       avx2);
  ------------------
  |  |   39|  8.15k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.15k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  116|  8.15k|    init_angular_ipred_fn(Z2_PRED,       ipred_z2,       avx2);
  ------------------
  |  |   39|  8.15k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.15k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  117|  8.15k|    init_angular_ipred_fn(Z3_PRED,       ipred_z3,       avx2);
  ------------------
  |  |   39|  8.15k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.15k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  118|  8.15k|    init_angular_ipred_fn(FILTER_PRED,   ipred_filter,   avx2);
  ------------------
  |  |   39|  8.15k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.15k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  119|       |
  120|  8.15k|    init_cfl_pred_fn(DC_PRED,      ipred_cfl,      avx2);
  ------------------
  |  |   41|  8.15k|    init_fn(cfl_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.15k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  121|  8.15k|    init_cfl_pred_fn(DC_128_PRED,  ipred_cfl_128,  avx2);
  ------------------
  |  |   41|  8.15k|    init_fn(cfl_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.15k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  122|  8.15k|    init_cfl_pred_fn(TOP_DC_PRED,  ipred_cfl_top,  avx2);
  ------------------
  |  |   41|  8.15k|    init_fn(cfl_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.15k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  123|  8.15k|    init_cfl_pred_fn(LEFT_DC_PRED, ipred_cfl_left, avx2);
  ------------------
  |  |   41|  8.15k|    init_fn(cfl_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.15k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  124|       |
  125|  8.15k|    init_cfl_ac_fn(DAV1D_PIXEL_LAYOUT_I420 - 1, ipred_cfl_ac_420, avx2);
  ------------------
  |  |   43|  8.15k|    init_fn(cfl_ac, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.15k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  126|  8.15k|    init_cfl_ac_fn(DAV1D_PIXEL_LAYOUT_I422 - 1, ipred_cfl_ac_422, avx2);
  ------------------
  |  |   43|  8.15k|    init_fn(cfl_ac, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.15k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  127|  8.15k|    init_cfl_ac_fn(DAV1D_PIXEL_LAYOUT_I444 - 1, ipred_cfl_ac_444, avx2);
  ------------------
  |  |   43|  8.15k|    init_fn(cfl_ac, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.15k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  128|       |
  129|  8.15k|    c->pal_pred = BF(dav1d_pal_pred, avx2);
  ------------------
  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  130|       |
  131|  8.15k|    if (!(flags & DAV1D_X86_CPU_FLAG_AVX512ICL)) return;
  ------------------
  |  Branch (131:9): [True: 8.15k, False: 0]
  ------------------
  132|       |
  133|      0|#if BITDEPTH == 8
  134|      0|    init_angular_ipred_fn(DC_PRED,       ipred_dc,       avx512icl);
  ------------------
  |  |   39|      0|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  8.15k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  135|      0|    init_angular_ipred_fn(DC_128_PRED,   ipred_dc_128,   avx512icl);
  ------------------
  |  |   39|      0|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|      0|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  136|      0|    init_angular_ipred_fn(TOP_DC_PRED,   ipred_dc_top,   avx512icl);
  ------------------
  |  |   39|      0|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|      0|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  137|      0|    init_angular_ipred_fn(LEFT_DC_PRED,  ipred_dc_left,  avx512icl);
  ------------------
  |  |   39|      0|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|      0|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  138|      0|    init_angular_ipred_fn(HOR_PRED,      ipred_h,        avx512icl);
  ------------------
  |  |   39|      0|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|      0|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  139|      0|    init_angular_ipred_fn(VERT_PRED,     ipred_v,        avx512icl);
  ------------------
  |  |   39|      0|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|      0|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  140|      0|    init_angular_ipred_fn(Z2_PRED,       ipred_z2,       avx512icl);
  ------------------
  |  |   39|      0|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|      0|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  141|      0|#endif
  142|      0|    init_angular_ipred_fn(PAETH_PRED,    ipred_paeth,    avx512icl);
  ------------------
  |  |   39|      0|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|      0|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  143|      0|    init_angular_ipred_fn(SMOOTH_PRED,   ipred_smooth,   avx512icl);
  ------------------
  |  |   39|      0|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|      0|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  144|      0|    init_angular_ipred_fn(SMOOTH_H_PRED, ipred_smooth_h, avx512icl);
  ------------------
  |  |   39|      0|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|      0|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  145|      0|    init_angular_ipred_fn(SMOOTH_V_PRED, ipred_smooth_v, avx512icl);
  ------------------
  |  |   39|      0|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|      0|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  146|      0|    init_angular_ipred_fn(Z1_PRED,       ipred_z1,       avx512icl);
  ------------------
  |  |   39|      0|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|      0|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  147|      0|    init_angular_ipred_fn(Z2_PRED,       ipred_z2,       avx512icl);
  ------------------
  |  |   39|      0|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|      0|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  148|      0|    init_angular_ipred_fn(Z3_PRED,       ipred_z3,       avx512icl);
  ------------------
  |  |   39|      0|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|      0|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  149|      0|    init_angular_ipred_fn(FILTER_PRED,   ipred_filter,   avx512icl);
  ------------------
  |  |   39|      0|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|      0|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  150|       |
  151|      0|    c->pal_pred = BF(dav1d_pal_pred, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  152|      0|#endif
  153|      0|}

itx_tmpl.c:itx_dsp_init_x86:
  112|  3.47k|{
  113|  3.47k|#define assign_itx_bpc_fn(pfx, w, h, type, type_enum, bpc, ext) \
  114|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  115|  3.47k|        BF_BPC(dav1d_inv_txfm_add_##type##_##w##x##h, bpc, ext)
  116|       |
  117|  3.47k|#define assign_itx1_bpc_fn(pfx, w, h, bpc, ext) \
  118|  3.47k|    assign_itx_bpc_fn(pfx, w, h, dct_dct,           DCT_DCT,           bpc, ext)
  119|       |
  120|  3.47k|#define assign_itx2_bpc_fn(pfx, w, h, bpc, ext) \
  121|  3.47k|    assign_itx1_bpc_fn(pfx, w, h, bpc, ext); \
  122|  3.47k|    assign_itx_bpc_fn(pfx, w, h, identity_identity, IDTX,              bpc, ext)
  123|       |
  124|  3.47k|#define assign_itx12_bpc_fn(pfx, w, h, bpc, ext) \
  125|  3.47k|    assign_itx2_bpc_fn(pfx, w, h, bpc, ext); \
  126|  3.47k|    assign_itx_bpc_fn(pfx, w, h, dct_adst,          ADST_DCT,          bpc, ext); \
  127|  3.47k|    assign_itx_bpc_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      bpc, ext); \
  128|  3.47k|    assign_itx_bpc_fn(pfx, w, h, dct_identity,      H_DCT,             bpc, ext); \
  129|  3.47k|    assign_itx_bpc_fn(pfx, w, h, adst_dct,          DCT_ADST,          bpc, ext); \
  130|  3.47k|    assign_itx_bpc_fn(pfx, w, h, adst_adst,         ADST_ADST,         bpc, ext); \
  131|  3.47k|    assign_itx_bpc_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     bpc, ext); \
  132|  3.47k|    assign_itx_bpc_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      bpc, ext); \
  133|  3.47k|    assign_itx_bpc_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     bpc, ext); \
  134|  3.47k|    assign_itx_bpc_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, bpc, ext); \
  135|  3.47k|    assign_itx_bpc_fn(pfx, w, h, identity_dct,      V_DCT,             bpc, ext)
  136|       |
  137|  3.47k|#define assign_itx16_bpc_fn(pfx, w, h, bpc, ext) \
  138|  3.47k|    assign_itx12_bpc_fn(pfx, w, h, bpc, ext); \
  139|  3.47k|    assign_itx_bpc_fn(pfx, w, h, adst_identity,     H_ADST,            bpc, ext); \
  140|  3.47k|    assign_itx_bpc_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        bpc, ext); \
  141|  3.47k|    assign_itx_bpc_fn(pfx, w, h, identity_adst,     V_ADST,            bpc, ext); \
  142|  3.47k|    assign_itx_bpc_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        bpc, ext)
  143|       |
  144|  3.47k|    const unsigned flags = dav1d_get_cpu_flags();
  145|       |
  146|  3.47k|    if (!(flags & DAV1D_X86_CPU_FLAG_SSE2)) return;
  ------------------
  |  Branch (146:9): [True: 0, False: 3.47k]
  ------------------
  147|       |
  148|  3.47k|    assign_itx_fn(, 4, 4, wht_wht, WHT_WHT, sse2);
  ------------------
  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  ------------------
  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  149|       |
  150|  3.47k|    if (!(flags & DAV1D_X86_CPU_FLAG_SSSE3)) return;
  ------------------
  |  Branch (150:9): [True: 0, False: 3.47k]
  ------------------
  151|       |
  152|  3.47k|#if BITDEPTH == 8
  153|  3.47k|    assign_itx16_fn(,   4,  4, ssse3);
  ------------------
  |  |  101|  3.47k|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|  3.47k|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|  3.47k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|  3.47k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  3.47k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|  3.47k|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|  3.47k|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|  3.47k|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|  3.47k|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|  3.47k|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|  3.47k|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|  3.47k|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|  3.47k|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|  3.47k|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|  3.47k|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  154|  3.47k|    assign_itx16_fn(R,  4,  8, ssse3);
  ------------------
  |  |  101|  3.47k|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|  3.47k|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|  3.47k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|  3.47k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  3.47k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|  3.47k|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|  3.47k|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|  3.47k|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|  3.47k|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|  3.47k|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|  3.47k|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|  3.47k|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|  3.47k|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|  3.47k|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|  3.47k|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  155|  3.47k|    assign_itx16_fn(R,  8,  4, ssse3);
  ------------------
  |  |  101|  3.47k|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|  3.47k|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|  3.47k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|  3.47k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  3.47k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|  3.47k|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|  3.47k|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|  3.47k|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|  3.47k|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|  3.47k|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|  3.47k|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|  3.47k|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|  3.47k|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|  3.47k|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|  3.47k|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  156|  3.47k|    assign_itx16_fn(,   8,  8, ssse3);
  ------------------
  |  |  101|  3.47k|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|  3.47k|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|  3.47k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|  3.47k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  3.47k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|  3.47k|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|  3.47k|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|  3.47k|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|  3.47k|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|  3.47k|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|  3.47k|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|  3.47k|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|  3.47k|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|  3.47k|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|  3.47k|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  157|  3.47k|    assign_itx16_fn(R,  4, 16, ssse3);
  ------------------
  |  |  101|  3.47k|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|  3.47k|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|  3.47k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|  3.47k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  3.47k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|  3.47k|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|  3.47k|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|  3.47k|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|  3.47k|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|  3.47k|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|  3.47k|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|  3.47k|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|  3.47k|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|  3.47k|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|  3.47k|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  158|  3.47k|    assign_itx16_fn(R, 16,  4, ssse3);
  ------------------
  |  |  101|  3.47k|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|  3.47k|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|  3.47k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|  3.47k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  3.47k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|  3.47k|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|  3.47k|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|  3.47k|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|  3.47k|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|  3.47k|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|  3.47k|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|  3.47k|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|  3.47k|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|  3.47k|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|  3.47k|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  159|  3.47k|    assign_itx16_fn(R,  8, 16, ssse3);
  ------------------
  |  |  101|  3.47k|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|  3.47k|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|  3.47k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|  3.47k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  3.47k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|  3.47k|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|  3.47k|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|  3.47k|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|  3.47k|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|  3.47k|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|  3.47k|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|  3.47k|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|  3.47k|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|  3.47k|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|  3.47k|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  160|  3.47k|    assign_itx16_fn(R, 16,  8, ssse3);
  ------------------
  |  |  101|  3.47k|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|  3.47k|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|  3.47k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|  3.47k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  3.47k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|  3.47k|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|  3.47k|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|  3.47k|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|  3.47k|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|  3.47k|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|  3.47k|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|  3.47k|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|  3.47k|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|  3.47k|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|  3.47k|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  161|  3.47k|    assign_itx12_fn(,  16, 16, ssse3);
  ------------------
  |  |   88|  3.47k|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   84|  3.47k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   81|  3.47k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   85|  3.47k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   89|  3.47k|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   90|  3.47k|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   91|  3.47k|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   92|  3.47k|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   93|  3.47k|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   94|  3.47k|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   95|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   96|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   97|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   98|  3.47k|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  162|  3.47k|    assign_itx2_fn (R,  8, 32, ssse3);
  ------------------
  |  |   84|  3.47k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   81|  3.47k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   85|  3.47k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  163|  3.47k|    assign_itx2_fn (R, 32,  8, ssse3);
  ------------------
  |  |   84|  3.47k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   81|  3.47k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   85|  3.47k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  164|  3.47k|    assign_itx2_fn (R, 16, 32, ssse3);
  ------------------
  |  |   84|  3.47k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   81|  3.47k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   85|  3.47k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  165|  3.47k|    assign_itx2_fn (R, 32, 16, ssse3);
  ------------------
  |  |   84|  3.47k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   81|  3.47k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   85|  3.47k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  166|  3.47k|    assign_itx2_fn (,  32, 32, ssse3);
  ------------------
  |  |   84|  3.47k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   81|  3.47k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   85|  3.47k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  167|  3.47k|    assign_itx1_fn (R, 16, 64, ssse3);
  ------------------
  |  |   81|  3.47k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  168|  3.47k|    assign_itx1_fn (R, 32, 64, ssse3);
  ------------------
  |  |   81|  3.47k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  169|  3.47k|    assign_itx1_fn (R, 64, 16, ssse3);
  ------------------
  |  |   81|  3.47k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  170|  3.47k|    assign_itx1_fn (R, 64, 32, ssse3);
  ------------------
  |  |   81|  3.47k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  171|  3.47k|    assign_itx1_fn ( , 64, 64, ssse3);
  ------------------
  |  |   81|  3.47k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  172|  3.47k|    *all_simd = 1;
  173|  3.47k|#endif
  174|       |
  175|  3.47k|    if (!(flags & DAV1D_X86_CPU_FLAG_SSE41)) return;
  ------------------
  |  Branch (175:9): [True: 0, False: 3.47k]
  ------------------
  176|       |
  177|       |#if BITDEPTH == 16
  178|       |    if (bpc == 10) {
  179|       |        assign_itx16_fn(,   4,  4, sse4);
  180|       |        assign_itx16_fn(R,  4,  8, sse4);
  181|       |        assign_itx16_fn(R,  4, 16, sse4);
  182|       |        assign_itx16_fn(R,  8,  4, sse4);
  183|       |        assign_itx16_fn(,   8,  8, sse4);
  184|       |        assign_itx16_fn(R,  8, 16, sse4);
  185|       |        assign_itx16_fn(R, 16,  4, sse4);
  186|       |        assign_itx16_fn(R, 16,  8, sse4);
  187|       |        assign_itx12_fn(,  16, 16, sse4);
  188|       |        assign_itx2_fn (R,  8, 32, sse4);
  189|       |        assign_itx2_fn (R, 32,  8, sse4);
  190|       |        assign_itx2_fn (R, 16, 32, sse4);
  191|       |        assign_itx2_fn (R, 32, 16, sse4);
  192|       |        assign_itx2_fn (,  32, 32, sse4);
  193|       |        assign_itx1_fn (R, 16, 64, sse4);
  194|       |        assign_itx1_fn (R, 32, 64, sse4);
  195|       |        assign_itx1_fn (R, 64, 16, sse4);
  196|       |        assign_itx1_fn (R, 64, 32, sse4);
  197|       |        assign_itx1_fn (,  64, 64, sse4);
  198|       |        *all_simd = 1;
  199|       |    }
  200|       |#endif
  201|       |
  202|  3.47k|#if ARCH_X86_64
  203|  3.47k|    if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return;
  ------------------
  |  Branch (203:9): [True: 0, False: 3.47k]
  ------------------
  204|       |
  205|  3.47k|    assign_itx_fn(, 4, 4, wht_wht, WHT_WHT, avx2);
  ------------------
  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  ------------------
  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  206|       |
  207|  3.47k|#if BITDEPTH == 8
  208|  3.47k|    assign_itx16_fn( ,  4,  4, avx2);
  ------------------
  |  |  101|  3.47k|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|  3.47k|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|  3.47k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|  3.47k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  3.47k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|  3.47k|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|  3.47k|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|  3.47k|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|  3.47k|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|  3.47k|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|  3.47k|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|  3.47k|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|  3.47k|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|  3.47k|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|  3.47k|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  209|  3.47k|    assign_itx16_fn(R,  4,  8, avx2);
  ------------------
  |  |  101|  3.47k|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|  3.47k|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|  3.47k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|  3.47k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  3.47k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|  3.47k|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|  3.47k|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|  3.47k|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|  3.47k|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|  3.47k|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|  3.47k|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|  3.47k|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|  3.47k|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|  3.47k|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|  3.47k|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  210|  3.47k|    assign_itx16_fn(R,  4, 16, avx2);
  ------------------
  |  |  101|  3.47k|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|  3.47k|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|  3.47k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|  3.47k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  3.47k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|  3.47k|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|  3.47k|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|  3.47k|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|  3.47k|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|  3.47k|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|  3.47k|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|  3.47k|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|  3.47k|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|  3.47k|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|  3.47k|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  211|  3.47k|    assign_itx16_fn(R,  8,  4, avx2);
  ------------------
  |  |  101|  3.47k|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|  3.47k|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|  3.47k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|  3.47k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  3.47k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|  3.47k|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|  3.47k|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|  3.47k|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|  3.47k|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|  3.47k|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|  3.47k|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|  3.47k|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|  3.47k|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|  3.47k|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|  3.47k|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  212|  3.47k|    assign_itx16_fn( ,  8,  8, avx2);
  ------------------
  |  |  101|  3.47k|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|  3.47k|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|  3.47k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|  3.47k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  3.47k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|  3.47k|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|  3.47k|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|  3.47k|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|  3.47k|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|  3.47k|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|  3.47k|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|  3.47k|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|  3.47k|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|  3.47k|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|  3.47k|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  213|  3.47k|    assign_itx16_fn(R,  8, 16, avx2);
  ------------------
  |  |  101|  3.47k|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|  3.47k|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|  3.47k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|  3.47k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  3.47k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|  3.47k|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|  3.47k|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|  3.47k|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|  3.47k|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|  3.47k|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|  3.47k|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|  3.47k|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|  3.47k|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|  3.47k|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|  3.47k|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  214|  3.47k|    assign_itx2_fn (R,  8, 32, avx2);
  ------------------
  |  |   84|  3.47k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   81|  3.47k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   85|  3.47k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  215|  3.47k|    assign_itx16_fn(R, 16,  4, avx2);
  ------------------
  |  |  101|  3.47k|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|  3.47k|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|  3.47k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|  3.47k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  3.47k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|  3.47k|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|  3.47k|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|  3.47k|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|  3.47k|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|  3.47k|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|  3.47k|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|  3.47k|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|  3.47k|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|  3.47k|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|  3.47k|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  216|  3.47k|    assign_itx16_fn(R, 16,  8, avx2);
  ------------------
  |  |  101|  3.47k|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|  3.47k|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|  3.47k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|  3.47k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  3.47k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|  3.47k|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|  3.47k|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|  3.47k|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|  3.47k|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|  3.47k|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|  3.47k|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|  3.47k|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|  3.47k|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|  3.47k|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|  3.47k|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  217|  3.47k|    assign_itx12_fn( , 16, 16, avx2);
  ------------------
  |  |   88|  3.47k|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   84|  3.47k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   81|  3.47k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   85|  3.47k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   89|  3.47k|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   90|  3.47k|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   91|  3.47k|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   92|  3.47k|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   93|  3.47k|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   94|  3.47k|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   95|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   96|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   97|  3.47k|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   98|  3.47k|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  218|  3.47k|    assign_itx2_fn (R, 16, 32, avx2);
  ------------------
  |  |   84|  3.47k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   81|  3.47k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   85|  3.47k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  219|  3.47k|    assign_itx1_fn (R, 16, 64, avx2);
  ------------------
  |  |   81|  3.47k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  220|  3.47k|    assign_itx2_fn (R, 32,  8, avx2);
  ------------------
  |  |   84|  3.47k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   81|  3.47k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   85|  3.47k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  221|  3.47k|    assign_itx2_fn (R, 32, 16, avx2);
  ------------------
  |  |   84|  3.47k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   81|  3.47k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   85|  3.47k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  222|  3.47k|    assign_itx2_fn ( , 32, 32, avx2);
  ------------------
  |  |   84|  3.47k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   81|  3.47k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   85|  3.47k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  223|  3.47k|    assign_itx1_fn (R, 32, 64, avx2);
  ------------------
  |  |   81|  3.47k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  224|  3.47k|    assign_itx1_fn (R, 64, 16, avx2);
  ------------------
  |  |   81|  3.47k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  225|  3.47k|    assign_itx1_fn (R, 64, 32, avx2);
  ------------------
  |  |   81|  3.47k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  226|  3.47k|    assign_itx1_fn ( , 64, 64, avx2);
  ------------------
  |  |   81|  3.47k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  227|       |#else
  228|       |    if (bpc == 10) {
  229|       |        assign_itx16_bpc_fn( ,  4,  4, 10, avx2);
  230|       |        assign_itx16_bpc_fn(R,  4,  8, 10, avx2);
  231|       |        assign_itx16_bpc_fn(R,  4, 16, 10, avx2);
  232|       |        assign_itx16_bpc_fn(R,  8,  4, 10, avx2);
  233|       |        assign_itx16_bpc_fn( ,  8,  8, 10, avx2);
  234|       |        assign_itx16_bpc_fn(R,  8, 16, 10, avx2);
  235|       |        assign_itx2_bpc_fn (R,  8, 32, 10, avx2);
  236|       |        assign_itx16_bpc_fn(R, 16,  4, 10, avx2);
  237|       |        assign_itx16_bpc_fn(R, 16,  8, 10, avx2);
  238|       |        assign_itx12_bpc_fn( , 16, 16, 10, avx2);
  239|       |        assign_itx2_bpc_fn (R, 16, 32, 10, avx2);
  240|       |        assign_itx1_bpc_fn (R, 16, 64, 10, avx2);
  241|       |        assign_itx2_bpc_fn (R, 32,  8, 10, avx2);
  242|       |        assign_itx2_bpc_fn (R, 32, 16, 10, avx2);
  243|       |        assign_itx2_bpc_fn ( , 32, 32, 10, avx2);
  244|       |        assign_itx1_bpc_fn (R, 32, 64, 10, avx2);
  245|       |        assign_itx1_bpc_fn (R, 64, 16, 10, avx2);
  246|       |        assign_itx1_bpc_fn (R, 64, 32, 10, avx2);
  247|       |        assign_itx1_bpc_fn ( , 64, 64, 10, avx2);
  248|       |    } else {
  249|       |        assign_itx16_bpc_fn( ,  4,  4, 12, avx2);
  250|       |        assign_itx16_bpc_fn(R,  4,  8, 12, avx2);
  251|       |        assign_itx16_bpc_fn(R,  4, 16, 12, avx2);
  252|       |        assign_itx16_bpc_fn(R,  8,  4, 12, avx2);
  253|       |        assign_itx16_bpc_fn( ,  8,  8, 12, avx2);
  254|       |        assign_itx16_bpc_fn(R,  8, 16, 12, avx2);
  255|       |        assign_itx2_bpc_fn (R,  8, 32, 12, avx2);
  256|       |        assign_itx16_bpc_fn(R, 16,  4, 12, avx2);
  257|       |        assign_itx16_bpc_fn(R, 16,  8, 12, avx2);
  258|       |        assign_itx12_bpc_fn( , 16, 16, 12, avx2);
  259|       |        assign_itx2_bpc_fn (R, 32,  8, 12, avx2);
  260|       |        assign_itx_bpc_fn(R, 16, 32, identity_identity, IDTX, 12, avx2);
  261|       |        assign_itx_bpc_fn(R, 32, 16, identity_identity, IDTX, 12, avx2);
  262|       |        assign_itx_bpc_fn( , 32, 32, identity_identity, IDTX, 12, avx2);
  263|       |    }
  264|       |#endif
  265|       |
  266|  3.47k|    if (!(flags & DAV1D_X86_CPU_FLAG_AVX512ICL)) return;
  ------------------
  |  Branch (266:9): [True: 3.47k, False: 0]
  ------------------
  267|       |
  268|      0|#if BITDEPTH == 8
  269|  3.47k|    assign_itx16_fn( ,  4,  4, avx512icl); // no wht
  ------------------
  |  |  101|  3.47k|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|  3.47k|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|      0|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|      0|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  3.47k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|      0|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|      0|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|      0|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|      0|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|      0|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|      0|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|      0|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|      0|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|      0|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|  3.47k|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|      0|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|      0|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|      0|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|  3.47k|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|  3.47k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.47k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.47k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  270|      0|    assign_itx16_fn(R,  4,  8, avx512icl);
  ------------------
  |  |  101|      0|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|      0|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|      0|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|      0|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|      0|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|      0|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|      0|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|      0|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|      0|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|      0|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|      0|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|      0|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|      0|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|      0|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|      0|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|      0|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|      0|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|      0|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|      0|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  271|      0|    assign_itx16_fn(R,  4, 16, avx512icl);
  ------------------
  |  |  101|      0|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|      0|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|      0|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|      0|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|      0|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|      0|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|      0|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|      0|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|      0|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|      0|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|      0|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|      0|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|      0|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|      0|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|      0|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|      0|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|      0|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|      0|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|      0|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  272|      0|    assign_itx16_fn(R,  8,  4, avx512icl);
  ------------------
  |  |  101|      0|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|      0|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|      0|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|      0|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|      0|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|      0|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|      0|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|      0|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|      0|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|      0|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|      0|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|      0|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|      0|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|      0|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|      0|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|      0|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|      0|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|      0|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|      0|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  273|      0|    assign_itx16_fn( ,  8,  8, avx512icl);
  ------------------
  |  |  101|      0|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|      0|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|      0|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|      0|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|      0|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|      0|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|      0|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|      0|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|      0|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|      0|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|      0|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|      0|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|      0|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|      0|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|      0|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|      0|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|      0|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|      0|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|      0|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  274|      0|    assign_itx16_fn(R,  8, 16, avx512icl);
  ------------------
  |  |  101|      0|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|      0|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|      0|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|      0|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|      0|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|      0|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|      0|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|      0|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|      0|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|      0|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|      0|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|      0|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|      0|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|      0|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|      0|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|      0|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|      0|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|      0|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|      0|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  275|      0|    assign_itx2_fn (R,  8, 32, avx512icl);
  ------------------
  |  |   84|      0|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   81|      0|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   85|      0|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  276|      0|    assign_itx16_fn(R, 16,  4, avx512icl);
  ------------------
  |  |  101|      0|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|      0|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|      0|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|      0|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|      0|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|      0|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|      0|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|      0|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|      0|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|      0|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|      0|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|      0|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|      0|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|      0|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|      0|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|      0|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|      0|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|      0|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|      0|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  277|      0|    assign_itx16_fn(R, 16,  8, avx512icl);
  ------------------
  |  |  101|      0|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|      0|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|      0|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|      0|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|      0|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|      0|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|      0|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|      0|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|      0|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|      0|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|      0|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|      0|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|      0|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|      0|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|      0|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|      0|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|      0|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|      0|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|      0|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  278|      0|    assign_itx12_fn( , 16, 16, avx512icl);
  ------------------
  |  |   88|      0|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   84|      0|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   81|      0|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   85|      0|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   89|      0|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   90|      0|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   91|      0|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   92|      0|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   93|      0|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   94|      0|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   95|      0|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   96|      0|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   97|      0|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   98|      0|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  279|      0|    assign_itx2_fn (R, 16, 32, avx512icl);
  ------------------
  |  |   84|      0|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   81|      0|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   85|      0|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  280|      0|    assign_itx1_fn (R, 16, 64, avx512icl);
  ------------------
  |  |   81|      0|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  281|      0|    assign_itx2_fn (R, 32,  8, avx512icl);
  ------------------
  |  |   84|      0|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   81|      0|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   85|      0|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  282|      0|    assign_itx2_fn (R, 32, 16, avx512icl);
  ------------------
  |  |   84|      0|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   81|      0|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   85|      0|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  283|      0|    assign_itx2_fn ( , 32, 32, avx512icl);
  ------------------
  |  |   84|      0|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   81|      0|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   85|      0|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  284|      0|    assign_itx1_fn (R, 32, 64, avx512icl);
  ------------------
  |  |   81|      0|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  285|      0|    assign_itx1_fn (R, 64, 16, avx512icl);
  ------------------
  |  |   81|      0|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  286|      0|    assign_itx1_fn (R, 64, 32, avx512icl);
  ------------------
  |  |   81|      0|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  287|      0|    assign_itx1_fn ( , 64, 64, avx512icl);
  ------------------
  |  |   81|      0|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  288|       |#else
  289|       |    if (bpc == 10) {
  290|       |        assign_itx16_bpc_fn( ,  8,  8, 10, avx512icl);
  291|       |        assign_itx16_bpc_fn(R,  8, 16, 10, avx512icl);
  292|       |        assign_itx2_bpc_fn (R,  8, 32, 10, avx512icl);
  293|       |        assign_itx16_bpc_fn(R, 16,  8, 10, avx512icl);
  294|       |        assign_itx12_bpc_fn( , 16, 16, 10, avx512icl);
  295|       |        assign_itx2_bpc_fn (R, 16, 32, 10, avx512icl);
  296|       |        assign_itx2_bpc_fn (R, 32,  8, 10, avx512icl);
  297|       |        assign_itx2_bpc_fn (R, 32, 16, 10, avx512icl);
  298|       |        assign_itx2_bpc_fn ( , 32, 32, 10, avx512icl);
  299|       |        assign_itx1_bpc_fn (R, 16, 64, 10, avx512icl);
  300|       |        assign_itx1_bpc_fn (R, 32, 64, 10, avx512icl);
  301|       |        assign_itx1_bpc_fn (R, 64, 16, 10, avx512icl);
  302|       |        assign_itx1_bpc_fn (R, 64, 32, 10, avx512icl);
  303|       |        assign_itx1_bpc_fn ( , 64, 64, 10, avx512icl);
  304|       |    }
  305|       |#endif
  306|      0|#endif
  307|      0|}

loopfilter_tmpl.c:loop_filter_dsp_init_x86:
   41|  8.15k|static ALWAYS_INLINE void loop_filter_dsp_init_x86(Dav1dLoopFilterDSPContext *const c) {
   42|  8.15k|    const unsigned flags = dav1d_get_cpu_flags();
   43|       |
   44|  8.15k|    if (!(flags & DAV1D_X86_CPU_FLAG_SSSE3)) return;
  ------------------
  |  Branch (44:9): [True: 0, False: 8.15k]
  ------------------
   45|       |
   46|  8.15k|    c->loop_filter_sb[0][0] = BF(dav1d_lpf_h_sb_y, ssse3);
  ------------------
  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   47|  8.15k|    c->loop_filter_sb[0][1] = BF(dav1d_lpf_v_sb_y, ssse3);
  ------------------
  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   48|  8.15k|    c->loop_filter_sb[1][0] = BF(dav1d_lpf_h_sb_uv, ssse3);
  ------------------
  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   49|  8.15k|    c->loop_filter_sb[1][1] = BF(dav1d_lpf_v_sb_uv, ssse3);
  ------------------
  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   50|       |
   51|  8.15k|#if ARCH_X86_64
   52|  8.15k|    if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return;
  ------------------
  |  Branch (52:9): [True: 0, False: 8.15k]
  ------------------
   53|       |
   54|  8.15k|    c->loop_filter_sb[0][0] = BF(dav1d_lpf_h_sb_y, avx2);
  ------------------
  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   55|  8.15k|    c->loop_filter_sb[0][1] = BF(dav1d_lpf_v_sb_y, avx2);
  ------------------
  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   56|  8.15k|    c->loop_filter_sb[1][0] = BF(dav1d_lpf_h_sb_uv, avx2);
  ------------------
  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   57|  8.15k|    c->loop_filter_sb[1][1] = BF(dav1d_lpf_v_sb_uv, avx2);
  ------------------
  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   58|       |
   59|  8.15k|    if (!(flags & DAV1D_X86_CPU_FLAG_AVX512ICL)) return;
  ------------------
  |  Branch (59:9): [True: 8.15k, False: 0]
  ------------------
   60|       |
   61|      0|    c->loop_filter_sb[0][1] = BF(dav1d_lpf_v_sb_y, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   62|      0|    c->loop_filter_sb[1][1] = BF(dav1d_lpf_v_sb_uv, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   63|       |
   64|      0|    if (!(flags & DAV1D_X86_CPU_FLAG_SLOW_GATHER)) {
  ------------------
  |  Branch (64:9): [True: 0, False: 0]
  ------------------
   65|      0|        c->loop_filter_sb[0][0] = BF(dav1d_lpf_h_sb_y, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   66|      0|        c->loop_filter_sb[1][0] = BF(dav1d_lpf_h_sb_uv, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   67|      0|    }
   68|      0|#endif
   69|      0|}

looprestoration_tmpl.c:loop_restoration_dsp_init_x86:
   50|  8.15k|static ALWAYS_INLINE void loop_restoration_dsp_init_x86(Dav1dLoopRestorationDSPContext *const c, const int bpc) {
   51|  8.15k|    const unsigned flags = dav1d_get_cpu_flags();
   52|       |
   53|  8.15k|    if (!(flags & DAV1D_X86_CPU_FLAG_SSE2)) return;
  ------------------
  |  Branch (53:9): [True: 0, False: 8.15k]
  ------------------
   54|  8.15k|#if BITDEPTH == 8
   55|  8.15k|    c->wiener[0] = BF(dav1d_wiener_filter7, sse2);
  ------------------
  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   56|  8.15k|    c->wiener[1] = BF(dav1d_wiener_filter5, sse2);
  ------------------
  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   57|  8.15k|#endif
   58|       |
   59|  8.15k|    if (!(flags & DAV1D_X86_CPU_FLAG_SSSE3)) return;
  ------------------
  |  Branch (59:9): [True: 0, False: 8.15k]
  ------------------
   60|  8.15k|    c->wiener[0] = BF(dav1d_wiener_filter7, ssse3);
  ------------------
  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   61|  8.15k|    c->wiener[1] = BF(dav1d_wiener_filter5, ssse3);
  ------------------
  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   62|  8.15k|    if (BITDEPTH == 8 || bpc == 10) {
  ------------------
  |  Branch (62:9): [True: 3.47k, Folded]
  |  Branch (62:26): [True: 2.31k, False: 2.36k]
  ------------------
   63|  5.79k|        c->sgr[0] = BF(dav1d_sgr_filter_5x5, ssse3);
  ------------------
  |  |   52|  5.79k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   64|  5.79k|        c->sgr[1] = BF(dav1d_sgr_filter_3x3, ssse3);
  ------------------
  |  |   52|  5.79k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   65|  5.79k|        c->sgr[2] = BF(dav1d_sgr_filter_mix, ssse3);
  ------------------
  |  |   52|  5.79k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   66|  5.79k|    }
   67|       |
   68|  8.15k|#if ARCH_X86_64
   69|  8.15k|    if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return;
  ------------------
  |  Branch (69:9): [True: 0, False: 8.15k]
  ------------------
   70|       |
   71|  8.15k|    c->wiener[0] = BF(dav1d_wiener_filter7, avx2);
  ------------------
  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   72|  8.15k|    c->wiener[1] = BF(dav1d_wiener_filter5, avx2);
  ------------------
  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   73|  8.15k|    if (BITDEPTH == 8 || bpc == 10) {
  ------------------
  |  Branch (73:9): [True: 3.47k, Folded]
  |  Branch (73:26): [True: 2.31k, False: 2.36k]
  ------------------
   74|  5.79k|        c->sgr[0] = BF(dav1d_sgr_filter_5x5, avx2);
  ------------------
  |  |   52|  5.79k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   75|  5.79k|        c->sgr[1] = BF(dav1d_sgr_filter_3x3, avx2);
  ------------------
  |  |   52|  5.79k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   76|  5.79k|        c->sgr[2] = BF(dav1d_sgr_filter_mix, avx2);
  ------------------
  |  |   52|  5.79k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   77|  5.79k|    }
   78|       |
   79|  8.15k|    if (!(flags & DAV1D_X86_CPU_FLAG_AVX512ICL)) return;
  ------------------
  |  Branch (79:9): [True: 8.15k, False: 0]
  ------------------
   80|       |
   81|      0|    c->wiener[0] = BF(dav1d_wiener_filter7, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   82|      0|#if BITDEPTH == 8
   83|       |    /* With VNNI we don't need a 5-tap version. */
   84|      0|    c->wiener[1] = c->wiener[0];
   85|       |#else
   86|       |    c->wiener[1] = BF(dav1d_wiener_filter5, avx512icl);
   87|       |#endif
   88|      0|    if (BITDEPTH == 8 || bpc == 10) {
  ------------------
  |  Branch (88:9): [True: 0, Folded]
  |  Branch (88:26): [True: 0, False: 0]
  ------------------
   89|      0|        c->sgr[0] = BF(dav1d_sgr_filter_5x5, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   90|      0|        c->sgr[1] = BF(dav1d_sgr_filter_3x3, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   91|      0|        c->sgr[2] = BF(dav1d_sgr_filter_mix, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   92|      0|    }
   93|      0|#endif
   94|      0|}

mc_tmpl.c:mc_dsp_init_x86:
   92|  8.15k|static ALWAYS_INLINE void mc_dsp_init_x86(Dav1dMCDSPContext *const c) {
   93|  8.15k|    const unsigned flags = dav1d_get_cpu_flags();
   94|       |
   95|  8.15k|    if(!(flags & DAV1D_X86_CPU_FLAG_SSSE3))
  ------------------
  |  Branch (95:8): [True: 0, False: 8.15k]
  ------------------
   96|      0|        return;
   97|       |
   98|  8.15k|    init_8tap_fns(ssse3);
  ------------------
  |  |  143|  8.15k|    init_8tap_gen(mc,  opt); \
  |  |  ------------------
  |  |  |  |  132|  8.15k|    init_##name##_fn(FILTER_2D_8TAP_REGULAR,        8tap_regular,        opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  8.15k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  133|  8.15k|    init_##name##_fn(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth, opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  8.15k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  134|  8.15k|    init_##name##_fn(FILTER_2D_8TAP_REGULAR_SHARP,  8tap_regular_sharp,  opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  8.15k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  135|  8.15k|    init_##name##_fn(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular, opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  8.15k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  136|  8.15k|    init_##name##_fn(FILTER_2D_8TAP_SMOOTH,         8tap_smooth,         opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  8.15k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  137|  8.15k|    init_##name##_fn(FILTER_2D_8TAP_SMOOTH_SHARP,   8tap_smooth_sharp,   opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  8.15k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  138|  8.15k|    init_##name##_fn(FILTER_2D_8TAP_SHARP_REGULAR,  8tap_sharp_regular,  opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  8.15k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  139|  8.15k|    init_##name##_fn(FILTER_2D_8TAP_SHARP_SMOOTH,   8tap_sharp_smooth,   opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  8.15k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  140|  8.15k|    init_##name##_fn(FILTER_2D_8TAP_SHARP,          8tap_sharp,          opt)
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  8.15k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  144|  8.15k|    init_8tap_gen(mct, opt)
  |  |  ------------------
  |  |  |  |  132|  8.15k|    init_##name##_fn(FILTER_2D_8TAP_REGULAR,        8tap_regular,        opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|  8.15k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  133|  8.15k|    init_##name##_fn(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth, opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|  8.15k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  134|  8.15k|    init_##name##_fn(FILTER_2D_8TAP_REGULAR_SHARP,  8tap_regular_sharp,  opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|  8.15k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  135|  8.15k|    init_##name##_fn(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular, opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|  8.15k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  136|  8.15k|    init_##name##_fn(FILTER_2D_8TAP_SMOOTH,         8tap_smooth,         opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|  8.15k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  137|  8.15k|    init_##name##_fn(FILTER_2D_8TAP_SMOOTH_SHARP,   8tap_smooth_sharp,   opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|  8.15k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  138|  8.15k|    init_##name##_fn(FILTER_2D_8TAP_SHARP_REGULAR,  8tap_sharp_regular,  opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|  8.15k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  139|  8.15k|    init_##name##_fn(FILTER_2D_8TAP_SHARP_SMOOTH,   8tap_sharp_smooth,   opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|  8.15k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  140|  8.15k|    init_##name##_fn(FILTER_2D_8TAP_SHARP,          8tap_sharp,          opt)
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|  8.15k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   99|       |
  100|  8.15k|    init_mc_fn(FILTER_2D_BILINEAR,             bilin,               ssse3);
  ------------------
  |  |   36|  8.15k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  101|  8.15k|    init_mct_fn(FILTER_2D_BILINEAR,            bilin,               ssse3);
  ------------------
  |  |   38|  8.15k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  102|       |
  103|  8.15k|    init_mc_scaled_fn(FILTER_2D_8TAP_REGULAR,        8tap_scaled_regular,        ssse3);
  ------------------
  |  |   40|  8.15k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  104|  8.15k|    init_mc_scaled_fn(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_scaled_regular_smooth, ssse3);
  ------------------
  |  |   40|  8.15k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  105|  8.15k|    init_mc_scaled_fn(FILTER_2D_8TAP_REGULAR_SHARP,  8tap_scaled_regular_sharp,  ssse3);
  ------------------
  |  |   40|  8.15k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  106|  8.15k|    init_mc_scaled_fn(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_scaled_smooth_regular, ssse3);
  ------------------
  |  |   40|  8.15k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  107|  8.15k|    init_mc_scaled_fn(FILTER_2D_8TAP_SMOOTH,         8tap_scaled_smooth,         ssse3);
  ------------------
  |  |   40|  8.15k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  108|  8.15k|    init_mc_scaled_fn(FILTER_2D_8TAP_SMOOTH_SHARP,   8tap_scaled_smooth_sharp,   ssse3);
  ------------------
  |  |   40|  8.15k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  109|  8.15k|    init_mc_scaled_fn(FILTER_2D_8TAP_SHARP_REGULAR,  8tap_scaled_sharp_regular,  ssse3);
  ------------------
  |  |   40|  8.15k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  110|  8.15k|    init_mc_scaled_fn(FILTER_2D_8TAP_SHARP_SMOOTH,   8tap_scaled_sharp_smooth,   ssse3);
  ------------------
  |  |   40|  8.15k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  111|  8.15k|    init_mc_scaled_fn(FILTER_2D_8TAP_SHARP,          8tap_scaled_sharp,          ssse3);
  ------------------
  |  |   40|  8.15k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  112|  8.15k|    init_mc_scaled_fn(FILTER_2D_BILINEAR,            bilin_scaled,               ssse3);
  ------------------
  |  |   40|  8.15k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  113|       |
  114|  8.15k|    init_mct_scaled_fn(FILTER_2D_8TAP_REGULAR,        8tap_scaled_regular,        ssse3);
  ------------------
  |  |   42|  8.15k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  115|  8.15k|    init_mct_scaled_fn(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_scaled_regular_smooth, ssse3);
  ------------------
  |  |   42|  8.15k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  116|  8.15k|    init_mct_scaled_fn(FILTER_2D_8TAP_REGULAR_SHARP,  8tap_scaled_regular_sharp,  ssse3);
  ------------------
  |  |   42|  8.15k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  117|  8.15k|    init_mct_scaled_fn(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_scaled_smooth_regular, ssse3);
  ------------------
  |  |   42|  8.15k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  118|  8.15k|    init_mct_scaled_fn(FILTER_2D_8TAP_SMOOTH,         8tap_scaled_smooth,         ssse3);
  ------------------
  |  |   42|  8.15k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  119|  8.15k|    init_mct_scaled_fn(FILTER_2D_8TAP_SMOOTH_SHARP,   8tap_scaled_smooth_sharp,   ssse3);
  ------------------
  |  |   42|  8.15k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  120|  8.15k|    init_mct_scaled_fn(FILTER_2D_8TAP_SHARP_REGULAR,  8tap_scaled_sharp_regular,  ssse3);
  ------------------
  |  |   42|  8.15k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  121|  8.15k|    init_mct_scaled_fn(FILTER_2D_8TAP_SHARP_SMOOTH,   8tap_scaled_sharp_smooth,   ssse3);
  ------------------
  |  |   42|  8.15k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  122|  8.15k|    init_mct_scaled_fn(FILTER_2D_8TAP_SHARP,          8tap_scaled_sharp,          ssse3);
  ------------------
  |  |   42|  8.15k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  123|  8.15k|    init_mct_scaled_fn(FILTER_2D_BILINEAR,            bilin_scaled,               ssse3);
  ------------------
  |  |   42|  8.15k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  124|       |
  125|  8.15k|    c->avg = BF(dav1d_avg, ssse3);
  ------------------
  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  126|  8.15k|    c->w_avg = BF(dav1d_w_avg, ssse3);
  ------------------
  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  127|  8.15k|    c->mask = BF(dav1d_mask, ssse3);
  ------------------
  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  128|  8.15k|    c->w_mask[0] = BF(dav1d_w_mask_444, ssse3);
  ------------------
  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  129|  8.15k|    c->w_mask[1] = BF(dav1d_w_mask_422, ssse3);
  ------------------
  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  130|  8.15k|    c->w_mask[2] = BF(dav1d_w_mask_420, ssse3);
  ------------------
  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  131|  8.15k|    c->blend = BF(dav1d_blend, ssse3);
  ------------------
  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  132|  8.15k|    c->blend_v = BF(dav1d_blend_v, ssse3);
  ------------------
  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  133|  8.15k|    c->blend_h = BF(dav1d_blend_h, ssse3);
  ------------------
  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  134|  8.15k|    c->warp8x8  = BF(dav1d_warp_affine_8x8, ssse3);
  ------------------
  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  135|  8.15k|    c->warp8x8t = BF(dav1d_warp_affine_8x8t, ssse3);
  ------------------
  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  136|  8.15k|    c->emu_edge = BF(dav1d_emu_edge, ssse3);
  ------------------
  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  137|  8.15k|    c->resize = BF(dav1d_resize, ssse3);
  ------------------
  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  138|       |
  139|  8.15k|    if(!(flags & DAV1D_X86_CPU_FLAG_SSE41))
  ------------------
  |  Branch (139:8): [True: 0, False: 8.15k]
  ------------------
  140|      0|        return;
  141|       |
  142|  8.15k|#if BITDEPTH == 8
  143|  8.15k|    c->warp8x8  = BF(dav1d_warp_affine_8x8, sse4);
  ------------------
  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  144|  8.15k|    c->warp8x8t = BF(dav1d_warp_affine_8x8t, sse4);
  ------------------
  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  145|  8.15k|#endif
  146|       |
  147|  8.15k|#if ARCH_X86_64
  148|  8.15k|    if (!(flags & DAV1D_X86_CPU_FLAG_AVX2))
  ------------------
  |  Branch (148:9): [True: 0, False: 8.15k]
  ------------------
  149|      0|        return;
  150|       |
  151|  8.15k|    init_8tap_fns(avx2);
  ------------------
  |  |  143|  8.15k|    init_8tap_gen(mc,  opt); \
  |  |  ------------------
  |  |  |  |  132|  8.15k|    init_##name##_fn(FILTER_2D_8TAP_REGULAR,        8tap_regular,        opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  8.15k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  133|  8.15k|    init_##name##_fn(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth, opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  8.15k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  134|  8.15k|    init_##name##_fn(FILTER_2D_8TAP_REGULAR_SHARP,  8tap_regular_sharp,  opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  8.15k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  135|  8.15k|    init_##name##_fn(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular, opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  8.15k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  136|  8.15k|    init_##name##_fn(FILTER_2D_8TAP_SMOOTH,         8tap_smooth,         opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  8.15k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  137|  8.15k|    init_##name##_fn(FILTER_2D_8TAP_SMOOTH_SHARP,   8tap_smooth_sharp,   opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  8.15k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  138|  8.15k|    init_##name##_fn(FILTER_2D_8TAP_SHARP_REGULAR,  8tap_sharp_regular,  opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  8.15k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  139|  8.15k|    init_##name##_fn(FILTER_2D_8TAP_SHARP_SMOOTH,   8tap_sharp_smooth,   opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  8.15k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  140|  8.15k|    init_##name##_fn(FILTER_2D_8TAP_SHARP,          8tap_sharp,          opt)
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  8.15k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  144|  8.15k|    init_8tap_gen(mct, opt)
  |  |  ------------------
  |  |  |  |  132|  8.15k|    init_##name##_fn(FILTER_2D_8TAP_REGULAR,        8tap_regular,        opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|  8.15k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  133|  8.15k|    init_##name##_fn(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth, opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|  8.15k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  134|  8.15k|    init_##name##_fn(FILTER_2D_8TAP_REGULAR_SHARP,  8tap_regular_sharp,  opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|  8.15k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  135|  8.15k|    init_##name##_fn(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular, opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|  8.15k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  136|  8.15k|    init_##name##_fn(FILTER_2D_8TAP_SMOOTH,         8tap_smooth,         opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|  8.15k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  137|  8.15k|    init_##name##_fn(FILTER_2D_8TAP_SMOOTH_SHARP,   8tap_smooth_sharp,   opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|  8.15k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  138|  8.15k|    init_##name##_fn(FILTER_2D_8TAP_SHARP_REGULAR,  8tap_sharp_regular,  opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|  8.15k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  139|  8.15k|    init_##name##_fn(FILTER_2D_8TAP_SHARP_SMOOTH,   8tap_sharp_smooth,   opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|  8.15k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  140|  8.15k|    init_##name##_fn(FILTER_2D_8TAP_SHARP,          8tap_sharp,          opt)
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|  8.15k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  152|       |
  153|  8.15k|    init_mc_fn(FILTER_2D_BILINEAR,            bilin,               avx2);
  ------------------
  |  |   36|  8.15k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  154|  8.15k|    init_mct_fn(FILTER_2D_BILINEAR,           bilin,               avx2);
  ------------------
  |  |   38|  8.15k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  155|       |
  156|  8.15k|    init_mc_scaled_fn(FILTER_2D_8TAP_REGULAR,        8tap_scaled_regular,        avx2);
  ------------------
  |  |   40|  8.15k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  157|  8.15k|    init_mc_scaled_fn(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_scaled_regular_smooth, avx2);
  ------------------
  |  |   40|  8.15k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  158|  8.15k|    init_mc_scaled_fn(FILTER_2D_8TAP_REGULAR_SHARP,  8tap_scaled_regular_sharp,  avx2);
  ------------------
  |  |   40|  8.15k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  159|  8.15k|    init_mc_scaled_fn(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_scaled_smooth_regular, avx2);
  ------------------
  |  |   40|  8.15k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  160|  8.15k|    init_mc_scaled_fn(FILTER_2D_8TAP_SMOOTH,         8tap_scaled_smooth,         avx2);
  ------------------
  |  |   40|  8.15k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  161|  8.15k|    init_mc_scaled_fn(FILTER_2D_8TAP_SMOOTH_SHARP,   8tap_scaled_smooth_sharp,   avx2);
  ------------------
  |  |   40|  8.15k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  162|  8.15k|    init_mc_scaled_fn(FILTER_2D_8TAP_SHARP_REGULAR,  8tap_scaled_sharp_regular,  avx2);
  ------------------
  |  |   40|  8.15k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  163|  8.15k|    init_mc_scaled_fn(FILTER_2D_8TAP_SHARP_SMOOTH,   8tap_scaled_sharp_smooth,   avx2);
  ------------------
  |  |   40|  8.15k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  164|  8.15k|    init_mc_scaled_fn(FILTER_2D_8TAP_SHARP,          8tap_scaled_sharp,          avx2);
  ------------------
  |  |   40|  8.15k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  165|  8.15k|    init_mc_scaled_fn(FILTER_2D_BILINEAR,            bilin_scaled,               avx2);
  ------------------
  |  |   40|  8.15k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  166|       |
  167|  8.15k|    init_mct_scaled_fn(FILTER_2D_8TAP_REGULAR,        8tap_scaled_regular,        avx2);
  ------------------
  |  |   42|  8.15k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  168|  8.15k|    init_mct_scaled_fn(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_scaled_regular_smooth, avx2);
  ------------------
  |  |   42|  8.15k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  169|  8.15k|    init_mct_scaled_fn(FILTER_2D_8TAP_REGULAR_SHARP,  8tap_scaled_regular_sharp,  avx2);
  ------------------
  |  |   42|  8.15k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  170|  8.15k|    init_mct_scaled_fn(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_scaled_smooth_regular, avx2);
  ------------------
  |  |   42|  8.15k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  171|  8.15k|    init_mct_scaled_fn(FILTER_2D_8TAP_SMOOTH,         8tap_scaled_smooth,         avx2);
  ------------------
  |  |   42|  8.15k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  172|  8.15k|    init_mct_scaled_fn(FILTER_2D_8TAP_SMOOTH_SHARP,   8tap_scaled_smooth_sharp,   avx2);
  ------------------
  |  |   42|  8.15k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  173|  8.15k|    init_mct_scaled_fn(FILTER_2D_8TAP_SHARP_REGULAR,  8tap_scaled_sharp_regular,  avx2);
  ------------------
  |  |   42|  8.15k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  174|  8.15k|    init_mct_scaled_fn(FILTER_2D_8TAP_SHARP_SMOOTH,   8tap_scaled_sharp_smooth,   avx2);
  ------------------
  |  |   42|  8.15k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  175|  8.15k|    init_mct_scaled_fn(FILTER_2D_8TAP_SHARP,          8tap_scaled_sharp,          avx2);
  ------------------
  |  |   42|  8.15k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  176|  8.15k|    init_mct_scaled_fn(FILTER_2D_BILINEAR,            bilin_scaled,               avx2);
  ------------------
  |  |   42|  8.15k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  177|       |
  178|  8.15k|    c->avg = BF(dav1d_avg, avx2);
  ------------------
  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  179|  8.15k|    c->w_avg = BF(dav1d_w_avg, avx2);
  ------------------
  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  180|  8.15k|    c->mask = BF(dav1d_mask, avx2);
  ------------------
  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  181|  8.15k|    c->w_mask[0] = BF(dav1d_w_mask_444, avx2);
  ------------------
  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  182|  8.15k|    c->w_mask[1] = BF(dav1d_w_mask_422, avx2);
  ------------------
  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  183|  8.15k|    c->w_mask[2] = BF(dav1d_w_mask_420, avx2);
  ------------------
  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  184|  8.15k|    c->blend = BF(dav1d_blend, avx2);
  ------------------
  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  185|  8.15k|    c->blend_v = BF(dav1d_blend_v, avx2);
  ------------------
  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  186|  8.15k|    c->blend_h = BF(dav1d_blend_h, avx2);
  ------------------
  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  187|  8.15k|    c->warp8x8  = BF(dav1d_warp_affine_8x8, avx2);
  ------------------
  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  188|  8.15k|    c->warp8x8t = BF(dav1d_warp_affine_8x8t, avx2);
  ------------------
  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  189|  8.15k|    c->emu_edge = BF(dav1d_emu_edge, avx2);
  ------------------
  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  190|  8.15k|    c->resize = BF(dav1d_resize, avx2);
  ------------------
  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  191|       |
  192|  8.15k|    if (!(flags & DAV1D_X86_CPU_FLAG_AVX512ICL))
  ------------------
  |  Branch (192:9): [True: 8.15k, False: 0]
  ------------------
  193|  8.15k|        return;
  194|       |
  195|  8.15k|    init_8tap_fns(avx512icl);
  ------------------
  |  |  143|      0|    init_8tap_gen(mc,  opt); \
  |  |  ------------------
  |  |  |  |  132|      0|    init_##name##_fn(FILTER_2D_8TAP_REGULAR,        8tap_regular,        opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  8.15k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  133|      0|    init_##name##_fn(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth, opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|      0|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  134|      0|    init_##name##_fn(FILTER_2D_8TAP_REGULAR_SHARP,  8tap_regular_sharp,  opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|      0|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  135|      0|    init_##name##_fn(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular, opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|      0|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  136|      0|    init_##name##_fn(FILTER_2D_8TAP_SMOOTH,         8tap_smooth,         opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|      0|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  137|      0|    init_##name##_fn(FILTER_2D_8TAP_SMOOTH_SHARP,   8tap_smooth_sharp,   opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|      0|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  138|      0|    init_##name##_fn(FILTER_2D_8TAP_SHARP_REGULAR,  8tap_sharp_regular,  opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|      0|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  139|      0|    init_##name##_fn(FILTER_2D_8TAP_SHARP_SMOOTH,   8tap_sharp_smooth,   opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|      0|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  140|  8.15k|    init_##name##_fn(FILTER_2D_8TAP_SHARP,          8tap_sharp,          opt)
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  8.15k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  144|      0|    init_8tap_gen(mct, opt)
  |  |  ------------------
  |  |  |  |  132|      0|    init_##name##_fn(FILTER_2D_8TAP_REGULAR,        8tap_regular,        opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|      0|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  133|  8.15k|    init_##name##_fn(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth, opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|      0|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  134|  8.15k|    init_##name##_fn(FILTER_2D_8TAP_REGULAR_SHARP,  8tap_regular_sharp,  opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|      0|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  135|  8.15k|    init_##name##_fn(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular, opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|      0|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  136|  8.15k|    init_##name##_fn(FILTER_2D_8TAP_SMOOTH,         8tap_smooth,         opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|      0|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  137|  8.15k|    init_##name##_fn(FILTER_2D_8TAP_SMOOTH_SHARP,   8tap_smooth_sharp,   opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|      0|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  138|  8.15k|    init_##name##_fn(FILTER_2D_8TAP_SHARP_REGULAR,  8tap_sharp_regular,  opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|      0|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  139|  8.15k|    init_##name##_fn(FILTER_2D_8TAP_SHARP_SMOOTH,   8tap_sharp_smooth,   opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|      0|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  140|  8.15k|    init_##name##_fn(FILTER_2D_8TAP_SHARP,          8tap_sharp,          opt)
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|  8.15k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  8.15k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  196|       |
  197|      0|    init_mc_fn (FILTER_2D_BILINEAR,            bilin,               avx512icl);
  ------------------
  |  |   36|      0|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  198|      0|    init_mct_fn(FILTER_2D_BILINEAR,            bilin,               avx512icl);
  ------------------
  |  |   38|      0|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  199|       |
  200|      0|    c->avg = BF(dav1d_avg, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  201|      0|    c->w_avg = BF(dav1d_w_avg, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  202|      0|    c->mask = BF(dav1d_mask, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  203|      0|    c->w_mask[0] = BF(dav1d_w_mask_444, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  204|      0|    c->w_mask[1] = BF(dav1d_w_mask_422, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  205|      0|    c->w_mask[2] = BF(dav1d_w_mask_420, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  206|      0|    c->blend = BF(dav1d_blend, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  207|      0|    c->blend_v = BF(dav1d_blend_v, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  208|      0|    c->blend_h = BF(dav1d_blend_h, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  209|       |
  210|      0|    if (!(flags & DAV1D_X86_CPU_FLAG_SLOW_GATHER)) {
  ------------------
  |  Branch (210:9): [True: 0, False: 0]
  ------------------
  211|      0|        c->resize = BF(dav1d_resize, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  212|      0|        c->warp8x8  = BF(dav1d_warp_affine_8x8, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  213|      0|        c->warp8x8t = BF(dav1d_warp_affine_8x8t, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  214|      0|    }
  215|      0|#endif
  216|      0|}

msac.c:msac_init_x86:
   59|  45.9k|static ALWAYS_INLINE void msac_init_x86(MsacContext *const s) {
   60|  45.9k|    const unsigned flags = dav1d_get_cpu_flags();
   61|       |
   62|  45.9k|    if (flags & DAV1D_X86_CPU_FLAG_SSE2) {
  ------------------
  |  Branch (62:9): [True: 45.9k, False: 0]
  ------------------
   63|  45.9k|        s->symbol_adapt16 = dav1d_msac_decode_symbol_adapt16_sse2;
   64|  45.9k|    }
   65|       |
   66|  45.9k|    if (flags & DAV1D_X86_CPU_FLAG_AVX2) {
  ------------------
  |  Branch (66:9): [True: 45.9k, False: 0]
  ------------------
   67|  45.9k|        s->symbol_adapt16 = dav1d_msac_decode_symbol_adapt16_avx2;
   68|  45.9k|    }
   69|  45.9k|}

pal.c:pal_dsp_init_x86:
   34|  9.69k|static ALWAYS_INLINE void pal_dsp_init_x86(Dav1dPalDSPContext *const c) {
   35|  9.69k|    const unsigned flags = dav1d_get_cpu_flags();
   36|       |
   37|  9.69k|    if (!(flags & DAV1D_X86_CPU_FLAG_SSSE3)) return;
  ------------------
  |  Branch (37:9): [True: 0, False: 9.69k]
  ------------------
   38|       |
   39|  9.69k|    c->pal_idx_finish = dav1d_pal_idx_finish_ssse3;
   40|       |
   41|  9.69k|#if ARCH_X86_64
   42|  9.69k|    if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return;
  ------------------
  |  Branch (42:9): [True: 0, False: 9.69k]
  ------------------
   43|       |
   44|  9.69k|    c->pal_idx_finish = dav1d_pal_idx_finish_avx2;
   45|       |
   46|  9.69k|    if (!(flags & DAV1D_X86_CPU_FLAG_AVX512ICL)) return;
  ------------------
  |  Branch (46:9): [True: 9.69k, False: 0]
  ------------------
   47|       |
   48|      0|    c->pal_idx_finish = dav1d_pal_idx_finish_avx512icl;
   49|      0|#endif
   50|      0|}

refmvs.c:refmvs_dsp_init_x86:
   41|  9.69k|static ALWAYS_INLINE void refmvs_dsp_init_x86(Dav1dRefmvsDSPContext *const c) {
   42|  9.69k|    const unsigned flags = dav1d_get_cpu_flags();
   43|       |
   44|  9.69k|    if (!(flags & DAV1D_X86_CPU_FLAG_SSE2)) return;
  ------------------
  |  Branch (44:9): [True: 0, False: 9.69k]
  ------------------
   45|       |
   46|  9.69k|    c->splat_mv = dav1d_splat_mv_sse2;
   47|       |
   48|  9.69k|    if (!(flags & DAV1D_X86_CPU_FLAG_SSSE3)) return;
  ------------------
  |  Branch (48:9): [True: 0, False: 9.69k]
  ------------------
   49|       |
   50|  9.69k|    c->save_tmvs = dav1d_save_tmvs_ssse3;
   51|       |
   52|  9.69k|    if (!(flags & DAV1D_X86_CPU_FLAG_SSE41)) return;
  ------------------
  |  Branch (52:9): [True: 0, False: 9.69k]
  ------------------
   53|  9.69k|#if ARCH_X86_64
   54|  9.69k|    c->load_tmvs = dav1d_load_tmvs_sse4;
   55|       |
   56|  9.69k|    if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return;
  ------------------
  |  Branch (56:9): [True: 0, False: 9.69k]
  ------------------
   57|       |
   58|  9.69k|    c->save_tmvs = dav1d_save_tmvs_avx2;
   59|  9.69k|    c->splat_mv = dav1d_splat_mv_avx2;
   60|       |
   61|  9.69k|    if (!(flags & DAV1D_X86_CPU_FLAG_AVX512ICL)) return;
  ------------------
  |  Branch (61:9): [True: 9.69k, False: 0]
  ------------------
   62|       |
   63|      0|    c->save_tmvs = dav1d_save_tmvs_avx512icl;
   64|      0|    c->splat_mv = dav1d_splat_mv_avx512icl;
   65|      0|#endif
   66|      0|}

LLVMFuzzerInitialize:
   59|      2|int LLVMFuzzerInitialize(int *argc, char ***argv) {
   60|      2|    int i = 1;
   61|     11|    for (; i < *argc; i++) {
  ------------------
  |  Branch (61:12): [True: 9, False: 2]
  ------------------
   62|      9|        if (!strcmp((*argv)[i], "--cpumask")) {
  ------------------
  |  Branch (62:13): [True: 0, False: 9]
  ------------------
   63|      0|            const char * cpumask = (*argv)[i+1];
   64|      0|            if (cpumask) {
  ------------------
  |  Branch (64:17): [True: 0, False: 0]
  ------------------
   65|      0|                char *end;
   66|      0|                unsigned res;
   67|      0|                if (!strncmp(cpumask, "0x", 2)) {
  ------------------
  |  Branch (67:21): [True: 0, False: 0]
  ------------------
   68|      0|                    cpumask += 2;
   69|      0|                    res = (unsigned) strtoul(cpumask, &end, 16);
   70|      0|                } else {
   71|      0|                    res = (unsigned) strtoul(cpumask, &end, 0);
   72|      0|                }
   73|      0|                if (end != cpumask && !end[0]) {
  ------------------
  |  Branch (73:21): [True: 0, False: 0]
  |  Branch (73:39): [True: 0, False: 0]
  ------------------
   74|      0|                    dav1d_set_cpu_flags_mask(res);
   75|      0|                }
   76|      0|            }
   77|      0|            break;
   78|      0|        }
   79|      9|    }
   80|       |
   81|      2|    for (; i < *argc - 2; i++) {
  ------------------
  |  Branch (81:12): [True: 0, False: 2]
  ------------------
   82|      0|        (*argv)[i] = (*argv)[i + 2];
   83|      0|    }
   84|       |
   85|      2|    *argc = i;
   86|       |
   87|      2|    return 0;
   88|      2|}
LLVMFuzzerTestOneInput:
   94|  9.70k|{
   95|  9.70k|    Dav1dSettings settings = { 0 };
   96|  9.70k|    Dav1dContext * ctx = NULL;
   97|  9.70k|    Dav1dPicture pic;
   98|  9.70k|    const uint8_t *ptr = data;
   99|  9.70k|    int have_seq_hdr = 0;
  100|  9.70k|    int err;
  101|       |
  102|  9.70k|    dav1d_version();
  103|       |
  104|  9.70k|    if (size < 32) goto end;
  ------------------
  |  Branch (104:9): [True: 8, False: 9.69k]
  ------------------
  105|       |#ifdef DAV1D_ALLOC_FAIL
  106|       |    unsigned h = djb_xor(ptr, 32);
  107|       |    unsigned seed = h;
  108|       |    unsigned probability = h > (RAND_MAX >> 5) ? RAND_MAX >> 5 : h;
  109|       |    int max_frame_delay = (h & 0xf) + 1;
  110|       |    int n_threads = ((h >> 4) & 0x7) + 1;
  111|       |    if (max_frame_delay > 5) max_frame_delay = 1;
  112|       |    if (n_threads > 3) n_threads = 1;
  113|       |#endif
  114|  9.69k|    ptr += 32; // skip ivf header
  115|       |
  116|  9.69k|    dav1d_default_settings(&settings);
  117|       |
  118|       |#ifdef DAV1D_MT_FUZZING
  119|       |    settings.max_frame_delay = settings.n_threads = 4;
  120|       |#elif defined(DAV1D_ALLOC_FAIL)
  121|       |    settings.max_frame_delay = max_frame_delay;
  122|       |    settings.n_threads = n_threads;
  123|       |    dav1d_setup_alloc_fail(seed, probability);
  124|       |#else
  125|  9.69k|    settings.max_frame_delay = settings.n_threads = 1;
  126|  9.69k|#endif
  127|  9.69k|#if defined(DAV1D_FUZZ_MAX_SIZE)
  128|  9.69k|    settings.frame_size_limit = DAV1D_FUZZ_MAX_SIZE;
  ------------------
  |  |   56|  9.69k|#define DAV1D_FUZZ_MAX_SIZE 4096 * 4096
  ------------------
  129|  9.69k|#endif
  130|       |
  131|  9.69k|    err = dav1d_open(&ctx, &settings);
  132|  9.69k|    if (err < 0) goto end;
  ------------------
  |  Branch (132:9): [True: 0, False: 9.69k]
  ------------------
  133|       |
  134|  87.6k|    while (ptr <= data + size - 12) {
  ------------------
  |  Branch (134:12): [True: 78.4k, False: 9.13k]
  ------------------
  135|  78.4k|        Dav1dData buf;
  136|  78.4k|        uint8_t *p;
  137|       |
  138|  78.4k|        size_t frame_size = r32le(ptr);
  139|  78.4k|        ptr += 12;
  140|       |
  141|  78.4k|        if (frame_size > size || ptr > data + size - frame_size)
  ------------------
  |  Branch (141:13): [True: 433, False: 78.0k]
  |  Branch (141:34): [True: 127, False: 77.9k]
  ------------------
  142|    560|            break;
  143|       |
  144|  77.9k|        if (!frame_size) continue;
  ------------------
  |  Branch (144:13): [True: 562, False: 77.3k]
  ------------------
  145|       |
  146|  77.3k|        if (!have_seq_hdr) {
  ------------------
  |  Branch (146:13): [True: 12.7k, False: 64.6k]
  ------------------
  147|  12.7k|            Dav1dSequenceHeader seq;
  148|  12.7k|            int err = dav1d_parse_sequence_header(&seq, ptr, frame_size);
  149|       |            // skip frames until we see a sequence header
  150|  12.7k|            if  (err != 0) {
  ------------------
  |  Branch (150:18): [True: 3.42k, False: 9.33k]
  ------------------
  151|  3.42k|                ptr += frame_size;
  152|  3.42k|                continue;
  153|  3.42k|            }
  154|  9.33k|            have_seq_hdr = 1;
  155|  9.33k|        }
  156|       |
  157|       |        // copy frame data to a new buffer to catch reads past the end of input
  158|  73.9k|        p = dav1d_data_create(&buf, frame_size);
  159|  73.9k|        if (!p) goto cleanup;
  ------------------
  |  Branch (159:13): [True: 0, False: 73.9k]
  ------------------
  160|  73.9k|        memcpy(p, ptr, frame_size);
  161|  73.9k|        ptr += frame_size;
  162|       |
  163|  77.3k|        do {
  164|  77.3k|            if ((err = dav1d_send_data(ctx, &buf)) < 0) {
  ------------------
  |  Branch (164:17): [True: 54.0k, False: 23.3k]
  ------------------
  165|  54.0k|                if (err != DAV1D_ERR(EAGAIN))
  ------------------
  |  |   58|  54.0k|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  |  Branch (165:21): [True: 50.5k, False: 3.58k]
  ------------------
  166|  50.5k|                    break;
  167|  54.0k|            }
  168|  26.8k|            memset(&pic, 0, sizeof(pic));
  169|  26.8k|            err = dav1d_get_picture(ctx, &pic);
  170|  26.8k|            if (err == 0) {
  ------------------
  |  Branch (170:17): [True: 19.6k, False: 7.26k]
  ------------------
  171|  19.6k|                dav1d_picture_unref(&pic);
  172|  19.6k|            } else if (err != DAV1D_ERR(EAGAIN)) {
  ------------------
  |  |   58|  7.26k|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  |  Branch (172:24): [True: 134, False: 7.13k]
  ------------------
  173|    134|                break;
  174|    134|            }
  175|  26.8k|        } while (buf.sz > 0);
  ------------------
  |  Branch (175:18): [True: 3.44k, False: 23.3k]
  ------------------
  176|       |
  177|  73.9k|        if (buf.sz > 0)
  ------------------
  |  Branch (177:13): [True: 50.6k, False: 23.3k]
  ------------------
  178|  50.6k|            dav1d_data_unref(&buf);
  179|  73.9k|    }
  180|       |
  181|  9.69k|    memset(&pic, 0, sizeof(pic));
  182|  9.69k|    if ((err = dav1d_get_picture(ctx, &pic)) == 0) {
  ------------------
  |  Branch (182:9): [True: 240, False: 9.45k]
  ------------------
  183|       |        /* Test calling dav1d_picture_unref() after dav1d_close() */
  184|  1.47k|        do {
  185|  1.47k|            Dav1dPicture pic2 = { 0 };
  186|  1.47k|            if ((err = dav1d_get_picture(ctx, &pic2)) == 0)
  ------------------
  |  Branch (186:17): [True: 1.10k, False: 363]
  ------------------
  187|  1.10k|                dav1d_picture_unref(&pic2);
  188|  1.47k|        } while (err != DAV1D_ERR(EAGAIN));
  ------------------
  |  |   58|  1.47k|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  |  Branch (188:18): [True: 1.23k, False: 240]
  ------------------
  189|       |
  190|    240|        dav1d_close(&ctx);
  191|    240|        dav1d_picture_unref(&pic);
  192|    240|        return 0;
  193|    240|    }
  194|       |
  195|  9.45k|cleanup:
  196|  9.45k|    dav1d_close(&ctx);
  197|  9.46k|end:
  198|  9.46k|    return 0;
  199|  9.45k|}
dav1d_fuzzer.c:r32le:
   52|  78.4k|static unsigned r32le(const uint8_t *const p) {
   53|  78.4k|    return ((uint32_t)p[3] << 24U) | (p[2] << 16U) | (p[1] << 8U) | p[0];
   54|  78.4k|}

