obu.c:clz:
  186|  5.76k|static inline int clz(const unsigned int mask) {
  187|  5.76k|    return __builtin_clz(mask);
  188|  5.76k|}
thread_task.c:ctz:
  182|  1.14M|static inline int ctz(const unsigned int mask) {
  183|  1.14M|    return __builtin_ctz(mask);
  184|  1.14M|}
decode.c:ctz:
  182|   196k|static inline int ctz(const unsigned int mask) {
  183|   196k|    return __builtin_ctz(mask);
  184|   196k|}
decode.c:clz:
  186|  22.1M|static inline int clz(const unsigned int mask) {
  187|  22.1M|    return __builtin_clz(mask);
  188|  22.1M|}
getbits.c:clz:
  186|   149k|static inline int clz(const unsigned int mask) {
  187|   149k|    return __builtin_clz(mask);
  188|   149k|}
lf_mask.c:clz:
  186|  8.01M|static inline int clz(const unsigned int mask) {
  187|  8.01M|    return __builtin_clz(mask);
  188|  8.01M|}
warpmv.c:clz:
  186|   309k|static inline int clz(const unsigned int mask) {
  187|   309k|    return __builtin_clz(mask);
  188|   309k|}
warpmv.c:clzll:
  190|   162k|static inline int clzll(const unsigned long long mask) {
  191|   162k|    return __builtin_clzll(mask);
  192|   162k|}
looprestoration_tmpl.c:clz:
  186|  1.72M|static inline int clz(const unsigned int mask) {
  187|  1.72M|    return __builtin_clz(mask);
  188|  1.72M|}
recon_tmpl.c:clz:
  186|  54.8M|static inline int clz(const unsigned int mask) {
  187|  54.8M|    return __builtin_clz(mask);
  188|  54.8M|}
cdef_apply_tmpl.c:clz:
  186|  3.48M|static inline int clz(const unsigned int mask) {
  187|  3.48M|    return __builtin_clz(mask);
  188|  3.48M|}
ipred_prepare_tmpl.c:clz:
  186|  11.6M|static inline int clz(const unsigned int mask) {
  187|  11.6M|    return __builtin_clz(mask);
  188|  11.6M|}

fg_apply_tmpl.c:PXSTRIDE:
   79|  51.2k|static inline ptrdiff_t PXSTRIDE(const ptrdiff_t x) {
   80|  51.2k|    assert(!(x & 1));
  ------------------
  |  Branch (80:5): [True: 51.2k, False: 3]
  ------------------
   81|  51.2k|    return x >> 1;
   82|  51.2k|}
itx_tmpl.c:PXSTRIDE:
   79|  2.68M|static inline ptrdiff_t PXSTRIDE(const ptrdiff_t x) {
   80|  2.68M|    assert(!(x & 1));
  ------------------
  |  Branch (80:5): [True: 2.68M, False: 18.4E]
  ------------------
   81|  2.68M|    return x >> 1;
   82|  2.68M|}
looprestoration_tmpl.c:PXSTRIDE:
   79|  4.18M|static inline ptrdiff_t PXSTRIDE(const ptrdiff_t x) {
   80|  4.18M|    assert(!(x & 1));
  ------------------
  |  Branch (80:5): [True: 4.18M, False: 524]
  ------------------
   81|  4.18M|    return x >> 1;
   82|  4.18M|}
recon_tmpl.c:PXSTRIDE:
   79|  26.6M|static inline ptrdiff_t PXSTRIDE(const ptrdiff_t x) {
   80|  26.6M|    assert(!(x & 1));
  ------------------
  |  Branch (80:5): [True: 26.6M, False: 18.4E]
  ------------------
   81|  26.6M|    return x >> 1;
   82|  26.6M|}
cdef_apply_tmpl.c:PXSTRIDE:
   79|  52.9M|static inline ptrdiff_t PXSTRIDE(const ptrdiff_t x) {
   80|  52.9M|    assert(!(x & 1));
  ------------------
  |  Branch (80:5): [True: 52.8M, False: 93.7k]
  ------------------
   81|  52.8M|    return x >> 1;
   82|  52.9M|}
ipred_prepare_tmpl.c:PXSTRIDE:
   79|  78.4M|static inline ptrdiff_t PXSTRIDE(const ptrdiff_t x) {
   80|  78.4M|    assert(!(x & 1));
  ------------------
  |  Branch (80:5): [True: 78.2M, False: 221k]
  ------------------
   81|  78.2M|    return x >> 1;
   82|  78.4M|}
ipred_prepare_tmpl.c:pixel_set:
   66|  1.46M|static inline void pixel_set(pixel *const dst, const int val, const int num) {
   67|  15.5M|    for (int n = 0; n < num; n++)
  ------------------
  |  Branch (67:21): [True: 14.0M, False: 1.46M]
  ------------------
   68|  14.0M|        dst[n] = val;
   69|  1.46M|}
lf_apply_tmpl.c:PXSTRIDE:
   79|  19.9M|static inline ptrdiff_t PXSTRIDE(const ptrdiff_t x) {
   80|  19.9M|    assert(!(x & 1));
  ------------------
  |  Branch (80:5): [True: 19.9M, False: 18.4E]
  ------------------
   81|  19.9M|    return x >> 1;
   82|  19.9M|}
lr_apply_tmpl.c:PXSTRIDE:
   79|  1.59M|static inline ptrdiff_t PXSTRIDE(const ptrdiff_t x) {
   80|  1.59M|    assert(!(x & 1));
  ------------------
  |  Branch (80:5): [True: 1.59M, False: 18.4E]
  ------------------
   81|  1.59M|    return x >> 1;
   82|  1.59M|}

lib.c:umin:
   47|  10.2k|static inline unsigned umin(const unsigned a, const unsigned b) {
   48|  10.2k|    return a < b ? a : b;
  ------------------
  |  Branch (48:12): [True: 0, False: 10.2k]
  ------------------
   49|  10.2k|}
obu.c:ulog2:
   67|  5.76k|static inline int ulog2(const unsigned v) {
   68|  5.76k|    return 31 ^ clz(v);
   69|  5.76k|}
obu.c:imin:
   39|   924k|static inline int imin(const int a, const int b) {
   40|   924k|    return a < b ? a : b;
  ------------------
  |  Branch (40:12): [True: 821k, False: 103k]
  ------------------
   41|   924k|}
obu.c:imax:
   35|   832k|static inline int imax(const int a, const int b) {
   36|   832k|    return a > b ? a : b;
  ------------------
  |  Branch (36:12): [True: 80.5k, False: 751k]
  ------------------
   37|   832k|}
obu.c:iclip_u8:
   55|   167k|static inline int iclip_u8(const int v) {
   56|   167k|    return iclip(v, 0, 255);
   57|   167k|}
obu.c:iclip:
   51|   167k|static inline int iclip(const int v, const int min, const int max) {
   52|   167k|    return v < min ? min : v > max ? max : v;
  ------------------
  |  Branch (52:12): [True: 4.33k, False: 162k]
  |  Branch (52:28): [True: 2.66k, False: 160k]
  ------------------
   53|   167k|}
refmvs.c:imin:
   39|  55.2M|static inline int imin(const int a, const int b) {
   40|  55.2M|    return a < b ? a : b;
  ------------------
  |  Branch (40:12): [True: 24.0M, False: 31.2M]
  ------------------
   41|  55.2M|}
refmvs.c:apply_sign:
   59|  2.01M|static inline int apply_sign(const int v, const int s) {
   60|  2.01M|    return s < 0 ? -v : v;
  ------------------
  |  Branch (60:12): [True: 1.18M, False: 838k]
  ------------------
   61|  2.01M|}
refmvs.c:imax:
   35|  25.4M|static inline int imax(const int a, const int b) {
   36|  25.4M|    return a > b ? a : b;
  ------------------
  |  Branch (36:12): [True: 3.40M, False: 22.0M]
  ------------------
   37|  25.4M|}
refmvs.c:iclip:
   51|  26.8M|static inline int iclip(const int v, const int min, const int max) {
   52|  26.8M|    return v < min ? min : v > max ? max : v;
  ------------------
  |  Branch (52:12): [True: 321k, False: 26.5M]
  |  Branch (52:28): [True: 376k, False: 26.1M]
  ------------------
   53|  26.8M|}
thread_task.c:imax:
   35|  6.27M|static inline int imax(const int a, const int b) {
   36|  6.27M|    return a > b ? a : b;
  ------------------
  |  Branch (36:12): [True: 867k, False: 5.40M]
  ------------------
   37|  6.27M|}
thread_task.c:iclip:
   51|  1.01M|static inline int iclip(const int v, const int min, const int max) {
   52|  1.01M|    return v < min ? min : v > max ? max : v;
  ------------------
  |  Branch (52:12): [True: 410, False: 1.01M]
  |  Branch (52:28): [True: 29.8k, False: 980k]
  ------------------
   53|  1.01M|}
thread_task.c:umin:
   47|  9.20M|static inline unsigned umin(const unsigned a, const unsigned b) {
   48|  9.20M|    return a < b ? a : b;
  ------------------
  |  Branch (48:12): [True: 66.3k, False: 9.13M]
  ------------------
   49|  9.20M|}
wedge.c:imax:
   35|    256|static inline int imax(const int a, const int b) {
   36|    256|    return a > b ? a : b;
  ------------------
  |  Branch (36:12): [True: 128, False: 128]
  ------------------
   37|    256|}
wedge.c:imin:
   39|  2.48k|static inline int imin(const int a, const int b) {
   40|  2.48k|    return a < b ? a : b;
  ------------------
  |  Branch (40:12): [True: 1.41k, False: 1.06k]
  ------------------
   41|  2.48k|}
fg_apply_tmpl.c:imin:
   39|  39.1k|static inline int imin(const int a, const int b) {
   40|  39.1k|    return a < b ? a : b;
  ------------------
  |  Branch (40:12): [True: 7.46k, False: 31.6k]
  ------------------
   41|  39.1k|}
cdf.c:imin:
   39|   102k|static inline int imin(const int a, const int b) {
   40|   102k|    return a < b ? a : b;
  ------------------
  |  Branch (40:12): [True: 25.6k, False: 76.8k]
  ------------------
   41|   102k|}
decode.c:iclip:
   51|  3.85M|static inline int iclip(const int v, const int min, const int max) {
   52|  3.85M|    return v < min ? min : v > max ? max : v;
  ------------------
  |  Branch (52:12): [True: 191k, False: 3.66M]
  |  Branch (52:28): [True: 256k, False: 3.40M]
  ------------------
   53|  3.85M|}
decode.c:apply_sign:
   59|  1.30M|static inline int apply_sign(const int v, const int s) {
   60|  1.30M|    return s < 0 ? -v : v;
  ------------------
  |  Branch (60:12): [True: 936k, False: 364k]
  ------------------
   61|  1.30M|}
decode.c:apply_sign64:
   63|  1.37M|static inline int apply_sign64(const int v, const int64_t s) {
   64|  1.37M|    return s < 0 ? -v : v;
  ------------------
  |  Branch (64:12): [True: 186k, False: 1.18M]
  ------------------
   65|  1.37M|}
decode.c:ulog2:
   67|  22.1M|static inline int ulog2(const unsigned v) {
   68|  22.1M|    return 31 ^ clz(v);
   69|  22.1M|}
decode.c:imax:
   35|  26.2M|static inline int imax(const int a, const int b) {
   36|  26.2M|    return a > b ? a : b;
  ------------------
  |  Branch (36:12): [True: 11.7M, False: 14.5M]
  ------------------
   37|  26.2M|}
decode.c:imin:
   39|  61.4M|static inline int imin(const int a, const int b) {
   40|  61.4M|    return a < b ? a : b;
  ------------------
  |  Branch (40:12): [True: 34.6M, False: 26.7M]
  ------------------
   41|  61.4M|}
decode.c:iclip_u8:
   55|  2.61M|static inline int iclip_u8(const int v) {
   56|  2.61M|    return iclip(v, 0, 255);
   57|  2.61M|}
getbits.c:ulog2:
   67|   149k|static inline int ulog2(const unsigned v) {
   68|   149k|    return 31 ^ clz(v);
   69|   149k|}
getbits.c:inv_recenter:
   75|   124k|static inline unsigned inv_recenter(const unsigned r, const unsigned v) {
   76|   124k|    if (v > (r << 1))
  ------------------
  |  Branch (76:9): [True: 3.30k, False: 121k]
  ------------------
   77|  3.30k|        return v;
   78|   121k|    else if ((v & 1) == 0)
  ------------------
  |  Branch (78:14): [True: 79.8k, False: 41.2k]
  ------------------
   79|  79.8k|        return (v >> 1) + r;
   80|  41.2k|    else
   81|  41.2k|        return r - ((v + 1) >> 1);
   82|   124k|}
lf_mask.c:imin:
   39|   121M|static inline int imin(const int a, const int b) {
   40|   121M|    return a < b ? a : b;
  ------------------
  |  Branch (40:12): [True: 11.4M, False: 109M]
  ------------------
   41|   121M|}
lf_mask.c:ulog2:
   67|  8.01M|static inline int ulog2(const unsigned v) {
   68|  8.01M|    return 31 ^ clz(v);
   69|  8.01M|}
lf_mask.c:imax:
   35|  2.02M|static inline int imax(const int a, const int b) {
   36|  2.02M|    return a > b ? a : b;
  ------------------
  |  Branch (36:12): [True: 1.89M, False: 130k]
  ------------------
   37|  2.02M|}
lf_mask.c:iclip:
   51|  6.33M|static inline int iclip(const int v, const int min, const int max) {
   52|  6.33M|    return v < min ? min : v > max ? max : v;
  ------------------
  |  Branch (52:12): [True: 495k, False: 5.83M]
  |  Branch (52:28): [True: 204k, False: 5.63M]
  ------------------
   53|  6.33M|}
msac.c:inv_recenter:
   75|   202k|static inline unsigned inv_recenter(const unsigned r, const unsigned v) {
   76|   202k|    if (v > (r << 1))
  ------------------
  |  Branch (76:9): [True: 67.2k, False: 134k]
  ------------------
   77|  67.2k|        return v;
   78|   134k|    else if ((v & 1) == 0)
  ------------------
  |  Branch (78:14): [True: 68.3k, False: 66.6k]
  ------------------
   79|  68.3k|        return (v >> 1) + r;
   80|  66.6k|    else
   81|  66.6k|        return r - ((v + 1) >> 1);
   82|   202k|}
warpmv.c:apply_sign:
   59|  1.54M|static inline int apply_sign(const int v, const int s) {
   60|  1.54M|    return s < 0 ? -v : v;
  ------------------
  |  Branch (60:12): [True: 421k, False: 1.12M]
  ------------------
   61|  1.54M|}
warpmv.c:ulog2:
   67|   309k|static inline int ulog2(const unsigned v) {
   68|   309k|    return 31 ^ clz(v);
   69|   309k|}
warpmv.c:apply_sign64:
   63|  1.42M|static inline int apply_sign64(const int v, const int64_t s) {
   64|  1.42M|    return s < 0 ? -v : v;
  ------------------
  |  Branch (64:12): [True: 250k, False: 1.17M]
  ------------------
   65|  1.42M|}
warpmv.c:iclip:
   51|  2.46M|static inline int iclip(const int v, const int min, const int max) {
   52|  2.46M|    return v < min ? min : v > max ? max : v;
  ------------------
  |  Branch (52:12): [True: 66.7k, False: 2.39M]
  |  Branch (52:28): [True: 58.5k, False: 2.33M]
  ------------------
   53|  2.46M|}
warpmv.c:u64log2:
   71|   162k|static inline int u64log2(const uint64_t v) {
   72|   162k|    return 63 ^ clzll(v);
   73|   162k|}
itx_tmpl.c:iclip:
   51|   173M|static inline int iclip(const int v, const int min, const int max) {
   52|   173M|    return v < min ? min : v > max ? max : v;
  ------------------
  |  Branch (52:12): [True: 11.9M, False: 161M]
  |  Branch (52:28): [True: 17.4M, False: 143M]
  ------------------
   53|   173M|}
itx_tmpl.c:imin:
   39|  64.3k|static inline int imin(const int a, const int b) {
   40|  64.3k|    return a < b ? a : b;
  ------------------
  |  Branch (40:12): [True: 8.96k, False: 55.3k]
  ------------------
   41|  64.3k|}
looprestoration_tmpl.c:iclip:
   51|   131M|static inline int iclip(const int v, const int min, const int max) {
   52|   131M|    return v < min ? min : v > max ? max : v;
  ------------------
  |  Branch (52:12): [True: 756k, False: 130M]
  |  Branch (52:28): [True: 640k, False: 130M]
  ------------------
   53|   131M|}
looprestoration_tmpl.c:imax:
   35|   146M|static inline int imax(const int a, const int b) {
   36|   146M|    return a > b ? a : b;
  ------------------
  |  Branch (36:12): [True: 109M, False: 37.0M]
  ------------------
   37|   146M|}
looprestoration_tmpl.c:umin:
   47|   146M|static inline unsigned umin(const unsigned a, const unsigned b) {
   48|   146M|    return a < b ? a : b;
  ------------------
  |  Branch (48:12): [True: 136M, False: 10.4M]
  ------------------
   49|   146M|}
recon_tmpl.c:ulog2:
   67|  54.7M|static inline int ulog2(const unsigned v) {
   68|  54.7M|    return 31 ^ clz(v);
   69|  54.7M|}
recon_tmpl.c:imin:
   39|   162M|static inline int imin(const int a, const int b) {
   40|   162M|    return a < b ? a : b;
  ------------------
  |  Branch (40:12): [True: 143M, False: 18.5M]
  ------------------
   41|   162M|}
recon_tmpl.c:imax:
   35|  10.8M|static inline int imax(const int a, const int b) {
   36|  10.8M|    return a > b ? a : b;
  ------------------
  |  Branch (36:12): [True: 7.20M, False: 3.67M]
  ------------------
   37|  10.8M|}
recon_tmpl.c:umin:
   47|   304M|static inline unsigned umin(const unsigned a, const unsigned b) {
   48|   304M|    return a < b ? a : b;
  ------------------
  |  Branch (48:12): [True: 194M, False: 110M]
  ------------------
   49|   304M|}
recon_tmpl.c:apply_sign64:
   63|  1.14M|static inline int apply_sign64(const int v, const int64_t s) {
   64|  1.14M|    return s < 0 ? -v : v;
  ------------------
  |  Branch (64:12): [True: 80.2k, False: 1.06M]
  ------------------
   65|  1.14M|}
recon_tmpl.c:iclip:
   51|  1.31M|static inline int iclip(const int v, const int min, const int max) {
   52|  1.31M|    return v < min ? min : v > max ? max : v;
  ------------------
  |  Branch (52:12): [True: 78.7k, False: 1.23M]
  |  Branch (52:28): [True: 11.2k, False: 1.22M]
  ------------------
   53|  1.31M|}
itx_1d.c:iclip:
   51|   381M|static inline int iclip(const int v, const int min, const int max) {
   52|   381M|    return v < min ? min : v > max ? max : v;
  ------------------
  |  Branch (52:12): [True: 7.64M, False: 373M]
  |  Branch (52:28): [True: 7.59M, False: 366M]
  ------------------
   53|   381M|}
scan.c:imax:
   35|  3.34k|static inline int imax(const int a, const int b) {
   36|  3.34k|    return a > b ? a : b;
  ------------------
  |  Branch (36:12): [True: 2.82k, False: 523]
  ------------------
   37|  3.34k|}
cdef_apply_tmpl.c:imin:
   39|  13.3M|static inline int imin(const int a, const int b) {
   40|  13.3M|    return a < b ? a : b;
  ------------------
  |  Branch (40:12): [True: 11.8M, False: 1.41M]
  ------------------
   41|  13.3M|}
cdef_apply_tmpl.c:ulog2:
   67|  3.48M|static inline int ulog2(const unsigned v) {
   68|  3.48M|    return 31 ^ clz(v);
   69|  3.48M|}
ipred_prepare_tmpl.c:imin:
   39|  36.2M|static inline int imin(const int a, const int b) {
   40|  36.2M|    return a < b ? a : b;
  ------------------
  |  Branch (40:12): [True: 34.4M, False: 1.72M]
  ------------------
   41|  36.2M|}
lf_apply_tmpl.c:imin:
   39|  7.57M|static inline int imin(const int a, const int b) {
   40|  7.57M|    return a < b ? a : b;
  ------------------
  |  Branch (40:12): [True: 841k, False: 6.72M]
  ------------------
   41|  7.57M|}
lr_apply_tmpl.c:imin:
   39|   351k|static inline int imin(const int a, const int b) {
   40|   351k|    return a < b ? a : b;
  ------------------
  |  Branch (40:12): [True: 224k, False: 127k]
  ------------------
   41|   351k|}

dav1d_cdef_brow_8bpc:
  102|   170k|{
  103|   170k|    Dav1dFrameContext *const f = (Dav1dFrameContext *)tc->f;
  104|   170k|    const int bitdepth_min_8 = BITDEPTH == 8 ? 0 : f->cur.p.bpc - 8;
  ------------------
  |  Branch (104:32): [True: 170k, Folded]
  ------------------
  105|   170k|    const Dav1dDSPContext *const dsp = f->dsp;
  106|   170k|    enum CdefEdgeFlags edges = CDEF_HAVE_BOTTOM | (by_start > 0 ? CDEF_HAVE_TOP : 0);
  ------------------
  |  Branch (106:52): [True: 160k, False: 9.91k]
  ------------------
  107|   170k|    pixel *ptrs[3] = { p[0], p[1], p[2] };
  108|   170k|    const int sbsz = 16;
  109|   170k|    const int sb64w = f->sb128w << 1;
  110|   170k|    const int damping = f->frame_hdr->cdef.damping + bitdepth_min_8;
  111|   170k|    const enum Dav1dPixelLayout layout = f->cur.p.layout;
  112|   170k|    const int uv_idx = DAV1D_PIXEL_LAYOUT_I444 - layout;
  113|   170k|    const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420;
  114|   170k|    const int ss_hor = layout != DAV1D_PIXEL_LAYOUT_I444;
  115|   170k|    static const uint8_t uv_dirs[2][8] = { { 0, 1, 2, 3, 4, 5, 6, 7 },
  116|   170k|                                           { 7, 0, 2, 4, 5, 6, 6, 6 } };
  117|   170k|    const uint8_t *uv_dir = uv_dirs[layout == DAV1D_PIXEL_LAYOUT_I422];
  118|   170k|    const int have_tt = f->c->n_tc > 1;
  119|   170k|    const int sb128 = f->seq_hdr->sb128;
  120|   170k|    const int resize = f->frame_hdr->width[0] != f->frame_hdr->width[1];
  121|   170k|    const ptrdiff_t y_stride = PXSTRIDE(f->cur.stride[0]);
  ------------------
  |  |   53|   170k|#define PXSTRIDE(x) (x)
  ------------------
  122|   170k|    const ptrdiff_t uv_stride = PXSTRIDE(f->cur.stride[1]);
  ------------------
  |  |   53|   170k|#define PXSTRIDE(x) (x)
  ------------------
  123|       |
  124|  1.01M|    for (int bit = 0, by = by_start; by < by_end; by += 2, edges |= CDEF_HAVE_TOP) {
  ------------------
  |  Branch (124:38): [True: 843k, False: 172k]
  ------------------
  125|   843k|        const int tf = tc->top_pre_cdef_toggle;
  126|   843k|        const int by_idx = (by & 30) >> 1;
  127|   843k|        if (by + 2 >= f->bh) edges &= ~CDEF_HAVE_BOTTOM;
  ------------------
  |  Branch (127:13): [True: 9.69k, False: 833k]
  ------------------
  128|       |
  129|   843k|        if ((!have_tt || sbrow_start || by + 2 < by_end) &&
  ------------------
  |  Branch (129:14): [True: 18.4E, False: 843k]
  |  Branch (129:26): [True: 79.8k, False: 763k]
  |  Branch (129:41): [True: 673k, False: 90.3k]
  ------------------
  130|   753k|            edges & CDEF_HAVE_BOTTOM)
  ------------------
  |  Branch (130:13): [True: 753k, False: 18.4E]
  ------------------
  131|   753k|        {
  132|       |            // backup pre-filter data for next iteration
  133|   753k|            pixel *const cdef_top_bak[3] = {
  134|   753k|                f->lf.cdef_line[!tf][0] + have_tt * sby * 4 * y_stride,
  135|   753k|                f->lf.cdef_line[!tf][1] + have_tt * sby * 8 * uv_stride,
  136|   753k|                f->lf.cdef_line[!tf][2] + have_tt * sby * 8 * uv_stride
  137|   753k|            };
  138|   753k|            backup2lines(cdef_top_bak, ptrs, f->cur.stride, layout);
  139|   753k|        }
  140|       |
  141|   843k|        ALIGN_STK_16(pixel, lr_bak, 2 /* idx */, [3 /* plane */][8 /* y */][2 /* x */]);
  ------------------
  |  |  100|   843k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|   843k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
  142|   843k|        pixel *iptrs[3] = { ptrs[0], ptrs[1], ptrs[2] };
  143|   843k|        edges &= ~CDEF_HAVE_LEFT;
  144|   843k|        edges |= CDEF_HAVE_RIGHT;
  145|   843k|        enum Backup2x8Flags prev_flag = 0;
  146|  3.54M|        for (int sbx = 0; sbx < sb64w; sbx++, edges |= CDEF_HAVE_LEFT) {
  ------------------
  |  Branch (146:27): [True: 2.70M, False: 845k]
  ------------------
  147|  2.70M|            const int sb128x = sbx >> 1;
  148|  2.70M|            const int sb64_idx = ((by & sbsz) >> 3) + (sbx & 1);
  149|  2.70M|            const int cdef_idx = lflvl[sb128x].cdef_idx[sb64_idx];
  150|  2.70M|            if (cdef_idx == -1 ||
  ------------------
  |  Branch (150:17): [True: 1.76M, False: 938k]
  ------------------
  151|   938k|                (!f->frame_hdr->cdef.y_strength[cdef_idx] &&
  ------------------
  |  Branch (151:18): [True: 293k, False: 644k]
  ------------------
  152|   293k|                 !f->frame_hdr->cdef.uv_strength[cdef_idx]))
  ------------------
  |  Branch (152:18): [True: 247k, False: 45.6k]
  ------------------
  153|  2.01M|            {
  154|  2.01M|                prev_flag = 0;
  155|  2.01M|                goto next_sb;
  156|  2.01M|            }
  157|       |
  158|       |            // Create a complete 32-bit mask for the sb row ahead of time.
  159|   688k|            const uint16_t (*noskip_row)[2] = &lflvl[sb128x].noskip_mask[by_idx];
  160|   688k|            const unsigned noskip_mask = (unsigned) noskip_row[0][1] << 16 |
  161|   688k|                                                    noskip_row[0][0];
  162|       |
  163|   688k|            const int y_lvl = f->frame_hdr->cdef.y_strength[cdef_idx];
  164|   688k|            const int uv_lvl = f->frame_hdr->cdef.uv_strength[cdef_idx];
  165|   688k|            const enum Backup2x8Flags flag = !!y_lvl + (!!uv_lvl << 1);
  166|       |
  167|   688k|            const int y_pri_lvl = (y_lvl >> 2) << bitdepth_min_8;
  168|   688k|            int y_sec_lvl = y_lvl & 3;
  169|   688k|            y_sec_lvl += y_sec_lvl == 3;
  170|   688k|            y_sec_lvl <<= bitdepth_min_8;
  171|       |
  172|   688k|            const int uv_pri_lvl = (uv_lvl >> 2) << bitdepth_min_8;
  173|   688k|            int uv_sec_lvl = uv_lvl & 3;
  174|   688k|            uv_sec_lvl += uv_sec_lvl == 3;
  175|   688k|            uv_sec_lvl <<= bitdepth_min_8;
  176|       |
  177|   688k|            pixel *bptrs[3] = { iptrs[0], iptrs[1], iptrs[2] };
  178|  5.92M|            for (int bx = sbx * sbsz; bx < imin((sbx + 1) * sbsz, f->bw);
  ------------------
  |  Branch (178:39): [True: 5.24M, False: 683k]
  ------------------
  179|  5.23M|                 bx += 2, edges |= CDEF_HAVE_LEFT)
  180|  5.24M|            {
  181|  5.24M|                if (bx + 2 >= f->bw) edges &= ~CDEF_HAVE_RIGHT;
  ------------------
  |  Branch (181:21): [True: 64.4k, False: 5.17M]
  ------------------
  182|       |
  183|       |                // check if this 8x8 block had any coded coefficients; if not,
  184|       |                // go to the next block
  185|  5.24M|                const uint32_t bx_mask = 3U << (bx & 30);
  186|  5.24M|                if (!(noskip_mask & bx_mask)) {
  ------------------
  |  Branch (186:21): [True: 1.20M, False: 4.03M]
  ------------------
  187|  1.20M|                    prev_flag = 0;
  188|  1.20M|                    goto next_b;
  189|  1.20M|                }
  190|  4.03M|                const enum Backup2x8Flags do_left = (prev_flag ^ flag) & flag;
  191|  4.03M|                prev_flag = flag;
  192|  4.03M|                if (do_left && edges & CDEF_HAVE_LEFT) {
  ------------------
  |  Branch (192:21): [True: 255k, False: 3.78M]
  |  Branch (192:32): [True: 199k, False: 55.9k]
  ------------------
  193|       |                    // we didn't backup the prefilter data because it wasn't
  194|       |                    // there, so do it here instead
  195|   199k|                    backup2x8(lr_bak[bit], bptrs, f->cur.stride, 0, layout, do_left);
  196|   199k|                }
  197|  4.03M|                if (edges & CDEF_HAVE_RIGHT) {
  ------------------
  |  Branch (197:21): [True: 3.99M, False: 43.2k]
  ------------------
  198|       |                    // backup pre-filter data for next iteration
  199|  3.99M|                    backup2x8(lr_bak[!bit], bptrs, f->cur.stride, 8, layout, flag);
  200|  3.99M|                }
  201|       |
  202|  4.03M|                int dir;
  203|  4.03M|                unsigned variance;
  204|  4.03M|                if (y_pri_lvl || uv_pri_lvl)
  ------------------
  |  Branch (204:21): [True: 3.27M, False: 757k]
  |  Branch (204:34): [True: 452k, False: 305k]
  ------------------
  205|  3.71M|                    dir = dsp->cdef.dir(bptrs[0], f->cur.stride[0],
  206|  3.71M|                                        &variance HIGHBD_CALL_SUFFIX);
  207|       |
  208|  4.03M|                const pixel *top, *bot;
  209|  4.03M|                ptrdiff_t offset;
  210|       |
  211|  4.03M|                if (!have_tt) goto st_y;
  ------------------
  |  Branch (211:21): [True: 0, False: 4.03M]
  ------------------
  212|  4.03M|                if (sbrow_start && by == by_start) {
  ------------------
  |  Branch (212:21): [True: 255k, False: 3.78M]
  |  Branch (212:36): [True: 255k, False: 18.4E]
  ------------------
  213|   255k|                    if (resize) {
  ------------------
  |  Branch (213:25): [True: 7.48k, False: 248k]
  ------------------
  214|  7.48k|                        offset = (sby - 1) * 4 * y_stride + bx * 4;
  215|  7.48k|                        top = &f->lf.cdef_lpf_line[0][offset];
  216|   248k|                    } else {
  217|   248k|                        offset = (sby * (4 << sb128) - 4) * y_stride + bx * 4;
  218|   248k|                        top = &f->lf.lr_lpf_line[0][offset];
  219|   248k|                    }
  220|   255k|                    bot = bptrs[0] + 8 * y_stride;
  221|  3.78M|                } else if (!sbrow_start && by + 2 >= by_end) {
  ------------------
  |  Branch (221:28): [True: 3.78M, False: 18.4E]
  |  Branch (221:44): [True: 299k, False: 3.48M]
  ------------------
  222|   299k|                    top = &f->lf.cdef_line[tf][0][sby * 4 * y_stride + bx * 4];
  223|   299k|                    if (resize) {
  ------------------
  |  Branch (223:25): [True: 9.13k, False: 290k]
  ------------------
  224|  9.13k|                        offset = (sby * 4 + 2) * y_stride + bx * 4;
  225|  9.13k|                        bot = &f->lf.cdef_lpf_line[0][offset];
  226|   290k|                    } else {
  227|   290k|                        const int line = sby * (4 << sb128) + 4 * sb128 + 2;
  228|   290k|                        offset = line * y_stride + bx * 4;
  229|   290k|                        bot = &f->lf.lr_lpf_line[0][offset];
  230|   290k|                    }
  231|  3.48M|                } else {
  232|  3.48M|            st_y:;
  233|  3.48M|                    offset = sby * 4 * y_stride;
  234|  3.48M|                    top = &f->lf.cdef_line[tf][0][have_tt * offset + bx * 4];
  235|  3.48M|                    bot = bptrs[0] + 8 * y_stride;
  236|  3.48M|                }
  237|  4.04M|                if (y_pri_lvl) {
  ------------------
  |  Branch (237:21): [True: 3.27M, False: 763k]
  ------------------
  238|  3.27M|                    const int adj_y_pri_lvl = adjust_strength(y_pri_lvl, variance);
  239|  3.27M|                    if (adj_y_pri_lvl || y_sec_lvl)
  ------------------
  |  Branch (239:25): [True: 2.52M, False: 750k]
  |  Branch (239:42): [True: 280k, False: 470k]
  ------------------
  240|  2.81M|                        dsp->cdef.fb[0](bptrs[0], f->cur.stride[0], lr_bak[bit][0],
  241|  2.81M|                                        top, bot, adj_y_pri_lvl, y_sec_lvl,
  242|  2.81M|                                        dir, damping, edges HIGHBD_CALL_SUFFIX);
  243|  3.27M|                } else if (y_sec_lvl)
  ------------------
  |  Branch (243:28): [True: 511k, False: 251k]
  ------------------
  244|   511k|                    dsp->cdef.fb[0](bptrs[0], f->cur.stride[0], lr_bak[bit][0],
  245|   511k|                                    top, bot, 0, y_sec_lvl, 0, damping,
  246|   511k|                                    edges HIGHBD_CALL_SUFFIX);
  247|       |
  248|  4.04M|                if (!uv_lvl) goto skip_uv;
  ------------------
  |  Branch (248:21): [True: 416k, False: 3.62M]
  ------------------
  249|  4.04M|                assert(layout != DAV1D_PIXEL_LAYOUT_I400);
  ------------------
  |  Branch (249:17): [True: 3.62M, False: 976]
  ------------------
  250|       |
  251|  3.62M|                const int uvdir = uv_pri_lvl ? uv_dir[dir] : 0;
  ------------------
  |  Branch (251:35): [True: 3.33M, False: 290k]
  ------------------
  252|  10.8M|                for (int pl = 1; pl <= 2; pl++) {
  ------------------
  |  Branch (252:34): [True: 7.23M, False: 3.62M]
  ------------------
  253|  7.23M|                    if (!have_tt) goto st_uv;
  ------------------
  |  Branch (253:25): [True: 0, False: 7.23M]
  ------------------
  254|  7.23M|                    if (sbrow_start && by == by_start) {
  ------------------
  |  Branch (254:25): [True: 439k, False: 6.79M]
  |  Branch (254:40): [True: 439k, False: 18.4E]
  ------------------
  255|   439k|                        if (resize) {
  ------------------
  |  Branch (255:29): [True: 14.0k, False: 425k]
  ------------------
  256|  14.0k|                            offset = (sby - 1) * 4 * uv_stride + (bx * 4 >> ss_hor);
  257|  14.0k|                            top = &f->lf.cdef_lpf_line[pl][offset];
  258|   425k|                        } else {
  259|   425k|                            const int line = sby * (4 << sb128) - 4;
  260|   425k|                            offset = line * uv_stride + (bx * 4 >> ss_hor);
  261|   425k|                            top = &f->lf.lr_lpf_line[pl][offset];
  262|   425k|                        }
  263|   439k|                        bot = bptrs[pl] + (8 >> ss_ver) * uv_stride;
  264|  6.79M|                    } else if (!sbrow_start && by + 2 >= by_end) {
  ------------------
  |  Branch (264:32): [True: 6.79M, False: 18.4E]
  |  Branch (264:48): [True: 507k, False: 6.29M]
  ------------------
  265|   507k|                        const ptrdiff_t top_offset = sby * 8 * uv_stride +
  266|   507k|                                                     (bx * 4 >> ss_hor);
  267|   507k|                        top = &f->lf.cdef_line[tf][pl][top_offset];
  268|   507k|                        if (resize) {
  ------------------
  |  Branch (268:29): [True: 16.0k, False: 491k]
  ------------------
  269|  16.0k|                            offset = (sby * 4 + 2) * uv_stride + (bx * 4 >> ss_hor);
  270|  16.0k|                            bot = &f->lf.cdef_lpf_line[pl][offset];
  271|   491k|                        } else {
  272|   491k|                            const int line = sby * (4 << sb128) + 4 * sb128 + 2;
  273|   491k|                            offset = line * uv_stride + (bx * 4 >> ss_hor);
  274|   491k|                            bot = &f->lf.lr_lpf_line[pl][offset];
  275|   491k|                        }
  276|  6.28M|                    } else {
  277|  6.28M|                st_uv:;
  278|  6.28M|                        const ptrdiff_t offset = sby * 8 * uv_stride;
  279|  6.28M|                        top = &f->lf.cdef_line[tf][pl][have_tt * offset + (bx * 4 >> ss_hor)];
  280|  6.28M|                        bot = bptrs[pl] + (8 >> ss_ver) * uv_stride;
  281|  6.28M|                    }
  282|  7.23M|                    dsp->cdef.fb[uv_idx](bptrs[pl], f->cur.stride[1],
  283|  7.23M|                                         lr_bak[bit][pl], top, bot,
  284|  7.23M|                                         uv_pri_lvl, uv_sec_lvl, uvdir,
  285|  7.23M|                                         damping - 1, edges HIGHBD_CALL_SUFFIX);
  286|  7.23M|                }
  287|       |
  288|  4.03M|            skip_uv:
  289|  4.03M|                bit ^= 1;
  290|       |
  291|  5.23M|            next_b:
  292|  5.23M|                bptrs[0] += 8;
  293|  5.23M|                bptrs[1] += 8 >> ss_hor;
  294|  5.23M|                bptrs[2] += 8 >> ss_hor;
  295|  5.23M|            }
  296|       |
  297|  2.70M|        next_sb:
  298|  2.70M|            iptrs[0] += sbsz * 4;
  299|  2.70M|            iptrs[1] += sbsz * 4 >> ss_hor;
  300|  2.70M|            iptrs[2] += sbsz * 4 >> ss_hor;
  301|  2.70M|        }
  302|       |
  303|   845k|        ptrs[0] += 8 * PXSTRIDE(f->cur.stride[0]);
  ------------------
  |  |   53|   845k|#define PXSTRIDE(x) (x)
  ------------------
  304|   845k|        ptrs[1] += 8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver;
  ------------------
  |  |   53|   845k|#define PXSTRIDE(x) (x)
  ------------------
  305|   845k|        ptrs[2] += 8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver;
  ------------------
  |  |   53|   845k|#define PXSTRIDE(x) (x)
  ------------------
  306|   845k|        tc->top_pre_cdef_toggle ^= 1;
  307|   845k|    }
  308|   170k|}
cdef_apply_tmpl.c:backup2lines:
   44|  5.78M|{
   45|  5.78M|    const ptrdiff_t y_stride = PXSTRIDE(stride[0]);
  ------------------
  |  |   53|  5.78M|#define PXSTRIDE(x) (x)
  ------------------
   46|  5.78M|    if (y_stride < 0)
  ------------------
  |  Branch (46:9): [True: 0, False: 5.78M]
  ------------------
   47|      0|        pixel_copy(dst[0] + y_stride, src[0] + 7 * y_stride, -2 * y_stride);
  ------------------
  |  |   47|      0|#define pixel_copy memcpy
  ------------------
   48|  5.78M|    else
   49|  5.78M|        pixel_copy(dst[0], src[0] + 6 * y_stride, 2 * y_stride);
  ------------------
  |  |   47|  5.78M|#define pixel_copy memcpy
  ------------------
   50|       |
   51|  5.78M|    if (layout != DAV1D_PIXEL_LAYOUT_I400) {
  ------------------
  |  Branch (51:9): [True: 1.33M, False: 4.45M]
  ------------------
   52|  1.33M|        const ptrdiff_t uv_stride = PXSTRIDE(stride[1]);
  ------------------
  |  |   53|  1.33M|#define PXSTRIDE(x) (x)
  ------------------
   53|  1.33M|        if (uv_stride < 0) {
  ------------------
  |  Branch (53:13): [True: 0, False: 1.33M]
  ------------------
   54|      0|            const int uv_off = layout == DAV1D_PIXEL_LAYOUT_I420 ? 3 : 7;
  ------------------
  |  Branch (54:32): [True: 0, False: 0]
  ------------------
   55|      0|            pixel_copy(dst[1] + uv_stride, src[1] + uv_off * uv_stride, -2 * uv_stride);
  ------------------
  |  |   47|      0|#define pixel_copy memcpy
  ------------------
   56|      0|            pixel_copy(dst[2] + uv_stride, src[2] + uv_off * uv_stride, -2 * uv_stride);
  ------------------
  |  |   47|      0|#define pixel_copy memcpy
  ------------------
   57|  1.33M|        } else {
   58|  1.33M|            const int uv_off = layout == DAV1D_PIXEL_LAYOUT_I420 ? 2 : 6;
  ------------------
  |  Branch (58:32): [True: 553k, False: 778k]
  ------------------
   59|  1.33M|            pixel_copy(dst[1], src[1] + uv_off * uv_stride, 2 * uv_stride);
  ------------------
  |  |   47|  1.33M|#define pixel_copy memcpy
  ------------------
   60|  1.33M|            pixel_copy(dst[2], src[2] + uv_off * uv_stride, 2 * uv_stride);
  ------------------
  |  |   47|  1.33M|#define pixel_copy memcpy
  ------------------
   61|  1.33M|        }
   62|  1.33M|    }
   63|  5.78M|}
cdef_apply_tmpl.c:backup2x8:
   70|  7.04M|{
   71|  7.04M|    ptrdiff_t y_off = 0;
   72|  7.04M|    if (flag & BACKUP_2X8_Y) {
  ------------------
  |  Branch (72:9): [True: 6.63M, False: 411k]
  ------------------
   73|  59.0M|        for (int y = 0; y < 8; y++, y_off += PXSTRIDE(src_stride[0]))
  ------------------
  |  |   53|  52.4M|#define PXSTRIDE(x) (x)
  ------------------
  |  Branch (73:25): [True: 52.4M, False: 6.63M]
  ------------------
   74|  52.4M|            pixel_copy(dst[0][y], &src[0][y_off + x_off - 2], 2);
  ------------------
  |  |   47|  52.4M|#define pixel_copy memcpy
  ------------------
   75|  6.63M|    }
   76|       |
   77|  7.04M|    if (layout == DAV1D_PIXEL_LAYOUT_I400 || !(flag & BACKUP_2X8_UV))
  ------------------
  |  Branch (77:9): [True: 1.08M, False: 5.95M]
  |  Branch (77:46): [True: 362k, False: 5.59M]
  ------------------
   78|  1.42M|        return;
   79|       |
   80|  5.61M|    const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420;
   81|  5.61M|    const int ss_hor = layout != DAV1D_PIXEL_LAYOUT_I444;
   82|       |
   83|  5.61M|    x_off >>= ss_hor;
   84|  5.61M|    y_off = 0;
   85|  29.9M|    for (int y = 0; y < (8 >> ss_ver); y++, y_off += PXSTRIDE(src_stride[1])) {
  ------------------
  |  |   53|  24.2M|#define PXSTRIDE(x) (x)
  ------------------
  |  Branch (85:21): [True: 24.2M, False: 5.61M]
  ------------------
   86|  24.2M|        pixel_copy(dst[1][y], &src[1][y_off + x_off - 2], 2);
  ------------------
  |  |   47|  24.2M|#define pixel_copy memcpy
  ------------------
   87|  24.2M|        pixel_copy(dst[2][y], &src[2][y_off + x_off - 2], 2);
  ------------------
  |  |   47|  24.2M|#define pixel_copy memcpy
  ------------------
   88|  24.2M|    }
   89|  5.61M|}
cdef_apply_tmpl.c:adjust_strength:
   91|  5.97M|static int adjust_strength(const int strength, const unsigned var) {
   92|  5.97M|    if (!var) return 0;
  ------------------
  |  Branch (92:9): [True: 1.70M, False: 4.27M]
  ------------------
   93|  4.27M|    const int i = var >> 6 ? imin(ulog2(var >> 6), 12) : 0;
  ------------------
  |  Branch (93:19): [True: 3.48M, False: 795k]
  ------------------
   94|  4.27M|    return (strength * (4 + i) + 8) >> 4;
   95|  5.97M|}
dav1d_cdef_brow_16bpc:
  102|   722k|{
  103|   722k|    Dav1dFrameContext *const f = (Dav1dFrameContext *)tc->f;
  104|   722k|    const int bitdepth_min_8 = BITDEPTH == 8 ? 0 : f->cur.p.bpc - 8;
  ------------------
  |  Branch (104:32): [Folded, False: 722k]
  ------------------
  105|   722k|    const Dav1dDSPContext *const dsp = f->dsp;
  106|   722k|    enum CdefEdgeFlags edges = CDEF_HAVE_BOTTOM | (by_start > 0 ? CDEF_HAVE_TOP : 0);
  ------------------
  |  Branch (106:52): [True: 610k, False: 111k]
  ------------------
  107|   722k|    pixel *ptrs[3] = { p[0], p[1], p[2] };
  108|   722k|    const int sbsz = 16;
  109|   722k|    const int sb64w = f->sb128w << 1;
  110|   722k|    const int damping = f->frame_hdr->cdef.damping + bitdepth_min_8;
  111|   722k|    const enum Dav1dPixelLayout layout = f->cur.p.layout;
  112|   722k|    const int uv_idx = DAV1D_PIXEL_LAYOUT_I444 - layout;
  113|   722k|    const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420;
  114|   722k|    const int ss_hor = layout != DAV1D_PIXEL_LAYOUT_I444;
  115|   722k|    static const uint8_t uv_dirs[2][8] = { { 0, 1, 2, 3, 4, 5, 6, 7 },
  116|   722k|                                           { 7, 0, 2, 4, 5, 6, 6, 6 } };
  117|   722k|    const uint8_t *uv_dir = uv_dirs[layout == DAV1D_PIXEL_LAYOUT_I422];
  118|   722k|    const int have_tt = f->c->n_tc > 1;
  119|   722k|    const int sb128 = f->seq_hdr->sb128;
  120|   722k|    const int resize = f->frame_hdr->width[0] != f->frame_hdr->width[1];
  121|   722k|    const ptrdiff_t y_stride = PXSTRIDE(f->cur.stride[0]);
  122|   722k|    const ptrdiff_t uv_stride = PXSTRIDE(f->cur.stride[1]);
  123|       |
  124|  6.16M|    for (int bit = 0, by = by_start; by < by_end; by += 2, edges |= CDEF_HAVE_TOP) {
  ------------------
  |  Branch (124:38): [True: 5.43M, False: 729k]
  ------------------
  125|  5.43M|        const int tf = tc->top_pre_cdef_toggle;
  126|  5.43M|        const int by_idx = (by & 30) >> 1;
  127|  5.43M|        if (by + 2 >= f->bh) edges &= ~CDEF_HAVE_BOTTOM;
  ------------------
  |  Branch (127:13): [True: 111k, False: 5.32M]
  ------------------
  128|       |
  129|  5.45M|        if ((!have_tt || sbrow_start || by + 2 < by_end) &&
  ------------------
  |  Branch (129:14): [True: 18.4E, False: 5.45M]
  |  Branch (129:26): [True: 304k, False: 5.14M]
  |  Branch (129:41): [True: 4.72M, False: 417k]
  ------------------
  130|  5.03M|            edges & CDEF_HAVE_BOTTOM)
  ------------------
  |  Branch (130:13): [True: 5.03M, False: 1.30k]
  ------------------
  131|  5.03M|        {
  132|       |            // backup pre-filter data for next iteration
  133|  5.03M|            pixel *const cdef_top_bak[3] = {
  134|  5.03M|                f->lf.cdef_line[!tf][0] + have_tt * sby * 4 * y_stride,
  135|  5.03M|                f->lf.cdef_line[!tf][1] + have_tt * sby * 8 * uv_stride,
  136|  5.03M|                f->lf.cdef_line[!tf][2] + have_tt * sby * 8 * uv_stride
  137|  5.03M|            };
  138|  5.03M|            backup2lines(cdef_top_bak, ptrs, f->cur.stride, layout);
  139|  5.03M|        }
  140|       |
  141|  5.43M|        ALIGN_STK_16(pixel, lr_bak, 2 /* idx */, [3 /* plane */][8 /* y */][2 /* x */]);
  ------------------
  |  |  100|  5.43M|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  5.43M|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
  142|  5.43M|        pixel *iptrs[3] = { ptrs[0], ptrs[1], ptrs[2] };
  143|  5.43M|        edges &= ~CDEF_HAVE_LEFT;
  144|  5.43M|        edges |= CDEF_HAVE_RIGHT;
  145|  5.43M|        enum Backup2x8Flags prev_flag = 0;
  146|  16.9M|        for (int sbx = 0; sbx < sb64w; sbx++, edges |= CDEF_HAVE_LEFT) {
  ------------------
  |  Branch (146:27): [True: 11.5M, False: 5.44M]
  ------------------
  147|  11.5M|            const int sb128x = sbx >> 1;
  148|  11.5M|            const int sb64_idx = ((by & sbsz) >> 3) + (sbx & 1);
  149|  11.5M|            const int cdef_idx = lflvl[sb128x].cdef_idx[sb64_idx];
  150|  11.5M|            if (cdef_idx == -1 ||
  ------------------
  |  Branch (150:17): [True: 9.97M, False: 1.57M]
  ------------------
  151|  1.57M|                (!f->frame_hdr->cdef.y_strength[cdef_idx] &&
  ------------------
  |  Branch (151:18): [True: 1.04M, False: 529k]
  ------------------
  152|  1.04M|                 !f->frame_hdr->cdef.uv_strength[cdef_idx]))
  ------------------
  |  Branch (152:18): [True: 1.01M, False: 28.1k]
  ------------------
  153|  11.0M|            {
  154|  11.0M|                prev_flag = 0;
  155|  11.0M|                goto next_sb;
  156|  11.0M|            }
  157|       |
  158|       |            // Create a complete 32-bit mask for the sb row ahead of time.
  159|   547k|            const uint16_t (*noskip_row)[2] = &lflvl[sb128x].noskip_mask[by_idx];
  160|   547k|            const unsigned noskip_mask = (unsigned) noskip_row[0][1] << 16 |
  161|   547k|                                                    noskip_row[0][0];
  162|       |
  163|   547k|            const int y_lvl = f->frame_hdr->cdef.y_strength[cdef_idx];
  164|   547k|            const int uv_lvl = f->frame_hdr->cdef.uv_strength[cdef_idx];
  165|   547k|            const enum Backup2x8Flags flag = !!y_lvl + (!!uv_lvl << 1);
  166|       |
  167|   547k|            const int y_pri_lvl = (y_lvl >> 2) << bitdepth_min_8;
  168|   547k|            int y_sec_lvl = y_lvl & 3;
  169|   547k|            y_sec_lvl += y_sec_lvl == 3;
  170|   547k|            y_sec_lvl <<= bitdepth_min_8;
  171|       |
  172|   547k|            const int uv_pri_lvl = (uv_lvl >> 2) << bitdepth_min_8;
  173|   547k|            int uv_sec_lvl = uv_lvl & 3;
  174|   547k|            uv_sec_lvl += uv_sec_lvl == 3;
  175|   547k|            uv_sec_lvl <<= bitdepth_min_8;
  176|       |
  177|   547k|            pixel *bptrs[3] = { iptrs[0], iptrs[1], iptrs[2] };
  178|  3.89M|            for (int bx = sbx * sbsz; bx < imin((sbx + 1) * sbsz, f->bw);
  ------------------
  |  Branch (178:39): [True: 3.35M, False: 544k]
  ------------------
  179|  3.35M|                 bx += 2, edges |= CDEF_HAVE_LEFT)
  180|  3.35M|            {
  181|  3.35M|                if (bx + 2 >= f->bw) edges &= ~CDEF_HAVE_RIGHT;
  ------------------
  |  Branch (181:21): [True: 178k, False: 3.17M]
  ------------------
  182|       |
  183|       |                // check if this 8x8 block had any coded coefficients; if not,
  184|       |                // go to the next block
  185|  3.35M|                const uint32_t bx_mask = 3U << (bx & 30);
  186|  3.35M|                if (!(noskip_mask & bx_mask)) {
  ------------------
  |  Branch (186:21): [True: 382k, False: 2.97M]
  ------------------
  187|   382k|                    prev_flag = 0;
  188|   382k|                    goto next_b;
  189|   382k|                }
  190|  2.97M|                const enum Backup2x8Flags do_left = (prev_flag ^ flag) & flag;
  191|  2.97M|                prev_flag = flag;
  192|  2.97M|                if (do_left && edges & CDEF_HAVE_LEFT) {
  ------------------
  |  Branch (192:21): [True: 229k, False: 2.74M]
  |  Branch (192:32): [True: 53.8k, False: 175k]
  ------------------
  193|       |                    // we didn't backup the prefilter data because it wasn't
  194|       |                    // there, so do it here instead
  195|  53.8k|                    backup2x8(lr_bak[bit], bptrs, f->cur.stride, 0, layout, do_left);
  196|  53.8k|                }
  197|  2.97M|                if (edges & CDEF_HAVE_RIGHT) {
  ------------------
  |  Branch (197:21): [True: 2.80M, False: 167k]
  ------------------
  198|       |                    // backup pre-filter data for next iteration
  199|  2.80M|                    backup2x8(lr_bak[!bit], bptrs, f->cur.stride, 8, layout, flag);
  200|  2.80M|                }
  201|       |
  202|  2.97M|                int dir;
  203|  2.97M|                unsigned variance;
  204|  2.97M|                if (y_pri_lvl || uv_pri_lvl)
  ------------------
  |  Branch (204:21): [True: 2.70M, False: 266k]
  |  Branch (204:34): [True: 173k, False: 92.5k]
  ------------------
  205|  2.86M|                    dir = dsp->cdef.dir(bptrs[0], f->cur.stride[0],
  206|  2.86M|                                        &variance HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  2.86M|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
  207|       |
  208|  2.97M|                const pixel *top, *bot;
  209|  2.97M|                ptrdiff_t offset;
  210|       |
  211|  2.97M|                if (!have_tt) goto st_y;
  ------------------
  |  Branch (211:21): [True: 0, False: 2.97M]
  ------------------
  212|  2.97M|                if (sbrow_start && by == by_start) {
  ------------------
  |  Branch (212:21): [True: 181k, False: 2.78M]
  |  Branch (212:36): [True: 181k, False: 18.4E]
  ------------------
  213|   181k|                    if (resize) {
  ------------------
  |  Branch (213:25): [True: 30.5k, False: 150k]
  ------------------
  214|  30.5k|                        offset = (sby - 1) * 4 * y_stride + bx * 4;
  215|  30.5k|                        top = &f->lf.cdef_lpf_line[0][offset];
  216|   150k|                    } else {
  217|   150k|                        offset = (sby * (4 << sb128) - 4) * y_stride + bx * 4;
  218|   150k|                        top = &f->lf.lr_lpf_line[0][offset];
  219|   150k|                    }
  220|   181k|                    bot = bptrs[0] + 8 * y_stride;
  221|  2.78M|                } else if (!sbrow_start && by + 2 >= by_end) {
  ------------------
  |  Branch (221:28): [True: 2.78M, False: 587]
  |  Branch (221:44): [True: 237k, False: 2.55M]
  ------------------
  222|   237k|                    top = &f->lf.cdef_line[tf][0][sby * 4 * y_stride + bx * 4];
  223|   237k|                    if (resize) {
  ------------------
  |  Branch (223:25): [True: 50.2k, False: 187k]
  ------------------
  224|  50.2k|                        offset = (sby * 4 + 2) * y_stride + bx * 4;
  225|  50.2k|                        bot = &f->lf.cdef_lpf_line[0][offset];
  226|   187k|                    } else {
  227|   187k|                        const int line = sby * (4 << sb128) + 4 * sb128 + 2;
  228|   187k|                        offset = line * y_stride + bx * 4;
  229|   187k|                        bot = &f->lf.lr_lpf_line[0][offset];
  230|   187k|                    }
  231|  2.55M|                } else {
  232|  2.55M|            st_y:;
  233|  2.55M|                    offset = sby * 4 * y_stride;
  234|  2.55M|                    top = &f->lf.cdef_line[tf][0][have_tt * offset + bx * 4];
  235|  2.55M|                    bot = bptrs[0] + 8 * y_stride;
  236|  2.55M|                }
  237|  2.97M|                if (y_pri_lvl) {
  ------------------
  |  Branch (237:21): [True: 2.69M, False: 274k]
  ------------------
  238|  2.69M|                    const int adj_y_pri_lvl = adjust_strength(y_pri_lvl, variance);
  239|  2.69M|                    if (adj_y_pri_lvl || y_sec_lvl)
  ------------------
  |  Branch (239:25): [True: 1.46M, False: 1.23M]
  |  Branch (239:42): [True: 406k, False: 831k]
  ------------------
  240|  1.86M|                        dsp->cdef.fb[0](bptrs[0], f->cur.stride[0], lr_bak[bit][0],
  241|  1.86M|                                        top, bot, adj_y_pri_lvl, y_sec_lvl,
  242|  1.86M|                                        dir, damping, edges HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  1.86M|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
  243|  2.69M|                } else if (y_sec_lvl)
  ------------------
  |  Branch (243:28): [True: 119k, False: 155k]
  ------------------
  244|   119k|                    dsp->cdef.fb[0](bptrs[0], f->cur.stride[0], lr_bak[bit][0],
  245|   119k|                                    top, bot, 0, y_sec_lvl, 0, damping,
  246|   119k|                                    edges HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|   119k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
  247|       |
  248|  2.97M|                if (!uv_lvl) goto skip_uv;
  ------------------
  |  Branch (248:21): [True: 1.08M, False: 1.89M]
  ------------------
  249|  2.97M|                assert(layout != DAV1D_PIXEL_LAYOUT_I400);
  ------------------
  |  Branch (249:17): [True: 1.88M, False: 6.75k]
  ------------------
  250|       |
  251|  1.88M|                const int uvdir = uv_pri_lvl ? uv_dir[dir] : 0;
  ------------------
  |  Branch (251:35): [True: 1.83M, False: 47.5k]
  ------------------
  252|  5.66M|                for (int pl = 1; pl <= 2; pl++) {
  ------------------
  |  Branch (252:34): [True: 3.76M, False: 1.89M]
  ------------------
  253|  3.76M|                    if (!have_tt) goto st_uv;
  ------------------
  |  Branch (253:25): [True: 0, False: 3.76M]
  ------------------
  254|  3.76M|                    if (sbrow_start && by == by_start) {
  ------------------
  |  Branch (254:25): [True: 215k, False: 3.55M]
  |  Branch (254:40): [True: 215k, False: 18.4E]
  ------------------
  255|   215k|                        if (resize) {
  ------------------
  |  Branch (255:29): [True: 32.2k, False: 182k]
  ------------------
  256|  32.2k|                            offset = (sby - 1) * 4 * uv_stride + (bx * 4 >> ss_hor);
  257|  32.2k|                            top = &f->lf.cdef_lpf_line[pl][offset];
  258|   182k|                        } else {
  259|   182k|                            const int line = sby * (4 << sb128) - 4;
  260|   182k|                            offset = line * uv_stride + (bx * 4 >> ss_hor);
  261|   182k|                            top = &f->lf.lr_lpf_line[pl][offset];
  262|   182k|                        }
  263|   215k|                        bot = bptrs[pl] + (8 >> ss_ver) * uv_stride;
  264|  3.55M|                    } else if (!sbrow_start && by + 2 >= by_end) {
  ------------------
  |  Branch (264:32): [True: 3.55M, False: 18.4E]
  |  Branch (264:48): [True: 256k, False: 3.30M]
  ------------------
  265|   256k|                        const ptrdiff_t top_offset = sby * 8 * uv_stride +
  266|   256k|                                                     (bx * 4 >> ss_hor);
  267|   256k|                        top = &f->lf.cdef_line[tf][pl][top_offset];
  268|   256k|                        if (resize) {
  ------------------
  |  Branch (268:29): [True: 40.6k, False: 216k]
  ------------------
  269|  40.6k|                            offset = (sby * 4 + 2) * uv_stride + (bx * 4 >> ss_hor);
  270|  40.6k|                            bot = &f->lf.cdef_lpf_line[pl][offset];
  271|   216k|                        } else {
  272|   216k|                            const int line = sby * (4 << sb128) + 4 * sb128 + 2;
  273|   216k|                            offset = line * uv_stride + (bx * 4 >> ss_hor);
  274|   216k|                            bot = &f->lf.lr_lpf_line[pl][offset];
  275|   216k|                        }
  276|  3.29M|                    } else {
  277|  3.30M|                st_uv:;
  278|  3.30M|                        const ptrdiff_t offset = sby * 8 * uv_stride;
  279|  3.30M|                        top = &f->lf.cdef_line[tf][pl][have_tt * offset + (bx * 4 >> ss_hor)];
  280|  3.30M|                        bot = bptrs[pl] + (8 >> ss_ver) * uv_stride;
  281|  3.30M|                    }
  282|  3.77M|                    dsp->cdef.fb[uv_idx](bptrs[pl], f->cur.stride[1],
  283|  3.77M|                                         lr_bak[bit][pl], top, bot,
  284|  3.77M|                                         uv_pri_lvl, uv_sec_lvl, uvdir,
  285|  3.77M|                                         damping - 1, edges HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  3.77M|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
  286|  3.77M|                }
  287|       |
  288|  2.97M|            skip_uv:
  289|  2.97M|                bit ^= 1;
  290|       |
  291|  3.35M|            next_b:
  292|  3.35M|                bptrs[0] += 8;
  293|  3.35M|                bptrs[1] += 8 >> ss_hor;
  294|  3.35M|                bptrs[2] += 8 >> ss_hor;
  295|  3.35M|            }
  296|       |
  297|  11.5M|        next_sb:
  298|  11.5M|            iptrs[0] += sbsz * 4;
  299|  11.5M|            iptrs[1] += sbsz * 4 >> ss_hor;
  300|  11.5M|            iptrs[2] += sbsz * 4 >> ss_hor;
  301|  11.5M|        }
  302|       |
  303|  5.44M|        ptrs[0] += 8 * PXSTRIDE(f->cur.stride[0]);
  304|  5.44M|        ptrs[1] += 8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver;
  305|  5.44M|        ptrs[2] += 8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver;
  306|  5.44M|        tc->top_pre_cdef_toggle ^= 1;
  307|  5.44M|    }
  308|   722k|}

dav1d_cdef_dsp_init_8bpc:
  320|  3.49k|COLD void bitfn(dav1d_cdef_dsp_init)(Dav1dCdefDSPContext *const c) {
  321|  3.49k|    c->dir = cdef_find_dir_c;
  322|  3.49k|    c->fb[0] = cdef_filter_block_8x8_c;
  323|  3.49k|    c->fb[1] = cdef_filter_block_4x8_c;
  324|  3.49k|    c->fb[2] = cdef_filter_block_4x4_c;
  325|       |
  326|  3.49k|#if HAVE_ASM
  327|       |#if ARCH_AARCH64 || ARCH_ARM
  328|       |    cdef_dsp_init_arm(c);
  329|       |#elif ARCH_PPC64LE
  330|       |    cdef_dsp_init_ppc(c);
  331|       |#elif ARCH_RISCV
  332|       |    cdef_dsp_init_riscv(c);
  333|       |#elif ARCH_X86
  334|       |    cdef_dsp_init_x86(c);
  335|       |#elif ARCH_LOONGARCH64
  336|       |    cdef_dsp_init_loongarch(c);
  337|       |#endif
  338|  3.49k|#endif
  339|  3.49k|}
dav1d_cdef_dsp_init_16bpc:
  320|  5.72k|COLD void bitfn(dav1d_cdef_dsp_init)(Dav1dCdefDSPContext *const c) {
  321|  5.72k|    c->dir = cdef_find_dir_c;
  322|  5.72k|    c->fb[0] = cdef_filter_block_8x8_c;
  323|  5.72k|    c->fb[1] = cdef_filter_block_4x8_c;
  324|  5.72k|    c->fb[2] = cdef_filter_block_4x4_c;
  325|       |
  326|  5.72k|#if HAVE_ASM
  327|       |#if ARCH_AARCH64 || ARCH_ARM
  328|       |    cdef_dsp_init_arm(c);
  329|       |#elif ARCH_PPC64LE
  330|       |    cdef_dsp_init_ppc(c);
  331|       |#elif ARCH_RISCV
  332|       |    cdef_dsp_init_riscv(c);
  333|       |#elif ARCH_X86
  334|       |    cdef_dsp_init_x86(c);
  335|       |#elif ARCH_LOONGARCH64
  336|       |    cdef_dsp_init_loongarch(c);
  337|       |#endif
  338|  5.72k|#endif
  339|  5.72k|}

dav1d_cdf_thread_update:
 3918|  8.53k|{
 3919|  8.53k|#define update_cdf_1d(n1d, name) \
 3920|  8.53k|    do { \
 3921|  8.53k|        dst->name[n1d] = 0; \
 3922|  8.53k|    } while (0)
 3923|  8.53k|#define update_cdf_2d(n1d, n2d, name) \
 3924|  8.53k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
 3925|  8.53k|#define update_cdf_3d(n1d, n2d, n3d, name) \
 3926|  8.53k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
 3927|  8.53k|#define update_cdf_4d(n1d, n2d, n3d, n4d, name) \
 3928|  8.53k|    for (int l = 0; l < (n1d); l++) update_cdf_3d(n2d, n3d, n4d, name[l])
 3929|       |
 3930|  8.53k|    memcpy(dst, src, offsetof(CdfContext, m.intrabc));
 3931|       |
 3932|  8.53k|    update_cdf_3d(2, 2, 4, coef.eob_bin_16);
  ------------------
  |  | 3926|  25.6k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  ------------------
  |  |  |  | 3924|  51.2k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3920|  34.1k|    do { \
  |  |  |  |  |  | 3921|  34.1k|        dst->name[n1d] = 0; \
  |  |  |  |  |  | 3922|  34.1k|    } while (0)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 34.1k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3924:21): [True: 34.1k, False: 17.0k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3926:21): [True: 17.0k, False: 8.53k]
  |  |  ------------------
  ------------------
 3933|  8.53k|    update_cdf_3d(2, 2, 5, coef.eob_bin_32);
  ------------------
  |  | 3926|  25.6k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  ------------------
  |  |  |  | 3924|  51.2k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3920|  34.1k|    do { \
  |  |  |  |  |  | 3921|  34.1k|        dst->name[n1d] = 0; \
  |  |  |  |  |  | 3922|  34.1k|    } while (0)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 34.1k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3924:21): [True: 34.1k, False: 17.0k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3926:21): [True: 17.0k, False: 8.53k]
  |  |  ------------------
  ------------------
 3934|  8.53k|    update_cdf_3d(2, 2, 6, coef.eob_bin_64);
  ------------------
  |  | 3926|  25.6k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  ------------------
  |  |  |  | 3924|  51.2k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3920|  34.1k|    do { \
  |  |  |  |  |  | 3921|  34.1k|        dst->name[n1d] = 0; \
  |  |  |  |  |  | 3922|  34.1k|    } while (0)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 34.1k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3924:21): [True: 34.1k, False: 17.0k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3926:21): [True: 17.0k, False: 8.53k]
  |  |  ------------------
  ------------------
 3935|  8.53k|    update_cdf_3d(2, 2, 7, coef.eob_bin_128);
  ------------------
  |  | 3926|  25.6k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  ------------------
  |  |  |  | 3924|  51.2k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3920|  34.1k|    do { \
  |  |  |  |  |  | 3921|  34.1k|        dst->name[n1d] = 0; \
  |  |  |  |  |  | 3922|  34.1k|    } while (0)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 34.1k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3924:21): [True: 34.1k, False: 17.0k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3926:21): [True: 17.0k, False: 8.53k]
  |  |  ------------------
  ------------------
 3936|  8.53k|    update_cdf_3d(2, 2, 8, coef.eob_bin_256);
  ------------------
  |  | 3926|  25.6k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  ------------------
  |  |  |  | 3924|  51.2k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3920|  34.1k|    do { \
  |  |  |  |  |  | 3921|  34.1k|        dst->name[n1d] = 0; \
  |  |  |  |  |  | 3922|  34.1k|    } while (0)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 34.1k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3924:21): [True: 34.1k, False: 17.0k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3926:21): [True: 17.0k, False: 8.53k]
  |  |  ------------------
  ------------------
 3937|  8.53k|    update_cdf_2d(2, 9, coef.eob_bin_512);
  ------------------
  |  | 3924|  25.6k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  17.0k|    do { \
  |  |  |  | 3921|  17.0k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  17.0k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 17.0k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 17.0k, False: 8.53k]
  |  |  ------------------
  ------------------
 3938|  8.53k|    update_cdf_2d(2, 10, coef.eob_bin_1024);
  ------------------
  |  | 3924|  25.6k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  17.0k|    do { \
  |  |  |  | 3921|  17.0k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  17.0k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 17.0k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 17.0k, False: 8.53k]
  |  |  ------------------
  ------------------
 3939|  8.53k|    update_cdf_4d(N_TX_SIZES, 2, 4, 2, coef.eob_base_tok);
  ------------------
  |  | 3928|  51.2k|    for (int l = 0; l < (n1d); l++) update_cdf_3d(n2d, n3d, n4d, name[l])
  |  |  ------------------
  |  |  |  | 3926|   128k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3924|   426k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  | 3920|   341k|    do { \
  |  |  |  |  |  |  |  | 3921|   341k|        dst->name[n1d] = 0; \
  |  |  |  |  |  |  |  | 3922|   341k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 341k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3924:21): [True: 341k, False: 85.3k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3926:21): [True: 85.3k, False: 42.6k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3928:21): [True: 42.6k, False: 8.53k]
  |  |  ------------------
  ------------------
 3940|  8.53k|    update_cdf_4d(N_TX_SIZES, 2, 41 /*42*/, 3, coef.base_tok);
  ------------------
  |  | 3928|  51.2k|    for (int l = 0; l < (n1d); l++) update_cdf_3d(n2d, n3d, n4d, name[l])
  |  |  ------------------
  |  |  |  | 3926|   128k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3924|  3.58M|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  | 3920|  3.49M|    do { \
  |  |  |  |  |  |  |  | 3921|  3.49M|        dst->name[n1d] = 0; \
  |  |  |  |  |  |  |  | 3922|  3.49M|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 3.49M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3924:21): [True: 3.49M, False: 85.3k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3926:21): [True: 85.3k, False: 42.6k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3928:21): [True: 42.6k, False: 8.53k]
  |  |  ------------------
  ------------------
 3941|  8.53k|    update_cdf_4d(4, 2, 21, 3, coef.br_tok);
  ------------------
  |  | 3928|  42.6k|    for (int l = 0; l < (n1d); l++) update_cdf_3d(n2d, n3d, n4d, name[l])
  |  |  ------------------
  |  |  |  | 3926|   102k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3924|  1.50M|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  | 3920|  1.43M|    do { \
  |  |  |  |  |  |  |  | 3921|  1.43M|        dst->name[n1d] = 0; \
  |  |  |  |  |  |  |  | 3922|  1.43M|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 1.43M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3924:21): [True: 1.43M, False: 68.2k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3926:21): [True: 68.2k, False: 34.1k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3928:21): [True: 34.1k, False: 8.53k]
  |  |  ------------------
  ------------------
 3942|  8.53k|    update_cdf_4d(N_TX_SIZES, 2, 9, 1, coef.eob_hi_bit);
  ------------------
  |  | 3928|  51.2k|    for (int l = 0; l < (n1d); l++) update_cdf_3d(n2d, n3d, n4d, name[l])
  |  |  ------------------
  |  |  |  | 3926|   128k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3924|   853k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  | 3920|   768k|    do { \
  |  |  |  |  |  |  |  | 3921|   768k|        dst->name[n1d] = 0; \
  |  |  |  |  |  |  |  | 3922|   768k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 768k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3924:21): [True: 768k, False: 85.3k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3926:21): [True: 85.3k, False: 42.6k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3928:21): [True: 42.6k, False: 8.53k]
  |  |  ------------------
  ------------------
 3943|  8.53k|    update_cdf_3d(N_TX_SIZES, 13, 1, coef.skip);
  ------------------
  |  | 3926|  51.2k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  ------------------
  |  |  |  | 3924|   597k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3920|   554k|    do { \
  |  |  |  |  |  | 3921|   554k|        dst->name[n1d] = 0; \
  |  |  |  |  |  | 3922|   554k|    } while (0)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 554k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3924:21): [True: 554k, False: 42.6k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3926:21): [True: 42.6k, False: 8.53k]
  |  |  ------------------
  ------------------
 3944|  8.53k|    update_cdf_3d(2, 3, 1, coef.dc_sign);
  ------------------
  |  | 3926|  25.6k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  ------------------
  |  |  |  | 3924|  68.2k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3920|  51.2k|    do { \
  |  |  |  |  |  | 3921|  51.2k|        dst->name[n1d] = 0; \
  |  |  |  |  |  | 3922|  51.2k|    } while (0)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 51.2k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3924:21): [True: 51.2k, False: 17.0k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3926:21): [True: 17.0k, False: 8.53k]
  |  |  ------------------
  ------------------
 3945|       |
 3946|  8.53k|    update_cdf_3d(2, N_INTRA_PRED_MODES, N_UV_INTRA_PRED_MODES - 1 - !k, m.uv_mode);
  ------------------
  |  | 3926|  25.6k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  ------------------
  |  |  |  | 3924|   238k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3920|   221k|    do { \
  |  |  |  |  |  | 3921|   221k|        dst->name[n1d] = 0; \
  |  |  |  |  |  | 3922|   221k|    } while (0)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 221k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3924:21): [True: 221k, False: 17.0k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3926:21): [True: 17.0k, False: 8.53k]
  |  |  ------------------
  ------------------
 3947|  8.53k|    update_cdf_2d(4, N_PARTITIONS - 3, m.partition[BL_128X128]);
  ------------------
  |  | 3924|  42.6k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  34.1k|    do { \
  |  |  |  | 3921|  34.1k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  34.1k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 34.1k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 34.1k, False: 8.53k]
  |  |  ------------------
  ------------------
 3948|  34.1k|    for (int k = BL_64X64; k < BL_8X8; k++)
  ------------------
  |  Branch (3948:28): [True: 25.6k, False: 8.53k]
  ------------------
 3949|  25.6k|        update_cdf_2d(4, N_PARTITIONS - 1, m.partition[k]);
  ------------------
  |  | 3924|   128k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|   102k|    do { \
  |  |  |  | 3921|   102k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|   102k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 102k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 102k, False: 25.6k]
  |  |  ------------------
  ------------------
 3950|  8.53k|    update_cdf_2d(4, N_SUB8X8_PARTITIONS - 1, m.partition[BL_8X8]);
  ------------------
  |  | 3924|  42.6k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  34.1k|    do { \
  |  |  |  | 3921|  34.1k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  34.1k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 34.1k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 34.1k, False: 8.53k]
  |  |  ------------------
  ------------------
 3951|  8.53k|    update_cdf_2d(6, 15, m.cfl_alpha);
  ------------------
  |  | 3924|  59.7k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  51.2k|    do { \
  |  |  |  | 3921|  51.2k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  51.2k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 51.2k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 51.2k, False: 8.53k]
  |  |  ------------------
  ------------------
 3952|  8.53k|    update_cdf_2d(2, 15, m.txtp_inter1);
  ------------------
  |  | 3924|  25.6k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  17.0k|    do { \
  |  |  |  | 3921|  17.0k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  17.0k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 17.0k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 17.0k, False: 8.53k]
  |  |  ------------------
  ------------------
 3953|  8.53k|    update_cdf_1d(11, m.txtp_inter2);
  ------------------
  |  | 3920|  8.53k|    do { \
  |  | 3921|  8.53k|        dst->name[n1d] = 0; \
  |  | 3922|  8.53k|    } while (0)
  |  |  ------------------
  |  |  |  Branch (3922:14): [Folded, False: 8.53k]
  |  |  ------------------
  ------------------
 3954|  8.53k|    update_cdf_3d(2, N_INTRA_PRED_MODES, 6, m.txtp_intra1);
  ------------------
  |  | 3926|  25.6k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  ------------------
  |  |  |  | 3924|   238k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3920|   221k|    do { \
  |  |  |  |  |  | 3921|   221k|        dst->name[n1d] = 0; \
  |  |  |  |  |  | 3922|   221k|    } while (0)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 221k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3924:21): [True: 221k, False: 17.0k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3926:21): [True: 17.0k, False: 8.53k]
  |  |  ------------------
  ------------------
 3955|  8.53k|    update_cdf_3d(3, N_INTRA_PRED_MODES, 4, m.txtp_intra2);
  ------------------
  |  | 3926|  34.1k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  ------------------
  |  |  |  | 3924|   358k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3920|   332k|    do { \
  |  |  |  |  |  | 3921|   332k|        dst->name[n1d] = 0; \
  |  |  |  |  |  | 3922|   332k|    } while (0)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 332k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3924:21): [True: 332k, False: 25.6k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3926:21): [True: 25.6k, False: 8.53k]
  |  |  ------------------
  ------------------
 3956|  8.53k|    update_cdf_1d(7, m.cfl_sign);
  ------------------
  |  | 3920|  8.53k|    do { \
  |  | 3921|  8.53k|        dst->name[n1d] = 0; \
  |  | 3922|  8.53k|    } while (0)
  |  |  ------------------
  |  |  |  Branch (3922:14): [Folded, False: 8.53k]
  |  |  ------------------
  ------------------
 3957|  8.53k|    update_cdf_2d(8, 6, m.angle_delta);
  ------------------
  |  | 3924|  76.8k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  68.2k|    do { \
  |  |  |  | 3921|  68.2k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  68.2k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 68.2k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 68.2k, False: 8.53k]
  |  |  ------------------
  ------------------
 3958|  8.53k|    update_cdf_1d(4, m.filter_intra);
  ------------------
  |  | 3920|  8.53k|    do { \
  |  | 3921|  8.53k|        dst->name[n1d] = 0; \
  |  | 3922|  8.53k|    } while (0)
  |  |  ------------------
  |  |  |  Branch (3922:14): [Folded, False: 8.53k]
  |  |  ------------------
  ------------------
 3959|  8.53k|    update_cdf_2d(3, DAV1D_MAX_SEGMENTS - 1, m.seg_id);
  ------------------
  |  | 3924|  34.1k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  25.6k|    do { \
  |  |  |  | 3921|  25.6k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  25.6k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 25.6k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 25.6k, False: 8.53k]
  |  |  ------------------
  ------------------
 3960|  8.53k|    update_cdf_3d(2, 7, 6, m.pal_sz);
  ------------------
  |  | 3926|  25.6k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  ------------------
  |  |  |  | 3924|   136k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3920|   119k|    do { \
  |  |  |  |  |  | 3921|   119k|        dst->name[n1d] = 0; \
  |  |  |  |  |  | 3922|   119k|    } while (0)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 119k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3924:21): [True: 119k, False: 17.0k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3926:21): [True: 17.0k, False: 8.53k]
  |  |  ------------------
  ------------------
 3961|  8.53k|    update_cdf_4d(2, 7, 5, k + 1, m.color_map);
  ------------------
  |  | 3928|  25.6k|    for (int l = 0; l < (n1d); l++) update_cdf_3d(n2d, n3d, n4d, name[l])
  |  |  ------------------
  |  |  |  | 3926|   136k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3924|   716k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  | 3920|   597k|    do { \
  |  |  |  |  |  |  |  | 3921|   597k|        dst->name[n1d] = 0; \
  |  |  |  |  |  |  |  | 3922|   597k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 597k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3924:21): [True: 597k, False: 119k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3926:21): [True: 119k, False: 17.0k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3928:21): [True: 17.0k, False: 8.53k]
  |  |  ------------------
  ------------------
 3962|  8.53k|    update_cdf_3d(N_TX_SIZES - 1, 3, imin(k + 1, 2), m.txsz);
  ------------------
  |  | 3926|  42.6k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  ------------------
  |  |  |  | 3924|   136k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3920|   102k|    do { \
  |  |  |  |  |  | 3921|   102k|        dst->name[n1d] = 0; \
  |  |  |  |  |  | 3922|   102k|    } while (0)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 102k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3924:21): [True: 102k, False: 34.1k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3926:21): [True: 34.1k, False: 8.53k]
  |  |  ------------------
  ------------------
 3963|  8.53k|    update_cdf_1d(3, m.delta_q);
  ------------------
  |  | 3920|  8.53k|    do { \
  |  | 3921|  8.53k|        dst->name[n1d] = 0; \
  |  | 3922|  8.53k|    } while (0)
  |  |  ------------------
  |  |  |  Branch (3922:14): [Folded, False: 8.53k]
  |  |  ------------------
  ------------------
 3964|  8.53k|    update_cdf_2d(5, 3, m.delta_lf);
  ------------------
  |  | 3924|  51.2k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  42.6k|    do { \
  |  |  |  | 3921|  42.6k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  42.6k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 42.6k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 42.6k, False: 8.53k]
  |  |  ------------------
  ------------------
 3965|  8.53k|    update_cdf_1d(2, m.restore_switchable);
  ------------------
  |  | 3920|  8.53k|    do { \
  |  | 3921|  8.53k|        dst->name[n1d] = 0; \
  |  | 3922|  8.53k|    } while (0)
  |  |  ------------------
  |  |  |  Branch (3922:14): [Folded, False: 8.53k]
  |  |  ------------------
  ------------------
 3966|  8.53k|    update_cdf_1d(1, m.restore_wiener);
  ------------------
  |  | 3920|  8.53k|    do { \
  |  | 3921|  8.53k|        dst->name[n1d] = 0; \
  |  | 3922|  8.53k|    } while (0)
  |  |  ------------------
  |  |  |  Branch (3922:14): [Folded, False: 8.53k]
  |  |  ------------------
  ------------------
 3967|  8.53k|    update_cdf_1d(1, m.restore_sgrproj);
  ------------------
  |  | 3920|  8.53k|    do { \
  |  | 3921|  8.53k|        dst->name[n1d] = 0; \
  |  | 3922|  8.53k|    } while (0)
  |  |  ------------------
  |  |  |  Branch (3922:14): [Folded, False: 8.53k]
  |  |  ------------------
  ------------------
 3968|  8.53k|    update_cdf_2d(4, 1, m.txtp_inter3);
  ------------------
  |  | 3924|  42.6k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  34.1k|    do { \
  |  |  |  | 3921|  34.1k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  34.1k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 34.1k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 34.1k, False: 8.53k]
  |  |  ------------------
  ------------------
 3969|  8.53k|    update_cdf_2d(N_BS_SIZES, 1, m.use_filter_intra);
  ------------------
  |  | 3924|   196k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|   187k|    do { \
  |  |  |  | 3921|   187k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|   187k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 187k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 187k, False: 8.53k]
  |  |  ------------------
  ------------------
 3970|  8.53k|    update_cdf_3d(7, 3, 1, m.txpart);
  ------------------
  |  | 3926|  68.2k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  ------------------
  |  |  |  | 3924|   238k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3920|   179k|    do { \
  |  |  |  |  |  | 3921|   179k|        dst->name[n1d] = 0; \
  |  |  |  |  |  | 3922|   179k|    } while (0)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 179k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3924:21): [True: 179k, False: 59.7k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3926:21): [True: 59.7k, False: 8.53k]
  |  |  ------------------
  ------------------
 3971|  8.53k|    update_cdf_2d(3, 1, m.skip);
  ------------------
  |  | 3924|  34.1k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  25.6k|    do { \
  |  |  |  | 3921|  25.6k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  25.6k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 25.6k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 25.6k, False: 8.53k]
  |  |  ------------------
  ------------------
 3972|  8.53k|    update_cdf_3d(7, 3, 1, m.pal_y);
  ------------------
  |  | 3926|  68.2k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  ------------------
  |  |  |  | 3924|   238k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3920|   179k|    do { \
  |  |  |  |  |  | 3921|   179k|        dst->name[n1d] = 0; \
  |  |  |  |  |  | 3922|   179k|    } while (0)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 179k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3924:21): [True: 179k, False: 59.7k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3926:21): [True: 59.7k, False: 8.53k]
  |  |  ------------------
  ------------------
 3973|  8.53k|    update_cdf_2d(2, 1, m.pal_uv);
  ------------------
  |  | 3924|  25.6k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  17.0k|    do { \
  |  |  |  | 3921|  17.0k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  17.0k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 17.0k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 17.0k, False: 8.53k]
  |  |  ------------------
  ------------------
 3974|       |
 3975|  8.53k|    if (IS_KEY_OR_INTRA(hdr))
  ------------------
  |  |   43|  8.53k|    (!IS_INTER_OR_SWITCH(frame_header))
  |  |  ------------------
  |  |  |  |   36|  8.53k|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (43:5): [True: 3.63k, False: 4.89k]
  |  |  ------------------
  ------------------
 3976|  3.63k|        return;
 3977|       |
 3978|  4.89k|    memcpy(dst->m.y_mode, src->m.y_mode,
 3979|  4.89k|           offsetof(CdfContext, kfym) - offsetof(CdfContext, m.y_mode));
 3980|       |
 3981|  4.89k|    update_cdf_2d(4, N_INTRA_PRED_MODES - 1, m.y_mode);
  ------------------
  |  | 3924|  24.4k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  24.4k|    do { \
  |  |  |  | 3921|  19.5k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  19.5k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 19.5k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 19.5k, False: 4.89k]
  |  |  ------------------
  ------------------
 3982|  4.89k|    update_cdf_2d(9, 15, m.wedge_idx);
  ------------------
  |  | 3924|  48.9k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  48.9k|    do { \
  |  |  |  | 3921|  44.0k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  44.0k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 44.0k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 44.0k, False: 4.89k]
  |  |  ------------------
  ------------------
 3983|  4.89k|    update_cdf_2d(8, N_COMP_INTER_PRED_MODES - 1, m.comp_inter_mode);
  ------------------
  |  | 3924|  44.0k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  44.0k|    do { \
  |  |  |  | 3921|  39.1k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  39.1k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 39.1k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 39.1k, False: 4.89k]
  |  |  ------------------
  ------------------
 3984|  4.89k|    update_cdf_3d(2, 8, DAV1D_N_SWITCHABLE_FILTERS - 1, m.filter);
  ------------------
  |  | 3926|  14.6k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  ------------------
  |  |  |  | 3924|  88.1k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3920|  83.2k|    do { \
  |  |  |  |  |  | 3921|  78.3k|        dst->name[n1d] = 0; \
  |  |  |  |  |  | 3922|  78.3k|    } while (0)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 78.3k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3924:21): [True: 78.3k, False: 9.79k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3926:21): [True: 9.79k, False: 4.89k]
  |  |  ------------------
  ------------------
 3985|  4.89k|    update_cdf_2d(4, 3, m.interintra_mode);
  ------------------
  |  | 3924|  24.4k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  24.4k|    do { \
  |  |  |  | 3921|  19.5k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  19.5k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 19.5k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 19.5k, False: 4.89k]
  |  |  ------------------
  ------------------
 3986|  4.89k|    update_cdf_2d(N_BS_SIZES, 2, m.motion_mode);
  ------------------
  |  | 3924|   112k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|   112k|    do { \
  |  |  |  | 3921|   107k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|   107k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 107k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 107k, False: 4.89k]
  |  |  ------------------
  ------------------
 3987|  4.89k|    update_cdf_2d(3, 1, m.skip_mode);
  ------------------
  |  | 3924|  19.5k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  19.5k|    do { \
  |  |  |  | 3921|  14.6k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  14.6k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 14.6k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 14.6k, False: 4.89k]
  |  |  ------------------
  ------------------
 3988|  4.89k|    update_cdf_2d(6, 1, m.newmv_mode);
  ------------------
  |  | 3924|  34.2k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  34.2k|    do { \
  |  |  |  | 3921|  29.3k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  29.3k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 29.3k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 29.3k, False: 4.89k]
  |  |  ------------------
  ------------------
 3989|  4.89k|    update_cdf_2d(2, 1, m.globalmv_mode);
  ------------------
  |  | 3924|  14.6k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  14.6k|    do { \
  |  |  |  | 3921|  9.79k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  9.79k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 9.79k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 9.79k, False: 4.89k]
  |  |  ------------------
  ------------------
 3990|  4.89k|    update_cdf_2d(6, 1, m.refmv_mode);
  ------------------
  |  | 3924|  34.2k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  34.2k|    do { \
  |  |  |  | 3921|  29.3k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  29.3k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 29.3k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 29.3k, False: 4.89k]
  |  |  ------------------
  ------------------
 3991|  4.89k|    update_cdf_2d(3, 1, m.drl_bit);
  ------------------
  |  | 3924|  19.5k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  19.5k|    do { \
  |  |  |  | 3921|  14.6k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  14.6k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 14.6k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 14.6k, False: 4.89k]
  |  |  ------------------
  ------------------
 3992|  4.89k|    update_cdf_2d(4, 1, m.intra);
  ------------------
  |  | 3924|  24.4k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  24.4k|    do { \
  |  |  |  | 3921|  19.5k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  19.5k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 19.5k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 19.5k, False: 4.89k]
  |  |  ------------------
  ------------------
 3993|  4.89k|    update_cdf_2d(5, 1, m.comp);
  ------------------
  |  | 3924|  29.3k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  29.3k|    do { \
  |  |  |  | 3921|  24.4k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  24.4k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 24.4k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 24.4k, False: 4.89k]
  |  |  ------------------
  ------------------
 3994|  4.89k|    update_cdf_2d(5, 1, m.comp_dir);
  ------------------
  |  | 3924|  29.3k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  29.3k|    do { \
  |  |  |  | 3921|  24.4k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  24.4k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 24.4k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 24.4k, False: 4.89k]
  |  |  ------------------
  ------------------
 3995|  4.89k|    update_cdf_2d(6, 1, m.jnt_comp);
  ------------------
  |  | 3924|  34.2k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  34.2k|    do { \
  |  |  |  | 3921|  29.3k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  29.3k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 29.3k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 29.3k, False: 4.89k]
  |  |  ------------------
  ------------------
 3996|  4.89k|    update_cdf_2d(6, 1, m.mask_comp);
  ------------------
  |  | 3924|  34.2k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  34.2k|    do { \
  |  |  |  | 3921|  29.3k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  29.3k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 29.3k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 29.3k, False: 4.89k]
  |  |  ------------------
  ------------------
 3997|  4.89k|    update_cdf_2d(9, 1, m.wedge_comp);
  ------------------
  |  | 3924|  48.9k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  48.9k|    do { \
  |  |  |  | 3921|  44.0k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  44.0k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 44.0k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 44.0k, False: 4.89k]
  |  |  ------------------
  ------------------
 3998|  4.89k|    update_cdf_3d(6, 3, 1, m.ref);
  ------------------
  |  | 3926|  34.2k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  ------------------
  |  |  |  | 3924|   117k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3920|  93.0k|    do { \
  |  |  |  |  |  | 3921|  88.1k|        dst->name[n1d] = 0; \
  |  |  |  |  |  | 3922|  88.1k|    } while (0)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 88.1k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3924:21): [True: 88.1k, False: 29.3k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3926:21): [True: 29.3k, False: 4.89k]
  |  |  ------------------
  ------------------
 3999|  4.89k|    update_cdf_3d(3, 3, 1, m.comp_fwd_ref);
  ------------------
  |  | 3926|  19.5k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  ------------------
  |  |  |  | 3924|  58.7k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3920|  48.9k|    do { \
  |  |  |  |  |  | 3921|  44.0k|        dst->name[n1d] = 0; \
  |  |  |  |  |  | 3922|  44.0k|    } while (0)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 44.0k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3924:21): [True: 44.0k, False: 14.6k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3926:21): [True: 14.6k, False: 4.89k]
  |  |  ------------------
  ------------------
 4000|  4.89k|    update_cdf_3d(2, 3, 1, m.comp_bwd_ref);
  ------------------
  |  | 3926|  14.6k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  ------------------
  |  |  |  | 3924|  39.1k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3920|  34.2k|    do { \
  |  |  |  |  |  | 3921|  29.3k|        dst->name[n1d] = 0; \
  |  |  |  |  |  | 3922|  29.3k|    } while (0)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 29.3k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3924:21): [True: 29.3k, False: 9.79k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3926:21): [True: 9.79k, False: 4.89k]
  |  |  ------------------
  ------------------
 4001|  4.89k|    update_cdf_3d(3, 3, 1, m.comp_uni_ref);
  ------------------
  |  | 3926|  19.5k|    for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
  |  |  ------------------
  |  |  |  | 3924|  58.7k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  |  |  ------------------
  |  |  |  |  |  | 3920|  48.9k|    do { \
  |  |  |  |  |  | 3921|  44.0k|        dst->name[n1d] = 0; \
  |  |  |  |  |  | 3922|  44.0k|    } while (0)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (3922:14): [Folded, False: 44.0k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3924:21): [True: 44.0k, False: 14.6k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3926:21): [True: 14.6k, False: 4.89k]
  |  |  ------------------
  ------------------
 4002|  4.89k|    update_cdf_2d(3, 1, m.seg_pred);
  ------------------
  |  | 3924|  19.5k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  19.5k|    do { \
  |  |  |  | 3921|  14.6k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  14.6k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 14.6k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 14.6k, False: 4.89k]
  |  |  ------------------
  ------------------
 4003|  4.89k|    update_cdf_2d(4, 1, m.interintra);
  ------------------
  |  | 3924|  24.4k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  24.4k|    do { \
  |  |  |  | 3921|  19.5k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  19.5k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 19.5k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 19.5k, False: 4.89k]
  |  |  ------------------
  ------------------
 4004|  4.89k|    update_cdf_2d(7, 1, m.interintra_wedge);
  ------------------
  |  | 3924|  39.1k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  39.1k|    do { \
  |  |  |  | 3921|  34.2k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  34.2k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 34.2k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 34.2k, False: 4.89k]
  |  |  ------------------
  ------------------
 4005|  4.89k|    update_cdf_2d(N_BS_SIZES, 1, m.obmc);
  ------------------
  |  | 3924|   112k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|   112k|    do { \
  |  |  |  | 3921|   107k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|   107k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 107k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 107k, False: 4.89k]
  |  |  ------------------
  ------------------
 4006|       |
 4007|  14.6k|    for (int k = 0; k < 2; k++) {
  ------------------
  |  Branch (4007:21): [True: 9.79k, False: 4.89k]
  ------------------
 4008|  9.79k|        update_cdf_1d(10, mv.comp[k].classes);
  ------------------
  |  | 3920|  9.79k|    do { \
  |  | 3921|  9.79k|        dst->name[n1d] = 0; \
  |  | 3922|  9.79k|    } while (0)
  |  |  ------------------
  |  |  |  Branch (3922:14): [Folded, False: 9.79k]
  |  |  ------------------
  ------------------
 4009|  9.79k|        update_cdf_1d(1, mv.comp[k].sign);
  ------------------
  |  | 3920|  9.79k|    do { \
  |  | 3921|  9.79k|        dst->name[n1d] = 0; \
  |  | 3922|  9.79k|    } while (0)
  |  |  ------------------
  |  |  |  Branch (3922:14): [Folded, False: 9.79k]
  |  |  ------------------
  ------------------
 4010|  9.79k|        update_cdf_1d(1, mv.comp[k].class0);
  ------------------
  |  | 3920|  9.79k|    do { \
  |  | 3921|  9.79k|        dst->name[n1d] = 0; \
  |  | 3922|  9.79k|    } while (0)
  |  |  ------------------
  |  |  |  Branch (3922:14): [Folded, False: 9.79k]
  |  |  ------------------
  ------------------
 4011|  9.79k|        update_cdf_2d(2, 3, mv.comp[k].class0_fp);
  ------------------
  |  | 3924|  29.3k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  19.5k|    do { \
  |  |  |  | 3921|  19.5k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  19.5k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 19.5k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 19.5k, False: 9.79k]
  |  |  ------------------
  ------------------
 4012|  9.79k|        update_cdf_1d(1, mv.comp[k].class0_hp);
  ------------------
  |  | 3920|  9.79k|    do { \
  |  | 3921|  9.79k|        dst->name[n1d] = 0; \
  |  | 3922|  9.79k|    } while (0)
  |  |  ------------------
  |  |  |  Branch (3922:14): [Folded, False: 9.79k]
  |  |  ------------------
  ------------------
 4013|  9.79k|        update_cdf_2d(10, 1, mv.comp[k].classN);
  ------------------
  |  | 3924|   107k|    for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
  |  |  ------------------
  |  |  |  | 3920|  97.9k|    do { \
  |  |  |  | 3921|  97.9k|        dst->name[n1d] = 0; \
  |  |  |  | 3922|  97.9k|    } while (0)
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (3922:14): [Folded, False: 97.9k]
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (3924:21): [True: 97.9k, False: 9.79k]
  |  |  ------------------
  ------------------
 4014|  9.79k|        update_cdf_1d(3, mv.comp[k].classN_fp);
  ------------------
  |  | 3920|  9.79k|    do { \
  |  | 3921|  9.79k|        dst->name[n1d] = 0; \
  |  | 3922|  9.79k|    } while (0)
  |  |  ------------------
  |  |  |  Branch (3922:14): [Folded, False: 9.79k]
  |  |  ------------------
  ------------------
 4015|  9.79k|        update_cdf_1d(1, mv.comp[k].classN_hp);
  ------------------
  |  | 3920|  9.79k|    do { \
  |  | 3921|  9.79k|        dst->name[n1d] = 0; \
  |  | 3922|  9.79k|    } while (0)
  |  |  ------------------
  |  |  |  Branch (3922:14): [Folded, False: 9.79k]
  |  |  ------------------
  ------------------
 4016|  9.79k|    }
 4017|  4.89k|    update_cdf_1d(N_MV_JOINTS - 1, mv.joint);
  ------------------
  |  | 3920|  4.89k|    do { \
  |  | 3921|  4.89k|        dst->name[n1d] = 0; \
  |  | 3922|  4.89k|    } while (0)
  |  |  ------------------
  |  |  |  Branch (3922:14): [Folded, False: 4.89k]
  |  |  ------------------
  ------------------
 4018|  4.89k|}
dav1d_cdf_thread_init_static:
 4023|   219k|void dav1d_cdf_thread_init_static(CdfThreadContext *const cdf, const unsigned qidx) {
 4024|       |    cdf->ref = NULL;
 4025|   219k|    cdf->data.qcat = (qidx > 20) + (qidx > 60) + (qidx > 120);
 4026|   219k|}
dav1d_cdf_thread_copy:
 4028|   261k|void dav1d_cdf_thread_copy(CdfContext *const dst, const CdfThreadContext *const src) {
 4029|   261k|    if (src->ref) {
  ------------------
  |  Branch (4029:9): [True: 24.9k, False: 236k]
  ------------------
 4030|  24.9k|        memcpy(dst, src->data.cdf, sizeof(*dst));
 4031|   236k|    } else {
 4032|   236k|        dst->coef = default_coef_cdf[src->data.qcat];
 4033|   236k|        memcpy(&dst->m, &default_cdf.m,
 4034|   236k|               offsetof(CdfDefaultContext, mv.joint));
 4035|   236k|        memcpy(&dst->mv.comp[1], &default_cdf.mv.comp,
 4036|       |               sizeof(default_cdf) - offsetof(CdfDefaultContext, mv.comp));
 4037|   236k|    }
 4038|   261k|}
dav1d_cdf_thread_alloc:
 4042|  40.8k|{
 4043|  40.8k|    cdf->ref = dav1d_ref_create_using_pool(c->cdf_pool,
 4044|  40.8k|                                           sizeof(CdfContext) + sizeof(atomic_uint));
 4045|  40.8k|    if (!cdf->ref) return DAV1D_ERR(ENOMEM);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  |  Branch (4045:9): [True: 0, False: 40.8k]
  ------------------
 4046|  40.8k|    cdf->data.cdf = cdf->ref->data;
 4047|  40.8k|    if (have_frame_mt) {
  ------------------
  |  Branch (4047:9): [True: 40.8k, False: 0]
  ------------------
 4048|  40.8k|        cdf->progress = (atomic_uint *) &cdf->data.cdf[1];
 4049|       |        atomic_init(cdf->progress, 0);
 4050|  40.8k|    }
 4051|  40.8k|    return 0;
 4052|  40.8k|}
dav1d_cdf_thread_ref:
 4056|  2.01M|{
 4057|  2.01M|    *dst = *src;
 4058|  2.01M|    if (src->ref)
  ------------------
  |  Branch (4058:9): [True: 344k, False: 1.67M]
  ------------------
 4059|   344k|        dav1d_ref_inc(src->ref);
 4060|  2.01M|}
dav1d_cdf_thread_unref:
 4062|  2.53M|void dav1d_cdf_thread_unref(CdfThreadContext *const cdf) {
 4063|       |    memset(&cdf->data, 0, sizeof(*cdf) - offsetof(CdfThreadContext, data));
 4064|  2.53M|    dav1d_ref_dec(&cdf->ref);
 4065|  2.53M|}

dav1d_init_cpu:
   63|      1|COLD void dav1d_init_cpu(void) {
   64|      1|#if HAVE_ASM && !__has_feature(memory_sanitizer)
   65|       |// memory sanitizer is inherently incompatible with asm
   66|       |#if ARCH_AARCH64 || ARCH_ARM
   67|       |    dav1d_cpu_flags = dav1d_get_cpu_flags_arm();
   68|       |#elif ARCH_LOONGARCH
   69|       |    dav1d_cpu_flags = dav1d_get_cpu_flags_loongarch();
   70|       |#elif ARCH_PPC64LE
   71|       |    dav1d_cpu_flags = dav1d_get_cpu_flags_ppc();
   72|       |#elif ARCH_RISCV
   73|       |    dav1d_cpu_flags = dav1d_get_cpu_flags_riscv();
   74|       |#elif ARCH_X86
   75|       |    dav1d_cpu_flags = dav1d_get_cpu_flags_x86();
   76|      1|#endif
   77|      1|#endif
   78|      1|}

cpu.c:dav1d_get_default_cpu_flags:
   58|      1|static ALWAYS_INLINE unsigned dav1d_get_default_cpu_flags(void) {
   59|      1|    unsigned flags = 0;
   60|       |
   61|       |#if ARCH_AARCH64 || ARCH_ARM
   62|       |#if defined(__ARM_NEON) || defined(__APPLE__) || defined(_WIN32) || ARCH_AARCH64
   63|       |    flags |= DAV1D_ARM_CPU_FLAG_NEON;
   64|       |#endif
   65|       |#ifdef __ARM_FEATURE_DOTPROD
   66|       |    flags |= DAV1D_ARM_CPU_FLAG_DOTPROD;
   67|       |#endif
   68|       |#ifdef __ARM_FEATURE_MATMUL_INT8
   69|       |    flags |= DAV1D_ARM_CPU_FLAG_I8MM;
   70|       |#endif
   71|       |#if ARCH_AARCH64
   72|       |#ifdef __ARM_FEATURE_SVE
   73|       |    flags |= DAV1D_ARM_CPU_FLAG_SVE;
   74|       |#endif
   75|       |#ifdef __ARM_FEATURE_SVE2
   76|       |    flags |= DAV1D_ARM_CPU_FLAG_SVE2;
   77|       |#endif
   78|       |#endif /* ARCH_AARCH64 */
   79|       |#elif ARCH_PPC64LE
   80|       |#if defined(__VSX__)
   81|       |    flags |= DAV1D_PPC_CPU_FLAG_VSX;
   82|       |#endif
   83|       |#if defined(__POWER9_VECTOR__)
   84|       |    flags |= DAV1D_PPC_CPU_FLAG_PWR9;
   85|       |#endif
   86|       |#elif ARCH_RISCV
   87|       |#if defined(__riscv_v)
   88|       |    flags |= DAV1D_RISCV_CPU_FLAG_V;
   89|       |#endif
   90|       |#elif ARCH_X86
   91|       |#if defined(__AVX512F__) && defined(__AVX512CD__) && \
   92|       |    defined(__AVX512BW__) && defined(__AVX512DQ__) && \
   93|       |    defined(__AVX512VL__) && defined(__AVX512VNNI__) && \
   94|       |    defined(__AVX512IFMA__) && defined(__AVX512VBMI__) && \
   95|       |    defined(__AVX512VBMI2__) && defined(__AVX512VPOPCNTDQ__) && \
   96|       |    defined(__AVX512BITALG__) && defined(__GFNI__) && \
   97|       |    defined(__VAES__) && defined(__VPCLMULQDQ__)
   98|       |    flags |= DAV1D_X86_CPU_FLAG_AVX512ICL |
   99|       |             DAV1D_X86_CPU_FLAG_AVX2 |
  100|       |             DAV1D_X86_CPU_FLAG_SSE41 |
  101|       |             DAV1D_X86_CPU_FLAG_SSSE3 |
  102|       |             DAV1D_X86_CPU_FLAG_SSE2;
  103|       |#elif defined(__AVX2__)
  104|       |    flags |= DAV1D_X86_CPU_FLAG_AVX2 |
  105|       |             DAV1D_X86_CPU_FLAG_SSE41 |
  106|       |             DAV1D_X86_CPU_FLAG_SSSE3 |
  107|       |             DAV1D_X86_CPU_FLAG_SSE2;
  108|       |#elif defined(__SSE4_1__) || defined(__AVX__)
  109|       |    flags |= DAV1D_X86_CPU_FLAG_SSE41 |
  110|       |             DAV1D_X86_CPU_FLAG_SSSE3 |
  111|       |             DAV1D_X86_CPU_FLAG_SSE2;
  112|       |#elif defined(__SSSE3__)
  113|       |    flags |= DAV1D_X86_CPU_FLAG_SSSE3 |
  114|       |             DAV1D_X86_CPU_FLAG_SSE2;
  115|       |#elif ARCH_X86_64 || defined(__SSE2__) || \
  116|       |      (defined(_M_IX86_FP) && _M_IX86_FP >= 2)
  117|       |    flags |= DAV1D_X86_CPU_FLAG_SSE2;
  118|      1|#endif
  119|      1|#endif
  120|       |
  121|      1|    return flags;
  122|      1|}
pal.c:dav1d_get_cpu_flags:
  124|  10.2k|static ALWAYS_INLINE unsigned dav1d_get_cpu_flags(void) {
  125|  10.2k|    unsigned flags = dav1d_cpu_flags & dav1d_cpu_flags_mask;
  126|       |
  127|       |#if TRIM_DSP_FUNCTIONS
  128|       |/* Since this function is inlined, unconditionally setting a flag here will
  129|       | * enable dead code elimination in the calling function. */
  130|       |    flags |= dav1d_get_default_cpu_flags();
  131|       |#endif
  132|       |
  133|  10.2k|    return flags;
  134|  10.2k|}
refmvs.c:dav1d_get_cpu_flags:
  124|  10.2k|static ALWAYS_INLINE unsigned dav1d_get_cpu_flags(void) {
  125|  10.2k|    unsigned flags = dav1d_cpu_flags & dav1d_cpu_flags_mask;
  126|       |
  127|       |#if TRIM_DSP_FUNCTIONS
  128|       |/* Since this function is inlined, unconditionally setting a flag here will
  129|       | * enable dead code elimination in the calling function. */
  130|       |    flags |= dav1d_get_default_cpu_flags();
  131|       |#endif
  132|       |
  133|  10.2k|    return flags;
  134|  10.2k|}
msac.c:dav1d_get_cpu_flags:
  124|   245k|static ALWAYS_INLINE unsigned dav1d_get_cpu_flags(void) {
  125|   245k|    unsigned flags = dav1d_cpu_flags & dav1d_cpu_flags_mask;
  126|       |
  127|       |#if TRIM_DSP_FUNCTIONS
  128|       |/* Since this function is inlined, unconditionally setting a flag here will
  129|       | * enable dead code elimination in the calling function. */
  130|       |    flags |= dav1d_get_default_cpu_flags();
  131|       |#endif
  132|       |
  133|   245k|    return flags;
  134|   245k|}
cdef_tmpl.c:dav1d_get_cpu_flags:
  124|  9.21k|static ALWAYS_INLINE unsigned dav1d_get_cpu_flags(void) {
  125|  9.21k|    unsigned flags = dav1d_cpu_flags & dav1d_cpu_flags_mask;
  126|       |
  127|       |#if TRIM_DSP_FUNCTIONS
  128|       |/* Since this function is inlined, unconditionally setting a flag here will
  129|       | * enable dead code elimination in the calling function. */
  130|       |    flags |= dav1d_get_default_cpu_flags();
  131|       |#endif
  132|       |
  133|  9.21k|    return flags;
  134|  9.21k|}
filmgrain_tmpl.c:dav1d_get_cpu_flags:
  124|  9.21k|static ALWAYS_INLINE unsigned dav1d_get_cpu_flags(void) {
  125|  9.21k|    unsigned flags = dav1d_cpu_flags & dav1d_cpu_flags_mask;
  126|       |
  127|       |#if TRIM_DSP_FUNCTIONS
  128|       |/* Since this function is inlined, unconditionally setting a flag here will
  129|       | * enable dead code elimination in the calling function. */
  130|       |    flags |= dav1d_get_default_cpu_flags();
  131|       |#endif
  132|       |
  133|  9.21k|    return flags;
  134|  9.21k|}
ipred_tmpl.c:dav1d_get_cpu_flags:
  124|  9.21k|static ALWAYS_INLINE unsigned dav1d_get_cpu_flags(void) {
  125|  9.21k|    unsigned flags = dav1d_cpu_flags & dav1d_cpu_flags_mask;
  126|       |
  127|       |#if TRIM_DSP_FUNCTIONS
  128|       |/* Since this function is inlined, unconditionally setting a flag here will
  129|       | * enable dead code elimination in the calling function. */
  130|       |    flags |= dav1d_get_default_cpu_flags();
  131|       |#endif
  132|       |
  133|  9.21k|    return flags;
  134|  9.21k|}
itx_tmpl.c:dav1d_get_cpu_flags:
  124|  9.21k|static ALWAYS_INLINE unsigned dav1d_get_cpu_flags(void) {
  125|  9.21k|    unsigned flags = dav1d_cpu_flags & dav1d_cpu_flags_mask;
  126|       |
  127|       |#if TRIM_DSP_FUNCTIONS
  128|       |/* Since this function is inlined, unconditionally setting a flag here will
  129|       | * enable dead code elimination in the calling function. */
  130|       |    flags |= dav1d_get_default_cpu_flags();
  131|       |#endif
  132|       |
  133|  9.21k|    return flags;
  134|  9.21k|}
loopfilter_tmpl.c:dav1d_get_cpu_flags:
  124|  9.21k|static ALWAYS_INLINE unsigned dav1d_get_cpu_flags(void) {
  125|  9.21k|    unsigned flags = dav1d_cpu_flags & dav1d_cpu_flags_mask;
  126|       |
  127|       |#if TRIM_DSP_FUNCTIONS
  128|       |/* Since this function is inlined, unconditionally setting a flag here will
  129|       | * enable dead code elimination in the calling function. */
  130|       |    flags |= dav1d_get_default_cpu_flags();
  131|       |#endif
  132|       |
  133|  9.21k|    return flags;
  134|  9.21k|}
looprestoration_tmpl.c:dav1d_get_cpu_flags:
  124|  9.21k|static ALWAYS_INLINE unsigned dav1d_get_cpu_flags(void) {
  125|  9.21k|    unsigned flags = dav1d_cpu_flags & dav1d_cpu_flags_mask;
  126|       |
  127|       |#if TRIM_DSP_FUNCTIONS
  128|       |/* Since this function is inlined, unconditionally setting a flag here will
  129|       | * enable dead code elimination in the calling function. */
  130|       |    flags |= dav1d_get_default_cpu_flags();
  131|       |#endif
  132|       |
  133|  9.21k|    return flags;
  134|  9.21k|}
mc_tmpl.c:dav1d_get_cpu_flags:
  124|  9.21k|static ALWAYS_INLINE unsigned dav1d_get_cpu_flags(void) {
  125|  9.21k|    unsigned flags = dav1d_cpu_flags & dav1d_cpu_flags_mask;
  126|       |
  127|       |#if TRIM_DSP_FUNCTIONS
  128|       |/* Since this function is inlined, unconditionally setting a flag here will
  129|       | * enable dead code elimination in the calling function. */
  130|       |    flags |= dav1d_get_default_cpu_flags();
  131|       |#endif
  132|       |
  133|  9.21k|    return flags;
  134|  9.21k|}

ctx.c:memset_w1:
   34|  45.1M|static void memset_w1(void *const ptr, const int value) {
   35|  45.1M|    set_ctx1((uint8_t *) ptr, 0, value);
  ------------------
  |  |   56|  45.1M|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  ------------------
   36|  45.1M|}
ctx.c:memset_w2:
   38|  23.5M|static void memset_w2(void *const ptr, const int value) {
   39|  23.5M|    set_ctx2((uint8_t *) ptr, 0, value);
  ------------------
  |  |   58|  23.5M|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  ------------------
   40|  23.5M|}
ctx.c:memset_w4:
   42|  16.6M|static void memset_w4(void *const ptr, const int value) {
   43|  16.6M|    set_ctx4((uint8_t *) ptr, 0, value);
  ------------------
  |  |   60|  16.6M|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  ------------------
   44|  16.6M|}
ctx.c:memset_w8:
   46|  10.1M|static void memset_w8(void *const ptr, const int value) {
   47|  10.1M|    set_ctx8((uint8_t *) ptr, 0, value);
  ------------------
  |  |   62|  10.1M|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  ------------------
   48|  10.1M|}
ctx.c:memset_w16:
   50|  10.4M|static void memset_w16(void *const ptr, const int value) {
   51|  10.4M|    set_ctx16((uint8_t *) ptr, 0, value);
  ------------------
  |  |   63|  10.4M|#define set_ctx16(var, off, val) do { \
  |  |   64|  10.4M|        memset(&(var)[off], val, 16); \
  |  |   65|  10.4M|    } while (0)
  |  |  ------------------
  |  |  |  Branch (65:14): [Folded, False: 10.4M]
  |  |  ------------------
  ------------------
   52|  10.4M|}
ctx.c:memset_w32:
   54|  2.01M|static void memset_w32(void *const ptr, const int value) {
   55|  2.01M|    set_ctx32((uint8_t *) ptr, 0, value);
  ------------------
  |  |   66|  2.01M|#define set_ctx32(var, off, val) do { \
  |  |   67|  2.01M|        memset(&(var)[off], val, 32); \
  |  |   68|  2.01M|    } while (0)
  |  |  ------------------
  |  |  |  Branch (68:14): [Folded, False: 2.01M]
  |  |  ------------------
  ------------------
   56|  2.01M|}

lf_mask.c:dav1d_memset_likely_pow2:
   44|  8.51M|static inline void dav1d_memset_likely_pow2(void *const ptr, const int value, const int n) {
   45|  8.51M|    assert(n >= 1 && n <= 32);
  ------------------
  |  Branch (45:5): [True: 8.50M, False: 1.53k]
  |  Branch (45:5): [True: 8.50M, False: 18.4E]
  ------------------
   46|  8.50M|    if ((n&(n-1)) == 0) {
  ------------------
  |  Branch (46:9): [True: 8.01M, False: 491k]
  ------------------
   47|  8.01M|        dav1d_memset_pow2[ulog2(n)](ptr, value);
   48|  8.01M|    } else {
   49|   491k|        memset(ptr, value, n);
   50|   491k|    }
   51|  8.50M|}
recon_tmpl.c:dav1d_memset_likely_pow2:
   44|  50.3M|static inline void dav1d_memset_likely_pow2(void *const ptr, const int value, const int n) {
   45|  50.3M|    assert(n >= 1 && n <= 32);
  ------------------
  |  Branch (45:5): [True: 50.3M, False: 18.4E]
  |  Branch (45:5): [True: 50.4M, False: 18.4E]
  ------------------
   46|  50.4M|    if ((n&(n-1)) == 0) {
  ------------------
  |  Branch (46:9): [True: 49.2M, False: 1.15M]
  ------------------
   47|  49.2M|        dav1d_memset_pow2[ulog2(n)](ptr, value);
   48|  49.2M|    } else {
   49|  1.15M|        memset(ptr, value, n);
   50|  1.15M|    }
   51|  50.4M|}

dav1d_data_create_internal:
   43|   334k|uint8_t *dav1d_data_create_internal(Dav1dData *const buf, const size_t sz) {
   44|   334k|    validate_input_or_ret(buf != NULL, NULL);
  ------------------
  |  |   52|   334k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:9): [True: 0, False: 334k]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
   45|       |
   46|   334k|    if (sz > SIZE_MAX / 2) return NULL;
  ------------------
  |  Branch (46:9): [True: 0, False: 334k]
  ------------------
   47|   334k|    buf->ref = dav1d_ref_create(ALLOC_DAV1DDATA, sz);
  ------------------
  |  |   49|   334k|#define dav1d_ref_create(type, size) dav1d_ref_create(size)
  ------------------
   48|   334k|    if (!buf->ref) return NULL;
  ------------------
  |  Branch (48:9): [True: 0, False: 334k]
  ------------------
   49|   334k|    buf->data = buf->ref->const_data;
   50|   334k|    buf->sz = sz;
   51|   334k|    dav1d_data_props_set_defaults(&buf->m);
   52|   334k|    buf->m.size = sz;
   53|       |
   54|   334k|    return buf->ref->data;
   55|   334k|}
dav1d_data_ref:
   98|   621k|void dav1d_data_ref(Dav1dData *const dst, const Dav1dData *const src) {
   99|   621k|    assert(dst != NULL);
  ------------------
  |  Branch (99:5): [True: 621k, False: 0]
  ------------------
  100|   621k|    assert(dst->data == NULL);
  ------------------
  |  Branch (100:5): [True: 621k, False: 0]
  ------------------
  101|   621k|    assert(src != NULL);
  ------------------
  |  Branch (101:5): [True: 621k, False: 0]
  ------------------
  102|       |
  103|   621k|    if (src->ref) {
  ------------------
  |  Branch (103:9): [True: 621k, False: 0]
  ------------------
  104|   621k|        assert(src->data != NULL);
  ------------------
  |  Branch (104:9): [True: 621k, False: 0]
  ------------------
  105|   621k|        dav1d_ref_inc(src->ref);
  106|   621k|    }
  107|   621k|    if (src->m.user_data.ref) dav1d_ref_inc(src->m.user_data.ref);
  ------------------
  |  Branch (107:9): [True: 0, False: 621k]
  ------------------
  108|   621k|    *dst = *src;
  109|   621k|}
dav1d_data_props_copy:
  113|   512k|{
  114|   512k|    assert(dst != NULL);
  ------------------
  |  Branch (114:5): [True: 512k, False: 0]
  ------------------
  115|   512k|    assert(src != NULL);
  ------------------
  |  Branch (115:5): [True: 512k, False: 0]
  ------------------
  116|       |
  117|   512k|    dav1d_ref_dec(&dst->user_data.ref);
  118|   512k|    *dst = *src;
  119|   512k|    if (dst->user_data.ref) dav1d_ref_inc(dst->user_data.ref);
  ------------------
  |  Branch (119:9): [True: 0, False: 512k]
  ------------------
  120|   512k|}
dav1d_data_props_set_defaults:
  122|  5.38M|void dav1d_data_props_set_defaults(Dav1dDataProps *const props) {
  123|  5.38M|    assert(props != NULL);
  ------------------
  |  Branch (123:5): [True: 5.38M, False: 47]
  ------------------
  124|       |
  125|  5.38M|    memset(props, 0, sizeof(*props));
  126|       |    props->timestamp = INT64_MIN;
  127|  5.38M|    props->offset = -1;
  128|  5.38M|}
dav1d_data_props_unref_internal:
  130|  10.2k|void dav1d_data_props_unref_internal(Dav1dDataProps *const props) {
  131|  10.2k|    validate_input(props != NULL);
  ------------------
  |  |   59|  10.2k|#define validate_input(x) validate_input_or_ret(x, )
  |  |  ------------------
  |  |  |  |   52|  10.2k|    if (!(x)) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (52:9): [True: 0, False: 10.2k]
  |  |  |  |  ------------------
  |  |  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  |  |  ------------------
  |  |  |  |   54|      0|                    #x, __func__); \
  |  |  |  |   55|      0|        debug_abort(); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   39|      0|#define debug_abort abort
  |  |  |  |  ------------------
  |  |  |  |   56|      0|        return r; \
  |  |  |  |   57|      0|    }
  |  |  ------------------
  ------------------
  132|       |
  133|  10.2k|    struct Dav1dRef *user_data_ref = props->user_data.ref;
  134|  10.2k|    dav1d_data_props_set_defaults(props);
  135|  10.2k|    dav1d_ref_dec(&user_data_ref);
  136|  10.2k|}
dav1d_data_unref_internal:
  138|   965k|void dav1d_data_unref_internal(Dav1dData *const buf) {
  139|   965k|    validate_input(buf != NULL);
  ------------------
  |  |   59|   965k|#define validate_input(x) validate_input_or_ret(x, )
  |  |  ------------------
  |  |  |  |   52|   965k|    if (!(x)) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (52:9): [True: 0, False: 965k]
  |  |  |  |  ------------------
  |  |  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  |  |  ------------------
  |  |  |  |   54|      0|                    #x, __func__); \
  |  |  |  |   55|      0|        debug_abort(); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   39|      0|#define debug_abort abort
  |  |  |  |  ------------------
  |  |  |  |   56|      0|        return r; \
  |  |  |  |   57|      0|    }
  |  |  ------------------
  ------------------
  140|       |
  141|   965k|    struct Dav1dRef *user_data_ref = buf->m.user_data.ref;
  142|   965k|    if (buf->ref) {
  ------------------
  |  Branch (142:9): [True: 955k, False: 10.1k]
  ------------------
  143|   955k|        validate_input(buf->data != NULL);
  ------------------
  |  |   59|   955k|#define validate_input(x) validate_input_or_ret(x, )
  |  |  ------------------
  |  |  |  |   52|   955k|    if (!(x)) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (52:9): [True: 0, False: 955k]
  |  |  |  |  ------------------
  |  |  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  |  |  ------------------
  |  |  |  |   54|      0|                    #x, __func__); \
  |  |  |  |   55|      0|        debug_abort(); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   39|      0|#define debug_abort abort
  |  |  |  |  ------------------
  |  |  |  |   56|      0|        return r; \
  |  |  |  |   57|      0|    }
  |  |  ------------------
  ------------------
  144|   955k|        dav1d_ref_dec(&buf->ref);
  145|   955k|    }
  146|   965k|    memset(buf, 0, sizeof(*buf));
  147|   965k|    dav1d_data_props_set_defaults(&buf->m);
  148|   965k|    dav1d_ref_dec(&user_data_ref);
  149|   965k|}

dav1d_decode_tile_sbrow:
 2594|  2.61M|int dav1d_decode_tile_sbrow(Dav1dTaskContext *const t) {
 2595|  2.61M|    const Dav1dFrameContext *const f = t->f;
 2596|  2.61M|    const enum BlockLevel root_bl = f->seq_hdr->sb128 ? BL_128X128 : BL_64X64;
  ------------------
  |  Branch (2596:37): [True: 1.08M, False: 1.53M]
  ------------------
 2597|  2.61M|    Dav1dTileState *const ts = t->ts;
 2598|  2.61M|    const Dav1dContext *const c = f->c;
 2599|  2.61M|    const int sb_step = f->sb_step;
 2600|  2.61M|    const int tile_row = ts->tiling.row, tile_col = ts->tiling.col;
 2601|  2.61M|    const int col_sb_start = f->frame_hdr->tiling.col_start_sb[tile_col];
 2602|  2.61M|    const int col_sb128_start = col_sb_start >> !f->seq_hdr->sb128;
 2603|       |
 2604|  2.61M|    if (IS_INTER_OR_SWITCH(f->frame_hdr) || f->frame_hdr->allow_intrabc) {
  ------------------
  |  |   36|  5.22M|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 1.79M, False: 820k]
  |  |  ------------------
  ------------------
  |  Branch (2604:45): [True: 718k, False: 101k]
  ------------------
 2605|  2.51M|        dav1d_refmvs_tile_sbrow_init(&t->rt, &f->rf, ts->tiling.col_start,
 2606|  2.51M|                                     ts->tiling.col_end, ts->tiling.row_start,
 2607|  2.51M|                                     ts->tiling.row_end, t->by >> f->sb_shift,
 2608|  2.51M|                                     ts->tiling.row, t->frame_thread.pass);
 2609|  2.51M|    }
 2610|       |
 2611|  2.61M|    if (IS_INTER_OR_SWITCH(f->frame_hdr) && c->n_fc > 1) {
  ------------------
  |  |   36|  5.22M|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 1.79M, False: 821k]
  |  |  ------------------
  ------------------
  |  Branch (2611:45): [True: 1.79M, False: 18.4E]
  ------------------
 2612|  1.79M|        const int sby = (t->by - ts->tiling.row_start) >> f->sb_shift;
 2613|  1.79M|        int (*const lowest_px)[2] = ts->lowest_pixel[sby];
 2614|  14.3M|        for (int n = 0; n < 7; n++)
  ------------------
  |  Branch (2614:25): [True: 12.5M, False: 1.79M]
  ------------------
 2615|  37.5M|            for (int m = 0; m < 2; m++)
  ------------------
  |  Branch (2615:29): [True: 25.0M, False: 12.5M]
  ------------------
 2616|  25.0M|                lowest_px[n][m] = INT_MIN;
 2617|  1.79M|    }
 2618|       |
 2619|  2.61M|    reset_context(&t->l, IS_KEY_OR_INTRA(f->frame_hdr), t->frame_thread.pass);
  ------------------
  |  |   43|  2.61M|    (!IS_INTER_OR_SWITCH(frame_header))
  |  |  ------------------
  |  |  |  |   36|  2.61M|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  ------------------
 2620|  2.61M|    if (t->frame_thread.pass == 2) {
  ------------------
  |  Branch (2620:9): [True: 1.25M, False: 1.35M]
  ------------------
 2621|  18.4E|        const int off_2pass = c->n_tc > 1 ? f->sb128w * f->frame_hdr->tiling.rows : 0;
  ------------------
  |  Branch (2621:31): [True: 1.25M, False: 18.4E]
  ------------------
 2622|  1.25M|        for (t->bx = ts->tiling.col_start,
 2623|  1.25M|             t->a = f->a + off_2pass + col_sb128_start + tile_row * f->sb128w;
 2624|  2.92M|             t->bx < ts->tiling.col_end; t->bx += sb_step)
  ------------------
  |  Branch (2624:14): [True: 1.66M, False: 1.25M]
  ------------------
 2625|  1.66M|        {
 2626|  1.66M|            if (atomic_load_explicit(c->flush, memory_order_acquire))
  ------------------
  |  Branch (2626:17): [True: 6, False: 1.66M]
  ------------------
 2627|      6|                return 1;
 2628|  1.66M|            if (decode_sb(t, root_bl, dav1d_intra_edge_tree[root_bl]))
  ------------------
  |  Branch (2628:17): [True: 0, False: 1.66M]
  ------------------
 2629|      0|                return 1;
 2630|  1.66M|            if (t->bx & 16 || f->seq_hdr->sb128)
  ------------------
  |  Branch (2630:17): [True: 525k, False: 1.14M]
  |  Branch (2630:31): [True: 595k, False: 546k]
  ------------------
 2631|  1.11M|                t->a++;
 2632|  1.66M|        }
 2633|  1.25M|        f->bd_fn.backup_ipred_edge(t);
 2634|  1.25M|        return 0;
 2635|  1.25M|    }
 2636|       |
 2637|  1.35M|    if (f->c->n_tc > 1 && f->frame_hdr->use_ref_frame_mvs) {
  ------------------
  |  Branch (2637:9): [True: 1.35M, False: 360]
  |  Branch (2637:27): [True: 586k, False: 773k]
  ------------------
 2638|   586k|        f->c->refmvs_dsp.load_tmvs(&f->rf, ts->tiling.row,
 2639|   586k|                                   ts->tiling.col_start >> 1, ts->tiling.col_end >> 1,
 2640|   586k|                                   t->by >> 1, (t->by + sb_step) >> 1);
 2641|   586k|    }
 2642|  1.35M|    memset(t->pal_sz_uv[1], 0, sizeof(*t->pal_sz_uv));
 2643|  1.35M|    const int sb128y = t->by >> 5;
 2644|  1.35M|    for (t->bx = ts->tiling.col_start, t->a = f->a + col_sb128_start + tile_row * f->sb128w,
 2645|  1.35M|         t->lf_mask = f->lf.mask + sb128y * f->sb128w + col_sb128_start;
 2646|  3.43M|         t->bx < ts->tiling.col_end; t->bx += sb_step)
  ------------------
  |  Branch (2646:10): [True: 2.12M, False: 1.31M]
  ------------------
 2647|  2.12M|    {
 2648|  2.12M|        if (atomic_load_explicit(c->flush, memory_order_acquire))
  ------------------
  |  Branch (2648:13): [True: 235, False: 2.12M]
  ------------------
 2649|    235|            return 1;
 2650|  2.12M|        if (root_bl == BL_128X128) {
  ------------------
  |  Branch (2650:13): [True: 824k, False: 1.30M]
  ------------------
 2651|   824k|            t->cur_sb_cdef_idx_ptr = t->lf_mask->cdef_idx;
 2652|   824k|            t->cur_sb_cdef_idx_ptr[0] = -1;
 2653|   824k|            t->cur_sb_cdef_idx_ptr[1] = -1;
 2654|   824k|            t->cur_sb_cdef_idx_ptr[2] = -1;
 2655|   824k|            t->cur_sb_cdef_idx_ptr[3] = -1;
 2656|  1.30M|        } else {
 2657|  1.30M|            t->cur_sb_cdef_idx_ptr =
 2658|  1.30M|                &t->lf_mask->cdef_idx[((t->bx & 16) >> 4) +
 2659|  1.30M|                                      ((t->by & 16) >> 3)];
 2660|  1.30M|            t->cur_sb_cdef_idx_ptr[0] = -1;
 2661|  1.30M|        }
 2662|       |        // Restoration filter
 2663|  8.50M|        for (int p = 0; p < 3; p++) {
  ------------------
  |  Branch (2663:25): [True: 6.37M, False: 2.12M]
  ------------------
 2664|  6.37M|            if (!((f->lf.restore_planes >> p) & 1U))
  ------------------
  |  Branch (2664:17): [True: 5.77M, False: 603k]
  ------------------
 2665|  5.77M|                continue;
 2666|       |
 2667|   603k|            const int ss_ver = p && f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
  ------------------
  |  Branch (2667:32): [True: 192k, False: 411k]
  |  Branch (2667:37): [True: 151k, False: 40.3k]
  ------------------
 2668|   603k|            const int ss_hor = p && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
  ------------------
  |  Branch (2668:32): [True: 192k, False: 411k]
  |  Branch (2668:37): [True: 157k, False: 34.2k]
  ------------------
 2669|   603k|            const int unit_size_log2 = f->frame_hdr->restoration.unit_size[!!p];
 2670|   603k|            const int y = t->by * 4 >> ss_ver;
 2671|   603k|            const int h = (f->cur.p.h + ss_ver) >> ss_ver;
 2672|       |
 2673|   603k|            const int unit_size = 1 << unit_size_log2;
 2674|   603k|            const unsigned mask = unit_size - 1;
 2675|   603k|            if (y & mask) continue;
  ------------------
  |  Branch (2675:17): [True: 225k, False: 378k]
  ------------------
 2676|   378k|            const int half_unit = unit_size >> 1;
 2677|       |            // Round half up at frame boundaries, if there's more than one
 2678|       |            // restoration unit
 2679|   378k|            if (y && y + half_unit > h) continue;
  ------------------
  |  Branch (2679:17): [True: 216k, False: 161k]
  |  Branch (2679:22): [True: 14.4k, False: 202k]
  ------------------
 2680|       |
 2681|   363k|            const enum Dav1dRestorationType frame_type = f->frame_hdr->restoration.type[p];
 2682|       |
 2683|   363k|            if (f->frame_hdr->width[0] != f->frame_hdr->width[1]) {
  ------------------
  |  Branch (2683:17): [True: 30.3k, False: 333k]
  ------------------
 2684|  30.3k|                const int w = (f->sr_cur.p.p.w + ss_hor) >> ss_hor;
 2685|  30.3k|                const int n_units = imax(1, (w + half_unit) >> unit_size_log2);
 2686|       |
 2687|  30.3k|                const int d = f->frame_hdr->super_res.width_scale_denominator;
 2688|  30.3k|                const int rnd = unit_size * 8 - 1, shift = unit_size_log2 + 3;
 2689|  30.3k|                const int x0 = ((4 *  t->bx            * d >> ss_hor) + rnd) >> shift;
 2690|  30.3k|                const int x1 = ((4 * (t->bx + sb_step) * d >> ss_hor) + rnd) >> shift;
 2691|       |
 2692|  59.6k|                for (int x = x0; x < imin(x1, n_units); x++) {
  ------------------
  |  Branch (2692:34): [True: 29.2k, False: 30.3k]
  ------------------
 2693|  29.2k|                    const int px_x = x << (unit_size_log2 + ss_hor);
 2694|  29.2k|                    const int sb_idx = (t->by >> 5) * f->sr_sb128w + (px_x >> 7);
 2695|  29.2k|                    const int unit_idx = ((t->by & 16) >> 3) + ((px_x & 64) >> 6);
 2696|  29.2k|                    Av1RestorationUnit *const lr = &f->lf.lr_mask[sb_idx].lr[p][unit_idx];
 2697|       |
 2698|  29.2k|                    read_restoration_info(t, lr, p, frame_type);
 2699|  29.2k|                }
 2700|   333k|            } else {
 2701|   333k|                const int x = 4 * t->bx >> ss_hor;
 2702|   333k|                if (x & mask) continue;
  ------------------
  |  Branch (2702:21): [True: 144k, False: 188k]
  ------------------
 2703|   188k|                const int w = (f->cur.p.w + ss_hor) >> ss_hor;
 2704|       |                // Round half up at frame boundaries, if there's more than one
 2705|       |                // restoration unit
 2706|   188k|                if (x && x + half_unit > w) continue;
  ------------------
  |  Branch (2706:21): [True: 79.5k, False: 109k]
  |  Branch (2706:26): [True: 1.88k, False: 77.7k]
  ------------------
 2707|   186k|                const int sb_idx = (t->by >> 5) * f->sr_sb128w + (t->bx >> 5);
 2708|   186k|                const int unit_idx = ((t->by & 16) >> 3) + ((t->bx & 16) >> 4);
 2709|   186k|                Av1RestorationUnit *const lr = &f->lf.lr_mask[sb_idx].lr[p][unit_idx];
 2710|       |
 2711|   186k|                read_restoration_info(t, lr, p, frame_type);
 2712|   186k|            }
 2713|   363k|        }
 2714|  2.12M|        if (decode_sb(t, root_bl, dav1d_intra_edge_tree[root_bl]))
  ------------------
  |  Branch (2714:13): [True: 48.5k, False: 2.07M]
  ------------------
 2715|  48.5k|            return 1;
 2716|  2.07M|        if (t->bx & 16 || f->seq_hdr->sb128) {
  ------------------
  |  Branch (2716:13): [True: 611k, False: 1.46M]
  |  Branch (2716:27): [True: 815k, False: 650k]
  ------------------
 2717|  1.42M|            t->a++;
 2718|  1.42M|            t->lf_mask++;
 2719|  1.42M|        }
 2720|  2.07M|    }
 2721|       |
 2722|  1.31M|    if (f->seq_hdr->ref_frame_mvs && f->c->n_tc > 1 && IS_INTER_OR_SWITCH(f->frame_hdr)) {
  ------------------
  |  |   36|   730k|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 589k, False: 140k]
  |  |  ------------------
  ------------------
  |  Branch (2722:9): [True: 730k, False: 580k]
  |  Branch (2722:38): [True: 730k, False: 18.4E]
  ------------------
 2723|   589k|        dav1d_refmvs_save_tmvs(&f->c->refmvs_dsp, &t->rt,
 2724|   589k|                               ts->tiling.col_start >> 1, ts->tiling.col_end >> 1,
 2725|   589k|                               t->by >> 1, (t->by + sb_step) >> 1);
 2726|   589k|    }
 2727|       |
 2728|       |    // backup pre-loopfilter pixels for intra prediction of the next sbrow
 2729|  1.31M|    if (t->frame_thread.pass != 1)
  ------------------
  |  Branch (2729:9): [True: 0, False: 1.31M]
  ------------------
 2730|      0|        f->bd_fn.backup_ipred_edge(t);
 2731|       |
 2732|       |    // backup t->a/l.tx_lpf_y/uv at tile boundaries to use them to "fix"
 2733|       |    // up the initial value in neighbour tiles when running the loopfilter
 2734|  1.31M|    int align_h = (f->bh + 31) & ~31;
 2735|  1.31M|    memcpy(&f->lf.tx_lpf_right_edge[0][align_h * tile_col + t->by],
 2736|  1.31M|           &t->l.tx_lpf_y[t->by & 16], sb_step);
 2737|  1.31M|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
 2738|  1.31M|    align_h >>= ss_ver;
 2739|  1.31M|    memcpy(&f->lf.tx_lpf_right_edge[1][align_h * tile_col + (t->by >> ss_ver)],
 2740|  1.31M|           &t->l.tx_lpf_uv[(t->by & 16) >> ss_ver], sb_step >> ss_ver);
 2741|       |
 2742|       |    // error out on symbol decoder overread
 2743|  1.31M|    if (ts->msac.cnt <= -15) return 1;
  ------------------
  |  Branch (2743:9): [True: 29.3k, False: 1.28M]
  ------------------
 2744|       |
 2745|  1.28M|    return c->strict_std_compliance &&
  ------------------
  |  Branch (2745:12): [True: 0, False: 1.28M]
  ------------------
 2746|      0|           (t->by >> f->sb_shift) + 1 >= f->frame_hdr->tiling.row_start_sb[tile_row + 1] &&
  ------------------
  |  Branch (2746:12): [True: 0, False: 0]
  ------------------
 2747|      0|           check_trailing_bits_after_symbol_coder(&ts->msac);
  ------------------
  |  Branch (2747:12): [True: 0, False: 0]
  ------------------
 2748|  1.31M|}
dav1d_decode_frame_init:
 2750|   281k|int dav1d_decode_frame_init(Dav1dFrameContext *const f) {
 2751|   281k|    const Dav1dContext *const c = f->c;
 2752|   281k|    int retval = DAV1D_ERR(ENOMEM);
  ------------------
  |  |   58|   281k|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
 2753|       |
 2754|   281k|    if (f->sbh > f->lf.start_of_tile_row_sz) {
  ------------------
  |  Branch (2754:9): [True: 16.8k, False: 264k]
  ------------------
 2755|  16.8k|        dav1d_free(f->lf.start_of_tile_row);
  ------------------
  |  |  135|  16.8k|#define dav1d_free(ptr) free(ptr)
  ------------------
 2756|  16.8k|        f->lf.start_of_tile_row = dav1d_malloc(ALLOC_TILE, f->sbh * sizeof(uint8_t));
  ------------------
  |  |  132|  16.8k|#define dav1d_malloc(type, sz) malloc(sz)
  ------------------
 2757|  16.8k|        if (!f->lf.start_of_tile_row) {
  ------------------
  |  Branch (2757:13): [True: 0, False: 16.8k]
  ------------------
 2758|      0|            f->lf.start_of_tile_row_sz = 0;
 2759|      0|            goto error;
 2760|      0|        }
 2761|  16.8k|        f->lf.start_of_tile_row_sz = f->sbh;
 2762|  16.8k|    }
 2763|   281k|    int sby = 0;
 2764|   642k|    for (int tile_row = 0; tile_row < f->frame_hdr->tiling.rows; tile_row++) {
  ------------------
  |  Branch (2764:28): [True: 360k, False: 281k]
  ------------------
 2765|   360k|        f->lf.start_of_tile_row[sby++] = tile_row;
 2766|  10.1M|        while (sby < f->frame_hdr->tiling.row_start_sb[tile_row + 1])
  ------------------
  |  Branch (2766:16): [True: 9.80M, False: 360k]
  ------------------
 2767|  9.80M|            f->lf.start_of_tile_row[sby++] = 0;
 2768|   360k|    }
 2769|       |
 2770|   281k|    const int n_ts = f->frame_hdr->tiling.cols * f->frame_hdr->tiling.rows;
 2771|   281k|    if (n_ts != f->n_ts) {
  ------------------
  |  Branch (2771:9): [True: 30.6k, False: 250k]
  ------------------
 2772|  30.6k|        if (c->n_fc > 1) {
  ------------------
  |  Branch (2772:13): [True: 30.6k, False: 0]
  ------------------
 2773|  30.6k|            dav1d_free(f->frame_thread.tile_start_off);
  ------------------
  |  |  135|  30.6k|#define dav1d_free(ptr) free(ptr)
  ------------------
 2774|  30.6k|            f->frame_thread.tile_start_off =
 2775|  30.6k|                dav1d_malloc(ALLOC_TILE, sizeof(*f->frame_thread.tile_start_off) * n_ts);
  ------------------
  |  |  132|  30.6k|#define dav1d_malloc(type, sz) malloc(sz)
  ------------------
 2776|  30.6k|            if (!f->frame_thread.tile_start_off) {
  ------------------
  |  Branch (2776:17): [True: 0, False: 30.6k]
  ------------------
 2777|      0|                f->n_ts = 0;
 2778|      0|                goto error;
 2779|      0|            }
 2780|  30.6k|        }
 2781|  30.6k|        dav1d_free_aligned(f->ts);
  ------------------
  |  |  136|  30.6k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
 2782|  30.6k|        f->ts = dav1d_alloc_aligned(ALLOC_TILE, sizeof(*f->ts) * n_ts, 32);
  ------------------
  |  |  134|  30.6k|#define dav1d_alloc_aligned(type, sz, align) dav1d_alloc_aligned_internal(sz, align)
  ------------------
 2783|  30.6k|        if (!f->ts) goto error;
  ------------------
  |  Branch (2783:13): [True: 0, False: 30.6k]
  ------------------
 2784|  30.6k|        f->n_ts = n_ts;
 2785|  30.6k|    }
 2786|       |
 2787|   281k|    const int a_sz = f->sb128w * f->frame_hdr->tiling.rows * (1 + (c->n_fc > 1 && c->n_tc > 1));
  ------------------
  |  Branch (2787:68): [True: 281k, False: 73]
  |  Branch (2787:83): [True: 281k, False: 1]
  ------------------
 2788|   281k|    if (a_sz != f->a_sz) {
  ------------------
  |  Branch (2788:9): [True: 30.0k, False: 251k]
  ------------------
 2789|  30.0k|        dav1d_free(f->a);
  ------------------
  |  |  135|  30.0k|#define dav1d_free(ptr) free(ptr)
  ------------------
 2790|  30.0k|        f->a = dav1d_malloc(ALLOC_TILE, sizeof(*f->a) * a_sz);
  ------------------
  |  |  132|  30.0k|#define dav1d_malloc(type, sz) malloc(sz)
  ------------------
 2791|  30.0k|        if (!f->a) {
  ------------------
  |  Branch (2791:13): [True: 0, False: 30.0k]
  ------------------
 2792|      0|            f->a_sz = 0;
 2793|      0|            goto error;
 2794|      0|        }
 2795|  30.0k|        f->a_sz = a_sz;
 2796|  30.0k|    }
 2797|       |
 2798|   281k|    const int num_sb128 = f->sb128w * f->sb128h;
 2799|   281k|    const uint8_t *const size_mul = ss_size_mul[f->cur.p.layout];
 2800|   281k|    const int hbd = !!f->seq_hdr->hbd;
 2801|   281k|    if (c->n_fc > 1) {
  ------------------
  |  Branch (2801:9): [True: 281k, False: 72]
  ------------------
 2802|   281k|        const unsigned sb_step4 = f->sb_step * 4;
 2803|   281k|        int tile_idx = 0;
 2804|   642k|        for (int tile_row = 0; tile_row < f->frame_hdr->tiling.rows; tile_row++) {
  ------------------
  |  Branch (2804:32): [True: 360k, False: 281k]
  ------------------
 2805|   360k|            const unsigned row_off = f->frame_hdr->tiling.row_start_sb[tile_row] *
 2806|   360k|                                     sb_step4 * f->sb128w * 128;
 2807|   360k|            const unsigned b_diff = (f->frame_hdr->tiling.row_start_sb[tile_row + 1] -
 2808|   360k|                                     f->frame_hdr->tiling.row_start_sb[tile_row]) * sb_step4;
 2809|   744k|            for (int tile_col = 0; tile_col < f->frame_hdr->tiling.cols; tile_col++) {
  ------------------
  |  Branch (2809:36): [True: 383k, False: 360k]
  ------------------
 2810|   383k|                f->frame_thread.tile_start_off[tile_idx++] = row_off + b_diff *
 2811|   383k|                    f->frame_hdr->tiling.col_start_sb[tile_col] * sb_step4;
 2812|   383k|            }
 2813|   360k|        }
 2814|       |
 2815|   281k|        const int lowest_pixel_mem_sz = f->frame_hdr->tiling.cols * f->sbh;
 2816|   281k|        if (lowest_pixel_mem_sz != f->tile_thread.lowest_pixel_mem_sz) {
  ------------------
  |  Branch (2816:13): [True: 23.2k, False: 257k]
  ------------------
 2817|  23.2k|            dav1d_free(f->tile_thread.lowest_pixel_mem);
  ------------------
  |  |  135|  23.2k|#define dav1d_free(ptr) free(ptr)
  ------------------
 2818|  23.2k|            f->tile_thread.lowest_pixel_mem =
 2819|  23.2k|                dav1d_malloc(ALLOC_TILE, lowest_pixel_mem_sz *
  ------------------
  |  |  132|  23.2k|#define dav1d_malloc(type, sz) malloc(sz)
  ------------------
 2820|  23.2k|                             sizeof(*f->tile_thread.lowest_pixel_mem));
 2821|  23.2k|            if (!f->tile_thread.lowest_pixel_mem) {
  ------------------
  |  Branch (2821:17): [True: 0, False: 23.2k]
  ------------------
 2822|      0|                f->tile_thread.lowest_pixel_mem_sz = 0;
 2823|      0|                goto error;
 2824|      0|            }
 2825|  23.2k|            f->tile_thread.lowest_pixel_mem_sz = lowest_pixel_mem_sz;
 2826|  23.2k|        }
 2827|   281k|        int (*lowest_pixel_ptr)[7][2] = f->tile_thread.lowest_pixel_mem;
 2828|   641k|        for (int tile_row = 0, tile_row_base = 0; tile_row < f->frame_hdr->tiling.rows;
  ------------------
  |  Branch (2828:51): [True: 360k, False: 281k]
  ------------------
 2829|   360k|             tile_row++, tile_row_base += f->frame_hdr->tiling.cols)
 2830|   360k|        {
 2831|   360k|            const int tile_row_sb_h = f->frame_hdr->tiling.row_start_sb[tile_row + 1] -
 2832|   360k|                                      f->frame_hdr->tiling.row_start_sb[tile_row];
 2833|   744k|            for (int tile_col = 0; tile_col < f->frame_hdr->tiling.cols; tile_col++) {
  ------------------
  |  Branch (2833:36): [True: 383k, False: 360k]
  ------------------
 2834|   383k|                f->ts[tile_row_base + tile_col].lowest_pixel = lowest_pixel_ptr;
 2835|   383k|                lowest_pixel_ptr += tile_row_sb_h;
 2836|   383k|            }
 2837|   360k|        }
 2838|       |
 2839|   281k|        const int cbi_sz = num_sb128 * size_mul[0];
 2840|   281k|        if (cbi_sz != f->frame_thread.cbi_sz) {
  ------------------
  |  Branch (2840:13): [True: 21.1k, False: 260k]
  ------------------
 2841|  21.1k|            dav1d_free_aligned(f->frame_thread.cbi);
  ------------------
  |  |  136|  21.1k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
 2842|  21.1k|            f->frame_thread.cbi =
 2843|  21.1k|                dav1d_alloc_aligned(ALLOC_BLOCK, sizeof(*f->frame_thread.cbi) *
  ------------------
  |  |  134|  21.1k|#define dav1d_alloc_aligned(type, sz, align) dav1d_alloc_aligned_internal(sz, align)
  ------------------
 2844|  21.1k|                                    cbi_sz * 32 * 32 / 4, 64);
 2845|  21.1k|            if (!f->frame_thread.cbi) {
  ------------------
  |  Branch (2845:17): [True: 0, False: 21.1k]
  ------------------
 2846|      0|                f->frame_thread.cbi_sz = 0;
 2847|      0|                goto error;
 2848|      0|            }
 2849|  21.1k|            f->frame_thread.cbi_sz = cbi_sz;
 2850|  21.1k|        }
 2851|       |
 2852|   281k|        const int cf_sz = (num_sb128 * size_mul[0]) << hbd;
 2853|   281k|        if (cf_sz != f->frame_thread.cf_sz) {
  ------------------
  |  Branch (2853:13): [True: 21.8k, False: 259k]
  ------------------
 2854|  21.8k|            dav1d_free_aligned(f->frame_thread.cf);
  ------------------
  |  |  136|  21.8k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
 2855|  21.8k|            f->frame_thread.cf =
 2856|  21.8k|                dav1d_alloc_aligned(ALLOC_COEF, (size_t)cf_sz * 128 * 128 / 2, 64);
  ------------------
  |  |  134|  21.8k|#define dav1d_alloc_aligned(type, sz, align) dav1d_alloc_aligned_internal(sz, align)
  ------------------
 2857|  21.8k|            if (!f->frame_thread.cf) {
  ------------------
  |  Branch (2857:17): [True: 0, False: 21.8k]
  ------------------
 2858|      0|                f->frame_thread.cf_sz = 0;
 2859|      0|                goto error;
 2860|      0|            }
 2861|  21.8k|            memset(f->frame_thread.cf, 0, (size_t)cf_sz * 128 * 128 / 2);
 2862|  21.8k|            f->frame_thread.cf_sz = cf_sz;
 2863|  21.8k|        }
 2864|       |
 2865|   281k|        if (f->frame_hdr->allow_screen_content_tools) {
  ------------------
  |  Branch (2865:13): [True: 226k, False: 54.5k]
  ------------------
 2866|   226k|            const int pal_sz = num_sb128 << hbd;
 2867|   226k|            if (pal_sz != f->frame_thread.pal_sz) {
  ------------------
  |  Branch (2867:17): [True: 12.5k, False: 214k]
  ------------------
 2868|  12.5k|                dav1d_free_aligned(f->frame_thread.pal);
  ------------------
  |  |  136|  12.5k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
 2869|  12.5k|                f->frame_thread.pal =
 2870|  12.5k|                    dav1d_alloc_aligned(ALLOC_PAL, sizeof(*f->frame_thread.pal) *
  ------------------
  |  |  134|  12.5k|#define dav1d_alloc_aligned(type, sz, align) dav1d_alloc_aligned_internal(sz, align)
  ------------------
 2871|  12.5k|                                        pal_sz * 16 * 16, 64);
 2872|  12.5k|                if (!f->frame_thread.pal) {
  ------------------
  |  Branch (2872:21): [True: 0, False: 12.5k]
  ------------------
 2873|      0|                    f->frame_thread.pal_sz = 0;
 2874|      0|                    goto error;
 2875|      0|                }
 2876|  12.5k|                f->frame_thread.pal_sz = pal_sz;
 2877|  12.5k|            }
 2878|       |
 2879|   226k|            const int pal_idx_sz = num_sb128 * size_mul[1];
 2880|   226k|            if (pal_idx_sz != f->frame_thread.pal_idx_sz) {
  ------------------
  |  Branch (2880:17): [True: 12.5k, False: 214k]
  ------------------
 2881|  12.5k|                dav1d_free_aligned(f->frame_thread.pal_idx);
  ------------------
  |  |  136|  12.5k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
 2882|  12.5k|                f->frame_thread.pal_idx =
 2883|  12.5k|                    dav1d_alloc_aligned(ALLOC_PAL, sizeof(*f->frame_thread.pal_idx) *
  ------------------
  |  |  134|  12.5k|#define dav1d_alloc_aligned(type, sz, align) dav1d_alloc_aligned_internal(sz, align)
  ------------------
 2884|  12.5k|                                        pal_idx_sz * 128 * 128 / 8, 64);
 2885|  12.5k|                if (!f->frame_thread.pal_idx) {
  ------------------
  |  Branch (2885:21): [True: 0, False: 12.5k]
  ------------------
 2886|      0|                    f->frame_thread.pal_idx_sz = 0;
 2887|      0|                    goto error;
 2888|      0|                }
 2889|  12.5k|                f->frame_thread.pal_idx_sz = pal_idx_sz;
 2890|  12.5k|            }
 2891|   226k|        } else if (f->frame_thread.pal) {
  ------------------
  |  Branch (2891:20): [True: 1.64k, False: 52.8k]
  ------------------
 2892|  1.64k|            dav1d_freep_aligned(&f->frame_thread.pal);
 2893|  1.64k|            dav1d_freep_aligned(&f->frame_thread.pal_idx);
 2894|  1.64k|            f->frame_thread.pal_sz = f->frame_thread.pal_idx_sz = 0;
 2895|  1.64k|        }
 2896|   281k|    }
 2897|       |
 2898|       |    // update allocation of block contexts for above
 2899|   281k|    ptrdiff_t y_stride = f->cur.stride[0], uv_stride = f->cur.stride[1];
 2900|   281k|    const int has_resize = f->frame_hdr->width[0] != f->frame_hdr->width[1];
 2901|   281k|    const int need_cdef_lpf_copy = c->n_tc > 1 && has_resize;
  ------------------
  |  Branch (2901:36): [True: 281k, False: 83]
  |  Branch (2901:51): [True: 23.9k, False: 257k]
  ------------------
 2902|   281k|    if (y_stride * f->sbh * 4 != f->lf.cdef_buf_plane_sz[0] ||
  ------------------
  |  Branch (2902:9): [True: 21.1k, False: 260k]
  ------------------
 2903|   260k|        uv_stride * f->sbh * 8 != f->lf.cdef_buf_plane_sz[1] ||
  ------------------
  |  Branch (2903:9): [True: 465, False: 259k]
  ------------------
 2904|   259k|        need_cdef_lpf_copy != f->lf.need_cdef_lpf_copy ||
  ------------------
  |  Branch (2904:9): [True: 486, False: 259k]
  ------------------
 2905|   259k|        f->sbh != f->lf.cdef_buf_sbh)
  ------------------
  |  Branch (2905:9): [True: 353, False: 258k]
  ------------------
 2906|  22.3k|    {
 2907|  22.3k|        dav1d_free_aligned(f->lf.cdef_line_buf);
  ------------------
  |  |  136|  22.3k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
 2908|  22.3k|        size_t alloc_sz = 64;
 2909|  22.3k|        alloc_sz += (size_t)llabs(y_stride) * 4 * f->sbh << need_cdef_lpf_copy;
 2910|  22.3k|        alloc_sz += (size_t)llabs(uv_stride) * 8 * f->sbh << need_cdef_lpf_copy;
 2911|  22.3k|        uint8_t *ptr = f->lf.cdef_line_buf = dav1d_alloc_aligned(ALLOC_CDEF, alloc_sz, 32);
  ------------------
  |  |  134|  22.3k|#define dav1d_alloc_aligned(type, sz, align) dav1d_alloc_aligned_internal(sz, align)
  ------------------
 2912|  22.3k|        if (!ptr) {
  ------------------
  |  Branch (2912:13): [True: 0, False: 22.3k]
  ------------------
 2913|      0|            f->lf.cdef_buf_plane_sz[0] = f->lf.cdef_buf_plane_sz[1] = 0;
 2914|      0|            goto error;
 2915|      0|        }
 2916|       |
 2917|  22.3k|        ptr += 32;
 2918|  22.3k|        if (y_stride < 0) {
  ------------------
  |  Branch (2918:13): [True: 0, False: 22.3k]
  ------------------
 2919|      0|            f->lf.cdef_line[0][0] = ptr - y_stride * (f->sbh * 4 - 1);
 2920|      0|            f->lf.cdef_line[1][0] = ptr - y_stride * (f->sbh * 4 - 3);
 2921|  22.3k|        } else {
 2922|  22.3k|            f->lf.cdef_line[0][0] = ptr + y_stride * 0;
 2923|  22.3k|            f->lf.cdef_line[1][0] = ptr + y_stride * 2;
 2924|  22.3k|        }
 2925|  22.3k|        ptr += llabs(y_stride) * f->sbh * 4;
 2926|  22.3k|        if (uv_stride < 0) {
  ------------------
  |  Branch (2926:13): [True: 0, False: 22.3k]
  ------------------
 2927|      0|            f->lf.cdef_line[0][1] = ptr - uv_stride * (f->sbh * 8 - 1);
 2928|      0|            f->lf.cdef_line[0][2] = ptr - uv_stride * (f->sbh * 8 - 3);
 2929|      0|            f->lf.cdef_line[1][1] = ptr - uv_stride * (f->sbh * 8 - 5);
 2930|      0|            f->lf.cdef_line[1][2] = ptr - uv_stride * (f->sbh * 8 - 7);
 2931|  22.3k|        } else {
 2932|  22.3k|            f->lf.cdef_line[0][1] = ptr + uv_stride * 0;
 2933|  22.3k|            f->lf.cdef_line[0][2] = ptr + uv_stride * 2;
 2934|  22.3k|            f->lf.cdef_line[1][1] = ptr + uv_stride * 4;
 2935|  22.3k|            f->lf.cdef_line[1][2] = ptr + uv_stride * 6;
 2936|  22.3k|        }
 2937|       |
 2938|  22.3k|        if (need_cdef_lpf_copy) {
  ------------------
  |  Branch (2938:13): [True: 4.82k, False: 17.5k]
  ------------------
 2939|  4.82k|            ptr += llabs(uv_stride) * f->sbh * 8;
 2940|  4.82k|            if (y_stride < 0)
  ------------------
  |  Branch (2940:17): [True: 0, False: 4.82k]
  ------------------
 2941|      0|                f->lf.cdef_lpf_line[0] = ptr - y_stride * (f->sbh * 4 - 1);
 2942|  4.82k|            else
 2943|  4.82k|                f->lf.cdef_lpf_line[0] = ptr;
 2944|  4.82k|            ptr += llabs(y_stride) * f->sbh * 4;
 2945|  4.82k|            if (uv_stride < 0) {
  ------------------
  |  Branch (2945:17): [True: 0, False: 4.82k]
  ------------------
 2946|      0|                f->lf.cdef_lpf_line[1] = ptr - uv_stride * (f->sbh * 4 - 1);
 2947|      0|                f->lf.cdef_lpf_line[2] = ptr - uv_stride * (f->sbh * 8 - 1);
 2948|  4.82k|            } else {
 2949|  4.82k|                f->lf.cdef_lpf_line[1] = ptr;
 2950|  4.82k|                f->lf.cdef_lpf_line[2] = ptr + uv_stride * f->sbh * 4;
 2951|  4.82k|            }
 2952|  4.82k|        }
 2953|       |
 2954|  22.3k|        f->lf.cdef_buf_plane_sz[0] = (int) y_stride * f->sbh * 4;
 2955|  22.3k|        f->lf.cdef_buf_plane_sz[1] = (int) uv_stride * f->sbh * 8;
 2956|  22.3k|        f->lf.need_cdef_lpf_copy = need_cdef_lpf_copy;
 2957|  22.3k|        f->lf.cdef_buf_sbh = f->sbh;
 2958|  22.3k|    }
 2959|       |
 2960|   281k|    const int sb128 = f->seq_hdr->sb128;
 2961|   281k|    const int num_lines = c->n_tc > 1 ? f->sbh * 4 << sb128 : 12;
  ------------------
  |  Branch (2961:27): [True: 281k, False: 80]
  ------------------
 2962|   281k|    y_stride = f->sr_cur.p.stride[0], uv_stride = f->sr_cur.p.stride[1];
 2963|   281k|    if (y_stride * num_lines != f->lf.lr_buf_plane_sz[0] ||
  ------------------
  |  Branch (2963:9): [True: 20.4k, False: 260k]
  ------------------
 2964|   260k|        uv_stride * num_lines * 2 != f->lf.lr_buf_plane_sz[1])
  ------------------
  |  Branch (2964:9): [True: 470, False: 260k]
  ------------------
 2965|  20.8k|    {
 2966|  20.8k|        dav1d_free_aligned(f->lf.lr_line_buf);
  ------------------
  |  |  136|  20.8k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
 2967|       |        // lr simd may overread the input, so slightly over-allocate the lpf buffer
 2968|  20.8k|        size_t alloc_sz = 128;
 2969|  20.8k|        alloc_sz += (size_t)llabs(y_stride) * num_lines;
 2970|  20.8k|        alloc_sz += (size_t)llabs(uv_stride) * num_lines * 2;
 2971|  20.8k|        uint8_t *ptr = f->lf.lr_line_buf = dav1d_alloc_aligned(ALLOC_LR, alloc_sz, 64);
  ------------------
  |  |  134|  20.8k|#define dav1d_alloc_aligned(type, sz, align) dav1d_alloc_aligned_internal(sz, align)
  ------------------
 2972|  20.8k|        if (!ptr) {
  ------------------
  |  Branch (2972:13): [True: 0, False: 20.8k]
  ------------------
 2973|      0|            f->lf.lr_buf_plane_sz[0] = f->lf.lr_buf_plane_sz[1] = 0;
 2974|      0|            goto error;
 2975|      0|        }
 2976|       |
 2977|  20.8k|        ptr += 64;
 2978|  20.8k|        if (y_stride < 0)
  ------------------
  |  Branch (2978:13): [True: 0, False: 20.8k]
  ------------------
 2979|      0|            f->lf.lr_lpf_line[0] = ptr - y_stride * (num_lines - 1);
 2980|  20.8k|        else
 2981|  20.8k|            f->lf.lr_lpf_line[0] = ptr;
 2982|  20.8k|        ptr += llabs(y_stride) * num_lines;
 2983|  20.8k|        if (uv_stride < 0) {
  ------------------
  |  Branch (2983:13): [True: 0, False: 20.8k]
  ------------------
 2984|      0|            f->lf.lr_lpf_line[1] = ptr - uv_stride * (num_lines * 1 - 1);
 2985|      0|            f->lf.lr_lpf_line[2] = ptr - uv_stride * (num_lines * 2 - 1);
 2986|  20.8k|        } else {
 2987|  20.8k|            f->lf.lr_lpf_line[1] = ptr;
 2988|  20.8k|            f->lf.lr_lpf_line[2] = ptr + uv_stride * num_lines;
 2989|  20.8k|        }
 2990|       |
 2991|  20.8k|        f->lf.lr_buf_plane_sz[0] = (int) y_stride * num_lines;
 2992|  20.8k|        f->lf.lr_buf_plane_sz[1] = (int) uv_stride * num_lines * 2;
 2993|  20.8k|    }
 2994|       |
 2995|       |    // update allocation for loopfilter masks
 2996|   281k|    if (num_sb128 != f->lf.mask_sz) {
  ------------------
  |  Branch (2996:9): [True: 20.7k, False: 260k]
  ------------------
 2997|  20.7k|        dav1d_free(f->lf.mask);
  ------------------
  |  |  135|  20.7k|#define dav1d_free(ptr) free(ptr)
  ------------------
 2998|  20.7k|        dav1d_free(f->lf.level);
  ------------------
  |  |  135|  20.7k|#define dav1d_free(ptr) free(ptr)
  ------------------
 2999|  20.7k|        f->lf.mask = dav1d_malloc(ALLOC_LF, sizeof(*f->lf.mask) * num_sb128);
  ------------------
  |  |  132|  20.7k|#define dav1d_malloc(type, sz) malloc(sz)
  ------------------
 3000|       |        // over-allocate by 3 bytes since some of the SIMD implementations
 3001|       |        // index this from the level type and can thus over-read by up to 3
 3002|  20.7k|        f->lf.level = dav1d_malloc(ALLOC_LF, sizeof(*f->lf.level) * num_sb128 * 32 * 32 + 3);
  ------------------
  |  |  132|  20.7k|#define dav1d_malloc(type, sz) malloc(sz)
  ------------------
 3003|  20.7k|        if (!f->lf.mask || !f->lf.level) {
  ------------------
  |  Branch (3003:13): [True: 18.4E, False: 20.7k]
  |  Branch (3003:28): [True: 0, False: 20.7k]
  ------------------
 3004|      0|            f->lf.mask_sz = 0;
 3005|      0|            goto error;
 3006|      0|        }
 3007|  20.7k|        if (c->n_fc > 1) {
  ------------------
  |  Branch (3007:13): [True: 20.7k, False: 0]
  ------------------
 3008|  20.7k|            dav1d_free(f->frame_thread.b);
  ------------------
  |  |  135|  20.7k|#define dav1d_free(ptr) free(ptr)
  ------------------
 3009|  20.7k|            f->frame_thread.b = dav1d_malloc(ALLOC_BLOCK, sizeof(*f->frame_thread.b) *
  ------------------
  |  |  132|  20.7k|#define dav1d_malloc(type, sz) malloc(sz)
  ------------------
 3010|  20.7k|                                             num_sb128 * 32 * 32);
 3011|  20.7k|            if (!f->frame_thread.b) {
  ------------------
  |  Branch (3011:17): [True: 0, False: 20.7k]
  ------------------
 3012|      0|                f->lf.mask_sz = 0;
 3013|      0|                goto error;
 3014|      0|            }
 3015|  20.7k|        }
 3016|  20.7k|        f->lf.mask_sz = num_sb128;
 3017|  20.7k|    }
 3018|       |
 3019|   281k|    f->sr_sb128w = (f->sr_cur.p.p.w + 127) >> 7;
 3020|   281k|    const int lr_mask_sz = f->sr_sb128w * f->sb128h;
 3021|   281k|    if (lr_mask_sz != f->lf.lr_mask_sz) {
  ------------------
  |  Branch (3021:9): [True: 19.7k, False: 261k]
  ------------------
 3022|  19.7k|        dav1d_free(f->lf.lr_mask);
  ------------------
  |  |  135|  19.7k|#define dav1d_free(ptr) free(ptr)
  ------------------
 3023|  19.7k|        f->lf.lr_mask = dav1d_malloc(ALLOC_LR, sizeof(*f->lf.lr_mask) * lr_mask_sz);
  ------------------
  |  |  132|  19.7k|#define dav1d_malloc(type, sz) malloc(sz)
  ------------------
 3024|  19.7k|        if (!f->lf.lr_mask) {
  ------------------
  |  Branch (3024:13): [True: 0, False: 19.7k]
  ------------------
 3025|      0|            f->lf.lr_mask_sz = 0;
 3026|      0|            goto error;
 3027|      0|        }
 3028|  19.7k|        f->lf.lr_mask_sz = lr_mask_sz;
 3029|  19.7k|    }
 3030|   281k|    f->lf.restore_planes =
 3031|   281k|        ((f->frame_hdr->restoration.type[0] != DAV1D_RESTORATION_NONE) << 0) +
 3032|   281k|        ((f->frame_hdr->restoration.type[1] != DAV1D_RESTORATION_NONE) << 1) +
 3033|   281k|        ((f->frame_hdr->restoration.type[2] != DAV1D_RESTORATION_NONE) << 2);
 3034|   281k|    if (f->frame_hdr->loopfilter.sharpness != f->lf.last_sharpness) {
  ------------------
  |  Branch (3034:9): [True: 31.6k, False: 249k]
  ------------------
 3035|  31.6k|        dav1d_calc_eih(&f->lf.lim_lut, f->frame_hdr->loopfilter.sharpness);
 3036|  31.6k|        f->lf.last_sharpness = f->frame_hdr->loopfilter.sharpness;
 3037|  31.6k|    }
 3038|   281k|    dav1d_calc_lf_values(f->lf.lvl, f->frame_hdr, (int8_t[4]) { 0, 0, 0, 0 });
 3039|   281k|    memset(f->lf.mask, 0, sizeof(*f->lf.mask) * num_sb128);
 3040|       |
 3041|   281k|    const int ipred_edge_sz = f->sbh * f->sb128w << hbd;
 3042|   281k|    if (ipred_edge_sz != f->ipred_edge_sz) {
  ------------------
  |  Branch (3042:9): [True: 20.9k, False: 260k]
  ------------------
 3043|  20.9k|        dav1d_free_aligned(f->ipred_edge[0]);
  ------------------
  |  |  136|  20.9k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
 3044|  20.9k|        uint8_t *ptr = f->ipred_edge[0] =
 3045|  20.9k|            dav1d_alloc_aligned(ALLOC_IPRED, ipred_edge_sz * 128 * 3, 64);
  ------------------
  |  |  134|  20.9k|#define dav1d_alloc_aligned(type, sz, align) dav1d_alloc_aligned_internal(sz, align)
  ------------------
 3046|  20.9k|        if (!ptr) {
  ------------------
  |  Branch (3046:13): [True: 0, False: 20.9k]
  ------------------
 3047|      0|            f->ipred_edge_sz = 0;
 3048|      0|            goto error;
 3049|      0|        }
 3050|  20.9k|        f->ipred_edge[1] = ptr + ipred_edge_sz * 128 * 1;
 3051|  20.9k|        f->ipred_edge[2] = ptr + ipred_edge_sz * 128 * 2;
 3052|  20.9k|        f->ipred_edge_sz = ipred_edge_sz;
 3053|  20.9k|    }
 3054|       |
 3055|   281k|    const int re_sz = f->sb128h * f->frame_hdr->tiling.cols;
 3056|   281k|    if (re_sz != f->lf.re_sz) {
  ------------------
  |  Branch (3056:9): [True: 22.8k, False: 258k]
  ------------------
 3057|  22.8k|        dav1d_free(f->lf.tx_lpf_right_edge[0]);
  ------------------
  |  |  135|  22.8k|#define dav1d_free(ptr) free(ptr)
  ------------------
 3058|  22.8k|        f->lf.tx_lpf_right_edge[0] = dav1d_malloc(ALLOC_LF, re_sz * 32 * 2);
  ------------------
  |  |  132|  22.8k|#define dav1d_malloc(type, sz) malloc(sz)
  ------------------
 3059|  22.8k|        if (!f->lf.tx_lpf_right_edge[0]) {
  ------------------
  |  Branch (3059:13): [True: 0, False: 22.8k]
  ------------------
 3060|      0|            f->lf.re_sz = 0;
 3061|      0|            goto error;
 3062|      0|        }
 3063|  22.8k|        f->lf.tx_lpf_right_edge[1] = f->lf.tx_lpf_right_edge[0] + re_sz * 32;
 3064|  22.8k|        f->lf.re_sz = re_sz;
 3065|  22.8k|    }
 3066|       |
 3067|       |    // init ref mvs
 3068|   281k|    if (IS_INTER_OR_SWITCH(f->frame_hdr) || f->frame_hdr->allow_intrabc) {
  ------------------
  |  |   36|   562k|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 82.1k, False: 199k]
  |  |  ------------------
  ------------------
  |  Branch (3068:45): [True: 173k, False: 26.1k]
  ------------------
 3069|   254k|        const int ret =
 3070|   254k|            dav1d_refmvs_init_frame(&f->rf, f->seq_hdr, f->frame_hdr,
 3071|   254k|                                    f->refpoc, f->mvs, f->refrefpoc, f->ref_mvs,
 3072|   254k|                                    f->c->n_tc, f->c->n_fc);
 3073|   254k|        if (ret < 0) goto error;
  ------------------
  |  Branch (3073:13): [True: 0, False: 254k]
  ------------------
 3074|   254k|    }
 3075|       |
 3076|       |    // setup dequant tables
 3077|   281k|    init_quant_tables(f->seq_hdr, f->frame_hdr, f->frame_hdr->quant.yac, f->dq);
 3078|   281k|    if (f->frame_hdr->quant.qm)
  ------------------
  |  Branch (3078:9): [True: 24.0k, False: 257k]
  ------------------
 3079|   479k|        for (int i = 0; i < N_RECT_TX_SIZES; i++) {
  ------------------
  |  Branch (3079:25): [True: 455k, False: 24.0k]
  ------------------
 3080|   455k|            f->qm[i][0] = dav1d_qm_tbl[f->frame_hdr->quant.qm_y][0][i];
 3081|   455k|            f->qm[i][1] = dav1d_qm_tbl[f->frame_hdr->quant.qm_u][1][i];
 3082|   455k|            f->qm[i][2] = dav1d_qm_tbl[f->frame_hdr->quant.qm_v][1][i];
 3083|   455k|        }
 3084|   257k|    else
 3085|   257k|        memset(f->qm, 0, sizeof(f->qm));
 3086|       |
 3087|       |    // setup jnt_comp weights
 3088|   281k|    if (f->frame_hdr->switchable_comp_refs) {
  ------------------
  |  Branch (3088:9): [True: 50.3k, False: 230k]
  ------------------
 3089|   401k|        for (int i = 0; i < 7; i++) {
  ------------------
  |  Branch (3089:25): [True: 351k, False: 50.3k]
  ------------------
 3090|   351k|            const unsigned ref0poc = f->refp[i].p.frame_hdr->frame_offset;
 3091|       |
 3092|  1.40M|            for (int j = i + 1; j < 7; j++) {
  ------------------
  |  Branch (3092:33): [True: 1.04M, False: 351k]
  ------------------
 3093|  1.04M|                const unsigned ref1poc = f->refp[j].p.frame_hdr->frame_offset;
 3094|       |
 3095|  1.04M|                const unsigned d1 =
 3096|  1.04M|                    imin(abs(get_poc_diff(f->seq_hdr->order_hint_n_bits, ref0poc,
 3097|  1.04M|                                          f->cur.frame_hdr->frame_offset)), 31);
 3098|  1.04M|                const unsigned d0 =
 3099|  1.04M|                    imin(abs(get_poc_diff(f->seq_hdr->order_hint_n_bits, ref1poc,
 3100|  1.04M|                                          f->cur.frame_hdr->frame_offset)), 31);
 3101|  1.04M|                const int order = d0 <= d1;
 3102|       |
 3103|  1.04M|                static const uint8_t quant_dist_weight[3][2] = {
 3104|  1.04M|                    { 2, 3 }, { 2, 5 }, { 2, 7 }
 3105|  1.04M|                };
 3106|  1.04M|                static const uint8_t quant_dist_lookup_table[4][2] = {
 3107|  1.04M|                    { 9, 7 }, { 11, 5 }, { 12, 4 }, { 13, 3 }
 3108|  1.04M|                };
 3109|       |
 3110|  1.04M|                int k;
 3111|  2.45M|                for (k = 0; k < 3; k++) {
  ------------------
  |  Branch (3111:29): [True: 2.09M, False: 360k]
  ------------------
 3112|  2.09M|                    const int c0 = quant_dist_weight[k][order];
 3113|  2.09M|                    const int c1 = quant_dist_weight[k][!order];
 3114|  2.09M|                    const int d0_c0 = d0 * c0;
 3115|  2.09M|                    const int d1_c1 = d1 * c1;
 3116|  2.09M|                    if ((d0 > d1 && d0_c0 < d1_c1) || (d0 <= d1 && d0_c0 > d1_c1)) break;
  ------------------
  |  Branch (3116:26): [True: 731k, False: 1.36M]
  |  Branch (3116:37): [True: 186k, False: 545k]
  |  Branch (3116:56): [True: 1.36M, False: 539k]
  |  Branch (3116:68): [True: 504k, False: 865k]
  ------------------
 3117|  2.09M|                }
 3118|       |
 3119|  1.04M|                f->jnt_weights[i][j] = quant_dist_lookup_table[k][order];
 3120|  1.04M|            }
 3121|   351k|        }
 3122|  50.3k|    }
 3123|       |
 3124|       |    /* Init loopfilter pointers. Increasing NULL pointers is technically UB,
 3125|       |     * so just point the chroma pointers in 4:0:0 to the luma plane here to
 3126|       |     * avoid having additional in-loop branches in various places. We never
 3127|       |     * dereference those pointers so it doesn't really matter what they
 3128|       |     * point at, as long as the pointers are valid. */
 3129|   281k|    const int has_chroma = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400;
 3130|   281k|    f->lf.p[0] = f->cur.data[0];
 3131|   281k|    f->lf.p[1] = f->cur.data[has_chroma ? 1 : 0];
  ------------------
  |  Branch (3131:30): [True: 214k, False: 67.0k]
  ------------------
 3132|   281k|    f->lf.p[2] = f->cur.data[has_chroma ? 2 : 0];
  ------------------
  |  Branch (3132:30): [True: 214k, False: 67.0k]
  ------------------
 3133|   281k|    f->lf.sr_p[0] = f->sr_cur.p.data[0];
 3134|   281k|    f->lf.sr_p[1] = f->sr_cur.p.data[has_chroma ? 1 : 0];
  ------------------
  |  Branch (3134:38): [True: 214k, False: 66.9k]
  ------------------
 3135|   281k|    f->lf.sr_p[2] = f->sr_cur.p.data[has_chroma ? 2 : 0];
  ------------------
  |  Branch (3135:38): [True: 214k, False: 66.9k]
  ------------------
 3136|       |
 3137|   281k|    retval = 0;
 3138|   281k|error:
 3139|   280k|    return retval;
 3140|   281k|}
dav1d_decode_frame_init_cdf:
/*
 * Prepares the per-tile decoding state of frame f from its tile payloads.
 *
 * Walks every f->tile[] entry and carves its payload into individual tiles.
 * Every tile except the last one of an entry is preceded by a field of
 * tiling.n_bytes bytes (least-significant byte first) holding the tile size
 * minus one; the last tile takes whatever is left of the payload. Each tile
 * is handed to setup_tile() together with its row/column position. When more
 * than one tile thread is configured (c->n_tc > 1), reset_context() is then
 * called on the f->a[] entries.
 *
 * f: frame context; f->c, f->frame_hdr and f->tile[] are read, f->ts[],
 *    f->a[] and f->task_thread.update_set are written.
 *
 * Returns 0 on success, or DAV1D_ERR(EINVAL) when a size field or a tile
 * would extend past the end of its payload.
 */
 3142|   239k|int dav1d_decode_frame_init_cdf(Dav1dFrameContext *const f) {
 3143|   239k|    const Dav1dContext *const c = f->c;
 3144|   239k|    int retval = DAV1D_ERR(EINVAL);
  ------------------
  |  |   58|   239k|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
 3145|       |    // NOTE(review): presumably seeds out_cdf from in_cdf; confirm in dav1d_cdf_thread_copy()
 3146|   239k|    if (f->frame_hdr->refresh_context)
  ------------------
  |  Branch (3146:9): [True: 16.7k, False: 223k]
  ------------------
 3147|  16.7k|        dav1d_cdf_thread_copy(f->out_cdf.data.cdf, &f->in_cdf);
 3148|       |
 3149|       |    // parse individual tiles per tile group
 3150|   239k|    int tile_row = 0, tile_col = 0;
 3151|   239k|    f->task_thread.update_set = 0; // set to 1 below once tile tiling.update is reached
 3152|   472k|    for (int i = 0; i < f->n_tile_data; i++) {
  ------------------
  |  Branch (3152:21): [True: 240k, False: 232k]
  ------------------
 3153|   240k|        const uint8_t *data = f->tile[i].data.data;
 3154|   240k|        size_t size = f->tile[i].data.sz;
 3155|       |        // tiles start..end share this payload; only the last one has no size field
 3156|   485k|        for (int j = f->tile[i].start; j <= f->tile[i].end; j++) {
  ------------------
  |  Branch (3156:40): [True: 252k, False: 232k]
  ------------------
 3157|   252k|            size_t tile_sz;
 3158|   252k|            if (j == f->tile[i].end) {
  ------------------
  |  Branch (3158:17): [True: 232k, False: 20.4k]
  ------------------
 3159|   232k|                tile_sz = size;
 3160|   232k|            } else {
 3161|  20.4k|                if (f->frame_hdr->tiling.n_bytes > size) goto error;
  ------------------
  |  Branch (3161:21): [True: 5.78k, False: 14.6k]
  ------------------
 3162|  14.6k|                tile_sz = 0;
 3163|  34.3k|                for (unsigned k = 0; k < f->frame_hdr->tiling.n_bytes; k++)
  ------------------
  |  Branch (3163:38): [True: 19.7k, False: 14.6k]
  ------------------
 3164|  19.7k|                    tile_sz |= (unsigned)*data++ << (k * 8); // least-significant byte first
 3165|  14.6k|                tile_sz++; // the field stores the size minus one
 3166|  14.6k|                size -= f->frame_hdr->tiling.n_bytes;
 3167|  14.6k|                if (tile_sz > size) goto error;
  ------------------
  |  Branch (3167:21): [True: 1.94k, False: 12.6k]
  ------------------
 3168|  14.6k|            }
 3169|       |            // tile_start_off[] is only read when c->n_fc > 1; otherwise 0 is passed
 3170|   245k|            setup_tile(&f->ts[j], f, data, tile_sz, tile_row, tile_col++,
 3171|   245k|                       c->n_fc > 1 ? f->frame_thread.tile_start_off[j] : 0);
  ------------------
  |  Branch (3171:24): [True: 245k, False: 12]
  ------------------
 3172|       |            // tiles arrive in raster order: wrap to the next row after the last column
 3173|   245k|            if (tile_col == f->frame_hdr->tiling.cols) {
  ------------------
  |  Branch (3173:17): [True: 237k, False: 7.17k]
  ------------------
 3174|   237k|                tile_col = 0;
 3175|   237k|                tile_row++;
 3176|   237k|            }
 3177|   245k|            if (j == f->frame_hdr->tiling.update && f->frame_hdr->refresh_context)
  ------------------
  |  Branch (3177:17): [True: 232k, False: 13.0k]
  |  Branch (3177:53): [True: 16.4k, False: 215k]
  ------------------
 3178|  16.4k|                f->task_thread.update_set = 1;
 3179|   245k|            data += tile_sz;
 3180|   245k|            size -= tile_sz;
 3181|   245k|        }
 3182|   240k|    }
 3183|       |    // with n_fc > 1, f->a[] holds two sets: the first is reset with 1, the second with 2
 3184|   232k|    if (c->n_tc > 1) {
  ------------------
  |  Branch (3184:9): [True: 231k, False: 263]
  ------------------
 3185|   231k|        const int uses_2pass = c->n_fc > 1;
 3186|  2.56M|        for (int n = 0; n < f->sb128w * f->frame_hdr->tiling.rows * (1 + uses_2pass); n++)
  ------------------
  |  Branch (3186:25): [True: 2.33M, False: 231k]
  ------------------
 3187|  2.33M|            reset_context(&f->a[n], IS_KEY_OR_INTRA(f->frame_hdr),
  ------------------
  |  |   43|  2.33M|    (!IS_INTER_OR_SWITCH(frame_header))
  |  |  ------------------
  |  |  |  |   36|  2.33M|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  ------------------
 3188|  18.4E|                          uses_2pass ? 1 + (n >= f->sb128w * f->frame_hdr->tiling.rows) : 0);
  ------------------
  |  Branch (3188:27): [True: 2.33M, False: 18.4E]
  ------------------
 3189|   231k|    }
 3190|       |
 3191|   232k|    retval = 0;
 3192|   239k|error:
 3193|   239k|    return retval;
 3194|   232k|}
dav1d_decode_frame_exit:
/*
 * Tears down the per-frame state of f once decoding has finished or failed.
 *
 * Releases the references held by the frame context (refp[], ref_mvs_ref[],
 * cur, sr_cur, in_cdf, out_cdf, the segmap/mvs/header refs and the tile
 * payloads) and stores the final status in f->task_thread.retval.
 *
 * f:      frame context to clean up.
 * retval: decoding result, 0 meaning success. It is replaced by
 *         DAV1D_ERR(EINVAL) when it was 0, c->n_fc > 1,
 *         c->strict_std_compliance is set and a reference picture reports
 *         FRAME_ERROR in progress[1].
 */
 3242|   321k|void dav1d_decode_frame_exit(Dav1dFrameContext *const f, int retval) {
 3243|   321k|    const Dav1dContext *const c = f->c;
 3244|       |    // clear the task error flag if sr_cur holds picture data
 3245|   321k|    if (f->sr_cur.p.data[0])
  ------------------
  |  Branch (3245:9): [True: 282k, False: 39.1k]
  ------------------
 3246|   321k|        atomic_init(&f->task_thread.error, 0);
 3247|       |    // failed frame with frame threading (n_fc > 1): zero the frame_thread.cf buffer
 3248|   321k|    if (c->n_fc > 1 && retval && f->frame_thread.cf) {
  ------------------
  |  Branch (3248:9): [True: 321k, False: 0]
  |  Branch (3248:24): [True: 187k, False: 134k]
  |  Branch (3248:34): [True: 162k, False: 24.1k]
  ------------------
 3249|   162k|        memset(f->frame_thread.cf, 0,
 3250|   162k|               (size_t)f->frame_thread.cf_sz * 128 * 128 / 2);
 3251|   162k|    }
 3252|  2.57M|    for (int i = 0; i < 7; i++) {
  ------------------
  |  Branch (3252:21): [True: 2.24M, False: 321k]
  ------------------
 3253|  2.24M|        if (f->refp[i].p.frame_hdr) {
  ------------------
  |  Branch (3253:13): [True: 576k, False: 1.67M]
  ------------------
 3254|   576k|            if (!retval && c->n_fc > 1 && c->strict_std_compliance &&
  ------------------
  |  Branch (3254:17): [True: 76.6k, False: 499k]
  |  Branch (3254:28): [True: 76.6k, False: 0]
  |  Branch (3254:43): [True: 0, False: 76.6k]
  ------------------
 3255|   576k|                atomic_load(&f->refp[i].progress[1]) == FRAME_ERROR)
  ------------------
  |  |   35|      0|#define FRAME_ERROR (UINT_MAX - 1)
  ------------------
  |  Branch (3255:17): [True: 0, False: 0]
  ------------------
 3256|      0|            { // a reference reported an error: fail this frame as well
 3257|      0|                retval = DAV1D_ERR(EINVAL);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
 3258|      0|                atomic_store(&f->task_thread.error, 1);
 3259|      0|                atomic_store(&f->sr_cur.progress[1], FRAME_ERROR);
 3260|      0|            }
 3261|   576k|            dav1d_thread_picture_unref(&f->refp[i]);
 3262|   576k|        }
 3263|  2.24M|        dav1d_ref_dec(&f->ref_mvs_ref[i]);
 3264|  2.24M|    }
 3265|       |    // out_cdf.progress is set to 1 on success and TILE_ERROR on failure before the unref
 3266|   321k|    dav1d_picture_unref_internal(&f->cur);
 3267|   321k|    dav1d_thread_picture_unref(&f->sr_cur);
 3268|   321k|    dav1d_cdf_thread_unref(&f->in_cdf);
 3269|   321k|    if (f->frame_hdr && f->frame_hdr->refresh_context) {
  ------------------
  |  Branch (3269:9): [True: 297k, False: 23.7k]
  |  Branch (3269:25): [True: 44.2k, False: 253k]
  ------------------
 3270|  44.2k|        if (f->out_cdf.progress)
  ------------------
  |  Branch (3270:13): [True: 40.8k, False: 3.47k]
  ------------------
 3271|  44.2k|            atomic_store(f->out_cdf.progress, retval == 0 ? 1 : TILE_ERROR);
  ------------------
  |  Branch (3271:13): [True: 8.51k, False: 32.2k]
  ------------------
 3272|  44.2k|        dav1d_cdf_thread_unref(&f->out_cdf);
 3273|  44.2k|    }
 3274|   321k|    dav1d_ref_dec(&f->cur_segmap_ref);
 3275|   321k|    dav1d_ref_dec(&f->prev_segmap_ref);
 3276|   321k|    dav1d_ref_dec(&f->mvs_ref);
 3277|   321k|    dav1d_ref_dec(&f->seq_hdr_ref);
 3278|   321k|    dav1d_ref_dec(&f->frame_hdr_ref);
 3279|       |    // drop the tile payload references, then record the final status
 3280|   603k|    for (int i = 0; i < f->n_tile_data; i++)
  ------------------
  |  Branch (3280:21): [True: 282k, False: 321k]
  ------------------
 3281|   282k|        dav1d_data_unref_internal(&f->tile[i].data);
 3282|   321k|    f->task_thread.retval = retval;
 3283|   321k|}
dav1d_submit_frame:
 3327|   285k|int dav1d_submit_frame(Dav1dContext *const c) {
 3328|   285k|    Dav1dFrameContext *f;
 3329|   285k|    int res = -1;
 3330|       |
 3331|       |    // wait for c->out_delayed[next] and move into c->out if visible
 3332|   285k|    Dav1dThreadPicture *out_delayed;
 3333|   285k|    if (c->n_fc > 1) {
  ------------------
  |  Branch (3333:9): [True: 285k, False: 0]
  ------------------
 3334|   285k|        pthread_mutex_lock(&c->task_thread.lock);
 3335|   285k|        const unsigned next = c->frame_thread.next++;
 3336|   285k|        if (c->frame_thread.next == c->n_fc)
  ------------------
  |  Branch (3336:13): [True: 68.2k, False: 217k]
  ------------------
 3337|  68.2k|            c->frame_thread.next = 0;
 3338|       |
 3339|   285k|        f = &c->fc[next];
 3340|   377k|        while (f->n_tile_data > 0)
  ------------------
  |  Branch (3340:16): [True: 92.4k, False: 285k]
  ------------------
 3341|  92.4k|            pthread_cond_wait(&f->task_thread.cond,
 3342|  92.4k|                              &c->task_thread.lock);
 3343|   285k|        out_delayed = &c->frame_thread.out_delayed[next];
 3344|   285k|        if (out_delayed->p.data[0] || atomic_load(&f->task_thread.error)) {
  ------------------
  |  Branch (3344:13): [True: 265k, False: 19.7k]
  |  Branch (3344:39): [True: 2.76k, False: 16.9k]
  ------------------
 3345|   268k|            unsigned first = atomic_load(&c->task_thread.first);
 3346|   268k|            if (first + 1U < c->n_fc)
  ------------------
  |  Branch (3346:17): [True: 201k, False: 66.6k]
  ------------------
 3347|   268k|                atomic_fetch_add(&c->task_thread.first, 1U);
 3348|  66.6k|            else
 3349|   268k|                atomic_store(&c->task_thread.first, 0);
 3350|   268k|            atomic_compare_exchange_strong(&c->task_thread.reset_task_cur,
 3351|   268k|                                           &first, UINT_MAX);
 3352|   268k|            if (c->task_thread.cur && c->task_thread.cur < c->n_fc)
  ------------------
  |  Branch (3352:17): [True: 266k, False: 2.41k]
  |  Branch (3352:39): [True: 171k, False: 94.6k]
  ------------------
 3353|   171k|                c->task_thread.cur--;
 3354|   268k|        }
 3355|   285k|        const int error = f->task_thread.retval;
 3356|   285k|        if (error) {
  ------------------
  |  Branch (3356:13): [True: 139k, False: 145k]
  ------------------
 3357|   139k|            f->task_thread.retval = 0;
 3358|   139k|            c->cached_error = error;
 3359|   139k|            dav1d_data_props_copy(&c->cached_error_props, &out_delayed->p.m);
 3360|   139k|            dav1d_thread_picture_unref(out_delayed);
 3361|   145k|        } else if (out_delayed->p.data[0]) {
  ------------------
  |  Branch (3361:20): [True: 126k, False: 19.7k]
  ------------------
 3362|   126k|            const unsigned progress = atomic_load_explicit(&out_delayed->progress[1],
 3363|   126k|                                                           memory_order_relaxed);
 3364|   126k|            if ((out_delayed->visible || c->output_invisible_frames) &&
  ------------------
  |  Branch (3364:18): [True: 121k, False: 5.10k]
  |  Branch (3364:42): [True: 0, False: 5.10k]
  ------------------
 3365|   121k|                progress != FRAME_ERROR)
  ------------------
  |  |   35|   121k|#define FRAME_ERROR (UINT_MAX - 1)
  ------------------
  |  Branch (3365:17): [True: 119k, False: 1.81k]
  ------------------
 3366|   119k|            {
 3367|   119k|                dav1d_thread_picture_ref(&c->out, out_delayed);
 3368|   119k|                c->event_flags |= dav1d_picture_get_event_flags(out_delayed);
 3369|   119k|            }
 3370|   126k|            dav1d_thread_picture_unref(out_delayed);
 3371|   126k|        }
 3372|   285k|    } else {
 3373|      0|        f = c->fc;
 3374|      0|    }
 3375|       |
 3376|   285k|    f->seq_hdr = c->seq_hdr;
 3377|   285k|    f->seq_hdr_ref = c->seq_hdr_ref;
 3378|   285k|    dav1d_ref_inc(f->seq_hdr_ref);
 3379|   285k|    f->frame_hdr = c->frame_hdr;
 3380|   285k|    f->frame_hdr_ref = c->frame_hdr_ref;
 3381|   285k|    c->frame_hdr = NULL;
 3382|   285k|    c->frame_hdr_ref = NULL;
 3383|   285k|    f->dsp = &c->dsp[f->seq_hdr->hbd];
 3384|       |
 3385|   285k|    const int bpc = 8 + 2 * f->seq_hdr->hbd;
 3386|       |
 3387|   285k|    if (!f->dsp->ipred.intra_pred[DC_PRED]) {
  ------------------
  |  Branch (3387:9): [True: 9.21k, False: 276k]
  ------------------
 3388|  9.21k|        Dav1dDSPContext *const dsp = &c->dsp[f->seq_hdr->hbd];
 3389|       |
 3390|  9.21k|        switch (bpc) {
 3391|      0|#define assign_bitdepth_case(bd) \
 3392|      0|            dav1d_cdef_dsp_init_##bd##bpc(&dsp->cdef); \
 3393|      0|            dav1d_intra_pred_dsp_init_##bd##bpc(&dsp->ipred); \
 3394|      0|            dav1d_itx_dsp_init_##bd##bpc(&dsp->itx, bpc); \
 3395|      0|            dav1d_loop_filter_dsp_init_##bd##bpc(&dsp->lf); \
 3396|      0|            dav1d_loop_restoration_dsp_init_##bd##bpc(&dsp->lr, bpc); \
 3397|      0|            dav1d_mc_dsp_init_##bd##bpc(&dsp->mc); \
 3398|      0|            dav1d_film_grain_dsp_init_##bd##bpc(&dsp->fg); \
 3399|      0|            break
 3400|      0|#if CONFIG_8BPC
 3401|  3.49k|        case 8:
  ------------------
  |  Branch (3401:9): [True: 3.49k, False: 5.72k]
  ------------------
 3402|  3.49k|            assign_bitdepth_case(8);
  ------------------
  |  | 3392|  3.49k|            dav1d_cdef_dsp_init_##bd##bpc(&dsp->cdef); \
  |  | 3393|  3.49k|            dav1d_intra_pred_dsp_init_##bd##bpc(&dsp->ipred); \
  |  | 3394|  3.49k|            dav1d_itx_dsp_init_##bd##bpc(&dsp->itx, bpc); \
  |  | 3395|  3.49k|            dav1d_loop_filter_dsp_init_##bd##bpc(&dsp->lf); \
  |  | 3396|  3.49k|            dav1d_loop_restoration_dsp_init_##bd##bpc(&dsp->lr, bpc); \
  |  | 3397|  3.49k|            dav1d_mc_dsp_init_##bd##bpc(&dsp->mc); \
  |  | 3398|  3.49k|            dav1d_film_grain_dsp_init_##bd##bpc(&dsp->fg); \
  |  | 3399|  3.49k|            break
  ------------------
 3403|      0|#endif
 3404|      0|#if CONFIG_16BPC
 3405|  2.32k|        case 10:
  ------------------
  |  Branch (3405:9): [True: 2.32k, False: 6.89k]
  ------------------
 3406|  5.72k|        case 12:
  ------------------
  |  Branch (3406:9): [True: 3.39k, False: 5.82k]
  ------------------
 3407|  5.72k|            assign_bitdepth_case(16);
  ------------------
  |  | 3392|  5.72k|            dav1d_cdef_dsp_init_##bd##bpc(&dsp->cdef); \
  |  | 3393|  5.72k|            dav1d_intra_pred_dsp_init_##bd##bpc(&dsp->ipred); \
  |  | 3394|  5.72k|            dav1d_itx_dsp_init_##bd##bpc(&dsp->itx, bpc); \
  |  | 3395|  5.72k|            dav1d_loop_filter_dsp_init_##bd##bpc(&dsp->lf); \
  |  | 3396|  5.72k|            dav1d_loop_restoration_dsp_init_##bd##bpc(&dsp->lr, bpc); \
  |  | 3397|  5.72k|            dav1d_mc_dsp_init_##bd##bpc(&dsp->mc); \
  |  | 3398|  5.72k|            dav1d_film_grain_dsp_init_##bd##bpc(&dsp->fg); \
  |  | 3399|  5.72k|            break
  ------------------
 3408|      0|#endif
 3409|      0|#undef assign_bitdepth_case
 3410|      0|        default:
  ------------------
  |  Branch (3410:9): [True: 0, False: 9.21k]
  ------------------
 3411|      0|            dav1d_log(c, "Compiled without support for %d-bit decoding\n",
  ------------------
  |  |   44|      0|#define dav1d_log(...) do { } while(0)
  |  |  ------------------
  |  |  |  Branch (44:37): [Folded, False: 0]
  |  |  ------------------
  ------------------
 3412|      0|                    8 + 2 * f->seq_hdr->hbd);
 3413|      0|            res = DAV1D_ERR(ENOPROTOOPT);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
 3414|      0|            goto error;
 3415|  9.21k|        }
 3416|  9.21k|    }
 3417|       |
 3418|   285k|#define assign_bitdepth_case(bd) \
 3419|   285k|        f->bd_fn.recon_b_inter = dav1d_recon_b_inter_##bd##bpc; \
 3420|   285k|        f->bd_fn.recon_b_intra = dav1d_recon_b_intra_##bd##bpc; \
 3421|   285k|        f->bd_fn.filter_sbrow = dav1d_filter_sbrow_##bd##bpc; \
 3422|   285k|        f->bd_fn.filter_sbrow_deblock_cols = dav1d_filter_sbrow_deblock_cols_##bd##bpc; \
 3423|   285k|        f->bd_fn.filter_sbrow_deblock_rows = dav1d_filter_sbrow_deblock_rows_##bd##bpc; \
 3424|   285k|        f->bd_fn.filter_sbrow_cdef = dav1d_filter_sbrow_cdef_##bd##bpc; \
 3425|   285k|        f->bd_fn.filter_sbrow_resize = dav1d_filter_sbrow_resize_##bd##bpc; \
 3426|   285k|        f->bd_fn.filter_sbrow_lr = dav1d_filter_sbrow_lr_##bd##bpc; \
 3427|   285k|        f->bd_fn.backup_ipred_edge = dav1d_backup_ipred_edge_##bd##bpc; \
 3428|   285k|        f->bd_fn.read_coef_blocks = dav1d_read_coef_blocks_##bd##bpc; \
 3429|   285k|        f->bd_fn.copy_pal_block_y = dav1d_copy_pal_block_y_##bd##bpc; \
 3430|   285k|        f->bd_fn.copy_pal_block_uv = dav1d_copy_pal_block_uv_##bd##bpc; \
 3431|   285k|        f->bd_fn.read_pal_plane = dav1d_read_pal_plane_##bd##bpc; \
 3432|   285k|        f->bd_fn.read_pal_uv = dav1d_read_pal_uv_##bd##bpc
 3433|   285k|    if (!f->seq_hdr->hbd) {
  ------------------
  |  Branch (3433:9): [True: 117k, False: 167k]
  ------------------
 3434|   117k|#if CONFIG_8BPC
 3435|   117k|        assign_bitdepth_case(8);
  ------------------
  |  | 3419|   117k|        f->bd_fn.recon_b_inter = dav1d_recon_b_inter_##bd##bpc; \
  |  | 3420|   117k|        f->bd_fn.recon_b_intra = dav1d_recon_b_intra_##bd##bpc; \
  |  | 3421|   117k|        f->bd_fn.filter_sbrow = dav1d_filter_sbrow_##bd##bpc; \
  |  | 3422|   117k|        f->bd_fn.filter_sbrow_deblock_cols = dav1d_filter_sbrow_deblock_cols_##bd##bpc; \
  |  | 3423|   117k|        f->bd_fn.filter_sbrow_deblock_rows = dav1d_filter_sbrow_deblock_rows_##bd##bpc; \
  |  | 3424|   117k|        f->bd_fn.filter_sbrow_cdef = dav1d_filter_sbrow_cdef_##bd##bpc; \
  |  | 3425|   117k|        f->bd_fn.filter_sbrow_resize = dav1d_filter_sbrow_resize_##bd##bpc; \
  |  | 3426|   117k|        f->bd_fn.filter_sbrow_lr = dav1d_filter_sbrow_lr_##bd##bpc; \
  |  | 3427|   117k|        f->bd_fn.backup_ipred_edge = dav1d_backup_ipred_edge_##bd##bpc; \
  |  | 3428|   117k|        f->bd_fn.read_coef_blocks = dav1d_read_coef_blocks_##bd##bpc; \
  |  | 3429|   117k|        f->bd_fn.copy_pal_block_y = dav1d_copy_pal_block_y_##bd##bpc; \
  |  | 3430|   117k|        f->bd_fn.copy_pal_block_uv = dav1d_copy_pal_block_uv_##bd##bpc; \
  |  | 3431|   117k|        f->bd_fn.read_pal_plane = dav1d_read_pal_plane_##bd##bpc; \
  |  | 3432|   117k|        f->bd_fn.read_pal_uv = dav1d_read_pal_uv_##bd##bpc
  ------------------
 3436|   117k|#endif
 3437|   167k|    } else {
 3438|   167k|#if CONFIG_16BPC
 3439|   167k|        assign_bitdepth_case(16);
  ------------------
  |  | 3419|   167k|        f->bd_fn.recon_b_inter = dav1d_recon_b_inter_##bd##bpc; \
  |  | 3420|   167k|        f->bd_fn.recon_b_intra = dav1d_recon_b_intra_##bd##bpc; \
  |  | 3421|   167k|        f->bd_fn.filter_sbrow = dav1d_filter_sbrow_##bd##bpc; \
  |  | 3422|   167k|        f->bd_fn.filter_sbrow_deblock_cols = dav1d_filter_sbrow_deblock_cols_##bd##bpc; \
  |  | 3423|   167k|        f->bd_fn.filter_sbrow_deblock_rows = dav1d_filter_sbrow_deblock_rows_##bd##bpc; \
  |  | 3424|   167k|        f->bd_fn.filter_sbrow_cdef = dav1d_filter_sbrow_cdef_##bd##bpc; \
  |  | 3425|   167k|        f->bd_fn.filter_sbrow_resize = dav1d_filter_sbrow_resize_##bd##bpc; \
  |  | 3426|   167k|        f->bd_fn.filter_sbrow_lr = dav1d_filter_sbrow_lr_##bd##bpc; \
  |  | 3427|   167k|        f->bd_fn.backup_ipred_edge = dav1d_backup_ipred_edge_##bd##bpc; \
  |  | 3428|   167k|        f->bd_fn.read_coef_blocks = dav1d_read_coef_blocks_##bd##bpc; \
  |  | 3429|   167k|        f->bd_fn.copy_pal_block_y = dav1d_copy_pal_block_y_##bd##bpc; \
  |  | 3430|   167k|        f->bd_fn.copy_pal_block_uv = dav1d_copy_pal_block_uv_##bd##bpc; \
  |  | 3431|   167k|        f->bd_fn.read_pal_plane = dav1d_read_pal_plane_##bd##bpc; \
  |  | 3432|   167k|        f->bd_fn.read_pal_uv = dav1d_read_pal_uv_##bd##bpc
  ------------------
 3440|   167k|#endif
 3441|   167k|    }
 3442|   285k|#undef assign_bitdepth_case
 3443|       |
 3444|   285k|    int ref_coded_width[7];
 3445|   285k|    if (IS_INTER_OR_SWITCH(f->frame_hdr)) {
  ------------------
  |  |   36|   285k|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 85.5k, False: 199k]
  |  |  ------------------
  ------------------
 3446|  85.5k|        if (f->frame_hdr->primary_ref_frame != DAV1D_PRIMARY_REF_NONE) {
  ------------------
  |  |   45|  85.5k|#define DAV1D_PRIMARY_REF_NONE 7
  ------------------
  |  Branch (3446:13): [True: 65.1k, False: 20.3k]
  ------------------
 3447|  65.1k|            const int pri_ref = f->frame_hdr->refidx[f->frame_hdr->primary_ref_frame];
 3448|  65.1k|            if (!c->refs[pri_ref].p.p.data[0]) {
  ------------------
  |  Branch (3448:17): [True: 431, False: 64.7k]
  ------------------
 3449|    431|                res = DAV1D_ERR(EINVAL);
  ------------------
  |  |   58|    431|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
 3450|    431|                goto error;
 3451|    431|            }
 3452|  65.1k|        }
 3453|   662k|        for (int i = 0; i < 7; i++) {
  ------------------
  |  Branch (3453:25): [True: 580k, False: 82.3k]
  ------------------
 3454|   580k|            const int refidx = f->frame_hdr->refidx[i];
 3455|   580k|            if (!c->refs[refidx].p.p.data[0] ||
  ------------------
  |  Branch (3455:17): [True: 372, False: 580k]
  ------------------
 3456|   580k|                f->frame_hdr->width[0] * 2 < c->refs[refidx].p.p.p.w ||
  ------------------
  |  Branch (3456:17): [True: 440, False: 579k]
  ------------------
 3457|   579k|                f->frame_hdr->height * 2 < c->refs[refidx].p.p.p.h ||
  ------------------
  |  Branch (3457:17): [True: 1.28k, False: 578k]
  ------------------
 3458|   578k|                f->frame_hdr->width[0] > c->refs[refidx].p.p.p.w * 16 ||
  ------------------
  |  Branch (3458:17): [True: 520, False: 577k]
  ------------------
 3459|   577k|                f->frame_hdr->height > c->refs[refidx].p.p.p.h * 16 ||
  ------------------
  |  Branch (3459:17): [True: 198, False: 577k]
  ------------------
 3460|   577k|                f->seq_hdr->layout != c->refs[refidx].p.p.p.layout ||
  ------------------
  |  Branch (3460:17): [True: 0, False: 577k]
  ------------------
 3461|   577k|                bpc != c->refs[refidx].p.p.p.bpc)
  ------------------
  |  Branch (3461:17): [True: 0, False: 577k]
  ------------------
 3462|  2.81k|            {
 3463|  4.28k|                for (int j = 0; j < i; j++)
  ------------------
  |  Branch (3463:33): [True: 1.47k, False: 2.81k]
  ------------------
 3464|  1.47k|                    dav1d_thread_picture_unref(&f->refp[j]);
 3465|  2.81k|                res = DAV1D_ERR(EINVAL);
  ------------------
  |  |   58|  2.81k|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
 3466|  2.81k|                goto error;
 3467|  2.81k|            }
 3468|   577k|            dav1d_thread_picture_ref(&f->refp[i], &c->refs[refidx].p);
 3469|   577k|            ref_coded_width[i] = c->refs[refidx].p.p.frame_hdr->width[0];
 3470|   577k|            if (f->frame_hdr->width[0] != c->refs[refidx].p.p.p.w ||
  ------------------
  |  Branch (3470:17): [True: 101k, False: 476k]
  ------------------
 3471|   476k|                f->frame_hdr->height != c->refs[refidx].p.p.p.h)
  ------------------
  |  Branch (3471:17): [True: 2.18k, False: 474k]
  ------------------
 3472|   103k|            {
 3473|   103k|#define scale_fac(ref_sz, this_sz) \
 3474|   103k|    ((((ref_sz) << 14) + ((this_sz) >> 1)) / (this_sz))
 3475|   103k|                f->svc[i][0].scale = scale_fac(c->refs[refidx].p.p.p.w,
  ------------------
  |  | 3474|   103k|    ((((ref_sz) << 14) + ((this_sz) >> 1)) / (this_sz))
  ------------------
 3476|   103k|                                               f->frame_hdr->width[0]);
 3477|   103k|                f->svc[i][1].scale = scale_fac(c->refs[refidx].p.p.p.h,
  ------------------
  |  | 3474|   103k|    ((((ref_sz) << 14) + ((this_sz) >> 1)) / (this_sz))
  ------------------
 3478|   103k|                                               f->frame_hdr->height);
 3479|   103k|                f->svc[i][0].step = (f->svc[i][0].scale + 8) >> 4;
 3480|   103k|                f->svc[i][1].step = (f->svc[i][1].scale + 8) >> 4;
 3481|   474k|            } else {
 3482|   474k|                f->svc[i][0].scale = f->svc[i][1].scale = 0;
 3483|   474k|            }
 3484|   577k|            f->gmv_warp_allowed[i] = f->frame_hdr->gmv[i].type > DAV1D_WM_TYPE_TRANSLATION &&
  ------------------
  |  Branch (3484:38): [True: 21.2k, False: 556k]
  ------------------
 3485|  21.2k|                                     !f->frame_hdr->force_integer_mv &&
  ------------------
  |  Branch (3485:38): [True: 18.5k, False: 2.79k]
  ------------------
 3486|  18.5k|                                     !dav1d_get_shear_params(&f->frame_hdr->gmv[i]) &&
  ------------------
  |  Branch (3486:38): [True: 16.9k, False: 1.54k]
  ------------------
 3487|  16.9k|                                     !f->svc[i][0].scale;
  ------------------
  |  Branch (3487:38): [True: 9.85k, False: 7.10k]
  ------------------
 3488|   577k|        }
 3489|  85.1k|    }
 3490|       |
 3491|       |    // setup entropy
 3492|   282k|    if (f->frame_hdr->primary_ref_frame == DAV1D_PRIMARY_REF_NONE) {
  ------------------
  |  |   45|   282k|#define DAV1D_PRIMARY_REF_NONE 7
  ------------------
  |  Branch (3492:9): [True: 219k, False: 62.2k]
  ------------------
 3493|   219k|        dav1d_cdf_thread_init_static(&f->in_cdf, f->frame_hdr->quant.yac);
 3494|   219k|    } else {
 3495|  62.2k|        const int pri_ref = f->frame_hdr->refidx[f->frame_hdr->primary_ref_frame];
 3496|  62.2k|        dav1d_cdf_thread_ref(&f->in_cdf, &c->cdf[pri_ref]);
 3497|  62.2k|    }
 3498|   282k|    if (f->frame_hdr->refresh_context) {
  ------------------
  |  Branch (3498:9): [True: 40.8k, False: 241k]
  ------------------
 3499|  40.8k|        res = dav1d_cdf_thread_alloc(c, &f->out_cdf, c->n_fc > 1);
 3500|  40.8k|        if (res < 0) goto error;
  ------------------
  |  Branch (3500:13): [True: 0, False: 40.8k]
  ------------------
 3501|  40.8k|    }
 3502|       |
 3503|       |    // FIXME qsort so tiles are in order (for frame threading)
 3504|   282k|    if (f->n_tile_data_alloc < c->n_tile_data) {
  ------------------
  |  Branch (3504:9): [True: 16.8k, False: 265k]
  ------------------
 3505|  16.8k|        dav1d_free(f->tile);
  ------------------
  |  |  135|  16.8k|#define dav1d_free(ptr) free(ptr)
  ------------------
 3506|  16.8k|        assert(c->n_tile_data < INT_MAX / (int)sizeof(*f->tile));
  ------------------
  |  Branch (3506:9): [True: 16.8k, False: 0]
  ------------------
 3507|  16.8k|        f->tile = dav1d_malloc(ALLOC_TILE, c->n_tile_data * sizeof(*f->tile));
  ------------------
  |  |  132|  16.8k|#define dav1d_malloc(type, sz) malloc(sz)
  ------------------
 3508|  16.8k|        if (!f->tile) {
  ------------------
  |  Branch (3508:13): [True: 0, False: 16.8k]
  ------------------
 3509|      0|            f->n_tile_data_alloc = f->n_tile_data = 0;
 3510|      0|            res = DAV1D_ERR(ENOMEM);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
 3511|      0|            goto error;
 3512|      0|        }
 3513|  16.8k|        f->n_tile_data_alloc = c->n_tile_data;
 3514|  16.8k|    }
 3515|   282k|    memcpy(f->tile, c->tile, c->n_tile_data * sizeof(*f->tile));
 3516|   282k|    memset(c->tile, 0, c->n_tile_data * sizeof(*c->tile));
 3517|   282k|    f->n_tile_data = c->n_tile_data;
 3518|   282k|    c->n_tile_data = 0;
 3519|       |
 3520|       |    // allocate frame
 3521|   282k|    res = dav1d_thread_picture_alloc(c, f, bpc);
 3522|   282k|    if (res < 0) goto error;
  ------------------
  |  Branch (3522:9): [True: 0, False: 282k]
  ------------------
 3523|       |
 3524|   282k|    if (f->frame_hdr->width[0] != f->frame_hdr->width[1]) {
  ------------------
  |  Branch (3524:9): [True: 24.1k, False: 258k]
  ------------------
 3525|  24.1k|        res = dav1d_picture_alloc_copy(c, &f->cur, f->frame_hdr->width[0], &f->sr_cur.p);
 3526|  24.1k|        if (res < 0) goto error;
  ------------------
  |  Branch (3526:13): [True: 0, False: 24.1k]
  ------------------
 3527|   258k|    } else {
 3528|   258k|        dav1d_picture_ref(&f->cur, &f->sr_cur.p);
 3529|   258k|    }
 3530|       |
 3531|   282k|    if (f->frame_hdr->width[0] != f->frame_hdr->width[1]) {
  ------------------
  |  Branch (3531:9): [True: 24.1k, False: 258k]
  ------------------
 3532|  24.1k|        f->resize_step[0] = scale_fac(f->cur.p.w, f->sr_cur.p.p.w);
  ------------------
  |  | 3474|  24.1k|    ((((ref_sz) << 14) + ((this_sz) >> 1)) / (this_sz))
  ------------------
 3533|  24.1k|        const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
 3534|  24.1k|        const int in_cw = (f->cur.p.w + ss_hor) >> ss_hor;
 3535|  24.1k|        const int out_cw = (f->sr_cur.p.p.w + ss_hor) >> ss_hor;
 3536|  24.1k|        f->resize_step[1] = scale_fac(in_cw, out_cw);
  ------------------
  |  | 3474|  24.1k|    ((((ref_sz) << 14) + ((this_sz) >> 1)) / (this_sz))
  ------------------
 3537|  24.1k|#undef scale_fac
 3538|  24.1k|        f->resize_start[0] = get_upscale_x0(f->cur.p.w, f->sr_cur.p.p.w, f->resize_step[0]);
 3539|  24.1k|        f->resize_start[1] = get_upscale_x0(in_cw, out_cw, f->resize_step[1]);
 3540|  24.1k|    }
 3541|       |
 3542|       |    // move f->cur into output queue
 3543|   282k|    if (c->n_fc == 1) {
  ------------------
  |  Branch (3543:9): [True: 0, False: 282k]
  ------------------
 3544|      0|        if (f->frame_hdr->show_frame || c->output_invisible_frames) {
  ------------------
  |  Branch (3544:13): [True: 0, False: 0]
  |  Branch (3544:41): [True: 0, False: 0]
  ------------------
 3545|      0|            dav1d_thread_picture_ref(&c->out, &f->sr_cur);
 3546|      0|            c->event_flags |= dav1d_picture_get_event_flags(&f->sr_cur);
 3547|      0|        }
 3548|   282k|    } else {
 3549|   282k|        dav1d_thread_picture_ref(out_delayed, &f->sr_cur);
 3550|   282k|    }
 3551|       |
 3552|   282k|    f->w4 = (f->frame_hdr->width[0] + 3) >> 2;
 3553|   282k|    f->h4 = (f->frame_hdr->height + 3) >> 2;
 3554|   282k|    f->bw = ((f->frame_hdr->width[0] + 7) >> 3) << 1;
 3555|   282k|    f->bh = ((f->frame_hdr->height + 7) >> 3) << 1;
 3556|   282k|    f->sb128w = (f->bw + 31) >> 5;
 3557|   282k|    f->sb128h = (f->bh + 31) >> 5;
 3558|   282k|    f->sb_shift = 4 + f->seq_hdr->sb128;
 3559|   282k|    f->sb_step = 16 << f->seq_hdr->sb128;
 3560|   282k|    f->sbh = (f->bh + f->sb_step - 1) >> f->sb_shift;
 3561|   282k|    f->b4_stride = (f->bw + 31) & ~31;
 3562|   282k|    f->bitdepth_max = (1 << f->cur.p.bpc) - 1;
 3563|   282k|    atomic_init(&f->task_thread.error, 0);
 3564|   282k|    const int uses_2pass = c->n_fc > 1;
 3565|   282k|    const int cols = f->frame_hdr->tiling.cols;
 3566|   282k|    const int rows = f->frame_hdr->tiling.rows;
 3567|   282k|    atomic_store(&f->task_thread.task_counter,
 3568|   282k|                 (cols * rows + f->sbh) << uses_2pass);
 3569|       |
 3570|       |    // ref_mvs
 3571|   282k|    if (IS_INTER_OR_SWITCH(f->frame_hdr) || f->frame_hdr->allow_intrabc) {
  ------------------
  |  |   36|   564k|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 82.3k, False: 199k]
  |  |  ------------------
  ------------------
  |  Branch (3571:45): [True: 173k, False: 26.3k]
  ------------------
 3572|   255k|        f->mvs_ref = dav1d_ref_create_using_pool(c->refmvs_pool,
 3573|   255k|            sizeof(*f->mvs) * f->sb128h * 16 * (f->b4_stride >> 1));
 3574|   255k|        if (!f->mvs_ref) {
  ------------------
  |  Branch (3574:13): [True: 0, False: 255k]
  ------------------
 3575|      0|            res = DAV1D_ERR(ENOMEM);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
 3576|      0|            goto error;
 3577|      0|        }
 3578|   255k|        f->mvs = f->mvs_ref->data;
 3579|   255k|        if (!f->frame_hdr->allow_intrabc) {
  ------------------
  |  Branch (3579:13): [True: 82.3k, False: 173k]
  ------------------
 3580|   658k|            for (int i = 0; i < 7; i++)
  ------------------
  |  Branch (3580:29): [True: 576k, False: 82.3k]
  ------------------
 3581|   576k|                f->refpoc[i] = f->refp[i].p.frame_hdr->frame_offset;
 3582|   173k|        } else {
 3583|   173k|            memset(f->refpoc, 0, sizeof(f->refpoc));
 3584|   173k|        }
 3585|   255k|        if (f->frame_hdr->use_ref_frame_mvs) {
  ------------------
  |  Branch (3585:13): [True: 61.8k, False: 193k]
  ------------------
 3586|   494k|            for (int i = 0; i < 7; i++) {
  ------------------
  |  Branch (3586:29): [True: 432k, False: 61.8k]
  ------------------
 3587|   432k|                const int refidx = f->frame_hdr->refidx[i];
 3588|   432k|                const int ref_w = ((ref_coded_width[i] + 7) >> 3) << 1;
 3589|   432k|                const int ref_h = ((f->refp[i].p.p.h + 7) >> 3) << 1;
 3590|   432k|                if (c->refs[refidx].refmvs != NULL &&
  ------------------
  |  Branch (3590:21): [True: 353k, False: 79.5k]
  ------------------
 3591|   353k|                    ref_w == f->bw && ref_h == f->bh)
  ------------------
  |  Branch (3591:21): [True: 347k, False: 5.79k]
  |  Branch (3591:39): [True: 347k, False: 367]
  ------------------
 3592|   347k|                {
 3593|   347k|                    f->ref_mvs_ref[i] = c->refs[refidx].refmvs;
 3594|   347k|                    dav1d_ref_inc(f->ref_mvs_ref[i]);
 3595|   347k|                    f->ref_mvs[i] = c->refs[refidx].refmvs->data;
 3596|   347k|                } else {
 3597|  85.7k|                    f->ref_mvs[i] = NULL;
 3598|  85.7k|                    f->ref_mvs_ref[i] = NULL;
 3599|  85.7k|                }
 3600|   432k|                memcpy(f->refrefpoc[i], c->refs[refidx].refpoc,
 3601|   432k|                       sizeof(*f->refrefpoc));
 3602|   432k|            }
 3603|   193k|        } else {
 3604|   193k|            memset(f->ref_mvs_ref, 0, sizeof(f->ref_mvs_ref));
 3605|   193k|        }
 3606|   255k|    } else {
 3607|  26.3k|        f->mvs_ref = NULL;
 3608|  26.3k|        memset(f->ref_mvs_ref, 0, sizeof(f->ref_mvs_ref));
 3609|  26.3k|    }
 3610|       |
 3611|       |    // segmap
 3612|   282k|    if (f->frame_hdr->segmentation.enabled) {
  ------------------
  |  Branch (3612:9): [True: 17.1k, False: 265k]
  ------------------
 3613|       |        // By default, the previous segmentation map is not initialised.
 3614|  17.1k|        f->prev_segmap_ref = NULL;
 3615|  17.1k|        f->prev_segmap = NULL;
 3616|       |
 3617|       |        // We might need a previous frame's segmentation map. This
 3618|       |        // happens if there is either no update or a temporal update.
 3619|  17.1k|        if (f->frame_hdr->segmentation.temporal || !f->frame_hdr->segmentation.update_map) {
  ------------------
  |  Branch (3619:13): [True: 5.87k, False: 11.3k]
  |  Branch (3619:52): [True: 7.35k, False: 3.96k]
  ------------------
 3620|  13.2k|            const int pri_ref = f->frame_hdr->primary_ref_frame;
 3621|  13.2k|            assert(pri_ref != DAV1D_PRIMARY_REF_NONE);
  ------------------
  |  Branch (3621:13): [True: 13.2k, False: 0]
  ------------------
 3622|  13.2k|            const int ref_w = ((ref_coded_width[pri_ref] + 7) >> 3) << 1;
 3623|  13.2k|            const int ref_h = ((f->refp[pri_ref].p.p.h + 7) >> 3) << 1;
 3624|  13.2k|            if (ref_w == f->bw && ref_h == f->bh) {
  ------------------
  |  Branch (3624:17): [True: 11.7k, False: 1.49k]
  |  Branch (3624:35): [True: 10.5k, False: 1.19k]
  ------------------
 3625|  10.5k|                f->prev_segmap_ref = c->refs[f->frame_hdr->refidx[pri_ref]].segmap;
 3626|  10.5k|                if (f->prev_segmap_ref) {
  ------------------
  |  Branch (3626:21): [True: 8.94k, False: 1.58k]
  ------------------
 3627|  8.94k|                    dav1d_ref_inc(f->prev_segmap_ref);
 3628|  8.94k|                    f->prev_segmap = f->prev_segmap_ref->data;
 3629|  8.94k|                }
 3630|  10.5k|            }
 3631|  13.2k|        }
 3632|       |
 3633|  17.1k|        if (f->frame_hdr->segmentation.update_map) {
  ------------------
  |  Branch (3633:13): [True: 9.83k, False: 7.35k]
  ------------------
 3634|       |            // We're updating an existing map, but need somewhere to
 3635|       |            // put the new values. Allocate them here (the data
 3636|       |            // actually gets set elsewhere)
 3637|  9.83k|            f->cur_segmap_ref = dav1d_ref_create_using_pool(c->segmap_pool,
 3638|  9.83k|                sizeof(*f->cur_segmap) * f->b4_stride * 32 * f->sb128h);
 3639|  9.83k|            if (!f->cur_segmap_ref) {
  ------------------
  |  Branch (3639:17): [True: 0, False: 9.83k]
  ------------------
 3640|      0|                dav1d_ref_dec(&f->prev_segmap_ref);
 3641|      0|                res = DAV1D_ERR(ENOMEM);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
 3642|      0|                goto error;
 3643|      0|            }
 3644|  9.83k|            f->cur_segmap = f->cur_segmap_ref->data;
 3645|  9.83k|        } else if (f->prev_segmap_ref) {
  ------------------
  |  Branch (3645:20): [True: 5.12k, False: 2.22k]
  ------------------
 3646|       |            // We're not updating an existing map, and we have a valid
 3647|       |            // reference. Use that.
 3648|  5.12k|            f->cur_segmap_ref = f->prev_segmap_ref;
 3649|  5.12k|            dav1d_ref_inc(f->cur_segmap_ref);
 3650|  5.12k|            f->cur_segmap = f->prev_segmap_ref->data;
 3651|  5.12k|        } else {
 3652|       |            // We need to make a new map. Allocate one here and zero it out.
 3653|  2.22k|            const size_t segmap_size = sizeof(*f->cur_segmap) * f->b4_stride * 32 * f->sb128h;
 3654|  2.22k|            f->cur_segmap_ref = dav1d_ref_create_using_pool(c->segmap_pool, segmap_size);
 3655|  2.22k|            if (!f->cur_segmap_ref) {
  ------------------
  |  Branch (3655:17): [True: 0, False: 2.22k]
  ------------------
 3656|      0|                res = DAV1D_ERR(ENOMEM);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
 3657|      0|                goto error;
 3658|      0|            }
 3659|  2.22k|            f->cur_segmap = f->cur_segmap_ref->data;
 3660|  2.22k|            memset(f->cur_segmap, 0, segmap_size);
 3661|  2.22k|        }
 3662|   265k|    } else {
 3663|   265k|        f->cur_segmap = NULL;
 3664|   265k|        f->cur_segmap_ref = NULL;
 3665|   265k|        f->prev_segmap_ref = NULL;
 3666|   265k|    }
 3667|       |
 3668|       |    // update references etc.
 3669|   282k|    const unsigned refresh_frame_flags = f->frame_hdr->refresh_frame_flags;
 3670|  2.53M|    for (int i = 0; i < 8; i++) {
  ------------------
  |  Branch (3670:21): [True: 2.25M, False: 282k]
  ------------------
 3671|  2.25M|        if (refresh_frame_flags & (1 << i)) {
  ------------------
  |  Branch (3671:13): [True: 1.89M, False: 360k]
  ------------------
 3672|  1.89M|            if (c->refs[i].p.p.frame_hdr)
  ------------------
  |  Branch (3672:17): [True: 1.79M, False: 103k]
  ------------------
 3673|  1.79M|                dav1d_thread_picture_unref(&c->refs[i].p);
 3674|  1.89M|            dav1d_thread_picture_ref(&c->refs[i].p, &f->sr_cur);
 3675|       |
 3676|  1.89M|            dav1d_cdf_thread_unref(&c->cdf[i]);
 3677|  1.89M|            if (f->frame_hdr->refresh_context) {
  ------------------
  |  Branch (3677:17): [True: 147k, False: 1.75M]
  ------------------
 3678|   147k|                dav1d_cdf_thread_ref(&c->cdf[i], &f->out_cdf);
 3679|  1.75M|            } else {
 3680|  1.75M|                dav1d_cdf_thread_ref(&c->cdf[i], &f->in_cdf);
 3681|  1.75M|            }
 3682|       |
 3683|  1.89M|            dav1d_ref_dec(&c->refs[i].segmap);
 3684|  1.89M|            c->refs[i].segmap = f->cur_segmap_ref;
 3685|  1.89M|            if (f->cur_segmap_ref)
  ------------------
  |  Branch (3685:17): [True: 85.3k, False: 1.81M]
  ------------------
 3686|  85.3k|                dav1d_ref_inc(f->cur_segmap_ref);
 3687|  1.89M|            dav1d_ref_dec(&c->refs[i].refmvs);
 3688|  1.89M|            if (!f->frame_hdr->allow_intrabc) {
  ------------------
  |  Branch (3688:17): [True: 511k, False: 1.38M]
  ------------------
 3689|   511k|                c->refs[i].refmvs = f->mvs_ref;
 3690|   511k|                if (f->mvs_ref)
  ------------------
  |  Branch (3690:21): [True: 307k, False: 203k]
  ------------------
 3691|   307k|                    dav1d_ref_inc(f->mvs_ref);
 3692|   511k|            }
 3693|  1.89M|            memcpy(c->refs[i].refpoc, f->refpoc, sizeof(f->refpoc));
 3694|  1.89M|        }
 3695|  2.25M|    }
 3696|       |
 3697|   282k|    if (c->n_fc == 1) {
  ------------------
  |  Branch (3697:9): [True: 0, False: 282k]
  ------------------
 3698|      0|        if ((res = dav1d_decode_frame(f)) < 0) {
  ------------------
  |  Branch (3698:13): [True: 0, False: 0]
  ------------------
 3699|      0|            dav1d_thread_picture_unref(&c->out);
 3700|      0|            for (int i = 0; i < 8; i++) {
  ------------------
  |  Branch (3700:29): [True: 0, False: 0]
  ------------------
 3701|      0|                if (refresh_frame_flags & (1 << i)) {
  ------------------
  |  Branch (3701:21): [True: 0, False: 0]
  ------------------
 3702|      0|                    if (c->refs[i].p.p.frame_hdr)
  ------------------
  |  Branch (3702:25): [True: 0, False: 0]
  ------------------
 3703|      0|                        dav1d_thread_picture_unref(&c->refs[i].p);
 3704|      0|                    dav1d_cdf_thread_unref(&c->cdf[i]);
 3705|      0|                    dav1d_ref_dec(&c->refs[i].segmap);
 3706|      0|                    dav1d_ref_dec(&c->refs[i].refmvs);
 3707|      0|                }
 3708|      0|            }
 3709|      0|            goto error;
 3710|      0|        }
 3711|   282k|    } else {
 3712|   282k|        dav1d_task_frame_init(f);
 3713|   282k|        pthread_mutex_unlock(&c->task_thread.lock);
 3714|   282k|    }
 3715|       |
 3716|   282k|    return 0;
 3717|  3.24k|error:
 3718|  3.24k|    atomic_init(&f->task_thread.error, 1);
 3719|  3.24k|    dav1d_cdf_thread_unref(&f->in_cdf);
 3720|  3.24k|    if (f->frame_hdr->refresh_context)
  ------------------
  |  Branch (3720:9): [True: 2.66k, False: 579]
  ------------------
 3721|  2.66k|        dav1d_cdf_thread_unref(&f->out_cdf);
 3722|  25.9k|    for (int i = 0; i < 7; i++) {
  ------------------
  |  Branch (3722:21): [True: 22.7k, False: 3.24k]
  ------------------
 3723|  22.7k|        if (f->refp[i].p.frame_hdr)
  ------------------
  |  Branch (3723:13): [True: 0, False: 22.7k]
  ------------------
 3724|      0|            dav1d_thread_picture_unref(&f->refp[i]);
 3725|  22.7k|        dav1d_ref_dec(&f->ref_mvs_ref[i]);
 3726|  22.7k|    }
 3727|  3.24k|    if (c->n_fc == 1)
  ------------------
  |  Branch (3727:9): [True: 0, False: 3.24k]
  ------------------
 3728|      0|        dav1d_thread_picture_unref(&c->out);
 3729|  3.24k|    else
 3730|  3.24k|        dav1d_thread_picture_unref(out_delayed);
 3731|  3.24k|    dav1d_picture_unref_internal(&f->cur);
 3732|  3.24k|    dav1d_thread_picture_unref(&f->sr_cur);
 3733|  3.24k|    dav1d_ref_dec(&f->mvs_ref);
 3734|  3.24k|    dav1d_ref_dec(&f->seq_hdr_ref);
 3735|  3.24k|    dav1d_ref_dec(&f->frame_hdr_ref);
 3736|  3.24k|    dav1d_data_props_copy(&c->cached_error_props, &c->in.m);
 3737|       |
 3738|  3.24k|    for (int i = 0; i < f->n_tile_data; i++)
  ------------------
  |  Branch (3738:21): [True: 0, False: 3.24k]
  ------------------
 3739|      0|        dav1d_data_unref_internal(&f->tile[i].data);
 3740|  3.24k|    f->n_tile_data = 0;
 3741|       |
 3742|  3.24k|    if (c->n_fc > 1)
  ------------------
  |  Branch (3742:9): [True: 3.24k, False: 0]
  ------------------
 3743|  3.24k|        pthread_mutex_unlock(&c->task_thread.lock);
 3744|       |
 3745|  3.24k|    return res;
 3746|   282k|}
decode.c:reset_context:
 2390|  4.94M|static void reset_context(BlockContext *const ctx, const int keyframe, const int pass) {
 2391|  4.94M|    memset(ctx->intra, keyframe, sizeof(ctx->intra));
 2392|  4.94M|    memset(ctx->uvmode, DC_PRED, sizeof(ctx->uvmode));
 2393|  4.94M|    if (keyframe)
  ------------------
  |  Branch (2393:9): [True: 1.77M, False: 3.16M]
  ------------------
 2394|  1.77M|        memset(ctx->mode, DC_PRED, sizeof(ctx->mode));
 2395|       |
 2396|  4.94M|    if (pass == 2) return;
  ------------------
  |  Branch (2396:9): [True: 2.42M, False: 2.51M]
  ------------------
 2397|       |
 2398|  2.51M|    memset(ctx->partition, 0, sizeof(ctx->partition));
 2399|  2.51M|    memset(ctx->skip, 0, sizeof(ctx->skip));
 2400|  2.51M|    memset(ctx->skip_mode, 0, sizeof(ctx->skip_mode));
 2401|  2.51M|    memset(ctx->tx_lpf_y, 2, sizeof(ctx->tx_lpf_y));
 2402|  2.51M|    memset(ctx->tx_lpf_uv, 1, sizeof(ctx->tx_lpf_uv));
 2403|  2.51M|    memset(ctx->tx_intra, -1, sizeof(ctx->tx_intra));
 2404|  2.51M|    memset(ctx->tx, TX_64X64, sizeof(ctx->tx));
 2405|  2.51M|    if (!keyframe) {
  ------------------
  |  Branch (2405:9): [True: 1.59M, False: 916k]
  ------------------
 2406|  1.59M|        memset(ctx->ref, -1, sizeof(ctx->ref));
 2407|  1.59M|        memset(ctx->comp_type, 0, sizeof(ctx->comp_type));
 2408|  1.59M|        memset(ctx->mode, NEARESTMV, sizeof(ctx->mode));
 2409|  1.59M|    }
 2410|  2.51M|    memset(ctx->lcoef, 0x40, sizeof(ctx->lcoef));
 2411|  2.51M|    memset(ctx->ccoef, 0x40, sizeof(ctx->ccoef));
 2412|  2.51M|    memset(ctx->filter, DAV1D_N_SWITCHABLE_FILTERS, sizeof(ctx->filter));
 2413|  2.51M|    memset(ctx->seg_pred, 0, sizeof(ctx->seg_pred));
 2414|  2.51M|    memset(ctx->pal_sz, 0, sizeof(ctx->pal_sz));
 2415|  2.51M|}
decode.c:decode_sb:
 2119|  10.6M|{
 2120|  10.6M|    const Dav1dFrameContext *const f = t->f;
 2121|  10.6M|    Dav1dTileState *const ts = t->ts;
 2122|  10.6M|    const int hsz = 16 >> bl;
 2123|  10.6M|    const int have_h_split = f->bw > t->bx + hsz;
 2124|  10.6M|    const int have_v_split = f->bh > t->by + hsz;
 2125|       |
 2126|  10.6M|    if (!have_h_split && !have_v_split) {
  ------------------
  |  Branch (2126:9): [True: 745k, False: 9.94M]
  |  Branch (2126:26): [True: 255k, False: 490k]
  ------------------
 2127|   255k|        assert(bl < BL_8X8);
  ------------------
  |  Branch (2127:9): [True: 255k, False: 0]
  ------------------
 2128|   255k|        return decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 0));
  ------------------
  |  |   51|   255k|    ((const EdgeNode*)((uintptr_t)(n) + ((const EdgeBranch*)(n))->split_offset[i]))
  ------------------
 2129|   255k|    }
 2130|       |
 2131|  10.4M|    uint16_t *pc;
 2132|  10.4M|    enum BlockPartition bp;
 2133|  10.4M|    int ctx, bx8, by8;
 2134|  10.4M|    if (t->frame_thread.pass != 2) {
  ------------------
  |  Branch (2134:9): [True: 6.79M, False: 3.64M]
  ------------------
 2135|  6.79M|        if (0 && bl == BL_64X64)
  ------------------
  |  Branch (2135:13): [Folded, False: 6.79M]
  |  Branch (2135:18): [True: 0, False: 0]
  ------------------
 2136|      0|            printf("poc=%d,y=%d,x=%d,bl=%d,r=%d\n",
 2137|      0|                   f->frame_hdr->frame_offset, t->by, t->bx, bl, ts->msac.rng);
 2138|  6.79M|        bx8 = (t->bx & 31) >> 1;
 2139|  6.79M|        by8 = (t->by & 31) >> 1;
 2140|  6.79M|        ctx = get_partition_ctx(t->a, &t->l, bl, by8, bx8);
 2141|  6.79M|        pc = ts->cdf.m.partition[bl][ctx];
 2142|  6.79M|    }
 2143|       |
 2144|  10.4M|    if (have_h_split && have_v_split) {
  ------------------
  |  Branch (2144:9): [True: 9.94M, False: 486k]
  |  Branch (2144:25): [True: 9.59M, False: 350k]
  ------------------
 2145|  9.59M|        if (t->frame_thread.pass == 2) {
  ------------------
  |  Branch (2145:13): [True: 3.43M, False: 6.15M]
  ------------------
 2146|  3.43M|            const Av1Block *const b = &f->frame_thread.b[t->by * f->b4_stride + t->bx];
 2147|  3.43M|            bp = b->bl == bl ? b->bp : PARTITION_SPLIT;
  ------------------
  |  Branch (2147:18): [True: 2.97M, False: 465k]
  ------------------
 2148|  6.15M|        } else {
 2149|  6.15M|            bp = dav1d_msac_decode_symbol_adapt16(&ts->msac, pc,
  ------------------
  |  |   57|  6.15M|#define dav1d_msac_decode_symbol_adapt16(ctx, cdf, symb) ((ctx)->symbol_adapt16(ctx, cdf, symb))
  ------------------
 2150|  6.15M|                                                  dav1d_partition_type_count[bl]);
 2151|  6.15M|            if (f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I422 &&
  ------------------
  |  Branch (2151:17): [True: 56.1k, False: 6.10M]
  ------------------
 2152|  56.1k|                (bp == PARTITION_V || bp == PARTITION_V4 ||
  ------------------
  |  Branch (2152:18): [True: 433, False: 55.7k]
  |  Branch (2152:39): [True: 1.04k, False: 54.7k]
  ------------------
 2153|  54.7k|                 bp == PARTITION_T_LEFT_SPLIT || bp == PARTITION_T_RIGHT_SPLIT))
  ------------------
  |  Branch (2153:18): [True: 37, False: 54.6k]
  |  Branch (2153:50): [True: 34.7k, False: 19.9k]
  ------------------
 2154|  36.2k|            {
 2155|  36.2k|                return 1;
 2156|  36.2k|            }
 2157|  6.12M|            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  6.12M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 6.12M]
  |  |  ------------------
  |  |   35|  6.12M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  6.12M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 2158|      0|                printf("poc=%d,y=%d,x=%d,bl=%d,ctx=%d,bp=%d: r=%d\n",
 2159|      0|                       f->frame_hdr->frame_offset, t->by, t->bx, bl, ctx, bp,
 2160|      0|                       ts->msac.rng);
 2161|  6.12M|        }
 2162|  9.56M|        const uint8_t *const b = dav1d_block_sizes[bl][bp];
 2163|       |
 2164|  9.56M|        switch (bp) {
 2165|  5.27M|        case PARTITION_NONE:
  ------------------
  |  Branch (2165:9): [True: 5.27M, False: 4.28M]
  ------------------
 2166|  5.27M|            if (decode_b(t, bl, b[0], PARTITION_NONE, node->o))
  ------------------
  |  Branch (2166:17): [True: 1.66k, False: 5.27M]
  ------------------
 2167|  1.66k|                return -1;
 2168|  5.27M|            break;
 2169|  5.27M|        case PARTITION_H:
  ------------------
  |  Branch (2169:9): [True: 821k, False: 8.73M]
  ------------------
 2170|   821k|            if (decode_b(t, bl, b[0], PARTITION_H, node->h[0]))
  ------------------
  |  Branch (2170:17): [True: 336, False: 821k]
  ------------------
 2171|    336|                return -1;
 2172|   821k|            t->by += hsz;
 2173|   821k|            if (decode_b(t, bl, b[0], PARTITION_H, node->h[1]))
  ------------------
  |  Branch (2173:17): [True: 169, False: 820k]
  ------------------
 2174|    169|                return -1;
 2175|   820k|            t->by -= hsz;
 2176|   820k|            break;
 2177|   577k|        case PARTITION_V:
  ------------------
  |  Branch (2177:9): [True: 577k, False: 8.98M]
  ------------------
 2178|   577k|            if (decode_b(t, bl, b[0], PARTITION_V, node->v[0]))
  ------------------
  |  Branch (2178:17): [True: 1.44k, False: 576k]
  ------------------
 2179|  1.44k|                return -1;
 2180|   576k|            t->bx += hsz;
 2181|   576k|            if (decode_b(t, bl, b[0], PARTITION_V, node->v[1]))
  ------------------
  |  Branch (2181:17): [True: 118, False: 576k]
  ------------------
 2182|    118|                return -1;
 2183|   576k|            t->bx -= hsz;
 2184|   576k|            break;
 2185|  1.72M|        case PARTITION_SPLIT:
  ------------------
  |  Branch (2185:9): [True: 1.72M, False: 7.83M]
  ------------------
 2186|  1.72M|            if (bl == BL_8X8) {
  ------------------
  |  Branch (2186:17): [True: 246k, False: 1.47M]
  ------------------
 2187|   246k|                const EdgeTip *const tip = (const EdgeTip *) node;
 2188|   246k|                assert(hsz == 1);
  ------------------
  |  Branch (2188:17): [True: 246k, False: 18.4E]
  ------------------
 2189|   246k|                if (decode_b(t, bl, BS_4x4, PARTITION_SPLIT, EDGE_ALL_TR_AND_BL))
  ------------------
  |  Branch (2189:21): [True: 106, False: 246k]
  ------------------
 2190|    106|                    return -1;
 2191|   246k|                const enum Filter2d tl_filter = t->tl_4x4_filter;
 2192|   246k|                t->bx++;
 2193|   246k|                if (decode_b(t, bl, BS_4x4, PARTITION_SPLIT, tip->split[0]))
  ------------------
  |  Branch (2193:21): [True: 228, False: 246k]
  ------------------
 2194|    228|                    return -1;
 2195|   246k|                t->bx--;
 2196|   246k|                t->by++;
 2197|   246k|                if (decode_b(t, bl, BS_4x4, PARTITION_SPLIT, tip->split[1]))
  ------------------
  |  Branch (2197:21): [True: 147, False: 245k]
  ------------------
 2198|    147|                    return -1;
 2199|   245k|                t->bx++;
 2200|   245k|                t->tl_4x4_filter = tl_filter;
 2201|   245k|                if (decode_b(t, bl, BS_4x4, PARTITION_SPLIT, tip->split[2]))
  ------------------
  |  Branch (2201:21): [True: 78, False: 245k]
  ------------------
 2202|     78|                    return -1;
 2203|   245k|                t->bx--;
 2204|   245k|                t->by--;
 2205|   245k|#if ARCH_X86_64
 2206|   245k|                if (t->frame_thread.pass) {
  ------------------
  |  Branch (2206:21): [True: 245k, False: 18.4E]
  ------------------
 2207|       |                    /* In 8-bit mode with 2-pass decoding the coefficient buffer
 2208|       |                     * can end up misaligned due to skips here. Work around
 2209|       |                     * the issue by explicitly realigning the buffer. */
 2210|   245k|                    const int p = t->frame_thread.pass & 1;
 2211|   245k|                    ts->frame_thread[p].cf =
 2212|   245k|                        (void*)(((uintptr_t)ts->frame_thread[p].cf + 63) & ~63);
 2213|   245k|                }
 2214|   245k|#endif
 2215|  1.47M|            } else {
 2216|  1.47M|                if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 0)))
  ------------------
  |  |   51|  1.47M|    ((const EdgeNode*)((uintptr_t)(n) + ((const EdgeBranch*)(n))->split_offset[i]))
  ------------------
  |  Branch (2216:21): [True: 3.30k, False: 1.47M]
  ------------------
 2217|  3.30k|                    return 1;
 2218|  1.47M|                t->bx += hsz;
 2219|  1.47M|                if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 1)))
  ------------------
  |  |   51|  1.47M|    ((const EdgeNode*)((uintptr_t)(n) + ((const EdgeBranch*)(n))->split_offset[i]))
  ------------------
  |  Branch (2219:21): [True: 2.45k, False: 1.47M]
  ------------------
 2220|  2.45k|                    return 1;
 2221|  1.47M|                t->bx -= hsz;
 2222|  1.47M|                t->by += hsz;
 2223|  1.47M|                if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 2)))
  ------------------
  |  |   51|  1.47M|    ((const EdgeNode*)((uintptr_t)(n) + ((const EdgeBranch*)(n))->split_offset[i]))
  ------------------
  |  Branch (2223:21): [True: 2.67k, False: 1.47M]
  ------------------
 2224|  2.67k|                    return 1;
 2225|  1.47M|                t->bx += hsz;
 2226|  1.47M|                if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 3)))
  ------------------
  |  |   51|  1.47M|    ((const EdgeNode*)((uintptr_t)(n) + ((const EdgeBranch*)(n))->split_offset[i]))
  ------------------
  |  Branch (2226:21): [True: 2.55k, False: 1.46M]
  ------------------
 2227|  2.55k|                    return 1;
 2228|  1.46M|                t->bx -= hsz;
 2229|  1.46M|                t->by -= hsz;
 2230|  1.46M|            }
 2231|  1.71M|            break;
 2232|  1.71M|        case PARTITION_T_TOP_SPLIT: {
  ------------------
  |  Branch (2232:9): [True: 142k, False: 9.41M]
  ------------------
 2233|   142k|            if (decode_b(t, bl, b[0], PARTITION_T_TOP_SPLIT, EDGE_ALL_TR_AND_BL))
  ------------------
  |  Branch (2233:17): [True: 202, False: 142k]
  ------------------
 2234|    202|                return -1;
 2235|   142k|            t->bx += hsz;
 2236|   142k|            if (decode_b(t, bl, b[0], PARTITION_T_TOP_SPLIT, node->v[1]))
  ------------------
  |  Branch (2236:17): [True: 423, False: 142k]
  ------------------
 2237|    423|                return -1;
 2238|   142k|            t->bx -= hsz;
 2239|   142k|            t->by += hsz;
 2240|   142k|            if (decode_b(t, bl, b[1], PARTITION_T_TOP_SPLIT, node->h[1]))
  ------------------
  |  Branch (2240:17): [True: 374, False: 141k]
  ------------------
 2241|    374|                return -1;
 2242|   141k|            t->by -= hsz;
 2243|   141k|            break;
 2244|   142k|        }
 2245|   125k|        case PARTITION_T_BOTTOM_SPLIT: {
  ------------------
  |  Branch (2245:9): [True: 125k, False: 9.43M]
  ------------------
 2246|   125k|            if (decode_b(t, bl, b[0], PARTITION_T_BOTTOM_SPLIT, node->h[0]))
  ------------------
  |  Branch (2246:17): [True: 174, False: 125k]
  ------------------
 2247|    174|                return -1;
 2248|   125k|            t->by += hsz;
 2249|   125k|            if (decode_b(t, bl, b[1], PARTITION_T_BOTTOM_SPLIT, node->v[0]))
  ------------------
  |  Branch (2249:17): [True: 355, False: 124k]
  ------------------
 2250|    355|                return -1;
 2251|   124k|            t->bx += hsz;
 2252|   124k|            if (decode_b(t, bl, b[1], PARTITION_T_BOTTOM_SPLIT, 0))
  ------------------
  |  Branch (2252:17): [True: 83, False: 124k]
  ------------------
 2253|     83|                return -1;
 2254|   124k|            t->bx -= hsz;
 2255|   124k|            t->by -= hsz;
 2256|   124k|            break;
 2257|   124k|        }
 2258|  93.8k|        case PARTITION_T_LEFT_SPLIT: {
  ------------------
  |  Branch (2258:9): [True: 93.8k, False: 9.46M]
  ------------------
 2259|  93.8k|            if (decode_b(t, bl, b[0], PARTITION_T_LEFT_SPLIT, EDGE_ALL_TR_AND_BL))
  ------------------
  |  Branch (2259:17): [True: 349, False: 93.4k]
  ------------------
 2260|    349|                return -1;
 2261|  93.4k|            t->by += hsz;
 2262|  93.4k|            if (decode_b(t, bl, b[0], PARTITION_T_LEFT_SPLIT, node->h[1]))
  ------------------
  |  Branch (2262:17): [True: 423, False: 93.0k]
  ------------------
 2263|    423|                return -1;
 2264|  93.0k|            t->by -= hsz;
 2265|  93.0k|            t->bx += hsz;
 2266|  93.0k|            if (decode_b(t, bl, b[1], PARTITION_T_LEFT_SPLIT, node->v[1]))
  ------------------
  |  Branch (2266:17): [True: 236, False: 92.8k]
  ------------------
 2267|    236|                return -1;
 2268|  92.8k|            t->bx -= hsz;
 2269|  92.8k|            break;
 2270|  93.0k|        }
 2271|   226k|        case PARTITION_T_RIGHT_SPLIT: {
  ------------------
  |  Branch (2271:9): [True: 226k, False: 9.33M]
  ------------------
 2272|   226k|            if (decode_b(t, bl, b[0], PARTITION_T_RIGHT_SPLIT, node->v[0]))
  ------------------
  |  Branch (2272:17): [True: 1.13k, False: 225k]
  ------------------
 2273|  1.13k|                return -1;
 2274|   225k|            t->bx += hsz;
 2275|   225k|            if (decode_b(t, bl, b[1], PARTITION_T_RIGHT_SPLIT, node->h[0]))
  ------------------
  |  Branch (2275:17): [True: 613, False: 224k]
  ------------------
 2276|    613|                return -1;
 2277|   224k|            t->by += hsz;
 2278|   224k|            if (decode_b(t, bl, b[1], PARTITION_T_RIGHT_SPLIT, 0))
  ------------------
  |  Branch (2278:17): [True: 209, False: 224k]
  ------------------
 2279|    209|                return -1;
 2280|   224k|            t->by -= hsz;
 2281|   224k|            t->bx -= hsz;
 2282|   224k|            break;
 2283|   224k|        }
 2284|   313k|        case PARTITION_H4: {
  ------------------
  |  Branch (2284:9): [True: 313k, False: 9.24M]
  ------------------
 2285|   313k|            const EdgeBranch *const branch = (const EdgeBranch *) node;
 2286|   313k|            if (decode_b(t, bl, b[0], PARTITION_H4, node->h[0]))
  ------------------
  |  Branch (2286:17): [True: 310, False: 312k]
  ------------------
 2287|    310|                return -1;
 2288|   312k|            t->by += hsz >> 1;
 2289|   312k|            if (decode_b(t, bl, b[0], PARTITION_H4, branch->h4))
  ------------------
  |  Branch (2289:17): [True: 216, False: 312k]
  ------------------
 2290|    216|                return -1;
 2291|   312k|            t->by += hsz >> 1;
 2292|   312k|            if (decode_b(t, bl, b[0], PARTITION_H4, EDGE_ALL_LEFT_HAS_BOTTOM))
  ------------------
  |  Branch (2292:17): [True: 100, False: 312k]
  ------------------
 2293|    100|                return -1;
 2294|   312k|            t->by += hsz >> 1;
 2295|   312k|            if (t->by < f->bh)
  ------------------
  |  Branch (2295:17): [True: 305k, False: 7.16k]
  ------------------
 2296|   305k|                if (decode_b(t, bl, b[0], PARTITION_H4, node->h[1]))
  ------------------
  |  Branch (2296:21): [True: 187, False: 305k]
  ------------------
 2297|    187|                    return -1;
 2298|   312k|            t->by -= hsz * 3 >> 1;
 2299|   312k|            break;
 2300|   312k|        }
 2301|   280k|        case PARTITION_V4: {
  ------------------
  |  Branch (2301:9): [True: 280k, False: 9.27M]
  ------------------
 2302|   280k|            const EdgeBranch *const branch = (const EdgeBranch *) node;
 2303|   280k|            if (decode_b(t, bl, b[0], PARTITION_V4, node->v[0]))
  ------------------
  |  Branch (2303:17): [True: 330, False: 280k]
  ------------------
 2304|    330|                return -1;
 2305|   280k|            t->bx += hsz >> 1;
 2306|   280k|            if (decode_b(t, bl, b[0], PARTITION_V4, branch->v4))
  ------------------
  |  Branch (2306:17): [True: 1.42k, False: 279k]
  ------------------
 2307|  1.42k|                return -1;
 2308|   279k|            t->bx += hsz >> 1;
 2309|   279k|            if (decode_b(t, bl, b[0], PARTITION_V4, EDGE_ALL_TOP_HAS_RIGHT))
  ------------------
  |  Branch (2309:17): [True: 100, False: 278k]
  ------------------
 2310|    100|                return -1;
 2311|   278k|            t->bx += hsz >> 1;
 2312|   278k|            if (t->bx < f->bw)
  ------------------
  |  Branch (2312:17): [True: 262k, False: 15.9k]
  ------------------
 2313|   262k|                if (decode_b(t, bl, b[0], PARTITION_V4, node->v[1]))
  ------------------
  |  Branch (2313:21): [True: 171, False: 262k]
  ------------------
 2314|    171|                    return -1;
 2315|   278k|            t->bx -= hsz * 3 >> 1;
 2316|   278k|            break;
 2317|   278k|        }
 2318|      0|        default: assert(0);
  ------------------
  |  Branch (2318:9): [True: 0, False: 9.56M]
  |  Branch (2318:18): [Folded, False: 0]
  ------------------
 2319|  9.56M|        }
 2320|  9.56M|    } else if (have_h_split) {
  ------------------
  |  Branch (2320:16): [True: 352k, False: 484k]
  ------------------
 2321|   352k|        unsigned is_split;
 2322|   352k|        if (t->frame_thread.pass == 2) {
  ------------------
  |  Branch (2322:13): [True: 52.9k, False: 299k]
  ------------------
 2323|  52.9k|            const Av1Block *const b = &f->frame_thread.b[t->by * f->b4_stride + t->bx];
 2324|  52.9k|            is_split = b->bl != bl;
 2325|   299k|        } else {
 2326|   299k|            is_split = dav1d_msac_decode_bool(&ts->msac,
  ------------------
  |  |   54|   299k|#define dav1d_msac_decode_bool           dav1d_msac_decode_bool_sse2
  ------------------
 2327|   299k|                           gather_top_partition_prob(pc, bl));
 2328|   299k|            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   299k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 299k]
  |  |  ------------------
  |  |   35|   299k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   299k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 2329|      0|                printf("poc=%d,y=%d,x=%d,bl=%d,ctx=%d,bp=%d: r=%d\n",
 2330|      0|                       f->frame_hdr->frame_offset, t->by, t->bx, bl, ctx,
 2331|      0|                       is_split ? PARTITION_SPLIT : PARTITION_H, ts->msac.rng);
  ------------------
  |  Branch (2331:24): [True: 0, False: 0]
  ------------------
 2332|   299k|        }
 2333|       |
 2334|   352k|        assert(bl < BL_8X8);
  ------------------
  |  Branch (2334:9): [True: 352k, False: 18.4E]
  ------------------
 2335|   352k|        if (is_split) {
  ------------------
  |  Branch (2335:13): [True: 224k, False: 127k]
  ------------------
 2336|   224k|            bp = PARTITION_SPLIT;
 2337|   224k|            if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 0))) return 1;
  ------------------
  |  |   51|   224k|    ((const EdgeNode*)((uintptr_t)(n) + ((const EdgeBranch*)(n))->split_offset[i]))
  ------------------
  |  Branch (2337:17): [True: 2.14k, False: 222k]
  ------------------
 2338|   222k|            t->bx += hsz;
 2339|   222k|            if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 1))) return 1;
  ------------------
  |  |   51|   222k|    ((const EdgeNode*)((uintptr_t)(n) + ((const EdgeBranch*)(n))->split_offset[i]))
  ------------------
  |  Branch (2339:17): [True: 1.58k, False: 221k]
  ------------------
 2340|   221k|            t->bx -= hsz;
 2341|   221k|        } else {
 2342|   127k|            bp = PARTITION_H;
 2343|   127k|            if (decode_b(t, bl, dav1d_block_sizes[bl][PARTITION_H][0],
  ------------------
  |  Branch (2343:17): [True: 41, False: 127k]
  ------------------
 2344|   127k|                         PARTITION_H, node->h[0]))
 2345|     41|                return -1;
 2346|   127k|        }
 2347|   484k|    } else {
 2348|   484k|        assert(have_v_split);
  ------------------
  |  Branch (2348:9): [True: 490k, False: 18.4E]
  ------------------
 2349|   490k|        unsigned is_split;
 2350|   490k|        if (t->frame_thread.pass == 2) {
  ------------------
  |  Branch (2350:13): [True: 182k, False: 308k]
  ------------------
 2351|   182k|            const Av1Block *const b = &f->frame_thread.b[t->by * f->b4_stride + t->bx];
 2352|   182k|            is_split = b->bl != bl;
 2353|   308k|        } else {
 2354|   308k|            is_split = dav1d_msac_decode_bool(&ts->msac,
  ------------------
  |  |   54|   308k|#define dav1d_msac_decode_bool           dav1d_msac_decode_bool_sse2
  ------------------
 2355|   308k|                           gather_left_partition_prob(pc, bl));
 2356|   308k|            if (f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I422 && !is_split)
  ------------------
  |  Branch (2356:17): [True: 37.5k, False: 270k]
  |  Branch (2356:63): [True: 575, False: 36.9k]
  ------------------
 2357|    575|                return 1;
 2358|   307k|            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   307k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 307k]
  |  |  ------------------
  |  |   35|   307k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   307k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 2359|      0|                printf("poc=%d,y=%d,x=%d,bl=%d,ctx=%d,bp=%d: r=%d\n",
 2360|      0|                       f->frame_hdr->frame_offset, t->by, t->bx, bl, ctx,
 2361|      0|                       is_split ? PARTITION_SPLIT : PARTITION_V, ts->msac.rng);
  ------------------
  |  Branch (2361:24): [True: 0, False: 0]
  ------------------
 2362|   307k|        }
 2363|       |
 2364|   490k|        assert(bl < BL_8X8);
  ------------------
  |  Branch (2364:9): [True: 490k, False: 18.4E]
  ------------------
 2365|   490k|        if (is_split) {
  ------------------
  |  Branch (2365:13): [True: 184k, False: 305k]
  ------------------
 2366|   184k|            bp = PARTITION_SPLIT;
 2367|   184k|            if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 0))) return 1;
  ------------------
  |  |   51|   184k|    ((const EdgeNode*)((uintptr_t)(n) + ((const EdgeBranch*)(n))->split_offset[i]))
  ------------------
  |  Branch (2367:17): [True: 41.8k, False: 142k]
  ------------------
 2368|   142k|            t->by += hsz;
 2369|   142k|            if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 2))) return 1;
  ------------------
  |  |   51|   142k|    ((const EdgeNode*)((uintptr_t)(n) + ((const EdgeBranch*)(n))->split_offset[i]))
  ------------------
  |  Branch (2369:17): [True: 7.99k, False: 134k]
  ------------------
 2370|   134k|            t->by -= hsz;
 2371|   305k|        } else {
 2372|   305k|            bp = PARTITION_V;
 2373|   305k|            if (decode_b(t, bl, dav1d_block_sizes[bl][PARTITION_V][0],
  ------------------
  |  Branch (2373:17): [True: 37, False: 305k]
  ------------------
 2374|   305k|                         PARTITION_V, node->v[0]))
 2375|     37|                return -1;
 2376|   305k|        }
 2377|   490k|    }
 2378|       |
 2379|  10.3M|    if (t->frame_thread.pass != 2 && (bp != PARTITION_SPLIT || bl == BL_8X8)) {
  ------------------
  |  Branch (2379:9): [True: 6.68M, False: 3.64M]
  |  Branch (2379:39): [True: 5.20M, False: 1.47M]
  |  Branch (2379:64): [True: 186k, False: 1.28M]
  ------------------
 2380|  5.39M|#define set_ctx(rep_macro) \
 2381|  5.39M|        rep_macro(t->a->partition, bx8, dav1d_al_part_ctx[0][bl][bp]); \
 2382|  5.39M|        rep_macro(t->l.partition, by8, dav1d_al_part_ctx[1][bl][bp])
 2383|  5.39M|        case_set_upto16(ulog2(hsz));
  ------------------
  |  |   80|  5.39M|    switch (var) { \
  |  |   81|  1.17M|    case 0: set_ctx(set_ctx1); break; \
  |  |  ------------------
  |  |  |  | 2381|  1.17M|        rep_macro(t->a->partition, bx8, dav1d_al_part_ctx[0][bl][bp]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   81|  1.17M|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|  1.17M|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 2382|  1.17M|        rep_macro(t->l.partition, by8, dav1d_al_part_ctx[1][bl][bp])
  |  |  |  |  ------------------
  |  |  |  |  |  |   81|  1.17M|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|  1.17M|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (81:5): [True: 1.17M, False: 4.21M]
  |  |  ------------------
  |  |   82|  1.47M|    case 1: set_ctx(set_ctx2); break; \
  |  |  ------------------
  |  |  |  | 2381|  1.47M|        rep_macro(t->a->partition, bx8, dav1d_al_part_ctx[0][bl][bp]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   82|  1.47M|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  1.47M|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 2382|  1.47M|        rep_macro(t->l.partition, by8, dav1d_al_part_ctx[1][bl][bp])
  |  |  |  |  ------------------
  |  |  |  |  |  |   82|  1.47M|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  1.47M|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (82:5): [True: 1.47M, False: 3.92M]
  |  |  ------------------
  |  |   83|   745k|    case 2: set_ctx(set_ctx4); break; \
  |  |  ------------------
  |  |  |  | 2381|   745k|        rep_macro(t->a->partition, bx8, dav1d_al_part_ctx[0][bl][bp]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   83|   745k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   745k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 2382|   745k|        rep_macro(t->l.partition, by8, dav1d_al_part_ctx[1][bl][bp])
  |  |  |  |  ------------------
  |  |  |  |  |  |   83|   745k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   745k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (83:5): [True: 745k, False: 4.65M]
  |  |  ------------------
  |  |   84|  1.41M|    case 3: set_ctx(set_ctx8); break; \
  |  |  ------------------
  |  |  |  | 2381|  1.41M|        rep_macro(t->a->partition, bx8, dav1d_al_part_ctx[0][bl][bp]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|  1.41M|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|  1.41M|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 2382|  1.41M|        rep_macro(t->l.partition, by8, dav1d_al_part_ctx[1][bl][bp])
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|  1.41M|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|  1.41M|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (84:5): [True: 1.41M, False: 3.98M]
  |  |  ------------------
  |  |   85|   585k|    case 4: set_ctx(set_ctx16); break; \
  |  |  ------------------
  |  |  |  | 2381|   585k|        rep_macro(t->a->partition, bx8, dav1d_al_part_ctx[0][bl][bp]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   85|   585k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   585k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   585k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   585k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 585k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 2382|   585k|        rep_macro(t->l.partition, by8, dav1d_al_part_ctx[1][bl][bp])
  |  |  |  |  ------------------
  |  |  |  |  |  |   85|   585k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   585k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   585k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   585k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 585k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (85:5): [True: 585k, False: 4.80M]
  |  |  ------------------
  |  |   86|      0|    default: assert(0); \
  |  |  ------------------
  |  |  |  Branch (86:5): [True: 0, False: 5.39M]
  |  |  ------------------
  |  |   87|  5.39M|    }
  ------------------
  |  Branch (2383:9): [Folded, False: 0]
  ------------------
 2384|  5.39M|#undef set_ctx
 2385|  5.39M|    }
 2386|       |
 2387|  10.3M|    return 0;
 2388|  10.3M|}
decode.c:decode_b:
  687|  13.5M|                    const enum EdgeFlags intra_edge_flags) {
  688|  13.5M|    Dav1dTileState *const ts = t->ts;
  689|  13.5M|    const Dav1dFrameContext *const f = t->f;
  690|  13.5M|    Av1Block b_mem, *const b = t->frame_thread.pass ?
  ------------------
  |  Branch (690:32): [True: 13.5M, False: 18.4E]
  ------------------
  691|  18.4E|        &f->frame_thread.b[t->by * f->b4_stride + t->bx] : &b_mem;
  692|  13.5M|    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
  693|  13.5M|    const int bx4 = t->bx & 31, by4 = t->by & 31;
  694|  13.5M|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
  695|  13.5M|    const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
  696|  13.5M|    const int cbx4 = bx4 >> ss_hor, cby4 = by4 >> ss_ver;
  697|  13.5M|    const int bw4 = b_dim[0], bh4 = b_dim[1];
  698|  13.5M|    const int w4 = imin(bw4, f->bw - t->bx), h4 = imin(bh4, f->bh - t->by);
  699|  13.5M|    const int cbw4 = (bw4 + ss_hor) >> ss_hor, cbh4 = (bh4 + ss_ver) >> ss_ver;
  700|  13.5M|    const int have_left = t->bx > ts->tiling.col_start;
  701|  13.5M|    const int have_top = t->by > ts->tiling.row_start;
  702|  13.5M|    const int has_chroma = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400 &&
  ------------------
  |  Branch (702:28): [True: 9.25M, False: 4.26M]
  ------------------
  703|  9.25M|                           (bw4 > ss_hor || t->bx & 1) &&
  ------------------
  |  Branch (703:29): [True: 8.27M, False: 978k]
  |  Branch (703:45): [True: 488k, False: 489k]
  ------------------
  704|  8.77M|                           (bh4 > ss_ver || t->by & 1);
  ------------------
  |  Branch (704:29): [True: 7.87M, False: 899k]
  |  Branch (704:45): [True: 449k, False: 449k]
  ------------------
  705|       |
  706|  13.5M|    if (t->frame_thread.pass == 2) {
  ------------------
  |  Branch (706:9): [True: 4.50M, False: 9.02M]
  ------------------
  707|  4.50M|        if (b->intra) {
  ------------------
  |  Branch (707:13): [True: 2.04M, False: 2.45M]
  ------------------
  708|  2.04M|            f->bd_fn.recon_b_intra(t, bs, intra_edge_flags, b);
  709|       |
  710|  2.04M|            const enum IntraPredMode y_mode_nofilt =
  711|  2.04M|                b->y_mode == FILTER_PRED ? DC_PRED : b->y_mode;
  ------------------
  |  Branch (711:17): [True: 357k, False: 1.69M]
  ------------------
  712|  2.04M|#define set_ctx(rep_macro) \
  713|  2.04M|            rep_macro(edge->mode, off, y_mode_nofilt); \
  714|  2.04M|            rep_macro(edge->intra, off, 1)
  715|  2.04M|            BlockContext *edge = t->a;
  716|  6.14M|            for (int i = 0, off = bx4; i < 2; i++, off = by4, edge = &t->l) {
  ------------------
  |  Branch (716:40): [True: 4.09M, False: 2.04M]
  ------------------
  717|  4.09M|                case_set(b_dim[2 + i]);
  ------------------
  |  |   70|  4.09M|    switch (var) { \
  |  |   71|   663k|    case 0: set_ctx(set_ctx1); break; \
  |  |  ------------------
  |  |  |  |  713|   663k|            rep_macro(edge->mode, off, y_mode_nofilt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   663k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   663k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  714|   663k|            rep_macro(edge->intra, off, 1)
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   663k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   663k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (71:5): [True: 663k, False: 3.43M]
  |  |  ------------------
  |  |   72|  1.03M|    case 1: set_ctx(set_ctx2); break; \
  |  |  ------------------
  |  |  |  |  713|  1.03M|            rep_macro(edge->mode, off, y_mode_nofilt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|  1.03M|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  1.03M|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  714|  1.03M|            rep_macro(edge->intra, off, 1)
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|  1.03M|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  1.03M|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (72:5): [True: 1.03M, False: 3.06M]
  |  |  ------------------
  |  |   73|   854k|    case 2: set_ctx(set_ctx4); break; \
  |  |  ------------------
  |  |  |  |  713|   854k|            rep_macro(edge->mode, off, y_mode_nofilt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   854k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   854k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  714|   854k|            rep_macro(edge->intra, off, 1)
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   854k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   854k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (73:5): [True: 854k, False: 3.24M]
  |  |  ------------------
  |  |   74|   477k|    case 3: set_ctx(set_ctx8); break; \
  |  |  ------------------
  |  |  |  |  713|   477k|            rep_macro(edge->mode, off, y_mode_nofilt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   477k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   477k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  714|   477k|            rep_macro(edge->intra, off, 1)
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   477k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   477k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (74:5): [True: 477k, False: 3.61M]
  |  |  ------------------
  |  |   75|   818k|    case 4: set_ctx(set_ctx16); break; \
  |  |  ------------------
  |  |  |  |  713|   818k|            rep_macro(edge->mode, off, y_mode_nofilt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   818k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   818k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   818k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   818k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 818k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  714|   818k|            rep_macro(edge->intra, off, 1)
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   818k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   818k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   818k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   818k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 818k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (75:5): [True: 818k, False: 3.27M]
  |  |  ------------------
  |  |   76|   249k|    case 5: set_ctx(set_ctx32); break; \
  |  |  ------------------
  |  |  |  |  713|   249k|            rep_macro(edge->mode, off, y_mode_nofilt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|   249k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|   249k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|   249k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|   249k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 249k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  714|   249k|            rep_macro(edge->intra, off, 1)
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|   249k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|   249k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|   249k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|   249k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 249k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (76:5): [True: 249k, False: 3.84M]
  |  |  ------------------
  |  |   77|      0|    default: assert(0); \
  |  |  ------------------
  |  |  |  Branch (77:5): [True: 0, False: 4.09M]
  |  |  ------------------
  |  |   78|  4.09M|    }
  ------------------
  |  Branch (717:17): [Folded, False: 0]
  ------------------
  718|  4.09M|            }
  719|  2.04M|#undef set_ctx
  720|  2.04M|            if (IS_INTER_OR_SWITCH(f->frame_hdr)) {
  ------------------
  |  |   36|  2.04M|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 387k, False: 1.66M]
  |  |  ------------------
  ------------------
  721|   387k|                refmvs_block *const r = &t->rt.r[(t->by & 31) + 5 + bh4 - 1][t->bx];
  722|  1.85M|                for (int x = 0; x < bw4; x++) {
  ------------------
  |  Branch (722:33): [True: 1.46M, False: 387k]
  ------------------
  723|  1.46M|                    r[x].ref.ref[0] = 0;
  724|  1.46M|                    r[x].bs = bs;
  725|  1.46M|                }
  726|   387k|                refmvs_block *const *rr = &t->rt.r[(t->by & 31) + 5];
  727|  1.47M|                for (int y = 0; y < bh4 - 1; y++) {
  ------------------
  |  Branch (727:33): [True: 1.08M, False: 387k]
  ------------------
  728|  1.08M|                    rr[y][t->bx + bw4 - 1].ref.ref[0] = 0;
  729|  1.08M|                    rr[y][t->bx + bw4 - 1].bs = bs;
  730|  1.08M|                }
  731|   387k|            }
  732|       |
  733|  2.04M|            if (has_chroma) {
  ------------------
  |  Branch (733:17): [True: 1.22M, False: 828k]
  ------------------
  734|  1.22M|                uint8_t uv_mode = b->uv_mode;
  735|  1.22M|                dav1d_memset_pow2[ulog2(cbw4)](&t->a->uvmode[cbx4], uv_mode);
  736|  1.22M|                dav1d_memset_pow2[ulog2(cbh4)](&t->l.uvmode[cby4], uv_mode);
  737|  1.22M|            }
  738|  2.45M|        } else {
  739|  2.45M|            if (IS_INTER_OR_SWITCH(f->frame_hdr) /* not intrabc */ &&
  ------------------
  |  |   36|  4.90M|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 2.42M, False: 27.4k]
  |  |  ------------------
  ------------------
  740|  2.42M|                b->comp_type == COMP_INTER_NONE && b->motion_mode == MM_WARP)
  ------------------
  |  Branch (740:17): [True: 2.25M, False: 169k]
  |  Branch (740:52): [True: 139k, False: 2.11M]
  ------------------
  741|   139k|            {
  742|   139k|                if (b->matrix[0] == INT16_MIN) {
  ------------------
  |  Branch (742:21): [True: 10.2k, False: 129k]
  ------------------
  743|  10.2k|                    t->warpmv.type = DAV1D_WM_TYPE_IDENTITY;
  744|   129k|                } else {
  745|   129k|                    t->warpmv.type = DAV1D_WM_TYPE_AFFINE;
  746|   129k|                    t->warpmv.matrix[2] = b->matrix[0] + 0x10000;
  747|   129k|                    t->warpmv.matrix[3] = b->matrix[1];
  748|   129k|                    t->warpmv.matrix[4] = b->matrix[2];
  749|   129k|                    t->warpmv.matrix[5] = b->matrix[3] + 0x10000;
  750|   129k|                    dav1d_set_affine_mv2d(bw4, bh4, b->mv2d, &t->warpmv,
  751|   129k|                                          t->bx, t->by);
  752|   129k|                    dav1d_get_shear_params(&t->warpmv);
  753|   129k|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  754|   129k|                    if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   129k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 129k]
  |  |  ------------------
  |  |   35|   129k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   129k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  755|      0|                        printf("[ %c%x %c%x %c%x\n  %c%x %c%x %c%x ]\n"
  756|      0|                               "alpha=%c%x, beta=%c%x, gamma=%c%x, delta=%c%x, mv=y:%d,x:%d\n",
  757|      0|                               signabs(t->warpmv.matrix[0]),
  ------------------
  |  |  753|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (753:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  758|      0|                               signabs(t->warpmv.matrix[1]),
  ------------------
  |  |  753|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (753:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  759|      0|                               signabs(t->warpmv.matrix[2]),
  ------------------
  |  |  753|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (753:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  760|      0|                               signabs(t->warpmv.matrix[3]),
  ------------------
  |  |  753|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (753:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  761|      0|                               signabs(t->warpmv.matrix[4]),
  ------------------
  |  |  753|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (753:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  762|      0|                               signabs(t->warpmv.matrix[5]),
  ------------------
  |  |  753|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (753:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  763|      0|                               signabs(t->warpmv.u.p.alpha),
  ------------------
  |  |  753|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (753:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  764|      0|                               signabs(t->warpmv.u.p.beta),
  ------------------
  |  |  753|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (753:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  765|      0|                               signabs(t->warpmv.u.p.gamma),
  ------------------
  |  |  753|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (753:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  766|      0|                               signabs(t->warpmv.u.p.delta),
  ------------------
  |  |  753|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (753:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  767|      0|                               b->mv2d.y, b->mv2d.x);
  768|   129k|#undef signabs
  769|   129k|                }
  770|   139k|            }
  771|  2.45M|            if (f->bd_fn.recon_b_inter(t, bs, b)) return -1;
  ------------------
  |  Branch (771:17): [True: 0, False: 2.45M]
  ------------------
  772|       |
  773|  2.45M|            const uint8_t *const filter = dav1d_filter_dir[b->filter2d];
  774|  2.45M|            BlockContext *edge = t->a;
  775|  7.36M|            for (int i = 0, off = bx4; i < 2; i++, off = by4, edge = &t->l) {
  ------------------
  |  Branch (775:40): [True: 4.91M, False: 2.45M]
  ------------------
  776|  4.91M|#define set_ctx(rep_macro) \
  777|  4.91M|                rep_macro(edge->filter[0], off, filter[0]); \
  778|  4.91M|                rep_macro(edge->filter[1], off, filter[1]); \
  779|  4.91M|                rep_macro(edge->intra, off, 0)
  780|  4.91M|                case_set(b_dim[2 + i]);
  ------------------
  |  |   70|  4.91M|    switch (var) { \
  |  |   71|   507k|    case 0: set_ctx(set_ctx1); break; \
  |  |  ------------------
  |  |  |  |  777|   507k|                rep_macro(edge->filter[0], off, filter[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   507k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   507k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  778|   507k|                rep_macro(edge->filter[1], off, filter[1]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   507k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   507k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  779|   507k|                rep_macro(edge->intra, off, 0)
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   507k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   507k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (71:5): [True: 507k, False: 4.40M]
  |  |  ------------------
  |  |   72|  1.03M|    case 1: set_ctx(set_ctx2); break; \
  |  |  ------------------
  |  |  |  |  777|  1.03M|                rep_macro(edge->filter[0], off, filter[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|  1.03M|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  1.03M|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  778|  1.03M|                rep_macro(edge->filter[1], off, filter[1]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|  1.03M|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  1.03M|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  779|  1.03M|                rep_macro(edge->intra, off, 0)
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|  1.03M|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  1.03M|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (72:5): [True: 1.03M, False: 3.87M]
  |  |  ------------------
  |  |   73|   866k|    case 2: set_ctx(set_ctx4); break; \
  |  |  ------------------
  |  |  |  |  777|   866k|                rep_macro(edge->filter[0], off, filter[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   866k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   866k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  778|   866k|                rep_macro(edge->filter[1], off, filter[1]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   866k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   866k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  779|   866k|                rep_macro(edge->intra, off, 0)
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   866k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   866k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (73:5): [True: 866k, False: 4.04M]
  |  |  ------------------
  |  |   74|   313k|    case 3: set_ctx(set_ctx8); break; \
  |  |  ------------------
  |  |  |  |  777|   313k|                rep_macro(edge->filter[0], off, filter[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   313k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   313k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  778|   313k|                rep_macro(edge->filter[1], off, filter[1]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   313k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   313k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  779|   313k|                rep_macro(edge->intra, off, 0)
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   313k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   313k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (74:5): [True: 313k, False: 4.59M]
  |  |  ------------------
  |  |   75|  1.59M|    case 4: set_ctx(set_ctx16); break; \
  |  |  ------------------
  |  |  |  |  777|  1.59M|                rep_macro(edge->filter[0], off, filter[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|  1.59M|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|  1.59M|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|  1.59M|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|  1.59M|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 1.59M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  778|  1.59M|                rep_macro(edge->filter[1], off, filter[1]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|  1.59M|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|  1.59M|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|  1.59M|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|  1.59M|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 1.59M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  779|  1.59M|                rep_macro(edge->intra, off, 0)
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|  1.59M|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|  1.59M|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|  1.59M|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|  1.59M|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 1.59M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (75:5): [True: 1.59M, False: 3.32M]
  |  |  ------------------
  |  |   76|   599k|    case 5: set_ctx(set_ctx32); break; \
  |  |  ------------------
  |  |  |  |  777|   599k|                rep_macro(edge->filter[0], off, filter[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|   599k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|   599k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|   599k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|   599k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 599k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  778|   599k|                rep_macro(edge->filter[1], off, filter[1]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|   599k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|   599k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|   599k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|   599k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 599k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  779|   599k|                rep_macro(edge->intra, off, 0)
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|   599k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|   599k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|   599k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|   599k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 599k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (76:5): [True: 599k, False: 4.31M]
  |  |  ------------------
  |  |   77|      0|    default: assert(0); \
  |  |  ------------------
  |  |  |  Branch (77:5): [True: 0, False: 4.91M]
  |  |  ------------------
  |  |   78|  4.91M|    }
  ------------------
  |  Branch (780:17): [Folded, False: 0]
  ------------------
  781|  4.91M|#undef set_ctx
  782|  4.91M|            }
  783|       |
  784|  2.45M|            if (IS_INTER_OR_SWITCH(f->frame_hdr)) {
  ------------------
  |  |   36|  2.45M|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 2.42M, False: 26.8k]
  |  |  ------------------
  ------------------
  785|  2.42M|                refmvs_block *const r = &t->rt.r[(t->by & 31) + 5 + bh4 - 1][t->bx];
  786|  28.5M|                for (int x = 0; x < bw4; x++) {
  ------------------
  |  Branch (786:33): [True: 26.1M, False: 2.42M]
  ------------------
  787|  26.1M|                    r[x].ref.ref[0] = b->ref[0] + 1;
  788|  26.1M|                    r[x].mv.mv[0] = b->mv[0];
  789|  26.1M|                    r[x].bs = bs;
  790|  26.1M|                }
  791|  2.42M|                refmvs_block *const *rr = &t->rt.r[(t->by & 31) + 5];
  792|  26.5M|                for (int y = 0; y < bh4 - 1; y++) {
  ------------------
  |  Branch (792:33): [True: 24.0M, False: 2.42M]
  ------------------
  793|  24.0M|                    rr[y][t->bx + bw4 - 1].ref.ref[0] = b->ref[0] + 1;
  794|  24.0M|                    rr[y][t->bx + bw4 - 1].mv.mv[0] = b->mv[0];
  795|  24.0M|                    rr[y][t->bx + bw4 - 1].bs = bs;
  796|  24.0M|                }
  797|  2.42M|            }
  798|       |
  799|  2.45M|            if (has_chroma) {
  ------------------
  |  Branch (799:17): [True: 1.13M, False: 1.31M]
  ------------------
  800|  1.13M|                dav1d_memset_pow2[ulog2(cbw4)](&t->a->uvmode[cbx4], DC_PRED);
  801|  1.13M|                dav1d_memset_pow2[ulog2(cbh4)](&t->l.uvmode[cby4], DC_PRED);
  802|  1.13M|            }
  803|  2.45M|        }
  804|  4.50M|        return 0;
  805|  4.50M|    }
  806|       |
  807|  9.02M|    const int cw4 = (w4 + ss_hor) >> ss_hor, ch4 = (h4 + ss_ver) >> ss_ver;
  808|       |
  809|  9.02M|    b->bl = bl;
  810|  9.02M|    b->bp = bp;
  811|  9.02M|    b->bs = bs;
  812|       |
  813|  9.02M|    const Dav1dSegmentationData *seg = NULL;
  814|       |
  815|       |    // segment_id (if seg_feature for skip/ref/gmv is enabled)
  816|  9.02M|    int seg_pred = 0;
  817|  9.02M|    if (f->frame_hdr->segmentation.enabled) {
  ------------------
  |  Branch (817:9): [True: 2.65M, False: 6.36M]
  ------------------
  818|  2.65M|        if (!f->frame_hdr->segmentation.update_map) {
  ------------------
  |  Branch (818:13): [True: 681k, False: 1.97M]
  ------------------
  819|   681k|            if (f->prev_segmap) {
  ------------------
  |  Branch (819:17): [True: 484k, False: 196k]
  ------------------
  820|   484k|                unsigned seg_id = get_prev_frame_segid(f, t->by, t->bx, w4, h4,
  821|   484k|                                                       f->prev_segmap,
  822|   484k|                                                       f->b4_stride);
  823|   484k|                if (seg_id >= 8) return -1;
  ------------------
  |  Branch (823:21): [True: 0, False: 484k]
  ------------------
  824|   484k|                b->seg_id = seg_id;
  825|   484k|            } else {
  826|   196k|                b->seg_id = 0;
  827|   196k|            }
  828|   681k|            seg = &f->frame_hdr->segmentation.seg_data.d[b->seg_id];
  829|  1.97M|        } else if (f->frame_hdr->segmentation.seg_data.preskip) {
  ------------------
  |  Branch (829:20): [True: 1.65M, False: 315k]
  ------------------
  830|  1.65M|            if (f->frame_hdr->segmentation.temporal &&
  ------------------
  |  Branch (830:17): [True: 446k, False: 1.21M]
  ------------------
  831|   446k|                (seg_pred = dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   446k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  |  Branch (831:17): [True: 103k, False: 342k]
  ------------------
  832|   446k|                                ts->cdf.m.seg_pred[t->a->seg_pred[bx4] +
  833|   446k|                                t->l.seg_pred[by4]])))
  834|   103k|            {
  835|       |                // temporal predicted seg_id
  836|   103k|                if (f->prev_segmap) {
  ------------------
  |  Branch (836:21): [True: 81.3k, False: 22.4k]
  ------------------
  837|  81.3k|                    unsigned seg_id = get_prev_frame_segid(f, t->by, t->bx,
  838|  81.3k|                                                           w4, h4,
  839|  81.3k|                                                           f->prev_segmap,
  840|  81.3k|                                                           f->b4_stride);
  841|  81.3k|                    if (seg_id >= 8) return -1;
  ------------------
  |  Branch (841:25): [True: 0, False: 81.3k]
  ------------------
  842|  81.3k|                    b->seg_id = seg_id;
  843|  81.3k|                } else {
  844|  22.4k|                    b->seg_id = 0;
  845|  22.4k|                }
  846|  1.55M|            } else {
  847|  1.55M|                int seg_ctx;
  848|  1.55M|                const unsigned pred_seg_id =
  849|  1.55M|                    get_cur_frame_segid(t->by, t->bx, have_top, have_left,
  850|  1.55M|                                        &seg_ctx, f->cur_segmap, f->b4_stride);
  851|  1.55M|                const unsigned diff = dav1d_msac_decode_symbol_adapt8(&ts->msac,
  ------------------
  |  |   48|  1.55M|#define dav1d_msac_decode_symbol_adapt8  dav1d_msac_decode_symbol_adapt8_sse2
  ------------------
  852|  1.55M|                                          ts->cdf.m.seg_id[seg_ctx],
  853|  1.55M|                                          DAV1D_MAX_SEGMENTS - 1);
  ------------------
  |  |   43|  1.55M|#define DAV1D_MAX_SEGMENTS 8
  ------------------
  854|  1.55M|                const unsigned last_active_seg_id =
  855|  1.55M|                    f->frame_hdr->segmentation.seg_data.last_active_segid;
  856|  1.55M|                b->seg_id = neg_deinterleave(diff, pred_seg_id,
  857|  1.55M|                                             last_active_seg_id + 1);
  858|  1.55M|                if (b->seg_id > last_active_seg_id) b->seg_id = 0; // error?
  ------------------
  |  Branch (858:21): [True: 73.6k, False: 1.47M]
  ------------------
  859|  1.55M|                if (b->seg_id >= DAV1D_MAX_SEGMENTS) b->seg_id = 0; // error?
  ------------------
  |  |   43|  1.55M|#define DAV1D_MAX_SEGMENTS 8
  ------------------
  |  Branch (859:21): [True: 0, False: 1.55M]
  ------------------
  860|  1.55M|            }
  861|       |
  862|  1.65M|            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  1.65M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 1.65M]
  |  |  ------------------
  |  |   35|  1.65M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  1.65M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  863|      0|                printf("Post-segid[preskip;%d]: r=%d\n",
  864|      0|                       b->seg_id, ts->msac.rng);
  865|       |
  866|  1.65M|            seg = &f->frame_hdr->segmentation.seg_data.d[b->seg_id];
  867|  1.65M|        }
  868|  6.36M|    } else {
  869|  6.36M|        b->seg_id = 0;
  870|  6.36M|    }
  871|       |
  872|       |    // skip_mode
  873|  9.02M|    if ((!seg || (!seg->globalmv && seg->ref == -1 && !seg->skip)) &&
  ------------------
  |  Branch (873:10): [True: 6.68M, False: 2.33M]
  |  Branch (873:19): [True: 716k, False: 1.62M]
  |  Branch (873:37): [True: 389k, False: 326k]
  |  Branch (873:55): [True: 330k, False: 59.5k]
  ------------------
  874|  7.02M|        f->frame_hdr->skip_mode_enabled && imin(bw4, bh4) > 1)
  ------------------
  |  Branch (874:9): [True: 248k, False: 6.77M]
  |  Branch (874:44): [True: 213k, False: 35.7k]
  ------------------
  875|   213k|    {
  876|   213k|        const int smctx = t->a->skip_mode[bx4] + t->l.skip_mode[by4];
  877|   213k|        b->skip_mode = dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   213k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  878|   213k|                           ts->cdf.m.skip_mode[smctx]);
  879|   213k|        if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   213k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 213k]
  |  |  ------------------
  |  |   35|   213k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   213k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  880|      0|            printf("Post-skipmode[%d]: r=%d\n", b->skip_mode, ts->msac.rng);
  881|  8.80M|    } else {
  882|  8.80M|        b->skip_mode = 0;
  883|  8.80M|    }
  884|       |
  885|       |    // skip
  886|  9.02M|    if (b->skip_mode || (seg && seg->skip)) {
  ------------------
  |  Branch (886:9): [True: 24.4k, False: 8.99M]
  |  Branch (886:26): [True: 2.33M, False: 6.66M]
  |  Branch (886:33): [True: 1.74M, False: 594k]
  ------------------
  887|  1.77M|        b->skip = 1;
  888|  7.24M|    } else {
  889|  7.24M|        const int sctx = t->a->skip[bx4] + t->l.skip[by4];
  890|  7.24M|        b->skip = dav1d_msac_decode_bool_adapt(&ts->msac, ts->cdf.m.skip[sctx]);
  ------------------
  |  |   52|  7.24M|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  891|  7.24M|        if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  7.24M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 7.24M]
  |  |  ------------------
  |  |   35|  7.24M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  7.24M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  892|      0|            printf("Post-skip[%d]: r=%d\n", b->skip, ts->msac.rng);
  893|  7.24M|    }
  894|       |
  895|       |    // segment_id
  896|  9.02M|    if (f->frame_hdr->segmentation.enabled &&
  ------------------
  |  Branch (896:9): [True: 2.65M, False: 6.36M]
  ------------------
  897|  2.65M|        f->frame_hdr->segmentation.update_map &&
  ------------------
  |  Branch (897:9): [True: 1.97M, False: 678k]
  ------------------
  898|  1.97M|        !f->frame_hdr->segmentation.seg_data.preskip)
  ------------------
  |  Branch (898:9): [True: 315k, False: 1.65M]
  ------------------
  899|   315k|    {
  900|   315k|        if (!b->skip && f->frame_hdr->segmentation.temporal &&
  ------------------
  |  Branch (900:13): [True: 210k, False: 104k]
  |  Branch (900:25): [True: 22.9k, False: 187k]
  ------------------
  901|  22.9k|            (seg_pred = dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  22.9k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  |  Branch (901:13): [True: 11.5k, False: 11.4k]
  ------------------
  902|  22.9k|                            ts->cdf.m.seg_pred[t->a->seg_pred[bx4] +
  903|  22.9k|                            t->l.seg_pred[by4]])))
  904|  11.4k|        {
  905|       |            // temporal predicted seg_id
  906|  11.4k|            if (f->prev_segmap) {
  ------------------
  |  Branch (906:17): [True: 1.81k, False: 9.68k]
  ------------------
  907|  1.81k|                unsigned seg_id = get_prev_frame_segid(f, t->by, t->bx, w4, h4,
  908|  1.81k|                                                       f->prev_segmap,
  909|  1.81k|                                                       f->b4_stride);
  910|  1.81k|                if (seg_id >= 8) return -1;
  ------------------
  |  Branch (910:21): [True: 0, False: 1.81k]
  ------------------
  911|  1.81k|                b->seg_id = seg_id;
  912|  9.68k|            } else {
  913|  9.68k|                b->seg_id = 0;
  914|  9.68k|            }
  915|   304k|        } else {
  916|   304k|            int seg_ctx;
  917|   304k|            const unsigned pred_seg_id =
  918|   304k|                get_cur_frame_segid(t->by, t->bx, have_top, have_left,
  919|   304k|                                    &seg_ctx, f->cur_segmap, f->b4_stride);
  920|   304k|            if (b->skip) {
  ------------------
  |  Branch (920:17): [True: 106k, False: 197k]
  ------------------
  921|   106k|                b->seg_id = pred_seg_id;
  922|   197k|            } else {
  923|   197k|                const unsigned diff = dav1d_msac_decode_symbol_adapt8(&ts->msac,
  ------------------
  |  |   48|   197k|#define dav1d_msac_decode_symbol_adapt8  dav1d_msac_decode_symbol_adapt8_sse2
  ------------------
  924|   197k|                                          ts->cdf.m.seg_id[seg_ctx],
  925|   197k|                                          DAV1D_MAX_SEGMENTS - 1);
  ------------------
  |  |   43|   197k|#define DAV1D_MAX_SEGMENTS 8
  ------------------
  926|   197k|                const unsigned last_active_seg_id =
  927|   197k|                    f->frame_hdr->segmentation.seg_data.last_active_segid;
  928|   197k|                b->seg_id = neg_deinterleave(diff, pred_seg_id,
  929|   197k|                                             last_active_seg_id + 1);
  930|   197k|                if (b->seg_id > last_active_seg_id) b->seg_id = 0; // error?
  ------------------
  |  Branch (930:21): [True: 12.4k, False: 184k]
  ------------------
  931|   197k|            }
  932|   304k|            if (b->seg_id >= DAV1D_MAX_SEGMENTS) b->seg_id = 0; // error?
  ------------------
  |  |   43|   304k|#define DAV1D_MAX_SEGMENTS 8
  ------------------
  |  Branch (932:17): [True: 1.40k, False: 302k]
  ------------------
  933|   304k|        }
  934|       |
  935|   315k|        seg = &f->frame_hdr->segmentation.seg_data.d[b->seg_id];
  936|       |
  937|   315k|        if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   315k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 315k]
  |  |  ------------------
  |  |   35|   315k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   315k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  938|      0|            printf("Post-segid[postskip;%d]: r=%d\n",
  939|      0|                   b->seg_id, ts->msac.rng);
  940|   315k|    }
  941|       |
  942|       |    // cdef index
  943|  9.02M|    if (!b->skip) {
  ------------------
  |  Branch (943:9): [True: 4.97M, False: 4.04M]
  ------------------
  944|  4.97M|        const int idx = f->seq_hdr->sb128 ? ((t->bx & 16) >> 4) +
  ------------------
  |  Branch (944:25): [True: 3.75M, False: 1.21M]
  ------------------
  945|  3.75M|                                           ((t->by & 16) >> 3) : 0;
  946|  4.97M|        if (t->cur_sb_cdef_idx_ptr[idx] == -1) {
  ------------------
  |  Branch (946:13): [True: 738k, False: 4.24M]
  ------------------
  947|   738k|            const int v = dav1d_msac_decode_bools(&ts->msac,
  948|   738k|                              f->frame_hdr->cdef.n_bits);
  949|   738k|            t->cur_sb_cdef_idx_ptr[idx] = v;
  950|   738k|            if (bw4 > 16) t->cur_sb_cdef_idx_ptr[idx + 1] = v;
  ------------------
  |  Branch (950:17): [True: 89.7k, False: 648k]
  ------------------
  951|   738k|            if (bh4 > 16) t->cur_sb_cdef_idx_ptr[idx + 2] = v;
  ------------------
  |  Branch (951:17): [True: 90.2k, False: 648k]
  ------------------
  952|   738k|            if (bw4 == 32 && bh4 == 32) t->cur_sb_cdef_idx_ptr[idx + 3] = v;
  ------------------
  |  Branch (952:17): [True: 89.7k, False: 648k]
  |  Branch (952:30): [True: 78.3k, False: 11.4k]
  ------------------
  953|       |
  954|   738k|            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   738k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 738k]
  |  |  ------------------
  |  |   35|   738k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   738k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  955|      0|                printf("Post-cdef_idx[%d]: r=%d\n",
  956|      0|                        *t->cur_sb_cdef_idx_ptr, ts->msac.rng);
  957|   738k|        }
  958|  4.97M|    }
  959|       |
  960|       |    // delta-q/lf
  961|  9.02M|    if (!((t->bx | t->by) & (31 >> !f->seq_hdr->sb128))) {
  ------------------
  |  Branch (961:9): [True: 2.08M, False: 6.93M]
  ------------------
  962|  2.08M|        const int prev_qidx = ts->last_qidx;
  963|  2.08M|        const int have_delta_q = f->frame_hdr->delta.q.present &&
  ------------------
  |  Branch (963:34): [True: 945k, False: 1.14M]
  ------------------
  964|   945k|            (bs != (f->seq_hdr->sb128 ? BS_128x128 : BS_64x64) || !b->skip);
  ------------------
  |  Branch (964:14): [True: 129k, False: 816k]
  |  Branch (964:21): [True: 72.2k, False: 873k]
  |  Branch (964:67): [True: 15.6k, False: 800k]
  ------------------
  965|       |
  966|  2.08M|        uint32_t prev_delta_lf = ts->last_delta_lf.u32;
  967|       |
  968|  2.08M|        if (have_delta_q) {
  ------------------
  |  Branch (968:13): [True: 145k, False: 1.94M]
  ------------------
  969|   145k|            int delta_q = dav1d_msac_decode_symbol_adapt4(&ts->msac,
  ------------------
  |  |   47|   145k|#define dav1d_msac_decode_symbol_adapt4  dav1d_msac_decode_symbol_adapt4_sse2
  ------------------
  970|   145k|                                                          ts->cdf.m.delta_q, 3);
  971|   145k|            if (delta_q == 3) {
  ------------------
  |  Branch (971:17): [True: 25.7k, False: 119k]
  ------------------
  972|  25.7k|                const int n_bits = 1 + dav1d_msac_decode_bools(&ts->msac, 3);
  973|  25.7k|                delta_q = dav1d_msac_decode_bools(&ts->msac, n_bits) +
  974|  25.7k|                          1 + (1 << n_bits);
  975|  25.7k|            }
  976|   145k|            if (delta_q) {
  ------------------
  |  Branch (976:17): [True: 48.4k, False: 96.7k]
  ------------------
  977|  48.4k|                if (dav1d_msac_decode_bool_equi(&ts->msac)) delta_q = -delta_q;
  ------------------
  |  |   53|  48.4k|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
  |  Branch (977:21): [True: 39.2k, False: 9.20k]
  ------------------
  978|  48.4k|                delta_q *= 1 << f->frame_hdr->delta.q.res_log2;
  979|  48.4k|            }
  980|   145k|            ts->last_qidx = iclip(ts->last_qidx + delta_q, 1, 255);
  981|   145k|            if (have_delta_q && DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   145k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 145k]
  |  |  ------------------
  |  |   35|   145k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   145k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  |  Branch (981:17): [True: 145k, False: 18.4E]
  ------------------
  982|      0|                printf("Post-delta_q[%d->%d]: r=%d\n",
  983|      0|                       delta_q, ts->last_qidx, ts->msac.rng);
  984|       |
  985|   145k|            if (f->frame_hdr->delta.lf.present) {
  ------------------
  |  Branch (985:17): [True: 51.9k, False: 93.2k]
  ------------------
  986|  51.9k|                const int n_lfs = f->frame_hdr->delta.lf.multi ?
  ------------------
  |  Branch (986:35): [True: 39.4k, False: 12.5k]
  ------------------
  987|  39.4k|                    f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400 ? 4 : 2 : 1;
  ------------------
  |  Branch (987:21): [True: 33.0k, False: 6.38k]
  ------------------
  988|       |
  989|   209k|                for (int i = 0; i < n_lfs; i++) {
  ------------------
  |  Branch (989:33): [True: 157k, False: 51.9k]
  ------------------
  990|   157k|                    int delta_lf = dav1d_msac_decode_symbol_adapt4(&ts->msac,
  ------------------
  |  |   47|   157k|#define dav1d_msac_decode_symbol_adapt4  dav1d_msac_decode_symbol_adapt4_sse2
  ------------------
  991|   157k|                        ts->cdf.m.delta_lf[i + f->frame_hdr->delta.lf.multi], 3);
  992|   157k|                    if (delta_lf == 3) {
  ------------------
  |  Branch (992:25): [True: 40.0k, False: 117k]
  ------------------
  993|  40.0k|                        const int n_bits = 1 + dav1d_msac_decode_bools(&ts->msac, 3);
  994|  40.0k|                        delta_lf = dav1d_msac_decode_bools(&ts->msac, n_bits) +
  995|  40.0k|                                   1 + (1 << n_bits);
  996|  40.0k|                    }
  997|   157k|                    if (delta_lf) {
  ------------------
  |  Branch (997:25): [True: 56.3k, False: 101k]
  ------------------
  998|  56.3k|                        if (dav1d_msac_decode_bool_equi(&ts->msac))
  ------------------
  |  |   53|  56.3k|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
  |  Branch (998:29): [True: 48.3k, False: 7.98k]
  ------------------
  999|  48.3k|                            delta_lf = -delta_lf;
 1000|  56.3k|                        delta_lf *= 1 << f->frame_hdr->delta.lf.res_log2;
 1001|  56.3k|                    }
 1002|   157k|                    ts->last_delta_lf.i8[i] =
 1003|   157k|                        iclip(ts->last_delta_lf.i8[i] + delta_lf, -63, 63);
 1004|   157k|                    if (have_delta_q && DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   157k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 157k]
  |  |  ------------------
  |  |   35|   157k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   157k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  |  Branch (1004:25): [True: 157k, False: 18.4E]
  ------------------
 1005|      0|                        printf("Post-delta_lf[%d:%d]: r=%d\n", i, delta_lf,
 1006|      0|                               ts->msac.rng);
 1007|   157k|                }
 1008|  51.9k|            }
 1009|   145k|        }
 1010|  2.08M|        if (ts->last_qidx == f->frame_hdr->quant.yac) {
  ------------------
  |  Branch (1010:13): [True: 1.97M, False: 113k]
  ------------------
 1011|       |            // assign frame-wide q values to this sb
 1012|  1.97M|            ts->dq = f->dq;
 1013|  1.97M|        } else if (ts->last_qidx != prev_qidx) {
  ------------------
  |  Branch (1013:20): [True: 24.0k, False: 89.0k]
  ------------------
 1014|       |            // find sb-specific quant parameters
 1015|  24.0k|            init_quant_tables(f->seq_hdr, f->frame_hdr, ts->last_qidx, ts->dqmem);
 1016|  24.0k|            ts->dq = ts->dqmem;
 1017|  24.0k|        }
 1018|  2.08M|        if (!ts->last_delta_lf.u32) {
  ------------------
  |  Branch (1018:13): [True: 2.04M, False: 39.3k]
  ------------------
 1019|       |            // assign frame-wide lf values to this sb
 1020|  2.04M|            ts->lflvl = f->lf.lvl;
 1021|  2.04M|        } else if (ts->last_delta_lf.u32 != prev_delta_lf) {
  ------------------
  |  Branch (1021:20): [True: 13.9k, False: 25.4k]
  ------------------
 1022|       |            // find sb-specific lf lvl parameters
 1023|  13.9k|            ts->lflvl = ts->lflvlmem;
 1024|  13.9k|            dav1d_calc_lf_values(ts->lflvlmem, f->frame_hdr, ts->last_delta_lf.i8);
 1025|  13.9k|        }
 1026|  2.08M|    }
 1027|       |
 1028|  9.02M|    if (b->skip_mode) {
  ------------------
  |  Branch (1028:9): [True: 30.9k, False: 8.99M]
  ------------------
 1029|  30.9k|        b->intra = 0;
 1030|  8.99M|    } else if (IS_INTER_OR_SWITCH(f->frame_hdr)) {
  ------------------
  |  |   36|  8.99M|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 3.77M, False: 5.21M]
  |  |  ------------------
  ------------------
 1031|  3.77M|        if (seg && (seg->ref >= 0 || seg->globalmv)) {
  ------------------
  |  Branch (1031:13): [True: 1.57M, False: 2.19M]
  |  Branch (1031:21): [True: 240k, False: 1.33M]
  |  Branch (1031:38): [True: 1.01M, False: 318k]
  ------------------
 1032|  1.25M|            b->intra = !seg->ref;
 1033|  2.51M|        } else {
 1034|  2.51M|            const int ictx = get_intra_ctx(t->a, &t->l, by4, bx4,
 1035|  2.51M|                                           have_top, have_left);
 1036|  2.51M|            b->intra = !dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  2.51M|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1037|  2.51M|                            ts->cdf.m.intra[ictx]);
 1038|  2.51M|            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  2.51M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 2.51M]
  |  |  ------------------
  |  |   35|  2.51M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  2.51M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1039|      0|                printf("Post-intra[%d]: r=%d\n", b->intra, ts->msac.rng);
 1040|  2.51M|        }
 1041|  5.21M|    } else if (f->frame_hdr->allow_intrabc) {
  ------------------
  |  Branch (1041:16): [True: 3.63M, False: 1.58M]
  ------------------
 1042|  3.63M|        b->intra = !dav1d_msac_decode_bool_adapt(&ts->msac, ts->cdf.m.intrabc);
  ------------------
  |  |   52|  3.63M|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1043|  3.63M|        if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  3.63M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 3.63M]
  |  |  ------------------
  |  |   35|  3.63M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  3.63M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1044|      0|            printf("Post-intrabcflag[%d]: r=%d\n", b->intra, ts->msac.rng);
 1045|  3.63M|    } else {
 1046|  1.58M|        b->intra = 1;
 1047|  1.58M|    }
 1048|       |
 1049|       |    // intra/inter-specific stuff
 1050|  9.02M|    if (b->intra) {
  ------------------
  |  Branch (1050:9): [True: 5.04M, False: 3.97M]
  ------------------
 1051|  5.04M|        uint16_t *const ymode_cdf = IS_INTER_OR_SWITCH(f->frame_hdr) ?
  ------------------
  |  |   36|  5.04M|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 526k, False: 4.51M]
  |  |  ------------------
  ------------------
 1052|   526k|            ts->cdf.m.y_mode[dav1d_ymode_size_context[bs]] :
 1053|  5.04M|            ts->cdf.kfym[dav1d_intra_mode_context[t->a->mode[bx4]]]
 1054|  4.51M|                        [dav1d_intra_mode_context[t->l.mode[by4]]];
 1055|  5.04M|        b->y_mode = dav1d_msac_decode_symbol_adapt16(&ts->msac, ymode_cdf,
  ------------------
  |  |   57|  5.04M|#define dav1d_msac_decode_symbol_adapt16(ctx, cdf, symb) ((ctx)->symbol_adapt16(ctx, cdf, symb))
  ------------------
 1056|  5.04M|                                                     N_INTRA_PRED_MODES - 1);
 1057|  5.04M|        if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  5.04M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 5.04M]
  |  |  ------------------
  |  |   35|  5.04M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  5.04M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1058|      0|            printf("Post-ymode[%d]: r=%d\n", b->y_mode, ts->msac.rng);
 1059|       |
 1060|       |        // angle delta
 1061|  5.04M|        if (b_dim[2] + b_dim[3] >= 2 && b->y_mode >= VERT_PRED &&
  ------------------
  |  Branch (1061:13): [True: 4.14M, False: 899k]
  |  Branch (1061:41): [True: 2.09M, False: 2.04M]
  ------------------
 1062|  2.09M|            b->y_mode <= VERT_LEFT_PRED)
  ------------------
  |  Branch (1062:13): [True: 1.14M, False: 952k]
  ------------------
 1063|  1.14M|        {
 1064|  1.14M|            uint16_t *const acdf = ts->cdf.m.angle_delta[b->y_mode - VERT_PRED];
 1065|  1.14M|            const int angle = dav1d_msac_decode_symbol_adapt8(&ts->msac, acdf, 6);
  ------------------
  |  |   48|  1.14M|#define dav1d_msac_decode_symbol_adapt8  dav1d_msac_decode_symbol_adapt8_sse2
  ------------------
 1066|  1.14M|            b->y_angle = angle - 3;
 1067|  3.90M|        } else {
 1068|  3.90M|            b->y_angle = 0;
 1069|  3.90M|        }
 1070|       |
 1071|  5.04M|        if (has_chroma) {
  ------------------
  |  Branch (1071:13): [True: 3.87M, False: 1.16M]
  ------------------
 1072|  3.87M|            const int cfl_allowed = f->frame_hdr->segmentation.lossless[b->seg_id] ?
  ------------------
  |  Branch (1072:37): [True: 64.7k, False: 3.80M]
  ------------------
 1073|  3.80M|                cbw4 == 1 && cbh4 == 1 : !!(cfl_allowed_mask & (1 << bs));
  ------------------
  |  Branch (1073:17): [True: 29.0k, False: 35.7k]
  |  Branch (1073:30): [True: 25.1k, False: 3.90k]
  ------------------
 1074|  3.87M|            uint16_t *const uvmode_cdf = ts->cdf.m.uv_mode[cfl_allowed][b->y_mode];
 1075|  3.87M|            b->uv_mode = dav1d_msac_decode_symbol_adapt16(&ts->msac, uvmode_cdf,
  ------------------
  |  |   57|  3.87M|#define dav1d_msac_decode_symbol_adapt16(ctx, cdf, symb) ((ctx)->symbol_adapt16(ctx, cdf, symb))
  ------------------
 1076|  3.87M|                             N_UV_INTRA_PRED_MODES - 1 - !cfl_allowed);
 1077|  3.87M|            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  3.87M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 3.87M]
  |  |  ------------------
  |  |   35|  3.87M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  3.87M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1078|      0|                printf("Post-uvmode[%d]: r=%d\n", b->uv_mode, ts->msac.rng);
 1079|       |
 1080|  3.87M|            b->uv_angle = 0;
 1081|  3.87M|            if (b->uv_mode == CFL_PRED) {
  ------------------
  |  Branch (1081:17): [True: 782k, False: 3.09M]
  ------------------
 1082|   782k|#define SIGN(a) (!!(a) + ((a) > 0))
 1083|   782k|                const int sign = dav1d_msac_decode_symbol_adapt8(&ts->msac,
  ------------------
  |  |   48|   782k|#define dav1d_msac_decode_symbol_adapt8  dav1d_msac_decode_symbol_adapt8_sse2
  ------------------
 1084|   782k|                                     ts->cdf.m.cfl_sign, 7) + 1;
 1085|   782k|                const int sign_u = sign * 0x56 >> 8, sign_v = sign - sign_u * 3;
 1086|   782k|                assert(sign_u == sign / 3);
  ------------------
  |  Branch (1086:17): [True: 782k, False: 18.4E]
  ------------------
 1087|   782k|                if (sign_u) {
  ------------------
  |  Branch (1087:21): [True: 739k, False: 42.9k]
  ------------------
 1088|   739k|                    const int ctx = (sign_u == 2) * 3 + sign_v;
 1089|   739k|                    b->cfl_alpha[0] = dav1d_msac_decode_symbol_adapt16(&ts->msac,
  ------------------
  |  |   57|   739k|#define dav1d_msac_decode_symbol_adapt16(ctx, cdf, symb) ((ctx)->symbol_adapt16(ctx, cdf, symb))
  ------------------
 1090|   739k|                                          ts->cdf.m.cfl_alpha[ctx], 15) + 1;
 1091|   739k|                    if (sign_u == 1) b->cfl_alpha[0] = -b->cfl_alpha[0];
  ------------------
  |  Branch (1091:25): [True: 437k, False: 302k]
  ------------------
 1092|   739k|                } else {
 1093|  42.9k|                    b->cfl_alpha[0] = 0;
 1094|  42.9k|                }
 1095|   782k|                if (sign_v) {
  ------------------
  |  Branch (1095:21): [True: 562k, False: 220k]
  ------------------
 1096|   562k|                    const int ctx = (sign_v == 2) * 3 + sign_u;
 1097|   562k|                    b->cfl_alpha[1] = dav1d_msac_decode_symbol_adapt16(&ts->msac,
  ------------------
  |  |   57|   562k|#define dav1d_msac_decode_symbol_adapt16(ctx, cdf, symb) ((ctx)->symbol_adapt16(ctx, cdf, symb))
  ------------------
 1098|   562k|                                          ts->cdf.m.cfl_alpha[ctx], 15) + 1;
 1099|   562k|                    if (sign_v == 1) b->cfl_alpha[1] = -b->cfl_alpha[1];
  ------------------
  |  Branch (1099:25): [True: 199k, False: 362k]
  ------------------
 1100|   562k|                } else {
 1101|   220k|                    b->cfl_alpha[1] = 0;
 1102|   220k|                }
 1103|   782k|#undef SIGN
 1104|   782k|                if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   782k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 782k]
  |  |  ------------------
  |  |   35|   782k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   782k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1105|      0|                    printf("Post-uvalphas[%d/%d]: r=%d\n",
 1106|      0|                           b->cfl_alpha[0], b->cfl_alpha[1], ts->msac.rng);
 1107|  3.09M|            } else if (b_dim[2] + b_dim[3] >= 2 && b->uv_mode >= VERT_PRED &&
  ------------------
  |  Branch (1107:24): [True: 2.69M, False: 399k]
  |  Branch (1107:52): [True: 1.49M, False: 1.19M]
  ------------------
 1108|  1.49M|                       b->uv_mode <= VERT_LEFT_PRED)
  ------------------
  |  Branch (1108:24): [True: 766k, False: 730k]
  ------------------
 1109|   766k|            {
 1110|   766k|                uint16_t *const acdf = ts->cdf.m.angle_delta[b->uv_mode - VERT_PRED];
 1111|   766k|                const int angle = dav1d_msac_decode_symbol_adapt8(&ts->msac, acdf, 6);
  ------------------
  |  |   48|   766k|#define dav1d_msac_decode_symbol_adapt8  dav1d_msac_decode_symbol_adapt8_sse2
  ------------------
 1112|   766k|                b->uv_angle = angle - 3;
 1113|   766k|            }
 1114|  3.87M|        }
 1115|       |
 1116|  5.04M|        b->pal_sz[0] = b->pal_sz[1] = 0;
 1117|  5.04M|        if (f->frame_hdr->allow_screen_content_tools &&
  ------------------
  |  Branch (1117:13): [True: 3.07M, False: 1.96M]
  ------------------
 1118|  3.07M|            imax(bw4, bh4) <= 16 && bw4 + bh4 >= 4)
  ------------------
  |  Branch (1118:13): [True: 2.91M, False: 161k]
  |  Branch (1118:37): [True: 2.45M, False: 457k]
  ------------------
 1119|  2.45M|        {
 1120|  2.45M|            const int sz_ctx = b_dim[2] + b_dim[3] - 2;
 1121|  2.45M|            if (b->y_mode == DC_PRED) {
  ------------------
  |  Branch (1121:17): [True: 1.28M, False: 1.17M]
  ------------------
 1122|  1.28M|                const int pal_ctx = (t->a->pal_sz[bx4] > 0) + (t->l.pal_sz[by4] > 0);
 1123|  1.28M|                const int use_y_pal = dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  1.28M|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1124|  1.28M|                                          ts->cdf.m.pal_y[sz_ctx][pal_ctx]);
 1125|  1.28M|                if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  1.28M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 1.28M]
  |  |  ------------------
  |  |   35|  1.28M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  1.28M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1126|      0|                    printf("Post-y_pal[%d]: r=%d\n", use_y_pal, ts->msac.rng);
 1127|  1.28M|                if (use_y_pal)
  ------------------
  |  Branch (1127:21): [True: 119k, False: 1.16M]
  ------------------
 1128|   119k|                    f->bd_fn.read_pal_plane(t, b, 0, sz_ctx, bx4, by4);
 1129|  1.28M|            }
 1130|       |
 1131|  2.45M|            if (has_chroma && b->uv_mode == DC_PRED) {
  ------------------
  |  Branch (1131:17): [True: 2.05M, False: 408k]
  |  Branch (1131:31): [True: 690k, False: 1.36M]
  ------------------
 1132|   690k|                const int pal_ctx = b->pal_sz[0] > 0;
 1133|   690k|                const int use_uv_pal = dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   690k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1134|   690k|                                           ts->cdf.m.pal_uv[pal_ctx]);
 1135|   690k|                if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   690k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 690k]
  |  |  ------------------
  |  |   35|   690k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   690k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1136|      0|                    printf("Post-uv_pal[%d]: r=%d\n", use_uv_pal, ts->msac.rng);
 1137|   690k|                if (use_uv_pal) // see aomedia bug 2183 for why we use luma coordinates
  ------------------
  |  Branch (1137:21): [True: 25.2k, False: 665k]
  ------------------
 1138|  25.2k|                    f->bd_fn.read_pal_uv(t, b, sz_ctx, bx4, by4);
 1139|   690k|            }
 1140|  2.45M|        }
 1141|       |
 1142|  5.04M|        if (b->y_mode == DC_PRED && !b->pal_sz[0] &&
  ------------------
  |  Branch (1142:13): [True: 2.44M, False: 2.59M]
  |  Branch (1142:37): [True: 2.32M, False: 119k]
  ------------------
 1143|  2.32M|            imax(b_dim[2], b_dim[3]) <= 3 && f->seq_hdr->filter_intra)
  ------------------
  |  Branch (1143:13): [True: 1.65M, False: 674k]
  |  Branch (1143:46): [True: 1.30M, False: 342k]
  ------------------
 1144|  1.30M|        {
 1145|  1.30M|            const int is_filter = dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  1.30M|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1146|  1.30M|                                      ts->cdf.m.use_filter_intra[bs]);
 1147|  1.30M|            if (is_filter) {
  ------------------
  |  Branch (1147:17): [True: 839k, False: 470k]
  ------------------
 1148|   839k|                b->y_mode = FILTER_PRED;
 1149|   839k|                b->y_angle = dav1d_msac_decode_symbol_adapt8(&ts->msac,
  ------------------
  |  |   48|   839k|#define dav1d_msac_decode_symbol_adapt8  dav1d_msac_decode_symbol_adapt8_sse2
  ------------------
 1150|   839k|                                 ts->cdf.m.filter_intra, 4);
 1151|   839k|            }
 1152|  1.30M|            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  1.30M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 1.30M]
  |  |  ------------------
  |  |   35|  1.30M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  1.30M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1153|      0|                printf("Post-filterintramode[%d/%d]: r=%d\n",
 1154|      0|                       b->y_mode, b->y_angle, ts->msac.rng);
 1155|  1.30M|        }
 1156|       |
 1157|  5.04M|        if (b->pal_sz[0]) {
  ------------------
  |  Branch (1157:13): [True: 119k, False: 4.92M]
  ------------------
 1158|   119k|            uint8_t *pal_idx;
 1159|   119k|            if (t->frame_thread.pass) {
  ------------------
  |  Branch (1159:17): [True: 119k, False: 9]
  ------------------
 1160|   119k|                const int p = t->frame_thread.pass & 1;
 1161|   119k|                assert(ts->frame_thread[p].pal_idx);
  ------------------
  |  Branch (1161:17): [True: 119k, False: 18.4E]
  ------------------
 1162|   119k|                pal_idx = ts->frame_thread[p].pal_idx;
 1163|   119k|                ts->frame_thread[p].pal_idx += bw4 * bh4 * 8;
 1164|   119k|            } else
 1165|      9|                pal_idx = t->scratch.pal_idx_y;
 1166|   119k|            read_pal_indices(t, pal_idx, b->pal_sz[0], 0, w4, h4, bw4, bh4);
 1167|   119k|            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   119k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 119k]
  |  |  ------------------
  |  |   35|   119k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   119k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1168|      0|                printf("Post-y-pal-indices: r=%d\n", ts->msac.rng);
 1169|   119k|        }
 1170|       |
 1171|  5.04M|        if (has_chroma && b->pal_sz[1]) {
  ------------------
  |  Branch (1171:13): [True: 3.87M, False: 1.16M]
  |  Branch (1171:27): [True: 25.2k, False: 3.85M]
  ------------------
 1172|  25.2k|            uint8_t *pal_idx;
 1173|  25.2k|            if (t->frame_thread.pass) {
  ------------------
  |  Branch (1173:17): [True: 25.2k, False: 0]
  ------------------
 1174|  25.2k|                const int p = t->frame_thread.pass & 1;
 1175|  25.2k|                assert(ts->frame_thread[p].pal_idx);
  ------------------
  |  Branch (1175:17): [True: 25.2k, False: 0]
  ------------------
 1176|  25.2k|                pal_idx = ts->frame_thread[p].pal_idx;
 1177|  25.2k|                ts->frame_thread[p].pal_idx += cbw4 * cbh4 * 8;
 1178|  25.2k|            } else
 1179|      0|                pal_idx = t->scratch.pal_idx_uv;
 1180|  25.2k|            read_pal_indices(t, pal_idx, b->pal_sz[1], 1, cw4, ch4, cbw4, cbh4);
 1181|  25.2k|            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  25.2k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 25.2k]
  |  |  ------------------
  |  |   35|  25.2k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  25.2k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1182|      0|                printf("Post-uv-pal-indices: r=%d\n", ts->msac.rng);
 1183|  25.2k|        }
 1184|       |
 1185|  5.04M|        const TxfmInfo *t_dim;
 1186|  5.04M|        if (f->frame_hdr->segmentation.lossless[b->seg_id]) {
  ------------------
  |  Branch (1186:13): [True: 115k, False: 4.92M]
  ------------------
 1187|   115k|            b->tx = b->uvtx = (int) TX_4X4;
 1188|   115k|            t_dim = &dav1d_txfm_dimensions[TX_4X4];
 1189|  4.92M|        } else {
 1190|  4.92M|            b->tx = dav1d_max_txfm_size_for_bs[bs][0];
 1191|  4.92M|            b->uvtx = dav1d_max_txfm_size_for_bs[bs][f->cur.p.layout];
 1192|  4.92M|            t_dim = &dav1d_txfm_dimensions[b->tx];
 1193|  4.92M|            if (f->frame_hdr->txfm_mode == DAV1D_TX_SWITCHABLE && t_dim->max > TX_4X4) {
  ------------------
  |  Branch (1193:17): [True: 1.34M, False: 3.58M]
  |  Branch (1193:67): [True: 1.23M, False: 105k]
  ------------------
 1194|  1.23M|                const int tctx = get_tx_ctx(t->a, &t->l, t_dim, by4, bx4);
 1195|  1.23M|                uint16_t *const tx_cdf = ts->cdf.m.txsz[t_dim->max - 1][tctx];
 1196|  1.23M|                int depth = dav1d_msac_decode_symbol_adapt4(&ts->msac, tx_cdf,
  ------------------
  |  |   47|  1.23M|#define dav1d_msac_decode_symbol_adapt4  dav1d_msac_decode_symbol_adapt4_sse2
  ------------------
 1197|  1.23M|                                imin(t_dim->max, 2));
 1198|       |
 1199|  1.79M|                while (depth--) {
  ------------------
  |  Branch (1199:24): [True: 558k, False: 1.23M]
  ------------------
 1200|   558k|                    b->tx = t_dim->sub;
 1201|   558k|                    t_dim = &dav1d_txfm_dimensions[b->tx];
 1202|   558k|                }
 1203|  1.23M|            }
 1204|  4.92M|            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  4.92M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 4.92M]
  |  |  ------------------
  |  |   35|  4.92M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  4.92M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1205|      0|                printf("Post-tx[%d]: r=%d\n", b->tx, ts->msac.rng);
 1206|  4.92M|        }
 1207|       |
 1208|       |        // reconstruction
 1209|  5.04M|        if (t->frame_thread.pass == 1) {
  ------------------
  |  Branch (1209:13): [True: 5.04M, False: 18.4E]
  ------------------
 1210|  5.04M|            f->bd_fn.read_coef_blocks(t, bs, b);
 1211|  18.4E|        } else {
 1212|  18.4E|            f->bd_fn.recon_b_intra(t, bs, intra_edge_flags, b);
 1213|  18.4E|        }
 1214|       |
 1215|  5.04M|        if (f->frame_hdr->loopfilter.level_y[0] ||
  ------------------
  |  Branch (1215:13): [True: 1.49M, False: 3.54M]
  ------------------
 1216|  3.54M|            f->frame_hdr->loopfilter.level_y[1])
  ------------------
  |  Branch (1216:13): [True: 291k, False: 3.25M]
  ------------------
 1217|  1.78M|        {
 1218|  1.78M|            dav1d_create_lf_mask_intra(t->lf_mask, f->lf.level, f->b4_stride,
 1219|  1.78M|                                       (const uint8_t (*)[8][2])
 1220|  1.78M|                                       &ts->lflvl[b->seg_id][0][0][0],
 1221|  1.78M|                                       t->bx, t->by, f->w4, f->h4, bs,
 1222|  1.78M|                                       b->tx, b->uvtx, f->cur.p.layout,
 1223|  1.78M|                                       &t->a->tx_lpf_y[bx4], &t->l.tx_lpf_y[by4],
 1224|  1.78M|                                       has_chroma ? &t->a->tx_lpf_uv[cbx4] : NULL,
  ------------------
  |  Branch (1224:40): [True: 1.25M, False: 537k]
  ------------------
 1225|  1.78M|                                       has_chroma ? &t->l.tx_lpf_uv[cby4] : NULL);
  ------------------
  |  Branch (1225:40): [True: 1.25M, False: 537k]
  ------------------
 1226|  1.78M|        }
 1227|       |        // update contexts
 1228|  5.04M|        const enum IntraPredMode y_mode_nofilt =
 1229|  5.04M|            b->y_mode == FILTER_PRED ? DC_PRED : b->y_mode;
  ------------------
  |  Branch (1229:13): [True: 839k, False: 4.20M]
  ------------------
 1230|  5.04M|        BlockContext *edge = t->a;
 1231|  15.1M|        for (int i = 0, off = bx4; i < 2; i++, off = by4, edge = &t->l) {
  ------------------
  |  Branch (1231:36): [True: 10.0M, False: 5.04M]
  ------------------
 1232|  10.0M|            int t_lsz = ((uint8_t *) &t_dim->lw)[i]; // lw then lh
 1233|  10.0M|#define set_ctx(rep_macro) \
 1234|  10.0M|            rep_macro(edge->tx_intra, off, t_lsz); \
 1235|  10.0M|            rep_macro(edge->tx, off, t_lsz); \
 1236|  10.0M|            rep_macro(edge->mode, off, y_mode_nofilt); \
 1237|  10.0M|            rep_macro(edge->pal_sz, off, b->pal_sz[0]); \
 1238|  10.0M|            rep_macro(edge->seg_pred, off, seg_pred); \
 1239|  10.0M|            rep_macro(edge->skip_mode, off, 0); \
 1240|  10.0M|            rep_macro(edge->intra, off, 1); \
 1241|  10.0M|            rep_macro(edge->skip, off, b->skip); \
 1242|       |            /* see aomedia bug 2183 for why we use luma coordinates here */ \
 1243|  10.0M|            rep_macro(t->pal_sz_uv[i], off, (has_chroma ? b->pal_sz[1] : 0)); \
 1244|  10.0M|            if (IS_INTER_OR_SWITCH(f->frame_hdr)) { \
 1245|  10.0M|                rep_macro(edge->comp_type, off, COMP_INTER_NONE); \
 1246|  10.0M|                rep_macro(edge->ref[0], off, ((uint8_t) -1)); \
 1247|  10.0M|                rep_macro(edge->ref[1], off, ((uint8_t) -1)); \
 1248|  10.0M|                rep_macro(edge->filter[0], off, DAV1D_N_SWITCHABLE_FILTERS); \
 1249|  10.0M|                rep_macro(edge->filter[1], off, DAV1D_N_SWITCHABLE_FILTERS); \
 1250|  10.0M|            }
 1251|  10.0M|            case_set(b_dim[2 + i]);
  ------------------
  |  |   70|  10.0M|    switch (var) { \
  |  |   71|  1.78M|    case 0: set_ctx(set_ctx1); break; \
  |  |  ------------------
  |  |  |  | 1234|  1.78M|            rep_macro(edge->tx_intra, off, t_lsz); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|  1.78M|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|  1.78M|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1235|  1.78M|            rep_macro(edge->tx, off, t_lsz); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|  1.78M|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|  1.78M|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1236|  1.78M|            rep_macro(edge->mode, off, y_mode_nofilt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|  1.78M|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|  1.78M|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1237|  1.78M|            rep_macro(edge->pal_sz, off, b->pal_sz[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|  1.78M|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|  1.78M|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1238|  1.78M|            rep_macro(edge->seg_pred, off, seg_pred); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|  1.78M|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|  1.78M|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1239|  1.78M|            rep_macro(edge->skip_mode, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|  1.78M|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|  1.78M|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1240|  1.78M|            rep_macro(edge->intra, off, 1); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|  1.78M|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|  1.78M|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1241|  1.78M|            rep_macro(edge->skip, off, b->skip); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|  1.78M|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|  1.78M|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1242|  1.78M|            /* see aomedia bug 2183 for why we use luma coordinates here */ \
  |  |  |  | 1243|  1.78M|            rep_macro(t->pal_sz_uv[i], off, (has_chroma ? b->pal_sz[1] : 0)); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|  1.78M|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|  3.56M|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (56:43): [True: 1.09M, False: 689k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1244|  1.78M|            if (IS_INTER_OR_SWITCH(f->frame_hdr)) { \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  1.78M|    ((frame_header)->frame_type & 1)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (36:5): [True: 226k, False: 1.55M]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1245|   226k|                rep_macro(edge->comp_type, off, COMP_INTER_NONE); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   226k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   226k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1246|   226k|                rep_macro(edge->ref[0], off, ((uint8_t) -1)); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   226k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   226k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1247|   226k|                rep_macro(edge->ref[1], off, ((uint8_t) -1)); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   226k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   226k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1248|   226k|                rep_macro(edge->filter[0], off, DAV1D_N_SWITCHABLE_FILTERS); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   226k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   226k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1249|   226k|                rep_macro(edge->filter[1], off, DAV1D_N_SWITCHABLE_FILTERS); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   226k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   226k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1250|   226k|            }
  |  |  ------------------
  |  |  |  Branch (71:5): [True: 1.78M, False: 8.28M]
  |  |  ------------------
  |  |   72|  2.98M|    case 1: set_ctx(set_ctx2); break; \
  |  |  ------------------
  |  |  |  | 1234|  2.98M|            rep_macro(edge->tx_intra, off, t_lsz); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|  2.98M|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  2.98M|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1235|  2.98M|            rep_macro(edge->tx, off, t_lsz); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|  2.98M|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  2.98M|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1236|  2.98M|            rep_macro(edge->mode, off, y_mode_nofilt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|  2.98M|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  2.98M|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1237|  2.98M|            rep_macro(edge->pal_sz, off, b->pal_sz[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|  2.98M|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  2.98M|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1238|  2.98M|            rep_macro(edge->seg_pred, off, seg_pred); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|  2.98M|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  2.98M|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1239|  2.98M|            rep_macro(edge->skip_mode, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|  2.98M|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  2.98M|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1240|  2.98M|            rep_macro(edge->intra, off, 1); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|  2.98M|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  2.98M|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1241|  2.98M|            rep_macro(edge->skip, off, b->skip); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|  2.98M|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  2.98M|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1242|  2.98M|            /* see aomedia bug 2183 for why we use luma coordinates here */ \
  |  |  |  | 1243|  2.98M|            rep_macro(t->pal_sz_uv[i], off, (has_chroma ? b->pal_sz[1] : 0)); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|  2.98M|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  5.97M|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (58:45): [True: 2.53M, False: 454k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1244|  2.98M|            if (IS_INTER_OR_SWITCH(f->frame_hdr)) { \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  2.98M|    ((frame_header)->frame_type & 1)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (36:5): [True: 390k, False: 2.59M]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1245|   390k|                rep_macro(edge->comp_type, off, COMP_INTER_NONE); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   390k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   390k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1246|   390k|                rep_macro(edge->ref[0], off, ((uint8_t) -1)); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   390k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   390k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1247|   390k|                rep_macro(edge->ref[1], off, ((uint8_t) -1)); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   390k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   390k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1248|   390k|                rep_macro(edge->filter[0], off, DAV1D_N_SWITCHABLE_FILTERS); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   390k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   390k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1249|   390k|                rep_macro(edge->filter[1], off, DAV1D_N_SWITCHABLE_FILTERS); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   390k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   390k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1250|   390k|            }
  |  |  ------------------
  |  |  |  Branch (72:5): [True: 2.98M, False: 7.08M]
  |  |  ------------------
  |  |   73|  2.50M|    case 2: set_ctx(set_ctx4); break; \
  |  |  ------------------
  |  |  |  | 1234|  2.50M|            rep_macro(edge->tx_intra, off, t_lsz); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|  2.50M|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|  2.50M|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1235|  2.50M|            rep_macro(edge->tx, off, t_lsz); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|  2.50M|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|  2.50M|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1236|  2.50M|            rep_macro(edge->mode, off, y_mode_nofilt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|  2.50M|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|  2.50M|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1237|  2.50M|            rep_macro(edge->pal_sz, off, b->pal_sz[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|  2.50M|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|  2.50M|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1238|  2.50M|            rep_macro(edge->seg_pred, off, seg_pred); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|  2.50M|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|  2.50M|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1239|  2.50M|            rep_macro(edge->skip_mode, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|  2.50M|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|  2.50M|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1240|  2.50M|            rep_macro(edge->intra, off, 1); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|  2.50M|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|  2.50M|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1241|  2.50M|            rep_macro(edge->skip, off, b->skip); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|  2.50M|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|  2.50M|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1242|  2.50M|            /* see aomedia bug 2183 for why we use luma coordinates here */ \
  |  |  |  | 1243|  2.50M|            rep_macro(t->pal_sz_uv[i], off, (has_chroma ? b->pal_sz[1] : 0)); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|  2.50M|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|  5.01M|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (60:45): [True: 2.15M, False: 356k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1244|  2.50M|            if (IS_INTER_OR_SWITCH(f->frame_hdr)) { \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  2.50M|    ((frame_header)->frame_type & 1)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (36:5): [True: 286k, False: 2.22M]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1245|   286k|                rep_macro(edge->comp_type, off, COMP_INTER_NONE); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   286k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   286k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1246|   286k|                rep_macro(edge->ref[0], off, ((uint8_t) -1)); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   286k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   286k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1247|   286k|                rep_macro(edge->ref[1], off, ((uint8_t) -1)); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   286k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   286k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1248|   286k|                rep_macro(edge->filter[0], off, DAV1D_N_SWITCHABLE_FILTERS); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   286k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   286k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1249|   286k|                rep_macro(edge->filter[1], off, DAV1D_N_SWITCHABLE_FILTERS); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   286k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   286k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1250|   286k|            }
  |  |  ------------------
  |  |  |  Branch (73:5): [True: 2.50M, False: 7.55M]
  |  |  ------------------
  |  |   74|  1.37M|    case 3: set_ctx(set_ctx8); break; \
  |  |  ------------------
  |  |  |  | 1234|  1.37M|            rep_macro(edge->tx_intra, off, t_lsz); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|  1.37M|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|  1.37M|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1235|  1.37M|            rep_macro(edge->tx, off, t_lsz); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|  1.37M|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|  1.37M|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1236|  1.37M|            rep_macro(edge->mode, off, y_mode_nofilt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|  1.37M|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|  1.37M|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1237|  1.37M|            rep_macro(edge->pal_sz, off, b->pal_sz[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|  1.37M|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|  1.37M|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1238|  1.37M|            rep_macro(edge->seg_pred, off, seg_pred); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|  1.37M|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|  1.37M|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1239|  1.37M|            rep_macro(edge->skip_mode, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|  1.37M|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|  1.37M|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1240|  1.37M|            rep_macro(edge->intra, off, 1); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|  1.37M|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|  1.37M|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1241|  1.37M|            rep_macro(edge->skip, off, b->skip); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|  1.37M|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|  1.37M|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1242|  1.37M|            /* see aomedia bug 2183 for why we use luma coordinates here */ \
  |  |  |  | 1243|  1.37M|            rep_macro(t->pal_sz_uv[i], off, (has_chroma ? b->pal_sz[1] : 0)); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|  1.37M|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|  2.74M|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (62:45): [True: 1.23M, False: 135k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1244|  1.37M|            if (IS_INTER_OR_SWITCH(f->frame_hdr)) { \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  1.37M|    ((frame_header)->frame_type & 1)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (36:5): [True: 82.9k, False: 1.28M]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1245|  82.9k|                rep_macro(edge->comp_type, off, COMP_INTER_NONE); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|  82.9k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|  82.9k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1246|  82.9k|                rep_macro(edge->ref[0], off, ((uint8_t) -1)); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|  82.9k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|  82.9k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1247|  82.9k|                rep_macro(edge->ref[1], off, ((uint8_t) -1)); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|  82.9k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|  82.9k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1248|  82.9k|                rep_macro(edge->filter[0], off, DAV1D_N_SWITCHABLE_FILTERS); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|  82.9k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|  82.9k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1249|  82.9k|                rep_macro(edge->filter[1], off, DAV1D_N_SWITCHABLE_FILTERS); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|  82.9k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|  82.9k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1250|  82.9k|            }
  |  |  ------------------
  |  |  |  Branch (74:5): [True: 1.37M, False: 8.69M]
  |  |  ------------------
  |  |   75|  1.08M|    case 4: set_ctx(set_ctx16); break; \
  |  |  ------------------
  |  |  |  | 1234|  1.08M|            rep_macro(edge->tx_intra, off, t_lsz); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|  1.08M|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|  1.08M|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|  1.08M|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|  1.08M|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 1.08M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1235|  1.08M|            rep_macro(edge->tx, off, t_lsz); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|  1.08M|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|  1.08M|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|  1.08M|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|  1.08M|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 1.08M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1236|  1.08M|            rep_macro(edge->mode, off, y_mode_nofilt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|  1.08M|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|  1.08M|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|  1.08M|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|  1.08M|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 1.08M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1237|  1.08M|            rep_macro(edge->pal_sz, off, b->pal_sz[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|  1.08M|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|  1.08M|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|  1.08M|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|  1.08M|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 1.08M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1238|  1.08M|            rep_macro(edge->seg_pred, off, seg_pred); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|  1.08M|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|  1.08M|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|  1.08M|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|  1.08M|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 1.08M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1239|  1.08M|            rep_macro(edge->skip_mode, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|  1.08M|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|  1.08M|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|  1.08M|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|  1.08M|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 1.08M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1240|  1.08M|            rep_macro(edge->intra, off, 1); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|  1.08M|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|  1.08M|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|  1.08M|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|  1.08M|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 1.08M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1241|  1.08M|            rep_macro(edge->skip, off, b->skip); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|  1.08M|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|  1.08M|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|  1.08M|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|  1.08M|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 1.08M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1242|  1.08M|            /* see aomedia bug 2183 for why we use luma coordinates here */ \
  |  |  |  | 1243|  1.08M|            rep_macro(t->pal_sz_uv[i], off, (has_chroma ? b->pal_sz[1] : 0)); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|  1.08M|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|  1.08M|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|  2.17M|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (64:29): [True: 451k, False: 635k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   65|  1.08M|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 1.08M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1244|  1.08M|            if (IS_INTER_OR_SWITCH(f->frame_hdr)) { \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  1.08M|    ((frame_header)->frame_type & 1)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (36:5): [True: 47.5k, False: 1.04M]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1245|  47.5k|                rep_macro(edge->comp_type, off, COMP_INTER_NONE); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|  47.5k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|  47.5k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|  47.5k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|  47.5k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 47.5k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1246|  47.5k|                rep_macro(edge->ref[0], off, ((uint8_t) -1)); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|  47.5k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|  47.5k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|  47.5k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|  47.5k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 47.5k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1247|  47.5k|                rep_macro(edge->ref[1], off, ((uint8_t) -1)); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|  47.5k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|  47.5k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|  47.5k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|  47.5k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 47.5k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1248|  47.5k|                rep_macro(edge->filter[0], off, DAV1D_N_SWITCHABLE_FILTERS); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|  47.5k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|  47.5k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|  47.5k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|  47.5k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 47.5k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1249|  47.5k|                rep_macro(edge->filter[1], off, DAV1D_N_SWITCHABLE_FILTERS); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|  47.5k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|  47.5k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|  47.5k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|  47.5k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 47.5k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1250|  47.5k|            }
  |  |  ------------------
  |  |  |  Branch (75:5): [True: 1.08M, False: 8.98M]
  |  |  ------------------
  |  |   76|   368k|    case 5: set_ctx(set_ctx32); break; \
  |  |  ------------------
  |  |  |  | 1234|   368k|            rep_macro(edge->tx_intra, off, t_lsz); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|   368k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|   368k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|   368k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|   368k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 368k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1235|   368k|            rep_macro(edge->tx, off, t_lsz); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|   368k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|   368k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|   368k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|   368k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 368k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1236|   368k|            rep_macro(edge->mode, off, y_mode_nofilt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|   368k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|   368k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|   368k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|   368k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 368k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1237|   368k|            rep_macro(edge->pal_sz, off, b->pal_sz[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|   368k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|   368k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|   368k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|   368k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 368k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1238|   368k|            rep_macro(edge->seg_pred, off, seg_pred); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|   368k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|   368k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|   368k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|   368k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 368k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1239|   368k|            rep_macro(edge->skip_mode, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|   368k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|   368k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|   368k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|   368k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 368k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1240|   368k|            rep_macro(edge->intra, off, 1); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|   368k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|   368k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|   368k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|   368k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 368k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1241|   368k|            rep_macro(edge->skip, off, b->skip); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|   368k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|   368k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|   368k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|   368k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 368k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1242|   368k|            /* see aomedia bug 2183 for why we use luma coordinates here */ \
  |  |  |  | 1243|   368k|            rep_macro(t->pal_sz_uv[i], off, (has_chroma ? b->pal_sz[1] : 0)); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|   368k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|   368k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|   737k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (67:29): [True: 294k, False: 74.3k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   68|   368k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 368k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1244|   368k|            if (IS_INTER_OR_SWITCH(f->frame_hdr)) { \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|   368k|    ((frame_header)->frame_type & 1)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (36:5): [True: 16.8k, False: 351k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1245|  16.8k|                rep_macro(edge->comp_type, off, COMP_INTER_NONE); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  16.8k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  16.8k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  16.8k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  16.8k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 16.8k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1246|  16.8k|                rep_macro(edge->ref[0], off, ((uint8_t) -1)); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  16.8k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  16.8k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  16.8k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  16.8k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 16.8k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1247|  16.8k|                rep_macro(edge->ref[1], off, ((uint8_t) -1)); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  16.8k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  16.8k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  16.8k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  16.8k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 16.8k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1248|  16.8k|                rep_macro(edge->filter[0], off, DAV1D_N_SWITCHABLE_FILTERS); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  16.8k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  16.8k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  16.8k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  16.8k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 16.8k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1249|  16.8k|                rep_macro(edge->filter[1], off, DAV1D_N_SWITCHABLE_FILTERS); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  16.8k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  16.8k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  16.8k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  16.8k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 16.8k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1250|  16.8k|            }
  |  |  ------------------
  |  |  |  Branch (76:5): [True: 368k, False: 9.69M]
  |  |  ------------------
  |  |   77|      0|    default: assert(0); \
  |  |  ------------------
  |  |  |  Branch (77:5): [True: 0, False: 10.0M]
  |  |  ------------------
  |  |   78|  10.0M|    }
  ------------------
  |  Branch (1251:13): [Folded, False: 0]
  ------------------
 1252|  10.0M|#undef set_ctx
 1253|  10.0M|        }
 1254|  5.04M|        if (b->pal_sz[0])
  ------------------
  |  Branch (1254:13): [True: 119k, False: 4.92M]
  ------------------
 1255|   119k|            f->bd_fn.copy_pal_block_y(t, bx4, by4, bw4, bh4);
 1256|  5.04M|        if (has_chroma) {
  ------------------
  |  Branch (1256:13): [True: 3.87M, False: 1.16M]
  ------------------
 1257|  3.87M|            uint8_t uv_mode = b->uv_mode;
 1258|  3.87M|            dav1d_memset_pow2[ulog2(cbw4)](&t->a->uvmode[cbx4], uv_mode);
 1259|  3.87M|            dav1d_memset_pow2[ulog2(cbh4)](&t->l.uvmode[cby4], uv_mode);
 1260|  3.87M|            if (b->pal_sz[1])
  ------------------
  |  Branch (1260:17): [True: 25.2k, False: 3.84M]
  ------------------
 1261|  25.2k|                f->bd_fn.copy_pal_block_uv(t, bx4, by4, bw4, bh4);
 1262|  3.87M|        }
 1263|  5.04M|        if (IS_INTER_OR_SWITCH(f->frame_hdr) || f->frame_hdr->allow_intrabc)
  ------------------
  |  |   36|  10.0M|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 515k, False: 4.52M]
  |  |  ------------------
  ------------------
  |  Branch (1263:49): [True: 2.90M, False: 1.62M]
  ------------------
 1264|  3.42M|            splat_intraref(f->c, t, bs, bw4, bh4);
 1265|  5.04M|    } else if (IS_KEY_OR_INTRA(f->frame_hdr)) {
  ------------------
  |  |   43|  3.97M|    (!IS_INTER_OR_SWITCH(frame_header))
  |  |  ------------------
  |  |  |  |   36|  3.97M|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (43:5): [True: 745k, False: 3.23M]
  |  |  ------------------
  ------------------
 1266|       |        // intra block copy
 1267|   745k|        refmvs_candidate mvstack[8];
 1268|   745k|        int n_mvs, ctx;
 1269|   745k|        dav1d_refmvs_find(&t->rt, mvstack, &n_mvs, &ctx,
 1270|   745k|                          (union refmvs_refpair) { .ref = { 0, -1 }},
 1271|   745k|                          bs, intra_edge_flags, t->by, t->bx);
 1272|       |
 1273|   745k|        if (mvstack[0].mv.mv[0].n)
  ------------------
  |  Branch (1273:13): [True: 613k, False: 132k]
  ------------------
 1274|   613k|            b->mv[0] = mvstack[0].mv.mv[0];
 1275|   132k|        else if (mvstack[1].mv.mv[0].n)
  ------------------
  |  Branch (1275:18): [True: 0, False: 132k]
  ------------------
 1276|      0|            b->mv[0] = mvstack[1].mv.mv[0];
 1277|   132k|        else {
 1278|   132k|            if (t->by - (16 << f->seq_hdr->sb128) < ts->tiling.row_start) {
  ------------------
  |  Branch (1278:17): [True: 129k, False: 2.58k]
  ------------------
 1279|   129k|                b->mv[0].y = 0;
 1280|   129k|                b->mv[0].x = -(512 << f->seq_hdr->sb128) - 2048;
 1281|   129k|            } else {
 1282|  2.58k|                b->mv[0].y = -(512 << f->seq_hdr->sb128);
 1283|  2.58k|                b->mv[0].x = 0;
 1284|  2.58k|            }
 1285|   132k|        }
 1286|       |
 1287|   745k|        const union mv ref = b->mv[0];
 1288|   745k|        read_mv_residual(ts, &b->mv[0], -1);
 1289|       |
 1290|       |        // clip intrabc motion vector to decoded parts of current tile
 1291|   745k|        int border_left = ts->tiling.col_start * 4;
 1292|   745k|        int border_top  = ts->tiling.row_start * 4;
 1293|   745k|        if (has_chroma) {
  ------------------
  |  Branch (1293:13): [True: 693k, False: 52.0k]
  ------------------
 1294|   693k|            if (bw4 < 2 &&  ss_hor)
  ------------------
  |  Branch (1294:17): [True: 223k, False: 470k]
  |  Branch (1294:29): [True: 9.18k, False: 214k]
  ------------------
 1295|  9.18k|                border_left += 4;
 1296|   693k|            if (bh4 < 2 &&  ss_ver)
  ------------------
  |  Branch (1296:17): [True: 222k, False: 470k]
  |  Branch (1296:29): [True: 12.2k, False: 210k]
  ------------------
 1297|  12.2k|                border_top  += 4;
 1298|   693k|        }
 1299|   745k|        int src_left   = t->bx * 4 + (b->mv[0].x >> 3);
 1300|   745k|        int src_top    = t->by * 4 + (b->mv[0].y >> 3);
 1301|   745k|        int src_right  = src_left + bw4 * 4;
 1302|   745k|        int src_bottom = src_top  + bh4 * 4;
 1303|   745k|        const int border_right = ((ts->tiling.col_end + (bw4 - 1)) & ~(bw4 - 1)) * 4;
 1304|       |
 1305|       |        // check against left or right tile boundary and adjust if necessary
 1306|   745k|        if (src_left < border_left) {
  ------------------
  |  Branch (1306:13): [True: 126k, False: 618k]
  ------------------
 1307|   126k|            src_right += border_left - src_left;
 1308|   126k|            src_left  += border_left - src_left;
 1309|   618k|        } else if (src_right > border_right) {
  ------------------
  |  Branch (1309:20): [True: 198k, False: 420k]
  ------------------
 1310|   198k|            src_left  -= src_right - border_right;
 1311|   198k|            src_right -= src_right - border_right;
 1312|   198k|        }
 1313|       |        // check against top tile boundary and adjust if necessary
 1314|   745k|        if (src_top < border_top) {
  ------------------
  |  Branch (1314:13): [True: 463k, False: 281k]
  ------------------
 1315|   463k|            src_bottom += border_top - src_top;
 1316|   463k|            src_top    += border_top - src_top;
 1317|   463k|        }
 1318|       |
 1319|   745k|        const int sbx = (t->bx >> (4 + f->seq_hdr->sb128)) << (6 + f->seq_hdr->sb128);
 1320|   745k|        const int sby = (t->by >> (4 + f->seq_hdr->sb128)) << (6 + f->seq_hdr->sb128);
 1321|   745k|        const int sb_size = 1 << (6 + f->seq_hdr->sb128);
 1322|       |        // check for overlap with current superblock
 1323|   745k|        if (src_bottom > sby && src_right > sbx) {
  ------------------
  |  Branch (1323:13): [True: 712k, False: 33.3k]
  |  Branch (1323:33): [True: 210k, False: 501k]
  ------------------
 1324|   210k|            if (src_top - border_top >= src_bottom - sby) {
  ------------------
  |  Branch (1324:17): [True: 1.16k, False: 208k]
  ------------------
 1325|       |                // if possible move src up into the previous superblock row
 1326|  1.16k|                src_top    -= src_bottom - sby;
 1327|  1.16k|                src_bottom -= src_bottom - sby;
 1328|   208k|            } else if (src_left - border_left >= src_right - sbx) {
  ------------------
  |  Branch (1328:24): [True: 197k, False: 11.7k]
  ------------------
 1329|       |                // if possible move src left into the previous superblock
 1330|   197k|                src_left  -= src_right - sbx;
 1331|   197k|                src_right -= src_right - sbx;
 1332|   197k|            }
 1333|   210k|        }
 1334|       |        // move src up if it is below current superblock row
 1335|   745k|        if (src_bottom > sby + sb_size) {
  ------------------
  |  Branch (1335:13): [True: 7.57k, False: 737k]
  ------------------
 1336|  7.57k|            src_top    -= src_bottom - (sby + sb_size);
 1337|  7.57k|            src_bottom -= src_bottom - (sby + sb_size);
 1338|  7.57k|        }
 1339|       |        // error out if mv still overlaps with the current superblock
 1340|   745k|        if (src_bottom > sby && src_right > sbx)
  ------------------
  |  Branch (1340:13): [True: 710k, False: 34.5k]
  |  Branch (1340:33): [True: 11.7k, False: 699k]
  ------------------
 1341|  11.7k|            return -1;
 1342|       |
 1343|   733k|        b->mv[0].x = (src_left - t->bx * 4) * 8;
 1344|   733k|        b->mv[0].y = (src_top  - t->by * 4) * 8;
 1345|       |
 1346|   733k|        if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   733k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 733k]
  |  |  ------------------
  |  |   35|   733k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   733k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1347|      0|            printf("Post-dmv[%d/%d,ref=%d/%d|%d/%d]: r=%d\n",
 1348|      0|                   b->mv[0].y, b->mv[0].x, ref.y, ref.x,
 1349|      0|                   mvstack[0].mv.mv[0].y, mvstack[0].mv.mv[0].x, ts->msac.rng);
 1350|   733k|        read_vartx_tree(t, b, bs, bx4, by4);
 1351|       |
 1352|       |        // reconstruction
 1353|   734k|        if (t->frame_thread.pass == 1) {
  ------------------
  |  Branch (1353:13): [True: 734k, False: 18.4E]
  ------------------
 1354|   734k|            f->bd_fn.read_coef_blocks(t, bs, b);
 1355|   734k|            b->filter2d = FILTER_2D_BILINEAR;
 1356|  18.4E|        } else {
 1357|  18.4E|            if (f->bd_fn.recon_b_inter(t, bs, b)) return -1;
  ------------------
  |  Branch (1357:17): [True: 0, False: 18.4E]
  ------------------
 1358|  18.4E|        }
 1359|       |
 1360|   733k|        splat_intrabc_mv(f->c, t, bs, b, bw4, bh4);
 1361|   733k|        BlockContext *edge = t->a;
 1362|  2.20M|        for (int i = 0, off = bx4; i < 2; i++, off = by4, edge = &t->l) {
  ------------------
  |  Branch (1362:36): [True: 1.46M, False: 734k]
  ------------------
 1363|  1.46M|#define set_ctx(rep_macro) \
 1364|  1.46M|            rep_macro(edge->tx_intra, off, b_dim[2 + i]); \
 1365|  1.46M|            rep_macro(edge->mode, off, DC_PRED); \
 1366|  1.46M|            rep_macro(edge->pal_sz, off, 0); \
 1367|       |            /* see aomedia bug 2183 for why this is outside if (has_chroma) */ \
 1368|  1.46M|            rep_macro(t->pal_sz_uv[i], off, 0); \
 1369|  1.46M|            rep_macro(edge->seg_pred, off, seg_pred); \
 1370|  1.46M|            rep_macro(edge->skip_mode, off, 0); \
 1371|  1.46M|            rep_macro(edge->intra, off, 0); \
 1372|  1.46M|            rep_macro(edge->skip, off, b->skip)
 1373|  1.46M|            case_set(b_dim[2 + i]);
  ------------------
  |  |   70|  1.46M|    switch (var) { \
  |  |   71|   505k|    case 0: set_ctx(set_ctx1); break; \
  |  |  ------------------
  |  |  |  | 1364|   505k|            rep_macro(edge->tx_intra, off, b_dim[2 + i]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   505k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   505k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1365|   505k|            rep_macro(edge->mode, off, DC_PRED); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   505k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   505k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1366|   505k|            rep_macro(edge->pal_sz, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   505k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   505k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1367|   505k|            /* see aomedia bug 2183 for why this is outside if (has_chroma) */ \
  |  |  |  | 1368|   505k|            rep_macro(t->pal_sz_uv[i], off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   505k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   505k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1369|   505k|            rep_macro(edge->seg_pred, off, seg_pred); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   505k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   505k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1370|   505k|            rep_macro(edge->skip_mode, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   505k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   505k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1371|   505k|            rep_macro(edge->intra, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   505k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   505k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1372|   505k|            rep_macro(edge->skip, off, b->skip)
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   505k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   505k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (71:5): [True: 505k, False: 960k]
  |  |  ------------------
  |  |   72|   243k|    case 1: set_ctx(set_ctx2); break; \
  |  |  ------------------
  |  |  |  | 1364|   243k|            rep_macro(edge->tx_intra, off, b_dim[2 + i]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   243k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   243k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1365|   243k|            rep_macro(edge->mode, off, DC_PRED); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   243k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   243k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1366|   243k|            rep_macro(edge->pal_sz, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   243k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   243k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1367|   243k|            /* see aomedia bug 2183 for why this is outside if (has_chroma) */ \
  |  |  |  | 1368|   243k|            rep_macro(t->pal_sz_uv[i], off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   243k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   243k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1369|   243k|            rep_macro(edge->seg_pred, off, seg_pred); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   243k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   243k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1370|   243k|            rep_macro(edge->skip_mode, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   243k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   243k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1371|   243k|            rep_macro(edge->intra, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   243k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   243k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1372|   243k|            rep_macro(edge->skip, off, b->skip)
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   243k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   243k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (72:5): [True: 243k, False: 1.22M]
  |  |  ------------------
  |  |   73|   250k|    case 2: set_ctx(set_ctx4); break; \
  |  |  ------------------
  |  |  |  | 1364|   250k|            rep_macro(edge->tx_intra, off, b_dim[2 + i]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   250k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   250k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1365|   250k|            rep_macro(edge->mode, off, DC_PRED); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   250k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   250k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1366|   250k|            rep_macro(edge->pal_sz, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   250k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   250k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1367|   250k|            /* see aomedia bug 2183 for why this is outside if (has_chroma) */ \
  |  |  |  | 1368|   250k|            rep_macro(t->pal_sz_uv[i], off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   250k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   250k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1369|   250k|            rep_macro(edge->seg_pred, off, seg_pred); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   250k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   250k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1370|   250k|            rep_macro(edge->skip_mode, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   250k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   250k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1371|   250k|            rep_macro(edge->intra, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   250k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   250k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1372|   250k|            rep_macro(edge->skip, off, b->skip)
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|   250k|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|   250k|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (73:5): [True: 250k, False: 1.21M]
  |  |  ------------------
  |  |   74|  86.6k|    case 3: set_ctx(set_ctx8); break; \
  |  |  ------------------
  |  |  |  | 1364|  86.6k|            rep_macro(edge->tx_intra, off, b_dim[2 + i]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|  86.6k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|  86.6k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1365|  86.6k|            rep_macro(edge->mode, off, DC_PRED); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|  86.6k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|  86.6k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1366|  86.6k|            rep_macro(edge->pal_sz, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|  86.6k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|  86.6k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1367|  86.6k|            /* see aomedia bug 2183 for why this is outside if (has_chroma) */ \
  |  |  |  | 1368|  86.6k|            rep_macro(t->pal_sz_uv[i], off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|  86.6k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|  86.6k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1369|  86.6k|            rep_macro(edge->seg_pred, off, seg_pred); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|  86.6k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|  86.6k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1370|  86.6k|            rep_macro(edge->skip_mode, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|  86.6k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|  86.6k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1371|  86.6k|            rep_macro(edge->intra, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|  86.6k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|  86.6k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1372|  86.6k|            rep_macro(edge->skip, off, b->skip)
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|  86.6k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|  86.6k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (74:5): [True: 86.6k, False: 1.37M]
  |  |  ------------------
  |  |   75|   324k|    case 4: set_ctx(set_ctx16); break; \
  |  |  ------------------
  |  |  |  | 1364|   324k|            rep_macro(edge->tx_intra, off, b_dim[2 + i]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   324k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   324k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   324k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   324k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 324k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1365|   324k|            rep_macro(edge->mode, off, DC_PRED); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   324k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   324k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   324k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   324k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 324k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1366|   324k|            rep_macro(edge->pal_sz, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   324k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   324k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   324k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   324k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 324k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1367|   324k|            /* see aomedia bug 2183 for why this is outside if (has_chroma) */ \
  |  |  |  | 1368|   324k|            rep_macro(t->pal_sz_uv[i], off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   324k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   324k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   324k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   324k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 324k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1369|   324k|            rep_macro(edge->seg_pred, off, seg_pred); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   324k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   324k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   324k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   324k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 324k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1370|   324k|            rep_macro(edge->skip_mode, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   324k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   324k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   324k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   324k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 324k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1371|   324k|            rep_macro(edge->intra, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   324k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   324k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   324k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   324k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 324k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1372|   324k|            rep_macro(edge->skip, off, b->skip)
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|   324k|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|   324k|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|   324k|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|   324k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 324k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (75:5): [True: 324k, False: 1.14M]
  |  |  ------------------
  |  |   76|  58.1k|    case 5: set_ctx(set_ctx32); break; \
  |  |  ------------------
  |  |  |  | 1364|  58.1k|            rep_macro(edge->tx_intra, off, b_dim[2 + i]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  58.1k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  58.1k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  58.1k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  58.1k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 58.1k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1365|  58.1k|            rep_macro(edge->mode, off, DC_PRED); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  58.1k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  58.1k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  58.1k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  58.1k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 58.1k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1366|  58.1k|            rep_macro(edge->pal_sz, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  58.1k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  58.1k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  58.1k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  58.1k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 58.1k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1367|  58.1k|            /* see aomedia bug 2183 for why this is outside if (has_chroma) */ \
  |  |  |  | 1368|  58.1k|            rep_macro(t->pal_sz_uv[i], off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  58.1k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  58.1k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  58.1k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  58.1k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 58.1k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1369|  58.1k|            rep_macro(edge->seg_pred, off, seg_pred); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  58.1k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  58.1k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  58.1k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  58.1k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 58.1k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1370|  58.1k|            rep_macro(edge->skip_mode, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  58.1k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  58.1k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  58.1k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  58.1k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 58.1k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1371|  58.1k|            rep_macro(edge->intra, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  58.1k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  58.1k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  58.1k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  58.1k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 58.1k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1372|  58.1k|            rep_macro(edge->skip, off, b->skip)
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  58.1k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  58.1k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  58.1k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  58.1k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 58.1k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (76:5): [True: 58.1k, False: 1.40M]
  |  |  ------------------
  |  |   77|      0|    default: assert(0); \
  |  |  ------------------
  |  |  |  Branch (77:5): [True: 0, False: 1.46M]
  |  |  ------------------
  |  |   78|  1.46M|    }
  ------------------
  |  Branch (1373:13): [Folded, False: 0]
  ------------------
 1374|  1.46M|#undef set_ctx
 1375|  1.46M|        }
 1376|   734k|        if (has_chroma) {
  ------------------
  |  Branch (1376:13): [True: 685k, False: 48.4k]
  ------------------
 1377|   685k|            dav1d_memset_pow2[ulog2(cbw4)](&t->a->uvmode[cbx4], DC_PRED);
 1378|   685k|            dav1d_memset_pow2[ulog2(cbh4)](&t->l.uvmode[cby4], DC_PRED);
 1379|   685k|        }
 1380|  3.23M|    } else {
 1381|       |        // inter-specific mode/mv coding
 1382|  3.23M|        int is_comp, has_subpel_filter;
 1383|       |
 1384|  3.23M|        if (b->skip_mode) {
  ------------------
  |  Branch (1384:13): [True: 30.9k, False: 3.20M]
  ------------------
 1385|  30.9k|            is_comp = 1;
 1386|  3.20M|        } else if ((!seg || (seg->ref == -1 && !seg->globalmv && !seg->skip)) &&
  ------------------
  |  Branch (1386:21): [True: 1.70M, False: 1.50M]
  |  Branch (1386:30): [True: 1.27M, False: 225k]
  |  Branch (1386:48): [True: 257k, False: 1.01M]
  |  Branch (1386:66): [True: 234k, False: 23.3k]
  ------------------
 1387|  1.98M|                   f->frame_hdr->switchable_comp_refs && imin(bw4, bh4) > 1)
  ------------------
  |  Branch (1387:20): [True: 1.00M, False: 977k]
  |  Branch (1387:58): [True: 759k, False: 245k]
  ------------------
 1388|   759k|        {
 1389|   759k|            const int ctx = get_comp_ctx(t->a, &t->l, by4, bx4,
 1390|   759k|                                         have_top, have_left);
 1391|   759k|            is_comp = dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   759k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1392|   759k|                          ts->cdf.m.comp[ctx]);
 1393|   759k|            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   759k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 759k]
  |  |  ------------------
  |  |   35|   759k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   759k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1394|      0|                printf("Post-compflag[%d]: r=%d\n", is_comp, ts->msac.rng);
 1395|  2.44M|        } else {
 1396|  2.44M|            is_comp = 0;
 1397|  2.44M|        }
 1398|       |
 1399|  3.23M|        if (b->skip_mode) {
  ------------------
  |  Branch (1399:13): [True: 30.9k, False: 3.20M]
  ------------------
 1400|  30.9k|            b->ref[0] = f->frame_hdr->skip_mode_refs[0];
 1401|  30.9k|            b->ref[1] = f->frame_hdr->skip_mode_refs[1];
 1402|  30.9k|            b->comp_type = COMP_INTER_AVG;
 1403|  30.9k|            b->inter_mode = NEARESTMV_NEARESTMV;
 1404|  30.9k|            b->drl_idx = NEAREST_DRL;
 1405|  30.9k|            has_subpel_filter = 0;
 1406|       |
 1407|  30.9k|            refmvs_candidate mvstack[8];
 1408|  30.9k|            int n_mvs, ctx;
 1409|  30.9k|            dav1d_refmvs_find(&t->rt, mvstack, &n_mvs, &ctx,
 1410|  30.9k|                              (union refmvs_refpair) { .ref = {
 1411|  30.9k|                                    b->ref[0] + 1, b->ref[1] + 1 }},
 1412|  30.9k|                              bs, intra_edge_flags, t->by, t->bx);
 1413|       |
 1414|  30.9k|            b->mv[0] = mvstack[0].mv.mv[0];
 1415|  30.9k|            b->mv[1] = mvstack[0].mv.mv[1];
 1416|  30.9k|            fix_mv_precision(f->frame_hdr, &b->mv[0]);
 1417|  30.9k|            fix_mv_precision(f->frame_hdr, &b->mv[1]);
 1418|  30.9k|            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  30.9k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 30.9k]
  |  |  ------------------
  |  |   35|  30.9k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  30.9k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1419|      0|                printf("Post-skipmodeblock[mv=1:y=%d,x=%d,2:y=%d,x=%d,refs=%d+%d\n",
 1420|      0|                       b->mv[0].y, b->mv[0].x, b->mv[1].y, b->mv[1].x,
 1421|      0|                       b->ref[0], b->ref[1]);
 1422|  3.20M|        } else if (is_comp) {
  ------------------
  |  Branch (1422:20): [True: 312k, False: 2.89M]
  ------------------
 1423|   312k|            const int dir_ctx = get_comp_dir_ctx(t->a, &t->l, by4, bx4,
 1424|   312k|                                                 have_top, have_left);
 1425|   312k|            if (dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   312k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  |  Branch (1425:17): [True: 268k, False: 44.2k]
  ------------------
 1426|   312k|                    ts->cdf.m.comp_dir[dir_ctx]))
 1427|   268k|            {
 1428|       |                // bidir - first reference (fw)
 1429|   268k|                const int ctx1 = av1_get_fwd_ref_ctx(t->a, &t->l, by4, bx4,
 1430|   268k|                                                     have_top, have_left);
 1431|   268k|                if (dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   268k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  |  Branch (1431:21): [True: 88.4k, False: 179k]
  ------------------
 1432|   268k|                        ts->cdf.m.comp_fwd_ref[0][ctx1]))
 1433|  88.4k|                {
 1434|  88.4k|                    const int ctx2 = av1_get_fwd_ref_2_ctx(t->a, &t->l, by4, bx4,
 1435|  88.4k|                                                           have_top, have_left);
 1436|  88.4k|                    b->ref[0] = 2 + dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  88.4k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1437|  88.4k|                                        ts->cdf.m.comp_fwd_ref[2][ctx2]);
 1438|   179k|                } else {
 1439|   179k|                    const int ctx2 = av1_get_fwd_ref_1_ctx(t->a, &t->l, by4, bx4,
 1440|   179k|                                                           have_top, have_left);
 1441|   179k|                    b->ref[0] = dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   179k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1442|   179k|                                    ts->cdf.m.comp_fwd_ref[1][ctx2]);
 1443|   179k|                }
 1444|       |
 1445|       |                // second reference (bw)
 1446|   268k|                const int ctx3 = av1_get_bwd_ref_ctx(t->a, &t->l, by4, bx4,
 1447|   268k|                                                     have_top, have_left);
 1448|   268k|                if (dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   268k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  |  Branch (1448:21): [True: 151k, False: 116k]
  ------------------
 1449|   268k|                        ts->cdf.m.comp_bwd_ref[0][ctx3]))
 1450|   151k|                {
 1451|   151k|                    b->ref[1] = 6;
 1452|   151k|                } else {
 1453|   116k|                    const int ctx4 = av1_get_bwd_ref_1_ctx(t->a, &t->l, by4, bx4,
 1454|   116k|                                                           have_top, have_left);
 1455|   116k|                    b->ref[1] = 4 + dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   116k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1456|   116k|                                        ts->cdf.m.comp_bwd_ref[1][ctx4]);
 1457|   116k|                }
 1458|   268k|            } else {
 1459|       |                // unidir
 1460|  44.2k|                const int uctx_p = av1_get_uni_p_ctx(t->a, &t->l, by4, bx4,
  ------------------
  |  |  280|  44.2k|#define av1_get_uni_p_ctx av1_get_ref_ctx
  ------------------
 1461|  44.2k|                                                     have_top, have_left);
 1462|  44.2k|                if (dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  44.2k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  |  Branch (1462:21): [True: 11.2k, False: 32.9k]
  ------------------
 1463|  44.2k|                        ts->cdf.m.comp_uni_ref[0][uctx_p]))
 1464|  11.2k|                {
 1465|  11.2k|                    b->ref[0] = 4;
 1466|  11.2k|                    b->ref[1] = 6;
 1467|  32.9k|                } else {
 1468|  32.9k|                    const int uctx_p1 = av1_get_uni_p1_ctx(t->a, &t->l, by4, bx4,
 1469|  32.9k|                                                           have_top, have_left);
 1470|  32.9k|                    b->ref[0] = 0;
 1471|  32.9k|                    b->ref[1] = 1 + dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  32.9k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1472|  32.9k|                                        ts->cdf.m.comp_uni_ref[1][uctx_p1]);
 1473|  32.9k|                    if (b->ref[1] == 2) {
  ------------------
  |  Branch (1473:25): [True: 20.5k, False: 12.3k]
  ------------------
 1474|  20.5k|                        const int uctx_p2 = av1_get_uni_p2_ctx(t->a, &t->l, by4, bx4,
  ------------------
  |  |  281|  20.5k|#define av1_get_uni_p2_ctx av1_get_fwd_ref_2_ctx
  ------------------
 1475|  20.5k|                                                               have_top, have_left);
 1476|  20.5k|                        b->ref[1] += dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  20.5k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1477|  20.5k|                                         ts->cdf.m.comp_uni_ref[2][uctx_p2]);
 1478|  20.5k|                    }
 1479|  32.9k|                }
 1480|  44.2k|            }
 1481|   312k|            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   312k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 312k]
  |  |  ------------------
  |  |   35|   312k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   312k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1482|      0|                printf("Post-refs[%d/%d]: r=%d\n",
 1483|      0|                       b->ref[0], b->ref[1], ts->msac.rng);
 1484|       |
 1485|   312k|            refmvs_candidate mvstack[8];
 1486|   312k|            int n_mvs, ctx;
 1487|   312k|            dav1d_refmvs_find(&t->rt, mvstack, &n_mvs, &ctx,
 1488|   312k|                              (union refmvs_refpair) { .ref = {
 1489|   312k|                                    b->ref[0] + 1, b->ref[1] + 1 }},
 1490|   312k|                              bs, intra_edge_flags, t->by, t->bx);
 1491|       |
 1492|   312k|            b->inter_mode = dav1d_msac_decode_symbol_adapt8(&ts->msac,
  ------------------
  |  |   48|   312k|#define dav1d_msac_decode_symbol_adapt8  dav1d_msac_decode_symbol_adapt8_sse2
  ------------------
 1493|   312k|                                ts->cdf.m.comp_inter_mode[ctx],
 1494|   312k|                                N_COMP_INTER_PRED_MODES - 1);
 1495|   312k|            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   312k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 312k]
  |  |  ------------------
  |  |   35|   312k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   312k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1496|      0|                printf("Post-compintermode[%d,ctx=%d,n_mvs=%d]: r=%d\n",
 1497|      0|                       b->inter_mode, ctx, n_mvs, ts->msac.rng);
 1498|       |
 1499|   312k|            const uint8_t *const im = dav1d_comp_inter_pred_modes[b->inter_mode];
 1500|   312k|            b->drl_idx = NEAREST_DRL;
 1501|   312k|            if (b->inter_mode == NEWMV_NEWMV) {
  ------------------
  |  Branch (1501:17): [True: 61.4k, False: 250k]
  ------------------
 1502|  61.4k|                if (n_mvs > 1) { // NEARER, NEAR or NEARISH
  ------------------
  |  Branch (1502:21): [True: 61.4k, False: 4]
  ------------------
 1503|  61.4k|                    const int drl_ctx_v1 = get_drl_context(mvstack, 0);
 1504|  61.4k|                    b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  61.4k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1505|  61.4k|                                      ts->cdf.m.drl_bit[drl_ctx_v1]);
 1506|  61.4k|                    if (b->drl_idx == NEARER_DRL && n_mvs > 2) {
  ------------------
  |  Branch (1506:25): [True: 40.6k, False: 20.7k]
  |  Branch (1506:53): [True: 9.43k, False: 31.2k]
  ------------------
 1507|  9.43k|                        const int drl_ctx_v2 = get_drl_context(mvstack, 1);
 1508|  9.43k|                        b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  9.43k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1509|  9.43k|                                          ts->cdf.m.drl_bit[drl_ctx_v2]);
 1510|  9.43k|                    }
 1511|  61.4k|                    if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  61.4k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 61.4k]
  |  |  ------------------
  |  |   35|  61.4k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  61.4k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1512|      0|                        printf("Post-drlidx[%d,n_mvs=%d]: r=%d\n",
 1513|      0|                               b->drl_idx, n_mvs, ts->msac.rng);
 1514|  61.4k|                }
 1515|   250k|            } else if (im[0] == NEARMV || im[1] == NEARMV) {
  ------------------
  |  Branch (1515:24): [True: 60.3k, False: 190k]
  |  Branch (1515:43): [True: 8.98k, False: 181k]
  ------------------
 1516|  69.4k|                b->drl_idx = NEARER_DRL;
 1517|  69.4k|                if (n_mvs > 2) { // NEAR or NEARISH
  ------------------
  |  Branch (1517:21): [True: 20.3k, False: 49.1k]
  ------------------
 1518|  20.3k|                    const int drl_ctx_v2 = get_drl_context(mvstack, 1);
 1519|  20.3k|                    b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  20.3k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1520|  20.3k|                                      ts->cdf.m.drl_bit[drl_ctx_v2]);
 1521|  20.3k|                    if (b->drl_idx == NEAR_DRL && n_mvs > 3) {
  ------------------
  |  Branch (1521:25): [True: 9.10k, False: 11.2k]
  |  Branch (1521:51): [True: 6.48k, False: 2.61k]
  ------------------
 1522|  6.48k|                        const int drl_ctx_v3 = get_drl_context(mvstack, 2);
 1523|  6.48k|                        b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  6.48k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1524|  6.48k|                                          ts->cdf.m.drl_bit[drl_ctx_v3]);
 1525|  6.48k|                    }
 1526|  20.3k|                    if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  20.3k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 20.3k]
  |  |  ------------------
  |  |   35|  20.3k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  20.3k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1527|      0|                        printf("Post-drlidx[%d,n_mvs=%d]: r=%d\n",
 1528|      0|                               b->drl_idx, n_mvs, ts->msac.rng);
 1529|  20.3k|                }
 1530|  69.4k|            }
 1531|   312k|            assert(b->drl_idx >= NEAREST_DRL && b->drl_idx <= NEARISH_DRL);
  ------------------
  |  Branch (1531:13): [True: 312k, False: 183]
  |  Branch (1531:13): [True: 312k, False: 18.4E]
  ------------------
 1532|       |
 1533|   312k|#define assign_comp_mv(idx) \
 1534|   312k|            switch (im[idx]) { \
 1535|   312k|            case NEARMV: \
 1536|   312k|            case NEARESTMV: \
 1537|   312k|                b->mv[idx] = mvstack[b->drl_idx].mv.mv[idx]; \
 1538|   312k|                fix_mv_precision(f->frame_hdr, &b->mv[idx]); \
 1539|   312k|                break; \
 1540|   312k|            case GLOBALMV: \
 1541|   312k|                has_subpel_filter |= \
 1542|   312k|                    f->frame_hdr->gmv[b->ref[idx]].type == DAV1D_WM_TYPE_TRANSLATION; \
 1543|   312k|                b->mv[idx] = get_gmv_2d(&f->frame_hdr->gmv[b->ref[idx]], \
 1544|   312k|                                        t->bx, t->by, bw4, bh4, f->frame_hdr); \
 1545|   312k|                break; \
 1546|   312k|            case NEWMV: \
 1547|   312k|                b->mv[idx] = mvstack[b->drl_idx].mv.mv[idx]; \
 1548|   312k|                const int mv_prec = f->frame_hdr->hp - f->frame_hdr->force_integer_mv; \
 1549|   312k|                read_mv_residual(ts, &b->mv[idx], mv_prec); \
 1550|   312k|                break; \
 1551|   312k|            }
 1552|   312k|            has_subpel_filter = imin(bw4, bh4) == 1 ||
  ------------------
  |  Branch (1552:33): [True: 55, False: 312k]
  ------------------
 1553|   312k|                                b->inter_mode != GLOBALMV_GLOBALMV;
  ------------------
  |  Branch (1553:33): [True: 279k, False: 32.3k]
  ------------------
 1554|   312k|            assign_comp_mv(0);
  ------------------
  |  | 1534|   312k|            switch (im[idx]) { \
  |  |  ------------------
  |  |  |  Branch (1534:21): [True: 312k, False: 18.4E]
  |  |  ------------------
  |  | 1535|  60.4k|            case NEARMV: \
  |  |  ------------------
  |  |  |  Branch (1535:13): [True: 60.4k, False: 251k]
  |  |  ------------------
  |  | 1536|   193k|            case NEARESTMV: \
  |  |  ------------------
  |  |  |  Branch (1536:13): [True: 133k, False: 179k]
  |  |  ------------------
  |  | 1537|   193k|                b->mv[idx] = mvstack[b->drl_idx].mv.mv[idx]; \
  |  | 1538|   193k|                fix_mv_precision(f->frame_hdr, &b->mv[idx]); \
  |  | 1539|   193k|                break; \
  |  | 1540|  60.4k|            case GLOBALMV: \
  |  |  ------------------
  |  |  |  Branch (1540:13): [True: 32.3k, False: 279k]
  |  |  ------------------
  |  | 1541|  32.3k|                has_subpel_filter |= \
  |  | 1542|  32.3k|                    f->frame_hdr->gmv[b->ref[idx]].type == DAV1D_WM_TYPE_TRANSLATION; \
  |  | 1543|  32.3k|                b->mv[idx] = get_gmv_2d(&f->frame_hdr->gmv[b->ref[idx]], \
  |  | 1544|  32.3k|                                        t->bx, t->by, bw4, bh4, f->frame_hdr); \
  |  | 1545|  32.3k|                break; \
  |  | 1546|  86.4k|            case NEWMV: \
  |  |  ------------------
  |  |  |  Branch (1546:13): [True: 86.4k, False: 225k]
  |  |  ------------------
  |  | 1547|  86.4k|                b->mv[idx] = mvstack[b->drl_idx].mv.mv[idx]; \
  |  | 1548|  86.4k|                const int mv_prec = f->frame_hdr->hp - f->frame_hdr->force_integer_mv; \
  |  | 1549|  86.4k|                read_mv_residual(ts, &b->mv[idx], mv_prec); \
  |  | 1550|  86.4k|                break; \
  |  | 1551|   312k|            }
  ------------------
 1555|   312k|            assign_comp_mv(1);
  ------------------
  |  | 1534|   312k|            switch (im[idx]) { \
  |  |  ------------------
  |  |  |  Branch (1534:21): [True: 312k, False: 18.4E]
  |  |  ------------------
  |  | 1535|  58.3k|            case NEARMV: \
  |  |  ------------------
  |  |  |  Branch (1535:13): [True: 58.3k, False: 253k]
  |  |  ------------------
  |  | 1536|   187k|            case NEARESTMV: \
  |  |  ------------------
  |  |  |  Branch (1536:13): [True: 129k, False: 183k]
  |  |  ------------------
  |  | 1537|   187k|                b->mv[idx] = mvstack[b->drl_idx].mv.mv[idx]; \
  |  | 1538|   187k|                fix_mv_precision(f->frame_hdr, &b->mv[idx]); \
  |  | 1539|   187k|                break; \
  |  | 1540|  58.3k|            case GLOBALMV: \
  |  |  ------------------
  |  |  |  Branch (1540:13): [True: 32.3k, False: 279k]
  |  |  ------------------
  |  | 1541|  32.3k|                has_subpel_filter |= \
  |  | 1542|  32.3k|                    f->frame_hdr->gmv[b->ref[idx]].type == DAV1D_WM_TYPE_TRANSLATION; \
  |  | 1543|  32.3k|                b->mv[idx] = get_gmv_2d(&f->frame_hdr->gmv[b->ref[idx]], \
  |  | 1544|  32.3k|                                        t->bx, t->by, bw4, bh4, f->frame_hdr); \
  |  | 1545|  32.3k|                break; \
  |  | 1546|  92.4k|            case NEWMV: \
  |  |  ------------------
  |  |  |  Branch (1546:13): [True: 92.4k, False: 219k]
  |  |  ------------------
  |  | 1547|  92.4k|                b->mv[idx] = mvstack[b->drl_idx].mv.mv[idx]; \
  |  | 1548|  92.4k|                const int mv_prec = f->frame_hdr->hp - f->frame_hdr->force_integer_mv; \
  |  | 1549|  92.4k|                read_mv_residual(ts, &b->mv[idx], mv_prec); \
  |  | 1550|  92.4k|                break; \
  |  | 1551|   312k|            }
  ------------------
 1556|   312k|#undef assign_comp_mv
 1557|   312k|            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   312k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 312k]
  |  |  ------------------
  |  |   35|   312k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   312k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1558|      0|                printf("Post-residual_mv[1:y=%d,x=%d,2:y=%d,x=%d]: r=%d\n",
 1559|      0|                       b->mv[0].y, b->mv[0].x, b->mv[1].y, b->mv[1].x,
 1560|      0|                       ts->msac.rng);
 1561|       |
 1562|       |            // jnt_comp vs. seg vs. wedge
 1563|   312k|            int is_segwedge = 0;
 1564|   312k|            if (f->seq_hdr->masked_compound) {
  ------------------
  |  Branch (1564:17): [True: 236k, False: 76.1k]
  ------------------
 1565|   236k|                const int mask_ctx = get_mask_comp_ctx(t->a, &t->l, by4, bx4);
 1566|       |
 1567|   236k|                is_segwedge = dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   236k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1568|   236k|                                  ts->cdf.m.mask_comp[mask_ctx]);
 1569|   236k|                if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   236k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 236k]
  |  |  ------------------
  |  |   35|   236k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   236k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1570|      0|                    printf("Post-segwedge_vs_jntavg[%d,ctx=%d]: r=%d\n",
 1571|      0|                           is_segwedge, mask_ctx, ts->msac.rng);
 1572|   236k|            }
 1573|       |
 1574|   312k|            if (!is_segwedge) {
  ------------------
  |  Branch (1574:17): [True: 246k, False: 66.2k]
  ------------------
 1575|   246k|                if (f->seq_hdr->jnt_comp) {
  ------------------
  |  Branch (1575:21): [True: 167k, False: 78.3k]
  ------------------
 1576|   167k|                    const int jnt_ctx =
 1577|   167k|                        get_jnt_comp_ctx(f->seq_hdr->order_hint_n_bits,
 1578|   167k|                                         f->cur.frame_hdr->frame_offset,
 1579|   167k|                                         f->refp[b->ref[0]].p.frame_hdr->frame_offset,
 1580|   167k|                                         f->refp[b->ref[1]].p.frame_hdr->frame_offset,
 1581|   167k|                                         t->a, &t->l, by4, bx4);
 1582|   167k|                    b->comp_type = COMP_INTER_WEIGHTED_AVG +
 1583|   167k|                                   dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   167k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1584|   167k|                                       ts->cdf.m.jnt_comp[jnt_ctx]);
 1585|   167k|                    if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   167k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 167k]
  |  |  ------------------
  |  |   35|   167k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   167k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1586|      0|                        printf("Post-jnt_comp[%d,ctx=%d[ac:%d,ar:%d,lc:%d,lr:%d]]: r=%d\n",
 1587|      0|                               b->comp_type == COMP_INTER_AVG,
 1588|      0|                               jnt_ctx, t->a->comp_type[bx4], t->a->ref[0][bx4],
 1589|      0|                               t->l.comp_type[by4], t->l.ref[0][by4],
 1590|      0|                               ts->msac.rng);
 1591|   167k|                } else {
 1592|  78.3k|                    b->comp_type = COMP_INTER_AVG;
 1593|  78.3k|                }
 1594|   246k|            } else {
 1595|  66.2k|                if (wedge_allowed_mask & (1 << bs)) {
  ------------------
  |  Branch (1595:21): [True: 52.0k, False: 14.2k]
  ------------------
 1596|  52.0k|                    const int ctx = dav1d_wedge_ctx_lut[bs];
 1597|  52.0k|                    b->comp_type = COMP_INTER_WEDGE -
 1598|  52.0k|                                   dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  52.0k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1599|  52.0k|                                       ts->cdf.m.wedge_comp[ctx]);
 1600|  52.0k|                    if (b->comp_type == COMP_INTER_WEDGE)
  ------------------
  |  Branch (1600:25): [True: 21.4k, False: 30.5k]
  ------------------
 1601|  21.4k|                        b->wedge_idx = dav1d_msac_decode_symbol_adapt16(&ts->msac,
  ------------------
  |  |   57|  21.4k|#define dav1d_msac_decode_symbol_adapt16(ctx, cdf, symb) ((ctx)->symbol_adapt16(ctx, cdf, symb))
  ------------------
 1602|  52.0k|                                           ts->cdf.m.wedge_idx[ctx], 15);
 1603|  52.0k|                } else {
 1604|  14.2k|                    b->comp_type = COMP_INTER_SEG;
 1605|  14.2k|                }
 1606|  66.2k|                b->mask_sign = dav1d_msac_decode_bool_equi(&ts->msac);
  ------------------
  |  |   53|  66.2k|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
 1607|  66.2k|                if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  66.2k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 66.2k]
  |  |  ------------------
  |  |   35|  66.2k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  66.2k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1608|      0|                    printf("Post-seg/wedge[%d,wedge_idx=%d,sign=%d]: r=%d\n",
 1609|      0|                           b->comp_type == COMP_INTER_WEDGE,
 1610|      0|                           b->wedge_idx, b->mask_sign, ts->msac.rng);
 1611|  66.2k|            }
 1612|  2.89M|        } else {
 1613|  2.89M|            b->comp_type = COMP_INTER_NONE;
 1614|       |
 1615|       |            // ref
 1616|  2.89M|            if (seg && seg->ref > 0) {
  ------------------
  |  Branch (1616:17): [True: 1.43M, False: 1.45M]
  |  Branch (1616:24): [True: 225k, False: 1.20M]
  ------------------
 1617|   225k|                b->ref[0] = seg->ref - 1;
 1618|  2.66M|            } else if (seg && (seg->globalmv || seg->skip)) {
  ------------------
  |  Branch (1618:24): [True: 1.20M, False: 1.45M]
  |  Branch (1618:32): [True: 1.01M, False: 186k]
  |  Branch (1618:49): [True: 23.3k, False: 163k]
  ------------------
 1619|  1.04M|                b->ref[0] = 0;
 1620|  1.62M|            } else {
 1621|  1.62M|                const int ctx1 = av1_get_ref_ctx(t->a, &t->l, by4, bx4,
 1622|  1.62M|                                                 have_top, have_left);
 1623|  1.62M|                if (dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  1.62M|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  |  Branch (1623:21): [True: 473k, False: 1.14M]
  ------------------
 1624|  1.62M|                                                 ts->cdf.m.ref[0][ctx1]))
 1625|   473k|                {
 1626|   473k|                    const int ctx2 = av1_get_ref_2_ctx(t->a, &t->l, by4, bx4,
  ------------------
  |  |  275|   473k|#define av1_get_ref_2_ctx av1_get_bwd_ref_ctx
  ------------------
 1627|   473k|                                                       have_top, have_left);
 1628|   473k|                    if (dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   473k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  |  Branch (1628:25): [True: 335k, False: 137k]
  ------------------
 1629|   473k|                                                     ts->cdf.m.ref[1][ctx2]))
 1630|   335k|                    {
 1631|   335k|                        b->ref[0] = 6;
 1632|   335k|                    } else {
 1633|   137k|                        const int ctx3 = av1_get_ref_6_ctx(t->a, &t->l, by4, bx4,
  ------------------
  |  |  279|   137k|#define av1_get_ref_6_ctx av1_get_bwd_ref_1_ctx
  ------------------
 1634|   137k|                                                           have_top, have_left);
 1635|   137k|                        b->ref[0] = 4 + dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   137k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1636|   137k|                                            ts->cdf.m.ref[5][ctx3]);
 1637|   137k|                    }
 1638|  1.14M|                } else {
 1639|  1.14M|                    const int ctx2 = av1_get_ref_3_ctx(t->a, &t->l, by4, bx4,
  ------------------
  |  |  276|  1.14M|#define av1_get_ref_3_ctx av1_get_fwd_ref_ctx
  ------------------
 1640|  1.14M|                                                       have_top, have_left);
 1641|  1.14M|                    if (dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  1.14M|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  |  Branch (1641:25): [True: 147k, False: 1.00M]
  ------------------
 1642|  1.14M|                                                     ts->cdf.m.ref[2][ctx2]))
 1643|   147k|                    {
 1644|   147k|                        const int ctx3 = av1_get_ref_5_ctx(t->a, &t->l, by4, bx4,
  ------------------
  |  |  278|   147k|#define av1_get_ref_5_ctx av1_get_fwd_ref_2_ctx
  ------------------
 1645|   147k|                                                           have_top, have_left);
 1646|   147k|                        b->ref[0] = 2 + dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   147k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1647|   147k|                                            ts->cdf.m.ref[4][ctx3]);
 1648|  1.00M|                    } else {
 1649|  1.00M|                        const int ctx3 = av1_get_ref_4_ctx(t->a, &t->l, by4, bx4,
  ------------------
  |  |  277|  1.00M|#define av1_get_ref_4_ctx av1_get_fwd_ref_1_ctx
  ------------------
 1650|  1.00M|                                                           have_top, have_left);
 1651|  1.00M|                        b->ref[0] = dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  1.00M|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1652|  1.00M|                                        ts->cdf.m.ref[3][ctx3]);
 1653|  1.00M|                    }
 1654|  1.14M|                }
 1655|  1.62M|                if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  1.62M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 1.62M]
  |  |  ------------------
  |  |   35|  1.62M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  1.62M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1656|      0|                    printf("Post-ref[%d]: r=%d\n", b->ref[0], ts->msac.rng);
 1657|  1.62M|            }
 1658|  2.89M|            b->ref[1] = -1;
 1659|       |
 1660|  2.89M|            refmvs_candidate mvstack[8];
 1661|  2.89M|            int n_mvs, ctx;
 1662|  2.89M|            dav1d_refmvs_find(&t->rt, mvstack, &n_mvs, &ctx,
 1663|  2.89M|                              (union refmvs_refpair) { .ref = { b->ref[0] + 1, -1 }},
 1664|  2.89M|                              bs, intra_edge_flags, t->by, t->bx);
 1665|       |
 1666|       |            // mode parsing and mv derivation from ref_mvs
 1667|  2.89M|            if ((seg && (seg->skip || seg->globalmv)) ||
  ------------------
  |  Branch (1667:18): [True: 1.43M, False: 1.45M]
  |  Branch (1667:26): [True: 1.25M, False: 176k]
  |  Branch (1667:39): [True: 7.50k, False: 168k]
  ------------------
 1668|  1.67M|                dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  1.67M|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  |  Branch (1668:17): [True: 1.10M, False: 573k]
  ------------------
 1669|  1.67M|                                             ts->cdf.m.newmv_mode[ctx & 7]))
 1670|  2.36M|            {
 1671|  2.36M|                if ((seg && (seg->skip || seg->globalmv)) ||
  ------------------
  |  Branch (1671:22): [True: 1.36M, False: 1.00M]
  |  Branch (1671:30): [True: 1.25M, False: 108k]
  |  Branch (1671:43): [True: 7.51k, False: 101k]
  ------------------
 1672|  1.10M|                    !dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  1.10M|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  |  Branch (1672:21): [True: 95.0k, False: 1.00M]
  ------------------
 1673|  1.10M|                         ts->cdf.m.globalmv_mode[(ctx >> 3) & 1]))
 1674|  1.35M|                {
 1675|  1.35M|                    b->inter_mode = GLOBALMV;
 1676|  1.35M|                    b->mv[0] = get_gmv_2d(&f->frame_hdr->gmv[b->ref[0]],
 1677|  1.35M|                                          t->bx, t->by, bw4, bh4, f->frame_hdr);
 1678|  1.35M|                    has_subpel_filter = imin(bw4, bh4) == 1 ||
  ------------------
  |  Branch (1678:41): [True: 118k, False: 1.24M]
  ------------------
 1679|  1.24M|                        f->frame_hdr->gmv[b->ref[0]].type == DAV1D_WM_TYPE_TRANSLATION;
  ------------------
  |  Branch (1679:25): [True: 24.1k, False: 1.21M]
  ------------------
 1680|  1.35M|                } else {
 1681|  1.00M|                    has_subpel_filter = 1;
 1682|  1.00M|                    if (dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  1.00M|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  |  Branch (1682:25): [True: 378k, False: 627k]
  ------------------
 1683|  1.00M|                            ts->cdf.m.refmv_mode[(ctx >> 4) & 15]))
 1684|   378k|                    { // NEAREST, NEARER, NEAR or NEARISH
 1685|   378k|                        b->inter_mode = NEARMV;
 1686|   378k|                        b->drl_idx = NEARER_DRL;
 1687|   378k|                        if (n_mvs > 2) { // NEARER, NEAR or NEARISH
  ------------------
  |  Branch (1687:29): [True: 170k, False: 208k]
  ------------------
 1688|   170k|                            const int drl_ctx_v2 = get_drl_context(mvstack, 1);
 1689|   170k|                            b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   170k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1690|   170k|                                              ts->cdf.m.drl_bit[drl_ctx_v2]);
 1691|   170k|                            if (b->drl_idx == NEAR_DRL && n_mvs > 3) { // NEAR or NEARISH
  ------------------
  |  Branch (1691:33): [True: 81.8k, False: 88.2k]
  |  Branch (1691:59): [True: 50.1k, False: 31.7k]
  ------------------
 1692|  50.1k|                                const int drl_ctx_v3 =
 1693|  50.1k|                                    get_drl_context(mvstack, 2);
 1694|  50.1k|                                b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  50.1k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1695|  50.1k|                                                  ts->cdf.m.drl_bit[drl_ctx_v3]);
 1696|  50.1k|                            }
 1697|   170k|                        }
 1698|   627k|                    } else {
 1699|   627k|                        b->inter_mode = NEARESTMV;
 1700|   627k|                        b->drl_idx = NEAREST_DRL;
 1701|   627k|                    }
 1702|  1.00M|                    assert(b->drl_idx >= NEAREST_DRL && b->drl_idx <= NEARISH_DRL);
  ------------------
  |  Branch (1702:21): [True: 1.00M, False: 18.4E]
  |  Branch (1702:21): [True: 1.00M, False: 18.4E]
  ------------------
 1703|  1.00M|                    b->mv[0] = mvstack[b->drl_idx].mv.mv[0];
 1704|  1.00M|                    if (b->drl_idx < NEAR_DRL)
  ------------------
  |  Branch (1704:25): [True: 926k, False: 81.0k]
  ------------------
 1705|   926k|                        fix_mv_precision(f->frame_hdr, &b->mv[0]);
 1706|  1.00M|                }
 1707|       |
 1708|  2.36M|                if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  2.36M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 2.36M]
  |  |  ------------------
  |  |   35|  2.36M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  2.36M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1709|      0|                    printf("Post-intermode[%d,drl=%d,mv=y:%d,x:%d,n_mvs=%d]: r=%d\n",
 1710|      0|                           b->inter_mode, b->drl_idx, b->mv[0].y, b->mv[0].x, n_mvs,
 1711|      0|                           ts->msac.rng);
 1712|  2.36M|            } else {
 1713|   524k|                has_subpel_filter = 1;
 1714|   524k|                b->inter_mode = NEWMV;
 1715|   524k|                b->drl_idx = NEAREST_DRL;
 1716|   524k|                if (n_mvs > 1) { // NEARER, NEAR or NEARISH
  ------------------
  |  Branch (1716:21): [True: 456k, False: 68.4k]
  ------------------
 1717|   456k|                    const int drl_ctx_v1 = get_drl_context(mvstack, 0);
 1718|   456k|                    b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   456k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1719|   456k|                                      ts->cdf.m.drl_bit[drl_ctx_v1]);
 1720|   456k|                    if (b->drl_idx == NEARER_DRL && n_mvs > 2) { // NEAR or NEARISH
  ------------------
  |  Branch (1720:25): [True: 167k, False: 288k]
  |  Branch (1720:53): [True: 106k, False: 60.6k]
  ------------------
 1721|   106k|                        const int drl_ctx_v2 = get_drl_context(mvstack, 1);
 1722|   106k|                        b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   106k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1723|   106k|                                          ts->cdf.m.drl_bit[drl_ctx_v2]);
 1724|   106k|                    }
 1725|   456k|                }
 1726|   524k|                assert(b->drl_idx >= NEAREST_DRL && b->drl_idx <= NEARISH_DRL);
  ------------------
  |  Branch (1726:17): [True: 573k, False: 18.4E]
  |  Branch (1726:17): [True: 573k, False: 18.4E]
  ------------------
 1727|   573k|                if (n_mvs > 1) {
  ------------------
  |  Branch (1727:21): [True: 456k, False: 117k]
  ------------------
 1728|   456k|                    b->mv[0] = mvstack[b->drl_idx].mv.mv[0];
 1729|   456k|                } else {
 1730|   117k|                    assert(!b->drl_idx);
  ------------------
  |  Branch (1730:21): [True: 117k, False: 18.4E]
  ------------------
 1731|   117k|                    b->mv[0] = mvstack[0].mv.mv[0];
 1732|   117k|                    fix_mv_precision(f->frame_hdr, &b->mv[0]);
 1733|   117k|                }
 1734|   573k|                if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   573k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 573k]
  |  |  ------------------
  |  |   35|   573k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   573k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1735|      0|                    printf("Post-intermode[%d,drl=%d]: r=%d\n",
 1736|      0|                           b->inter_mode, b->drl_idx, ts->msac.rng);
 1737|   573k|                const int mv_prec = f->frame_hdr->hp - f->frame_hdr->force_integer_mv;
 1738|   573k|                read_mv_residual(ts, &b->mv[0], mv_prec);
 1739|   573k|                if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   573k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 573k]
  |  |  ------------------
  |  |   35|   573k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   573k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1740|      0|                    printf("Post-residualmv[mv=y:%d,x:%d]: r=%d\n",
 1741|      0|                           b->mv[0].y, b->mv[0].x, ts->msac.rng);
 1742|   573k|            }
 1743|       |
 1744|       |            // interintra flags
 1745|  2.94M|            const int ii_sz_grp = dav1d_ymode_size_context[bs];
 1746|  2.94M|            if (f->seq_hdr->inter_intra &&
  ------------------
  |  Branch (1746:17): [True: 2.54M, False: 393k]
  ------------------
 1747|  2.54M|                interintra_allowed_mask & (1 << bs) &&
  ------------------
  |  Branch (1747:17): [True: 848k, False: 1.69M]
  ------------------
 1748|   848k|                dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   848k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  |  Branch (1748:17): [True: 142k, False: 705k]
  ------------------
 1749|   848k|                                             ts->cdf.m.interintra[ii_sz_grp]))
 1750|   142k|            {
 1751|   142k|                b->interintra_mode = dav1d_msac_decode_symbol_adapt4(&ts->msac,
  ------------------
  |  |   47|   142k|#define dav1d_msac_decode_symbol_adapt4  dav1d_msac_decode_symbol_adapt4_sse2
  ------------------
 1752|   142k|                                         ts->cdf.m.interintra_mode[ii_sz_grp],
 1753|   142k|                                         N_INTER_INTRA_PRED_MODES - 1);
 1754|   142k|                const int wedge_ctx = dav1d_wedge_ctx_lut[bs];
 1755|   142k|                b->interintra_type = INTER_INTRA_BLEND +
 1756|   142k|                                     dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   142k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1757|   142k|                                         ts->cdf.m.interintra_wedge[wedge_ctx]);
 1758|   142k|                if (b->interintra_type == INTER_INTRA_WEDGE)
  ------------------
  |  Branch (1758:21): [True: 40.2k, False: 102k]
  ------------------
 1759|  40.2k|                    b->wedge_idx = dav1d_msac_decode_symbol_adapt16(&ts->msac,
  ------------------
  |  |   57|  40.2k|#define dav1d_msac_decode_symbol_adapt16(ctx, cdf, symb) ((ctx)->symbol_adapt16(ctx, cdf, symb))
  ------------------
 1760|   142k|                                       ts->cdf.m.wedge_idx[wedge_ctx], 15);
 1761|  2.79M|            } else {
 1762|  2.79M|                b->interintra_type = INTER_INTRA_NONE;
 1763|  2.79M|            }
 1764|  2.94M|            if (DEBUG_BLOCK_INFO && f->seq_hdr->inter_intra &&
  ------------------
  |  |   34|  2.94M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 2.94M]
  |  |  ------------------
  |  |   35|  2.94M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  2.94M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  |  Branch (1764:37): [True: 0, False: 0]
  ------------------
 1765|      0|                interintra_allowed_mask & (1 << bs))
  ------------------
  |  Branch (1765:17): [True: 0, False: 0]
  ------------------
 1766|      0|            {
 1767|      0|                printf("Post-interintra[t=%d,m=%d,w=%d]: r=%d\n",
 1768|      0|                       b->interintra_type, b->interintra_mode,
 1769|      0|                       b->wedge_idx, ts->msac.rng);
 1770|      0|            }
 1771|       |
 1772|       |            // motion variation
 1773|  2.94M|            if (f->frame_hdr->switchable_motion_mode &&
  ------------------
  |  Branch (1773:17): [True: 2.74M, False: 195k]
  ------------------
 1774|  2.74M|                b->interintra_type == INTER_INTRA_NONE && imin(bw4, bh4) >= 2 &&
  ------------------
  |  Branch (1774:17): [True: 2.60M, False: 136k]
  |  Branch (1774:59): [True: 2.02M, False: 579k]
  ------------------
 1775|       |                // is not warped global motion
 1776|  2.02M|                !(!f->frame_hdr->force_integer_mv && b->inter_mode == GLOBALMV &&
  ------------------
  |  Branch (1776:19): [True: 1.97M, False: 55.8k]
  |  Branch (1776:54): [True: 1.10M, False: 866k]
  ------------------
 1777|  1.10M|                  f->frame_hdr->gmv[b->ref[0]].type > DAV1D_WM_TYPE_TRANSLATION) &&
  ------------------
  |  Branch (1777:19): [True: 542k, False: 565k]
  ------------------
 1778|       |                // has overlappable neighbours
 1779|  1.48M|                ((have_left && findoddzero(&t->l.intra[by4 + 1], h4 >> 1)) ||
  ------------------
  |  Branch (1779:19): [True: 1.10M, False: 386k]
  |  Branch (1779:32): [True: 1.02M, False: 75.7k]
  ------------------
 1780|   460k|                 (have_top && findoddzero(&t->a->intra[bx4 + 1], w4 >> 1))))
  ------------------
  |  Branch (1780:19): [True: 448k, False: 11.7k]
  |  Branch (1780:31): [True: 428k, False: 20.5k]
  ------------------
 1781|  1.45M|            {
 1782|       |                // reaching here means the block allows obmc - check warp by
 1783|       |                // finding matching-ref blocks in top/left edges
 1784|  1.45M|                uint64_t mask[2] = { 0, 0 };
 1785|  1.45M|                find_matching_ref(t, intra_edge_flags, bw4, bh4, w4, h4,
 1786|  1.45M|                                  have_left, have_top, b->ref[0], mask);
 1787|  1.45M|                const int allow_warp = !f->svc[b->ref[0]][0].scale &&
  ------------------
  |  Branch (1787:40): [True: 1.27M, False: 180k]
  ------------------
 1788|  1.27M|                    !f->frame_hdr->force_integer_mv &&
  ------------------
  |  Branch (1788:21): [True: 1.24M, False: 29.4k]
  ------------------
 1789|  1.24M|                    f->frame_hdr->warp_motion && (mask[0] | mask[1]);
  ------------------
  |  Branch (1789:21): [True: 691k, False: 552k]
  |  Branch (1789:50): [True: 621k, False: 69.7k]
  ------------------
 1790|       |
 1791|  1.45M|                b->motion_mode = allow_warp ?
  ------------------
  |  Branch (1791:34): [True: 621k, False: 832k]
  ------------------
 1792|   621k|                    dav1d_msac_decode_symbol_adapt4(&ts->msac,
  ------------------
  |  |   47|   621k|#define dav1d_msac_decode_symbol_adapt4  dav1d_msac_decode_symbol_adapt4_sse2
  ------------------
 1793|   621k|                        ts->cdf.m.motion_mode[bs], 2) :
 1794|  1.45M|                    dav1d_msac_decode_bool_adapt(&ts->msac, ts->cdf.m.obmc[bs]);
  ------------------
  |  |   52|   832k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 1795|  1.45M|                if (b->motion_mode == MM_WARP) {
  ------------------
  |  Branch (1795:21): [True: 169k, False: 1.28M]
  ------------------
 1796|   169k|                    has_subpel_filter = 0;
 1797|   169k|                    derive_warpmv(t, bw4, bh4, mask, b->mv[0], &t->warpmv);
 1798|   169k|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
 1799|   169k|                    if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   169k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 169k]
  |  |  ------------------
  |  |   35|   169k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   169k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1800|      0|                        printf("[ %c%x %c%x %c%x\n  %c%x %c%x %c%x ]\n"
 1801|      0|                               "alpha=%c%x, beta=%c%x, gamma=%c%x, delta=%c%x, "
 1802|      0|                               "mv=y:%d,x:%d\n",
 1803|      0|                               signabs(t->warpmv.matrix[0]),
  ------------------
  |  | 1798|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (1798:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1804|      0|                               signabs(t->warpmv.matrix[1]),
  ------------------
  |  | 1798|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (1798:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1805|      0|                               signabs(t->warpmv.matrix[2]),
  ------------------
  |  | 1798|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (1798:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1806|      0|                               signabs(t->warpmv.matrix[3]),
  ------------------
  |  | 1798|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (1798:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1807|      0|                               signabs(t->warpmv.matrix[4]),
  ------------------
  |  | 1798|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (1798:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1808|      0|                               signabs(t->warpmv.matrix[5]),
  ------------------
  |  | 1798|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (1798:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1809|      0|                               signabs(t->warpmv.u.p.alpha),
  ------------------
  |  | 1798|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (1798:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1810|      0|                               signabs(t->warpmv.u.p.beta),
  ------------------
  |  | 1798|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (1798:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1811|      0|                               signabs(t->warpmv.u.p.gamma),
  ------------------
  |  | 1798|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (1798:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1812|      0|                               signabs(t->warpmv.u.p.delta),
  ------------------
  |  | 1798|      0|#define signabs(v) v < 0 ? '-' : ' ', abs(v)
  |  |  ------------------
  |  |  |  Branch (1798:20): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1813|      0|                               b->mv[0].y, b->mv[0].x);
 1814|   169k|#undef signabs
 1815|   169k|                    if (t->frame_thread.pass) {
  ------------------
  |  Branch (1815:25): [True: 169k, False: 169]
  ------------------
 1816|   169k|                        if (t->warpmv.type == DAV1D_WM_TYPE_AFFINE) {
  ------------------
  |  Branch (1816:29): [True: 155k, False: 14.1k]
  ------------------
 1817|   155k|                            b->matrix[0] = t->warpmv.matrix[2] - 0x10000;
 1818|   155k|                            b->matrix[1] = t->warpmv.matrix[3];
 1819|   155k|                            b->matrix[2] = t->warpmv.matrix[4];
 1820|   155k|                            b->matrix[3] = t->warpmv.matrix[5] - 0x10000;
 1821|   155k|                        } else {
 1822|  14.1k|                            b->matrix[0] = INT16_MIN;
 1823|  14.1k|                        }
 1824|   169k|                    }
 1825|   169k|                }
 1826|       |
 1827|  1.45M|                if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  1.45M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 1.45M]
  |  |  ------------------
  |  |   35|  1.45M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  1.45M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1828|      0|                    printf("Post-motionmode[%d]: r=%d [mask: 0x%" PRIx64 "/0x%"
 1829|      0|                           PRIx64 "]\n", b->motion_mode, ts->msac.rng, mask[0],
 1830|      0|                            mask[1]);
 1831|  1.48M|            } else {
 1832|  1.48M|                b->motion_mode = MM_TRANSLATION;
 1833|  1.48M|            }
 1834|  2.94M|        }
 1835|       |
 1836|       |        // subpel filter
 1837|  3.28M|        enum Dav1dFilterMode filter[2];
 1838|  3.28M|        if (f->frame_hdr->subpel_filter_mode == DAV1D_FILTER_SWITCHABLE) {
  ------------------
  |  Branch (1838:13): [True: 1.96M, False: 1.31M]
  ------------------
 1839|  1.96M|            if (has_subpel_filter) {
  ------------------
  |  Branch (1839:17): [True: 984k, False: 981k]
  ------------------
 1840|   984k|                const int comp = b->comp_type != COMP_INTER_NONE;
 1841|   984k|                const int ctx1 = get_filter_ctx(t->a, &t->l, comp, 0, b->ref[0],
 1842|   984k|                                                by4, bx4);
 1843|   984k|                filter[0] = dav1d_msac_decode_symbol_adapt4(&ts->msac,
  ------------------
  |  |   47|   984k|#define dav1d_msac_decode_symbol_adapt4  dav1d_msac_decode_symbol_adapt4_sse2
  ------------------
 1844|   984k|                               ts->cdf.m.filter[0][ctx1],
 1845|   984k|                               DAV1D_N_SWITCHABLE_FILTERS - 1);
 1846|   984k|                if (f->seq_hdr->dual_filter) {
  ------------------
  |  Branch (1846:21): [True: 548k, False: 436k]
  ------------------
 1847|   548k|                    const int ctx2 = get_filter_ctx(t->a, &t->l, comp, 1,
 1848|   548k|                                                    b->ref[0], by4, bx4);
 1849|   548k|                    if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   548k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 548k]
  |  |  ------------------
  |  |   35|   548k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   548k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1850|      0|                        printf("Post-subpel_filter1[%d,ctx=%d]: r=%d\n",
 1851|      0|                               filter[0], ctx1, ts->msac.rng);
 1852|   548k|                    filter[1] = dav1d_msac_decode_symbol_adapt4(&ts->msac,
  ------------------
  |  |   47|   548k|#define dav1d_msac_decode_symbol_adapt4  dav1d_msac_decode_symbol_adapt4_sse2
  ------------------
 1853|   548k|                                    ts->cdf.m.filter[1][ctx2],
 1854|   548k|                                    DAV1D_N_SWITCHABLE_FILTERS - 1);
 1855|   548k|                    if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   548k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 548k]
  |  |  ------------------
  |  |   35|   548k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   548k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1856|      0|                        printf("Post-subpel_filter2[%d,ctx=%d]: r=%d\n",
 1857|      0|                               filter[1], ctx2, ts->msac.rng);
 1858|   548k|                } else {
 1859|   436k|                    filter[1] = filter[0];
 1860|   436k|                    if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   436k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 436k]
  |  |  ------------------
  |  |   35|   436k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   436k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1861|      0|                        printf("Post-subpel_filter[%d,ctx=%d]: r=%d\n",
 1862|      0|                               filter[0], ctx1, ts->msac.rng);
 1863|   436k|                }
 1864|   984k|            } else {
 1865|   981k|                filter[0] = filter[1] = DAV1D_FILTER_8TAP_REGULAR;
 1866|   981k|            }
 1867|  1.96M|        } else {
 1868|  1.31M|            filter[0] = filter[1] = f->frame_hdr->subpel_filter_mode;
 1869|  1.31M|        }
 1870|  3.28M|        b->filter2d = dav1d_filter_2d[filter[1]][filter[0]];
 1871|       |
 1872|  3.28M|        read_vartx_tree(t, b, bs, bx4, by4);
 1873|       |
 1874|       |        // reconstruction
 1875|  3.28M|        if (t->frame_thread.pass == 1) {
  ------------------
  |  Branch (1875:13): [True: 3.28M, False: 570]
  ------------------
 1876|  3.28M|            f->bd_fn.read_coef_blocks(t, bs, b);
 1877|  3.28M|        } else {
 1878|    570|            if (f->bd_fn.recon_b_inter(t, bs, b)) return -1;
  ------------------
  |  Branch (1878:17): [True: 0, False: 570]
  ------------------
 1879|    570|        }
 1880|       |
 1881|  3.28M|        if (f->frame_hdr->loopfilter.level_y[0] ||
  ------------------
  |  Branch (1881:13): [True: 2.79M, False: 489k]
  ------------------
 1882|   489k|            f->frame_hdr->loopfilter.level_y[1])
  ------------------
  |  Branch (1882:13): [True: 200k, False: 288k]
  ------------------
 1883|  2.99M|        {
 1884|  2.99M|            const int is_globalmv =
 1885|  2.99M|                b->inter_mode == (is_comp ? GLOBALMV_GLOBALMV : GLOBALMV);
  ------------------
  |  Branch (1885:35): [True: 270k, False: 2.72M]
  ------------------
 1886|  2.99M|            const uint8_t (*const lf_lvls)[8][2] = (const uint8_t (*)[8][2])
 1887|  2.99M|                &ts->lflvl[b->seg_id][0][b->ref[0] + 1][!is_globalmv];
 1888|  2.99M|            const uint16_t tx_split[2] = { b->tx_split0, b->tx_split1 };
 1889|  2.99M|            enum RectTxfmSize ytx = b->max_ytx, uvtx = b->uvtx;
 1890|  2.99M|            if (f->frame_hdr->segmentation.lossless[b->seg_id]) {
  ------------------
  |  Branch (1890:17): [True: 26.5k, False: 2.96M]
  ------------------
 1891|  26.5k|                ytx  = (enum RectTxfmSize) TX_4X4;
 1892|  26.5k|                uvtx = (enum RectTxfmSize) TX_4X4;
 1893|  26.5k|            }
 1894|  2.99M|            dav1d_create_lf_mask_inter(t->lf_mask, f->lf.level, f->b4_stride, lf_lvls,
 1895|  2.99M|                                       t->bx, t->by, f->w4, f->h4, b->skip, bs,
 1896|  2.99M|                                       ytx, tx_split, uvtx, f->cur.p.layout,
 1897|  2.99M|                                       &t->a->tx_lpf_y[bx4], &t->l.tx_lpf_y[by4],
 1898|  2.99M|                                       has_chroma ? &t->a->tx_lpf_uv[cbx4] : NULL,
  ------------------
  |  Branch (1898:40): [True: 1.26M, False: 1.72M]
  ------------------
 1899|  2.99M|                                       has_chroma ? &t->l.tx_lpf_uv[cby4] : NULL);
  ------------------
  |  Branch (1899:40): [True: 1.26M, False: 1.72M]
  ------------------
 1900|  2.99M|        }
 1901|       |
 1902|       |        // context updates
 1903|  3.28M|        if (is_comp)
  ------------------
  |  Branch (1903:13): [True: 343k, False: 2.94M]
  ------------------
 1904|   343k|            splat_tworef_mv(f->c, t, bs, b, bw4, bh4);
 1905|  2.94M|        else
 1906|  2.94M|            splat_oneref_mv(f->c, t, bs, b, bw4, bh4);
 1907|  3.28M|        BlockContext *edge = t->a;
 1908|  9.82M|        for (int i = 0, off = bx4; i < 2; i++, off = by4, edge = &t->l) {
  ------------------
  |  Branch (1908:36): [True: 6.53M, False: 3.28M]
  ------------------
 1909|  6.53M|#define set_ctx(rep_macro) \
 1910|  6.53M|            rep_macro(edge->seg_pred, off, seg_pred); \
 1911|  6.53M|            rep_macro(edge->skip_mode, off, b->skip_mode); \
 1912|  6.53M|            rep_macro(edge->intra, off, 0); \
 1913|  6.53M|            rep_macro(edge->skip, off, b->skip); \
 1914|  6.53M|            rep_macro(edge->pal_sz, off, 0); \
 1915|       |            /* see aomedia bug 2183 for why this is outside if (has_chroma) */ \
 1916|  6.53M|            rep_macro(t->pal_sz_uv[i], off, 0); \
 1917|  6.53M|            rep_macro(edge->tx_intra, off, b_dim[2 + i]); \
 1918|  6.53M|            rep_macro(edge->comp_type, off, b->comp_type); \
 1919|  6.53M|            rep_macro(edge->filter[0], off, filter[0]); \
 1920|  6.53M|            rep_macro(edge->filter[1], off, filter[1]); \
 1921|  6.53M|            rep_macro(edge->mode, off, b->inter_mode); \
 1922|  6.53M|            rep_macro(edge->ref[0], off, b->ref[0]); \
 1923|  6.53M|            rep_macro(edge->ref[1], off, ((uint8_t) b->ref[1]))
 1924|  6.53M|            case_set(b_dim[2 + i]);
  ------------------
  |  |   70|  6.53M|    switch (var) { \
  |  |   71|   783k|    case 0: set_ctx(set_ctx1); break; \
  |  |  ------------------
  |  |  |  | 1910|   783k|            rep_macro(edge->seg_pred, off, seg_pred); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   783k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   783k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1911|   783k|            rep_macro(edge->skip_mode, off, b->skip_mode); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   783k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   783k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1912|   783k|            rep_macro(edge->intra, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   783k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   783k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1913|   783k|            rep_macro(edge->skip, off, b->skip); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   783k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   783k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1914|   783k|            rep_macro(edge->pal_sz, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   783k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   783k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1915|   783k|            /* see aomedia bug 2183 for why this is outside if (has_chroma) */ \
  |  |  |  | 1916|   783k|            rep_macro(t->pal_sz_uv[i], off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   783k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   783k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1917|   783k|            rep_macro(edge->tx_intra, off, b_dim[2 + i]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   783k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   783k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1918|   783k|            rep_macro(edge->comp_type, off, b->comp_type); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   783k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   783k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1919|   783k|            rep_macro(edge->filter[0], off, filter[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   783k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   783k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1920|   783k|            rep_macro(edge->filter[1], off, filter[1]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   783k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   783k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1921|   783k|            rep_macro(edge->mode, off, b->inter_mode); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   783k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   783k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1922|   783k|            rep_macro(edge->ref[0], off, b->ref[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   783k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   783k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1923|   783k|            rep_macro(edge->ref[1], off, ((uint8_t) b->ref[1]))
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   783k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   783k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (71:5): [True: 783k, False: 5.75M]
  |  |  ------------------
  |  |   72|  1.52M|    case 1: set_ctx(set_ctx2); break; \
  |  |  ------------------
  |  |  |  | 1910|  1.52M|            rep_macro(edge->seg_pred, off, seg_pred); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|  1.52M|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  1.52M|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1911|  1.52M|            rep_macro(edge->skip_mode, off, b->skip_mode); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|  1.52M|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  1.52M|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1912|  1.52M|            rep_macro(edge->intra, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|  1.52M|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  1.52M|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1913|  1.52M|            rep_macro(edge->skip, off, b->skip); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|  1.52M|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  1.52M|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1914|  1.52M|            rep_macro(edge->pal_sz, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|  1.52M|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  1.52M|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1915|  1.52M|            /* see aomedia bug 2183 for why this is outside if (has_chroma) */ \
  |  |  |  | 1916|  1.52M|            rep_macro(t->pal_sz_uv[i], off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|  1.52M|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  1.52M|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1917|  1.52M|            rep_macro(edge->tx_intra, off, b_dim[2 + i]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|  1.52M|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  1.52M|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1918|  1.52M|            rep_macro(edge->comp_type, off, b->comp_type); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|  1.52M|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  1.52M|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1919|  1.52M|            rep_macro(edge->filter[0], off, filter[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|  1.52M|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  1.52M|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1920|  1.52M|            rep_macro(edge->filter[1], off, filter[1]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|  1.52M|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  1.52M|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1921|  1.52M|            rep_macro(edge->mode, off, b->inter_mode); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|  1.52M|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  1.52M|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1922|  1.52M|            rep_macro(edge->ref[0], off, b->ref[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|  1.52M|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  1.52M|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1923|  1.52M|            rep_macro(edge->ref[1], off, ((uint8_t) b->ref[1]))
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|  1.52M|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  1.52M|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (72:5): [True: 1.52M, False: 5.01M]
  |  |  ------------------
  |  |   73|  1.31M|    case 2: set_ctx(set_ctx4); break; \
  |  |  ------------------
  |  |  |  | 1910|  1.31M|            rep_macro(edge->seg_pred, off, seg_pred); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|  1.31M|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|  1.31M|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1911|  1.31M|            rep_macro(edge->skip_mode, off, b->skip_mode); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|  1.31M|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|  1.31M|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1912|  1.31M|            rep_macro(edge->intra, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|  1.31M|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|  1.31M|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1913|  1.31M|            rep_macro(edge->skip, off, b->skip); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|  1.31M|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|  1.31M|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1914|  1.31M|            rep_macro(edge->pal_sz, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|  1.31M|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|  1.31M|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1915|  1.31M|            /* see aomedia bug 2183 for why this is outside if (has_chroma) */ \
  |  |  |  | 1916|  1.31M|            rep_macro(t->pal_sz_uv[i], off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|  1.31M|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|  1.31M|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1917|  1.31M|            rep_macro(edge->tx_intra, off, b_dim[2 + i]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|  1.31M|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|  1.31M|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1918|  1.31M|            rep_macro(edge->comp_type, off, b->comp_type); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|  1.31M|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|  1.31M|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1919|  1.31M|            rep_macro(edge->filter[0], off, filter[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|  1.31M|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|  1.31M|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1920|  1.31M|            rep_macro(edge->filter[1], off, filter[1]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|  1.31M|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|  1.31M|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1921|  1.31M|            rep_macro(edge->mode, off, b->inter_mode); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|  1.31M|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|  1.31M|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1922|  1.31M|            rep_macro(edge->ref[0], off, b->ref[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|  1.31M|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|  1.31M|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1923|  1.31M|            rep_macro(edge->ref[1], off, ((uint8_t) b->ref[1]))
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|  1.31M|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|  1.31M|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (73:5): [True: 1.31M, False: 5.22M]
  |  |  ------------------
  |  |   74|   503k|    case 3: set_ctx(set_ctx8); break; \
  |  |  ------------------
  |  |  |  | 1910|   503k|            rep_macro(edge->seg_pred, off, seg_pred); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   503k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   503k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1911|   503k|            rep_macro(edge->skip_mode, off, b->skip_mode); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   503k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   503k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1912|   503k|            rep_macro(edge->intra, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   503k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   503k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1913|   503k|            rep_macro(edge->skip, off, b->skip); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   503k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   503k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1914|   503k|            rep_macro(edge->pal_sz, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   503k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   503k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1915|   503k|            /* see aomedia bug 2183 for why this is outside if (has_chroma) */ \
  |  |  |  | 1916|   503k|            rep_macro(t->pal_sz_uv[i], off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   503k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   503k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1917|   503k|            rep_macro(edge->tx_intra, off, b_dim[2 + i]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   503k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   503k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1918|   503k|            rep_macro(edge->comp_type, off, b->comp_type); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   503k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   503k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1919|   503k|            rep_macro(edge->filter[0], off, filter[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   503k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   503k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1920|   503k|            rep_macro(edge->filter[1], off, filter[1]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   503k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   503k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1921|   503k|            rep_macro(edge->mode, off, b->inter_mode); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   503k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   503k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1922|   503k|            rep_macro(edge->ref[0], off, b->ref[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   503k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   503k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1923|   503k|            rep_macro(edge->ref[1], off, ((uint8_t) b->ref[1]))
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   503k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   503k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (74:5): [True: 503k, False: 6.03M]
  |  |  ------------------
  |  |   75|  1.77M|    case 4: set_ctx(set_ctx16); break; \
  |  |  ------------------
  |  |  |  | 1910|  1.77M|            rep_macro(edge->seg_pred, off, seg_pred); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|  1.77M|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|  1.77M|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|  1.77M|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|  1.77M|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 1.77M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1911|  1.77M|            rep_macro(edge->skip_mode, off, b->skip_mode); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|  1.77M|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|  1.77M|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|  1.77M|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|  1.77M|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 1.77M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1912|  1.77M|            rep_macro(edge->intra, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|  1.77M|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|  1.77M|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|  1.77M|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|  1.77M|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 1.77M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1913|  1.77M|            rep_macro(edge->skip, off, b->skip); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|  1.77M|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|  1.77M|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|  1.77M|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|  1.77M|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 1.77M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1914|  1.77M|            rep_macro(edge->pal_sz, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|  1.77M|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|  1.77M|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|  1.77M|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|  1.77M|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 1.77M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1915|  1.77M|            /* see aomedia bug 2183 for why this is outside if (has_chroma) */ \
  |  |  |  | 1916|  1.77M|            rep_macro(t->pal_sz_uv[i], off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|  1.77M|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|  1.77M|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|  1.77M|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|  1.77M|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 1.77M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1917|  1.77M|            rep_macro(edge->tx_intra, off, b_dim[2 + i]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|  1.77M|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|  1.77M|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|  1.77M|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|  1.77M|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 1.77M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1918|  1.77M|            rep_macro(edge->comp_type, off, b->comp_type); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|  1.77M|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|  1.77M|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|  1.77M|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|  1.77M|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 1.77M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1919|  1.77M|            rep_macro(edge->filter[0], off, filter[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|  1.77M|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|  1.77M|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|  1.77M|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|  1.77M|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 1.77M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1920|  1.77M|            rep_macro(edge->filter[1], off, filter[1]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|  1.77M|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|  1.77M|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|  1.77M|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|  1.77M|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 1.77M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1921|  1.77M|            rep_macro(edge->mode, off, b->inter_mode); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|  1.77M|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|  1.77M|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|  1.77M|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|  1.77M|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 1.77M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1922|  1.77M|            rep_macro(edge->ref[0], off, b->ref[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|  1.77M|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|  1.77M|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|  1.77M|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|  1.77M|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 1.77M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1923|  1.77M|            rep_macro(edge->ref[1], off, ((uint8_t) b->ref[1]))
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|  1.77M|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|  1.77M|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|  1.77M|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|  1.77M|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 1.77M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (75:5): [True: 1.77M, False: 4.76M]
  |  |  ------------------
  |  |   76|   639k|    case 5: set_ctx(set_ctx32); break; \
  |  |  ------------------
  |  |  |  | 1910|   639k|            rep_macro(edge->seg_pred, off, seg_pred); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|   639k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|   639k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|   639k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|   639k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 639k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1911|   639k|            rep_macro(edge->skip_mode, off, b->skip_mode); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|   639k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|   639k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|   639k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|   639k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 639k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1912|   639k|            rep_macro(edge->intra, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|   639k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|   639k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|   639k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|   639k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 639k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1913|   639k|            rep_macro(edge->skip, off, b->skip); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|   639k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|   639k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|   639k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|   639k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 639k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1914|   639k|            rep_macro(edge->pal_sz, off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|   639k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|   639k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|   639k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|   639k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 639k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1915|   639k|            /* see aomedia bug 2183 for why this is outside if (has_chroma) */ \
  |  |  |  | 1916|   639k|            rep_macro(t->pal_sz_uv[i], off, 0); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|   639k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|   639k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|   639k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|   639k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 639k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1917|   639k|            rep_macro(edge->tx_intra, off, b_dim[2 + i]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|   639k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|   639k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|   639k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|   639k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 639k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1918|   639k|            rep_macro(edge->comp_type, off, b->comp_type); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|   639k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|   639k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|   639k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|   639k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 639k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1919|   639k|            rep_macro(edge->filter[0], off, filter[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|   639k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|   639k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|   639k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|   639k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 639k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1920|   639k|            rep_macro(edge->filter[1], off, filter[1]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|   639k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|   639k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|   639k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|   639k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 639k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1921|   639k|            rep_macro(edge->mode, off, b->inter_mode); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|   639k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|   639k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|   639k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|   639k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 639k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1922|   639k|            rep_macro(edge->ref[0], off, b->ref[0]); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|   639k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|   639k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|   639k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|   639k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 639k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1923|   639k|            rep_macro(edge->ref[1], off, ((uint8_t) b->ref[1]))
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|   639k|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|   639k|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|   639k|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|   639k|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 639k]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  |  Branch (76:5): [True: 639k, False: 5.89M]
  |  |  ------------------
  |  |   77|      0|    default: assert(0); \
  |  |  ------------------
  |  |  |  Branch (77:5): [True: 0, False: 6.53M]
  |  |  ------------------
  |  |   78|  6.53M|    }
  ------------------
  |  Branch (1924:13): [Folded, False: 0]
  ------------------
 1925|  6.53M|#undef set_ctx
 1926|  6.53M|        }
 1927|  3.28M|        if (has_chroma) {
  ------------------
  |  Branch (1927:13): [True: 1.47M, False: 1.80M]
  ------------------
 1928|  1.47M|            dav1d_memset_pow2[ulog2(cbw4)](&t->a->uvmode[cbx4], DC_PRED);
 1929|  1.47M|            dav1d_memset_pow2[ulog2(cbh4)](&t->l.uvmode[cby4], DC_PRED);
 1930|  1.47M|        }
 1931|  3.28M|    }
 1932|       |
 1933|       |    // update contexts
 1934|  9.05M|    if (f->frame_hdr->segmentation.enabled &&
  ------------------
  |  Branch (1934:9): [True: 2.65M, False: 6.39M]
  ------------------
 1935|  2.65M|        f->frame_hdr->segmentation.update_map)
  ------------------
  |  Branch (1935:9): [True: 1.97M, False: 680k]
  ------------------
 1936|  1.97M|    {
 1937|  1.97M|        uint8_t *seg_ptr = &f->cur_segmap[t->by * f->b4_stride + t->bx];
 1938|  1.97M|#define set_ctx(rep_macro) \
 1939|  1.97M|        for (int y = 0; y < bh4; y++) { \
 1940|  1.97M|            rep_macro(seg_ptr, 0, b->seg_id); \
 1941|  1.97M|            seg_ptr += f->b4_stride; \
 1942|  1.97M|        }
 1943|  1.97M|        case_set(b_dim[2]);
  ------------------
  |  |   70|  1.97M|    switch (var) { \
  |  |   71|   166k|    case 0: set_ctx(set_ctx1); break; \
  |  |  ------------------
  |  |  |  | 1939|   502k|        for (int y = 0; y < bh4; y++) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (1939:25): [True: 336k, False: 166k]
  |  |  |  |  ------------------
  |  |  |  | 1940|   336k|            rep_macro(seg_ptr, 0, b->seg_id); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   71|   336k|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|   336k|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1941|   336k|            seg_ptr += f->b4_stride; \
  |  |  |  | 1942|   336k|        }
  |  |  ------------------
  |  |  |  Branch (71:5): [True: 166k, False: 1.81M]
  |  |  ------------------
  |  |   72|   292k|    case 1: set_ctx(set_ctx2); break; \
  |  |  ------------------
  |  |  |  | 1939|  1.10M|        for (int y = 0; y < bh4; y++) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (1939:25): [True: 809k, False: 292k]
  |  |  |  |  ------------------
  |  |  |  | 1940|   809k|            rep_macro(seg_ptr, 0, b->seg_id); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   72|   809k|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|   809k|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1941|   809k|            seg_ptr += f->b4_stride; \
  |  |  |  | 1942|   809k|        }
  |  |  ------------------
  |  |  |  Branch (72:5): [True: 292k, False: 1.68M]
  |  |  ------------------
  |  |   73|   362k|    case 2: set_ctx(set_ctx4); break; \
  |  |  ------------------
  |  |  |  | 1939|  2.11M|        for (int y = 0; y < bh4; y++) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (1939:25): [True: 1.75M, False: 362k]
  |  |  |  |  ------------------
  |  |  |  | 1940|  1.75M|            rep_macro(seg_ptr, 0, b->seg_id); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   73|  1.75M|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|  1.75M|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1941|  1.75M|            seg_ptr += f->b4_stride; \
  |  |  |  | 1942|  1.75M|        }
  |  |  ------------------
  |  |  |  Branch (73:5): [True: 362k, False: 1.61M]
  |  |  ------------------
  |  |   74|   129k|    case 3: set_ctx(set_ctx8); break; \
  |  |  ------------------
  |  |  |  | 1939|   855k|        for (int y = 0; y < bh4; y++) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (1939:25): [True: 725k, False: 129k]
  |  |  |  |  ------------------
  |  |  |  | 1940|   725k|            rep_macro(seg_ptr, 0, b->seg_id); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   74|   725k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   725k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1941|   725k|            seg_ptr += f->b4_stride; \
  |  |  |  | 1942|   725k|        }
  |  |  ------------------
  |  |  |  Branch (74:5): [True: 129k, False: 1.84M]
  |  |  ------------------
  |  |   75|   652k|    case 4: set_ctx(set_ctx16); break; \
  |  |  ------------------
  |  |  |  | 1939|  10.9M|        for (int y = 0; y < bh4; y++) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (1939:25): [True: 10.3M, False: 652k]
  |  |  |  |  ------------------
  |  |  |  | 1940|  10.3M|            rep_macro(seg_ptr, 0, b->seg_id); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   75|  10.3M|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|  10.3M|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|  10.3M|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|  10.3M|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 10.3M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1941|  10.3M|            seg_ptr += f->b4_stride; \
  |  |  |  | 1942|  10.3M|        }
  |  |  ------------------
  |  |  |  Branch (75:5): [True: 652k, False: 1.32M]
  |  |  ------------------
  |  |   76|   376k|    case 5: set_ctx(set_ctx32); break; \
  |  |  ------------------
  |  |  |  | 1939|  12.3M|        for (int y = 0; y < bh4; y++) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (1939:25): [True: 11.9M, False: 376k]
  |  |  |  |  ------------------
  |  |  |  | 1940|  11.9M|            rep_macro(seg_ptr, 0, b->seg_id); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   76|  11.9M|    case 5: set_ctx(set_ctx32); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   66|  11.9M|#define set_ctx32(var, off, val) do { \
  |  |  |  |  |  |  |  |   67|  11.9M|        memset(&(var)[off], val, 32); \
  |  |  |  |  |  |  |  |   68|  11.9M|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (68:14): [Folded, False: 11.9M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  | 1941|  11.9M|            seg_ptr += f->b4_stride; \
  |  |  |  | 1942|  11.9M|        }
  |  |  ------------------
  |  |  |  Branch (76:5): [True: 376k, False: 1.60M]
  |  |  ------------------
  |  |   77|      0|    default: assert(0); \
  |  |  ------------------
  |  |  |  Branch (77:5): [True: 0, False: 1.97M]
  |  |  ------------------
  |  |   78|  1.97M|    }
  ------------------
  |  Branch (1943:9): [Folded, False: 0]
  ------------------
 1944|  1.97M|#undef set_ctx
 1945|  1.97M|    }
 1946|  9.05M|    if (!b->skip) {
  ------------------
  |  Branch (1946:9): [True: 4.98M, False: 4.07M]
  ------------------
 1947|  4.98M|        uint16_t (*noskip_mask)[2] = &t->lf_mask->noskip_mask[by4 >> 1];
 1948|  4.98M|        const unsigned mask = (~0U >> (32 - bw4)) << (bx4 & 15);
 1949|  4.98M|        const int bx_idx = (bx4 & 16) >> 4;
 1950|  16.2M|        for (int y = 0; y < bh4; y += 2, noskip_mask++) {
  ------------------
  |  Branch (1950:25): [True: 11.2M, False: 4.98M]
  ------------------
 1951|  11.2M|            (*noskip_mask)[bx_idx] |= mask;
 1952|  11.2M|            if (bw4 == 32) // this should be mask >> 16, but it's 0xffffffff anyway
  ------------------
  |  Branch (1952:17): [True: 1.34M, False: 9.93M]
  ------------------
 1953|  1.34M|                (*noskip_mask)[1] |= mask;
 1954|  11.2M|        }
 1955|  4.98M|    }
 1956|       |
 1957|  9.06M|    if (t->frame_thread.pass == 1 && !b->intra && IS_INTER_OR_SWITCH(f->frame_hdr)) {
  ------------------
  |  |   36|  4.01M|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 3.28M, False: 734k]
  |  |  ------------------
  ------------------
  |  Branch (1957:9): [True: 9.06M, False: 18.4E]
  |  Branch (1957:38): [True: 4.01M, False: 5.05M]
  ------------------
 1958|  3.28M|        const int sby = (t->by - ts->tiling.row_start) >> f->sb_shift;
 1959|  3.28M|        int (*const lowest_px)[2] = ts->lowest_pixel[sby];
 1960|       |
 1961|       |        // keep track of motion vectors for each reference
 1962|  3.28M|        if (b->comp_type == COMP_INTER_NONE) {
  ------------------
  |  Branch (1962:13): [True: 2.93M, False: 342k]
  ------------------
 1963|       |            // y
 1964|  2.93M|            if (imin(bw4, bh4) > 1 &&
  ------------------
  |  Branch (1964:17): [True: 2.29M, False: 641k]
  ------------------
 1965|  2.29M|                ((b->inter_mode == GLOBALMV && f->gmv_warp_allowed[b->ref[0]]) ||
  ------------------
  |  Branch (1965:19): [True: 1.23M, False: 1.05M]
  |  Branch (1965:48): [True: 500k, False: 738k]
  ------------------
 1966|  1.79M|                 (b->motion_mode == MM_WARP && t->warpmv.type > DAV1D_WM_TYPE_TRANSLATION)))
  ------------------
  |  Branch (1966:19): [True: 169k, False: 1.62M]
  |  Branch (1966:48): [True: 155k, False: 14.1k]
  ------------------
 1967|   655k|            {
 1968|   655k|                affine_lowest_px_luma(t, &lowest_px[b->ref[0]][0], b_dim,
 1969|   655k|                                      b->motion_mode == MM_WARP ? &t->warpmv :
  ------------------
  |  Branch (1969:39): [True: 155k, False: 500k]
  ------------------
 1970|   655k|                                      &f->frame_hdr->gmv[b->ref[0]]);
 1971|  2.28M|            } else {
 1972|  2.28M|                mc_lowest_px(&lowest_px[b->ref[0]][0], t->by, bh4, b->mv[0].y,
 1973|  2.28M|                             0, &f->svc[b->ref[0]][1]);
 1974|  2.28M|                if (b->motion_mode == MM_OBMC) {
  ------------------
  |  Branch (1974:21): [True: 420k, False: 1.86M]
  ------------------
 1975|   420k|                    obmc_lowest_px(t, lowest_px, 0, b_dim, bx4, by4, w4, h4);
 1976|   420k|                }
 1977|  2.28M|            }
 1978|       |
 1979|       |            // uv
 1980|  2.93M|            if (has_chroma) {
  ------------------
  |  Branch (1980:17): [True: 1.30M, False: 1.63M]
  ------------------
 1981|       |                // sub8x8 derivation
 1982|  1.30M|                int is_sub8x8 = bw4 == ss_hor || bh4 == ss_ver;
  ------------------
  |  Branch (1982:33): [True: 83.0k, False: 1.22M]
  |  Branch (1982:50): [True: 65.2k, False: 1.15M]
  ------------------
 1983|  1.30M|                refmvs_block *const *r;
 1984|  1.30M|                if (is_sub8x8) {
  ------------------
  |  Branch (1984:21): [True: 148k, False: 1.15M]
  ------------------
 1985|   148k|                    assert(ss_hor == 1);
  ------------------
  |  Branch (1985:21): [True: 148k, False: 1]
  ------------------
 1986|   148k|                    r = &t->rt.r[(t->by & 31) + 5];
 1987|   148k|                    if (bw4 == 1) is_sub8x8 &= r[0][t->bx - 1].ref.ref[0] > 0;
  ------------------
  |  Branch (1987:25): [True: 83.3k, False: 65.3k]
  ------------------
 1988|   148k|                    if (bh4 == ss_ver) is_sub8x8 &= r[-1][t->bx].ref.ref[0] > 0;
  ------------------
  |  Branch (1988:25): [True: 83.7k, False: 64.9k]
  ------------------
 1989|   148k|                    if (bw4 == 1 && bh4 == ss_ver)
  ------------------
  |  Branch (1989:25): [True: 83.3k, False: 65.3k]
  |  Branch (1989:37): [True: 18.3k, False: 64.9k]
  ------------------
 1990|  18.3k|                        is_sub8x8 &= r[-1][t->bx - 1].ref.ref[0] > 0;
 1991|   148k|                }
 1992|       |
 1993|       |                // chroma prediction
 1994|  1.30M|                if (is_sub8x8) {
  ------------------
  |  Branch (1994:21): [True: 130k, False: 1.17M]
  ------------------
 1995|   130k|                    assert(ss_hor == 1);
  ------------------
  |  Branch (1995:21): [True: 130k, False: 18.4E]
  ------------------
 1996|   130k|                    if (bw4 == 1 && bh4 == ss_ver) {
  ------------------
  |  Branch (1996:25): [True: 72.9k, False: 57.1k]
  |  Branch (1996:37): [True: 14.5k, False: 58.4k]
  ------------------
 1997|  14.5k|                        const refmvs_block *const rr = &r[-1][t->bx - 1];
 1998|  14.5k|                        mc_lowest_px(&lowest_px[rr->ref.ref[0] - 1][1],
 1999|  14.5k|                                     t->by - 1, bh4, rr->mv.mv[0].y, ss_ver,
 2000|  14.5k|                                     &f->svc[rr->ref.ref[0] - 1][1]);
 2001|  14.5k|                    }
 2002|   130k|                    if (bw4 == 1) {
  ------------------
  |  Branch (2002:25): [True: 72.9k, False: 57.1k]
  ------------------
 2003|  72.9k|                        const refmvs_block *const rr = &r[0][t->bx - 1];
 2004|  72.9k|                        mc_lowest_px(&lowest_px[rr->ref.ref[0] - 1][1],
 2005|  72.9k|                                     t->by, bh4, rr->mv.mv[0].y, ss_ver,
 2006|  72.9k|                                     &f->svc[rr->ref.ref[0] - 1][1]);
 2007|  72.9k|                    }
 2008|   130k|                    if (bh4 == ss_ver) {
  ------------------
  |  Branch (2008:25): [True: 71.6k, False: 58.4k]
  ------------------
 2009|  71.6k|                        const refmvs_block *const rr = &r[-1][t->bx];
 2010|  71.6k|                        mc_lowest_px(&lowest_px[rr->ref.ref[0] - 1][1],
 2011|  71.6k|                                     t->by - 1, bh4, rr->mv.mv[0].y, ss_ver,
 2012|  71.6k|                                     &f->svc[rr->ref.ref[0] - 1][1]);
 2013|  71.6k|                    }
 2014|   130k|                    mc_lowest_px(&lowest_px[b->ref[0]][1], t->by, bh4,
 2015|   130k|                                 b->mv[0].y, ss_ver, &f->svc[b->ref[0]][1]);
 2016|  1.17M|                } else {
 2017|  1.17M|                    if (imin(cbw4, cbh4) > 1 &&
  ------------------
  |  Branch (2017:25): [True: 613k, False: 560k]
  ------------------
 2018|   613k|                        ((b->inter_mode == GLOBALMV && f->gmv_warp_allowed[b->ref[0]]) ||
  ------------------
  |  Branch (2018:27): [True: 120k, False: 492k]
  |  Branch (2018:56): [True: 22.0k, False: 98.3k]
  ------------------
 2019|   591k|                         (b->motion_mode == MM_WARP && t->warpmv.type > DAV1D_WM_TYPE_TRANSLATION)))
  ------------------
  |  Branch (2019:27): [True: 64.9k, False: 526k]
  |  Branch (2019:56): [True: 62.6k, False: 2.38k]
  ------------------
 2020|  84.6k|                    {
 2021|  84.6k|                        affine_lowest_px_chroma(t, &lowest_px[b->ref[0]][1], b_dim,
 2022|  84.6k|                                                b->motion_mode == MM_WARP ? &t->warpmv :
  ------------------
  |  Branch (2022:49): [True: 62.6k, False: 22.0k]
  ------------------
 2023|  84.6k|                                                &f->frame_hdr->gmv[b->ref[0]]);
 2024|  1.08M|                    } else {
 2025|  1.08M|                        mc_lowest_px(&lowest_px[b->ref[0]][1],
 2026|  1.08M|                                     t->by & ~ss_ver, bh4 << (bh4 == ss_ver),
 2027|  1.08M|                                     b->mv[0].y, ss_ver, &f->svc[b->ref[0]][1]);
 2028|  1.08M|                        if (b->motion_mode == MM_OBMC) {
  ------------------
  |  Branch (2028:29): [True: 333k, False: 755k]
  ------------------
 2029|   333k|                            obmc_lowest_px(t, lowest_px, 1, b_dim, bx4, by4, w4, h4);
 2030|   333k|                        }
 2031|  1.08M|                    }
 2032|  1.17M|                }
 2033|  1.30M|            }
 2034|  2.93M|        } else {
 2035|       |            // y
 2036|  1.02M|            for (int i = 0; i < 2; i++) {
  ------------------
  |  Branch (2036:29): [True: 685k, False: 342k]
  ------------------
 2037|   685k|                if (b->inter_mode == GLOBALMV_GLOBALMV && f->gmv_warp_allowed[b->ref[i]]) {
  ------------------
  |  Branch (2037:21): [True: 64.7k, False: 620k]
  |  Branch (2037:59): [True: 17.0k, False: 47.6k]
  ------------------
 2038|  17.0k|                    affine_lowest_px_luma(t, &lowest_px[b->ref[i]][0], b_dim,
 2039|  17.0k|                                          &f->frame_hdr->gmv[b->ref[i]]);
 2040|   668k|                } else {
 2041|   668k|                    mc_lowest_px(&lowest_px[b->ref[i]][0], t->by, bh4,
 2042|   668k|                                 b->mv[i].y, 0, &f->svc[b->ref[i]][1]);
 2043|   668k|                }
 2044|   685k|            }
 2045|       |
 2046|       |            // uv
 2047|   521k|            if (has_chroma) for (int i = 0; i < 2; i++) {
  ------------------
  |  Branch (2047:17): [True: 173k, False: 168k]
  |  Branch (2047:45): [True: 347k, False: 173k]
  ------------------
 2048|   347k|                if (b->inter_mode == GLOBALMV_GLOBALMV &&
  ------------------
  |  Branch (2048:21): [True: 27.3k, False: 319k]
  ------------------
 2049|  27.3k|                    imin(cbw4, cbh4) > 1 && f->gmv_warp_allowed[b->ref[i]])
  ------------------
  |  Branch (2049:21): [True: 14.7k, False: 12.6k]
  |  Branch (2049:45): [True: 3.98k, False: 10.7k]
  ------------------
 2050|  3.98k|                {
 2051|  3.98k|                    affine_lowest_px_chroma(t, &lowest_px[b->ref[i]][1], b_dim,
 2052|  3.98k|                                            &f->frame_hdr->gmv[b->ref[i]]);
 2053|   343k|                } else {
 2054|   343k|                    mc_lowest_px(&lowest_px[b->ref[i]][1], t->by, bh4,
 2055|   343k|                                 b->mv[i].y, ss_ver, &f->svc[b->ref[i]][1]);
 2056|   343k|                }
 2057|   347k|            }
 2058|   342k|        }
 2059|  3.28M|    }
 2060|       |
 2061|  9.05M|    return 0;
 2062|  9.05M|}
decode.c:get_prev_frame_segid:
  499|   567k|{
  500|   567k|    assert(f->frame_hdr->primary_ref_frame != DAV1D_PRIMARY_REF_NONE);
  ------------------
  |  Branch (500:5): [True: 567k, False: 18.4E]
  ------------------
  501|       |
  502|   567k|    unsigned seg_id = 8;
  503|   567k|    ref_seg_map += by * stride + bx;
  504|   665k|    do {
  505|  8.75M|        for (int x = 0; x < w4; x++)
  ------------------
  |  Branch (505:25): [True: 8.08M, False: 665k]
  ------------------
  506|  8.08M|            seg_id = imin(seg_id, ref_seg_map[x]);
  507|   665k|        ref_seg_map += stride;
  508|   665k|    } while (--h4 > 0 && seg_id);
  ------------------
  |  Branch (508:14): [True: 627k, False: 37.5k]
  |  Branch (508:26): [True: 97.5k, False: 530k]
  ------------------
  509|   567k|    assert(seg_id < 8);
  ------------------
  |  Branch (509:5): [True: 564k, False: 2.77k]
  ------------------
  510|       |
  511|   564k|    return seg_id;
  512|   567k|}
decode.c:neg_deinterleave:
  169|  1.75M|static int neg_deinterleave(int diff, int ref, int max) {
  170|  1.75M|    if (!ref) return diff;
  ------------------
  |  Branch (170:9): [True: 1.43M, False: 322k]
  ------------------
  171|   322k|    if (ref >= (max - 1)) return max - diff - 1;
  ------------------
  |  Branch (171:9): [True: 73.7k, False: 248k]
  ------------------
  172|   248k|    if (2 * ref < max) {
  ------------------
  |  Branch (172:9): [True: 165k, False: 82.7k]
  ------------------
  173|   165k|        if (diff <= 2 * ref) {
  ------------------
  |  Branch (173:13): [True: 132k, False: 33.1k]
  ------------------
  174|   132k|            if (diff & 1)
  ------------------
  |  Branch (174:17): [True: 16.7k, False: 115k]
  ------------------
  175|  16.7k|                return ref + ((diff + 1) >> 1);
  176|   115k|            else
  177|   115k|                return ref - (diff >> 1);
  178|   132k|        }
  179|  33.1k|        return diff;
  180|   165k|    } else {
  181|  82.7k|        if (diff <= 2 * (max - ref - 1)) {
  ------------------
  |  Branch (181:13): [True: 69.5k, False: 13.2k]
  ------------------
  182|  69.5k|            if (diff & 1)
  ------------------
  |  Branch (182:17): [True: 10.8k, False: 58.6k]
  ------------------
  183|  10.8k|                return ref + ((diff + 1) >> 1);
  184|  58.6k|            else
  185|  58.6k|                return ref - (diff >> 1);
  186|  69.5k|        }
  187|  13.2k|        return max - (diff + 1);
  188|  82.7k|    }
  189|   248k|}
decode.c:read_pal_indices:
  419|   144k|{
  420|   144k|    Dav1dTileState *const ts = t->ts;
  421|   144k|    const ptrdiff_t stride = bw4 * 4;
  422|   144k|    assert(pal_idx);
  ------------------
  |  Branch (422:5): [True: 144k, False: 15]
  ------------------
  423|   144k|    uint8_t *const pal_tmp = t->scratch.pal_idx_uv;
  424|   144k|    pal_tmp[0] = dav1d_msac_decode_uniform(&ts->msac, pal_sz);
  425|   144k|    uint16_t (*const color_map_cdf)[8] =
  426|   144k|        ts->cdf.m.color_map[pl][pal_sz - 2];
  427|   144k|    uint8_t (*const order)[8] = t->scratch.pal_order;
  428|   144k|    uint8_t *const ctx = t->scratch.pal_ctx;
  429|  4.15M|    for (int i = 1; i < 4 * (w4 + h4) - 1; i++) {
  ------------------
  |  Branch (429:21): [True: 4.00M, False: 144k]
  ------------------
  430|       |        // top/left-to-bottom/right diagonals ("wave-front")
  431|  4.00M|        const int first = imin(i, w4 * 4 - 1);
  432|  4.00M|        const int last = imax(0, i - h4 * 4 + 1);
  433|  4.00M|        order_palette(pal_tmp, stride, i, first, last, order, ctx);
  434|  44.7M|        for (int j = first, m = 0; j >= last; j--, m++) {
  ------------------
  |  Branch (434:36): [True: 40.7M, False: 4.00M]
  ------------------
  435|  40.7M|            const int color_idx = dav1d_msac_decode_symbol_adapt8(&ts->msac,
  ------------------
  |  |   48|  40.7M|#define dav1d_msac_decode_symbol_adapt8  dav1d_msac_decode_symbol_adapt8_sse2
  ------------------
  436|  40.7M|                                      color_map_cdf[ctx[m]], pal_sz - 1);
  437|  40.7M|            pal_tmp[(i - j) * stride + j] = order[m][color_idx];
  438|  40.7M|        }
  439|  4.00M|    }
  440|       |
  441|   144k|    t->c->pal_dsp.pal_idx_finish(pal_idx, pal_tmp, bw4 * 4, bh4 * 4,
  442|   144k|                                 w4 * 4, h4 * 4);
  443|   144k|}
decode.c:order_palette:
  356|  4.01M|{
  357|  4.01M|    int have_top = i > first;
  358|       |
  359|  4.01M|    assert(pal_idx);
  ------------------
  |  Branch (359:5): [True: 4.00M, False: 976]
  ------------------
  360|  4.00M|    pal_idx += first + (i - first) * stride;
  361|  43.3M|    for (int j = first, n = 0; j >= last; have_top = 1, j--, n++, pal_idx += stride - 1) {
  ------------------
  |  Branch (361:32): [True: 39.3M, False: 3.92M]
  ------------------
  362|  39.3M|        const int have_left = j > 0;
  363|       |
  364|  39.3M|        assert(have_left || have_top);
  ------------------
  |  Branch (364:9): [True: 37.5M, False: 1.79M]
  |  Branch (364:9): [True: 1.79M, False: 0]
  ------------------
  365|       |
  366|  39.3M|#define add(v_in) do { \
  367|  39.3M|        const int v = v_in; \
  368|  39.3M|        assert((unsigned)v < 8U); \
  369|  39.3M|        order[n][o_idx++] = v; \
  370|  39.3M|        mask |= 1 << v; \
  371|  39.3M|    } while (0)
  372|       |
  373|  39.3M|        unsigned mask = 0;
  374|  39.3M|        int o_idx = 0;
  375|  39.3M|        if (!have_left) {
  ------------------
  |  Branch (375:13): [True: 1.79M, False: 37.5M]
  ------------------
  376|  1.79M|            ctx[n] = 0;
  377|  1.79M|            add(pal_idx[-stride]);
  ------------------
  |  |  366|  1.79M|#define add(v_in) do { \
  |  |  367|  1.79M|        const int v = v_in; \
  |  |  368|  1.79M|        assert((unsigned)v < 8U); \
  |  |  369|  1.79M|        order[n][o_idx++] = v; \
  |  |  370|  1.79M|        mask |= 1 << v; \
  |  |  371|  1.79M|    } while (0)
  |  |  ------------------
  |  |  |  Branch (371:14): [Folded, False: 1.79M]
  |  |  ------------------
  ------------------
  |  Branch (377:13): [True: 1.79M, False: 18.4E]
  ------------------
  378|  37.5M|        } else if (!have_top) {
  ------------------
  |  Branch (378:20): [True: 2.22M, False: 35.3M]
  ------------------
  379|  2.22M|            ctx[n] = 0;
  380|  2.22M|            add(pal_idx[-1]);
  ------------------
  |  |  366|  2.22M|#define add(v_in) do { \
  |  |  367|  2.22M|        const int v = v_in; \
  |  |  368|  2.22M|        assert((unsigned)v < 8U); \
  |  |  369|  2.22M|        order[n][o_idx++] = v; \
  |  |  370|  2.22M|        mask |= 1 << v; \
  |  |  371|  2.22M|    } while (0)
  |  |  ------------------
  |  |  |  Branch (371:14): [Folded, False: 2.22M]
  |  |  ------------------
  ------------------
  |  Branch (380:13): [True: 2.22M, False: 18.4E]
  ------------------
  381|  35.3M|        } else {
  382|  35.3M|            const int l = pal_idx[-1], t = pal_idx[-stride], tl = pal_idx[-(stride + 1)];
  383|  35.3M|            const int same_t_l = t == l;
  384|  35.3M|            const int same_t_tl = t == tl;
  385|  35.3M|            const int same_l_tl = l == tl;
  386|  35.3M|            const int same_all = same_t_l & same_t_tl & same_l_tl;
  387|       |
  388|  35.3M|            if (same_all) {
  ------------------
  |  Branch (388:17): [True: 16.0M, False: 19.2M]
  ------------------
  389|  16.0M|                ctx[n] = 4;
  390|  16.0M|                add(t);
  ------------------
  |  |  366|  16.0M|#define add(v_in) do { \
  |  |  367|  16.0M|        const int v = v_in; \
  |  |  368|  16.0M|        assert((unsigned)v < 8U); \
  |  |  369|  16.0M|        order[n][o_idx++] = v; \
  |  |  370|  16.0M|        mask |= 1 << v; \
  |  |  371|  16.0M|    } while (0)
  |  |  ------------------
  |  |  |  Branch (371:14): [Folded, False: 16.0M]
  |  |  ------------------
  ------------------
  |  Branch (390:17): [True: 16.0M, False: 6.17k]
  ------------------
  391|  19.2M|            } else if (same_t_l) {
  ------------------
  |  Branch (391:24): [True: 3.63M, False: 15.6M]
  ------------------
  392|  3.63M|                ctx[n] = 3;
  393|  3.63M|                add(t);
  ------------------
  |  |  366|  3.63M|#define add(v_in) do { \
  |  |  367|  3.63M|        const int v = v_in; \
  |  |  368|  3.63M|        assert((unsigned)v < 8U); \
  |  |  369|  3.63M|        order[n][o_idx++] = v; \
  |  |  370|  3.63M|        mask |= 1 << v; \
  |  |  371|  3.63M|    } while (0)
  |  |  ------------------
  |  |  |  Branch (371:14): [Folded, False: 3.63M]
  |  |  ------------------
  ------------------
  |  Branch (393:17): [True: 3.63M, False: 173]
  ------------------
  394|  3.63M|                add(tl);
  ------------------
  |  |  366|  3.63M|#define add(v_in) do { \
  |  |  367|  3.63M|        const int v = v_in; \
  |  |  368|  3.63M|        assert((unsigned)v < 8U); \
  |  |  369|  3.63M|        order[n][o_idx++] = v; \
  |  |  370|  3.63M|        mask |= 1 << v; \
  |  |  371|  3.63M|    } while (0)
  |  |  ------------------
  |  |  |  Branch (371:14): [Folded, False: 3.63M]
  |  |  ------------------
  ------------------
  |  Branch (394:17): [True: 3.63M, False: 18.4E]
  ------------------
  395|  15.6M|            } else if (same_t_tl | same_l_tl) {
  ------------------
  |  Branch (395:24): [True: 12.2M, False: 3.36M]
  ------------------
  396|  12.2M|                ctx[n] = 2;
  397|  12.2M|                add(tl);
  ------------------
  |  |  366|  12.2M|#define add(v_in) do { \
  |  |  367|  12.2M|        const int v = v_in; \
  |  |  368|  12.2M|        assert((unsigned)v < 8U); \
  |  |  369|  12.2M|        order[n][o_idx++] = v; \
  |  |  370|  12.2M|        mask |= 1 << v; \
  |  |  371|  12.2M|    } while (0)
  |  |  ------------------
  |  |  |  Branch (371:14): [Folded, False: 12.2M]
  |  |  ------------------
  ------------------
  |  Branch (397:17): [True: 12.2M, False: 18.4E]
  ------------------
  398|  12.2M|                add(same_t_tl ? l : t);
  ------------------
  |  |  366|  12.2M|#define add(v_in) do { \
  |  |  367|  24.5M|        const int v = v_in; \
  |  |  ------------------
  |  |  |  Branch (367:23): [True: 6.06M, False: 6.23M]
  |  |  ------------------
  |  |  368|  12.2M|        assert((unsigned)v < 8U); \
  |  |  369|  12.3M|        order[n][o_idx++] = v; \
  |  |  370|  12.3M|        mask |= 1 << v; \
  |  |  371|  12.3M|    } while (0)
  |  |  ------------------
  |  |  |  Branch (371:14): [Folded, False: 12.3M]
  |  |  ------------------
  ------------------
  |  Branch (398:17): [True: 12.3M, False: 18.4E]
  ------------------
  399|  12.2M|            } else {
  400|  3.36M|                ctx[n] = 1;
  401|  3.36M|                add(imin(t, l));
  ------------------
  |  |  366|  3.36M|#define add(v_in) do { \
  |  |  367|  3.36M|        const int v = v_in; \
  |  |  368|  3.36M|        assert((unsigned)v < 8U); \
  |  |  369|  5.11M|        order[n][o_idx++] = v; \
  |  |  370|  5.11M|        mask |= 1 << v; \
  |  |  371|  5.11M|    } while (0)
  |  |  ------------------
  |  |  |  Branch (371:14): [Folded, False: 5.11M]
  |  |  ------------------
  ------------------
  |  Branch (401:17): [True: 5.11M, False: 18.4E]
  ------------------
  402|  5.11M|                add(imax(t, l));
  ------------------
  |  |  366|  5.11M|#define add(v_in) do { \
  |  |  367|  5.11M|        const int v = v_in; \
  |  |  368|  5.11M|        assert((unsigned)v < 8U); \
  |  |  369|  5.11M|        order[n][o_idx++] = v; \
  |  |  370|  5.11M|        mask |= 1 << v; \
  |  |  371|  5.11M|    } while (0)
  |  |  ------------------
  |  |  |  Branch (371:14): [Folded, False: 5.11M]
  |  |  ------------------
  ------------------
  |  Branch (402:17): [True: 5.11M, False: 3.33k]
  ------------------
  403|  5.11M|                add(tl);
  ------------------
  |  |  366|  5.11M|#define add(v_in) do { \
  |  |  367|  5.11M|        const int v = v_in; \
  |  |  368|  5.11M|        assert((unsigned)v < 8U); \
  |  |  369|  5.11M|        order[n][o_idx++] = v; \
  |  |  370|  5.11M|        mask |= 1 << v; \
  |  |  371|  5.11M|    } while (0)
  |  |  ------------------
  |  |  |  Branch (371:14): [Folded, False: 5.11M]
  |  |  ------------------
  ------------------
  |  Branch (403:17): [True: 5.11M, False: 3.66k]
  ------------------
  404|  5.11M|            }
  405|  35.3M|        }
  406|   344M|        for (unsigned m = 1, bit = 0; m < 0x100; m <<= 1, bit++)
  ------------------
  |  Branch (406:39): [True: 303M, False: 41.0M]
  ------------------
  407|   303M|            if (!(mask & m))
  ------------------
  |  Branch (407:17): [True: 242M, False: 61.6M]
  ------------------
  408|   242M|                order[n][o_idx++] = bit;
  409|       |        assert(o_idx == 8);
  ------------------
  |  Branch (409:9): [True: 39.2M, False: 1.80M]
  ------------------
  410|  41.0M|#undef add
  411|  41.0M|    }
  412|  4.00M|}
decode.c:splat_intraref:
  566|  3.42M|{
  567|  3.42M|    const refmvs_block ALIGN(tmpl, 16) = (refmvs_block) {
  568|  3.42M|        .ref.ref = { 0, -1 },
  569|  3.42M|        .mv.mv[0].n = INVALID_MV,
  ------------------
  |  |   40|  3.42M|#define INVALID_MV 0x80008000
  ------------------
  570|  3.42M|        .bs = bs,
  571|  3.42M|        .mf = 0,
  572|  3.42M|    };
  573|  3.42M|    c->refmvs_dsp.splat_mv(&t->rt.r[(t->by & 31) + 5], &tmpl, t->bx, bw4, bh4);
  574|  3.42M|}
decode.c:read_mv_residual:
  109|  1.49M|{
  110|  1.49M|    MsacContext *const msac = &ts->msac;
  111|  1.49M|    const enum MVJoint mv_joint =
  112|  1.49M|        dav1d_msac_decode_symbol_adapt4(msac, ts->cdf.mv.joint, N_MV_JOINTS - 1);
  ------------------
  |  |   47|  1.49M|#define dav1d_msac_decode_symbol_adapt4  dav1d_msac_decode_symbol_adapt4_sse2
  ------------------
  113|  1.49M|    if (mv_joint & MV_JOINT_V)
  ------------------
  |  Branch (113:9): [True: 1.12M, False: 371k]
  ------------------
  114|  1.12M|        ref_mv->y += read_mv_component_diff(msac, &ts->cdf.mv.comp[0], mv_prec);
  115|  1.49M|    if (mv_joint & MV_JOINT_H)
  ------------------
  |  Branch (115:9): [True: 1.07M, False: 421k]
  ------------------
  116|  1.07M|        ref_mv->x += read_mv_component_diff(msac, &ts->cdf.mv.comp[1], mv_prec);
  117|  1.49M|}
decode.c:read_mv_component_diff:
   79|  2.20M|{
   80|  2.20M|    const int sign = dav1d_msac_decode_bool_adapt(msac, mv_comp->sign);
  ------------------
  |  |   52|  2.20M|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
   81|  2.20M|    const int cl = dav1d_msac_decode_symbol_adapt16(msac, mv_comp->classes, 10);
  ------------------
  |  |   57|  2.20M|#define dav1d_msac_decode_symbol_adapt16(ctx, cdf, symb) ((ctx)->symbol_adapt16(ctx, cdf, symb))
  ------------------
   82|  2.20M|    int up, fp = 3, hp = 1;
   83|       |
   84|  2.20M|    if (!cl) {
  ------------------
  |  Branch (84:9): [True: 1.00M, False: 1.19M]
  ------------------
   85|  1.00M|        up = dav1d_msac_decode_bool_adapt(msac, mv_comp->class0);
  ------------------
  |  |   52|  1.00M|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
   86|  1.00M|        if (mv_prec >= 0) {  // !force_integer_mv
  ------------------
  |  Branch (86:13): [True: 715k, False: 292k]
  ------------------
   87|   715k|            fp = dav1d_msac_decode_symbol_adapt4(msac, mv_comp->class0_fp[up], 3);
  ------------------
  |  |   47|   715k|#define dav1d_msac_decode_symbol_adapt4  dav1d_msac_decode_symbol_adapt4_sse2
  ------------------
   88|   715k|            if (mv_prec > 0) // allow_high_precision_mv
  ------------------
  |  Branch (88:17): [True: 387k, False: 327k]
  ------------------
   89|   387k|                hp = dav1d_msac_decode_bool_adapt(msac, mv_comp->class0_hp);
  ------------------
  |  |   52|   387k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
   90|   715k|        }
   91|  1.19M|    } else {
   92|  1.19M|        up = 1 << cl;
   93|  11.8M|        for (int n = 0; n < cl; n++)
  ------------------
  |  Branch (93:25): [True: 10.6M, False: 1.19M]
  ------------------
   94|  10.6M|            up |= dav1d_msac_decode_bool_adapt(msac, mv_comp->classN[n]) << n;
  ------------------
  |  |   52|  10.6M|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
   95|  1.19M|        if (mv_prec >= 0) {  // !force_integer_mv
  ------------------
  |  Branch (95:13): [True: 234k, False: 960k]
  ------------------
   96|   234k|            fp = dav1d_msac_decode_symbol_adapt4(msac, mv_comp->classN_fp, 3);
  ------------------
  |  |   47|   234k|#define dav1d_msac_decode_symbol_adapt4  dav1d_msac_decode_symbol_adapt4_sse2
  ------------------
   97|   234k|            if (mv_prec > 0) // allow_high_precision_mv
  ------------------
  |  Branch (97:17): [True: 118k, False: 115k]
  ------------------
   98|   118k|                hp = dav1d_msac_decode_bool_adapt(msac, mv_comp->classN_hp);
  ------------------
  |  |   52|   118k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
   99|   234k|        }
  100|  1.19M|    }
  101|       |
  102|  2.20M|    const int diff = ((up << 3) | (fp << 1) | hp) + 1;
  103|       |
  104|  2.20M|    return sign ? -diff : diff;
  ------------------
  |  Branch (104:12): [True: 1.60M, False: 597k]
  ------------------
  105|  2.20M|}
decode.c:read_vartx_tree:
  448|  4.01M|{
  449|  4.01M|    const Dav1dFrameContext *const f = t->f;
  450|  4.01M|    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
  451|  4.01M|    const int bw4 = b_dim[0], bh4 = b_dim[1];
  452|       |
  453|       |    // var-tx tree coding
  454|  4.01M|    uint16_t tx_split[2] = { 0 };
  455|  4.01M|    b->max_ytx = dav1d_max_txfm_size_for_bs[bs][0];
  456|  4.01M|    if (!b->skip && (f->frame_hdr->segmentation.lossless[b->seg_id] ||
  ------------------
  |  Branch (456:9): [True: 1.46M, False: 2.55M]
  |  Branch (456:22): [True: 11.0k, False: 1.45M]
  ------------------
  457|  1.45M|                     b->max_ytx == TX_4X4))
  ------------------
  |  Branch (457:22): [True: 79.4k, False: 1.37M]
  ------------------
  458|  91.1k|    {
  459|  91.1k|        b->max_ytx = b->uvtx = TX_4X4;
  460|  91.1k|        if (f->frame_hdr->txfm_mode == DAV1D_TX_SWITCHABLE) {
  ------------------
  |  Branch (460:13): [True: 41.4k, False: 49.6k]
  ------------------
  461|  41.4k|            dav1d_memset_pow2[b_dim[2]](&t->a->tx[bx4], TX_4X4);
  462|  41.4k|            dav1d_memset_pow2[b_dim[3]](&t->l.tx[by4], TX_4X4);
  463|  41.4k|        }
  464|  3.92M|    } else if (f->frame_hdr->txfm_mode != DAV1D_TX_SWITCHABLE || b->skip) {
  ------------------
  |  Branch (464:16): [True: 2.68M, False: 1.24M]
  |  Branch (464:66): [True: 569k, False: 676k]
  ------------------
  465|  3.25M|        if (f->frame_hdr->txfm_mode == DAV1D_TX_SWITCHABLE) {
  ------------------
  |  Branch (465:13): [True: 569k, False: 2.68M]
  ------------------
  466|   569k|            dav1d_memset_pow2[b_dim[2]](&t->a->tx[bx4], b_dim[2 + 0]);
  467|   569k|            dav1d_memset_pow2[b_dim[3]](&t->l.tx[by4], b_dim[2 + 1]);
  468|   569k|        }
  469|  3.25M|        b->uvtx = dav1d_max_txfm_size_for_bs[bs][f->cur.p.layout];
  470|  3.25M|    } else {
  471|   675k|        assert(bw4 <= 16 || bh4 <= 16 || b->max_ytx == TX_64X64);
  ------------------
  |  Branch (471:9): [True: 670k, False: 5.28k]
  |  Branch (471:9): [True: 2.49k, False: 2.79k]
  |  Branch (471:9): [True: 2.79k, False: 0]
  ------------------
  472|   676k|        int y, x, y_off, x_off;
  473|   676k|        const TxfmInfo *const ytx = &dav1d_txfm_dimensions[b->max_ytx];
  474|  1.35M|        for (y = 0, y_off = 0; y < bh4; y += ytx->h, y_off++) {
  ------------------
  |  Branch (474:32): [True: 681k, False: 676k]
  ------------------
  475|  1.37M|            for (x = 0, x_off = 0; x < bw4; x += ytx->w, x_off++) {
  ------------------
  |  Branch (475:36): [True: 689k, False: 681k]
  ------------------
  476|   689k|                read_tx_tree(t, b->max_ytx, 0, tx_split, x_off, y_off);
  477|       |                // contexts are updated inside read_tx_tree()
  478|   689k|                t->bx += ytx->w;
  479|   689k|            }
  480|   681k|            t->bx -= x;
  481|   681k|            t->by += ytx->h;
  482|   681k|        }
  483|   676k|        t->by -= y;
  484|   676k|        if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   676k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 676k]
  |  |  ------------------
  |  |   35|   676k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   676k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  485|      0|            printf("Post-vartxtree[%x/%x]: r=%d\n",
  486|      0|                   tx_split[0], tx_split[1], t->ts->msac.rng);
  487|   676k|        b->uvtx = dav1d_max_txfm_size_for_bs[bs][f->cur.p.layout];
  488|   676k|    }
  489|  4.01M|    assert(!(tx_split[0] & ~0x33));
  ------------------
  |  Branch (489:5): [True: 4.01M, False: 137]
  ------------------
  490|  4.01M|    b->tx_split0 = (uint8_t)tx_split[0];
  491|  4.01M|    b->tx_split1 = tx_split[1];
  492|  4.01M|}
decode.c:read_tx_tree:
  123|  1.24M|{
  124|  1.24M|    const Dav1dFrameContext *const f = t->f;
  125|  1.24M|    const int bx4 = t->bx & 31, by4 = t->by & 31;
  126|  1.24M|    const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[from];
  127|  1.24M|    const int txw = t_dim->lw, txh = t_dim->lh;
  128|  1.24M|    int is_split;
  129|       |
  130|  1.24M|    if (depth < 2 && from > (int) TX_4X4) {
  ------------------
  |  Branch (130:9): [True: 1.05M, False: 194k]
  |  Branch (130:22): [True: 1.05M, False: 1]
  ------------------
  131|  1.05M|        const int cat = 2 * (TX_64X64 - t_dim->max) - depth;
  132|  1.05M|        const int a = t->a->tx[bx4] < txw;
  133|  1.05M|        const int l = t->l.tx[by4] < txh;
  134|       |
  135|  1.05M|        is_split = dav1d_msac_decode_bool_adapt(&t->ts->msac,
  ------------------
  |  |   52|  1.05M|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  136|  1.05M|                       t->ts->cdf.m.txpart[cat][a + l]);
  137|  1.05M|        if (is_split)
  ------------------
  |  Branch (137:13): [True: 269k, False: 780k]
  ------------------
  138|   269k|            masks[depth] |= 1 << (y_off * 4 + x_off);
  139|  1.05M|    } else {
  140|   194k|        is_split = 0;
  141|   194k|    }
  142|       |
  143|  1.24M|    if (is_split && t_dim->max > TX_8X8) {
  ------------------
  |  Branch (143:9): [True: 269k, False: 974k]
  |  Branch (143:21): [True: 193k, False: 76.0k]
  ------------------
  144|   193k|        const enum RectTxfmSize sub = t_dim->sub;
  145|   193k|        const TxfmInfo *const sub_t_dim = &dav1d_txfm_dimensions[sub];
  146|   193k|        const int txsw = sub_t_dim->w, txsh = sub_t_dim->h;
  147|       |
  148|   193k|        read_tx_tree(t, sub, depth + 1, masks, x_off * 2 + 0, y_off * 2 + 0);
  149|   193k|        t->bx += txsw;
  150|   193k|        if (txw >= txh && t->bx < f->bw)
  ------------------
  |  Branch (150:13): [True: 149k, False: 44.8k]
  |  Branch (150:27): [True: 147k, False: 1.09k]
  ------------------
  151|   147k|            read_tx_tree(t, sub, depth + 1, masks, x_off * 2 + 1, y_off * 2 + 0);
  152|   193k|        t->bx -= txsw;
  153|   193k|        t->by += txsh;
  154|   193k|        if (txh >= txw && t->by < f->bh) {
  ------------------
  |  Branch (154:13): [True: 131k, False: 62.2k]
  |  Branch (154:27): [True: 129k, False: 1.71k]
  ------------------
  155|   129k|            read_tx_tree(t, sub, depth + 1, masks, x_off * 2 + 0, y_off * 2 + 1);
  156|   129k|            t->bx += txsw;
  157|   129k|            if (txw >= txh && t->bx < f->bw)
  ------------------
  |  Branch (157:17): [True: 85.0k, False: 44.7k]
  |  Branch (157:31): [True: 84.0k, False: 994]
  ------------------
  158|  84.0k|                read_tx_tree(t, sub, depth + 1, masks,
  159|  84.0k|                             x_off * 2 + 1, y_off * 2 + 1);
  160|   129k|            t->bx -= txsw;
  161|   129k|        }
  162|   193k|        t->by -= txsh;
  163|  1.05M|    } else {
  164|  1.05M|        dav1d_memset_pow2[t_dim->lw](&t->a->tx[bx4], is_split ? TX_4X4 : txw);
  ------------------
  |  Branch (164:54): [True: 76.0k, False: 974k]
  ------------------
  165|  1.05M|        dav1d_memset_pow2[t_dim->lh](&t->l.tx[by4], is_split ? TX_4X4 : txh);
  ------------------
  |  Branch (165:53): [True: 76.0k, False: 974k]
  ------------------
  166|  1.05M|    }
  167|  1.24M|}
decode.c:splat_intrabc_mv:
  535|   734k|{
  536|   734k|    const refmvs_block ALIGN(tmpl, 16) = (refmvs_block) {
  537|   734k|        .ref.ref = { 0, -1 },
  538|   734k|        .mv.mv[0] = b->mv[0],
  539|   734k|        .bs = bs,
  540|   734k|        .mf = 0,
  541|   734k|    };
  542|   734k|    c->refmvs_dsp.splat_mv(&t->rt.r[(t->by & 31) + 5], &tmpl, t->bx, bw4, bh4);
  543|   734k|}
decode.c:findoddzero:
  339|  1.54M|static inline int findoddzero(const uint8_t *buf, int len) {
  340|  1.78M|    for (int n = 0; n < len; n++)
  ------------------
  |  Branch (340:21): [True: 1.68M, False: 96.3k]
  ------------------
  341|  1.68M|        if (!buf[n * 2]) return 1;
  ------------------
  |  Branch (341:13): [True: 1.45M, False: 234k]
  ------------------
  342|  96.3k|    return 0;
  343|  1.54M|}
decode.c:find_matching_ref:
  197|  1.45M|{
  198|  1.45M|    /*const*/ refmvs_block *const *r = &t->rt.r[(t->by & 31) + 5];
  199|  1.45M|    int count = 0;
  200|  1.45M|    int have_topleft = have_top && have_left;
  ------------------
  |  Branch (200:24): [True: 1.36M, False: 87.9k]
  |  Branch (200:36): [True: 988k, False: 376k]
  ------------------
  201|  1.45M|    int have_topright = imax(bw4, bh4) < 32 &&
  ------------------
  |  Branch (201:25): [True: 1.22M, False: 230k]
  ------------------
  202|  1.22M|                        have_top && t->bx + bw4 < t->ts->tiling.col_end &&
  ------------------
  |  Branch (202:25): [True: 1.15M, False: 70.6k]
  |  Branch (202:37): [True: 957k, False: 194k]
  ------------------
  203|   957k|                        (intra_edge_flags & EDGE_I444_TOP_HAS_RIGHT);
  ------------------
  |  Branch (203:25): [True: 636k, False: 321k]
  ------------------
  204|       |
  205|  1.45M|#define bs(rp) dav1d_block_dimensions[(rp)->bs]
  206|  1.45M|#define matches(rp) ((rp)->ref.ref[0] == ref + 1 && (rp)->ref.ref[1] == -1)
  207|       |
  208|  1.45M|    if (have_top) {
  ------------------
  |  Branch (208:9): [True: 1.36M, False: 87.7k]
  ------------------
  209|  1.36M|        const refmvs_block *r2 = &r[-1][t->bx];
  210|  1.36M|        if (matches(r2)) {
  ------------------
  |  |  206|  1.36M|#define matches(rp) ((rp)->ref.ref[0] == ref + 1 && (rp)->ref.ref[1] == -1)
  |  |  ------------------
  |  |  |  Branch (206:22): [True: 1.18M, False: 178k]
  |  |  |  Branch (206:53): [True: 1.10M, False: 80.4k]
  |  |  ------------------
  ------------------
  211|  1.10M|            masks[0] |= 1;
  212|  1.10M|            count = 1;
  213|  1.10M|        }
  214|  1.36M|        int aw4 = bs(r2)[0];
  ------------------
  |  |  205|  1.36M|#define bs(rp) dav1d_block_dimensions[(rp)->bs]
  ------------------
  215|  1.36M|        if (aw4 >= bw4) {
  ------------------
  |  Branch (215:13): [True: 1.23M, False: 132k]
  ------------------
  216|  1.23M|            const int off = t->bx & (aw4 - 1);
  217|  1.23M|            if (off) have_topleft = 0;
  ------------------
  |  Branch (217:17): [True: 139k, False: 1.09M]
  ------------------
  218|  1.23M|            if (aw4 - off > bw4) have_topright = 0;
  ------------------
  |  Branch (218:17): [True: 143k, False: 1.09M]
  ------------------
  219|  1.23M|        } else {
  220|   132k|            unsigned mask = 1 << aw4;
  221|   322k|            for (int x = aw4; x < w4; x += aw4) {
  ------------------
  |  Branch (221:31): [True: 190k, False: 132k]
  ------------------
  222|   190k|                r2 += aw4;
  223|   190k|                if (matches(r2)) {
  ------------------
  |  |  206|   190k|#define matches(rp) ((rp)->ref.ref[0] == ref + 1 && (rp)->ref.ref[1] == -1)
  |  |  ------------------
  |  |  |  Branch (206:22): [True: 134k, False: 56.0k]
  |  |  |  Branch (206:53): [True: 122k, False: 11.6k]
  |  |  ------------------
  ------------------
  224|   122k|                    masks[0] |= mask;
  225|   122k|                    if (++count >= 8) return;
  ------------------
  |  Branch (225:25): [True: 478, False: 122k]
  ------------------
  226|   122k|                }
  227|   189k|                aw4 = bs(r2)[0];
  ------------------
  |  |  205|   189k|#define bs(rp) dav1d_block_dimensions[(rp)->bs]
  ------------------
  228|   189k|                mask <<= aw4;
  229|   189k|            }
  230|   132k|        }
  231|  1.36M|    }
  232|  1.45M|    if (have_left) {
  ------------------
  |  Branch (232:9): [True: 1.07M, False: 375k]
  ------------------
  233|  1.07M|        /*const*/ refmvs_block *const *r2 = r;
  234|  1.07M|        if (matches(&r2[0][t->bx - 1])) {
  ------------------
  |  |  206|  1.07M|#define matches(rp) ((rp)->ref.ref[0] == ref + 1 && (rp)->ref.ref[1] == -1)
  |  |  ------------------
  |  |  |  Branch (206:22): [True: 891k, False: 185k]
  |  |  |  Branch (206:53): [True: 813k, False: 78.0k]
  |  |  ------------------
  ------------------
  235|   813k|            masks[1] |= 1;
  236|   813k|            if (++count >= 8) return;
  ------------------
  |  Branch (236:17): [True: 250, False: 813k]
  ------------------
  237|   813k|        }
  238|  1.07M|        int lh4 = bs(&r2[0][t->bx - 1])[1];
  ------------------
  |  |  205|  1.07M|#define bs(rp) dav1d_block_dimensions[(rp)->bs]
  ------------------
  239|  1.07M|        if (lh4 >= bh4) {
  ------------------
  |  Branch (239:13): [True: 929k, False: 147k]
  ------------------
  240|   929k|            if (t->by & (lh4 - 1)) have_topleft = 0;
  ------------------
  |  Branch (240:17): [True: 153k, False: 776k]
  ------------------
  241|   929k|        } else {
  242|   147k|            unsigned mask = 1 << lh4;
  243|   354k|            for (int y = lh4; y < h4; y += lh4) {
  ------------------
  |  Branch (243:31): [True: 209k, False: 145k]
  ------------------
  244|   209k|                r2 += lh4;
  245|   209k|                if (matches(&r2[0][t->bx - 1])) {
  ------------------
  |  |  206|   209k|#define matches(rp) ((rp)->ref.ref[0] == ref + 1 && (rp)->ref.ref[1] == -1)
  |  |  ------------------
  |  |  |  Branch (206:22): [True: 147k, False: 61.2k]
  |  |  |  Branch (206:53): [True: 136k, False: 11.8k]
  |  |  ------------------
  ------------------
  246|   136k|                    masks[1] |= mask;
  247|   136k|                    if (++count >= 8) return;
  ------------------
  |  Branch (247:25): [True: 1.39k, False: 134k]
  ------------------
  248|   136k|                }
  249|   207k|                lh4 = bs(&r2[0][t->bx - 1])[1];
  ------------------
  |  |  205|   207k|#define bs(rp) dav1d_block_dimensions[(rp)->bs]
  ------------------
  250|   207k|                mask <<= lh4;
  251|   207k|            }
  252|   147k|        }
  253|  1.07M|    }
  254|  1.45M|    if (have_topleft && matches(&r[-1][t->bx - 1])) {
  ------------------
  |  |  206|   695k|#define matches(rp) ((rp)->ref.ref[0] == ref + 1 && (rp)->ref.ref[1] == -1)
  |  |  ------------------
  |  |  |  Branch (206:22): [True: 552k, False: 142k]
  |  |  |  Branch (206:53): [True: 502k, False: 49.8k]
  |  |  ------------------
  ------------------
  |  Branch (254:9): [True: 695k, False: 756k]
  ------------------
  255|   502k|        masks[1] |= 1ULL << 32;
  256|   502k|        if (++count >= 8) return;
  ------------------
  |  Branch (256:13): [True: 1.16k, False: 501k]
  ------------------
  257|   502k|    }
  258|  1.45M|    if (have_topright && matches(&r[-1][t->bx + bw4])) {
  ------------------
  |  |  206|   493k|#define matches(rp) ((rp)->ref.ref[0] == ref + 1 && (rp)->ref.ref[1] == -1)
  |  |  ------------------
  |  |  |  Branch (206:22): [True: 393k, False: 99.9k]
  |  |  |  Branch (206:53): [True: 362k, False: 31.6k]
  |  |  ------------------
  ------------------
  |  Branch (258:9): [True: 493k, False: 956k]
  ------------------
  259|   362k|        masks[0] |= 1ULL << 32;
  260|   362k|    }
  261|  1.45M|#undef matches
  262|  1.45M|}
decode.c:derive_warpmv:
  268|   169k|{
  269|   169k|    int pts[8][2 /* in, out */][2 /* x, y */], np = 0;
  270|   169k|    /*const*/ refmvs_block *const *r = &t->rt.r[(t->by & 31) + 5];
  271|       |
  272|   169k|#define add_sample(dx, dy, sx, sy, rp) do { \
  273|   169k|    pts[np][0][0] = 16 * (2 * dx + sx * bs(rp)[0]) - 8; \
  274|   169k|    pts[np][0][1] = 16 * (2 * dy + sy * bs(rp)[1]) - 8; \
  275|   169k|    pts[np][1][0] = pts[np][0][0] + (rp)->mv.mv[0].x; \
  276|   169k|    pts[np][1][1] = pts[np][0][1] + (rp)->mv.mv[0].y; \
  277|   169k|    np++; \
  278|   169k|} while (0)
  279|       |
  280|       |    // use masks[] to find the projectable motion vectors in the edges
  281|   169k|    if ((unsigned) masks[0] == 1 && !(masks[1] >> 32)) {
  ------------------
  |  Branch (281:9): [True: 117k, False: 51.9k]
  |  Branch (281:37): [True: 58.1k, False: 59.4k]
  ------------------
  282|  58.1k|        const int off = t->bx & (bs(&r[-1][t->bx])[0] - 1);
  ------------------
  |  |  205|  58.1k|#define bs(rp) dav1d_block_dimensions[(rp)->bs]
  ------------------
  283|  58.1k|        add_sample(-off, 0, 1, -1, &r[-1][t->bx]);
  ------------------
  |  |  272|  58.1k|#define add_sample(dx, dy, sx, sy, rp) do { \
  |  |  273|  58.1k|    pts[np][0][0] = 16 * (2 * dx + sx * bs(rp)[0]) - 8; \
  |  |  ------------------
  |  |  |  |  205|  58.1k|#define bs(rp) dav1d_block_dimensions[(rp)->bs]
  |  |  ------------------
  |  |  274|  58.1k|    pts[np][0][1] = 16 * (2 * dy + sy * bs(rp)[1]) - 8; \
  |  |  ------------------
  |  |  |  |  205|  58.1k|#define bs(rp) dav1d_block_dimensions[(rp)->bs]
  |  |  ------------------
  |  |  275|  58.1k|    pts[np][1][0] = pts[np][0][0] + (rp)->mv.mv[0].x; \
  |  |  276|  58.1k|    pts[np][1][1] = pts[np][0][1] + (rp)->mv.mv[0].y; \
  |  |  277|  58.1k|    np++; \
  |  |  278|  58.1k|} while (0)
  |  |  ------------------
  |  |  |  Branch (278:10): [Folded, False: 58.1k]
  |  |  ------------------
  ------------------
  284|   207k|    } else for (unsigned off = 0, xmask = (uint32_t) masks[0]; np < 8 && xmask;) { // top
  ------------------
  |  Branch (284:64): [True: 206k, False: 279]
  |  Branch (284:74): [True: 95.6k, False: 111k]
  ------------------
  285|  95.7k|        const int tz = ctz(xmask);
  286|  95.7k|        off += tz;
  287|  95.7k|        xmask >>= tz;
  288|  95.7k|        add_sample(off, 0, 1, -1, &r[-1][t->bx + off]);
  ------------------
  |  |  272|  95.7k|#define add_sample(dx, dy, sx, sy, rp) do { \
  |  |  273|  95.7k|    pts[np][0][0] = 16 * (2 * dx + sx * bs(rp)[0]) - 8; \
  |  |  ------------------
  |  |  |  |  205|  95.7k|#define bs(rp) dav1d_block_dimensions[(rp)->bs]
  |  |  ------------------
  |  |  274|  95.7k|    pts[np][0][1] = 16 * (2 * dy + sy * bs(rp)[1]) - 8; \
  |  |  ------------------
  |  |  |  |  205|  95.7k|#define bs(rp) dav1d_block_dimensions[(rp)->bs]
  |  |  ------------------
  |  |  275|  95.7k|    pts[np][1][0] = pts[np][0][0] + (rp)->mv.mv[0].x; \
  |  |  276|  95.7k|    pts[np][1][1] = pts[np][0][1] + (rp)->mv.mv[0].y; \
  |  |  277|  95.7k|    np++; \
  |  |  278|  95.7k|} while (0)
  |  |  ------------------
  |  |  |  Branch (278:10): [Folded, False: 95.7k]
  |  |  ------------------
  ------------------
  289|  95.7k|        xmask &= ~1;
  290|  95.7k|    }
  291|   169k|    if (np < 8 && masks[1] == 1) {
  ------------------
  |  Branch (291:9): [True: 169k, False: 282]
  |  Branch (291:19): [True: 55.5k, False: 113k]
  ------------------
  292|  55.5k|        const int off = t->by & (bs(&r[0][t->bx - 1])[1] - 1);
  ------------------
  |  |  205|  55.5k|#define bs(rp) dav1d_block_dimensions[(rp)->bs]
  ------------------
  293|  55.5k|        add_sample(0, -off, -1, 1, &r[-off][t->bx - 1]);
  ------------------
  |  |  272|  55.5k|#define add_sample(dx, dy, sx, sy, rp) do { \
  |  |  273|  55.5k|    pts[np][0][0] = 16 * (2 * dx + sx * bs(rp)[0]) - 8; \
  |  |  ------------------
  |  |  |  |  205|  55.5k|#define bs(rp) dav1d_block_dimensions[(rp)->bs]
  |  |  ------------------
  |  |  274|  55.5k|    pts[np][0][1] = 16 * (2 * dy + sy * bs(rp)[1]) - 8; \
  |  |  ------------------
  |  |  |  |  205|  55.5k|#define bs(rp) dav1d_block_dimensions[(rp)->bs]
  |  |  ------------------
  |  |  275|  55.5k|    pts[np][1][0] = pts[np][0][0] + (rp)->mv.mv[0].x; \
  |  |  276|  55.5k|    pts[np][1][1] = pts[np][0][1] + (rp)->mv.mv[0].y; \
  |  |  277|  55.5k|    np++; \
  |  |  278|  55.5k|} while (0)
  |  |  ------------------
  |  |  |  Branch (278:10): [Folded, False: 55.5k]
  |  |  ------------------
  ------------------
  294|   214k|    } else for (unsigned off = 0, ymask = (uint32_t) masks[1]; np < 8 && ymask;) { // left
  ------------------
  |  Branch (294:64): [True: 214k, False: 658]
  |  Branch (294:74): [True: 100k, False: 113k]
  ------------------
  295|   100k|        const int tz = ctz(ymask);
  296|   100k|        off += tz;
  297|   100k|        ymask >>= tz;
  298|   100k|        add_sample(0, off, -1, 1, &r[off][t->bx - 1]);
  ------------------
  |  |  272|   100k|#define add_sample(dx, dy, sx, sy, rp) do { \
  |  |  273|   100k|    pts[np][0][0] = 16 * (2 * dx + sx * bs(rp)[0]) - 8; \
  |  |  ------------------
  |  |  |  |  205|   100k|#define bs(rp) dav1d_block_dimensions[(rp)->bs]
  |  |  ------------------
  |  |  274|   100k|    pts[np][0][1] = 16 * (2 * dy + sy * bs(rp)[1]) - 8; \
  |  |  ------------------
  |  |  |  |  205|   100k|#define bs(rp) dav1d_block_dimensions[(rp)->bs]
  |  |  ------------------
  |  |  275|   100k|    pts[np][1][0] = pts[np][0][0] + (rp)->mv.mv[0].x; \
  |  |  276|   100k|    pts[np][1][1] = pts[np][0][1] + (rp)->mv.mv[0].y; \
  |  |  277|   100k|    np++; \
  |  |  278|   100k|} while (0)
  |  |  ------------------
  |  |  |  Branch (278:10): [Folded, False: 100k]
  |  |  ------------------
  ------------------
  299|   100k|        ymask &= ~1;
  300|   100k|    }
  301|   169k|    if (np < 8 && masks[1] >> 32) // top/left
  ------------------
  |  Branch (301:9): [True: 168k, False: 771]
  |  Branch (301:19): [True: 78.4k, False: 90.2k]
  ------------------
  302|  78.4k|        add_sample(0, 0, -1, -1, &r[-1][t->bx - 1]);
  ------------------
  |  |  272|  78.4k|#define add_sample(dx, dy, sx, sy, rp) do { \
  |  |  273|  78.4k|    pts[np][0][0] = 16 * (2 * dx + sx * bs(rp)[0]) - 8; \
  |  |  ------------------
  |  |  |  |  205|  78.4k|#define bs(rp) dav1d_block_dimensions[(rp)->bs]
  |  |  ------------------
  |  |  274|  78.4k|    pts[np][0][1] = 16 * (2 * dy + sy * bs(rp)[1]) - 8; \
  |  |  ------------------
  |  |  |  |  205|  78.4k|#define bs(rp) dav1d_block_dimensions[(rp)->bs]
  |  |  ------------------
  |  |  275|  78.4k|    pts[np][1][0] = pts[np][0][0] + (rp)->mv.mv[0].x; \
  |  |  276|  78.4k|    pts[np][1][1] = pts[np][0][1] + (rp)->mv.mv[0].y; \
  |  |  277|  78.4k|    np++; \
  |  |  278|  78.4k|} while (0)
  |  |  ------------------
  |  |  |  Branch (278:10): [Folded, False: 78.4k]
  |  |  ------------------
  ------------------
  303|   169k|    if (np < 8 && masks[0] >> 32) // top/right
  ------------------
  |  Branch (303:9): [True: 168k, False: 992]
  |  Branch (303:19): [True: 51.4k, False: 117k]
  ------------------
  304|  51.4k|        add_sample(bw4, 0, 1, -1, &r[-1][t->bx + bw4]);
  ------------------
  |  |  272|  51.4k|#define add_sample(dx, dy, sx, sy, rp) do { \
  |  |  273|  51.4k|    pts[np][0][0] = 16 * (2 * dx + sx * bs(rp)[0]) - 8; \
  |  |  ------------------
  |  |  |  |  205|  51.4k|#define bs(rp) dav1d_block_dimensions[(rp)->bs]
  |  |  ------------------
  |  |  274|  51.4k|    pts[np][0][1] = 16 * (2 * dy + sy * bs(rp)[1]) - 8; \
  |  |  ------------------
  |  |  |  |  205|  51.4k|#define bs(rp) dav1d_block_dimensions[(rp)->bs]
  |  |  ------------------
  |  |  275|  51.4k|    pts[np][1][0] = pts[np][0][0] + (rp)->mv.mv[0].x; \
  |  |  276|  51.4k|    pts[np][1][1] = pts[np][0][1] + (rp)->mv.mv[0].y; \
  |  |  277|  51.4k|    np++; \
  |  |  278|  51.4k|} while (0)
  |  |  ------------------
  |  |  |  Branch (278:10): [Folded, False: 51.4k]
  |  |  ------------------
  ------------------
  305|   169k|    assert(np > 0 && np <= 8);
  ------------------
  |  Branch (305:5): [True: 169k, False: 155]
  |  Branch (305:5): [True: 169k, False: 0]
  ------------------
  306|   169k|#undef bs
  307|       |
  308|       |    // select according to motion vector difference against a threshold
  309|   169k|    int mvd[8], ret = 0;
  310|   169k|    const int thresh = 4 * iclip(imax(bw4, bh4), 4, 28);
  311|   609k|    for (int i = 0; i < np; i++) {
  ------------------
  |  Branch (311:21): [True: 439k, False: 169k]
  ------------------
  312|   439k|        mvd[i] = abs(pts[i][1][0] - pts[i][0][0] - mv.x) +
  313|   439k|                 abs(pts[i][1][1] - pts[i][0][1] - mv.y);
  314|   439k|        if (mvd[i] > thresh)
  ------------------
  |  Branch (314:13): [True: 127k, False: 312k]
  ------------------
  315|   127k|            mvd[i] = -1;
  316|   312k|        else
  317|   312k|            ret++;
  318|   439k|    }
  319|   169k|    if (!ret) {
  ------------------
  |  Branch (319:9): [True: 28.7k, False: 140k]
  ------------------
  320|  28.7k|        ret = 1;
  321|   162k|    } else for (int i = 0, j = np - 1, k = 0; k < np - ret; k++, i++, j--) {
  ------------------
  |  Branch (321:47): [True: 54.7k, False: 107k]
  ------------------
  322|  99.3k|        while (mvd[i] != -1) i++;
  ------------------
  |  Branch (322:16): [True: 44.6k, False: 54.7k]
  ------------------
  323|   102k|        while (mvd[j] == -1) j--;
  ------------------
  |  Branch (323:16): [True: 47.7k, False: 54.7k]
  ------------------
  324|  54.7k|        assert(i != j);
  ------------------
  |  Branch (324:9): [True: 54.7k, False: 0]
  ------------------
  325|  54.7k|        if (i > j) break;
  ------------------
  |  Branch (325:13): [True: 33.1k, False: 21.6k]
  ------------------
  326|       |        // replace the discarded samples;
  327|  21.6k|        mvd[i] = mvd[j];
  328|  21.6k|        memcpy(pts[i], pts[j], sizeof(*pts));
  329|  21.6k|    }
  330|       |
  331|   169k|    if (!dav1d_find_affine_int(pts, ret, bw4, bh4, mv, wmp, t->bx, t->by) &&
  ------------------
  |  Branch (331:9): [True: 162k, False: 6.87k]
  ------------------
  332|   162k|        !dav1d_get_shear_params(wmp))
  ------------------
  |  Branch (332:9): [True: 155k, False: 7.32k]
  ------------------
  333|   155k|    {
  334|   155k|        wmp->type = DAV1D_WM_TYPE_AFFINE;
  335|   155k|    } else
  336|  14.1k|        wmp->type = DAV1D_WM_TYPE_IDENTITY;
  337|   169k|}
decode.c:splat_tworef_mv:
  550|   343k|{
  551|   343k|    assert(bw4 >= 2 && bh4 >= 2);
  ------------------
  |  Branch (551:5): [True: 343k, False: 18]
  |  Branch (551:5): [True: 343k, False: 25]
  ------------------
  552|   343k|    const enum CompInterPredMode mode = b->inter_mode;
  553|   343k|    const refmvs_block ALIGN(tmpl, 16) = (refmvs_block) {
  554|   343k|        .ref.ref = { b->ref[0] + 1, b->ref[1] + 1 },
  555|   343k|        .mv.mv = { b->mv[0], b->mv[1] },
  556|   343k|        .bs = bs,
  557|   343k|        .mf = (mode == GLOBALMV_GLOBALMV) | !!((1 << mode) & (0xbc)) * 2,
  558|   343k|    };
  559|   343k|    c->refmvs_dsp.splat_mv(&t->rt.r[(t->by & 31) + 5], &tmpl, t->bx, bw4, bh4);
  560|   343k|}
decode.c:splat_oneref_mv:
  519|  2.93M|{
  520|  2.93M|    const enum InterPredMode mode = b->inter_mode;
  521|  2.93M|    const refmvs_block ALIGN(tmpl, 16) = (refmvs_block) {
  522|  2.93M|        .ref.ref = { b->ref[0] + 1, b->interintra_type ? 0 : -1 },
  ------------------
  |  Branch (522:37): [True: 142k, False: 2.79M]
  ------------------
  523|  2.93M|        .mv.mv[0] = b->mv[0],
  524|  2.93M|        .bs = bs,
  525|  2.93M|        .mf = (mode == GLOBALMV && imin(bw4, bh4) >= 2) | ((mode == NEWMV) * 2),
  ------------------
  |  Branch (525:16): [True: 1.35M, False: 1.58M]
  |  Branch (525:36): [True: 1.23M, False: 118k]
  ------------------
  526|  2.93M|    };
  527|  2.93M|    c->refmvs_dsp.splat_mv(&t->rt.r[(t->by & 31) + 5], &tmpl, t->bx, bw4, bh4);
  528|  2.93M|}
decode.c:affine_lowest_px_luma:
  619|   687k|{
  620|   687k|    affine_lowest_px(t, dst, b_dim, wmp, 0, 0);
  621|   687k|}
decode.c:affine_lowest_px:
  597|   761k|{
  598|   761k|    const int h_mul = 4 >> ss_hor, v_mul = 4 >> ss_ver;
  599|   761k|    assert(!((b_dim[0] * h_mul) & 7) && !((b_dim[1] * v_mul) & 7));
  ------------------
  |  Branch (599:5): [True: 761k, False: 1]
  |  Branch (599:5): [True: 761k, False: 18.4E]
  ------------------
  600|   761k|    const int32_t *const mat = wmp->matrix;
  601|   761k|    const int y = b_dim[1] * v_mul - 8; // lowest y
  602|       |
  603|   761k|    const int src_y = t->by * 4 + ((y + 4) << ss_ver);
  604|   761k|    const int64_t mat5_y = (int64_t) mat[5] * src_y + mat[1];
  605|       |    // check left- and right-most blocks
  606|  2.18M|    for (int x = 0; x < b_dim[0] * h_mul; x += imax(8, b_dim[0] * h_mul - 8)) {
  ------------------
  |  Branch (606:21): [True: 1.42M, False: 761k]
  ------------------
  607|       |        // calculate transformation relative to center of 8x8 block in
  608|       |        // luma pixel units
  609|  1.42M|        const int src_x = t->bx * 4 + ((x + 4) << ss_hor);
  610|  1.42M|        const int64_t mvy = ((int64_t) mat[4] * src_x + mat5_y) >> ss_ver;
  611|  1.42M|        const int dy = (int) (mvy >> 16) - 4;
  612|  1.42M|        *dst = imax(*dst, dy + 4 + 8);
  613|  1.42M|    }
  614|   761k|}
decode.c:mc_lowest_px:
  579|  5.97M|{
  580|  5.97M|    const int v_mul = 4 >> ss_ver;
  581|  5.97M|    if (!smp->scale) {
  ------------------
  |  Branch (581:9): [True: 4.60M, False: 1.36M]
  ------------------
  582|  4.60M|        const int my = mvy >> (3 + ss_ver), dy = mvy & (15 >> !ss_ver);
  583|  4.60M|        *dst = imax(*dst, (by4 + bh4) * v_mul + my + 4 * !!dy);
  584|  4.60M|    } else {
  585|  1.36M|        int y = (by4 * v_mul << 4) + mvy * (1 << !ss_ver);
  586|  1.36M|        const int64_t tmp = (int64_t)(y) * smp->scale + (smp->scale - 0x4000) * 8;
  587|  1.36M|        y = apply_sign64((int)((llabs(tmp) + 128) >> 8), tmp) + 32;
  588|  1.36M|        const int bottom = ((y + (bh4 * v_mul - 1) * smp->step) >> 10) + 1 + 4;
  589|  1.36M|        *dst = imax(*dst, bottom);
  590|  1.36M|    }
  591|  5.97M|}
decode.c:obmc_lowest_px:
  639|   753k|{
  640|   753k|    assert(!(t->bx & 1) && !(t->by & 1));
  ------------------
  |  Branch (640:5): [True: 753k, False: 18.4E]
  |  Branch (640:5): [True: 753k, False: 18.4E]
  ------------------
  641|   753k|    const Dav1dFrameContext *const f = t->f;
  642|   753k|    /*const*/ refmvs_block **r = &t->rt.r[(t->by & 31) + 5];
  643|   753k|    const int ss_ver = is_chroma && f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
  ------------------
  |  Branch (643:24): [True: 333k, False: 420k]
  |  Branch (643:37): [True: 216k, False: 117k]
  ------------------
  644|   753k|    const int ss_hor = is_chroma && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
  ------------------
  |  Branch (644:24): [True: 333k, False: 420k]
  |  Branch (644:37): [True: 218k, False: 115k]
  ------------------
  645|   753k|    const int h_mul = 4 >> ss_hor, v_mul = 4 >> ss_ver;
  646|       |
  647|   753k|    if (t->by > t->ts->tiling.row_start &&
  ------------------
  |  Branch (647:9): [True: 689k, False: 64.2k]
  ------------------
  648|   689k|        (!is_chroma || b_dim[0] * h_mul + b_dim[1] * v_mul >= 16))
  ------------------
  |  Branch (648:10): [True: 383k, False: 306k]
  |  Branch (648:24): [True: 153k, False: 153k]
  ------------------
  649|   536k|    {
  650|  1.13M|        for (int i = 0, x = 0; x < w4 && i < imin(b_dim[2], 4); ) {
  ------------------
  |  Branch (650:32): [True: 603k, False: 534k]
  |  Branch (650:42): [True: 600k, False: 2.05k]
  ------------------
  651|       |            // only odd blocks are considered for overlap handling, hence +1
  652|   601k|            const refmvs_block *const a_r = &r[-1][t->bx + x + 1];
  653|   601k|            const uint8_t *const a_b_dim = dav1d_block_dimensions[a_r->bs];
  654|       |
  655|   601k|            if (a_r->ref.ref[0] > 0) {
  ------------------
  |  Branch (655:17): [True: 565k, False: 35.8k]
  ------------------
  656|   565k|                const int oh4 = imin(b_dim[1], 16) >> 1;
  657|   565k|                mc_lowest_px(&dst[a_r->ref.ref[0] - 1][is_chroma], t->by,
  658|   565k|                             (oh4 * 3 + 3) >> 2, a_r->mv.mv[0].y, ss_ver,
  659|   565k|                             &f->svc[a_r->ref.ref[0] - 1][1]);
  660|   565k|                i++;
  661|   565k|            }
  662|   601k|            x += imax(a_b_dim[0], 2);
  663|   601k|        }
  664|   536k|    }
  665|       |
  666|   753k|    if (t->bx > t->ts->tiling.col_start)
  ------------------
  |  Branch (666:9): [True: 730k, False: 23.6k]
  ------------------
  667|  1.54M|        for (int i = 0, y = 0; y < h4 && i < imin(b_dim[3], 4); ) {
  ------------------
  |  Branch (667:32): [True: 812k, False: 727k]
  |  Branch (667:42): [True: 810k, False: 2.14k]
  ------------------
  668|       |            // only odd blocks are considered for overlap handling, hence +1
  669|   810k|            const refmvs_block *const l_r = &r[y + 1][t->bx - 1];
  670|   810k|            const uint8_t *const l_b_dim = dav1d_block_dimensions[l_r->bs];
  671|       |
  672|   810k|            if (l_r->ref.ref[0] > 0) {
  ------------------
  |  Branch (672:17): [True: 760k, False: 49.4k]
  ------------------
  673|   760k|                const int oh4 = iclip(l_b_dim[1], 2, b_dim[1]);
  674|   760k|                mc_lowest_px(&dst[l_r->ref.ref[0] - 1][is_chroma],
  675|   760k|                             t->by + y, oh4, l_r->mv.mv[0].y, ss_ver,
  676|   760k|                             &f->svc[l_r->ref.ref[0] - 1][1]);
  677|   760k|                i++;
  678|   760k|            }
  679|   810k|            y += imax(l_b_dim[1], 2);
  680|   810k|        }
  681|   753k|}
decode.c:affine_lowest_px_chroma:
  626|  88.6k|{
  627|  88.6k|    const Dav1dFrameContext *const f = t->f;
  628|  88.6k|    assert(f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400);
  ------------------
  |  Branch (628:5): [True: 88.6k, False: 1]
  ------------------
  629|  88.6k|    if (f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I444)
  ------------------
  |  Branch (629:9): [True: 14.9k, False: 73.6k]
  ------------------
  630|  14.9k|        affine_lowest_px_luma(t, dst, b_dim, wmp);
  631|  73.6k|    else
  632|  73.6k|        affine_lowest_px(t, dst, b_dim, wmp, f->cur.p.layout & DAV1D_PIXEL_LAYOUT_I420, 1);
  633|  88.6k|}
decode.c:read_restoration_info:
 2514|   217k|{
 2515|   217k|    const Dav1dFrameContext *const f = t->f;
 2516|   217k|    Dav1dTileState *const ts = t->ts;
 2517|       |
 2518|   217k|    if (frame_type == DAV1D_RESTORATION_SWITCHABLE) {
  ------------------
  |  Branch (2518:9): [True: 103k, False: 114k]
  ------------------
 2519|   103k|        const int filter = dav1d_msac_decode_symbol_adapt4(&ts->msac,
  ------------------
  |  |   47|   103k|#define dav1d_msac_decode_symbol_adapt4  dav1d_msac_decode_symbol_adapt4_sse2
  ------------------
 2520|   103k|                               ts->cdf.m.restore_switchable, 2);
 2521|   103k|        lr->type = filter + !!filter; /* NONE/WIENER/SGRPROJ */
 2522|   114k|    } else {
 2523|   114k|        const unsigned type =
 2524|   114k|            dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   114k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
 2525|   114k|                frame_type == DAV1D_RESTORATION_WIENER ?
  ------------------
  |  Branch (2525:17): [True: 33.1k, False: 81.4k]
  ------------------
 2526|  81.4k|                ts->cdf.m.restore_wiener : ts->cdf.m.restore_sgrproj);
 2527|   114k|        lr->type = type ? frame_type : DAV1D_RESTORATION_NONE;
  ------------------
  |  Branch (2527:20): [True: 43.7k, False: 70.8k]
  ------------------
 2528|   114k|    }
 2529|       |
 2530|   217k|    if (lr->type == DAV1D_RESTORATION_WIENER) {
  ------------------
  |  Branch (2530:9): [True: 29.3k, False: 188k]
  ------------------
 2531|  29.3k|        lr->filter_v[0] = p ? 0 :
  ------------------
  |  Branch (2531:27): [True: 15.9k, False: 13.4k]
  ------------------
 2532|  29.3k|            dav1d_msac_decode_subexp(&ts->msac,
 2533|  13.4k|                ts->lr_ref[p]->filter_v[0] + 5, 16, 1) - 5;
 2534|  29.3k|        lr->filter_v[1] =
 2535|  29.3k|            dav1d_msac_decode_subexp(&ts->msac,
 2536|  29.3k|                ts->lr_ref[p]->filter_v[1] + 23, 32, 2) - 23;
 2537|  29.3k|        lr->filter_v[2] =
 2538|  29.3k|            dav1d_msac_decode_subexp(&ts->msac,
 2539|  29.3k|                ts->lr_ref[p]->filter_v[2] + 17, 64, 3) - 17;
 2540|       |
 2541|  29.3k|        lr->filter_h[0] = p ? 0 :
  ------------------
  |  Branch (2541:27): [True: 15.9k, False: 13.4k]
  ------------------
 2542|  29.3k|            dav1d_msac_decode_subexp(&ts->msac,
 2543|  13.4k|                ts->lr_ref[p]->filter_h[0] + 5, 16, 1) - 5;
 2544|  29.3k|        lr->filter_h[1] =
 2545|  29.3k|            dav1d_msac_decode_subexp(&ts->msac,
 2546|  29.3k|                ts->lr_ref[p]->filter_h[1] + 23, 32, 2) - 23;
 2547|  29.3k|        lr->filter_h[2] =
 2548|  29.3k|            dav1d_msac_decode_subexp(&ts->msac,
 2549|  29.3k|                ts->lr_ref[p]->filter_h[2] + 17, 64, 3) - 17;
 2550|  29.3k|        memcpy(lr->sgr_weights, ts->lr_ref[p]->sgr_weights, sizeof(lr->sgr_weights));
 2551|  29.3k|        ts->lr_ref[p] = lr;
 2552|  29.3k|        if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  29.3k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 29.3k]
  |  |  ------------------
  |  |   35|  29.3k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  29.3k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 2553|      0|            printf("Post-lr_wiener[pl=%d,v[%d,%d,%d],h[%d,%d,%d]]: r=%d\n",
 2554|      0|                   p, lr->filter_v[0], lr->filter_v[1],
 2555|      0|                   lr->filter_v[2], lr->filter_h[0],
 2556|      0|                   lr->filter_h[1], lr->filter_h[2], ts->msac.rng);
 2557|   188k|    } else if (lr->type == DAV1D_RESTORATION_SGRPROJ) {
  ------------------
  |  Branch (2557:16): [True: 42.1k, False: 146k]
  ------------------
 2558|  42.1k|        const unsigned idx = dav1d_msac_decode_bools(&ts->msac, 4);
 2559|  42.1k|        const uint16_t *const sgr_params = dav1d_sgr_params[idx];
 2560|  42.1k|        lr->type += idx;
 2561|  42.1k|        lr->sgr_weights[0] = sgr_params[0] ? dav1d_msac_decode_subexp(&ts->msac,
  ------------------
  |  Branch (2561:30): [True: 34.7k, False: 7.36k]
  ------------------
 2562|  34.7k|            ts->lr_ref[p]->sgr_weights[0] + 96, 128, 4) - 96 : 0;
 2563|  42.1k|        lr->sgr_weights[1] = sgr_params[1] ? dav1d_msac_decode_subexp(&ts->msac,
  ------------------
  |  Branch (2563:30): [True: 23.1k, False: 19.0k]
  ------------------
 2564|  23.1k|            ts->lr_ref[p]->sgr_weights[1] + 32, 128, 4) - 32 : 95;
 2565|  42.1k|        memcpy(lr->filter_v, ts->lr_ref[p]->filter_v, sizeof(lr->filter_v));
 2566|  42.1k|        memcpy(lr->filter_h, ts->lr_ref[p]->filter_h, sizeof(lr->filter_h));
 2567|  42.1k|        ts->lr_ref[p] = lr;
 2568|  42.1k|        if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  42.1k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 42.1k]
  |  |  ------------------
  |  |   35|  42.1k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  42.1k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 2569|      0|            printf("Post-lr_sgrproj[pl=%d,idx=%d,w[%d,%d]]: r=%d\n",
 2570|      0|                   p, idx, lr->sgr_weights[0],
 2571|      0|                   lr->sgr_weights[1], ts->msac.rng);
 2572|  42.1k|    }
 2573|   217k|}
decode.c:init_quant_tables:
   57|   305k|{
   58|   788k|    for (int i = 0; i < (frame_hdr->segmentation.enabled ? 8 : 1); i++) {
  ------------------
  |  Branch (58:21): [True: 483k, False: 305k]
  |  Branch (58:26): [True: 229k, False: 559k]
  ------------------
   59|   483k|        const int yac = frame_hdr->segmentation.enabled ?
  ------------------
  |  Branch (59:25): [True: 203k, False: 279k]
  ------------------
   60|   279k|            iclip_u8(qidx + frame_hdr->segmentation.seg_data.d[i].delta_q) : qidx;
   61|   483k|        const int ydc = iclip_u8(yac + frame_hdr->quant.ydc_delta);
   62|   483k|        const int uac = iclip_u8(yac + frame_hdr->quant.uac_delta);
   63|   483k|        const int udc = iclip_u8(yac + frame_hdr->quant.udc_delta);
   64|   483k|        const int vac = iclip_u8(yac + frame_hdr->quant.vac_delta);
   65|   483k|        const int vdc = iclip_u8(yac + frame_hdr->quant.vdc_delta);
   66|       |
   67|   483k|        dq[i][0][0] = dav1d_dq_tbl[seq_hdr->hbd][ydc][0];
   68|   483k|        dq[i][0][1] = dav1d_dq_tbl[seq_hdr->hbd][yac][1];
   69|   483k|        dq[i][1][0] = dav1d_dq_tbl[seq_hdr->hbd][udc][0];
   70|   483k|        dq[i][1][1] = dav1d_dq_tbl[seq_hdr->hbd][uac][1];
   71|   483k|        dq[i][2][0] = dav1d_dq_tbl[seq_hdr->hbd][vdc][0];
   72|   483k|        dq[i][2][1] = dav1d_dq_tbl[seq_hdr->hbd][vac][1];
   73|   483k|    }
   74|   305k|}
decode.c:setup_tile:
 2430|   245k|{
 2431|   245k|    const int col_sb_start = f->frame_hdr->tiling.col_start_sb[tile_col];
 2432|   245k|    const int col_sb128_start = col_sb_start >> !f->seq_hdr->sb128;
 2433|   245k|    const int col_sb_end = f->frame_hdr->tiling.col_start_sb[tile_col + 1];
 2434|   245k|    const int row_sb_start = f->frame_hdr->tiling.row_start_sb[tile_row];
 2435|   245k|    const int row_sb_end = f->frame_hdr->tiling.row_start_sb[tile_row + 1];
 2436|   245k|    const int sb_shift = f->sb_shift;
 2437|       |
 2438|   245k|    const uint8_t *const size_mul = ss_size_mul[f->cur.p.layout];
 2439|   734k|    for (int p = 0; p < 2; p++) {
  ------------------
  |  Branch (2439:21): [True: 489k, False: 245k]
  ------------------
 2440|   489k|        ts->frame_thread[p].pal_idx = f->frame_thread.pal_idx ?
  ------------------
  |  Branch (2440:39): [True: 423k, False: 66.0k]
  ------------------
 2441|   423k|            &f->frame_thread.pal_idx[(size_t)tile_start_off * size_mul[1] / 8] :
 2442|   489k|            NULL;
 2443|   489k|        ts->frame_thread[p].cbi = f->frame_thread.cbi ?
  ------------------
  |  Branch (2443:35): [True: 489k, False: 32]
  ------------------
 2444|   489k|            &f->frame_thread.cbi[(size_t)tile_start_off * size_mul[0] / 64] :
 2445|   489k|            NULL;
 2446|   489k|        ts->frame_thread[p].cf = f->frame_thread.cf ?
  ------------------
  |  Branch (2446:34): [True: 489k, False: 30]
  ------------------
 2447|   489k|            (uint8_t*)f->frame_thread.cf +
 2448|   489k|                (((size_t)tile_start_off * size_mul[0]) >> !f->seq_hdr->hbd) :
 2449|   489k|            NULL;
 2450|   489k|    }
 2451|       |
 2452|   245k|    dav1d_cdf_thread_copy(&ts->cdf, &f->in_cdf);
 2453|   245k|    ts->last_qidx = f->frame_hdr->quant.yac;
 2454|   245k|    ts->last_delta_lf.u32 = 0;
 2455|       |
 2456|   245k|    dav1d_msac_init(&ts->msac, data, sz, f->frame_hdr->disable_cdf_update);
 2457|       |
 2458|   245k|    ts->tiling.row = tile_row;
 2459|   245k|    ts->tiling.col = tile_col;
 2460|   245k|    ts->tiling.col_start = col_sb_start << sb_shift;
 2461|   245k|    ts->tiling.col_end = imin(col_sb_end << sb_shift, f->bw);
 2462|   245k|    ts->tiling.row_start = row_sb_start << sb_shift;
 2463|   245k|    ts->tiling.row_end = imin(row_sb_end << sb_shift, f->bh);
 2464|       |
 2465|       |    // Reference Restoration Unit (used for exp coding)
 2466|   245k|    int sb_idx, unit_idx;
 2467|   245k|    if (f->frame_hdr->width[0] != f->frame_hdr->width[1]) {
  ------------------
  |  Branch (2467:9): [True: 12.5k, False: 232k]
  ------------------
 2468|       |        // vertical components only
 2469|  12.5k|        sb_idx = (ts->tiling.row_start >> 5) * f->sr_sb128w;
 2470|  12.5k|        unit_idx = (ts->tiling.row_start & 16) >> 3;
 2471|   232k|    } else {
 2472|   232k|        sb_idx = (ts->tiling.row_start >> 5) * f->sb128w + col_sb128_start;
 2473|   232k|        unit_idx = ((ts->tiling.row_start & 16) >> 3) +
 2474|   232k|                   ((ts->tiling.col_start & 16) >> 4);
 2475|   232k|    }
 2476|   979k|    for (int p = 0; p < 3; p++) {
  ------------------
  |  Branch (2476:21): [True: 734k, False: 245k]
  ------------------
 2477|   734k|        if (!((f->lf.restore_planes >> p) & 1U))
  ------------------
  |  Branch (2477:13): [True: 680k, False: 54.2k]
  ------------------
 2478|   680k|            continue;
 2479|       |
 2480|  54.2k|        if (f->frame_hdr->width[0] != f->frame_hdr->width[1]) {
  ------------------
  |  Branch (2480:13): [True: 13.4k, False: 40.8k]
  ------------------
 2481|  13.4k|            const int ss_hor = p && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
  ------------------
  |  Branch (2481:32): [True: 9.08k, False: 4.33k]
  |  Branch (2481:37): [True: 5.92k, False: 3.16k]
  ------------------
 2482|  13.4k|            const int d = f->frame_hdr->super_res.width_scale_denominator;
 2483|  13.4k|            const int unit_size_log2 = f->frame_hdr->restoration.unit_size[!!p];
 2484|  13.4k|            const int rnd = (8 << unit_size_log2) - 1, shift = unit_size_log2 + 3;
 2485|  13.4k|            const int x = ((4 * ts->tiling.col_start * d >> ss_hor) + rnd) >> shift;
 2486|  13.4k|            const int px_x = x << (unit_size_log2 + ss_hor);
 2487|  13.4k|            const int u_idx = unit_idx + ((px_x & 64) >> 6);
 2488|  13.4k|            const int sb128x = px_x >> 7;
 2489|  13.4k|            if (sb128x >= f->sr_sb128w) continue;
  ------------------
  |  Branch (2489:17): [True: 187, False: 13.2k]
  ------------------
 2490|  13.2k|            ts->lr_ref[p] = &f->lf.lr_mask[sb_idx + sb128x].lr[p][u_idx];
 2491|  40.8k|        } else {
 2492|  40.8k|            ts->lr_ref[p] = &f->lf.lr_mask[sb_idx].lr[p][unit_idx];
 2493|  40.8k|        }
 2494|       |
 2495|  54.0k|        ts->lr_ref[p]->filter_v[0] = 3;
 2496|  54.0k|        ts->lr_ref[p]->filter_v[1] = -7;
 2497|  54.0k|        ts->lr_ref[p]->filter_v[2] = 15;
 2498|  54.0k|        ts->lr_ref[p]->filter_h[0] = 3;
 2499|  54.0k|        ts->lr_ref[p]->filter_h[1] = -7;
 2500|  54.0k|        ts->lr_ref[p]->filter_h[2] = 15;
 2501|  54.0k|        ts->lr_ref[p]->sgr_weights[0] = -32;
 2502|  54.0k|        ts->lr_ref[p]->sgr_weights[1] = 31;
 2503|  54.0k|    }
 2504|       |
 2505|   245k|    if (f->c->n_tc > 1) {
  ------------------
  |  Branch (2505:9): [True: 244k, False: 270]
  ------------------
 2506|   734k|        for (int p = 0; p < 2; p++)
  ------------------
  |  Branch (2506:25): [True: 489k, False: 244k]
  ------------------
 2507|   489k|            atomic_init(&ts->progress[p], row_sb_start);
 2508|   244k|    }
 2509|   245k|}
decode.c:get_upscale_x0:
 3321|  48.3k|static int get_upscale_x0(const int in_w, const int out_w, const int step) {
 3322|  48.3k|    const int err = out_w * step - (in_w << 14);
 3323|  48.3k|    const int x0 = (-((out_w - in_w) << 13) + (out_w >> 1)) / out_w + 128 - (err / 2);
 3324|  48.3k|    return x0 & 0x3fff;
 3325|  48.3k|}

obu.c:get_poc_diff:
  239|  1.21M|{
  240|  1.21M|    if (!order_hint_n_bits) return 0;
  ------------------
  |  Branch (240:9): [True: 0, False: 1.21M]
  ------------------
  241|  1.21M|    const int mask = 1 << (order_hint_n_bits - 1);
  242|  1.21M|    const int diff = poc0 - poc1;
  243|  1.21M|    return (diff & (mask - 1)) - (diff & mask);
  244|  1.21M|}
refmvs.c:get_gmv_2d:
  482|  3.62M|{
  483|  3.62M|    switch (gmv->type) {
  484|   843k|    case DAV1D_WM_TYPE_ROT_ZOOM:
  ------------------
  |  Branch (484:5): [True: 843k, False: 2.77M]
  ------------------
  485|   843k|        assert(gmv->matrix[5] ==  gmv->matrix[2]);
  ------------------
  |  Branch (485:9): [True: 843k, False: 18.4E]
  ------------------
  486|   843k|        assert(gmv->matrix[4] == -gmv->matrix[3]);
  ------------------
  |  Branch (486:9): [True: 843k, False: 18.4E]
  ------------------
  487|       |        // fall-through
  488|   843k|    default:
  ------------------
  |  Branch (488:5): [True: 0, False: 3.62M]
  ------------------
  489|  1.01M|    case DAV1D_WM_TYPE_AFFINE: {
  ------------------
  |  Branch (489:5): [True: 167k, False: 3.45M]
  ------------------
  490|  1.01M|        const int x = bx4 * 4 + bw4 * 2 - 1;
  491|  1.01M|        const int y = by4 * 4 + bh4 * 2 - 1;
  492|  1.01M|        const int xc = (gmv->matrix[2] - (1 << 16)) * x +
  493|  1.01M|                       gmv->matrix[3] * y + gmv->matrix[0];
  494|  1.01M|        const int yc = (gmv->matrix[5] - (1 << 16)) * y +
  495|  1.01M|                       gmv->matrix[4] * x + gmv->matrix[1];
  496|  1.01M|        const int shift = 16 - (3 - !hdr->hp);
  497|  1.01M|        const int round = (1 << shift) >> 1;
  498|  1.01M|        mv res = (mv) {
  499|  1.01M|            .y = apply_sign(((abs(yc) + round) >> shift) << !hdr->hp, yc),
  500|  1.01M|            .x = apply_sign(((abs(xc) + round) >> shift) << !hdr->hp, xc),
  501|  1.01M|        };
  502|  1.01M|        if (hdr->force_integer_mv)
  ------------------
  |  Branch (502:13): [True: 29.1k, False: 981k]
  ------------------
  503|  29.1k|            fix_int_mv_precision(&res);
  504|  1.01M|        return res;
  505|   843k|    }
  506|  61.3k|    case DAV1D_WM_TYPE_TRANSLATION: {
  ------------------
  |  Branch (506:5): [True: 61.3k, False: 3.56M]
  ------------------
  507|  61.3k|        mv res = (mv) {
  508|  61.3k|            .y = gmv->matrix[0] >> 13,
  509|  61.3k|            .x = gmv->matrix[1] >> 13,
  510|  61.3k|        };
  511|  61.3k|        if (hdr->force_integer_mv)
  ------------------
  |  Branch (511:13): [True: 2.09k, False: 59.2k]
  ------------------
  512|  2.09k|            fix_int_mv_precision(&res);
  513|  61.3k|        return res;
  514|   843k|    }
  515|  2.55M|    case DAV1D_WM_TYPE_IDENTITY:
  ------------------
  |  Branch (515:5): [True: 2.55M, False: 1.07M]
  ------------------
  516|  2.55M|        return (mv) { .x = 0, .y = 0 };
  517|  3.62M|    }
  518|  3.62M|}
refmvs.c:fix_int_mv_precision:
  462|  83.5k|static inline void fix_int_mv_precision(mv *const mv) {
  463|  83.5k|    mv->x = (mv->x - (mv->x >> 15) + 3) & ~7U;
  464|  83.5k|    mv->y = (mv->y - (mv->y >> 15) + 3) & ~7U;
  465|  83.5k|}
refmvs.c:fix_mv_precision:
  469|  3.32M|{
  470|  3.32M|    if (hdr->force_integer_mv) {
  ------------------
  |  Branch (470:9): [True: 52.2k, False: 3.27M]
  ------------------
  471|  52.2k|        fix_int_mv_precision(mv);
  472|  3.27M|    } else if (!hdr->hp) {
  ------------------
  |  Branch (472:16): [True: 340k, False: 2.93M]
  ------------------
  473|   340k|        mv->x = (mv->x - (mv->x >> 15)) & ~1U;
  474|   340k|        mv->y = (mv->y - (mv->y >> 15)) & ~1U;
  475|   340k|    }
  476|  3.32M|}
refmvs.c:get_poc_diff:
  239|  4.75M|{
  240|  4.75M|    if (!order_hint_n_bits) return 0;
  ------------------
  |  Branch (240:9): [True: 2.42M, False: 2.32M]
  ------------------
  241|  2.32M|    const int mask = 1 << (order_hint_n_bits - 1);
  242|  2.32M|    const int diff = poc0 - poc1;
  243|  2.32M|    return (diff & (mask - 1)) - (diff & mask);
  244|  4.75M|}
decode.c:get_partition_ctx:
   87|  6.79M|{
   88|  6.79M|    return ((a->partition[xb8] >> (4 - bl)) & 1) +
   89|  6.79M|          (((l->partition[yb8] >> (4 - bl)) & 1) << 1);
   90|  6.79M|}
decode.c:get_cur_frame_segid:
  445|  1.85M|{
  446|  1.85M|    cur_seg_map += bx + by * stride;
  447|  1.85M|    if (have_left && have_top) {
  ------------------
  |  Branch (447:9): [True: 1.19M, False: 668k]
  |  Branch (447:22): [True: 1.02M, False: 166k]
  ------------------
  448|  1.02M|        const int l = cur_seg_map[-1];
  449|  1.02M|        const int a = cur_seg_map[-stride];
  450|  1.02M|        const int al = cur_seg_map[-(stride + 1)];
  451|       |
  452|  1.02M|        if (l == a && al == l) *seg_ctx = 2;
  ------------------
  |  Branch (452:13): [True: 767k, False: 256k]
  |  Branch (452:23): [True: 744k, False: 22.9k]
  ------------------
  453|   279k|        else if (l == a || al == l || a == al) *seg_ctx = 1;
  ------------------
  |  Branch (453:18): [True: 22.0k, False: 257k]
  |  Branch (453:28): [True: 111k, False: 146k]
  |  Branch (453:39): [True: 94.7k, False: 51.9k]
  ------------------
  454|  50.8k|        else *seg_ctx = 0;
  455|  1.02M|        return a == al ? a : l;
  ------------------
  |  Branch (455:16): [True: 838k, False: 185k]
  ------------------
  456|  1.02M|    } else {
  457|   835k|        *seg_ctx = 0;
  458|   835k|        return have_left ? cur_seg_map[-1] : have_top ? cur_seg_map[-stride] : 0;
  ------------------
  |  Branch (458:16): [True: 168k, False: 667k]
  |  Branch (458:46): [True: 660k, False: 6.19k]
  ------------------
  459|   835k|    }
  460|  1.85M|}
decode.c:get_intra_ctx:
   63|  2.51M|{
   64|  2.51M|    if (have_left) {
  ------------------
  |  Branch (64:9): [True: 2.41M, False: 97.1k]
  ------------------
   65|  2.41M|        if (have_top) {
  ------------------
  |  Branch (65:13): [True: 2.22M, False: 193k]
  ------------------
   66|  2.22M|            const int ctx = l->intra[yb4] + a->intra[xb4];
   67|  2.22M|            return ctx + (ctx == 2);
   68|  2.22M|        } else
   69|   193k|            return l->intra[yb4] * 2;
   70|  2.41M|    } else {
   71|  97.1k|        return have_top ? a->intra[xb4] * 2 : 0;
  ------------------
  |  Branch (71:16): [True: 84.0k, False: 13.0k]
  ------------------
   72|  97.1k|    }
   73|  2.51M|}
decode.c:get_tx_ctx:
   79|  1.23M|{
   80|  1.23M|    return (l->tx_intra[yb4] >= max_tx->lh) + (a->tx_intra[xb4] >= max_tx->lw);
   81|  1.23M|}
decode.c:get_comp_ctx:
  160|   759k|{
  161|   759k|    if (have_top) {
  ------------------
  |  Branch (161:9): [True: 650k, False: 108k]
  ------------------
  162|   650k|        if (have_left) {
  ------------------
  |  Branch (162:13): [True: 612k, False: 38.6k]
  ------------------
  163|   612k|            if (a->comp_type[xb4]) {
  ------------------
  |  Branch (163:17): [True: 225k, False: 386k]
  ------------------
  164|   225k|                if (l->comp_type[yb4]) {
  ------------------
  |  Branch (164:21): [True: 139k, False: 85.9k]
  ------------------
  165|   139k|                    return 4;
  166|   139k|                } else {
  167|       |                    // 4U means intra (-1) or bwd (>= 4)
  168|  85.9k|                    return 2 + ((unsigned)l->ref[0][yb4] >= 4U);
  169|  85.9k|                }
  170|   386k|            } else if (l->comp_type[yb4]) {
  ------------------
  |  Branch (170:24): [True: 75.5k, False: 311k]
  ------------------
  171|       |                // 4U means intra (-1) or bwd (>= 4)
  172|  75.5k|                return 2 + ((unsigned)a->ref[0][xb4] >= 4U);
  173|   311k|            } else {
  174|   311k|                return (l->ref[0][yb4] >= 4) ^ (a->ref[0][xb4] >= 4);
  175|   311k|            }
  176|   612k|        } else {
  177|  38.6k|            return a->comp_type[xb4] ? 3 : a->ref[0][xb4] >= 4;
  ------------------
  |  Branch (177:20): [True: 12.4k, False: 26.2k]
  ------------------
  178|  38.6k|        }
  179|   650k|    } else if (have_left) {
  ------------------
  |  Branch (179:16): [True: 102k, False: 5.92k]
  ------------------
  180|   102k|        return l->comp_type[yb4] ? 3 : l->ref[0][yb4] >= 4;
  ------------------
  |  Branch (180:16): [True: 48.2k, False: 54.2k]
  ------------------
  181|   102k|    } else {
  182|  5.92k|        return 1;
  183|  5.92k|    }
  184|   759k|}
decode.c:fix_mv_precision:
  469|  1.48M|{
  470|  1.48M|    if (hdr->force_integer_mv) {
  ------------------
  |  Branch (470:9): [True: 41.2k, False: 1.44M]
  ------------------
  471|  41.2k|        fix_int_mv_precision(mv);
  472|  1.44M|    } else if (!hdr->hp) {
  ------------------
  |  Branch (472:16): [True: 610k, False: 834k]
  ------------------
  473|   610k|        mv->x = (mv->x - (mv->x >> 15)) & ~1U;
  474|   610k|        mv->y = (mv->y - (mv->y >> 15)) & ~1U;
  475|   610k|    }
  476|  1.48M|}
decode.c:fix_int_mv_precision:
  462|  56.7k|static inline void fix_int_mv_precision(mv *const mv) {
  463|  56.7k|    mv->x = (mv->x - (mv->x >> 15) + 3) & ~7U;
  464|  56.7k|    mv->y = (mv->y - (mv->y >> 15) + 3) & ~7U;
  465|  56.7k|}
decode.c:get_comp_dir_ctx:
  190|   312k|{
  191|   312k|#define has_uni_comp(edge, off) \
  192|   312k|    ((edge->ref[0][off] < 4) == (edge->ref[1][off] < 4))
  193|       |
  194|   312k|    if (have_top && have_left) {
  ------------------
  |  Branch (194:9): [True: 258k, False: 54.0k]
  |  Branch (194:21): [True: 245k, False: 12.8k]
  ------------------
  195|   245k|        const int a_intra = a->intra[xb4], l_intra = l->intra[yb4];
  196|       |
  197|   245k|        if (a_intra && l_intra) return 2;
  ------------------
  |  Branch (197:13): [True: 9.40k, False: 236k]
  |  Branch (197:24): [True: 2.50k, False: 6.89k]
  ------------------
  198|   242k|        if (a_intra || l_intra) {
  ------------------
  |  Branch (198:13): [True: 6.90k, False: 236k]
  |  Branch (198:24): [True: 8.06k, False: 227k]
  ------------------
  199|  14.9k|            const BlockContext *const edge = a_intra ? l : a;
  ------------------
  |  Branch (199:46): [True: 6.89k, False: 8.07k]
  ------------------
  200|  14.9k|            const int off = a_intra ? yb4 : xb4;
  ------------------
  |  Branch (200:29): [True: 6.89k, False: 8.07k]
  ------------------
  201|       |
  202|  14.9k|            if (edge->comp_type[off] == COMP_INTER_NONE) return 2;
  ------------------
  |  Branch (202:17): [True: 6.51k, False: 8.45k]
  ------------------
  203|  8.45k|            return 1 + 2 * has_uni_comp(edge, off);
  ------------------
  |  |  192|  8.45k|    ((edge->ref[0][off] < 4) == (edge->ref[1][off] < 4))
  ------------------
  204|  14.9k|        }
  205|       |
  206|   227k|        const int a_comp = a->comp_type[xb4] != COMP_INTER_NONE;
  207|   227k|        const int l_comp = l->comp_type[yb4] != COMP_INTER_NONE;
  208|   227k|        const int a_ref0 = a->ref[0][xb4], l_ref0 = l->ref[0][yb4];
  209|       |
  210|   227k|        if (!a_comp && !l_comp) {
  ------------------
  |  Branch (210:13): [True: 63.7k, False: 164k]
  |  Branch (210:24): [True: 27.2k, False: 36.4k]
  ------------------
  211|  27.2k|            return 1 + 2 * ((a_ref0 >= 4) == (l_ref0 >= 4));
  212|   200k|        } else if (!a_comp || !l_comp) {
  ------------------
  |  Branch (212:20): [True: 36.3k, False: 164k]
  |  Branch (212:31): [True: 41.2k, False: 123k]
  ------------------
  213|  77.7k|            const BlockContext *const edge = a_comp ? a : l;
  ------------------
  |  Branch (213:46): [True: 41.3k, False: 36.4k]
  ------------------
  214|  77.7k|            const int off = a_comp ? xb4 : yb4;
  ------------------
  |  Branch (214:29): [True: 41.3k, False: 36.4k]
  ------------------
  215|       |
  216|  77.7k|            if (!has_uni_comp(edge, off)) return 1;
  ------------------
  |  |  192|  77.7k|    ((edge->ref[0][off] < 4) == (edge->ref[1][off] < 4))
  ------------------
  |  Branch (216:17): [True: 66.6k, False: 11.0k]
  ------------------
  217|  11.0k|            return 3 + ((a_ref0 >= 4) == (l_ref0 >= 4));
  218|   122k|        } else {
  219|   122k|            const int a_uni = has_uni_comp(a, xb4), l_uni = has_uni_comp(l, yb4);
  ------------------
  |  |  192|   122k|    ((edge->ref[0][off] < 4) == (edge->ref[1][off] < 4))
  ------------------
                          const int a_uni = has_uni_comp(a, xb4), l_uni = has_uni_comp(l, yb4);
  ------------------
  |  |  192|   122k|    ((edge->ref[0][off] < 4) == (edge->ref[1][off] < 4))
  ------------------
  220|       |
  221|   122k|            if (!a_uni && !l_uni) return 0;
  ------------------
  |  Branch (221:17): [True: 107k, False: 15.4k]
  |  Branch (221:27): [True: 100k, False: 6.49k]
  ------------------
  222|  21.9k|            if (!a_uni || !l_uni) return 2;
  ------------------
  |  Branch (222:17): [True: 6.38k, False: 15.5k]
  |  Branch (222:27): [True: 8.75k, False: 6.76k]
  ------------------
  223|  6.65k|            return 3 + ((a_ref0 == 4) == (l_ref0 == 4));
  224|  21.9k|        }
  225|   227k|    } else if (have_top || have_left) {
  ------------------
  |  Branch (225:16): [True: 12.6k, False: 54.1k]
  |  Branch (225:28): [True: 52.0k, False: 2.13k]
  ------------------
  226|  64.8k|        const BlockContext *const edge = have_left ? l : a;
  ------------------
  |  Branch (226:42): [True: 52.0k, False: 12.8k]
  ------------------
  227|  64.8k|        const int off = have_left ? yb4 : xb4;
  ------------------
  |  Branch (227:25): [True: 52.0k, False: 12.8k]
  ------------------
  228|       |
  229|  64.8k|        if (edge->intra[off]) return 2;
  ------------------
  |  Branch (229:13): [True: 1.68k, False: 63.1k]
  ------------------
  230|  63.1k|        if (edge->comp_type[off] == COMP_INTER_NONE) return 2;
  ------------------
  |  Branch (230:13): [True: 14.6k, False: 48.5k]
  ------------------
  231|  48.5k|        return 4 * has_uni_comp(edge, off);
  ------------------
  |  |  192|  48.5k|    ((edge->ref[0][off] < 4) == (edge->ref[1][off] < 4))
  ------------------
  232|  63.1k|    } else {
  233|  2.00k|        return 2;
  234|  2.00k|    }
  235|   312k|}
decode.c:av1_get_fwd_ref_ctx:
  307|  1.46M|{
  308|  1.46M|    int cnt[4] = { 0 };
  309|       |
  310|  1.46M|    if (have_top && !a->intra[xb4]) {
  ------------------
  |  Branch (310:9): [True: 1.34M, False: 115k]
  |  Branch (310:21): [True: 1.24M, False: 108k]
  ------------------
  311|  1.24M|        if (a->ref[0][xb4] < 4) cnt[a->ref[0][xb4]]++;
  ------------------
  |  Branch (311:13): [True: 1.16M, False: 78.0k]
  ------------------
  312|  1.24M|        if (a->comp_type[xb4] && a->ref[1][xb4] < 4) cnt[a->ref[1][xb4]]++;
  ------------------
  |  Branch (312:13): [True: 216k, False: 1.02M]
  |  Branch (312:34): [True: 23.3k, False: 192k]
  ------------------
  313|  1.24M|    }
  314|       |
  315|  1.46M|    if (have_left && !l->intra[yb4]) {
  ------------------
  |  Branch (315:9): [True: 1.41M, False: 46.8k]
  |  Branch (315:22): [True: 1.30M, False: 111k]
  ------------------
  316|  1.30M|        if (l->ref[0][yb4] < 4) cnt[l->ref[0][yb4]]++;
  ------------------
  |  Branch (316:13): [True: 1.22M, False: 80.8k]
  ------------------
  317|  1.30M|        if (l->comp_type[yb4] && l->ref[1][yb4] < 4) cnt[l->ref[1][yb4]]++;
  ------------------
  |  Branch (317:13): [True: 247k, False: 1.05M]
  |  Branch (317:34): [True: 21.5k, False: 225k]
  ------------------
  318|  1.30M|    }
  319|       |
  320|  1.46M|    cnt[0] += cnt[1];
  321|  1.46M|    cnt[2] += cnt[3];
  322|       |
  323|  1.46M|    return cnt[0] == cnt[2] ? 1 : cnt[0] < cnt[2] ? 0 : 2;
  ------------------
  |  Branch (323:12): [True: 161k, False: 1.30M]
  |  Branch (323:35): [True: 176k, False: 1.12M]
  ------------------
  324|  1.46M|}
decode.c:av1_get_fwd_ref_2_ctx:
  350|   256k|{
  351|   256k|    int cnt[2] = { 0 };
  352|       |
  353|   256k|    if (have_top && !a->intra[xb4]) {
  ------------------
  |  Branch (353:9): [True: 212k, False: 43.9k]
  |  Branch (353:21): [True: 194k, False: 17.5k]
  ------------------
  354|   194k|        if ((a->ref[0][xb4] ^ 2U) < 2) cnt[a->ref[0][xb4] - 2]++;
  ------------------
  |  Branch (354:13): [True: 121k, False: 73.0k]
  ------------------
  355|   194k|        if (a->comp_type[xb4] && (a->ref[1][xb4] ^ 2U) < 2) cnt[a->ref[1][xb4] - 2]++;
  ------------------
  |  Branch (355:13): [True: 68.3k, False: 126k]
  |  Branch (355:34): [True: 9.75k, False: 58.6k]
  ------------------
  356|   194k|    }
  357|       |
  358|   256k|    if (have_left && !l->intra[yb4]) {
  ------------------
  |  Branch (358:9): [True: 245k, False: 10.8k]
  |  Branch (358:22): [True: 228k, False: 16.8k]
  ------------------
  359|   228k|        if ((l->ref[0][yb4] ^ 2U) < 2) cnt[l->ref[0][yb4] - 2]++;
  ------------------
  |  Branch (359:13): [True: 150k, False: 78.6k]
  ------------------
  360|   228k|        if (l->comp_type[yb4] && (l->ref[1][yb4] ^ 2U) < 2) cnt[l->ref[1][yb4] - 2]++;
  ------------------
  |  Branch (360:13): [True: 89.5k, False: 139k]
  |  Branch (360:34): [True: 9.32k, False: 80.2k]
  ------------------
  361|   228k|    }
  362|       |
  363|   256k|    return cnt[0] == cnt[1] ? 1 : cnt[0] < cnt[1] ? 0 : 2;
  ------------------
  |  Branch (363:12): [True: 66.1k, False: 190k]
  |  Branch (363:35): [True: 140k, False: 49.6k]
  ------------------
  364|   256k|}
decode.c:av1_get_fwd_ref_1_ctx:
  330|  1.22M|{
  331|  1.22M|    int cnt[2] = { 0 };
  332|       |
  333|  1.22M|    if (have_top && !a->intra[xb4]) {
  ------------------
  |  Branch (333:9): [True: 1.15M, False: 75.1k]
  |  Branch (333:21): [True: 1.06M, False: 92.1k]
  ------------------
  334|  1.06M|        if (a->ref[0][xb4] < 2) cnt[a->ref[0][xb4]]++;
  ------------------
  |  Branch (334:13): [True: 965k, False: 96.4k]
  ------------------
  335|  1.06M|        if (a->comp_type[xb4] && a->ref[1][xb4] < 2) cnt[a->ref[1][xb4]]++;
  ------------------
  |  Branch (335:13): [True: 158k, False: 904k]
  |  Branch (335:34): [True: 9.78k, False: 148k]
  ------------------
  336|  1.06M|    }
  337|       |
  338|  1.22M|    if (have_left && !l->intra[yb4]) {
  ------------------
  |  Branch (338:9): [True: 1.19M, False: 37.7k]
  |  Branch (338:22): [True: 1.09M, False: 95.5k]
  ------------------
  339|  1.09M|        if (l->ref[0][yb4] < 2) cnt[l->ref[0][yb4]]++;
  ------------------
  |  Branch (339:13): [True: 994k, False: 101k]
  ------------------
  340|  1.09M|        if (l->comp_type[yb4] && l->ref[1][yb4] < 2) cnt[l->ref[1][yb4]]++;
  ------------------
  |  Branch (340:13): [True: 169k, False: 926k]
  |  Branch (340:34): [True: 9.82k, False: 159k]
  ------------------
  341|  1.09M|    }
  342|       |
  343|  1.22M|    return cnt[0] == cnt[1] ? 1 : cnt[0] < cnt[1] ? 0 : 2;
  ------------------
  |  Branch (343:12): [True: 117k, False: 1.11M]
  |  Branch (343:35): [True: 44.7k, False: 1.06M]
  ------------------
  344|  1.22M|}
decode.c:av1_get_bwd_ref_ctx:
  370|   741k|{
  371|   741k|    int cnt[3] = { 0 };
  372|       |
  373|   741k|    if (have_top && !a->intra[xb4]) {
  ------------------
  |  Branch (373:9): [True: 629k, False: 111k]
  |  Branch (373:21): [True: 580k, False: 49.0k]
  ------------------
  374|   580k|        if (a->ref[0][xb4] >= 4) cnt[a->ref[0][xb4] - 4]++;
  ------------------
  |  Branch (374:13): [True: 314k, False: 266k]
  ------------------
  375|   580k|        if (a->comp_type[xb4] && a->ref[1][xb4] >= 4) cnt[a->ref[1][xb4] - 4]++;
  ------------------
  |  Branch (375:13): [True: 191k, False: 389k]
  |  Branch (375:34): [True: 182k, False: 9.29k]
  ------------------
  376|   580k|    }
  377|       |
  378|   741k|    if (have_left && !l->intra[yb4]) {
  ------------------
  |  Branch (378:9): [True: 701k, False: 39.6k]
  |  Branch (378:22): [True: 649k, False: 51.7k]
  ------------------
  379|   649k|        if (l->ref[0][yb4] >= 4) cnt[l->ref[0][yb4] - 4]++;
  ------------------
  |  Branch (379:13): [True: 345k, False: 303k]
  ------------------
  380|   649k|        if (l->comp_type[yb4] && l->ref[1][yb4] >= 4) cnt[l->ref[1][yb4] - 4]++;
  ------------------
  |  Branch (380:13): [True: 220k, False: 429k]
  |  Branch (380:34): [True: 211k, False: 9.00k]
  ------------------
  381|   649k|    }
  382|       |
  383|   741k|    cnt[1] += cnt[0];
  384|       |
  385|   741k|    return cnt[2] == cnt[1] ? 1 : cnt[1] < cnt[2] ? 0 : 2;
  ------------------
  |  Branch (385:12): [True: 125k, False: 615k]
  |  Branch (385:35): [True: 414k, False: 200k]
  ------------------
  386|   741k|}
decode.c:av1_get_bwd_ref_1_ctx:
  392|   254k|{
  393|   254k|    int cnt[3] = { 0 };
  394|       |
  395|   254k|    if (have_top && !a->intra[xb4]) {
  ------------------
  |  Branch (395:9): [True: 227k, False: 26.8k]
  |  Branch (395:21): [True: 209k, False: 18.1k]
  ------------------
  396|   209k|        if (a->ref[0][xb4] >= 4) cnt[a->ref[0][xb4] - 4]++;
  ------------------
  |  Branch (396:13): [True: 88.6k, False: 120k]
  ------------------
  397|   209k|        if (a->comp_type[xb4] && a->ref[1][xb4] >= 4) cnt[a->ref[1][xb4] - 4]++;
  ------------------
  |  Branch (397:13): [True: 88.8k, False: 120k]
  |  Branch (397:34): [True: 83.7k, False: 5.03k]
  ------------------
  398|   209k|    }
  399|       |
  400|   254k|    if (have_left && !l->intra[yb4]) {
  ------------------
  |  Branch (400:9): [True: 243k, False: 10.5k]
  |  Branch (400:22): [True: 223k, False: 19.8k]
  ------------------
  401|   223k|        if (l->ref[0][yb4] >= 4) cnt[l->ref[0][yb4] - 4]++;
  ------------------
  |  Branch (401:13): [True: 94.6k, False: 129k]
  ------------------
  402|   223k|        if (l->comp_type[yb4] && l->ref[1][yb4] >= 4) cnt[l->ref[1][yb4] - 4]++;
  ------------------
  |  Branch (402:13): [True: 94.5k, False: 129k]
  |  Branch (402:34): [True: 89.5k, False: 4.90k]
  ------------------
  403|   223k|    }
  404|       |
  405|   254k|    return cnt[0] == cnt[1] ? 1 : cnt[0] < cnt[1] ? 0 : 2;
  ------------------
  |  Branch (405:12): [True: 55.6k, False: 198k]
  |  Branch (405:35): [True: 73.0k, False: 125k]
  ------------------
  406|   254k|}
decode.c:av1_get_ref_ctx:
  287|  1.71M|{
  288|  1.71M|    int cnt[2] = { 0 };
  289|       |
  290|  1.71M|    if (have_top && !a->intra[xb4]) {
  ------------------
  |  Branch (290:9): [True: 1.57M, False: 142k]
  |  Branch (290:21): [True: 1.42M, False: 144k]
  ------------------
  291|  1.42M|        cnt[a->ref[0][xb4] >= 4]++;
  292|  1.42M|        if (a->comp_type[xb4]) cnt[a->ref[1][xb4] >= 4]++;
  ------------------
  |  Branch (292:13): [True: 118k, False: 1.30M]
  ------------------
  293|  1.42M|    }
  294|       |
  295|  1.71M|    if (have_left && !l->intra[yb4]) {
  ------------------
  |  Branch (295:9): [True: 1.64M, False: 65.9k]
  |  Branch (295:22): [True: 1.49M, False: 148k]
  ------------------
  296|  1.49M|        cnt[l->ref[0][yb4] >= 4]++;
  297|  1.49M|        if (l->comp_type[yb4]) cnt[l->ref[1][yb4] >= 4]++;
  ------------------
  |  Branch (297:13): [True: 134k, False: 1.36M]
  ------------------
  298|  1.49M|    }
  299|       |
  300|  1.71M|    return cnt[0] == cnt[1] ? 1 : cnt[0] < cnt[1] ? 0 : 2;
  ------------------
  |  Branch (300:12): [True: 210k, False: 1.50M]
  |  Branch (300:35): [True: 388k, False: 1.11M]
  ------------------
  301|  1.71M|}
decode.c:av1_get_uni_p1_ctx:
  412|  32.9k|{
  413|  32.9k|    int cnt[3] = { 0 };
  414|       |
  415|  32.9k|    if (have_top && !a->intra[xb4]) {
  ------------------
  |  Branch (415:9): [True: 27.8k, False: 5.17k]
  |  Branch (415:21): [True: 25.8k, False: 1.97k]
  ------------------
  416|  25.8k|        if (a->ref[0][xb4] - 1U < 3) cnt[a->ref[0][xb4] - 1]++;
  ------------------
  |  Branch (416:13): [True: 5.29k, False: 20.5k]
  ------------------
  417|  25.8k|        if (a->comp_type[xb4] && a->ref[1][xb4] - 1U < 3) cnt[a->ref[1][xb4] - 1]++;
  ------------------
  |  Branch (417:13): [True: 16.8k, False: 8.97k]
  |  Branch (417:34): [True: 9.53k, False: 7.33k]
  ------------------
  418|  25.8k|    }
  419|       |
  420|  32.9k|    if (have_left && !l->intra[yb4]) {
  ------------------
  |  Branch (420:9): [True: 30.7k, False: 2.27k]
  |  Branch (420:22): [True: 28.8k, False: 1.90k]
  ------------------
  421|  28.8k|        if (l->ref[0][yb4] - 1U < 3) cnt[l->ref[0][yb4] - 1]++;
  ------------------
  |  Branch (421:13): [True: 7.18k, False: 21.6k]
  ------------------
  422|  28.8k|        if (l->comp_type[yb4] && l->ref[1][yb4] - 1U < 3) cnt[l->ref[1][yb4] - 1]++;
  ------------------
  |  Branch (422:13): [True: 19.1k, False: 9.65k]
  |  Branch (422:34): [True: 10.2k, False: 8.92k]
  ------------------
  423|  28.8k|    }
  424|       |
  425|  32.9k|    cnt[1] += cnt[2];
  426|       |
  427|  32.9k|    return cnt[0] == cnt[1] ? 1 : cnt[0] < cnt[1] ? 0 : 2;
  ------------------
  |  Branch (427:12): [True: 11.3k, False: 21.6k]
  |  Branch (427:35): [True: 14.3k, False: 7.29k]
  ------------------
  428|  32.9k|}
decode.c:get_drl_context:
  432|   879k|{
  433|   879k|    if (ref_mv_stack[ref_idx].weight >= 640)
  ------------------
  |  Branch (433:9): [True: 730k, False: 149k]
  ------------------
  434|   730k|        return ref_mv_stack[ref_idx + 1].weight < 640;
  435|       |
  436|  18.4E|    return ref_mv_stack[ref_idx + 1].weight < 640 ? 2 : 0;
  ------------------
  |  Branch (436:12): [True: 150k, False: 18.4E]
  ------------------
  437|   879k|}
decode.c:get_gmv_2d:
  482|  1.42M|{
  483|  1.42M|    switch (gmv->type) {
  484|   571k|    case DAV1D_WM_TYPE_ROT_ZOOM:
  ------------------
  |  Branch (484:5): [True: 571k, False: 852k]
  ------------------
  485|   571k|        assert(gmv->matrix[5] ==  gmv->matrix[2]);
  ------------------
  |  Branch (485:9): [True: 571k, False: 18.4E]
  ------------------
  486|   571k|        assert(gmv->matrix[4] == -gmv->matrix[3]);
  ------------------
  |  Branch (486:9): [True: 571k, False: 18.4E]
  ------------------
  487|       |        // fall-through
  488|   571k|    default:
  ------------------
  |  Branch (488:5): [True: 0, False: 1.42M]
  ------------------
  489|   651k|    case DAV1D_WM_TYPE_AFFINE: {
  ------------------
  |  Branch (489:5): [True: 79.5k, False: 1.34M]
  ------------------
  490|   651k|        const int x = bx4 * 4 + bw4 * 2 - 1;
  491|   651k|        const int y = by4 * 4 + bh4 * 2 - 1;
  492|   651k|        const int xc = (gmv->matrix[2] - (1 << 16)) * x +
  493|   651k|                       gmv->matrix[3] * y + gmv->matrix[0];
  494|   651k|        const int yc = (gmv->matrix[5] - (1 << 16)) * y +
  495|   651k|                       gmv->matrix[4] * x + gmv->matrix[1];
  496|   651k|        const int shift = 16 - (3 - !hdr->hp);
  497|   651k|        const int round = (1 << shift) >> 1;
  498|   651k|        mv res = (mv) {
  499|   651k|            .y = apply_sign(((abs(yc) + round) >> shift) << !hdr->hp, yc),
  500|   651k|            .x = apply_sign(((abs(xc) + round) >> shift) << !hdr->hp, xc),
  501|   651k|        };
  502|   651k|        if (hdr->force_integer_mv)
  ------------------
  |  Branch (502:13): [True: 14.3k, False: 636k]
  ------------------
  503|  14.3k|            fix_int_mv_precision(&res);
  504|   651k|        return res;
  505|   571k|    }
  506|  38.4k|    case DAV1D_WM_TYPE_TRANSLATION: {
  ------------------
  |  Branch (506:5): [True: 38.4k, False: 1.38M]
  ------------------
  507|  38.4k|        mv res = (mv) {
  508|  38.4k|            .y = gmv->matrix[0] >> 13,
  509|  38.4k|            .x = gmv->matrix[1] >> 13,
  510|  38.4k|        };
  511|  38.4k|        if (hdr->force_integer_mv)
  ------------------
  |  Branch (511:13): [True: 1.20k, False: 37.2k]
  ------------------
  512|  1.20k|            fix_int_mv_precision(&res);
  513|  38.4k|        return res;
  514|   571k|    }
  515|   734k|    case DAV1D_WM_TYPE_IDENTITY:
  ------------------
  |  Branch (515:5): [True: 734k, False: 689k]
  ------------------
  516|   734k|        return (mv) { .x = 0, .y = 0 };
  517|  1.42M|    }
  518|  1.42M|}
decode.c:get_mask_comp_ctx:
  266|   236k|{
  267|   236k|    const int a_ctx = a->comp_type[xb4] >= COMP_INTER_SEG ? 1 :
  ------------------
  |  Branch (267:23): [True: 37.3k, False: 198k]
  ------------------
  268|   236k|                      a->ref[0][xb4] == 6 ? 3 : 0;
  ------------------
  |  Branch (268:23): [True: 13.0k, False: 185k]
  ------------------
  269|   236k|    const int l_ctx = l->comp_type[yb4] >= COMP_INTER_SEG ? 1 :
  ------------------
  |  Branch (269:23): [True: 42.6k, False: 193k]
  ------------------
  270|   236k|                      l->ref[0][yb4] == 6 ? 3 : 0;
  ------------------
  |  Branch (270:23): [True: 13.6k, False: 179k]
  ------------------
  271|       |
  272|   236k|    return imin(a_ctx + l_ctx, 5);
  273|   236k|}
decode.c:get_jnt_comp_ctx:
  251|   167k|{
  252|   167k|    const int d0 = abs(get_poc_diff(order_hint_n_bits, ref0poc, poc));
  253|   167k|    const int d1 = abs(get_poc_diff(order_hint_n_bits, poc, ref1poc));
  254|   167k|    const int offset = d0 == d1;
  255|   167k|    const int a_ctx = a->comp_type[xb4] >= COMP_INTER_AVG ||
  ------------------
  |  Branch (255:23): [True: 76.8k, False: 90.8k]
  ------------------
  256|  90.8k|                      a->ref[0][xb4] == 6;
  ------------------
  |  Branch (256:23): [True: 8.59k, False: 82.2k]
  ------------------
  257|   167k|    const int l_ctx = l->comp_type[yb4] >= COMP_INTER_AVG ||
  ------------------
  |  Branch (257:23): [True: 82.9k, False: 84.8k]
  ------------------
  258|  84.8k|                      l->ref[0][yb4] == 6;
  ------------------
  |  Branch (258:23): [True: 9.14k, False: 75.6k]
  ------------------
  259|       |
  260|   167k|    return 3 * offset + a_ctx + l_ctx;
  261|   167k|}
decode.c:get_filter_ctx:
  139|  1.53M|{
  140|  1.53M|    const int a_filter = (a->ref[0][xb4] == ref || a->ref[1][xb4] == ref) ?
  ------------------
  |  Branch (140:27): [True: 983k, False: 548k]
  |  Branch (140:52): [True: 47.0k, False: 501k]
  ------------------
  141|  1.03M|                         a->filter[dir][xb4] : DAV1D_N_SWITCHABLE_FILTERS;
  142|  1.53M|    const int l_filter = (l->ref[0][yb4] == ref || l->ref[1][yb4] == ref) ?
  ------------------
  |  Branch (142:27): [True: 1.05M, False: 479k]
  |  Branch (142:52): [True: 44.8k, False: 434k]
  ------------------
  143|  1.09M|                         l->filter[dir][yb4] : DAV1D_N_SWITCHABLE_FILTERS;
  144|       |
  145|  1.53M|    if (a_filter == l_filter) {
  ------------------
  |  Branch (145:9): [True: 937k, False: 594k]
  ------------------
  146|   937k|        return comp * 4 + a_filter;
  147|   937k|    } else if (a_filter == DAV1D_N_SWITCHABLE_FILTERS) {
  ------------------
  |  Branch (147:16): [True: 301k, False: 292k]
  ------------------
  148|   301k|        return comp * 4 + l_filter;
  149|   301k|    } else if (l_filter == DAV1D_N_SWITCHABLE_FILTERS) {
  ------------------
  |  Branch (149:16): [True: 234k, False: 58.2k]
  ------------------
  150|   234k|        return comp * 4 + a_filter;
  151|   234k|    } else {
  152|  58.2k|        return comp * 4 + DAV1D_N_SWITCHABLE_FILTERS;
  153|  58.2k|    }
  154|  1.53M|}
decode.c:gather_top_partition_prob:
  106|   299k|{
  107|       |    // Exploit the fact that cdfs for PARTITION_V, PARTITION_SPLIT and
  108|       |    // PARTITION_T_TOP_SPLIT are neighbors.
  109|   299k|    unsigned out = in[PARTITION_V - 1] - in[PARTITION_T_TOP_SPLIT];
  110|       |    // Exploit the facts that cdfs for PARTITION_T_LEFT_SPLIT and
  111|       |    // PARTITION_T_RIGHT_SPLIT are neighbors, the probability for
  112|       |    // PARTITION_V4 is always zero, and the probability for
  113|       |    // PARTITION_T_RIGHT_SPLIT is zero in 128x128 blocks.
  114|   299k|    out += in[PARTITION_T_LEFT_SPLIT - 1];
  115|   299k|    if (bl != BL_128X128)
  ------------------
  |  Branch (115:9): [True: 264k, False: 34.5k]
  ------------------
  116|   264k|        out += in[PARTITION_V4 - 1] - in[PARTITION_T_RIGHT_SPLIT];
  117|   299k|    return out;
  118|   299k|}
decode.c:gather_left_partition_prob:
   94|   308k|{
   95|   308k|    unsigned out = in[PARTITION_H - 1] - in[PARTITION_H];
   96|       |    // Exploit the fact that cdfs for PARTITION_SPLIT, PARTITION_T_TOP_SPLIT,
   97|       |    // PARTITION_T_BOTTOM_SPLIT and PARTITION_T_LEFT_SPLIT are neighbors.
   98|   308k|    out += in[PARTITION_SPLIT - 1] - in[PARTITION_T_LEFT_SPLIT];
   99|   308k|    if (bl != BL_128X128)
  ------------------
  |  Branch (99:9): [True: 271k, False: 36.8k]
  ------------------
  100|   271k|        out += in[PARTITION_H4 - 1] - in[PARTITION_H4];
  101|   308k|    return out;
  102|   308k|}
decode.c:get_poc_diff:
  239|  2.43M|{
  240|  2.43M|    if (!order_hint_n_bits) return 0;
  ------------------
  |  Branch (240:9): [True: 62.4k, False: 2.36M]
  ------------------
  241|  2.36M|    const int mask = 1 << (order_hint_n_bits - 1);
  242|  2.36M|    const int diff = poc0 - poc1;
  243|  2.36M|    return (diff & (mask - 1)) - (diff & mask);
  244|  2.43M|}
recon_tmpl.c:get_uv_inter_txtp:
  122|   717k|{
  123|   717k|    if (uvt_dim->max == TX_32X32)
  ------------------
  |  Branch (123:9): [True: 116k, False: 600k]
  ------------------
  124|   116k|        return ytxtp == IDTX ? IDTX : DCT_DCT;
  ------------------
  |  Branch (124:16): [True: 3.14k, False: 113k]
  ------------------
  125|   600k|    if (uvt_dim->min == TX_16X16 &&
  ------------------
  |  Branch (125:9): [True: 43.5k, False: 557k]
  ------------------
  126|  43.5k|        ((1 << ytxtp) & ((1 << H_FLIPADST) | (1 << V_FLIPADST) |
  ------------------
  |  Branch (126:9): [True: 560, False: 42.9k]
  ------------------
  127|  43.5k|                         (1 << H_ADST) | (1 << V_ADST))))
  128|    560|    {
  129|    560|        return DCT_DCT;
  130|    560|    }
  131|       |
  132|   600k|    return ytxtp;
  133|   600k|}

dav1d_prep_grain_8bpc:
  105|  4.97k|{
  106|  4.97k|    const Dav1dFilmGrainData *const data = &out->frame_hdr->film_grain.data;
  107|       |#if BITDEPTH != 8
  108|       |    const int bitdepth_max = (1 << out->p.bpc) - 1;
  109|       |#endif
  110|       |
  111|       |    // Generate grain LUTs as needed
  112|  4.97k|    dsp->generate_grain_y(grain_lut[0], data HIGHBD_TAIL_SUFFIX); // always needed
  113|  4.97k|    if (data->num_uv_points[0] || data->chroma_scaling_from_luma)
  ------------------
  |  Branch (113:9): [True: 372, False: 4.60k]
  |  Branch (113:35): [True: 1.10k, False: 3.49k]
  ------------------
  114|  1.47k|        dsp->generate_grain_uv[in->p.layout - 1](grain_lut[1], grain_lut[0],
  115|  1.47k|                                                 data, 0 HIGHBD_TAIL_SUFFIX);
  116|  4.97k|    if (data->num_uv_points[1] || data->chroma_scaling_from_luma)
  ------------------
  |  Branch (116:9): [True: 2.78k, False: 2.18k]
  |  Branch (116:35): [True: 1.10k, False: 1.08k]
  ------------------
  117|  3.89k|        dsp->generate_grain_uv[in->p.layout - 1](grain_lut[2], grain_lut[0],
  118|  3.89k|                                                 data, 1 HIGHBD_TAIL_SUFFIX);
  119|       |
  120|       |    // Generate scaling LUTs as needed
  121|  4.97k|    if (data->num_y_points || data->chroma_scaling_from_luma)
  ------------------
  |  Branch (121:9): [True: 1.34k, False: 3.63k]
  |  Branch (121:31): [True: 1.07k, False: 2.55k]
  ------------------
  122|  2.41k|        generate_scaling(in->p.bpc, data->y_points, data->num_y_points, scaling[0]);
  123|  4.97k|    if (data->num_uv_points[0])
  ------------------
  |  Branch (123:9): [True: 372, False: 4.60k]
  ------------------
  124|    372|        generate_scaling(in->p.bpc, data->uv_points[0], data->num_uv_points[0], scaling[1]);
  125|  4.97k|    if (data->num_uv_points[1])
  ------------------
  |  Branch (125:9): [True: 2.78k, False: 2.18k]
  ------------------
  126|  2.78k|        generate_scaling(in->p.bpc, data->uv_points[1], data->num_uv_points[1], scaling[2]);
  127|       |
  128|       |    // Copy over the non-modified planes
  129|  4.97k|    assert(out->stride[0] == in->stride[0]);
  ------------------
  |  Branch (129:5): [True: 4.97k, False: 0]
  ------------------
  130|  4.97k|    if (!data->num_y_points) {
  ------------------
  |  Branch (130:9): [True: 3.63k, False: 1.34k]
  ------------------
  131|  3.63k|        const ptrdiff_t stride = out->stride[0];
  132|  3.63k|        const ptrdiff_t sz = out->p.h * stride;
  133|  3.63k|        if (sz < 0)
  ------------------
  |  Branch (133:13): [True: 0, False: 3.63k]
  ------------------
  134|      0|            memcpy((uint8_t*) out->data[0] + sz - stride,
  135|      0|                   (uint8_t*) in->data[0] + sz - stride, -sz);
  136|  3.63k|        else
  137|  3.63k|            memcpy(out->data[0], in->data[0], sz);
  138|  3.63k|    }
  139|       |
  140|  4.97k|    if (in->p.layout != DAV1D_PIXEL_LAYOUT_I400 && !data->chroma_scaling_from_luma) {
  ------------------
  |  Branch (140:9): [True: 4.44k, False: 532]
  |  Branch (140:52): [True: 3.33k, False: 1.10k]
  ------------------
  141|  3.33k|        assert(out->stride[1] == in->stride[1]);
  ------------------
  |  Branch (141:9): [True: 3.33k, False: 0]
  ------------------
  142|  3.33k|        const int ss_ver = in->p.layout == DAV1D_PIXEL_LAYOUT_I420;
  143|  3.33k|        const ptrdiff_t stride = out->stride[1];
  144|  3.33k|        const ptrdiff_t sz = ((out->p.h + ss_ver) >> ss_ver) * stride;
  145|  3.33k|        if (sz < 0) {
  ------------------
  |  Branch (145:13): [True: 0, False: 3.33k]
  ------------------
  146|      0|            if (!data->num_uv_points[0])
  ------------------
  |  Branch (146:17): [True: 0, False: 0]
  ------------------
  147|      0|                memcpy((uint8_t*) out->data[1] + sz - stride,
  148|      0|                       (uint8_t*) in->data[1] + sz - stride, -sz);
  149|      0|            if (!data->num_uv_points[1])
  ------------------
  |  Branch (149:17): [True: 0, False: 0]
  ------------------
  150|      0|                memcpy((uint8_t*) out->data[2] + sz - stride,
  151|      0|                       (uint8_t*) in->data[2] + sz - stride, -sz);
  152|  3.33k|        } else {
  153|  3.33k|            if (!data->num_uv_points[0])
  ------------------
  |  Branch (153:17): [True: 2.96k, False: 372]
  ------------------
  154|  2.96k|                memcpy(out->data[1], in->data[1], sz);
  155|  3.33k|            if (!data->num_uv_points[1])
  ------------------
  |  Branch (155:17): [True: 550, False: 2.78k]
  ------------------
  156|    550|                memcpy(out->data[2], in->data[2], sz);
  157|  3.33k|        }
  158|  3.33k|    }
  159|  4.97k|}
dav1d_apply_grain_row_8bpc:
  167|  23.7k|{
  168|       |    // Synthesize grain for the affected planes
  169|  23.7k|    const Dav1dFilmGrainData *const data = &out->frame_hdr->film_grain.data;
  170|  23.7k|    const int ss_y = in->p.layout == DAV1D_PIXEL_LAYOUT_I420;
  171|  23.7k|    const int ss_x = in->p.layout != DAV1D_PIXEL_LAYOUT_I444;
  172|  23.7k|    const int cpw = (out->p.w + ss_x) >> ss_x;
  173|  23.7k|    const int is_id = out->seq_hdr->mtrx == DAV1D_MC_IDENTITY;
  174|  23.7k|    pixel *const luma_src =
  175|  23.7k|        ((pixel *) in->data[0]) + row * FG_BLOCK_SIZE * PXSTRIDE(in->stride[0]);
  ------------------
  |  |   37|  23.7k|#define FG_BLOCK_SIZE 32
  ------------------
                      ((pixel *) in->data[0]) + row * FG_BLOCK_SIZE * PXSTRIDE(in->stride[0]);
  ------------------
  |  |   53|  23.7k|#define PXSTRIDE(x) (x)
  ------------------
  176|       |#if BITDEPTH != 8
  177|       |    const int bitdepth_max = (1 << out->p.bpc) - 1;
  178|       |#endif
  179|       |
  180|  23.7k|    if (data->num_y_points) {
  ------------------
  |  Branch (180:9): [True: 9.66k, False: 14.0k]
  ------------------
  181|  9.66k|        const int bh = imin(out->p.h - row * FG_BLOCK_SIZE, FG_BLOCK_SIZE);
  ------------------
  |  |   37|  9.66k|#define FG_BLOCK_SIZE 32
  ------------------
                      const int bh = imin(out->p.h - row * FG_BLOCK_SIZE, FG_BLOCK_SIZE);
  ------------------
  |  |   37|  9.66k|#define FG_BLOCK_SIZE 32
  ------------------
  182|  9.66k|        dsp->fgy_32x32xn(((pixel *) out->data[0]) + row * FG_BLOCK_SIZE * PXSTRIDE(out->stride[0]),
  ------------------
  |  |   37|  9.66k|#define FG_BLOCK_SIZE 32
  ------------------
                      dsp->fgy_32x32xn(((pixel *) out->data[0]) + row * FG_BLOCK_SIZE * PXSTRIDE(out->stride[0]),
  ------------------
  |  |   53|  9.66k|#define PXSTRIDE(x) (x)
  ------------------
  183|  9.66k|                         luma_src, out->stride[0], data,
  184|  9.66k|                         out->p.w, scaling[0], grain_lut[0], bh, row HIGHBD_TAIL_SUFFIX);
  185|  9.66k|    }
  186|       |
  187|  23.7k|    if (!data->num_uv_points[0] && !data->num_uv_points[1] &&
  ------------------
  |  Branch (187:9): [True: 20.3k, False: 3.38k]
  |  Branch (187:36): [True: 10.1k, False: 10.2k]
  ------------------
  188|  10.1k|        !data->chroma_scaling_from_luma)
  ------------------
  |  Branch (188:9): [True: 3.99k, False: 6.13k]
  ------------------
  189|  3.99k|    {
  190|  3.99k|        return;
  191|  3.99k|    }
  192|       |
  193|  19.7k|    const int bh = (imin(out->p.h - row * FG_BLOCK_SIZE, FG_BLOCK_SIZE) + ss_y) >> ss_y;
  ------------------
  |  |   37|  19.7k|#define FG_BLOCK_SIZE 32
  ------------------
                  const int bh = (imin(out->p.h - row * FG_BLOCK_SIZE, FG_BLOCK_SIZE) + ss_y) >> ss_y;
  ------------------
  |  |   37|  19.7k|#define FG_BLOCK_SIZE 32
  ------------------
  194|       |
  195|       |    // extend padding pixels
  196|  19.7k|    if (out->p.w & ss_x) {
  ------------------
  |  Branch (196:9): [True: 4.13k, False: 15.5k]
  ------------------
  197|  4.13k|        pixel *ptr = luma_src;
  198|  94.3k|        for (int y = 0; y < bh; y++) {
  ------------------
  |  Branch (198:25): [True: 90.2k, False: 4.13k]
  ------------------
  199|  90.2k|            ptr[out->p.w] = ptr[out->p.w - 1];
  200|  90.2k|            ptr += PXSTRIDE(in->stride[0]) << ss_y;
  ------------------
  |  |   53|  90.2k|#define PXSTRIDE(x) (x)
  ------------------
  201|  90.2k|        }
  202|  4.13k|    }
  203|       |
  204|  19.7k|    const ptrdiff_t uv_off = row * FG_BLOCK_SIZE * PXSTRIDE(out->stride[1]) >> ss_y;
  ------------------
  |  |   37|  19.7k|#define FG_BLOCK_SIZE 32
  ------------------
                  const ptrdiff_t uv_off = row * FG_BLOCK_SIZE * PXSTRIDE(out->stride[1]) >> ss_y;
  ------------------
  |  |   53|  19.7k|#define PXSTRIDE(x) (x)
  ------------------
  205|  19.7k|    if (data->chroma_scaling_from_luma) {
  ------------------
  |  Branch (205:9): [True: 6.01k, False: 13.7k]
  ------------------
  206|  18.0k|        for (int pl = 0; pl < 2; pl++)
  ------------------
  |  Branch (206:26): [True: 12.0k, False: 6.01k]
  ------------------
  207|  12.0k|            dsp->fguv_32x32xn[in->p.layout - 1](((pixel *) out->data[1 + pl]) + uv_off,
  208|  12.0k|                                                ((const pixel *) in->data[1 + pl]) + uv_off,
  209|  12.0k|                                                in->stride[1], data, cpw,
  210|  12.0k|                                                scaling[0], grain_lut[1 + pl],
  211|  12.0k|                                                bh, row, luma_src, in->stride[0],
  212|  12.0k|                                                pl, is_id HIGHBD_TAIL_SUFFIX);
  213|  13.7k|    } else {
  214|  40.8k|        for (int pl = 0; pl < 2; pl++)
  ------------------
  |  Branch (214:26): [True: 27.1k, False: 13.7k]
  ------------------
  215|  27.1k|            if (data->num_uv_points[pl])
  ------------------
  |  Branch (215:17): [True: 16.0k, False: 11.0k]
  ------------------
  216|  16.0k|                dsp->fguv_32x32xn[in->p.layout - 1](((pixel *) out->data[1 + pl]) + uv_off,
  217|  16.0k|                                                    ((const pixel *) in->data[1 + pl]) + uv_off,
  218|  16.0k|                                                    in->stride[1], data, cpw,
  219|  16.0k|                                                    scaling[1 + pl], grain_lut[1 + pl],
  220|  16.0k|                                                    bh, row, luma_src, in->stride[0],
  221|  16.0k|                                                    pl, is_id HIGHBD_TAIL_SUFFIX);
  222|  13.7k|    }
  223|  19.7k|}
fg_apply_tmpl.c:generate_scaling:
   44|  5.57k|{
   45|  5.57k|#if BITDEPTH == 8
   46|  5.57k|    const int shift_x = 0;
   47|  5.57k|    const int scaling_size = SCALING_SIZE;
  ------------------
  |  |   39|  5.57k|#define SCALING_SIZE 256
  ------------------
   48|       |#else
   49|       |    assert(bitdepth > 8);
   50|       |    const int shift_x = bitdepth - 8;
   51|       |    const int scaling_size = 1 << bitdepth;
   52|       |#endif
   53|       |
   54|  5.57k|    if (num == 0) {
  ------------------
  |  Branch (54:9): [True: 1.07k, False: 4.50k]
  ------------------
   55|  1.07k|        memset(scaling, 0, scaling_size);
   56|  1.07k|        return;
   57|  1.07k|    }
   58|       |
   59|       |    // Fill up the preceding entries with the initial value
   60|  4.50k|    memset(scaling, points[0][1], points[0][0] << shift_x);
   61|       |
   62|       |    // Linearly interpolate the values in the middle
   63|  9.48k|    for (int i = 0; i < num - 1; i++) {
  ------------------
  |  Branch (63:21): [True: 4.98k, False: 4.50k]
  ------------------
   64|  4.98k|        const int bx = points[i][0];
   65|  4.98k|        const int by = points[i][1];
   66|  4.98k|        const int ex = points[i+1][0];
   67|  4.98k|        const int ey = points[i+1][1];
   68|  4.98k|        const int dx = ex - bx;
   69|  4.98k|        const int dy = ey - by;
   70|  4.98k|        assert(dx > 0);
  ------------------
  |  Branch (70:9): [True: 4.98k, False: 0]
  ------------------
   71|  4.98k|        const int delta = dy * ((0x10000 + (dx >> 1)) / dx);
   72|   250k|        for (int x = 0, d = 0x8000; x < dx; x++) {
  ------------------
  |  Branch (72:37): [True: 245k, False: 4.98k]
  ------------------
   73|   245k|            scaling[(bx + x) << shift_x] = by + (d >> 16);
   74|   245k|            d += delta;
   75|   245k|        }
   76|  4.98k|    }
   77|       |
   78|       |    // Fill up the remaining entries with the final value
   79|  4.50k|    const int n = points[num - 1][0] << shift_x;
   80|  4.50k|    memset(&scaling[n], points[num - 1][1], scaling_size - n);
   81|       |
   82|       |#if BITDEPTH != 8
   83|       |    const int pad = 1 << shift_x, rnd = pad >> 1;
   84|       |    for (int i = 0; i < num - 1; i++) {
   85|       |        const int bx = points[i][0] << shift_x;
   86|       |        const int ex = points[i+1][0] << shift_x;
   87|       |        const int dx = ex - bx;
   88|       |        for (int x = 0; x < dx; x += pad) {
   89|       |            const int range = scaling[bx + x + pad] - scaling[bx + x];
   90|       |            for (int n = 1, r = rnd; n < pad; n++) {
   91|       |                r += range;
   92|       |                scaling[bx + x + n] = scaling[bx + x] + (r >> shift_x);
   93|       |            }
   94|       |        }
   95|       |    }
   96|       |#endif
   97|  4.50k|}
dav1d_prep_grain_16bpc:
  105|  1.83k|{
  106|  1.83k|    const Dav1dFilmGrainData *const data = &out->frame_hdr->film_grain.data;
  107|  1.83k|#if BITDEPTH != 8
  108|  1.83k|    const int bitdepth_max = (1 << out->p.bpc) - 1;
  109|  1.83k|#endif
  110|       |
  111|       |    // Generate grain LUTs as needed
  112|  1.83k|    dsp->generate_grain_y(grain_lut[0], data HIGHBD_TAIL_SUFFIX); // always needed
  ------------------
  |  |   74|  1.83k|#define HIGHBD_TAIL_SUFFIX , bitdepth_max
  ------------------
  113|  1.83k|    if (data->num_uv_points[0] || data->chroma_scaling_from_luma)
  ------------------
  |  Branch (113:9): [True: 351, False: 1.48k]
  |  Branch (113:35): [True: 1.01k, False: 468]
  ------------------
  114|  1.36k|        dsp->generate_grain_uv[in->p.layout - 1](grain_lut[1], grain_lut[0],
  115|  1.36k|                                                 data, 0 HIGHBD_TAIL_SUFFIX);
  ------------------
  |  |   74|  1.36k|#define HIGHBD_TAIL_SUFFIX , bitdepth_max
  ------------------
  116|  1.83k|    if (data->num_uv_points[1] || data->chroma_scaling_from_luma)
  ------------------
  |  Branch (116:9): [True: 196, False: 1.63k]
  |  Branch (116:35): [True: 1.01k, False: 623]
  ------------------
  117|  1.20k|        dsp->generate_grain_uv[in->p.layout - 1](grain_lut[2], grain_lut[0],
  118|  1.20k|                                                 data, 1 HIGHBD_TAIL_SUFFIX);
  ------------------
  |  |   74|  1.20k|#define HIGHBD_TAIL_SUFFIX , bitdepth_max
  ------------------
  119|       |
  120|       |    // Generate scaling LUTs as needed
  121|  1.83k|    if (data->num_y_points || data->chroma_scaling_from_luma)
  ------------------
  |  Branch (121:9): [True: 877, False: 954]
  |  Branch (121:31): [True: 465, False: 489]
  ------------------
  122|  1.34k|        generate_scaling(in->p.bpc, data->y_points, data->num_y_points, scaling[0]);
  123|  1.83k|    if (data->num_uv_points[0])
  ------------------
  |  Branch (123:9): [True: 351, False: 1.48k]
  ------------------
  124|    351|        generate_scaling(in->p.bpc, data->uv_points[0], data->num_uv_points[0], scaling[1]);
  125|  1.83k|    if (data->num_uv_points[1])
  ------------------
  |  Branch (125:9): [True: 196, False: 1.63k]
  ------------------
  126|    196|        generate_scaling(in->p.bpc, data->uv_points[1], data->num_uv_points[1], scaling[2]);
  127|       |
  128|       |    // Copy over the non-modified planes
  129|  1.83k|    assert(out->stride[0] == in->stride[0]);
  ------------------
  |  Branch (129:5): [True: 1.83k, False: 0]
  ------------------
  130|  1.83k|    if (!data->num_y_points) {
  ------------------
  |  Branch (130:9): [True: 954, False: 877]
  ------------------
  131|    954|        const ptrdiff_t stride = out->stride[0];
  132|    954|        const ptrdiff_t sz = out->p.h * stride;
  133|    954|        if (sz < 0)
  ------------------
  |  Branch (133:13): [True: 0, False: 954]
  ------------------
  134|      0|            memcpy((uint8_t*) out->data[0] + sz - stride,
  135|      0|                   (uint8_t*) in->data[0] + sz - stride, -sz);
  136|    954|        else
  137|    954|            memcpy(out->data[0], in->data[0], sz);
  138|    954|    }
  139|       |
  140|  1.83k|    if (in->p.layout != DAV1D_PIXEL_LAYOUT_I400 && !data->chroma_scaling_from_luma) {
  ------------------
  |  Branch (140:9): [True: 1.69k, False: 141]
  |  Branch (140:52): [True: 678, False: 1.01k]
  ------------------
  141|    678|        assert(out->stride[1] == in->stride[1]);
  ------------------
  |  Branch (141:9): [True: 678, False: 0]
  ------------------
  142|    678|        const int ss_ver = in->p.layout == DAV1D_PIXEL_LAYOUT_I420;
  143|    678|        const ptrdiff_t stride = out->stride[1];
  144|    678|        const ptrdiff_t sz = ((out->p.h + ss_ver) >> ss_ver) * stride;
  145|    678|        if (sz < 0) {
  ------------------
  |  Branch (145:13): [True: 0, False: 678]
  ------------------
  146|      0|            if (!data->num_uv_points[0])
  ------------------
  |  Branch (146:17): [True: 0, False: 0]
  ------------------
  147|      0|                memcpy((uint8_t*) out->data[1] + sz - stride,
  148|      0|                       (uint8_t*) in->data[1] + sz - stride, -sz);
  149|      0|            if (!data->num_uv_points[1])
  ------------------
  |  Branch (149:17): [True: 0, False: 0]
  ------------------
  150|      0|                memcpy((uint8_t*) out->data[2] + sz - stride,
  151|      0|                       (uint8_t*) in->data[2] + sz - stride, -sz);
  152|    678|        } else {
  153|    678|            if (!data->num_uv_points[0])
  ------------------
  |  Branch (153:17): [True: 327, False: 351]
  ------------------
  154|    327|                memcpy(out->data[1], in->data[1], sz);
  155|    678|            if (!data->num_uv_points[1])
  ------------------
  |  Branch (155:17): [True: 482, False: 196]
  ------------------
  156|    482|                memcpy(out->data[2], in->data[2], sz);
  157|    678|        }
  158|    678|    }
  159|  1.83k|}
dav1d_apply_grain_row_16bpc:
  167|  7.43k|{
  168|       |    // Synthesize grain for the affected planes
  169|  7.43k|    const Dav1dFilmGrainData *const data = &out->frame_hdr->film_grain.data;
  170|  7.43k|    const int ss_y = in->p.layout == DAV1D_PIXEL_LAYOUT_I420;
  171|  7.43k|    const int ss_x = in->p.layout != DAV1D_PIXEL_LAYOUT_I444;
  172|  7.43k|    const int cpw = (out->p.w + ss_x) >> ss_x;
  173|  7.43k|    const int is_id = out->seq_hdr->mtrx == DAV1D_MC_IDENTITY;
  174|  7.43k|    pixel *const luma_src =
  175|  7.43k|        ((pixel *) in->data[0]) + row * FG_BLOCK_SIZE * PXSTRIDE(in->stride[0]);
  ------------------
  |  |   37|  7.43k|#define FG_BLOCK_SIZE 32
  ------------------
  176|  7.43k|#if BITDEPTH != 8
  177|  7.43k|    const int bitdepth_max = (1 << out->p.bpc) - 1;
  178|  7.43k|#endif
  179|       |
  180|  7.43k|    if (data->num_y_points) {
  ------------------
  |  Branch (180:9): [True: 4.00k, False: 3.43k]
  ------------------
  181|  4.00k|        const int bh = imin(out->p.h - row * FG_BLOCK_SIZE, FG_BLOCK_SIZE);
  ------------------
  |  |   37|  4.00k|#define FG_BLOCK_SIZE 32
  ------------------
                      const int bh = imin(out->p.h - row * FG_BLOCK_SIZE, FG_BLOCK_SIZE);
  ------------------
  |  |   37|  4.00k|#define FG_BLOCK_SIZE 32
  ------------------
  182|  4.00k|        dsp->fgy_32x32xn(((pixel *) out->data[0]) + row * FG_BLOCK_SIZE * PXSTRIDE(out->stride[0]),
  ------------------
  |  |   37|  4.00k|#define FG_BLOCK_SIZE 32
  ------------------
  183|  4.00k|                         luma_src, out->stride[0], data,
  184|  4.00k|                         out->p.w, scaling[0], grain_lut[0], bh, row HIGHBD_TAIL_SUFFIX);
  ------------------
  |  |   74|  4.00k|#define HIGHBD_TAIL_SUFFIX , bitdepth_max
  ------------------
  185|  4.00k|    }
  186|       |
  187|  7.43k|    if (!data->num_uv_points[0] && !data->num_uv_points[1] &&
  ------------------
  |  Branch (187:9): [True: 6.55k, False: 880]
  |  Branch (187:36): [True: 5.94k, False: 614]
  ------------------
  188|  5.94k|        !data->chroma_scaling_from_luma)
  ------------------
  |  Branch (188:9): [True: 1.63k, False: 4.30k]
  ------------------
  189|  1.63k|    {
  190|  1.63k|        return;
  191|  1.63k|    }
  192|       |
  193|  5.80k|    const int bh = (imin(out->p.h - row * FG_BLOCK_SIZE, FG_BLOCK_SIZE) + ss_y) >> ss_y;
  ------------------
  |  |   37|  5.80k|#define FG_BLOCK_SIZE 32
  ------------------
                  const int bh = (imin(out->p.h - row * FG_BLOCK_SIZE, FG_BLOCK_SIZE) + ss_y) >> ss_y;
  ------------------
  |  |   37|  5.80k|#define FG_BLOCK_SIZE 32
  ------------------
  194|       |
  195|       |    // extend padding pixels
  196|  5.80k|    if (out->p.w & ss_x) {
  ------------------
  |  Branch (196:9): [True: 1.78k, False: 4.01k]
  ------------------
  197|  1.78k|        pixel *ptr = luma_src;
  198|  36.2k|        for (int y = 0; y < bh; y++) {
  ------------------
  |  Branch (198:25): [True: 34.4k, False: 1.78k]
  ------------------
  199|  34.4k|            ptr[out->p.w] = ptr[out->p.w - 1];
  200|  34.4k|            ptr += PXSTRIDE(in->stride[0]) << ss_y;
  201|  34.4k|        }
  202|  1.78k|    }
  203|       |
  204|  5.80k|    const ptrdiff_t uv_off = row * FG_BLOCK_SIZE * PXSTRIDE(out->stride[1]) >> ss_y;
  ------------------
  |  |   37|  5.80k|#define FG_BLOCK_SIZE 32
  ------------------
  205|  5.80k|    if (data->chroma_scaling_from_luma) {
  ------------------
  |  Branch (205:9): [True: 4.28k, False: 1.51k]
  ------------------
  206|  12.8k|        for (int pl = 0; pl < 2; pl++)
  ------------------
  |  Branch (206:26): [True: 8.57k, False: 4.28k]
  ------------------
  207|  8.57k|            dsp->fguv_32x32xn[in->p.layout - 1](((pixel *) out->data[1 + pl]) + uv_off,
  208|  8.57k|                                                ((const pixel *) in->data[1 + pl]) + uv_off,
  209|  8.57k|                                                in->stride[1], data, cpw,
  210|  8.57k|                                                scaling[0], grain_lut[1 + pl],
  211|  8.57k|                                                bh, row, luma_src, in->stride[0],
  212|  8.57k|                                                pl, is_id HIGHBD_TAIL_SUFFIX);
  ------------------
  |  |   74|  8.57k|#define HIGHBD_TAIL_SUFFIX , bitdepth_max
  ------------------
  213|  4.28k|    } else {
  214|  4.43k|        for (int pl = 0; pl < 2; pl++)
  ------------------
  |  Branch (214:26): [True: 2.91k, False: 1.51k]
  ------------------
  215|  2.91k|            if (data->num_uv_points[pl])
  ------------------
  |  Branch (215:17): [True: 1.68k, False: 1.23k]
  ------------------
  216|  1.68k|                dsp->fguv_32x32xn[in->p.layout - 1](((pixel *) out->data[1 + pl]) + uv_off,
  217|  1.68k|                                                    ((const pixel *) in->data[1 + pl]) + uv_off,
  218|  1.68k|                                                    in->stride[1], data, cpw,
  219|  1.68k|                                                    scaling[1 + pl], grain_lut[1 + pl],
  220|  1.68k|                                                    bh, row, luma_src, in->stride[0],
  221|  1.68k|                                                    pl, is_id HIGHBD_TAIL_SUFFIX);
  ------------------
  |  |   74|  1.68k|#define HIGHBD_TAIL_SUFFIX , bitdepth_max
  ------------------
  222|  1.51k|    }
  223|  5.80k|}

dav1d_film_grain_dsp_init_8bpc:
  423|  3.49k|COLD void bitfn(dav1d_film_grain_dsp_init)(Dav1dFilmGrainDSPContext *const c) {
  424|  3.49k|    c->generate_grain_y = generate_grain_y_c;
  425|  3.49k|    c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I420 - 1] = generate_grain_uv_420_c;
  426|  3.49k|    c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I422 - 1] = generate_grain_uv_422_c;
  427|  3.49k|    c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I444 - 1] = generate_grain_uv_444_c;
  428|       |
  429|  3.49k|    c->fgy_32x32xn = fgy_32x32xn_c;
  430|  3.49k|    c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I420 - 1] = fguv_32x32xn_420_c;
  431|  3.49k|    c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I422 - 1] = fguv_32x32xn_422_c;
  432|  3.49k|    c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I444 - 1] = fguv_32x32xn_444_c;
  433|       |
  434|  3.49k|#if HAVE_ASM
  435|       |#if ARCH_AARCH64 || ARCH_ARM
  436|       |    film_grain_dsp_init_arm(c);
  437|       |#elif ARCH_X86
  438|       |    film_grain_dsp_init_x86(c);
  439|  3.49k|#endif
  440|  3.49k|#endif
  441|  3.49k|}
dav1d_film_grain_dsp_init_16bpc:
  423|  5.72k|COLD void bitfn(dav1d_film_grain_dsp_init)(Dav1dFilmGrainDSPContext *const c) {
  424|  5.72k|    c->generate_grain_y = generate_grain_y_c;
  425|  5.72k|    c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I420 - 1] = generate_grain_uv_420_c;
  426|  5.72k|    c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I422 - 1] = generate_grain_uv_422_c;
  427|  5.72k|    c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I444 - 1] = generate_grain_uv_444_c;
  428|       |
  429|  5.72k|    c->fgy_32x32xn = fgy_32x32xn_c;
  430|  5.72k|    c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I420 - 1] = fguv_32x32xn_420_c;
  431|  5.72k|    c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I422 - 1] = fguv_32x32xn_422_c;
  432|  5.72k|    c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I444 - 1] = fguv_32x32xn_444_c;
  433|       |
  434|  5.72k|#if HAVE_ASM
  435|       |#if ARCH_AARCH64 || ARCH_ARM
  436|       |    film_grain_dsp_init_arm(c);
  437|       |#elif ARCH_X86
  438|       |    film_grain_dsp_init_x86(c);
  439|  5.72k|#endif
  440|  5.72k|#endif
  441|  5.72k|}

dav1d_init_get_bits:
   38|   432k|{
   39|   432k|    assert(sz);
  ------------------
  |  Branch (39:5): [True: 432k, False: 0]
  ------------------
   40|   432k|    c->ptr = c->ptr_start = data;
   41|   432k|    c->ptr_end = &c->ptr_start[sz];
   42|   432k|    c->state = 0;
   43|   432k|    c->bits_left = 0;
   44|   432k|    c->error = 0;
   45|   432k|}
dav1d_get_bit:
   47|  10.1M|unsigned dav1d_get_bit(GetBits *const c) {
   48|  10.1M|    if (!c->bits_left) {
  ------------------
  |  Branch (48:9): [True: 1.63M, False: 8.55M]
  ------------------
   49|  1.63M|        if (c->ptr >= c->ptr_end) {
  ------------------
  |  Branch (49:13): [True: 14.9k, False: 1.62M]
  ------------------
   50|  14.9k|            c->error = 1;
   51|  1.62M|        } else {
   52|  1.62M|            const unsigned state = *c->ptr++;
   53|  1.62M|            c->bits_left = 7;
   54|  1.62M|            c->state = (uint64_t) state << 57;
   55|  1.62M|            return state >> 7;
   56|  1.62M|        }
   57|  1.63M|    }
   58|       |
   59|  8.56M|    const uint64_t state = c->state;
   60|  8.56M|    c->bits_left--;
   61|  8.56M|    c->state = state << 1;
   62|  8.56M|    return (unsigned) (state >> 63);
   63|  10.1M|}
dav1d_get_uleb128:
   95|   143k|unsigned dav1d_get_uleb128(GetBits *const c) {
   96|   143k|    uint64_t val = 0;
   97|   143k|    unsigned i = 0, more;
   98|       |
   99|   151k|    do {
  100|   151k|        const int v = dav1d_get_bits(c, 8);
  101|   151k|        more = v & 0x80;
  102|   151k|        val |= ((uint64_t) (v & 0x7F)) << i;
  103|   151k|        i += 7;
  104|   151k|    } while (more && i < 56);
  ------------------
  |  Branch (104:14): [True: 7.91k, False: 143k]
  |  Branch (104:22): [True: 7.81k, False: 109]
  ------------------
  105|       |
  106|   143k|    if (val > UINT32_MAX || more) {
  ------------------
  |  Branch (106:9): [True: 210, False: 143k]
  |  Branch (106:29): [True: 86, False: 143k]
  ------------------
  107|    296|        c->error = 1;
  108|    296|        return 0;
  109|    296|    }
  110|       |
  111|   143k|    return (unsigned) val;
  112|   143k|}
dav1d_get_uniform:
  114|   149k|unsigned dav1d_get_uniform(GetBits *const c, const unsigned max) {
  115|       |    // Output in range [0..max-1]
  116|       |    // max must be > 1, or else nothing is read from the bitstream
  117|   149k|    assert(max > 1);
  ------------------
  |  Branch (117:5): [True: 149k, False: 0]
  ------------------
  118|   149k|    const int l = ulog2(max) + 1;
  119|   149k|    assert(l > 1);
  ------------------
  |  Branch (119:5): [True: 149k, False: 0]
  ------------------
  120|   149k|    const unsigned m = (1U << l) - max;
  121|   149k|    const unsigned v = dav1d_get_bits(c, l - 1);
  122|   149k|    return v < m ? v : (v << 1) - m + dav1d_get_bit(c);
  ------------------
  |  Branch (122:12): [True: 138k, False: 10.4k]
  ------------------
  123|   149k|}
dav1d_get_vlc:
  125|    660|unsigned dav1d_get_vlc(GetBits *const c) {
  126|    660|    if (dav1d_get_bit(c))
  ------------------
  |  Branch (126:9): [True: 310, False: 350]
  ------------------
  127|    310|        return 0;
  128|       |
  129|    350|    int n_bits = 0;
  130|  3.84k|    do {
  131|  3.84k|        if (++n_bits == 32)
  ------------------
  |  Branch (131:13): [True: 67, False: 3.77k]
  ------------------
  132|     67|            return UINT32_MAX;
  133|  3.84k|    } while (!dav1d_get_bit(c));
  ------------------
  |  Branch (133:14): [True: 3.49k, False: 283]
  ------------------
  134|       |
  135|    283|    return ((1U << n_bits) - 1) + dav1d_get_bits(c, n_bits);
  136|    350|}
dav1d_get_bits_subexp:
  162|   124k|int dav1d_get_bits_subexp(GetBits *const c, const int ref, const unsigned n) {
  163|   124k|    return (int) get_bits_subexp_u(c, ref + (1 << n), 2 << n) - (1 << n);
  164|   124k|}
getbits.c:refill:
   65|  1.80M|static inline void refill(GetBits *const c, const int n) {
   66|  1.80M|    assert(c->bits_left >= 0 && c->bits_left < 32);
  ------------------
  |  Branch (66:5): [True: 1.80M, False: 0]
  |  Branch (66:5): [True: 1.80M, False: 0]
  ------------------
   67|  1.80M|    unsigned state = 0;
   68|  1.87M|    do {
   69|  1.87M|        if (c->ptr >= c->ptr_end) {
  ------------------
  |  Branch (69:13): [True: 16.7k, False: 1.86M]
  ------------------
   70|  16.7k|            c->error = 1;
   71|  16.7k|            if (state) break;
  ------------------
  |  Branch (71:17): [True: 1.48k, False: 15.2k]
  ------------------
   72|  15.2k|            return;
   73|  16.7k|        }
   74|  1.86M|        state = (state << 8) | *c->ptr++;
   75|  1.86M|        c->bits_left += 8;
   76|  1.86M|    } while (n > c->bits_left);
  ------------------
  |  Branch (76:14): [True: 77.1k, False: 1.78M]
  ------------------
   77|  1.78M|    c->state |= (uint64_t) state << (64 - c->bits_left);
   78|  1.78M|}
getbits.c:get_bits_subexp_u:
  140|   124k|{
  141|   124k|    unsigned v = 0;
  142|       |
  143|   254k|    for (int i = 0;; i++) {
  144|   254k|        const int b = i ? 3 + i - 1 : 3;
  ------------------
  |  Branch (144:23): [True: 129k, False: 124k]
  ------------------
  145|       |
  146|   254k|        if (n < v + 3 * (1 << b)) {
  ------------------
  |  Branch (146:13): [True: 6.77k, False: 247k]
  ------------------
  147|  6.77k|            v += dav1d_get_uniform(c, n - v + 1);
  148|  6.77k|            break;
  149|  6.77k|        }
  150|       |
  151|   247k|        if (!dav1d_get_bit(c)) {
  ------------------
  |  Branch (151:13): [True: 117k, False: 129k]
  ------------------
  152|   117k|            v += dav1d_get_bits(c, b);
  153|   117k|            break;
  154|   117k|        }
  155|       |
  156|   129k|        v += 1 << b;
  157|   129k|    }
  158|       |
  159|   124k|    return ref * 2 <= n ? inv_recenter(ref, v) : n - inv_recenter(n - ref, v);
  ------------------
  |  Branch (159:12): [True: 114k, False: 9.79k]
  ------------------
  160|   124k|}

obu.c:dav1d_bytealign_get_bits:
   52|   616k|static inline void dav1d_bytealign_get_bits(GetBits *c) {
   53|       |    // bits_left is never more than 7, because it is only incremented
   54|       |    // by refill(), called by dav1d_get_bits and that never reads more
   55|       |    // than 7 bits more than it needs.
   56|       |    //
   57|       |    // If this wasn't true, we would need to work out how many bits to
   58|       |    // discard (bits_left % 8), subtract that from bits_left and then
   59|       |    // shift state right by that amount.
   60|   616k|    assert(c->bits_left <= 7);
  ------------------
  |  Branch (60:5): [True: 616k, False: 0]
  ------------------
   61|       |
   62|   616k|    c->bits_left = 0;
   63|   616k|    c->state = 0;
   64|   616k|}

dav1d_init_intra_edge_tree:
  126|      1|COLD void dav1d_init_intra_edge_tree(void) {
  127|       |    // This function is guaranteed to be called only once
  128|      1|    struct ModeSelMem mem;
  129|       |
  130|      1|    mem.nwc[BL_128X128] = &nodes.branch_sb128[1];
  131|      1|    mem.nwc[BL_64X64] = &nodes.branch_sb128[1 + 4];
  132|      1|    mem.nwc[BL_32X32] = &nodes.branch_sb128[1 + 4 + 16];
  133|      1|    mem.nt = nodes.tip_sb128;
  134|      1|    init_mode_node(nodes.branch_sb128, BL_128X128, &mem, 1, 0);
  135|      1|    assert(mem.nwc[BL_128X128] == &nodes.branch_sb128[1 + 4]);
  ------------------
  |  Branch (135:5): [True: 1, False: 0]
  ------------------
  136|      1|    assert(mem.nwc[BL_64X64] == &nodes.branch_sb128[1 + 4 + 16]);
  ------------------
  |  Branch (136:5): [True: 1, False: 0]
  ------------------
  137|      1|    assert(mem.nwc[BL_32X32] == &nodes.branch_sb128[1 + 4 + 16 + 64]);
  ------------------
  |  Branch (137:5): [True: 1, False: 0]
  ------------------
  138|      1|    assert(mem.nt == &nodes.tip_sb128[256]);
  ------------------
  |  Branch (138:5): [True: 1, False: 0]
  ------------------
  139|       |
  140|      1|    mem.nwc[BL_128X128] = NULL;
  141|      1|    mem.nwc[BL_64X64] = &nodes.branch_sb64[1];
  142|      1|    mem.nwc[BL_32X32] = &nodes.branch_sb64[1 + 4];
  143|      1|    mem.nt = nodes.tip_sb64;
  144|      1|    init_mode_node(nodes.branch_sb64, BL_64X64, &mem, 1, 0);
  145|      1|    assert(mem.nwc[BL_64X64] == &nodes.branch_sb64[1 + 4]);
  ------------------
  |  Branch (145:5): [True: 1, False: 0]
  ------------------
  146|      1|    assert(mem.nwc[BL_32X32] == &nodes.branch_sb64[1 + 4 + 16]);
  ------------------
  |  Branch (146:5): [True: 1, False: 0]
  ------------------
  147|      1|    assert(mem.nt == &nodes.tip_sb64[64]);
  ------------------
  |  Branch (147:5): [True: 1, False: 0]
  ------------------
  148|      1|}
intra_edge.c:init_mode_node:
  101|    106|{
  102|    106|    init_edges(&nwc->node, bl,
  103|    106|               (top_has_right ? EDGE_ALL_TOP_HAS_RIGHT : 0) |
  ------------------
  |  Branch (103:17): [True: 73, False: 33]
  ------------------
  104|    106|               (left_has_bottom ? EDGE_ALL_LEFT_HAS_BOTTOM : 0));
  ------------------
  |  Branch (104:17): [True: 33, False: 73]
  ------------------
  105|    106|    if (bl == BL_16X16) {
  ------------------
  |  Branch (105:9): [True: 80, False: 26]
  ------------------
  106|    400|        for (int n = 0; n < 4; n++) {
  ------------------
  |  Branch (106:25): [True: 320, False: 80]
  ------------------
  107|    320|            EdgeTip *const nt = mem->nt++;
  108|    320|            nwc->split_offset[n] = PTR_OFFSET(nwc, nt);
  ------------------
  |  |   94|    320|#define PTR_OFFSET(a, b) ((uint16_t)((uintptr_t)(b) - (uintptr_t)(a)))
  ------------------
  109|    320|            init_edges(&nt->node, bl + 1,
  110|    320|                       ((n == 3 || (n == 1 && !top_has_right)) ? 0 :
  ------------------
  |  Branch (110:26): [True: 80, False: 240]
  |  Branch (110:37): [True: 80, False: 160]
  |  Branch (110:47): [True: 26, False: 54]
  ------------------
  111|    320|                        EDGE_ALL_TOP_HAS_RIGHT) |
  112|    320|                       (!(n == 0 || (n == 2 && left_has_bottom)) ? 0 :
  ------------------
  |  Branch (112:27): [True: 80, False: 240]
  |  Branch (112:38): [True: 80, False: 160]
  |  Branch (112:48): [True: 26, False: 54]
  ------------------
  113|    320|                        EDGE_ALL_LEFT_HAS_BOTTOM));
  114|    320|        }
  115|     80|    } else {
  116|    130|        for (int n = 0; n < 4; n++) {
  ------------------
  |  Branch (116:25): [True: 104, False: 26]
  ------------------
  117|    104|            EdgeBranch *const nwc_child = mem->nwc[bl]++;
  118|    104|            nwc->split_offset[n] = PTR_OFFSET(nwc, nwc_child);
  ------------------
  |  |   94|    104|#define PTR_OFFSET(a, b) ((uint16_t)((uintptr_t)(b) - (uintptr_t)(a)))
  ------------------
  119|    104|            init_mode_node(nwc_child, bl + 1, mem,
  120|    104|                           !(n == 3 || (n == 1 && !top_has_right)),
  ------------------
  |  Branch (120:30): [True: 26, False: 78]
  |  Branch (120:41): [True: 26, False: 52]
  |  Branch (120:51): [True: 7, False: 19]
  ------------------
  121|    104|                           n == 0 || (n == 2 && left_has_bottom));
  ------------------
  |  Branch (121:28): [True: 26, False: 78]
  |  Branch (121:39): [True: 26, False: 52]
  |  Branch (121:49): [True: 7, False: 19]
  ------------------
  122|    104|        }
  123|     26|    }
  124|    106|}
intra_edge.c:init_edges:
   58|    426|{
   59|    426|    node->o = edge_flags;
   60|    426|    node->h[0] = edge_flags | EDGE_ALL_LEFT_HAS_BOTTOM;
   61|    426|    node->v[0] = edge_flags | EDGE_ALL_TOP_HAS_RIGHT;
   62|       |
   63|    426|    if (bl == BL_8X8) {
  ------------------
  |  Branch (63:9): [True: 320, False: 106]
  ------------------
   64|    320|        EdgeTip *const nt = (EdgeTip *) node;
   65|       |
   66|    320|        node->h[1] = edge_flags & (EDGE_ALL_LEFT_HAS_BOTTOM |
   67|    320|                                   EDGE_I420_TOP_HAS_RIGHT);
   68|    320|        node->v[1] = edge_flags & (EDGE_ALL_TOP_HAS_RIGHT |
   69|    320|                                   EDGE_I420_LEFT_HAS_BOTTOM |
   70|    320|                                   EDGE_I422_LEFT_HAS_BOTTOM);
   71|       |
   72|    320|        nt->split[0] = (edge_flags & EDGE_ALL_TOP_HAS_RIGHT) |
   73|    320|                       EDGE_I422_LEFT_HAS_BOTTOM;
   74|    320|        nt->split[1] = edge_flags | EDGE_I444_TOP_HAS_RIGHT;
   75|    320|        nt->split[2] = edge_flags & (EDGE_I420_TOP_HAS_RIGHT |
   76|    320|                                     EDGE_I420_LEFT_HAS_BOTTOM |
   77|    320|                                     EDGE_I422_LEFT_HAS_BOTTOM);
   78|    320|    } else {
   79|    106|        EdgeBranch *const nwc = (EdgeBranch *) node;
   80|       |
   81|    106|        node->h[1] = edge_flags & EDGE_ALL_LEFT_HAS_BOTTOM;
   82|    106|        node->v[1] = edge_flags & EDGE_ALL_TOP_HAS_RIGHT;
   83|       |
   84|    106|        nwc->h4 = EDGE_ALL_LEFT_HAS_BOTTOM;
   85|    106|        nwc->v4 = EDGE_ALL_TOP_HAS_RIGHT;
   86|    106|        if (bl == BL_16X16) {
  ------------------
  |  Branch (86:13): [True: 80, False: 26]
  ------------------
   87|     80|            nwc->h4 |= edge_flags & EDGE_I420_TOP_HAS_RIGHT;
   88|     80|            nwc->v4 |= edge_flags & (EDGE_I420_LEFT_HAS_BOTTOM |
   89|     80|                                     EDGE_I422_LEFT_HAS_BOTTOM);
   90|     80|        }
   91|    106|    }
   92|    426|}

recon_tmpl.c:sm_flag:
   95|  4.82M|static inline int sm_flag(const BlockContext *const b, const int idx) {
   96|  4.82M|    if (!b->intra[idx]) return 0;
  ------------------
  |  Branch (96:9): [True: 298k, False: 4.52M]
  ------------------
   97|  4.52M|    const enum IntraPredMode m = b->mode[idx];
   98|  4.52M|    return (m == SMOOTH_PRED || m == SMOOTH_H_PRED ||
  ------------------
  |  Branch (98:13): [True: 193k, False: 4.32M]
  |  Branch (98:33): [True: 97.8k, False: 4.23M]
  ------------------
   99|  4.23M|            m == SMOOTH_V_PRED) ? ANGLE_SMOOTH_EDGE_FLAG : 0;
  ------------------
  |  |   93|   364k|#define ANGLE_SMOOTH_EDGE_FLAG      512
  ------------------
  |  Branch (99:13): [True: 69.1k, False: 4.16M]
  ------------------
  100|  4.82M|}
recon_tmpl.c:sm_uv_flag:
  102|  2.96M|static inline int sm_uv_flag(const BlockContext *const b, const int idx) {
  103|  2.96M|    const enum IntraPredMode m = b->uvmode[idx];
  104|  2.96M|    return (m == SMOOTH_PRED || m == SMOOTH_H_PRED ||
  ------------------
  |  Branch (104:13): [True: 126k, False: 2.83M]
  |  Branch (104:33): [True: 66.1k, False: 2.77M]
  ------------------
  105|  2.77M|            m == SMOOTH_V_PRED) ? ANGLE_SMOOTH_EDGE_FLAG : 0;
  ------------------
  |  |   93|   257k|#define ANGLE_SMOOTH_EDGE_FLAG      512
  ------------------
  |  Branch (105:13): [True: 62.6k, False: 2.71M]
  ------------------
  106|  2.96M|}

dav1d_prepare_intra_edges_8bpc:
   86|  8.91M|{
   87|  8.91M|    const int bitdepth = bitdepth_from_max(bitdepth_max);
  ------------------
  |  |   58|  8.91M|#define bitdepth_from_max(x) 8
  ------------------
   88|  8.91M|    assert(y < h && x < w);
  ------------------
  |  Branch (88:5): [True: 8.91M, False: 833]
  |  Branch (88:5): [True: 8.91M, False: 18.4E]
  ------------------
   89|       |
   90|  8.91M|    switch (mode) {
   91|   239k|    case VERT_PRED:
  ------------------
  |  Branch (91:5): [True: 239k, False: 8.67M]
  ------------------
   92|   830k|    case HOR_PRED:
  ------------------
  |  Branch (92:5): [True: 591k, False: 8.32M]
  ------------------
   93|   902k|    case DIAG_DOWN_LEFT_PRED:
  ------------------
  |  Branch (93:5): [True: 71.7k, False: 8.84M]
  ------------------
   94|   980k|    case DIAG_DOWN_RIGHT_PRED:
  ------------------
  |  Branch (94:5): [True: 78.1k, False: 8.83M]
  ------------------
   95|  1.03M|    case VERT_RIGHT_PRED:
  ------------------
  |  Branch (95:5): [True: 54.4k, False: 8.85M]
  ------------------
   96|  1.84M|    case HOR_DOWN_PRED:
  ------------------
  |  Branch (96:5): [True: 813k, False: 8.09M]
  ------------------
   97|  2.09M|    case HOR_UP_PRED:
  ------------------
  |  Branch (97:5): [True: 245k, False: 8.66M]
  ------------------
   98|  2.18M|    case VERT_LEFT_PRED: {
  ------------------
  |  Branch (98:5): [True: 89.1k, False: 8.82M]
  ------------------
   99|  2.18M|        *angle = av1_mode_to_angle_map[mode - VERT_PRED] + 3 * *angle;
  100|       |
  101|  2.18M|        if (*angle <= 90)
  ------------------
  |  Branch (101:13): [True: 348k, False: 1.83M]
  ------------------
  102|   348k|            mode = *angle < 90 && have_top ? Z1_PRED : VERT_PRED;
  ------------------
  |  Branch (102:20): [True: 205k, False: 143k]
  |  Branch (102:35): [True: 203k, False: 2.17k]
  ------------------
  103|  1.83M|        else if (*angle < 180)
  ------------------
  |  Branch (103:18): [True: 1.09M, False: 736k]
  ------------------
  104|  1.09M|            mode = Z2_PRED;
  105|   736k|        else
  106|   736k|            mode = *angle > 180 && have_left ? Z3_PRED : HOR_PRED;
  ------------------
  |  Branch (106:20): [True: 332k, False: 403k]
  |  Branch (106:36): [True: 328k, False: 3.62k]
  ------------------
  107|  2.18M|        break;
  108|  2.09M|    }
  109|  5.32M|    case DC_PRED:
  ------------------
  |  Branch (109:5): [True: 5.32M, False: 3.58M]
  ------------------
  110|  5.75M|    case PAETH_PRED:
  ------------------
  |  Branch (110:5): [True: 425k, False: 8.48M]
  ------------------
  111|  5.75M|        mode = av1_mode_conv[mode][have_left][have_top];
  112|  5.75M|        break;
  113|   987k|    default:
  ------------------
  |  Branch (113:5): [True: 987k, False: 7.92M]
  ------------------
  114|   987k|        break;
  115|  8.91M|    }
  116|       |
  117|  8.91M|    const pixel *dst_top;
  118|  8.91M|    if (have_top &&
  ------------------
  |  Branch (118:9): [True: 8.75M, False: 161k]
  ------------------
  119|  8.75M|        (av1_intra_prediction_edges[mode].needs_top ||
  ------------------
  |  Branch (119:10): [True: 8.03M, False: 722k]
  ------------------
  120|   722k|         av1_intra_prediction_edges[mode].needs_topleft ||
  ------------------
  |  Branch (120:10): [True: 322k, False: 400k]
  ------------------
  121|   400k|         (av1_intra_prediction_edges[mode].needs_left && !have_left)))
  ------------------
  |  Branch (121:11): [True: 400k, False: 0]
  |  Branch (121:58): [True: 8.85k, False: 391k]
  ------------------
  122|  8.36M|    {
  123|  8.36M|        if (prefilter_toplevel_sb_edge) {
  ------------------
  |  Branch (123:13): [True: 813k, False: 7.54M]
  ------------------
  124|   813k|            dst_top = &prefilter_toplevel_sb_edge[x * 4];
  125|  7.54M|        } else {
  126|  7.54M|            dst_top = &dst[-PXSTRIDE(stride)];
  ------------------
  |  |   53|  7.54M|#define PXSTRIDE(x) (x)
  ------------------
  127|  7.54M|        }
  128|  8.36M|    }
  129|       |
  130|  8.91M|    if (av1_intra_prediction_edges[mode].needs_left) {
  ------------------
  |  Branch (130:9): [True: 8.02M, False: 892k]
  ------------------
  131|  8.02M|        const int sz = th << 2;
  132|  8.02M|        pixel *const left = &topleft_out[-sz];
  133|       |
  134|  8.02M|        if (have_left) {
  ------------------
  |  Branch (134:13): [True: 7.92M, False: 102k]
  ------------------
  135|  7.92M|            const int px_have = imin(sz, (h - y) << 2);
  136|       |
  137|  72.0M|            for (int i = 0; i < px_have; i++)
  ------------------
  |  Branch (137:29): [True: 64.1M, False: 7.92M]
  ------------------
  138|  64.1M|                left[sz - 1 - i] = dst[PXSTRIDE(stride) * i - 1];
  ------------------
  |  |   53|  64.1M|#define PXSTRIDE(x) (x)
  ------------------
  139|  7.92M|            if (px_have < sz)
  ------------------
  |  Branch (139:17): [True: 19.6k, False: 7.90M]
  ------------------
  140|  19.6k|                pixel_set(left, left[sz - px_have], sz - px_have);
  ------------------
  |  |   48|  19.6k|#define pixel_set memset
  ------------------
  141|  7.92M|        } else {
  142|   102k|            pixel_set(left, have_top ? *dst_top : ((1 << bitdepth) >> 1) + 1, sz);
  ------------------
  |  |   48|   102k|#define pixel_set memset
  ------------------
  |  Branch (142:29): [True: 98.8k, False: 3.32k]
  ------------------
  143|   102k|        }
  144|       |
  145|  8.02M|        if (av1_intra_prediction_edges[mode].needs_bottomleft) {
  ------------------
  |  Branch (145:13): [True: 328k, False: 7.69M]
  ------------------
  146|   328k|            const int have_bottomleft = (!have_left || y + th >= h) ? 0 :
  ------------------
  |  Branch (146:42): [True: 1, False: 328k]
  |  Branch (146:56): [True: 4.50k, False: 324k]
  ------------------
  147|   328k|                                        (edge_flags & EDGE_I444_LEFT_HAS_BOTTOM);
  148|       |
  149|   328k|            if (have_bottomleft) {
  ------------------
  |  Branch (149:17): [True: 72.3k, False: 256k]
  ------------------
  150|  72.3k|                const int px_have = imin(sz, (h - y - th) << 2);
  151|       |
  152|   615k|                for (int i = 0; i < px_have; i++)
  ------------------
  |  Branch (152:33): [True: 543k, False: 72.3k]
  ------------------
  153|   543k|                    left[-(i + 1)] = dst[(sz + i) * PXSTRIDE(stride) - 1];
  ------------------
  |  |   53|   543k|#define PXSTRIDE(x) (x)
  ------------------
  154|  72.3k|                if (px_have < sz)
  ------------------
  |  Branch (154:21): [True: 417, False: 71.9k]
  ------------------
  155|    417|                    pixel_set(left - sz, left[-px_have], sz - px_have);
  ------------------
  |  |   48|    417|#define pixel_set memset
  ------------------
  156|   256k|            } else {
  157|   256k|                pixel_set(left - sz, left[0], sz);
  ------------------
  |  |   48|   256k|#define pixel_set memset
  ------------------
  158|   256k|            }
  159|   328k|        }
  160|  8.02M|    }
  161|       |
  162|  8.91M|    if (av1_intra_prediction_edges[mode].needs_top) {
  ------------------
  |  Branch (162:9): [True: 8.08M, False: 830k]
  ------------------
  163|  8.08M|        const int sz = tw << 2;
  164|  8.08M|        pixel *const top = &topleft_out[1];
  165|       |
  166|  8.08M|        if (have_top) {
  ------------------
  |  Branch (166:13): [True: 8.03M, False: 49.5k]
  ------------------
  167|  8.03M|            const int px_have = imin(sz, (w - x) << 2);
  168|  8.03M|            pixel_copy(top, dst_top, px_have);
  ------------------
  |  |   47|  8.03M|#define pixel_copy memcpy
  ------------------
  169|  8.03M|            if (px_have < sz)
  ------------------
  |  Branch (169:17): [True: 309k, False: 7.72M]
  ------------------
  170|   309k|                pixel_set(top + px_have, top[px_have - 1], sz - px_have);
  ------------------
  |  |   48|   309k|#define pixel_set memset
  ------------------
  171|  8.03M|        } else {
  172|  49.5k|            pixel_set(top, have_left ? dst[-1] : ((1 << bitdepth) >> 1) - 1, sz);
  ------------------
  |  |   48|  49.5k|#define pixel_set memset
  ------------------
  |  Branch (172:28): [True: 46.4k, False: 3.10k]
  ------------------
  173|  49.5k|        }
  174|       |
  175|  8.08M|        if (av1_intra_prediction_edges[mode].needs_topright) {
  ------------------
  |  Branch (175:13): [True: 203k, False: 7.88M]
  ------------------
  176|   203k|            const int have_topright = (!have_top || x + tw >= w) ? 0 :
  ------------------
  |  Branch (176:40): [True: 1, False: 203k]
  |  Branch (176:53): [True: 4.54k, False: 198k]
  ------------------
  177|   203k|                                      (edge_flags & EDGE_I444_TOP_HAS_RIGHT);
  178|       |
  179|   203k|            if (have_topright) {
  ------------------
  |  Branch (179:17): [True: 143k, False: 59.2k]
  ------------------
  180|   143k|                const int px_have = imin(sz, (w - x - tw) << 2);
  181|       |
  182|   143k|                pixel_copy(top + sz, &dst_top[sz], px_have);
  ------------------
  |  |   47|   143k|#define pixel_copy memcpy
  ------------------
  183|   143k|                if (px_have < sz)
  ------------------
  |  Branch (183:21): [True: 995, False: 142k]
  ------------------
  184|    995|                    pixel_set(top + sz + px_have, top[sz + px_have - 1],
  ------------------
  |  |   48|    995|#define pixel_set memset
  ------------------
  185|    995|                              sz - px_have);
  186|   143k|            } else {
  187|  59.2k|                pixel_set(top + sz, top[sz - 1], sz);
  ------------------
  |  |   48|  59.2k|#define pixel_set memset
  ------------------
  188|  59.2k|            }
  189|   203k|        }
  190|  8.08M|    }
  191|       |
  192|  8.91M|    if (av1_intra_prediction_edges[mode].needs_topleft) {
  ------------------
  |  Branch (192:9): [True: 2.35M, False: 6.56M]
  ------------------
  193|  2.35M|        if (have_left)
  ------------------
  |  Branch (193:13): [True: 2.26M, False: 85.4k]
  ------------------
  194|  2.26M|            *topleft_out = have_top ? dst_top[-1] : dst[-1];
  ------------------
  |  Branch (194:28): [True: 2.23M, False: 35.9k]
  ------------------
  195|  85.4k|        else
  196|  85.4k|            *topleft_out = have_top ? *dst_top : (1 << bitdepth) >> 1;
  ------------------
  |  Branch (196:28): [True: 82.7k, False: 2.62k]
  ------------------
  197|       |
  198|  2.35M|        if (mode == Z2_PRED && tw + th >= 6 && filter_edge)
  ------------------
  |  Branch (198:13): [True: 1.09M, False: 1.25M]
  |  Branch (198:32): [True: 60.9k, False: 1.03M]
  |  Branch (198:48): [True: 59.0k, False: 1.88k]
  ------------------
  199|  59.0k|            *topleft_out = ((topleft_out[-1] + topleft_out[1]) * 5 +
  200|  59.0k|                            topleft_out[0] * 6 + 8) >> 4;
  201|  2.35M|    }
  202|       |
  203|  8.91M|    return mode;
  204|  8.91M|}
dav1d_prepare_intra_edges_16bpc:
   86|  11.6M|{
   87|  11.6M|    const int bitdepth = bitdepth_from_max(bitdepth_max);
  ------------------
  |  |   75|  11.6M|#define bitdepth_from_max(bitdepth_max) (32 - clz(bitdepth_max))
  ------------------
   88|  11.6M|    assert(y < h && x < w);
  ------------------
  |  Branch (88:5): [True: 11.6M, False: 388]
  |  Branch (88:5): [True: 11.6M, False: 18.4E]
  ------------------
   89|       |
   90|  11.6M|    switch (mode) {
   91|   377k|    case VERT_PRED:
  ------------------
  |  Branch (91:5): [True: 377k, False: 11.2M]
  ------------------
   92|  1.19M|    case HOR_PRED:
  ------------------
  |  Branch (92:5): [True: 822k, False: 10.8M]
  ------------------
   93|  1.30M|    case DIAG_DOWN_LEFT_PRED:
  ------------------
  |  Branch (93:5): [True: 104k, False: 11.5M]
  ------------------
   94|  1.49M|    case DIAG_DOWN_RIGHT_PRED:
  ------------------
  |  Branch (94:5): [True: 193k, False: 11.4M]
  ------------------
   95|  1.59M|    case VERT_RIGHT_PRED:
  ------------------
  |  Branch (95:5): [True: 98.4k, False: 11.5M]
  ------------------
   96|  1.81M|    case HOR_DOWN_PRED:
  ------------------
  |  Branch (96:5): [True: 216k, False: 11.4M]
  ------------------
   97|  2.37M|    case HOR_UP_PRED:
  ------------------
  |  Branch (97:5): [True: 559k, False: 11.0M]
  ------------------
   98|  2.52M|    case VERT_LEFT_PRED: {
  ------------------
  |  Branch (98:5): [True: 153k, False: 11.4M]
  ------------------
   99|  2.52M|        *angle = av1_mode_to_angle_map[mode - VERT_PRED] + 3 * *angle;
  100|       |
  101|  2.52M|        if (*angle <= 90)
  ------------------
  |  Branch (101:13): [True: 559k, False: 1.96M]
  ------------------
  102|   559k|            mode = *angle < 90 && have_top ? Z1_PRED : VERT_PRED;
  ------------------
  |  Branch (102:20): [True: 359k, False: 200k]
  |  Branch (102:35): [True: 348k, False: 10.6k]
  ------------------
  103|  1.96M|        else if (*angle < 180)
  ------------------
  |  Branch (103:18): [True: 761k, False: 1.20M]
  ------------------
  104|   761k|            mode = Z2_PRED;
  105|  1.20M|        else
  106|  1.20M|            mode = *angle > 180 && have_left ? Z3_PRED : HOR_PRED;
  ------------------
  |  Branch (106:20): [True: 771k, False: 433k]
  |  Branch (106:36): [True: 760k, False: 10.3k]
  ------------------
  107|  2.52M|        break;
  108|  2.37M|    }
  109|  6.80M|    case DC_PRED:
  ------------------
  |  Branch (109:5): [True: 6.80M, False: 4.83M]
  ------------------
  110|  7.53M|    case PAETH_PRED:
  ------------------
  |  Branch (110:5): [True: 730k, False: 10.9M]
  ------------------
  111|  7.53M|        mode = av1_mode_conv[mode][have_left][have_top];
  112|  7.53M|        break;
  113|  1.64M|    default:
  ------------------
  |  Branch (113:5): [True: 1.64M, False: 9.99M]
  ------------------
  114|  1.64M|        break;
  115|  11.6M|    }
  116|       |
  117|  11.6M|    const pixel *dst_top;
  118|  11.6M|    if (have_top &&
  ------------------
  |  Branch (118:9): [True: 11.0M, False: 638k]
  ------------------
  119|  11.0M|        (av1_intra_prediction_edges[mode].needs_top ||
  ------------------
  |  Branch (119:10): [True: 9.83M, False: 1.16M]
  ------------------
  120|  1.16M|         av1_intra_prediction_edges[mode].needs_topleft ||
  ------------------
  |  Branch (120:10): [True: 737k, False: 430k]
  ------------------
  121|   430k|         (av1_intra_prediction_edges[mode].needs_left && !have_left)))
  ------------------
  |  Branch (121:11): [True: 430k, False: 7]
  |  Branch (121:58): [True: 11.5k, False: 419k]
  ------------------
  122|  10.5M|    {
  123|  10.5M|        if (prefilter_toplevel_sb_edge) {
  ------------------
  |  Branch (123:13): [True: 616k, False: 9.97M]
  ------------------
  124|   616k|            dst_top = &prefilter_toplevel_sb_edge[x * 4];
  125|  9.97M|        } else {
  126|  9.97M|            dst_top = &dst[-PXSTRIDE(stride)];
  127|  9.97M|        }
  128|  10.5M|    }
  129|       |
  130|  11.6M|    if (av1_intra_prediction_edges[mode].needs_left) {
  ------------------
  |  Branch (130:9): [True: 10.1M, False: 1.52M]
  ------------------
  131|  10.1M|        const int sz = th << 2;
  132|  10.1M|        pixel *const left = &topleft_out[-sz];
  133|       |
  134|  10.1M|        if (have_left) {
  ------------------
  |  Branch (134:13): [True: 10.0M, False: 46.5k]
  ------------------
  135|  10.0M|            const int px_have = imin(sz, (h - y) << 2);
  136|       |
  137|  78.4M|            for (int i = 0; i < px_have; i++)
  ------------------
  |  Branch (137:29): [True: 68.3M, False: 10.0M]
  ------------------
  138|  68.3M|                left[sz - 1 - i] = dst[PXSTRIDE(stride) * i - 1];
  139|  10.0M|            if (px_have < sz)
  ------------------
  |  Branch (139:17): [True: 32.9k, False: 10.0M]
  ------------------
  140|  32.9k|                pixel_set(left, left[sz - px_have], sz - px_have);
  141|  10.0M|        } else {
  142|  46.5k|            pixel_set(left, have_top ? *dst_top : ((1 << bitdepth) >> 1) + 1, sz);
  ------------------
  |  Branch (142:29): [True: 34.9k, False: 11.6k]
  ------------------
  143|  46.5k|        }
  144|       |
  145|  10.1M|        if (av1_intra_prediction_edges[mode].needs_bottomleft) {
  ------------------
  |  Branch (145:13): [True: 759k, False: 9.35M]
  ------------------
  146|   759k|            const int have_bottomleft = (!have_left || y + th >= h) ? 0 :
  ------------------
  |  Branch (146:42): [True: 18.4E, False: 759k]
  |  Branch (146:56): [True: 19.7k, False: 739k]
  ------------------
  147|   759k|                                        (edge_flags & EDGE_I444_LEFT_HAS_BOTTOM);
  148|       |
  149|   759k|            if (have_bottomleft) {
  ------------------
  |  Branch (149:17): [True: 87.3k, False: 671k]
  ------------------
  150|  87.3k|                const int px_have = imin(sz, (h - y - th) << 2);
  151|       |
  152|   705k|                for (int i = 0; i < px_have; i++)
  ------------------
  |  Branch (152:33): [True: 618k, False: 87.3k]
  ------------------
  153|   618k|                    left[-(i + 1)] = dst[(sz + i) * PXSTRIDE(stride) - 1];
  154|  87.3k|                if (px_have < sz)
  ------------------
  |  Branch (154:21): [True: 528, False: 86.7k]
  ------------------
  155|    528|                    pixel_set(left - sz, left[-px_have], sz - px_have);
  156|   671k|            } else {
  157|   671k|                pixel_set(left - sz, left[0], sz);
  158|   671k|            }
  159|   759k|        }
  160|  10.1M|    }
  161|       |
  162|  11.6M|    if (av1_intra_prediction_edges[mode].needs_top) {
  ------------------
  |  Branch (162:9): [True: 9.92M, False: 1.71M]
  ------------------
  163|  9.92M|        const int sz = tw << 2;
  164|  9.92M|        pixel *const top = &topleft_out[1];
  165|       |
  166|  9.92M|        if (have_top) {
  ------------------
  |  Branch (166:13): [True: 9.83M, False: 97.5k]
  ------------------
  167|  9.83M|            const int px_have = imin(sz, (w - x) << 2);
  168|  9.83M|            pixel_copy(top, dst_top, px_have);
  ------------------
  |  |   65|  9.83M|#define pixel_copy(a, b, c) memcpy(a, b, (c) << 1)
  ------------------
  169|  9.83M|            if (px_have < sz)
  ------------------
  |  Branch (169:17): [True: 588k, False: 9.24M]
  ------------------
  170|   588k|                pixel_set(top + px_have, top[px_have - 1], sz - px_have);
  171|  9.83M|        } else {
  172|  97.5k|            pixel_set(top, have_left ? dst[-1] : ((1 << bitdepth) >> 1) - 1, sz);
  ------------------
  |  Branch (172:28): [True: 79.2k, False: 18.3k]
  ------------------
  173|  97.5k|        }
  174|       |
  175|  9.92M|        if (av1_intra_prediction_edges[mode].needs_topright) {
  ------------------
  |  Branch (175:13): [True: 348k, False: 9.58M]
  ------------------
  176|   348k|            const int have_topright = (!have_top || x + tw >= w) ? 0 :
  ------------------
  |  Branch (176:40): [True: 18.4E, False: 348k]
  |  Branch (176:53): [True: 4.61k, False: 343k]
  ------------------
  177|   348k|                                      (edge_flags & EDGE_I444_TOP_HAS_RIGHT);
  178|       |
  179|   348k|            if (have_topright) {
  ------------------
  |  Branch (179:17): [True: 300k, False: 47.9k]
  ------------------
  180|   300k|                const int px_have = imin(sz, (w - x - tw) << 2);
  181|       |
  182|   300k|                pixel_copy(top + sz, &dst_top[sz], px_have);
  ------------------
  |  |   65|   300k|#define pixel_copy(a, b, c) memcpy(a, b, (c) << 1)
  ------------------
  183|   300k|                if (px_have < sz)
  ------------------
  |  Branch (183:21): [True: 1.27k, False: 299k]
  ------------------
  184|  1.27k|                    pixel_set(top + sz + px_have, top[sz + px_have - 1],
  185|  1.27k|                              sz - px_have);
  186|   300k|            } else {
  187|  47.9k|                pixel_set(top + sz, top[sz - 1], sz);
  188|  47.9k|            }
  189|   348k|        }
  190|  9.92M|    }
  191|       |
  192|  11.6M|    if (av1_intra_prediction_edges[mode].needs_topleft) {
  ------------------
  |  Branch (192:9): [True: 2.78M, False: 8.85M]
  ------------------
  193|  2.78M|        if (have_left)
  ------------------
  |  Branch (193:13): [True: 2.76M, False: 17.3k]
  ------------------
  194|  2.76M|            *topleft_out = have_top ? dst_top[-1] : dst[-1];
  ------------------
  |  Branch (194:28): [True: 2.72M, False: 43.5k]
  ------------------
  195|  17.3k|        else
  196|  17.3k|            *topleft_out = have_top ? *dst_top : (1 << bitdepth) >> 1;
  ------------------
  |  Branch (196:28): [True: 15.7k, False: 1.60k]
  ------------------
  197|       |
  198|  2.78M|        if (mode == Z2_PRED && tw + th >= 6 && filter_edge)
  ------------------
  |  Branch (198:13): [True: 758k, False: 2.02M]
  |  Branch (198:32): [True: 64.0k, False: 694k]
  |  Branch (198:48): [True: 60.5k, False: 3.54k]
  ------------------
  199|  60.5k|            *topleft_out = ((topleft_out[-1] + topleft_out[1]) * 5 +
  200|  60.5k|                            topleft_out[0] * 6 + 8) >> 4;
  201|  2.78M|    }
  202|       |
  203|  11.6M|    return mode;
  204|  11.6M|}

dav1d_intra_pred_dsp_init_8bpc:
  744|  3.49k|COLD void bitfn(dav1d_intra_pred_dsp_init)(Dav1dIntraPredDSPContext *const c) {
  745|  3.49k|    c->intra_pred[DC_PRED      ] = ipred_dc_c;
  746|  3.49k|    c->intra_pred[DC_128_PRED  ] = ipred_dc_128_c;
  747|  3.49k|    c->intra_pred[TOP_DC_PRED  ] = ipred_dc_top_c;
  748|  3.49k|    c->intra_pred[LEFT_DC_PRED ] = ipred_dc_left_c;
  749|  3.49k|    c->intra_pred[HOR_PRED     ] = ipred_h_c;
  750|  3.49k|    c->intra_pred[VERT_PRED    ] = ipred_v_c;
  751|  3.49k|    c->intra_pred[PAETH_PRED   ] = ipred_paeth_c;
  752|  3.49k|    c->intra_pred[SMOOTH_PRED  ] = ipred_smooth_c;
  753|  3.49k|    c->intra_pred[SMOOTH_V_PRED] = ipred_smooth_v_c;
  754|  3.49k|    c->intra_pred[SMOOTH_H_PRED] = ipred_smooth_h_c;
  755|  3.49k|    c->intra_pred[Z1_PRED      ] = ipred_z1_c;
  756|  3.49k|    c->intra_pred[Z2_PRED      ] = ipred_z2_c;
  757|  3.49k|    c->intra_pred[Z3_PRED      ] = ipred_z3_c;
  758|  3.49k|    c->intra_pred[FILTER_PRED  ] = ipred_filter_c;
  759|       |
  760|  3.49k|    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1] = cfl_ac_420_c;
  761|  3.49k|    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1] = cfl_ac_422_c;
  762|  3.49k|    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1] = cfl_ac_444_c;
  763|       |
  764|  3.49k|    c->cfl_pred[DC_PRED     ] = ipred_cfl_c;
  765|  3.49k|    c->cfl_pred[DC_128_PRED ] = ipred_cfl_128_c;
  766|  3.49k|    c->cfl_pred[TOP_DC_PRED ] = ipred_cfl_top_c;
  767|  3.49k|    c->cfl_pred[LEFT_DC_PRED] = ipred_cfl_left_c;
  768|       |
  769|  3.49k|    c->pal_pred = pal_pred_c;
  770|       |
  771|  3.49k|#if HAVE_ASM
  772|       |#if ARCH_AARCH64 || ARCH_ARM
  773|       |    intra_pred_dsp_init_arm(c);
  774|       |#elif ARCH_RISCV
  775|       |    intra_pred_dsp_init_riscv(c);
  776|       |#elif ARCH_X86
  777|       |    intra_pred_dsp_init_x86(c);
  778|       |#elif ARCH_LOONGARCH64
  779|       |    intra_pred_dsp_init_loongarch(c);
  780|       |#endif
  781|  3.49k|#endif
  782|  3.49k|}
dav1d_intra_pred_dsp_init_16bpc:
  744|  5.72k|COLD void bitfn(dav1d_intra_pred_dsp_init)(Dav1dIntraPredDSPContext *const c) {
  745|  5.72k|    c->intra_pred[DC_PRED      ] = ipred_dc_c;
  746|  5.72k|    c->intra_pred[DC_128_PRED  ] = ipred_dc_128_c;
  747|  5.72k|    c->intra_pred[TOP_DC_PRED  ] = ipred_dc_top_c;
  748|  5.72k|    c->intra_pred[LEFT_DC_PRED ] = ipred_dc_left_c;
  749|  5.72k|    c->intra_pred[HOR_PRED     ] = ipred_h_c;
  750|  5.72k|    c->intra_pred[VERT_PRED    ] = ipred_v_c;
  751|  5.72k|    c->intra_pred[PAETH_PRED   ] = ipred_paeth_c;
  752|  5.72k|    c->intra_pred[SMOOTH_PRED  ] = ipred_smooth_c;
  753|  5.72k|    c->intra_pred[SMOOTH_V_PRED] = ipred_smooth_v_c;
  754|  5.72k|    c->intra_pred[SMOOTH_H_PRED] = ipred_smooth_h_c;
  755|  5.72k|    c->intra_pred[Z1_PRED      ] = ipred_z1_c;
  756|  5.72k|    c->intra_pred[Z2_PRED      ] = ipred_z2_c;
  757|  5.72k|    c->intra_pred[Z3_PRED      ] = ipred_z3_c;
  758|  5.72k|    c->intra_pred[FILTER_PRED  ] = ipred_filter_c;
  759|       |
  760|  5.72k|    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1] = cfl_ac_420_c;
  761|  5.72k|    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1] = cfl_ac_422_c;
  762|  5.72k|    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1] = cfl_ac_444_c;
  763|       |
  764|  5.72k|    c->cfl_pred[DC_PRED     ] = ipred_cfl_c;
  765|  5.72k|    c->cfl_pred[DC_128_PRED ] = ipred_cfl_128_c;
  766|  5.72k|    c->cfl_pred[TOP_DC_PRED ] = ipred_cfl_top_c;
  767|  5.72k|    c->cfl_pred[LEFT_DC_PRED] = ipred_cfl_left_c;
  768|       |
  769|  5.72k|    c->pal_pred = pal_pred_c;
  770|       |
  771|  5.72k|#if HAVE_ASM
  772|       |#if ARCH_AARCH64 || ARCH_ARM
  773|       |    intra_pred_dsp_init_arm(c);
  774|       |#elif ARCH_RISCV
  775|       |    intra_pred_dsp_init_riscv(c);
  776|       |#elif ARCH_X86
  777|       |    intra_pred_dsp_init_x86(c);
  778|       |#elif ARCH_LOONGARCH64
  779|       |    intra_pred_dsp_init_loongarch(c);
  780|       |#endif
  781|  5.72k|#endif
  782|  5.72k|}

itx_1d.c:inv_dct4_1d_internal_c:
   68|  1.80M|{
   69|  1.80M|    assert(stride > 0);
  ------------------
  |  Branch (69:5): [True: 1.80M, False: 325]
  ------------------
   70|  1.80M|    const int in0 = c[0 * stride], in1 = c[1 * stride];
   71|       |
   72|  1.80M|    int t0, t1, t2, t3;
   73|  1.80M|    if (tx64) {
  ------------------
  |  Branch (73:9): [True: 1.00M, False: 797k]
  ------------------
   74|  1.00M|        t0 = t1 = (in0 * 181 + 128) >> 8;
   75|  1.00M|        t2 = (in1 * 1567 + 2048) >> 12;
   76|  1.00M|        t3 = (in1 * 3784 + 2048) >> 12;
   77|  1.00M|    } else {
   78|   797k|        const int in2 = c[2 * stride], in3 = c[3 * stride];
   79|       |
   80|   797k|        t0 = ((in0 + in2) * 181 + 128) >> 8;
   81|   797k|        t1 = ((in0 - in2) * 181 + 128) >> 8;
   82|   797k|        t2 = ((in1 *  1567         - in3 * (3784 - 4096) + 2048) >> 12) - in3;
   83|   797k|        t3 = ((in1 * (3784 - 4096) + in3 *  1567         + 2048) >> 12) + in1;
   84|   797k|    }
   85|       |
   86|  1.80M|    c[0 * stride] = CLIP(t0 + t3);
  ------------------
  |  |   37|  1.80M|#define CLIP(a) iclip(a, min, max)
  ------------------
   87|  1.80M|    c[1 * stride] = CLIP(t1 + t2);
  ------------------
  |  |   37|  1.80M|#define CLIP(a) iclip(a, min, max)
  ------------------
   88|  1.80M|    c[2 * stride] = CLIP(t1 - t2);
  ------------------
  |  |   37|  1.80M|#define CLIP(a) iclip(a, min, max)
  ------------------
   89|  1.80M|    c[3 * stride] = CLIP(t0 - t3);
  ------------------
  |  |   37|  1.80M|#define CLIP(a) iclip(a, min, max)
  ------------------
   90|  1.80M|}
itx_1d.c:inv_dct8_1d_internal_c:
  101|  1.80M|{
  102|  1.80M|    assert(stride > 0);
  ------------------
  |  Branch (102:5): [True: 1.80M, False: 18.4E]
  ------------------
  103|  1.80M|    inv_dct4_1d_internal_c(c, stride << 1, min, max, tx64);
  104|       |
  105|  1.80M|    const int in1 = c[1 * stride], in3 = c[3 * stride];
  106|       |
  107|  1.80M|    int t4a, t5a, t6a, t7a;
  108|  1.80M|    if (tx64) {
  ------------------
  |  Branch (108:9): [True: 1.00M, False: 797k]
  ------------------
  109|  1.00M|        t4a = (in1 *   799 + 2048) >> 12;
  110|  1.00M|        t5a = (in3 * -2276 + 2048) >> 12;
  111|  1.00M|        t6a = (in3 *  3406 + 2048) >> 12;
  112|  1.00M|        t7a = (in1 *  4017 + 2048) >> 12;
  113|  1.00M|    } else {
  114|   797k|        const int in5 = c[5 * stride], in7 = c[7 * stride];
  115|       |
  116|   797k|        t4a = ((in1 *   799         - in7 * (4017 - 4096) + 2048) >> 12) - in7;
  117|   797k|        t5a =  (in5 *  1703         - in3 *  1138         + 1024) >> 11;
  118|   797k|        t6a =  (in5 *  1138         + in3 *  1703         + 1024) >> 11;
  119|   797k|        t7a = ((in1 * (4017 - 4096) + in7 *  799          + 2048) >> 12) + in1;
  120|   797k|    }
  121|       |
  122|  1.80M|    const int t4  = CLIP(t4a + t5a);
  ------------------
  |  |   37|  1.80M|#define CLIP(a) iclip(a, min, max)
  ------------------
  123|  1.80M|              t5a = CLIP(t4a - t5a);
  ------------------
  |  |   37|  1.80M|#define CLIP(a) iclip(a, min, max)
  ------------------
  124|  1.80M|    const int t7  = CLIP(t7a + t6a);
  ------------------
  |  |   37|  1.80M|#define CLIP(a) iclip(a, min, max)
  ------------------
  125|  1.80M|              t6a = CLIP(t7a - t6a);
  ------------------
  |  |   37|  1.80M|#define CLIP(a) iclip(a, min, max)
  ------------------
  126|       |
  127|  1.80M|    const int t5  = ((t6a - t5a) * 181 + 128) >> 8;
  128|  1.80M|    const int t6  = ((t6a + t5a) * 181 + 128) >> 8;
  129|       |
  130|  1.80M|    const int t0 = c[0 * stride];
  131|  1.80M|    const int t1 = c[2 * stride];
  132|  1.80M|    const int t2 = c[4 * stride];
  133|  1.80M|    const int t3 = c[6 * stride];
  134|       |
  135|  1.80M|    c[0 * stride] = CLIP(t0 + t7);
  ------------------
  |  |   37|  1.80M|#define CLIP(a) iclip(a, min, max)
  ------------------
  136|  1.80M|    c[1 * stride] = CLIP(t1 + t6);
  ------------------
  |  |   37|  1.80M|#define CLIP(a) iclip(a, min, max)
  ------------------
  137|  1.80M|    c[2 * stride] = CLIP(t2 + t5);
  ------------------
  |  |   37|  1.80M|#define CLIP(a) iclip(a, min, max)
  ------------------
  138|  1.80M|    c[3 * stride] = CLIP(t3 + t4);
  ------------------
  |  |   37|  1.80M|#define CLIP(a) iclip(a, min, max)
  ------------------
  139|  1.80M|    c[4 * stride] = CLIP(t3 - t4);
  ------------------
  |  |   37|  1.80M|#define CLIP(a) iclip(a, min, max)
  ------------------
  140|  1.80M|    c[5 * stride] = CLIP(t2 - t5);
  ------------------
  |  |   37|  1.80M|#define CLIP(a) iclip(a, min, max)
  ------------------
  141|  1.80M|    c[6 * stride] = CLIP(t1 - t6);
  ------------------
  |  |   37|  1.80M|#define CLIP(a) iclip(a, min, max)
  ------------------
  142|  1.80M|    c[7 * stride] = CLIP(t0 - t7);
  ------------------
  |  |   37|  1.80M|#define CLIP(a) iclip(a, min, max)
  ------------------
  143|  1.80M|}
itx_1d.c:inv_dct16_1d_c:
  242|   226k|{
  243|   226k|    inv_dct16_1d_internal_c(c, stride, min, max, 0);
  244|   226k|}
itx_1d.c:inv_dct16_1d_internal_c:
  154|  1.80M|{
  155|  1.80M|    assert(stride > 0);
  ------------------
  |  Branch (155:5): [True: 1.80M, False: 18.4E]
  ------------------
  156|  1.80M|    inv_dct8_1d_internal_c(c, stride << 1, min, max, tx64);
  157|       |
  158|  1.80M|    const int in1 = c[1 * stride], in3 = c[3 * stride];
  159|  1.80M|    const int in5 = c[5 * stride], in7 = c[7 * stride];
  160|       |
  161|  1.80M|    int t8a, t9a, t10a, t11a, t12a, t13a, t14a, t15a;
  162|  1.80M|    if (tx64) {
  ------------------
  |  Branch (162:9): [True: 1.00M, False: 796k]
  ------------------
  163|  1.00M|        t8a  = (in1 *   401 + 2048) >> 12;
  164|  1.00M|        t9a  = (in7 * -2598 + 2048) >> 12;
  165|  1.00M|        t10a = (in5 *  1931 + 2048) >> 12;
  166|  1.00M|        t11a = (in3 * -1189 + 2048) >> 12;
  167|  1.00M|        t12a = (in3 *  3920 + 2048) >> 12;
  168|  1.00M|        t13a = (in5 *  3612 + 2048) >> 12;
  169|  1.00M|        t14a = (in7 *  3166 + 2048) >> 12;
  170|  1.00M|        t15a = (in1 *  4076 + 2048) >> 12;
  171|  1.00M|    } else {
  172|   796k|        const int in9  = c[ 9 * stride], in11 = c[11 * stride];
  173|   796k|        const int in13 = c[13 * stride], in15 = c[15 * stride];
  174|       |
  175|   796k|        t8a  = ((in1  *   401         - in15 * (4076 - 4096) + 2048) >> 12) - in15;
  176|   796k|        t9a  =  (in9  *  1583         - in7  *  1299         + 1024) >> 11;
  177|   796k|        t10a = ((in5  *  1931         - in11 * (3612 - 4096) + 2048) >> 12) - in11;
  178|   796k|        t11a = ((in13 * (3920 - 4096) - in3  *  1189         + 2048) >> 12) + in13;
  179|   796k|        t12a = ((in13 *  1189         + in3  * (3920 - 4096) + 2048) >> 12) + in3;
  180|   796k|        t13a = ((in5  * (3612 - 4096) + in11 *  1931         + 2048) >> 12) + in5;
  181|   796k|        t14a =  (in9  *  1299         + in7  *  1583         + 1024) >> 11;
  182|   796k|        t15a = ((in1  * (4076 - 4096) + in15 *   401         + 2048) >> 12) + in1;
  183|   796k|    }
  184|       |
  185|  1.80M|    int t8  = CLIP(t8a  + t9a);
  ------------------
  |  |   37|  1.80M|#define CLIP(a) iclip(a, min, max)
  ------------------
  186|  1.80M|    int t9  = CLIP(t8a  - t9a);
  ------------------
  |  |   37|  1.80M|#define CLIP(a) iclip(a, min, max)
  ------------------
  187|  1.80M|    int t10 = CLIP(t11a - t10a);
  ------------------
  |  |   37|  1.80M|#define CLIP(a) iclip(a, min, max)
  ------------------
  188|  1.80M|    int t11 = CLIP(t11a + t10a);
  ------------------
  |  |   37|  1.80M|#define CLIP(a) iclip(a, min, max)
  ------------------
  189|  1.80M|    int t12 = CLIP(t12a + t13a);
  ------------------
  |  |   37|  1.80M|#define CLIP(a) iclip(a, min, max)
  ------------------
  190|  1.80M|    int t13 = CLIP(t12a - t13a);
  ------------------
  |  |   37|  1.80M|#define CLIP(a) iclip(a, min, max)
  ------------------
  191|  1.80M|    int t14 = CLIP(t15a - t14a);
  ------------------
  |  |   37|  1.80M|#define CLIP(a) iclip(a, min, max)
  ------------------
  192|  1.80M|    int t15 = CLIP(t15a + t14a);
  ------------------
  |  |   37|  1.80M|#define CLIP(a) iclip(a, min, max)
  ------------------
  193|       |
  194|  1.80M|    t9a  = ((  t14 *  1567         - t9  * (3784 - 4096)  + 2048) >> 12) - t9;
  195|  1.80M|    t14a = ((  t14 * (3784 - 4096) + t9  *  1567          + 2048) >> 12) + t14;
  196|  1.80M|    t10a = ((-(t13 * (3784 - 4096) + t10 *  1567)         + 2048) >> 12) - t13;
  197|  1.80M|    t13a = ((  t13 *  1567         - t10 * (3784 - 4096)  + 2048) >> 12) - t10;
  198|       |
  199|  1.80M|    t8a  = CLIP(t8   + t11);
  ------------------
  |  |   37|  1.80M|#define CLIP(a) iclip(a, min, max)
  ------------------
  200|  1.80M|    t9   = CLIP(t9a  + t10a);
  ------------------
  |  |   37|  1.80M|#define CLIP(a) iclip(a, min, max)
  ------------------
  201|  1.80M|    t10  = CLIP(t9a  - t10a);
  ------------------
  |  |   37|  1.80M|#define CLIP(a) iclip(a, min, max)
  ------------------
  202|  1.80M|    t11a = CLIP(t8   - t11);
  ------------------
  |  |   37|  1.80M|#define CLIP(a) iclip(a, min, max)
  ------------------
  203|  1.80M|    t12a = CLIP(t15  - t12);
  ------------------
  |  |   37|  1.80M|#define CLIP(a) iclip(a, min, max)
  ------------------
  204|  1.80M|    t13  = CLIP(t14a - t13a);
  ------------------
  |  |   37|  1.80M|#define CLIP(a) iclip(a, min, max)
  ------------------
  205|  1.80M|    t14  = CLIP(t14a + t13a);
  ------------------
  |  |   37|  1.80M|#define CLIP(a) iclip(a, min, max)
  ------------------
  206|  1.80M|    t15a = CLIP(t15  + t12);
  ------------------
  |  |   37|  1.80M|#define CLIP(a) iclip(a, min, max)
  ------------------
  207|       |
  208|  1.80M|    t10a = ((t13  - t10)  * 181 + 128) >> 8;
  209|  1.80M|    t13a = ((t13  + t10)  * 181 + 128) >> 8;
  210|  1.80M|    t11  = ((t12a - t11a) * 181 + 128) >> 8;
  211|  1.80M|    t12  = ((t12a + t11a) * 181 + 128) >> 8;
  212|       |
  213|  1.80M|    const int t0 = c[ 0 * stride];
  214|  1.80M|    const int t1 = c[ 2 * stride];
  215|  1.80M|    const int t2 = c[ 4 * stride];
  216|  1.80M|    const int t3 = c[ 6 * stride];
  217|  1.80M|    const int t4 = c[ 8 * stride];
  218|  1.80M|    const int t5 = c[10 * stride];
  219|  1.80M|    const int t6 = c[12 * stride];
  220|  1.80M|    const int t7 = c[14 * stride];
  221|       |
  222|  1.80M|    c[ 0 * stride] = CLIP(t0 + t15a);
  ------------------
  |  |   37|  1.80M|#define CLIP(a) iclip(a, min, max)
  ------------------
  223|  1.80M|    c[ 1 * stride] = CLIP(t1 + t14);
  ------------------
  |  |   37|  1.80M|#define CLIP(a) iclip(a, min, max)
  ------------------
  224|  1.80M|    c[ 2 * stride] = CLIP(t2 + t13a);
  ------------------
  |  |   37|  1.80M|#define CLIP(a) iclip(a, min, max)
  ------------------
  225|  1.80M|    c[ 3 * stride] = CLIP(t3 + t12);
  ------------------
  |  |   37|  1.80M|#define CLIP(a) iclip(a, min, max)
  ------------------
  226|  1.80M|    c[ 4 * stride] = CLIP(t4 + t11);
  ------------------
  |  |   37|  1.80M|#define CLIP(a) iclip(a, min, max)
  ------------------
  227|  1.80M|    c[ 5 * stride] = CLIP(t5 + t10a);
  ------------------
  |  |   37|  1.80M|#define CLIP(a) iclip(a, min, max)
  ------------------
  228|  1.80M|    c[ 6 * stride] = CLIP(t6 + t9);
  ------------------
  |  |   37|  1.80M|#define CLIP(a) iclip(a, min, max)
  ------------------
  229|  1.80M|    c[ 7 * stride] = CLIP(t7 + t8a);
  ------------------
  |  |   37|  1.80M|#define CLIP(a) iclip(a, min, max)
  ------------------
  230|  1.80M|    c[ 8 * stride] = CLIP(t7 - t8a);
  ------------------
  |  |   37|  1.80M|#define CLIP(a) iclip(a, min, max)
  ------------------
  231|  1.80M|    c[ 9 * stride] = CLIP(t6 - t9);
  ------------------
  |  |   37|  1.80M|#define CLIP(a) iclip(a, min, max)
  ------------------
  232|  1.80M|    c[10 * stride] = CLIP(t5 - t10a);
  ------------------
  |  |   37|  1.80M|#define CLIP(a) iclip(a, min, max)
  ------------------
  233|  1.80M|    c[11 * stride] = CLIP(t4 - t11);
  ------------------
  |  |   37|  1.80M|#define CLIP(a) iclip(a, min, max)
  ------------------
  234|  1.80M|    c[12 * stride] = CLIP(t3 - t12);
  ------------------
  |  |   37|  1.80M|#define CLIP(a) iclip(a, min, max)
  ------------------
  235|  1.80M|    c[13 * stride] = CLIP(t2 - t13a);
  ------------------
  |  |   37|  1.80M|#define CLIP(a) iclip(a, min, max)
  ------------------
  236|  1.80M|    c[14 * stride] = CLIP(t1 - t14);
  ------------------
  |  |   37|  1.80M|#define CLIP(a) iclip(a, min, max)
  ------------------
  237|  1.80M|    c[15 * stride] = CLIP(t0 - t15a);
  ------------------
  |  |   37|  1.80M|#define CLIP(a) iclip(a, min, max)
  ------------------
  238|  1.80M|}
itx_1d.c:inv_dct32_1d_c:
  432|   572k|{
  433|   572k|    inv_dct32_1d_internal_c(c, stride, min, max, 0);
  434|   572k|}
itx_1d.c:inv_dct32_1d_internal_c:
  249|  1.57M|{
  250|  1.57M|    assert(stride > 0);
  ------------------
  |  Branch (250:5): [True: 1.57M, False: 208]
  ------------------
  251|  1.57M|    inv_dct16_1d_internal_c(c, stride << 1, min, max, tx64);
  252|       |
  253|  1.57M|    const int in1  = c[ 1 * stride], in3  = c[ 3 * stride];
  254|  1.57M|    const int in5  = c[ 5 * stride], in7  = c[ 7 * stride];
  255|  1.57M|    const int in9  = c[ 9 * stride], in11 = c[11 * stride];
  256|  1.57M|    const int in13 = c[13 * stride], in15 = c[15 * stride];
  257|       |
  258|  1.57M|    int t16a, t17a, t18a, t19a, t20a, t21a, t22a, t23a;
  259|  1.57M|    int t24a, t25a, t26a, t27a, t28a, t29a, t30a, t31a;
  260|  1.57M|    if (tx64) {
  ------------------
  |  Branch (260:9): [True: 1.00M, False: 570k]
  ------------------
  261|  1.00M|        t16a = (in1  *   201 + 2048) >> 12;
  262|  1.00M|        t17a = (in15 * -2751 + 2048) >> 12;
  263|  1.00M|        t18a = (in9  *  1751 + 2048) >> 12;
  264|  1.00M|        t19a = (in7  * -1380 + 2048) >> 12;
  265|  1.00M|        t20a = (in5  *   995 + 2048) >> 12;
  266|  1.00M|        t21a = (in11 * -2106 + 2048) >> 12;
  267|  1.00M|        t22a = (in13 *  2440 + 2048) >> 12;
  268|  1.00M|        t23a = (in3  *  -601 + 2048) >> 12;
  269|  1.00M|        t24a = (in3  *  4052 + 2048) >> 12;
  270|  1.00M|        t25a = (in13 *  3290 + 2048) >> 12;
  271|  1.00M|        t26a = (in11 *  3513 + 2048) >> 12;
  272|  1.00M|        t27a = (in5  *  3973 + 2048) >> 12;
  273|  1.00M|        t28a = (in7  *  3857 + 2048) >> 12;
  274|  1.00M|        t29a = (in9  *  3703 + 2048) >> 12;
  275|  1.00M|        t30a = (in15 *  3035 + 2048) >> 12;
  276|  1.00M|        t31a = (in1  *  4091 + 2048) >> 12;
  277|  1.00M|    } else {
  278|   570k|        const int in17 = c[17 * stride], in19 = c[19 * stride];
  279|   570k|        const int in21 = c[21 * stride], in23 = c[23 * stride];
  280|   570k|        const int in25 = c[25 * stride], in27 = c[27 * stride];
  281|   570k|        const int in29 = c[29 * stride], in31 = c[31 * stride];
  282|       |
  283|   570k|        t16a = ((in1  *   201         - in31 * (4091 - 4096) + 2048) >> 12) - in31;
  284|   570k|        t17a = ((in17 * (3035 - 4096) - in15 *  2751         + 2048) >> 12) + in17;
  285|   570k|        t18a = ((in9  *  1751         - in23 * (3703 - 4096) + 2048) >> 12) - in23;
  286|   570k|        t19a = ((in25 * (3857 - 4096) - in7  *  1380         + 2048) >> 12) + in25;
  287|   570k|        t20a = ((in5  *   995         - in27 * (3973 - 4096) + 2048) >> 12) - in27;
  288|   570k|        t21a = ((in21 * (3513 - 4096) - in11 *  2106         + 2048) >> 12) + in21;
  289|   570k|        t22a =  (in13 *  1220         - in19 *  1645         + 1024) >> 11;
  290|   570k|        t23a = ((in29 * (4052 - 4096) - in3  *   601         + 2048) >> 12) + in29;
  291|   570k|        t24a = ((in29 *   601         + in3  * (4052 - 4096) + 2048) >> 12) + in3;
  292|   570k|        t25a =  (in13 *  1645         + in19 *  1220         + 1024) >> 11;
  293|   570k|        t26a = ((in21 *  2106         + in11 * (3513 - 4096) + 2048) >> 12) + in11;
  294|   570k|        t27a = ((in5  * (3973 - 4096) + in27 *   995         + 2048) >> 12) + in5;
  295|   570k|        t28a = ((in25 *  1380         + in7  * (3857 - 4096) + 2048) >> 12) + in7;
  296|   570k|        t29a = ((in9  * (3703 - 4096) + in23 *  1751         + 2048) >> 12) + in9;
  297|   570k|        t30a = ((in17 *  2751         + in15 * (3035 - 4096) + 2048) >> 12) + in15;
  298|   570k|        t31a = ((in1  * (4091 - 4096) + in31 *   201         + 2048) >> 12) + in1;
  299|   570k|    }
  300|       |
  301|  1.57M|    int t16 = CLIP(t16a + t17a);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  302|  1.57M|    int t17 = CLIP(t16a - t17a);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  303|  1.57M|    int t18 = CLIP(t19a - t18a);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  304|  1.57M|    int t19 = CLIP(t19a + t18a);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  305|  1.57M|    int t20 = CLIP(t20a + t21a);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  306|  1.57M|    int t21 = CLIP(t20a - t21a);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  307|  1.57M|    int t22 = CLIP(t23a - t22a);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  308|  1.57M|    int t23 = CLIP(t23a + t22a);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  309|  1.57M|    int t24 = CLIP(t24a + t25a);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  310|  1.57M|    int t25 = CLIP(t24a - t25a);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  311|  1.57M|    int t26 = CLIP(t27a - t26a);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  312|  1.57M|    int t27 = CLIP(t27a + t26a);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  313|  1.57M|    int t28 = CLIP(t28a + t29a);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  314|  1.57M|    int t29 = CLIP(t28a - t29a);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  315|  1.57M|    int t30 = CLIP(t31a - t30a);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  316|  1.57M|    int t31 = CLIP(t31a + t30a);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  317|       |
  318|  1.57M|    t17a = ((  t30 *   799         - t17 * (4017 - 4096)  + 2048) >> 12) - t17;
  319|  1.57M|    t30a = ((  t30 * (4017 - 4096) + t17 *   799          + 2048) >> 12) + t30;
  320|  1.57M|    t18a = ((-(t29 * (4017 - 4096) + t18 *   799)         + 2048) >> 12) - t29;
  321|  1.57M|    t29a = ((  t29 *   799         - t18 * (4017 - 4096)  + 2048) >> 12) - t18;
  322|  1.57M|    t21a =  (  t26 *  1703         - t21 *  1138          + 1024) >> 11;
  323|  1.57M|    t26a =  (  t26 *  1138         + t21 *  1703          + 1024) >> 11;
  324|  1.57M|    t22a =  (-(t25 *  1138         + t22 *  1703        ) + 1024) >> 11;
  325|  1.57M|    t25a =  (  t25 *  1703         - t22 *  1138          + 1024) >> 11;
  326|       |
  327|  1.57M|    t16a = CLIP(t16  + t19);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  328|  1.57M|    t17  = CLIP(t17a + t18a);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  329|  1.57M|    t18  = CLIP(t17a - t18a);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  330|  1.57M|    t19a = CLIP(t16  - t19);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  331|  1.57M|    t20a = CLIP(t23  - t20);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  332|  1.57M|    t21  = CLIP(t22a - t21a);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  333|  1.57M|    t22  = CLIP(t22a + t21a);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  334|  1.57M|    t23a = CLIP(t23  + t20);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  335|  1.57M|    t24a = CLIP(t24  + t27);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  336|  1.57M|    t25  = CLIP(t25a + t26a);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  337|  1.57M|    t26  = CLIP(t25a - t26a);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  338|  1.57M|    t27a = CLIP(t24  - t27);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  339|  1.57M|    t28a = CLIP(t31  - t28);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  340|  1.57M|    t29  = CLIP(t30a - t29a);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  341|  1.57M|    t30  = CLIP(t30a + t29a);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  342|  1.57M|    t31a = CLIP(t31  + t28);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  343|       |
  344|  1.57M|    t18a = ((  t29  *  1567         - t18  * (3784 - 4096)  + 2048) >> 12) - t18;
  345|  1.57M|    t29a = ((  t29  * (3784 - 4096) + t18  *  1567          + 2048) >> 12) + t29;
  346|  1.57M|    t19  = ((  t28a *  1567         - t19a * (3784 - 4096)  + 2048) >> 12) - t19a;
  347|  1.57M|    t28  = ((  t28a * (3784 - 4096) + t19a *  1567          + 2048) >> 12) + t28a;
  348|  1.57M|    t20  = ((-(t27a * (3784 - 4096) + t20a *  1567)         + 2048) >> 12) - t27a;
  349|  1.57M|    t27  = ((  t27a *  1567         - t20a * (3784 - 4096)  + 2048) >> 12) - t20a;
  350|  1.57M|    t21a = ((-(t26  * (3784 - 4096) + t21  *  1567)         + 2048) >> 12) - t26;
  351|  1.57M|    t26a = ((  t26  *  1567         - t21  * (3784 - 4096)  + 2048) >> 12) - t21;
  352|       |
  353|  1.57M|    t16  = CLIP(t16a + t23a);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  354|  1.57M|    t17a = CLIP(t17  + t22);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  355|  1.57M|    t18  = CLIP(t18a + t21a);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  356|  1.57M|    t19a = CLIP(t19  + t20);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  357|  1.57M|    t20a = CLIP(t19  - t20);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  358|  1.57M|    t21  = CLIP(t18a - t21a);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  359|  1.57M|    t22a = CLIP(t17  - t22);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  360|  1.57M|    t23  = CLIP(t16a - t23a);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  361|  1.57M|    t24  = CLIP(t31a - t24a);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  362|  1.57M|    t25a = CLIP(t30  - t25);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  363|  1.57M|    t26  = CLIP(t29a - t26a);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  364|  1.57M|    t27a = CLIP(t28  - t27);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  365|  1.57M|    t28a = CLIP(t28  + t27);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  366|  1.57M|    t29  = CLIP(t29a + t26a);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  367|  1.57M|    t30a = CLIP(t30  + t25);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  368|  1.57M|    t31  = CLIP(t31a + t24a);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  369|       |
  370|  1.57M|    t20  = ((t27a - t20a) * 181 + 128) >> 8;
  371|  1.57M|    t27  = ((t27a + t20a) * 181 + 128) >> 8;
  372|  1.57M|    t21a = ((t26  - t21 ) * 181 + 128) >> 8;
  373|  1.57M|    t26a = ((t26  + t21 ) * 181 + 128) >> 8;
  374|  1.57M|    t22  = ((t25a - t22a) * 181 + 128) >> 8;
  375|  1.57M|    t25  = ((t25a + t22a) * 181 + 128) >> 8;
  376|  1.57M|    t23a = ((t24  - t23 ) * 181 + 128) >> 8;
  377|  1.57M|    t24a = ((t24  + t23 ) * 181 + 128) >> 8;
  378|       |
  379|  1.57M|    const int t0  = c[ 0 * stride];
  380|  1.57M|    const int t1  = c[ 2 * stride];
  381|  1.57M|    const int t2  = c[ 4 * stride];
  382|  1.57M|    const int t3  = c[ 6 * stride];
  383|  1.57M|    const int t4  = c[ 8 * stride];
  384|  1.57M|    const int t5  = c[10 * stride];
  385|  1.57M|    const int t6  = c[12 * stride];
  386|  1.57M|    const int t7  = c[14 * stride];
  387|  1.57M|    const int t8  = c[16 * stride];
  388|  1.57M|    const int t9  = c[18 * stride];
  389|  1.57M|    const int t10 = c[20 * stride];
  390|  1.57M|    const int t11 = c[22 * stride];
  391|  1.57M|    const int t12 = c[24 * stride];
  392|  1.57M|    const int t13 = c[26 * stride];
  393|  1.57M|    const int t14 = c[28 * stride];
  394|  1.57M|    const int t15 = c[30 * stride];
  395|       |
  396|  1.57M|    c[ 0 * stride] = CLIP(t0  + t31);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  397|  1.57M|    c[ 1 * stride] = CLIP(t1  + t30a);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  398|  1.57M|    c[ 2 * stride] = CLIP(t2  + t29);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  399|  1.57M|    c[ 3 * stride] = CLIP(t3  + t28a);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  400|  1.57M|    c[ 4 * stride] = CLIP(t4  + t27);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  401|  1.57M|    c[ 5 * stride] = CLIP(t5  + t26a);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  402|  1.57M|    c[ 6 * stride] = CLIP(t6  + t25);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  403|  1.57M|    c[ 7 * stride] = CLIP(t7  + t24a);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  404|  1.57M|    c[ 8 * stride] = CLIP(t8  + t23a);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  405|  1.57M|    c[ 9 * stride] = CLIP(t9  + t22);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  406|  1.57M|    c[10 * stride] = CLIP(t10 + t21a);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  407|  1.57M|    c[11 * stride] = CLIP(t11 + t20);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  408|  1.57M|    c[12 * stride] = CLIP(t12 + t19a);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  409|  1.57M|    c[13 * stride] = CLIP(t13 + t18);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  410|  1.57M|    c[14 * stride] = CLIP(t14 + t17a);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  411|  1.57M|    c[15 * stride] = CLIP(t15 + t16);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  412|  1.57M|    c[16 * stride] = CLIP(t15 - t16);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  413|  1.57M|    c[17 * stride] = CLIP(t14 - t17a);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  414|  1.57M|    c[18 * stride] = CLIP(t13 - t18);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  415|  1.57M|    c[19 * stride] = CLIP(t12 - t19a);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  416|  1.57M|    c[20 * stride] = CLIP(t11 - t20);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  417|  1.57M|    c[21 * stride] = CLIP(t10 - t21a);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  418|  1.57M|    c[22 * stride] = CLIP(t9  - t22);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  419|  1.57M|    c[23 * stride] = CLIP(t8  - t23a);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  420|  1.57M|    c[24 * stride] = CLIP(t7  - t24a);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  421|  1.57M|    c[25 * stride] = CLIP(t6  - t25);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  422|  1.57M|    c[26 * stride] = CLIP(t5  - t26a);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  423|  1.57M|    c[27 * stride] = CLIP(t4  - t27);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  424|  1.57M|    c[28 * stride] = CLIP(t3  - t28a);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  425|  1.57M|    c[29 * stride] = CLIP(t2  - t29);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  426|  1.57M|    c[30 * stride] = CLIP(t1  - t30a);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  427|  1.57M|    c[31 * stride] = CLIP(t0  - t31);
  ------------------
  |  |   37|  1.57M|#define CLIP(a) iclip(a, min, max)
  ------------------
  428|  1.57M|}
itx_1d.c:inv_dct64_1d_c:
  438|  1.00M|{
  439|  1.00M|    assert(stride > 0);
  ------------------
  |  Branch (439:5): [True: 1.00M, False: 18.4E]
  ------------------
  440|  1.00M|    inv_dct32_1d_internal_c(c, stride << 1, min, max, 1);
  441|       |
  442|  1.00M|    const int in1  = c[ 1 * stride], in3  = c[ 3 * stride];
  443|  1.00M|    const int in5  = c[ 5 * stride], in7  = c[ 7 * stride];
  444|  1.00M|    const int in9  = c[ 9 * stride], in11 = c[11 * stride];
  445|  1.00M|    const int in13 = c[13 * stride], in15 = c[15 * stride];
  446|  1.00M|    const int in17 = c[17 * stride], in19 = c[19 * stride];
  447|  1.00M|    const int in21 = c[21 * stride], in23 = c[23 * stride];
  448|  1.00M|    const int in25 = c[25 * stride], in27 = c[27 * stride];
  449|  1.00M|    const int in29 = c[29 * stride], in31 = c[31 * stride];
  450|       |
  451|  1.00M|    int t32a = (in1  *   101 + 2048) >> 12;
  452|  1.00M|    int t33a = (in31 * -2824 + 2048) >> 12;
  453|  1.00M|    int t34a = (in17 *  1660 + 2048) >> 12;
  454|  1.00M|    int t35a = (in15 * -1474 + 2048) >> 12;
  455|  1.00M|    int t36a = (in9  *   897 + 2048) >> 12;
  456|  1.00M|    int t37a = (in23 * -2191 + 2048) >> 12;
  457|  1.00M|    int t38a = (in25 *  2359 + 2048) >> 12;
  458|  1.00M|    int t39a = (in7  *  -700 + 2048) >> 12;
  459|  1.00M|    int t40a = (in5  *   501 + 2048) >> 12;
  460|  1.00M|    int t41a = (in27 * -2520 + 2048) >> 12;
  461|  1.00M|    int t42a = (in21 *  2019 + 2048) >> 12;
  462|  1.00M|    int t43a = (in11 * -1092 + 2048) >> 12;
  463|  1.00M|    int t44a = (in13 *  1285 + 2048) >> 12;
  464|  1.00M|    int t45a = (in19 * -1842 + 2048) >> 12;
  465|  1.00M|    int t46a = (in29 *  2675 + 2048) >> 12;
  466|  1.00M|    int t47a = (in3  *  -301 + 2048) >> 12;
  467|  1.00M|    int t48a = (in3  *  4085 + 2048) >> 12;
  468|  1.00M|    int t49a = (in29 *  3102 + 2048) >> 12;
  469|  1.00M|    int t50a = (in19 *  3659 + 2048) >> 12;
  470|  1.00M|    int t51a = (in13 *  3889 + 2048) >> 12;
  471|  1.00M|    int t52a = (in11 *  3948 + 2048) >> 12;
  472|  1.00M|    int t53a = (in21 *  3564 + 2048) >> 12;
  473|  1.00M|    int t54a = (in27 *  3229 + 2048) >> 12;
  474|  1.00M|    int t55a = (in5  *  4065 + 2048) >> 12;
  475|  1.00M|    int t56a = (in7  *  4036 + 2048) >> 12;
  476|  1.00M|    int t57a = (in25 *  3349 + 2048) >> 12;
  477|  1.00M|    int t58a = (in23 *  3461 + 2048) >> 12;
  478|  1.00M|    int t59a = (in9  *  3996 + 2048) >> 12;
  479|  1.00M|    int t60a = (in15 *  3822 + 2048) >> 12;
  480|  1.00M|    int t61a = (in17 *  3745 + 2048) >> 12;
  481|  1.00M|    int t62a = (in31 *  2967 + 2048) >> 12;
  482|  1.00M|    int t63a = (in1  *  4095 + 2048) >> 12;
  483|       |
  484|  1.00M|    int t32 = CLIP(t32a + t33a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  485|  1.00M|    int t33 = CLIP(t32a - t33a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  486|  1.00M|    int t34 = CLIP(t35a - t34a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  487|  1.00M|    int t35 = CLIP(t35a + t34a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  488|  1.00M|    int t36 = CLIP(t36a + t37a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  489|  1.00M|    int t37 = CLIP(t36a - t37a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  490|  1.00M|    int t38 = CLIP(t39a - t38a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  491|  1.00M|    int t39 = CLIP(t39a + t38a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  492|  1.00M|    int t40 = CLIP(t40a + t41a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  493|  1.00M|    int t41 = CLIP(t40a - t41a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  494|  1.00M|    int t42 = CLIP(t43a - t42a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  495|  1.00M|    int t43 = CLIP(t43a + t42a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  496|  1.00M|    int t44 = CLIP(t44a + t45a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  497|  1.00M|    int t45 = CLIP(t44a - t45a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  498|  1.00M|    int t46 = CLIP(t47a - t46a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  499|  1.00M|    int t47 = CLIP(t47a + t46a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  500|  1.00M|    int t48 = CLIP(t48a + t49a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  501|  1.00M|    int t49 = CLIP(t48a - t49a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  502|  1.00M|    int t50 = CLIP(t51a - t50a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  503|  1.00M|    int t51 = CLIP(t51a + t50a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  504|  1.00M|    int t52 = CLIP(t52a + t53a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  505|  1.00M|    int t53 = CLIP(t52a - t53a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  506|  1.00M|    int t54 = CLIP(t55a - t54a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  507|  1.00M|    int t55 = CLIP(t55a + t54a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  508|  1.00M|    int t56 = CLIP(t56a + t57a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  509|  1.00M|    int t57 = CLIP(t56a - t57a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  510|  1.00M|    int t58 = CLIP(t59a - t58a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  511|  1.00M|    int t59 = CLIP(t59a + t58a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  512|  1.00M|    int t60 = CLIP(t60a + t61a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  513|  1.00M|    int t61 = CLIP(t60a - t61a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  514|  1.00M|    int t62 = CLIP(t63a - t62a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  515|  1.00M|    int t63 = CLIP(t63a + t62a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  516|       |
  517|  1.00M|    t33a = ((t33 * (4096 - 4076) + t62 *   401         + 2048) >> 12) - t33;
  518|  1.00M|    t34a = ((t34 *  -401         + t61 * (4096 - 4076) + 2048) >> 12) - t61;
  519|  1.00M|    t37a =  (t37 * -1299         + t58 *  1583         + 1024) >> 11;
  520|  1.00M|    t38a =  (t38 * -1583         + t57 * -1299         + 1024) >> 11;
  521|  1.00M|    t41a = ((t41 * (4096 - 3612) + t54 *  1931         + 2048) >> 12) - t41;
  522|  1.00M|    t42a = ((t42 * -1931         + t53 * (4096 - 3612) + 2048) >> 12) - t53;
  523|  1.00M|    t45a = ((t45 * -1189         + t50 * (3920 - 4096) + 2048) >> 12) + t50;
  524|  1.00M|    t46a = ((t46 * (4096 - 3920) + t49 * -1189         + 2048) >> 12) - t46;
  525|  1.00M|    t49a = ((t46 * -1189         + t49 * (3920 - 4096) + 2048) >> 12) + t49;
  526|  1.00M|    t50a = ((t45 * (3920 - 4096) + t50 *  1189         + 2048) >> 12) + t45;
  527|  1.00M|    t53a = ((t42 * (4096 - 3612) + t53 *  1931         + 2048) >> 12) - t42;
  528|  1.00M|    t54a = ((t41 *  1931         + t54 * (3612 - 4096) + 2048) >> 12) + t54;
  529|  1.00M|    t57a =  (t38 * -1299         + t57 *  1583         + 1024) >> 11;
  530|  1.00M|    t58a =  (t37 *  1583         + t58 *  1299         + 1024) >> 11;
  531|  1.00M|    t61a = ((t34 * (4096 - 4076) + t61 *   401         + 2048) >> 12) - t34;
  532|  1.00M|    t62a = ((t33 *   401         + t62 * (4076 - 4096) + 2048) >> 12) + t62;
  533|       |
  534|  1.00M|    t32a = CLIP(t32  + t35);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  535|  1.00M|    t33  = CLIP(t33a + t34a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  536|  1.00M|    t34  = CLIP(t33a - t34a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  537|  1.00M|    t35a = CLIP(t32  - t35);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  538|  1.00M|    t36a = CLIP(t39  - t36);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  539|  1.00M|    t37  = CLIP(t38a - t37a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  540|  1.00M|    t38  = CLIP(t38a + t37a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  541|  1.00M|    t39a = CLIP(t39  + t36);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  542|  1.00M|    t40a = CLIP(t40  + t43);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  543|  1.00M|    t41  = CLIP(t41a + t42a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  544|  1.00M|    t42  = CLIP(t41a - t42a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  545|  1.00M|    t43a = CLIP(t40  - t43);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  546|  1.00M|    t44a = CLIP(t47  - t44);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  547|  1.00M|    t45  = CLIP(t46a - t45a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  548|  1.00M|    t46  = CLIP(t46a + t45a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  549|  1.00M|    t47a = CLIP(t47  + t44);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  550|  1.00M|    t48a = CLIP(t48  + t51);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  551|  1.00M|    t49  = CLIP(t49a + t50a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  552|  1.00M|    t50  = CLIP(t49a - t50a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  553|  1.00M|    t51a = CLIP(t48  - t51);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  554|  1.00M|    t52a = CLIP(t55  - t52);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  555|  1.00M|    t53  = CLIP(t54a - t53a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  556|  1.00M|    t54  = CLIP(t54a + t53a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  557|  1.00M|    t55a = CLIP(t55  + t52);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  558|  1.00M|    t56a = CLIP(t56  + t59);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  559|  1.00M|    t57  = CLIP(t57a + t58a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  560|  1.00M|    t58  = CLIP(t57a - t58a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  561|  1.00M|    t59a = CLIP(t56  - t59);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  562|  1.00M|    t60a = CLIP(t63  - t60);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  563|  1.00M|    t61  = CLIP(t62a - t61a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  564|  1.00M|    t62  = CLIP(t62a + t61a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  565|  1.00M|    t63a = CLIP(t63  + t60);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  566|       |
  567|  1.00M|    t34a = ((t34  * (4096 - 4017) + t61  *   799         + 2048) >> 12) - t34;
  568|  1.00M|    t35  = ((t35a * (4096 - 4017) + t60a *   799         + 2048) >> 12) - t35a;
  569|  1.00M|    t36  = ((t36a *  -799         + t59a * (4096 - 4017) + 2048) >> 12) - t59a;
  570|  1.00M|    t37a = ((t37  *  -799         + t58  * (4096 - 4017) + 2048) >> 12) - t58;
  571|  1.00M|    t42a =  (t42  * -1138         + t53  *  1703         + 1024) >> 11;
  572|  1.00M|    t43  =  (t43a * -1138         + t52a *  1703         + 1024) >> 11;
  573|  1.00M|    t44  =  (t44a * -1703         + t51a * -1138         + 1024) >> 11;
  574|  1.00M|    t45a =  (t45  * -1703         + t50  * -1138         + 1024) >> 11;
  575|  1.00M|    t50a =  (t45  * -1138         + t50  *  1703         + 1024) >> 11;
  576|  1.00M|    t51  =  (t44a * -1138         + t51a *  1703         + 1024) >> 11;
  577|  1.00M|    t52  =  (t43a *  1703         + t52a *  1138         + 1024) >> 11;
  578|  1.00M|    t53a =  (t42  *  1703         + t53  *  1138         + 1024) >> 11;
  579|  1.00M|    t58a = ((t37  * (4096 - 4017) + t58  *   799         + 2048) >> 12) - t37;
  580|  1.00M|    t59  = ((t36a * (4096 - 4017) + t59a *   799         + 2048) >> 12) - t36a;
  581|  1.00M|    t60  = ((t35a *   799         + t60a * (4017 - 4096) + 2048) >> 12) + t60a;
  582|  1.00M|    t61a = ((t34  *   799         + t61  * (4017 - 4096) + 2048) >> 12) + t61;
  583|       |
  584|  1.00M|    t32  = CLIP(t32a + t39a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  585|  1.00M|    t33a = CLIP(t33  + t38);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  586|  1.00M|    t34  = CLIP(t34a + t37a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  587|  1.00M|    t35a = CLIP(t35  + t36);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  588|  1.00M|    t36a = CLIP(t35  - t36);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  589|  1.00M|    t37  = CLIP(t34a - t37a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  590|  1.00M|    t38a = CLIP(t33  - t38);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  591|  1.00M|    t39  = CLIP(t32a - t39a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  592|  1.00M|    t40  = CLIP(t47a - t40a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  593|  1.00M|    t41a = CLIP(t46  - t41);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  594|  1.00M|    t42  = CLIP(t45a - t42a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  595|  1.00M|    t43a = CLIP(t44  - t43);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  596|  1.00M|    t44a = CLIP(t44  + t43);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  597|  1.00M|    t45  = CLIP(t45a + t42a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  598|  1.00M|    t46a = CLIP(t46  + t41);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  599|  1.00M|    t47  = CLIP(t47a + t40a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  600|  1.00M|    t48  = CLIP(t48a + t55a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  601|  1.00M|    t49a = CLIP(t49  + t54);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  602|  1.00M|    t50  = CLIP(t50a + t53a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  603|  1.00M|    t51a = CLIP(t51  + t52);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  604|  1.00M|    t52a = CLIP(t51  - t52);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  605|  1.00M|    t53  = CLIP(t50a - t53a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  606|  1.00M|    t54a = CLIP(t49  - t54);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  607|  1.00M|    t55  = CLIP(t48a - t55a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  608|  1.00M|    t56  = CLIP(t63a - t56a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  609|  1.00M|    t57a = CLIP(t62  - t57);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  610|  1.00M|    t58  = CLIP(t61a - t58a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  611|  1.00M|    t59a = CLIP(t60  - t59);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  612|  1.00M|    t60a = CLIP(t60  + t59);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  613|  1.00M|    t61  = CLIP(t61a + t58a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  614|  1.00M|    t62a = CLIP(t62  + t57);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  615|  1.00M|    t63  = CLIP(t63a + t56a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  616|       |
  617|  1.00M|    t36  = ((t36a * (4096 - 3784) + t59a *  1567         + 2048) >> 12) - t36a;
  618|  1.00M|    t37a = ((t37  * (4096 - 3784) + t58  *  1567         + 2048) >> 12) - t37;
  619|  1.00M|    t38  = ((t38a * (4096 - 3784) + t57a *  1567         + 2048) >> 12) - t38a;
  620|  1.00M|    t39a = ((t39  * (4096 - 3784) + t56  *  1567         + 2048) >> 12) - t39;
  621|  1.00M|    t40a = ((t40  * -1567         + t55  * (4096 - 3784) + 2048) >> 12) - t55;
  622|  1.00M|    t41  = ((t41a * -1567         + t54a * (4096 - 3784) + 2048) >> 12) - t54a;
  623|  1.00M|    t42a = ((t42  * -1567         + t53  * (4096 - 3784) + 2048) >> 12) - t53;
  624|  1.00M|    t43  = ((t43a * -1567         + t52a * (4096 - 3784) + 2048) >> 12) - t52a;
  625|  1.00M|    t52  = ((t43a * (4096 - 3784) + t52a *  1567         + 2048) >> 12) - t43a;
  626|  1.00M|    t53a = ((t42  * (4096 - 3784) + t53  *  1567         + 2048) >> 12) - t42;
  627|  1.00M|    t54  = ((t41a * (4096 - 3784) + t54a *  1567         + 2048) >> 12) - t41a;
  628|  1.00M|    t55a = ((t40  * (4096 - 3784) + t55  *  1567         + 2048) >> 12) - t40;
  629|  1.00M|    t56a = ((t39  *  1567         + t56  * (3784 - 4096) + 2048) >> 12) + t56;
  630|  1.00M|    t57  = ((t38a *  1567         + t57a * (3784 - 4096) + 2048) >> 12) + t57a;
  631|  1.00M|    t58a = ((t37  *  1567         + t58  * (3784 - 4096) + 2048) >> 12) + t58;
  632|  1.00M|    t59  = ((t36a *  1567         + t59a * (3784 - 4096) + 2048) >> 12) + t59a;
  633|       |
  634|  1.00M|    t32a = CLIP(t32  + t47);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  635|  1.00M|    t33  = CLIP(t33a + t46a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  636|  1.00M|    t34a = CLIP(t34  + t45);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  637|  1.00M|    t35  = CLIP(t35a + t44a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  638|  1.00M|    t36a = CLIP(t36  + t43);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  639|  1.00M|    t37  = CLIP(t37a + t42a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  640|  1.00M|    t38a = CLIP(t38  + t41);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  641|  1.00M|    t39  = CLIP(t39a + t40a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  642|  1.00M|    t40  = CLIP(t39a - t40a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  643|  1.00M|    t41a = CLIP(t38  - t41);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  644|  1.00M|    t42  = CLIP(t37a - t42a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  645|  1.00M|    t43a = CLIP(t36  - t43);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  646|  1.00M|    t44  = CLIP(t35a - t44a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  647|  1.00M|    t45a = CLIP(t34  - t45);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  648|  1.00M|    t46  = CLIP(t33a - t46a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  649|  1.00M|    t47a = CLIP(t32  - t47);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  650|  1.00M|    t48a = CLIP(t63  - t48);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  651|  1.00M|    t49  = CLIP(t62a - t49a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  652|  1.00M|    t50a = CLIP(t61  - t50);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  653|  1.00M|    t51  = CLIP(t60a - t51a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  654|  1.00M|    t52a = CLIP(t59  - t52);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  655|  1.00M|    t53  = CLIP(t58a - t53a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  656|  1.00M|    t54a = CLIP(t57  - t54);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  657|  1.00M|    t55  = CLIP(t56a - t55a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  658|  1.00M|    t56  = CLIP(t56a + t55a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  659|  1.00M|    t57a = CLIP(t57  + t54);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  660|  1.00M|    t58  = CLIP(t58a + t53a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  661|  1.00M|    t59a = CLIP(t59  + t52);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  662|  1.00M|    t60  = CLIP(t60a + t51a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  663|  1.00M|    t61a = CLIP(t61  + t50);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  664|  1.00M|    t62  = CLIP(t62a + t49a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  665|  1.00M|    t63a = CLIP(t63  + t48);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  666|       |
  667|  1.00M|    t40a = ((t55  - t40 ) * 181 + 128) >> 8;
  668|  1.00M|    t41  = ((t54a - t41a) * 181 + 128) >> 8;
  669|  1.00M|    t42a = ((t53  - t42 ) * 181 + 128) >> 8;
  670|  1.00M|    t43  = ((t52a - t43a) * 181 + 128) >> 8;
  671|  1.00M|    t44a = ((t51  - t44 ) * 181 + 128) >> 8;
  672|  1.00M|    t45  = ((t50a - t45a) * 181 + 128) >> 8;
  673|  1.00M|    t46a = ((t49  - t46 ) * 181 + 128) >> 8;
  674|  1.00M|    t47  = ((t48a - t47a) * 181 + 128) >> 8;
  675|  1.00M|    t48  = ((t47a + t48a) * 181 + 128) >> 8;
  676|  1.00M|    t49a = ((t46  + t49 ) * 181 + 128) >> 8;
  677|  1.00M|    t50  = ((t45a + t50a) * 181 + 128) >> 8;
  678|  1.00M|    t51a = ((t44  + t51 ) * 181 + 128) >> 8;
  679|  1.00M|    t52  = ((t43a + t52a) * 181 + 128) >> 8;
  680|  1.00M|    t53a = ((t42  + t53 ) * 181 + 128) >> 8;
  681|  1.00M|    t54  = ((t41a + t54a) * 181 + 128) >> 8;
  682|  1.00M|    t55a = ((t40  + t55 ) * 181 + 128) >> 8;
  683|       |
  684|  1.00M|    const int t0  = c[ 0 * stride];
  685|  1.00M|    const int t1  = c[ 2 * stride];
  686|  1.00M|    const int t2  = c[ 4 * stride];
  687|  1.00M|    const int t3  = c[ 6 * stride];
  688|  1.00M|    const int t4  = c[ 8 * stride];
  689|  1.00M|    const int t5  = c[10 * stride];
  690|  1.00M|    const int t6  = c[12 * stride];
  691|  1.00M|    const int t7  = c[14 * stride];
  692|  1.00M|    const int t8  = c[16 * stride];
  693|  1.00M|    const int t9  = c[18 * stride];
  694|  1.00M|    const int t10 = c[20 * stride];
  695|  1.00M|    const int t11 = c[22 * stride];
  696|  1.00M|    const int t12 = c[24 * stride];
  697|  1.00M|    const int t13 = c[26 * stride];
  698|  1.00M|    const int t14 = c[28 * stride];
  699|  1.00M|    const int t15 = c[30 * stride];
  700|  1.00M|    const int t16 = c[32 * stride];
  701|  1.00M|    const int t17 = c[34 * stride];
  702|  1.00M|    const int t18 = c[36 * stride];
  703|  1.00M|    const int t19 = c[38 * stride];
  704|  1.00M|    const int t20 = c[40 * stride];
  705|  1.00M|    const int t21 = c[42 * stride];
  706|  1.00M|    const int t22 = c[44 * stride];
  707|  1.00M|    const int t23 = c[46 * stride];
  708|  1.00M|    const int t24 = c[48 * stride];
  709|  1.00M|    const int t25 = c[50 * stride];
  710|  1.00M|    const int t26 = c[52 * stride];
  711|  1.00M|    const int t27 = c[54 * stride];
  712|  1.00M|    const int t28 = c[56 * stride];
  713|  1.00M|    const int t29 = c[58 * stride];
  714|  1.00M|    const int t30 = c[60 * stride];
  715|  1.00M|    const int t31 = c[62 * stride];
  716|       |
  717|  1.00M|    c[ 0 * stride] = CLIP(t0  + t63a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  718|  1.00M|    c[ 1 * stride] = CLIP(t1  + t62);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  719|  1.00M|    c[ 2 * stride] = CLIP(t2  + t61a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  720|  1.00M|    c[ 3 * stride] = CLIP(t3  + t60);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  721|  1.00M|    c[ 4 * stride] = CLIP(t4  + t59a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  722|  1.00M|    c[ 5 * stride] = CLIP(t5  + t58);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  723|  1.00M|    c[ 6 * stride] = CLIP(t6  + t57a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  724|  1.00M|    c[ 7 * stride] = CLIP(t7  + t56);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  725|  1.00M|    c[ 8 * stride] = CLIP(t8  + t55a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  726|  1.00M|    c[ 9 * stride] = CLIP(t9  + t54);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  727|  1.00M|    c[10 * stride] = CLIP(t10 + t53a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  728|  1.00M|    c[11 * stride] = CLIP(t11 + t52);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  729|  1.00M|    c[12 * stride] = CLIP(t12 + t51a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  730|  1.00M|    c[13 * stride] = CLIP(t13 + t50);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  731|  1.00M|    c[14 * stride] = CLIP(t14 + t49a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  732|  1.00M|    c[15 * stride] = CLIP(t15 + t48);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  733|  1.00M|    c[16 * stride] = CLIP(t16 + t47);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  734|  1.00M|    c[17 * stride] = CLIP(t17 + t46a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  735|  1.00M|    c[18 * stride] = CLIP(t18 + t45);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  736|  1.00M|    c[19 * stride] = CLIP(t19 + t44a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  737|  1.00M|    c[20 * stride] = CLIP(t20 + t43);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  738|  1.00M|    c[21 * stride] = CLIP(t21 + t42a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  739|  1.00M|    c[22 * stride] = CLIP(t22 + t41);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  740|  1.00M|    c[23 * stride] = CLIP(t23 + t40a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  741|  1.00M|    c[24 * stride] = CLIP(t24 + t39);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  742|  1.00M|    c[25 * stride] = CLIP(t25 + t38a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  743|  1.00M|    c[26 * stride] = CLIP(t26 + t37);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  744|  1.00M|    c[27 * stride] = CLIP(t27 + t36a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  745|  1.00M|    c[28 * stride] = CLIP(t28 + t35);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  746|  1.00M|    c[29 * stride] = CLIP(t29 + t34a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  747|  1.00M|    c[30 * stride] = CLIP(t30 + t33);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  748|  1.00M|    c[31 * stride] = CLIP(t31 + t32a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  749|  1.00M|    c[32 * stride] = CLIP(t31 - t32a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  750|  1.00M|    c[33 * stride] = CLIP(t30 - t33);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  751|  1.00M|    c[34 * stride] = CLIP(t29 - t34a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  752|  1.00M|    c[35 * stride] = CLIP(t28 - t35);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  753|  1.00M|    c[36 * stride] = CLIP(t27 - t36a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  754|  1.00M|    c[37 * stride] = CLIP(t26 - t37);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  755|  1.00M|    c[38 * stride] = CLIP(t25 - t38a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  756|  1.00M|    c[39 * stride] = CLIP(t24 - t39);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  757|  1.00M|    c[40 * stride] = CLIP(t23 - t40a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  758|  1.00M|    c[41 * stride] = CLIP(t22 - t41);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  759|  1.00M|    c[42 * stride] = CLIP(t21 - t42a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  760|  1.00M|    c[43 * stride] = CLIP(t20 - t43);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  761|  1.00M|    c[44 * stride] = CLIP(t19 - t44a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  762|  1.00M|    c[45 * stride] = CLIP(t18 - t45);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  763|  1.00M|    c[46 * stride] = CLIP(t17 - t46a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  764|  1.00M|    c[47 * stride] = CLIP(t16 - t47);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  765|  1.00M|    c[48 * stride] = CLIP(t15 - t48);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  766|  1.00M|    c[49 * stride] = CLIP(t14 - t49a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  767|  1.00M|    c[50 * stride] = CLIP(t13 - t50);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  768|  1.00M|    c[51 * stride] = CLIP(t12 - t51a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  769|  1.00M|    c[52 * stride] = CLIP(t11 - t52);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  770|  1.00M|    c[53 * stride] = CLIP(t10 - t53a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  771|  1.00M|    c[54 * stride] = CLIP(t9  - t54);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  772|  1.00M|    c[55 * stride] = CLIP(t8  - t55a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  773|  1.00M|    c[56 * stride] = CLIP(t7  - t56);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  774|  1.00M|    c[57 * stride] = CLIP(t6  - t57a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  775|  1.00M|    c[58 * stride] = CLIP(t5  - t58);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  776|  1.00M|    c[59 * stride] = CLIP(t4  - t59a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  777|  1.00M|    c[60 * stride] = CLIP(t3  - t60);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  778|  1.00M|    c[61 * stride] = CLIP(t2  - t61a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  779|  1.00M|    c[62 * stride] = CLIP(t1  - t62);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  780|  1.00M|    c[63 * stride] = CLIP(t0  - t63a);
  ------------------
  |  |   37|  1.00M|#define CLIP(a) iclip(a, min, max)
  ------------------
  781|  1.00M|}

dav1d_itx_dsp_init_8bpc:
  220|  3.49k|COLD void bitfn(dav1d_itx_dsp_init)(Dav1dInvTxfmDSPContext *const c, int bpc) {
  221|  3.49k|#define assign_itx_all_fn64(w, h, pfx) \
  222|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  223|  3.49k|        inv_txfm_add_dct_dct_##w##x##h##_c
  224|       |
  225|  3.49k|#define assign_itx_all_fn32(w, h, pfx) \
  226|  3.49k|    assign_itx_all_fn64(w, h, pfx); \
  227|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  228|  3.49k|        inv_txfm_add_identity_identity_##w##x##h##_c
  229|       |
  230|  3.49k|#define assign_itx_all_fn16(w, h, pfx) \
  231|  3.49k|    assign_itx_all_fn32(w, h, pfx); \
  232|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  233|  3.49k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  234|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  235|  3.49k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  236|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  237|  3.49k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  238|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  239|  3.49k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  240|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  241|  3.49k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  242|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  243|  3.49k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  244|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  245|  3.49k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  246|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  247|  3.49k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  248|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  249|  3.49k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  250|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  251|  3.49k|        inv_txfm_add_identity_dct_##w##x##h##_c
  252|       |
  253|  3.49k|#define assign_itx_all_fn84(w, h, pfx) \
  254|  3.49k|    assign_itx_all_fn16(w, h, pfx); \
  255|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][H_FLIPADST] = \
  256|  3.49k|        inv_txfm_add_flipadst_identity_##w##x##h##_c; \
  257|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][V_FLIPADST] = \
  258|  3.49k|        inv_txfm_add_identity_flipadst_##w##x##h##_c; \
  259|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][H_ADST] = \
  260|  3.49k|        inv_txfm_add_adst_identity_##w##x##h##_c; \
  261|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][V_ADST] = \
  262|  3.49k|        inv_txfm_add_identity_adst_##w##x##h##_c; \
  263|  3.49k|
  264|  3.49k|#if !(HAVE_ASM && TRIM_DSP_FUNCTIONS && ( \
  265|  3.49k|  ARCH_AARCH64 || \
  266|  3.49k|  (ARCH_ARM && (defined(__ARM_NEON) || defined(__APPLE__) || defined(_WIN32))) \
  267|  3.49k|))
  268|  3.49k|    c->itxfm_add[TX_4X4][WHT_WHT] = inv_txfm_add_wht_wht_4x4_c;
  269|  3.49k|#endif
  270|  3.49k|    assign_itx_all_fn84( 4,  4, );
  ------------------
  |  |  254|  3.49k|    assign_itx_all_fn16(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  231|  3.49k|    assign_itx_all_fn32(w, h, pfx); \
  |  |  |  |  ------------------
  |  |  |  |  |  |  226|  3.49k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  222|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  |  |  |  |  223|  3.49k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  227|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  |  |  |  |  228|  3.49k|        inv_txfm_add_identity_identity_##w##x##h##_c
  |  |  |  |  ------------------
  |  |  |  |  232|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  |  |  |  |  233|  3.49k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  |  |  |  |  234|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  |  |  |  |  235|  3.49k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  |  |  |  |  236|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  |  |  |  |  237|  3.49k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  |  |  |  |  238|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  |  |  |  |  239|  3.49k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  |  |  |  |  240|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  |  |  |  |  241|  3.49k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  |  |  |  |  242|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  |  |  |  |  243|  3.49k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  |  |  |  |  244|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  |  |  |  |  245|  3.49k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  |  |  |  |  246|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  |  |  |  |  247|  3.49k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  |  |  |  |  248|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  |  |  |  |  249|  3.49k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  |  |  |  |  250|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  |  |  |  |  251|  3.49k|        inv_txfm_add_identity_dct_##w##x##h##_c
  |  |  ------------------
  |  |  255|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][H_FLIPADST] = \
  |  |  256|  3.49k|        inv_txfm_add_flipadst_identity_##w##x##h##_c; \
  |  |  257|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][V_FLIPADST] = \
  |  |  258|  3.49k|        inv_txfm_add_identity_flipadst_##w##x##h##_c; \
  |  |  259|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][H_ADST] = \
  |  |  260|  3.49k|        inv_txfm_add_adst_identity_##w##x##h##_c; \
  |  |  261|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][V_ADST] = \
  |  |  262|  3.49k|        inv_txfm_add_identity_adst_##w##x##h##_c; \
  ------------------
  271|  3.49k|    assign_itx_all_fn84( 4,  8, R);
  ------------------
  |  |  254|  3.49k|    assign_itx_all_fn16(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  231|  3.49k|    assign_itx_all_fn32(w, h, pfx); \
  |  |  |  |  ------------------
  |  |  |  |  |  |  226|  3.49k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  222|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  |  |  |  |  223|  3.49k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  227|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  |  |  |  |  228|  3.49k|        inv_txfm_add_identity_identity_##w##x##h##_c
  |  |  |  |  ------------------
  |  |  |  |  232|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  |  |  |  |  233|  3.49k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  |  |  |  |  234|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  |  |  |  |  235|  3.49k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  |  |  |  |  236|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  |  |  |  |  237|  3.49k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  |  |  |  |  238|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  |  |  |  |  239|  3.49k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  |  |  |  |  240|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  |  |  |  |  241|  3.49k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  |  |  |  |  242|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  |  |  |  |  243|  3.49k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  |  |  |  |  244|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  |  |  |  |  245|  3.49k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  |  |  |  |  246|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  |  |  |  |  247|  3.49k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  |  |  |  |  248|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  |  |  |  |  249|  3.49k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  |  |  |  |  250|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  |  |  |  |  251|  3.49k|        inv_txfm_add_identity_dct_##w##x##h##_c
  |  |  ------------------
  |  |  255|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][H_FLIPADST] = \
  |  |  256|  3.49k|        inv_txfm_add_flipadst_identity_##w##x##h##_c; \
  |  |  257|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][V_FLIPADST] = \
  |  |  258|  3.49k|        inv_txfm_add_identity_flipadst_##w##x##h##_c; \
  |  |  259|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][H_ADST] = \
  |  |  260|  3.49k|        inv_txfm_add_adst_identity_##w##x##h##_c; \
  |  |  261|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][V_ADST] = \
  |  |  262|  3.49k|        inv_txfm_add_identity_adst_##w##x##h##_c; \
  ------------------
  272|  3.49k|    assign_itx_all_fn84( 4, 16, R);
  ------------------
  |  |  254|  3.49k|    assign_itx_all_fn16(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  231|  3.49k|    assign_itx_all_fn32(w, h, pfx); \
  |  |  |  |  ------------------
  |  |  |  |  |  |  226|  3.49k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  222|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  |  |  |  |  223|  3.49k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  227|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  |  |  |  |  228|  3.49k|        inv_txfm_add_identity_identity_##w##x##h##_c
  |  |  |  |  ------------------
  |  |  |  |  232|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  |  |  |  |  233|  3.49k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  |  |  |  |  234|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  |  |  |  |  235|  3.49k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  |  |  |  |  236|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  |  |  |  |  237|  3.49k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  |  |  |  |  238|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  |  |  |  |  239|  3.49k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  |  |  |  |  240|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  |  |  |  |  241|  3.49k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  |  |  |  |  242|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  |  |  |  |  243|  3.49k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  |  |  |  |  244|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  |  |  |  |  245|  3.49k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  |  |  |  |  246|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  |  |  |  |  247|  3.49k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  |  |  |  |  248|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  |  |  |  |  249|  3.49k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  |  |  |  |  250|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  |  |  |  |  251|  3.49k|        inv_txfm_add_identity_dct_##w##x##h##_c
  |  |  ------------------
  |  |  255|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][H_FLIPADST] = \
  |  |  256|  3.49k|        inv_txfm_add_flipadst_identity_##w##x##h##_c; \
  |  |  257|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][V_FLIPADST] = \
  |  |  258|  3.49k|        inv_txfm_add_identity_flipadst_##w##x##h##_c; \
  |  |  259|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][H_ADST] = \
  |  |  260|  3.49k|        inv_txfm_add_adst_identity_##w##x##h##_c; \
  |  |  261|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][V_ADST] = \
  |  |  262|  3.49k|        inv_txfm_add_identity_adst_##w##x##h##_c; \
  ------------------
  273|  3.49k|    assign_itx_all_fn84( 8,  4, R);
  ------------------
  |  |  254|  3.49k|    assign_itx_all_fn16(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  231|  3.49k|    assign_itx_all_fn32(w, h, pfx); \
  |  |  |  |  ------------------
  |  |  |  |  |  |  226|  3.49k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  222|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  |  |  |  |  223|  3.49k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  227|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  |  |  |  |  228|  3.49k|        inv_txfm_add_identity_identity_##w##x##h##_c
  |  |  |  |  ------------------
  |  |  |  |  232|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  |  |  |  |  233|  3.49k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  |  |  |  |  234|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  |  |  |  |  235|  3.49k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  |  |  |  |  236|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  |  |  |  |  237|  3.49k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  |  |  |  |  238|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  |  |  |  |  239|  3.49k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  |  |  |  |  240|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  |  |  |  |  241|  3.49k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  |  |  |  |  242|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  |  |  |  |  243|  3.49k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  |  |  |  |  244|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  |  |  |  |  245|  3.49k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  |  |  |  |  246|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  |  |  |  |  247|  3.49k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  |  |  |  |  248|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  |  |  |  |  249|  3.49k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  |  |  |  |  250|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  |  |  |  |  251|  3.49k|        inv_txfm_add_identity_dct_##w##x##h##_c
  |  |  ------------------
  |  |  255|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][H_FLIPADST] = \
  |  |  256|  3.49k|        inv_txfm_add_flipadst_identity_##w##x##h##_c; \
  |  |  257|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][V_FLIPADST] = \
  |  |  258|  3.49k|        inv_txfm_add_identity_flipadst_##w##x##h##_c; \
  |  |  259|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][H_ADST] = \
  |  |  260|  3.49k|        inv_txfm_add_adst_identity_##w##x##h##_c; \
  |  |  261|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][V_ADST] = \
  |  |  262|  3.49k|        inv_txfm_add_identity_adst_##w##x##h##_c; \
  ------------------
  274|  3.49k|    assign_itx_all_fn84( 8,  8, );
  ------------------
  |  |  254|  3.49k|    assign_itx_all_fn16(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  231|  3.49k|    assign_itx_all_fn32(w, h, pfx); \
  |  |  |  |  ------------------
  |  |  |  |  |  |  226|  3.49k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  222|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  |  |  |  |  223|  3.49k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  227|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  |  |  |  |  228|  3.49k|        inv_txfm_add_identity_identity_##w##x##h##_c
  |  |  |  |  ------------------
  |  |  |  |  232|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  |  |  |  |  233|  3.49k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  |  |  |  |  234|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  |  |  |  |  235|  3.49k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  |  |  |  |  236|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  |  |  |  |  237|  3.49k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  |  |  |  |  238|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  |  |  |  |  239|  3.49k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  |  |  |  |  240|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  |  |  |  |  241|  3.49k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  |  |  |  |  242|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  |  |  |  |  243|  3.49k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  |  |  |  |  244|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  |  |  |  |  245|  3.49k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  |  |  |  |  246|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  |  |  |  |  247|  3.49k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  |  |  |  |  248|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  |  |  |  |  249|  3.49k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  |  |  |  |  250|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  |  |  |  |  251|  3.49k|        inv_txfm_add_identity_dct_##w##x##h##_c
  |  |  ------------------
  |  |  255|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][H_FLIPADST] = \
  |  |  256|  3.49k|        inv_txfm_add_flipadst_identity_##w##x##h##_c; \
  |  |  257|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][V_FLIPADST] = \
  |  |  258|  3.49k|        inv_txfm_add_identity_flipadst_##w##x##h##_c; \
  |  |  259|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][H_ADST] = \
  |  |  260|  3.49k|        inv_txfm_add_adst_identity_##w##x##h##_c; \
  |  |  261|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][V_ADST] = \
  |  |  262|  3.49k|        inv_txfm_add_identity_adst_##w##x##h##_c; \
  ------------------
  275|  3.49k|    assign_itx_all_fn84( 8, 16, R);
  ------------------
  |  |  254|  3.49k|    assign_itx_all_fn16(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  231|  3.49k|    assign_itx_all_fn32(w, h, pfx); \
  |  |  |  |  ------------------
  |  |  |  |  |  |  226|  3.49k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  222|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  |  |  |  |  223|  3.49k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  227|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  |  |  |  |  228|  3.49k|        inv_txfm_add_identity_identity_##w##x##h##_c
  |  |  |  |  ------------------
  |  |  |  |  232|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  |  |  |  |  233|  3.49k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  |  |  |  |  234|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  |  |  |  |  235|  3.49k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  |  |  |  |  236|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  |  |  |  |  237|  3.49k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  |  |  |  |  238|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  |  |  |  |  239|  3.49k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  |  |  |  |  240|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  |  |  |  |  241|  3.49k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  |  |  |  |  242|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  |  |  |  |  243|  3.49k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  |  |  |  |  244|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  |  |  |  |  245|  3.49k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  |  |  |  |  246|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  |  |  |  |  247|  3.49k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  |  |  |  |  248|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  |  |  |  |  249|  3.49k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  |  |  |  |  250|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  |  |  |  |  251|  3.49k|        inv_txfm_add_identity_dct_##w##x##h##_c
  |  |  ------------------
  |  |  255|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][H_FLIPADST] = \
  |  |  256|  3.49k|        inv_txfm_add_flipadst_identity_##w##x##h##_c; \
  |  |  257|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][V_FLIPADST] = \
  |  |  258|  3.49k|        inv_txfm_add_identity_flipadst_##w##x##h##_c; \
  |  |  259|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][H_ADST] = \
  |  |  260|  3.49k|        inv_txfm_add_adst_identity_##w##x##h##_c; \
  |  |  261|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][V_ADST] = \
  |  |  262|  3.49k|        inv_txfm_add_identity_adst_##w##x##h##_c; \
  ------------------
  276|  3.49k|    assign_itx_all_fn32( 8, 32, R);
  ------------------
  |  |  226|  3.49k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  222|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  223|  3.49k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  ------------------
  |  |  227|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  228|  3.49k|        inv_txfm_add_identity_identity_##w##x##h##_c
  ------------------
  277|  3.49k|    assign_itx_all_fn84(16,  4, R);
  ------------------
  |  |  254|  3.49k|    assign_itx_all_fn16(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  231|  3.49k|    assign_itx_all_fn32(w, h, pfx); \
  |  |  |  |  ------------------
  |  |  |  |  |  |  226|  3.49k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  222|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  |  |  |  |  223|  3.49k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  227|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  |  |  |  |  228|  3.49k|        inv_txfm_add_identity_identity_##w##x##h##_c
  |  |  |  |  ------------------
  |  |  |  |  232|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  |  |  |  |  233|  3.49k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  |  |  |  |  234|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  |  |  |  |  235|  3.49k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  |  |  |  |  236|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  |  |  |  |  237|  3.49k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  |  |  |  |  238|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  |  |  |  |  239|  3.49k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  |  |  |  |  240|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  |  |  |  |  241|  3.49k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  |  |  |  |  242|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  |  |  |  |  243|  3.49k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  |  |  |  |  244|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  |  |  |  |  245|  3.49k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  |  |  |  |  246|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  |  |  |  |  247|  3.49k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  |  |  |  |  248|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  |  |  |  |  249|  3.49k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  |  |  |  |  250|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  |  |  |  |  251|  3.49k|        inv_txfm_add_identity_dct_##w##x##h##_c
  |  |  ------------------
  |  |  255|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][H_FLIPADST] = \
  |  |  256|  3.49k|        inv_txfm_add_flipadst_identity_##w##x##h##_c; \
  |  |  257|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][V_FLIPADST] = \
  |  |  258|  3.49k|        inv_txfm_add_identity_flipadst_##w##x##h##_c; \
  |  |  259|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][H_ADST] = \
  |  |  260|  3.49k|        inv_txfm_add_adst_identity_##w##x##h##_c; \
  |  |  261|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][V_ADST] = \
  |  |  262|  3.49k|        inv_txfm_add_identity_adst_##w##x##h##_c; \
  ------------------
  278|  3.49k|    assign_itx_all_fn84(16,  8, R);
  ------------------
  |  |  254|  3.49k|    assign_itx_all_fn16(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  231|  3.49k|    assign_itx_all_fn32(w, h, pfx); \
  |  |  |  |  ------------------
  |  |  |  |  |  |  226|  3.49k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  222|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  |  |  |  |  223|  3.49k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  227|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  |  |  |  |  228|  3.49k|        inv_txfm_add_identity_identity_##w##x##h##_c
  |  |  |  |  ------------------
  |  |  |  |  232|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  |  |  |  |  233|  3.49k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  |  |  |  |  234|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  |  |  |  |  235|  3.49k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  |  |  |  |  236|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  |  |  |  |  237|  3.49k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  |  |  |  |  238|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  |  |  |  |  239|  3.49k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  |  |  |  |  240|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  |  |  |  |  241|  3.49k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  |  |  |  |  242|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  |  |  |  |  243|  3.49k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  |  |  |  |  244|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  |  |  |  |  245|  3.49k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  |  |  |  |  246|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  |  |  |  |  247|  3.49k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  |  |  |  |  248|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  |  |  |  |  249|  3.49k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  |  |  |  |  250|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  |  |  |  |  251|  3.49k|        inv_txfm_add_identity_dct_##w##x##h##_c
  |  |  ------------------
  |  |  255|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][H_FLIPADST] = \
  |  |  256|  3.49k|        inv_txfm_add_flipadst_identity_##w##x##h##_c; \
  |  |  257|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][V_FLIPADST] = \
  |  |  258|  3.49k|        inv_txfm_add_identity_flipadst_##w##x##h##_c; \
  |  |  259|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][H_ADST] = \
  |  |  260|  3.49k|        inv_txfm_add_adst_identity_##w##x##h##_c; \
  |  |  261|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][V_ADST] = \
  |  |  262|  3.49k|        inv_txfm_add_identity_adst_##w##x##h##_c; \
  ------------------
  279|  3.49k|    assign_itx_all_fn16(16, 16, );
  ------------------
  |  |  231|  3.49k|    assign_itx_all_fn32(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  226|  3.49k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  |  |  ------------------
  |  |  |  |  |  |  222|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  |  |  223|  3.49k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  |  |  ------------------
  |  |  |  |  227|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  |  |  228|  3.49k|        inv_txfm_add_identity_identity_##w##x##h##_c
  |  |  ------------------
  |  |  232|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  |  |  233|  3.49k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  |  |  234|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  |  |  235|  3.49k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  |  |  236|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  |  |  237|  3.49k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  |  |  238|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  |  |  239|  3.49k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  |  |  240|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  |  |  241|  3.49k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  |  |  242|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  |  |  243|  3.49k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  |  |  244|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  |  |  245|  3.49k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  |  |  246|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  |  |  247|  3.49k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  |  |  248|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  |  |  249|  3.49k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  |  |  250|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  |  |  251|  3.49k|        inv_txfm_add_identity_dct_##w##x##h##_c
  ------------------
  280|  3.49k|    assign_itx_all_fn32(16, 32, R);
  ------------------
  |  |  226|  3.49k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  222|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  223|  3.49k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  ------------------
  |  |  227|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  228|  3.49k|        inv_txfm_add_identity_identity_##w##x##h##_c
  ------------------
  281|  3.49k|    assign_itx_all_fn64(16, 64, R);
  ------------------
  |  |  222|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  223|  3.49k|        inv_txfm_add_dct_dct_##w##x##h##_c
  ------------------
  282|  3.49k|    assign_itx_all_fn32(32,  8, R);
  ------------------
  |  |  226|  3.49k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  222|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  223|  3.49k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  ------------------
  |  |  227|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  228|  3.49k|        inv_txfm_add_identity_identity_##w##x##h##_c
  ------------------
  283|  3.49k|    assign_itx_all_fn32(32, 16, R);
  ------------------
  |  |  226|  3.49k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  222|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  223|  3.49k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  ------------------
  |  |  227|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  228|  3.49k|        inv_txfm_add_identity_identity_##w##x##h##_c
  ------------------
  284|  3.49k|    assign_itx_all_fn32(32, 32, );
  ------------------
  |  |  226|  3.49k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  222|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  223|  3.49k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  ------------------
  |  |  227|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  228|  3.49k|        inv_txfm_add_identity_identity_##w##x##h##_c
  ------------------
  285|  3.49k|    assign_itx_all_fn64(32, 64, R);
  ------------------
  |  |  222|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  223|  3.49k|        inv_txfm_add_dct_dct_##w##x##h##_c
  ------------------
  286|  3.49k|    assign_itx_all_fn64(64, 16, R);
  ------------------
  |  |  222|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  223|  3.49k|        inv_txfm_add_dct_dct_##w##x##h##_c
  ------------------
  287|  3.49k|    assign_itx_all_fn64(64, 32, R);
  ------------------
  |  |  222|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  223|  3.49k|        inv_txfm_add_dct_dct_##w##x##h##_c
  ------------------
  288|  3.49k|    assign_itx_all_fn64(64, 64, );
  ------------------
  |  |  222|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  223|  3.49k|        inv_txfm_add_dct_dct_##w##x##h##_c
  ------------------
  289|       |
  290|  3.49k|    int all_simd = 0;
  291|  3.49k|#if HAVE_ASM
  292|       |#if ARCH_AARCH64 || ARCH_ARM
  293|       |    itx_dsp_init_arm(c, bpc, &all_simd);
  294|       |#endif
  295|       |#if ARCH_LOONGARCH64
  296|       |    itx_dsp_init_loongarch(c, bpc);
  297|       |#endif
  298|       |#if ARCH_PPC64LE
  299|       |    itx_dsp_init_ppc(c, bpc);
  300|       |#endif
  301|       |#if ARCH_RISCV
  302|       |    itx_dsp_init_riscv(c, bpc);
  303|       |#endif
  304|  3.49k|#if ARCH_X86
  305|  3.49k|    itx_dsp_init_x86(c, bpc, &all_simd);
  306|  3.49k|#endif
  307|  3.49k|#endif
  308|       |
  309|  3.49k|    if (!all_simd)
  ------------------
  |  Branch (309:9): [True: 0, False: 3.49k]
  ------------------
  310|      0|        dav1d_init_last_nonzero_col_from_eob_tables();
  311|  3.49k|}
itx_tmpl.c:inv_txfm_add_c:
   47|  60.2k|{
   48|  60.2k|    const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[tx];
   49|  60.2k|    const int w = 4 * t_dim->w, h = 4 * t_dim->h;
   50|  60.2k|    const int has_dconly = txtp == DCT_DCT;
   51|  60.2k|    assert(w >= 4 && w <= 64);
  ------------------
  |  Branch (51:5): [True: 60.2k, False: 18.4E]
  |  Branch (51:5): [True: 60.2k, False: 18.4E]
  ------------------
   52|  60.2k|    assert(h >= 4 && h <= 64);
  ------------------
  |  Branch (52:5): [True: 60.2k, False: 18.4E]
  |  Branch (52:5): [True: 60.2k, False: 0]
  ------------------
   53|  60.2k|    assert(eob >= 0);
  ------------------
  |  Branch (53:5): [True: 60.2k, False: 0]
  ------------------
   54|       |
   55|  60.2k|    const int is_rect2 = w * 2 == h || h * 2 == w;
  ------------------
  |  Branch (55:26): [True: 7.43k, False: 52.7k]
  |  Branch (55:40): [True: 9.33k, False: 43.4k]
  ------------------
   56|  60.2k|    const int rnd = (1 << shift) >> 1;
   57|       |
   58|  60.2k|    if (eob < has_dconly) {
  ------------------
  |  Branch (58:9): [True: 28.0k, False: 32.1k]
  ------------------
   59|  28.0k|        int dc = coeff[0];
   60|  28.0k|        coeff[0] = 0;
   61|  28.0k|        if (is_rect2)
  ------------------
  |  Branch (61:13): [True: 6.67k, False: 21.3k]
  ------------------
   62|  6.67k|            dc = (dc * 181 + 128) >> 8;
   63|  28.0k|        dc = (dc * 181 + 128) >> 8;
   64|  28.0k|        dc = (dc + rnd) >> shift;
   65|  28.0k|        dc = (dc * 181 + 128 + 2048) >> 12;
   66|  1.29M|        for (int y = 0; y < h; y++, dst += PXSTRIDE(stride))
  ------------------
  |  |   53|  1.26M|#define PXSTRIDE(x) (x)
  ------------------
  |  Branch (66:25): [True: 1.26M, False: 28.0k]
  ------------------
   67|  67.5M|            for (int x = 0; x < w; x++)
  ------------------
  |  Branch (67:29): [True: 66.3M, False: 1.26M]
  ------------------
   68|  66.3M|                dst[x] = iclip_pixel(dst[x] + dc);
  ------------------
  |  |   49|  66.3M|#define iclip_pixel iclip_u8
  ------------------
   69|  28.0k|        return;
   70|  28.0k|    }
   71|       |
   72|  32.1k|    const uint8_t *const txtps = dav1d_tx1d_types[txtp];
   73|  32.1k|    const itx_1d_fn first_1d_fn = dav1d_tx1d_fns[t_dim->lw][txtps[0]];
   74|  32.1k|    const itx_1d_fn second_1d_fn = dav1d_tx1d_fns[t_dim->lh][txtps[1]];
   75|  32.1k|    const int sh = imin(h, 32), sw = imin(w, 32);
   76|  32.1k|#if BITDEPTH == 8
   77|  32.1k|    const int row_clip_min = INT16_MIN;
   78|  32.1k|    const int col_clip_min = INT16_MIN;
   79|       |#else
   80|       |    const int row_clip_min = (int) ((unsigned) ~bitdepth_max << 7);
   81|       |    const int col_clip_min = (int) ((unsigned) ~bitdepth_max << 5);
   82|       |#endif
   83|  32.1k|    const int row_clip_max = ~row_clip_min;
   84|  32.1k|    const int col_clip_max = ~col_clip_min;
   85|       |
   86|  32.1k|    int32_t tmp[64 * 64], *c = tmp;
   87|  32.1k|    int last_nonzero_col; // in first 1d itx
   88|  32.1k|    if (txtps[1] == IDENTITY && txtps[0] != IDENTITY) {
  ------------------
  |  Branch (88:9): [True: 0, False: 32.1k]
  |  Branch (88:33): [True: 0, False: 0]
  ------------------
   89|      0|        last_nonzero_col = imin(sh - 1, eob);
   90|  32.1k|    } else if (txtps[0] == IDENTITY && txtps[1] != IDENTITY) {
  ------------------
  |  Branch (90:16): [True: 0, False: 32.1k]
  |  Branch (90:40): [True: 0, False: 0]
  ------------------
   91|      0|        last_nonzero_col = eob >> (t_dim->lw + 2);
   92|  32.1k|    } else {
   93|  32.1k|        last_nonzero_col = dav1d_last_nonzero_col_from_eob[tx][eob];
   94|  32.1k|    }
   95|  32.1k|    assert(last_nonzero_col < sh);
  ------------------
  |  Branch (95:5): [True: 32.1k, False: 0]
  ------------------
   96|   442k|    for (int y = 0; y <= last_nonzero_col; y++, c += w) {
  ------------------
  |  Branch (96:21): [True: 409k, False: 32.1k]
  ------------------
   97|   409k|        if (is_rect2)
  ------------------
  |  Branch (97:13): [True: 112k, False: 297k]
  ------------------
   98|  3.19M|            for (int x = 0; x < sw; x++)
  ------------------
  |  Branch (98:29): [True: 3.08M, False: 112k]
  ------------------
   99|  3.08M|                c[x] = (coeff[y + x * sh] * 181 + 128) >> 8;
  100|   297k|        else
  101|  9.64M|            for (int x = 0; x < sw; x++)
  ------------------
  |  Branch (101:29): [True: 9.34M, False: 297k]
  ------------------
  102|  9.34M|                c[x] = coeff[y + x * sh];
  103|   409k|        first_1d_fn(c, 1, row_clip_min, row_clip_max);
  104|   409k|    }
  105|  32.1k|    if (last_nonzero_col + 1 < sh)
  ------------------
  |  Branch (105:9): [True: 26.0k, False: 6.13k]
  ------------------
  106|  26.0k|        memset(c, 0, sizeof(*c) * (sh - last_nonzero_col - 1) * w);
  107|       |
  108|  32.1k|    memset(coeff, 0, sizeof(*coeff) * sw * sh);
  109|  41.5M|    for (int i = 0; i < w * sh; i++)
  ------------------
  |  Branch (109:21): [True: 41.5M, False: 32.1k]
  ------------------
  110|  41.5M|        tmp[i] = iclip((tmp[i] + rnd) >> shift, col_clip_min, col_clip_max);
  111|       |
  112|  1.42M|    for (int x = 0; x < w; x++)
  ------------------
  |  Branch (112:21): [True: 1.39M, False: 32.1k]
  ------------------
  113|  1.39M|        second_1d_fn(&tmp[x], w, col_clip_min, col_clip_max);
  114|       |
  115|  32.1k|    c = tmp;
  116|  1.45M|    for (int y = 0; y < h; y++, dst += PXSTRIDE(stride))
  ------------------
  |  |   53|  1.42M|#define PXSTRIDE(x) (x)
  ------------------
  |  Branch (116:21): [True: 1.42M, False: 32.1k]
  ------------------
  117|  68.3M|        for (int x = 0; x < w; x++)
  ------------------
  |  Branch (117:25): [True: 66.9M, False: 1.42M]
  ------------------
  118|  66.9M|            dst[x] = iclip_pixel(dst[x] + ((*c++ + 8) >> 4));
  ------------------
  |  |   49|  66.9M|#define iclip_pixel iclip_u8
  ------------------
  119|  32.1k|}
itx_tmpl.c:inv_txfm_add_dct_dct_16x32_c:
  127|  4.48k|                                               HIGHBD_DECL_SUFFIX) \
  128|  4.48k|{ \
  129|  4.48k|    inv_txfm_add_c(dst, stride, coeff, eob, pfx##TX_##w##X##h, shift, type \
  130|  4.48k|                   HIGHBD_TAIL_SUFFIX); \
  131|  4.48k|}
itx_tmpl.c:inv_txfm_add_dct_dct_16x64_c:
  127|  1.15k|                                               HIGHBD_DECL_SUFFIX) \
  128|  1.15k|{ \
  129|  1.15k|    inv_txfm_add_c(dst, stride, coeff, eob, pfx##TX_##w##X##h, shift, type \
  130|  1.15k|                   HIGHBD_TAIL_SUFFIX); \
  131|  1.15k|}
itx_tmpl.c:inv_txfm_add_dct_dct_32x16_c:
  127|  6.67k|                                               HIGHBD_DECL_SUFFIX) \
  128|  6.67k|{ \
  129|  6.67k|    inv_txfm_add_c(dst, stride, coeff, eob, pfx##TX_##w##X##h, shift, type \
  130|  6.67k|                   HIGHBD_TAIL_SUFFIX); \
  131|  6.67k|}
itx_tmpl.c:inv_txfm_add_dct_dct_32x32_c:
  127|  17.1k|                                               HIGHBD_DECL_SUFFIX) \
  128|  17.1k|{ \
  129|  17.1k|    inv_txfm_add_c(dst, stride, coeff, eob, pfx##TX_##w##X##h, shift, type \
  130|  17.1k|                   HIGHBD_TAIL_SUFFIX); \
  131|  17.1k|}
itx_tmpl.c:inv_txfm_add_dct_dct_32x64_c:
  127|  2.95k|                                               HIGHBD_DECL_SUFFIX) \
  128|  2.95k|{ \
  129|  2.95k|    inv_txfm_add_c(dst, stride, coeff, eob, pfx##TX_##w##X##h, shift, type \
  130|  2.95k|                   HIGHBD_TAIL_SUFFIX); \
  131|  2.95k|}
itx_tmpl.c:inv_txfm_add_dct_dct_64x16_c:
  127|  1.20k|                                               HIGHBD_DECL_SUFFIX) \
  128|  1.20k|{ \
  129|  1.20k|    inv_txfm_add_c(dst, stride, coeff, eob, pfx##TX_##w##X##h, shift, type \
  130|  1.20k|                   HIGHBD_TAIL_SUFFIX); \
  131|  1.20k|}
itx_tmpl.c:inv_txfm_add_dct_dct_64x32_c:
  127|  2.65k|                                               HIGHBD_DECL_SUFFIX) \
  128|  2.65k|{ \
  129|  2.65k|    inv_txfm_add_c(dst, stride, coeff, eob, pfx##TX_##w##X##h, shift, type \
  130|  2.65k|                   HIGHBD_TAIL_SUFFIX); \
  131|  2.65k|}
itx_tmpl.c:inv_txfm_add_dct_dct_64x64_c:
  127|  23.9k|                                               HIGHBD_DECL_SUFFIX) \
  128|  23.9k|{ \
  129|  23.9k|    inv_txfm_add_c(dst, stride, coeff, eob, pfx##TX_##w##X##h, shift, type \
  130|  23.9k|                   HIGHBD_TAIL_SUFFIX); \
  131|  23.9k|}
dav1d_itx_dsp_init_16bpc:
  220|  5.72k|COLD void bitfn(dav1d_itx_dsp_init)(Dav1dInvTxfmDSPContext *const c, int bpc) {
  221|  5.72k|#define assign_itx_all_fn64(w, h, pfx) \
  222|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  223|  5.72k|        inv_txfm_add_dct_dct_##w##x##h##_c
  224|       |
  225|  5.72k|#define assign_itx_all_fn32(w, h, pfx) \
  226|  5.72k|    assign_itx_all_fn64(w, h, pfx); \
  227|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  228|  5.72k|        inv_txfm_add_identity_identity_##w##x##h##_c
  229|       |
  230|  5.72k|#define assign_itx_all_fn16(w, h, pfx) \
  231|  5.72k|    assign_itx_all_fn32(w, h, pfx); \
  232|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  233|  5.72k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  234|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  235|  5.72k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  236|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  237|  5.72k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  238|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  239|  5.72k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  240|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  241|  5.72k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  242|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  243|  5.72k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  244|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  245|  5.72k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  246|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  247|  5.72k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  248|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  249|  5.72k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  250|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  251|  5.72k|        inv_txfm_add_identity_dct_##w##x##h##_c
  252|       |
  253|  5.72k|#define assign_itx_all_fn84(w, h, pfx) \
  254|  5.72k|    assign_itx_all_fn16(w, h, pfx); \
  255|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][H_FLIPADST] = \
  256|  5.72k|        inv_txfm_add_flipadst_identity_##w##x##h##_c; \
  257|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][V_FLIPADST] = \
  258|  5.72k|        inv_txfm_add_identity_flipadst_##w##x##h##_c; \
  259|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][H_ADST] = \
  260|  5.72k|        inv_txfm_add_adst_identity_##w##x##h##_c; \
  261|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][V_ADST] = \
  262|  5.72k|        inv_txfm_add_identity_adst_##w##x##h##_c; \
  263|  5.72k|
  264|  5.72k|#if !(HAVE_ASM && TRIM_DSP_FUNCTIONS && ( \
  265|  5.72k|  ARCH_AARCH64 || \
  266|  5.72k|  (ARCH_ARM && (defined(__ARM_NEON) || defined(__APPLE__) || defined(_WIN32))) \
  267|  5.72k|))
  268|  5.72k|    c->itxfm_add[TX_4X4][WHT_WHT] = inv_txfm_add_wht_wht_4x4_c;
  269|  5.72k|#endif
  270|  5.72k|    assign_itx_all_fn84( 4,  4, );
  ------------------
  |  |  254|  5.72k|    assign_itx_all_fn16(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  231|  5.72k|    assign_itx_all_fn32(w, h, pfx); \
  |  |  |  |  ------------------
  |  |  |  |  |  |  226|  5.72k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  222|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  |  |  |  |  223|  5.72k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  227|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  |  |  |  |  228|  5.72k|        inv_txfm_add_identity_identity_##w##x##h##_c
  |  |  |  |  ------------------
  |  |  |  |  232|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  |  |  |  |  233|  5.72k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  |  |  |  |  234|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  |  |  |  |  235|  5.72k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  |  |  |  |  236|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  |  |  |  |  237|  5.72k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  |  |  |  |  238|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  |  |  |  |  239|  5.72k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  |  |  |  |  240|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  |  |  |  |  241|  5.72k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  |  |  |  |  242|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  |  |  |  |  243|  5.72k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  |  |  |  |  244|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  |  |  |  |  245|  5.72k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  |  |  |  |  246|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  |  |  |  |  247|  5.72k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  |  |  |  |  248|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  |  |  |  |  249|  5.72k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  |  |  |  |  250|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  |  |  |  |  251|  5.72k|        inv_txfm_add_identity_dct_##w##x##h##_c
  |  |  ------------------
  |  |  255|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][H_FLIPADST] = \
  |  |  256|  5.72k|        inv_txfm_add_flipadst_identity_##w##x##h##_c; \
  |  |  257|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][V_FLIPADST] = \
  |  |  258|  5.72k|        inv_txfm_add_identity_flipadst_##w##x##h##_c; \
  |  |  259|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][H_ADST] = \
  |  |  260|  5.72k|        inv_txfm_add_adst_identity_##w##x##h##_c; \
  |  |  261|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][V_ADST] = \
  |  |  262|  5.72k|        inv_txfm_add_identity_adst_##w##x##h##_c; \
  ------------------
  271|  5.72k|    assign_itx_all_fn84( 4,  8, R);
  ------------------
  |  |  254|  5.72k|    assign_itx_all_fn16(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  231|  5.72k|    assign_itx_all_fn32(w, h, pfx); \
  |  |  |  |  ------------------
  |  |  |  |  |  |  226|  5.72k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  222|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  |  |  |  |  223|  5.72k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  227|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  |  |  |  |  228|  5.72k|        inv_txfm_add_identity_identity_##w##x##h##_c
  |  |  |  |  ------------------
  |  |  |  |  232|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  |  |  |  |  233|  5.72k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  |  |  |  |  234|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  |  |  |  |  235|  5.72k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  |  |  |  |  236|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  |  |  |  |  237|  5.72k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  |  |  |  |  238|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  |  |  |  |  239|  5.72k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  |  |  |  |  240|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  |  |  |  |  241|  5.72k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  |  |  |  |  242|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  |  |  |  |  243|  5.72k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  |  |  |  |  244|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  |  |  |  |  245|  5.72k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  |  |  |  |  246|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  |  |  |  |  247|  5.72k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  |  |  |  |  248|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  |  |  |  |  249|  5.72k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  |  |  |  |  250|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  |  |  |  |  251|  5.72k|        inv_txfm_add_identity_dct_##w##x##h##_c
  |  |  ------------------
  |  |  255|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][H_FLIPADST] = \
  |  |  256|  5.72k|        inv_txfm_add_flipadst_identity_##w##x##h##_c; \
  |  |  257|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][V_FLIPADST] = \
  |  |  258|  5.72k|        inv_txfm_add_identity_flipadst_##w##x##h##_c; \
  |  |  259|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][H_ADST] = \
  |  |  260|  5.72k|        inv_txfm_add_adst_identity_##w##x##h##_c; \
  |  |  261|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][V_ADST] = \
  |  |  262|  5.72k|        inv_txfm_add_identity_adst_##w##x##h##_c; \
  ------------------
  272|  5.72k|    assign_itx_all_fn84( 4, 16, R);
  ------------------
  |  |  254|  5.72k|    assign_itx_all_fn16(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  231|  5.72k|    assign_itx_all_fn32(w, h, pfx); \
  |  |  |  |  ------------------
  |  |  |  |  |  |  226|  5.72k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  222|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  |  |  |  |  223|  5.72k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  227|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  |  |  |  |  228|  5.72k|        inv_txfm_add_identity_identity_##w##x##h##_c
  |  |  |  |  ------------------
  |  |  |  |  232|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  |  |  |  |  233|  5.72k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  |  |  |  |  234|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  |  |  |  |  235|  5.72k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  |  |  |  |  236|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  |  |  |  |  237|  5.72k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  |  |  |  |  238|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  |  |  |  |  239|  5.72k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  |  |  |  |  240|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  |  |  |  |  241|  5.72k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  |  |  |  |  242|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  |  |  |  |  243|  5.72k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  |  |  |  |  244|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  |  |  |  |  245|  5.72k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  |  |  |  |  246|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  |  |  |  |  247|  5.72k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  |  |  |  |  248|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  |  |  |  |  249|  5.72k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  |  |  |  |  250|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  |  |  |  |  251|  5.72k|        inv_txfm_add_identity_dct_##w##x##h##_c
  |  |  ------------------
  |  |  255|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][H_FLIPADST] = \
  |  |  256|  5.72k|        inv_txfm_add_flipadst_identity_##w##x##h##_c; \
  |  |  257|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][V_FLIPADST] = \
  |  |  258|  5.72k|        inv_txfm_add_identity_flipadst_##w##x##h##_c; \
  |  |  259|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][H_ADST] = \
  |  |  260|  5.72k|        inv_txfm_add_adst_identity_##w##x##h##_c; \
  |  |  261|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][V_ADST] = \
  |  |  262|  5.72k|        inv_txfm_add_identity_adst_##w##x##h##_c; \
  ------------------
  273|  5.72k|    assign_itx_all_fn84( 8,  4, R);
  ------------------
  |  |  254|  5.72k|    assign_itx_all_fn16(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  231|  5.72k|    assign_itx_all_fn32(w, h, pfx); \
  |  |  |  |  ------------------
  |  |  |  |  |  |  226|  5.72k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  222|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  |  |  |  |  223|  5.72k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  227|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  |  |  |  |  228|  5.72k|        inv_txfm_add_identity_identity_##w##x##h##_c
  |  |  |  |  ------------------
  |  |  |  |  232|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  |  |  |  |  233|  5.72k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  |  |  |  |  234|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  |  |  |  |  235|  5.72k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  |  |  |  |  236|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  |  |  |  |  237|  5.72k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  |  |  |  |  238|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  |  |  |  |  239|  5.72k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  |  |  |  |  240|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  |  |  |  |  241|  5.72k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  |  |  |  |  242|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  |  |  |  |  243|  5.72k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  |  |  |  |  244|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  |  |  |  |  245|  5.72k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  |  |  |  |  246|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  |  |  |  |  247|  5.72k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  |  |  |  |  248|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  |  |  |  |  249|  5.72k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  |  |  |  |  250|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  |  |  |  |  251|  5.72k|        inv_txfm_add_identity_dct_##w##x##h##_c
  |  |  ------------------
  |  |  255|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][H_FLIPADST] = \
  |  |  256|  5.72k|        inv_txfm_add_flipadst_identity_##w##x##h##_c; \
  |  |  257|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][V_FLIPADST] = \
  |  |  258|  5.72k|        inv_txfm_add_identity_flipadst_##w##x##h##_c; \
  |  |  259|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][H_ADST] = \
  |  |  260|  5.72k|        inv_txfm_add_adst_identity_##w##x##h##_c; \
  |  |  261|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][V_ADST] = \
  |  |  262|  5.72k|        inv_txfm_add_identity_adst_##w##x##h##_c; \
  ------------------
  274|  5.72k|    assign_itx_all_fn84( 8,  8, );
  ------------------
  |  |  254|  5.72k|    assign_itx_all_fn16(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  231|  5.72k|    assign_itx_all_fn32(w, h, pfx); \
  |  |  |  |  ------------------
  |  |  |  |  |  |  226|  5.72k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  222|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  |  |  |  |  223|  5.72k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  227|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  |  |  |  |  228|  5.72k|        inv_txfm_add_identity_identity_##w##x##h##_c
  |  |  |  |  ------------------
  |  |  |  |  232|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  |  |  |  |  233|  5.72k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  |  |  |  |  234|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  |  |  |  |  235|  5.72k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  |  |  |  |  236|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  |  |  |  |  237|  5.72k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  |  |  |  |  238|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  |  |  |  |  239|  5.72k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  |  |  |  |  240|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  |  |  |  |  241|  5.72k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  |  |  |  |  242|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  |  |  |  |  243|  5.72k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  |  |  |  |  244|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  |  |  |  |  245|  5.72k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  |  |  |  |  246|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  |  |  |  |  247|  5.72k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  |  |  |  |  248|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  |  |  |  |  249|  5.72k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  |  |  |  |  250|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  |  |  |  |  251|  5.72k|        inv_txfm_add_identity_dct_##w##x##h##_c
  |  |  ------------------
  |  |  255|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][H_FLIPADST] = \
  |  |  256|  5.72k|        inv_txfm_add_flipadst_identity_##w##x##h##_c; \
  |  |  257|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][V_FLIPADST] = \
  |  |  258|  5.72k|        inv_txfm_add_identity_flipadst_##w##x##h##_c; \
  |  |  259|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][H_ADST] = \
  |  |  260|  5.72k|        inv_txfm_add_adst_identity_##w##x##h##_c; \
  |  |  261|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][V_ADST] = \
  |  |  262|  5.72k|        inv_txfm_add_identity_adst_##w##x##h##_c; \
  ------------------
  275|  5.72k|    assign_itx_all_fn84( 8, 16, R);
  ------------------
  |  |  254|  5.72k|    assign_itx_all_fn16(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  231|  5.72k|    assign_itx_all_fn32(w, h, pfx); \
  |  |  |  |  ------------------
  |  |  |  |  |  |  226|  5.72k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  222|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  |  |  |  |  223|  5.72k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  227|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  |  |  |  |  228|  5.72k|        inv_txfm_add_identity_identity_##w##x##h##_c
  |  |  |  |  ------------------
  |  |  |  |  232|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  |  |  |  |  233|  5.72k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  |  |  |  |  234|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  |  |  |  |  235|  5.72k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  |  |  |  |  236|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  |  |  |  |  237|  5.72k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  |  |  |  |  238|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  |  |  |  |  239|  5.72k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  |  |  |  |  240|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  |  |  |  |  241|  5.72k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  |  |  |  |  242|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  |  |  |  |  243|  5.72k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  |  |  |  |  244|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  |  |  |  |  245|  5.72k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  |  |  |  |  246|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  |  |  |  |  247|  5.72k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  |  |  |  |  248|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  |  |  |  |  249|  5.72k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  |  |  |  |  250|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  |  |  |  |  251|  5.72k|        inv_txfm_add_identity_dct_##w##x##h##_c
  |  |  ------------------
  |  |  255|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][H_FLIPADST] = \
  |  |  256|  5.72k|        inv_txfm_add_flipadst_identity_##w##x##h##_c; \
  |  |  257|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][V_FLIPADST] = \
  |  |  258|  5.72k|        inv_txfm_add_identity_flipadst_##w##x##h##_c; \
  |  |  259|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][H_ADST] = \
  |  |  260|  5.72k|        inv_txfm_add_adst_identity_##w##x##h##_c; \
  |  |  261|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][V_ADST] = \
  |  |  262|  5.72k|        inv_txfm_add_identity_adst_##w##x##h##_c; \
  ------------------
  276|  5.72k|    assign_itx_all_fn32( 8, 32, R);
  ------------------
  |  |  226|  5.72k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  222|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  223|  5.72k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  ------------------
  |  |  227|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  228|  5.72k|        inv_txfm_add_identity_identity_##w##x##h##_c
  ------------------
  277|  5.72k|    assign_itx_all_fn84(16,  4, R);
  ------------------
  |  |  254|  5.72k|    assign_itx_all_fn16(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  231|  5.72k|    assign_itx_all_fn32(w, h, pfx); \
  |  |  |  |  ------------------
  |  |  |  |  |  |  226|  5.72k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  222|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  |  |  |  |  223|  5.72k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  227|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  |  |  |  |  228|  5.72k|        inv_txfm_add_identity_identity_##w##x##h##_c
  |  |  |  |  ------------------
  |  |  |  |  232|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  |  |  |  |  233|  5.72k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  |  |  |  |  234|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  |  |  |  |  235|  5.72k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  |  |  |  |  236|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  |  |  |  |  237|  5.72k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  |  |  |  |  238|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  |  |  |  |  239|  5.72k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  |  |  |  |  240|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  |  |  |  |  241|  5.72k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  |  |  |  |  242|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  |  |  |  |  243|  5.72k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  |  |  |  |  244|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  |  |  |  |  245|  5.72k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  |  |  |  |  246|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  |  |  |  |  247|  5.72k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  |  |  |  |  248|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  |  |  |  |  249|  5.72k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  |  |  |  |  250|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  |  |  |  |  251|  5.72k|        inv_txfm_add_identity_dct_##w##x##h##_c
  |  |  ------------------
  |  |  255|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][H_FLIPADST] = \
  |  |  256|  5.72k|        inv_txfm_add_flipadst_identity_##w##x##h##_c; \
  |  |  257|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][V_FLIPADST] = \
  |  |  258|  5.72k|        inv_txfm_add_identity_flipadst_##w##x##h##_c; \
  |  |  259|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][H_ADST] = \
  |  |  260|  5.72k|        inv_txfm_add_adst_identity_##w##x##h##_c; \
  |  |  261|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][V_ADST] = \
  |  |  262|  5.72k|        inv_txfm_add_identity_adst_##w##x##h##_c; \
  ------------------
  278|  5.72k|    assign_itx_all_fn84(16,  8, R);
  ------------------
  |  |  254|  5.72k|    assign_itx_all_fn16(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  231|  5.72k|    assign_itx_all_fn32(w, h, pfx); \
  |  |  |  |  ------------------
  |  |  |  |  |  |  226|  5.72k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  222|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  |  |  |  |  223|  5.72k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  227|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  |  |  |  |  228|  5.72k|        inv_txfm_add_identity_identity_##w##x##h##_c
  |  |  |  |  ------------------
  |  |  |  |  232|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  |  |  |  |  233|  5.72k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  |  |  |  |  234|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  |  |  |  |  235|  5.72k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  |  |  |  |  236|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  |  |  |  |  237|  5.72k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  |  |  |  |  238|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  |  |  |  |  239|  5.72k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  |  |  |  |  240|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  |  |  |  |  241|  5.72k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  |  |  |  |  242|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  |  |  |  |  243|  5.72k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  |  |  |  |  244|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  |  |  |  |  245|  5.72k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  |  |  |  |  246|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  |  |  |  |  247|  5.72k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  |  |  |  |  248|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  |  |  |  |  249|  5.72k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  |  |  |  |  250|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  |  |  |  |  251|  5.72k|        inv_txfm_add_identity_dct_##w##x##h##_c
  |  |  ------------------
  |  |  255|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][H_FLIPADST] = \
  |  |  256|  5.72k|        inv_txfm_add_flipadst_identity_##w##x##h##_c; \
  |  |  257|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][V_FLIPADST] = \
  |  |  258|  5.72k|        inv_txfm_add_identity_flipadst_##w##x##h##_c; \
  |  |  259|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][H_ADST] = \
  |  |  260|  5.72k|        inv_txfm_add_adst_identity_##w##x##h##_c; \
  |  |  261|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][V_ADST] = \
  |  |  262|  5.72k|        inv_txfm_add_identity_adst_##w##x##h##_c; \
  ------------------
  279|  5.72k|    assign_itx_all_fn16(16, 16, );
  ------------------
  |  |  231|  5.72k|    assign_itx_all_fn32(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  226|  5.72k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  |  |  ------------------
  |  |  |  |  |  |  222|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  |  |  223|  5.72k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  |  |  ------------------
  |  |  |  |  227|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  |  |  228|  5.72k|        inv_txfm_add_identity_identity_##w##x##h##_c
  |  |  ------------------
  |  |  232|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
  |  |  233|  5.72k|        inv_txfm_add_adst_dct_##w##x##h##_c; \
  |  |  234|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
  |  |  235|  5.72k|        inv_txfm_add_dct_adst_##w##x##h##_c; \
  |  |  236|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
  |  |  237|  5.72k|        inv_txfm_add_adst_adst_##w##x##h##_c; \
  |  |  238|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
  |  |  239|  5.72k|        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
  |  |  240|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
  |  |  241|  5.72k|        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
  |  |  242|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
  |  |  243|  5.72k|        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
  |  |  244|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
  |  |  245|  5.72k|        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
  |  |  246|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
  |  |  247|  5.72k|        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
  |  |  248|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
  |  |  249|  5.72k|        inv_txfm_add_dct_identity_##w##x##h##_c; \
  |  |  250|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
  |  |  251|  5.72k|        inv_txfm_add_identity_dct_##w##x##h##_c
  ------------------
  280|  5.72k|    assign_itx_all_fn32(16, 32, R);
  ------------------
  |  |  226|  5.72k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  222|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  223|  5.72k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  ------------------
  |  |  227|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  228|  5.72k|        inv_txfm_add_identity_identity_##w##x##h##_c
  ------------------
  281|  5.72k|    assign_itx_all_fn64(16, 64, R);
  ------------------
  |  |  222|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  223|  5.72k|        inv_txfm_add_dct_dct_##w##x##h##_c
  ------------------
  282|  5.72k|    assign_itx_all_fn32(32,  8, R);
  ------------------
  |  |  226|  5.72k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  222|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  223|  5.72k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  ------------------
  |  |  227|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  228|  5.72k|        inv_txfm_add_identity_identity_##w##x##h##_c
  ------------------
  283|  5.72k|    assign_itx_all_fn32(32, 16, R);
  ------------------
  |  |  226|  5.72k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  222|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  223|  5.72k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  ------------------
  |  |  227|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  228|  5.72k|        inv_txfm_add_identity_identity_##w##x##h##_c
  ------------------
  284|  5.72k|    assign_itx_all_fn32(32, 32, );
  ------------------
  |  |  226|  5.72k|    assign_itx_all_fn64(w, h, pfx); \
  |  |  ------------------
  |  |  |  |  222|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  |  |  223|  5.72k|        inv_txfm_add_dct_dct_##w##x##h##_c
  |  |  ------------------
  |  |  227|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
  |  |  228|  5.72k|        inv_txfm_add_identity_identity_##w##x##h##_c
  ------------------
  285|  5.72k|    assign_itx_all_fn64(32, 64, R);
  ------------------
  |  |  222|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  223|  5.72k|        inv_txfm_add_dct_dct_##w##x##h##_c
  ------------------
  286|  5.72k|    assign_itx_all_fn64(64, 16, R);
  ------------------
  |  |  222|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  223|  5.72k|        inv_txfm_add_dct_dct_##w##x##h##_c
  ------------------
  287|  5.72k|    assign_itx_all_fn64(64, 32, R);
  ------------------
  |  |  222|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  223|  5.72k|        inv_txfm_add_dct_dct_##w##x##h##_c
  ------------------
  288|  5.72k|    assign_itx_all_fn64(64, 64, );
  ------------------
  |  |  222|  5.72k|    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
  |  |  223|  5.72k|        inv_txfm_add_dct_dct_##w##x##h##_c
  ------------------
  289|       |
  290|  5.72k|    int all_simd = 0;
  291|  5.72k|#if HAVE_ASM
  292|       |#if ARCH_AARCH64 || ARCH_ARM
  293|       |    itx_dsp_init_arm(c, bpc, &all_simd);
  294|       |#endif
  295|       |#if ARCH_LOONGARCH64
  296|       |    itx_dsp_init_loongarch(c, bpc);
  297|       |#endif
  298|       |#if ARCH_PPC64LE
  299|       |    itx_dsp_init_ppc(c, bpc);
  300|       |#endif
  301|       |#if ARCH_RISCV
  302|       |    itx_dsp_init_riscv(c, bpc);
  303|       |#endif
  304|  5.72k|#if ARCH_X86
  305|  5.72k|    itx_dsp_init_x86(c, bpc, &all_simd);
  306|  5.72k|#endif
  307|  5.72k|#endif
  308|       |
  309|  5.72k|    if (!all_simd)
  ------------------
  |  Branch (309:9): [True: 3.39k, False: 2.32k]
  ------------------
  310|  3.39k|        dav1d_init_last_nonzero_col_from_eob_tables();
  311|  5.72k|}

dav1d_copy_lpf_8bpc:
  106|   212k|{
  107|   212k|    const int have_tt = f->c->n_tc > 1;
  108|   212k|    const int resize = f->frame_hdr->width[0] != f->frame_hdr->width[1];
  109|   212k|    const int offset = 8 * !!sby;
  110|   212k|    const ptrdiff_t *const src_stride = f->cur.stride;
  111|   212k|    const ptrdiff_t *const lr_stride = f->sr_cur.p.stride;
  112|   212k|    const int tt_off = have_tt * sby * (4 << f->seq_hdr->sb128);
  113|   212k|    pixel *const dst[3] = {
  114|   212k|        f->lf.lr_lpf_line[0] + tt_off * PXSTRIDE(lr_stride[0]),
  ------------------
  |  |   53|   212k|#define PXSTRIDE(x) (x)
  ------------------
  115|   212k|        f->lf.lr_lpf_line[1] + tt_off * PXSTRIDE(lr_stride[1]),
  ------------------
  |  |   53|   212k|#define PXSTRIDE(x) (x)
  ------------------
  116|   212k|        f->lf.lr_lpf_line[2] + tt_off * PXSTRIDE(lr_stride[1])
  ------------------
  |  |   53|   212k|#define PXSTRIDE(x) (x)
  ------------------
  117|   212k|    };
  118|       |
  119|       |    // TODO Also check block level restore type to reduce copying.
  120|   212k|    const int restore_planes = f->lf.restore_planes;
  121|       |
  122|   212k|    if (f->seq_hdr->cdef || restore_planes & LR_RESTORE_Y) {
  ------------------
  |  Branch (122:9): [True: 90.3k, False: 122k]
  |  Branch (122:29): [True: 120k, False: 1.67k]
  ------------------
  123|   210k|        const int h = f->cur.p.h;
  124|   210k|        const int w = f->bw << 2;
  125|   210k|        const int row_h = imin((sby + 1) << (6 + f->seq_hdr->sb128), h - 1);
  126|   210k|        const int y_stripe = (sby << (6 + f->seq_hdr->sb128)) - offset;
  127|   210k|        if (restore_planes & LR_RESTORE_Y || !resize)
  ------------------
  |  Branch (127:13): [True: 127k, False: 83.6k]
  |  Branch (127:46): [True: 80.5k, False: 3.00k]
  ------------------
  128|   207k|            backup_lpf(f, dst[0], lr_stride[0],
  129|   207k|                       src[0] - offset * PXSTRIDE(src_stride[0]), src_stride[0],
  ------------------
  |  |   53|   207k|#define PXSTRIDE(x) (x)
  ------------------
  130|   207k|                       0, f->seq_hdr->sb128, y_stripe, row_h, w, h, 0, 1);
  131|   210k|        if (have_tt && resize) {
  ------------------
  |  Branch (131:13): [True: 210k, False: 320]
  |  Branch (131:24): [True: 4.73k, False: 205k]
  ------------------
  132|  4.73k|            const ptrdiff_t cdef_off_y = sby * 4 * PXSTRIDE(src_stride[0]);
  ------------------
  |  |   53|  4.73k|#define PXSTRIDE(x) (x)
  ------------------
  133|  4.73k|            backup_lpf(f, f->lf.cdef_lpf_line[0] + cdef_off_y, src_stride[0],
  134|  4.73k|                       src[0] - offset * PXSTRIDE(src_stride[0]), src_stride[0],
  ------------------
  |  |   53|  4.73k|#define PXSTRIDE(x) (x)
  ------------------
  135|  4.73k|                       0, f->seq_hdr->sb128, y_stripe, row_h, w, h, 0, 0);
  136|  4.73k|        }
  137|   210k|    }
  138|   212k|    if ((f->seq_hdr->cdef || restore_planes & (LR_RESTORE_U | LR_RESTORE_V)) &&
  ------------------
  |  Branch (138:10): [True: 90.3k, False: 122k]
  |  Branch (138:30): [True: 3.25k, False: 118k]
  ------------------
  139|  93.3k|        f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400)
  ------------------
  |  Branch (139:9): [True: 35.2k, False: 58.0k]
  ------------------
  140|  35.2k|    {
  141|  35.2k|        const int ss_ver = f->sr_cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
  142|  35.2k|        const int ss_hor = f->sr_cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
  143|  35.2k|        const int h = (f->cur.p.h + ss_ver) >> ss_ver;
  144|  35.2k|        const int w = f->bw << (2 - ss_hor);
  145|  35.2k|        const int row_h = imin((sby + 1) << ((6 - ss_ver) + f->seq_hdr->sb128), h - 1);
  146|  35.2k|        const int offset_uv = offset >> ss_ver;
  147|  35.2k|        const int y_stripe = (sby << ((6 - ss_ver) + f->seq_hdr->sb128)) - offset_uv;
  148|  35.2k|        const ptrdiff_t cdef_off_uv = sby * 4 * PXSTRIDE(src_stride[1]);
  ------------------
  |  |   53|  35.2k|#define PXSTRIDE(x) (x)
  ------------------
  149|  35.2k|        if (f->seq_hdr->cdef || restore_planes & LR_RESTORE_U) {
  ------------------
  |  Branch (149:13): [True: 32.0k, False: 3.25k]
  |  Branch (149:33): [True: 2.01k, False: 1.24k]
  ------------------
  150|  34.0k|            if (restore_planes & LR_RESTORE_U || !resize)
  ------------------
  |  Branch (150:17): [True: 3.98k, False: 30.0k]
  |  Branch (150:50): [True: 27.0k, False: 3.03k]
  ------------------
  151|  30.9k|                backup_lpf(f, dst[1], lr_stride[1],
  152|  30.9k|                           src[1] - offset_uv * PXSTRIDE(src_stride[1]),
  ------------------
  |  |   53|  30.9k|#define PXSTRIDE(x) (x)
  ------------------
  153|  30.9k|                           src_stride[1], ss_ver, f->seq_hdr->sb128, y_stripe,
  154|  30.9k|                           row_h, w, h, ss_hor, 1);
  155|  34.0k|            if (have_tt && resize)
  ------------------
  |  Branch (155:17): [True: 34.0k, False: 10]
  |  Branch (155:28): [True: 5.06k, False: 28.9k]
  ------------------
  156|  5.06k|                backup_lpf(f, f->lf.cdef_lpf_line[1] + cdef_off_uv, src_stride[1],
  157|  5.06k|                           src[1] - offset_uv * PXSTRIDE(src_stride[1]),
  ------------------
  |  |   53|  5.06k|#define PXSTRIDE(x) (x)
  ------------------
  158|  5.06k|                           src_stride[1], ss_ver, f->seq_hdr->sb128, y_stripe,
  159|  5.06k|                           row_h, w, h, ss_hor, 0);
  160|  34.0k|        }
  161|  35.2k|        if (f->seq_hdr->cdef || restore_planes & LR_RESTORE_V) {
  ------------------
  |  Branch (161:13): [True: 32.0k, False: 3.25k]
  |  Branch (161:33): [True: 2.69k, False: 566]
  ------------------
  162|  34.7k|            if (restore_planes & LR_RESTORE_V || !resize)
  ------------------
  |  Branch (162:17): [True: 5.43k, False: 29.2k]
  |  Branch (162:50): [True: 26.6k, False: 2.62k]
  ------------------
  163|  32.0k|                backup_lpf(f, dst[2], lr_stride[1],
  164|  32.0k|                           src[2] - offset_uv * PXSTRIDE(src_stride[1]),
  ------------------
  |  |   53|  32.0k|#define PXSTRIDE(x) (x)
  ------------------
  165|  32.0k|                           src_stride[1], ss_ver, f->seq_hdr->sb128, y_stripe,
  166|  32.0k|                           row_h, w, h, ss_hor, 1);
  167|  34.7k|            if (have_tt && resize)
  ------------------
  |  Branch (167:17): [True: 34.7k, False: 18.4E]
  |  Branch (167:28): [True: 5.50k, False: 29.2k]
  ------------------
  168|  5.50k|                backup_lpf(f, f->lf.cdef_lpf_line[2] + cdef_off_uv, src_stride[1],
  169|  5.50k|                           src[2] - offset_uv * PXSTRIDE(src_stride[1]),
  ------------------
  |  |   53|  5.50k|#define PXSTRIDE(x) (x)
  ------------------
  170|  5.50k|                           src_stride[1], ss_ver, f->seq_hdr->sb128, y_stripe,
  171|  5.50k|                           row_h, w, h, ss_hor, 0);
  172|  34.7k|        }
  173|  35.2k|    }
  174|   212k|}
dav1d_loopfilter_sbrow_cols_8bpc:
  316|   380k|{
  317|   380k|    int x, have_left;
  318|       |    // Don't filter outside the frame
  319|   380k|    const int is_sb64 = !f->seq_hdr->sb128;
  320|   380k|    const int starty4 = (sby & is_sb64) << 4;
  321|   380k|    const int sbsz = 32 >> is_sb64;
  322|   380k|    const int sbl2 = 5 - is_sb64;
  323|   380k|    const int halign = (f->bh + 31) & ~31;
  324|   380k|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
  325|   380k|    const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
  326|   380k|    const int vmask = 16 >> ss_ver, hmask = 16 >> ss_hor;
  327|   380k|    const unsigned vmax = 1U << vmask, hmax = 1U << hmask;
  328|   380k|    const unsigned endy4 = starty4 + imin(f->h4 - sby * sbsz, sbsz);
  329|   380k|    const unsigned uv_endy4 = (endy4 + ss_ver) >> ss_ver;
  330|       |
  331|       |    // fix lpf strength at tile col boundaries
  332|   380k|    const uint8_t *lpf_y = &f->lf.tx_lpf_right_edge[0][sby << sbl2];
  333|   380k|    const uint8_t *lpf_uv = &f->lf.tx_lpf_right_edge[1][sby << (sbl2 - ss_ver)];
  334|   581k|    for (int tile_col = 1;; tile_col++) {
  335|   581k|        x = f->frame_hdr->tiling.col_start_sb[tile_col];
  336|   581k|        if ((x << sbl2) >= f->bw) break;
  ------------------
  |  Branch (336:13): [True: 380k, False: 201k]
  ------------------
  337|   201k|        const int bx4 = x & is_sb64 ? 16 : 0, cbx4 = bx4 >> ss_hor;
  ------------------
  |  Branch (337:25): [True: 197k, False: 3.52k]
  ------------------
  338|   201k|        x >>= is_sb64;
  339|       |
  340|   201k|        uint16_t (*const y_hmask)[2] = lflvl[x].filter_y[0][bx4];
  341|  3.44M|        for (unsigned y = starty4, mask = 1 << y; y < endy4; y++, mask <<= 1) {
  ------------------
  |  Branch (341:51): [True: 3.23M, False: 201k]
  ------------------
  342|  3.23M|            const int sidx = mask >= 0x10000U;
  343|  3.23M|            const unsigned smask = mask >> (sidx << 4);
  344|  3.23M|            const int idx = 2 * !!(y_hmask[2][sidx] & smask) +
  345|  3.23M|                                !!(y_hmask[1][sidx] & smask);
  346|  3.23M|            y_hmask[2][sidx] &= ~smask;
  347|  3.23M|            y_hmask[1][sidx] &= ~smask;
  348|  3.23M|            y_hmask[0][sidx] &= ~smask;
  349|  3.23M|            y_hmask[imin(idx, lpf_y[y - starty4])][sidx] |= smask;
  350|  3.23M|        }
  351|       |
  352|   201k|        if (f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400) {
  ------------------
  |  Branch (352:13): [True: 3.45k, False: 197k]
  ------------------
  353|  3.45k|            uint16_t (*const uv_hmask)[2] = lflvl[x].filter_uv[0][cbx4];
  354|  47.0k|            for (unsigned y = starty4 >> ss_ver, uv_mask = 1 << y; y < uv_endy4;
  ------------------
  |  Branch (354:68): [True: 43.5k, False: 3.45k]
  ------------------
  355|  43.5k|                 y++, uv_mask <<= 1)
  356|  43.5k|            {
  357|  43.5k|                const int sidx = uv_mask >= vmax;
  358|  43.5k|                const unsigned smask = uv_mask >> (sidx << (4 - ss_ver));
  359|  43.5k|                const int idx = !!(uv_hmask[1][sidx] & smask);
  360|  43.5k|                uv_hmask[1][sidx] &= ~smask;
  361|  43.5k|                uv_hmask[0][sidx] &= ~smask;
  362|  43.5k|                uv_hmask[imin(idx, lpf_uv[y - (starty4 >> ss_ver)])][sidx] |= smask;
  363|  43.5k|            }
  364|  3.45k|        }
  365|   201k|        lpf_y  += halign;
  366|   201k|        lpf_uv += halign >> ss_ver;
  367|   201k|    }
  368|       |
  369|       |    // fix lpf strength at tile row boundaries
  370|   380k|    if (start_of_tile_row) {
  ------------------
  |  Branch (370:9): [True: 649, False: 379k]
  ------------------
  371|    649|        const BlockContext *a;
  372|    649|        for (x = 0, a = &f->a[f->sb128w * (start_of_tile_row - 1)];
  373|  2.65k|             x < f->sb128w; x++, a++)
  ------------------
  |  Branch (373:14): [True: 2.00k, False: 649]
  ------------------
  374|  2.00k|        {
  375|  2.00k|            uint16_t (*const y_vmask)[2] = lflvl[x].filter_y[1][starty4];
  376|  2.00k|            const unsigned w = imin(32, f->w4 - (x << 5));
  377|  62.2k|            for (unsigned mask = 1, i = 0; i < w; mask <<= 1, i++) {
  ------------------
  |  Branch (377:44): [True: 60.2k, False: 2.00k]
  ------------------
  378|  60.2k|                const int sidx = mask >= 0x10000U;
  379|  60.2k|                const unsigned smask = mask >> (sidx << 4);
  380|  60.2k|                const int idx = 2 * !!(y_vmask[2][sidx] & smask) +
  381|  60.2k|                                    !!(y_vmask[1][sidx] & smask);
  382|  60.2k|                y_vmask[2][sidx] &= ~smask;
  383|  60.2k|                y_vmask[1][sidx] &= ~smask;
  384|  60.2k|                y_vmask[0][sidx] &= ~smask;
  385|  60.2k|                y_vmask[imin(idx, a->tx_lpf_y[i])][sidx] |= smask;
  386|  60.2k|            }
  387|       |
  388|  2.00k|            if (f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400) {
  ------------------
  |  Branch (388:17): [True: 1.24k, False: 759]
  ------------------
  389|  1.24k|                const unsigned cw = (w + ss_hor) >> ss_hor;
  390|  1.24k|                uint16_t (*const uv_vmask)[2] = lflvl[x].filter_uv[1][starty4 >> ss_ver];
  391|  20.1k|                for (unsigned uv_mask = 1, i = 0; i < cw; uv_mask <<= 1, i++) {
  ------------------
  |  Branch (391:51): [True: 18.8k, False: 1.24k]
  ------------------
  392|  18.8k|                    const int sidx = uv_mask >= hmax;
  393|  18.8k|                    const unsigned smask = uv_mask >> (sidx << (4 - ss_hor));
  394|  18.8k|                    const int idx = !!(uv_vmask[1][sidx] & smask);
  395|  18.8k|                    uv_vmask[1][sidx] &= ~smask;
  396|  18.8k|                    uv_vmask[0][sidx] &= ~smask;
  397|  18.8k|                    uv_vmask[imin(idx, a->tx_lpf_uv[i])][sidx] |= smask;
  398|  18.8k|                }
  399|  1.24k|            }
  400|  2.00k|        }
  401|    649|    }
  402|       |
  403|   380k|    pixel *ptr;
  404|   380k|    uint8_t (*level_ptr)[4] = f->lf.level + f->b4_stride * sby * sbsz;
  405|   797k|    for (ptr = p[0], have_left = 0, x = 0; x < f->sb128w;
  ------------------
  |  Branch (405:44): [True: 416k, False: 380k]
  ------------------
  406|   416k|         x++, have_left = 1, ptr += 128, level_ptr += 32)
  407|   416k|    {
  408|   416k|        filter_plane_cols_y(f, have_left, level_ptr, f->b4_stride,
  409|   416k|                            lflvl[x].filter_y[0], ptr, f->cur.stride[0],
  410|   416k|                            imin(32, f->w4 - x * 32), starty4, endy4);
  411|   416k|    }
  412|       |
  413|   380k|    if (!f->frame_hdr->loopfilter.level_u && !f->frame_hdr->loopfilter.level_v)
  ------------------
  |  Branch (413:9): [True: 351k, False: 29.5k]
  |  Branch (413:46): [True: 349k, False: 1.70k]
  ------------------
  414|   349k|        return;
  415|       |
  416|  31.2k|    ptrdiff_t uv_off;
  417|  31.2k|    level_ptr = f->lf.level + f->b4_stride * (sby * sbsz >> ss_ver);
  418|  92.7k|    for (uv_off = 0, have_left = 0, x = 0; x < f->sb128w;
  ------------------
  |  Branch (418:44): [True: 61.5k, False: 31.2k]
  ------------------
  419|  61.5k|         x++, have_left = 1, uv_off += 128 >> ss_hor, level_ptr += 32 >> ss_hor)
  420|  61.5k|    {
  421|  61.5k|        filter_plane_cols_uv(f, have_left, level_ptr, f->b4_stride,
  422|  61.5k|                             lflvl[x].filter_uv[0],
  423|  61.5k|                             &p[1][uv_off], &p[2][uv_off], f->cur.stride[1],
  424|  61.5k|                             (imin(32, f->w4 - x * 32) + ss_hor) >> ss_hor,
  425|  61.5k|                             starty4 >> ss_ver, uv_endy4, ss_ver);
  426|  61.5k|    }
  427|  31.2k|}
dav1d_loopfilter_sbrow_rows_8bpc:
  432|   380k|{
  433|   380k|    int x;
  434|       |    // Don't filter outside the frame
  435|   380k|    const int have_top = sby > 0;
  436|   380k|    const int is_sb64 = !f->seq_hdr->sb128;
  437|   380k|    const int starty4 = (sby & is_sb64) << 4;
  438|   380k|    const int sbsz = 32 >> is_sb64;
  439|   380k|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
  440|   380k|    const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
  441|   380k|    const unsigned endy4 = starty4 + imin(f->h4 - sby * sbsz, sbsz);
  442|   380k|    const unsigned uv_endy4 = (endy4 + ss_ver) >> ss_ver;
  443|       |
  444|   380k|    pixel *ptr;
  445|   380k|    uint8_t (*level_ptr)[4] = f->lf.level + f->b4_stride * sby * sbsz;
  446|   797k|    for (ptr = p[0], x = 0; x < f->sb128w; x++, ptr += 128, level_ptr += 32) {
  ------------------
  |  Branch (446:29): [True: 416k, False: 380k]
  ------------------
  447|   416k|        filter_plane_rows_y(f, have_top, level_ptr, f->b4_stride,
  448|   416k|                            lflvl[x].filter_y[1], ptr, f->cur.stride[0],
  449|   416k|                            imin(32, f->w4 - x * 32), starty4, endy4);
  450|   416k|    }
  451|       |
  452|   380k|    if (!f->frame_hdr->loopfilter.level_u && !f->frame_hdr->loopfilter.level_v)
  ------------------
  |  Branch (452:9): [True: 351k, False: 29.5k]
  |  Branch (452:46): [True: 349k, False: 1.70k]
  ------------------
  453|   349k|        return;
  454|       |
  455|  31.2k|    ptrdiff_t uv_off;
  456|  31.2k|    level_ptr = f->lf.level + f->b4_stride * (sby * sbsz >> ss_ver);
  457|  92.7k|    for (uv_off = 0, x = 0; x < f->sb128w;
  ------------------
  |  Branch (457:29): [True: 61.5k, False: 31.2k]
  ------------------
  458|  61.5k|         x++, uv_off += 128 >> ss_hor, level_ptr += 32 >> ss_hor)
  459|  61.5k|    {
  460|  61.5k|        filter_plane_rows_uv(f, have_top, level_ptr, f->b4_stride,
  461|  61.5k|                             lflvl[x].filter_uv[1],
  462|  61.5k|                             &p[1][uv_off], &p[2][uv_off], f->cur.stride[1],
  463|  61.5k|                             (imin(32, f->w4 - x * 32) + ss_hor) >> ss_hor,
  464|  61.5k|                             starty4 >> ss_ver, uv_endy4, ss_hor);
  465|  61.5k|    }
  466|  31.2k|}
lf_apply_tmpl.c:backup_lpf:
   47|  1.00M|{
   48|  1.00M|    const int cdef_backup = !lr_backup;
   49|  1.00M|    const int dst_w = f->frame_hdr->super_res.enabled ?
  ------------------
  |  Branch (49:23): [True: 84.4k, False: 916k]
  ------------------
   50|   916k|                      (f->frame_hdr->width[1] + ss_hor) >> ss_hor : src_w;
   51|       |
   52|       |    // The first stripe of the frame is shorter by 8 luma pixel rows.
   53|  1.00M|    int stripe_h = ((64 << (cdef_backup & sb128)) - 8 * !row) >> ss_ver;
   54|  1.00M|    src += (stripe_h - 2) * PXSTRIDE(src_stride);
  ------------------
  |  |   53|  1.00M|#define PXSTRIDE(x) (x)
  ------------------
   55|       |
   56|  1.00M|    if (f->c->n_tc == 1) {
  ------------------
  |  Branch (56:9): [True: 0, False: 1.00M]
  ------------------
   57|      0|        if (row) {
  ------------------
  |  Branch (57:13): [True: 0, False: 0]
  ------------------
   58|      0|            const int top = 4 << sb128;
   59|       |            // Copy the top part of the stored loop filtered pixels from the
   60|       |            // previous sb row needed above the first stripe of this sb row.
   61|      0|            pixel_copy(&dst[PXSTRIDE(dst_stride) *  0],
  ------------------
  |  |   47|      0|#define pixel_copy memcpy
  ------------------
                          pixel_copy(&dst[PXSTRIDE(dst_stride) *  0],
  ------------------
  |  |   53|      0|#define PXSTRIDE(x) (x)
  ------------------
   62|      0|                       &dst[PXSTRIDE(dst_stride) *  top],      dst_w);
  ------------------
  |  |   53|      0|#define PXSTRIDE(x) (x)
  ------------------
   63|      0|            pixel_copy(&dst[PXSTRIDE(dst_stride) *  1],
  ------------------
  |  |   47|      0|#define pixel_copy memcpy
  ------------------
                          pixel_copy(&dst[PXSTRIDE(dst_stride) *  1],
  ------------------
  |  |   53|      0|#define PXSTRIDE(x) (x)
  ------------------
   64|      0|                       &dst[PXSTRIDE(dst_stride) * (top + 1)], dst_w);
  ------------------
  |  |   53|      0|#define PXSTRIDE(x) (x)
  ------------------
   65|      0|            pixel_copy(&dst[PXSTRIDE(dst_stride) *  2],
  ------------------
  |  |   47|      0|#define pixel_copy memcpy
  ------------------
                          pixel_copy(&dst[PXSTRIDE(dst_stride) *  2],
  ------------------
  |  |   53|      0|#define PXSTRIDE(x) (x)
  ------------------
   66|      0|                       &dst[PXSTRIDE(dst_stride) * (top + 2)], dst_w);
  ------------------
  |  |   53|      0|#define PXSTRIDE(x) (x)
  ------------------
   67|      0|            pixel_copy(&dst[PXSTRIDE(dst_stride) *  3],
  ------------------
  |  |   47|      0|#define pixel_copy memcpy
  ------------------
                          pixel_copy(&dst[PXSTRIDE(dst_stride) *  3],
  ------------------
  |  |   53|      0|#define PXSTRIDE(x) (x)
  ------------------
   68|      0|                       &dst[PXSTRIDE(dst_stride) * (top + 3)], dst_w);
  ------------------
  |  |   53|      0|#define PXSTRIDE(x) (x)
  ------------------
   69|      0|        }
   70|      0|        dst += 4 * PXSTRIDE(dst_stride);
  ------------------
  |  |   53|      0|#define PXSTRIDE(x) (x)
  ------------------
   71|      0|    }
   72|       |
   73|  1.00M|    if (lr_backup && (f->frame_hdr->width[0] != f->frame_hdr->width[1])) {
  ------------------
  |  Branch (73:9): [True: 946k, False: 53.6k]
  |  Branch (73:22): [True: 24.1k, False: 922k]
  ------------------
   74|  55.9k|        while (row + stripe_h <= row_h) {
  ------------------
  |  Branch (74:16): [True: 31.7k, False: 24.1k]
  ------------------
   75|  31.7k|            const int n_lines = 4 - (row + stripe_h + 1 == h);
   76|  31.7k|            f->dsp->mc.resize(dst, dst_stride, src, src_stride,
   77|  31.7k|                              dst_w, n_lines, src_w, f->resize_step[ss_hor],
   78|  31.7k|                              f->resize_start[ss_hor] HIGHBD_CALL_SUFFIX);
   79|  31.7k|            row += stripe_h; // unmodified stripe_h for the 1st stripe
   80|  31.7k|            stripe_h = 64 >> ss_ver;
   81|  31.7k|            src += stripe_h * PXSTRIDE(src_stride);
  ------------------
  |  |   53|  31.7k|#define PXSTRIDE(x) (x)
  ------------------
   82|  31.7k|            dst += n_lines * PXSTRIDE(dst_stride);
  ------------------
  |  |   53|  31.7k|#define PXSTRIDE(x) (x)
  ------------------
   83|  31.7k|            if (n_lines == 3) {
  ------------------
  |  Branch (83:17): [True: 2.83k, False: 28.9k]
  ------------------
   84|  2.83k|                pixel_copy(dst, &dst[-PXSTRIDE(dst_stride)], dst_w);
  ------------------
  |  |   47|  2.83k|#define pixel_copy memcpy
  ------------------
                              pixel_copy(dst, &dst[-PXSTRIDE(dst_stride)], dst_w);
  ------------------
  |  |   53|  2.83k|#define PXSTRIDE(x) (x)
  ------------------
   85|  2.83k|                dst += PXSTRIDE(dst_stride);
  ------------------
  |  |   53|  2.83k|#define PXSTRIDE(x) (x)
  ------------------
   86|  2.83k|            }
   87|  31.7k|        }
   88|   976k|    } else {
   89|  1.94M|        while (row + stripe_h <= row_h) {
  ------------------
  |  Branch (89:16): [True: 971k, False: 976k]
  ------------------
   90|   971k|            const int n_lines = 4 - (row + stripe_h + 1 == h);
   91|  4.84M|            for (int i = 0; i < 4; i++) {
  ------------------
  |  Branch (91:29): [True: 3.87M, False: 971k]
  ------------------
   92|  3.87M|                pixel_copy(dst, i == n_lines ? &dst[-PXSTRIDE(dst_stride)] :
  ------------------
  |  |   47|  3.87M|#define pixel_copy memcpy
  ------------------
                              pixel_copy(dst, i == n_lines ? &dst[-PXSTRIDE(dst_stride)] :
  ------------------
  |  |   53|  7.91k|#define PXSTRIDE(x) (x)
  ------------------
  |  Branch (92:33): [True: 7.91k, False: 3.86M]
  ------------------
   93|  3.87M|                                               src, src_w);
   94|  3.87M|                dst += PXSTRIDE(dst_stride);
  ------------------
  |  |   53|  3.87M|#define PXSTRIDE(x) (x)
  ------------------
   95|  3.87M|                src += PXSTRIDE(src_stride);
  ------------------
  |  |   53|  3.87M|#define PXSTRIDE(x) (x)
  ------------------
   96|  3.87M|            }
   97|   971k|            row += stripe_h; // unmodified stripe_h for the 1st stripe
   98|   971k|            stripe_h = 64 >> ss_ver;
   99|   971k|            src += (stripe_h - 4) * PXSTRIDE(src_stride);
  ------------------
  |  |   53|   971k|#define PXSTRIDE(x) (x)
  ------------------
  100|   971k|        }
  101|   976k|    }
  102|  1.00M|}
lf_apply_tmpl.c:filter_plane_cols_y:
  184|   757k|{
  185|   757k|    const Dav1dDSPContext *const dsp = f->dsp;
  186|       |
  187|       |    // filter edges between columns (e.g. block1 | block2)
  188|  20.7M|    for (int x = 0; x < w; x++) {
  ------------------
  |  Branch (188:21): [True: 19.9M, False: 757k]
  ------------------
  189|  19.9M|        if (!have_left && !x) continue;
  ------------------
  |  Branch (189:13): [True: 18.0M, False: 1.93M]
  |  Branch (189:27): [True: 693k, False: 17.3M]
  ------------------
  190|  19.2M|        uint32_t hmask[4];
  191|  19.2M|        if (!starty4) {
  ------------------
  |  Branch (191:13): [True: 14.1M, False: 5.08M]
  ------------------
  192|  14.1M|            hmask[0] = mask[x][0][0];
  193|  14.1M|            hmask[1] = mask[x][1][0];
  194|  14.1M|            hmask[2] = mask[x][2][0];
  195|  14.1M|            if (endy4 > 16) {
  ------------------
  |  Branch (195:17): [True: 8.82M, False: 5.35M]
  ------------------
  196|  8.82M|                hmask[0] |= (unsigned) mask[x][0][1] << 16;
  197|  8.82M|                hmask[1] |= (unsigned) mask[x][1][1] << 16;
  198|  8.82M|                hmask[2] |= (unsigned) mask[x][2][1] << 16;
  199|  8.82M|            }
  200|  14.1M|        } else {
  201|  5.08M|            hmask[0] = mask[x][0][1];
  202|  5.08M|            hmask[1] = mask[x][1][1];
  203|  5.08M|            hmask[2] = mask[x][2][1];
  204|  5.08M|        }
  205|  19.2M|        hmask[3] = 0;
  206|  19.2M|        dsp->lf.loop_filter_sb[0][0](&dst[x * 4], ls, hmask,
  207|  19.2M|                                     (const uint8_t(*)[4]) &lvl[x][0], b4_stride,
  208|  19.2M|                                     &f->lf.lim_lut, endy4 - starty4 HIGHBD_CALL_SUFFIX);
  209|  19.2M|    }
  210|   757k|}
lf_apply_tmpl.c:filter_plane_cols_uv:
  251|   114k|{
  252|   114k|    const Dav1dDSPContext *const dsp = f->dsp;
  253|       |
  254|       |    // filter edges between columns (e.g. block1 | block2)
  255|  1.79M|    for (int x = 0; x < w; x++) {
  ------------------
  |  Branch (255:21): [True: 1.67M, False: 114k]
  ------------------
  256|  1.67M|        if (!have_left && !x) continue;
  ------------------
  |  Branch (256:13): [True: 887k, False: 790k]
  |  Branch (256:27): [True: 68.6k, False: 819k]
  ------------------
  257|  1.61M|        uint32_t hmask[3];
  258|  1.61M|        if (!starty4) {
  ------------------
  |  Branch (258:13): [True: 1.49M, False: 113k]
  ------------------
  259|  1.49M|            hmask[0] = mask[x][0][0];
  260|  1.49M|            hmask[1] = mask[x][1][0];
  261|  1.49M|            if (endy4 > (16 >> ss_ver)) {
  ------------------
  |  Branch (261:17): [True: 1.29M, False: 206k]
  ------------------
  262|  1.29M|                hmask[0] |= (unsigned) mask[x][0][1] << (16 >> ss_ver);
  263|  1.29M|                hmask[1] |= (unsigned) mask[x][1][1] << (16 >> ss_ver);
  264|  1.29M|            }
  265|  1.49M|        } else {
  266|   113k|            hmask[0] = mask[x][0][1];
  267|   113k|            hmask[1] = mask[x][1][1];
  268|   113k|        }
  269|  1.61M|        hmask[2] = 0;
  270|  1.61M|        dsp->lf.loop_filter_sb[1][0](&u[x * 4], ls, hmask,
  271|  1.61M|                                     (const uint8_t(*)[4]) &lvl[x][2], b4_stride,
  272|  1.61M|                                     &f->lf.lim_lut, endy4 - starty4 HIGHBD_CALL_SUFFIX);
  273|  1.61M|        dsp->lf.loop_filter_sb[1][0](&v[x * 4], ls, hmask,
  274|  1.61M|                                     (const uint8_t(*)[4]) &lvl[x][3], b4_stride,
  275|  1.61M|                                     &f->lf.lim_lut, endy4 - starty4 HIGHBD_CALL_SUFFIX);
  276|  1.61M|    }
  277|   114k|}
lf_apply_tmpl.c:filter_plane_rows_y:
  220|   757k|{
  221|   757k|    const Dav1dDSPContext *const dsp = f->dsp;
  222|       |
  223|       |    //                                 block1
  224|       |    // filter edges between rows (e.g. ------)
  225|       |    //                                 block2
  226|  18.3M|    for (int y = starty4; y < endy4;
  ------------------
  |  Branch (226:27): [True: 17.6M, False: 757k]
  ------------------
  227|  17.6M|         y++, dst += 4 * PXSTRIDE(ls), lvl += b4_stride)
  ------------------
  |  |   53|  17.6M|#define PXSTRIDE(x) (x)
  ------------------
  228|  17.6M|    {
  229|  17.6M|        if (!have_top && !y) continue;
  ------------------
  |  Branch (229:13): [True: 724k, False: 16.8M]
  |  Branch (229:26): [True: 32.8k, False: 691k]
  ------------------
  230|  17.5M|        const uint32_t vmask[4] = {
  231|  17.5M|            mask[y][0][0] | ((unsigned) mask[y][0][1] << 16),
  232|  17.5M|            mask[y][1][0] | ((unsigned) mask[y][1][1] << 16),
  233|  17.5M|            mask[y][2][0] | ((unsigned) mask[y][2][1] << 16),
  234|  17.5M|            0,
  235|  17.5M|        };
  236|  17.5M|        dsp->lf.loop_filter_sb[0][1](dst, ls, vmask,
  237|  17.5M|                                     (const uint8_t(*)[4]) &lvl[0][1], b4_stride,
  238|  17.5M|                                     &f->lf.lim_lut, w HIGHBD_CALL_SUFFIX);
  239|  17.5M|    }
  240|   757k|}
lf_apply_tmpl.c:filter_plane_rows_uv:
  288|   114k|{
  289|   114k|    const Dav1dDSPContext *const dsp = f->dsp;
  290|   114k|    ptrdiff_t off_l = 0;
  291|       |
  292|       |    //                                 block1
  293|       |    // filter edges between rows (e.g. ------)
  294|       |    //                                 block2
  295|  1.93M|    for (int y = starty4; y < endy4;
  ------------------
  |  Branch (295:27): [True: 1.82M, False: 114k]
  ------------------
  296|  1.82M|         y++, off_l += 4 * PXSTRIDE(ls), lvl += b4_stride)
  ------------------
  |  |   53|  1.82M|#define PXSTRIDE(x) (x)
  ------------------
  297|  1.82M|    {
  298|  1.82M|        if (!have_top && !y) continue;
  ------------------
  |  Branch (298:13): [True: 251k, False: 1.56M]
  |  Branch (298:26): [True: 16.4k, False: 235k]
  ------------------
  299|  1.80M|        const uint32_t vmask[3] = {
  300|  1.80M|            mask[y][0][0] | ((unsigned) mask[y][0][1] << (16 >> ss_hor)),
  301|  1.80M|            mask[y][1][0] | ((unsigned) mask[y][1][1] << (16 >> ss_hor)),
  302|  1.80M|            0,
  303|  1.80M|        };
  304|  1.80M|        dsp->lf.loop_filter_sb[1][1](&u[off_l], ls, vmask,
  305|  1.80M|                                     (const uint8_t(*)[4]) &lvl[0][2], b4_stride,
  306|  1.80M|                                     &f->lf.lim_lut, w HIGHBD_CALL_SUFFIX);
  307|  1.80M|        dsp->lf.loop_filter_sb[1][1](&v[off_l], ls, vmask,
  308|  1.80M|                                     (const uint8_t(*)[4]) &lvl[0][3], b4_stride,
  309|  1.80M|                                     &f->lf.lim_lut, w HIGHBD_CALL_SUFFIX);
  310|  1.80M|    }
  311|   114k|}
dav1d_copy_lpf_16bpc:
  106|   431k|{
  107|   431k|    const int have_tt = f->c->n_tc > 1;
  108|   431k|    const int resize = f->frame_hdr->width[0] != f->frame_hdr->width[1];
  109|   431k|    const int offset = 8 * !!sby;
  110|   431k|    const ptrdiff_t *const src_stride = f->cur.stride;
  111|   431k|    const ptrdiff_t *const lr_stride = f->sr_cur.p.stride;
  112|   431k|    const int tt_off = have_tt * sby * (4 << f->seq_hdr->sb128);
  113|   431k|    pixel *const dst[3] = {
  114|   431k|        f->lf.lr_lpf_line[0] + tt_off * PXSTRIDE(lr_stride[0]),
  115|   431k|        f->lf.lr_lpf_line[1] + tt_off * PXSTRIDE(lr_stride[1]),
  116|   431k|        f->lf.lr_lpf_line[2] + tt_off * PXSTRIDE(lr_stride[1])
  117|   431k|    };
  118|       |
  119|       |    // TODO Also check block level restore type to reduce copying.
  120|   431k|    const int restore_planes = f->lf.restore_planes;
  121|       |
  122|   431k|    if (f->seq_hdr->cdef || restore_planes & LR_RESTORE_Y) {
  ------------------
  |  Branch (122:9): [True: 417k, False: 13.9k]
  |  Branch (122:29): [True: 13.2k, False: 775]
  ------------------
  123|   430k|        const int h = f->cur.p.h;
  124|   430k|        const int w = f->bw << 2;
  125|   430k|        const int row_h = imin((sby + 1) << (6 + f->seq_hdr->sb128), h - 1);
  126|   430k|        const int y_stripe = (sby << (6 + f->seq_hdr->sb128)) - offset;
  127|   430k|        if (restore_planes & LR_RESTORE_Y || !resize)
  ------------------
  |  Branch (127:13): [True: 53.1k, False: 377k]
  |  Branch (127:46): [True: 362k, False: 15.5k]
  ------------------
  128|   415k|            backup_lpf(f, dst[0], lr_stride[0],
  129|   415k|                       src[0] - offset * PXSTRIDE(src_stride[0]), src_stride[0],
  130|   415k|                       0, f->seq_hdr->sb128, y_stripe, row_h, w, h, 0, 1);
  131|   430k|        if (have_tt && resize) {
  ------------------
  |  Branch (131:13): [True: 430k, False: 18.4E]
  |  Branch (131:24): [True: 23.1k, False: 407k]
  ------------------
  132|  23.1k|            const ptrdiff_t cdef_off_y = sby * 4 * PXSTRIDE(src_stride[0]);
  133|  23.1k|            backup_lpf(f, f->lf.cdef_lpf_line[0] + cdef_off_y, src_stride[0],
  134|  23.1k|                       src[0] - offset * PXSTRIDE(src_stride[0]), src_stride[0],
  135|  23.1k|                       0, f->seq_hdr->sb128, y_stripe, row_h, w, h, 0, 0);
  136|  23.1k|        }
  137|   430k|    }
  138|   431k|    if ((f->seq_hdr->cdef || restore_planes & (LR_RESTORE_U | LR_RESTORE_V)) &&
  ------------------
  |  Branch (138:10): [True: 417k, False: 13.9k]
  |  Branch (138:30): [True: 2.25k, False: 11.7k]
  ------------------
  139|   419k|        f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400)
  ------------------
  |  Branch (139:9): [True: 133k, False: 285k]
  ------------------
  140|   133k|    {
  141|   133k|        const int ss_ver = f->sr_cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
  142|   133k|        const int ss_hor = f->sr_cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
  143|   133k|        const int h = (f->cur.p.h + ss_ver) >> ss_ver;
  144|   133k|        const int w = f->bw << (2 - ss_hor);
  145|   133k|        const int row_h = imin((sby + 1) << ((6 - ss_ver) + f->seq_hdr->sb128), h - 1);
  146|   133k|        const int offset_uv = offset >> ss_ver;
  147|   133k|        const int y_stripe = (sby << ((6 - ss_ver) + f->seq_hdr->sb128)) - offset_uv;
  148|   133k|        const ptrdiff_t cdef_off_uv = sby * 4 * PXSTRIDE(src_stride[1]);
  149|   133k|        if (f->seq_hdr->cdef || restore_planes & LR_RESTORE_U) {
  ------------------
  |  Branch (149:13): [True: 131k, False: 2.25k]
  |  Branch (149:33): [True: 1.62k, False: 635]
  ------------------
  150|   133k|            if (restore_planes & LR_RESTORE_U || !resize)
  ------------------
  |  Branch (150:17): [True: 7.97k, False: 125k]
  |  Branch (150:50): [True: 122k, False: 3.12k]
  ------------------
  151|   130k|                backup_lpf(f, dst[1], lr_stride[1],
  152|   130k|                           src[1] - offset_uv * PXSTRIDE(src_stride[1]),
  153|   130k|                           src_stride[1], ss_ver, f->seq_hdr->sb128, y_stripe,
  154|   130k|                           row_h, w, h, ss_hor, 1);
  155|   133k|            if (have_tt && resize)
  ------------------
  |  Branch (155:17): [True: 133k, False: 18.4E]
  |  Branch (155:28): [True: 7.70k, False: 125k]
  ------------------
  156|  7.70k|                backup_lpf(f, f->lf.cdef_lpf_line[1] + cdef_off_uv, src_stride[1],
  157|  7.70k|                           src[1] - offset_uv * PXSTRIDE(src_stride[1]),
  158|  7.70k|                           src_stride[1], ss_ver, f->seq_hdr->sb128, y_stripe,
  159|  7.70k|                           row_h, w, h, ss_hor, 0);
  160|   133k|        }
  161|   133k|        if (f->seq_hdr->cdef || restore_planes & LR_RESTORE_V) {
  ------------------
  |  Branch (161:13): [True: 131k, False: 2.25k]
  |  Branch (161:33): [True: 1.50k, False: 752]
  ------------------
  162|   133k|            if (restore_planes & LR_RESTORE_V || !resize)
  ------------------
  |  Branch (162:17): [True: 8.94k, False: 124k]
  |  Branch (162:50): [True: 121k, False: 2.49k]
  ------------------
  163|   130k|                backup_lpf(f, dst[2], lr_stride[1],
  164|   130k|                           src[2] - offset_uv * PXSTRIDE(src_stride[1]),
  165|   130k|                           src_stride[1], ss_ver, f->seq_hdr->sb128, y_stripe,
  166|   130k|                           row_h, w, h, ss_hor, 1);
  167|   133k|            if (have_tt && resize)
  ------------------
  |  Branch (167:17): [True: 133k, False: 18.4E]
  |  Branch (167:28): [True: 7.84k, False: 125k]
  ------------------
  168|  7.84k|                backup_lpf(f, f->lf.cdef_lpf_line[2] + cdef_off_uv, src_stride[1],
  169|  7.84k|                           src[2] - offset_uv * PXSTRIDE(src_stride[1]),
  170|  7.84k|                           src_stride[1], ss_ver, f->seq_hdr->sb128, y_stripe,
  171|  7.84k|                           row_h, w, h, ss_hor, 0);
  172|   133k|        }
  173|   133k|    }
  174|   431k|}
dav1d_loopfilter_sbrow_cols_16bpc:
  316|   312k|{
  317|   312k|    int x, have_left;
  318|       |    // Don't filter outside the frame
  319|   312k|    const int is_sb64 = !f->seq_hdr->sb128;
  320|   312k|    const int starty4 = (sby & is_sb64) << 4;
  321|   312k|    const int sbsz = 32 >> is_sb64;
  322|   312k|    const int sbl2 = 5 - is_sb64;
  323|   312k|    const int halign = (f->bh + 31) & ~31;
  324|   312k|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
  325|   312k|    const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
  326|   312k|    const int vmask = 16 >> ss_ver, hmask = 16 >> ss_hor;
  327|   312k|    const unsigned vmax = 1U << vmask, hmax = 1U << hmask;
  328|   312k|    const unsigned endy4 = starty4 + imin(f->h4 - sby * sbsz, sbsz);
  329|   312k|    const unsigned uv_endy4 = (endy4 + ss_ver) >> ss_ver;
  330|       |
  331|       |    // fix lpf strength at tile col boundaries
  332|   312k|    const uint8_t *lpf_y = &f->lf.tx_lpf_right_edge[0][sby << sbl2];
  333|   312k|    const uint8_t *lpf_uv = &f->lf.tx_lpf_right_edge[1][sby << (sbl2 - ss_ver)];
  334|   318k|    for (int tile_col = 1;; tile_col++) {
  335|   318k|        x = f->frame_hdr->tiling.col_start_sb[tile_col];
  336|   318k|        if ((x << sbl2) >= f->bw) break;
  ------------------
  |  Branch (336:13): [True: 312k, False: 6.38k]
  ------------------
  337|  6.38k|        const int bx4 = x & is_sb64 ? 16 : 0, cbx4 = bx4 >> ss_hor;
  ------------------
  |  Branch (337:25): [True: 4.19k, False: 2.19k]
  ------------------
  338|  6.38k|        x >>= is_sb64;
  339|       |
  340|  6.38k|        uint16_t (*const y_hmask)[2] = lflvl[x].filter_y[0][bx4];
  341|   130k|        for (unsigned y = starty4, mask = 1 << y; y < endy4; y++, mask <<= 1) {
  ------------------
  |  Branch (341:51): [True: 124k, False: 6.38k]
  ------------------
  342|   124k|            const int sidx = mask >= 0x10000U;
  343|   124k|            const unsigned smask = mask >> (sidx << 4);
  344|   124k|            const int idx = 2 * !!(y_hmask[2][sidx] & smask) +
  345|   124k|                                !!(y_hmask[1][sidx] & smask);
  346|   124k|            y_hmask[2][sidx] &= ~smask;
  347|   124k|            y_hmask[1][sidx] &= ~smask;
  348|   124k|            y_hmask[0][sidx] &= ~smask;
  349|   124k|            y_hmask[imin(idx, lpf_y[y - starty4])][sidx] |= smask;
  350|   124k|        }
  351|       |
  352|  6.38k|        if (f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400) {
  ------------------
  |  Branch (352:13): [True: 1.78k, False: 4.60k]
  ------------------
  353|  1.78k|            uint16_t (*const uv_hmask)[2] = lflvl[x].filter_uv[0][cbx4];
  354|  31.0k|            for (unsigned y = starty4 >> ss_ver, uv_mask = 1 << y; y < uv_endy4;
  ------------------
  |  Branch (354:68): [True: 29.2k, False: 1.78k]
  ------------------
  355|  29.2k|                 y++, uv_mask <<= 1)
  356|  29.2k|            {
  357|  29.2k|                const int sidx = uv_mask >= vmax;
  358|  29.2k|                const unsigned smask = uv_mask >> (sidx << (4 - ss_ver));
  359|  29.2k|                const int idx = !!(uv_hmask[1][sidx] & smask);
  360|  29.2k|                uv_hmask[1][sidx] &= ~smask;
  361|  29.2k|                uv_hmask[0][sidx] &= ~smask;
  362|  29.2k|                uv_hmask[imin(idx, lpf_uv[y - (starty4 >> ss_ver)])][sidx] |= smask;
  363|  29.2k|            }
  364|  1.78k|        }
  365|  6.38k|        lpf_y  += halign;
  366|  6.38k|        lpf_uv += halign >> ss_ver;
  367|  6.38k|    }
  368|       |
  369|       |    // fix lpf strength at tile row boundaries
  370|   312k|    if (start_of_tile_row) {
  ------------------
  |  Branch (370:9): [True: 565, False: 311k]
  ------------------
  371|    565|        const BlockContext *a;
  372|    565|        for (x = 0, a = &f->a[f->sb128w * (start_of_tile_row - 1)];
  373|  3.21k|             x < f->sb128w; x++, a++)
  ------------------
  |  Branch (373:14): [True: 2.65k, False: 565]
  ------------------
  374|  2.65k|        {
  375|  2.65k|            uint16_t (*const y_vmask)[2] = lflvl[x].filter_y[1][starty4];
  376|  2.65k|            const unsigned w = imin(32, f->w4 - (x << 5));
  377|  84.0k|            for (unsigned mask = 1, i = 0; i < w; mask <<= 1, i++) {
  ------------------
  |  Branch (377:44): [True: 81.3k, False: 2.65k]
  ------------------
  378|  81.3k|                const int sidx = mask >= 0x10000U;
  379|  81.3k|                const unsigned smask = mask >> (sidx << 4);
  380|  81.3k|                const int idx = 2 * !!(y_vmask[2][sidx] & smask) +
  381|  81.3k|                                    !!(y_vmask[1][sidx] & smask);
  382|  81.3k|                y_vmask[2][sidx] &= ~smask;
  383|  81.3k|                y_vmask[1][sidx] &= ~smask;
  384|  81.3k|                y_vmask[0][sidx] &= ~smask;
  385|  81.3k|                y_vmask[imin(idx, a->tx_lpf_y[i])][sidx] |= smask;
  386|  81.3k|            }
  387|       |
  388|  2.65k|            if (f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400) {
  ------------------
  |  Branch (388:17): [True: 2.02k, False: 626]
  ------------------
  389|  2.02k|                const unsigned cw = (w + ss_hor) >> ss_hor;
  390|  2.02k|                uint16_t (*const uv_vmask)[2] = lflvl[x].filter_uv[1][starty4 >> ss_ver];
  391|  38.9k|                for (unsigned uv_mask = 1, i = 0; i < cw; uv_mask <<= 1, i++) {
  ------------------
  |  Branch (391:51): [True: 36.8k, False: 2.02k]
  ------------------
  392|  36.8k|                    const int sidx = uv_mask >= hmax;
  393|  36.8k|                    const unsigned smask = uv_mask >> (sidx << (4 - ss_hor));
  394|  36.8k|                    const int idx = !!(uv_vmask[1][sidx] & smask);
  395|  36.8k|                    uv_vmask[1][sidx] &= ~smask;
  396|  36.8k|                    uv_vmask[0][sidx] &= ~smask;
  397|  36.8k|                    uv_vmask[imin(idx, a->tx_lpf_uv[i])][sidx] |= smask;
  398|  36.8k|                }
  399|  2.02k|            }
  400|  2.65k|        }
  401|    565|    }
  402|       |
  403|   312k|    pixel *ptr;
  404|   312k|    uint8_t (*level_ptr)[4] = f->lf.level + f->b4_stride * sby * sbsz;
  405|   653k|    for (ptr = p[0], have_left = 0, x = 0; x < f->sb128w;
  ------------------
  |  Branch (405:44): [True: 340k, False: 312k]
  ------------------
  406|   340k|         x++, have_left = 1, ptr += 128, level_ptr += 32)
  407|   340k|    {
  408|   340k|        filter_plane_cols_y(f, have_left, level_ptr, f->b4_stride,
  409|   340k|                            lflvl[x].filter_y[0], ptr, f->cur.stride[0],
  410|   340k|                            imin(32, f->w4 - x * 32), starty4, endy4);
  411|   340k|    }
  412|       |
  413|   312k|    if (!f->frame_hdr->loopfilter.level_u && !f->frame_hdr->loopfilter.level_v)
  ------------------
  |  Branch (413:9): [True: 276k, False: 35.7k]
  |  Branch (413:46): [True: 275k, False: 1.52k]
  ------------------
  414|   275k|        return;
  415|       |
  416|  37.2k|    ptrdiff_t uv_off;
  417|  37.2k|    level_ptr = f->lf.level + f->b4_stride * (sby * sbsz >> ss_ver);
  418|  90.4k|    for (uv_off = 0, have_left = 0, x = 0; x < f->sb128w;
  ------------------
  |  Branch (418:44): [True: 53.1k, False: 37.2k]
  ------------------
  419|  53.1k|         x++, have_left = 1, uv_off += 128 >> ss_hor, level_ptr += 32 >> ss_hor)
  420|  53.1k|    {
  421|  53.1k|        filter_plane_cols_uv(f, have_left, level_ptr, f->b4_stride,
  422|  53.1k|                             lflvl[x].filter_uv[0],
  423|  53.1k|                             &p[1][uv_off], &p[2][uv_off], f->cur.stride[1],
  424|  53.1k|                             (imin(32, f->w4 - x * 32) + ss_hor) >> ss_hor,
  425|  53.1k|                             starty4 >> ss_ver, uv_endy4, ss_ver);
  426|  53.1k|    }
  427|  37.2k|}
dav1d_loopfilter_sbrow_rows_16bpc:
  432|   312k|{
  433|   312k|    int x;
  434|       |    // Don't filter outside the frame
  435|   312k|    const int have_top = sby > 0;
  436|   312k|    const int is_sb64 = !f->seq_hdr->sb128;
  437|   312k|    const int starty4 = (sby & is_sb64) << 4;
  438|   312k|    const int sbsz = 32 >> is_sb64;
  439|   312k|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
  440|   312k|    const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
  441|   312k|    const unsigned endy4 = starty4 + imin(f->h4 - sby * sbsz, sbsz);
  442|   312k|    const unsigned uv_endy4 = (endy4 + ss_ver) >> ss_ver;
  443|       |
  444|   312k|    pixel *ptr;
  445|   312k|    uint8_t (*level_ptr)[4] = f->lf.level + f->b4_stride * sby * sbsz;
  446|   653k|    for (ptr = p[0], x = 0; x < f->sb128w; x++, ptr += 128, level_ptr += 32) {
  ------------------
  |  Branch (446:29): [True: 340k, False: 312k]
  ------------------
  447|   340k|        filter_plane_rows_y(f, have_top, level_ptr, f->b4_stride,
  448|   340k|                            lflvl[x].filter_y[1], ptr, f->cur.stride[0],
  449|   340k|                            imin(32, f->w4 - x * 32), starty4, endy4);
  450|   340k|    }
  451|       |
  452|   312k|    if (!f->frame_hdr->loopfilter.level_u && !f->frame_hdr->loopfilter.level_v)
  ------------------
  |  Branch (452:9): [True: 276k, False: 35.9k]
  |  Branch (452:46): [True: 275k, False: 1.51k]
  ------------------
  453|   275k|        return;
  454|       |
  455|  37.4k|    ptrdiff_t uv_off;
  456|  37.4k|    level_ptr = f->lf.level + f->b4_stride * (sby * sbsz >> ss_ver);
  457|  90.5k|    for (uv_off = 0, x = 0; x < f->sb128w;
  ------------------
  |  Branch (457:29): [True: 53.0k, False: 37.4k]
  ------------------
  458|  53.0k|         x++, uv_off += 128 >> ss_hor, level_ptr += 32 >> ss_hor)
  459|  53.0k|    {
  460|  53.0k|        filter_plane_rows_uv(f, have_top, level_ptr, f->b4_stride,
  461|  53.0k|                             lflvl[x].filter_uv[1],
  462|  53.0k|                             &p[1][uv_off], &p[2][uv_off], f->cur.stride[1],
  463|  53.0k|                             (imin(32, f->w4 - x * 32) + ss_hor) >> ss_hor,
  464|  53.0k|                             starty4 >> ss_ver, uv_endy4, ss_hor);
  465|  53.0k|    }
  466|  37.4k|}

dav1d_create_lf_mask_intra:
  271|  1.78M|{
  272|  1.78M|    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
  273|  1.78M|    const int bw4 = imin(iw - bx, b_dim[0]);
  274|  1.78M|    const int bh4 = imin(ih - by, b_dim[1]);
  275|  1.78M|    const int bx4 = bx & 31;
  276|  1.78M|    const int by4 = by & 31;
  277|  1.78M|    assert(bw4 >= 0 && bh4 >= 0);
  ------------------
  |  Branch (277:5): [True: 1.78M, False: 5.18k]
  |  Branch (277:5): [True: 1.78M, False: 18.4E]
  ------------------
  278|       |
  279|  1.78M|    if (bw4 && bh4) {
  ------------------
  |  Branch (279:9): [True: 1.78M, False: 3.32k]
  |  Branch (279:16): [True: 1.75M, False: 28.0k]
  ------------------
  280|  1.75M|        uint8_t (*level_cache_ptr)[4] = level_cache + by * b4_stride + bx;
  281|  9.14M|        for (int y = 0; y < bh4; y++) {
  ------------------
  |  Branch (281:25): [True: 7.38M, False: 1.75M]
  ------------------
  282|  64.1M|            for (int x = 0; x < bw4; x++) {
  ------------------
  |  Branch (282:29): [True: 56.7M, False: 7.38M]
  ------------------
  283|  56.7M|                level_cache_ptr[x][0] = filter_level[0][0][0];
  284|  56.7M|                level_cache_ptr[x][1] = filter_level[1][0][0];
  285|  56.7M|            }
  286|  7.38M|            level_cache_ptr += b4_stride;
  287|  7.38M|        }
  288|       |
  289|  1.75M|        mask_edges_intra(lflvl->filter_y, by4, bx4, bw4, bh4, ytx, ay, ly);
  290|  1.75M|    }
  291|       |
  292|  1.78M|    if (!auv) return;
  ------------------
  |  Branch (292:9): [True: 539k, False: 1.24M]
  ------------------
  293|       |
  294|  1.24M|    const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420;
  295|  1.24M|    const int ss_hor = layout != DAV1D_PIXEL_LAYOUT_I444;
  296|  1.24M|    const int cbw4 = imin(((iw + ss_hor) >> ss_hor) - (bx >> ss_hor),
  297|  1.24M|                          (b_dim[0] + ss_hor) >> ss_hor);
  298|  1.24M|    const int cbh4 = imin(((ih + ss_ver) >> ss_ver) - (by >> ss_ver),
  299|  1.24M|                          (b_dim[1] + ss_ver) >> ss_ver);
  300|  1.24M|    assert(cbw4 >= 0 && cbh4 >= 0);
  ------------------
  |  Branch (300:5): [True: 1.25M, False: 18.4E]
  |  Branch (300:5): [True: 1.25M, False: 12]
  ------------------
  301|       |
  302|  1.25M|    if (!cbw4 || !cbh4) return;
  ------------------
  |  Branch (302:9): [True: 1.45k, False: 1.25M]
  |  Branch (302:18): [True: 10.5k, False: 1.23M]
  ------------------
  303|       |
  304|  1.23M|    const int cbx4 = bx4 >> ss_hor;
  305|  1.23M|    const int cby4 = by4 >> ss_ver;
  306|       |
  307|  1.23M|    uint8_t (*level_cache_ptr)[4] =
  308|  1.23M|        level_cache + (by >> ss_ver) * b4_stride + (bx >> ss_hor);
  309|  4.61M|    for (int y = 0; y < cbh4; y++) {
  ------------------
  |  Branch (309:21): [True: 3.37M, False: 1.23M]
  ------------------
  310|  20.7M|        for (int x = 0; x < cbw4; x++) {
  ------------------
  |  Branch (310:25): [True: 17.3M, False: 3.37M]
  ------------------
  311|  17.3M|            level_cache_ptr[x][2] = filter_level[2][0][0];
  312|  17.3M|            level_cache_ptr[x][3] = filter_level[3][0][0];
  313|  17.3M|        }
  314|  3.37M|        level_cache_ptr += b4_stride;
  315|  3.37M|    }
  316|       |
  317|  1.23M|    mask_edges_chroma(lflvl->filter_uv, cby4, cbx4, cbw4, cbh4, 0, uvtx,
  318|  1.23M|                      auv, luv, ss_hor, ss_ver);
  319|  1.23M|}
dav1d_create_lf_mask_inter:
  334|  2.99M|{
  335|  2.99M|    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
  336|  2.99M|    const int bw4 = imin(iw - bx, b_dim[0]);
  337|  2.99M|    const int bh4 = imin(ih - by, b_dim[1]);
  338|  2.99M|    const int bx4 = bx & 31;
  339|  2.99M|    const int by4 = by & 31;
  340|  2.99M|    assert(bw4 >= 0 && bh4 >= 0);
  ------------------
  |  Branch (340:5): [True: 2.98M, False: 5.04k]
  |  Branch (340:5): [True: 2.98M, False: 18.4E]
  ------------------
  341|       |
  342|  2.98M|    if (bw4 && bh4) {
  ------------------
  |  Branch (342:9): [True: 2.98M, False: 5.56k]
  |  Branch (342:16): [True: 2.97M, False: 5.06k]
  ------------------
  343|  2.97M|        uint8_t (*level_cache_ptr)[4] = level_cache + by * b4_stride + bx;
  344|  32.1M|        for (int y = 0; y < bh4; y++) {
  ------------------
  |  Branch (344:25): [True: 29.1M, False: 2.97M]
  ------------------
  345|   475M|            for (int x = 0; x < bw4; x++) {
  ------------------
  |  Branch (345:29): [True: 446M, False: 29.1M]
  ------------------
  346|   446M|                level_cache_ptr[x][0] = filter_level[0][0][0];
  347|   446M|                level_cache_ptr[x][1] = filter_level[1][0][0];
  348|   446M|            }
  349|  29.1M|            level_cache_ptr += b4_stride;
  350|  29.1M|        }
  351|       |
  352|  2.97M|        mask_edges_inter(lflvl->filter_y, by4, bx4, bw4, bh4, skip,
  353|  2.97M|                         max_ytx, tx_masks, ay, ly);
  354|  2.97M|    }
  355|       |
  356|  2.98M|    if (!auv) return;
  ------------------
  |  Branch (356:9): [True: 1.72M, False: 1.26M]
  ------------------
  357|       |
  358|  1.26M|    const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420;
  359|  1.26M|    const int ss_hor = layout != DAV1D_PIXEL_LAYOUT_I444;
  360|  1.26M|    const int cbw4 = imin(((iw + ss_hor) >> ss_hor) - (bx >> ss_hor),
  361|  1.26M|                          (b_dim[0] + ss_hor) >> ss_hor);
  362|  1.26M|    const int cbh4 = imin(((ih + ss_ver) >> ss_ver) - (by >> ss_ver),
  363|  1.26M|                          (b_dim[1] + ss_ver) >> ss_ver);
  364|  1.26M|    assert(cbw4 >= 0 && cbh4 >= 0);
  ------------------
  |  Branch (364:5): [True: 1.26M, False: 18.4E]
  |  Branch (364:5): [True: 1.26M, False: 229]
  ------------------
  365|       |
  366|  1.26M|    if (!cbw4 || !cbh4) return;
  ------------------
  |  Branch (366:9): [True: 939, False: 1.26M]
  |  Branch (366:18): [True: 1.34k, False: 1.26M]
  ------------------
  367|       |
  368|  1.26M|    const int cbx4 = bx4 >> ss_hor;
  369|  1.26M|    const int cby4 = by4 >> ss_ver;
  370|       |
  371|  1.26M|    uint8_t (*level_cache_ptr)[4] =
  372|  1.26M|        level_cache + (by >> ss_ver) * b4_stride + (bx >> ss_hor);
  373|  5.44M|    for (int y = 0; y < cbh4; y++) {
  ------------------
  |  Branch (373:21): [True: 4.18M, False: 1.26M]
  ------------------
  374|  32.7M|        for (int x = 0; x < cbw4; x++) {
  ------------------
  |  Branch (374:25): [True: 28.5M, False: 4.18M]
  ------------------
  375|  28.5M|            level_cache_ptr[x][2] = filter_level[2][0][0];
  376|  28.5M|            level_cache_ptr[x][3] = filter_level[3][0][0];
  377|  28.5M|        }
  378|  4.18M|        level_cache_ptr += b4_stride;
  379|  4.18M|    }
  380|       |
  381|  1.26M|    mask_edges_chroma(lflvl->filter_uv, cby4, cbx4, cbw4, cbh4, skip, uvtx,
  382|  1.26M|                      auv, luv, ss_hor, ss_ver);
  383|  1.26M|}
dav1d_calc_eih:
  385|  31.6k|void dav1d_calc_eih(Av1FilterLUT *const lim_lut, const int filter_sharpness) {
  386|       |    // set E/I/H values from loopfilter level
  387|  31.6k|    const int sharp = filter_sharpness;
  388|  2.05M|    for (int level = 0; level < 64; level++) {
  ------------------
  |  Branch (388:25): [True: 2.02M, False: 31.6k]
  ------------------
  389|  2.02M|        int limit = level;
  390|       |
  391|  2.02M|        if (sharp > 0) {
  ------------------
  |  Branch (391:13): [True: 1.03M, False: 990k]
  ------------------
  392|  1.03M|            limit >>= (sharp + 3) >> 2;
  393|  1.03M|            limit = imin(limit, 9 - sharp);
  394|  1.03M|        }
  395|  2.02M|        limit = imax(limit, 1);
  396|       |
  397|  2.02M|        lim_lut->i[level] = limit;
  398|  2.02M|        lim_lut->e[level] = 2 * (level + 2) + limit;
  399|  2.02M|    }
  400|  31.6k|    lim_lut->sharp[0] = (sharp + 3) >> 2;
  401|  31.6k|    lim_lut->sharp[1] = sharp ? 9 - sharp : 0xff;
  ------------------
  |  Branch (401:25): [True: 16.1k, False: 15.5k]
  ------------------
  402|  31.6k|}
dav1d_calc_lf_values:
  441|   295k|{
  442|   295k|    const int n_seg = hdr->segmentation.enabled ? 8 : 1;
  ------------------
  |  Branch (442:23): [True: 22.7k, False: 272k]
  ------------------
  443|       |
  444|   295k|    if (!hdr->loopfilter.level_y[0] && !hdr->loopfilter.level_y[1]) {
  ------------------
  |  Branch (444:9): [True: 209k, False: 85.6k]
  |  Branch (444:40): [True: 198k, False: 11.0k]
  ------------------
  445|   198k|        memset(lflvl_values, 0, sizeof(*lflvl_values) * n_seg);
  446|   198k|        return;
  447|   198k|    }
  448|       |
  449|  96.7k|    const Dav1dLoopfilterModeRefDeltas *const mr_deltas =
  450|  96.7k|        hdr->loopfilter.mode_ref_delta_enabled ?
  ------------------
  |  Branch (450:9): [True: 60.2k, False: 36.4k]
  ------------------
  451|  96.7k|        &hdr->loopfilter.mode_ref_deltas : NULL;
  452|   327k|    for (int s = 0; s < n_seg; s++) {
  ------------------
  |  Branch (452:21): [True: 230k, False: 96.7k]
  ------------------
  453|   230k|        const Dav1dSegmentationData *const segd =
  454|   230k|            hdr->segmentation.enabled ? &hdr->segmentation.seg_data.d[s] : NULL;
  ------------------
  |  Branch (454:13): [True: 153k, False: 77.6k]
  ------------------
  455|       |
  456|   230k|        calc_lf_value(lflvl_values[s][0], hdr->loopfilter.level_y[0],
  457|   230k|                      lf_delta[0], segd ? segd->delta_lf_y_v : 0, mr_deltas);
  ------------------
  |  Branch (457:36): [True: 153k, False: 77.6k]
  ------------------
  458|   230k|        calc_lf_value(lflvl_values[s][1], hdr->loopfilter.level_y[1],
  459|   230k|                      lf_delta[hdr->delta.lf.multi ? 1 : 0],
  ------------------
  |  Branch (459:32): [True: 88.4k, False: 142k]
  ------------------
  460|   230k|                      segd ? segd->delta_lf_y_h : 0, mr_deltas);
  ------------------
  |  Branch (460:23): [True: 153k, False: 77.6k]
  ------------------
  461|   230k|        calc_lf_value_chroma(lflvl_values[s][2], hdr->loopfilter.level_u,
  462|   230k|                             lf_delta[hdr->delta.lf.multi ? 2 : 0],
  ------------------
  |  Branch (462:39): [True: 88.4k, False: 142k]
  ------------------
  463|   230k|                             segd ? segd->delta_lf_u : 0, mr_deltas);
  ------------------
  |  Branch (463:30): [True: 153k, False: 77.6k]
  ------------------
  464|   230k|        calc_lf_value_chroma(lflvl_values[s][3], hdr->loopfilter.level_v,
  465|   230k|                             lf_delta[hdr->delta.lf.multi ? 3 : 0],
  ------------------
  |  Branch (465:39): [True: 88.4k, False: 142k]
  ------------------
  466|   230k|                             segd ? segd->delta_lf_v : 0, mr_deltas);
  ------------------
  |  Branch (466:30): [True: 153k, False: 77.6k]
  ------------------
  467|   230k|    }
  468|  96.7k|}
lf_mask.c:mask_edges_intra:
  152|  1.75M|{
  153|  1.75M|    const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[tx];
  154|  1.75M|    const int twl4 = t_dim->lw, thl4 = t_dim->lh;
  155|  1.75M|    const int twl4c = imin(2, twl4), thl4c = imin(2, thl4);
  156|  1.75M|    int y, x;
  157|       |
  158|       |    // left block edge
  159|  1.75M|    unsigned mask = 1U << by4;
  160|  9.18M|    for (y = 0; y < h4; y++, mask <<= 1) {
  ------------------
  |  Branch (160:17): [True: 7.42M, False: 1.75M]
  ------------------
  161|  7.42M|        const int sidx = mask >= 0x10000;
  162|  7.42M|        const unsigned smask = mask >> (sidx << 4);
  163|  7.42M|        masks[0][bx4][imin(twl4c, l[y])][sidx] |= smask;
  164|  7.42M|    }
  165|       |
  166|       |    // top block edge
  167|  9.52M|    for (x = 0, mask = 1U << bx4; x < w4; x++, mask <<= 1) {
  ------------------
  |  Branch (167:35): [True: 7.76M, False: 1.75M]
  ------------------
  168|  7.76M|        const int sidx = mask >= 0x10000;
  169|  7.76M|        const unsigned smask = mask >> (sidx << 4);
  170|  7.76M|        masks[1][by4][imin(thl4c, a[x])][sidx] |= smask;
  171|  7.76M|    }
  172|       |
  173|       |    // inner (tx) left|right edges
  174|  1.75M|    const int hstep = t_dim->w;
  175|  1.75M|    unsigned t = 1U << by4;
  176|  1.75M|    unsigned inner = (unsigned) ((((uint64_t) t) << h4) - t);
  177|  1.75M|    unsigned inner1 = inner & 0xffff, inner2 = inner >> 16;
  178|  2.17M|    for (x = hstep; x < w4; x += hstep) {
  ------------------
  |  Branch (178:21): [True: 421k, False: 1.75M]
  ------------------
  179|   421k|        if (inner1) masks[0][bx4 + x][twl4c][0] |= inner1;
  ------------------
  |  Branch (179:13): [True: 281k, False: 140k]
  ------------------
  180|   421k|        if (inner2) masks[0][bx4 + x][twl4c][1] |= inner2;
  ------------------
  |  Branch (180:13): [True: 202k, False: 218k]
  ------------------
  181|   421k|    }
  182|       |
  183|       |    //            top
  184|       |    // inner (tx) --- edges
  185|       |    //           bottom
  186|  1.75M|    const int vstep = t_dim->h;
  187|  1.75M|    t = 1U << bx4;
  188|  1.75M|    inner = (unsigned) ((((uint64_t) t) << w4) - t);
  189|  1.75M|    inner1 = inner & 0xffff;
  190|  1.75M|    inner2 = inner >> 16;
  191|  2.19M|    for (y = vstep; y < h4; y += vstep) {
  ------------------
  |  Branch (191:21): [True: 434k, False: 1.75M]
  ------------------
  192|   434k|        if (inner1) masks[1][by4 + y][thl4c][0] |= inner1;
  ------------------
  |  Branch (192:13): [True: 271k, False: 162k]
  ------------------
  193|   434k|        if (inner2) masks[1][by4 + y][thl4c][1] |= inner2;
  ------------------
  |  Branch (193:13): [True: 245k, False: 188k]
  ------------------
  194|   434k|    }
  195|       |
  196|  1.75M|    dav1d_memset_likely_pow2(a, thl4c, w4);
  197|  1.75M|    dav1d_memset_likely_pow2(l, twl4c, h4);
  198|  1.75M|}
lf_mask.c:mask_edges_chroma:
  207|  2.50M|{
  208|  2.50M|    const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[tx];
  209|  2.50M|    const int twl4 = t_dim->lw, thl4 = t_dim->lh;
  210|  2.50M|    const int twl4c = !!twl4, thl4c = !!thl4;
  211|  2.50M|    int y, x;
  212|  2.50M|    const int vbits = 4 - ss_ver, hbits = 4 - ss_hor;
  213|  2.50M|    const int vmask = 16 >> ss_ver, hmask = 16 >> ss_hor;
  214|  2.50M|    const unsigned vmax = 1 << vmask, hmax = 1 << hmask;
  215|       |
  216|       |    // left block edge
  217|  2.50M|    unsigned mask = 1U << cby4;
  218|  10.0M|    for (y = 0; y < ch4; y++, mask <<= 1) {
  ------------------
  |  Branch (218:17): [True: 7.57M, False: 2.50M]
  ------------------
  219|  7.57M|        const int sidx = mask >= vmax;
  220|  7.57M|        const unsigned smask = mask >> (sidx << vbits);
  221|  7.57M|        masks[0][cbx4][imin(twl4c, l[y])][sidx] |= smask;
  222|  7.57M|    }
  223|       |
  224|       |    // top block edge
  225|  9.73M|    for (x = 0, mask = 1U << cbx4; x < cw4; x++, mask <<= 1) {
  ------------------
  |  Branch (225:36): [True: 7.23M, False: 2.50M]
  ------------------
  226|  7.23M|        const int sidx = mask >= hmax;
  227|  7.23M|        const unsigned smask = mask >> (sidx << hbits);
  228|  7.23M|        masks[1][cby4][imin(thl4c, a[x])][sidx] |= smask;
  229|  7.23M|    }
  230|       |
  231|  2.50M|    if (!skip_inter) {
  ------------------
  |  Branch (231:9): [True: 1.92M, False: 573k]
  ------------------
  232|       |        // inner (tx) left|right edges
  233|  1.92M|        const int hstep = t_dim->w;
  234|  1.92M|        unsigned t = 1U << cby4;
  235|  1.92M|        unsigned inner = (unsigned) ((((uint64_t) t) << ch4) - t);
  236|  1.92M|        unsigned inner1 = inner & ((1 << vmask) - 1), inner2 = inner >> vmask;
  237|  2.00M|        for (x = hstep; x < cw4; x += hstep) {
  ------------------
  |  Branch (237:25): [True: 78.6k, False: 1.92M]
  ------------------
  238|  78.6k|            if (inner1) masks[0][cbx4 + x][twl4c][0] |= inner1;
  ------------------
  |  Branch (238:17): [True: 67.0k, False: 11.6k]
  ------------------
  239|  78.6k|            if (inner2) masks[0][cbx4 + x][twl4c][1] |= inner2;
  ------------------
  |  Branch (239:17): [True: 57.8k, False: 20.8k]
  ------------------
  240|  78.6k|        }
  241|       |
  242|       |        //            top
  243|       |        // inner (tx) --- edges
  244|       |        //           bottom
  245|  1.92M|        const int vstep = t_dim->h;
  246|  1.92M|        t = 1U << cbx4;
  247|  1.92M|        inner = (unsigned) ((((uint64_t) t) << cw4) - t);
  248|  1.92M|        inner1 = inner & ((1 << hmask) - 1), inner2 = inner >> hmask;
  249|  2.05M|        for (y = vstep; y < ch4; y += vstep) {
  ------------------
  |  Branch (249:25): [True: 130k, False: 1.92M]
  ------------------
  250|   130k|            if (inner1) masks[1][cby4 + y][thl4c][0] |= inner1;
  ------------------
  |  Branch (250:17): [True: 113k, False: 16.6k]
  ------------------
  251|   130k|            if (inner2) masks[1][cby4 + y][thl4c][1] |= inner2;
  ------------------
  |  Branch (251:17): [True: 100k, False: 29.5k]
  ------------------
  252|   130k|        }
  253|  1.92M|    }
  254|       |
  255|  2.50M|    dav1d_memset_likely_pow2(a, thl4c, cw4);
  256|  2.50M|    dav1d_memset_likely_pow2(l, twl4c, ch4);
  257|  2.50M|}
lf_mask.c:mask_edges_inter:
   85|  2.98M|{
   86|  2.98M|    const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[max_tx];
   87|  2.98M|    int y, x;
   88|       |
   89|  2.98M|    ALIGN_STK_16(uint8_t, txa, 2 /* edge */, [2 /* txsz, step */][32 /* y */][32 /* x */]);
  ------------------
  |  |  100|  2.98M|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  2.98M|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
   90|  6.51M|    for (int y_off = 0, y = 0; y < h4; y += t_dim->h, y_off++)
  ------------------
  |  Branch (90:32): [True: 3.53M, False: 2.98M]
  ------------------
   91|  11.0M|        for (int x_off = 0, x = 0; x < w4; x += t_dim->w, x_off++)
  ------------------
  |  Branch (91:36): [True: 7.49M, False: 3.53M]
  ------------------
   92|  7.49M|            decomp_tx((uint8_t(*)[2][32][32]) &txa[0][0][y][x],
   93|  7.49M|                      max_tx, 0, y_off, x_off, tx_masks);
   94|       |
   95|       |    // left block edge
   96|  2.98M|    unsigned mask = 1U << by4;
   97|  32.4M|    for (y = 0; y < h4; y++, mask <<= 1) {
  ------------------
  |  Branch (97:17): [True: 29.5M, False: 2.98M]
  ------------------
   98|  29.5M|        const int sidx = mask >= 0x10000;
   99|  29.5M|        const unsigned smask = mask >> (sidx << 4);
  100|  29.5M|        masks[0][bx4][imin(txa[0][0][y][0], l[y])][sidx] |= smask;
  101|  29.5M|    }
  102|       |
  103|       |    // top block edge
  104|  28.4M|    for (x = 0, mask = 1U << bx4; x < w4; x++, mask <<= 1) {
  ------------------
  |  Branch (104:35): [True: 25.5M, False: 2.98M]
  ------------------
  105|  25.5M|        const int sidx = mask >= 0x10000;
  106|  25.5M|        const unsigned smask = mask >> (sidx << 4);
  107|  25.5M|        masks[1][by4][imin(txa[1][0][0][x], a[x])][sidx] |= smask;
  108|  25.5M|    }
  109|       |
  110|  2.98M|    if (!skip) {
  ------------------
  |  Branch (110:9): [True: 1.09M, False: 1.88M]
  ------------------
  111|       |        // inner (tx) left|right edges
  112|  5.29M|        for (y = 0, mask = 1U << by4; y < h4; y++, mask <<= 1) {
  ------------------
  |  Branch (112:39): [True: 4.19M, False: 1.09M]
  ------------------
  113|  4.19M|            const int sidx = mask >= 0x10000U;
  114|  4.19M|            const unsigned smask = mask >> (sidx << 4);
  115|  4.19M|            int ltx = txa[0][0][y][0];
  116|  4.19M|            int step = txa[0][1][y][0];
  117|  5.22M|            for (x = step; x < w4; x += step) {
  ------------------
  |  Branch (117:28): [True: 1.02M, False: 4.19M]
  ------------------
  118|  1.02M|                const int rtx = txa[0][0][y][x];
  119|  1.02M|                masks[0][bx4 + x][imin(rtx, ltx)][sidx] |= smask;
  120|  1.02M|                ltx = rtx;
  121|  1.02M|                step = txa[0][1][y][x];
  122|  1.02M|            }
  123|  4.19M|        }
  124|       |
  125|       |        //            top
  126|       |        // inner (tx) --- edges
  127|       |        //           bottom
  128|  5.60M|        for (x = 0, mask = 1U << bx4; x < w4; x++, mask <<= 1) {
  ------------------
  |  Branch (128:39): [True: 4.50M, False: 1.09M]
  ------------------
  129|  4.50M|            const int sidx = mask >= 0x10000U;
  130|  4.50M|            const unsigned smask = mask >> (sidx << 4);
  131|  4.50M|            int ttx = txa[1][0][0][x];
  132|  4.50M|            int step = txa[1][1][0][x];
  133|  5.45M|            for (y = step; y < h4; y += step) {
  ------------------
  |  Branch (133:28): [True: 953k, False: 4.50M]
  ------------------
  134|   953k|                const int btx = txa[1][0][y][x];
  135|   953k|                masks[1][by4 + y][imin(ttx, btx)][sidx] |= smask;
  136|   953k|                ttx = btx;
  137|   953k|                step = txa[1][1][y][x];
  138|   953k|            }
  139|  4.50M|        }
  140|  1.09M|    }
  141|       |
  142|  32.5M|    for (y = 0; y < h4; y++)
  ------------------
  |  Branch (142:17): [True: 29.5M, False: 2.98M]
  ------------------
  143|  29.5M|        l[y] = txa[0][0][y][w4 - 1];
  144|  2.98M|    memcpy(a, txa[1][0][h4 - 1], w4);
  145|  2.98M|}
lf_mask.c:decomp_tx:
   44|  8.27M|{
   45|  8.27M|    const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[from];
   46|  8.27M|    const int is_split = (from == (int) TX_4X4 || depth > 1) ? 0 :
  ------------------
  |  Branch (46:27): [True: 4.00M, False: 4.27M]
  |  Branch (46:51): [True: 184k, False: 4.08M]
  ------------------
   47|  8.27M|        (tx_masks[depth] >> (y_off * 4 + x_off)) & 1;
   48|       |
   49|  8.27M|    if (is_split) {
  ------------------
  |  Branch (49:9): [True: 258k, False: 8.02M]
  ------------------
   50|   258k|        const enum RectTxfmSize sub = t_dim->sub;
   51|   258k|        const int htw4 = t_dim->w >> 1, hth4 = t_dim->h >> 1;
   52|       |
   53|   258k|        decomp_tx(txa, sub, depth + 1, y_off * 2 + 0, x_off * 2 + 0, tx_masks);
   54|   258k|        if (t_dim->w >= t_dim->h)
  ------------------
  |  Branch (54:13): [True: 204k, False: 54.3k]
  ------------------
   55|   204k|            decomp_tx((uint8_t(*)[2][32][32]) &txa[0][0][0][htw4],
   56|   204k|                      sub, depth + 1, y_off * 2 + 0, x_off * 2 + 1, tx_masks);
   57|   258k|        if (t_dim->h >= t_dim->w) {
  ------------------
  |  Branch (57:13): [True: 187k, False: 71.2k]
  ------------------
   58|   187k|            decomp_tx((uint8_t(*)[2][32][32]) &txa[0][0][hth4][0],
   59|   187k|                      sub, depth + 1, y_off * 2 + 1, x_off * 2 + 0, tx_masks);
   60|   187k|            if (t_dim->w >= t_dim->h)
  ------------------
  |  Branch (60:17): [True: 133k, False: 54.3k]
  ------------------
   61|   133k|                decomp_tx((uint8_t(*)[2][32][32]) &txa[0][0][hth4][htw4],
   62|   133k|                          sub, depth + 1, y_off * 2 + 1, x_off * 2 + 1, tx_masks);
   63|   187k|        }
   64|  8.02M|    } else {
   65|  8.02M|        const int lw = imin(2, t_dim->lw), lh = imin(2, t_dim->lh);
   66|       |
   67|  8.02M|#define set_ctx(rep_macro) \
   68|  8.02M|        for (int y = 0; y < t_dim->h; y++) { \
   69|  8.02M|            rep_macro(txa[0][0][y], 0, lw); \
   70|  8.02M|            rep_macro(txa[1][0][y], 0, lh); \
   71|  8.02M|            txa[0][1][y][0] = t_dim->w; \
   72|  8.02M|        }
   73|  8.02M|        case_set_upto16(t_dim->lw);
  ------------------
  |  |   80|  8.02M|    switch (var) { \
  |  |   81|  4.20M|    case 0: set_ctx(set_ctx1); break; \
  |  |  ------------------
  |  |  |  |   68|  8.77M|        for (int y = 0; y < t_dim->h; y++) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (68:25): [True: 4.57M, False: 4.20M]
  |  |  |  |  ------------------
  |  |  |  |   69|  4.57M|            rep_macro(txa[0][0][y], 0, lw); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   81|  4.57M|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|  4.57M|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   70|  4.57M|            rep_macro(txa[1][0][y], 0, lh); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   81|  4.57M|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|  4.57M|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   71|  4.57M|            txa[0][1][y][0] = t_dim->w; \
  |  |  |  |   72|  4.57M|        }
  |  |  ------------------
  |  |  |  Branch (81:5): [True: 4.20M, False: 3.81M]
  |  |  ------------------
  |  |   82|   867k|    case 1: set_ctx(set_ctx2); break; \
  |  |  ------------------
  |  |  |  |   68|  3.02M|        for (int y = 0; y < t_dim->h; y++) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (68:25): [True: 2.15M, False: 867k]
  |  |  |  |  ------------------
  |  |  |  |   69|  2.15M|            rep_macro(txa[0][0][y], 0, lw); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   82|  2.15M|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  2.15M|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   70|  2.15M|            rep_macro(txa[1][0][y], 0, lh); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   82|  2.15M|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  2.15M|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   71|  2.15M|            txa[0][1][y][0] = t_dim->w; \
  |  |  |  |   72|  2.15M|        }
  |  |  ------------------
  |  |  |  Branch (82:5): [True: 867k, False: 7.15M]
  |  |  ------------------
  |  |   83|   720k|    case 2: set_ctx(set_ctx4); break; \
  |  |  ------------------
  |  |  |  |   68|  3.80M|        for (int y = 0; y < t_dim->h; y++) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (68:25): [True: 3.08M, False: 720k]
  |  |  |  |  ------------------
  |  |  |  |   69|  3.08M|            rep_macro(txa[0][0][y], 0, lw); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   83|  3.08M|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|  3.08M|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   70|  3.08M|            rep_macro(txa[1][0][y], 0, lh); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   83|  3.08M|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|  3.08M|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   71|  3.08M|            txa[0][1][y][0] = t_dim->w; \
  |  |  |  |   72|  3.08M|        }
  |  |  ------------------
  |  |  |  Branch (83:5): [True: 720k, False: 7.30M]
  |  |  ------------------
  |  |   84|   212k|    case 3: set_ctx(set_ctx8); break; \
  |  |  ------------------
  |  |  |  |   68|  1.52M|        for (int y = 0; y < t_dim->h; y++) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (68:25): [True: 1.30M, False: 212k]
  |  |  |  |  ------------------
  |  |  |  |   69|  1.30M|            rep_macro(txa[0][0][y], 0, lw); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|  1.30M|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|  1.30M|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   70|  1.30M|            rep_macro(txa[1][0][y], 0, lh); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|  1.30M|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|  1.30M|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   71|  1.30M|            txa[0][1][y][0] = t_dim->w; \
  |  |  |  |   72|  1.30M|        }
  |  |  ------------------
  |  |  |  Branch (84:5): [True: 212k, False: 7.80M]
  |  |  ------------------
  |  |   85|  2.01M|    case 4: set_ctx(set_ctx16); break; \
  |  |  ------------------
  |  |  |  |   68|  34.0M|        for (int y = 0; y < t_dim->h; y++) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (68:25): [True: 31.9M, False: 2.01M]
  |  |  |  |  ------------------
  |  |  |  |   69|  31.9M|            rep_macro(txa[0][0][y], 0, lw); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  31.9M|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|  31.9M|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|  31.9M|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|  31.9M|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 31.9M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   70|  31.9M|            rep_macro(txa[1][0][y], 0, lh); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  31.9M|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|  31.9M|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|  31.9M|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|  31.9M|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 31.9M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   71|  31.9M|            txa[0][1][y][0] = t_dim->w; \
  |  |  |  |   72|  31.9M|        }
  |  |  ------------------
  |  |  |  Branch (85:5): [True: 2.01M, False: 6.00M]
  |  |  ------------------
  |  |   86|      0|    default: assert(0); \
  |  |  ------------------
  |  |  |  Branch (86:5): [True: 0, False: 8.02M]
  |  |  ------------------
  |  |   87|  8.02M|    }
  ------------------
  |  Branch (73:9): [Folded, False: 0]
  ------------------
   74|  8.02M|#undef set_ctx
   75|  8.02M|        dav1d_memset_pow2[t_dim->lw](txa[1][1][0], t_dim->h);
   76|  8.02M|    }
   77|  8.27M|}
lf_mask.c:calc_lf_value:
  408|   627k|{
  409|   627k|    const int base = iclip(iclip(base_lvl + lf_delta, 0, 63) + seg_delta, 0, 63);
  410|       |
  411|   627k|    if (!mr_delta) {
  ------------------
  |  Branch (411:9): [True: 287k, False: 340k]
  ------------------
  412|   287k|        memset(lflvl_values, base, sizeof(*lflvl_values) * 8);
  413|   340k|    } else {
  414|   340k|        const int sh = base >= 32;
  415|   340k|        lflvl_values[0][0] = lflvl_values[0][1] =
  416|   340k|            iclip(base + (mr_delta->ref_delta[0] * (1 << sh)), 0, 63);
  417|  2.71M|        for (int r = 1; r < 8; r++) {
  ------------------
  |  Branch (417:25): [True: 2.37M, False: 340k]
  ------------------
  418|  7.11M|            for (int m = 0; m < 2; m++) {
  ------------------
  |  Branch (418:29): [True: 4.74M, False: 2.37M]
  ------------------
  419|  4.74M|                const int delta =
  420|  4.74M|                    mr_delta->mode_delta[m] + mr_delta->ref_delta[r];
  421|  4.74M|                lflvl_values[r][m] = iclip(base + (delta * (1 << sh)), 0, 63);
  422|  4.74M|            }
  423|  2.37M|        }
  424|   340k|    }
  425|   627k|}
lf_mask.c:calc_lf_value_chroma:
  431|   461k|{
  432|   461k|    if (!base_lvl)
  ------------------
  |  Branch (432:9): [True: 294k, False: 166k]
  ------------------
  433|   294k|        memset(lflvl_values, 0, sizeof(*lflvl_values) * 8);
  434|   166k|    else
  435|   166k|        calc_lf_value(lflvl_values, base_lvl, lf_delta, seg_delta, mr_delta);
  436|   461k|}

dav1d_version:
   61|  10.2k|COLD const char *dav1d_version(void) {
   62|  10.2k|    return DAV1D_VERSION;
  ------------------
  |  |    2|  10.2k|#define DAV1D_VERSION "1718ff9"
  ------------------
   63|  10.2k|}
dav1d_default_settings:
   71|  10.2k|COLD void dav1d_default_settings(Dav1dSettings *const s) {
   72|  10.2k|    s->n_threads = 0;
   73|  10.2k|    s->max_frame_delay = 0;
   74|  10.2k|    s->apply_grain = 1;
   75|  10.2k|    s->allocator.cookie = NULL;
   76|  10.2k|    s->allocator.alloc_picture_callback = dav1d_default_picture_alloc;
   77|  10.2k|    s->allocator.release_picture_callback = dav1d_default_picture_release;
   78|  10.2k|    s->logger.cookie = NULL;
   79|       |    s->logger.callback = dav1d_log_default_callback;
  ------------------
  |  |   43|  10.2k|#define dav1d_log_default_callback NULL
  ------------------
   80|  10.2k|    s->operating_point = 0;
   81|  10.2k|    s->all_layers = 1; // just until the tests are adjusted
   82|  10.2k|    s->frame_size_limit = 0;
   83|  10.2k|    s->strict_std_compliance = 0;
   84|  10.2k|    s->output_invisible_frames = 0;
   85|  10.2k|    s->inloop_filters = DAV1D_INLOOPFILTER_ALL;
   86|  10.2k|    s->decode_frame_type = DAV1D_DECODEFRAMETYPE_ALL;
   87|  10.2k|}
dav1d_open:
  140|  10.2k|COLD int dav1d_open(Dav1dContext **const c_out, const Dav1dSettings *const s) {
  141|  10.2k|    static pthread_once_t initted = PTHREAD_ONCE_INIT;
  142|  10.2k|    pthread_once(&initted, init_internal);
  143|       |
  144|  10.2k|    validate_input_or_ret(c_out != NULL, DAV1D_ERR(EINVAL));
  ------------------
  |  |   52|  10.2k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:9): [True: 0, False: 10.2k]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
  145|  10.2k|    validate_input_or_ret(s != NULL, DAV1D_ERR(EINVAL));
  ------------------
  |  |   52|  10.2k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:9): [True: 0, False: 10.2k]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
  146|  10.2k|    validate_input_or_ret(s->n_threads >= 0 &&
  ------------------
  |  |   52|  20.4k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:11): [True: 10.2k, False: 0]
  |  |  |  Branch (52:11): [True: 10.2k, False: 0]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
  147|  10.2k|                          s->n_threads <= DAV1D_MAX_THREADS, DAV1D_ERR(EINVAL));
  148|  10.2k|    validate_input_or_ret(s->max_frame_delay >= 0 &&
  ------------------
  |  |   52|  20.4k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:11): [True: 10.2k, False: 0]
  |  |  |  Branch (52:11): [True: 10.2k, False: 0]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
  149|  10.2k|                          s->max_frame_delay <= DAV1D_MAX_FRAME_DELAY, DAV1D_ERR(EINVAL));
  150|  10.2k|    validate_input_or_ret(s->allocator.alloc_picture_callback != NULL,
  ------------------
  |  |   52|  10.2k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:9): [True: 0, False: 10.2k]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
  151|  10.2k|                          DAV1D_ERR(EINVAL));
  152|  10.2k|    validate_input_or_ret(s->allocator.release_picture_callback != NULL,
  ------------------
  |  |   52|  10.2k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:9): [True: 0, False: 10.2k]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
  153|  10.2k|                          DAV1D_ERR(EINVAL));
  154|  10.2k|    validate_input_or_ret(s->operating_point >= 0 &&
  ------------------
  |  |   52|  20.4k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:11): [True: 10.2k, False: 0]
  |  |  |  Branch (52:11): [True: 10.2k, False: 0]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
  155|  10.2k|                          s->operating_point <= 31, DAV1D_ERR(EINVAL));
  156|  10.2k|    validate_input_or_ret(s->decode_frame_type >= DAV1D_DECODEFRAMETYPE_ALL &&
  ------------------
  |  |   52|  20.4k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:11): [True: 10.2k, False: 0]
  |  |  |  Branch (52:11): [True: 10.2k, False: 0]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
  157|  10.2k|                          s->decode_frame_type <= DAV1D_DECODEFRAMETYPE_KEY, DAV1D_ERR(EINVAL));
  158|       |
  159|  10.2k|    pthread_attr_t thread_attr;
  160|  10.2k|    if (pthread_attr_init(&thread_attr)) return DAV1D_ERR(ENOMEM);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  |  Branch (160:9): [True: 0, False: 10.2k]
  ------------------
  161|  10.2k|    size_t stack_size = 1024 * 1024 + get_stack_size_internal(&thread_attr);
  162|       |
  163|  10.2k|    pthread_attr_setstacksize(&thread_attr, stack_size);
  164|       |
  165|  10.2k|    Dav1dContext *const c = *c_out = dav1d_alloc_aligned(ALLOC_COMMON_CTX, sizeof(*c), 64);
  ------------------
  |  |  134|  10.2k|#define dav1d_alloc_aligned(type, sz, align) dav1d_alloc_aligned_internal(sz, align)
  ------------------
  166|  10.2k|    if (!c) goto error;
  ------------------
  |  Branch (166:9): [True: 0, False: 10.2k]
  ------------------
  167|  10.2k|    memset(c, 0, sizeof(*c));
  168|       |
  169|  10.2k|    c->allocator = s->allocator;
  170|  10.2k|    c->logger = s->logger;
  171|  10.2k|    c->apply_grain = s->apply_grain;
  172|  10.2k|    c->operating_point = s->operating_point;
  173|  10.2k|    c->all_layers = s->all_layers;
  174|  10.2k|    c->frame_size_limit = s->frame_size_limit;
  175|  10.2k|    c->strict_std_compliance = s->strict_std_compliance;
  176|  10.2k|    c->output_invisible_frames = s->output_invisible_frames;
  177|  10.2k|    c->inloop_filters = s->inloop_filters;
  178|  10.2k|    c->decode_frame_type = s->decode_frame_type;
  179|       |
  180|  10.2k|    dav1d_data_props_set_defaults(&c->cached_error_props);
  181|       |
  182|  10.2k|    if (dav1d_mem_pool_init(ALLOC_OBU_HDR, &c->seq_hdr_pool) ||
  ------------------
  |  |  131|  20.4k|#define dav1d_mem_pool_init(type, pool) dav1d_mem_pool_init(pool)
  |  |  ------------------
  |  |  |  Branch (131:41): [True: 0, False: 10.2k]
  |  |  ------------------
  ------------------
  183|  10.2k|        dav1d_mem_pool_init(ALLOC_OBU_HDR, &c->frame_hdr_pool) ||
  ------------------
  |  |  131|  20.4k|#define dav1d_mem_pool_init(type, pool) dav1d_mem_pool_init(pool)
  |  |  ------------------
  |  |  |  Branch (131:41): [True: 0, False: 10.2k]
  |  |  ------------------
  ------------------
  184|  10.2k|        dav1d_mem_pool_init(ALLOC_SEGMAP, &c->segmap_pool) ||
  ------------------
  |  |  131|  20.4k|#define dav1d_mem_pool_init(type, pool) dav1d_mem_pool_init(pool)
  |  |  ------------------
  |  |  |  Branch (131:41): [True: 0, False: 10.2k]
  |  |  ------------------
  ------------------
  185|  10.2k|        dav1d_mem_pool_init(ALLOC_REFMVS, &c->refmvs_pool) ||
  ------------------
  |  |  131|  20.4k|#define dav1d_mem_pool_init(type, pool) dav1d_mem_pool_init(pool)
  |  |  ------------------
  |  |  |  Branch (131:41): [True: 0, False: 10.2k]
  |  |  ------------------
  ------------------
  186|  10.2k|        dav1d_mem_pool_init(ALLOC_PIC_CTX, &c->pic_ctx_pool) ||
  ------------------
  |  |  131|  20.4k|#define dav1d_mem_pool_init(type, pool) dav1d_mem_pool_init(pool)
  |  |  ------------------
  |  |  |  Branch (131:41): [True: 0, False: 10.2k]
  |  |  ------------------
  ------------------
  187|  10.2k|        dav1d_mem_pool_init(ALLOC_CDF, &c->cdf_pool))
  ------------------
  |  |  131|  10.2k|#define dav1d_mem_pool_init(type, pool) dav1d_mem_pool_init(pool)
  |  |  ------------------
  |  |  |  Branch (131:41): [True: 0, False: 10.2k]
  |  |  ------------------
  ------------------
  188|      0|    {
  189|      0|        goto error;
  190|      0|    }
  191|       |
  192|  10.2k|    if (c->allocator.alloc_picture_callback   == dav1d_default_picture_alloc &&
  ------------------
  |  Branch (192:9): [True: 10.2k, False: 0]
  ------------------
  193|  10.2k|        c->allocator.release_picture_callback == dav1d_default_picture_release)
  ------------------
  |  Branch (193:9): [True: 10.2k, False: 0]
  ------------------
  194|  10.2k|    {
  195|  10.2k|        if (c->allocator.cookie) goto error;
  ------------------
  |  Branch (195:13): [True: 0, False: 10.2k]
  ------------------
  196|  10.2k|        if (dav1d_mem_pool_init(ALLOC_PIC, &c->picture_pool)) goto error;
  ------------------
  |  |  131|  10.2k|#define dav1d_mem_pool_init(type, pool) dav1d_mem_pool_init(pool)
  |  |  ------------------
  |  |  |  Branch (131:41): [True: 0, False: 10.2k]
  |  |  ------------------
  ------------------
  197|  10.2k|        c->allocator.cookie = c->picture_pool;
  198|  10.2k|    } else if (c->allocator.alloc_picture_callback   == dav1d_default_picture_alloc ||
  ------------------
  |  Branch (198:16): [True: 0, False: 0]
  ------------------
  199|      0|               c->allocator.release_picture_callback == dav1d_default_picture_release)
  ------------------
  |  Branch (199:16): [True: 0, False: 0]
  ------------------
  200|      0|    {
  201|      0|        goto error;
  202|      0|    }
  203|       |
  204|       |    /* On 32-bit systems extremely large frame sizes can cause overflows in
  205|       |     * dav1d_decode_frame() malloc size calculations. Prevent that from occuring
  206|       |     * by enforcing a maximum frame size limit, chosen to roughly correspond to
  207|       |     * the largest size possible to decode without exhausting virtual memory. */
  208|  10.2k|    if (sizeof(size_t) < 8 && s->frame_size_limit - 1 >= 8192 * 8192) {
  ------------------
  |  Branch (208:9): [Folded, False: 10.2k]
  |  Branch (208:31): [True: 0, False: 0]
  ------------------
  209|      0|        c->frame_size_limit = 8192 * 8192;
  210|      0|        if (s->frame_size_limit)
  ------------------
  |  Branch (210:13): [True: 0, False: 0]
  ------------------
  211|      0|            dav1d_log(c, "Frame size limit reduced from %u to %u.\n",
  ------------------
  |  |   44|      0|#define dav1d_log(...) do { } while(0)
  |  |  ------------------
  |  |  |  Branch (44:37): [Folded, False: 0]
  |  |  ------------------
  ------------------
  212|      0|                      s->frame_size_limit, c->frame_size_limit);
  213|      0|    }
  214|       |
  215|  10.2k|    c->flush = &c->flush_mem;
  216|  10.2k|    atomic_init(c->flush, 0);
  217|       |
  218|  10.2k|    get_num_threads(c, s, &c->n_tc, &c->n_fc);
  219|       |
  220|  10.2k|    c->fc = dav1d_alloc_aligned(ALLOC_THREAD_CTX, sizeof(*c->fc) * c->n_fc, 32);
  ------------------
  |  |  134|  10.2k|#define dav1d_alloc_aligned(type, sz, align) dav1d_alloc_aligned_internal(sz, align)
  ------------------
  221|  10.2k|    if (!c->fc) goto error;
  ------------------
  |  Branch (221:9): [True: 0, False: 10.2k]
  ------------------
  222|  10.2k|    memset(c->fc, 0, sizeof(*c->fc) * c->n_fc);
  223|       |
  224|  10.2k|    c->tc = dav1d_alloc_aligned(ALLOC_THREAD_CTX, sizeof(*c->tc) * c->n_tc, 64);
  ------------------
  |  |  134|  10.2k|#define dav1d_alloc_aligned(type, sz, align) dav1d_alloc_aligned_internal(sz, align)
  ------------------
  225|  10.2k|    if (!c->tc) goto error;
  ------------------
  |  Branch (225:9): [True: 0, False: 10.2k]
  ------------------
  226|  10.2k|    memset(c->tc, 0, sizeof(*c->tc) * c->n_tc);
  227|  10.2k|    if (c->n_tc > 1) {
  ------------------
  |  Branch (227:9): [True: 10.2k, False: 0]
  ------------------
  228|  10.2k|        if (pthread_mutex_init(&c->task_thread.lock, NULL)) goto error;
  ------------------
  |  Branch (228:13): [True: 0, False: 10.2k]
  ------------------
  229|  10.2k|        if (pthread_cond_init(&c->task_thread.cond, NULL)) {
  ------------------
  |  Branch (229:13): [True: 0, False: 10.2k]
  ------------------
  230|      0|            pthread_mutex_destroy(&c->task_thread.lock);
  231|      0|            goto error;
  232|      0|        }
  233|  10.2k|        if (pthread_cond_init(&c->task_thread.delayed_fg.cond, NULL)) {
  ------------------
  |  Branch (233:13): [True: 0, False: 10.2k]
  ------------------
  234|      0|            pthread_cond_destroy(&c->task_thread.cond);
  235|      0|            pthread_mutex_destroy(&c->task_thread.lock);
  236|      0|            goto error;
  237|      0|        }
  238|  10.2k|        c->task_thread.cur = c->n_fc;
  239|  10.2k|        atomic_init(&c->task_thread.reset_task_cur, UINT_MAX);
  240|  10.2k|        atomic_init(&c->task_thread.cond_signaled, 0);
  241|  10.2k|        c->task_thread.inited = 1;
  242|  10.2k|    }
  243|       |
  244|  10.2k|    if (c->n_fc > 1) {
  ------------------
  |  Branch (244:9): [True: 10.2k, False: 0]
  ------------------
  245|  10.2k|        const size_t out_delayed_sz = sizeof(*c->frame_thread.out_delayed) * c->n_fc;
  246|  10.2k|        c->frame_thread.out_delayed =
  247|  10.2k|            dav1d_malloc(ALLOC_THREAD_CTX, out_delayed_sz);
  ------------------
  |  |  132|  10.2k|#define dav1d_malloc(type, sz) malloc(sz)
  ------------------
  248|  10.2k|        if (!c->frame_thread.out_delayed) goto error;
  ------------------
  |  Branch (248:13): [True: 0, False: 10.2k]
  ------------------
  249|  10.2k|        memset(c->frame_thread.out_delayed, 0, out_delayed_sz);
  250|  10.2k|    }
  251|  51.0k|    for (unsigned n = 0; n < c->n_fc; n++) {
  ------------------
  |  Branch (251:26): [True: 40.8k, False: 10.2k]
  ------------------
  252|  40.8k|        Dav1dFrameContext *const f = &c->fc[n];
  253|  40.8k|        if (c->n_tc > 1) {
  ------------------
  |  Branch (253:13): [True: 40.8k, False: 0]
  ------------------
  254|  40.8k|            if (pthread_mutex_init(&f->task_thread.lock, NULL)) goto error;
  ------------------
  |  Branch (254:17): [True: 0, False: 40.8k]
  ------------------
  255|  40.8k|            if (pthread_cond_init(&f->task_thread.cond, NULL)) {
  ------------------
  |  Branch (255:17): [True: 0, False: 40.8k]
  ------------------
  256|      0|                pthread_mutex_destroy(&f->task_thread.lock);
  257|      0|                goto error;
  258|      0|            }
  259|  40.8k|            if (pthread_mutex_init(&f->task_thread.pending_tasks.lock, NULL)) {
  ------------------
  |  Branch (259:17): [True: 0, False: 40.8k]
  ------------------
  260|      0|                pthread_cond_destroy(&f->task_thread.cond);
  261|      0|                pthread_mutex_destroy(&f->task_thread.lock);
  262|      0|                goto error;
  263|      0|            }
  264|  40.8k|        }
  265|  40.8k|        f->c = c;
  266|  40.8k|        f->task_thread.ttd = &c->task_thread;
  267|  40.8k|        f->lf.last_sharpness = -1;
  268|  40.8k|    }
  269|       |
  270|  51.0k|    for (unsigned m = 0; m < c->n_tc; m++) {
  ------------------
  |  Branch (270:26): [True: 40.8k, False: 10.2k]
  ------------------
  271|  40.8k|        Dav1dTaskContext *const t = &c->tc[m];
  272|  40.8k|        t->f = &c->fc[0];
  273|  40.8k|        t->task_thread.ttd = &c->task_thread;
  274|  40.8k|        t->c = c;
  275|  40.8k|        memset(t->cf_16bpc, 0, sizeof(t->cf_16bpc));
  276|  40.8k|        if (c->n_tc > 1) {
  ------------------
  |  Branch (276:13): [True: 40.8k, False: 0]
  ------------------
  277|  40.8k|            if (pthread_mutex_init(&t->task_thread.td.lock, NULL)) goto error;
  ------------------
  |  Branch (277:17): [True: 0, False: 40.8k]
  ------------------
  278|  40.8k|            if (pthread_cond_init(&t->task_thread.td.cond, NULL)) {
  ------------------
  |  Branch (278:17): [True: 0, False: 40.8k]
  ------------------
  279|      0|                pthread_mutex_destroy(&t->task_thread.td.lock);
  280|      0|                goto error;
  281|      0|            }
  282|  40.8k|            if (pthread_create(&t->task_thread.td.thread, &thread_attr, dav1d_worker_task, t)) {
  ------------------
  |  Branch (282:17): [True: 0, False: 40.8k]
  ------------------
  283|      0|                pthread_cond_destroy(&t->task_thread.td.cond);
  284|      0|                pthread_mutex_destroy(&t->task_thread.td.lock);
  285|      0|                goto error;
  286|      0|            }
  287|  40.8k|            t->task_thread.td.inited = 1;
  288|  40.8k|        }
  289|  40.8k|    }
  290|  10.2k|    dav1d_pal_dsp_init(&c->pal_dsp);
  291|  10.2k|    dav1d_refmvs_dsp_init(&c->refmvs_dsp);
  292|       |
  293|  10.2k|    pthread_attr_destroy(&thread_attr);
  294|       |
  295|  10.2k|    return 0;
  296|       |
  297|      0|error:
  298|      0|    if (c) close_internal(c_out, 0);
  ------------------
  |  Branch (298:9): [True: 0, False: 0]
  ------------------
  299|      0|    pthread_attr_destroy(&thread_attr);
  300|      0|    return DAV1D_ERR(ENOMEM);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  301|  10.2k|}
dav1d_send_data:
  439|   341k|{
  440|   341k|    validate_input_or_ret(c != NULL, DAV1D_ERR(EINVAL));
  ------------------
  |  |   52|   341k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:9): [True: 0, False: 341k]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
  441|   341k|    validate_input_or_ret(in != NULL, DAV1D_ERR(EINVAL));
  ------------------
  |  |   52|   341k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:9): [True: 0, False: 341k]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
  442|       |
  443|   341k|    if (in->data) {
  ------------------
  |  Branch (443:9): [True: 341k, False: 0]
  ------------------
  444|   341k|        validate_input_or_ret(in->sz > 0 && in->sz <= SIZE_MAX / 2, DAV1D_ERR(EINVAL));
  ------------------
  |  |   52|   682k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:11): [True: 341k, False: 0]
  |  |  |  Branch (52:11): [True: 341k, False: 0]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
  445|   341k|        c->drain = 0;
  446|   341k|    }
  447|   341k|    if (c->in.data)
  ------------------
  |  Branch (447:9): [True: 7.99k, False: 333k]
  ------------------
  448|  7.99k|        return DAV1D_ERR(EAGAIN);
  ------------------
  |  |   58|  7.99k|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  449|   333k|    dav1d_data_ref(&c->in, in);
  450|       |
  451|   333k|    int res = gen_picture(c);
  452|   333k|    if (!res)
  ------------------
  |  Branch (452:9): [True: 293k, False: 39.7k]
  ------------------
  453|   293k|        dav1d_data_unref_internal(in);
  454|       |
  455|   333k|    return res;
  456|   341k|}
dav1d_get_picture:
  459|   324k|{
  460|   324k|    validate_input_or_ret(c != NULL, DAV1D_ERR(EINVAL));
  ------------------
  |  |   52|   324k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:9): [True: 0, False: 324k]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
  461|   324k|    validate_input_or_ret(out != NULL, DAV1D_ERR(EINVAL));
  ------------------
  |  |   52|   324k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:9): [True: 0, False: 324k]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
  462|       |
  463|   324k|    const int drain = c->drain;
  464|   324k|    c->drain = 1;
  465|       |
  466|   324k|    int res = gen_picture(c);
  467|   324k|    if (res < 0)
  ------------------
  |  Branch (467:9): [True: 187, False: 324k]
  ------------------
  468|    187|        return res;
  469|       |
  470|   324k|    if (c->cached_error) {
  ------------------
  |  Branch (470:9): [True: 140k, False: 184k]
  ------------------
  471|   140k|        const int res = c->cached_error;
  472|   140k|        c->cached_error = 0;
  473|   140k|        return res;
  474|   140k|    }
  475|       |
  476|   184k|    if (output_picture_ready(c, c->n_fc == 1))
  ------------------
  |  Branch (476:9): [True: 125k, False: 58.3k]
  ------------------
  477|   125k|        return output_image(c, out);
  478|       |
  479|  58.3k|    if (c->n_fc > 1 && drain)
  ------------------
  |  Branch (479:9): [True: 58.3k, False: 0]
  |  Branch (479:24): [True: 19.1k, False: 39.1k]
  ------------------
  480|  19.1k|        return drain_picture(c, out);
  481|       |
  482|  39.1k|    return DAV1D_ERR(EAGAIN);
  ------------------
  |  |   58|  39.1k|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  483|  58.3k|}
dav1d_apply_grain:
  487|  6.80k|{
  488|  6.80k|    validate_input_or_ret(c != NULL, DAV1D_ERR(EINVAL));
  ------------------
  |  |   52|  6.80k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:9): [True: 0, False: 6.80k]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
  489|  6.80k|    validate_input_or_ret(out != NULL, DAV1D_ERR(EINVAL));
  ------------------
  |  |   52|  6.80k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:9): [True: 0, False: 6.80k]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
  490|  6.80k|    validate_input_or_ret(in != NULL, DAV1D_ERR(EINVAL));
  ------------------
  |  |   52|  6.80k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:9): [True: 0, False: 6.80k]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
  491|       |
  492|  6.80k|    if (!has_grain(in)) {
  ------------------
  |  Branch (492:9): [True: 0, False: 6.80k]
  ------------------
  493|      0|        dav1d_picture_ref(out, in);
  494|      0|        return 0;
  495|      0|    }
  496|       |
  497|  6.80k|    int res = dav1d_picture_alloc_copy(c, out, in->p.w, in);
  498|  6.80k|    if (res < 0) goto error;
  ------------------
  |  Branch (498:9): [True: 0, False: 6.80k]
  ------------------
  499|       |
  500|  6.80k|    if (c->n_tc > 1) {
  ------------------
  |  Branch (500:9): [True: 6.80k, False: 0]
  ------------------
  501|  6.80k|        dav1d_task_delayed_fg(c, out, in);
  502|  6.80k|    } else {
  503|      0|        switch (out->p.bpc) {
  504|      0|#if CONFIG_8BPC
  505|      0|        case 8:
  ------------------
  |  Branch (505:9): [True: 0, False: 0]
  ------------------
  506|      0|            dav1d_apply_grain_8bpc(&c->dsp[0].fg, out, in);
  507|      0|            break;
  508|      0|#endif
  509|      0|#if CONFIG_16BPC
  510|      0|        case 10:
  ------------------
  |  Branch (510:9): [True: 0, False: 0]
  ------------------
  511|      0|        case 12:
  ------------------
  |  Branch (511:9): [True: 0, False: 0]
  ------------------
  512|      0|            dav1d_apply_grain_16bpc(&c->dsp[(out->p.bpc >> 1) - 4].fg, out, in);
  513|      0|            break;
  514|      0|#endif
  515|      0|        default: abort();
  ------------------
  |  Branch (515:9): [True: 0, False: 0]
  ------------------
  516|      0|        }
  517|      0|    }
  518|       |
  519|  6.80k|    return 0;
  520|       |
  521|      0|error:
  522|      0|    dav1d_picture_unref_internal(out);
  523|      0|    return res;
  524|  6.80k|}
dav1d_flush:
  526|  10.2k|void dav1d_flush(Dav1dContext *const c) {
  527|  10.2k|    dav1d_data_unref_internal(&c->in);
  528|  10.2k|    if (c->out.p.frame_hdr)
  ------------------
  |  Branch (528:9): [True: 0, False: 10.2k]
  ------------------
  529|      0|        dav1d_thread_picture_unref(&c->out);
  530|  10.2k|    if (c->cache.p.frame_hdr)
  ------------------
  |  Branch (530:9): [True: 0, False: 10.2k]
  ------------------
  531|      0|        dav1d_thread_picture_unref(&c->cache);
  532|       |
  533|  10.2k|    c->drain = 0;
  534|  10.2k|    c->cached_error = 0;
  535|       |
  536|  91.8k|    for (int i = 0; i < 8; i++) {
  ------------------
  |  Branch (536:21): [True: 81.6k, False: 10.2k]
  ------------------
  537|  81.6k|        if (c->refs[i].p.p.frame_hdr)
  ------------------
  |  Branch (537:13): [True: 71.5k, False: 10.0k]
  ------------------
  538|  71.5k|            dav1d_thread_picture_unref(&c->refs[i].p);
  539|  81.6k|        dav1d_ref_dec(&c->refs[i].segmap);
  540|  81.6k|        dav1d_ref_dec(&c->refs[i].refmvs);
  541|  81.6k|        dav1d_cdf_thread_unref(&c->cdf[i]);
  542|  81.6k|    }
  543|  10.2k|    c->frame_hdr = NULL;
  544|  10.2k|    c->seq_hdr = NULL;
  545|  10.2k|    dav1d_ref_dec(&c->seq_hdr_ref);
  546|       |
  547|  10.2k|    c->mastering_display = NULL;
  548|  10.2k|    c->content_light = NULL;
  549|  10.2k|    c->itut_t35 = NULL;
  550|  10.2k|    c->n_itut_t35 = 0;
  551|  10.2k|    dav1d_ref_dec(&c->mastering_display_ref);
  552|  10.2k|    dav1d_ref_dec(&c->content_light_ref);
  553|  10.2k|    dav1d_ref_dec(&c->itut_t35_ref);
  554|       |
  555|  10.2k|    dav1d_data_props_unref_internal(&c->cached_error_props);
  556|       |
  557|  10.2k|    if (c->n_fc == 1 && c->n_tc == 1) return;
  ------------------
  |  Branch (557:9): [True: 0, False: 10.2k]
  |  Branch (557:25): [True: 0, False: 0]
  ------------------
  558|  10.2k|    atomic_store(c->flush, 1);
  559|       |
  560|  10.2k|    if (c->n_tc > 1) {
  ------------------
  |  Branch (560:9): [True: 10.2k, False: 0]
  ------------------
  561|  10.2k|        pthread_mutex_lock(&c->task_thread.lock);
  562|       |        // stop running tasks in worker threads
  563|  51.0k|        for (unsigned i = 0; i < c->n_tc; i++) {
  ------------------
  |  Branch (563:30): [True: 40.8k, False: 10.2k]
  ------------------
  564|  40.8k|            Dav1dTaskContext *const tc = &c->tc[i];
  565|  43.1k|            while (!tc->task_thread.flushed) {
  ------------------
  |  Branch (565:20): [True: 2.30k, False: 40.8k]
  ------------------
  566|  2.30k|                pthread_cond_wait(&tc->task_thread.td.cond, &c->task_thread.lock);
  567|  2.30k|            }
  568|  40.8k|        }
  569|  51.0k|        for (unsigned i = 0; i < c->n_fc; i++) {
  ------------------
  |  Branch (569:30): [True: 40.8k, False: 10.2k]
  ------------------
  570|  40.8k|            c->fc[i].task_thread.task_head = NULL;
  571|  40.8k|            c->fc[i].task_thread.task_tail = NULL;
  572|  40.8k|            c->fc[i].task_thread.task_cur_prev = NULL;
  573|  40.8k|            c->fc[i].task_thread.pending_tasks.head = NULL;
  574|  40.8k|            c->fc[i].task_thread.pending_tasks.tail = NULL;
  575|  40.8k|            atomic_init(&c->fc[i].task_thread.pending_tasks.merge, 0);
  576|  40.8k|        }
  577|  10.2k|        atomic_init(&c->task_thread.first, 0);
  578|  10.2k|        c->task_thread.cur = c->n_fc;
  579|  10.2k|        atomic_store(&c->task_thread.reset_task_cur, UINT_MAX);
  580|  10.2k|        atomic_store(&c->task_thread.cond_signaled, 0);
  581|  10.2k|        pthread_mutex_unlock(&c->task_thread.lock);
  582|  10.2k|    }
  583|       |
  584|  10.2k|    if (c->n_fc > 1) {
  ------------------
  |  Branch (584:9): [True: 10.2k, False: 0]
  ------------------
  585|  51.0k|        for (unsigned n = 0, next = c->frame_thread.next; n < c->n_fc; n++, next++) {
  ------------------
  |  Branch (585:59): [True: 40.8k, False: 10.2k]
  ------------------
  586|  40.8k|            if (next == c->n_fc) next = 0;
  ------------------
  |  Branch (586:17): [True: 8.56k, False: 32.2k]
  ------------------
  587|  40.8k|            Dav1dFrameContext *const f = &c->fc[next];
  588|  40.8k|            dav1d_decode_frame_exit(f, -1);
  589|  40.8k|            f->n_tile_data = 0;
  590|  40.8k|            f->task_thread.retval = 0;
  591|  40.8k|            f->task_thread.error = 0;
  592|  40.8k|            Dav1dThreadPicture *out_delayed = &c->frame_thread.out_delayed[next];
  593|  40.8k|            if (out_delayed->p.frame_hdr) {
  ------------------
  |  Branch (593:17): [True: 3.39k, False: 37.4k]
  ------------------
  594|  3.39k|                dav1d_thread_picture_unref(out_delayed);
  595|  3.39k|            }
  596|  40.8k|        }
  597|  10.2k|        c->frame_thread.next = 0;
  598|  10.2k|    }
  599|       |    atomic_store(c->flush, 0);
  600|  10.2k|}
dav1d_close:
  602|  10.2k|COLD void dav1d_close(Dav1dContext **const c_out) {
  603|  10.2k|    validate_input(c_out != NULL);
  ------------------
  |  |   59|  10.2k|#define validate_input(x) validate_input_or_ret(x, )
  |  |  ------------------
  |  |  |  |   52|  10.2k|    if (!(x)) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (52:9): [True: 0, False: 10.2k]
  |  |  |  |  ------------------
  |  |  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  |  |  ------------------
  |  |  |  |   54|      0|                    #x, __func__); \
  |  |  |  |   55|      0|        debug_abort(); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   39|      0|#define debug_abort abort
  |  |  |  |  ------------------
  |  |  |  |   56|      0|        return r; \
  |  |  |  |   57|      0|    }
  |  |  ------------------
  ------------------
  604|       |#if TRACK_HEAP_ALLOCATIONS
  605|       |    dav1d_log_alloc_stats(*c_out);
  606|       |#endif
  607|  10.2k|    close_internal(c_out, 1);
  608|  10.2k|}
dav1d_picture_unref:
  727|   134k|void dav1d_picture_unref(Dav1dPicture *const p) {
  728|   134k|    dav1d_picture_unref_internal(p);
  729|   134k|}
dav1d_data_create:
  731|   334k|uint8_t *dav1d_data_create(Dav1dData *const buf, const size_t sz) {
  732|   334k|    return dav1d_data_create_internal(buf, sz);
  733|   334k|}
dav1d_data_unref:
  756|  40.9k|void dav1d_data_unref(Dav1dData *const buf) {
  757|  40.9k|    dav1d_data_unref_internal(buf);
  758|  40.9k|}
lib.c:get_num_threads:
  111|  10.2k|{
  112|       |    /* ceil(sqrt(n)) */
  113|  10.2k|    static const uint8_t fc_lut[49] = {
  114|  10.2k|        1,                                     /*     1 */
  115|  10.2k|        2, 2, 2,                               /*  2- 4 */
  116|  10.2k|        3, 3, 3, 3, 3,                         /*  5- 9 */
  117|  10.2k|        4, 4, 4, 4, 4, 4, 4,                   /* 10-16 */
  118|  10.2k|        5, 5, 5, 5, 5, 5, 5, 5, 5,             /* 17-25 */
  119|  10.2k|        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,       /* 26-36 */
  120|  10.2k|        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, /* 37-49 */
  121|  10.2k|    };
  122|  10.2k|    *n_tc = s->n_threads ? s->n_threads :
  ------------------
  |  Branch (122:13): [True: 10.2k, False: 0]
  ------------------
  123|  10.2k|        iclip(dav1d_num_logical_processors(c), 1, DAV1D_MAX_THREADS);
  ------------------
  |  |   46|      0|#define DAV1D_MAX_THREADS 256
  ------------------
  124|  10.2k|    *n_fc = s->max_frame_delay ? umin(s->max_frame_delay, *n_tc) :
  ------------------
  |  Branch (124:13): [True: 10.2k, False: 0]
  ------------------
  125|  10.2k|            *n_tc < 50 ? fc_lut[*n_tc - 1] : 8; // min(8, ceil(sqrt(n)))
  ------------------
  |  Branch (125:13): [True: 0, False: 0]
  ------------------
  126|  10.2k|}
lib.c:init_internal:
   53|      1|static COLD void init_internal(void) {
   54|      1|    dav1d_init_cpu();
   55|      1|    dav1d_init_ii_wedge_masks();
   56|      1|    dav1d_init_intra_edge_tree();
   57|      1|    dav1d_init_qm_tables();
   58|      1|    dav1d_init_thread();
  ------------------
  |  |  144|      1|#define dav1d_init_thread() do {} while (0)
  |  |  ------------------
  |  |  |  Branch (144:42): [Folded, False: 1]
  |  |  ------------------
  ------------------
   59|      1|}
lib.c:get_stack_size_internal:
   93|  10.2k|static COLD size_t get_stack_size_internal(const pthread_attr_t *const thread_attr) {
   94|       |    /* glibc has an issue where the size of the TLS is subtracted from the stack
   95|       |     * size instead of allocated separately. As a result the specified stack
   96|       |     * size may be insufficient when used in an application with large amounts
   97|       |     * of TLS data. The following is a workaround to compensate for that.
   98|       |     * See https://sourceware.org/bugzilla/show_bug.cgi?id=11787 */
   99|  10.2k|    size_t (*const get_minstack)(const pthread_attr_t*) =
  100|  10.2k|        dlsym(RTLD_DEFAULT, "__pthread_get_minstack");
  101|  10.2k|    if (get_minstack)
  ------------------
  |  Branch (101:9): [True: 10.2k, False: 0]
  ------------------
  102|  10.2k|        return get_minstack(thread_attr) - PTHREAD_STACK_MIN;
  103|      0|    return 0;
  104|  10.2k|}
lib.c:gen_picture:
  413|   658k|{
  414|   658k|    Dav1dData *const in = &c->in;
  415|       |
  416|   658k|    if (output_picture_ready(c, 0))
  ------------------
  |  Branch (416:9): [True: 260k, False: 398k]
  ------------------
  417|   260k|        return 0;
  418|       |
  419|   511k|    while (in->sz > 0) {
  ------------------
  |  Branch (419:12): [True: 420k, False: 91.7k]
  ------------------
  420|   420k|        const ptrdiff_t res = dav1d_parse_obus(c, in);
  421|   420k|        if (res < 0) {
  ------------------
  |  Branch (421:13): [True: 41.0k, False: 379k]
  ------------------
  422|  41.0k|            dav1d_data_unref_internal(in);
  423|   379k|        } else {
  424|   379k|            assert((size_t)res <= in->sz);
  ------------------
  |  Branch (424:13): [True: 379k, False: 0]
  ------------------
  425|   379k|            in->sz -= res;
  426|   379k|            in->data += res;
  427|   379k|            if (!in->sz) dav1d_data_unref_internal(in);
  ------------------
  |  Branch (427:17): [True: 292k, False: 86.7k]
  ------------------
  428|   379k|        }
  429|   420k|        if (output_picture_ready(c, 0))
  ------------------
  |  Branch (429:13): [True: 266k, False: 153k]
  ------------------
  430|   266k|            break;
  431|   153k|        if (res < 0)
  ------------------
  |  Branch (431:13): [True: 39.9k, False: 113k]
  ------------------
  432|  39.9k|            return (int)res;
  433|   153k|    }
  434|       |
  435|   358k|    return 0;
  436|   398k|}
lib.c:output_picture_ready:
  332|  1.27M|static int output_picture_ready(Dav1dContext *const c, const int drain) {
  333|  1.27M|    if (c->cached_error) return 1;
  ------------------
  |  Branch (333:9): [True: 279k, False: 999k]
  ------------------
  334|   999k|    if (!c->all_layers && c->max_spatial_id) {
  ------------------
  |  Branch (334:9): [True: 0, False: 999k]
  |  Branch (334:27): [True: 0, False: 0]
  ------------------
  335|      0|        if (c->out.p.data[0] && c->cache.p.data[0]) {
  ------------------
  |  Branch (335:13): [True: 0, False: 0]
  |  Branch (335:33): [True: 0, False: 0]
  ------------------
  336|      0|            if (c->max_spatial_id == c->cache.p.frame_hdr->spatial_id ||
  ------------------
  |  Branch (336:17): [True: 0, False: 0]
  ------------------
  337|      0|                c->out.flags & PICTURE_FLAG_NEW_TEMPORAL_UNIT)
  ------------------
  |  Branch (337:17): [True: 0, False: 0]
  ------------------
  338|      0|                return 1;
  339|      0|            dav1d_thread_picture_unref(&c->cache);
  340|      0|            dav1d_thread_picture_move_ref(&c->cache, &c->out);
  341|      0|            return 0;
  342|      0|        } else if (c->cache.p.data[0] && drain) {
  ------------------
  |  Branch (342:20): [True: 0, False: 0]
  |  Branch (342:42): [True: 0, False: 0]
  ------------------
  343|      0|            return 1;
  344|      0|        } else if (c->out.p.data[0]) {
  ------------------
  |  Branch (344:20): [True: 0, False: 0]
  ------------------
  345|      0|            dav1d_thread_picture_move_ref(&c->cache, &c->out);
  346|      0|            return 0;
  347|      0|        }
  348|      0|    }
  349|       |
  350|   999k|    return !!c->out.p.data[0];
  351|   999k|}
lib.c:output_image:
  312|   134k|{
  313|   134k|    int res = 0;
  314|       |
  315|   134k|    Dav1dThreadPicture *const in = (c->all_layers || !c->max_spatial_id)
  ------------------
  |  Branch (315:37): [True: 134k, False: 0]
  |  Branch (315:54): [True: 0, False: 0]
  ------------------
  316|   134k|                                   ? &c->out : &c->cache;
  317|   134k|    if (!c->apply_grain || !has_grain(&in->p)) {
  ------------------
  |  Branch (317:9): [True: 0, False: 134k]
  |  Branch (317:28): [True: 127k, False: 6.80k]
  ------------------
  318|   127k|        dav1d_picture_move_ref(out, &in->p);
  319|   127k|        dav1d_thread_picture_unref(in);
  320|   127k|        goto end;
  321|   127k|    }
  322|       |
  323|  6.80k|    res = dav1d_apply_grain(c, out, &in->p);
  324|  6.80k|    dav1d_thread_picture_unref(in);
  325|   134k|end:
  326|   134k|    if (!c->all_layers && c->max_spatial_id && c->out.p.data[0]) {
  ------------------
  |  Branch (326:9): [True: 0, False: 134k]
  |  Branch (326:27): [True: 0, False: 0]
  |  Branch (326:48): [True: 0, False: 0]
  ------------------
  327|      0|        dav1d_thread_picture_move_ref(in, &c->out);
  328|      0|    }
  329|   134k|    return res;
  330|  6.80k|}
lib.c:drain_picture:
  353|  19.1k|static int drain_picture(Dav1dContext *const c, Dav1dPicture *const out) {
  354|  19.1k|    unsigned drain_count = 0;
  355|  19.1k|    int drained = 0;
  356|  56.2k|    do {
  357|  56.2k|        const unsigned next = c->frame_thread.next;
  358|  56.2k|        Dav1dFrameContext *const f = &c->fc[next];
  359|  56.2k|        pthread_mutex_lock(&c->task_thread.lock);
  360|  67.3k|        while (f->n_tile_data > 0)
  ------------------
  |  Branch (360:16): [True: 11.1k, False: 56.2k]
  ------------------
  361|  11.1k|            pthread_cond_wait(&f->task_thread.cond,
  362|  11.1k|                              &f->task_thread.ttd->lock);
  363|  56.2k|        Dav1dThreadPicture *const out_delayed =
  364|  56.2k|            &c->frame_thread.out_delayed[next];
  365|  56.2k|        if (out_delayed->p.data[0] || atomic_load(&f->task_thread.error)) {
  ------------------
  |  Branch (365:13): [True: 13.8k, False: 42.4k]
  |  Branch (365:39): [True: 63, False: 42.3k]
  ------------------
  366|  13.8k|            unsigned first = atomic_load(&c->task_thread.first);
  367|  13.8k|            if (first + 1U < c->n_fc)
  ------------------
  |  Branch (367:17): [True: 12.8k, False: 1.00k]
  ------------------
  368|  13.8k|                atomic_fetch_add(&c->task_thread.first, 1U);
  369|  1.00k|            else
  370|  13.8k|                atomic_store(&c->task_thread.first, 0);
  371|  13.8k|            atomic_compare_exchange_strong(&c->task_thread.reset_task_cur,
  372|  13.8k|                                           &first, UINT_MAX);
  373|  13.8k|            if (c->task_thread.cur && c->task_thread.cur < c->n_fc)
  ------------------
  |  Branch (373:17): [True: 13.8k, False: 19]
  |  Branch (373:39): [True: 1.80k, False: 12.0k]
  ------------------
  374|  1.80k|                c->task_thread.cur--;
  375|  13.8k|            drained = 1;
  376|  42.3k|        } else if (drained) {
  ------------------
  |  Branch (376:20): [True: 381, False: 41.9k]
  ------------------
  377|    381|            pthread_mutex_unlock(&c->task_thread.lock);
  378|    381|            break;
  379|    381|        }
  380|  55.8k|        if (++c->frame_thread.next == c->n_fc)
  ------------------
  |  Branch (380:13): [True: 14.0k, False: 41.8k]
  ------------------
  381|  14.0k|            c->frame_thread.next = 0;
  382|  55.8k|        pthread_mutex_unlock(&c->task_thread.lock);
  383|  55.8k|        const int error = f->task_thread.retval;
  384|  55.8k|        if (error) {
  ------------------
  |  Branch (384:13): [True: 4.12k, False: 51.7k]
  ------------------
  385|  4.12k|            f->task_thread.retval = 0;
  386|  4.12k|            dav1d_data_props_copy(&c->cached_error_props, &out_delayed->p.m);
  387|  4.12k|            dav1d_thread_picture_unref(out_delayed);
  388|  4.12k|            return error;
  389|  4.12k|        }
  390|  51.7k|        if (out_delayed->p.data[0]) {
  ------------------
  |  Branch (390:13): [True: 9.70k, False: 42.0k]
  ------------------
  391|  9.70k|            const unsigned progress =
  392|  9.70k|                atomic_load_explicit(&out_delayed->progress[1],
  393|  9.70k|                                     memory_order_relaxed);
  394|  9.70k|            if ((out_delayed->visible || c->output_invisible_frames) &&
  ------------------
  |  Branch (394:18): [True: 8.73k, False: 965]
  |  Branch (394:42): [True: 0, False: 965]
  ------------------
  395|  8.73k|                progress != FRAME_ERROR)
  ------------------
  |  |   35|  8.73k|#define FRAME_ERROR (UINT_MAX - 1)
  ------------------
  |  Branch (395:17): [True: 8.59k, False: 139]
  ------------------
  396|  8.59k|            {
  397|  8.59k|                dav1d_thread_picture_ref(&c->out, out_delayed);
  398|  8.59k|                c->event_flags |= dav1d_picture_get_event_flags(out_delayed);
  399|  8.59k|            }
  400|  9.70k|            dav1d_thread_picture_unref(out_delayed);
  401|  9.70k|            if (output_picture_ready(c, 0))
  ------------------
  |  Branch (401:17): [True: 8.59k, False: 1.10k]
  ------------------
  402|  8.59k|                return output_image(c, out);
  403|  9.70k|        }
  404|  51.7k|    } while (++drain_count < c->n_fc);
  ------------------
  |  Branch (404:14): [True: 37.0k, False: 6.07k]
  ------------------
  405|       |
  406|  6.45k|    if (output_picture_ready(c, 1))
  ------------------
  |  Branch (406:9): [True: 0, False: 6.45k]
  ------------------
  407|      0|        return output_image(c, out);
  408|       |
  409|  6.45k|    return DAV1D_ERR(EAGAIN);
  ------------------
  |  |   58|  6.45k|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  410|  6.45k|}
lib.c:has_grain:
  304|   141k|{
  305|   141k|    const Dav1dFilmGrainData *fgdata = &pic->frame_hdr->film_grain.data;
  306|   141k|    return fgdata->num_y_points || fgdata->num_uv_points[0] ||
  ------------------
  |  Branch (306:12): [True: 4.43k, False: 136k]
  |  Branch (306:36): [True: 1.10k, False: 135k]
  ------------------
  307|   135k|           fgdata->num_uv_points[1] || (fgdata->clip_to_restricted_range &&
  ------------------
  |  Branch (307:12): [True: 4.99k, False: 130k]
  |  Branch (307:41): [True: 3.56k, False: 127k]
  ------------------
  308|  3.56k|                                        fgdata->chroma_scaling_from_luma);
  ------------------
  |  Branch (308:41): [True: 3.07k, False: 488]
  ------------------
  309|   141k|}
lib.c:close_internal:
  610|  10.2k|static COLD void close_internal(Dav1dContext **const c_out, int flush) {
  611|  10.2k|    Dav1dContext *const c = *c_out;
  612|  10.2k|    if (!c) return;
  ------------------
  |  Branch (612:9): [True: 0, False: 10.2k]
  ------------------
  613|       |
  614|  10.2k|    if (flush) dav1d_flush(c);
  ------------------
  |  Branch (614:9): [True: 10.2k, False: 0]
  ------------------
  615|       |
  616|  10.2k|    if (c->tc) {
  ------------------
  |  Branch (616:9): [True: 10.2k, False: 0]
  ------------------
  617|  10.2k|        struct TaskThreadData *ttd = &c->task_thread;
  618|  10.2k|        if (ttd->inited) {
  ------------------
  |  Branch (618:13): [True: 10.2k, False: 0]
  ------------------
  619|  10.2k|            pthread_mutex_lock(&ttd->lock);
  620|  51.0k|            for (unsigned n = 0; n < c->n_tc && c->tc[n].task_thread.td.inited; n++)
  ------------------
  |  Branch (620:34): [True: 40.8k, False: 10.2k]
  |  Branch (620:49): [True: 40.8k, False: 0]
  ------------------
  621|  40.8k|                c->tc[n].task_thread.die = 1;
  622|  10.2k|            pthread_cond_broadcast(&ttd->cond);
  623|  10.2k|            pthread_mutex_unlock(&ttd->lock);
  624|  51.0k|            for (unsigned n = 0; n < c->n_tc; n++) {
  ------------------
  |  Branch (624:34): [True: 40.8k, False: 10.2k]
  ------------------
  625|  40.8k|                Dav1dTaskContext *const pf = &c->tc[n];
  626|  40.8k|                if (!pf->task_thread.td.inited) break;
  ------------------
  |  Branch (626:21): [True: 0, False: 40.8k]
  ------------------
  627|  40.8k|                pthread_join(pf->task_thread.td.thread, NULL);
  628|  40.8k|                pthread_cond_destroy(&pf->task_thread.td.cond);
  629|  40.8k|                pthread_mutex_destroy(&pf->task_thread.td.lock);
  630|  40.8k|            }
  631|  10.2k|            pthread_cond_destroy(&ttd->delayed_fg.cond);
  632|  10.2k|            pthread_cond_destroy(&ttd->cond);
  633|  10.2k|            pthread_mutex_destroy(&ttd->lock);
  634|  10.2k|        }
  635|  10.2k|        dav1d_free_aligned(c->tc);
  ------------------
  |  |  136|  10.2k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
  636|  10.2k|    }
  637|       |
  638|  51.0k|    for (unsigned n = 0; c->fc && n < c->n_fc; n++) {
  ------------------
  |  Branch (638:26): [True: 51.0k, False: 0]
  |  Branch (638:35): [True: 40.8k, False: 10.2k]
  ------------------
  639|  40.8k|        Dav1dFrameContext *const f = &c->fc[n];
  640|       |
  641|       |        // clean-up threading stuff
  642|  40.8k|        if (c->n_fc > 1) {
  ------------------
  |  Branch (642:13): [True: 40.8k, False: 0]
  ------------------
  643|  40.8k|            dav1d_free(f->tile_thread.lowest_pixel_mem);
  ------------------
  |  |  135|  40.8k|#define dav1d_free(ptr) free(ptr)
  ------------------
  644|  40.8k|            dav1d_free(f->frame_thread.b);
  ------------------
  |  |  135|  40.8k|#define dav1d_free(ptr) free(ptr)
  ------------------
  645|  40.8k|            dav1d_free_aligned(f->frame_thread.cbi);
  ------------------
  |  |  136|  40.8k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
  646|  40.8k|            dav1d_free_aligned(f->frame_thread.pal_idx);
  ------------------
  |  |  136|  40.8k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
  647|  40.8k|            dav1d_free_aligned(f->frame_thread.cf);
  ------------------
  |  |  136|  40.8k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
  648|  40.8k|            dav1d_free(f->frame_thread.tile_start_off);
  ------------------
  |  |  135|  40.8k|#define dav1d_free(ptr) free(ptr)
  ------------------
  649|  40.8k|            dav1d_free_aligned(f->frame_thread.pal);
  ------------------
  |  |  136|  40.8k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
  650|  40.8k|        }
  651|  40.8k|        if (c->n_tc > 1) {
  ------------------
  |  Branch (651:13): [True: 40.8k, False: 0]
  ------------------
  652|  40.8k|            pthread_mutex_destroy(&f->task_thread.pending_tasks.lock);
  653|  40.8k|            pthread_cond_destroy(&f->task_thread.cond);
  654|  40.8k|            pthread_mutex_destroy(&f->task_thread.lock);
  655|  40.8k|        }
  656|  40.8k|        dav1d_free(f->frame_thread.frame_progress);
  ------------------
  |  |  135|  40.8k|#define dav1d_free(ptr) free(ptr)
  ------------------
  657|  40.8k|        dav1d_free(f->task_thread.tasks);
  ------------------
  |  |  135|  40.8k|#define dav1d_free(ptr) free(ptr)
  ------------------
  658|  40.8k|        dav1d_free(f->task_thread.tile_tasks[0]);
  ------------------
  |  |  135|  40.8k|#define dav1d_free(ptr) free(ptr)
  ------------------
  659|  40.8k|        dav1d_free_aligned(f->ts);
  ------------------
  |  |  136|  40.8k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
  660|  40.8k|        dav1d_free_aligned(f->ipred_edge[0]);
  ------------------
  |  |  136|  40.8k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
  661|  40.8k|        dav1d_free(f->a);
  ------------------
  |  |  135|  40.8k|#define dav1d_free(ptr) free(ptr)
  ------------------
  662|  40.8k|        dav1d_free(f->tile);
  ------------------
  |  |  135|  40.8k|#define dav1d_free(ptr) free(ptr)
  ------------------
  663|  40.8k|        dav1d_free(f->lf.mask);
  ------------------
  |  |  135|  40.8k|#define dav1d_free(ptr) free(ptr)
  ------------------
  664|  40.8k|        dav1d_free(f->lf.level);
  ------------------
  |  |  135|  40.8k|#define dav1d_free(ptr) free(ptr)
  ------------------
  665|  40.8k|        dav1d_free(f->lf.lr_mask);
  ------------------
  |  |  135|  40.8k|#define dav1d_free(ptr) free(ptr)
  ------------------
  666|  40.8k|        dav1d_free(f->lf.tx_lpf_right_edge[0]);
  ------------------
  |  |  135|  40.8k|#define dav1d_free(ptr) free(ptr)
  ------------------
  667|  40.8k|        dav1d_free(f->lf.start_of_tile_row);
  ------------------
  |  |  135|  40.8k|#define dav1d_free(ptr) free(ptr)
  ------------------
  668|  40.8k|        dav1d_free_aligned(f->rf.r);
  ------------------
  |  |  136|  40.8k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
  669|  40.8k|        dav1d_free_aligned(f->lf.cdef_line_buf);
  ------------------
  |  |  136|  40.8k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
  670|  40.8k|        dav1d_free_aligned(f->lf.lr_line_buf);
  ------------------
  |  |  136|  40.8k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
  671|  40.8k|    }
  672|  10.2k|    dav1d_free_aligned(c->fc);
  ------------------
  |  |  136|  10.2k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
  673|  10.2k|    if (c->n_fc > 1 && c->frame_thread.out_delayed) {
  ------------------
  |  Branch (673:9): [True: 10.2k, False: 0]
  |  Branch (673:24): [True: 10.2k, False: 0]
  ------------------
  674|  51.0k|        for (unsigned n = 0; n < c->n_fc; n++)
  ------------------
  |  Branch (674:30): [True: 40.8k, False: 10.2k]
  ------------------
  675|  40.8k|            if (c->frame_thread.out_delayed[n].p.frame_hdr)
  ------------------
  |  Branch (675:17): [True: 0, False: 40.8k]
  ------------------
  676|      0|                dav1d_thread_picture_unref(&c->frame_thread.out_delayed[n]);
  677|  10.2k|        dav1d_free(c->frame_thread.out_delayed);
  ------------------
  |  |  135|  10.2k|#define dav1d_free(ptr) free(ptr)
  ------------------
  678|  10.2k|    }
  679|  10.2k|    for (int n = 0; n < c->n_tile_data; n++)
  ------------------
  |  Branch (679:21): [True: 85, False: 10.2k]
  ------------------
  680|     85|        dav1d_data_unref_internal(&c->tile[n].data);
  681|  10.2k|    dav1d_free(c->tile);
  ------------------
  |  |  135|  10.2k|#define dav1d_free(ptr) free(ptr)
  ------------------
  682|  91.8k|    for (int n = 0; n < 8; n++) {
  ------------------
  |  Branch (682:21): [True: 81.6k, False: 10.2k]
  ------------------
  683|  81.6k|        dav1d_cdf_thread_unref(&c->cdf[n]);
  684|  81.6k|        if (c->refs[n].p.p.frame_hdr)
  ------------------
  |  Branch (684:13): [True: 0, False: 81.6k]
  ------------------
  685|      0|            dav1d_thread_picture_unref(&c->refs[n].p);
  686|  81.6k|        dav1d_ref_dec(&c->refs[n].refmvs);
  687|  81.6k|        dav1d_ref_dec(&c->refs[n].segmap);
  688|  81.6k|    }
  689|  10.2k|    dav1d_ref_dec(&c->seq_hdr_ref);
  690|  10.2k|    dav1d_ref_dec(&c->frame_hdr_ref);
  691|       |
  692|  10.2k|    dav1d_ref_dec(&c->mastering_display_ref);
  693|  10.2k|    dav1d_ref_dec(&c->content_light_ref);
  694|  10.2k|    dav1d_ref_dec(&c->itut_t35_ref);
  695|       |
  696|  10.2k|    dav1d_mem_pool_end(c->seq_hdr_pool);
  697|  10.2k|    dav1d_mem_pool_end(c->frame_hdr_pool);
  698|  10.2k|    dav1d_mem_pool_end(c->segmap_pool);
  699|  10.2k|    dav1d_mem_pool_end(c->refmvs_pool);
  700|  10.2k|    dav1d_mem_pool_end(c->cdf_pool);
  701|  10.2k|    dav1d_mem_pool_end(c->picture_pool);
  702|  10.2k|    dav1d_mem_pool_end(c->pic_ctx_pool);
  703|       |
  704|  10.2k|    dav1d_freep_aligned(c_out);
  705|  10.2k|}

dav1d_loop_filter_dsp_init_8bpc:
  259|  3.49k|COLD void bitfn(dav1d_loop_filter_dsp_init)(Dav1dLoopFilterDSPContext *const c) {
  260|  3.49k|    c->loop_filter_sb[0][0] = loop_filter_h_sb128y_c;
  261|  3.49k|    c->loop_filter_sb[0][1] = loop_filter_v_sb128y_c;
  262|  3.49k|    c->loop_filter_sb[1][0] = loop_filter_h_sb128uv_c;
  263|  3.49k|    c->loop_filter_sb[1][1] = loop_filter_v_sb128uv_c;
  264|       |
  265|  3.49k|#if HAVE_ASM
  266|       |#if ARCH_AARCH64 || ARCH_ARM
  267|       |    loop_filter_dsp_init_arm(c);
  268|       |#elif ARCH_LOONGARCH64
  269|       |    loop_filter_dsp_init_loongarch(c);
  270|       |#elif ARCH_PPC64LE
  271|       |    loop_filter_dsp_init_ppc(c);
  272|       |#elif ARCH_X86
  273|       |    loop_filter_dsp_init_x86(c);
  274|  3.49k|#endif
  275|  3.49k|#endif
  276|  3.49k|}
dav1d_loop_filter_dsp_init_16bpc:
  259|  5.72k|COLD void bitfn(dav1d_loop_filter_dsp_init)(Dav1dLoopFilterDSPContext *const c) {
  260|  5.72k|    c->loop_filter_sb[0][0] = loop_filter_h_sb128y_c;
  261|  5.72k|    c->loop_filter_sb[0][1] = loop_filter_v_sb128y_c;
  262|  5.72k|    c->loop_filter_sb[1][0] = loop_filter_h_sb128uv_c;
  263|  5.72k|    c->loop_filter_sb[1][1] = loop_filter_v_sb128uv_c;
  264|       |
  265|  5.72k|#if HAVE_ASM
  266|       |#if ARCH_AARCH64 || ARCH_ARM
  267|       |    loop_filter_dsp_init_arm(c);
  268|       |#elif ARCH_LOONGARCH64
  269|       |    loop_filter_dsp_init_loongarch(c);
  270|       |#elif ARCH_PPC64LE
  271|       |    loop_filter_dsp_init_ppc(c);
  272|       |#elif ARCH_X86
  273|       |    loop_filter_dsp_init_x86(c);
  274|  5.72k|#endif
  275|  5.72k|#endif
  276|  5.72k|}

dav1d_loop_restoration_dsp_init_8bpc:
 1367|  3.49k|{
 1368|  3.49k|    c->wiener[0] = c->wiener[1] = wiener_c;
 1369|  3.49k|    c->sgr[0] = sgr_5x5_c;
 1370|  3.49k|    c->sgr[1] = sgr_3x3_c;
 1371|  3.49k|    c->sgr[2] = sgr_mix_c;
 1372|       |
 1373|  3.49k|#if HAVE_ASM
 1374|       |#if ARCH_AARCH64 || ARCH_ARM
 1375|       |    loop_restoration_dsp_init_arm(c, bpc);
 1376|       |#elif ARCH_LOONGARCH64
 1377|       |    loop_restoration_dsp_init_loongarch(c, bpc);
 1378|       |#elif ARCH_PPC64LE
 1379|       |    loop_restoration_dsp_init_ppc(c, bpc);
 1380|       |#elif ARCH_X86
 1381|       |    loop_restoration_dsp_init_x86(c, bpc);
 1382|  3.49k|#endif
 1383|  3.49k|#endif
 1384|  3.49k|}
looprestoration_tmpl.c:sgr_5x5_c:
  830|  6.86k|{
  831|  6.86k|    ALIGN_STK_16(int32_t, sumsq_buf, BUF_STRIDE * 5 + 16,);
  ------------------
  |  |  100|  6.86k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  6.86k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
  832|  6.86k|    ALIGN_STK_16(coef, sum_buf, BUF_STRIDE * 5 + 16,);
  ------------------
  |  |  100|  6.86k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  6.86k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
  833|  6.86k|    int32_t *sumsq_ptrs[5], *sumsq_rows[5];
  834|  6.86k|    coef *sum_ptrs[5], *sum_rows[5];
  835|  41.1k|    for (int i = 0; i < 5; i++) {
  ------------------
  |  Branch (835:21): [True: 34.3k, False: 6.86k]
  ------------------
  836|  34.3k|        sumsq_rows[i] = &sumsq_buf[i * BUF_STRIDE];
  ------------------
  |  |  685|  34.3k|#define BUF_STRIDE (384 + 16)
  ------------------
  837|  34.3k|        sum_rows[i] = &sum_buf[i * BUF_STRIDE];
  ------------------
  |  |  685|  34.3k|#define BUF_STRIDE (384 + 16)
  ------------------
  838|  34.3k|    }
  839|       |
  840|  6.86k|    ALIGN_STK_16(int32_t, A_buf, BUF_STRIDE * 2 + 16,);
  ------------------
  |  |  100|  6.86k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  6.86k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
  841|  6.86k|    ALIGN_STK_16(coef, B_buf, BUF_STRIDE * 2 + 16,);
  ------------------
  |  |  100|  6.86k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  6.86k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
  842|  6.86k|    int32_t *A_ptrs[2];
  843|  6.86k|    coef *B_ptrs[2];
  844|  20.5k|    for (int i = 0; i < 2; i++) {
  ------------------
  |  Branch (844:21): [True: 13.7k, False: 6.86k]
  ------------------
  845|  13.7k|        A_ptrs[i] = &A_buf[i * BUF_STRIDE];
  ------------------
  |  |  685|  13.7k|#define BUF_STRIDE (384 + 16)
  ------------------
  846|  13.7k|        B_ptrs[i] = &B_buf[i * BUF_STRIDE];
  ------------------
  |  |  685|  13.7k|#define BUF_STRIDE (384 + 16)
  ------------------
  847|  13.7k|    }
  848|  6.86k|    const pixel *src = dst;
  849|  6.86k|    const pixel *lpf_bottom = lpf + 6*PXSTRIDE(stride);
  ------------------
  |  |   53|  6.86k|#define PXSTRIDE(x) (x)
  ------------------
  850|       |
  851|  6.86k|    if (edges & LR_HAVE_TOP) {
  ------------------
  |  Branch (851:9): [True: 4.48k, False: 2.37k]
  ------------------
  852|  4.48k|        sumsq_ptrs[0] = sumsq_rows[0];
  853|  4.48k|        sumsq_ptrs[1] = sumsq_rows[0];
  854|  4.48k|        sumsq_ptrs[2] = sumsq_rows[1];
  855|  4.48k|        sumsq_ptrs[3] = sumsq_rows[2];
  856|  4.48k|        sumsq_ptrs[4] = sumsq_rows[3];
  857|  4.48k|        sum_ptrs[0] = sum_rows[0];
  858|  4.48k|        sum_ptrs[1] = sum_rows[0];
  859|  4.48k|        sum_ptrs[2] = sum_rows[1];
  860|  4.48k|        sum_ptrs[3] = sum_rows[2];
  861|  4.48k|        sum_ptrs[4] = sum_rows[3];
  862|       |
  863|  4.48k|        sgr_box5_row_h(sumsq_rows[0], sum_rows[0], NULL, lpf, w, edges);
  864|  4.48k|        lpf += PXSTRIDE(stride);
  ------------------
  |  |   53|  4.48k|#define PXSTRIDE(x) (x)
  ------------------
  865|  4.48k|        sgr_box5_row_h(sumsq_rows[1], sum_rows[1], NULL, lpf, w, edges);
  866|       |
  867|  4.48k|        sgr_box5_row_h(sumsq_rows[2], sum_rows[2], left, src, w, edges);
  868|  4.48k|        left++;
  869|  4.48k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|  4.48k|#define PXSTRIDE(x) (x)
  ------------------
  870|       |
  871|  4.48k|        if (--h <= 0)
  ------------------
  |  Branch (871:13): [True: 275, False: 4.21k]
  ------------------
  872|    275|            goto vert_1;
  873|       |
  874|  4.21k|        sgr_box5_row_h(sumsq_rows[3], sum_rows[3], left, src, w, edges);
  875|  4.21k|        left++;
  876|  4.21k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|  4.21k|#define PXSTRIDE(x) (x)
  ------------------
  877|  4.21k|        sgr_box5_vert(sumsq_ptrs, sum_ptrs, A_ptrs[1], B_ptrs[1],
  878|  4.21k|                      w, params->sgr.s0, BITDEPTH_MAX);
  ------------------
  |  |   59|  4.21k|#define BITDEPTH_MAX 0xff
  ------------------
  879|  4.21k|        rotate(A_ptrs, B_ptrs, 2);
  880|       |
  881|  4.21k|        if (--h <= 0)
  ------------------
  |  Branch (881:13): [True: 295, False: 3.91k]
  ------------------
  882|    295|            goto vert_2;
  883|       |
  884|       |        // ptrs are rotated by 2; both [3] and [4] now point at rows[0]; set
  885|       |        // one of them to point at the previously unused rows[4].
  886|  3.91k|        sumsq_ptrs[3] = sumsq_rows[4];
  887|  3.91k|        sum_ptrs[3] = sum_rows[4];
  888|  3.91k|    } else {
  889|  2.37k|        sumsq_ptrs[0] = sumsq_rows[0];
  890|  2.37k|        sumsq_ptrs[1] = sumsq_rows[0];
  891|  2.37k|        sumsq_ptrs[2] = sumsq_rows[0];
  892|  2.37k|        sumsq_ptrs[3] = sumsq_rows[0];
  893|  2.37k|        sumsq_ptrs[4] = sumsq_rows[0];
  894|  2.37k|        sum_ptrs[0] = sum_rows[0];
  895|  2.37k|        sum_ptrs[1] = sum_rows[0];
  896|  2.37k|        sum_ptrs[2] = sum_rows[0];
  897|  2.37k|        sum_ptrs[3] = sum_rows[0];
  898|  2.37k|        sum_ptrs[4] = sum_rows[0];
  899|       |
  900|  2.37k|        sgr_box5_row_h(sumsq_rows[0], sum_rows[0], left, src, w, edges);
  901|  2.37k|        left++;
  902|  2.37k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|  2.37k|#define PXSTRIDE(x) (x)
  ------------------
  903|       |
  904|  2.37k|        if (--h <= 0)
  ------------------
  |  Branch (904:13): [True: 154, False: 2.22k]
  ------------------
  905|    154|            goto vert_1;
  906|       |
  907|  2.22k|        sumsq_ptrs[4] = sumsq_rows[1];
  908|  2.22k|        sum_ptrs[4] = sum_rows[1];
  909|       |
  910|  2.22k|        sgr_box5_row_h(sumsq_rows[1], sum_rows[1], left, src, w, edges);
  911|  2.22k|        left++;
  912|  2.22k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|  2.22k|#define PXSTRIDE(x) (x)
  ------------------
  913|       |
  914|  2.22k|        sgr_box5_vert(sumsq_ptrs, sum_ptrs, A_ptrs[1], B_ptrs[1],
  915|  2.22k|                      w, params->sgr.s0, BITDEPTH_MAX);
  ------------------
  |  |   59|  2.22k|#define BITDEPTH_MAX 0xff
  ------------------
  916|  2.22k|        rotate(A_ptrs, B_ptrs, 2);
  917|       |
  918|  2.22k|        if (--h <= 0)
  ------------------
  |  Branch (918:13): [True: 145, False: 2.08k]
  ------------------
  919|    145|            goto vert_2;
  920|       |
  921|  2.08k|        sumsq_ptrs[3] = sumsq_rows[2];
  922|  2.08k|        sumsq_ptrs[4] = sumsq_rows[3];
  923|  2.08k|        sum_ptrs[3] = sum_rows[2];
  924|  2.08k|        sum_ptrs[4] = sum_rows[3];
  925|       |
  926|  2.08k|        sgr_box5_row_h(sumsq_rows[2], sum_rows[2], left, src, w, edges);
  927|  2.08k|        left++;
  928|  2.08k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|  2.08k|#define PXSTRIDE(x) (x)
  ------------------
  929|       |
  930|  2.08k|        if (--h <= 0)
  ------------------
  |  Branch (930:13): [True: 120, False: 1.96k]
  ------------------
  931|    120|            goto odd;
  932|       |
  933|  1.96k|        sgr_box5_row_h(sumsq_rows[3], sum_rows[3], left, src, w, edges);
  934|  1.96k|        left++;
  935|  1.96k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|  1.96k|#define PXSTRIDE(x) (x)
  ------------------
  936|       |
  937|  1.96k|        sgr_box5_vert(sumsq_ptrs, sum_ptrs, A_ptrs[1], B_ptrs[1],
  938|  1.96k|                      w, params->sgr.s0, BITDEPTH_MAX);
  ------------------
  |  |   59|  1.96k|#define BITDEPTH_MAX 0xff
  ------------------
  939|  1.96k|        sgr_finish2(&dst, stride, A_ptrs, B_ptrs,
  940|  1.96k|                    w, 2, params->sgr.w0 HIGHBD_TAIL_SUFFIX);
  941|       |
  942|  1.96k|        if (--h <= 0)
  ------------------
  |  Branch (942:13): [True: 93, False: 1.86k]
  ------------------
  943|     93|            goto vert_2;
  944|       |
  945|       |        // ptrs are rotated by 2; both [3] and [4] now point at rows[0]; set
  946|       |        // one of them to point at the previously unused rows[4].
  947|  1.86k|        sumsq_ptrs[3] = sumsq_rows[4];
  948|  1.86k|        sum_ptrs[3] = sum_rows[4];
  949|  1.86k|    }
  950|       |
  951|   142k|    do {
  952|   142k|        sgr_box5_row_h(sumsq_ptrs[3], sum_ptrs[3], left, src, w, edges);
  953|   142k|        left++;
  954|   142k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|   142k|#define PXSTRIDE(x) (x)
  ------------------
  955|       |
  956|   142k|        if (--h <= 0)
  ------------------
  |  Branch (956:13): [True: 939, False: 141k]
  ------------------
  957|    939|            goto odd;
  958|       |
  959|   141k|        sgr_box5_row_h(sumsq_ptrs[4], sum_ptrs[4], left, src, w, edges);
  960|   141k|        left++;
  961|   141k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|   141k|#define PXSTRIDE(x) (x)
  ------------------
  962|       |
  963|   141k|        sgr_box5_vert(sumsq_ptrs, sum_ptrs, A_ptrs[1], B_ptrs[1],
  964|   141k|                      w, params->sgr.s0, BITDEPTH_MAX);
  ------------------
  |  |   59|   141k|#define BITDEPTH_MAX 0xff
  ------------------
  965|   141k|        sgr_finish2(&dst, stride, A_ptrs, B_ptrs,
  966|   141k|                    w, 2, params->sgr.w0 HIGHBD_TAIL_SUFFIX);
  967|   141k|    } while (--h > 0);
  ------------------
  |  Branch (967:14): [True: 136k, False: 4.84k]
  ------------------
  968|       |
  969|  4.84k|    if (!(edges & LR_HAVE_BOTTOM))
  ------------------
  |  Branch (969:9): [True: 309, False: 4.53k]
  ------------------
  970|    309|        goto vert_2;
  971|       |
  972|  4.53k|    sgr_box5_row_h(sumsq_ptrs[3], sum_ptrs[3], NULL, lpf_bottom, w, edges);
  973|  4.53k|    lpf_bottom += PXSTRIDE(stride);
  ------------------
  |  |   53|  4.53k|#define PXSTRIDE(x) (x)
  ------------------
  974|  4.53k|    sgr_box5_row_h(sumsq_ptrs[4], sum_ptrs[4], NULL, lpf_bottom, w, edges);
  975|       |
  976|  5.38k|output_2:
  977|  5.38k|    sgr_box5_vert(sumsq_ptrs, sum_ptrs, A_ptrs[1], B_ptrs[1],
  978|  5.38k|                  w, params->sgr.s0, BITDEPTH_MAX);
  ------------------
  |  |   59|  5.38k|#define BITDEPTH_MAX 0xff
  ------------------
  979|  5.38k|    sgr_finish2(&dst, stride, A_ptrs, B_ptrs,
  980|  5.38k|                w, 2, params->sgr.w0 HIGHBD_TAIL_SUFFIX);
  981|  5.38k|    return;
  982|       |
  983|    842|vert_2:
  984|       |    // Duplicate the last row twice more
  985|    842|    sumsq_ptrs[3] = sumsq_ptrs[2];
  986|    842|    sumsq_ptrs[4] = sumsq_ptrs[2];
  987|    842|    sum_ptrs[3] = sum_ptrs[2];
  988|    842|    sum_ptrs[4] = sum_ptrs[2];
  989|    842|    goto output_2;
  990|       |
  991|  1.05k|odd:
  992|       |    // Copy the last row as padding once
  993|  1.05k|    sumsq_ptrs[4] = sumsq_ptrs[3];
  994|  1.05k|    sum_ptrs[4] = sum_ptrs[3];
  995|       |
  996|  1.05k|    sgr_box5_vert(sumsq_ptrs, sum_ptrs, A_ptrs[1], B_ptrs[1],
  997|  1.05k|                  w, params->sgr.s0, BITDEPTH_MAX);
  ------------------
  |  |   59|  1.05k|#define BITDEPTH_MAX 0xff
  ------------------
  998|  1.05k|    sgr_finish2(&dst, stride, A_ptrs, B_ptrs,
  999|  1.05k|                w, 2, params->sgr.w0 HIGHBD_TAIL_SUFFIX);
 1000|       |
 1001|  1.48k|output_1:
 1002|       |    // Duplicate the last row twice more
 1003|  1.48k|    sumsq_ptrs[3] = sumsq_ptrs[2];
 1004|  1.48k|    sumsq_ptrs[4] = sumsq_ptrs[2];
 1005|  1.48k|    sum_ptrs[3] = sum_ptrs[2];
 1006|  1.48k|    sum_ptrs[4] = sum_ptrs[2];
 1007|       |
 1008|  1.48k|    sgr_box5_vert(sumsq_ptrs, sum_ptrs, A_ptrs[1], B_ptrs[1],
 1009|  1.48k|                  w, params->sgr.s0, BITDEPTH_MAX);
  ------------------
  |  |   59|  1.48k|#define BITDEPTH_MAX 0xff
  ------------------
 1010|       |    // Output only one row
 1011|  1.48k|    sgr_finish2(&dst, stride, A_ptrs, B_ptrs,
 1012|  1.48k|                w, 1, params->sgr.w0 HIGHBD_TAIL_SUFFIX);
 1013|  1.48k|    return;
 1014|       |
 1015|    429|vert_1:
 1016|       |    // Copy the last row as padding once
 1017|    429|    sumsq_ptrs[4] = sumsq_ptrs[3];
 1018|    429|    sum_ptrs[4] = sum_ptrs[3];
 1019|       |
 1020|    429|    sgr_box5_vert(sumsq_ptrs, sum_ptrs, A_ptrs[1], B_ptrs[1],
 1021|    429|                  w, params->sgr.s0, BITDEPTH_MAX);
  ------------------
  |  |   59|    429|#define BITDEPTH_MAX 0xff
  ------------------
 1022|    429|    rotate(A_ptrs, B_ptrs, 2);
 1023|       |
 1024|    429|    goto output_1;
 1025|  1.05k|}
looprestoration_tmpl.c:sgr_box5_row_h:
  441|  1.19M|{
  442|  1.19M|    sumsq++;
  443|  1.19M|    sum++;
  444|  1.19M|    int a = edges & LR_HAVE_LEFT ? (left ? left[0][1] : src[-3]) : src[0];
  ------------------
  |  Branch (444:13): [True: 528k, False: 668k]
  |  Branch (444:37): [True: 497k, False: 30.4k]
  ------------------
  445|  1.19M|    int b = edges & LR_HAVE_LEFT ? (left ? left[0][2] : src[-2]) : src[0];
  ------------------
  |  Branch (445:13): [True: 528k, False: 668k]
  |  Branch (445:37): [True: 497k, False: 30.4k]
  ------------------
  446|  1.19M|    int c = edges & LR_HAVE_LEFT ? (left ? left[0][3] : src[-1]) : src[0];
  ------------------
  |  Branch (446:13): [True: 528k, False: 668k]
  |  Branch (446:37): [True: 497k, False: 30.4k]
  ------------------
  447|  1.19M|    int d = src[0];
  448|   111M|    for (int x = -1; x < w + 1; x++) {
  ------------------
  |  Branch (448:22): [True: 110M, False: 1.19M]
  ------------------
  449|   110M|        int e = (x + 2 < w || (edges & LR_HAVE_RIGHT)) ? src[x + 2] : src[w - 1];
  ------------------
  |  Branch (449:18): [True: 106M, False: 3.57M]
  |  Branch (449:31): [True: 1.60M, False: 1.97M]
  ------------------
  450|   110M|        sum[x] = a + b + c + d + e;
  451|   110M|        sumsq[x] = a * a + b * b + c * c + d * d + e * e;
  452|   110M|        a = b;
  453|   110M|        b = c;
  454|   110M|        c = d;
  455|   110M|        d = e;
  456|   110M|    }
  457|  1.19M|}
looprestoration_tmpl.c:sgr_box5_vert:
  537|   593k|{
  538|   593k|    sgr_box5_row_v(sumsq, sum, sumsq_out, sum_out, w);
  539|   593k|    sgr_calc_row_ab(sumsq_out, sum_out, w, s, bitdepth_max, 25, 164);
  540|   593k|    rotate5_x2(sumsq, sum);
  541|   593k|}
looprestoration_tmpl.c:sgr_box5_row_v:
  488|   593k|{
  489|  55.0M|    for (int x = 0; x < w + 2; x++) {
  ------------------
  |  Branch (489:21): [True: 54.4M, False: 593k]
  ------------------
  490|  54.4M|        int sq_a = sumsq[0][x];
  491|  54.4M|        int sq_b = sumsq[1][x];
  492|  54.4M|        int sq_c = sumsq[2][x];
  493|  54.4M|        int sq_d = sumsq[3][x];
  494|  54.4M|        int sq_e = sumsq[4][x];
  495|  54.4M|        int s_a = sum[0][x];
  496|  54.4M|        int s_b = sum[1][x];
  497|  54.4M|        int s_c = sum[2][x];
  498|  54.4M|        int s_d = sum[3][x];
  499|  54.4M|        int s_e = sum[4][x];
  500|  54.4M|        sumsq_out[x] = sq_a + sq_b + sq_c + sq_d + sq_e;
  501|  54.4M|        sum_out[x] = s_a + s_b + s_c + s_d + s_e;
  502|  54.4M|    }
  503|   593k|}
looprestoration_tmpl.c:sgr_calc_row_ab:
  507|  1.72M|{
  508|  1.72M|    const int bitdepth_min_8 = bitdepth_from_max(bitdepth_max) - 8;
  ------------------
  |  |   58|  1.72M|#define bitdepth_from_max(x) 8
  ------------------
  509|   148M|    for (int i = 0; i < w + 2; i++) {
  ------------------
  |  Branch (509:21): [True: 146M, False: 1.72M]
  ------------------
  510|   146M|        const int a =
  511|   146M|            (AA[i] + ((1 << (2 * bitdepth_min_8)) >> 1)) >> (2 * bitdepth_min_8);
  512|   146M|        const int b =
  513|   146M|            (BB[i] + ((1 << bitdepth_min_8) >> 1)) >> bitdepth_min_8;
  514|       |
  515|   146M|        const unsigned p = imax(a * n - b * b, 0);
  516|   146M|        const unsigned z = (p * s + (1 << 19)) >> 20;
  517|   146M|        const unsigned x = dav1d_sgr_x_by_x[umin(z, 255)];
  518|       |
  519|       |        // This is where we invert A and B, so that B is of size coef.
  520|   146M|        AA[i] = (x * BB[i] * sgr_one_by_x + (1 << 11)) >> 12;
  521|   146M|        BB[i] = x;
  522|   146M|    }
  523|  1.72M|}
looprestoration_tmpl.c:rotate5_x2:
  402|   591k|{
  403|   591k|    int32_t *tmp32[2];
  404|   591k|    coef *tmpc[2];
  405|  1.77M|    for (int i = 0; i < 2; i++) {
  ------------------
  |  Branch (405:21): [True: 1.18M, False: 591k]
  ------------------
  406|  1.18M|        tmp32[i] = sumsq_ptrs[i];
  407|  1.18M|        tmpc[i] = sum_ptrs[i];
  408|  1.18M|    }
  409|  2.36M|    for (int i = 0; i < 3; i++) {
  ------------------
  |  Branch (409:21): [True: 1.77M, False: 591k]
  ------------------
  410|  1.77M|        sumsq_ptrs[i] = sumsq_ptrs[i + 2];
  411|  1.77M|        sum_ptrs[i] = sum_ptrs[i + 2];
  412|  1.77M|    }
  413|  1.77M|    for (int i = 0; i < 2; i++) {
  ------------------
  |  Branch (413:21): [True: 1.18M, False: 591k]
  ------------------
  414|  1.18M|        sumsq_ptrs[3 + i] = tmp32[i];
  415|  1.18M|        sum_ptrs[3 + i] = tmpc[i];
  416|  1.18M|    }
  417|   591k|}
looprestoration_tmpl.c:rotate:
  390|  2.83M|{
  391|  2.83M|    int32_t *tmp32 = sumsq_ptrs[0];
  392|  2.83M|    coef *tmpc = sum_ptrs[0];
  393|  8.77M|    for (int i = 0; i < n - 1; i++) {
  ------------------
  |  Branch (393:21): [True: 5.93M, False: 2.83M]
  ------------------
  394|  5.93M|        sumsq_ptrs[i] = sumsq_ptrs[i + 1];
  395|  5.93M|        sum_ptrs[i] = sum_ptrs[i + 1];
  396|  5.93M|    }
  397|  2.83M|    sumsq_ptrs[n - 1] = tmp32;
  398|  2.83M|    sum_ptrs[n - 1] = tmpc;
  399|  2.83M|}
looprestoration_tmpl.c:sgr_finish2:
  645|   151k|{
  646|   151k|    ALIGN_STK_16(coef, tmp, 2*FILTER_OUT_STRIDE,);
  ------------------
  |  |  100|   151k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|   151k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
  647|       |
  648|   151k|    sgr_finish_filter2(tmp, *dst, stride, A_ptrs, B_ptrs, w, h);
  649|   151k|    sgr_weighted_row1(*dst, tmp, w, w1 HIGHBD_TAIL_SUFFIX);
  650|   151k|    *dst += PXSTRIDE(stride);
  ------------------
  |  |   53|   151k|#define PXSTRIDE(x) (x)
  ------------------
  651|   151k|    if (h > 1) {
  ------------------
  |  Branch (651:9): [True: 150k, False: 1.64k]
  ------------------
  652|   150k|        sgr_weighted_row1(*dst, tmp + FILTER_OUT_STRIDE, w, w1 HIGHBD_TAIL_SUFFIX);
  ------------------
  |  |  572|   150k|#define FILTER_OUT_STRIDE (384)
  ------------------
  653|   150k|        *dst += PXSTRIDE(stride);
  ------------------
  |  |   53|   150k|#define PXSTRIDE(x) (x)
  ------------------
  654|   150k|    }
  655|   151k|    rotate(A_ptrs, B_ptrs, 2);
  656|   151k|}
looprestoration_tmpl.c:sgr_finish_filter2:
  579|   566k|{
  580|   566k|#define SIX_NEIGHBORS(P, i)\
  581|   566k|    ((P[0][i]     + P[1][i]) * 6 +   \
  582|   566k|     (P[0][i - 1] + P[1][i - 1] +    \
  583|   566k|      P[0][i + 1] + P[1][i + 1]) * 5)
  584|  51.5M|    for (int i = 0; i < w; i++) {
  ------------------
  |  Branch (584:21): [True: 51.0M, False: 566k]
  ------------------
  585|  51.0M|        const int a = SIX_NEIGHBORS(B_ptrs, i + 1);
  ------------------
  |  |  581|  51.0M|    ((P[0][i]     + P[1][i]) * 6 +   \
  |  |  582|  51.0M|     (P[0][i - 1] + P[1][i - 1] +    \
  |  |  583|  51.0M|      P[0][i + 1] + P[1][i + 1]) * 5)
  ------------------
  586|  51.0M|        const int b = SIX_NEIGHBORS(A_ptrs, i + 1);
  ------------------
  |  |  581|  51.0M|    ((P[0][i]     + P[1][i]) * 6 +   \
  |  |  582|  51.0M|     (P[0][i - 1] + P[1][i - 1] +    \
  |  |  583|  51.0M|      P[0][i + 1] + P[1][i + 1]) * 5)
  ------------------
  587|  51.0M|        tmp[i] = (b - a * src[i] + (1 << 8)) >> 9;
  588|  51.0M|    }
  589|   566k|    if (h <= 1)
  ------------------
  |  Branch (589:9): [True: 4.58k, False: 562k]
  ------------------
  590|  4.58k|        return;
  591|   562k|    tmp += FILTER_OUT_STRIDE;
  ------------------
  |  |  572|   562k|#define FILTER_OUT_STRIDE (384)
  ------------------
  592|   562k|    src += PXSTRIDE(src_stride);
  ------------------
  |  |   53|   562k|#define PXSTRIDE(x) (x)
  ------------------
  593|   562k|    const int32_t *A = &A_ptrs[1][1];
  594|   562k|    const coef *B = &B_ptrs[1][1];
  595|  51.9M|    for (int i = 0; i < w; i++) {
  ------------------
  |  Branch (595:21): [True: 51.3M, False: 562k]
  ------------------
  596|  51.3M|        const int a = B[i] * 6 + (B[i - 1] + B[i + 1]) * 5;
  597|  51.3M|        const int b = A[i] * 6 + (A[i - 1] + A[i + 1]) * 5;
  598|  51.3M|        tmp[i] = (b - a * src[i] + (1 << 7)) >> 8;
  599|  51.3M|    }
  600|   562k|#undef SIX_NEIGHBORS
  601|   562k|}
looprestoration_tmpl.c:sgr_weighted_row1:
  605|   556k|{
  606|  60.5M|    for (int i = 0; i < w; i++) {
  ------------------
  |  Branch (606:21): [True: 59.9M, False: 556k]
  ------------------
  607|  59.9M|        const int v = w1 * t1[i];
  608|  59.9M|        dst[i] = iclip_pixel(dst[i] + ((v + (1 << 10)) >> 11));
  ------------------
  |  |   49|  59.9M|#define iclip_pixel iclip_u8
  ------------------
  609|  59.9M|    }
  610|   556k|}
looprestoration_tmpl.c:sgr_3x3_c:
  684|  5.83k|{
  685|  5.83k|#define BUF_STRIDE (384 + 16)
  686|  5.83k|    ALIGN_STK_16(int32_t, sumsq_buf, BUF_STRIDE * 3 + 16,);
  ------------------
  |  |  100|  5.83k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  5.83k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
  687|  5.83k|    ALIGN_STK_16(coef, sum_buf, BUF_STRIDE * 3 + 16,);
  ------------------
  |  |  100|  5.83k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  5.83k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
  688|  5.83k|    int32_t *sumsq_ptrs[3], *sumsq_rows[3];
  689|  5.83k|    coef *sum_ptrs[3], *sum_rows[3];
  690|  23.3k|    for (int i = 0; i < 3; i++) {
  ------------------
  |  Branch (690:21): [True: 17.5k, False: 5.83k]
  ------------------
  691|  17.5k|        sumsq_rows[i] = &sumsq_buf[i * BUF_STRIDE];
  ------------------
  |  |  685|  17.5k|#define BUF_STRIDE (384 + 16)
  ------------------
  692|  17.5k|        sum_rows[i] = &sum_buf[i * BUF_STRIDE];
  ------------------
  |  |  685|  17.5k|#define BUF_STRIDE (384 + 16)
  ------------------
  693|  17.5k|    }
  694|       |
  695|  5.83k|    ALIGN_STK_16(int32_t, A_buf, BUF_STRIDE * 3 + 16,);
  ------------------
  |  |  100|  5.83k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  5.83k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
  696|  5.83k|    ALIGN_STK_16(coef, B_buf, BUF_STRIDE * 3 + 16,);
  ------------------
  |  |  100|  5.83k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  5.83k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
  697|  5.83k|    int32_t *A_ptrs[3];
  698|  5.83k|    coef *B_ptrs[3];
  699|  23.3k|    for (int i = 0; i < 3; i++) {
  ------------------
  |  Branch (699:21): [True: 17.5k, False: 5.83k]
  ------------------
  700|  17.5k|        A_ptrs[i] = &A_buf[i * BUF_STRIDE];
  ------------------
  |  |  685|  17.5k|#define BUF_STRIDE (384 + 16)
  ------------------
  701|  17.5k|        B_ptrs[i] = &B_buf[i * BUF_STRIDE];
  ------------------
  |  |  685|  17.5k|#define BUF_STRIDE (384 + 16)
  ------------------
  702|  17.5k|    }
  703|  5.83k|    const pixel *src = dst;
  704|  5.83k|    const pixel *lpf_bottom = lpf + 6*PXSTRIDE(stride);
  ------------------
  |  |   53|  5.83k|#define PXSTRIDE(x) (x)
  ------------------
  705|       |
  706|  5.83k|    if (edges & LR_HAVE_TOP) {
  ------------------
  |  Branch (706:9): [True: 4.03k, False: 1.80k]
  ------------------
  707|  4.03k|        sumsq_ptrs[0] = sumsq_rows[0];
  708|  4.03k|        sumsq_ptrs[1] = sumsq_rows[1];
  709|  4.03k|        sumsq_ptrs[2] = sumsq_rows[2];
  710|  4.03k|        sum_ptrs[0] = sum_rows[0];
  711|  4.03k|        sum_ptrs[1] = sum_rows[1];
  712|  4.03k|        sum_ptrs[2] = sum_rows[2];
  713|       |
  714|  4.03k|        sgr_box3_row_h(sumsq_rows[0], sum_rows[0], NULL, lpf, w, edges);
  715|  4.03k|        lpf += PXSTRIDE(stride);
  ------------------
  |  |   53|  4.03k|#define PXSTRIDE(x) (x)
  ------------------
  716|  4.03k|        sgr_box3_row_h(sumsq_rows[1], sum_rows[1], NULL, lpf, w, edges);
  717|       |
  718|  4.03k|        sgr_box3_hv(sumsq_ptrs, sum_ptrs, A_ptrs[2], B_ptrs[2],
  719|  4.03k|                    left, src, w, params->sgr.s1, edges, BITDEPTH_MAX);
  ------------------
  |  |   59|  4.03k|#define BITDEPTH_MAX 0xff
  ------------------
  720|  4.03k|        left++;
  721|  4.03k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|  4.03k|#define PXSTRIDE(x) (x)
  ------------------
  722|  4.03k|        rotate(A_ptrs, B_ptrs, 3);
  723|       |
  724|  4.03k|        if (--h <= 0)
  ------------------
  |  Branch (724:13): [True: 310, False: 3.72k]
  ------------------
  725|    310|            goto vert_1;
  726|       |
  727|  3.72k|        sgr_box3_hv(sumsq_ptrs, sum_ptrs, A_ptrs[2], B_ptrs[2],
  728|  3.72k|                    left, src, w, params->sgr.s1, edges, BITDEPTH_MAX);
  ------------------
  |  |   59|  3.72k|#define BITDEPTH_MAX 0xff
  ------------------
  729|  3.72k|        left++;
  730|  3.72k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|  3.72k|#define PXSTRIDE(x) (x)
  ------------------
  731|  3.72k|        rotate(A_ptrs, B_ptrs, 3);
  732|       |
  733|  3.72k|        if (--h <= 0)
  ------------------
  |  Branch (733:13): [True: 597, False: 3.12k]
  ------------------
  734|    597|            goto vert_2;
  735|  3.72k|    } else {
  736|  1.80k|        sumsq_ptrs[0] = sumsq_rows[0];
  737|  1.80k|        sumsq_ptrs[1] = sumsq_rows[0];
  738|  1.80k|        sumsq_ptrs[2] = sumsq_rows[0];
  739|  1.80k|        sum_ptrs[0] = sum_rows[0];
  740|  1.80k|        sum_ptrs[1] = sum_rows[0];
  741|  1.80k|        sum_ptrs[2] = sum_rows[0];
  742|       |
  743|  1.80k|        sgr_box3_row_h(sumsq_rows[0], sum_rows[0], left, src, w, edges);
  744|  1.80k|        left++;
  745|  1.80k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|  1.80k|#define PXSTRIDE(x) (x)
  ------------------
  746|       |
  747|  1.80k|        sgr_box3_vert(sumsq_ptrs, sum_ptrs, A_ptrs[2], B_ptrs[2],
  748|  1.80k|                      w, params->sgr.s1, BITDEPTH_MAX);
  ------------------
  |  |   59|  1.80k|#define BITDEPTH_MAX 0xff
  ------------------
  749|  1.80k|        rotate(A_ptrs, B_ptrs, 3);
  750|       |
  751|  1.80k|        if (--h <= 0)
  ------------------
  |  Branch (751:13): [True: 115, False: 1.68k]
  ------------------
  752|    115|            goto vert_1;
  753|       |
  754|  1.68k|        sumsq_ptrs[2] = sumsq_rows[1];
  755|  1.68k|        sum_ptrs[2] = sum_rows[1];
  756|       |
  757|  1.68k|        sgr_box3_hv(sumsq_ptrs, sum_ptrs, A_ptrs[2], B_ptrs[2],
  758|  1.68k|                    left, src, w, params->sgr.s1, edges, BITDEPTH_MAX);
  ------------------
  |  |   59|  1.68k|#define BITDEPTH_MAX 0xff
  ------------------
  759|  1.68k|        left++;
  760|  1.68k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|  1.68k|#define PXSTRIDE(x) (x)
  ------------------
  761|  1.68k|        rotate(A_ptrs, B_ptrs, 3);
  762|       |
  763|  1.68k|        if (--h <= 0)
  ------------------
  |  Branch (763:13): [True: 79, False: 1.60k]
  ------------------
  764|     79|            goto vert_2;
  765|       |
  766|  1.60k|        sumsq_ptrs[2] = sumsq_rows[2];
  767|  1.60k|        sum_ptrs[2] = sum_rows[2];
  768|  1.60k|    }
  769|       |
  770|   244k|    do {
  771|   244k|        sgr_box3_hv(sumsq_ptrs, sum_ptrs, A_ptrs[2], B_ptrs[2],
  772|   244k|                    left, src, w, params->sgr.s1, edges, BITDEPTH_MAX);
  ------------------
  |  |   59|   244k|#define BITDEPTH_MAX 0xff
  ------------------
  773|   244k|        left++;
  774|   244k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|   244k|#define PXSTRIDE(x) (x)
  ------------------
  775|       |
  776|   244k|        sgr_finish1(&dst, stride, A_ptrs, B_ptrs,
  777|   244k|                    w, params->sgr.w1 HIGHBD_TAIL_SUFFIX);
  778|   244k|    } while (--h > 0);
  ------------------
  |  Branch (778:14): [True: 240k, False: 4.73k]
  ------------------
  779|       |
  780|  4.73k|    if (!(edges & LR_HAVE_BOTTOM))
  ------------------
  |  Branch (780:9): [True: 690, False: 4.04k]
  ------------------
  781|    690|        goto vert_2;
  782|       |
  783|  4.04k|    sgr_box3_hv(sumsq_ptrs, sum_ptrs, A_ptrs[2], B_ptrs[2],
  784|  4.04k|                NULL, lpf_bottom, w, params->sgr.s1, edges, BITDEPTH_MAX);
  ------------------
  |  |   59|  4.04k|#define BITDEPTH_MAX 0xff
  ------------------
  785|  4.04k|    lpf_bottom += PXSTRIDE(stride);
  ------------------
  |  |   53|  4.04k|#define PXSTRIDE(x) (x)
  ------------------
  786|       |
  787|  4.04k|    sgr_finish1(&dst, stride, A_ptrs, B_ptrs,
  788|  4.04k|                w, params->sgr.w1 HIGHBD_TAIL_SUFFIX);
  789|       |
  790|  4.04k|    sgr_box3_hv(sumsq_ptrs, sum_ptrs, A_ptrs[2], B_ptrs[2],
  791|  4.04k|                NULL, lpf_bottom, w, params->sgr.s1, edges, BITDEPTH_MAX);
  ------------------
  |  |   59|  4.04k|#define BITDEPTH_MAX 0xff
  ------------------
  792|       |
  793|  4.04k|    sgr_finish1(&dst, stride, A_ptrs, B_ptrs,
  794|  4.04k|                w, params->sgr.w1 HIGHBD_TAIL_SUFFIX);
  795|  4.04k|    return;
  796|       |
  797|  1.36k|vert_2:
  798|  1.36k|    sumsq_ptrs[2] = sumsq_ptrs[1];
  799|  1.36k|    sum_ptrs[2] = sum_ptrs[1];
  800|  1.36k|    sgr_box3_vert(sumsq_ptrs, sum_ptrs, A_ptrs[2], B_ptrs[2],
  801|  1.36k|                  w, params->sgr.s1, BITDEPTH_MAX);
  ------------------
  |  |   59|  1.36k|#define BITDEPTH_MAX 0xff
  ------------------
  802|       |
  803|  1.36k|    sgr_finish1(&dst, stride, A_ptrs, B_ptrs,
  804|  1.36k|                w, params->sgr.w1 HIGHBD_TAIL_SUFFIX);
  805|       |
  806|  1.79k|output_1:
  807|  1.79k|    sumsq_ptrs[2] = sumsq_ptrs[1];
  808|  1.79k|    sum_ptrs[2] = sum_ptrs[1];
  809|  1.79k|    sgr_box3_vert(sumsq_ptrs, sum_ptrs, A_ptrs[2], B_ptrs[2],
  810|  1.79k|                  w, params->sgr.s1, BITDEPTH_MAX);
  ------------------
  |  |   59|  1.79k|#define BITDEPTH_MAX 0xff
  ------------------
  811|       |
  812|  1.79k|    sgr_finish1(&dst, stride, A_ptrs, B_ptrs,
  813|  1.79k|                w, params->sgr.w1 HIGHBD_TAIL_SUFFIX);
  814|  1.79k|    return;
  815|       |
  816|    425|vert_1:
  817|    425|    sumsq_ptrs[2] = sumsq_ptrs[1];
  818|    425|    sum_ptrs[2] = sum_ptrs[1];
  819|    425|    sgr_box3_vert(sumsq_ptrs, sum_ptrs, A_ptrs[2], B_ptrs[2],
  820|    425|                  w, params->sgr.s1, BITDEPTH_MAX);
  ------------------
  |  |   59|    425|#define BITDEPTH_MAX 0xff
  ------------------
  821|    425|    rotate(A_ptrs, B_ptrs, 3);
  822|    425|    goto output_1;
  823|  1.36k|}
looprestoration_tmpl.c:sgr_box3_row_h:
  423|  1.14M|{
  424|  1.14M|    sumsq++;
  425|  1.14M|    sum++;
  426|  1.14M|    int a = edges & LR_HAVE_LEFT ? (left ? left[0][2] : src[-2]) : src[0];
  ------------------
  |  Branch (426:13): [True: 590k, False: 552k]
  |  Branch (426:37): [True: 556k, False: 34.6k]
  ------------------
  427|  1.14M|    int b = edges & LR_HAVE_LEFT ? (left ? left[0][3] : src[-1]) : src[0];
  ------------------
  |  Branch (427:13): [True: 590k, False: 552k]
  |  Branch (427:37): [True: 556k, False: 34.6k]
  ------------------
  428|   113M|    for (int x = -1; x < w + 1; x++) {
  ------------------
  |  Branch (428:22): [True: 112M, False: 1.14M]
  ------------------
  429|   112M|        int c = (x + 1 < w || (edges & LR_HAVE_RIGHT)) ? src[x + 1] : src[w - 1];
  ------------------
  |  Branch (429:18): [True: 110M, False: 2.29M]
  |  Branch (429:31): [True: 1.16M, False: 1.12M]
  ------------------
  430|   112M|        sum[x] = a + b + c;
  431|   112M|        sumsq[x] = a * a + b * b + c * c;
  432|   112M|        a = b;
  433|   112M|        b = c;
  434|   112M|    }
  435|  1.14M|}
looprestoration_tmpl.c:sgr_box3_hv:
  550|   262k|{
  551|   262k|    sgr_box3_row_h(sumsq[2], sum[2], left, src, w, edges);
  552|   262k|    sgr_box3_vert(sumsq, sum, AA, BB, w, s, bitdepth_max);
  553|   262k|}
looprestoration_tmpl.c:sgr_box3_vert:
  528|  1.12M|{
  529|  1.12M|    sgr_box3_row_v(sumsq, sum, sumsq_out, sum_out, w);
  530|  1.12M|    sgr_calc_row_ab(sumsq_out, sum_out, w, s, bitdepth_max, 9, 455);
  531|  1.12M|    rotate(sumsq, sum, 3);
  532|  1.12M|}
looprestoration_tmpl.c:sgr_box3_row_v:
  472|  1.12M|{
  473|   110M|    for (int x = 0; x < w + 2; x++) {
  ------------------
  |  Branch (473:21): [True: 109M, False: 1.12M]
  ------------------
  474|   109M|        int sq_a = sumsq[0][x];
  475|   109M|        int sq_b = sumsq[1][x];
  476|   109M|        int sq_c = sumsq[2][x];
  477|   109M|        int s_a = sum[0][x];
  478|   109M|        int s_b = sum[1][x];
  479|   109M|        int s_c = sum[2][x];
  480|   109M|        sumsq_out[x] = sq_a + sq_b + sq_c;
  481|   109M|        sum_out[x] = s_a + s_b + s_c;
  482|   109M|    }
  483|  1.12M|}
looprestoration_tmpl.c:sgr_finish1:
  631|   255k|{
  632|       |    // Only one single row, no stride needed
  633|   255k|    ALIGN_STK_16(coef, tmp, 384,);
  ------------------
  |  |  100|   255k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|   255k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
  634|       |
  635|   255k|    sgr_finish_filter_row1(tmp, *dst, A_ptrs, B_ptrs, w);
  636|   255k|    sgr_weighted_row1(*dst, tmp, w, w1 HIGHBD_TAIL_SUFFIX);
  637|   255k|    *dst += PXSTRIDE(stride);
  ------------------
  |  |   53|   255k|#define PXSTRIDE(x) (x)
  ------------------
  638|   255k|    rotate(A_ptrs, B_ptrs, 3);
  639|   255k|}
looprestoration_tmpl.c:sgr_finish_filter_row1:
  559|  1.08M|{
  560|  1.08M|#define EIGHT_NEIGHBORS(P, i)\
  561|  1.08M|    ((P[1][i] + P[1][i - 1] + P[1][i + 1] + P[0][i] + P[2][i]) * 4 + \
  562|  1.08M|     (P[0][i - 1] + P[2][i - 1] +                           \
  563|  1.08M|      P[0][i + 1] + P[2][i + 1]) * 3)
  564|   101M|    for (int i = 0; i < w; i++) {
  ------------------
  |  Branch (564:21): [True: 100M, False: 1.08M]
  ------------------
  565|   100M|        const int a = EIGHT_NEIGHBORS(B_ptrs, i + 1);
  ------------------
  |  |  561|   100M|    ((P[1][i] + P[1][i - 1] + P[1][i + 1] + P[0][i] + P[2][i]) * 4 + \
  |  |  562|   100M|     (P[0][i - 1] + P[2][i - 1] +                           \
  |  |  563|   100M|      P[0][i + 1] + P[2][i + 1]) * 3)
  ------------------
  566|   100M|        const int b = EIGHT_NEIGHBORS(A_ptrs, i + 1);
  ------------------
  |  |  561|   100M|    ((P[1][i] + P[1][i - 1] + P[1][i + 1] + P[0][i] + P[2][i]) * 4 + \
  |  |  562|   100M|     (P[0][i - 1] + P[2][i - 1] +                           \
  |  |  563|   100M|      P[0][i + 1] + P[2][i + 1]) * 3)
  ------------------
  567|   100M|        tmp[i] = (b - a * src[i] + (1 << 8)) >> 9;
  568|   100M|    }
  569|  1.08M|#undef EIGHT_NEIGHBORS
  570|  1.08M|}
looprestoration_tmpl.c:sgr_mix_c:
 1032|  17.2k|{
 1033|  17.2k|    ALIGN_STK_16(int32_t, sumsq5_buf, BUF_STRIDE * 5 + 16,);
  ------------------
  |  |  100|  17.2k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  17.2k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
 1034|  17.2k|    ALIGN_STK_16(coef, sum5_buf, BUF_STRIDE * 5 + 16,);
  ------------------
  |  |  100|  17.2k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  17.2k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
 1035|  17.2k|    int32_t *sumsq5_ptrs[5], *sumsq5_rows[5];
 1036|  17.2k|    coef *sum5_ptrs[5], *sum5_rows[5];
 1037|   103k|    for (int i = 0; i < 5; i++) {
  ------------------
  |  Branch (1037:21): [True: 86.2k, False: 17.2k]
  ------------------
 1038|  86.2k|        sumsq5_rows[i] = &sumsq5_buf[i * BUF_STRIDE];
  ------------------
  |  |  685|  86.2k|#define BUF_STRIDE (384 + 16)
  ------------------
 1039|  86.2k|        sum5_rows[i] = &sum5_buf[i * BUF_STRIDE];
  ------------------
  |  |  685|  86.2k|#define BUF_STRIDE (384 + 16)
  ------------------
 1040|  86.2k|    }
 1041|  17.2k|    ALIGN_STK_16(int32_t, sumsq3_buf, BUF_STRIDE * 3 + 16,);
  ------------------
  |  |  100|  17.2k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  17.2k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
 1042|  17.2k|    ALIGN_STK_16(coef, sum3_buf, BUF_STRIDE * 3 + 16,);
  ------------------
  |  |  100|  17.2k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  17.2k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
 1043|  17.2k|    int32_t *sumsq3_ptrs[3], *sumsq3_rows[3];
 1044|  17.2k|    coef *sum3_ptrs[3], *sum3_rows[3];
 1045|  68.9k|    for (int i = 0; i < 3; i++) {
  ------------------
  |  Branch (1045:21): [True: 51.7k, False: 17.2k]
  ------------------
 1046|  51.7k|        sumsq3_rows[i] = &sumsq3_buf[i * BUF_STRIDE];
  ------------------
  |  |  685|  51.7k|#define BUF_STRIDE (384 + 16)
  ------------------
 1047|  51.7k|        sum3_rows[i] = &sum3_buf[i * BUF_STRIDE];
  ------------------
  |  |  685|  51.7k|#define BUF_STRIDE (384 + 16)
  ------------------
 1048|  51.7k|    }
 1049|       |
 1050|  17.2k|    ALIGN_STK_16(int32_t, A5_buf, BUF_STRIDE * 2 + 16,);
  ------------------
  |  |  100|  17.2k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  17.2k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
 1051|  17.2k|    ALIGN_STK_16(coef, B5_buf, BUF_STRIDE * 2 + 16,);
  ------------------
  |  |  100|  17.2k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  17.2k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
 1052|  17.2k|    int32_t *A5_ptrs[2];
 1053|  17.2k|    coef *B5_ptrs[2];
 1054|  51.7k|    for (int i = 0; i < 2; i++) {
  ------------------
  |  Branch (1054:21): [True: 34.4k, False: 17.2k]
  ------------------
 1055|  34.4k|        A5_ptrs[i] = &A5_buf[i * BUF_STRIDE];
  ------------------
  |  |  685|  34.4k|#define BUF_STRIDE (384 + 16)
  ------------------
 1056|  34.4k|        B5_ptrs[i] = &B5_buf[i * BUF_STRIDE];
  ------------------
  |  |  685|  34.4k|#define BUF_STRIDE (384 + 16)
  ------------------
 1057|  34.4k|    }
 1058|  17.2k|    ALIGN_STK_16(int32_t, A3_buf, BUF_STRIDE * 4 + 16,);
  ------------------
  |  |  100|  17.2k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  17.2k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
 1059|  17.2k|    ALIGN_STK_16(coef, B3_buf, BUF_STRIDE * 4 + 16,);
  ------------------
  |  |  100|  17.2k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|  17.2k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
 1060|  17.2k|    int32_t *A3_ptrs[4];
 1061|  17.2k|    coef *B3_ptrs[4];
 1062|  86.2k|    for (int i = 0; i < 4; i++) {
  ------------------
  |  Branch (1062:21): [True: 68.9k, False: 17.2k]
  ------------------
 1063|  68.9k|        A3_ptrs[i] = &A3_buf[i * BUF_STRIDE];
  ------------------
  |  |  685|  68.9k|#define BUF_STRIDE (384 + 16)
  ------------------
 1064|  68.9k|        B3_ptrs[i] = &B3_buf[i * BUF_STRIDE];
  ------------------
  |  |  685|  68.9k|#define BUF_STRIDE (384 + 16)
  ------------------
 1065|  68.9k|    }
 1066|  17.2k|    const pixel *src = dst;
 1067|  17.2k|    const pixel *lpf_bottom = lpf + 6*PXSTRIDE(stride);
  ------------------
  |  |   53|  17.2k|#define PXSTRIDE(x) (x)
  ------------------
 1068|       |
 1069|  17.2k|    if (edges & LR_HAVE_TOP) {
  ------------------
  |  Branch (1069:9): [True: 12.0k, False: 5.22k]
  ------------------
 1070|  12.0k|        sumsq5_ptrs[0] = sumsq5_rows[0];
 1071|  12.0k|        sumsq5_ptrs[1] = sumsq5_rows[0];
 1072|  12.0k|        sumsq5_ptrs[2] = sumsq5_rows[1];
 1073|  12.0k|        sumsq5_ptrs[3] = sumsq5_rows[2];
 1074|  12.0k|        sumsq5_ptrs[4] = sumsq5_rows[3];
 1075|  12.0k|        sum5_ptrs[0] = sum5_rows[0];
 1076|  12.0k|        sum5_ptrs[1] = sum5_rows[0];
 1077|  12.0k|        sum5_ptrs[2] = sum5_rows[1];
 1078|  12.0k|        sum5_ptrs[3] = sum5_rows[2];
 1079|  12.0k|        sum5_ptrs[4] = sum5_rows[3];
 1080|       |
 1081|  12.0k|        sumsq3_ptrs[0] = sumsq3_rows[0];
 1082|  12.0k|        sumsq3_ptrs[1] = sumsq3_rows[1];
 1083|  12.0k|        sumsq3_ptrs[2] = sumsq3_rows[2];
 1084|  12.0k|        sum3_ptrs[0] = sum3_rows[0];
 1085|  12.0k|        sum3_ptrs[1] = sum3_rows[1];
 1086|  12.0k|        sum3_ptrs[2] = sum3_rows[2];
 1087|       |
 1088|  12.0k|        sgr_box35_row_h(sumsq3_rows[0], sum3_rows[0],
 1089|  12.0k|                        sumsq5_rows[0], sum5_rows[0],
 1090|  12.0k|                        NULL, lpf, w, edges);
 1091|  12.0k|        lpf += PXSTRIDE(stride);
  ------------------
  |  |   53|  12.0k|#define PXSTRIDE(x) (x)
  ------------------
 1092|  12.0k|        sgr_box35_row_h(sumsq3_rows[1], sum3_rows[1],
 1093|  12.0k|                        sumsq5_rows[1], sum5_rows[1],
 1094|  12.0k|                        NULL, lpf, w, edges);
 1095|       |
 1096|  12.0k|        sgr_box35_row_h(sumsq3_rows[2], sum3_rows[2],
 1097|  12.0k|                        sumsq5_rows[2], sum5_rows[2],
 1098|  12.0k|                        left, src, w, edges);
 1099|  12.0k|        left++;
 1100|  12.0k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|  12.0k|#define PXSTRIDE(x) (x)
  ------------------
 1101|       |
 1102|  12.0k|        sgr_box3_vert(sumsq3_ptrs, sum3_ptrs, A3_ptrs[3], B3_ptrs[3],
 1103|  12.0k|                      w, params->sgr.s1, BITDEPTH_MAX);
  ------------------
  |  |   59|  12.0k|#define BITDEPTH_MAX 0xff
  ------------------
 1104|  12.0k|        rotate(A3_ptrs, B3_ptrs, 4);
 1105|       |
 1106|  12.0k|        if (--h <= 0)
  ------------------
  |  Branch (1106:13): [True: 809, False: 11.2k]
  ------------------
 1107|    809|            goto vert_1;
 1108|       |
 1109|  11.2k|        sgr_box35_row_h(sumsq3_ptrs[2], sum3_ptrs[2],
 1110|  11.2k|                        sumsq5_rows[3], sum5_rows[3],
 1111|  11.2k|                        left, src, w, edges);
 1112|  11.2k|        left++;
 1113|  11.2k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|  11.2k|#define PXSTRIDE(x) (x)
  ------------------
 1114|  11.2k|        sgr_box5_vert(sumsq5_ptrs, sum5_ptrs, A5_ptrs[1], B5_ptrs[1],
 1115|  11.2k|                      w, params->sgr.s0, BITDEPTH_MAX);
  ------------------
  |  |   59|  11.2k|#define BITDEPTH_MAX 0xff
  ------------------
 1116|  11.2k|        rotate(A5_ptrs, B5_ptrs, 2);
 1117|  11.2k|        sgr_box3_vert(sumsq3_ptrs, sum3_ptrs, A3_ptrs[3], B3_ptrs[3],
 1118|  11.2k|                      w, params->sgr.s1, BITDEPTH_MAX);
  ------------------
  |  |   59|  11.2k|#define BITDEPTH_MAX 0xff
  ------------------
 1119|  11.2k|        rotate(A3_ptrs, B3_ptrs, 4);
 1120|       |
 1121|  11.2k|        if (--h <= 0)
  ------------------
  |  Branch (1121:13): [True: 1.11k, False: 10.1k]
  ------------------
 1122|  1.11k|            goto vert_2;
 1123|       |
 1124|       |        // ptrs are rotated by 2; both [3] and [4] now point at rows[0]; set
 1125|       |        // one of them to point at the previously unused rows[4].
 1126|  10.1k|        sumsq5_ptrs[3] = sumsq5_rows[4];
 1127|  10.1k|        sum5_ptrs[3] = sum5_rows[4];
 1128|  10.1k|    } else {
 1129|  5.22k|        sumsq5_ptrs[0] = sumsq5_rows[0];
 1130|  5.22k|        sumsq5_ptrs[1] = sumsq5_rows[0];
 1131|  5.22k|        sumsq5_ptrs[2] = sumsq5_rows[0];
 1132|  5.22k|        sumsq5_ptrs[3] = sumsq5_rows[0];
 1133|  5.22k|        sumsq5_ptrs[4] = sumsq5_rows[0];
 1134|  5.22k|        sum5_ptrs[0] = sum5_rows[0];
 1135|  5.22k|        sum5_ptrs[1] = sum5_rows[0];
 1136|  5.22k|        sum5_ptrs[2] = sum5_rows[0];
 1137|  5.22k|        sum5_ptrs[3] = sum5_rows[0];
 1138|  5.22k|        sum5_ptrs[4] = sum5_rows[0];
 1139|       |
 1140|  5.22k|        sumsq3_ptrs[0] = sumsq3_rows[0];
 1141|  5.22k|        sumsq3_ptrs[1] = sumsq3_rows[0];
 1142|  5.22k|        sumsq3_ptrs[2] = sumsq3_rows[0];
 1143|  5.22k|        sum3_ptrs[0] = sum3_rows[0];
 1144|  5.22k|        sum3_ptrs[1] = sum3_rows[0];
 1145|  5.22k|        sum3_ptrs[2] = sum3_rows[0];
 1146|       |
 1147|  5.22k|        sgr_box35_row_h(sumsq3_rows[0], sum3_rows[0],
 1148|  5.22k|                        sumsq5_rows[0], sum5_rows[0],
 1149|  5.22k|                        left, src, w, edges);
 1150|  5.22k|        left++;
 1151|  5.22k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|  5.22k|#define PXSTRIDE(x) (x)
  ------------------
 1152|       |
 1153|  5.22k|        sgr_box3_vert(sumsq3_ptrs, sum3_ptrs, A3_ptrs[3], B3_ptrs[3],
 1154|  5.22k|                      w, params->sgr.s1, BITDEPTH_MAX);
  ------------------
  |  |   59|  5.22k|#define BITDEPTH_MAX 0xff
  ------------------
 1155|  5.22k|        rotate(A3_ptrs, B3_ptrs, 4);
 1156|       |
 1157|  5.22k|        if (--h <= 0)
  ------------------
  |  Branch (1157:13): [True: 697, False: 4.52k]
  ------------------
 1158|    697|            goto vert_1;
 1159|       |
 1160|  4.52k|        sumsq5_ptrs[4] = sumsq5_rows[1];
 1161|  4.52k|        sum5_ptrs[4] = sum5_rows[1];
 1162|       |
 1163|  4.52k|        sumsq3_ptrs[2] = sumsq3_rows[1];
 1164|  4.52k|        sum3_ptrs[2] = sum3_rows[1];
 1165|       |
 1166|  4.52k|        sgr_box35_row_h(sumsq3_rows[1], sum3_rows[1],
 1167|  4.52k|                        sumsq5_rows[1], sum5_rows[1],
 1168|  4.52k|                        left, src, w, edges);
 1169|  4.52k|        left++;
 1170|  4.52k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|  4.52k|#define PXSTRIDE(x) (x)
  ------------------
 1171|       |
 1172|  4.52k|        sgr_box5_vert(sumsq5_ptrs, sum5_ptrs, A5_ptrs[1], B5_ptrs[1],
 1173|  4.52k|                      w, params->sgr.s0, BITDEPTH_MAX);
  ------------------
  |  |   59|  4.52k|#define BITDEPTH_MAX 0xff
  ------------------
 1174|  4.52k|        rotate(A5_ptrs, B5_ptrs, 2);
 1175|  4.52k|        sgr_box3_vert(sumsq3_ptrs, sum3_ptrs, A3_ptrs[3], B3_ptrs[3],
 1176|  4.52k|                      w, params->sgr.s1, BITDEPTH_MAX);
  ------------------
  |  |   59|  4.52k|#define BITDEPTH_MAX 0xff
  ------------------
 1177|  4.52k|        rotate(A3_ptrs, B3_ptrs, 4);
 1178|       |
 1179|  4.52k|        if (--h <= 0)
  ------------------
  |  Branch (1179:13): [True: 200, False: 4.32k]
  ------------------
 1180|    200|            goto vert_2;
 1181|       |
 1182|  4.32k|        sumsq5_ptrs[3] = sumsq5_rows[2];
 1183|  4.32k|        sumsq5_ptrs[4] = sumsq5_rows[3];
 1184|  4.32k|        sum5_ptrs[3] = sum5_rows[2];
 1185|  4.32k|        sum5_ptrs[4] = sum5_rows[3];
 1186|       |
 1187|  4.32k|        sumsq3_ptrs[2] = sumsq3_rows[2];
 1188|  4.32k|        sum3_ptrs[2] = sum3_rows[2];
 1189|       |
 1190|  4.32k|        sgr_box35_row_h(sumsq3_rows[2], sum3_rows[2],
 1191|  4.32k|                        sumsq5_rows[2], sum5_rows[2],
 1192|  4.32k|                        left, src, w, edges);
 1193|  4.32k|        left++;
 1194|  4.32k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|  4.32k|#define PXSTRIDE(x) (x)
  ------------------
 1195|       |
 1196|  4.32k|        sgr_box3_vert(sumsq3_ptrs, sum3_ptrs, A3_ptrs[3], B3_ptrs[3],
 1197|  4.32k|                      w, params->sgr.s1, BITDEPTH_MAX);
  ------------------
  |  |   59|  4.32k|#define BITDEPTH_MAX 0xff
  ------------------
 1198|  4.32k|        rotate(A3_ptrs, B3_ptrs, 4);
 1199|       |
 1200|  4.32k|        if (--h <= 0)
  ------------------
  |  Branch (1200:13): [True: 154, False: 4.16k]
  ------------------
 1201|    154|            goto odd;
 1202|       |
 1203|  4.16k|        sgr_box35_row_h(sumsq3_ptrs[2], sum3_ptrs[2],
 1204|  4.16k|                        sumsq5_rows[3], sum5_rows[3],
 1205|  4.16k|                        left, src, w, edges);
 1206|  4.16k|        left++;
 1207|  4.16k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|  4.16k|#define PXSTRIDE(x) (x)
  ------------------
 1208|       |
 1209|  4.16k|        sgr_box5_vert(sumsq5_ptrs, sum5_ptrs, A5_ptrs[1], B5_ptrs[1],
 1210|  4.16k|                      w, params->sgr.s0, BITDEPTH_MAX);
  ------------------
  |  |   59|  4.16k|#define BITDEPTH_MAX 0xff
  ------------------
 1211|  4.16k|        sgr_box3_vert(sumsq3_ptrs, sum3_ptrs, A3_ptrs[3], B3_ptrs[3],
 1212|  4.16k|                      w, params->sgr.s1, BITDEPTH_MAX);
  ------------------
  |  |   59|  4.16k|#define BITDEPTH_MAX 0xff
  ------------------
 1213|  4.16k|        sgr_finish_mix(&dst, stride, A5_ptrs, B5_ptrs, A3_ptrs, B3_ptrs,
 1214|  4.16k|                       w, 2, params->sgr.w0, params->sgr.w1
 1215|  4.16k|                       HIGHBD_TAIL_SUFFIX);
 1216|       |
 1217|  4.16k|        if (--h <= 0)
  ------------------
  |  Branch (1217:13): [True: 125, False: 4.04k]
  ------------------
 1218|    125|            goto vert_2;
 1219|       |
 1220|       |        // ptrs are rotated by 2; both [3] and [4] now point at rows[0]; set
 1221|       |        // one of them to point at the previously unused rows[4].
 1222|  4.04k|        sumsq5_ptrs[3] = sumsq5_rows[4];
 1223|  4.04k|        sum5_ptrs[3] = sum5_rows[4];
 1224|  4.04k|    }
 1225|       |
 1226|   394k|    do {
 1227|   394k|        sgr_box35_row_h(sumsq3_ptrs[2], sum3_ptrs[2],
 1228|   394k|                        sumsq5_ptrs[3], sum5_ptrs[3],
 1229|   394k|                        left, src, w, edges);
 1230|   394k|        left++;
 1231|   394k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|   394k|#define PXSTRIDE(x) (x)
  ------------------
 1232|       |
 1233|   394k|        sgr_box3_vert(sumsq3_ptrs, sum3_ptrs, A3_ptrs[3], B3_ptrs[3],
 1234|   394k|                      w, params->sgr.s1, BITDEPTH_MAX);
  ------------------
  |  |   59|   394k|#define BITDEPTH_MAX 0xff
  ------------------
 1235|   394k|        rotate(A3_ptrs, B3_ptrs, 4);
 1236|       |
 1237|   394k|        if (--h <= 0)
  ------------------
  |  Branch (1237:13): [True: 1.43k, False: 393k]
  ------------------
 1238|  1.43k|            goto odd;
 1239|       |
 1240|   393k|        sgr_box35_row_h(sumsq3_ptrs[2], sum3_ptrs[2],
 1241|   393k|                        sumsq5_ptrs[4], sum5_ptrs[4],
 1242|   393k|                        left, src, w, edges);
 1243|   393k|        left++;
 1244|   393k|        src += PXSTRIDE(stride);
  ------------------
  |  |   53|   393k|#define PXSTRIDE(x) (x)
  ------------------
 1245|       |
 1246|   393k|        sgr_box5_vert(sumsq5_ptrs, sum5_ptrs, A5_ptrs[1], B5_ptrs[1],
 1247|   393k|                      w, params->sgr.s0, BITDEPTH_MAX);
  ------------------
  |  |   59|   393k|#define BITDEPTH_MAX 0xff
  ------------------
 1248|   393k|        sgr_box3_vert(sumsq3_ptrs, sum3_ptrs, A3_ptrs[3], B3_ptrs[3],
 1249|   393k|                      w, params->sgr.s1, BITDEPTH_MAX);
  ------------------
  |  |   59|   393k|#define BITDEPTH_MAX 0xff
  ------------------
 1250|   393k|        sgr_finish_mix(&dst, stride, A5_ptrs, B5_ptrs, A3_ptrs, B3_ptrs,
 1251|   393k|                       w, 2, params->sgr.w0, params->sgr.w1
 1252|   393k|                       HIGHBD_TAIL_SUFFIX);
 1253|   393k|    } while (--h > 0);
  ------------------
  |  Branch (1253:14): [True: 380k, False: 12.7k]
  ------------------
 1254|       |
 1255|  12.7k|    if (!(edges & LR_HAVE_BOTTOM))
  ------------------
  |  Branch (1255:9): [True: 707, False: 12.0k]
  ------------------
 1256|    707|        goto vert_2;
 1257|       |
 1258|  12.0k|    sgr_box35_row_h(sumsq3_ptrs[2], sum3_ptrs[2],
 1259|  12.0k|                    sumsq5_ptrs[3], sum5_ptrs[3],
 1260|  12.0k|                    NULL, lpf_bottom, w, edges);
 1261|  12.0k|    lpf_bottom += PXSTRIDE(stride);
  ------------------
  |  |   53|  12.0k|#define PXSTRIDE(x) (x)
  ------------------
 1262|  12.0k|    sgr_box3_vert(sumsq3_ptrs, sum3_ptrs, A3_ptrs[3], B3_ptrs[3],
 1263|  12.0k|                  w, params->sgr.s1, BITDEPTH_MAX);
  ------------------
  |  |   59|  12.0k|#define BITDEPTH_MAX 0xff
  ------------------
 1264|  12.0k|    rotate(A3_ptrs, B3_ptrs, 4);
 1265|       |
 1266|  12.0k|    sgr_box35_row_h(sumsq3_ptrs[2], sum3_ptrs[2],
 1267|  12.0k|                    sumsq5_ptrs[4], sum5_ptrs[4],
 1268|  12.0k|                    NULL, lpf_bottom, w, edges);
 1269|       |
 1270|  14.1k|output_2:
 1271|  14.1k|    sgr_box5_vert(sumsq5_ptrs, sum5_ptrs, A5_ptrs[1], B5_ptrs[1],
 1272|  14.1k|                  w, params->sgr.s0, BITDEPTH_MAX);
  ------------------
  |  |   59|  14.1k|#define BITDEPTH_MAX 0xff
  ------------------
 1273|  14.1k|    sgr_box3_vert(sumsq3_ptrs, sum3_ptrs, A3_ptrs[3], B3_ptrs[3],
 1274|  14.1k|                  w, params->sgr.s1, BITDEPTH_MAX);
  ------------------
  |  |   59|  14.1k|#define BITDEPTH_MAX 0xff
  ------------------
 1275|  14.1k|    sgr_finish_mix(&dst, stride, A5_ptrs, B5_ptrs, A3_ptrs, B3_ptrs,
 1276|  14.1k|                   w, 2, params->sgr.w0, params->sgr.w1
 1277|  14.1k|                   HIGHBD_TAIL_SUFFIX);
 1278|  14.1k|    return;
 1279|       |
 1280|  2.14k|vert_2:
 1281|       |    // Duplicate the last row twice more
 1282|  2.14k|    sumsq5_ptrs[3] = sumsq5_ptrs[2];
 1283|  2.14k|    sumsq5_ptrs[4] = sumsq5_ptrs[2];
 1284|  2.14k|    sum5_ptrs[3] = sum5_ptrs[2];
 1285|  2.14k|    sum5_ptrs[4] = sum5_ptrs[2];
 1286|       |
 1287|  2.14k|    sumsq3_ptrs[2] = sumsq3_ptrs[1];
 1288|  2.14k|    sum3_ptrs[2] = sum3_ptrs[1];
 1289|  2.14k|    sgr_box3_vert(sumsq3_ptrs, sum3_ptrs, A3_ptrs[3], B3_ptrs[3],
 1290|  2.14k|                  w, params->sgr.s1, BITDEPTH_MAX);
  ------------------
  |  |   59|  2.14k|#define BITDEPTH_MAX 0xff
  ------------------
 1291|  2.14k|    rotate(A3_ptrs, B3_ptrs, 4);
 1292|       |
 1293|  2.14k|    sumsq3_ptrs[2] = sumsq3_ptrs[1];
 1294|  2.14k|    sum3_ptrs[2] = sum3_ptrs[1];
 1295|       |
 1296|  2.14k|    goto output_2;
 1297|       |
 1298|  1.58k|odd:
 1299|       |    // Copy the last row as padding once
 1300|  1.58k|    sumsq5_ptrs[4] = sumsq5_ptrs[3];
 1301|  1.58k|    sum5_ptrs[4] = sum5_ptrs[3];
 1302|       |
 1303|  1.58k|    sumsq3_ptrs[2] = sumsq3_ptrs[1];
 1304|  1.58k|    sum3_ptrs[2] = sum3_ptrs[1];
 1305|       |
 1306|  1.58k|    sgr_box5_vert(sumsq5_ptrs, sum5_ptrs, A5_ptrs[1], B5_ptrs[1],
 1307|  1.58k|                  w, params->sgr.s0, BITDEPTH_MAX);
  ------------------
  |  |   59|  1.58k|#define BITDEPTH_MAX 0xff
  ------------------
 1308|  1.58k|    sgr_box3_vert(sumsq3_ptrs, sum3_ptrs, A3_ptrs[3], B3_ptrs[3],
 1309|  1.58k|                  w, params->sgr.s1, BITDEPTH_MAX);
  ------------------
  |  |   59|  1.58k|#define BITDEPTH_MAX 0xff
  ------------------
 1310|  1.58k|    sgr_finish_mix(&dst, stride, A5_ptrs, B5_ptrs, A3_ptrs, B3_ptrs,
 1311|  1.58k|                   w, 2, params->sgr.w0, params->sgr.w1
 1312|  1.58k|                   HIGHBD_TAIL_SUFFIX);
 1313|       |
 1314|  3.09k|output_1:
 1315|       |    // Duplicate the last row twice more
 1316|  3.09k|    sumsq5_ptrs[3] = sumsq5_ptrs[2];
 1317|  3.09k|    sumsq5_ptrs[4] = sumsq5_ptrs[2];
 1318|  3.09k|    sum5_ptrs[3] = sum5_ptrs[2];
 1319|  3.09k|    sum5_ptrs[4] = sum5_ptrs[2];
 1320|       |
 1321|  3.09k|    sumsq3_ptrs[2] = sumsq3_ptrs[1];
 1322|  3.09k|    sum3_ptrs[2] = sum3_ptrs[1];
 1323|       |
 1324|  3.09k|    sgr_box5_vert(sumsq5_ptrs, sum5_ptrs, A5_ptrs[1], B5_ptrs[1],
 1325|  3.09k|                  w, params->sgr.s0, BITDEPTH_MAX);
  ------------------
  |  |   59|  3.09k|#define BITDEPTH_MAX 0xff
  ------------------
 1326|  3.09k|    sgr_box3_vert(sumsq3_ptrs, sum3_ptrs, A3_ptrs[3], B3_ptrs[3],
 1327|  3.09k|                  w, params->sgr.s1, BITDEPTH_MAX);
  ------------------
  |  |   59|  3.09k|#define BITDEPTH_MAX 0xff
  ------------------
 1328|  3.09k|    rotate(A3_ptrs, B3_ptrs, 4);
 1329|       |    // Output only one row
 1330|  3.09k|    sgr_finish_mix(&dst, stride, A5_ptrs, B5_ptrs, A3_ptrs, B3_ptrs,
 1331|  3.09k|                   w, 1, params->sgr.w0, params->sgr.w1
 1332|  3.09k|                   HIGHBD_TAIL_SUFFIX);
 1333|  3.09k|    return;
 1334|       |
 1335|  1.50k|vert_1:
 1336|       |    // Copy the last row as padding once
 1337|  1.50k|    sumsq5_ptrs[4] = sumsq5_ptrs[3];
 1338|  1.50k|    sum5_ptrs[4] = sum5_ptrs[3];
 1339|       |
 1340|  1.50k|    sumsq3_ptrs[2] = sumsq3_ptrs[1];
 1341|  1.50k|    sum3_ptrs[2] = sum3_ptrs[1];
 1342|       |
 1343|  1.50k|    sgr_box5_vert(sumsq5_ptrs, sum5_ptrs, A5_ptrs[1], B5_ptrs[1],
 1344|  1.50k|                  w, params->sgr.s0, BITDEPTH_MAX);
  ------------------
  |  |   59|  1.50k|#define BITDEPTH_MAX 0xff
  ------------------
 1345|  1.50k|    rotate(A5_ptrs, B5_ptrs, 2);
 1346|  1.50k|    sgr_box3_vert(sumsq3_ptrs, sum3_ptrs, A3_ptrs[3], B3_ptrs[3],
 1347|  1.50k|                  w, params->sgr.s1, BITDEPTH_MAX);
  ------------------
  |  |   59|  1.50k|#define BITDEPTH_MAX 0xff
  ------------------
 1348|  1.50k|    rotate(A3_ptrs, B3_ptrs, 4);
 1349|       |
 1350|  1.50k|    goto output_1;
 1351|  1.58k|}
looprestoration_tmpl.c:sgr_box35_row_h:
  464|   873k|{
  465|   873k|    sgr_box3_row_h(sumsq3, sum3, left, src, w, edges);
  466|   873k|    sgr_box5_row_h(sumsq5, sum5, left, src, w, edges);
  467|   873k|}
looprestoration_tmpl.c:sgr_finish_mix:
  663|   415k|{
  664|   415k|    ALIGN_STK_16(coef, tmp5, 2*FILTER_OUT_STRIDE,);
  ------------------
  |  |  100|   415k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|   415k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
  665|   415k|    ALIGN_STK_16(coef, tmp3, 2*FILTER_OUT_STRIDE,);
  ------------------
  |  |  100|   415k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|   415k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
  666|       |
  667|   415k|    sgr_finish_filter2(tmp5, *dst, stride, A5_ptrs, B5_ptrs, w, h);
  668|   415k|    sgr_finish_filter_row1(tmp3, *dst, A3_ptrs, B3_ptrs, w);
  669|   415k|    if (h > 1)
  ------------------
  |  Branch (669:9): [True: 412k, False: 2.62k]
  ------------------
  670|   412k|        sgr_finish_filter_row1(tmp3 + FILTER_OUT_STRIDE, *dst + PXSTRIDE(stride),
  ------------------
  |  |  572|   412k|#define FILTER_OUT_STRIDE (384)
  ------------------
                      sgr_finish_filter_row1(tmp3 + FILTER_OUT_STRIDE, *dst + PXSTRIDE(stride),
  ------------------
  |  |   53|   412k|#define PXSTRIDE(x) (x)
  ------------------
  671|   412k|                               &A3_ptrs[1], &B3_ptrs[1], w);
  672|   415k|    sgr_weighted2(*dst, stride, tmp5, tmp3, w, h, w0, w1 HIGHBD_TAIL_SUFFIX);
  673|   415k|    *dst += h*PXSTRIDE(stride);
  ------------------
  |  |   53|   415k|#define PXSTRIDE(x) (x)
  ------------------
  674|   415k|    rotate(A5_ptrs, B5_ptrs, 2);
  675|   415k|    rotate(A3_ptrs, B3_ptrs, 4);
  676|   415k|}
looprestoration_tmpl.c:sgr_weighted2:
  616|   413k|{
  617|  1.23M|    for (int j = 0; j < h; j++) {
  ------------------
  |  Branch (617:21): [True: 825k, False: 413k]
  ------------------
  618|  73.8M|        for (int i = 0; i < w; i++) {
  ------------------
  |  Branch (618:25): [True: 73.0M, False: 825k]
  ------------------
  619|  73.0M|            const int v = w0 * t1[i] + w1 * t2[i];
  620|  73.0M|            dst[i] = iclip_pixel(dst[i] + ((v + (1 << 10)) >> 11));
  ------------------
  |  |   49|  73.0M|#define iclip_pixel iclip_u8
  ------------------
  621|  73.0M|        }
  622|   825k|        dst += PXSTRIDE(dst_stride);
  ------------------
  |  |   53|   825k|#define PXSTRIDE(x) (x)
  ------------------
  623|   825k|        t1 += FILTER_OUT_STRIDE;
  ------------------
  |  |  572|   825k|#define FILTER_OUT_STRIDE (384)
  ------------------
  624|   825k|        t2 += FILTER_OUT_STRIDE;
  ------------------
  |  |  572|   825k|#define FILTER_OUT_STRIDE (384)
  ------------------
  625|   825k|    }
  626|   413k|}
dav1d_loop_restoration_dsp_init_16bpc:
 1367|  5.72k|{
 1368|  5.72k|    c->wiener[0] = c->wiener[1] = wiener_c;
 1369|  5.72k|    c->sgr[0] = sgr_5x5_c;
 1370|  5.72k|    c->sgr[1] = sgr_3x3_c;
 1371|  5.72k|    c->sgr[2] = sgr_mix_c;
 1372|       |
 1373|  5.72k|#if HAVE_ASM
 1374|       |#if ARCH_AARCH64 || ARCH_ARM
 1375|       |    loop_restoration_dsp_init_arm(c, bpc);
 1376|       |#elif ARCH_LOONGARCH64
 1377|       |    loop_restoration_dsp_init_loongarch(c, bpc);
 1378|       |#elif ARCH_PPC64LE
 1379|       |    loop_restoration_dsp_init_ppc(c, bpc);
 1380|       |#elif ARCH_X86
 1381|       |    loop_restoration_dsp_init_x86(c, bpc);
 1382|  5.72k|#endif
 1383|  5.72k|#endif
 1384|  5.72k|}

dav1d_lr_sbrow_8bpc:
  170|   130k|{
  171|   130k|    const int offset_y = 8 * !!sby;
  172|   130k|    const ptrdiff_t *const dst_stride = f->sr_cur.p.stride;
  173|   130k|    const int restore_planes = f->lf.restore_planes;
  174|   130k|    const int not_last = sby + 1 < f->sbh;
  175|       |
  176|   130k|    if (restore_planes & LR_RESTORE_Y) {
  ------------------
  |  Branch (176:9): [True: 127k, False: 3.09k]
  ------------------
  177|   127k|        const int h = f->sr_cur.p.p.h;
  178|   127k|        const int w = f->sr_cur.p.p.w;
  179|   127k|        const int next_row_y = (sby + 1) << (6 + f->seq_hdr->sb128);
  180|   127k|        const int row_h = imin(next_row_y - 8 * not_last, h);
  181|   127k|        const int y_stripe = (sby << (6 + f->seq_hdr->sb128)) - offset_y;
  182|   127k|        lr_sbrow(f, dst[0] - offset_y * PXSTRIDE(dst_stride[0]), y_stripe, w,
  ------------------
  |  |   53|   127k|#define PXSTRIDE(x) (x)
  ------------------
  183|   127k|                 h, row_h, 0);
  184|   127k|    }
  185|   130k|    if (restore_planes & (LR_RESTORE_U | LR_RESTORE_V)) {
  ------------------
  |  Branch (185:9): [True: 6.68k, False: 123k]
  ------------------
  186|  6.68k|        const int ss_ver = f->sr_cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
  187|  6.68k|        const int ss_hor = f->sr_cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
  188|  6.68k|        const int h = (f->sr_cur.p.p.h + ss_ver) >> ss_ver;
  189|  6.68k|        const int w = (f->sr_cur.p.p.w + ss_hor) >> ss_hor;
  190|  6.68k|        const int next_row_y = (sby + 1) << ((6 - ss_ver) + f->seq_hdr->sb128);
  191|  6.68k|        const int row_h = imin(next_row_y - (8 >> ss_ver) * not_last, h);
  192|  6.68k|        const int offset_uv = offset_y >> ss_ver;
  193|  6.68k|        const int y_stripe = (sby << ((6 - ss_ver) + f->seq_hdr->sb128)) - offset_uv;
  194|  6.68k|        if (restore_planes & LR_RESTORE_U)
  ------------------
  |  Branch (194:13): [True: 3.95k, False: 2.73k]
  ------------------
  195|  3.95k|            lr_sbrow(f, dst[1] - offset_uv * PXSTRIDE(dst_stride[1]), y_stripe,
  ------------------
  |  |   53|  3.95k|#define PXSTRIDE(x) (x)
  ------------------
  196|  3.95k|                     w, h, row_h, 1);
  197|       |
  198|  6.68k|        if (restore_planes & LR_RESTORE_V)
  ------------------
  |  Branch (198:13): [True: 5.39k, False: 1.28k]
  ------------------
  199|  5.39k|            lr_sbrow(f, dst[2] - offset_uv * PXSTRIDE(dst_stride[1]), y_stripe,
  ------------------
  |  |   53|  5.39k|#define PXSTRIDE(x) (x)
  ------------------
  200|  5.39k|                     w, h, row_h, 2);
  201|  6.68k|    }
  202|   130k|}
lr_apply_tmpl.c:lr_sbrow:
  109|   206k|{
  110|   206k|    const int chroma = !!plane;
  111|   206k|    const int ss_ver = chroma & (f->sr_cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420);
  112|   206k|    const int ss_hor = chroma & (f->sr_cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444);
  113|   206k|    const ptrdiff_t p_stride = f->sr_cur.p.stride[chroma];
  114|       |
  115|   206k|    const int unit_size_log2 = f->frame_hdr->restoration.unit_size[!!plane];
  116|   206k|    const int unit_size = 1 << unit_size_log2;
  117|   206k|    const int half_unit_size = unit_size >> 1;
  118|   206k|    const int max_unit_size = unit_size + half_unit_size;
  119|       |
  120|       |    // Y coordinate of the sbrow (y is 8 luma pixel rows above row_y)
  121|   206k|    const int row_y = y + ((8 >> ss_ver) * !!y);
  122|       |
  123|       |    // FIXME This is an ugly hack to lookup the proper AV1Filter unit for
  124|       |    // chroma planes. Question: For Multithreaded decoding, is it better
  125|       |    // to store the chroma LR information with collocated Luma information?
  126|       |    // In other words. For a chroma restoration unit locate at 128,128 and
  127|       |    // with a 4:2:0 chroma subsampling, do we store the filter information at
  128|       |    // the AV1Filter unit located at (128,128) or (256,256)
  129|       |    // TODO Support chroma subsampling.
  130|   206k|    const int shift_hor = 7 - ss_hor;
  131|       |
  132|       |    /* maximum sbrow height is 128 + 8 rows offset */
  133|   206k|    ALIGN_STK_16(pixel, pre_lr_border, 2, [128 + 8][4]);
  ------------------
  |  |  100|   206k|    ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
  |  |  ------------------
  |  |  |  |   86|   206k|    line __attribute__((aligned(align)))
  |  |  ------------------
  ------------------
  134|   206k|    const Av1RestorationUnit *lr[2];
  135|       |
  136|   206k|    enum LrEdgeFlags edges = (y > 0 ? LR_HAVE_TOP : 0) | LR_HAVE_RIGHT;
  ------------------
  |  Branch (136:31): [True: 187k, False: 18.8k]
  ------------------
  137|       |
  138|   206k|    int aligned_unit_pos = row_y & ~(unit_size - 1);
  139|   206k|    if (aligned_unit_pos && aligned_unit_pos + half_unit_size > h)
  ------------------
  |  Branch (139:9): [True: 179k, False: 27.3k]
  |  Branch (139:29): [True: 5.43k, False: 173k]
  ------------------
  140|  5.43k|        aligned_unit_pos -= unit_size;
  141|   206k|    aligned_unit_pos <<= ss_ver;
  142|   206k|    const int sb_idx = (aligned_unit_pos >> 7) * f->sr_sb128w;
  143|   206k|    const int unit_idx = ((aligned_unit_pos >> 6) & 1) << 1;
  144|   206k|    lr[0] = &f->lf.lr_mask[sb_idx].lr[plane][unit_idx];
  145|   206k|    int restore = lr[0]->type != DAV1D_RESTORATION_NONE;
  146|   206k|    int x = 0, bit = 0;
  147|   255k|    for (; x + max_unit_size <= w; p += unit_size, edges |= LR_HAVE_LEFT, bit ^= 1) {
  ------------------
  |  Branch (147:12): [True: 49.2k, False: 206k]
  ------------------
  148|  49.2k|        const int next_x = x + unit_size;
  149|  49.2k|        const int next_u_idx = unit_idx + ((next_x >> (shift_hor - 1)) & 1);
  150|  49.2k|        lr[!bit] =
  151|  49.2k|            &f->lf.lr_mask[sb_idx + (next_x >> shift_hor)].lr[plane][next_u_idx];
  152|  49.2k|        const int restore_next = lr[!bit]->type != DAV1D_RESTORATION_NONE;
  153|  49.2k|        if (restore_next)
  ------------------
  |  Branch (153:13): [True: 30.4k, False: 18.7k]
  ------------------
  154|  30.4k|            backup4xU(pre_lr_border[bit], p + unit_size - 4, p_stride, row_h - y);
  155|  49.2k|        if (restore)
  ------------------
  |  Branch (155:13): [True: 30.6k, False: 18.5k]
  ------------------
  156|  30.6k|            lr_stripe(f, p, pre_lr_border[!bit], x, y, plane, unit_size, row_h,
  157|  30.6k|                      lr[bit], edges);
  158|  49.2k|        x = next_x;
  159|  49.2k|        restore = restore_next;
  160|  49.2k|    }
  161|   206k|    if (restore) {
  ------------------
  |  Branch (161:9): [True: 25.6k, False: 180k]
  ------------------
  162|  25.6k|        edges &= ~LR_HAVE_RIGHT;
  163|  25.6k|        const int unit_w = w - x;
  164|  25.6k|        lr_stripe(f, p, pre_lr_border[!bit], x, y, plane, unit_w, row_h, lr[bit], edges);
  165|  25.6k|    }
  166|   206k|}
lr_apply_tmpl.c:backup4xU:
  102|  30.4k|{
  103|  2.80M|    for (; u > 0; u--, dst++, src += PXSTRIDE(src_stride))
  ------------------
  |  |   53|  2.77M|#define PXSTRIDE(x) (x)
  ------------------
  |  Branch (103:12): [True: 2.77M, False: 30.4k]
  ------------------
  104|  2.77M|        pixel_copy(dst, src, 4);
  ------------------
  |  |   47|  2.77M|#define pixel_copy memcpy
  ------------------
  105|  30.4k|}
lr_apply_tmpl.c:lr_stripe:
   40|  56.3k|{
   41|  56.3k|    const Dav1dDSPContext *const dsp = f->dsp;
   42|  56.3k|    const int chroma = !!plane;
   43|  56.3k|    const int ss_ver = chroma & (f->sr_cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420);
   44|  56.3k|    const ptrdiff_t stride = f->sr_cur.p.stride[chroma];
   45|  56.3k|    const int sby = (y + (y ? 8 << ss_ver : 0)) >> (6 - ss_ver + f->seq_hdr->sb128);
  ------------------
  |  Branch (45:27): [True: 40.4k, False: 15.8k]
  ------------------
   46|  56.3k|    const int have_tt = f->c->n_tc > 1;
   47|  56.3k|    const pixel *lpf = f->lf.lr_lpf_line[plane] +
   48|  56.3k|        have_tt * (sby * (4 << f->seq_hdr->sb128) - 4) * PXSTRIDE(stride) + x;
  ------------------
  |  |   53|  56.3k|#define PXSTRIDE(x) (x)
  ------------------
   49|       |
   50|       |    // The first stripe of the frame is shorter by 8 luma pixel rows.
   51|  56.3k|    int stripe_h = imin((64 - 8 * !y) >> ss_ver, row_h - y);
   52|       |
   53|  56.3k|    looprestorationfilter_fn lr_fn;
   54|  56.3k|    LooprestorationParams params;
   55|  56.3k|    if (lr->type == DAV1D_RESTORATION_WIENER) {
  ------------------
  |  Branch (55:9): [True: 26.7k, False: 29.5k]
  ------------------
   56|  26.7k|        int16_t (*const filter)[8] = params.filter;
   57|  26.7k|        filter[0][0] = filter[0][6] = lr->filter_h[0];
   58|  26.7k|        filter[0][1] = filter[0][5] = lr->filter_h[1];
   59|  26.7k|        filter[0][2] = filter[0][4] = lr->filter_h[2];
   60|  26.7k|        filter[0][3] = -(filter[0][0] + filter[0][1] + filter[0][2]) * 2;
   61|       |#if BITDEPTH != 8
   62|       |        /* For 8-bit SIMD it's beneficial to handle the +128 separately
   63|       |         * in order to avoid overflows. */
   64|       |        filter[0][3] += 128;
   65|       |#endif
   66|       |
   67|  26.7k|        filter[1][0] = filter[1][6] = lr->filter_v[0];
   68|  26.7k|        filter[1][1] = filter[1][5] = lr->filter_v[1];
   69|  26.7k|        filter[1][2] = filter[1][4] = lr->filter_v[2];
   70|  26.7k|        filter[1][3] = 128 - (filter[1][0] + filter[1][1] + filter[1][2]) * 2;
   71|       |
   72|  26.7k|        lr_fn = dsp->lr.wiener[!(filter[0][0] | filter[1][0])];
   73|  29.5k|    } else {
   74|  29.5k|        assert(lr->type >= DAV1D_RESTORATION_SGRPROJ);
  ------------------
  |  Branch (74:9): [True: 29.5k, False: 18.4E]
  ------------------
   75|  29.5k|        const int sgr_idx = lr->type - DAV1D_RESTORATION_SGRPROJ;
   76|  29.5k|        const uint16_t *const sgr_params = dav1d_sgr_params[sgr_idx];
   77|  29.5k|        params.sgr.s0 = sgr_params[0];
   78|  29.5k|        params.sgr.s1 = sgr_params[1];
   79|  29.5k|        params.sgr.w0 = lr->sgr_weights[0];
   80|  29.5k|        params.sgr.w1 = 128 - (lr->sgr_weights[0] + lr->sgr_weights[1]);
   81|       |
   82|  29.5k|        lr_fn = dsp->lr.sgr[!!sgr_params[0] + !!sgr_params[1] * 2 - 1];
   83|  29.5k|    }
   84|       |
   85|  98.7k|    while (y + stripe_h <= row_h) {
  ------------------
  |  Branch (85:12): [True: 98.7k, False: 2]
  ------------------
   86|       |        // Change the HAVE_BOTTOM bit in edges to (sby + 1 != f->sbh || y + stripe_h != row_h)
   87|  98.7k|        edges ^= (-(sby + 1 != f->sbh || y + stripe_h != row_h) ^ edges) & LR_HAVE_BOTTOM;
  ------------------
  |  Branch (87:21): [True: 70.2k, False: 28.5k]
  |  Branch (87:42): [True: 12.9k, False: 15.6k]
  ------------------
   88|  98.7k|        lr_fn(p, stride, left, lpf, unit_w, stripe_h, &params, edges HIGHBD_CALL_SUFFIX);
   89|       |
   90|  98.7k|        left += stripe_h;
   91|  98.7k|        y += stripe_h;
   92|  98.7k|        p += stripe_h * PXSTRIDE(stride);
  ------------------
  |  |   53|  98.7k|#define PXSTRIDE(x) (x)
  ------------------
   93|  98.7k|        edges |= LR_HAVE_TOP;
   94|  98.7k|        stripe_h = imin(64 >> ss_ver, row_h - y);
   95|  98.7k|        if (stripe_h == 0) break;
  ------------------
  |  Branch (95:13): [True: 56.3k, False: 42.4k]
  ------------------
   96|  42.4k|        lpf += 4 * PXSTRIDE(stride);
  ------------------
  |  |   53|  42.4k|#define PXSTRIDE(x) (x)
  ------------------
   97|  42.4k|    }
   98|  56.3k|}
dav1d_lr_sbrow_16bpc:
  170|  54.5k|{
  171|  54.5k|    const int offset_y = 8 * !!sby;
  172|  54.5k|    const ptrdiff_t *const dst_stride = f->sr_cur.p.stride;
  173|  54.5k|    const int restore_planes = f->lf.restore_planes;
  174|  54.5k|    const int not_last = sby + 1 < f->sbh;
  175|       |
  176|  54.5k|    if (restore_planes & LR_RESTORE_Y) {
  ------------------
  |  Branch (176:9): [True: 53.0k, False: 1.42k]
  ------------------
  177|  53.0k|        const int h = f->sr_cur.p.p.h;
  178|  53.0k|        const int w = f->sr_cur.p.p.w;
  179|  53.0k|        const int next_row_y = (sby + 1) << (6 + f->seq_hdr->sb128);
  180|  53.0k|        const int row_h = imin(next_row_y - 8 * not_last, h);
  181|  53.0k|        const int y_stripe = (sby << (6 + f->seq_hdr->sb128)) - offset_y;
  182|  53.0k|        lr_sbrow(f, dst[0] - offset_y * PXSTRIDE(dst_stride[0]), y_stripe, w,
  183|  53.0k|                 h, row_h, 0);
  184|  53.0k|    }
  185|  54.5k|    if (restore_planes & (LR_RESTORE_U | LR_RESTORE_V)) {
  ------------------
  |  Branch (185:9): [True: 9.99k, False: 44.5k]
  ------------------
  186|  9.99k|        const int ss_ver = f->sr_cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
  187|  9.99k|        const int ss_hor = f->sr_cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
  188|  9.99k|        const int h = (f->sr_cur.p.p.h + ss_ver) >> ss_ver;
  189|  9.99k|        const int w = (f->sr_cur.p.p.w + ss_hor) >> ss_hor;
  190|  9.99k|        const int next_row_y = (sby + 1) << ((6 - ss_ver) + f->seq_hdr->sb128);
  191|  9.99k|        const int row_h = imin(next_row_y - (8 >> ss_ver) * not_last, h);
  192|  9.99k|        const int offset_uv = offset_y >> ss_ver;
  193|  9.99k|        const int y_stripe = (sby << ((6 - ss_ver) + f->seq_hdr->sb128)) - offset_uv;
  194|  9.99k|        if (restore_planes & LR_RESTORE_U)
  ------------------
  |  Branch (194:13): [True: 7.95k, False: 2.04k]
  ------------------
  195|  7.95k|            lr_sbrow(f, dst[1] - offset_uv * PXSTRIDE(dst_stride[1]), y_stripe,
  196|  7.95k|                     w, h, row_h, 1);
  197|       |
  198|  9.99k|        if (restore_planes & LR_RESTORE_V)
  ------------------
  |  Branch (198:13): [True: 8.93k, False: 1.06k]
  ------------------
  199|  8.93k|            lr_sbrow(f, dst[2] - offset_uv * PXSTRIDE(dst_stride[1]), y_stripe,
  200|  8.93k|                     w, h, row_h, 2);
  201|  9.99k|    }
  202|  54.5k|}

dav1d_mc_dsp_init_8bpc:
  960|  3.49k|COLD void bitfn(dav1d_mc_dsp_init)(Dav1dMCDSPContext *const c) {
  961|  3.49k|#define init_mc_fns(type, name) do { \
  962|  3.49k|    c->mc        [type] = put_##name##_c; \
  963|  3.49k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  964|  3.49k|    c->mct       [type] = prep_##name##_c; \
  965|  3.49k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  966|  3.49k|} while (0)
  967|       |
  968|  3.49k|    init_mc_fns(FILTER_2D_8TAP_REGULAR,        8tap_regular);
  ------------------
  |  |  961|  3.49k|#define init_mc_fns(type, name) do { \
  |  |  962|  3.49k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  3.49k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  3.49k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  3.49k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  3.49k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 3.49k]
  |  |  ------------------
  ------------------
  969|  3.49k|    init_mc_fns(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth);
  ------------------
  |  |  961|  3.49k|#define init_mc_fns(type, name) do { \
  |  |  962|  3.49k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  3.49k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  3.49k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  3.49k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  3.49k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 3.49k]
  |  |  ------------------
  ------------------
  970|  3.49k|    init_mc_fns(FILTER_2D_8TAP_REGULAR_SHARP,  8tap_regular_sharp);
  ------------------
  |  |  961|  3.49k|#define init_mc_fns(type, name) do { \
  |  |  962|  3.49k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  3.49k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  3.49k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  3.49k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  3.49k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 3.49k]
  |  |  ------------------
  ------------------
  971|  3.49k|    init_mc_fns(FILTER_2D_8TAP_SHARP_REGULAR,  8tap_sharp_regular);
  ------------------
  |  |  961|  3.49k|#define init_mc_fns(type, name) do { \
  |  |  962|  3.49k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  3.49k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  3.49k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  3.49k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  3.49k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 3.49k]
  |  |  ------------------
  ------------------
  972|  3.49k|    init_mc_fns(FILTER_2D_8TAP_SHARP_SMOOTH,   8tap_sharp_smooth);
  ------------------
  |  |  961|  3.49k|#define init_mc_fns(type, name) do { \
  |  |  962|  3.49k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  3.49k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  3.49k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  3.49k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  3.49k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 3.49k]
  |  |  ------------------
  ------------------
  973|  3.49k|    init_mc_fns(FILTER_2D_8TAP_SHARP,          8tap_sharp);
  ------------------
  |  |  961|  3.49k|#define init_mc_fns(type, name) do { \
  |  |  962|  3.49k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  3.49k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  3.49k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  3.49k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  3.49k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 3.49k]
  |  |  ------------------
  ------------------
  974|  3.49k|    init_mc_fns(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular);
  ------------------
  |  |  961|  3.49k|#define init_mc_fns(type, name) do { \
  |  |  962|  3.49k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  3.49k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  3.49k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  3.49k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  3.49k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 3.49k]
  |  |  ------------------
  ------------------
  975|  3.49k|    init_mc_fns(FILTER_2D_8TAP_SMOOTH,         8tap_smooth);
  ------------------
  |  |  961|  3.49k|#define init_mc_fns(type, name) do { \
  |  |  962|  3.49k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  3.49k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  3.49k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  3.49k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  3.49k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 3.49k]
  |  |  ------------------
  ------------------
  976|  3.49k|    init_mc_fns(FILTER_2D_8TAP_SMOOTH_SHARP,   8tap_smooth_sharp);
  ------------------
  |  |  961|  3.49k|#define init_mc_fns(type, name) do { \
  |  |  962|  3.49k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  3.49k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  3.49k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  3.49k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  3.49k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 3.49k]
  |  |  ------------------
  ------------------
  977|  3.49k|    init_mc_fns(FILTER_2D_BILINEAR,            bilin);
  ------------------
  |  |  961|  3.49k|#define init_mc_fns(type, name) do { \
  |  |  962|  3.49k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  3.49k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  3.49k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  3.49k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  3.49k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 3.49k]
  |  |  ------------------
  ------------------
  978|       |
  979|  3.49k|    c->avg      = avg_c;
  980|  3.49k|    c->w_avg    = w_avg_c;
  981|  3.49k|    c->mask     = mask_c;
  982|  3.49k|    c->blend    = blend_c;
  983|  3.49k|    c->blend_v  = blend_v_c;
  984|  3.49k|    c->blend_h  = blend_h_c;
  985|  3.49k|    c->w_mask[0] = w_mask_444_c;
  986|  3.49k|    c->w_mask[1] = w_mask_422_c;
  987|  3.49k|    c->w_mask[2] = w_mask_420_c;
  988|  3.49k|    c->warp8x8  = warp_affine_8x8_c;
  989|  3.49k|    c->warp8x8t = warp_affine_8x8t_c;
  990|  3.49k|    c->emu_edge = emu_edge_c;
  991|  3.49k|    c->resize   = resize_c;
  992|       |
  993|  3.49k|#if HAVE_ASM
  994|       |#if ARCH_AARCH64 || ARCH_ARM
  995|       |    mc_dsp_init_arm(c);
  996|       |#elif ARCH_LOONGARCH64
  997|       |    mc_dsp_init_loongarch(c);
  998|       |#elif ARCH_PPC64LE
  999|       |    mc_dsp_init_ppc(c);
 1000|       |#elif ARCH_RISCV
 1001|       |    mc_dsp_init_riscv(c);
 1002|       |#elif ARCH_X86
 1003|       |    mc_dsp_init_x86(c);
 1004|  3.49k|#endif
 1005|  3.49k|#endif
 1006|  3.49k|}
dav1d_mc_dsp_init_16bpc:
  960|  5.72k|COLD void bitfn(dav1d_mc_dsp_init)(Dav1dMCDSPContext *const c) {
  961|  5.72k|#define init_mc_fns(type, name) do { \
  962|  5.72k|    c->mc        [type] = put_##name##_c; \
  963|  5.72k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  964|  5.72k|    c->mct       [type] = prep_##name##_c; \
  965|  5.72k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  966|  5.72k|} while (0)
  967|       |
  968|  5.72k|    init_mc_fns(FILTER_2D_8TAP_REGULAR,        8tap_regular);
  ------------------
  |  |  961|  5.72k|#define init_mc_fns(type, name) do { \
  |  |  962|  5.72k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  5.72k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  5.72k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  5.72k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  5.72k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 5.72k]
  |  |  ------------------
  ------------------
  969|  5.72k|    init_mc_fns(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth);
  ------------------
  |  |  961|  5.72k|#define init_mc_fns(type, name) do { \
  |  |  962|  5.72k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  5.72k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  5.72k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  5.72k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  5.72k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 5.72k]
  |  |  ------------------
  ------------------
  970|  5.72k|    init_mc_fns(FILTER_2D_8TAP_REGULAR_SHARP,  8tap_regular_sharp);
  ------------------
  |  |  961|  5.72k|#define init_mc_fns(type, name) do { \
  |  |  962|  5.72k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  5.72k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  5.72k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  5.72k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  5.72k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 5.72k]
  |  |  ------------------
  ------------------
  971|  5.72k|    init_mc_fns(FILTER_2D_8TAP_SHARP_REGULAR,  8tap_sharp_regular);
  ------------------
  |  |  961|  5.72k|#define init_mc_fns(type, name) do { \
  |  |  962|  5.72k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  5.72k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  5.72k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  5.72k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  5.72k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 5.72k]
  |  |  ------------------
  ------------------
  972|  5.72k|    init_mc_fns(FILTER_2D_8TAP_SHARP_SMOOTH,   8tap_sharp_smooth);
  ------------------
  |  |  961|  5.72k|#define init_mc_fns(type, name) do { \
  |  |  962|  5.72k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  5.72k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  5.72k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  5.72k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  5.72k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 5.72k]
  |  |  ------------------
  ------------------
  973|  5.72k|    init_mc_fns(FILTER_2D_8TAP_SHARP,          8tap_sharp);
  ------------------
  |  |  961|  5.72k|#define init_mc_fns(type, name) do { \
  |  |  962|  5.72k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  5.72k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  5.72k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  5.72k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  5.72k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 5.72k]
  |  |  ------------------
  ------------------
  974|  5.72k|    init_mc_fns(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular);
  ------------------
  |  |  961|  5.72k|#define init_mc_fns(type, name) do { \
  |  |  962|  5.72k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  5.72k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  5.72k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  5.72k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  5.72k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 5.72k]
  |  |  ------------------
  ------------------
  975|  5.72k|    init_mc_fns(FILTER_2D_8TAP_SMOOTH,         8tap_smooth);
  ------------------
  |  |  961|  5.72k|#define init_mc_fns(type, name) do { \
  |  |  962|  5.72k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  5.72k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  5.72k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  5.72k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  5.72k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 5.72k]
  |  |  ------------------
  ------------------
  976|  5.72k|    init_mc_fns(FILTER_2D_8TAP_SMOOTH_SHARP,   8tap_smooth_sharp);
  ------------------
  |  |  961|  5.72k|#define init_mc_fns(type, name) do { \
  |  |  962|  5.72k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  5.72k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  5.72k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  5.72k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  5.72k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 5.72k]
  |  |  ------------------
  ------------------
  977|  5.72k|    init_mc_fns(FILTER_2D_BILINEAR,            bilin);
  ------------------
  |  |  961|  5.72k|#define init_mc_fns(type, name) do { \
  |  |  962|  5.72k|    c->mc        [type] = put_##name##_c; \
  |  |  963|  5.72k|    c->mc_scaled [type] = put_##name##_scaled_c; \
  |  |  964|  5.72k|    c->mct       [type] = prep_##name##_c; \
  |  |  965|  5.72k|    c->mct_scaled[type] = prep_##name##_scaled_c; \
  |  |  966|  5.72k|} while (0)
  |  |  ------------------
  |  |  |  Branch (966:10): [Folded, False: 5.72k]
  |  |  ------------------
  ------------------
  978|       |
  979|  5.72k|    c->avg      = avg_c;
  980|  5.72k|    c->w_avg    = w_avg_c;
  981|  5.72k|    c->mask     = mask_c;
  982|  5.72k|    c->blend    = blend_c;
  983|  5.72k|    c->blend_v  = blend_v_c;
  984|  5.72k|    c->blend_h  = blend_h_c;
  985|  5.72k|    c->w_mask[0] = w_mask_444_c;
  986|  5.72k|    c->w_mask[1] = w_mask_422_c;
  987|  5.72k|    c->w_mask[2] = w_mask_420_c;
  988|  5.72k|    c->warp8x8  = warp_affine_8x8_c;
  989|  5.72k|    c->warp8x8t = warp_affine_8x8t_c;
  990|  5.72k|    c->emu_edge = emu_edge_c;
  991|  5.72k|    c->resize   = resize_c;
  992|       |
  993|  5.72k|#if HAVE_ASM
  994|       |#if ARCH_AARCH64 || ARCH_ARM
  995|       |    mc_dsp_init_arm(c);
  996|       |#elif ARCH_LOONGARCH64
  997|       |    mc_dsp_init_loongarch(c);
  998|       |#elif ARCH_PPC64LE
  999|       |    mc_dsp_init_ppc(c);
 1000|       |#elif ARCH_RISCV
 1001|       |    mc_dsp_init_riscv(c);
 1002|       |#elif ARCH_X86
 1003|       |    mc_dsp_init_x86(c);
 1004|  5.72k|#endif
 1005|  5.72k|#endif
 1006|  5.72k|}

dav1d_mem_pool_push:
  224|  1.24M|void dav1d_mem_pool_push(Dav1dMemPool *const pool, void *const ptr) {
  225|  1.24M|    pthread_mutex_lock(&pool->lock);
  226|  1.24M|    Dav1dMemPoolBuffer *const buf = (Dav1dMemPoolBuffer*)((uintptr_t)ptr - 64);
  227|  1.24M|    const int ref_cnt = --pool->ref_cnt;
  228|  1.24M|    if (!pool->end) {
  ------------------
  |  Branch (228:9): [True: 1.22M, False: 24.2k]
  ------------------
  229|  1.22M|        buf->next = pool->buf;
  230|  1.22M|        pool->buf = buf;
  231|  1.22M|        pthread_mutex_unlock(&pool->lock);
  232|  1.22M|        assert(ref_cnt > 0);
  ------------------
  |  Branch (232:9): [True: 1.22M, False: 18.4E]
  ------------------
  233|  1.22M|    } else {
  234|  24.2k|        pthread_mutex_unlock(&pool->lock);
  235|  24.2k|        dav1d_free_aligned(buf);
  ------------------
  |  |  136|  24.2k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
  236|  24.2k|        if (!ref_cnt) mem_pool_destroy(pool);
  ------------------
  |  Branch (236:13): [True: 24.2k, False: 18.4E]
  ------------------
  237|  24.2k|    }
  238|  1.24M|}
dav1d_mem_pool_pop:
  240|  1.24M|void *dav1d_mem_pool_pop(Dav1dMemPool *const pool, const size_t size) {
  241|  1.24M|    pthread_mutex_lock(&pool->lock);
  242|  1.24M|    Dav1dMemPoolBuffer *buf = pool->buf;
  243|  1.24M|    pool->ref_cnt++;
  244|       |
  245|  1.24M|    if (buf) {
  ------------------
  |  Branch (245:9): [True: 1.15M, False: 95.9k]
  ------------------
  246|  1.15M|        pool->buf = buf->next;
  247|  1.15M|        pthread_mutex_unlock(&pool->lock);
  248|  1.15M|        if (buf->size != size) {
  ------------------
  |  Branch (248:13): [True: 12.5k, False: 1.13M]
  ------------------
  249|       |            /* Reallocate if the size has changed */
  250|  12.5k|            dav1d_free_aligned(buf);
  ------------------
  |  |  136|  12.5k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
  251|  12.5k|            goto alloc;
  252|  12.5k|        }
  253|       |#if TRACK_HEAP_ALLOCATIONS
  254|       |        dav1d_track_reuse(pool->type);
  255|       |#endif
  256|  1.15M|    } else {
  257|  95.9k|        pthread_mutex_unlock(&pool->lock);
  258|   108k|alloc:
  259|   108k|        buf = dav1d_alloc_aligned(pool->type, size + 64, 64);
  ------------------
  |  |  134|   108k|#define dav1d_alloc_aligned(type, sz, align) dav1d_alloc_aligned_internal(sz, align)
  ------------------
  260|   108k|        if (!buf) {
  ------------------
  |  Branch (260:13): [True: 0, False: 108k]
  ------------------
  261|      0|            pthread_mutex_lock(&pool->lock);
  262|      0|            const int ref_cnt = --pool->ref_cnt;
  263|      0|            pthread_mutex_unlock(&pool->lock);
  264|      0|            if (!ref_cnt) mem_pool_destroy(pool);
  ------------------
  |  Branch (264:17): [True: 0, False: 0]
  ------------------
  265|      0|            return NULL;
  266|      0|        }
  267|   108k|        buf->size = size;
  268|   108k|    }
  269|       |
  270|  1.24M|    return (void*)((uintptr_t)buf + 64);
  271|  1.24M|}
dav1d_mem_pool_init:
  275|  71.4k|{
  276|  71.4k|    Dav1dMemPool *const pool = dav1d_malloc(ALLOC_COMMON_CTX,
  ------------------
  |  |  132|  71.4k|#define dav1d_malloc(type, sz) malloc(sz)
  ------------------
  277|  71.4k|                                            sizeof(Dav1dMemPool));
  278|  71.4k|    if (pool) {
  ------------------
  |  Branch (278:9): [True: 71.4k, False: 0]
  ------------------
  279|  71.4k|        if (!pthread_mutex_init(&pool->lock, NULL)) {
  ------------------
  |  Branch (279:13): [True: 71.4k, False: 0]
  ------------------
  280|  71.4k|            pool->buf = NULL;
  281|  71.4k|            pool->ref_cnt = 1;
  282|  71.4k|            pool->end = 0;
  283|       |#if TRACK_HEAP_ALLOCATIONS
  284|       |            pool->type = type;
  285|       |#endif
  286|  71.4k|            *ppool = pool;
  287|  71.4k|            return 0;
  288|  71.4k|        }
  289|      0|        dav1d_free(pool);
  ------------------
  |  |  135|      0|#define dav1d_free(ptr) free(ptr)
  ------------------
  290|      0|    }
  291|      0|    *ppool = NULL;
  292|      0|    return DAV1D_ERR(ENOMEM);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  293|  71.4k|}
dav1d_mem_pool_end:
  295|  71.4k|COLD void dav1d_mem_pool_end(Dav1dMemPool *const pool) {
  296|  71.4k|    if (pool) {
  ------------------
  |  Branch (296:9): [True: 71.4k, False: 0]
  ------------------
  297|  71.4k|        pthread_mutex_lock(&pool->lock);
  298|  71.4k|        Dav1dMemPoolBuffer *buf = pool->buf;
  299|  71.4k|        const int ref_cnt = --pool->ref_cnt;
  300|  71.4k|        pool->buf = NULL;
  301|  71.4k|        pool->end = 1;
  302|  71.4k|        pthread_mutex_unlock(&pool->lock);
  303|       |
  304|   143k|        while (buf) {
  ------------------
  |  Branch (304:16): [True: 71.7k, False: 71.4k]
  ------------------
  305|  71.7k|            void *const ptr = buf;
  306|  71.7k|            buf = buf->next;
  307|  71.7k|            dav1d_free_aligned(ptr);
  ------------------
  |  |  136|  71.7k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
  308|  71.7k|        }
  309|  71.4k|        if (!ref_cnt) mem_pool_destroy(pool);
  ------------------
  |  Branch (309:13): [True: 47.1k, False: 24.2k]
  ------------------
  310|  71.4k|    }
  311|  71.4k|}
mem.c:mem_pool_destroy:
  219|  71.4k|static COLD void mem_pool_destroy(Dav1dMemPool *const pool) {
  220|  71.4k|    pthread_mutex_destroy(&pool->lock);
  221|  71.4k|    dav1d_free(pool);
  ------------------
  |  |  135|  71.4k|#define dav1d_free(ptr) free(ptr)
  ------------------
  222|  71.4k|}

lib.c:dav1d_alloc_aligned_internal:
   89|  30.6k|static inline void *dav1d_alloc_aligned_internal(const size_t sz, const size_t align) {
   90|  30.6k|    assert(!(align & (align - 1)));
  ------------------
  |  Branch (90:5): [True: 30.6k, False: 0]
  ------------------
   91|       |#ifdef _WIN32
   92|       |    return _aligned_malloc(sz, align);
   93|       |#elif HAVE_POSIX_MEMALIGN
   94|  30.6k|    void *ptr;
   95|  30.6k|    if (posix_memalign(&ptr, align, sz)) return NULL;
  ------------------
  |  Branch (95:9): [True: 0, False: 30.6k]
  ------------------
   96|  30.6k|    return ptr;
   97|       |#elif HAVE_MEMALIGN
   98|       |    return memalign(align, sz);
   99|       |#elif HAVE_ALIGNED_ALLOC
  100|       |    // The C11 standard specifies that the size parameter
  101|       |    // must be an integral multiple of alignment.
  102|       |    return aligned_alloc(align, ROUND_UP(sz, align));
  103|       |#else
  104|       |    void *const buf = malloc(sz + align + sizeof(void *));
  105|       |    if (!buf) return NULL;
  106|       |
  107|       |    void *const ptr = (void *)(((uintptr_t)buf + sizeof(void *) + align - 1) & ~(align - 1));
  108|       |    ((void **)ptr)[-1] = buf;
  109|       |    return ptr;
  110|       |#endif
  111|  30.6k|}
lib.c:dav1d_free_aligned_internal:
  113|   397k|static inline void dav1d_free_aligned_internal(void *ptr) {
  114|       |#ifdef _WIN32
  115|       |    _aligned_free(ptr);
  116|       |#elif HAVE_POSIX_MEMALIGN || HAVE_MEMALIGN || HAVE_ALIGNED_ALLOC
  117|       |    free(ptr);
  118|       |#else
  119|       |    if (ptr) free(((void **)ptr)[-1]);
  120|       |#endif
  121|   397k|}
lib.c:dav1d_freep_aligned:
  144|  10.2k|static inline void dav1d_freep_aligned(void *ptr) {
  145|  10.2k|    void **mem = (void **) ptr;
  146|  10.2k|    if (*mem) {
  ------------------
  |  Branch (146:9): [True: 10.2k, False: 0]
  ------------------
  147|  10.2k|        dav1d_free_aligned(*mem);
  ------------------
  |  |  136|  10.2k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
  148|       |        *mem = NULL;
  149|  10.2k|    }
  150|  10.2k|}
mem.c:dav1d_free_aligned_internal:
  113|   108k|static inline void dav1d_free_aligned_internal(void *ptr) {
  114|       |#ifdef _WIN32
  115|       |    _aligned_free(ptr);
  116|       |#elif HAVE_POSIX_MEMALIGN || HAVE_MEMALIGN || HAVE_ALIGNED_ALLOC
  117|       |    free(ptr);
  118|       |#else
  119|       |    if (ptr) free(((void **)ptr)[-1]);
  120|       |#endif
  121|   108k|}
mem.c:dav1d_alloc_aligned_internal:
   89|   108k|static inline void *dav1d_alloc_aligned_internal(const size_t sz, const size_t align) {
   90|   108k|    assert(!(align & (align - 1)));
  ------------------
  |  Branch (90:5): [True: 108k, False: 0]
  ------------------
   91|       |#ifdef _WIN32
   92|       |    return _aligned_malloc(sz, align);
   93|       |#elif HAVE_POSIX_MEMALIGN
   94|   108k|    void *ptr;
   95|   108k|    if (posix_memalign(&ptr, align, sz)) return NULL;
  ------------------
  |  Branch (95:9): [True: 0, False: 108k]
  ------------------
   96|   108k|    return ptr;
   97|       |#elif HAVE_MEMALIGN
   98|       |    return memalign(align, sz);
   99|       |#elif HAVE_ALIGNED_ALLOC
  100|       |    // The C11 standard specifies that the size parameter
  101|       |    // must be an integral multiple of alignment.
  102|       |    return aligned_alloc(align, ROUND_UP(sz, align));
  103|       |#else
  104|       |    void *const buf = malloc(sz + align + sizeof(void *));
  105|       |    if (!buf) return NULL;
  106|       |
  107|       |    void *const ptr = (void *)(((uintptr_t)buf + sizeof(void *) + align - 1) & ~(align - 1));
  108|       |    ((void **)ptr)[-1] = buf;
  109|       |    return ptr;
  110|       |#endif
  111|   108k|}
ref.c:dav1d_alloc_aligned_internal:
   89|   335k|static inline void *dav1d_alloc_aligned_internal(const size_t sz, const size_t align) {
   90|   335k|    assert(!(align & (align - 1)));
  ------------------
  |  Branch (90:5): [True: 335k, False: 0]
  ------------------
   91|       |#ifdef _WIN32
   92|       |    return _aligned_malloc(sz, align);
   93|       |#elif HAVE_POSIX_MEMALIGN
   94|   335k|    void *ptr;
   95|   335k|    if (posix_memalign(&ptr, align, sz)) return NULL;
  ------------------
  |  Branch (95:9): [True: 0, False: 335k]
  ------------------
   96|   335k|    return ptr;
   97|       |#elif HAVE_MEMALIGN
   98|       |    return memalign(align, sz);
   99|       |#elif HAVE_ALIGNED_ALLOC
  100|       |    // The C11 standard specifies that the size parameter
  101|       |    // must be an integral multiple of alignment.
  102|       |    return aligned_alloc(align, ROUND_UP(sz, align));
  103|       |#else
  104|       |    void *const buf = malloc(sz + align + sizeof(void *));
  105|       |    if (!buf) return NULL;
  106|       |
  107|       |    void *const ptr = (void *)(((uintptr_t)buf + sizeof(void *) + align - 1) & ~(align - 1));
  108|       |    ((void **)ptr)[-1] = buf;
  109|       |    return ptr;
  110|       |#endif
  111|   335k|}
ref.c:dav1d_free_aligned_internal:
  113|   335k|static inline void dav1d_free_aligned_internal(void *ptr) {
  114|       |#ifdef _WIN32
  115|       |    _aligned_free(ptr);
  116|       |#elif HAVE_POSIX_MEMALIGN || HAVE_MEMALIGN || HAVE_ALIGNED_ALLOC
  117|       |    free(ptr);
  118|       |#else
  119|       |    if (ptr) free(((void **)ptr)[-1]);
  120|       |#endif
  121|   335k|}
refmvs.c:dav1d_free_aligned_internal:
  113|  17.2k|static inline void dav1d_free_aligned_internal(void *ptr) {
  114|       |#ifdef _WIN32
  115|       |    _aligned_free(ptr);
  116|       |#elif HAVE_POSIX_MEMALIGN || HAVE_MEMALIGN || HAVE_ALIGNED_ALLOC
  117|       |    free(ptr);
  118|       |#else
  119|       |    if (ptr) free(((void **)ptr)[-1]);
  120|       |#endif
  121|  17.2k|}
refmvs.c:dav1d_alloc_aligned_internal:
   89|  17.2k|static inline void *dav1d_alloc_aligned_internal(const size_t sz, const size_t align) {
   90|  17.2k|    assert(!(align & (align - 1)));
  ------------------
  |  Branch (90:5): [True: 17.2k, False: 0]
  ------------------
   91|       |#ifdef _WIN32
   92|       |    return _aligned_malloc(sz, align);
   93|       |#elif HAVE_POSIX_MEMALIGN
   94|  17.2k|    void *ptr;
   95|  17.2k|    if (posix_memalign(&ptr, align, sz)) return NULL;
  ------------------
  |  Branch (95:9): [True: 0, False: 17.2k]
  ------------------
   96|  17.2k|    return ptr;
   97|       |#elif HAVE_MEMALIGN
   98|       |    return memalign(align, sz);
   99|       |#elif HAVE_ALIGNED_ALLOC
  100|       |    // The C11 standard specifies that the size parameter
  101|       |    // must be an integral multiple of alignment.
  102|       |    return aligned_alloc(align, ROUND_UP(sz, align));
  103|       |#else
  104|       |    void *const buf = malloc(sz + align + sizeof(void *));
  105|       |    if (!buf) return NULL;
  106|       |
  107|       |    void *const ptr = (void *)(((uintptr_t)buf + sizeof(void *) + align - 1) & ~(align - 1));
  108|       |    ((void **)ptr)[-1] = buf;
  109|       |    return ptr;
  110|       |#endif
  111|  17.2k|}
decode.c:dav1d_free_aligned_internal:
  113|   165k|static inline void dav1d_free_aligned_internal(void *ptr) {
  114|       |#ifdef _WIN32
  115|       |    _aligned_free(ptr);
  116|       |#elif HAVE_POSIX_MEMALIGN || HAVE_MEMALIGN || HAVE_ALIGNED_ALLOC
  117|       |    free(ptr);
  118|       |#else
  119|       |    if (ptr) free(((void **)ptr)[-1]);
  120|       |#endif
  121|   165k|}
decode.c:dav1d_alloc_aligned_internal:
   89|   162k|static inline void *dav1d_alloc_aligned_internal(const size_t sz, const size_t align) {
   90|   162k|    assert(!(align & (align - 1)));
  ------------------
  |  Branch (90:5): [True: 162k, False: 18.4E]
  ------------------
   91|       |#ifdef _WIN32
   92|       |    return _aligned_malloc(sz, align);
   93|       |#elif HAVE_POSIX_MEMALIGN
   94|   162k|    void *ptr;
   95|   162k|    if (posix_memalign(&ptr, align, sz)) return NULL;
  ------------------
  |  Branch (95:9): [True: 0, False: 162k]
  ------------------
   96|   162k|    return ptr;
   97|       |#elif HAVE_MEMALIGN
   98|       |    return memalign(align, sz);
   99|       |#elif HAVE_ALIGNED_ALLOC
  100|       |    // The C11 standard specifies that the size parameter
  101|       |    // must be an integral multiple of alignment.
  102|       |    return aligned_alloc(align, ROUND_UP(sz, align));
  103|       |#else
  104|       |    void *const buf = malloc(sz + align + sizeof(void *));
  105|       |    if (!buf) return NULL;
  106|       |
  107|       |    void *const ptr = (void *)(((uintptr_t)buf + sizeof(void *) + align - 1) & ~(align - 1));
  108|       |    ((void **)ptr)[-1] = buf;
  109|       |    return ptr;
  110|       |#endif
  111|   162k|}
decode.c:dav1d_freep_aligned:
  144|  3.29k|static inline void dav1d_freep_aligned(void *ptr) {
  145|  3.29k|    void **mem = (void **) ptr;
  146|  3.29k|    if (*mem) {
  ------------------
  |  Branch (146:9): [True: 3.29k, False: 0]
  ------------------
  147|  3.29k|        dav1d_free_aligned(*mem);
  ------------------
  |  |  136|  3.29k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
  148|       |        *mem = NULL;
  149|  3.29k|    }
  150|  3.29k|}

dav1d_msac_decode_subexp:
   62|   202k|{
   63|   202k|    assert(n >> k == 8);
  ------------------
  |  Branch (63:5): [True: 202k, False: 25]
  ------------------
   64|       |
   65|   202k|    unsigned a = 0;
   66|   202k|    if (dav1d_msac_decode_bool_equi(s)) {
  ------------------
  |  |   53|   202k|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
  |  Branch (66:9): [True: 128k, False: 73.8k]
  ------------------
   67|   128k|        if (dav1d_msac_decode_bool_equi(s))
  ------------------
  |  |   53|   128k|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
  |  Branch (67:13): [True: 90.8k, False: 37.4k]
  ------------------
   68|  90.8k|            k += dav1d_msac_decode_bool_equi(s) + 1;
  ------------------
  |  |   53|  90.8k|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
   69|   128k|        a = 1 << k;
   70|   128k|    }
   71|   202k|    const unsigned v = dav1d_msac_decode_bools(s, k) + a;
   72|   202k|    return ref * 2 <= n ? inv_recenter(ref, v) :
  ------------------
  |  Branch (72:12): [True: 119k, False: 82.2k]
  ------------------
   73|   202k|                          n - 1 - inv_recenter(n - 1 - ref, v);
   74|   202k|}
dav1d_msac_init:
  206|   244k|{
  207|   244k|    s->buf_pos = data;
  208|   244k|    s->buf_end = data + sz;
  209|   244k|    s->dif = 0;
  210|   244k|    s->rng = 0x8000;
  211|   244k|    s->cnt = -15;
  212|   244k|    s->allow_update_cdf = !disable_cdf_update_flag;
  213|   244k|    ctx_refill(s);
  214|       |
  215|   244k|#if ARCH_X86_64 && HAVE_ASM
  216|   244k|    s->symbol_adapt16 = dav1d_msac_decode_symbol_adapt_c;
  217|       |
  218|   244k|    msac_init_x86(s);
  219|   244k|#endif
  220|   244k|}
msac.c:ctx_refill:
   41|   244k|static inline void ctx_refill(MsacContext *const s) {
   42|   244k|    const uint8_t *buf_pos = s->buf_pos;
   43|   244k|    const uint8_t *buf_end = s->buf_end;
   44|   244k|    int c = EC_WIN_SIZE - s->cnt - 24;
  ------------------
  |  |   39|   244k|#define EC_WIN_SIZE (sizeof(ec_win) << 3)
  ------------------
   45|   244k|    ec_win dif = s->dif;
   46|   768k|    do {
   47|   768k|        if (buf_pos >= buf_end) {
  ------------------
  |  Branch (47:13): [True: 219k, False: 549k]
  ------------------
   48|       |            // set remaining bits to 1;
   49|   219k|            dif |= ~(~(ec_win)0xff << c);
   50|   219k|            break;
   51|   219k|        }
   52|   549k|        dif |= (ec_win)(*buf_pos++ ^ 0xff) << c;
   53|   549k|        c -= 8;
   54|   549k|    } while (c >= 0);
  ------------------
  |  Branch (54:14): [True: 523k, False: 25.7k]
  ------------------
   55|   244k|    s->dif = dif;
   56|   244k|    s->cnt = EC_WIN_SIZE - c - 24;
  ------------------
  |  |   39|   244k|#define EC_WIN_SIZE (sizeof(ec_win) << 3)
  ------------------
   57|   244k|    s->buf_pos = buf_pos;
   58|   244k|}

decode.c:dav1d_msac_decode_bools:
   94|  1.05M|static inline unsigned dav1d_msac_decode_bools(MsacContext *const s, unsigned n) {
   95|  1.05M|    unsigned v = 0;
   96|  2.49M|    while (n--)
  ------------------
  |  Branch (96:12): [True: 1.43M, False: 1.05M]
  ------------------
   97|  1.43M|        v = (v << 1) | dav1d_msac_decode_bool_equi(s);
  ------------------
  |  |   53|  1.43M|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
   98|  1.05M|    return v;
   99|  1.05M|}
decode.c:dav1d_msac_decode_uniform:
  101|   144k|static inline int dav1d_msac_decode_uniform(MsacContext *const s, const unsigned n) {
  102|   144k|    assert(n > 0);
  ------------------
  |  Branch (102:5): [True: 144k, False: 0]
  ------------------
  103|   144k|    const int l = ulog2(n) + 1;
  104|   144k|    assert(l > 1);
  ------------------
  |  Branch (104:5): [True: 144k, False: 18.4E]
  ------------------
  105|   144k|    const unsigned m = (1 << l) - n;
  106|   144k|    const unsigned v = dav1d_msac_decode_bools(s, l - 1);
  107|   144k|    return v < m ? v : (v << 1) - m + dav1d_msac_decode_bool_equi(s);
  ------------------
  |  |   53|  44.7k|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
  |  Branch (107:12): [True: 99.8k, False: 44.7k]
  ------------------
  108|   144k|}
msac.c:dav1d_msac_decode_bools:
   94|   202k|static inline unsigned dav1d_msac_decode_bools(MsacContext *const s, unsigned n) {
   95|   202k|    unsigned v = 0;
   96|   912k|    while (n--)
  ------------------
  |  Branch (96:12): [True: 710k, False: 202k]
  ------------------
   97|   710k|        v = (v << 1) | dav1d_msac_decode_bool_equi(s);
  ------------------
  |  |   53|   710k|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
   98|   202k|    return v;
   99|   202k|}
recon_tmpl.c:dav1d_msac_decode_bools:
   94|  8.06M|static inline unsigned dav1d_msac_decode_bools(MsacContext *const s, unsigned n) {
   95|  8.06M|    unsigned v = 0;
   96|  27.3M|    while (n--)
  ------------------
  |  Branch (96:12): [True: 19.2M, False: 8.06M]
  ------------------
   97|  19.2M|        v = (v << 1) | dav1d_msac_decode_bool_equi(s);
  ------------------
  |  |   53|  19.2M|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
   98|  8.06M|    return v;
   99|  8.06M|}

dav1d_parse_sequence_header:
  304|  12.7k|{
  305|  12.7k|    validate_input_or_ret(out != NULL, DAV1D_ERR(EINVAL));
  ------------------
  |  |   52|  12.7k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:9): [True: 0, False: 12.7k]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
  306|  12.7k|    validate_input_or_ret(ptr != NULL, DAV1D_ERR(EINVAL));
  ------------------
  |  |   52|  12.7k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:9): [True: 0, False: 12.7k]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
  307|  12.7k|    validate_input_or_ret(sz > 0 && sz <= SIZE_MAX / 2, DAV1D_ERR(EINVAL));
  ------------------
  |  |   52|  25.4k|    if (!(x)) { \
  |  |  ------------------
  |  |  |  Branch (52:11): [True: 12.7k, False: 0]
  |  |  |  Branch (52:11): [True: 12.7k, False: 0]
  |  |  ------------------
  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  ------------------
  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  ------------------
  |  |   54|      0|                    #x, __func__); \
  |  |   55|      0|        debug_abort(); \
  |  |  ------------------
  |  |  |  |   39|      0|#define debug_abort abort
  |  |  ------------------
  |  |   56|      0|        return r; \
  |  |   57|      0|    }
  ------------------
  308|       |
  309|  12.7k|    GetBits gb;
  310|  12.7k|    dav1d_init_get_bits(&gb, ptr, sz);
  311|  12.7k|    int res = DAV1D_ERR(ENOENT);
  ------------------
  |  |   58|  12.7k|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  312|       |
  313|  28.7k|    do {
  314|  28.7k|        dav1d_get_bit(&gb); // obu_forbidden_bit
  315|  28.7k|        const enum Dav1dObuType type = dav1d_get_bits(&gb, 4);
  316|  28.7k|        const int has_extension = dav1d_get_bit(&gb);
  317|  28.7k|        const int has_length_field = dav1d_get_bit(&gb);
  318|  28.7k|        dav1d_get_bits(&gb, 1 + 8 * has_extension); // ignore
  319|       |
  320|  28.7k|        const uint8_t *obu_end = gb.ptr_end;
  321|  28.7k|        if (has_length_field) {
  ------------------
  |  Branch (321:13): [True: 17.9k, False: 10.8k]
  ------------------
  322|  17.9k|            const size_t len = dav1d_get_uleb128(&gb);
  323|  17.9k|            if (len > (size_t)(obu_end - gb.ptr)) return DAV1D_ERR(EINVAL);
  ------------------
  |  |   58|    363|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  |  Branch (323:17): [True: 363, False: 17.5k]
  ------------------
  324|  17.5k|            obu_end = gb.ptr + len;
  325|  17.5k|        }
  326|       |
  327|  28.3k|        if (type == DAV1D_OBU_SEQ_HDR) {
  ------------------
  |  Branch (327:13): [True: 12.0k, False: 16.3k]
  ------------------
  328|  12.0k|            if ((res = parse_seq_hdr(out, &gb, 0)) < 0) return res;
  ------------------
  |  Branch (328:17): [True: 1.38k, False: 10.7k]
  ------------------
  329|  10.7k|            if (gb.ptr > obu_end) return DAV1D_ERR(EINVAL);
  ------------------
  |  |   58|    373|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  |  Branch (329:17): [True: 373, False: 10.3k]
  ------------------
  330|  10.3k|            dav1d_bytealign_get_bits(&gb);
  331|  10.3k|        }
  332|       |
  333|  26.6k|        if (gb.error) return DAV1D_ERR(EINVAL);
  ------------------
  |  |   58|    260|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  |  Branch (333:13): [True: 260, False: 26.3k]
  ------------------
  334|  26.6k|        assert(gb.state == 0 && gb.bits_left == 0);
  ------------------
  |  Branch (334:9): [True: 26.3k, False: 0]
  |  Branch (334:9): [True: 26.3k, False: 0]
  ------------------
  335|  26.3k|        gb.ptr = obu_end;
  336|  26.3k|    } while (gb.ptr < gb.ptr_end);
  ------------------
  |  Branch (336:14): [True: 16.0k, False: 10.3k]
  ------------------
  337|       |
  338|  10.3k|    return res;
  339|  12.7k|}
dav1d_parse_obus:
 1169|   420k|ptrdiff_t dav1d_parse_obus(Dav1dContext *const c, Dav1dData *const in) {
 1170|   420k|    GetBits gb;
 1171|   420k|    int res;
 1172|       |
 1173|   420k|    dav1d_init_get_bits(&gb, in->data, in->sz);
 1174|       |
 1175|       |    // obu header
 1176|   420k|    const int obu_forbidden_bit = dav1d_get_bit(&gb);
 1177|   420k|    if (c->strict_std_compliance && obu_forbidden_bit) goto error;
  ------------------
  |  Branch (1177:9): [True: 0, False: 420k]
  |  Branch (1177:37): [True: 0, False: 0]
  ------------------
 1178|   420k|    const enum Dav1dObuType type = dav1d_get_bits(&gb, 4);
 1179|   420k|    const int has_extension = dav1d_get_bit(&gb);
 1180|   420k|    const int has_length_field = dav1d_get_bit(&gb);
 1181|   420k|    dav1d_get_bit(&gb); // reserved
 1182|       |
 1183|   420k|    int temporal_id = 0, spatial_id = 0;
 1184|   420k|    if (has_extension) {
  ------------------
  |  Branch (1184:9): [True: 15.9k, False: 404k]
  ------------------
 1185|  15.9k|        temporal_id = dav1d_get_bits(&gb, 3);
 1186|  15.9k|        spatial_id = dav1d_get_bits(&gb, 2);
 1187|  15.9k|        dav1d_get_bits(&gb, 3); // reserved
 1188|  15.9k|    }
 1189|       |
 1190|   420k|    if (has_length_field) {
  ------------------
  |  Branch (1190:9): [True: 119k, False: 300k]
  ------------------
 1191|   119k|        const size_t len = dav1d_get_uleb128(&gb);
 1192|   119k|        if (len > (size_t)(gb.ptr_end - gb.ptr)) goto error;
  ------------------
  |  Branch (1192:13): [True: 5.36k, False: 114k]
  ------------------
 1193|   114k|        gb.ptr_end = gb.ptr + len;
 1194|   114k|    }
 1195|   414k|    if (gb.error) goto error;
  ------------------
  |  Branch (1195:9): [True: 505, False: 414k]
  ------------------
 1196|       |
 1197|       |    // We must have read a whole number of bytes at this point (1 byte
 1198|       |    // for the header and whole bytes at a time when reading the
 1199|       |    // leb128 length field).
 1200|   414k|    assert(gb.bits_left == 0);
  ------------------
  |  Branch (1200:5): [True: 414k, False: 0]
  ------------------
 1201|       |
 1202|       |    // skip obu not belonging to the selected temporal/spatial layer
 1203|   414k|    if (type != DAV1D_OBU_SEQ_HDR && type != DAV1D_OBU_TD &&
  ------------------
  |  Branch (1203:9): [True: 389k, False: 24.9k]
  |  Branch (1203:38): [True: 345k, False: 43.9k]
  ------------------
 1204|   345k|        has_extension && c->operating_point_idc != 0)
  ------------------
  |  Branch (1204:9): [True: 13.4k, False: 331k]
  |  Branch (1204:26): [True: 7.36k, False: 6.11k]
  ------------------
 1205|  7.36k|    {
 1206|  7.36k|        const int in_temporal_layer = (c->operating_point_idc >> temporal_id) & 1;
 1207|  7.36k|        const int in_spatial_layer = (c->operating_point_idc >> (spatial_id + 8)) & 1;
 1208|  7.36k|        if (!in_temporal_layer || !in_spatial_layer)
  ------------------
  |  Branch (1208:13): [True: 593, False: 6.76k]
  |  Branch (1208:35): [True: 410, False: 6.35k]
  ------------------
 1209|  1.00k|            return gb.ptr_end - gb.ptr_start;
 1210|  7.36k|    }
 1211|       |
 1212|   413k|    switch (type) {
 1213|  24.9k|    case DAV1D_OBU_SEQ_HDR: {
  ------------------
  |  Branch (1213:5): [True: 24.9k, False: 388k]
  ------------------
 1214|  24.9k|        Dav1dRef *ref = dav1d_ref_create_using_pool(c->seq_hdr_pool,
 1215|  24.9k|                                                    sizeof(Dav1dSequenceHeader));
 1216|  24.9k|        if (!ref) return DAV1D_ERR(ENOMEM);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  |  Branch (1216:13): [True: 0, False: 24.9k]
  ------------------
 1217|  24.9k|        Dav1dSequenceHeader *seq_hdr = ref->data;
 1218|  24.9k|        if ((res = parse_seq_hdr(seq_hdr, &gb, c->strict_std_compliance)) < 0) {
  ------------------
  |  Branch (1218:13): [True: 2.35k, False: 22.5k]
  ------------------
 1219|  2.35k|            dav1d_log(c, "Error parsing sequence header\n");
  ------------------
  |  |   44|  2.35k|#define dav1d_log(...) do { } while(0)
  |  |  ------------------
  |  |  |  Branch (44:37): [Folded, False: 2.35k]
  |  |  ------------------
  ------------------
 1220|  2.35k|            dav1d_ref_dec(&ref);
 1221|  2.35k|            goto error;
 1222|  2.35k|        }
 1223|       |
 1224|  22.5k|        const int op_idx =
 1225|  22.5k|            c->operating_point < seq_hdr->num_operating_points ? c->operating_point : 0;
  ------------------
  |  Branch (1225:13): [True: 22.5k, False: 0]
  ------------------
 1226|  22.5k|        c->operating_point_idc = seq_hdr->operating_points[op_idx].idc;
 1227|  22.5k|        const unsigned spatial_mask = c->operating_point_idc >> 8;
 1228|  22.5k|        c->max_spatial_id = spatial_mask ? ulog2(spatial_mask) : 0;
  ------------------
  |  Branch (1228:29): [True: 5.76k, False: 16.8k]
  ------------------
 1229|       |
 1230|       |        // If we have read a sequence header which is different from
 1231|       |        // the old one, this is a new video sequence and can't use any
 1232|       |        // previous state. Free that state.
 1233|       |
 1234|  22.5k|        if (!c->seq_hdr) {
  ------------------
  |  Branch (1234:13): [True: 9.91k, False: 12.6k]
  ------------------
 1235|  9.91k|            c->frame_hdr = NULL;
 1236|  9.91k|            c->frame_flags |= PICTURE_FLAG_NEW_SEQUENCE;
 1237|       |        // see 7.5, operating_parameter_info is allowed to change in
 1238|       |        // sequence headers of a single sequence
 1239|  12.6k|        } else if (memcmp(seq_hdr, c->seq_hdr, offsetof(Dav1dSequenceHeader, operating_parameter_info))) {
  ------------------
  |  Branch (1239:20): [True: 5.98k, False: 6.67k]
  ------------------
 1240|  5.98k|            c->frame_hdr = NULL;
 1241|  5.98k|            c->mastering_display = NULL;
 1242|  5.98k|            c->content_light = NULL;
 1243|  5.98k|            dav1d_ref_dec(&c->mastering_display_ref);
 1244|  5.98k|            dav1d_ref_dec(&c->content_light_ref);
 1245|  53.8k|            for (int i = 0; i < 8; i++) {
  ------------------
  |  Branch (1245:29): [True: 47.8k, False: 5.98k]
  ------------------
 1246|  47.8k|                if (c->refs[i].p.p.frame_hdr)
  ------------------
  |  Branch (1246:21): [True: 31.9k, False: 15.9k]
  ------------------
 1247|  31.9k|                    dav1d_thread_picture_unref(&c->refs[i].p);
 1248|  47.8k|                dav1d_ref_dec(&c->refs[i].segmap);
 1249|  47.8k|                dav1d_ref_dec(&c->refs[i].refmvs);
 1250|  47.8k|                dav1d_cdf_thread_unref(&c->cdf[i]);
 1251|  47.8k|            }
 1252|  5.98k|            c->frame_flags |= PICTURE_FLAG_NEW_SEQUENCE;
 1253|       |        // If operating_parameter_info changed, signal it
 1254|  6.67k|        } else if (memcmp(seq_hdr->operating_parameter_info, c->seq_hdr->operating_parameter_info,
  ------------------
  |  Branch (1254:20): [True: 161, False: 6.51k]
  ------------------
 1255|  6.67k|                          sizeof(seq_hdr->operating_parameter_info)))
 1256|    161|        {
 1257|    161|            c->frame_flags |= PICTURE_FLAG_NEW_OP_PARAMS_INFO;
 1258|    161|        }
 1259|  22.5k|        dav1d_ref_dec(&c->seq_hdr_ref);
 1260|  22.5k|        c->seq_hdr_ref = ref;
 1261|  22.5k|        c->seq_hdr = seq_hdr;
 1262|  22.5k|        break;
 1263|  24.9k|    }
 1264|  1.19k|    case DAV1D_OBU_REDUNDANT_FRAME_HDR:
  ------------------
  |  Branch (1264:5): [True: 1.19k, False: 412k]
  ------------------
 1265|  1.19k|        if (c->frame_hdr) break;
  ------------------
  |  Branch (1265:13): [True: 485, False: 712]
  ------------------
 1266|       |        // fall-through
 1267|   309k|    case DAV1D_OBU_FRAME:
  ------------------
  |  Branch (1267:5): [True: 308k, False: 104k]
  ------------------
 1268|   329k|    case DAV1D_OBU_FRAME_HDR:
  ------------------
  |  Branch (1268:5): [True: 19.5k, False: 393k]
  ------------------
 1269|   329k|        if (!c->seq_hdr) goto error;
  ------------------
  |  Branch (1269:13): [True: 224, False: 328k]
  ------------------
 1270|   328k|        if (!c->frame_hdr_ref) {
  ------------------
  |  Branch (1270:13): [True: 286k, False: 42.1k]
  ------------------
 1271|   286k|            c->frame_hdr_ref = dav1d_ref_create_using_pool(c->frame_hdr_pool,
 1272|   286k|                                                           sizeof(Dav1dFrameHeader));
 1273|   286k|            if (!c->frame_hdr_ref) return DAV1D_ERR(ENOMEM);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  |  Branch (1273:17): [True: 0, False: 286k]
  ------------------
 1274|   286k|        }
 1275|   328k|#ifndef NDEBUG
 1276|       |        // ensure that the reference is writable
 1277|   328k|        assert(dav1d_ref_is_writable(c->frame_hdr_ref));
  ------------------
  |  Branch (1277:9): [True: 328k, False: 0]
  ------------------
 1278|   328k|#endif
 1279|   328k|        c->frame_hdr = c->frame_hdr_ref->data;
 1280|   328k|        memset(c->frame_hdr, 0, sizeof(*c->frame_hdr));
 1281|   328k|        c->frame_hdr->temporal_id = temporal_id;
 1282|   328k|        c->frame_hdr->spatial_id = spatial_id;
 1283|   328k|        if ((res = parse_frame_hdr(c, &gb)) < 0) {
  ------------------
  |  Branch (1283:13): [True: 6.14k, False: 322k]
  ------------------
 1284|  6.14k|            c->frame_hdr = NULL;
 1285|  6.14k|            goto error;
 1286|  6.14k|        }
 1287|   325k|        for (int n = 0; n < c->n_tile_data; n++)
  ------------------
  |  Branch (1287:25): [True: 3.28k, False: 322k]
  ------------------
 1288|  3.28k|            dav1d_data_unref_internal(&c->tile[n].data);
 1289|   322k|        c->n_tile_data = 0;
 1290|   322k|        c->n_tiles = 0;
 1291|   322k|        if (type != DAV1D_OBU_FRAME) {
  ------------------
  |  Branch (1291:13): [True: 18.8k, False: 303k]
  ------------------
 1292|       |            // This is actually a frame header OBU so read the
 1293|       |            // trailing bit and check for overrun.
 1294|  18.8k|            if (check_trailing_bits(&gb, c->strict_std_compliance) < 0) {
  ------------------
  |  Branch (1294:17): [True: 2.22k, False: 16.6k]
  ------------------
 1295|  2.22k|                c->frame_hdr = NULL;
 1296|  2.22k|                goto error;
 1297|  2.22k|            }
 1298|  18.8k|        }
 1299|       |
 1300|   320k|        if (c->frame_size_limit && (int64_t)c->frame_hdr->width[1] *
  ------------------
  |  Branch (1300:13): [True: 320k, False: 0]
  |  Branch (1300:36): [True: 478, False: 319k]
  ------------------
 1301|   320k|            c->frame_hdr->height > c->frame_size_limit)
 1302|    478|        {
 1303|    478|            dav1d_log(c, "Frame size %dx%d exceeds limit %u\n", c->frame_hdr->width[1],
  ------------------
  |  |   44|    478|#define dav1d_log(...) do { } while(0)
  |  |  ------------------
  |  |  |  Branch (44:37): [Folded, False: 478]
  |  |  ------------------
  ------------------
 1304|    478|                      c->frame_hdr->height, c->frame_size_limit);
 1305|    478|            c->frame_hdr = NULL;
 1306|    478|            return DAV1D_ERR(ERANGE);
  ------------------
  |  |   58|    478|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
 1307|    478|        }
 1308|       |
 1309|   319k|        if (type != DAV1D_OBU_FRAME)
  ------------------
  |  Branch (1309:13): [True: 16.6k, False: 303k]
  ------------------
 1310|  16.6k|            break;
 1311|       |        // OBU_FRAMEs shouldn't be signaled with show_existing_frame
 1312|   303k|        if (c->frame_hdr->show_existing_frame) {
  ------------------
  |  Branch (1312:13): [True: 762, False: 302k]
  ------------------
 1313|    762|            c->frame_hdr = NULL;
 1314|    762|            goto error;
 1315|    762|        }
 1316|       |
 1317|       |        // This is the frame header at the start of a frame OBU.
 1318|       |        // There's no trailing bit at the end to skip, but we do need
 1319|       |        // to align to the next byte.
 1320|   302k|        dav1d_bytealign_get_bits(&gb);
 1321|       |        // fall-through
 1322|   304k|    case DAV1D_OBU_TILE_GRP: {
  ------------------
  |  Branch (1322:5): [True: 2.22k, False: 411k]
  ------------------
 1323|   304k|        if (!c->frame_hdr) goto error;
  ------------------
  |  Branch (1323:13): [True: 1.26k, False: 303k]
  ------------------
 1324|   303k|        if (c->n_tile_data_alloc < c->n_tile_data + 1) {
  ------------------
  |  Branch (1324:13): [True: 9.46k, False: 294k]
  ------------------
 1325|  9.46k|            if ((c->n_tile_data + 1) > INT_MAX / (int)sizeof(*c->tile)) goto error;
  ------------------
  |  Branch (1325:17): [True: 0, False: 9.46k]
  ------------------
 1326|  9.46k|            struct Dav1dTileGroup *tile = dav1d_realloc(ALLOC_TILE, c->tile,
  ------------------
  |  |  133|  9.46k|#define dav1d_realloc(type, ptr, sz) realloc(ptr, sz)
  ------------------
 1327|  9.46k|                                                        (c->n_tile_data + 1) * sizeof(*c->tile));
 1328|  9.46k|            if (!tile) goto error;
  ------------------
  |  Branch (1328:17): [True: 0, False: 9.46k]
  ------------------
 1329|  9.46k|            c->tile = tile;
 1330|  9.46k|            memset(c->tile + c->n_tile_data, 0, sizeof(*c->tile));
 1331|  9.46k|            c->n_tile_data_alloc = c->n_tile_data + 1;
 1332|  9.46k|        }
 1333|   303k|        parse_tile_hdr(c, &gb);
 1334|       |        // Align to the next byte boundary and check for overrun.
 1335|   303k|        dav1d_bytealign_get_bits(&gb);
 1336|   303k|        if (gb.error) goto error;
  ------------------
  |  Branch (1336:13): [True: 15.7k, False: 287k]
  ------------------
 1337|       |
 1338|   287k|        dav1d_data_ref(&c->tile[c->n_tile_data].data, in);
 1339|   287k|        c->tile[c->n_tile_data].data.data = gb.ptr;
 1340|   287k|        c->tile[c->n_tile_data].data.sz = (size_t)(gb.ptr_end - gb.ptr);
 1341|       |        // ensure tile groups are in order and sane, see 6.10.1
 1342|   287k|        if (c->tile[c->n_tile_data].start > c->tile[c->n_tile_data].end ||
  ------------------
  |  Branch (1342:13): [True: 972, False: 286k]
  ------------------
 1343|   286k|            c->tile[c->n_tile_data].start != c->n_tiles)
  ------------------
  |  Branch (1343:13): [True: 823, False: 285k]
  ------------------
 1344|  1.79k|        {
 1345|  3.75k|            for (int i = 0; i <= c->n_tile_data; i++)
  ------------------
  |  Branch (1345:29): [True: 1.95k, False: 1.79k]
  ------------------
 1346|  1.95k|                dav1d_data_unref_internal(&c->tile[i].data);
 1347|  1.79k|            c->n_tile_data = 0;
 1348|  1.79k|            c->n_tiles = 0;
 1349|  1.79k|            goto error;
 1350|  1.79k|        }
 1351|   285k|        c->n_tiles += 1 + c->tile[c->n_tile_data].end -
 1352|   285k|                          c->tile[c->n_tile_data].start;
 1353|   285k|        c->n_tile_data++;
 1354|   285k|        break;
 1355|   287k|    }
 1356|  5.74k|    case DAV1D_OBU_METADATA: {
  ------------------
  |  Branch (1356:5): [True: 5.74k, False: 407k]
  ------------------
 1357|  5.74k|#define DEBUG_OBU_METADATA 0
 1358|       |#if DEBUG_OBU_METADATA
 1359|       |        const uint8_t *const init_ptr = gb.ptr;
 1360|       |#endif
 1361|       |        // obu metadata type field
 1362|  5.74k|        const enum ObuMetaType meta_type = dav1d_get_uleb128(&gb);
 1363|  5.74k|        if (gb.error) goto error;
  ------------------
  |  Branch (1363:13): [True: 308, False: 5.43k]
  ------------------
 1364|       |
 1365|  5.43k|        switch (meta_type) {
 1366|    649|        case OBU_META_HDR_CLL: {
  ------------------
  |  Branch (1366:9): [True: 649, False: 4.78k]
  ------------------
 1367|    649|            Dav1dRef *ref = dav1d_ref_create(ALLOC_OBU_META,
  ------------------
  |  |   49|    649|#define dav1d_ref_create(type, size) dav1d_ref_create(size)
  ------------------
 1368|    649|                                             sizeof(Dav1dContentLightLevel));
 1369|    649|            if (!ref) return DAV1D_ERR(ENOMEM);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  |  Branch (1369:17): [True: 0, False: 649]
  ------------------
 1370|    649|            Dav1dContentLightLevel *const content_light = ref->data;
 1371|       |
 1372|    649|            content_light->max_content_light_level = dav1d_get_bits(&gb, 16);
 1373|       |#if DEBUG_OBU_METADATA
 1374|       |            printf("CLLOBU: max-content-light-level: %d [off=%td]\n",
 1375|       |                   content_light->max_content_light_level,
 1376|       |                   (gb.ptr - init_ptr) * 8 - gb.bits_left);
 1377|       |#endif
 1378|    649|            content_light->max_frame_average_light_level = dav1d_get_bits(&gb, 16);
 1379|       |#if DEBUG_OBU_METADATA
 1380|       |            printf("CLLOBU: max-frame-average-light-level: %d [off=%td]\n",
 1381|       |                   content_light->max_frame_average_light_level,
 1382|       |                   (gb.ptr - init_ptr) * 8 - gb.bits_left);
 1383|       |#endif
 1384|       |
 1385|    649|            if (check_trailing_bits(&gb, c->strict_std_compliance) < 0) {
  ------------------
  |  Branch (1385:17): [True: 241, False: 408]
  ------------------
 1386|    241|                dav1d_ref_dec(&ref);
 1387|    241|                goto error;
 1388|    241|            }
 1389|       |
 1390|    408|            dav1d_ref_dec(&c->content_light_ref);
 1391|    408|            c->content_light = content_light;
 1392|    408|            c->content_light_ref = ref;
 1393|    408|            break;
 1394|    649|        }
 1395|    222|        case OBU_META_HDR_MDCV: {
  ------------------
  |  Branch (1395:9): [True: 222, False: 5.21k]
  ------------------
 1396|    222|            Dav1dRef *ref = dav1d_ref_create(ALLOC_OBU_META,
  ------------------
  |  |   49|    222|#define dav1d_ref_create(type, size) dav1d_ref_create(size)
  ------------------
 1397|    222|                                             sizeof(Dav1dMasteringDisplay));
 1398|    222|            if (!ref) return DAV1D_ERR(ENOMEM);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  |  Branch (1398:17): [True: 0, False: 222]
  ------------------
 1399|    222|            Dav1dMasteringDisplay *const mastering_display = ref->data;
 1400|       |
 1401|    888|            for (int i = 0; i < 3; i++) {
  ------------------
  |  Branch (1401:29): [True: 666, False: 222]
  ------------------
 1402|    666|                mastering_display->primaries[i][0] = dav1d_get_bits(&gb, 16);
 1403|    666|                mastering_display->primaries[i][1] = dav1d_get_bits(&gb, 16);
 1404|       |#if DEBUG_OBU_METADATA
 1405|       |                printf("MDCVOBU: primaries[%d]: (%d, %d) [off=%td]\n", i,
 1406|       |                       mastering_display->primaries[i][0],
 1407|       |                       mastering_display->primaries[i][1],
 1408|       |                       (gb.ptr - init_ptr) * 8 - gb.bits_left);
 1409|       |#endif
 1410|    666|            }
 1411|    222|            mastering_display->white_point[0] = dav1d_get_bits(&gb, 16);
 1412|       |#if DEBUG_OBU_METADATA
 1413|       |            printf("MDCVOBU: white-point-x: %d [off=%td]\n",
 1414|       |                   mastering_display->white_point[0],
 1415|       |                   (gb.ptr - init_ptr) * 8 - gb.bits_left);
 1416|       |#endif
 1417|    222|            mastering_display->white_point[1] = dav1d_get_bits(&gb, 16);
 1418|       |#if DEBUG_OBU_METADATA
 1419|       |            printf("MDCVOBU: white-point-y: %d [off=%td]\n",
 1420|       |                   mastering_display->white_point[1],
 1421|       |                   (gb.ptr - init_ptr) * 8 - gb.bits_left);
 1422|       |#endif
 1423|    222|            mastering_display->max_luminance = dav1d_get_bits(&gb, 32);
 1424|       |#if DEBUG_OBU_METADATA
 1425|       |            printf("MDCVOBU: max-luminance: %d [off=%td]\n",
 1426|       |                   mastering_display->max_luminance,
 1427|       |                   (gb.ptr - init_ptr) * 8 - gb.bits_left);
 1428|       |#endif
 1429|    222|            mastering_display->min_luminance = dav1d_get_bits(&gb, 32);
 1430|       |#if DEBUG_OBU_METADATA
 1431|       |            printf("MDCVOBU: min-luminance: %d [off=%td]\n",
 1432|       |                   mastering_display->min_luminance,
 1433|       |                   (gb.ptr - init_ptr) * 8 - gb.bits_left);
 1434|       |#endif
 1435|    222|            if (check_trailing_bits(&gb, c->strict_std_compliance) < 0) {
  ------------------
  |  Branch (1435:17): [True: 135, False: 87]
  ------------------
 1436|    135|                dav1d_ref_dec(&ref);
 1437|    135|                goto error;
 1438|    135|            }
 1439|       |
 1440|     87|            dav1d_ref_dec(&c->mastering_display_ref);
 1441|     87|            c->mastering_display = mastering_display;
 1442|     87|            c->mastering_display_ref = ref;
 1443|     87|            break;
 1444|    222|        }
 1445|  4.14k|        case OBU_META_ITUT_T35: {
  ------------------
  |  Branch (1445:9): [True: 4.14k, False: 1.29k]
  ------------------
 1446|  4.14k|            ptrdiff_t payload_size = gb.ptr_end - gb.ptr;
 1447|       |            // Don't take into account all the trailing bits for payload_size
 1448|  4.44k|            while (payload_size > 0 && !gb.ptr[payload_size - 1])
  ------------------
  |  Branch (1448:20): [True: 3.87k, False: 571]
  |  Branch (1448:40): [True: 306, False: 3.57k]
  ------------------
 1449|    306|                payload_size--; // trailing_zero_bit x 8
 1450|  4.14k|            payload_size--; // trailing_one_bit + trailing_zero_bit x 7
 1451|       |
 1452|  4.14k|            int country_code_extension_byte = 0;
 1453|  4.14k|            const int country_code = dav1d_get_bits(&gb, 8);
 1454|  4.14k|            payload_size--;
 1455|  4.14k|            if (country_code == 0xFF) {
  ------------------
  |  Branch (1455:17): [True: 1.97k, False: 2.17k]
  ------------------
 1456|  1.97k|                country_code_extension_byte = dav1d_get_bits(&gb, 8);
 1457|  1.97k|                payload_size--;
 1458|  1.97k|            }
 1459|       |
 1460|  4.14k|            if (payload_size <= 0 || gb.ptr[payload_size] != 0x80) {
  ------------------
  |  Branch (1460:17): [True: 628, False: 3.51k]
  |  Branch (1460:38): [True: 1.53k, False: 1.97k]
  ------------------
 1461|  2.16k|                dav1d_log(c, "Malformed ITU-T T.35 metadata message format\n");
  ------------------
  |  |   44|  2.16k|#define dav1d_log(...) do { } while(0)
  |  |  ------------------
  |  |  |  Branch (44:37): [Folded, False: 2.16k]
  |  |  ------------------
  ------------------
 1462|  2.16k|                break;
 1463|  2.16k|            }
 1464|       |
 1465|  1.97k|            if ((c->n_itut_t35 + 1) > INT_MAX / (int)sizeof(*c->itut_t35)) goto error;
  ------------------
  |  Branch (1465:17): [True: 0, False: 1.97k]
  ------------------
 1466|  1.97k|            struct Dav1dITUTT35 *itut_t35 = dav1d_realloc(ALLOC_OBU_META, c->itut_t35,
  ------------------
  |  |  133|  1.97k|#define dav1d_realloc(type, ptr, sz) realloc(ptr, sz)
  ------------------
 1467|  1.97k|                                                          (c->n_itut_t35 + 1) * sizeof(*c->itut_t35));
 1468|  1.97k|            if (!itut_t35) goto error;
  ------------------
  |  Branch (1468:17): [True: 0, False: 1.97k]
  ------------------
 1469|  1.97k|            c->itut_t35 = itut_t35;
 1470|  1.97k|            memset(c->itut_t35 + c->n_itut_t35, 0, sizeof(*c->itut_t35));
 1471|       |
 1472|  1.97k|            struct itut_t35_ctx_context *itut_t35_ctx;
 1473|  1.97k|            if (!c->n_itut_t35) {
  ------------------
  |  Branch (1473:17): [True: 977, False: 1.00k]
  ------------------
 1474|    977|                assert(!c->itut_t35_ref);
  ------------------
  |  Branch (1474:17): [True: 977, False: 0]
  ------------------
 1475|    977|                itut_t35_ctx = dav1d_malloc(ALLOC_OBU_META, sizeof(struct itut_t35_ctx_context));
  ------------------
  |  |  132|    977|#define dav1d_malloc(type, sz) malloc(sz)
  ------------------
 1476|    977|                if (!itut_t35_ctx) goto error;
  ------------------
  |  Branch (1476:21): [True: 0, False: 977]
  ------------------
 1477|    977|                c->itut_t35_ref = dav1d_ref_init(&itut_t35_ctx->ref, c->itut_t35,
 1478|    977|                                                 dav1d_picture_free_itut_t35, itut_t35_ctx, 0);
 1479|  1.00k|            } else {
 1480|  1.00k|                assert(c->itut_t35_ref && atomic_load(&c->itut_t35_ref->ref_cnt) == 1);
  ------------------
  |  Branch (1480:17): [True: 1.00k, False: 0]
  |  Branch (1480:17): [True: 1.00k, False: 0]
  ------------------
 1481|  1.00k|                itut_t35_ctx = c->itut_t35_ref->user_data;
 1482|  1.00k|                c->itut_t35_ref->const_data = (uint8_t *)c->itut_t35;
 1483|  1.00k|            }
 1484|  1.97k|            itut_t35_ctx->itut_t35 = c->itut_t35;
 1485|  1.97k|            itut_t35_ctx->n_itut_t35 = c->n_itut_t35 + 1;
 1486|       |
 1487|  1.97k|            Dav1dITUTT35 *const itut_t35_metadata = &c->itut_t35[c->n_itut_t35];
 1488|  1.97k|            itut_t35_metadata->payload = dav1d_malloc(ALLOC_OBU_META, payload_size);
  ------------------
  |  |  132|  1.97k|#define dav1d_malloc(type, sz) malloc(sz)
  ------------------
 1489|  1.97k|            if (!itut_t35_metadata->payload) goto error;
  ------------------
  |  Branch (1489:17): [True: 0, False: 1.97k]
  ------------------
 1490|       |
 1491|  1.97k|            itut_t35_metadata->country_code = country_code;
 1492|  1.97k|            itut_t35_metadata->country_code_extension_byte = country_code_extension_byte;
 1493|  1.97k|            itut_t35_metadata->payload_size = payload_size;
 1494|       |
 1495|       |            // We know that we've read a whole number of bytes and that the
 1496|       |            // payload is within the OBU boundaries, so just use memcpy()
 1497|  1.97k|            assert(gb.bits_left == 0);
  ------------------
  |  Branch (1497:13): [True: 1.97k, False: 0]
  ------------------
 1498|  1.97k|            memcpy(itut_t35_metadata->payload, gb.ptr, payload_size);
 1499|       |
 1500|  1.97k|            c->n_itut_t35++;
 1501|  1.97k|            break;
 1502|  1.97k|        }
 1503|      2|        case OBU_META_SCALABILITY:
  ------------------
  |  Branch (1503:9): [True: 2, False: 5.43k]
  ------------------
 1504|     11|        case OBU_META_TIMECODE:
  ------------------
  |  Branch (1504:9): [True: 9, False: 5.42k]
  ------------------
 1505|       |            // ignore metadata OBUs we don't care about
 1506|     11|            break;
 1507|    412|        default:
  ------------------
  |  Branch (1507:9): [True: 412, False: 5.02k]
  ------------------
 1508|       |            // print a warning but don't fail for unknown types
 1509|    412|            if (meta_type > 31) // Types 6 to 31 are "Unregistered user private", so ignore them.
  ------------------
  |  Branch (1509:17): [True: 330, False: 82]
  ------------------
 1510|    330|                dav1d_log(c, "Unknown Metadata OBU type %d\n", meta_type);
  ------------------
  |  |   44|    330|#define dav1d_log(...) do { } while(0)
  |  |  ------------------
  |  |  |  Branch (44:37): [Folded, False: 330]
  |  |  ------------------
  ------------------
 1511|    412|            break;
 1512|  5.43k|        }
 1513|       |
 1514|  5.06k|        break;
 1515|  5.43k|    }
 1516|  43.9k|    case DAV1D_OBU_TD:
  ------------------
  |  Branch (1516:5): [True: 43.9k, False: 369k]
  ------------------
 1517|  43.9k|        c->frame_flags |= PICTURE_FLAG_NEW_TEMPORAL_UNIT;
 1518|  43.9k|        break;
 1519|    374|    case DAV1D_OBU_PADDING:
  ------------------
  |  Branch (1519:5): [True: 374, False: 412k]
  ------------------
 1520|       |        // ignore OBUs we don't care about
 1521|    374|        break;
 1522|  6.53k|    default:
  ------------------
  |  Branch (1522:5): [True: 6.53k, False: 406k]
  ------------------
 1523|       |        // print a warning but don't fail for unknown types
 1524|  6.53k|        dav1d_log(c, "Unknown OBU type %d of size %td\n", type, gb.ptr_end - gb.ptr);
  ------------------
  |  |   44|  6.53k|#define dav1d_log(...) do { } while(0)
  |  |  ------------------
  |  |  |  Branch (44:37): [Folded, False: 6.53k]
  |  |  ------------------
  ------------------
 1525|  6.53k|        break;
 1526|   413k|    }
 1527|       |
 1528|   381k|    if (c->seq_hdr && c->frame_hdr) {
  ------------------
  |  Branch (1528:9): [True: 379k, False: 2.31k]
  |  Branch (1528:23): [True: 313k, False: 66.2k]
  ------------------
 1529|   313k|        if (c->frame_hdr->show_existing_frame) {
  ------------------
  |  Branch (1529:13): [True: 14.6k, False: 298k]
  ------------------
 1530|  14.6k|            if (!c->refs[c->frame_hdr->existing_frame_idx].p.p.frame_hdr) goto error;
  ------------------
  |  Branch (1530:17): [True: 251, False: 14.3k]
  ------------------
 1531|  14.3k|            switch (c->refs[c->frame_hdr->existing_frame_idx].p.p.frame_hdr->frame_type) {
 1532|  5.77k|            case DAV1D_FRAME_TYPE_INTER:
  ------------------
  |  Branch (1532:13): [True: 5.77k, False: 8.58k]
  ------------------
 1533|  5.83k|            case DAV1D_FRAME_TYPE_SWITCH:
  ------------------
  |  Branch (1533:13): [True: 67, False: 14.2k]
  ------------------
 1534|  5.83k|                if (c->decode_frame_type > DAV1D_DECODEFRAMETYPE_REFERENCE)
  ------------------
  |  Branch (1534:21): [True: 0, False: 5.83k]
  ------------------
 1535|      0|                    goto skip;
 1536|  5.83k|                break;
 1537|  5.83k|            case DAV1D_FRAME_TYPE_INTRA:
  ------------------
  |  Branch (1537:13): [True: 481, False: 13.8k]
  ------------------
 1538|    481|                if (c->decode_frame_type > DAV1D_DECODEFRAMETYPE_INTRA)
  ------------------
  |  Branch (1538:21): [True: 0, False: 481]
  ------------------
 1539|      0|                    goto skip;
 1540|       |                // fall-through
 1541|  8.51k|            default:
  ------------------
  |  Branch (1541:13): [True: 8.03k, False: 6.32k]
  ------------------
 1542|  8.51k|                break;
 1543|  14.3k|            }
 1544|  14.3k|            if (!c->refs[c->frame_hdr->existing_frame_idx].p.p.data[0]) goto error;
  ------------------
  |  Branch (1544:17): [True: 0, False: 14.3k]
  ------------------
 1545|  14.3k|            if (c->strict_std_compliance &&
  ------------------
  |  Branch (1545:17): [True: 0, False: 14.3k]
  ------------------
 1546|      0|                !c->refs[c->frame_hdr->existing_frame_idx].p.showable)
  ------------------
  |  Branch (1546:17): [True: 0, False: 0]
  ------------------
 1547|      0|            {
 1548|      0|                goto error;
 1549|      0|            }
 1550|  14.3k|            if (c->n_fc == 1) {
  ------------------
  |  Branch (1550:17): [True: 0, False: 14.3k]
  ------------------
 1551|      0|                dav1d_thread_picture_ref(&c->out,
 1552|      0|                                         &c->refs[c->frame_hdr->existing_frame_idx].p);
 1553|      0|                dav1d_picture_copy_props(&c->out.p,
 1554|      0|                                         c->content_light, c->content_light_ref,
 1555|      0|                                         c->mastering_display, c->mastering_display_ref,
 1556|      0|                                         c->itut_t35, c->itut_t35_ref, c->n_itut_t35,
 1557|      0|                                         &in->m);
 1558|       |                // Must be removed from the context after being attached to the frame
 1559|      0|                dav1d_ref_dec(&c->itut_t35_ref);
 1560|      0|                c->itut_t35 = NULL;
 1561|      0|                c->n_itut_t35 = 0;
 1562|      0|                c->event_flags |= dav1d_picture_get_event_flags(&c->refs[c->frame_hdr->existing_frame_idx].p);
 1563|  14.3k|            } else {
 1564|  14.3k|                pthread_mutex_lock(&c->task_thread.lock);
 1565|       |                // need to append this to the frame output queue
 1566|  14.3k|                const unsigned next = c->frame_thread.next++;
 1567|  14.3k|                if (c->frame_thread.next == c->n_fc)
  ------------------
  |  Branch (1567:21): [True: 3.51k, False: 10.8k]
  ------------------
 1568|  3.51k|                    c->frame_thread.next = 0;
 1569|       |
 1570|  14.3k|                Dav1dFrameContext *const f = &c->fc[next];
 1571|  15.9k|                while (f->n_tile_data > 0)
  ------------------
  |  Branch (1571:24): [True: 1.54k, False: 14.3k]
  ------------------
 1572|  1.54k|                    pthread_cond_wait(&f->task_thread.cond,
 1573|  1.54k|                                      &f->task_thread.ttd->lock);
 1574|  14.3k|                Dav1dThreadPicture *const out_delayed =
 1575|  14.3k|                    &c->frame_thread.out_delayed[next];
 1576|  14.3k|                if (out_delayed->p.data[0] || atomic_load(&f->task_thread.error)) {
  ------------------
  |  Branch (1576:21): [True: 13.5k, False: 758]
  |  Branch (1576:47): [True: 216, False: 542]
  ------------------
 1577|  13.8k|                    unsigned first = atomic_load(&c->task_thread.first);
 1578|  13.8k|                    if (first + 1U < c->n_fc)
  ------------------
  |  Branch (1578:25): [True: 10.4k, False: 3.36k]
  ------------------
 1579|  13.8k|                        atomic_fetch_add(&c->task_thread.first, 1U);
 1580|  3.36k|                    else
 1581|  13.8k|                        atomic_store(&c->task_thread.first, 0);
 1582|  13.8k|                    atomic_compare_exchange_strong(&c->task_thread.reset_task_cur,
 1583|  13.8k|                                                   &first, UINT_MAX);
 1584|  13.8k|                    if (c->task_thread.cur && c->task_thread.cur < c->n_fc)
  ------------------
  |  Branch (1584:25): [True: 13.5k, False: 249]
  |  Branch (1584:47): [True: 6.42k, False: 7.13k]
  ------------------
 1585|  6.42k|                        c->task_thread.cur--;
 1586|  13.8k|                }
 1587|  14.3k|                const int error = f->task_thread.retval;
 1588|  14.3k|                if (error) {
  ------------------
  |  Branch (1588:21): [True: 1.13k, False: 13.2k]
  ------------------
 1589|  1.13k|                    c->cached_error = error;
 1590|  1.13k|                    f->task_thread.retval = 0;
 1591|  1.13k|                    dav1d_data_props_copy(&c->cached_error_props, &out_delayed->p.m);
 1592|  1.13k|                    dav1d_thread_picture_unref(out_delayed);
 1593|  13.2k|                } else if (out_delayed->p.data[0]) {
  ------------------
  |  Branch (1593:28): [True: 12.4k, False: 758]
  ------------------
 1594|  12.4k|                    const unsigned progress = atomic_load_explicit(&out_delayed->progress[1],
 1595|  12.4k|                                                                   memory_order_relaxed);
 1596|  12.4k|                    if ((out_delayed->visible || c->output_invisible_frames) &&
  ------------------
  |  Branch (1596:26): [True: 12.1k, False: 272]
  |  Branch (1596:50): [True: 0, False: 272]
  ------------------
 1597|  12.1k|                        progress != FRAME_ERROR)
  ------------------
  |  |   35|  12.1k|#define FRAME_ERROR (UINT_MAX - 1)
  ------------------
  |  Branch (1597:25): [True: 6.47k, False: 5.71k]
  ------------------
 1598|  6.47k|                    {
 1599|  6.47k|                        dav1d_thread_picture_ref(&c->out, out_delayed);
 1600|  6.47k|                        c->event_flags |= dav1d_picture_get_event_flags(out_delayed);
 1601|  6.47k|                    }
 1602|  12.4k|                    dav1d_thread_picture_unref(out_delayed);
 1603|  12.4k|                }
 1604|  14.3k|                dav1d_thread_picture_ref(out_delayed,
 1605|  14.3k|                                         &c->refs[c->frame_hdr->existing_frame_idx].p);
 1606|  14.3k|                out_delayed->visible = 1;
 1607|  14.3k|                dav1d_picture_copy_props(&out_delayed->p,
 1608|  14.3k|                                         c->content_light, c->content_light_ref,
 1609|  14.3k|                                         c->mastering_display, c->mastering_display_ref,
 1610|  14.3k|                                         c->itut_t35, c->itut_t35_ref, c->n_itut_t35,
 1611|  14.3k|                                         &in->m);
 1612|       |                // Must be removed from the context after being attached to the frame
 1613|  14.3k|                dav1d_ref_dec(&c->itut_t35_ref);
 1614|  14.3k|                c->itut_t35 = NULL;
 1615|  14.3k|                c->n_itut_t35 = 0;
 1616|       |
 1617|  14.3k|                pthread_mutex_unlock(&c->task_thread.lock);
 1618|  14.3k|            }
 1619|  14.3k|            if (c->refs[c->frame_hdr->existing_frame_idx].p.p.frame_hdr->frame_type == DAV1D_FRAME_TYPE_KEY) {
  ------------------
  |  Branch (1619:17): [True: 8.03k, False: 6.32k]
  ------------------
 1620|  8.03k|                const int r = c->frame_hdr->existing_frame_idx;
 1621|  8.03k|                c->refs[r].p.showable = 0;
 1622|  72.3k|                for (int i = 0; i < 8; i++) {
  ------------------
  |  Branch (1622:33): [True: 64.2k, False: 8.03k]
  ------------------
 1623|  64.2k|                    if (i == r) continue;
  ------------------
  |  Branch (1623:25): [True: 8.03k, False: 56.2k]
  ------------------
 1624|       |
 1625|  56.2k|                    if (c->refs[i].p.p.frame_hdr)
  ------------------
  |  Branch (1625:25): [True: 56.0k, False: 164]
  ------------------
 1626|  56.0k|                        dav1d_thread_picture_unref(&c->refs[i].p);
 1627|  56.2k|                    dav1d_thread_picture_ref(&c->refs[i].p, &c->refs[r].p);
 1628|       |
 1629|  56.2k|                    dav1d_cdf_thread_unref(&c->cdf[i]);
 1630|  56.2k|                    dav1d_cdf_thread_ref(&c->cdf[i], &c->cdf[r]);
 1631|       |
 1632|  56.2k|                    dav1d_ref_dec(&c->refs[i].segmap);
 1633|  56.2k|                    c->refs[i].segmap = c->refs[r].segmap;
 1634|  56.2k|                    if (c->refs[r].segmap)
  ------------------
  |  Branch (1634:25): [True: 8.84k, False: 47.3k]
  ------------------
 1635|  8.84k|                        dav1d_ref_inc(c->refs[r].segmap);
 1636|  56.2k|                    dav1d_ref_dec(&c->refs[i].refmvs);
 1637|  56.2k|                }
 1638|  8.03k|            }
 1639|  14.3k|            c->frame_hdr = NULL;
 1640|   298k|        } else if (c->n_tiles == c->frame_hdr->tiling.cols * c->frame_hdr->tiling.rows) {
  ------------------
  |  Branch (1640:20): [True: 285k, False: 13.0k]
  ------------------
 1641|   285k|            switch (c->frame_hdr->frame_type) {
 1642|  84.7k|            case DAV1D_FRAME_TYPE_INTER:
  ------------------
  |  Branch (1642:13): [True: 84.7k, False: 200k]
  ------------------
 1643|  85.5k|            case DAV1D_FRAME_TYPE_SWITCH:
  ------------------
  |  Branch (1643:13): [True: 772, False: 284k]
  ------------------
 1644|  85.5k|                if (c->decode_frame_type > DAV1D_DECODEFRAMETYPE_REFERENCE ||
  ------------------
  |  Branch (1644:21): [True: 0, False: 85.5k]
  ------------------
 1645|  85.5k|                    (c->decode_frame_type == DAV1D_DECODEFRAMETYPE_REFERENCE &&
  ------------------
  |  Branch (1645:22): [True: 0, False: 85.5k]
  ------------------
 1646|      0|                     !c->frame_hdr->refresh_frame_flags))
  ------------------
  |  Branch (1646:22): [True: 0, False: 0]
  ------------------
 1647|      0|                    goto skip;
 1648|  85.5k|                break;
 1649|  85.5k|            case DAV1D_FRAME_TYPE_INTRA:
  ------------------
  |  Branch (1649:13): [True: 749, False: 284k]
  ------------------
 1650|    749|                if (c->decode_frame_type > DAV1D_DECODEFRAMETYPE_INTRA ||
  ------------------
  |  Branch (1650:21): [True: 0, False: 749]
  ------------------
 1651|    749|                    (c->decode_frame_type == DAV1D_DECODEFRAMETYPE_REFERENCE &&
  ------------------
  |  Branch (1651:22): [True: 0, False: 749]
  ------------------
 1652|      0|                     !c->frame_hdr->refresh_frame_flags))
  ------------------
  |  Branch (1652:22): [True: 0, False: 0]
  ------------------
 1653|      0|                    goto skip;
 1654|       |                // fall-through
 1655|   199k|            default:
  ------------------
  |  Branch (1655:13): [True: 199k, False: 86.2k]
  ------------------
 1656|   199k|                break;
 1657|   285k|            }
 1658|   285k|            if (!c->n_tile_data)
  ------------------
  |  Branch (1658:17): [True: 0, False: 285k]
  ------------------
 1659|      0|                goto error;
 1660|   285k|            if ((res = dav1d_submit_frame(c)) < 0)
  ------------------
  |  Branch (1660:17): [True: 3.24k, False: 282k]
  ------------------
 1661|  3.24k|                return res;
 1662|   285k|            assert(!c->n_tile_data);
  ------------------
  |  Branch (1662:13): [True: 282k, False: 0]
  ------------------
 1663|   282k|            c->frame_hdr = NULL;
 1664|   282k|            c->n_tiles = 0;
 1665|   282k|        }
 1666|   313k|    }
 1667|       |
 1668|   378k|    return gb.ptr_end - gb.ptr_start;
 1669|       |
 1670|      0|skip:
 1671|       |    // update refs with only the headers in case we skip the frame
 1672|      0|    for (int i = 0; i < 8; i++) {
  ------------------
  |  Branch (1672:21): [True: 0, False: 0]
  ------------------
 1673|      0|        if (c->frame_hdr->refresh_frame_flags & (1 << i)) {
  ------------------
  |  Branch (1673:13): [True: 0, False: 0]
  ------------------
 1674|      0|            dav1d_thread_picture_unref(&c->refs[i].p);
 1675|      0|            c->refs[i].p.p.frame_hdr = c->frame_hdr;
 1676|      0|            c->refs[i].p.p.seq_hdr = c->seq_hdr;
 1677|      0|            c->refs[i].p.p.frame_hdr_ref = c->frame_hdr_ref;
 1678|      0|            c->refs[i].p.p.seq_hdr_ref = c->seq_hdr_ref;
 1679|      0|            dav1d_ref_inc(c->frame_hdr_ref);
 1680|      0|            dav1d_ref_inc(c->seq_hdr_ref);
 1681|      0|        }
 1682|      0|    }
 1683|       |
 1684|      0|    dav1d_ref_dec(&c->frame_hdr_ref);
 1685|      0|    c->frame_hdr = NULL;
 1686|      0|    c->n_tiles = 0;
 1687|       |
 1688|      0|    return gb.ptr_end - gb.ptr_start;
 1689|       |
 1690|  37.3k|error:
 1691|  37.3k|    dav1d_data_props_copy(&c->cached_error_props, &in->m);
 1692|  37.3k|    dav1d_log(c, gb.error ? "Overrun in OBU bit buffer\n" :
  ------------------
  |  |   44|  37.3k|#define dav1d_log(...) do { } while(0)
  |  |  ------------------
  |  |  |  Branch (44:37): [Folded, False: 37.3k]
  |  |  ------------------
  ------------------
 1693|  37.3k|                            "Error parsing OBU data\n");
 1694|  37.3k|    return DAV1D_ERR(EINVAL);
  ------------------
  |  |   58|  37.3k|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
 1695|   381k|}
obu.c:parse_seq_hdr:
   75|  37.0k|{
   76|  37.0k|#define DEBUG_SEQ_HDR 0
   77|       |
   78|       |#if DEBUG_SEQ_HDR
   79|       |    const unsigned init_bit_pos = dav1d_get_bits_pos(gb);
   80|       |#endif
   81|       |
   82|  37.0k|    memset(hdr, 0, sizeof(*hdr));
   83|  37.0k|    hdr->profile = dav1d_get_bits(gb, 3);
   84|  37.0k|    if (hdr->profile > 2) goto error;
  ------------------
  |  Branch (84:9): [True: 396, False: 36.6k]
  ------------------
   85|       |#if DEBUG_SEQ_HDR
   86|       |    printf("SEQHDR: post-profile: off=%u\n",
   87|       |           dav1d_get_bits_pos(gb) - init_bit_pos);
   88|       |#endif
   89|       |
   90|  36.6k|    hdr->still_picture = dav1d_get_bit(gb);
   91|  36.6k|    hdr->reduced_still_picture_header = dav1d_get_bit(gb);
   92|  36.6k|    if (hdr->reduced_still_picture_header && !hdr->still_picture) goto error;
  ------------------
  |  Branch (92:9): [True: 21.3k, False: 15.3k]
  |  Branch (92:46): [True: 323, False: 20.9k]
  ------------------
   93|       |#if DEBUG_SEQ_HDR
   94|       |    printf("SEQHDR: post-stillpicture_flags: off=%u\n",
   95|       |           dav1d_get_bits_pos(gb) - init_bit_pos);
   96|       |#endif
   97|       |
   98|  36.3k|    if (hdr->reduced_still_picture_header) {
  ------------------
  |  Branch (98:9): [True: 20.9k, False: 15.3k]
  ------------------
   99|  20.9k|        hdr->num_operating_points = 1;
  100|  20.9k|        hdr->operating_points[0].major_level = dav1d_get_bits(gb, 3);
  101|  20.9k|        hdr->operating_points[0].minor_level = dav1d_get_bits(gb, 2);
  102|  20.9k|        hdr->operating_points[0].initial_display_delay = 10;
  103|  20.9k|    } else {
  104|  15.3k|        hdr->timing_info_present = dav1d_get_bit(gb);
  105|  15.3k|        if (hdr->timing_info_present) {
  ------------------
  |  Branch (105:13): [True: 1.79k, False: 13.5k]
  ------------------
  106|  1.79k|            hdr->num_units_in_tick = dav1d_get_bits(gb, 32);
  107|  1.79k|            hdr->time_scale = dav1d_get_bits(gb, 32);
  108|  1.79k|            if (strict_std_compliance && (!hdr->num_units_in_tick || !hdr->time_scale))
  ------------------
  |  Branch (108:17): [True: 0, False: 1.79k]
  |  Branch (108:43): [True: 0, False: 0]
  |  Branch (108:70): [True: 0, False: 0]
  ------------------
  109|      0|                goto error;
  110|  1.79k|            hdr->equal_picture_interval = dav1d_get_bit(gb);
  111|  1.79k|            if (hdr->equal_picture_interval) {
  ------------------
  |  Branch (111:17): [True: 660, False: 1.13k]
  ------------------
  112|    660|                const unsigned num_ticks_per_picture = dav1d_get_vlc(gb);
  113|    660|                if (num_ticks_per_picture == UINT32_MAX)
  ------------------
  |  Branch (113:21): [True: 67, False: 593]
  ------------------
  114|     67|                    goto error;
  115|    593|                hdr->num_ticks_per_picture = num_ticks_per_picture + 1;
  116|    593|            }
  117|       |
  118|  1.72k|            hdr->decoder_model_info_present = dav1d_get_bit(gb);
  119|  1.72k|            if (hdr->decoder_model_info_present) {
  ------------------
  |  Branch (119:17): [True: 1.17k, False: 555]
  ------------------
  120|  1.17k|                hdr->encoder_decoder_buffer_delay_length = dav1d_get_bits(gb, 5) + 1;
  121|  1.17k|                hdr->num_units_in_decoding_tick = dav1d_get_bits(gb, 32);
  122|  1.17k|                if (strict_std_compliance && !hdr->num_units_in_decoding_tick)
  ------------------
  |  Branch (122:21): [True: 0, False: 1.17k]
  |  Branch (122:46): [True: 0, False: 0]
  ------------------
  123|      0|                    goto error;
  124|  1.17k|                hdr->buffer_removal_delay_length = dav1d_get_bits(gb, 5) + 1;
  125|  1.17k|                hdr->frame_presentation_delay_length = dav1d_get_bits(gb, 5) + 1;
  126|  1.17k|            }
  127|  1.72k|        }
  128|       |#if DEBUG_SEQ_HDR
  129|       |        printf("SEQHDR: post-timinginfo: off=%u\n",
  130|       |               dav1d_get_bits_pos(gb) - init_bit_pos);
  131|       |#endif
  132|       |
  133|  15.2k|        hdr->display_model_info_present = dav1d_get_bit(gb);
  134|  15.2k|        hdr->num_operating_points = dav1d_get_bits(gb, 5) + 1;
  135|  45.3k|        for (int i = 0; i < hdr->num_operating_points; i++) {
  ------------------
  |  Branch (135:25): [True: 31.7k, False: 13.6k]
  ------------------
  136|  31.7k|            struct Dav1dSequenceHeaderOperatingPoint *const op =
  137|  31.7k|                &hdr->operating_points[i];
  138|  31.7k|            op->idc = dav1d_get_bits(gb, 12);
  139|  31.7k|            if (op->idc && (!(op->idc & 0xff) || !(op->idc & 0xf00)))
  ------------------
  |  Branch (139:17): [True: 18.7k, False: 13.0k]
  |  Branch (139:29): [True: 1.30k, False: 17.4k]
  |  Branch (139:50): [True: 296, False: 17.1k]
  ------------------
  140|  1.60k|                goto error;
  141|  30.1k|            op->major_level = 2 + dav1d_get_bits(gb, 3);
  142|  30.1k|            op->minor_level = dav1d_get_bits(gb, 2);
  143|  30.1k|            if (op->major_level > 3)
  ------------------
  |  Branch (143:17): [True: 7.29k, False: 22.8k]
  ------------------
  144|  7.29k|                op->tier = dav1d_get_bit(gb);
  145|  30.1k|            if (hdr->decoder_model_info_present) {
  ------------------
  |  Branch (145:17): [True: 5.06k, False: 25.0k]
  ------------------
  146|  5.06k|                op->decoder_model_param_present = dav1d_get_bit(gb);
  147|  5.06k|                if (op->decoder_model_param_present) {
  ------------------
  |  Branch (147:21): [True: 2.14k, False: 2.91k]
  ------------------
  148|  2.14k|                    struct Dav1dSequenceHeaderOperatingParameterInfo *const opi =
  149|  2.14k|                        &hdr->operating_parameter_info[i];
  150|  2.14k|                    opi->decoder_buffer_delay =
  151|  2.14k|                        dav1d_get_bits(gb, hdr->encoder_decoder_buffer_delay_length);
  152|  2.14k|                    opi->encoder_buffer_delay =
  153|  2.14k|                        dav1d_get_bits(gb, hdr->encoder_decoder_buffer_delay_length);
  154|  2.14k|                    opi->low_delay_mode = dav1d_get_bit(gb);
  155|  2.14k|                }
  156|  5.06k|            }
  157|  30.1k|            if (hdr->display_model_info_present)
  ------------------
  |  Branch (157:17): [True: 5.56k, False: 24.5k]
  ------------------
  158|  5.56k|                op->display_model_param_present = dav1d_get_bit(gb);
  159|  30.1k|            op->initial_display_delay =
  160|  30.1k|                op->display_model_param_present ? dav1d_get_bits(gb, 4) + 1 : 10;
  ------------------
  |  Branch (160:17): [True: 1.28k, False: 28.8k]
  ------------------
  161|  30.1k|        }
  162|       |#if DEBUG_SEQ_HDR
  163|       |        printf("SEQHDR: post-operating-points: off=%u\n",
  164|       |               dav1d_get_bits_pos(gb) - init_bit_pos);
  165|       |#endif
  166|  15.2k|    }
  167|       |
  168|  34.6k|    hdr->width_n_bits = dav1d_get_bits(gb, 4) + 1;
  169|  34.6k|    hdr->height_n_bits = dav1d_get_bits(gb, 4) + 1;
  170|  34.6k|    hdr->max_width = dav1d_get_bits(gb, hdr->width_n_bits) + 1;
  171|  34.6k|    hdr->max_height = dav1d_get_bits(gb, hdr->height_n_bits) + 1;
  172|       |#if DEBUG_SEQ_HDR
  173|       |    printf("SEQHDR: post-size: off=%u\n",
  174|       |           dav1d_get_bits_pos(gb) - init_bit_pos);
  175|       |#endif
  176|  34.6k|    if (!hdr->reduced_still_picture_header) {
  ------------------
  |  Branch (176:9): [True: 13.6k, False: 20.9k]
  ------------------
  177|  13.6k|        hdr->frame_id_numbers_present = dav1d_get_bit(gb);
  178|  13.6k|        if (hdr->frame_id_numbers_present) {
  ------------------
  |  Branch (178:13): [True: 1.24k, False: 12.4k]
  ------------------
  179|  1.24k|            hdr->delta_frame_id_n_bits = dav1d_get_bits(gb, 4) + 2;
  180|  1.24k|            hdr->frame_id_n_bits = dav1d_get_bits(gb, 3) + hdr->delta_frame_id_n_bits + 1;
  181|  1.24k|        }
  182|  13.6k|    }
  183|       |#if DEBUG_SEQ_HDR
  184|       |    printf("SEQHDR: post-frame-id-numbers-present: off=%u\n",
  185|       |           dav1d_get_bits_pos(gb) - init_bit_pos);
  186|       |#endif
  187|       |
  188|  34.6k|    hdr->sb128 = dav1d_get_bit(gb);
  189|  34.6k|    hdr->filter_intra = dav1d_get_bit(gb);
  190|  34.6k|    hdr->intra_edge_filter = dav1d_get_bit(gb);
  191|  34.6k|    if (hdr->reduced_still_picture_header) {
  ------------------
  |  Branch (191:9): [True: 20.9k, False: 13.6k]
  ------------------
  192|  20.9k|        hdr->screen_content_tools = DAV1D_ADAPTIVE;
  193|  20.9k|        hdr->force_integer_mv = DAV1D_ADAPTIVE;
  194|  20.9k|    } else {
  195|  13.6k|        hdr->inter_intra = dav1d_get_bit(gb);
  196|  13.6k|        hdr->masked_compound = dav1d_get_bit(gb);
  197|  13.6k|        hdr->warped_motion = dav1d_get_bit(gb);
  198|  13.6k|        hdr->dual_filter = dav1d_get_bit(gb);
  199|  13.6k|        hdr->order_hint = dav1d_get_bit(gb);
  200|  13.6k|        if (hdr->order_hint) {
  ------------------
  |  Branch (200:13): [True: 9.10k, False: 4.54k]
  ------------------
  201|  9.10k|            hdr->jnt_comp = dav1d_get_bit(gb);
  202|  9.10k|            hdr->ref_frame_mvs = dav1d_get_bit(gb);
  203|  9.10k|        }
  204|  13.6k|        hdr->screen_content_tools = dav1d_get_bit(gb) ? DAV1D_ADAPTIVE : dav1d_get_bit(gb);
  ------------------
  |  Branch (204:37): [True: 9.13k, False: 4.50k]
  ------------------
  205|       |    #if DEBUG_SEQ_HDR
  206|       |        printf("SEQHDR: post-screentools: off=%u\n",
  207|       |               dav1d_get_bits_pos(gb) - init_bit_pos);
  208|       |    #endif
  209|  13.6k|        hdr->force_integer_mv = hdr->screen_content_tools ?
  ------------------
  |  Branch (209:33): [True: 10.9k, False: 2.68k]
  ------------------
  210|  10.9k|                                dav1d_get_bit(gb) ? DAV1D_ADAPTIVE : dav1d_get_bit(gb) : 2;
  ------------------
  |  Branch (210:33): [True: 5.80k, False: 5.16k]
  ------------------
  211|  13.6k|        if (hdr->order_hint)
  ------------------
  |  Branch (211:13): [True: 9.10k, False: 4.54k]
  ------------------
  212|  9.10k|            hdr->order_hint_n_bits = dav1d_get_bits(gb, 3) + 1;
  213|  13.6k|    }
  214|  34.6k|    hdr->super_res = dav1d_get_bit(gb);
  215|  34.6k|    hdr->cdef = dav1d_get_bit(gb);
  216|  34.6k|    hdr->restoration = dav1d_get_bit(gb);
  217|       |#if DEBUG_SEQ_HDR
  218|       |    printf("SEQHDR: post-featurebits: off=%u\n",
  219|       |           dav1d_get_bits_pos(gb) - init_bit_pos);
  220|       |#endif
  221|       |
  222|  34.6k|    hdr->hbd = dav1d_get_bit(gb);
  223|  34.6k|    if (hdr->profile == 2 && hdr->hbd)
  ------------------
  |  Branch (223:9): [True: 15.4k, False: 19.1k]
  |  Branch (223:30): [True: 11.8k, False: 3.60k]
  ------------------
  224|  11.8k|        hdr->hbd += dav1d_get_bit(gb);
  225|  34.6k|    if (hdr->profile != 1)
  ------------------
  |  Branch (225:9): [True: 26.1k, False: 8.48k]
  ------------------
  226|  26.1k|        hdr->monochrome = dav1d_get_bit(gb);
  227|  34.6k|    hdr->color_description_present = dav1d_get_bit(gb);
  228|  34.6k|    if (hdr->color_description_present) {
  ------------------
  |  Branch (228:9): [True: 4.72k, False: 29.9k]
  ------------------
  229|  4.72k|        hdr->pri = dav1d_get_bits(gb, 8);
  230|  4.72k|        hdr->trc = dav1d_get_bits(gb, 8);
  231|  4.72k|        hdr->mtrx = dav1d_get_bits(gb, 8);
  232|  29.9k|    } else {
  233|  29.9k|        hdr->pri = DAV1D_COLOR_PRI_UNKNOWN;
  234|  29.9k|        hdr->trc = DAV1D_TRC_UNKNOWN;
  235|  29.9k|        hdr->mtrx = DAV1D_MC_UNKNOWN;
  236|  29.9k|    }
  237|  34.6k|    if (hdr->monochrome) {
  ------------------
  |  Branch (237:9): [True: 9.88k, False: 24.7k]
  ------------------
  238|  9.88k|        hdr->color_range = dav1d_get_bit(gb);
  239|  9.88k|        hdr->layout = DAV1D_PIXEL_LAYOUT_I400;
  240|  9.88k|        hdr->ss_hor = hdr->ss_ver = 1;
  241|  9.88k|        hdr->chr = DAV1D_CHR_UNKNOWN;
  242|  24.7k|    } else if (hdr->pri == DAV1D_COLOR_PRI_BT709 &&
  ------------------
  |  Branch (242:16): [True: 1.17k, False: 23.5k]
  ------------------
  243|  1.17k|               hdr->trc == DAV1D_TRC_SRGB &&
  ------------------
  |  Branch (243:16): [True: 947, False: 225]
  ------------------
  244|    947|               hdr->mtrx == DAV1D_MC_IDENTITY)
  ------------------
  |  Branch (244:16): [True: 778, False: 169]
  ------------------
  245|    778|    {
  246|    778|        hdr->layout = DAV1D_PIXEL_LAYOUT_I444;
  247|    778|        hdr->color_range = 1;
  248|    778|        if (hdr->profile != 1 && !(hdr->profile == 2 && hdr->hbd == 2))
  ------------------
  |  Branch (248:13): [True: 579, False: 199]
  |  Branch (248:36): [True: 439, False: 140]
  |  Branch (248:57): [True: 238, False: 201]
  ------------------
  249|    341|            goto error;
  250|  23.9k|    } else {
  251|  23.9k|        hdr->color_range = dav1d_get_bit(gb);
  252|  23.9k|        switch (hdr->profile) {
  ------------------
  |  Branch (252:17): [True: 23.9k, False: 0]
  ------------------
  253|  7.58k|        case 0: hdr->layout = DAV1D_PIXEL_LAYOUT_I420;
  ------------------
  |  Branch (253:9): [True: 7.58k, False: 16.3k]
  ------------------
  254|  7.58k|                hdr->ss_hor = hdr->ss_ver = 1;
  255|  7.58k|                break;
  256|  8.29k|        case 1: hdr->layout = DAV1D_PIXEL_LAYOUT_I444;
  ------------------
  |  Branch (256:9): [True: 8.29k, False: 15.6k]
  ------------------
  257|  8.29k|                break;
  258|  8.09k|        case 2:
  ------------------
  |  Branch (258:9): [True: 8.09k, False: 15.8k]
  ------------------
  259|  8.09k|            if (hdr->hbd == 2) {
  ------------------
  |  Branch (259:17): [True: 5.46k, False: 2.63k]
  ------------------
  260|  5.46k|                hdr->ss_hor = dav1d_get_bit(gb);
  261|  5.46k|                if (hdr->ss_hor)
  ------------------
  |  Branch (261:21): [True: 1.13k, False: 4.33k]
  ------------------
  262|  1.13k|                    hdr->ss_ver = dav1d_get_bit(gb);
  263|  5.46k|            } else
  264|  2.63k|                hdr->ss_hor = 1;
  265|  8.09k|            hdr->layout = hdr->ss_hor ?
  ------------------
  |  Branch (265:27): [True: 3.76k, False: 4.33k]
  ------------------
  266|  3.76k|                          hdr->ss_ver ? DAV1D_PIXEL_LAYOUT_I420 :
  ------------------
  |  Branch (266:27): [True: 1.06k, False: 2.70k]
  ------------------
  267|  3.76k|                                        DAV1D_PIXEL_LAYOUT_I422 :
  268|  8.09k|                                        DAV1D_PIXEL_LAYOUT_I444;
  269|  8.09k|            break;
  270|  23.9k|        }
  271|  23.9k|        hdr->chr = (hdr->ss_hor & hdr->ss_ver) ?
  ------------------
  |  Branch (271:20): [True: 8.64k, False: 15.3k]
  ------------------
  272|  15.3k|                   dav1d_get_bits(gb, 2) : DAV1D_CHR_UNKNOWN;
  273|  23.9k|    }
  274|  34.2k|    if (strict_std_compliance &&
  ------------------
  |  Branch (274:9): [True: 0, False: 34.2k]
  ------------------
  275|      0|        hdr->mtrx == DAV1D_MC_IDENTITY && hdr->layout != DAV1D_PIXEL_LAYOUT_I444)
  ------------------
  |  Branch (275:9): [True: 0, False: 0]
  |  Branch (275:43): [True: 0, False: 0]
  ------------------
  276|      0|    {
  277|      0|        goto error;
  278|      0|    }
  279|  34.2k|    if (!hdr->monochrome)
  ------------------
  |  Branch (279:9): [True: 24.4k, False: 9.88k]
  ------------------
  280|  24.4k|        hdr->separate_uv_delta_q = dav1d_get_bit(gb);
  281|       |#if DEBUG_SEQ_HDR
  282|       |    printf("SEQHDR: post-colorinfo: off=%u\n",
  283|       |           dav1d_get_bits_pos(gb) - init_bit_pos);
  284|       |#endif
  285|       |
  286|  34.2k|    hdr->film_grain_present = dav1d_get_bit(gb);
  287|       |#if DEBUG_SEQ_HDR
  288|       |    printf("SEQHDR: post-filmgrain: off=%u\n",
  289|       |           dav1d_get_bits_pos(gb) - init_bit_pos);
  290|       |#endif
  291|       |
  292|       |    // We needn't bother flushing the OBU here: we'll check we didn't
  293|       |    // overrun in the caller and will then discard gb, so there's no
  294|       |    // point in setting its position properly.
  295|       |
  296|  34.2k|    return check_trailing_bits(gb, strict_std_compliance);
  297|       |
  298|  2.73k|error:
  299|  2.73k|    return DAV1D_ERR(EINVAL);
  ------------------
  |  |   58|  2.73k|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  300|  34.2k|}
obu.c:parse_frame_hdr:
  409|   328k|static int parse_frame_hdr(Dav1dContext *const c, GetBits *const gb) {
  410|   328k|#define DEBUG_FRAME_HDR 0
  411|       |
  412|       |#if DEBUG_FRAME_HDR
  413|       |    const uint8_t *const init_ptr = gb->ptr;
  414|       |#endif
  415|   328k|    const Dav1dSequenceHeader *const seqhdr = c->seq_hdr;
  416|   328k|    Dav1dFrameHeader *const hdr = c->frame_hdr;
  417|       |
  418|   328k|    if (!seqhdr->reduced_still_picture_header)
  ------------------
  |  Branch (418:9): [True: 128k, False: 200k]
  ------------------
  419|   128k|        hdr->show_existing_frame = dav1d_get_bit(gb);
  420|       |#if DEBUG_FRAME_HDR
  421|       |    printf("HDR: post-show_existing_frame: off=%td\n",
  422|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
  423|       |#endif
  424|   328k|    if (hdr->show_existing_frame) {
  ------------------
  |  Branch (424:9): [True: 15.8k, False: 312k]
  ------------------
  425|  15.8k|        hdr->existing_frame_idx = dav1d_get_bits(gb, 3);
  426|  15.8k|        if (seqhdr->decoder_model_info_present && !seqhdr->equal_picture_interval)
  ------------------
  |  Branch (426:13): [True: 521, False: 15.3k]
  |  Branch (426:51): [True: 202, False: 319]
  ------------------
  427|    202|            hdr->frame_presentation_delay = dav1d_get_bits(gb, seqhdr->frame_presentation_delay_length);
  428|  15.8k|        if (seqhdr->frame_id_numbers_present) {
  ------------------
  |  Branch (428:13): [True: 855, False: 15.0k]
  ------------------
  429|    855|            hdr->frame_id = dav1d_get_bits(gb, seqhdr->frame_id_n_bits);
  430|    855|            Dav1dFrameHeader *const ref_frame_hdr = c->refs[hdr->existing_frame_idx].p.p.frame_hdr;
  431|    855|            if (!ref_frame_hdr || ref_frame_hdr->frame_id != hdr->frame_id) goto error;
  ------------------
  |  Branch (431:17): [True: 256, False: 599]
  |  Branch (431:35): [True: 231, False: 368]
  ------------------
  432|    855|        }
  433|  15.3k|        return 0;
  434|  15.8k|    }
  435|       |
  436|   312k|    if (seqhdr->reduced_still_picture_header) {
  ------------------
  |  Branch (436:9): [True: 200k, False: 112k]
  ------------------
  437|   200k|        hdr->frame_type = DAV1D_FRAME_TYPE_KEY;
  438|   200k|        hdr->show_frame = 1;
  439|   200k|    } else {
  440|   112k|        hdr->frame_type = dav1d_get_bits(gb, 2);
  441|   112k|        hdr->show_frame = dav1d_get_bit(gb);
  442|   112k|    }
  443|   312k|    if (hdr->show_frame) {
  ------------------
  |  Branch (443:9): [True: 240k, False: 72.4k]
  ------------------
  444|   240k|        if (seqhdr->decoder_model_info_present && !seqhdr->equal_picture_interval)
  ------------------
  |  Branch (444:13): [True: 6.64k, False: 233k]
  |  Branch (444:51): [True: 6.29k, False: 349]
  ------------------
  445|  6.29k|            hdr->frame_presentation_delay = dav1d_get_bits(gb, seqhdr->frame_presentation_delay_length);
  446|   240k|        hdr->showable_frame = hdr->frame_type != DAV1D_FRAME_TYPE_KEY;
  447|   240k|    } else
  448|  72.4k|        hdr->showable_frame = dav1d_get_bit(gb);
  449|   312k|    hdr->error_resilient_mode =
  450|   312k|        (hdr->frame_type == DAV1D_FRAME_TYPE_KEY && hdr->show_frame) ||
  ------------------
  |  Branch (450:10): [True: 212k, False: 100k]
  |  Branch (450:53): [True: 209k, False: 2.99k]
  ------------------
  451|   103k|        hdr->frame_type == DAV1D_FRAME_TYPE_SWITCH ||
  ------------------
  |  Branch (451:9): [True: 3.07k, False: 100k]
  ------------------
  452|   100k|        seqhdr->reduced_still_picture_header || dav1d_get_bit(gb);
  ------------------
  |  Branch (452:9): [True: 0, False: 100k]
  |  Branch (452:49): [True: 2.41k, False: 97.6k]
  ------------------
  453|       |#if DEBUG_FRAME_HDR
  454|       |    printf("HDR: post-frametype_bits: off=%td\n",
  455|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
  456|       |#endif
  457|   312k|    hdr->disable_cdf_update = dav1d_get_bit(gb);
  458|   312k|    hdr->allow_screen_content_tools = seqhdr->screen_content_tools == DAV1D_ADAPTIVE ?
  ------------------
  |  Branch (458:39): [True: 274k, False: 38.0k]
  ------------------
  459|   274k|                                      dav1d_get_bit(gb) : seqhdr->screen_content_tools;
  460|   312k|    if (hdr->allow_screen_content_tools)
  ------------------
  |  Branch (460:9): [True: 237k, False: 75.3k]
  ------------------
  461|   237k|        hdr->force_integer_mv = seqhdr->force_integer_mv == DAV1D_ADAPTIVE ?
  ------------------
  |  Branch (461:33): [True: 185k, False: 52.0k]
  ------------------
  462|   185k|                                dav1d_get_bit(gb) : seqhdr->force_integer_mv;
  463|       |
  464|   312k|    if (IS_KEY_OR_INTRA(hdr))
  ------------------
  |  |   43|   312k|    (!IS_INTER_OR_SWITCH(frame_header))
  |  |  ------------------
  |  |  |  |   36|   312k|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (43:5): [True: 214k, False: 98.7k]
  |  |  ------------------
  ------------------
  465|   214k|        hdr->force_integer_mv = 1;
  466|       |
  467|   312k|    if (seqhdr->frame_id_numbers_present)
  ------------------
  |  Branch (467:9): [True: 1.27k, False: 311k]
  ------------------
  468|  1.27k|        hdr->frame_id = dav1d_get_bits(gb, seqhdr->frame_id_n_bits);
  469|       |
  470|   312k|    if (!seqhdr->reduced_still_picture_header)
  ------------------
  |  Branch (470:9): [True: 112k, False: 200k]
  ------------------
  471|   112k|        hdr->frame_size_override = hdr->frame_type == DAV1D_FRAME_TYPE_SWITCH ? 1 : dav1d_get_bit(gb);
  ------------------
  |  Branch (471:36): [True: 3.07k, False: 109k]
  ------------------
  472|       |#if DEBUG_FRAME_HDR
  473|       |    printf("HDR: post-frame_size_override_flag: off=%td\n",
  474|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
  475|       |#endif
  476|   312k|    if (seqhdr->order_hint)
  ------------------
  |  Branch (476:9): [True: 100k, False: 212k]
  ------------------
  477|   100k|        hdr->frame_offset = dav1d_get_bits(gb, seqhdr->order_hint_n_bits);
  478|   312k|    hdr->primary_ref_frame = !hdr->error_resilient_mode && IS_INTER_OR_SWITCH(hdr) ?
  ------------------
  |  |   36|  97.6k|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 95.3k, False: 2.29k]
  |  |  ------------------
  ------------------
  |  Branch (478:30): [True: 97.6k, False: 215k]
  ------------------
  479|   217k|                             dav1d_get_bits(gb, 3) : DAV1D_PRIMARY_REF_NONE;
  ------------------
  |  |   45|   530k|#define DAV1D_PRIMARY_REF_NONE 7
  ------------------
  480|       |
  481|   312k|    if (seqhdr->decoder_model_info_present) {
  ------------------
  |  Branch (481:9): [True: 6.90k, False: 306k]
  ------------------
  482|  6.90k|        hdr->buffer_removal_time_present = dav1d_get_bit(gb);
  483|  6.90k|        if (hdr->buffer_removal_time_present) {
  ------------------
  |  Branch (483:13): [True: 881, False: 6.02k]
  ------------------
  484|  4.51k|            for (int i = 0; i < c->seq_hdr->num_operating_points; i++) {
  ------------------
  |  Branch (484:29): [True: 3.63k, False: 881]
  ------------------
  485|  3.63k|                const struct Dav1dSequenceHeaderOperatingPoint *const seqop = &seqhdr->operating_points[i];
  486|  3.63k|                struct Dav1dFrameHeaderOperatingPoint *const op = &hdr->operating_points[i];
  487|  3.63k|                if (seqop->decoder_model_param_present) {
  ------------------
  |  Branch (487:21): [True: 1.91k, False: 1.71k]
  ------------------
  488|  1.91k|                    int in_temporal_layer = (seqop->idc >> hdr->temporal_id) & 1;
  489|  1.91k|                    int in_spatial_layer  = (seqop->idc >> (hdr->spatial_id + 8)) & 1;
  490|  1.91k|                    if (!seqop->idc || (in_temporal_layer && in_spatial_layer))
  ------------------
  |  Branch (490:25): [True: 327, False: 1.58k]
  |  Branch (490:41): [True: 715, False: 874]
  |  Branch (490:62): [True: 500, False: 215]
  ------------------
  491|    827|                        op->buffer_removal_time = dav1d_get_bits(gb, seqhdr->buffer_removal_delay_length);
  492|  1.91k|                }
  493|  3.63k|            }
  494|    881|        }
  495|  6.90k|    }
  496|       |
  497|   312k|    if (IS_KEY_OR_INTRA(hdr)) {
  ------------------
  |  |   43|   312k|    (!IS_INTER_OR_SWITCH(frame_header))
  |  |  ------------------
  |  |  |  |   36|   312k|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (43:5): [True: 214k, False: 98.7k]
  |  |  ------------------
  ------------------
  498|   214k|        hdr->refresh_frame_flags = (hdr->frame_type == DAV1D_FRAME_TYPE_KEY &&
  ------------------
  |  Branch (498:37): [True: 212k, False: 1.45k]
  ------------------
  499|   212k|                                    hdr->show_frame) ? 0xff : dav1d_get_bits(gb, 8);
  ------------------
  |  Branch (499:37): [True: 209k, False: 2.99k]
  ------------------
  500|   214k|        if (hdr->refresh_frame_flags != 0xff && hdr->error_resilient_mode && seqhdr->order_hint)
  ------------------
  |  Branch (500:13): [True: 4.40k, False: 209k]
  |  Branch (500:49): [True: 2.15k, False: 2.25k]
  |  Branch (500:78): [True: 1.09k, False: 1.05k]
  ------------------
  501|  9.86k|            for (int i = 0; i < 8; i++)
  ------------------
  |  Branch (501:29): [True: 8.76k, False: 1.09k]
  ------------------
  502|  8.76k|                dav1d_get_bits(gb, seqhdr->order_hint_n_bits);
  503|   214k|        if (c->strict_std_compliance &&
  ------------------
  |  Branch (503:13): [True: 0, False: 214k]
  ------------------
  504|      0|            hdr->frame_type == DAV1D_FRAME_TYPE_INTRA && hdr->refresh_frame_flags == 0xff)
  ------------------
  |  Branch (504:13): [True: 0, False: 0]
  |  Branch (504:58): [True: 0, False: 0]
  ------------------
  505|      0|        {
  506|      0|            goto error;
  507|      0|        }
  508|   214k|        if (read_frame_size(c, gb, 0) < 0) goto error;
  ------------------
  |  Branch (508:13): [True: 0, False: 214k]
  ------------------
  509|   214k|        if (hdr->allow_screen_content_tools && !hdr->super_res.enabled)
  ------------------
  |  Branch (509:13): [True: 184k, False: 29.4k]
  |  Branch (509:48): [True: 182k, False: 2.31k]
  ------------------
  510|   182k|            hdr->allow_intrabc = dav1d_get_bit(gb);
  511|   214k|    } else {
  512|  98.7k|        hdr->refresh_frame_flags = hdr->frame_type == DAV1D_FRAME_TYPE_SWITCH ? 0xff :
  ------------------
  |  Branch (512:36): [True: 3.07k, False: 95.6k]
  ------------------
  513|  98.7k|                                   dav1d_get_bits(gb, 8);
  514|  98.7k|        if (hdr->error_resilient_mode && seqhdr->order_hint)
  ------------------
  |  Branch (514:13): [True: 3.34k, False: 95.3k]
  |  Branch (514:42): [True: 2.46k, False: 887]
  ------------------
  515|  22.1k|            for (int i = 0; i < 8; i++)
  ------------------
  |  Branch (515:29): [True: 19.6k, False: 2.46k]
  ------------------
  516|  19.6k|                dav1d_get_bits(gb, seqhdr->order_hint_n_bits);
  517|  98.7k|        if (seqhdr->order_hint) {
  ------------------
  |  Branch (517:13): [True: 90.9k, False: 7.80k]
  ------------------
  518|  90.9k|            hdr->frame_ref_short_signaling = dav1d_get_bit(gb);
  519|  90.9k|            if (hdr->frame_ref_short_signaling) {
  ------------------
  |  Branch (519:17): [True: 69.8k, False: 21.0k]
  ------------------
  520|  69.8k|                hdr->refidx[0] = dav1d_get_bits(gb, 3);
  521|  69.8k|                hdr->refidx[1] = hdr->refidx[2] = -1;
  522|  69.8k|                hdr->refidx[3] = dav1d_get_bits(gb, 3);
  523|       |
  524|       |                /* +1 allows for unconditional stores, as unused
  525|       |                 * values can be dumped into frame_offset[-1]. */
  526|  69.8k|                int frame_offset_mem[8+1];
  527|  69.8k|                int *const frame_offset = &frame_offset_mem[1];
  528|  69.8k|                int earliest_ref = -1;
  529|   626k|                for (int i = 0, earliest_offset = INT_MAX; i < 8; i++) {
  ------------------
  |  Branch (529:60): [True: 556k, False: 69.5k]
  ------------------
  530|   556k|                    const Dav1dFrameHeader *const refhdr = c->refs[i].p.p.frame_hdr;
  531|   556k|                    if (!refhdr) goto error;
  ------------------
  |  Branch (531:25): [True: 304, False: 556k]
  ------------------
  532|   556k|                    const int diff = get_poc_diff(seqhdr->order_hint_n_bits,
  533|   556k|                                                  refhdr->frame_offset,
  534|   556k|                                                  hdr->frame_offset);
  535|   556k|                    frame_offset[i] = diff;
  536|   556k|                    if (diff < earliest_offset) {
  ------------------
  |  Branch (536:25): [True: 107k, False: 449k]
  ------------------
  537|   107k|                        earliest_offset = diff;
  538|   107k|                        earliest_ref = i;
  539|   107k|                    }
  540|   556k|                }
  541|  69.5k|                frame_offset[hdr->refidx[0]] = INT_MIN; // = reference frame is used
  542|  69.5k|                frame_offset[hdr->refidx[3]] = INT_MIN;
  543|  69.5k|                assert(earliest_ref >= 0);
  ------------------
  |  Branch (543:17): [True: 69.5k, False: 0]
  ------------------
  544|       |
  545|  69.5k|                int refidx = -1;
  546|   626k|                for (int i = 0, latest_offset = 0; i < 8; i++) {
  ------------------
  |  Branch (546:52): [True: 556k, False: 69.5k]
  ------------------
  547|   556k|                    const int hint = frame_offset[i];
  548|   556k|                    if (hint >= latest_offset) {
  ------------------
  |  Branch (548:25): [True: 257k, False: 298k]
  ------------------
  549|   257k|                        latest_offset = hint;
  550|   257k|                        refidx = i;
  551|   257k|                    }
  552|   556k|                }
  553|  69.5k|                frame_offset[refidx] = INT_MIN;
  554|  69.5k|                hdr->refidx[6] = refidx;
  555|       |
  556|   208k|                for (int i = 4; i < 6; i++) {
  ------------------
  |  Branch (556:33): [True: 139k, False: 69.5k]
  ------------------
  557|       |                    /* Unsigned compares to handle negative values. */
  558|   139k|                    unsigned earliest_offset = UINT8_MAX;
  559|   139k|                    refidx = -1;
  560|  1.25M|                    for (int j = 0; j < 8; j++) {
  ------------------
  |  Branch (560:37): [True: 1.11M, False: 139k]
  ------------------
  561|  1.11M|                        const unsigned hint = frame_offset[j];
  562|  1.11M|                        if (hint < earliest_offset) {
  ------------------
  |  Branch (562:29): [True: 129k, False: 983k]
  ------------------
  563|   129k|                            earliest_offset = hint;
  564|   129k|                            refidx = j;
  565|   129k|                        }
  566|  1.11M|                    }
  567|   139k|                    frame_offset[refidx] = INT_MIN;
  568|   139k|                    hdr->refidx[i] = refidx;
  569|   139k|                }
  570|       |
  571|   486k|                for (int i = 1; i < 7; i++) {
  ------------------
  |  Branch (571:33): [True: 417k, False: 69.5k]
  ------------------
  572|   417k|                    refidx = hdr->refidx[i];
  573|   417k|                    if (refidx < 0) {
  ------------------
  |  Branch (573:25): [True: 164k, False: 252k]
  ------------------
  574|   164k|                        unsigned latest_offset = ~UINT8_MAX;
  575|  1.48M|                        for (int j = 0; j < 8; j++) {
  ------------------
  |  Branch (575:41): [True: 1.31M, False: 164k]
  ------------------
  576|  1.31M|                            const unsigned hint = frame_offset[j];
  577|  1.31M|                            if (hint >= latest_offset) {
  ------------------
  |  Branch (577:33): [True: 299k, False: 1.01M]
  ------------------
  578|   299k|                                latest_offset = hint;
  579|   299k|                                refidx = j;
  580|   299k|                            }
  581|  1.31M|                        }
  582|   164k|                        frame_offset[refidx] = INT_MIN;
  583|   164k|                        hdr->refidx[i] = refidx >= 0 ? refidx : earliest_ref;
  ------------------
  |  Branch (583:42): [True: 112k, False: 51.8k]
  ------------------
  584|   164k|                    }
  585|   417k|                }
  586|  69.5k|            }
  587|  90.9k|        }
  588|   781k|        for (int i = 0; i < 7; i++) {
  ------------------
  |  Branch (588:25): [True: 683k, False: 97.5k]
  ------------------
  589|   683k|            if (!hdr->frame_ref_short_signaling)
  ------------------
  |  Branch (589:17): [True: 197k, False: 486k]
  ------------------
  590|   197k|                hdr->refidx[i] = dav1d_get_bits(gb, 3);
  591|   683k|            if (seqhdr->frame_id_numbers_present) {
  ------------------
  |  Branch (591:17): [True: 1.05k, False: 682k]
  ------------------
  592|  1.05k|                const unsigned delta_ref_frame_id = dav1d_get_bits(gb, seqhdr->delta_frame_id_n_bits) + 1;
  593|  1.05k|                const unsigned ref_frame_id = (hdr->frame_id + (1 << seqhdr->frame_id_n_bits) - delta_ref_frame_id) & ((1 << seqhdr->frame_id_n_bits) - 1);
  594|  1.05k|                Dav1dFrameHeader *const ref_frame_hdr = c->refs[hdr->refidx[i]].p.p.frame_hdr;
  595|  1.05k|                if (!ref_frame_hdr || ref_frame_hdr->frame_id != ref_frame_id) goto error;
  ------------------
  |  Branch (595:21): [True: 459, False: 600]
  |  Branch (595:39): [True: 392, False: 208]
  ------------------
  596|  1.05k|            }
  597|   683k|        }
  598|  97.5k|        const int use_ref = !hdr->error_resilient_mode &&
  ------------------
  |  Branch (598:29): [True: 94.9k, False: 2.62k]
  ------------------
  599|  94.9k|                            hdr->frame_size_override;
  ------------------
  |  Branch (599:29): [True: 64.3k, False: 30.5k]
  ------------------
  600|  97.5k|        if (read_frame_size(c, gb, use_ref) < 0) goto error;
  ------------------
  |  Branch (600:13): [True: 178, False: 97.3k]
  ------------------
  601|  97.3k|        if (!hdr->force_integer_mv)
  ------------------
  |  Branch (601:13): [True: 58.3k, False: 38.9k]
  ------------------
  602|  58.3k|            hdr->hp = dav1d_get_bit(gb);
  603|  97.3k|        hdr->subpel_filter_mode = dav1d_get_bit(gb) ? DAV1D_FILTER_SWITCHABLE :
  ------------------
  |  Branch (603:35): [True: 68.0k, False: 29.3k]
  ------------------
  604|  97.3k|                                                      dav1d_get_bits(gb, 2);
  605|  97.3k|        hdr->switchable_motion_mode = dav1d_get_bit(gb);
  606|  97.3k|        if (!hdr->error_resilient_mode && seqhdr->ref_frame_mvs &&
  ------------------
  |  Branch (606:13): [True: 94.7k, False: 2.62k]
  |  Branch (606:43): [True: 75.3k, False: 19.4k]
  ------------------
  607|  75.3k|            seqhdr->order_hint && IS_INTER_OR_SWITCH(hdr))
  ------------------
  |  |   36|  75.3k|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 75.3k, False: 0]
  |  |  ------------------
  ------------------
  |  Branch (607:13): [True: 75.3k, False: 0]
  ------------------
  608|  75.3k|        {
  609|  75.3k|            hdr->use_ref_frame_mvs = dav1d_get_bit(gb);
  610|  75.3k|        }
  611|  97.3k|    }
  612|       |#if DEBUG_FRAME_HDR
  613|       |    printf("HDR: post-frametype-specific-bits: off=%td\n",
  614|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
  615|       |#endif
  616|       |
  617|   311k|    if (!seqhdr->reduced_still_picture_header && !hdr->disable_cdf_update)
  ------------------
  |  Branch (617:9): [True: 111k, False: 200k]
  |  Branch (617:50): [True: 103k, False: 8.52k]
  ------------------
  618|   103k|        hdr->refresh_context = !dav1d_get_bit(gb);
  619|       |#if DEBUG_FRAME_HDR
  620|       |    printf("HDR: post-refresh_context: off=%td\n",
  621|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
  622|       |#endif
  623|       |
  624|       |    // tile data
  625|   311k|    hdr->tiling.uniform = dav1d_get_bit(gb);
  626|   311k|    const int sbsz_min1 = (64 << seqhdr->sb128) - 1;
  627|   311k|    const int sbsz_log2 = 6 + seqhdr->sb128;
  628|   311k|    const int sbw = (hdr->width[0] + sbsz_min1) >> sbsz_log2;
  629|   311k|    const int sbh = (hdr->height + sbsz_min1) >> sbsz_log2;
  630|   311k|    const int max_tile_width_sb = 4096 >> sbsz_log2;
  631|   311k|    const int max_tile_area_sb = 4096 * 2304 >> (2 * sbsz_log2);
  632|   311k|    hdr->tiling.min_log2_cols = tile_log2(max_tile_width_sb, sbw);
  633|   311k|    hdr->tiling.max_log2_cols = tile_log2(1, imin(sbw, DAV1D_MAX_TILE_COLS));
  ------------------
  |  |   41|   311k|#define DAV1D_MAX_TILE_COLS 64
  ------------------
  634|   311k|    hdr->tiling.max_log2_rows = tile_log2(1, imin(sbh, DAV1D_MAX_TILE_ROWS));
  ------------------
  |  |   42|   311k|#define DAV1D_MAX_TILE_ROWS 64
  ------------------
  635|   311k|    const int min_log2_tiles = imax(tile_log2(max_tile_area_sb, sbw * sbh),
  636|   311k|                              hdr->tiling.min_log2_cols);
  637|   311k|    if (hdr->tiling.uniform) {
  ------------------
  |  Branch (637:9): [True: 266k, False: 44.6k]
  ------------------
  638|   266k|        for (hdr->tiling.log2_cols = hdr->tiling.min_log2_cols;
  639|   273k|             hdr->tiling.log2_cols < hdr->tiling.max_log2_cols && dav1d_get_bit(gb);
  ------------------
  |  Branch (639:14): [True: 83.6k, False: 190k]
  |  Branch (639:67): [True: 6.89k, False: 76.7k]
  ------------------
  640|   266k|             hdr->tiling.log2_cols++) ;
  641|   266k|        const int tile_w = 1 + ((sbw - 1) >> hdr->tiling.log2_cols);
  642|   266k|        hdr->tiling.cols = 0;
  643|   556k|        for (int sbx = 0; sbx < sbw; sbx += tile_w, hdr->tiling.cols++)
  ------------------
  |  Branch (643:27): [True: 289k, False: 266k]
  ------------------
  644|   289k|            hdr->tiling.col_start_sb[hdr->tiling.cols] = sbx;
  645|   266k|        hdr->tiling.min_log2_rows =
  646|   266k|            imax(min_log2_tiles - hdr->tiling.log2_cols, 0);
  647|       |
  648|   266k|        for (hdr->tiling.log2_rows = hdr->tiling.min_log2_rows;
  649|   289k|             hdr->tiling.log2_rows < hdr->tiling.max_log2_rows && dav1d_get_bit(gb);
  ------------------
  |  Branch (649:14): [True: 95.1k, False: 194k]
  |  Branch (649:67): [True: 22.2k, False: 72.8k]
  ------------------
  650|   266k|             hdr->tiling.log2_rows++) ;
  651|   266k|        const int tile_h = 1 + ((sbh - 1) >> hdr->tiling.log2_rows);
  652|   266k|        hdr->tiling.rows = 0;
  653|   617k|        for (int sby = 0; sby < sbh; sby += tile_h, hdr->tiling.rows++)
  ------------------
  |  Branch (653:27): [True: 350k, False: 266k]
  ------------------
  654|   350k|            hdr->tiling.row_start_sb[hdr->tiling.rows] = sby;
  655|   266k|    } else {
  656|  44.6k|        hdr->tiling.cols = 0;
  657|  44.6k|        int widest_tile = 0, max_tile_area_sb = sbw * sbh;
  658|   169k|        for (int sbx = 0; sbx < sbw && hdr->tiling.cols < DAV1D_MAX_TILE_COLS; hdr->tiling.cols++) {
  ------------------
  |  |   41|   125k|#define DAV1D_MAX_TILE_COLS 64
  ------------------
  |  Branch (658:27): [True: 125k, False: 44.3k]
  |  Branch (658:40): [True: 124k, False: 256]
  ------------------
  659|   124k|            const int tile_width_sb = imin(sbw - sbx, max_tile_width_sb);
  660|   124k|            const int tile_w = (tile_width_sb > 1) ? 1 + dav1d_get_uniform(gb, tile_width_sb) : 1;
  ------------------
  |  Branch (660:32): [True: 90.1k, False: 34.8k]
  ------------------
  661|   124k|            hdr->tiling.col_start_sb[hdr->tiling.cols] = sbx;
  662|   124k|            sbx += tile_w;
  663|   124k|            widest_tile = imax(widest_tile, tile_w);
  664|   124k|        }
  665|  44.6k|        hdr->tiling.log2_cols = tile_log2(1, hdr->tiling.cols);
  666|  44.6k|        if (min_log2_tiles) max_tile_area_sb >>= min_log2_tiles + 1;
  ------------------
  |  Branch (666:13): [True: 894, False: 43.7k]
  ------------------
  667|  44.6k|        const int max_tile_height_sb = imax(max_tile_area_sb / widest_tile, 1);
  668|       |
  669|  44.6k|        hdr->tiling.rows = 0;
  670|   137k|        for (int sby = 0; sby < sbh && hdr->tiling.rows < DAV1D_MAX_TILE_ROWS; hdr->tiling.rows++) {
  ------------------
  |  |   42|  92.9k|#define DAV1D_MAX_TILE_ROWS 64
  ------------------
  |  Branch (670:27): [True: 92.9k, False: 44.5k]
  |  Branch (670:40): [True: 92.8k, False: 135]
  ------------------
  671|  92.8k|            const int tile_height_sb = imin(sbh - sby, max_tile_height_sb);
  672|  92.8k|            const int tile_h = (tile_height_sb > 1) ? 1 + dav1d_get_uniform(gb, tile_height_sb) : 1;
  ------------------
  |  Branch (672:32): [True: 52.4k, False: 40.3k]
  ------------------
  673|  92.8k|            hdr->tiling.row_start_sb[hdr->tiling.rows] = sby;
  674|  92.8k|            sby += tile_h;
  675|  92.8k|        }
  676|  44.6k|        hdr->tiling.log2_rows = tile_log2(1, hdr->tiling.rows);
  677|  44.6k|    }
  678|   311k|    hdr->tiling.col_start_sb[hdr->tiling.cols] = sbw;
  679|   311k|    hdr->tiling.row_start_sb[hdr->tiling.rows] = sbh;
  680|   311k|    if (hdr->tiling.log2_cols || hdr->tiling.log2_rows) {
  ------------------
  |  Branch (680:9): [True: 12.9k, False: 298k]
  |  Branch (680:34): [True: 15.2k, False: 283k]
  ------------------
  681|  28.2k|        hdr->tiling.update = dav1d_get_bits(gb, hdr->tiling.log2_cols + hdr->tiling.log2_rows);
  682|  28.2k|        if (hdr->tiling.update >= hdr->tiling.cols * hdr->tiling.rows)
  ------------------
  |  Branch (682:13): [True: 563, False: 27.6k]
  ------------------
  683|    563|            goto error;
  684|  27.6k|        hdr->tiling.n_bytes = dav1d_get_bits(gb, 2) + 1;
  685|  27.6k|    }
  686|       |#if DEBUG_FRAME_HDR
  687|       |    printf("HDR: post-tiling: off=%td\n",
  688|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
  689|       |#endif
  690|       |
  691|       |    // quant data
  692|   311k|    hdr->quant.yac = dav1d_get_bits(gb, 8);
  693|   311k|    if (dav1d_get_bit(gb))
  ------------------
  |  Branch (693:9): [True: 31.6k, False: 279k]
  ------------------
  694|  31.6k|        hdr->quant.ydc_delta = dav1d_get_sbits(gb, 7);
  695|   311k|    if (!seqhdr->monochrome) {
  ------------------
  |  Branch (695:9): [True: 236k, False: 74.4k]
  ------------------
  696|       |        // If the sequence header says that delta_q might be different
  697|       |        // for U, V, we must check whether it actually is for this
  698|       |        // frame.
  699|   236k|        const int diff_uv_delta = seqhdr->separate_uv_delta_q ? dav1d_get_bit(gb) : 0;
  ------------------
  |  Branch (699:35): [True: 27.4k, False: 209k]
  ------------------
  700|   236k|        if (dav1d_get_bit(gb))
  ------------------
  |  Branch (700:13): [True: 14.2k, False: 222k]
  ------------------
  701|  14.2k|            hdr->quant.udc_delta = dav1d_get_sbits(gb, 7);
  702|   236k|        if (dav1d_get_bit(gb))
  ------------------
  |  Branch (702:13): [True: 13.7k, False: 222k]
  ------------------
  703|  13.7k|            hdr->quant.uac_delta = dav1d_get_sbits(gb, 7);
  704|   236k|        if (diff_uv_delta) {
  ------------------
  |  Branch (704:13): [True: 7.10k, False: 229k]
  ------------------
  705|  7.10k|            if (dav1d_get_bit(gb))
  ------------------
  |  Branch (705:17): [True: 1.56k, False: 5.53k]
  ------------------
  706|  1.56k|                hdr->quant.vdc_delta = dav1d_get_sbits(gb, 7);
  707|  7.10k|            if (dav1d_get_bit(gb))
  ------------------
  |  Branch (707:17): [True: 2.08k, False: 5.01k]
  ------------------
  708|  2.08k|                hdr->quant.vac_delta = dav1d_get_sbits(gb, 7);
  709|   229k|        } else {
  710|   229k|            hdr->quant.vdc_delta = hdr->quant.udc_delta;
  711|   229k|            hdr->quant.vac_delta = hdr->quant.uac_delta;
  712|   229k|        }
  713|   236k|    }
  714|       |#if DEBUG_FRAME_HDR
  715|       |    printf("HDR: post-quant: off=%td\n",
  716|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
  717|       |#endif
  718|   311k|    hdr->quant.qm = dav1d_get_bit(gb);
  719|   311k|    if (hdr->quant.qm) {
  ------------------
  |  Branch (719:9): [True: 28.0k, False: 283k]
  ------------------
  720|  28.0k|        hdr->quant.qm_y = dav1d_get_bits(gb, 4);
  721|  28.0k|        hdr->quant.qm_u = dav1d_get_bits(gb, 4);
  722|  28.0k|        hdr->quant.qm_v = seqhdr->separate_uv_delta_q ? dav1d_get_bits(gb, 4) :
  ------------------
  |  Branch (722:27): [True: 13.9k, False: 14.0k]
  ------------------
  723|  28.0k|                                                        hdr->quant.qm_u;
  724|  28.0k|    }
  725|       |#if DEBUG_FRAME_HDR
  726|       |    printf("HDR: post-qm: off=%td\n",
  727|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
  728|       |#endif
  729|       |
  730|       |    // segmentation data
  731|   311k|    hdr->segmentation.enabled = dav1d_get_bit(gb);
  732|   311k|    if (hdr->segmentation.enabled) {
  ------------------
  |  Branch (732:9): [True: 21.1k, False: 289k]
  ------------------
  733|  21.1k|        if (hdr->primary_ref_frame == DAV1D_PRIMARY_REF_NONE) {
  ------------------
  |  |   45|  21.1k|#define DAV1D_PRIMARY_REF_NONE 7
  ------------------
  |  Branch (733:13): [True: 5.69k, False: 15.4k]
  ------------------
  734|  5.69k|            hdr->segmentation.update_map = 1;
  735|  5.69k|            hdr->segmentation.update_data = 1;
  736|  15.4k|        } else {
  737|  15.4k|            hdr->segmentation.update_map = dav1d_get_bit(gb);
  738|  15.4k|            if (hdr->segmentation.update_map)
  ------------------
  |  Branch (738:17): [True: 7.52k, False: 7.87k]
  ------------------
  739|  7.52k|                hdr->segmentation.temporal = dav1d_get_bit(gb);
  740|  15.4k|            hdr->segmentation.update_data = dav1d_get_bit(gb);
  741|  15.4k|        }
  742|       |
  743|  21.1k|        if (hdr->segmentation.update_data) {
  ------------------
  |  Branch (743:13): [True: 7.42k, False: 13.6k]
  ------------------
  744|  7.42k|            hdr->segmentation.seg_data.last_active_segid = -1;
  745|  66.8k|            for (int i = 0; i < DAV1D_MAX_SEGMENTS; i++) {
  ------------------
  |  |   43|  66.8k|#define DAV1D_MAX_SEGMENTS 8
  ------------------
  |  Branch (745:29): [True: 59.4k, False: 7.42k]
  ------------------
  746|  59.4k|                Dav1dSegmentationData *const seg =
  747|  59.4k|                    &hdr->segmentation.seg_data.d[i];
  748|  59.4k|                if (dav1d_get_bit(gb)) {
  ------------------
  |  Branch (748:21): [True: 12.0k, False: 47.3k]
  ------------------
  749|  12.0k|                    seg->delta_q = dav1d_get_sbits(gb, 9);
  750|  12.0k|                    hdr->segmentation.seg_data.last_active_segid = i;
  751|  12.0k|                }
  752|  59.4k|                if (dav1d_get_bit(gb)) {
  ------------------
  |  Branch (752:21): [True: 9.95k, False: 49.4k]
  ------------------
  753|  9.95k|                    seg->delta_lf_y_v = dav1d_get_sbits(gb, 7);
  754|  9.95k|                    hdr->segmentation.seg_data.last_active_segid = i;
  755|  9.95k|                }
  756|  59.4k|                if (dav1d_get_bit(gb)) {
  ------------------
  |  Branch (756:21): [True: 12.4k, False: 46.9k]
  ------------------
  757|  12.4k|                    seg->delta_lf_y_h = dav1d_get_sbits(gb, 7);
  758|  12.4k|                    hdr->segmentation.seg_data.last_active_segid = i;
  759|  12.4k|                }
  760|  59.4k|                if (dav1d_get_bit(gb)) {
  ------------------
  |  Branch (760:21): [True: 11.2k, False: 48.1k]
  ------------------
  761|  11.2k|                    seg->delta_lf_u = dav1d_get_sbits(gb, 7);
  762|  11.2k|                    hdr->segmentation.seg_data.last_active_segid = i;
  763|  11.2k|                }
  764|  59.4k|                if (dav1d_get_bit(gb)) {
  ------------------
  |  Branch (764:21): [True: 9.34k, False: 50.0k]
  ------------------
  765|  9.34k|                    seg->delta_lf_v = dav1d_get_sbits(gb, 7);
  766|  9.34k|                    hdr->segmentation.seg_data.last_active_segid = i;
  767|  9.34k|                }
  768|  59.4k|                if (dav1d_get_bit(gb)) {
  ------------------
  |  Branch (768:21): [True: 10.3k, False: 49.0k]
  ------------------
  769|  10.3k|                    seg->ref = dav1d_get_bits(gb, 3);
  770|  10.3k|                    hdr->segmentation.seg_data.last_active_segid = i;
  771|  10.3k|                    hdr->segmentation.seg_data.preskip = 1;
  772|  49.0k|                } else {
  773|  49.0k|                    seg->ref = -1;
  774|  49.0k|                }
  775|  59.4k|                if ((seg->skip = dav1d_get_bit(gb))) {
  ------------------
  |  Branch (775:21): [True: 11.1k, False: 48.2k]
  ------------------
  776|  11.1k|                    hdr->segmentation.seg_data.last_active_segid = i;
  777|  11.1k|                    hdr->segmentation.seg_data.preskip = 1;
  778|  11.1k|                }
  779|  59.4k|                if ((seg->globalmv = dav1d_get_bit(gb))) {
  ------------------
  |  Branch (779:21): [True: 10.4k, False: 48.9k]
  ------------------
  780|  10.4k|                    hdr->segmentation.seg_data.last_active_segid = i;
  781|  10.4k|                    hdr->segmentation.seg_data.preskip = 1;
  782|  10.4k|                }
  783|  59.4k|            }
  784|  13.6k|        } else {
  785|       |            // segmentation.update_data was false so we should copy
  786|       |            // segmentation data from the reference frame.
  787|  13.6k|            assert(hdr->primary_ref_frame != DAV1D_PRIMARY_REF_NONE);
  ------------------
  |  Branch (787:13): [True: 13.6k, False: 0]
  ------------------
  788|  13.6k|            const int pri_ref = hdr->refidx[hdr->primary_ref_frame];
  789|  13.6k|            if (!c->refs[pri_ref].p.p.frame_hdr) goto error;
  ------------------
  |  Branch (789:17): [True: 215, False: 13.4k]
  ------------------
  790|  13.4k|            hdr->segmentation.seg_data =
  791|  13.4k|                c->refs[pri_ref].p.p.frame_hdr->segmentation.seg_data;
  792|  13.4k|        }
  793|   289k|    } else {
  794|  2.60M|        for (int i = 0; i < DAV1D_MAX_SEGMENTS; i++)
  ------------------
  |  |   43|  2.60M|#define DAV1D_MAX_SEGMENTS 8
  ------------------
  |  Branch (794:25): [True: 2.31M, False: 289k]
  ------------------
  795|  2.31M|            hdr->segmentation.seg_data.d[i].ref = -1;
  796|   289k|    }
  797|       |#if DEBUG_FRAME_HDR
  798|       |    printf("HDR: post-segmentation: off=%td\n",
  799|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
  800|       |#endif
  801|       |
  802|       |    // delta q
  803|   310k|    if (hdr->quant.yac) {
  ------------------
  |  Branch (803:9): [True: 286k, False: 24.7k]
  ------------------
  804|   286k|        hdr->delta.q.present = dav1d_get_bit(gb);
  805|   286k|        if (hdr->delta.q.present) {
  ------------------
  |  Branch (805:13): [True: 63.1k, False: 222k]
  ------------------
  806|  63.1k|            hdr->delta.q.res_log2 = dav1d_get_bits(gb, 2);
  807|  63.1k|            if (!hdr->allow_intrabc) {
  ------------------
  |  Branch (807:17): [True: 15.3k, False: 47.7k]
  ------------------
  808|  15.3k|                hdr->delta.lf.present = dav1d_get_bit(gb);
  809|  15.3k|                if (hdr->delta.lf.present) {
  ------------------
  |  Branch (809:21): [True: 8.49k, False: 6.90k]
  ------------------
  810|  8.49k|                    hdr->delta.lf.res_log2 = dav1d_get_bits(gb, 2);
  811|  8.49k|                    hdr->delta.lf.multi = dav1d_get_bit(gb);
  812|  8.49k|                }
  813|  15.3k|            }
  814|  63.1k|        }
  815|   286k|    }
  816|       |#if DEBUG_FRAME_HDR
  817|       |    printf("HDR: post-delta_q_lf_flags: off=%td\n",
  818|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
  819|       |#endif
  820|       |
  821|       |    // derive lossless flags
  822|   310k|    const int delta_lossless = !hdr->quant.ydc_delta && !hdr->quant.udc_delta &&
  ------------------
  |  Branch (822:32): [True: 280k, False: 29.9k]
  |  Branch (822:57): [True: 274k, False: 6.08k]
  ------------------
  823|   274k|        !hdr->quant.uac_delta && !hdr->quant.vdc_delta && !hdr->quant.vac_delta;
  ------------------
  |  Branch (823:9): [True: 270k, False: 4.38k]
  |  Branch (823:34): [True: 269k, False: 882]
  |  Branch (823:59): [True: 269k, False: 42]
  ------------------
  824|   310k|    hdr->all_lossless = 1;
  825|  2.79M|    for (int i = 0; i < DAV1D_MAX_SEGMENTS; i++) {
  ------------------
  |  |   43|  2.79M|#define DAV1D_MAX_SEGMENTS 8
  ------------------
  |  Branch (825:21): [True: 2.48M, False: 310k]
  ------------------
  826|  2.48M|        hdr->segmentation.qidx[i] = hdr->segmentation.enabled ?
  ------------------
  |  Branch (826:37): [True: 167k, False: 2.31M]
  ------------------
  827|   167k|            iclip_u8(hdr->quant.yac + hdr->segmentation.seg_data.d[i].delta_q) :
  828|  2.48M|            hdr->quant.yac;
  829|  2.48M|        hdr->segmentation.lossless[i] =
  830|  2.48M|            !hdr->segmentation.qidx[i] && delta_lossless;
  ------------------
  |  Branch (830:13): [True: 202k, False: 2.28M]
  |  Branch (830:43): [True: 174k, False: 27.3k]
  ------------------
  831|  2.48M|        hdr->all_lossless &= hdr->segmentation.lossless[i];
  832|  2.48M|    }
  833|       |
  834|       |    // loopfilter
  835|   310k|    if (hdr->all_lossless || hdr->allow_intrabc) {
  ------------------
  |  Branch (835:9): [True: 21.5k, False: 289k]
  |  Branch (835:30): [True: 177k, False: 111k]
  ------------------
  836|   198k|        hdr->loopfilter.mode_ref_delta_enabled = 1;
  837|   198k|        hdr->loopfilter.mode_ref_delta_update = 1;
  838|   198k|        hdr->loopfilter.mode_ref_deltas = default_mode_ref_deltas;
  839|   198k|    } else {
  840|   111k|        hdr->loopfilter.level_y[0] = dav1d_get_bits(gb, 6);
  841|   111k|        hdr->loopfilter.level_y[1] = dav1d_get_bits(gb, 6);
  842|   111k|        if (!seqhdr->monochrome &&
  ------------------
  |  Branch (842:13): [True: 47.1k, False: 64.6k]
  ------------------
  843|  47.1k|            (hdr->loopfilter.level_y[0] || hdr->loopfilter.level_y[1]))
  ------------------
  |  Branch (843:14): [True: 21.8k, False: 25.3k]
  |  Branch (843:44): [True: 11.1k, False: 14.2k]
  ------------------
  844|  32.9k|        {
  845|  32.9k|            hdr->loopfilter.level_u = dav1d_get_bits(gb, 6);
  846|  32.9k|            hdr->loopfilter.level_v = dav1d_get_bits(gb, 6);
  847|  32.9k|        }
  848|   111k|        hdr->loopfilter.sharpness = dav1d_get_bits(gb, 3);
  849|       |
  850|   111k|        if (hdr->primary_ref_frame == DAV1D_PRIMARY_REF_NONE) {
  ------------------
  |  |   45|   111k|#define DAV1D_PRIMARY_REF_NONE 7
  ------------------
  |  Branch (850:13): [True: 49.0k, False: 62.8k]
  ------------------
  851|  49.0k|            hdr->loopfilter.mode_ref_deltas = default_mode_ref_deltas;
  852|  62.8k|        } else {
  853|  62.8k|            const int ref = hdr->refidx[hdr->primary_ref_frame];
  854|  62.8k|            if (!c->refs[ref].p.p.frame_hdr) goto error;
  ------------------
  |  Branch (854:17): [True: 665, False: 62.1k]
  ------------------
  855|  62.1k|            hdr->loopfilter.mode_ref_deltas =
  856|  62.1k|                c->refs[ref].p.p.frame_hdr->loopfilter.mode_ref_deltas;
  857|  62.1k|        }
  858|   111k|        hdr->loopfilter.mode_ref_delta_enabled = dav1d_get_bit(gb);
  859|   111k|        if (hdr->loopfilter.mode_ref_delta_enabled) {
  ------------------
  |  Branch (859:13): [True: 60.7k, False: 50.4k]
  ------------------
  860|  60.7k|            hdr->loopfilter.mode_ref_delta_update = dav1d_get_bit(gb);
  861|  60.7k|            if (hdr->loopfilter.mode_ref_delta_update) {
  ------------------
  |  Branch (861:17): [True: 7.88k, False: 52.8k]
  ------------------
  862|  70.9k|                for (int i = 0; i < 8; i++)
  ------------------
  |  Branch (862:33): [True: 63.0k, False: 7.88k]
  ------------------
  863|  63.0k|                    if (dav1d_get_bit(gb))
  ------------------
  |  Branch (863:25): [True: 19.7k, False: 43.2k]
  ------------------
  864|  19.7k|                        hdr->loopfilter.mode_ref_deltas.ref_delta[i] =
  865|  19.7k|                            dav1d_get_sbits(gb, 7);
  866|  23.6k|                for (int i = 0; i < 2; i++)
  ------------------
  |  Branch (866:33): [True: 15.7k, False: 7.88k]
  ------------------
  867|  15.7k|                    if (dav1d_get_bit(gb))
  ------------------
  |  Branch (867:25): [True: 3.33k, False: 12.4k]
  ------------------
  868|  3.33k|                        hdr->loopfilter.mode_ref_deltas.mode_delta[i] =
  869|  3.33k|                            dav1d_get_sbits(gb, 7);
  870|  7.88k|            }
  871|  60.7k|        }
  872|   111k|    }
  873|       |#if DEBUG_FRAME_HDR
  874|       |    printf("HDR: post-lpf: off=%td\n",
  875|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
  876|       |#endif
  877|       |
  878|       |    // cdef
  879|   310k|    if (!hdr->all_lossless && seqhdr->cdef && !hdr->allow_intrabc) {
  ------------------
  |  Branch (879:9): [True: 288k, False: 21.5k]
  |  Branch (879:31): [True: 199k, False: 89.5k]
  |  Branch (879:47): [True: 63.6k, False: 135k]
  ------------------
  880|  63.6k|        hdr->cdef.damping = dav1d_get_bits(gb, 2) + 3;
  881|  63.6k|        hdr->cdef.n_bits = dav1d_get_bits(gb, 2);
  882|   157k|        for (int i = 0; i < (1 << hdr->cdef.n_bits); i++) {
  ------------------
  |  Branch (882:25): [True: 93.6k, False: 63.6k]
  ------------------
  883|  93.6k|            hdr->cdef.y_strength[i] = dav1d_get_bits(gb, 6);
  884|  93.6k|            if (!seqhdr->monochrome)
  ------------------
  |  Branch (884:17): [True: 36.9k, False: 56.7k]
  ------------------
  885|  36.9k|                hdr->cdef.uv_strength[i] = dav1d_get_bits(gb, 6);
  886|  93.6k|        }
  887|  63.6k|    }
  888|       |#if DEBUG_FRAME_HDR
  889|       |    printf("HDR: post-cdef: off=%td\n",
  890|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
  891|       |#endif
  892|       |
  893|       |    // restoration
  894|   310k|    if ((!hdr->all_lossless || hdr->super_res.enabled) &&
  ------------------
  |  Branch (894:10): [True: 288k, False: 21.5k]
  |  Branch (894:32): [True: 9.90k, False: 11.6k]
  ------------------
  895|   298k|        seqhdr->restoration && !hdr->allow_intrabc)
  ------------------
  |  Branch (895:9): [True: 114k, False: 183k]
  |  Branch (895:32): [True: 92.7k, False: 22.1k]
  ------------------
  896|  92.7k|    {
  897|  92.7k|        hdr->restoration.type[0] = dav1d_get_bits(gb, 2);
  898|  92.7k|        if (!seqhdr->monochrome) {
  ------------------
  |  Branch (898:13): [True: 37.5k, False: 55.1k]
  ------------------
  899|  37.5k|            hdr->restoration.type[1] = dav1d_get_bits(gb, 2);
  900|  37.5k|            hdr->restoration.type[2] = dav1d_get_bits(gb, 2);
  901|  37.5k|        }
  902|       |
  903|  92.7k|        if (hdr->restoration.type[0] || hdr->restoration.type[1] ||
  ------------------
  |  Branch (903:13): [True: 56.3k, False: 36.4k]
  |  Branch (903:41): [True: 3.45k, False: 32.9k]
  ------------------
  904|  32.9k|            hdr->restoration.type[2])
  ------------------
  |  Branch (904:13): [True: 917, False: 32.0k]
  ------------------
  905|  60.6k|        {
  906|       |            // Log2 of the restoration unit size.
  907|  60.6k|            hdr->restoration.unit_size[0] = 6 + seqhdr->sb128;
  908|  60.6k|            if (dav1d_get_bit(gb)) {
  ------------------
  |  Branch (908:17): [True: 11.6k, False: 49.0k]
  ------------------
  909|  11.6k|                hdr->restoration.unit_size[0]++;
  910|  11.6k|                if (!seqhdr->sb128)
  ------------------
  |  Branch (910:21): [True: 2.76k, False: 8.87k]
  ------------------
  911|  2.76k|                    hdr->restoration.unit_size[0] += dav1d_get_bit(gb);
  912|  11.6k|            }
  913|  60.6k|            hdr->restoration.unit_size[1] = hdr->restoration.unit_size[0];
  914|  60.6k|            if ((hdr->restoration.type[1] || hdr->restoration.type[2]) &&
  ------------------
  |  Branch (914:18): [True: 11.1k, False: 49.5k]
  |  Branch (914:46): [True: 3.83k, False: 45.6k]
  ------------------
  915|  15.0k|                seqhdr->ss_hor == 1 && seqhdr->ss_ver == 1)
  ------------------
  |  Branch (915:17): [True: 9.56k, False: 5.45k]
  |  Branch (915:40): [True: 8.93k, False: 637]
  ------------------
  916|  8.93k|            {
  917|  8.93k|                hdr->restoration.unit_size[1] -= dav1d_get_bit(gb);
  918|  8.93k|            }
  919|  60.6k|        } else {
  920|  32.0k|            hdr->restoration.unit_size[0] = 8;
  921|  32.0k|        }
  922|  92.7k|    }
  923|       |#if DEBUG_FRAME_HDR
  924|       |    printf("HDR: post-restoration: off=%td\n",
  925|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
  926|       |#endif
  927|       |
  928|   310k|    if (!hdr->all_lossless)
  ------------------
  |  Branch (928:9): [True: 288k, False: 21.5k]
  ------------------
  929|   288k|        hdr->txfm_mode = dav1d_get_bit(gb) ? DAV1D_TX_SWITCHABLE : DAV1D_TX_LARGEST;
  ------------------
  |  Branch (929:26): [True: 156k, False: 131k]
  ------------------
  930|       |#if DEBUG_FRAME_HDR
  931|       |    printf("HDR: post-txfmmode: off=%td\n",
  932|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
  933|       |#endif
  934|   310k|    if (IS_INTER_OR_SWITCH(hdr))
  ------------------
  |  |   36|   310k|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 96.1k, False: 214k]
  |  |  ------------------
  ------------------
  935|  96.1k|        hdr->switchable_comp_refs = dav1d_get_bit(gb);
  936|       |#if DEBUG_FRAME_HDR
  937|       |    printf("HDR: post-refmode: off=%td\n",
  938|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
  939|       |#endif
  940|   310k|    if (hdr->switchable_comp_refs && IS_INTER_OR_SWITCH(hdr) && seqhdr->order_hint) {
  ------------------
  |  |   36|   363k|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 53.7k, False: 0]
  |  |  ------------------
  ------------------
  |  Branch (940:9): [True: 53.7k, False: 256k]
  |  Branch (940:65): [True: 52.1k, False: 1.66k]
  ------------------
  941|  52.1k|        const int poc = hdr->frame_offset;
  942|  52.1k|        int off_before = -1, off_after = -1;
  943|  52.1k|        int off_before_idx, off_after_idx;
  944|   415k|        for (int i = 0; i < 7; i++) {
  ------------------
  |  Branch (944:25): [True: 363k, False: 51.8k]
  ------------------
  945|   363k|            if (!c->refs[hdr->refidx[i]].p.p.frame_hdr) goto error;
  ------------------
  |  Branch (945:17): [True: 209, False: 363k]
  ------------------
  946|   363k|            const int refpoc = c->refs[hdr->refidx[i]].p.p.frame_hdr->frame_offset;
  947|       |
  948|   363k|            const int diff = get_poc_diff(seqhdr->order_hint_n_bits, refpoc, poc);
  949|   363k|            if (diff > 0) {
  ------------------
  |  Branch (949:17): [True: 145k, False: 217k]
  ------------------
  950|   145k|                if (off_after < 0 || get_poc_diff(seqhdr->order_hint_n_bits,
  ------------------
  |  Branch (950:21): [True: 42.3k, False: 103k]
  |  Branch (950:38): [True: 8.78k, False: 94.8k]
  ------------------
  951|   103k|                                                  off_after, refpoc) > 0)
  952|  51.1k|                {
  953|  51.1k|                    off_after = refpoc;
  954|  51.1k|                    off_after_idx = i;
  955|  51.1k|                }
  956|   217k|            } else if (diff < 0 && (off_before < 0 ||
  ------------------
  |  Branch (956:24): [True: 166k, False: 50.6k]
  |  Branch (956:37): [True: 47.7k, False: 118k]
  ------------------
  957|   118k|                                    get_poc_diff(seqhdr->order_hint_n_bits,
  ------------------
  |  Branch (957:37): [True: 6.83k, False: 112k]
  ------------------
  958|   118k|                                                 refpoc, off_before) > 0))
  959|  54.5k|            {
  960|  54.5k|                off_before = refpoc;
  961|  54.5k|                off_before_idx = i;
  962|  54.5k|            }
  963|   363k|        }
  964|       |
  965|  51.8k|        if ((off_before | off_after) >= 0) {
  ------------------
  |  Branch (965:13): [True: 39.3k, False: 12.5k]
  ------------------
  966|  39.3k|            hdr->skip_mode_refs[0] = imin(off_before_idx, off_after_idx);
  967|  39.3k|            hdr->skip_mode_refs[1] = imax(off_before_idx, off_after_idx);
  968|  39.3k|            hdr->skip_mode_allowed = 1;
  969|  39.3k|        } else if (off_before >= 0) {
  ------------------
  |  Branch (969:20): [True: 8.42k, False: 4.17k]
  ------------------
  970|  8.42k|            int off_before2 = -1;
  971|  8.42k|            int off_before2_idx;
  972|  67.3k|            for (int i = 0; i < 7; i++) {
  ------------------
  |  Branch (972:29): [True: 58.9k, False: 8.42k]
  ------------------
  973|  58.9k|                if (!c->refs[hdr->refidx[i]].p.p.frame_hdr) goto error;
  ------------------
  |  Branch (973:21): [True: 0, False: 58.9k]
  ------------------
  974|  58.9k|                const int refpoc = c->refs[hdr->refidx[i]].p.p.frame_hdr->frame_offset;
  975|  58.9k|                if (get_poc_diff(seqhdr->order_hint_n_bits,
  ------------------
  |  Branch (975:21): [True: 17.2k, False: 41.7k]
  ------------------
  976|  58.9k|                                 refpoc, off_before) < 0) {
  977|  17.2k|                    if (off_before2 < 0 || get_poc_diff(seqhdr->order_hint_n_bits,
  ------------------
  |  Branch (977:25): [True: 4.74k, False: 12.4k]
  |  Branch (977:44): [True: 490, False: 11.9k]
  ------------------
  978|  12.4k|                                                        refpoc, off_before2) > 0)
  979|  5.23k|                    {
  980|  5.23k|                        off_before2 = refpoc;
  981|  5.23k|                        off_before2_idx = i;
  982|  5.23k|                    }
  983|  17.2k|                }
  984|  58.9k|            }
  985|       |
  986|  8.42k|            if (off_before2 >= 0) {
  ------------------
  |  Branch (986:17): [True: 4.74k, False: 3.67k]
  ------------------
  987|  4.74k|                hdr->skip_mode_refs[0] = imin(off_before_idx, off_before2_idx);
  988|  4.74k|                hdr->skip_mode_refs[1] = imax(off_before_idx, off_before2_idx);
  989|  4.74k|                hdr->skip_mode_allowed = 1;
  990|  4.74k|            }
  991|  8.42k|        }
  992|  51.8k|    }
  993|   309k|    if (hdr->skip_mode_allowed)
  ------------------
  |  Branch (993:9): [True: 44.0k, False: 265k]
  ------------------
  994|  44.0k|        hdr->skip_mode_enabled = dav1d_get_bit(gb);
  995|       |#if DEBUG_FRAME_HDR
  996|       |    printf("HDR: post-extskip: off=%td\n",
  997|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
  998|       |#endif
  999|   309k|    if (!hdr->error_resilient_mode && IS_INTER_OR_SWITCH(hdr) && seqhdr->warped_motion)
  ------------------
  |  |   36|   405k|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 93.3k, False: 2.24k]
  |  |  ------------------
  ------------------
  |  Branch (999:9): [True: 95.6k, False: 214k]
  |  Branch (999:66): [True: 73.5k, False: 19.8k]
  ------------------
 1000|  73.5k|        hdr->warp_motion = dav1d_get_bit(gb);
 1001|       |#if DEBUG_FRAME_HDR
 1002|       |    printf("HDR: post-warpmotionbit: off=%td\n",
 1003|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
 1004|       |#endif
 1005|   309k|    hdr->reduced_txtp_set = dav1d_get_bit(gb);
 1006|       |#if DEBUG_FRAME_HDR
 1007|       |    printf("HDR: post-reducedtxtpset: off=%td\n",
 1008|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
 1009|       |#endif
 1010|       |
 1011|  2.47M|    for (int i = 0; i < 7; i++)
  ------------------
  |  Branch (1011:21): [True: 2.16M, False: 309k]
  ------------------
 1012|  2.16M|        hdr->gmv[i] = dav1d_default_wm_params;
 1013|       |
 1014|   309k|    if (IS_INTER_OR_SWITCH(hdr)) {
  ------------------
  |  |   36|   309k|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (36:5): [True: 95.9k, False: 214k]
  |  |  ------------------
  ------------------
 1015|   766k|        for (int i = 0; i < 7; i++) {
  ------------------
  |  Branch (1015:25): [True: 670k, False: 95.6k]
  ------------------
 1016|   670k|            hdr->gmv[i].type = !dav1d_get_bit(gb) ? DAV1D_WM_TYPE_IDENTITY :
  ------------------
  |  Branch (1016:32): [True: 641k, False: 29.4k]
  ------------------
 1017|   670k|                                dav1d_get_bit(gb) ? DAV1D_WM_TYPE_ROT_ZOOM :
  ------------------
  |  Branch (1017:33): [True: 19.3k, False: 10.0k]
  ------------------
 1018|  29.4k|                                dav1d_get_bit(gb) ? DAV1D_WM_TYPE_TRANSLATION :
  ------------------
  |  Branch (1018:33): [True: 3.03k, False: 7.03k]
  ------------------
 1019|  10.0k|                                                    DAV1D_WM_TYPE_AFFINE;
 1020|       |
 1021|   670k|            if (hdr->gmv[i].type == DAV1D_WM_TYPE_IDENTITY) continue;
  ------------------
  |  Branch (1021:17): [True: 641k, False: 29.4k]
  ------------------
 1022|       |
 1023|  29.4k|            const Dav1dWarpedMotionParams *ref_gmv;
 1024|  29.4k|            if (hdr->primary_ref_frame == DAV1D_PRIMARY_REF_NONE) {
  ------------------
  |  |   45|  29.4k|#define DAV1D_PRIMARY_REF_NONE 7
  ------------------
  |  Branch (1024:17): [True: 2.47k, False: 26.9k]
  ------------------
 1025|  2.47k|                ref_gmv = &dav1d_default_wm_params;
 1026|  26.9k|            } else {
 1027|  26.9k|                const int pri_ref = hdr->refidx[hdr->primary_ref_frame];
 1028|  26.9k|                if (!c->refs[pri_ref].p.p.frame_hdr) goto error;
  ------------------
  |  Branch (1028:21): [True: 307, False: 26.6k]
  ------------------
 1029|  26.6k|                ref_gmv = &c->refs[pri_ref].p.p.frame_hdr->gmv[i];
 1030|  26.6k|            }
 1031|  29.1k|            int32_t *const mat = hdr->gmv[i].matrix;
 1032|  29.1k|            const int32_t *const ref_mat = ref_gmv->matrix;
 1033|  29.1k|            int bits, shift;
 1034|       |
 1035|  29.1k|            if (hdr->gmv[i].type >= DAV1D_WM_TYPE_ROT_ZOOM) {
  ------------------
  |  Branch (1035:17): [True: 26.2k, False: 2.95k]
  ------------------
 1036|  26.2k|                mat[2] = (1 << 16) + 2 *
 1037|  26.2k|                    dav1d_get_bits_subexp(gb, (ref_mat[2] - (1 << 16)) >> 1, 12);
 1038|  26.2k|                mat[3] = 2 * dav1d_get_bits_subexp(gb, ref_mat[3] >> 1, 12);
 1039|       |
 1040|  26.2k|                bits = 12;
 1041|  26.2k|                shift = 10;
 1042|  26.2k|            } else {
 1043|  2.95k|                bits = 9 - !hdr->hp;
 1044|  2.95k|                shift = 13 + !hdr->hp;
 1045|  2.95k|            }
 1046|       |
 1047|  29.1k|            if (hdr->gmv[i].type == DAV1D_WM_TYPE_AFFINE) {
  ------------------
  |  Branch (1047:17): [True: 6.85k, False: 22.2k]
  ------------------
 1048|  6.85k|                mat[4] = 2 * dav1d_get_bits_subexp(gb, ref_mat[4] >> 1, 12);
 1049|  6.85k|                mat[5] = (1 << 16) + 2 *
 1050|  6.85k|                    dav1d_get_bits_subexp(gb, (ref_mat[5] - (1 << 16)) >> 1, 12);
 1051|  22.2k|            } else {
 1052|  22.2k|                mat[4] = -mat[3];
 1053|  22.2k|                mat[5] = mat[2];
 1054|  22.2k|            }
 1055|       |
 1056|  29.1k|            mat[0] = dav1d_get_bits_subexp(gb, ref_mat[0] >> shift, bits) * (1 << shift);
 1057|  29.1k|            mat[1] = dav1d_get_bits_subexp(gb, ref_mat[1] >> shift, bits) * (1 << shift);
 1058|  29.1k|        }
 1059|  95.9k|    }
 1060|       |#if DEBUG_FRAME_HDR
 1061|       |    printf("HDR: post-gmv: off=%td\n",
 1062|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
 1063|       |#endif
 1064|       |
 1065|   309k|    if (seqhdr->film_grain_present && (hdr->show_frame || hdr->showable_frame)) {
  ------------------
  |  Branch (1065:9): [True: 215k, False: 94.2k]
  |  Branch (1065:40): [True: 171k, False: 43.4k]
  |  Branch (1065:59): [True: 1.46k, False: 42.0k]
  ------------------
 1066|   173k|        hdr->film_grain.present = dav1d_get_bit(gb);
 1067|   173k|        if (hdr->film_grain.present) {
  ------------------
  |  Branch (1067:13): [True: 14.6k, False: 158k]
  ------------------
 1068|  14.6k|            const unsigned seed = dav1d_get_bits(gb, 16);
 1069|  14.6k|            hdr->film_grain.update = hdr->frame_type != DAV1D_FRAME_TYPE_INTER || dav1d_get_bit(gb);
  ------------------
  |  Branch (1069:38): [True: 7.23k, False: 7.39k]
  |  Branch (1069:83): [True: 781, False: 6.61k]
  ------------------
 1070|  14.6k|            if (!hdr->film_grain.update) {
  ------------------
  |  Branch (1070:17): [True: 6.61k, False: 8.01k]
  ------------------
 1071|  6.61k|                const int refidx = dav1d_get_bits(gb, 3);
 1072|  6.61k|                int i;
 1073|  16.8k|                for (i = 0; i < 7; i++)
  ------------------
  |  Branch (1073:29): [True: 16.6k, False: 254]
  ------------------
 1074|  16.6k|                    if (hdr->refidx[i] == refidx)
  ------------------
  |  Branch (1074:25): [True: 6.35k, False: 10.2k]
  ------------------
 1075|  6.35k|                        break;
 1076|  6.61k|                if (i == 7 || !c->refs[refidx].p.p.frame_hdr) goto error;
  ------------------
  |  Branch (1076:21): [True: 254, False: 6.35k]
  |  Branch (1076:31): [True: 194, False: 6.16k]
  ------------------
 1077|  6.16k|                hdr->film_grain.data = c->refs[refidx].p.p.frame_hdr->film_grain.data;
 1078|  6.16k|                hdr->film_grain.data.seed = seed;
 1079|  8.01k|            } else {
 1080|  8.01k|                Dav1dFilmGrainData *const fgd = &hdr->film_grain.data;
 1081|  8.01k|                fgd->seed = seed;
 1082|       |
 1083|  8.01k|                fgd->num_y_points = dav1d_get_bits(gb, 4);
 1084|  8.01k|                if (fgd->num_y_points > 14) goto error;
  ------------------
  |  Branch (1084:21): [True: 271, False: 7.74k]
  ------------------
 1085|  11.9k|                for (int i = 0; i < fgd->num_y_points; i++) {
  ------------------
  |  Branch (1085:33): [True: 4.91k, False: 7.07k]
  ------------------
 1086|  4.91k|                    fgd->y_points[i][0] = dav1d_get_bits(gb, 8);
 1087|  4.91k|                    if (i && fgd->y_points[i - 1][0] >= fgd->y_points[i][0])
  ------------------
  |  Branch (1087:25): [True: 2.28k, False: 2.62k]
  |  Branch (1087:30): [True: 672, False: 1.61k]
  ------------------
 1088|    672|                        goto error;
 1089|  4.23k|                    fgd->y_points[i][1] = dav1d_get_bits(gb, 8);
 1090|  4.23k|                }
 1091|       |
 1092|  7.07k|                if (!seqhdr->monochrome)
  ------------------
  |  Branch (1092:21): [True: 6.07k, False: 997]
  ------------------
 1093|  6.07k|                    fgd->chroma_scaling_from_luma = dav1d_get_bit(gb);
 1094|  7.07k|                if (seqhdr->monochrome || fgd->chroma_scaling_from_luma ||
  ------------------
  |  Branch (1094:21): [True: 997, False: 6.07k]
  |  Branch (1094:43): [True: 2.18k, False: 3.89k]
  ------------------
 1095|  3.89k|                    (seqhdr->ss_ver == 1 && seqhdr->ss_hor == 1 && !fgd->num_y_points))
  ------------------
  |  Branch (1095:22): [True: 1.01k, False: 2.88k]
  |  Branch (1095:45): [True: 1.01k, False: 0]
  |  Branch (1095:68): [True: 262, False: 750]
  ------------------
 1096|  3.43k|                {
 1097|  3.43k|                    fgd->num_uv_points[0] = fgd->num_uv_points[1] = 0;
 1098|  9.59k|                } else for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1098:41): [True: 6.83k, False: 2.76k]
  ------------------
 1099|  6.83k|                    fgd->num_uv_points[pl] = dav1d_get_bits(gb, 4);
 1100|  6.83k|                    if (fgd->num_uv_points[pl] > 10) goto error;
  ------------------
  |  Branch (1100:25): [True: 470, False: 6.36k]
  ------------------
 1101|  11.0k|                    for (int i = 0; i < fgd->num_uv_points[pl]; i++) {
  ------------------
  |  Branch (1101:37): [True: 5.07k, False: 5.95k]
  ------------------
 1102|  5.07k|                        fgd->uv_points[pl][i][0] = dav1d_get_bits(gb, 8);
 1103|  5.07k|                        if (i && fgd->uv_points[pl][i - 1][0] >= fgd->uv_points[pl][i][0])
  ------------------
  |  Branch (1103:29): [True: 3.18k, False: 1.89k]
  |  Branch (1103:34): [True: 406, False: 2.78k]
  ------------------
 1104|    406|                            goto error;
 1105|  4.67k|                        fgd->uv_points[pl][i][1] = dav1d_get_bits(gb, 8);
 1106|  4.67k|                    }
 1107|  6.36k|                }
 1108|       |
 1109|  6.19k|                if (seqhdr->ss_hor == 1 && seqhdr->ss_ver == 1 &&
  ------------------
  |  Branch (1109:21): [True: 4.37k, False: 1.82k]
  |  Branch (1109:44): [True: 3.66k, False: 707]
  ------------------
 1110|  3.66k|                    !!fgd->num_uv_points[0] != !!fgd->num_uv_points[1])
  ------------------
  |  Branch (1110:21): [True: 94, False: 3.57k]
  ------------------
 1111|     94|                {
 1112|     94|                    goto error;
 1113|     94|                }
 1114|       |
 1115|  6.10k|                fgd->scaling_shift = dav1d_get_bits(gb, 2) + 8;
 1116|  6.10k|                fgd->ar_coeff_lag = dav1d_get_bits(gb, 2);
 1117|  6.10k|                const int num_y_pos = 2 * fgd->ar_coeff_lag * (fgd->ar_coeff_lag + 1);
 1118|  6.10k|                if (fgd->num_y_points)
  ------------------
  |  Branch (1118:21): [True: 1.75k, False: 4.34k]
  ------------------
 1119|  22.4k|                    for (int i = 0; i < num_y_pos; i++)
  ------------------
  |  Branch (1119:37): [True: 20.6k, False: 1.75k]
  ------------------
 1120|  20.6k|                        fgd->ar_coeffs_y[i] = dav1d_get_bits(gb, 8) - 128;
 1121|  18.3k|                for (int pl = 0; pl < 2; pl++)
  ------------------
  |  Branch (1121:34): [True: 12.2k, False: 6.10k]
  ------------------
 1122|  12.2k|                    if (fgd->num_uv_points[pl] || fgd->chroma_scaling_from_luma) {
  ------------------
  |  Branch (1122:25): [True: 1.38k, False: 10.8k]
  |  Branch (1122:51): [True: 4.36k, False: 6.46k]
  ------------------
 1123|  5.74k|                        const int num_uv_pos = num_y_pos + !!fgd->num_y_points;
 1124|  27.5k|                        for (int i = 0; i < num_uv_pos; i++)
  ------------------
  |  Branch (1124:41): [True: 21.8k, False: 5.74k]
  ------------------
 1125|  21.8k|                            fgd->ar_coeffs_uv[pl][i] = dav1d_get_bits(gb, 8) - 128;
 1126|  5.74k|                        if (!fgd->num_y_points)
  ------------------
  |  Branch (1126:29): [True: 4.32k, False: 1.41k]
  ------------------
 1127|  4.32k|                            fgd->ar_coeffs_uv[pl][num_uv_pos] = 0;
 1128|  5.74k|                    }
 1129|  6.10k|                fgd->ar_coeff_shift = dav1d_get_bits(gb, 2) + 6;
 1130|  6.10k|                fgd->grain_scale_shift = dav1d_get_bits(gb, 2);
 1131|  18.3k|                for (int pl = 0; pl < 2; pl++)
  ------------------
  |  Branch (1131:34): [True: 12.2k, False: 6.10k]
  ------------------
 1132|  12.2k|                    if (fgd->num_uv_points[pl]) {
  ------------------
  |  Branch (1132:25): [True: 1.38k, False: 10.8k]
  ------------------
 1133|  1.38k|                        fgd->uv_mult[pl] = dav1d_get_bits(gb, 8) - 128;
 1134|  1.38k|                        fgd->uv_luma_mult[pl] = dav1d_get_bits(gb, 8) - 128;
 1135|  1.38k|                        fgd->uv_offset[pl] = dav1d_get_bits(gb, 9) - 256;
 1136|  1.38k|                    }
 1137|  6.10k|                fgd->overlap_flag = dav1d_get_bit(gb);
 1138|  6.10k|                fgd->clip_to_restricted_range = dav1d_get_bit(gb);
 1139|  6.10k|            }
 1140|  14.6k|        }
 1141|   173k|    }
 1142|       |#if DEBUG_FRAME_HDR
 1143|       |    printf("HDR: post-filmgrain: off=%td\n",
 1144|       |           (gb->ptr - init_ptr) * 8 - gb->bits_left);
 1145|       |#endif
 1146|       |
 1147|   307k|    return 0;
 1148|       |
 1149|  6.14k|error:
 1150|  6.14k|    dav1d_log(c, "Error parsing frame header\n");
  ------------------
  |  |   44|  6.14k|#define dav1d_log(...) do { } while(0)
  |  |  ------------------
  |  |  |  Branch (44:37): [Folded, False: 6.14k]
  |  |  ------------------
  ------------------
 1151|  6.14k|    return DAV1D_ERR(EINVAL);
  ------------------
  |  |   58|  6.14k|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
 1152|   309k|}
obu.c:read_frame_size:
  343|   311k|{
  344|   311k|    const Dav1dSequenceHeader *const seqhdr = c->seq_hdr;
  345|   311k|    Dav1dFrameHeader *const hdr = c->frame_hdr;
  346|       |
  347|   311k|    if (use_ref) {
  ------------------
  |  Branch (347:9): [True: 64.3k, False: 247k]
  ------------------
  348|   110k|        for (int i = 0; i < 7; i++) {
  ------------------
  |  Branch (348:25): [True: 109k, False: 601]
  ------------------
  349|   109k|            if (dav1d_get_bit(gb)) {
  ------------------
  |  Branch (349:17): [True: 63.7k, False: 46.0k]
  ------------------
  350|  63.7k|                const Dav1dThreadPicture *const ref =
  351|  63.7k|                    &c->refs[c->frame_hdr->refidx[i]].p;
  352|  63.7k|                if (!ref->p.frame_hdr) return -1;
  ------------------
  |  Branch (352:21): [True: 178, False: 63.5k]
  ------------------
  353|  63.5k|                hdr->width[1] = ref->p.frame_hdr->width[1];
  354|  63.5k|                hdr->height = ref->p.frame_hdr->height;
  355|  63.5k|                hdr->render_width = ref->p.frame_hdr->render_width;
  356|  63.5k|                hdr->render_height = ref->p.frame_hdr->render_height;
  357|  63.5k|                hdr->super_res.enabled = seqhdr->super_res && dav1d_get_bit(gb);
  ------------------
  |  Branch (357:42): [True: 8.35k, False: 55.2k]
  |  Branch (357:63): [True: 6.68k, False: 1.66k]
  ------------------
  358|  63.5k|                if (hdr->super_res.enabled) {
  ------------------
  |  Branch (358:21): [True: 6.68k, False: 56.8k]
  ------------------
  359|  6.68k|                    const int d = hdr->super_res.width_scale_denominator =
  360|  6.68k|                        9 + dav1d_get_bits(gb, 3);
  361|  6.68k|                    hdr->width[0] = imax((hdr->width[1] * 8 + (d >> 1)) / d,
  362|  6.68k|                                         imin(16, hdr->width[1]));
  363|  56.8k|                } else {
  364|  56.8k|                    hdr->super_res.width_scale_denominator = 8;
  365|  56.8k|                    hdr->width[0] = hdr->width[1];
  366|  56.8k|                }
  367|  63.5k|                return 0;
  368|  63.7k|            }
  369|   109k|        }
  370|  64.3k|    }
  371|       |
  372|   248k|    if (hdr->frame_size_override) {
  ------------------
  |  Branch (372:9): [True: 7.56k, False: 240k]
  ------------------
  373|  7.56k|        hdr->width[1] = dav1d_get_bits(gb, seqhdr->width_n_bits) + 1;
  374|  7.56k|        hdr->height = dav1d_get_bits(gb, seqhdr->height_n_bits) + 1;
  375|   240k|    } else {
  376|   240k|        hdr->width[1] = seqhdr->max_width;
  377|   240k|        hdr->height = seqhdr->max_height;
  378|   240k|    }
  379|   248k|    hdr->super_res.enabled = seqhdr->super_res && dav1d_get_bit(gb);
  ------------------
  |  Branch (379:30): [True: 218k, False: 29.9k]
  |  Branch (379:51): [True: 33.1k, False: 184k]
  ------------------
  380|   248k|    if (hdr->super_res.enabled) {
  ------------------
  |  Branch (380:9): [True: 33.1k, False: 214k]
  ------------------
  381|  33.1k|        const int d = hdr->super_res.width_scale_denominator = 9 + dav1d_get_bits(gb, 3);
  382|  33.1k|        hdr->width[0] = imax((hdr->width[1] * 8 + (d >> 1)) / d, imin(16, hdr->width[1]));
  383|   214k|    } else {
  384|   214k|        hdr->super_res.width_scale_denominator = 8;
  385|   214k|        hdr->width[0] = hdr->width[1];
  386|   214k|    }
  387|   248k|    hdr->have_render_size = dav1d_get_bit(gb);
  388|   248k|    if (hdr->have_render_size) {
  ------------------
  |  Branch (388:9): [True: 7.58k, False: 240k]
  ------------------
  389|  7.58k|        hdr->render_width = dav1d_get_bits(gb, 16) + 1;
  390|  7.58k|        hdr->render_height = dav1d_get_bits(gb, 16) + 1;
  391|   240k|    } else {
  392|   240k|        hdr->render_width = hdr->width[1];
  393|   240k|        hdr->render_height = hdr->height;
  394|   240k|    }
  395|   248k|    return 0;
  396|   311k|}
obu.c:tile_log2:
  398|  1.33M|static inline int tile_log2(const int sz, const int tgt) {
  399|  1.33M|    int k;
  400|  1.96M|    for (k = 0; (sz << k) < tgt; k++) ;
  ------------------
  |  Branch (400:17): [True: 631k, False: 1.33M]
  ------------------
  401|  1.33M|    return k;
  402|  1.33M|}
obu.c:check_trailing_bits:
   50|  53.9k|{
   51|  53.9k|    const int trailing_one_bit = dav1d_get_bit(gb);
   52|       |
   53|  53.9k|    if (gb->error)
  ------------------
  |  Branch (53:9): [True: 3.60k, False: 50.3k]
  ------------------
   54|  3.60k|        return DAV1D_ERR(EINVAL);
  ------------------
  |  |   58|  3.60k|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
   55|       |
   56|  50.3k|    if (!strict_std_compliance)
  ------------------
  |  Branch (56:9): [True: 50.3k, False: 0]
  ------------------
   57|  50.3k|        return 0;
   58|       |
   59|      0|    if (!trailing_one_bit || gb->state)
  ------------------
  |  Branch (59:9): [True: 0, False: 0]
  |  Branch (59:30): [True: 0, False: 0]
  ------------------
   60|      0|        return DAV1D_ERR(EINVAL);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
   61|       |
   62|      0|    ptrdiff_t size = gb->ptr_end - gb->ptr;
   63|      0|    while (size > 0 && gb->ptr[size - 1] == 0)
  ------------------
  |  Branch (63:12): [True: 0, False: 0]
  |  Branch (63:24): [True: 0, False: 0]
  ------------------
   64|      0|        size--;
   65|       |
   66|      0|    if (size)
  ------------------
  |  Branch (66:9): [True: 0, False: 0]
  ------------------
   67|      0|        return DAV1D_ERR(EINVAL);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
   68|       |
   69|      0|    return 0;
   70|      0|}
obu.c:parse_tile_hdr:
 1154|   303k|static void parse_tile_hdr(Dav1dContext *const c, GetBits *const gb) {
 1155|   303k|    const int n_tiles = c->frame_hdr->tiling.cols * c->frame_hdr->tiling.rows;
 1156|   303k|    const int have_tile_pos = n_tiles > 1 ? dav1d_get_bit(gb) : 0;
  ------------------
  |  Branch (1156:31): [True: 25.6k, False: 277k]
  ------------------
 1157|       |
 1158|   303k|    if (have_tile_pos) {
  ------------------
  |  Branch (1158:9): [True: 2.89k, False: 300k]
  ------------------
 1159|  2.89k|        const int n_bits = c->frame_hdr->tiling.log2_cols +
 1160|  2.89k|                           c->frame_hdr->tiling.log2_rows;
 1161|  2.89k|        c->tile[c->n_tile_data].start = dav1d_get_bits(gb, n_bits);
 1162|  2.89k|        c->tile[c->n_tile_data].end = dav1d_get_bits(gb, n_bits);
 1163|   300k|    } else {
 1164|   300k|        c->tile[c->n_tile_data].start = 0;
 1165|   300k|        c->tile[c->n_tile_data].end = n_tiles - 1;
 1166|   300k|    }
 1167|   303k|}

dav1d_pal_dsp_init:
   71|  10.2k|COLD void dav1d_pal_dsp_init(Dav1dPalDSPContext *const c) {
   72|  10.2k|    c->pal_idx_finish = pal_idx_finish_c;
   73|       |
   74|  10.2k|#if HAVE_ASM
   75|       |#if ARCH_RISCV
   76|       |    pal_dsp_init_riscv(c);
   77|       |#elif ARCH_X86
   78|       |    pal_dsp_init_x86(c);
   79|  10.2k|#endif
   80|  10.2k|#endif
   81|  10.2k|}

dav1d_default_picture_alloc:
   46|   313k|int dav1d_default_picture_alloc(Dav1dPicture *const p, void *const cookie) {
   47|   313k|    const int hbd = p->p.bpc > 8;
   48|   313k|    const int aligned_w = (p->p.w + 127) & ~127;
   49|   313k|    const int aligned_h = (p->p.h + 127) & ~127;
   50|   313k|    const int has_chroma = p->p.layout != DAV1D_PIXEL_LAYOUT_I400;
   51|   313k|    const int ss_ver = p->p.layout == DAV1D_PIXEL_LAYOUT_I420;
   52|   313k|    const int ss_hor = p->p.layout != DAV1D_PIXEL_LAYOUT_I444;
   53|   313k|    ptrdiff_t y_stride = aligned_w << hbd;
   54|   313k|    ptrdiff_t uv_stride = has_chroma ? y_stride >> ss_hor : 0;
  ------------------
  |  Branch (54:27): [True: 239k, False: 73.7k]
  ------------------
   55|       |    /* Due to how mapping of addresses to sets works in most L1 and L2 cache
   56|       |     * implementations, strides of multiples of certain power-of-two numbers
   57|       |     * may cause multiple rows of the same superblock to map to the same set,
   58|       |     * causing evictions of previous rows resulting in a reduction in cache
   59|       |     * hit rate. Avoid that by slightly padding the stride when necessary. */
   60|   313k|    if (!(y_stride & 1023))
  ------------------
  |  Branch (60:9): [True: 24.5k, False: 288k]
  ------------------
   61|  24.5k|        y_stride += DAV1D_PICTURE_ALIGNMENT;
  ------------------
  |  |   44|  24.5k|#define DAV1D_PICTURE_ALIGNMENT 64
  ------------------
   62|   313k|    if (!(uv_stride & 1023) && has_chroma)
  ------------------
  |  Branch (62:9): [True: 96.0k, False: 217k]
  |  Branch (62:32): [True: 22.3k, False: 73.7k]
  ------------------
   63|  22.3k|        uv_stride += DAV1D_PICTURE_ALIGNMENT;
  ------------------
  |  |   44|  22.3k|#define DAV1D_PICTURE_ALIGNMENT 64
  ------------------
   64|   313k|    p->stride[0] = y_stride;
   65|   313k|    p->stride[1] = uv_stride;
   66|   313k|    const size_t y_sz = y_stride * aligned_h;
   67|   313k|    const size_t uv_sz = uv_stride * (aligned_h >> ss_ver);
   68|   313k|    const size_t pic_size = y_sz + 2 * uv_sz;
   69|       |
   70|   313k|    uint8_t *const buf = dav1d_mem_pool_pop(cookie, pic_size + DAV1D_PICTURE_ALIGNMENT);
  ------------------
  |  |   44|   313k|#define DAV1D_PICTURE_ALIGNMENT 64
  ------------------
   71|   313k|    if (!buf) return DAV1D_ERR(ENOMEM);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  |  Branch (71:9): [True: 0, False: 313k]
  ------------------
   72|   313k|    p->allocator_data = buf;
   73|   313k|    p->data[0] = buf;
   74|   313k|    p->data[1] = has_chroma ? buf + y_sz : NULL;
  ------------------
  |  Branch (74:18): [True: 239k, False: 73.7k]
  ------------------
   75|   313k|    p->data[2] = has_chroma ? buf + y_sz + uv_sz : NULL;
  ------------------
  |  Branch (75:18): [True: 239k, False: 73.7k]
  ------------------
   76|       |
   77|   313k|    return 0;
   78|   313k|}
dav1d_default_picture_release:
   80|   313k|void dav1d_default_picture_release(Dav1dPicture *const p, void *const cookie) {
   81|   313k|    dav1d_mem_pool_push(cookie, p->allocator_data);
   82|   313k|}
dav1d_picture_free_itut_t35:
   99|    977|void dav1d_picture_free_itut_t35(const uint8_t *const data, void *const user_data) {
  100|    977|    struct itut_t35_ctx_context *itut_t35_ctx = user_data;
  101|       |
  102|  2.95k|    for (size_t i = 0; i < itut_t35_ctx->n_itut_t35; i++)
  ------------------
  |  Branch (102:24): [True: 1.97k, False: 977]
  ------------------
  103|  1.97k|        dav1d_free(itut_t35_ctx->itut_t35[i].payload);
  ------------------
  |  |  135|  1.97k|#define dav1d_free(ptr) free(ptr)
  ------------------
  104|    977|    dav1d_free(itut_t35_ctx->itut_t35);
  ------------------
  |  |  135|    977|#define dav1d_free(ptr) free(ptr)
  ------------------
  105|    977|    dav1d_free(itut_t35_ctx);
  ------------------
  |  |  135|    977|#define dav1d_free(ptr) free(ptr)
  ------------------
  106|    977|}
dav1d_picture_copy_props:
  164|   263k|{
  165|   263k|    dav1d_data_props_copy(&p->m, props);
  166|       |
  167|   263k|    dav1d_ref_dec(&p->content_light_ref);
  168|   263k|    p->content_light_ref = content_light_ref;
  169|   263k|    p->content_light = content_light;
  170|   263k|    if (content_light_ref) dav1d_ref_inc(content_light_ref);
  ------------------
  |  Branch (170:9): [True: 7.22k, False: 256k]
  ------------------
  171|       |
  172|   263k|    dav1d_ref_dec(&p->mastering_display_ref);
  173|   263k|    p->mastering_display_ref = mastering_display_ref;
  174|   263k|    p->mastering_display = mastering_display;
  175|   263k|    if (mastering_display_ref) dav1d_ref_inc(mastering_display_ref);
  ------------------
  |  Branch (175:9): [True: 868, False: 262k]
  ------------------
  176|       |
  177|   263k|    dav1d_ref_dec(&p->itut_t35_ref);
  178|   263k|    p->itut_t35_ref = itut_t35_ref;
  179|   263k|    p->itut_t35 = itut_t35;
  180|   263k|    p->n_itut_t35 = n_itut_t35;
  181|   263k|    if (itut_t35_ref) dav1d_ref_inc(itut_t35_ref);
  ------------------
  |  Branch (181:9): [True: 1.17k, False: 262k]
  ------------------
  182|   263k|}
dav1d_thread_picture_alloc:
  186|   282k|{
  187|   282k|    Dav1dThreadPicture *const p = &f->sr_cur;
  188|       |
  189|   282k|    const int res = picture_alloc(c, &p->p, f->frame_hdr->width[1], f->frame_hdr->height,
  190|   282k|                                  f->seq_hdr, f->seq_hdr_ref,
  191|   282k|                                  f->frame_hdr, f->frame_hdr_ref,
  192|   282k|                                  bpc, &f->tile[0].data.m, &c->allocator,
  193|   282k|                                  (void **) &p->progress);
  194|   282k|    if (res) return res;
  ------------------
  |  Branch (194:9): [True: 0, False: 282k]
  ------------------
  195|       |
  196|       |    // Don't clear these flags from c->frame_flags if the frame is not going to be output.
  197|       |    // This way they will be added to the next visible frame too.
  198|   282k|    const int flags_mask = ((f->frame_hdr->show_frame || c->output_invisible_frames) &&
  ------------------
  |  Branch (198:30): [True: 218k, False: 63.7k]
  |  Branch (198:58): [True: 0, False: 63.7k]
  ------------------
  199|   218k|                            c->max_spatial_id == f->frame_hdr->spatial_id)
  ------------------
  |  Branch (199:29): [True: 210k, False: 8.34k]
  ------------------
  200|   282k|                           ? 0 : (PICTURE_FLAG_NEW_SEQUENCE | PICTURE_FLAG_NEW_OP_PARAMS_INFO);
  201|   282k|    p->flags = c->frame_flags;
  202|   282k|    c->frame_flags &= flags_mask;
  203|       |
  204|   282k|    p->visible = f->frame_hdr->show_frame;
  205|   282k|    p->showable = f->frame_hdr->showable_frame;
  206|       |
  207|   282k|    if (p->visible) {
  ------------------
  |  Branch (207:9): [True: 218k, False: 63.7k]
  ------------------
  208|       |        // Only add HDR10+ and T35 metadata when show frame flag is enabled
  209|   218k|        dav1d_picture_copy_props(&p->p, c->content_light, c->content_light_ref,
  210|   218k|                                 c->mastering_display, c->mastering_display_ref,
  211|   218k|                                 c->itut_t35, c->itut_t35_ref, c->n_itut_t35,
  212|   218k|                                 &f->tile[0].data.m);
  213|       |
  214|       |        // Must be removed from the context after being attached to the frame
  215|   218k|        dav1d_ref_dec(&c->itut_t35_ref);
  216|   218k|        c->itut_t35 = NULL;
  217|   218k|        c->n_itut_t35 = 0;
  218|   218k|    } else {
  219|  63.7k|        dav1d_data_props_copy(&p->p.m, &f->tile[0].data.m);
  220|  63.7k|    }
  221|       |
  222|   282k|    if (c->n_fc > 1) {
  ------------------
  |  Branch (222:9): [True: 282k, False: 0]
  ------------------
  223|   282k|        atomic_init(&p->progress[0], 0);
  224|       |        atomic_init(&p->progress[1], 0);
  225|   282k|    }
  226|   282k|    return res;
  227|   282k|}
dav1d_picture_alloc_copy:
  231|  30.9k|{
  232|  30.9k|    struct pic_ctx_context *const pic_ctx = (struct pic_ctx_context*)src->ref->const_data;
  233|  30.9k|    const int res = picture_alloc(c, dst, w, src->p.h,
  234|  30.9k|                                  src->seq_hdr, src->seq_hdr_ref,
  235|  30.9k|                                  src->frame_hdr, src->frame_hdr_ref,
  236|  30.9k|                                  src->p.bpc, &src->m, &pic_ctx->allocator,
  237|  30.9k|                                  NULL);
  238|  30.9k|    if (res) return res;
  ------------------
  |  Branch (238:9): [True: 0, False: 30.9k]
  ------------------
  239|       |
  240|  30.9k|    dav1d_picture_copy_props(dst, src->content_light, src->content_light_ref,
  241|  30.9k|                             src->mastering_display, src->mastering_display_ref,
  242|  30.9k|                             src->itut_t35, src->itut_t35_ref, src->n_itut_t35,
  243|  30.9k|                             &src->m);
  244|       |
  245|  30.9k|    return 0;
  246|  30.9k|}
dav1d_picture_ref:
  248|  3.22M|void dav1d_picture_ref(Dav1dPicture *const dst, const Dav1dPicture *const src) {
  249|  3.22M|    assert(dst != NULL);
  ------------------
  |  Branch (249:5): [True: 3.22M, False: 0]
  ------------------
  250|  3.22M|    assert(dst->data[0] == NULL);
  ------------------
  |  Branch (250:5): [True: 3.22M, False: 0]
  ------------------
  251|  3.22M|    assert(src != NULL);
  ------------------
  |  Branch (251:5): [True: 3.22M, False: 0]
  ------------------
  252|       |
  253|  3.22M|    if (src->ref) {
  ------------------
  |  Branch (253:9): [True: 3.22M, False: 0]
  ------------------
  254|  3.22M|        assert(src->data[0] != NULL);
  ------------------
  |  Branch (254:9): [True: 3.22M, False: 0]
  ------------------
  255|  3.22M|        dav1d_ref_inc(src->ref);
  256|  3.22M|    }
  257|  3.22M|    if (src->frame_hdr_ref) dav1d_ref_inc(src->frame_hdr_ref);
  ------------------
  |  Branch (257:9): [True: 3.22M, False: 0]
  ------------------
  258|  3.22M|    if (src->seq_hdr_ref) dav1d_ref_inc(src->seq_hdr_ref);
  ------------------
  |  Branch (258:9): [True: 3.22M, False: 0]
  ------------------
  259|  3.22M|    if (src->m.user_data.ref) dav1d_ref_inc(src->m.user_data.ref);
  ------------------
  |  Branch (259:9): [True: 0, False: 3.22M]
  ------------------
  260|  3.22M|    if (src->content_light_ref) dav1d_ref_inc(src->content_light_ref);
  ------------------
  |  Branch (260:9): [True: 48.5k, False: 3.17M]
  ------------------
  261|  3.22M|    if (src->mastering_display_ref) dav1d_ref_inc(src->mastering_display_ref);
  ------------------
  |  Branch (261:9): [True: 4.24k, False: 3.21M]
  ------------------
  262|  3.22M|    if (src->itut_t35_ref) dav1d_ref_inc(src->itut_t35_ref);
  ------------------
  |  Branch (262:9): [True: 5.47k, False: 3.21M]
  ------------------
  263|  3.22M|    *dst = *src;
  264|  3.22M|}
dav1d_picture_move_ref:
  266|   127k|void dav1d_picture_move_ref(Dav1dPicture *const dst, Dav1dPicture *const src) {
  267|   127k|    assert(dst != NULL);
  ------------------
  |  Branch (267:5): [True: 127k, False: 0]
  ------------------
  268|   127k|    assert(dst->data[0] == NULL);
  ------------------
  |  Branch (268:5): [True: 127k, False: 0]
  ------------------
  269|   127k|    assert(src != NULL);
  ------------------
  |  Branch (269:5): [True: 127k, False: 0]
  ------------------
  270|       |
  271|   127k|    if (src->ref)
  ------------------
  |  Branch (271:9): [True: 127k, False: 0]
  ------------------
  272|   127k|        assert(src->data[0] != NULL);
  ------------------
  |  Branch (272:9): [True: 127k, False: 0]
  ------------------
  273|       |
  274|   127k|    *dst = *src;
  275|   127k|    memset(src, 0, sizeof(*src));
  276|   127k|}
dav1d_thread_picture_ref:
  280|  2.96M|{
  281|  2.96M|    dav1d_picture_ref(&dst->p, &src->p);
  282|  2.96M|    dst->visible = src->visible;
  283|  2.96M|    dst->showable = src->showable;
  284|  2.96M|    dst->progress = src->progress;
  285|  2.96M|    dst->flags = src->flags;
  286|  2.96M|}
dav1d_picture_unref_internal:
  299|  3.74M|void dav1d_picture_unref_internal(Dav1dPicture *const p) {
  300|  3.74M|    validate_input(p != NULL);
  ------------------
  |  |   59|  3.74M|#define validate_input(x) validate_input_or_ret(x, )
  |  |  ------------------
  |  |  |  |   52|  3.74M|    if (!(x)) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (52:9): [True: 0, False: 3.74M]
  |  |  |  |  ------------------
  |  |  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  |  |  ------------------
  |  |  |  |   54|      0|                    #x, __func__); \
  |  |  |  |   55|      0|        debug_abort(); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   39|      0|#define debug_abort abort
  |  |  |  |  ------------------
  |  |  |  |   56|      0|        return r; \
  |  |  |  |   57|      0|    }
  |  |  ------------------
  ------------------
  301|       |
  302|  3.74M|    if (p->ref) {
  ------------------
  |  Branch (302:9): [True: 3.53M, False: 215k]
  ------------------
  303|  3.53M|        validate_input(p->data[0] != NULL);
  ------------------
  |  |   59|  3.53M|#define validate_input(x) validate_input_or_ret(x, )
  |  |  ------------------
  |  |  |  |   52|  3.53M|    if (!(x)) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (52:9): [True: 0, False: 3.53M]
  |  |  |  |  ------------------
  |  |  |  |   53|      0|        debug_print("Input validation check \'%s\' failed in %s!\n", \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|      0|#define debug_print(...) fprintf(stderr, __VA_ARGS__)
  |  |  |  |  ------------------
  |  |  |  |   54|      0|                    #x, __func__); \
  |  |  |  |   55|      0|        debug_abort(); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   39|      0|#define debug_abort abort
  |  |  |  |  ------------------
  |  |  |  |   56|      0|        return r; \
  |  |  |  |   57|      0|    }
  |  |  ------------------
  ------------------
  304|  3.53M|        dav1d_ref_dec(&p->ref);
  305|  3.53M|    }
  306|  3.74M|    dav1d_ref_dec(&p->seq_hdr_ref);
  307|  3.74M|    dav1d_ref_dec(&p->frame_hdr_ref);
  308|  3.74M|    dav1d_ref_dec(&p->m.user_data.ref);
  309|  3.74M|    dav1d_ref_dec(&p->content_light_ref);
  310|  3.74M|    dav1d_ref_dec(&p->mastering_display_ref);
  311|  3.74M|    dav1d_ref_dec(&p->itut_t35_ref);
  312|  3.74M|    memset(p, 0, sizeof(*p));
  313|  3.74M|    dav1d_data_props_set_defaults(&p->m);
  314|  3.74M|}
dav1d_thread_picture_unref:
  316|  3.28M|void dav1d_thread_picture_unref(Dav1dThreadPicture *const p) {
  317|  3.28M|    dav1d_picture_unref_internal(&p->p);
  318|       |
  319|       |    p->progress = NULL;
  320|  3.28M|}
dav1d_picture_get_event_flags:
  322|   134k|enum Dav1dEventFlags dav1d_picture_get_event_flags(const Dav1dThreadPicture *const p) {
  323|   134k|    if (!p->flags)
  ------------------
  |  Branch (323:9): [True: 112k, False: 22.2k]
  ------------------
  324|   112k|        return 0;
  325|       |
  326|  22.2k|    enum Dav1dEventFlags flags = 0;
  327|  22.2k|    if (p->flags & PICTURE_FLAG_NEW_SEQUENCE)
  ------------------
  |  Branch (327:9): [True: 12.2k, False: 9.93k]
  ------------------
  328|  12.2k|       flags |= DAV1D_EVENT_FLAG_NEW_SEQUENCE;
  329|  22.2k|    if (p->flags & PICTURE_FLAG_NEW_OP_PARAMS_INFO)
  ------------------
  |  Branch (329:9): [True: 3, False: 22.2k]
  ------------------
  330|      3|       flags |= DAV1D_EVENT_FLAG_NEW_OP_PARAMS_INFO;
  331|       |
  332|  22.2k|    return flags;
  333|   134k|}
picture.c:picture_alloc:
  117|   313k|{
  118|   313k|    if (p->data[0]) {
  ------------------
  |  Branch (118:9): [True: 0, False: 313k]
  ------------------
  119|      0|        dav1d_log(c, "Picture already allocated!\n");
  ------------------
  |  |   44|      0|#define dav1d_log(...) do { } while(0)
  |  |  ------------------
  |  |  |  Branch (44:37): [Folded, False: 0]
  |  |  ------------------
  ------------------
  120|      0|        return -1;
  121|      0|    }
  122|   313k|    assert(bpc > 0 && bpc <= 16);
  ------------------
  |  Branch (122:5): [True: 313k, False: 0]
  |  Branch (122:5): [True: 313k, False: 0]
  ------------------
  123|       |
  124|   313k|    size_t extra = c->n_fc > 1 ? sizeof(atomic_int) * 2 : 0;
  ------------------
  |  Branch (124:20): [True: 313k, False: 0]
  ------------------
  125|   313k|    struct pic_ctx_context *pic_ctx = dav1d_mem_pool_pop(c->pic_ctx_pool, extra +
  126|   313k|                                                         sizeof(struct pic_ctx_context));
  127|   313k|    if (!pic_ctx)
  ------------------
  |  Branch (127:9): [True: 0, False: 313k]
  ------------------
  128|      0|        return DAV1D_ERR(ENOMEM);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  129|       |
  130|   313k|    p->p.w = w;
  131|   313k|    p->p.h = h;
  132|   313k|    p->seq_hdr = seq_hdr;
  133|   313k|    p->frame_hdr = frame_hdr;
  134|   313k|    p->p.layout = seq_hdr->layout;
  135|   313k|    p->p.bpc = bpc;
  136|   313k|    dav1d_data_props_set_defaults(&p->m);
  137|   313k|    const int res = p_allocator->alloc_picture_callback(p, p_allocator->cookie);
  138|   313k|    if (res < 0) {
  ------------------
  |  Branch (138:9): [True: 0, False: 313k]
  ------------------
  139|      0|        dav1d_mem_pool_push(c->pic_ctx_pool, pic_ctx);
  140|      0|        return res;
  141|      0|    }
  142|       |
  143|   313k|    pic_ctx->allocator = *p_allocator;
  144|   313k|    pic_ctx->pic = *p;
  145|   313k|    p->ref = dav1d_ref_init(&pic_ctx->ref, pic_ctx, free_buffer, c->pic_ctx_pool, 0);
  146|       |
  147|   313k|    p->seq_hdr_ref = seq_hdr_ref;
  148|   313k|    if (seq_hdr_ref) dav1d_ref_inc(seq_hdr_ref);
  ------------------
  |  Branch (148:9): [True: 313k, False: 0]
  ------------------
  149|       |
  150|   313k|    p->frame_hdr_ref = frame_hdr_ref;
  151|   313k|    if (frame_hdr_ref) dav1d_ref_inc(frame_hdr_ref);
  ------------------
  |  Branch (151:9): [True: 313k, False: 0]
  ------------------
  152|       |
  153|   313k|    if (extra && extra_ptr)
  ------------------
  |  Branch (153:9): [True: 313k, False: 0]
  |  Branch (153:18): [True: 282k, False: 30.9k]
  ------------------
  154|   282k|        *extra_ptr = &pic_ctx->extra_data;
  155|       |
  156|   313k|    return 0;
  157|   313k|}
picture.c:free_buffer:
   91|   313k|static void free_buffer(const uint8_t *const data, void *const user_data) {
   92|   313k|    struct pic_ctx_context *pic_ctx = (struct pic_ctx_context*)data;
   93|       |
   94|   313k|    pic_ctx->allocator.release_picture_callback(&pic_ctx->pic,
   95|   313k|                                                pic_ctx->allocator.cookie);
   96|   313k|    dav1d_mem_pool_push(user_data, pic_ctx);
   97|   313k|}

dav1d_init_qm_tables:
 1648|      1|COLD void dav1d_init_qm_tables(void) {
 1649|       |    // This function is guaranteed to be called only once
 1650|       |
 1651|     16|    for (int i = 0; i < 15; i++)
  ------------------
  |  Branch (1651:21): [True: 15, False: 1]
  ------------------
 1652|     45|        for (int j = 0; j < 2; j++) {
  ------------------
  |  Branch (1652:25): [True: 30, False: 15]
  ------------------
 1653|       |            // note that the w/h in the assignment is inverted, this is on purpose
 1654|       |            // because we store coefficients transposed
 1655|     30|            dav1d_qm_tbl[i][j][RTX_4X8  ] = qm_tbl_8x4[i][j];
 1656|     30|            dav1d_qm_tbl[i][j][RTX_8X4  ] = qm_tbl_4x8[i][j];
 1657|     30|            dav1d_qm_tbl[i][j][RTX_4X16 ] = qm_tbl_16x4[i][j];
 1658|     30|            dav1d_qm_tbl[i][j][RTX_16X4 ] = qm_tbl_4x16[i][j];
 1659|     30|            dav1d_qm_tbl[i][j][RTX_8X16 ] = qm_tbl_16x8[i][j];
 1660|     30|            dav1d_qm_tbl[i][j][RTX_16X8 ] = qm_tbl_8x16[i][j];
 1661|     30|            dav1d_qm_tbl[i][j][RTX_8X32 ] = qm_tbl_32x8[i][j];
 1662|     30|            dav1d_qm_tbl[i][j][RTX_32X8 ] = qm_tbl_8x32[i][j];
 1663|     30|            dav1d_qm_tbl[i][j][RTX_16X32] = qm_tbl_32x16[i][j];
 1664|     30|            dav1d_qm_tbl[i][j][RTX_32X16] = qm_tbl_16x32[i][j];
 1665|       |
 1666|     30|            dav1d_qm_tbl[i][j][ TX_4X4  ] = qm_tbl_4x4[i][j];
 1667|     30|            dav1d_qm_tbl[i][j][ TX_8X8  ] = qm_tbl_8x8[i][j];
 1668|     30|            dav1d_qm_tbl[i][j][ TX_16X16] = qm_tbl_16x16[i][j];
 1669|     30|            dav1d_qm_tbl[i][j][ TX_32X32] = qm_tbl_32x32[i][j];
 1670|       |
 1671|     30|            untriangle(qm_tbl_32x32[i][j], qm_tbl_32x32_t[i][j], 32);
 1672|     30|            subsample(qm_tbl_4x4[i][j],   &qm_tbl_32x32[i][j][32*3+3], 32, 8, 8);
 1673|     30|            subsample(qm_tbl_8x4[i][j],   &qm_tbl_32x16[i][j][32*1+1], 16, 4, 4);
 1674|     30|            subsample(qm_tbl_8x8[i][j],   &qm_tbl_32x32[i][j][32*1+1], 32, 4, 4);
 1675|     30|            subsample(qm_tbl_16x4[i][j],  &qm_tbl_32x16[i][j][32*1+0], 16, 2, 4);
 1676|     30|            subsample(qm_tbl_16x8[i][j],  &qm_tbl_32x16[i][j][32*0+0], 16, 2, 2);
 1677|     30|            subsample(qm_tbl_16x16[i][j], &qm_tbl_32x32[i][j][32*0+0], 32, 2, 2);
 1678|     30|            subsample(qm_tbl_32x8[i][j],  &qm_tbl_32x16[i][j][32*0+0], 16, 1, 2);
 1679|     30|            transpose(qm_tbl_4x8[i][j], qm_tbl_8x4[i][j], 8, 4);
 1680|     30|            transpose(qm_tbl_4x16[i][j], qm_tbl_16x4[i][j], 16, 4);
 1681|     30|            transpose(qm_tbl_8x16[i][j], qm_tbl_16x8[i][j], 16, 8);
 1682|     30|            transpose(qm_tbl_8x32[i][j], qm_tbl_32x8[i][j], 32, 8);
 1683|     30|            transpose(qm_tbl_16x32[i][j], qm_tbl_32x16[i][j], 32, 16);
 1684|       |
 1685|     30|            dav1d_qm_tbl[i][j][ TX_64X64] = dav1d_qm_tbl[i][j][ TX_32X32];
 1686|     30|            dav1d_qm_tbl[i][j][RTX_64X32] = dav1d_qm_tbl[i][j][ TX_32X32];
 1687|     30|            dav1d_qm_tbl[i][j][RTX_64X16] = dav1d_qm_tbl[i][j][RTX_32X16];
 1688|     30|            dav1d_qm_tbl[i][j][RTX_32X64] = dav1d_qm_tbl[i][j][ TX_32X32];
 1689|     30|            dav1d_qm_tbl[i][j][RTX_16X64] = dav1d_qm_tbl[i][j][RTX_16X32];
 1690|     30|        }
 1691|       |
 1692|       |    // dav1d_qm_tbl[15][*][*] == NULL
 1693|      1|}
qm.c:untriangle:
 1635|     30|static void untriangle(uint8_t *dst, const uint8_t *src, const int sz) {
 1636|    990|    for (int y = 0; y < sz; y++) {
  ------------------
  |  Branch (1636:21): [True: 960, False: 30]
  ------------------
 1637|    960|        memcpy(dst, src, y + 1);
 1638|    960|        const uint8_t *src_ptr = &src[y];
 1639|  15.8k|        for (int x = y + 1; x < sz; x++) {
  ------------------
  |  Branch (1639:29): [True: 14.8k, False: 960]
  ------------------
 1640|  14.8k|            src_ptr += x;
 1641|  14.8k|            dst[x] = *src_ptr;
 1642|  14.8k|        }
 1643|    960|        dst += sz;
 1644|    960|        src += y + 1;
 1645|    960|    }
 1646|     30|}
qm.c:subsample:
 1621|    210|{
 1622|  1.77k|    for (int y = 0; y < h; y += vstep)
  ------------------
  |  Branch (1622:21): [True: 1.56k, False: 210]
  ------------------
 1623|  26.0k|        for (int x = 0; x < 32; x += hstep)
  ------------------
  |  Branch (1623:25): [True: 24.4k, False: 1.56k]
  ------------------
 1624|  24.4k|            *dst++ = src[y * 32 + x];
 1625|    210|}
qm.c:transpose:
 1629|    150|{
 1630|  1.35k|    for (int y = 0, y_off = 0; y < h; y++, y_off += w)
  ------------------
  |  Branch (1630:32): [True: 1.20k, False: 150]
  ------------------
 1631|  30.9k|        for (int x = 0, x_off = 0; x < w; x++, x_off += h)
  ------------------
  |  Branch (1631:36): [True: 29.7k, False: 1.20k]
  ------------------
 1632|  29.7k|            dst[x_off + y] = src[y_off + x];
 1633|    150|}

dav1d_read_coef_blocks_8bpc:
  826|  5.08M|{
  827|  5.08M|    const Dav1dFrameContext *const f = t->f;
  828|  5.08M|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
  829|  5.08M|    const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
  830|  5.08M|    const int bx4 = t->bx & 31, by4 = t->by & 31;
  831|  5.08M|    const int cbx4 = bx4 >> ss_hor, cby4 = by4 >> ss_ver;
  832|  5.08M|    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
  833|  5.08M|    const int bw4 = b_dim[0], bh4 = b_dim[1];
  834|  5.08M|    const int cbw4 = (bw4 + ss_hor) >> ss_hor, cbh4 = (bh4 + ss_ver) >> ss_ver;
  835|  5.08M|    const int has_chroma = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400 &&
  ------------------
  |  Branch (835:28): [True: 3.73M, False: 1.34M]
  ------------------
  836|  3.73M|                           (bw4 > ss_hor || t->bx & 1) &&
  ------------------
  |  Branch (836:29): [True: 3.40M, False: 330k]
  |  Branch (836:45): [True: 165k, False: 165k]
  ------------------
  837|  3.57M|                           (bh4 > ss_ver || t->by & 1);
  ------------------
  |  Branch (837:29): [True: 3.29M, False: 279k]
  |  Branch (837:45): [True: 139k, False: 139k]
  ------------------
  838|       |
  839|  5.08M|    if (b->skip) {
  ------------------
  |  Branch (839:9): [True: 2.55M, False: 2.53M]
  ------------------
  840|  2.55M|        BlockContext *const a = t->a;
  841|  2.55M|        dav1d_memset_pow2[b_dim[2]](&a->lcoef[bx4], 0x40);
  842|  2.55M|        dav1d_memset_pow2[b_dim[3]](&t->l.lcoef[by4], 0x40);
  843|  2.55M|        if (has_chroma) {
  ------------------
  |  Branch (843:13): [True: 1.33M, False: 1.21M]
  ------------------
  844|  1.33M|            dav1d_memset_pow2_fn memset_cw = dav1d_memset_pow2[ulog2(cbw4)];
  845|  1.33M|            dav1d_memset_pow2_fn memset_ch = dav1d_memset_pow2[ulog2(cbh4)];
  846|  1.33M|            memset_cw(&a->ccoef[0][cbx4], 0x40);
  847|  1.33M|            memset_cw(&a->ccoef[1][cbx4], 0x40);
  848|  1.33M|            memset_ch(&t->l.ccoef[0][cby4], 0x40);
  849|  1.33M|            memset_ch(&t->l.ccoef[1][cby4], 0x40);
  850|  1.33M|        }
  851|  2.55M|        return;
  852|  2.55M|    }
  853|       |
  854|  2.53M|    Dav1dTileState *const ts = t->ts;
  855|  2.53M|    const int w4 = imin(bw4, f->bw - t->bx), h4 = imin(bh4, f->bh - t->by);
  856|  2.53M|    const int cw4 = (w4 + ss_hor) >> ss_hor, ch4 = (h4 + ss_ver) >> ss_ver;
  857|  2.53M|    assert(t->frame_thread.pass == 1);
  ------------------
  |  Branch (857:5): [True: 2.53M, False: 18.4E]
  ------------------
  858|  2.53M|    assert(!b->skip);
  ------------------
  |  Branch (858:5): [True: 2.53M, False: 18.4E]
  ------------------
  859|  2.53M|    const TxfmInfo *const uv_t_dim = &dav1d_txfm_dimensions[b->uvtx];
  860|  2.53M|    const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[b->intra ? b->tx : b->max_ytx];
  ------------------
  |  Branch (860:58): [True: 1.80M, False: 733k]
  ------------------
  861|  2.53M|    const uint16_t tx_split[2] = { b->tx_split0, b->tx_split1 };
  862|       |
  863|  5.11M|    for (int init_y = 0; init_y < h4; init_y += 16) {
  ------------------
  |  Branch (863:26): [True: 2.57M, False: 2.53M]
  ------------------
  864|  2.57M|        const int sub_h4 = imin(h4, 16 + init_y);
  865|  5.21M|        for (int init_x = 0; init_x < w4; init_x += 16) {
  ------------------
  |  Branch (865:30): [True: 2.63M, False: 2.57M]
  ------------------
  866|  2.63M|            const int sub_w4 = imin(w4, init_x + 16);
  867|  2.63M|            int y_off = !!init_y, y, x;
  868|  5.45M|            for (y = init_y, t->by += init_y; y < sub_h4;
  ------------------
  |  Branch (868:47): [True: 2.81M, False: 2.63M]
  ------------------
  869|  2.81M|                 y += t_dim->h, t->by += t_dim->h, y_off++)
  870|  2.81M|            {
  871|  2.81M|                int x_off = !!init_x;
  872|  6.28M|                for (x = init_x, t->bx += init_x; x < sub_w4;
  ------------------
  |  Branch (872:51): [True: 3.47M, False: 2.81M]
  ------------------
  873|  3.47M|                     x += t_dim->w, t->bx += t_dim->w, x_off++)
  874|  3.47M|                {
  875|  3.47M|                    if (!b->intra) {
  ------------------
  |  Branch (875:25): [True: 790k, False: 2.68M]
  ------------------
  876|   790k|                        read_coef_tree(t, bs, b, b->max_ytx, 0, tx_split,
  877|   790k|                                       x_off, y_off, NULL);
  878|  2.68M|                    } else {
  879|  2.68M|                        uint8_t cf_ctx = 0x40;
  880|  2.68M|                        enum TxfmType txtp;
  881|  2.68M|                        const int eob =
  882|  2.68M|                            decode_coefs(t, &t->a->lcoef[bx4 + x],
  883|  2.68M|                                         &t->l.lcoef[by4 + y], b->tx, bs, b, 1,
  884|  2.68M|                                         0, ts->frame_thread[1].cf, &txtp, &cf_ctx);
  885|  2.68M|                        if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  2.68M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 2.68M]
  |  |  ------------------
  |  |   35|  2.68M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  2.68M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  886|      0|                            printf("Post-y-cf-blk[tx=%d,txtp=%d,eob=%d]: r=%d\n",
  887|      0|                                   b->tx, txtp, eob, ts->msac.rng);
  888|  2.68M|                        *ts->frame_thread[1].cbi++ = eob * (1 << 5) + txtp;
  889|  2.68M|                        ts->frame_thread[1].cf += imin(t_dim->w, 8) * imin(t_dim->h, 8) * 16;
  890|  2.68M|                        dav1d_memset_likely_pow2(&t->a->lcoef[bx4 + x], cf_ctx, imin(t_dim->w, f->bw - t->bx));
  891|  2.68M|                        dav1d_memset_likely_pow2(&t->l.lcoef[by4 + y], cf_ctx, imin(t_dim->h, f->bh - t->by));
  892|  2.68M|                    }
  893|  3.47M|                }
  894|  2.81M|                t->bx -= x;
  895|  2.81M|            }
  896|  2.63M|            t->by -= y;
  897|       |
  898|  2.63M|            if (!has_chroma) continue;
  ------------------
  |  Branch (898:17): [True: 449k, False: 2.18M]
  ------------------
  899|       |
  900|  2.18M|            const int sub_ch4 = imin(ch4, (init_y + 16) >> ss_ver);
  901|  2.18M|            const int sub_cw4 = imin(cw4, (init_x + 16) >> ss_hor);
  902|  6.55M|            for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (902:30): [True: 4.36M, False: 2.18M]
  ------------------
  903|  9.00M|                for (y = init_y >> ss_ver, t->by += init_y; y < sub_ch4;
  ------------------
  |  Branch (903:61): [True: 4.64M, False: 4.36M]
  ------------------
  904|  4.64M|                     y += uv_t_dim->h, t->by += uv_t_dim->h << ss_ver)
  905|  4.64M|                {
  906|  10.0M|                    for (x = init_x >> ss_hor, t->bx += init_x; x < sub_cw4;
  ------------------
  |  Branch (906:65): [True: 5.41M, False: 4.64M]
  ------------------
  907|  5.41M|                         x += uv_t_dim->w, t->bx += uv_t_dim->w << ss_hor)
  908|  5.41M|                    {
  909|  5.41M|                        uint8_t cf_ctx = 0x40;
  910|  5.41M|                        enum TxfmType txtp;
  911|  5.41M|                        if (!b->intra)
  ------------------
  |  Branch (911:29): [True: 1.12M, False: 4.29M]
  ------------------
  912|  1.12M|                            txtp = t->scratch.txtp_map[(by4 + (y << ss_ver)) * 32 +
  913|  1.12M|                                                        bx4 + (x << ss_hor)];
  914|  5.41M|                        const int eob =
  915|  5.41M|                            decode_coefs(t, &t->a->ccoef[pl][cbx4 + x],
  916|  5.41M|                                         &t->l.ccoef[pl][cby4 + y], b->uvtx, bs,
  917|  5.41M|                                         b, b->intra, 1 + pl, ts->frame_thread[1].cf,
  918|  5.41M|                                         &txtp, &cf_ctx);
  919|  5.41M|                        if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  5.41M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 5.41M]
  |  |  ------------------
  |  |   35|  5.41M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  5.41M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  920|      0|                            printf("Post-uv-cf-blk[pl=%d,tx=%d,"
  921|      0|                                   "txtp=%d,eob=%d]: r=%d\n",
  922|      0|                                   pl, b->uvtx, txtp, eob, ts->msac.rng);
  923|  5.41M|                        *ts->frame_thread[1].cbi++ = eob * (1 << 5) + txtp;
  924|  5.41M|                        ts->frame_thread[1].cf += uv_t_dim->w * uv_t_dim->h * 16;
  925|  5.41M|                        int ctw = imin(uv_t_dim->w, (f->bw - t->bx + ss_hor) >> ss_hor);
  926|  5.41M|                        int cth = imin(uv_t_dim->h, (f->bh - t->by + ss_ver) >> ss_ver);
  927|  5.41M|                        dav1d_memset_likely_pow2(&t->a->ccoef[pl][cbx4 + x], cf_ctx, ctw);
  928|  5.41M|                        dav1d_memset_likely_pow2(&t->l.ccoef[pl][cby4 + y], cf_ctx, cth);
  929|  5.41M|                    }
  930|  4.64M|                    t->bx -= x << ss_hor;
  931|  4.64M|                }
  932|  4.36M|                t->by -= y << ss_ver;
  933|  4.36M|            }
  934|  2.18M|        }
  935|  2.57M|    }
  936|  2.53M|}
dav1d_recon_b_intra_8bpc:
 1179|  1.10M|{
 1180|  1.10M|    Dav1dTileState *const ts = t->ts;
 1181|  1.10M|    const Dav1dFrameContext *const f = t->f;
 1182|  1.10M|    const Dav1dDSPContext *const dsp = f->dsp;
 1183|  1.10M|    const int bx4 = t->bx & 31, by4 = t->by & 31;
 1184|  1.10M|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
 1185|  1.10M|    const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
 1186|  1.10M|    const int cbx4 = bx4 >> ss_hor, cby4 = by4 >> ss_ver;
 1187|  1.10M|    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
 1188|  1.10M|    const int bw4 = b_dim[0], bh4 = b_dim[1];
 1189|  1.10M|    const int w4 = imin(bw4, f->bw - t->bx), h4 = imin(bh4, f->bh - t->by);
 1190|  1.10M|    const int cw4 = (w4 + ss_hor) >> ss_hor, ch4 = (h4 + ss_ver) >> ss_ver;
 1191|  1.10M|    const int has_chroma = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400 &&
  ------------------
  |  Branch (1191:28): [True: 854k, False: 246k]
  ------------------
 1192|   854k|                           (bw4 > ss_hor || t->bx & 1) &&
  ------------------
  |  Branch (1192:29): [True: 705k, False: 148k]
  |  Branch (1192:45): [True: 74.6k, False: 74.2k]
  ------------------
 1193|   780k|                           (bh4 > ss_ver || t->by & 1);
  ------------------
  |  Branch (1193:29): [True: 636k, False: 144k]
  |  Branch (1193:45): [True: 72.1k, False: 72.1k]
  ------------------
 1194|  1.10M|    const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[b->tx];
 1195|  1.10M|    const TxfmInfo *const uv_t_dim = &dav1d_txfm_dimensions[b->uvtx];
 1196|       |
 1197|       |    // coefficient coding
 1198|  1.10M|    pixel *const edge = bitfn(t->scratch.edge) + 128;
  ------------------
  |  |   51|  1.10M|#define bitfn(x) x##_8bpc
  ------------------
 1199|  1.10M|    const int cbw4 = (bw4 + ss_hor) >> ss_hor, cbh4 = (bh4 + ss_ver) >> ss_ver;
 1200|       |
 1201|  1.10M|    const int intra_edge_filter_flag = f->seq_hdr->intra_edge_filter << 10;
 1202|       |
 1203|  2.25M|    for (int init_y = 0; init_y < h4; init_y += 16) {
  ------------------
  |  Branch (1203:26): [True: 1.15M, False: 1.10M]
  ------------------
 1204|  1.15M|        const int sub_h4 = imin(h4, 16 + init_y);
 1205|  1.15M|        const int sub_ch4 = imin(ch4, (init_y + 16) >> ss_ver);
 1206|  2.39M|        for (int init_x = 0; init_x < w4; init_x += 16) {
  ------------------
  |  Branch (1206:30): [True: 1.24M, False: 1.15M]
  ------------------
 1207|  1.24M|            if (b->pal_sz[0]) {
  ------------------
  |  Branch (1207:17): [True: 25.4k, False: 1.21M]
  ------------------
 1208|  25.4k|                pixel *dst = ((pixel *) f->cur.data[0]) +
 1209|  25.4k|                             4 * (t->by * PXSTRIDE(f->cur.stride[0]) + t->bx);
  ------------------
  |  |   53|  25.4k|#define PXSTRIDE(x) (x)
  ------------------
 1210|  25.4k|                const uint8_t *pal_idx;
 1211|  25.4k|                if (t->frame_thread.pass) {
  ------------------
  |  Branch (1211:21): [True: 25.4k, False: 18.4E]
  ------------------
 1212|  25.4k|                    const int p = t->frame_thread.pass & 1;
 1213|  25.4k|                    assert(ts->frame_thread[p].pal_idx);
  ------------------
  |  Branch (1213:21): [True: 25.4k, False: 18.4E]
  ------------------
 1214|  25.4k|                    pal_idx = ts->frame_thread[p].pal_idx;
 1215|  25.4k|                    ts->frame_thread[p].pal_idx += bw4 * bh4 * 8;
 1216|  18.4E|                } else {
 1217|  18.4E|                    pal_idx = t->scratch.pal_idx_y;
 1218|  18.4E|                }
 1219|  25.4k|                const pixel *const pal = t->frame_thread.pass ?
  ------------------
  |  Branch (1219:42): [True: 25.4k, False: 4]
  ------------------
 1220|  25.4k|                    f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
 1221|  25.4k|                                        ((t->bx >> 1) + (t->by & 1))][0] :
 1222|  25.4k|                    bytefn(t->scratch.pal)[0];
  ------------------
  |  |   87|      4|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  25.4k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 1223|  25.4k|                f->dsp->ipred.pal_pred(dst, f->cur.stride[0], pal,
 1224|  25.4k|                                       pal_idx, bw4 * 4, bh4 * 4);
 1225|  25.4k|                if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   34|  25.4k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 25.4k]
  |  |  ------------------
  |  |   35|  25.4k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  25.4k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                              if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1226|      0|                    hex_dump(dst, PXSTRIDE(f->cur.stride[0]),
  ------------------
  |  |   53|      0|#define PXSTRIDE(x) (x)
  ------------------
 1227|      0|                             bw4 * 4, bh4 * 4, "y-pal-pred");
 1228|  25.4k|            }
 1229|       |
 1230|  1.24M|            const int intra_flags = (sm_flag(t->a, bx4) |
 1231|  1.24M|                                     sm_flag(&t->l, by4) |
 1232|  1.24M|                                     intra_edge_filter_flag);
 1233|  1.24M|            const int sb_has_tr = init_x + 16 < w4 ? 1 : init_y ? 0 :
  ------------------
  |  Branch (1233:35): [True: 92.2k, False: 1.15M]
  |  Branch (1233:58): [True: 49.5k, False: 1.10M]
  ------------------
 1234|  1.15M|                              intra_edge_flags & EDGE_I444_TOP_HAS_RIGHT;
 1235|  1.24M|            const int sb_has_bl = init_x ? 0 : init_y + 16 < h4 ? 1 :
  ------------------
  |  Branch (1235:35): [True: 92.2k, False: 1.15M]
  |  Branch (1235:48): [True: 49.5k, False: 1.10M]
  ------------------
 1236|  1.15M|                              intra_edge_flags & EDGE_I444_LEFT_HAS_BOTTOM;
 1237|  1.24M|            int y, x;
 1238|  1.24M|            const int sub_w4 = imin(w4, init_x + 16);
 1239|  2.85M|            for (y = init_y, t->by += init_y; y < sub_h4;
  ------------------
  |  Branch (1239:47): [True: 1.61M, False: 1.24M]
  ------------------
 1240|  1.61M|                 y += t_dim->h, t->by += t_dim->h)
 1241|  1.61M|            {
 1242|  1.61M|                pixel *dst = ((pixel *) f->cur.data[0]) +
 1243|  1.61M|                               4 * (t->by * PXSTRIDE(f->cur.stride[0]) +
  ------------------
  |  |   53|  1.61M|#define PXSTRIDE(x) (x)
  ------------------
 1244|  1.61M|                                    t->bx + init_x);
 1245|  5.68M|                for (x = init_x, t->bx += init_x; x < sub_w4;
  ------------------
  |  Branch (1245:51): [True: 4.07M, False: 1.61M]
  ------------------
 1246|  4.07M|                     x += t_dim->w, t->bx += t_dim->w)
 1247|  4.07M|                {
 1248|  4.07M|                    if (b->pal_sz[0]) goto skip_y_pred;
  ------------------
  |  Branch (1248:25): [True: 32.1k, False: 4.04M]
  ------------------
 1249|       |
 1250|  4.04M|                    int angle = b->y_angle;
 1251|  4.04M|                    const enum EdgeFlags edge_flags =
 1252|  4.04M|                        (((y > init_y || !sb_has_tr) && (x + t_dim->w >= sub_w4)) ?
  ------------------
  |  Branch (1252:28): [True: 2.48M, False: 1.55M]
  |  Branch (1252:42): [True: 531k, False: 1.02M]
  |  Branch (1252:57): [True: 769k, False: 2.24M]
  ------------------
 1253|  3.27M|                             0 : EDGE_I444_TOP_HAS_RIGHT) |
 1254|  4.04M|                        ((x > init_x || (!sb_has_bl && y + t_dim->h >= sub_h4)) ?
  ------------------
  |  Branch (1254:27): [True: 2.45M, False: 1.58M]
  |  Branch (1254:42): [True: 1.07M, False: 513k]
  |  Branch (1254:56): [True: 805k, False: 268k]
  ------------------
 1255|  3.25M|                             0 : EDGE_I444_LEFT_HAS_BOTTOM);
 1256|  4.04M|                    const pixel *top_sb_edge = NULL;
 1257|  4.04M|                    if (!(t->by & (f->sb_step - 1))) {
  ------------------
  |  Branch (1257:25): [True: 517k, False: 3.52M]
  ------------------
 1258|   517k|                        top_sb_edge = f->ipred_edge[0];
 1259|   517k|                        const int sby = t->by >> f->sb_shift;
 1260|   517k|                        top_sb_edge += f->sb128w * 128 * (sby - 1);
 1261|   517k|                    }
 1262|  4.04M|                    const enum IntraPredMode m =
 1263|  4.04M|                        bytefn(dav1d_prepare_intra_edges)(t->bx,
  ------------------
  |  |   87|  4.04M|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  4.04M|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 1264|  4.04M|                                                          t->bx > ts->tiling.col_start,
 1265|  4.04M|                                                          t->by,
 1266|  4.04M|                                                          t->by > ts->tiling.row_start,
 1267|  4.04M|                                                          ts->tiling.col_end,
 1268|  4.04M|                                                          ts->tiling.row_end,
 1269|  4.04M|                                                          edge_flags, dst,
 1270|  4.04M|                                                          f->cur.stride[0], top_sb_edge,
 1271|  4.04M|                                                          b->y_mode, &angle,
 1272|  4.04M|                                                          t_dim->w, t_dim->h,
 1273|  4.04M|                                                          f->seq_hdr->intra_edge_filter,
 1274|  4.04M|                                                          edge HIGHBD_CALL_SUFFIX);
 1275|  4.04M|                    dsp->ipred.intra_pred[m](dst, f->cur.stride[0], edge,
 1276|  4.04M|                                             t_dim->w * 4, t_dim->h * 4,
 1277|  4.04M|                                             angle | intra_flags,
 1278|  4.04M|                                             4 * f->bw - 4 * t->bx,
 1279|  4.04M|                                             4 * f->bh - 4 * t->by
 1280|  4.04M|                                             HIGHBD_CALL_SUFFIX);
 1281|       |
 1282|  4.04M|                    if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   34|  4.04M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 4.04M]
  |  |  ------------------
  |  |   35|  4.04M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  4.04M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                                  if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1283|      0|                        hex_dump(edge - t_dim->h * 4, t_dim->h * 4,
 1284|      0|                                 t_dim->h * 4, 2, "l");
 1285|      0|                        hex_dump(edge, 0, 1, 1, "tl");
 1286|      0|                        hex_dump(edge + 1, t_dim->w * 4,
 1287|      0|                                 t_dim->w * 4, 2, "t");
 1288|      0|                        hex_dump(dst, f->cur.stride[0],
 1289|      0|                                 t_dim->w * 4, t_dim->h * 4, "y-intra-pred");
 1290|      0|                    }
 1291|       |
 1292|  4.07M|                skip_y_pred: {}
 1293|  4.07M|                    if (!b->skip) {
  ------------------
  |  Branch (1293:25): [True: 1.04M, False: 3.02M]
  ------------------
 1294|  1.04M|                        coef *cf;
 1295|  1.04M|                        int eob;
 1296|  1.04M|                        enum TxfmType txtp;
 1297|  1.04M|                        if (t->frame_thread.pass) {
  ------------------
  |  Branch (1297:29): [True: 1.04M, False: 18.4E]
  ------------------
 1298|  1.04M|                            const int p = t->frame_thread.pass & 1;
 1299|  1.04M|                            const int cbi = *ts->frame_thread[p].cbi++;
 1300|  1.04M|                            cf = ts->frame_thread[p].cf;
 1301|  1.04M|                            ts->frame_thread[p].cf += imin(t_dim->w, 8) * imin(t_dim->h, 8) * 16;
 1302|  1.04M|                            eob  = cbi >> 5;
 1303|  1.04M|                            txtp = cbi & 0x1f;
 1304|  18.4E|                        } else {
 1305|  18.4E|                            uint8_t cf_ctx;
 1306|  18.4E|                            cf = bitfn(t->cf);
  ------------------
  |  |   51|  18.4E|#define bitfn(x) x##_8bpc
  ------------------
 1307|  18.4E|                            eob = decode_coefs(t, &t->a->lcoef[bx4 + x],
 1308|  18.4E|                                               &t->l.lcoef[by4 + y], b->tx, bs,
 1309|  18.4E|                                               b, 1, 0, cf, &txtp, &cf_ctx);
 1310|  18.4E|                            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  18.4E|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 18.4E]
  |  |  ------------------
  |  |   35|  18.4E|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  18.4E|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1311|      0|                                printf("Post-y-cf-blk[tx=%d,txtp=%d,eob=%d]: r=%d\n",
 1312|      0|                                       b->tx, txtp, eob, ts->msac.rng);
 1313|  18.4E|                            dav1d_memset_likely_pow2(&t->a->lcoef[bx4 + x], cf_ctx, imin(t_dim->w, f->bw - t->bx));
 1314|  18.4E|                            dav1d_memset_likely_pow2(&t->l.lcoef[by4 + y], cf_ctx, imin(t_dim->h, f->bh - t->by));
 1315|  18.4E|                        }
 1316|  1.04M|                        if (eob >= 0) {
  ------------------
  |  Branch (1316:29): [True: 723k, False: 325k]
  ------------------
 1317|   723k|                            if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   34|   723k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 723k]
  |  |  ------------------
  |  |   35|   723k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   723k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                                          if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1318|      0|                                coef_dump(cf, imin(t_dim->h, 8) * 4,
 1319|      0|                                          imin(t_dim->w, 8) * 4, 3, "dq");
 1320|   723k|                            dsp->itx.itxfm_add[b->tx]
 1321|   723k|                                              [txtp](dst,
 1322|   723k|                                                     f->cur.stride[0],
 1323|   723k|                                                     cf, eob HIGHBD_CALL_SUFFIX);
 1324|   723k|                            if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   34|   723k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 723k]
  |  |  ------------------
  |  |   35|   723k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   723k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                                          if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1325|      0|                                hex_dump(dst, f->cur.stride[0],
 1326|      0|                                         t_dim->w * 4, t_dim->h * 4, "recon");
 1327|   723k|                        }
 1328|  3.02M|                    } else if (!t->frame_thread.pass) {
  ------------------
  |  Branch (1328:32): [True: 0, False: 3.02M]
  ------------------
 1329|      0|                        dav1d_memset_pow2[t_dim->lw](&t->a->lcoef[bx4 + x], 0x40);
 1330|      0|                        dav1d_memset_pow2[t_dim->lh](&t->l.lcoef[by4 + y], 0x40);
 1331|      0|                    }
 1332|  4.07M|                    dst += 4 * t_dim->w;
 1333|  4.07M|                }
 1334|  1.61M|                t->bx -= x;
 1335|  1.61M|            }
 1336|  1.24M|            t->by -= y;
 1337|       |
 1338|  1.24M|            if (!has_chroma) continue;
  ------------------
  |  Branch (1338:17): [True: 431k, False: 812k]
  ------------------
 1339|       |
 1340|   812k|            const ptrdiff_t stride = f->cur.stride[1];
 1341|       |
 1342|   812k|            if (b->uv_mode == CFL_PRED) {
  ------------------
  |  Branch (1342:17): [True: 65.5k, False: 747k]
  ------------------
 1343|  65.5k|                assert(!init_x && !init_y);
  ------------------
  |  Branch (1343:17): [True: 65.5k, False: 18.4E]
  |  Branch (1343:17): [True: 65.5k, False: 0]
  ------------------
 1344|       |
 1345|  65.5k|                int16_t *const ac = t->scratch.ac;
 1346|  65.5k|                pixel *y_src = ((pixel *) f->cur.data[0]) + 4 * (t->bx & ~ss_hor) +
 1347|  65.5k|                                 4 * (t->by & ~ss_ver) * PXSTRIDE(f->cur.stride[0]);
  ------------------
  |  |   53|  65.5k|#define PXSTRIDE(x) (x)
  ------------------
 1348|  65.5k|                const ptrdiff_t uv_off = 4 * ((t->bx >> ss_hor) +
 1349|  65.5k|                                              (t->by >> ss_ver) * PXSTRIDE(stride));
  ------------------
  |  |   53|  65.5k|#define PXSTRIDE(x) (x)
  ------------------
 1350|  65.5k|                pixel *const uv_dst[2] = { ((pixel *) f->cur.data[1]) + uv_off,
 1351|  65.5k|                                           ((pixel *) f->cur.data[2]) + uv_off };
 1352|       |
 1353|  65.5k|                const int furthest_r =
 1354|  65.5k|                    ((cw4 << ss_hor) + t_dim->w - 1) & ~(t_dim->w - 1);
 1355|  65.5k|                const int furthest_b =
 1356|  65.5k|                    ((ch4 << ss_ver) + t_dim->h - 1) & ~(t_dim->h - 1);
 1357|  65.5k|                dsp->ipred.cfl_ac[f->cur.p.layout - 1](ac, y_src, f->cur.stride[0],
 1358|  65.5k|                                                         cbw4 - (furthest_r >> ss_hor),
 1359|  65.5k|                                                         cbh4 - (furthest_b >> ss_ver),
 1360|  65.5k|                                                         cbw4 * 4, cbh4 * 4);
 1361|   196k|                for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1361:34): [True: 131k, False: 65.5k]
  ------------------
 1362|   131k|                    if (!b->cfl_alpha[pl]) continue;
  ------------------
  |  Branch (1362:25): [True: 27.5k, False: 103k]
  ------------------
 1363|   103k|                    int angle = 0;
 1364|   103k|                    const pixel *top_sb_edge = NULL;
 1365|   103k|                    if (!((t->by & ~ss_ver) & (f->sb_step - 1))) {
  ------------------
  |  Branch (1365:25): [True: 13.5k, False: 90.0k]
  ------------------
 1366|  13.5k|                        top_sb_edge = f->ipred_edge[pl + 1];
 1367|  13.5k|                        const int sby = t->by >> f->sb_shift;
 1368|  13.5k|                        top_sb_edge += f->sb128w * 128 * (sby - 1);
 1369|  13.5k|                    }
 1370|   103k|                    const int xpos = t->bx >> ss_hor, ypos = t->by >> ss_ver;
 1371|   103k|                    const int xstart = ts->tiling.col_start >> ss_hor;
 1372|   103k|                    const int ystart = ts->tiling.row_start >> ss_ver;
 1373|   103k|                    const enum IntraPredMode m =
 1374|   103k|                        bytefn(dav1d_prepare_intra_edges)(xpos, xpos > xstart,
  ------------------
  |  |   87|   103k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|   103k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 1375|   103k|                                                          ypos, ypos > ystart,
 1376|   103k|                                                          ts->tiling.col_end >> ss_hor,
 1377|   103k|                                                          ts->tiling.row_end >> ss_ver,
 1378|   103k|                                                          0, uv_dst[pl], stride,
 1379|   103k|                                                          top_sb_edge, DC_PRED, &angle,
 1380|   103k|                                                          uv_t_dim->w, uv_t_dim->h, 0,
 1381|   103k|                                                          edge HIGHBD_CALL_SUFFIX);
 1382|   103k|                    dsp->ipred.cfl_pred[m](uv_dst[pl], stride, edge,
 1383|   103k|                                           uv_t_dim->w * 4,
 1384|   103k|                                           uv_t_dim->h * 4,
 1385|   103k|                                           ac, b->cfl_alpha[pl]
 1386|   103k|                                           HIGHBD_CALL_SUFFIX);
 1387|   103k|                }
 1388|  65.5k|                if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   34|  65.5k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 65.5k]
  |  |  ------------------
  |  |   35|  65.5k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  65.5k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                              if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1389|      0|                    ac_dump(ac, 4*cbw4, 4*cbh4, "ac");
 1390|      0|                    hex_dump(uv_dst[0], stride, cbw4 * 4, cbh4 * 4, "u-cfl-pred");
 1391|      0|                    hex_dump(uv_dst[1], stride, cbw4 * 4, cbh4 * 4, "v-cfl-pred");
 1392|      0|                }
 1393|   747k|            } else if (b->pal_sz[1]) {
  ------------------
  |  Branch (1393:24): [True: 6.98k, False: 740k]
  ------------------
 1394|  6.98k|                const ptrdiff_t uv_dstoff = 4 * ((t->bx >> ss_hor) +
 1395|  6.98k|                                              (t->by >> ss_ver) * PXSTRIDE(f->cur.stride[1]));
  ------------------
  |  |   53|  6.98k|#define PXSTRIDE(x) (x)
  ------------------
 1396|  6.98k|                const pixel (*pal)[8];
 1397|  6.98k|                const uint8_t *pal_idx;
 1398|  6.98k|                if (t->frame_thread.pass) {
  ------------------
  |  Branch (1398:21): [True: 6.98k, False: 0]
  ------------------
 1399|  6.98k|                    const int p = t->frame_thread.pass & 1;
 1400|  6.98k|                    assert(ts->frame_thread[p].pal_idx);
  ------------------
  |  Branch (1400:21): [True: 6.98k, False: 0]
  ------------------
 1401|  6.98k|                    pal = f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
 1402|  6.98k|                                              ((t->bx >> 1) + (t->by & 1))];
 1403|  6.98k|                    pal_idx = ts->frame_thread[p].pal_idx;
 1404|  6.98k|                    ts->frame_thread[p].pal_idx += cbw4 * cbh4 * 8;
 1405|  6.98k|                } else {
 1406|      0|                    pal = bytefn(t->scratch.pal);
  ------------------
  |  |   87|      0|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|      0|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 1407|      0|                    pal_idx = t->scratch.pal_idx_uv;
 1408|      0|                }
 1409|       |
 1410|  6.98k|                f->dsp->ipred.pal_pred(((pixel *) f->cur.data[1]) + uv_dstoff,
 1411|  6.98k|                                       f->cur.stride[1], pal[1],
 1412|  6.98k|                                       pal_idx, cbw4 * 4, cbh4 * 4);
 1413|  6.98k|                f->dsp->ipred.pal_pred(((pixel *) f->cur.data[2]) + uv_dstoff,
 1414|  6.98k|                                       f->cur.stride[1], pal[2],
 1415|  6.98k|                                       pal_idx, cbw4 * 4, cbh4 * 4);
 1416|  6.98k|                if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   34|  6.98k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 6.98k]
  |  |  ------------------
  |  |   35|  6.98k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  6.98k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                              if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1417|      0|                    hex_dump(((pixel *) f->cur.data[1]) + uv_dstoff,
 1418|      0|                             PXSTRIDE(f->cur.stride[1]),
  ------------------
  |  |   53|      0|#define PXSTRIDE(x) (x)
  ------------------
 1419|      0|                             cbw4 * 4, cbh4 * 4, "u-pal-pred");
 1420|      0|                    hex_dump(((pixel *) f->cur.data[2]) + uv_dstoff,
 1421|      0|                             PXSTRIDE(f->cur.stride[1]),
  ------------------
  |  |   53|      0|#define PXSTRIDE(x) (x)
  ------------------
 1422|      0|                             cbw4 * 4, cbh4 * 4, "v-pal-pred");
 1423|      0|                }
 1424|  6.98k|            }
 1425|       |
 1426|   812k|            const int sm_uv_fl = sm_uv_flag(t->a, cbx4) |
 1427|   812k|                                 sm_uv_flag(&t->l, cby4);
 1428|   812k|            const int uv_sb_has_tr =
 1429|   812k|                ((init_x + 16) >> ss_hor) < cw4 ? 1 : init_y ? 0 :
  ------------------
  |  Branch (1429:17): [True: 65.5k, False: 747k]
  |  Branch (1429:55): [True: 36.4k, False: 710k]
  ------------------
 1430|   747k|                intra_edge_flags & (EDGE_I420_TOP_HAS_RIGHT >> (f->cur.p.layout - 1));
 1431|   812k|            const int uv_sb_has_bl =
 1432|   812k|                init_x ? 0 : ((init_y + 16) >> ss_ver) < ch4 ? 1 :
  ------------------
  |  Branch (1432:17): [True: 65.5k, False: 747k]
  |  Branch (1432:30): [True: 36.4k, False: 710k]
  ------------------
 1433|   747k|                intra_edge_flags & (EDGE_I420_LEFT_HAS_BOTTOM >> (f->cur.p.layout - 1));
 1434|   812k|            const int sub_cw4 = imin(cw4, (init_x + 16) >> ss_hor);
 1435|  2.43M|            for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1435:30): [True: 1.62M, False: 813k]
  ------------------
 1436|  3.62M|                for (y = init_y >> ss_ver, t->by += init_y; y < sub_ch4;
  ------------------
  |  Branch (1436:61): [True: 2.00M, False: 1.62M]
  ------------------
 1437|  2.00M|                     y += uv_t_dim->h, t->by += uv_t_dim->h << ss_ver)
 1438|  2.00M|                {
 1439|  2.00M|                    pixel *dst = ((pixel *) f->cur.data[1 + pl]) +
 1440|  2.00M|                                   4 * ((t->by >> ss_ver) * PXSTRIDE(stride) +
  ------------------
  |  |   53|  2.00M|#define PXSTRIDE(x) (x)
  ------------------
 1441|  2.00M|                                        ((t->bx + init_x) >> ss_hor));
 1442|  6.63M|                    for (x = init_x >> ss_hor, t->bx += init_x; x < sub_cw4;
  ------------------
  |  Branch (1442:65): [True: 4.62M, False: 2.00M]
  ------------------
 1443|  4.62M|                         x += uv_t_dim->w, t->bx += uv_t_dim->w << ss_hor)
 1444|  4.62M|                    {
 1445|  4.62M|                        if ((b->uv_mode == CFL_PRED && b->cfl_alpha[pl]) ||
  ------------------
  |  Branch (1445:30): [True: 131k, False: 4.49M]
  |  Branch (1445:56): [True: 103k, False: 27.5k]
  ------------------
 1446|  4.52M|                            b->pal_sz[1])
  ------------------
  |  Branch (1446:29): [True: 16.7k, False: 4.50M]
  ------------------
 1447|   120k|                        {
 1448|   120k|                            goto skip_uv_pred;
 1449|   120k|                        }
 1450|       |
 1451|  4.50M|                        int angle = b->uv_angle;
 1452|       |                        // this probably looks weird because we're using
 1453|       |                        // luma flags in a chroma loop, but that's because
 1454|       |                        // prepare_intra_edges() expects luma flags as input
 1455|  4.50M|                        const enum EdgeFlags edge_flags =
 1456|  4.50M|                            (((y > (init_y >> ss_ver) || !uv_sb_has_tr) &&
  ------------------
  |  Branch (1456:32): [True: 2.76M, False: 1.74M]
  |  Branch (1456:58): [True: 646k, False: 1.09M]
  ------------------
 1457|  3.41M|                              (x + uv_t_dim->w >= sub_cw4)) ?
  ------------------
  |  Branch (1457:31): [True: 970k, False: 2.43M]
  ------------------
 1458|  3.53M|                                 0 : EDGE_I444_TOP_HAS_RIGHT) |
 1459|  4.50M|                            ((x > (init_x >> ss_hor) ||
  ------------------
  |  Branch (1459:31): [True: 2.61M, False: 1.88M]
  ------------------
 1460|  1.88M|                              (!uv_sb_has_bl && y + uv_t_dim->h >= sub_ch4)) ?
  ------------------
  |  Branch (1460:32): [True: 1.17M, False: 714k]
  |  Branch (1460:49): [True: 899k, False: 274k]
  ------------------
 1461|  3.51M|                                 0 : EDGE_I444_LEFT_HAS_BOTTOM);
 1462|  4.50M|                        const pixel *top_sb_edge = NULL;
 1463|  4.50M|                        if (!((t->by & ~ss_ver) & (f->sb_step - 1))) {
  ------------------
  |  Branch (1463:29): [True: 441k, False: 4.06M]
  ------------------
 1464|   441k|                            top_sb_edge = f->ipred_edge[1 + pl];
 1465|   441k|                            const int sby = t->by >> f->sb_shift;
 1466|   441k|                            top_sb_edge += f->sb128w * 128 * (sby - 1);
 1467|   441k|                        }
 1468|  4.50M|                        const enum IntraPredMode uv_mode =
 1469|  4.50M|                             b->uv_mode == CFL_PRED ? DC_PRED : b->uv_mode;
  ------------------
  |  Branch (1469:30): [True: 27.5k, False: 4.47M]
  ------------------
 1470|  4.50M|                        const int xpos = t->bx >> ss_hor, ypos = t->by >> ss_ver;
 1471|  4.50M|                        const int xstart = ts->tiling.col_start >> ss_hor;
 1472|  4.50M|                        const int ystart = ts->tiling.row_start >> ss_ver;
 1473|  4.50M|                        const enum IntraPredMode m =
 1474|  4.50M|                            bytefn(dav1d_prepare_intra_edges)(xpos, xpos > xstart,
  ------------------
  |  |   87|  4.50M|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  4.50M|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 1475|  4.50M|                                                              ypos, ypos > ystart,
 1476|  4.50M|                                                              ts->tiling.col_end >> ss_hor,
 1477|  4.50M|                                                              ts->tiling.row_end >> ss_ver,
 1478|  4.50M|                                                              edge_flags, dst, stride,
 1479|  4.50M|                                                              top_sb_edge, uv_mode,
 1480|  4.50M|                                                              &angle, uv_t_dim->w,
 1481|  4.50M|                                                              uv_t_dim->h,
 1482|  4.50M|                                                              f->seq_hdr->intra_edge_filter,
 1483|  4.50M|                                                              edge HIGHBD_CALL_SUFFIX);
 1484|  4.50M|                        angle |= intra_edge_filter_flag;
 1485|  4.50M|                        dsp->ipred.intra_pred[m](dst, stride, edge,
 1486|  4.50M|                                                 uv_t_dim->w * 4,
 1487|  4.50M|                                                 uv_t_dim->h * 4,
 1488|  4.50M|                                                 angle | sm_uv_fl,
 1489|  4.50M|                                                 (4 * f->bw + ss_hor -
 1490|  4.50M|                                                  4 * (t->bx & ~ss_hor)) >> ss_hor,
 1491|  4.50M|                                                 (4 * f->bh + ss_ver -
 1492|  4.50M|                                                  4 * (t->by & ~ss_ver)) >> ss_ver
 1493|  4.50M|                                                 HIGHBD_CALL_SUFFIX);
 1494|  4.50M|                        if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   34|  4.50M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 4.50M]
  |  |  ------------------
  |  |   35|  4.50M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  4.50M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                                      if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1495|      0|                            hex_dump(edge - uv_t_dim->h * 4, uv_t_dim->h * 4,
 1496|      0|                                     uv_t_dim->h * 4, 2, "l");
 1497|      0|                            hex_dump(edge, 0, 1, 1, "tl");
 1498|      0|                            hex_dump(edge + 1, uv_t_dim->w * 4,
 1499|      0|                                     uv_t_dim->w * 4, 2, "t");
 1500|      0|                            hex_dump(dst, stride, uv_t_dim->w * 4,
 1501|      0|                                     uv_t_dim->h * 4, pl ? "v-intra-pred" : "u-intra-pred");
  ------------------
  |  Branch (1501:55): [True: 0, False: 0]
  ------------------
 1502|      0|                        }
 1503|       |
 1504|  4.62M|                    skip_uv_pred: {}
 1505|  4.62M|                        if (!b->skip) {
  ------------------
  |  Branch (1505:29): [True: 1.04M, False: 3.58M]
  ------------------
 1506|  1.04M|                            enum TxfmType txtp;
 1507|  1.04M|                            int eob;
 1508|  1.04M|                            coef *cf;
 1509|  1.04M|                            if (t->frame_thread.pass) {
  ------------------
  |  Branch (1509:33): [True: 1.04M, False: 18.4E]
  ------------------
 1510|  1.04M|                                const int p = t->frame_thread.pass & 1;
 1511|  1.04M|                                const int cbi = *ts->frame_thread[p].cbi++;
 1512|  1.04M|                                cf = ts->frame_thread[p].cf;
 1513|  1.04M|                                ts->frame_thread[p].cf += uv_t_dim->w * uv_t_dim->h * 16;
 1514|  1.04M|                                eob  = cbi >> 5;
 1515|  1.04M|                                txtp = cbi & 0x1f;
 1516|  18.4E|                            } else {
 1517|  18.4E|                                uint8_t cf_ctx;
 1518|  18.4E|                                cf = bitfn(t->cf);
  ------------------
  |  |   51|  18.4E|#define bitfn(x) x##_8bpc
  ------------------
 1519|  18.4E|                                eob = decode_coefs(t, &t->a->ccoef[pl][cbx4 + x],
 1520|  18.4E|                                                   &t->l.ccoef[pl][cby4 + y],
 1521|  18.4E|                                                   b->uvtx, bs, b, 1, 1 + pl, cf,
 1522|  18.4E|                                                   &txtp, &cf_ctx);
 1523|  18.4E|                                if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  18.4E|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 18.4E]
  |  |  ------------------
  |  |   35|  18.4E|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  18.4E|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1524|      0|                                    printf("Post-uv-cf-blk[pl=%d,tx=%d,"
 1525|      0|                                           "txtp=%d,eob=%d]: r=%d [x=%d,cbx4=%d]\n",
 1526|      0|                                           pl, b->uvtx, txtp, eob, ts->msac.rng, x, cbx4);
 1527|  18.4E|                                int ctw = imin(uv_t_dim->w, (f->bw - t->bx + ss_hor) >> ss_hor);
 1528|  18.4E|                                int cth = imin(uv_t_dim->h, (f->bh - t->by + ss_ver) >> ss_ver);
 1529|  18.4E|                                dav1d_memset_likely_pow2(&t->a->ccoef[pl][cbx4 + x], cf_ctx, ctw);
 1530|  18.4E|                                dav1d_memset_likely_pow2(&t->l.ccoef[pl][cby4 + y], cf_ctx, cth);
 1531|  18.4E|                            }
 1532|  1.04M|                            if (eob >= 0) {
  ------------------
  |  Branch (1532:33): [True: 423k, False: 619k]
  ------------------
 1533|   423k|                                if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   34|   423k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 423k]
  |  |  ------------------
  |  |   35|   423k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   423k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                                              if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1534|      0|                                    coef_dump(cf, uv_t_dim->h * 4,
 1535|      0|                                              uv_t_dim->w * 4, 3, "dq");
 1536|   423k|                                dsp->itx.itxfm_add[b->uvtx]
 1537|   423k|                                                  [txtp](dst, stride,
 1538|   423k|                                                         cf, eob HIGHBD_CALL_SUFFIX);
 1539|   423k|                                if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   34|   423k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 423k]
  |  |  ------------------
  |  |   35|   423k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   423k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                                              if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1540|      0|                                    hex_dump(dst, stride, uv_t_dim->w * 4,
 1541|      0|                                             uv_t_dim->h * 4, "recon");
 1542|   423k|                            }
 1543|  3.58M|                        } else if (!t->frame_thread.pass) {
  ------------------
  |  Branch (1543:36): [True: 0, False: 3.58M]
  ------------------
 1544|      0|                            dav1d_memset_pow2[uv_t_dim->lw](&t->a->ccoef[pl][cbx4 + x], 0x40);
 1545|      0|                            dav1d_memset_pow2[uv_t_dim->lh](&t->l.ccoef[pl][cby4 + y], 0x40);
 1546|      0|                        }
 1547|  4.62M|                        dst += uv_t_dim->w * 4;
 1548|  4.62M|                    }
 1549|  2.00M|                    t->bx -= x << ss_hor;
 1550|  2.00M|                }
 1551|  1.62M|                t->by -= y << ss_ver;
 1552|  1.62M|            }
 1553|   812k|        }
 1554|  1.15M|    }
 1555|  1.10M|}
dav1d_recon_b_inter_8bpc:
 1559|  1.65M|{
 1560|  1.65M|    Dav1dTileState *const ts = t->ts;
 1561|  1.65M|    const Dav1dFrameContext *const f = t->f;
 1562|  1.65M|    const Dav1dDSPContext *const dsp = f->dsp;
 1563|  1.65M|    const int bx4 = t->bx & 31, by4 = t->by & 31;
 1564|  1.65M|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
 1565|  1.65M|    const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
 1566|  1.65M|    const int cbx4 = bx4 >> ss_hor, cby4 = by4 >> ss_ver;
 1567|  1.65M|    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
 1568|  1.65M|    const int bw4 = b_dim[0], bh4 = b_dim[1];
 1569|  1.65M|    const int w4 = imin(bw4, f->bw - t->bx), h4 = imin(bh4, f->bh - t->by);
 1570|  1.65M|    const int has_chroma = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400 &&
  ------------------
  |  Branch (1570:28): [True: 926k, False: 727k]
  ------------------
 1571|   926k|                           (bw4 > ss_hor || t->bx & 1) &&
  ------------------
  |  Branch (1571:29): [True: 778k, False: 148k]
  |  Branch (1571:45): [True: 73.7k, False: 74.2k]
  ------------------
 1572|   852k|                           (bh4 > ss_ver || t->by & 1);
  ------------------
  |  Branch (1572:29): [True: 743k, False: 109k]
  |  Branch (1572:45): [True: 54.8k, False: 55.0k]
  ------------------
 1573|  1.65M|    const int chr_layout_idx = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I400 ? 0 :
  ------------------
  |  Branch (1573:32): [True: 727k, False: 926k]
  ------------------
 1574|  1.65M|                               DAV1D_PIXEL_LAYOUT_I444 - f->cur.p.layout;
 1575|  1.65M|    int res;
 1576|       |
 1577|       |    // prediction
 1578|  1.65M|    const int cbh4 = (bh4 + ss_ver) >> ss_ver, cbw4 = (bw4 + ss_hor) >> ss_hor;
 1579|  1.65M|    pixel *dst = ((pixel *) f->cur.data[0]) +
 1580|  1.65M|        4 * (t->by * PXSTRIDE(f->cur.stride[0]) + t->bx);
  ------------------
  |  |   53|  1.65M|#define PXSTRIDE(x) (x)
  ------------------
 1581|  1.65M|    const ptrdiff_t uvdstoff =
 1582|  1.65M|        4 * ((t->bx >> ss_hor) + (t->by >> ss_ver) * PXSTRIDE(f->cur.stride[1]));
  ------------------
  |  |   53|  1.65M|#define PXSTRIDE(x) (x)
  ------------------
 1583|  1.65M|    if (IS_KEY_OR_INTRA(f->frame_hdr)) {
  ------------------
  |  |   43|  1.65M|    (!IS_INTER_OR_SWITCH(frame_header))
  |  |  ------------------
  |  |  |  |   36|  1.65M|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (43:5): [True: 19.2k, False: 1.63M]
  |  |  ------------------
  ------------------
 1584|       |        // intrabc
 1585|  19.2k|        assert(!f->frame_hdr->super_res.enabled);
  ------------------
  |  Branch (1585:9): [True: 19.2k, False: 0]
  ------------------
 1586|  19.2k|        res = mc(t, dst, NULL, f->cur.stride[0], bw4, bh4, t->bx, t->by, 0,
 1587|  19.2k|                 b->mv[0], &f->sr_cur, 0 /* unused */, FILTER_2D_BILINEAR);
 1588|  19.2k|        if (res) return res;
  ------------------
  |  Branch (1588:13): [True: 0, False: 19.2k]
  ------------------
 1589|  40.2k|        if (has_chroma) for (int pl = 1; pl < 3; pl++) {
  ------------------
  |  Branch (1589:13): [True: 13.4k, False: 5.81k]
  |  Branch (1589:42): [True: 26.8k, False: 13.4k]
  ------------------
 1590|  26.8k|            res = mc(t, ((pixel *)f->cur.data[pl]) + uvdstoff, NULL, f->cur.stride[1],
 1591|  26.8k|                     bw4 << (bw4 == ss_hor), bh4 << (bh4 == ss_ver),
 1592|  26.8k|                     t->bx & ~ss_hor, t->by & ~ss_ver, pl, b->mv[0],
 1593|  26.8k|                     &f->sr_cur, 0 /* unused */, FILTER_2D_BILINEAR);
 1594|  26.8k|            if (res) return res;
  ------------------
  |  Branch (1594:17): [True: 0, False: 26.8k]
  ------------------
 1595|  26.8k|        }
 1596|  1.63M|    } else if (b->comp_type == COMP_INTER_NONE) {
  ------------------
  |  Branch (1596:16): [True: 1.53M, False: 101k]
  ------------------
 1597|  1.53M|        const Dav1dThreadPicture *const refp = &f->refp[b->ref[0]];
 1598|  1.53M|        const enum Filter2d filter_2d = b->filter2d;
 1599|       |
 1600|  1.53M|        if (imin(bw4, bh4) > 1 &&
  ------------------
  |  Branch (1600:13): [True: 1.28M, False: 249k]
  ------------------
 1601|  1.28M|            ((b->inter_mode == GLOBALMV && f->gmv_warp_allowed[b->ref[0]]) ||
  ------------------
  |  Branch (1601:15): [True: 758k, False: 525k]
  |  Branch (1601:44): [True: 414k, False: 344k]
  ------------------
 1602|   870k|             (b->motion_mode == MM_WARP && t->warpmv.type > DAV1D_WM_TYPE_TRANSLATION)))
  ------------------
  |  Branch (1602:15): [True: 93.0k, False: 777k]
  |  Branch (1602:44): [True: 88.4k, False: 4.58k]
  ------------------
 1603|   502k|        {
 1604|   502k|            res = warp_affine(t, dst, NULL, f->cur.stride[0], b_dim, 0, refp,
 1605|   502k|                              b->motion_mode == MM_WARP ? &t->warpmv :
  ------------------
  |  Branch (1605:31): [True: 88.4k, False: 414k]
  ------------------
 1606|   502k|                                  &f->frame_hdr->gmv[b->ref[0]]);
 1607|   502k|            if (res) return res;
  ------------------
  |  Branch (1607:17): [True: 0, False: 502k]
  ------------------
 1608|  1.03M|        } else {
 1609|  1.03M|            res = mc(t, dst, NULL, f->cur.stride[0],
 1610|  1.03M|                     bw4, bh4, t->bx, t->by, 0, b->mv[0], refp, b->ref[0], filter_2d);
 1611|  1.03M|            if (res) return res;
  ------------------
  |  Branch (1611:17): [True: 0, False: 1.03M]
  ------------------
 1612|  1.03M|            if (b->motion_mode == MM_OBMC) {
  ------------------
  |  Branch (1612:17): [True: 186k, False: 844k]
  ------------------
 1613|   186k|                res = obmc(t, dst, f->cur.stride[0], b_dim, 0, bx4, by4, w4, h4);
 1614|   186k|                if (res) return res;
  ------------------
  |  Branch (1614:21): [True: 0, False: 186k]
  ------------------
 1615|   186k|            }
 1616|  1.03M|        }
 1617|  1.53M|        if (b->interintra_type) {
  ------------------
  |  Branch (1617:13): [True: 92.7k, False: 1.44M]
  ------------------
 1618|  92.7k|            pixel *const tl_edge = bitfn(t->scratch.edge) + 32;
  ------------------
  |  |   51|  92.7k|#define bitfn(x) x##_8bpc
  ------------------
 1619|  92.7k|            enum IntraPredMode m = b->interintra_mode == II_SMOOTH_PRED ?
  ------------------
  |  Branch (1619:36): [True: 15.2k, False: 77.5k]
  ------------------
 1620|  77.5k|                                   SMOOTH_PRED : b->interintra_mode;
 1621|  92.7k|            pixel *const tmp = bitfn(t->scratch.interintra);
  ------------------
  |  |   51|  92.7k|#define bitfn(x) x##_8bpc
  ------------------
 1622|  92.7k|            int angle = 0;
 1623|  92.7k|            const pixel *top_sb_edge = NULL;
 1624|  92.7k|            if (!(t->by & (f->sb_step - 1))) {
  ------------------
  |  Branch (1624:17): [True: 12.1k, False: 80.6k]
  ------------------
 1625|  12.1k|                top_sb_edge = f->ipred_edge[0];
 1626|  12.1k|                const int sby = t->by >> f->sb_shift;
 1627|  12.1k|                top_sb_edge += f->sb128w * 128 * (sby - 1);
 1628|  12.1k|            }
 1629|  92.7k|            m = bytefn(dav1d_prepare_intra_edges)(t->bx, t->bx > ts->tiling.col_start,
  ------------------
  |  |   87|  92.7k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  92.7k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 1630|  92.7k|                                                  t->by, t->by > ts->tiling.row_start,
 1631|  92.7k|                                                  ts->tiling.col_end, ts->tiling.row_end,
 1632|  92.7k|                                                  0, dst, f->cur.stride[0], top_sb_edge,
 1633|  92.7k|                                                  m, &angle, bw4, bh4, 0, tl_edge
 1634|  92.7k|                                                  HIGHBD_CALL_SUFFIX);
 1635|  92.7k|            dsp->ipred.intra_pred[m](tmp, 4 * bw4 * sizeof(pixel),
 1636|  92.7k|                                     tl_edge, bw4 * 4, bh4 * 4, 0, 0, 0
 1637|  92.7k|                                     HIGHBD_CALL_SUFFIX);
 1638|  92.7k|            dsp->mc.blend(dst, f->cur.stride[0], tmp,
 1639|  92.7k|                          bw4 * 4, bh4 * 4, II_MASK(0, bs, b));
  ------------------
  |  |   83|  92.7k|    ((const uint8_t*)((uintptr_t)&dav1d_masks + \
  |  |   84|  92.7k|    (size_t)((b)->interintra_type == INTER_INTRA_BLEND ? \
  |  |  ------------------
  |  |  |  Branch (84:14): [True: 65.4k, False: 27.3k]
  |  |  ------------------
  |  |   85|  92.7k|    dav1d_masks.offsets[c][(bs)-BS_32x32].ii[(b)->interintra_mode] : \
  |  |   86|  92.7k|    dav1d_masks.offsets[c][(bs)-BS_32x32].wedge[0][(b)->wedge_idx]) * 8))
  ------------------
 1640|  92.7k|        }
 1641|       |
 1642|  1.53M|        if (!has_chroma) goto skip_inter_chroma_pred;
  ------------------
  |  Branch (1642:13): [True: 846k, False: 688k]
  ------------------
 1643|       |
 1644|       |        // sub8x8 derivation
 1645|   688k|        int is_sub8x8 = bw4 == ss_hor || bh4 == ss_ver;
  ------------------
  |  Branch (1645:25): [True: 56.1k, False: 631k]
  |  Branch (1645:42): [True: 39.9k, False: 591k]
  ------------------
 1646|   688k|        refmvs_block *const *r;
 1647|   688k|        if (is_sub8x8) {
  ------------------
  |  Branch (1647:13): [True: 97.1k, False: 590k]
  ------------------
 1648|  97.1k|            assert(ss_hor == 1);
  ------------------
  |  Branch (1648:13): [True: 97.1k, False: 1]
  ------------------
 1649|  97.1k|            r = &t->rt.r[(t->by & 31) + 5];
 1650|  97.1k|            if (bw4 == 1) is_sub8x8 &= r[0][t->bx - 1].ref.ref[0] > 0;
  ------------------
  |  Branch (1650:17): [True: 57.2k, False: 39.9k]
  ------------------
 1651|  97.1k|            if (bh4 == ss_ver) is_sub8x8 &= r[-1][t->bx].ref.ref[0] > 0;
  ------------------
  |  Branch (1651:17): [True: 52.8k, False: 44.3k]
  ------------------
 1652|  97.1k|            if (bw4 == 1 && bh4 == ss_ver)
  ------------------
  |  Branch (1652:17): [True: 57.2k, False: 39.9k]
  |  Branch (1652:29): [True: 12.8k, False: 44.3k]
  ------------------
 1653|  12.8k|                is_sub8x8 &= r[-1][t->bx - 1].ref.ref[0] > 0;
 1654|  97.1k|        }
 1655|       |
 1656|       |        // chroma prediction
 1657|   688k|        if (is_sub8x8) {
  ------------------
  |  Branch (1657:13): [True: 84.9k, False: 603k]
  ------------------
 1658|  84.9k|            assert(ss_hor == 1);
  ------------------
  |  Branch (1658:13): [True: 84.9k, False: 18.4E]
  ------------------
 1659|  84.9k|            ptrdiff_t h_off = 0, v_off = 0;
 1660|  84.9k|            if (bw4 == 1 && bh4 == ss_ver) {
  ------------------
  |  Branch (1660:17): [True: 49.9k, False: 34.9k]
  |  Branch (1660:29): [True: 10.1k, False: 39.8k]
  ------------------
 1661|  30.3k|                for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1661:34): [True: 20.2k, False: 10.1k]
  ------------------
 1662|  20.2k|                    res = mc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff,
 1663|  20.2k|                             NULL, f->cur.stride[1],
 1664|  20.2k|                             bw4, bh4, t->bx - 1, t->by - 1, 1 + pl,
 1665|  20.2k|                             r[-1][t->bx - 1].mv.mv[0],
 1666|  20.2k|                             &f->refp[r[-1][t->bx - 1].ref.ref[0] - 1],
 1667|  20.2k|                             r[-1][t->bx - 1].ref.ref[0] - 1,
 1668|  20.2k|                             t->frame_thread.pass != 2 ? t->tl_4x4_filter :
  ------------------
  |  Branch (1668:30): [True: 0, False: 20.2k]
  ------------------
 1669|  20.2k|                                 f->frame_thread.b[((t->by - 1) * f->b4_stride) + t->bx - 1].filter2d);
 1670|  20.2k|                    if (res) return res;
  ------------------
  |  Branch (1670:25): [True: 0, False: 20.2k]
  ------------------
 1671|  20.2k|                }
 1672|  10.1k|                v_off = 2 * PXSTRIDE(f->cur.stride[1]);
  ------------------
  |  |   53|  10.1k|#define PXSTRIDE(x) (x)
  ------------------
 1673|  10.1k|                h_off = 2;
 1674|  10.1k|            }
 1675|  84.9k|            if (bw4 == 1) {
  ------------------
  |  Branch (1675:17): [True: 49.9k, False: 34.9k]
  ------------------
 1676|  49.9k|                const enum Filter2d left_filter_2d =
 1677|  49.9k|                    dav1d_filter_2d[t->l.filter[1][by4]][t->l.filter[0][by4]];
 1678|   149k|                for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1678:34): [True: 99.9k, False: 49.9k]
  ------------------
 1679|  99.9k|                    res = mc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff + v_off, NULL,
 1680|  99.9k|                             f->cur.stride[1], bw4, bh4, t->bx - 1,
 1681|  99.9k|                             t->by, 1 + pl, r[0][t->bx - 1].mv.mv[0],
 1682|  99.9k|                             &f->refp[r[0][t->bx - 1].ref.ref[0] - 1],
 1683|  99.9k|                             r[0][t->bx - 1].ref.ref[0] - 1,
 1684|  99.9k|                             t->frame_thread.pass != 2 ? left_filter_2d :
  ------------------
  |  Branch (1684:30): [True: 0, False: 99.9k]
  ------------------
 1685|  99.9k|                                 f->frame_thread.b[(t->by * f->b4_stride) + t->bx - 1].filter2d);
 1686|  99.9k|                    if (res) return res;
  ------------------
  |  Branch (1686:25): [True: 0, False: 99.9k]
  ------------------
 1687|  99.9k|                }
 1688|  49.9k|                h_off = 2;
 1689|  49.9k|            }
 1690|  84.9k|            if (bh4 == ss_ver) {
  ------------------
  |  Branch (1690:17): [True: 45.0k, False: 39.8k]
  ------------------
 1691|  45.0k|                const enum Filter2d top_filter_2d =
 1692|  45.0k|                    dav1d_filter_2d[t->a->filter[1][bx4]][t->a->filter[0][bx4]];
 1693|   135k|                for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1693:34): [True: 90.1k, False: 45.0k]
  ------------------
 1694|  90.1k|                    res = mc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff + h_off, NULL,
 1695|  90.1k|                             f->cur.stride[1], bw4, bh4, t->bx, t->by - 1,
 1696|  90.1k|                             1 + pl, r[-1][t->bx].mv.mv[0],
 1697|  90.1k|                             &f->refp[r[-1][t->bx].ref.ref[0] - 1],
 1698|  90.1k|                             r[-1][t->bx].ref.ref[0] - 1,
 1699|  90.1k|                             t->frame_thread.pass != 2 ? top_filter_2d :
  ------------------
  |  Branch (1699:30): [True: 0, False: 90.1k]
  ------------------
 1700|  90.1k|                                 f->frame_thread.b[((t->by - 1) * f->b4_stride) + t->bx].filter2d);
 1701|  90.1k|                    if (res) return res;
  ------------------
  |  Branch (1701:25): [True: 0, False: 90.1k]
  ------------------
 1702|  90.1k|                }
 1703|  45.0k|                v_off = 2 * PXSTRIDE(f->cur.stride[1]);
  ------------------
  |  |   53|  45.0k|#define PXSTRIDE(x) (x)
  ------------------
 1704|  45.0k|            }
 1705|   254k|            for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1705:30): [True: 169k, False: 84.9k]
  ------------------
 1706|   169k|                res = mc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff + h_off + v_off, NULL, f->cur.stride[1],
 1707|   169k|                         bw4, bh4, t->bx, t->by, 1 + pl, b->mv[0],
 1708|   169k|                         refp, b->ref[0], filter_2d);
 1709|   169k|                if (res) return res;
  ------------------
  |  Branch (1709:21): [True: 0, False: 169k]
  ------------------
 1710|   169k|            }
 1711|   603k|        } else {
 1712|   603k|            if (imin(cbw4, cbh4) > 1 &&
  ------------------
  |  Branch (1712:17): [True: 293k, False: 309k]
  ------------------
 1713|   293k|                ((b->inter_mode == GLOBALMV && f->gmv_warp_allowed[b->ref[0]]) ||
  ------------------
  |  Branch (1713:19): [True: 33.9k, False: 259k]
  |  Branch (1713:48): [True: 9.75k, False: 24.2k]
  ------------------
 1714|   284k|                 (b->motion_mode == MM_WARP && t->warpmv.type > DAV1D_WM_TYPE_TRANSLATION)))
  ------------------
  |  Branch (1714:19): [True: 42.9k, False: 241k]
  |  Branch (1714:48): [True: 41.9k, False: 960]
  ------------------
 1715|  51.7k|            {
 1716|   155k|                for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1716:34): [True: 103k, False: 51.7k]
  ------------------
 1717|   103k|                    res = warp_affine(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff, NULL,
 1718|   103k|                                      f->cur.stride[1], b_dim, 1 + pl, refp,
 1719|   103k|                                      b->motion_mode == MM_WARP ? &t->warpmv :
  ------------------
  |  Branch (1719:39): [True: 83.8k, False: 19.4k]
  ------------------
 1720|   103k|                                          &f->frame_hdr->gmv[b->ref[0]]);
 1721|   103k|                    if (res) return res;
  ------------------
  |  Branch (1721:25): [True: 0, False: 103k]
  ------------------
 1722|   103k|                }
 1723|   551k|            } else {
 1724|  1.65M|                for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1724:34): [True: 1.10M, False: 551k]
  ------------------
 1725|  1.10M|                    res = mc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff,
 1726|  1.10M|                             NULL, f->cur.stride[1],
 1727|  1.10M|                             bw4 << (bw4 == ss_hor), bh4 << (bh4 == ss_ver),
 1728|  1.10M|                             t->bx & ~ss_hor, t->by & ~ss_ver,
 1729|  1.10M|                             1 + pl, b->mv[0], refp, b->ref[0], filter_2d);
 1730|  1.10M|                    if (res) return res;
  ------------------
  |  Branch (1730:25): [True: 0, False: 1.10M]
  ------------------
 1731|  1.10M|                    if (b->motion_mode == MM_OBMC) {
  ------------------
  |  Branch (1731:25): [True: 357k, False: 746k]
  ------------------
 1732|   357k|                        res = obmc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff,
 1733|   357k|                                   f->cur.stride[1], b_dim, 1 + pl, bx4, by4, w4, h4);
 1734|   357k|                        if (res) return res;
  ------------------
  |  Branch (1734:29): [True: 0, False: 357k]
  ------------------
 1735|   357k|                    }
 1736|  1.10M|                }
 1737|   551k|            }
 1738|   603k|            if (b->interintra_type) {
  ------------------
  |  Branch (1738:17): [True: 91.2k, False: 511k]
  ------------------
 1739|       |                // FIXME for 8x32 with 4:2:2 subsampling, this probably does
 1740|       |                // the wrong thing since it will select 4x16, not 4x32, as a
 1741|       |                // transform size...
 1742|  91.2k|                const uint8_t *const ii_mask = II_MASK(chr_layout_idx, bs, b);
  ------------------
  |  |   83|  91.2k|    ((const uint8_t*)((uintptr_t)&dav1d_masks + \
  |  |   84|  91.2k|    (size_t)((b)->interintra_type == INTER_INTRA_BLEND ? \
  |  |  ------------------
  |  |  |  Branch (84:14): [True: 64.3k, False: 26.8k]
  |  |  ------------------
  |  |   85|  91.2k|    dav1d_masks.offsets[c][(bs)-BS_32x32].ii[(b)->interintra_mode] : \
  |  |   86|  91.2k|    dav1d_masks.offsets[c][(bs)-BS_32x32].wedge[0][(b)->wedge_idx]) * 8))
  ------------------
 1743|       |
 1744|   273k|                for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1744:34): [True: 182k, False: 91.2k]
  ------------------
 1745|   182k|                    pixel *const tmp = bitfn(t->scratch.interintra);
  ------------------
  |  |   51|   182k|#define bitfn(x) x##_8bpc
  ------------------
 1746|   182k|                    pixel *const tl_edge = bitfn(t->scratch.edge) + 32;
  ------------------
  |  |   51|   182k|#define bitfn(x) x##_8bpc
  ------------------
 1747|   182k|                    enum IntraPredMode m =
 1748|   182k|                        b->interintra_mode == II_SMOOTH_PRED ?
  ------------------
  |  Branch (1748:25): [True: 29.7k, False: 152k]
  ------------------
 1749|   152k|                        SMOOTH_PRED : b->interintra_mode;
 1750|   182k|                    int angle = 0;
 1751|   182k|                    pixel *const uvdst = ((pixel *) f->cur.data[1 + pl]) + uvdstoff;
 1752|   182k|                    const pixel *top_sb_edge = NULL;
 1753|   182k|                    if (!(t->by & (f->sb_step - 1))) {
  ------------------
  |  Branch (1753:25): [True: 24.0k, False: 158k]
  ------------------
 1754|  24.0k|                        top_sb_edge = f->ipred_edge[pl + 1];
 1755|  24.0k|                        const int sby = t->by >> f->sb_shift;
 1756|  24.0k|                        top_sb_edge += f->sb128w * 128 * (sby - 1);
 1757|  24.0k|                    }
 1758|   182k|                    m = bytefn(dav1d_prepare_intra_edges)(t->bx >> ss_hor,
  ------------------
  |  |   87|   182k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|   182k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 1759|   182k|                                                          (t->bx >> ss_hor) >
 1760|   182k|                                                              (ts->tiling.col_start >> ss_hor),
 1761|   182k|                                                          t->by >> ss_ver,
 1762|   182k|                                                          (t->by >> ss_ver) >
 1763|   182k|                                                              (ts->tiling.row_start >> ss_ver),
 1764|   182k|                                                          ts->tiling.col_end >> ss_hor,
 1765|   182k|                                                          ts->tiling.row_end >> ss_ver,
 1766|   182k|                                                          0, uvdst, f->cur.stride[1],
 1767|   182k|                                                          top_sb_edge, m,
 1768|   182k|                                                          &angle, cbw4, cbh4, 0, tl_edge
 1769|   182k|                                                          HIGHBD_CALL_SUFFIX);
 1770|   182k|                    dsp->ipred.intra_pred[m](tmp, cbw4 * 4 * sizeof(pixel),
 1771|   182k|                                             tl_edge, cbw4 * 4, cbh4 * 4, 0, 0, 0
 1772|   182k|                                             HIGHBD_CALL_SUFFIX);
 1773|   182k|                    dsp->mc.blend(uvdst, f->cur.stride[1], tmp,
 1774|   182k|                                  cbw4 * 4, cbh4 * 4, ii_mask);
 1775|   182k|                }
 1776|  91.2k|            }
 1777|   603k|        }
 1778|       |
 1779|  1.53M|    skip_inter_chroma_pred: {}
 1780|  1.53M|        t->tl_4x4_filter = filter_2d;
 1781|  1.53M|    } else {
 1782|   101k|        const enum Filter2d filter_2d = b->filter2d;
 1783|       |        // Maximum super block size is 128x128
 1784|   101k|        int16_t (*tmp)[128 * 128] = t->scratch.compinter;
 1785|   101k|        int jnt_weight;
 1786|   101k|        uint8_t *const seg_mask = t->scratch.seg_mask;
 1787|   101k|        const uint8_t *mask;
 1788|       |
 1789|   303k|        for (int i = 0; i < 2; i++) {
  ------------------
  |  Branch (1789:25): [True: 202k, False: 101k]
  ------------------
 1790|   202k|            const Dav1dThreadPicture *const refp = &f->refp[b->ref[i]];
 1791|       |
 1792|   202k|            if (b->inter_mode == GLOBALMV_GLOBALMV && f->gmv_warp_allowed[b->ref[i]]) {
  ------------------
  |  Branch (1792:17): [True: 9.90k, False: 193k]
  |  Branch (1792:55): [True: 6.30k, False: 3.60k]
  ------------------
 1793|  6.30k|                res = warp_affine(t, NULL, tmp[i], bw4 * 4, b_dim, 0, refp,
 1794|  6.30k|                                  &f->frame_hdr->gmv[b->ref[i]]);
 1795|  6.30k|                if (res) return res;
  ------------------
  |  Branch (1795:21): [True: 0, False: 6.30k]
  ------------------
 1796|   196k|            } else {
 1797|   196k|                res = mc(t, NULL, tmp[i], 0, bw4, bh4, t->bx, t->by, 0,
 1798|   196k|                         b->mv[i], refp, b->ref[i], filter_2d);
 1799|   196k|                if (res) return res;
  ------------------
  |  Branch (1799:21): [True: 0, False: 196k]
  ------------------
 1800|   196k|            }
 1801|   202k|        }
 1802|   101k|        switch (b->comp_type) {
  ------------------
  |  Branch (1802:17): [True: 101k, False: 18.4E]
  ------------------
 1803|  63.7k|        case COMP_INTER_AVG:
  ------------------
  |  Branch (1803:9): [True: 63.7k, False: 37.2k]
  ------------------
 1804|  63.7k|            dsp->mc.avg(dst, f->cur.stride[0], tmp[0], tmp[1],
 1805|  63.7k|                        bw4 * 4, bh4 * 4 HIGHBD_CALL_SUFFIX);
 1806|  63.7k|            break;
 1807|  14.1k|        case COMP_INTER_WEIGHTED_AVG:
  ------------------
  |  Branch (1807:9): [True: 14.1k, False: 86.8k]
  ------------------
 1808|  14.1k|            jnt_weight = f->jnt_weights[b->ref[0]][b->ref[1]];
 1809|  14.1k|            dsp->mc.w_avg(dst, f->cur.stride[0], tmp[0], tmp[1],
 1810|  14.1k|                          bw4 * 4, bh4 * 4, jnt_weight HIGHBD_CALL_SUFFIX);
 1811|  14.1k|            break;
 1812|  14.1k|        case COMP_INTER_SEG:
  ------------------
  |  Branch (1812:9): [True: 14.1k, False: 86.9k]
  ------------------
 1813|  14.1k|            dsp->mc.w_mask[chr_layout_idx](dst, f->cur.stride[0],
 1814|  14.1k|                                           tmp[b->mask_sign], tmp[!b->mask_sign],
 1815|  14.1k|                                           bw4 * 4, bh4 * 4, seg_mask,
 1816|  14.1k|                                           b->mask_sign HIGHBD_CALL_SUFFIX);
 1817|  14.1k|            mask = seg_mask;
 1818|  14.1k|            break;
 1819|  9.54k|        case COMP_INTER_WEDGE:
  ------------------
  |  Branch (1819:9): [True: 9.54k, False: 91.4k]
  ------------------
 1820|  9.54k|            mask = WEDGE_MASK(0, bs, 0, b->wedge_idx);
  ------------------
  |  |   89|  9.54k|    ((const uint8_t*)((uintptr_t)&dav1d_masks + \
  |  |   90|  9.54k|    (size_t)dav1d_masks.offsets[c][(bs)-BS_32x32].wedge[sign][idx] * 8))
  ------------------
 1821|  9.54k|            dsp->mc.mask(dst, f->cur.stride[0],
 1822|  9.54k|                         tmp[b->mask_sign], tmp[!b->mask_sign],
 1823|  9.54k|                         bw4 * 4, bh4 * 4, mask HIGHBD_CALL_SUFFIX);
 1824|  9.54k|            if (has_chroma)
  ------------------
  |  Branch (1824:17): [True: 9.07k, False: 465]
  ------------------
 1825|  9.07k|                mask = WEDGE_MASK(chr_layout_idx, bs, b->mask_sign, b->wedge_idx);
  ------------------
  |  |   89|  9.07k|    ((const uint8_t*)((uintptr_t)&dav1d_masks + \
  |  |   90|  9.07k|    (size_t)dav1d_masks.offsets[c][(bs)-BS_32x32].wedge[sign][idx] * 8))
  ------------------
 1826|  9.54k|            break;
 1827|   101k|        }
 1828|       |
 1829|       |        // chroma
 1830|   289k|        if (has_chroma) for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1830:13): [True: 96.5k, False: 4.78k]
  |  Branch (1830:42): [True: 192k, False: 96.4k]
  ------------------
 1831|   578k|            for (int i = 0; i < 2; i++) {
  ------------------
  |  Branch (1831:29): [True: 385k, False: 192k]
  ------------------
 1832|   385k|                const Dav1dThreadPicture *const refp = &f->refp[b->ref[i]];
 1833|   385k|                if (b->inter_mode == GLOBALMV_GLOBALMV &&
  ------------------
  |  Branch (1833:21): [True: 17.9k, False: 367k]
  ------------------
 1834|  17.9k|                    imin(cbw4, cbh4) > 1 && f->gmv_warp_allowed[b->ref[i]])
  ------------------
  |  Branch (1834:21): [True: 7.87k, False: 10.0k]
  |  Branch (1834:45): [True: 4.75k, False: 3.11k]
  ------------------
 1835|  4.75k|                {
 1836|  4.75k|                    res = warp_affine(t, NULL, tmp[i], bw4 * 4 >> ss_hor,
 1837|  4.75k|                                      b_dim, 1 + pl,
 1838|  4.75k|                                      refp, &f->frame_hdr->gmv[b->ref[i]]);
 1839|  4.75k|                    if (res) return res;
  ------------------
  |  Branch (1839:25): [True: 0, False: 4.75k]
  ------------------
 1840|   380k|                } else {
 1841|   380k|                    res = mc(t, NULL, tmp[i], 0, bw4, bh4, t->bx, t->by,
 1842|   380k|                             1 + pl, b->mv[i], refp, b->ref[i], filter_2d);
 1843|   380k|                    if (res) return res;
  ------------------
  |  Branch (1843:25): [True: 0, False: 380k]
  ------------------
 1844|   380k|                }
 1845|   385k|            }
 1846|   192k|            pixel *const uvdst = ((pixel *) f->cur.data[1 + pl]) + uvdstoff;
 1847|   192k|            switch (b->comp_type) {
  ------------------
  |  Branch (1847:21): [True: 193k, False: 18.4E]
  ------------------
 1848|   122k|            case COMP_INTER_AVG:
  ------------------
  |  Branch (1848:13): [True: 122k, False: 70.7k]
  ------------------
 1849|   122k|                dsp->mc.avg(uvdst, f->cur.stride[1], tmp[0], tmp[1],
 1850|   122k|                            bw4 * 4 >> ss_hor, bh4 * 4 >> ss_ver
 1851|   122k|                            HIGHBD_CALL_SUFFIX);
 1852|   122k|                break;
 1853|  27.1k|            case COMP_INTER_WEIGHTED_AVG:
  ------------------
  |  Branch (1853:13): [True: 27.1k, False: 165k]
  ------------------
 1854|  27.1k|                dsp->mc.w_avg(uvdst, f->cur.stride[1], tmp[0], tmp[1],
 1855|  27.1k|                              bw4 * 4 >> ss_hor, bh4 * 4 >> ss_ver, jnt_weight
 1856|  27.1k|                              HIGHBD_CALL_SUFFIX);
 1857|  27.1k|                break;
 1858|  18.1k|            case COMP_INTER_WEDGE:
  ------------------
  |  Branch (1858:13): [True: 18.1k, False: 174k]
  ------------------
 1859|  44.2k|            case COMP_INTER_SEG:
  ------------------
  |  Branch (1859:13): [True: 26.1k, False: 166k]
  ------------------
 1860|  44.2k|                dsp->mc.mask(uvdst, f->cur.stride[1],
 1861|  44.2k|                             tmp[b->mask_sign], tmp[!b->mask_sign],
 1862|  44.2k|                             bw4 * 4 >> ss_hor, bh4 * 4 >> ss_ver, mask
 1863|  44.2k|                             HIGHBD_CALL_SUFFIX);
 1864|  44.2k|                break;
 1865|   192k|            }
 1866|   192k|        }
 1867|   101k|    }
 1868|       |
 1869|  1.65M|    if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   34|  1.65M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 1.65M]
  |  |  ------------------
  |  |   35|  1.65M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  1.65M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                  if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1870|      0|        hex_dump(dst, f->cur.stride[0], b_dim[0] * 4, b_dim[1] * 4, "y-pred");
 1871|      0|        if (has_chroma) {
  ------------------
  |  Branch (1871:13): [True: 0, False: 0]
  ------------------
 1872|      0|            hex_dump(&((pixel *) f->cur.data[1])[uvdstoff], f->cur.stride[1],
 1873|      0|                     cbw4 * 4, cbh4 * 4, "u-pred");
 1874|      0|            hex_dump(&((pixel *) f->cur.data[2])[uvdstoff], f->cur.stride[1],
 1875|      0|                     cbw4 * 4, cbh4 * 4, "v-pred");
 1876|      0|        }
 1877|      0|    }
 1878|       |
 1879|  1.65M|    const int cw4 = (w4 + ss_hor) >> ss_hor, ch4 = (h4 + ss_ver) >> ss_ver;
 1880|       |
 1881|  1.65M|    if (b->skip) {
  ------------------
  |  Branch (1881:9): [True: 1.17M, False: 484k]
  ------------------
 1882|       |        // reset coef contexts
 1883|  1.17M|        BlockContext *const a = t->a;
 1884|  1.17M|        dav1d_memset_pow2[b_dim[2]](&a->lcoef[bx4], 0x40);
 1885|  1.17M|        dav1d_memset_pow2[b_dim[3]](&t->l.lcoef[by4], 0x40);
 1886|  1.17M|        if (has_chroma) {
  ------------------
  |  Branch (1886:13): [True: 395k, False: 776k]
  ------------------
 1887|   395k|            dav1d_memset_pow2_fn memset_cw = dav1d_memset_pow2[ulog2(cbw4)];
 1888|   395k|            dav1d_memset_pow2_fn memset_ch = dav1d_memset_pow2[ulog2(cbh4)];
 1889|   395k|            memset_cw(&a->ccoef[0][cbx4], 0x40);
 1890|   395k|            memset_cw(&a->ccoef[1][cbx4], 0x40);
 1891|   395k|            memset_ch(&t->l.ccoef[0][cby4], 0x40);
 1892|   395k|            memset_ch(&t->l.ccoef[1][cby4], 0x40);
 1893|   395k|        }
 1894|  1.17M|        return 0;
 1895|  1.17M|    }
 1896|       |
 1897|   484k|    const TxfmInfo *const uvtx = &dav1d_txfm_dimensions[b->uvtx];
 1898|   484k|    const TxfmInfo *const ytx = &dav1d_txfm_dimensions[b->max_ytx];
 1899|   484k|    const uint16_t tx_split[2] = { b->tx_split0, b->tx_split1 };
 1900|       |
 1901|   971k|    for (int init_y = 0; init_y < bh4; init_y += 16) {
  ------------------
  |  Branch (1901:26): [True: 487k, False: 484k]
  ------------------
 1902|   976k|        for (int init_x = 0; init_x < bw4; init_x += 16) {
  ------------------
  |  Branch (1902:30): [True: 489k, False: 487k]
  ------------------
 1903|       |            // coefficient coding & inverse transforms
 1904|   489k|            int y_off = !!init_y, y;
 1905|   489k|            dst += PXSTRIDE(f->cur.stride[0]) * 4 * init_y;
  ------------------
  |  |   53|   489k|#define PXSTRIDE(x) (x)
  ------------------
 1906|   983k|            for (y = init_y, t->by += init_y; y < imin(h4, init_y + 16);
  ------------------
  |  Branch (1906:47): [True: 493k, False: 489k]
  ------------------
 1907|   493k|                 y += ytx->h, y_off++)
 1908|   493k|            {
 1909|   493k|                int x, x_off = !!init_x;
 1910|   994k|                for (x = init_x, t->bx += init_x; x < imin(w4, init_x + 16);
  ------------------
  |  Branch (1910:51): [True: 500k, False: 493k]
  ------------------
 1911|   500k|                     x += ytx->w, x_off++)
 1912|   500k|                {
 1913|   500k|                    read_coef_tree(t, bs, b, b->max_ytx, 0, tx_split,
 1914|   500k|                                   x_off, y_off, &dst[x * 4]);
 1915|   500k|                    t->bx += ytx->w;
 1916|   500k|                }
 1917|   493k|                dst += PXSTRIDE(f->cur.stride[0]) * 4 * ytx->h;
  ------------------
  |  |   53|   493k|#define PXSTRIDE(x) (x)
  ------------------
 1918|   493k|                t->bx -= x;
 1919|   493k|                t->by += ytx->h;
 1920|   493k|            }
 1921|   489k|            dst -= PXSTRIDE(f->cur.stride[0]) * 4 * y;
  ------------------
  |  |   53|   489k|#define PXSTRIDE(x) (x)
  ------------------
 1922|   489k|            t->by -= y;
 1923|       |
 1924|       |            // chroma coefs and inverse transform
 1925|  1.22M|            if (has_chroma) for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1925:17): [True: 407k, False: 82.1k]
  |  Branch (1925:46): [True: 814k, False: 407k]
  ------------------
 1926|   814k|                pixel *uvdst = ((pixel *) f->cur.data[1 + pl]) + uvdstoff +
 1927|   814k|                    (PXSTRIDE(f->cur.stride[1]) * init_y * 4 >> ss_ver);
  ------------------
  |  |   53|   814k|#define PXSTRIDE(x) (x)
  ------------------
 1928|   814k|                for (y = init_y >> ss_ver, t->by += init_y;
 1929|  1.63M|                     y < imin(ch4, (init_y + 16) >> ss_ver); y += uvtx->h)
  ------------------
  |  Branch (1929:22): [True: 818k, False: 814k]
  ------------------
 1930|   818k|                {
 1931|   818k|                    int x;
 1932|   818k|                    for (x = init_x >> ss_hor, t->bx += init_x;
 1933|  1.63M|                         x < imin(cw4, (init_x + 16) >> ss_hor); x += uvtx->w)
  ------------------
  |  Branch (1933:26): [True: 820k, False: 818k]
  ------------------
 1934|   820k|                    {
 1935|   820k|                        coef *cf;
 1936|   820k|                        int eob;
 1937|   820k|                        enum TxfmType txtp;
 1938|   820k|                        if (t->frame_thread.pass) {
  ------------------
  |  Branch (1938:29): [True: 820k, False: 18.4E]
  ------------------
 1939|   820k|                            const int p = t->frame_thread.pass & 1;
 1940|   820k|                            const int cbi = *ts->frame_thread[p].cbi++;
 1941|   820k|                            cf = ts->frame_thread[p].cf;
 1942|   820k|                            ts->frame_thread[p].cf += uvtx->w * uvtx->h * 16;
 1943|   820k|                            eob  = cbi >> 5;
 1944|   820k|                            txtp = cbi & 0x1f;
 1945|  18.4E|                        } else {
 1946|  18.4E|                            uint8_t cf_ctx;
 1947|  18.4E|                            cf = bitfn(t->cf);
  ------------------
  |  |   51|  18.4E|#define bitfn(x) x##_8bpc
  ------------------
 1948|  18.4E|                            txtp = t->scratch.txtp_map[(by4 + (y << ss_ver)) * 32 +
 1949|  18.4E|                                                        bx4 + (x << ss_hor)];
 1950|  18.4E|                            eob = decode_coefs(t, &t->a->ccoef[pl][cbx4 + x],
 1951|  18.4E|                                               &t->l.ccoef[pl][cby4 + y],
 1952|  18.4E|                                               b->uvtx, bs, b, 0, 1 + pl,
 1953|  18.4E|                                               cf, &txtp, &cf_ctx);
 1954|  18.4E|                            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  18.4E|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 18.4E]
  |  |  ------------------
  |  |   35|  18.4E|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  18.4E|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1955|      0|                                printf("Post-uv-cf-blk[pl=%d,tx=%d,"
 1956|      0|                                       "txtp=%d,eob=%d]: r=%d\n",
 1957|      0|                                       pl, b->uvtx, txtp, eob, ts->msac.rng);
 1958|  18.4E|                            int ctw = imin(uvtx->w, (f->bw - t->bx + ss_hor) >> ss_hor);
 1959|  18.4E|                            int cth = imin(uvtx->h, (f->bh - t->by + ss_ver) >> ss_ver);
 1960|  18.4E|                            dav1d_memset_likely_pow2(&t->a->ccoef[pl][cbx4 + x], cf_ctx, ctw);
 1961|  18.4E|                            dav1d_memset_likely_pow2(&t->l.ccoef[pl][cby4 + y], cf_ctx, cth);
 1962|  18.4E|                        }
 1963|   820k|                        if (eob >= 0) {
  ------------------
  |  Branch (1963:29): [True: 273k, False: 547k]
  ------------------
 1964|   273k|                            if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   34|   273k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 273k]
  |  |  ------------------
  |  |   35|   273k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   273k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                                          if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1965|      0|                                coef_dump(cf, uvtx->h * 4, uvtx->w * 4, 3, "dq");
 1966|   273k|                            dsp->itx.itxfm_add[b->uvtx]
 1967|   273k|                                              [txtp](&uvdst[4 * x],
 1968|   273k|                                                     f->cur.stride[1],
 1969|   273k|                                                     cf, eob HIGHBD_CALL_SUFFIX);
 1970|   273k|                            if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   34|   273k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 273k]
  |  |  ------------------
  |  |   35|   273k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   273k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                                          if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1971|      0|                                hex_dump(&uvdst[4 * x], f->cur.stride[1],
 1972|      0|                                         uvtx->w * 4, uvtx->h * 4, "recon");
 1973|   273k|                        }
 1974|   820k|                        t->bx += uvtx->w << ss_hor;
 1975|   820k|                    }
 1976|   818k|                    uvdst += PXSTRIDE(f->cur.stride[1]) * 4 * uvtx->h;
  ------------------
  |  |   53|   818k|#define PXSTRIDE(x) (x)
  ------------------
 1977|   818k|                    t->bx -= x << ss_hor;
 1978|   818k|                    t->by += uvtx->h << ss_ver;
 1979|   818k|                }
 1980|   814k|                t->by -= y << ss_ver;
 1981|   814k|            }
 1982|   489k|        }
 1983|   487k|    }
 1984|   484k|    return 0;
 1985|  1.65M|}
dav1d_filter_sbrow_deblock_cols_8bpc:
 1987|   380k|void bytefn(dav1d_filter_sbrow_deblock_cols)(Dav1dFrameContext *const f, const int sby) {
 1988|   380k|    if (!(f->c->inloop_filters & DAV1D_INLOOPFILTER_DEBLOCK) ||
  ------------------
  |  Branch (1988:9): [True: 1, False: 380k]
  ------------------
 1989|   380k|        (!f->frame_hdr->loopfilter.level_y[0] && !f->frame_hdr->loopfilter.level_y[1]))
  ------------------
  |  Branch (1989:10): [True: 3.08k, False: 377k]
  |  Branch (1989:50): [True: 0, False: 3.08k]
  ------------------
 1990|      0|    {
 1991|      0|        return;
 1992|      0|    }
 1993|   380k|    const int y = sby * f->sb_step * 4;
 1994|   380k|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
 1995|   380k|    pixel *const p[3] = {
 1996|   380k|        f->lf.p[0] + y * PXSTRIDE(f->cur.stride[0]),
  ------------------
  |  |   53|   380k|#define PXSTRIDE(x) (x)
  ------------------
 1997|   380k|        f->lf.p[1] + (y * PXSTRIDE(f->cur.stride[1]) >> ss_ver),
  ------------------
  |  |   53|   380k|#define PXSTRIDE(x) (x)
  ------------------
 1998|   380k|        f->lf.p[2] + (y * PXSTRIDE(f->cur.stride[1]) >> ss_ver)
  ------------------
  |  |   53|   380k|#define PXSTRIDE(x) (x)
  ------------------
 1999|   380k|    };
 2000|   380k|    Av1Filter *mask = f->lf.mask + (sby >> !f->seq_hdr->sb128) * f->sb128w;
 2001|   380k|    bytefn(dav1d_loopfilter_sbrow_cols)(f, p, mask, sby,
  ------------------
  |  |   87|   380k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|   380k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2002|   380k|                                        f->lf.start_of_tile_row[sby]);
 2003|   380k|}
dav1d_filter_sbrow_deblock_rows_8bpc:
 2005|   443k|void bytefn(dav1d_filter_sbrow_deblock_rows)(Dav1dFrameContext *const f, const int sby) {
 2006|   443k|    const int y = sby * f->sb_step * 4;
 2007|   443k|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
 2008|   443k|    pixel *const p[3] = {
 2009|   443k|        f->lf.p[0] + y * PXSTRIDE(f->cur.stride[0]),
  ------------------
  |  |   53|   443k|#define PXSTRIDE(x) (x)
  ------------------
 2010|   443k|        f->lf.p[1] + (y * PXSTRIDE(f->cur.stride[1]) >> ss_ver),
  ------------------
  |  |   53|   443k|#define PXSTRIDE(x) (x)
  ------------------
 2011|   443k|        f->lf.p[2] + (y * PXSTRIDE(f->cur.stride[1]) >> ss_ver)
  ------------------
  |  |   53|   443k|#define PXSTRIDE(x) (x)
  ------------------
 2012|   443k|    };
 2013|   443k|    Av1Filter *mask = f->lf.mask + (sby >> !f->seq_hdr->sb128) * f->sb128w;
 2014|   443k|    if (f->c->inloop_filters & DAV1D_INLOOPFILTER_DEBLOCK &&
  ------------------
  |  Branch (2014:9): [True: 443k, False: 1]
  ------------------
 2015|   443k|        (f->frame_hdr->loopfilter.level_y[0] || f->frame_hdr->loopfilter.level_y[1]))
  ------------------
  |  Branch (2015:10): [True: 377k, False: 65.9k]
  |  Branch (2015:49): [True: 3.07k, False: 62.9k]
  ------------------
 2016|   380k|    {
 2017|   380k|        bytefn(dav1d_loopfilter_sbrow_rows)(f, p, mask, sby);
  ------------------
  |  |   87|   380k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|   380k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2018|   380k|    }
 2019|   443k|    if (f->seq_hdr->cdef || f->lf.restore_planes) {
  ------------------
  |  Branch (2019:9): [True: 90.3k, False: 353k]
  |  Branch (2019:29): [True: 122k, False: 231k]
  ------------------
 2020|       |        // Store loop filtered pixels required by CDEF / LR
 2021|   212k|        bytefn(dav1d_copy_lpf)(f, p, sby);
  ------------------
  |  |   87|   212k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|   212k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2022|   212k|    }
 2023|   443k|}
dav1d_filter_sbrow_cdef_8bpc:
 2025|  90.3k|void bytefn(dav1d_filter_sbrow_cdef)(Dav1dTaskContext *const tc, const int sby) {
 2026|  90.3k|    const Dav1dFrameContext *const f = tc->f;
 2027|  90.3k|    if (!(f->c->inloop_filters & DAV1D_INLOOPFILTER_CDEF)) return;
  ------------------
  |  Branch (2027:9): [True: 0, False: 90.3k]
  ------------------
 2028|  90.3k|    const int sbsz = f->sb_step;
 2029|  90.3k|    const int y = sby * sbsz * 4;
 2030|  90.3k|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
 2031|  90.3k|    pixel *const p[3] = {
 2032|  90.3k|        f->lf.p[0] + y * PXSTRIDE(f->cur.stride[0]),
  ------------------
  |  |   53|  90.3k|#define PXSTRIDE(x) (x)
  ------------------
 2033|  90.3k|        f->lf.p[1] + (y * PXSTRIDE(f->cur.stride[1]) >> ss_ver),
  ------------------
  |  |   53|  90.3k|#define PXSTRIDE(x) (x)
  ------------------
 2034|  90.3k|        f->lf.p[2] + (y * PXSTRIDE(f->cur.stride[1]) >> ss_ver)
  ------------------
  |  |   53|  90.3k|#define PXSTRIDE(x) (x)
  ------------------
 2035|  90.3k|    };
 2036|  90.3k|    Av1Filter *prev_mask = f->lf.mask + ((sby - 1) >> !f->seq_hdr->sb128) * f->sb128w;
 2037|  90.3k|    Av1Filter *mask = f->lf.mask + (sby >> !f->seq_hdr->sb128) * f->sb128w;
 2038|  90.3k|    const int start = sby * sbsz;
 2039|  90.3k|    if (sby) {
  ------------------
  |  Branch (2039:9): [True: 80.4k, False: 9.91k]
  ------------------
 2040|  80.4k|        const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
 2041|  80.4k|        pixel *p_up[3] = {
 2042|  80.4k|            p[0] - 8 * PXSTRIDE(f->cur.stride[0]),
  ------------------
  |  |   53|  80.4k|#define PXSTRIDE(x) (x)
  ------------------
 2043|  80.4k|            p[1] - (8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver),
  ------------------
  |  |   53|  80.4k|#define PXSTRIDE(x) (x)
  ------------------
 2044|  80.4k|            p[2] - (8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver),
  ------------------
  |  |   53|  80.4k|#define PXSTRIDE(x) (x)
  ------------------
 2045|  80.4k|        };
 2046|  80.4k|        bytefn(dav1d_cdef_brow)(tc, p_up, prev_mask, start - 2, start, 1, sby);
  ------------------
  |  |   87|  80.4k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  80.4k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2047|  80.4k|    }
 2048|  90.3k|    const int n_blks = sbsz - 2 * (sby + 1 < f->sbh);
 2049|  90.3k|    const int end = imin(start + n_blks, f->bh);
 2050|  90.3k|    bytefn(dav1d_cdef_brow)(tc, p, mask, start, end, 0, sby);
  ------------------
  |  |   87|  90.3k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  90.3k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2051|  90.3k|}
dav1d_filter_sbrow_resize_8bpc:
 2053|  7.54k|void bytefn(dav1d_filter_sbrow_resize)(Dav1dFrameContext *const f, const int sby) {
 2054|  7.54k|    const int sbsz = f->sb_step;
 2055|  7.54k|    const int y = sby * sbsz * 4;
 2056|  7.54k|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
 2057|  7.54k|    const pixel *const p[3] = {
 2058|  7.54k|        f->lf.p[0] + y * PXSTRIDE(f->cur.stride[0]),
  ------------------
  |  |   53|  7.54k|#define PXSTRIDE(x) (x)
  ------------------
 2059|  7.54k|        f->lf.p[1] + (y * PXSTRIDE(f->cur.stride[1]) >> ss_ver),
  ------------------
  |  |   53|  7.54k|#define PXSTRIDE(x) (x)
  ------------------
 2060|  7.54k|        f->lf.p[2] + (y * PXSTRIDE(f->cur.stride[1]) >> ss_ver)
  ------------------
  |  |   53|  7.54k|#define PXSTRIDE(x) (x)
  ------------------
 2061|  7.54k|    };
 2062|  7.54k|    pixel *const sr_p[3] = {
 2063|  7.54k|        f->lf.sr_p[0] + y * PXSTRIDE(f->sr_cur.p.stride[0]),
  ------------------
  |  |   53|  7.54k|#define PXSTRIDE(x) (x)
  ------------------
 2064|  7.54k|        f->lf.sr_p[1] + (y * PXSTRIDE(f->sr_cur.p.stride[1]) >> ss_ver),
  ------------------
  |  |   53|  7.54k|#define PXSTRIDE(x) (x)
  ------------------
 2065|  7.54k|        f->lf.sr_p[2] + (y * PXSTRIDE(f->sr_cur.p.stride[1]) >> ss_ver)
  ------------------
  |  |   53|  7.54k|#define PXSTRIDE(x) (x)
  ------------------
 2066|  7.54k|    };
 2067|  7.54k|    const int has_chroma = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400;
 2068|  29.8k|    for (int pl = 0; pl < 1 + 2 * has_chroma; pl++) {
  ------------------
  |  Branch (2068:22): [True: 22.2k, False: 7.54k]
  ------------------
 2069|  22.2k|        const int ss_ver = pl && f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
  ------------------
  |  Branch (2069:28): [True: 14.7k, False: 7.53k]
  |  Branch (2069:34): [True: 8.12k, False: 6.61k]
  ------------------
 2070|  22.2k|        const int h_start = 8 * !!sby >> ss_ver;
 2071|  22.2k|        const ptrdiff_t dst_stride = f->sr_cur.p.stride[!!pl];
 2072|  22.2k|        pixel *dst = sr_p[pl] - h_start * PXSTRIDE(dst_stride);
  ------------------
  |  |   53|  22.2k|#define PXSTRIDE(x) (x)
  ------------------
 2073|  22.2k|        const ptrdiff_t src_stride = f->cur.stride[!!pl];
 2074|  22.2k|        const pixel *src = p[pl] - h_start * PXSTRIDE(src_stride);
  ------------------
  |  |   53|  22.2k|#define PXSTRIDE(x) (x)
  ------------------
 2075|  22.2k|        const int h_end = 4 * (sbsz - 2 * (sby + 1 < f->sbh)) >> ss_ver;
 2076|  22.2k|        const int ss_hor = pl && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
  ------------------
  |  Branch (2076:28): [True: 14.7k, False: 7.53k]
  |  Branch (2076:34): [True: 10.7k, False: 3.99k]
  ------------------
 2077|  22.2k|        const int dst_w = (f->sr_cur.p.p.w + ss_hor) >> ss_hor;
 2078|  22.2k|        const int src_w = (4 * f->bw + ss_hor) >> ss_hor;
 2079|  22.2k|        const int img_h = (f->cur.p.h - sbsz * 4 * sby + ss_ver) >> ss_ver;
 2080|       |
 2081|  22.2k|        f->dsp->mc.resize(dst, dst_stride, src, src_stride, dst_w,
 2082|  22.2k|                          imin(img_h, h_end) + h_start, src_w,
 2083|  22.2k|                          f->resize_step[!!pl], f->resize_start[!!pl]
 2084|  22.2k|                          HIGHBD_CALL_SUFFIX);
 2085|  22.2k|    }
 2086|  7.54k|}
dav1d_filter_sbrow_lr_8bpc:
 2088|   130k|void bytefn(dav1d_filter_sbrow_lr)(Dav1dFrameContext *const f, const int sby) {
 2089|   130k|    if (!(f->c->inloop_filters & DAV1D_INLOOPFILTER_RESTORATION)) return;
  ------------------
  |  Branch (2089:9): [True: 0, False: 130k]
  ------------------
 2090|   130k|    const int y = sby * f->sb_step * 4;
 2091|   130k|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
 2092|   130k|    pixel *const sr_p[3] = {
 2093|   130k|        f->lf.sr_p[0] + y * PXSTRIDE(f->sr_cur.p.stride[0]),
  ------------------
  |  |   53|   130k|#define PXSTRIDE(x) (x)
  ------------------
 2094|   130k|        f->lf.sr_p[1] + (y * PXSTRIDE(f->sr_cur.p.stride[1]) >> ss_ver),
  ------------------
  |  |   53|   130k|#define PXSTRIDE(x) (x)
  ------------------
 2095|   130k|        f->lf.sr_p[2] + (y * PXSTRIDE(f->sr_cur.p.stride[1]) >> ss_ver)
  ------------------
  |  |   53|   130k|#define PXSTRIDE(x) (x)
  ------------------
 2096|   130k|    };
 2097|   130k|    bytefn(dav1d_lr_sbrow)(f, sr_p, sby);
  ------------------
  |  |   87|   130k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|   130k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2098|   130k|}
dav1d_backup_ipred_edge_8bpc:
 2111|   727k|void bytefn(dav1d_backup_ipred_edge)(Dav1dTaskContext *const t) {
 2112|   727k|    const Dav1dFrameContext *const f = t->f;
 2113|   727k|    Dav1dTileState *const ts = t->ts;
 2114|   727k|    const int sby = t->by >> f->sb_shift;
 2115|   727k|    const int sby_off = f->sb128w * 128 * sby;
 2116|   727k|    const int x_off = ts->tiling.col_start;
 2117|       |
 2118|   727k|    const pixel *const y =
 2119|   727k|        ((const pixel *) f->cur.data[0]) + x_off * 4 +
 2120|   727k|                    ((t->by + f->sb_step) * 4 - 1) * PXSTRIDE(f->cur.stride[0]);
  ------------------
  |  |   53|   727k|#define PXSTRIDE(x) (x)
  ------------------
 2121|   727k|    pixel_copy(&f->ipred_edge[0][sby_off + x_off * 4], y,
  ------------------
  |  |   47|   727k|#define pixel_copy memcpy
  ------------------
 2122|   727k|               4 * (ts->tiling.col_end - x_off));
 2123|       |
 2124|   727k|    if (f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400) {
  ------------------
  |  Branch (2124:9): [True: 77.6k, False: 649k]
  ------------------
 2125|  77.6k|        const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
 2126|  77.6k|        const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
 2127|       |
 2128|  77.6k|        const ptrdiff_t uv_off = (x_off * 4 >> ss_hor) +
 2129|  77.6k|            (((t->by + f->sb_step) * 4 >> ss_ver) - 1) * PXSTRIDE(f->cur.stride[1]);
  ------------------
  |  |   53|  77.6k|#define PXSTRIDE(x) (x)
  ------------------
 2130|   232k|        for (int pl = 1; pl <= 2; pl++)
  ------------------
  |  Branch (2130:26): [True: 155k, False: 77.6k]
  ------------------
 2131|   155k|            pixel_copy(&f->ipred_edge[pl][sby_off + (x_off * 4 >> ss_hor)],
  ------------------
  |  |   47|   155k|#define pixel_copy memcpy
  ------------------
 2132|   155k|                       &((const pixel *) f->cur.data[pl])[uv_off],
 2133|   155k|                       4 * (ts->tiling.col_end - x_off) >> ss_hor);
 2134|  77.6k|    }
 2135|   727k|}
dav1d_copy_pal_block_y_8bpc:
 2141|  82.4k|{
 2142|  82.4k|    const Dav1dFrameContext *const f = t->f;
 2143|  82.4k|    pixel *const pal = t->frame_thread.pass ?
  ------------------
  |  Branch (2143:24): [True: 82.4k, False: 2]
  ------------------
 2144|  82.4k|        f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
 2145|  82.4k|                            ((t->bx >> 1) + (t->by & 1))][0] :
 2146|  82.4k|        bytefn(t->scratch.pal)[0];
  ------------------
  |  |   87|      2|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  82.4k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2147|   421k|    for (int x = 0; x < bw4; x++)
  ------------------
  |  Branch (2147:21): [True: 339k, False: 82.4k]
  ------------------
 2148|   339k|        memcpy(bytefn(t->al_pal)[0][bx4 + x][0], pal, 8 * sizeof(pixel));
  ------------------
  |  |   87|   339k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|   339k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2149|   366k|    for (int y = 0; y < bh4; y++)
  ------------------
  |  Branch (2149:21): [True: 284k, False: 82.4k]
  ------------------
 2150|   284k|        memcpy(bytefn(t->al_pal)[1][by4 + y][0], pal, 8 * sizeof(pixel));
  ------------------
  |  |   87|   284k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|   284k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2151|  82.4k|}
dav1d_copy_pal_block_uv_8bpc:
 2157|  16.5k|{
 2158|  16.5k|    const Dav1dFrameContext *const f = t->f;
 2159|  16.5k|    const pixel (*const pal)[8] = t->frame_thread.pass ?
  ------------------
  |  Branch (2159:35): [True: 16.5k, False: 0]
  ------------------
 2160|  16.5k|        f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
 2161|  16.5k|                            ((t->bx >> 1) + (t->by & 1))] :
 2162|  16.5k|        bytefn(t->scratch.pal);
  ------------------
  |  |   87|      0|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|      0|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2163|       |    // see aomedia bug 2183 for why we use luma coordinates here
 2164|  49.6k|    for (int pl = 1; pl <= 2; pl++) {
  ------------------
  |  Branch (2164:22): [True: 33.0k, False: 16.5k]
  ------------------
 2165|   207k|        for (int x = 0; x < bw4; x++)
  ------------------
  |  Branch (2165:25): [True: 174k, False: 33.0k]
  ------------------
 2166|   174k|            memcpy(bytefn(t->al_pal)[0][bx4 + x][pl], pal[pl], 8 * sizeof(pixel));
  ------------------
  |  |   87|   174k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|   174k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2167|   210k|        for (int y = 0; y < bh4; y++)
  ------------------
  |  Branch (2167:25): [True: 177k, False: 33.0k]
  ------------------
 2168|   177k|            memcpy(bytefn(t->al_pal)[1][by4 + y][pl], pal[pl], 8 * sizeof(pixel));
  ------------------
  |  |   87|   177k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|   177k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2169|  33.0k|    }
 2170|  16.5k|}
dav1d_read_pal_plane_8bpc:
 2175|  98.9k|{
 2176|  98.9k|    Dav1dTileState *const ts = t->ts;
 2177|  98.9k|    const Dav1dFrameContext *const f = t->f;
 2178|  98.9k|    const int pal_sz = b->pal_sz[pl] = dav1d_msac_decode_symbol_adapt8(&ts->msac,
  ------------------
  |  |   48|  98.9k|#define dav1d_msac_decode_symbol_adapt8  dav1d_msac_decode_symbol_adapt8_sse2
  ------------------
 2179|  98.9k|                                           ts->cdf.m.pal_sz[pl][sz_ctx], 6) + 2;
 2180|  98.9k|    pixel cache[16], used_cache[8];
 2181|  98.9k|    int l_cache = pl ? t->pal_sz_uv[1][by4] : t->l.pal_sz[by4];
  ------------------
  |  Branch (2181:19): [True: 16.5k, False: 82.4k]
  ------------------
 2182|  98.9k|    int n_cache = 0;
 2183|       |    // don't reuse above palette outside SB64 boundaries
 2184|  98.9k|    int a_cache = by4 & 15 ? pl ? t->pal_sz_uv[0][bx4] : t->a->pal_sz[bx4] : 0;
  ------------------
  |  Branch (2184:19): [True: 79.1k, False: 19.8k]
  |  Branch (2184:30): [True: 10.3k, False: 68.8k]
  ------------------
 2185|  98.9k|    const pixel *l = bytefn(t->al_pal)[1][by4][pl];
  ------------------
  |  |   87|  98.9k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  98.9k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2186|  98.9k|    const pixel *a = bytefn(t->al_pal)[0][bx4][pl];
  ------------------
  |  |   87|  98.9k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  98.9k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2187|       |
 2188|       |    // fill/sort cache
 2189|   207k|    while (l_cache && a_cache) {
  ------------------
  |  Branch (2189:12): [True: 139k, False: 67.9k]
  |  Branch (2189:23): [True: 108k, False: 31.0k]
  ------------------
 2190|   108k|        if (*l < *a) {
  ------------------
  |  Branch (2190:13): [True: 36.9k, False: 71.6k]
  ------------------
 2191|  36.9k|            if (!n_cache || cache[n_cache - 1] != *l)
  ------------------
  |  Branch (2191:17): [True: 7.10k, False: 29.8k]
  |  Branch (2191:29): [True: 29.7k, False: 144]
  ------------------
 2192|  36.8k|                cache[n_cache++] = *l;
 2193|  36.9k|            l++;
 2194|  36.9k|            l_cache--;
 2195|  71.6k|        } else {
 2196|  71.6k|            if (*a == *l) {
  ------------------
  |  Branch (2196:17): [True: 32.5k, False: 39.0k]
  ------------------
 2197|  32.5k|                l++;
 2198|  32.5k|                l_cache--;
 2199|  32.5k|            }
 2200|  71.6k|            if (!n_cache || cache[n_cache - 1] != *a)
  ------------------
  |  Branch (2200:17): [True: 11.4k, False: 60.1k]
  |  Branch (2200:29): [True: 58.2k, False: 1.84k]
  ------------------
 2201|  69.7k|                cache[n_cache++] = *a;
 2202|  71.6k|            a++;
 2203|  71.6k|            a_cache--;
 2204|  71.6k|        }
 2205|   108k|    }
 2206|  98.9k|    if (l_cache) {
  ------------------
  |  Branch (2206:9): [True: 31.0k, False: 67.9k]
  ------------------
 2207|   125k|        do {
 2208|   125k|            if (!n_cache || cache[n_cache - 1] != *l)
  ------------------
  |  Branch (2208:17): [True: 25.4k, False: 100k]
  |  Branch (2208:29): [True: 86.7k, False: 13.4k]
  ------------------
 2209|   112k|                cache[n_cache++] = *l;
 2210|   125k|            l++;
 2211|   125k|        } while (--l_cache > 0);
  ------------------
  |  Branch (2211:18): [True: 94.5k, False: 31.0k]
  ------------------
 2212|  67.9k|    } else if (a_cache) {
  ------------------
  |  Branch (2212:16): [True: 29.6k, False: 38.2k]
  ------------------
 2213|   122k|        do {
 2214|   122k|            if (!n_cache || cache[n_cache - 1] != *a)
  ------------------
  |  Branch (2214:17): [True: 19.0k, False: 103k]
  |  Branch (2214:29): [True: 66.2k, False: 36.9k]
  ------------------
 2215|  85.3k|                cache[n_cache++] = *a;
 2216|   122k|            a++;
 2217|   122k|        } while (--a_cache > 0);
  ------------------
  |  Branch (2217:18): [True: 92.6k, False: 29.6k]
  ------------------
 2218|  29.6k|    }
 2219|       |
 2220|       |    // find reused cache entries
 2221|  98.9k|    int i = 0;
 2222|   361k|    for (int n = 0; n < n_cache && i < pal_sz; n++)
  ------------------
  |  Branch (2222:21): [True: 274k, False: 87.4k]
  |  Branch (2222:36): [True: 262k, False: 11.5k]
  ------------------
 2223|   262k|        if (dav1d_msac_decode_bool_equi(&ts->msac))
  ------------------
  |  |   53|   262k|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
  |  Branch (2223:13): [True: 132k, False: 130k]
  ------------------
 2224|   132k|            used_cache[i++] = cache[n];
 2225|  98.9k|    const int n_used_cache = i;
 2226|       |
 2227|       |    // parse new entries
 2228|  98.9k|    pixel *const pal = t->frame_thread.pass ?
  ------------------
  |  Branch (2228:24): [True: 98.9k, False: 4]
  ------------------
 2229|  98.9k|        f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
 2230|  98.9k|                            ((t->bx >> 1) + (t->by & 1))][pl] :
 2231|  98.9k|        bytefn(t->scratch.pal)[pl];
  ------------------
  |  |   87|      4|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  98.9k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2232|  98.9k|    if (i < pal_sz) {
  ------------------
  |  Branch (2232:9): [True: 85.2k, False: 13.7k]
  ------------------
 2233|  85.2k|        const int bpc = BITDEPTH == 8 ? 8 : f->cur.p.bpc;
  ------------------
  |  Branch (2233:25): [True: 85.2k, Folded]
  ------------------
 2234|  85.2k|        int prev = pal[i++] = dav1d_msac_decode_bools(&ts->msac, bpc);
 2235|       |
 2236|  85.2k|        if (i < pal_sz) {
  ------------------
  |  Branch (2236:13): [True: 72.3k, False: 12.9k]
  ------------------
 2237|  72.3k|            int bits = bpc - 3 + dav1d_msac_decode_bools(&ts->msac, 2);
 2238|  72.3k|            const int max = (1 << bpc) - 1;
 2239|       |
 2240|   153k|            do {
 2241|   153k|                const int delta = dav1d_msac_decode_bools(&ts->msac, bits);
 2242|   153k|                prev = pal[i++] = imin(prev + delta + !pl, max);
 2243|   153k|                if (prev + !pl >= max) {
  ------------------
  |  Branch (2243:21): [True: 39.4k, False: 114k]
  ------------------
 2244|   105k|                    for (; i < pal_sz; i++)
  ------------------
  |  Branch (2244:28): [True: 66.0k, False: 39.4k]
  ------------------
 2245|  66.0k|                        pal[i] = max;
 2246|  39.4k|                    break;
 2247|  39.4k|                }
 2248|   114k|                bits = imin(bits, 1 + ulog2(max - prev - !pl));
 2249|   114k|            } while (i < pal_sz);
  ------------------
  |  Branch (2249:22): [True: 81.4k, False: 32.9k]
  ------------------
 2250|  72.3k|        }
 2251|       |
 2252|       |        // merge cache+new entries
 2253|  85.2k|        int n = 0, m = n_used_cache;
 2254|   483k|        for (i = 0; i < pal_sz; i++) {
  ------------------
  |  Branch (2254:21): [True: 398k, False: 85.2k]
  ------------------
 2255|   398k|            if (n < n_used_cache && (m >= pal_sz || used_cache[n] <= pal[m])) {
  ------------------
  |  Branch (2255:17): [True: 168k, False: 229k]
  |  Branch (2255:38): [True: 37.1k, False: 131k]
  |  Branch (2255:53): [True: 56.0k, False: 75.3k]
  ------------------
 2256|  93.1k|                pal[i] = used_cache[n++];
 2257|   305k|            } else {
 2258|   305k|                assert(m < pal_sz);
  ------------------
  |  Branch (2258:17): [True: 305k, False: 18.4E]
  ------------------
 2259|   305k|                pal[i] = pal[m++];
 2260|   305k|            }
 2261|   398k|        }
 2262|  85.2k|    } else {
 2263|  13.7k|        memcpy(pal, used_cache, n_used_cache * sizeof(*used_cache));
 2264|  13.7k|    }
 2265|       |
 2266|  99.0k|    if (DEBUG_BLOCK_INFO) {
  ------------------
  |  |   34|  99.0k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 99.0k]
  |  |  ------------------
  |  |   35|  99.0k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  99.0k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 2267|      0|        printf("Post-pal[pl=%d,sz=%d,cache_size=%d,used_cache=%d]: r=%d, cache=",
 2268|      0|               pl, pal_sz, n_cache, n_used_cache, ts->msac.rng);
 2269|      0|        for (int n = 0; n < n_cache; n++)
  ------------------
  |  Branch (2269:25): [True: 0, False: 0]
  ------------------
 2270|      0|            printf("%c%02x", n ? ' ' : '[', cache[n]);
  ------------------
  |  Branch (2270:30): [True: 0, False: 0]
  ------------------
 2271|      0|        printf("%s, pal=", n_cache ? "]" : "[]");
  ------------------
  |  Branch (2271:28): [True: 0, False: 0]
  ------------------
 2272|      0|        for (int n = 0; n < pal_sz; n++)
  ------------------
  |  Branch (2272:25): [True: 0, False: 0]
  ------------------
 2273|      0|            printf("%c%02x", n ? ' ' : '[', pal[n]);
  ------------------
  |  Branch (2273:30): [True: 0, False: 0]
  ------------------
 2274|      0|        printf("]\n");
 2275|      0|    }
 2276|  99.0k|}
dav1d_read_pal_uv_8bpc:
 2280|  16.5k|{
 2281|  16.5k|    bytefn(dav1d_read_pal_plane)(t, b, 1, sz_ctx, bx4, by4);
  ------------------
  |  |   87|  16.5k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  16.5k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2282|       |
 2283|       |    // V pal coding
 2284|  16.5k|    Dav1dTileState *const ts = t->ts;
 2285|  16.5k|    const Dav1dFrameContext *const f = t->f;
 2286|  16.5k|    pixel *const pal = t->frame_thread.pass ?
  ------------------
  |  Branch (2286:24): [True: 16.5k, False: 18.4E]
  ------------------
 2287|  16.5k|        f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
 2288|  16.5k|                            ((t->bx >> 1) + (t->by & 1))][2] :
 2289|  18.4E|        bytefn(t->scratch.pal)[2];
  ------------------
  |  |   87|  18.4E|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   51|  16.5k|#define bitfn(x) x##_8bpc
  |  |  ------------------
  ------------------
 2290|  18.4E|    const int bpc = BITDEPTH == 8 ? 8 : f->cur.p.bpc;
  ------------------
  |  Branch (2290:21): [True: 16.5k, Folded]
  ------------------
 2291|  16.5k|    if (dav1d_msac_decode_bool_equi(&ts->msac)) {
  ------------------
  |  |   53|  16.5k|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
  |  Branch (2291:9): [True: 8.55k, False: 7.98k]
  ------------------
 2292|  8.55k|        const int bits = bpc - 4 + dav1d_msac_decode_bools(&ts->msac, 2);
 2293|  8.55k|        int prev = pal[0] = dav1d_msac_decode_bools(&ts->msac, bpc);
 2294|  8.55k|        const int max = (1 << bpc) - 1;
 2295|  34.8k|        for (int i = 1; i < b->pal_sz[1]; i++) {
  ------------------
  |  Branch (2295:25): [True: 26.3k, False: 8.55k]
  ------------------
 2296|  26.3k|            int delta = dav1d_msac_decode_bools(&ts->msac, bits);
 2297|  26.3k|            if (delta && dav1d_msac_decode_bool_equi(&ts->msac)) delta = -delta;
  ------------------
  |  |   53|  25.6k|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
  |  Branch (2297:17): [True: 25.6k, False: 715]
  |  Branch (2297:26): [True: 14.7k, False: 10.8k]
  ------------------
 2298|  26.3k|            prev = pal[i] = (prev + delta) & max;
 2299|  26.3k|        }
 2300|  8.55k|    } else {
 2301|  44.1k|        for (int i = 0; i < b->pal_sz[1]; i++)
  ------------------
  |  Branch (2301:25): [True: 36.1k, False: 7.98k]
  ------------------
 2302|  36.1k|            pal[i] = dav1d_msac_decode_bools(&ts->msac, bpc);
 2303|  7.98k|    }
 2304|  16.5k|    if (DEBUG_BLOCK_INFO) {
  ------------------
  |  |   34|  16.5k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 16.5k]
  |  |  ------------------
  |  |   35|  16.5k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  16.5k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 2305|      0|        printf("Post-pal[pl=2]: r=%d ", ts->msac.rng);
 2306|      0|        for (int n = 0; n < b->pal_sz[1]; n++)
  ------------------
  |  Branch (2306:25): [True: 0, False: 0]
  ------------------
 2307|      0|            printf("%c%02x", n ? ' ' : '[', pal[n]);
  ------------------
  |  Branch (2307:30): [True: 0, False: 0]
  ------------------
 2308|      0|        printf("]\n");
 2309|      0|    }
 2310|  16.5k|}
recon_tmpl.c:read_coef_tree:
  736|  5.04M|{
  737|  5.04M|    const Dav1dFrameContext *const f = t->f;
  738|  5.04M|    Dav1dTileState *const ts = t->ts;
  739|  5.04M|    const Dav1dDSPContext *const dsp = f->dsp;
  740|  5.04M|    const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[ytx];
  741|  5.04M|    const int txw = t_dim->w, txh = t_dim->h;
  742|       |
  743|       |    /* y_off can be larger than 3 since lossless blocks use TX_4X4 but can't
  744|       |     * be splitted. Aviods an undefined left shift. */
  745|  5.04M|    if (depth < 2 && tx_split[depth] &&
  ------------------
  |  Branch (745:9): [True: 4.58M, False: 459k]
  |  Branch (745:22): [True: 554k, False: 4.03M]
  ------------------
  746|   554k|        tx_split[depth] & (1 << (y_off * 4 + x_off)))
  ------------------
  |  Branch (746:9): [True: 452k, False: 102k]
  ------------------
  747|   452k|    {
  748|   452k|        const enum RectTxfmSize sub = t_dim->sub;
  749|   452k|        const TxfmInfo *const sub_t_dim = &dav1d_txfm_dimensions[sub];
  750|   452k|        const int txsw = sub_t_dim->w, txsh = sub_t_dim->h;
  751|       |
  752|   452k|        read_coef_tree(t, bs, b, sub, depth + 1, tx_split,
  753|   452k|                       x_off * 2 + 0, y_off * 2 + 0, dst);
  754|   452k|        t->bx += txsw;
  755|   452k|        if (txw >= txh && t->bx < f->bw)
  ------------------
  |  Branch (755:13): [True: 353k, False: 98.7k]
  |  Branch (755:27): [True: 351k, False: 1.60k]
  ------------------
  756|   351k|            read_coef_tree(t, bs, b, sub, depth + 1, tx_split, x_off * 2 + 1,
  757|   351k|                           y_off * 2 + 0, dst ? &dst[4 * txsw] : NULL);
  ------------------
  |  Branch (757:43): [True: 140k, False: 211k]
  ------------------
  758|   452k|        t->bx -= txsw;
  759|   452k|        t->by += txsh;
  760|   452k|        if (txh >= txw && t->by < f->bh) {
  ------------------
  |  Branch (760:13): [True: 326k, False: 126k]
  |  Branch (760:27): [True: 324k, False: 1.98k]
  ------------------
  761|   324k|            if (dst)
  ------------------
  |  Branch (761:17): [True: 130k, False: 194k]
  ------------------
  762|   130k|                dst += 4 * txsh * PXSTRIDE(f->cur.stride[0]);
  ------------------
  |  |   53|   130k|#define PXSTRIDE(x) (x)
  ------------------
  763|   324k|            read_coef_tree(t, bs, b, sub, depth + 1, tx_split,
  764|   324k|                           x_off * 2 + 0, y_off * 2 + 1, dst);
  765|   324k|            t->bx += txsw;
  766|   324k|            if (txw >= txh && t->bx < f->bw)
  ------------------
  |  Branch (766:17): [True: 225k, False: 98.3k]
  |  Branch (766:31): [True: 224k, False: 1.44k]
  ------------------
  767|   224k|                read_coef_tree(t, bs, b, sub, depth + 1, tx_split, x_off * 2 + 1,
  768|   224k|                               y_off * 2 + 1, dst ? &dst[4 * txsw] : NULL);
  ------------------
  |  Branch (768:47): [True: 87.5k, False: 136k]
  ------------------
  769|   324k|            t->bx -= txsw;
  770|   324k|        }
  771|   452k|        t->by -= txsh;
  772|  4.59M|    } else {
  773|  4.59M|        const int bx4 = t->bx & 31, by4 = t->by & 31;
  774|  4.59M|        enum TxfmType txtp;
  775|  4.59M|        uint8_t cf_ctx;
  776|  4.59M|        int eob;
  777|  4.59M|        coef *cf;
  778|       |
  779|  4.59M|        if (t->frame_thread.pass) {
  ------------------
  |  Branch (779:13): [True: 4.59M, False: 18.4E]
  ------------------
  780|  4.59M|            const int p = t->frame_thread.pass & 1;
  781|  4.59M|            assert(ts->frame_thread[p].cf);
  ------------------
  |  Branch (781:13): [True: 4.59M, False: 18.4E]
  ------------------
  782|  4.59M|            cf = ts->frame_thread[p].cf;
  783|  4.59M|            ts->frame_thread[p].cf += imin(t_dim->w, 8) * imin(t_dim->h, 8) * 16;
  784|  18.4E|        } else {
  785|  18.4E|            cf = bitfn(t->cf);
  ------------------
  |  |   51|  18.4E|#define bitfn(x) x##_8bpc
  ------------------
  786|  18.4E|        }
  787|  4.59M|        if (t->frame_thread.pass != 2) {
  ------------------
  |  Branch (787:13): [True: 3.43M, False: 1.16M]
  ------------------
  788|  3.43M|            eob = decode_coefs(t, &t->a->lcoef[bx4], &t->l.lcoef[by4],
  789|  3.43M|                               ytx, bs, b, 0, 0, cf, &txtp, &cf_ctx);
  790|  3.43M|            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  3.43M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 3.43M]
  |  |  ------------------
  |  |   35|  3.43M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  3.43M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  791|      0|                printf("Post-y-cf-blk[tx=%d,txtp=%d,eob=%d]: r=%d\n",
  792|      0|                       ytx, txtp, eob, ts->msac.rng);
  793|  3.43M|            dav1d_memset_likely_pow2(&t->a->lcoef[bx4], cf_ctx, imin(txw, f->bw - t->bx));
  794|  3.43M|            dav1d_memset_likely_pow2(&t->l.lcoef[by4], cf_ctx, imin(txh, f->bh - t->by));
  795|  3.43M|#define set_ctx(rep_macro) \
  796|  3.43M|            for (int y = 0; y < txh; y++) { \
  797|  3.43M|                rep_macro(txtp_map, 0, txtp); \
  798|  3.43M|                txtp_map += 32; \
  799|  3.43M|            }
  800|  3.43M|            uint8_t *txtp_map = &t->scratch.txtp_map[by4 * 32 + bx4];
  801|  3.43M|            case_set_upto16(t_dim->lw);
  ------------------
  |  |   80|  3.43M|    switch (var) { \
  |  |   81|  1.87M|    case 0: set_ctx(set_ctx1); break; \
  |  |  ------------------
  |  |  |  |  796|  4.02M|            for (int y = 0; y < txh; y++) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (796:29): [True: 2.14M, False: 1.87M]
  |  |  |  |  ------------------
  |  |  |  |  797|  2.14M|                rep_macro(txtp_map, 0, txtp); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   81|  2.14M|    case 0: set_ctx(set_ctx1); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   56|  2.14M|    ((union alias8 *) &(var)[off])->u8 = (val) * 0x01
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  798|  2.14M|                txtp_map += 32; \
  |  |  |  |  799|  2.14M|            }
  |  |  ------------------
  |  |  |  Branch (81:5): [True: 1.87M, False: 1.55M]
  |  |  ------------------
  |  |   82|   742k|    case 1: set_ctx(set_ctx2); break; \
  |  |  ------------------
  |  |  |  |  796|  2.45M|            for (int y = 0; y < txh; y++) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (796:29): [True: 1.71M, False: 742k]
  |  |  |  |  ------------------
  |  |  |  |  797|  1.71M|                rep_macro(txtp_map, 0, txtp); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   82|  1.71M|    case 1: set_ctx(set_ctx2); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   58|  1.71M|    ((union alias16 *) &(var)[off])->u16 = (val) * 0x0101
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  798|  1.71M|                txtp_map += 32; \
  |  |  |  |  799|  1.71M|            }
  |  |  ------------------
  |  |  |  Branch (82:5): [True: 742k, False: 2.68M]
  |  |  ------------------
  |  |   83|   544k|    case 2: set_ctx(set_ctx4); break; \
  |  |  ------------------
  |  |  |  |  796|  2.37M|            for (int y = 0; y < txh; y++) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (796:29): [True: 1.82M, False: 544k]
  |  |  |  |  ------------------
  |  |  |  |  797|  1.82M|                rep_macro(txtp_map, 0, txtp); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   83|  1.82M|    case 2: set_ctx(set_ctx4); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   60|  1.82M|    ((union alias32 *) &(var)[off])->u32 = (val) * 0x01010101U
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  798|  1.82M|                txtp_map += 32; \
  |  |  |  |  799|  1.82M|            }
  |  |  ------------------
  |  |  |  Branch (83:5): [True: 544k, False: 2.88M]
  |  |  ------------------
  |  |   84|   157k|    case 3: set_ctx(set_ctx8); break; \
  |  |  ------------------
  |  |  |  |  796|  1.09M|            for (int y = 0; y < txh; y++) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (796:29): [True: 938k, False: 157k]
  |  |  |  |  ------------------
  |  |  |  |  797|   938k|                rep_macro(txtp_map, 0, txtp); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|   938k|    case 3: set_ctx(set_ctx8); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   62|   938k|    ((union alias64 *) &(var)[off])->u64 = (val) * 0x0101010101010101ULL
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  798|   938k|                txtp_map += 32; \
  |  |  |  |  799|   938k|            }
  |  |  ------------------
  |  |  |  Branch (84:5): [True: 157k, False: 3.27M]
  |  |  ------------------
  |  |   85|   117k|    case 4: set_ctx(set_ctx16); break; \
  |  |  ------------------
  |  |  |  |  796|  1.77M|            for (int y = 0; y < txh; y++) { \
  |  |  |  |  ------------------
  |  |  |  |  |  Branch (796:29): [True: 1.65M, False: 117k]
  |  |  |  |  ------------------
  |  |  |  |  797|  1.65M|                rep_macro(txtp_map, 0, txtp); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  1.65M|    case 4: set_ctx(set_ctx16); break; \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   63|  1.65M|#define set_ctx16(var, off, val) do { \
  |  |  |  |  |  |  |  |   64|  1.65M|        memset(&(var)[off], val, 16); \
  |  |  |  |  |  |  |  |   65|  1.65M|    } while (0)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (65:14): [Folded, False: 1.65M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  798|  1.65M|                txtp_map += 32; \
  |  |  |  |  799|  1.65M|            }
  |  |  ------------------
  |  |  |  Branch (85:5): [True: 117k, False: 3.31M]
  |  |  ------------------
  |  |   86|      0|    default: assert(0); \
  |  |  ------------------
  |  |  |  Branch (86:5): [True: 0, False: 3.43M]
  |  |  ------------------
  |  |   87|  3.43M|    }
  ------------------
  |  Branch (801:13): [Folded, False: 0]
  ------------------
  802|  3.43M|#undef set_ctx
  803|  3.43M|            if (t->frame_thread.pass == 1)
  ------------------
  |  Branch (803:17): [True: 3.43M, False: 30]
  ------------------
  804|  3.43M|                *ts->frame_thread[1].cbi++ = eob * (1 << 5) + txtp;
  805|  3.43M|        } else {
  806|  1.16M|            const int cbi = *ts->frame_thread[0].cbi++;
  807|  1.16M|            eob  = cbi >> 5;
  808|  1.16M|            txtp = cbi & 0x1f;
  809|  1.16M|        }
  810|  4.60M|        if (!(t->frame_thread.pass & 1)) {
  ------------------
  |  Branch (810:13): [True: 1.16M, False: 3.43M]
  ------------------
  811|  1.16M|            assert(dst);
  ------------------
  |  Branch (811:13): [True: 1.16M, False: 13]
  ------------------
  812|  1.16M|            if (eob >= 0) {
  ------------------
  |  Branch (812:17): [True: 934k, False: 233k]
  ------------------
  813|   934k|                if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   34|   934k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 934k]
  |  |  ------------------
  |  |   35|   934k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   934k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                              if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
  814|      0|                    coef_dump(cf, imin(t_dim->h, 8) * 4, imin(t_dim->w, 8) * 4, 3, "dq");
  815|   934k|                dsp->itx.itxfm_add[ytx][txtp](dst, f->cur.stride[0], cf, eob
  816|   934k|                                              HIGHBD_CALL_SUFFIX);
  817|   934k|                if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   34|   934k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 934k]
  |  |  ------------------
  |  |   35|   934k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   934k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                              if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
  818|      0|                    hex_dump(dst, f->cur.stride[0], t_dim->w * 4, t_dim->h * 4, "recon");
  819|   934k|            }
  820|  1.16M|        }
  821|  4.60M|    }
  822|  5.04M|}
recon_tmpl.c:decode_coefs:
  327|  25.5M|{
  328|  25.5M|    Dav1dTileState *const ts = t->ts;
  329|  25.5M|    const int chroma = !!plane;
  330|  25.5M|    const Dav1dFrameContext *const f = t->f;
  331|  25.5M|    const int lossless = f->frame_hdr->segmentation.lossless[b->seg_id];
  332|  25.5M|    const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[tx];
  333|  25.5M|    const int dbg = DEBUG_BLOCK_INFO && plane && 0;
  ------------------
  |  |   34|  25.5M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 25.5M]
  |  |  ------------------
  |  |   35|  25.5M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  25.5M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  |  Branch (333:41): [True: 0, False: 0]
  |  Branch (333:50): [Folded, False: 0]
  ------------------
  334|       |
  335|  25.5M|    if (dbg)
  ------------------
  |  Branch (335:9): [Folded, False: 25.5M]
  ------------------
  336|      0|        printf("Start: r=%d\n", ts->msac.rng);
  337|       |
  338|       |    // does this block have any non-zero coefficients
  339|  25.5M|    const int sctx = get_skip_ctx(t_dim, bs, a, l, chroma, f->cur.p.layout);
  340|  25.5M|    const int all_skip = dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|  25.5M|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  341|  25.5M|                             ts->cdf.coef.skip[t_dim->ctx][sctx]);
  342|  25.5M|    if (dbg)
  ------------------
  |  Branch (342:9): [Folded, False: 25.5M]
  ------------------
  343|      0|        printf("Post-non-zero[%d][%d][%d]: r=%d\n",
  344|      0|               t_dim->ctx, sctx, all_skip, ts->msac.rng);
  345|  25.5M|    if (all_skip) {
  ------------------
  |  Branch (345:9): [True: 15.1M, False: 10.4M]
  ------------------
  346|  15.1M|        *res_ctx = 0x40;
  347|  15.1M|        *txtp = lossless * WHT_WHT; /* lossless ? WHT_WHT : DCT_DCT */
  348|  15.1M|        return -1;
  349|  15.1M|    }
  350|       |
  351|       |    // transform type (chroma: derived, luma: explicitly coded)
  352|  10.4M|    if (lossless) {
  ------------------
  |  Branch (352:9): [True: 2.81M, False: 7.67M]
  ------------------
  353|  2.81M|        assert(t_dim->max == TX_4X4);
  ------------------
  |  Branch (353:9): [True: 2.82M, False: 18.4E]
  ------------------
  354|  2.82M|        *txtp = WHT_WHT;
  355|  7.67M|    } else if (t_dim->max + intra >= TX_64X64) {
  ------------------
  |  Branch (355:16): [True: 1.64M, False: 6.02M]
  ------------------
  356|  1.64M|        *txtp = DCT_DCT;
  357|  6.02M|    } else if (chroma) {
  ------------------
  |  Branch (357:16): [True: 2.03M, False: 3.99M]
  ------------------
  358|       |        // inferred from either the luma txtp (inter) or a LUT (intra)
  359|  2.03M|        *txtp = intra ? dav1d_txtp_from_uvmode[b->uv_mode] :
  ------------------
  |  Branch (359:17): [True: 1.31M, False: 717k]
  ------------------
  360|  2.03M|                        get_uv_inter_txtp(t_dim, *txtp);
  361|  3.99M|    } else if (!f->frame_hdr->segmentation.qidx[b->seg_id]) {
  ------------------
  |  Branch (361:16): [True: 5.99k, False: 3.98M]
  ------------------
  362|       |        // In libaom, lossless is checked by a literal qidx == 0, but not all
  363|       |        // such blocks are actually lossless. The remainder gets an implicit
  364|       |        // transform type (for luma)
  365|  5.99k|        *txtp = DCT_DCT;
  366|  3.98M|    } else {
  367|  3.98M|        unsigned idx;
  368|  3.98M|        if (intra) {
  ------------------
  |  Branch (368:13): [True: 2.83M, False: 1.14M]
  ------------------
  369|  2.83M|            const enum IntraPredMode y_mode_nofilt = b->y_mode == FILTER_PRED ?
  ------------------
  |  Branch (369:54): [True: 642k, False: 2.19M]
  ------------------
  370|  2.19M|                dav1d_filter_mode_to_y_mode[b->y_angle] : b->y_mode;
  371|  2.83M|            if (f->frame_hdr->reduced_txtp_set || t_dim->min == TX_16X16) {
  ------------------
  |  Branch (371:17): [True: 444k, False: 2.39M]
  |  Branch (371:51): [True: 366k, False: 2.02M]
  ------------------
  372|   813k|                idx = dav1d_msac_decode_symbol_adapt8(&ts->msac,
  ------------------
  |  |   48|   813k|#define dav1d_msac_decode_symbol_adapt8  dav1d_msac_decode_symbol_adapt8_sse2
  ------------------
  373|   813k|                          ts->cdf.m.txtp_intra2[t_dim->min][y_mode_nofilt], 4);
  374|   813k|                *txtp = dav1d_tx_types_per_set[idx + 0];
  375|  2.02M|            } else {
  376|  2.02M|                idx = dav1d_msac_decode_symbol_adapt8(&ts->msac,
  ------------------
  |  |   48|  2.02M|#define dav1d_msac_decode_symbol_adapt8  dav1d_msac_decode_symbol_adapt8_sse2
  ------------------
  377|  2.02M|                          ts->cdf.m.txtp_intra1[t_dim->min][y_mode_nofilt], 6);
  378|  2.02M|                *txtp = dav1d_tx_types_per_set[idx + 5];
  379|  2.02M|            }
  380|  2.83M|            if (dbg)
  ------------------
  |  Branch (380:17): [Folded, False: 2.83M]
  ------------------
  381|      0|                printf("Post-txtp-intra[%d->%d][%d][%d->%d]: r=%d\n",
  382|      0|                       tx, t_dim->min, y_mode_nofilt, idx, *txtp, ts->msac.rng);
  383|  2.83M|        } else {
  384|  1.45M|            if (f->frame_hdr->reduced_txtp_set || t_dim->max == TX_32X32) {
  ------------------
  |  Branch (384:17): [True: 18.4E, False: 1.45M]
  |  Branch (384:51): [True: 159k, False: 1.29M]
  ------------------
  385|   270k|                idx = dav1d_msac_decode_bool_adapt(&ts->msac,
  ------------------
  |  |   52|   270k|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  386|   270k|                          ts->cdf.m.txtp_inter3[t_dim->min]);
  387|   270k|                *txtp = (idx - 1) & IDTX; /* idx ? DCT_DCT : IDTX */
  388|   878k|            } else if (t_dim->min == TX_16X16) {
  ------------------
  |  Branch (388:24): [True: 192k, False: 685k]
  ------------------
  389|   192k|                idx = dav1d_msac_decode_symbol_adapt16(&ts->msac,
  ------------------
  |  |   57|   192k|#define dav1d_msac_decode_symbol_adapt16(ctx, cdf, symb) ((ctx)->symbol_adapt16(ctx, cdf, symb))
  ------------------
  390|   192k|                          ts->cdf.m.txtp_inter2, 11);
  391|   192k|                *txtp = dav1d_tx_types_per_set[idx + 12];
  392|   685k|            } else {
  393|   685k|                idx = dav1d_msac_decode_symbol_adapt16(&ts->msac,
  ------------------
  |  |   57|   685k|#define dav1d_msac_decode_symbol_adapt16(ctx, cdf, symb) ((ctx)->symbol_adapt16(ctx, cdf, symb))
  ------------------
  394|   685k|                          ts->cdf.m.txtp_inter1[t_dim->min], 15);
  395|   685k|                *txtp = dav1d_tx_types_per_set[idx + 24];
  396|   685k|            }
  397|  1.14M|            if (dbg)
  ------------------
  |  Branch (397:17): [Folded, False: 1.14M]
  ------------------
  398|      0|                printf("Post-txtp-inter[%d->%d][%d->%d]: r=%d\n",
  399|      0|                       tx, t_dim->min, idx, *txtp, ts->msac.rng);
  400|  1.14M|        }
  401|  3.98M|    }
  402|       |
  403|       |    // find end-of-block (eob)
  404|  10.4M|    int eob;
  405|  10.4M|    const int slw = imin(t_dim->lw, TX_32X32), slh = imin(t_dim->lh, TX_32X32);
  406|  10.4M|    const int tx2dszctx = slw + slh;
  407|  10.4M|    const enum TxClass tx_class = dav1d_tx_type_class[*txtp];
  408|  10.4M|    const int is_1d = tx_class != TX_CLASS_2D;
  409|  10.4M|    switch (tx2dszctx) {
  ------------------
  |  Branch (409:13): [True: 10.8M, False: 18.4E]
  ------------------
  410|      0|#define case_sz(sz, bin, ns, is_1d) \
  411|      0|    case sz: { \
  412|      0|        uint16_t *const eob_bin_cdf = ts->cdf.coef.eob_bin_##bin[chroma]is_1d; \
  413|      0|        eob = dav1d_msac_decode_symbol_adapt##ns(&ts->msac, eob_bin_cdf, 4 + sz); \
  414|      0|        break; \
  415|      0|    }
  416|  3.97M|    case_sz(0,   16,  8, [is_1d]);
  ------------------
  |  |  411|  3.97M|    case sz: { \
  |  |  ------------------
  |  |  |  Branch (411:5): [True: 3.97M, False: 6.52M]
  |  |  ------------------
  |  |  412|  3.97M|        uint16_t *const eob_bin_cdf = ts->cdf.coef.eob_bin_##bin[chroma]is_1d; \
  |  |  413|  3.97M|        eob = dav1d_msac_decode_symbol_adapt##ns(&ts->msac, eob_bin_cdf, 4 + sz); \
  |  |  ------------------
  |  |  |  |   48|  3.97M|#define dav1d_msac_decode_symbol_adapt8  dav1d_msac_decode_symbol_adapt8_sse2
  |  |  ------------------
  |  |  414|  3.97M|        break; \
  |  |  415|  3.97M|    }
  ------------------
  417|  1.09M|    case_sz(1,   32,  8, [is_1d]);
  ------------------
  |  |  411|  1.09M|    case sz: { \
  |  |  ------------------
  |  |  |  Branch (411:5): [True: 1.09M, False: 9.39M]
  |  |  ------------------
  |  |  412|  1.09M|        uint16_t *const eob_bin_cdf = ts->cdf.coef.eob_bin_##bin[chroma]is_1d; \
  |  |  413|  1.09M|        eob = dav1d_msac_decode_symbol_adapt##ns(&ts->msac, eob_bin_cdf, 4 + sz); \
  |  |  ------------------
  |  |  |  |   48|  1.09M|#define dav1d_msac_decode_symbol_adapt8  dav1d_msac_decode_symbol_adapt8_sse2
  |  |  ------------------
  |  |  414|  1.09M|        break; \
  |  |  415|  1.09M|    }
  ------------------
  418|  2.07M|    case_sz(2,   64,  8, [is_1d]);
  ------------------
  |  |  411|  2.07M|    case sz: { \
  |  |  ------------------
  |  |  |  Branch (411:5): [True: 2.07M, False: 8.41M]
  |  |  ------------------
  |  |  412|  2.07M|        uint16_t *const eob_bin_cdf = ts->cdf.coef.eob_bin_##bin[chroma]is_1d; \
  |  |  413|  2.07M|        eob = dav1d_msac_decode_symbol_adapt##ns(&ts->msac, eob_bin_cdf, 4 + sz); \
  |  |  ------------------
  |  |  |  |   48|  2.07M|#define dav1d_msac_decode_symbol_adapt8  dav1d_msac_decode_symbol_adapt8_sse2
  |  |  ------------------
  |  |  414|  2.07M|        break; \
  |  |  415|  2.07M|    }
  ------------------
  419|   927k|    case_sz(3,  128,  8, [is_1d]);
  ------------------
  |  |  411|   927k|    case sz: { \
  |  |  ------------------
  |  |  |  Branch (411:5): [True: 927k, False: 9.56M]
  |  |  ------------------
  |  |  412|   927k|        uint16_t *const eob_bin_cdf = ts->cdf.coef.eob_bin_##bin[chroma]is_1d; \
  |  |  413|   927k|        eob = dav1d_msac_decode_symbol_adapt##ns(&ts->msac, eob_bin_cdf, 4 + sz); \
  |  |  ------------------
  |  |  |  |   48|   927k|#define dav1d_msac_decode_symbol_adapt8  dav1d_msac_decode_symbol_adapt8_sse2
  |  |  ------------------
  |  |  414|   927k|        break; \
  |  |  415|   927k|    }
  ------------------
  420|  1.17M|    case_sz(4,  256, 16, [is_1d]);
  ------------------
  |  |  411|  1.17M|    case sz: { \
  |  |  ------------------
  |  |  |  Branch (411:5): [True: 1.17M, False: 9.31M]
  |  |  ------------------
  |  |  412|  1.17M|        uint16_t *const eob_bin_cdf = ts->cdf.coef.eob_bin_##bin[chroma]is_1d; \
  |  |  413|  1.17M|        eob = dav1d_msac_decode_symbol_adapt##ns(&ts->msac, eob_bin_cdf, 4 + sz); \
  |  |  ------------------
  |  |  |  |   57|  1.17M|#define dav1d_msac_decode_symbol_adapt16(ctx, cdf, symb) ((ctx)->symbol_adapt16(ctx, cdf, symb))
  |  |  ------------------
  |  |  414|  1.17M|        break; \
  |  |  415|  1.17M|    }
  ------------------
  421|   422k|    case_sz(5,  512, 16,        );
  ------------------
  |  |  411|   422k|    case sz: { \
  |  |  ------------------
  |  |  |  Branch (411:5): [True: 422k, False: 10.0M]
  |  |  ------------------
  |  |  412|   422k|        uint16_t *const eob_bin_cdf = ts->cdf.coef.eob_bin_##bin[chroma]is_1d; \
  |  |  413|   422k|        eob = dav1d_msac_decode_symbol_adapt##ns(&ts->msac, eob_bin_cdf, 4 + sz); \
  |  |  ------------------
  |  |  |  |   57|   422k|#define dav1d_msac_decode_symbol_adapt16(ctx, cdf, symb) ((ctx)->symbol_adapt16(ctx, cdf, symb))
  |  |  ------------------
  |  |  414|   422k|        break; \
  |  |  415|   422k|    }
  ------------------
  422|  1.17M|    case_sz(6, 1024, 16,        );
  ------------------
  |  |  411|  1.17M|    case sz: { \
  |  |  ------------------
  |  |  |  Branch (411:5): [True: 1.17M, False: 9.31M]
  |  |  ------------------
  |  |  412|  1.17M|        uint16_t *const eob_bin_cdf = ts->cdf.coef.eob_bin_##bin[chroma]is_1d; \
  |  |  413|  1.17M|        eob = dav1d_msac_decode_symbol_adapt##ns(&ts->msac, eob_bin_cdf, 4 + sz); \
  |  |  ------------------
  |  |  |  |   57|  1.17M|#define dav1d_msac_decode_symbol_adapt16(ctx, cdf, symb) ((ctx)->symbol_adapt16(ctx, cdf, symb))
  |  |  ------------------
  |  |  414|  1.17M|        break; \
  |  |  415|  1.17M|    }
  ------------------
  423|  10.4M|#undef case_sz
  424|  10.4M|    }
  425|  10.7M|    if (dbg)
  ------------------
  |  Branch (425:9): [Folded, False: 10.7M]
  ------------------
  426|      0|        printf("Post-eob_bin_%d[%d][%d][%d]: r=%d\n",
  427|      0|               16 << tx2dszctx, chroma, is_1d, eob, ts->msac.rng);
  428|  10.7M|    if (eob > 1) {
  ------------------
  |  Branch (428:9): [True: 7.47M, False: 3.30M]
  ------------------
  429|  7.47M|        const int eob_bin = eob - 2;
  430|  7.47M|        uint16_t *const eob_hi_bit_cdf =
  431|  7.47M|            ts->cdf.coef.eob_hi_bit[t_dim->ctx][chroma][eob_bin];
  432|  7.47M|        const int eob_hi_bit = dav1d_msac_decode_bool_adapt(&ts->msac, eob_hi_bit_cdf);
  ------------------
  |  |   52|  7.47M|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  433|  7.47M|        if (dbg)
  ------------------
  |  Branch (433:13): [Folded, False: 7.47M]
  ------------------
  434|      0|            printf("Post-eob_hi_bit[%d][%d][%d][%d]: r=%d\n",
  435|      0|                   t_dim->ctx, chroma, eob_bin, eob_hi_bit, ts->msac.rng);
  436|  7.47M|        eob = ((eob_hi_bit | 2) << eob_bin) | dav1d_msac_decode_bools(&ts->msac, eob_bin);
  437|  7.47M|        if (dbg)
  ------------------
  |  Branch (437:13): [Folded, False: 7.47M]
  ------------------
  438|      0|            printf("Post-eob[%d]: r=%d\n", eob, ts->msac.rng);
  439|  7.47M|    }
  440|  10.7M|    assert(eob >= 0);
  ------------------
  |  Branch (440:5): [True: 10.7M, False: 18.4E]
  ------------------
  441|       |
  442|       |    // base tokens
  443|  10.7M|    uint16_t (*const eob_cdf)[4] = ts->cdf.coef.eob_base_tok[t_dim->ctx][chroma];
  444|  10.7M|    uint16_t (*const hi_cdf)[4] = ts->cdf.coef.br_tok[imin(t_dim->ctx, 3)][chroma];
  445|  10.7M|    unsigned rc, dc_tok;
  446|       |
  447|  10.7M|    if (eob) {
  ------------------
  |  Branch (447:9): [True: 7.86M, False: 2.92M]
  ------------------
  448|  7.86M|        uint16_t (*const lo_cdf)[4] = ts->cdf.coef.base_tok[t_dim->ctx][chroma];
  449|  7.86M|        uint8_t *const levels = t->scratch.levels; // bits 0-5: tok, 6-7: lo_tok
  450|       |
  451|       |        /* eob */
  452|  7.86M|        unsigned ctx = 1 + (eob > 2 << tx2dszctx) + (eob > 4 << tx2dszctx);
  453|  7.86M|        int eob_tok = dav1d_msac_decode_symbol_adapt4(&ts->msac, eob_cdf[ctx], 2);
  ------------------
  |  |   47|  7.86M|#define dav1d_msac_decode_symbol_adapt4  dav1d_msac_decode_symbol_adapt4_sse2
  ------------------
  454|  7.86M|        int tok = eob_tok + 1;
  455|  7.86M|        int level_tok = tok * 0x41;
  456|  7.86M|        unsigned mag;
  457|       |
  458|  7.86M|#define DECODE_COEFS_CLASS(tx_class) \
  459|  7.86M|        unsigned x, y; \
  460|  7.86M|        uint8_t *level; \
  461|  7.86M|        if (tx_class == TX_CLASS_2D) \
  462|  7.86M|            rc = scan[eob], x = rc >> shift, y = rc & mask; \
  463|  7.86M|        else if (tx_class == TX_CLASS_H) \
  464|       |            /* Transposing reduces the stride and padding requirements */ \
  465|  7.86M|            x = eob & mask, y = eob >> shift, rc = eob; \
  466|  7.86M|        else /* tx_class == TX_CLASS_V */ \
  467|  7.86M|            x = eob & mask, y = eob >> shift, rc = (x << shift2) | y; \
  468|  7.86M|        if (dbg) \
  469|  7.86M|            printf("Post-lo_tok[%d][%d][%d][%d=%d=%d]: r=%d\n", \
  470|  7.86M|                   t_dim->ctx, chroma, ctx, eob, rc, tok, ts->msac.rng); \
  471|  7.86M|        if (eob_tok == 2) { \
  472|  7.86M|            ctx = (tx_class == TX_CLASS_2D ? (x | y) > 1 : y != 0) ? 14 : 7; \
  473|  7.86M|            tok = dav1d_msac_decode_hi_tok(&ts->msac, hi_cdf[ctx]); \
  474|  7.86M|            level_tok = tok + (3 << 6); \
  475|  7.86M|            if (dbg) \
  476|  7.86M|                printf("Post-hi_tok[%d][%d][%d][%d=%d=%d]: r=%d\n", \
  477|  7.86M|                       imin(t_dim->ctx, 3), chroma, ctx, eob, rc, tok, \
  478|  7.86M|                       ts->msac.rng); \
  479|  7.86M|        } \
  480|  7.86M|        cf[rc] = tok << 11; \
  481|  7.86M|        if (tx_class == TX_CLASS_2D) \
  482|  7.86M|            level = levels + rc; \
  483|  7.86M|        else \
  484|  7.86M|            level = levels + x * stride + y; \
  485|  7.86M|        *level = (uint8_t) level_tok; \
  486|  7.86M|        for (int i = eob - 1; i > 0; i--) { /* ac */ \
  487|  7.86M|            unsigned rc_i; \
  488|  7.86M|            if (tx_class == TX_CLASS_2D) \
  489|  7.86M|                rc_i = scan[i], x = rc_i >> shift, y = rc_i & mask; \
  490|  7.86M|            else if (tx_class == TX_CLASS_H) \
  491|  7.86M|                x = i & mask, y = i >> shift, rc_i = i; \
  492|  7.86M|            else /* tx_class == TX_CLASS_V */ \
  493|  7.86M|                x = i & mask, y = i >> shift, rc_i = (x << shift2) | y; \
  494|  7.86M|            assert(x < 32 && y < 32); \
  495|  7.86M|            if (tx_class == TX_CLASS_2D) \
  496|  7.86M|                level = levels + rc_i; \
  497|  7.86M|            else \
  498|  7.86M|                level = levels + x * stride + y; \
  499|  7.86M|            ctx = get_lo_ctx(level, tx_class, &mag, lo_ctx_offsets, x, y, stride); \
  500|  7.86M|            if (tx_class == TX_CLASS_2D) \
  501|  7.86M|                y |= x; \
  502|  7.86M|            tok = dav1d_msac_decode_symbol_adapt4(&ts->msac, lo_cdf[ctx], 3); \
  503|  7.86M|            if (dbg) \
  504|  7.86M|                printf("Post-lo_tok[%d][%d][%d][%d=%d=%d]: r=%d\n", \
  505|  7.86M|                       t_dim->ctx, chroma, ctx, i, rc_i, tok, ts->msac.rng); \
  506|  7.86M|            if (tok == 3) { \
  507|  7.86M|                mag &= 63; \
  508|  7.86M|                ctx = (y > (tx_class == TX_CLASS_2D) ? 14 : 7) + \
  509|  7.86M|                      (mag > 12 ? 6 : (mag + 1) >> 1); \
  510|  7.86M|                tok = dav1d_msac_decode_hi_tok(&ts->msac, hi_cdf[ctx]); \
  511|  7.86M|                if (dbg) \
  512|  7.86M|                    printf("Post-hi_tok[%d][%d][%d][%d=%d=%d]: r=%d\n", \
  513|  7.86M|                           imin(t_dim->ctx, 3), chroma, ctx, i, rc_i, tok, \
  514|  7.86M|                           ts->msac.rng); \
  515|  7.86M|                *level = (uint8_t) (tok + (3 << 6)); \
  516|  7.86M|                cf[rc_i] = (tok << 11) | rc; \
  517|  7.86M|                rc = rc_i; \
  518|  7.86M|            } else { \
  519|       |                /* 0x1 for tok, 0x7ff as bitmask for rc, 0x41 for level_tok */ \
  520|  7.86M|                tok *= 0x17ff41; \
  521|  7.86M|                *level = (uint8_t) tok; \
  522|       |                /* tok ? (tok << 11) | rc : 0 */ \
  523|  7.86M|                tok = (tok >> 9) & (rc + ~0x7ffu); \
  524|  7.86M|                if (tok) rc = rc_i; \
  525|  7.86M|                cf[rc_i] = tok; \
  526|  7.86M|            } \
  527|  7.86M|        } \
  528|       |        /* dc */ \
  529|  7.86M|        ctx = (tx_class == TX_CLASS_2D) ? 0 : \
  530|  7.86M|            get_lo_ctx(levels, tx_class, &mag, lo_ctx_offsets, 0, 0, stride); \
  531|  7.86M|        dc_tok = dav1d_msac_decode_symbol_adapt4(&ts->msac, lo_cdf[ctx], 3); \
  532|  7.86M|        if (dbg) \
  533|  7.86M|            printf("Post-dc_lo_tok[%d][%d][%d][%d]: r=%d\n", \
  534|  7.86M|                   t_dim->ctx, chroma, ctx, dc_tok, ts->msac.rng); \
  535|  7.86M|        if (dc_tok == 3) { \
  536|  7.86M|            if (tx_class == TX_CLASS_2D) \
  537|  7.86M|                mag = levels[0 * stride + 1] + levels[1 * stride + 0] + \
  538|  7.86M|                      levels[1 * stride + 1]; \
  539|  7.86M|            mag &= 63; \
  540|  7.86M|            ctx = mag > 12 ? 6 : (mag + 1) >> 1; \
  541|  7.86M|            dc_tok = dav1d_msac_decode_hi_tok(&ts->msac, hi_cdf[ctx]); \
  542|  7.86M|            if (dbg) \
  543|  7.86M|                printf("Post-dc_hi_tok[%d][%d][0][%d]: r=%d\n", \
  544|  7.86M|                       imin(t_dim->ctx, 3), chroma, dc_tok, ts->msac.rng); \
  545|  7.86M|        } \
  546|  7.86M|        break
  547|       |
  548|  7.86M|        const uint16_t *scan;
  549|  7.86M|        switch (tx_class) {
  550|  7.15M|        case TX_CLASS_2D: {
  ------------------
  |  Branch (550:9): [True: 7.15M, False: 702k]
  ------------------
  551|  7.15M|            const unsigned nonsquare_tx = tx >= RTX_4X8;
  552|  7.15M|            const uint8_t (*const lo_ctx_offsets)[5] =
  553|  7.15M|                dav1d_lo_ctx_offsets[nonsquare_tx + (tx & nonsquare_tx)];
  554|  7.15M|            scan = dav1d_scans[tx];
  555|  7.15M|            const ptrdiff_t stride = 4 << slh;
  556|  7.15M|            const unsigned shift = slh + 2, shift2 = 0;
  557|  7.15M|            const unsigned mask = (4 << slh) - 1;
  558|  7.15M|            memset(levels, 0, stride * ((4 << slw) + 2));
  559|  7.15M|            DECODE_COEFS_CLASS(TX_CLASS_2D);
  ------------------
  |  |  459|  7.15M|        unsigned x, y; \
  |  |  460|  7.15M|        uint8_t *level; \
  |  |  461|  7.15M|        if (tx_class == TX_CLASS_2D) \
  |  |  ------------------
  |  |  |  Branch (461:13): [True: 7.15M, Folded]
  |  |  ------------------
  |  |  462|  7.15M|            rc = scan[eob], x = rc >> shift, y = rc & mask; \
  |  |  463|  7.15M|        else if (tx_class == TX_CLASS_H) \
  |  |  ------------------
  |  |  |  Branch (463:18): [Folded, False: 3.43k]
  |  |  ------------------
  |  |  464|  3.43k|            /* Transposing reduces the stride and padding requirements */ \
  |  |  465|  3.43k|            x = eob & mask, y = eob >> shift, rc = eob; \
  |  |  466|  3.43k|        else /* tx_class == TX_CLASS_V */ \
  |  |  467|  3.43k|            x = eob & mask, y = eob >> shift, rc = (x << shift2) | y; \
  |  |  468|  7.15M|        if (dbg) \
  |  |  ------------------
  |  |  |  Branch (468:13): [Folded, False: 7.15M]
  |  |  ------------------
  |  |  469|  7.15M|            printf("Post-lo_tok[%d][%d][%d][%d=%d=%d]: r=%d\n", \
  |  |  470|      0|                   t_dim->ctx, chroma, ctx, eob, rc, tok, ts->msac.rng); \
  |  |  471|  7.15M|        if (eob_tok == 2) { \
  |  |  ------------------
  |  |  |  Branch (471:13): [True: 218k, False: 6.93M]
  |  |  ------------------
  |  |  472|   218k|            ctx = (tx_class == TX_CLASS_2D ? (x | y) > 1 : y != 0) ? 14 : 7; \
  |  |  ------------------
  |  |  |  Branch (472:19): [True: 214k, False: 4.48k]
  |  |  |  Branch (472:20): [True: 218k, Folded]
  |  |  ------------------
  |  |  473|   218k|            tok = dav1d_msac_decode_hi_tok(&ts->msac, hi_cdf[ctx]); \
  |  |  ------------------
  |  |  |  |   49|   218k|#define dav1d_msac_decode_hi_tok         dav1d_msac_decode_hi_tok_sse2
  |  |  ------------------
  |  |  474|   218k|            level_tok = tok + (3 << 6); \
  |  |  475|   218k|            if (dbg) \
  |  |  ------------------
  |  |  |  Branch (475:17): [Folded, False: 218k]
  |  |  ------------------
  |  |  476|   218k|                printf("Post-hi_tok[%d][%d][%d][%d=%d=%d]: r=%d\n", \
  |  |  477|      0|                       imin(t_dim->ctx, 3), chroma, ctx, eob, rc, tok, \
  |  |  478|      0|                       ts->msac.rng); \
  |  |  479|   218k|        } \
  |  |  480|  7.15M|        cf[rc] = tok << 11; \
  |  |  481|  7.15M|        if (tx_class == TX_CLASS_2D) \
  |  |  ------------------
  |  |  |  Branch (481:13): [True: 7.14M, Folded]
  |  |  ------------------
  |  |  482|  7.15M|            level = levels + rc; \
  |  |  483|  7.15M|        else \
  |  |  484|  7.15M|            level = levels + x * stride + y; \
  |  |  485|  7.15M|        *level = (uint8_t) level_tok; \
  |  |  486|   147M|        for (int i = eob - 1; i > 0; i--) { /* ac */ \
  |  |  ------------------
  |  |  |  Branch (486:31): [True: 140M, False: 7.32M]
  |  |  ------------------
  |  |  487|   140M|            unsigned rc_i; \
  |  |  488|   140M|            if (tx_class == TX_CLASS_2D) \
  |  |  ------------------
  |  |  |  Branch (488:17): [True: 140M, Folded]
  |  |  ------------------
  |  |  489|   140M|                rc_i = scan[i], x = rc_i >> shift, y = rc_i & mask; \
  |  |  490|   140M|            else if (tx_class == TX_CLASS_H) \
  |  |  ------------------
  |  |  |  Branch (490:22): [Folded, False: 44.4k]
  |  |  ------------------
  |  |  491|  44.4k|                x = i & mask, y = i >> shift, rc_i = i; \
  |  |  492|  44.4k|            else /* tx_class == TX_CLASS_V */ \
  |  |  493|  44.4k|                x = i & mask, y = i >> shift, rc_i = (x << shift2) | y; \
  |  |  494|   140M|            assert(x < 32 && y < 32); \
  |  |  495|   140M|            if (tx_class == TX_CLASS_2D) \
  |  |  ------------------
  |  |  |  Branch (495:17): [True: 140M, Folded]
  |  |  ------------------
  |  |  496|   140M|                level = levels + rc_i; \
  |  |  497|   140M|            else \
  |  |  498|   140M|                level = levels + x * stride + y; \
  |  |  499|   140M|            ctx = get_lo_ctx(level, tx_class, &mag, lo_ctx_offsets, x, y, stride); \
  |  |  500|   140M|            if (tx_class == TX_CLASS_2D) \
  |  |  ------------------
  |  |  |  Branch (500:17): [True: 139M, Folded]
  |  |  ------------------
  |  |  501|   140M|                y |= x; \
  |  |  502|   140M|            tok = dav1d_msac_decode_symbol_adapt4(&ts->msac, lo_cdf[ctx], 3); \
  |  |  ------------------
  |  |  |  |   47|   140M|#define dav1d_msac_decode_symbol_adapt4  dav1d_msac_decode_symbol_adapt4_sse2
  |  |  ------------------
  |  |  503|   140M|            if (dbg) \
  |  |  ------------------
  |  |  |  Branch (503:17): [Folded, False: 140M]
  |  |  ------------------
  |  |  504|   140M|                printf("Post-lo_tok[%d][%d][%d][%d=%d=%d]: r=%d\n", \
  |  |  505|      0|                       t_dim->ctx, chroma, ctx, i, rc_i, tok, ts->msac.rng); \
  |  |  506|   140M|            if (tok == 3) { \
  |  |  ------------------
  |  |  |  Branch (506:17): [True: 12.4M, False: 127M]
  |  |  ------------------
  |  |  507|  12.4M|                mag &= 63; \
  |  |  508|  12.4M|                ctx = (y > (tx_class == TX_CLASS_2D) ? 14 : 7) + \
  |  |  ------------------
  |  |  |  Branch (508:24): [True: 9.31M, False: 3.18M]
  |  |  ------------------
  |  |  509|  12.4M|                      (mag > 12 ? 6 : (mag + 1) >> 1); \
  |  |  ------------------
  |  |  |  Branch (509:24): [True: 2.17M, False: 10.3M]
  |  |  ------------------
  |  |  510|  12.4M|                tok = dav1d_msac_decode_hi_tok(&ts->msac, hi_cdf[ctx]); \
  |  |  ------------------
  |  |  |  |   49|  12.4M|#define dav1d_msac_decode_hi_tok         dav1d_msac_decode_hi_tok_sse2
  |  |  ------------------
  |  |  511|  12.4M|                if (dbg) \
  |  |  ------------------
  |  |  |  Branch (511:21): [Folded, False: 12.4M]
  |  |  ------------------
  |  |  512|  12.4M|                    printf("Post-hi_tok[%d][%d][%d][%d=%d=%d]: r=%d\n", \
  |  |  513|      0|                           imin(t_dim->ctx, 3), chroma, ctx, i, rc_i, tok, \
  |  |  514|      0|                           ts->msac.rng); \
  |  |  515|  12.4M|                *level = (uint8_t) (tok + (3 << 6)); \
  |  |  516|  12.4M|                cf[rc_i] = (tok << 11) | rc; \
  |  |  517|  12.4M|                rc = rc_i; \
  |  |  518|   127M|            } else { \
  |  |  519|   127M|                /* 0x1 for tok, 0x7ff as bitmask for rc, 0x41 for level_tok */ \
  |  |  520|   127M|                tok *= 0x17ff41; \
  |  |  521|   127M|                *level = (uint8_t) tok; \
  |  |  522|   127M|                /* tok ? (tok << 11) | rc : 0 */ \
  |  |  523|   127M|                tok = (tok >> 9) & (rc + ~0x7ffu); \
  |  |  524|   127M|                if (tok) rc = rc_i; \
  |  |  ------------------
  |  |  |  Branch (524:21): [True: 41.5M, False: 86.2M]
  |  |  ------------------
  |  |  525|   127M|                cf[rc_i] = tok; \
  |  |  526|   127M|            } \
  |  |  527|   140M|        } \
  |  |  528|  7.15M|        /* dc */ \
  |  |  529|  7.32M|        ctx = (tx_class == TX_CLASS_2D) ? 0 : \
  |  |  ------------------
  |  |  |  Branch (529:15): [True: 7.20M, Folded]
  |  |  ------------------
  |  |  530|  7.32M|            get_lo_ctx(levels, tx_class, &mag, lo_ctx_offsets, 0, 0, stride); \
  |  |  531|  7.32M|        dc_tok = dav1d_msac_decode_symbol_adapt4(&ts->msac, lo_cdf[ctx], 3); \
  |  |  ------------------
  |  |  |  |   47|  7.32M|#define dav1d_msac_decode_symbol_adapt4  dav1d_msac_decode_symbol_adapt4_sse2
  |  |  ------------------
  |  |  532|  7.32M|        if (dbg) \
  |  |  ------------------
  |  |  |  Branch (532:13): [Folded, False: 7.32M]
  |  |  ------------------
  |  |  533|  7.32M|            printf("Post-dc_lo_tok[%d][%d][%d][%d]: r=%d\n", \
  |  |  534|      0|                   t_dim->ctx, chroma, ctx, dc_tok, ts->msac.rng); \
  |  |  535|  7.32M|        if (dc_tok == 3) { \
  |  |  ------------------
  |  |  |  Branch (535:13): [True: 2.86M, False: 4.45M]
  |  |  ------------------
  |  |  536|  2.86M|            if (tx_class == TX_CLASS_2D) \
  |  |  ------------------
  |  |  |  Branch (536:17): [True: 2.86M, Folded]
  |  |  ------------------
  |  |  537|  2.86M|                mag = levels[0 * stride + 1] + levels[1 * stride + 0] + \
  |  |  538|  2.86M|                      levels[1 * stride + 1]; \
  |  |  539|  2.86M|            mag &= 63; \
  |  |  540|  2.86M|            ctx = mag > 12 ? 6 : (mag + 1) >> 1; \
  |  |  ------------------
  |  |  |  Branch (540:19): [True: 403k, False: 2.46M]
  |  |  ------------------
  |  |  541|  2.86M|            dc_tok = dav1d_msac_decode_hi_tok(&ts->msac, hi_cdf[ctx]); \
  |  |  ------------------
  |  |  |  |   49|  2.86M|#define dav1d_msac_decode_hi_tok         dav1d_msac_decode_hi_tok_sse2
  |  |  ------------------
  |  |  542|  2.86M|            if (dbg) \
  |  |  ------------------
  |  |  |  Branch (542:17): [Folded, False: 2.86M]
  |  |  ------------------
  |  |  543|  2.86M|                printf("Post-dc_hi_tok[%d][%d][0][%d]: r=%d\n", \
  |  |  544|      0|                       imin(t_dim->ctx, 3), chroma, dc_tok, ts->msac.rng); \
  |  |  545|  2.86M|        } \
  |  |  546|  7.32M|        break
  ------------------
  |  Branch (559:13): [True: 140M, False: 18.4E]
  |  Branch (559:13): [True: 140M, False: 18.4E]
  ------------------
  560|  7.15M|        }
  561|   454k|        case TX_CLASS_H: {
  ------------------
  |  Branch (561:9): [True: 454k, False: 7.40M]
  ------------------
  562|   454k|            const uint8_t (*const lo_ctx_offsets)[5] = NULL;
  563|   454k|            const ptrdiff_t stride = 16;
  564|   454k|            const unsigned shift = slh + 2, shift2 = 0;
  565|   454k|            const unsigned mask = (4 << slh) - 1;
  566|   454k|            memset(levels, 0, stride * ((4 << slh) + 2));
  567|   454k|            DECODE_COEFS_CLASS(TX_CLASS_H);
  ------------------
  |  |  459|   454k|        unsigned x, y; \
  |  |  460|   454k|        uint8_t *level; \
  |  |  461|   454k|        if (tx_class == TX_CLASS_2D) \
  |  |  ------------------
  |  |  |  Branch (461:13): [Folded, False: 454k]
  |  |  ------------------
  |  |  462|   454k|            rc = scan[eob], x = rc >> shift, y = rc & mask; \
  |  |  463|   454k|        else if (tx_class == TX_CLASS_H) \
  |  |  ------------------
  |  |  |  Branch (463:18): [True: 454k, Folded]
  |  |  ------------------
  |  |  464|   454k|            /* Transposing reduces the stride and padding requirements */ \
  |  |  465|   454k|            x = eob & mask, y = eob >> shift, rc = eob; \
  |  |  466|   454k|        else /* tx_class == TX_CLASS_V */ \
  |  |  467|   454k|            x = eob & mask, y = eob >> shift, rc = (x << shift2) | y; \
  |  |  468|   454k|        if (dbg) \
  |  |  ------------------
  |  |  |  Branch (468:13): [Folded, False: 454k]
  |  |  ------------------
  |  |  469|   454k|            printf("Post-lo_tok[%d][%d][%d][%d=%d=%d]: r=%d\n", \
  |  |  470|      0|                   t_dim->ctx, chroma, ctx, eob, rc, tok, ts->msac.rng); \
  |  |  471|   454k|        if (eob_tok == 2) { \
  |  |  ------------------
  |  |  |  Branch (471:13): [True: 9.17k, False: 445k]
  |  |  ------------------
  |  |  472|  9.17k|            ctx = (tx_class == TX_CLASS_2D ? (x | y) > 1 : y != 0) ? 14 : 7; \
  |  |  ------------------
  |  |  |  Branch (472:19): [True: 6.65k, False: 2.52k]
  |  |  |  Branch (472:20): [Folded, False: 9.17k]
  |  |  ------------------
  |  |  473|  9.17k|            tok = dav1d_msac_decode_hi_tok(&ts->msac, hi_cdf[ctx]); \
  |  |  ------------------
  |  |  |  |   49|  9.17k|#define dav1d_msac_decode_hi_tok         dav1d_msac_decode_hi_tok_sse2
  |  |  ------------------
  |  |  474|  9.17k|            level_tok = tok + (3 << 6); \
  |  |  475|  9.17k|            if (dbg) \
  |  |  ------------------
  |  |  |  Branch (475:17): [Folded, False: 9.17k]
  |  |  ------------------
  |  |  476|  9.17k|                printf("Post-hi_tok[%d][%d][%d][%d=%d=%d]: r=%d\n", \
  |  |  477|      0|                       imin(t_dim->ctx, 3), chroma, ctx, eob, rc, tok, \
  |  |  478|      0|                       ts->msac.rng); \
  |  |  479|  9.17k|        } \
  |  |  480|   454k|        cf[rc] = tok << 11; \
  |  |  481|   454k|        if (tx_class == TX_CLASS_2D) \
  |  |  ------------------
  |  |  |  Branch (481:13): [Folded, False: 454k]
  |  |  ------------------
  |  |  482|   454k|            level = levels + rc; \
  |  |  483|   454k|        else \
  |  |  484|   454k|            level = levels + x * stride + y; \
  |  |  485|   454k|        *level = (uint8_t) level_tok; \
  |  |  486|  8.79M|        for (int i = eob - 1; i > 0; i--) { /* ac */ \
  |  |  ------------------
  |  |  |  Branch (486:31): [True: 8.34M, False: 453k]
  |  |  ------------------
  |  |  487|  8.34M|            unsigned rc_i; \
  |  |  488|  8.34M|            if (tx_class == TX_CLASS_2D) \
  |  |  ------------------
  |  |  |  Branch (488:17): [Folded, False: 8.34M]
  |  |  ------------------
  |  |  489|  8.34M|                rc_i = scan[i], x = rc_i >> shift, y = rc_i & mask; \
  |  |  490|  8.34M|            else if (tx_class == TX_CLASS_H) \
  |  |  ------------------
  |  |  |  Branch (490:22): [True: 8.34M, Folded]
  |  |  ------------------
  |  |  491|  8.34M|                x = i & mask, y = i >> shift, rc_i = i; \
  |  |  492|  8.34M|            else /* tx_class == TX_CLASS_V */ \
  |  |  493|  8.34M|                x = i & mask, y = i >> shift, rc_i = (x << shift2) | y; \
  |  |  494|  8.34M|            assert(x < 32 && y < 32); \
  |  |  495|  8.34M|            if (tx_class == TX_CLASS_2D) \
  |  |  ------------------
  |  |  |  Branch (495:17): [Folded, False: 8.34M]
  |  |  ------------------
  |  |  496|  8.34M|                level = levels + rc_i; \
  |  |  497|  8.34M|            else \
  |  |  498|  8.34M|                level = levels + x * stride + y; \
  |  |  499|  8.34M|            ctx = get_lo_ctx(level, tx_class, &mag, lo_ctx_offsets, x, y, stride); \
  |  |  500|  8.34M|            if (tx_class == TX_CLASS_2D) \
  |  |  ------------------
  |  |  |  Branch (500:17): [Folded, False: 8.34M]
  |  |  ------------------
  |  |  501|  8.34M|                y |= x; \
  |  |  502|  8.34M|            tok = dav1d_msac_decode_symbol_adapt4(&ts->msac, lo_cdf[ctx], 3); \
  |  |  ------------------
  |  |  |  |   47|  8.34M|#define dav1d_msac_decode_symbol_adapt4  dav1d_msac_decode_symbol_adapt4_sse2
  |  |  ------------------
  |  |  503|  8.34M|            if (dbg) \
  |  |  ------------------
  |  |  |  Branch (503:17): [Folded, False: 8.34M]
  |  |  ------------------
  |  |  504|  8.34M|                printf("Post-lo_tok[%d][%d][%d][%d=%d=%d]: r=%d\n", \
  |  |  505|      0|                       t_dim->ctx, chroma, ctx, i, rc_i, tok, ts->msac.rng); \
  |  |  506|  8.34M|            if (tok == 3) { \
  |  |  ------------------
  |  |  |  Branch (506:17): [True: 430k, False: 7.91M]
  |  |  ------------------
  |  |  507|   430k|                mag &= 63; \
  |  |  508|   430k|                ctx = (y > (tx_class == TX_CLASS_2D) ? 14 : 7) + \
  |  |  ------------------
  |  |  |  Branch (508:24): [True: 260k, False: 169k]
  |  |  ------------------
  |  |  509|   430k|                      (mag > 12 ? 6 : (mag + 1) >> 1); \
  |  |  ------------------
  |  |  |  Branch (509:24): [True: 46.4k, False: 384k]
  |  |  ------------------
  |  |  510|   430k|                tok = dav1d_msac_decode_hi_tok(&ts->msac, hi_cdf[ctx]); \
  |  |  ------------------
  |  |  |  |   49|   430k|#define dav1d_msac_decode_hi_tok         dav1d_msac_decode_hi_tok_sse2
  |  |  ------------------
  |  |  511|   430k|                if (dbg) \
  |  |  ------------------
  |  |  |  Branch (511:21): [Folded, False: 430k]
  |  |  ------------------
  |  |  512|   430k|                    printf("Post-hi_tok[%d][%d][%d][%d=%d=%d]: r=%d\n", \
  |  |  513|      0|                           imin(t_dim->ctx, 3), chroma, ctx, i, rc_i, tok, \
  |  |  514|      0|                           ts->msac.rng); \
  |  |  515|   430k|                *level = (uint8_t) (tok + (3 << 6)); \
  |  |  516|   430k|                cf[rc_i] = (tok << 11) | rc; \
  |  |  517|   430k|                rc = rc_i; \
  |  |  518|  7.91M|            } else { \
  |  |  519|  7.91M|                /* 0x1 for tok, 0x7ff as bitmask for rc, 0x41 for level_tok */ \
  |  |  520|  7.91M|                tok *= 0x17ff41; \
  |  |  521|  7.91M|                *level = (uint8_t) tok; \
  |  |  522|  7.91M|                /* tok ? (tok << 11) | rc : 0 */ \
  |  |  523|  7.91M|                tok = (tok >> 9) & (rc + ~0x7ffu); \
  |  |  524|  7.91M|                if (tok) rc = rc_i; \
  |  |  ------------------
  |  |  |  Branch (524:21): [True: 2.10M, False: 5.80M]
  |  |  ------------------
  |  |  525|  7.91M|                cf[rc_i] = tok; \
  |  |  526|  7.91M|            } \
  |  |  527|  8.34M|        } \
  |  |  528|   454k|        /* dc */ \
  |  |  529|   454k|        ctx = (tx_class == TX_CLASS_2D) ? 0 : \
  |  |  ------------------
  |  |  |  Branch (529:15): [Folded, False: 453k]
  |  |  ------------------
  |  |  530|   453k|            get_lo_ctx(levels, tx_class, &mag, lo_ctx_offsets, 0, 0, stride); \
  |  |  531|   453k|        dc_tok = dav1d_msac_decode_symbol_adapt4(&ts->msac, lo_cdf[ctx], 3); \
  |  |  ------------------
  |  |  |  |   47|   453k|#define dav1d_msac_decode_symbol_adapt4  dav1d_msac_decode_symbol_adapt4_sse2
  |  |  ------------------
  |  |  532|   453k|        if (dbg) \
  |  |  ------------------
  |  |  |  Branch (532:13): [Folded, False: 453k]
  |  |  ------------------
  |  |  533|   453k|            printf("Post-dc_lo_tok[%d][%d][%d][%d]: r=%d\n", \
  |  |  534|      0|                   t_dim->ctx, chroma, ctx, dc_tok, ts->msac.rng); \
  |  |  535|   453k|        if (dc_tok == 3) { \
  |  |  ------------------
  |  |  |  Branch (535:13): [True: 52.5k, False: 400k]
  |  |  ------------------
  |  |  536|  52.5k|            if (tx_class == TX_CLASS_2D) \
  |  |  ------------------
  |  |  |  Branch (536:17): [Folded, False: 52.5k]
  |  |  ------------------
  |  |  537|  52.5k|                mag = levels[0 * stride + 1] + levels[1 * stride + 0] + \
  |  |  538|      0|                      levels[1 * stride + 1]; \
  |  |  539|  52.5k|            mag &= 63; \
  |  |  540|  52.5k|            ctx = mag > 12 ? 6 : (mag + 1) >> 1; \
  |  |  ------------------
  |  |  |  Branch (540:19): [True: 7.08k, False: 45.4k]
  |  |  ------------------
  |  |  541|  52.5k|            dc_tok = dav1d_msac_decode_hi_tok(&ts->msac, hi_cdf[ctx]); \
  |  |  ------------------
  |  |  |  |   49|  52.5k|#define dav1d_msac_decode_hi_tok         dav1d_msac_decode_hi_tok_sse2
  |  |  ------------------
  |  |  542|  52.5k|            if (dbg) \
  |  |  ------------------
  |  |  |  Branch (542:17): [Folded, False: 52.5k]
  |  |  ------------------
  |  |  543|  52.5k|                printf("Post-dc_hi_tok[%d][%d][0][%d]: r=%d\n", \
  |  |  544|      0|                       imin(t_dim->ctx, 3), chroma, dc_tok, ts->msac.rng); \
  |  |  545|  52.5k|        } \
  |  |  546|   453k|        break
  ------------------
  |  Branch (567:13): [True: 8.34M, False: 1.50k]
  |  Branch (567:13): [True: 8.34M, False: 18.4E]
  ------------------
  568|   454k|        }
  569|   261k|        case TX_CLASS_V: {
  ------------------
  |  Branch (569:9): [True: 261k, False: 7.59M]
  ------------------
  570|   261k|            const uint8_t (*const lo_ctx_offsets)[5] = NULL;
  571|   261k|            const ptrdiff_t stride = 16;
  572|   261k|            const unsigned shift = slw + 2, shift2 = slh + 2;
  573|   261k|            const unsigned mask = (4 << slw) - 1;
  574|   261k|            memset(levels, 0, stride * ((4 << slw) + 2));
  575|   261k|            DECODE_COEFS_CLASS(TX_CLASS_V);
  ------------------
  |  |  459|   261k|        unsigned x, y; \
  |  |  460|   261k|        uint8_t *level; \
  |  |  461|   261k|        if (tx_class == TX_CLASS_2D) \
  |  |  ------------------
  |  |  |  Branch (461:13): [Folded, False: 261k]
  |  |  ------------------
  |  |  462|   261k|            rc = scan[eob], x = rc >> shift, y = rc & mask; \
  |  |  463|   261k|        else if (tx_class == TX_CLASS_H) \
  |  |  ------------------
  |  |  |  Branch (463:18): [Folded, False: 261k]
  |  |  ------------------
  |  |  464|   261k|            /* Transposing reduces the stride and padding requirements */ \
  |  |  465|   261k|            x = eob & mask, y = eob >> shift, rc = eob; \
  |  |  466|   261k|        else /* tx_class == TX_CLASS_V */ \
  |  |  467|   261k|            x = eob & mask, y = eob >> shift, rc = (x << shift2) | y; \
  |  |  468|   261k|        if (dbg) \
  |  |  ------------------
  |  |  |  Branch (468:13): [Folded, False: 261k]
  |  |  ------------------
  |  |  469|   261k|            printf("Post-lo_tok[%d][%d][%d][%d=%d=%d]: r=%d\n", \
  |  |  470|      0|                   t_dim->ctx, chroma, ctx, eob, rc, tok, ts->msac.rng); \
  |  |  471|   261k|        if (eob_tok == 2) { \
  |  |  ------------------
  |  |  |  Branch (471:13): [True: 4.27k, False: 257k]
  |  |  ------------------
  |  |  472|  4.27k|            ctx = (tx_class == TX_CLASS_2D ? (x | y) > 1 : y != 0) ? 14 : 7; \
  |  |  ------------------
  |  |  |  Branch (472:19): [True: 3.77k, False: 501]
  |  |  |  Branch (472:20): [Folded, False: 4.27k]
  |  |  ------------------
  |  |  473|  4.27k|            tok = dav1d_msac_decode_hi_tok(&ts->msac, hi_cdf[ctx]); \
  |  |  ------------------
  |  |  |  |   49|  4.27k|#define dav1d_msac_decode_hi_tok         dav1d_msac_decode_hi_tok_sse2
  |  |  ------------------
  |  |  474|  4.27k|            level_tok = tok + (3 << 6); \
  |  |  475|  4.27k|            if (dbg) \
  |  |  ------------------
  |  |  |  Branch (475:17): [Folded, False: 4.27k]
  |  |  ------------------
  |  |  476|  4.27k|                printf("Post-hi_tok[%d][%d][%d][%d=%d=%d]: r=%d\n", \
  |  |  477|      0|                       imin(t_dim->ctx, 3), chroma, ctx, eob, rc, tok, \
  |  |  478|      0|                       ts->msac.rng); \
  |  |  479|  4.27k|        } \
  |  |  480|   261k|        cf[rc] = tok << 11; \
  |  |  481|   261k|        if (tx_class == TX_CLASS_2D) \
  |  |  ------------------
  |  |  |  Branch (481:13): [Folded, False: 261k]
  |  |  ------------------
  |  |  482|   261k|            level = levels + rc; \
  |  |  483|   261k|        else \
  |  |  484|   261k|            level = levels + x * stride + y; \
  |  |  485|   261k|        *level = (uint8_t) level_tok; \
  |  |  486|  5.37M|        for (int i = eob - 1; i > 0; i--) { /* ac */ \
  |  |  ------------------
  |  |  |  Branch (486:31): [True: 5.10M, False: 262k]
  |  |  ------------------
  |  |  487|  5.10M|            unsigned rc_i; \
  |  |  488|  5.10M|            if (tx_class == TX_CLASS_2D) \
  |  |  ------------------
  |  |  |  Branch (488:17): [Folded, False: 5.10M]
  |  |  ------------------
  |  |  489|  5.10M|                rc_i = scan[i], x = rc_i >> shift, y = rc_i & mask; \
  |  |  490|  5.10M|            else if (tx_class == TX_CLASS_H) \
  |  |  ------------------
  |  |  |  Branch (490:22): [Folded, False: 5.10M]
  |  |  ------------------
  |  |  491|  5.10M|                x = i & mask, y = i >> shift, rc_i = i; \
  |  |  492|  5.10M|            else /* tx_class == TX_CLASS_V */ \
  |  |  493|  5.10M|                x = i & mask, y = i >> shift, rc_i = (x << shift2) | y; \
  |  |  494|  5.10M|            assert(x < 32 && y < 32); \
  |  |  495|  5.10M|            if (tx_class == TX_CLASS_2D) \
  |  |  ------------------
  |  |  |  Branch (495:17): [Folded, False: 5.10M]
  |  |  ------------------
  |  |  496|  5.10M|                level = levels + rc_i; \
  |  |  497|  5.10M|            else \
  |  |  498|  5.10M|                level = levels + x * stride + y; \
  |  |  499|  5.10M|            ctx = get_lo_ctx(level, tx_class, &mag, lo_ctx_offsets, x, y, stride); \
  |  |  500|  5.10M|            if (tx_class == TX_CLASS_2D) \
  |  |  ------------------
  |  |  |  Branch (500:17): [Folded, False: 5.10M]
  |  |  ------------------
  |  |  501|  5.10M|                y |= x; \
  |  |  502|  5.10M|            tok = dav1d_msac_decode_symbol_adapt4(&ts->msac, lo_cdf[ctx], 3); \
  |  |  ------------------
  |  |  |  |   47|  5.10M|#define dav1d_msac_decode_symbol_adapt4  dav1d_msac_decode_symbol_adapt4_sse2
  |  |  ------------------
  |  |  503|  5.10M|            if (dbg) \
  |  |  ------------------
  |  |  |  Branch (503:17): [Folded, False: 5.10M]
  |  |  ------------------
  |  |  504|  5.10M|                printf("Post-lo_tok[%d][%d][%d][%d=%d=%d]: r=%d\n", \
  |  |  505|      0|                       t_dim->ctx, chroma, ctx, i, rc_i, tok, ts->msac.rng); \
  |  |  506|  5.10M|            if (tok == 3) { \
  |  |  ------------------
  |  |  |  Branch (506:17): [True: 246k, False: 4.86M]
  |  |  ------------------
  |  |  507|   246k|                mag &= 63; \
  |  |  508|   246k|                ctx = (y > (tx_class == TX_CLASS_2D) ? 14 : 7) + \
  |  |  ------------------
  |  |  |  Branch (508:24): [True: 146k, False: 99.9k]
  |  |  ------------------
  |  |  509|   246k|                      (mag > 12 ? 6 : (mag + 1) >> 1); \
  |  |  ------------------
  |  |  |  Branch (509:24): [True: 23.4k, False: 223k]
  |  |  ------------------
  |  |  510|   246k|                tok = dav1d_msac_decode_hi_tok(&ts->msac, hi_cdf[ctx]); \
  |  |  ------------------
  |  |  |  |   49|   246k|#define dav1d_msac_decode_hi_tok         dav1d_msac_decode_hi_tok_sse2
  |  |  ------------------
  |  |  511|   246k|                if (dbg) \
  |  |  ------------------
  |  |  |  Branch (511:21): [Folded, False: 246k]
  |  |  ------------------
  |  |  512|   246k|                    printf("Post-hi_tok[%d][%d][%d][%d=%d=%d]: r=%d\n", \
  |  |  513|      0|                           imin(t_dim->ctx, 3), chroma, ctx, i, rc_i, tok, \
  |  |  514|      0|                           ts->msac.rng); \
  |  |  515|   246k|                *level = (uint8_t) (tok + (3 << 6)); \
  |  |  516|   246k|                cf[rc_i] = (tok << 11) | rc; \
  |  |  517|   246k|                rc = rc_i; \
  |  |  518|  4.86M|            } else { \
  |  |  519|  4.86M|                /* 0x1 for tok, 0x7ff as bitmask for rc, 0x41 for level_tok */ \
  |  |  520|  4.86M|                tok *= 0x17ff41; \
  |  |  521|  4.86M|                *level = (uint8_t) tok; \
  |  |  522|  4.86M|                /* tok ? (tok << 11) | rc : 0 */ \
  |  |  523|  4.86M|                tok = (tok >> 9) & (rc + ~0x7ffu); \
  |  |  524|  4.86M|                if (tok) rc = rc_i; \
  |  |  ------------------
  |  |  |  Branch (524:21): [True: 1.24M, False: 3.61M]
  |  |  ------------------
  |  |  525|  4.86M|                cf[rc_i] = tok; \
  |  |  526|  4.86M|            } \
  |  |  527|  5.10M|        } \
  |  |  528|   261k|        /* dc */ \
  |  |  529|   262k|        ctx = (tx_class == TX_CLASS_2D) ? 0 : \
  |  |  ------------------
  |  |  |  Branch (529:15): [Folded, False: 262k]
  |  |  ------------------
  |  |  530|   262k|            get_lo_ctx(levels, tx_class, &mag, lo_ctx_offsets, 0, 0, stride); \
  |  |  531|   262k|        dc_tok = dav1d_msac_decode_symbol_adapt4(&ts->msac, lo_cdf[ctx], 3); \
  |  |  ------------------
  |  |  |  |   47|   262k|#define dav1d_msac_decode_symbol_adapt4  dav1d_msac_decode_symbol_adapt4_sse2
  |  |  ------------------
  |  |  532|   262k|        if (dbg) \
  |  |  ------------------
  |  |  |  Branch (532:13): [Folded, False: 262k]
  |  |  ------------------
  |  |  533|   262k|            printf("Post-dc_lo_tok[%d][%d][%d][%d]: r=%d\n", \
  |  |  534|      0|                   t_dim->ctx, chroma, ctx, dc_tok, ts->msac.rng); \
  |  |  535|   262k|        if (dc_tok == 3) { \
  |  |  ------------------
  |  |  |  Branch (535:13): [True: 28.8k, False: 233k]
  |  |  ------------------
  |  |  536|  28.8k|            if (tx_class == TX_CLASS_2D) \
  |  |  ------------------
  |  |  |  Branch (536:17): [Folded, False: 28.8k]
  |  |  ------------------
  |  |  537|  28.8k|                mag = levels[0 * stride + 1] + levels[1 * stride + 0] + \
  |  |  538|      0|                      levels[1 * stride + 1]; \
  |  |  539|  28.8k|            mag &= 63; \
  |  |  540|  28.8k|            ctx = mag > 12 ? 6 : (mag + 1) >> 1; \
  |  |  ------------------
  |  |  |  Branch (540:19): [True: 4.04k, False: 24.8k]
  |  |  ------------------
  |  |  541|  28.8k|            dc_tok = dav1d_msac_decode_hi_tok(&ts->msac, hi_cdf[ctx]); \
  |  |  ------------------
  |  |  |  |   49|  28.8k|#define dav1d_msac_decode_hi_tok         dav1d_msac_decode_hi_tok_sse2
  |  |  ------------------
  |  |  542|  28.8k|            if (dbg) \
  |  |  ------------------
  |  |  |  Branch (542:17): [Folded, False: 28.8k]
  |  |  ------------------
  |  |  543|  28.8k|                printf("Post-dc_hi_tok[%d][%d][0][%d]: r=%d\n", \
  |  |  544|      0|                       imin(t_dim->ctx, 3), chroma, dc_tok, ts->msac.rng); \
  |  |  545|  28.8k|        } \
  |  |  546|   262k|        break
  ------------------
  |  Branch (575:13): [True: 5.10M, False: 18.4E]
  |  Branch (575:13): [True: 5.10M, False: 18.4E]
  ------------------
  576|   261k|        }
  577|      0|#undef DECODE_COEFS_CLASS
  578|      0|        default: assert(0);
  ------------------
  |  Branch (578:9): [True: 0, False: 7.86M]
  |  Branch (578:18): [Folded, False: 0]
  ------------------
  579|  7.86M|        }
  580|  7.86M|    } else { // dc-only
  581|  2.92M|        int tok_br = dav1d_msac_decode_symbol_adapt4(&ts->msac, eob_cdf[0], 2);
  ------------------
  |  |   47|  2.92M|#define dav1d_msac_decode_symbol_adapt4  dav1d_msac_decode_symbol_adapt4_sse2
  ------------------
  582|  2.92M|        dc_tok = 1 + tok_br;
  583|  2.92M|        if (dbg)
  ------------------
  |  Branch (583:13): [Folded, False: 2.92M]
  ------------------
  584|      0|            printf("Post-dc_lo_tok[%d][%d][%d][%d]: r=%d\n",
  585|      0|                   t_dim->ctx, chroma, 0, dc_tok, ts->msac.rng);
  586|  2.92M|        if (tok_br == 2) {
  ------------------
  |  Branch (586:13): [True: 200k, False: 2.72M]
  ------------------
  587|   200k|            dc_tok = dav1d_msac_decode_hi_tok(&ts->msac, hi_cdf[0]);
  ------------------
  |  |   49|   200k|#define dav1d_msac_decode_hi_tok         dav1d_msac_decode_hi_tok_sse2
  ------------------
  588|   200k|            if (dbg)
  ------------------
  |  Branch (588:17): [Folded, False: 200k]
  ------------------
  589|      0|                printf("Post-dc_hi_tok[%d][%d][0][%d]: r=%d\n",
  590|      0|                       imin(t_dim->ctx, 3), chroma, dc_tok, ts->msac.rng);
  591|   200k|        }
  592|  2.92M|        rc = 0;
  593|  2.92M|    }
  594|       |
  595|       |    // residual and sign
  596|  10.8M|    const uint16_t *const dq_tbl = ts->dq[b->seg_id][plane];
  597|  10.8M|    const uint8_t *const qm_tbl = *txtp < IDTX ? f->qm[tx][plane] : NULL;
  ------------------
  |  Branch (597:35): [True: 6.74M, False: 4.10M]
  ------------------
  598|  10.8M|    const int dq_shift = imax(0, t_dim->ctx - 2);
  599|  10.8M|    const int cf_max = ~(~127U << (BITDEPTH == 8 ? 8 : f->cur.p.bpc));
  ------------------
  |  Branch (599:36): [True: 4.50M, Folded]
  ------------------
  600|  10.8M|    unsigned cul_level, dc_sign_level;
  601|       |
  602|  10.8M|    if (!dc_tok) {
  ------------------
  |  Branch (602:9): [True: 2.05M, False: 8.79M]
  ------------------
  603|  2.05M|        cul_level = 0;
  604|  2.05M|        dc_sign_level = 1 << 6;
  605|  2.05M|        if (qm_tbl) goto ac_qm;
  ------------------
  |  Branch (605:13): [True: 148k, False: 1.90M]
  ------------------
  606|  1.90M|        goto ac_noqm;
  607|  2.05M|    }
  608|       |
  609|  8.79M|    const int dc_sign_ctx = get_dc_sign_ctx(tx, a, l);
  610|  8.79M|    uint16_t *const dc_sign_cdf = ts->cdf.coef.dc_sign[chroma][dc_sign_ctx];
  611|  8.79M|    const int dc_sign = dav1d_msac_decode_bool_adapt(&ts->msac, dc_sign_cdf);
  ------------------
  |  |   52|  8.79M|#define dav1d_msac_decode_bool_adapt     dav1d_msac_decode_bool_adapt_sse2
  ------------------
  612|  8.79M|    if (dbg)
  ------------------
  |  Branch (612:9): [Folded, False: 8.79M]
  ------------------
  613|      0|        printf("Post-dc_sign[%d][%d][%d]: r=%d\n",
  614|      0|               chroma, dc_sign_ctx, dc_sign, ts->msac.rng);
  615|       |
  616|  8.79M|    int dc_dq = dq_tbl[0];
  617|  8.79M|    dc_sign_level = (dc_sign - 1) & (2 << 6);
  618|       |
  619|  8.79M|    if (qm_tbl) {
  ------------------
  |  Branch (619:9): [True: 882k, False: 7.91M]
  ------------------
  620|   882k|        dc_dq = (dc_dq * qm_tbl[0] + 16) >> 5;
  621|       |
  622|   882k|        if (dc_tok == 15) {
  ------------------
  |  Branch (622:13): [True: 32.9k, False: 849k]
  ------------------
  623|  32.9k|            dc_tok = read_golomb(&ts->msac) + 15;
  624|  32.9k|            if (dbg)
  ------------------
  |  Branch (624:17): [Folded, False: 32.9k]
  ------------------
  625|      0|                printf("Post-dc_residual[%d->%d]: r=%d\n",
  626|      0|                       dc_tok - 15, dc_tok, ts->msac.rng);
  627|       |
  628|  32.9k|            dc_tok &= 0xfffff;
  629|  32.9k|            dc_dq = (dc_dq * dc_tok) & 0xffffff;
  630|   849k|        } else {
  631|   849k|            dc_dq *= dc_tok;
  632|   849k|            assert(dc_dq <= 0xffffff);
  ------------------
  |  Branch (632:13): [True: 849k, False: 18.4E]
  ------------------
  633|   849k|        }
  634|   882k|        cul_level = dc_tok;
  635|   882k|        dc_dq >>= dq_shift;
  636|   882k|        dc_dq = umin(dc_dq, cf_max + dc_sign);
  637|   882k|        cf[0] = (coef) (dc_sign ? -dc_dq : dc_dq);
  ------------------
  |  Branch (637:25): [True: 411k, False: 471k]
  ------------------
  638|       |
  639|  1.14M|        if (rc) ac_qm: {
  ------------------
  |  Branch (639:13): [True: 498k, False: 383k]
  ------------------
  640|  1.14M|            const unsigned ac_dq = dq_tbl[1];
  641|  10.2M|            do {
  642|  10.2M|                const int sign = dav1d_msac_decode_bool_equi(&ts->msac);
  ------------------
  |  |   53|  10.2M|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
  643|  10.2M|                if (dbg)
  ------------------
  |  Branch (643:21): [Folded, False: 10.2M]
  ------------------
  644|      0|                    printf("Post-sign[%d=%d]: r=%d\n", rc, sign, ts->msac.rng);
  645|  10.2M|                const unsigned rc_tok = cf[rc];
  646|  10.2M|                unsigned tok, dq = (ac_dq * qm_tbl[rc] + 16) >> 5;
  647|  10.2M|                int dq_sat;
  648|       |
  649|  10.2M|                if (rc_tok >= (15 << 11)) {
  ------------------
  |  Branch (649:21): [True: 485k, False: 9.81M]
  ------------------
  650|   485k|                    tok = read_golomb(&ts->msac) + 15;
  651|   485k|                    if (dbg)
  ------------------
  |  Branch (651:25): [Folded, False: 485k]
  ------------------
  652|      0|                        printf("Post-residual[%d=%d->%d]: r=%d\n",
  653|      0|                               rc, tok - 15, tok, ts->msac.rng);
  654|       |
  655|   485k|                    tok &= 0xfffff;
  656|   485k|                    dq = (dq * tok) & 0xffffff;
  657|  9.81M|                } else {
  658|  9.81M|                    tok = rc_tok >> 11;
  659|  9.81M|                    dq *= tok;
  660|  9.81M|                    assert(dq <= 0xffffff);
  ------------------
  |  Branch (660:21): [True: 9.81M, False: 18.4E]
  ------------------
  661|  9.81M|                }
  662|  10.3M|                cul_level += tok;
  663|  10.3M|                dq >>= dq_shift;
  664|  10.3M|                dq_sat = umin(dq, cf_max + sign);
  665|  10.3M|                cf[rc] = (coef) (sign ? -dq_sat : dq_sat);
  ------------------
  |  Branch (665:34): [True: 5.18M, False: 5.11M]
  ------------------
  666|       |
  667|  10.3M|                rc = rc_tok & 0x3ff;
  668|  10.3M|            } while (rc);
  ------------------
  |  Branch (668:22): [True: 9.65M, False: 647k]
  ------------------
  669|  1.14M|        }
  670|  7.91M|    } else {
  671|       |        // non-qmatrix is the common case and allows for additional optimizations
  672|  7.91M|        if (dc_tok == 15) {
  ------------------
  |  Branch (672:13): [True: 331k, False: 7.58M]
  ------------------
  673|   331k|            dc_tok = read_golomb(&ts->msac) + 15;
  674|   331k|            if (dbg)
  ------------------
  |  Branch (674:17): [Folded, False: 331k]
  ------------------
  675|      0|                printf("Post-dc_residual[%d->%d]: r=%d\n",
  676|      0|                       dc_tok - 15, dc_tok, ts->msac.rng);
  677|       |
  678|   331k|            dc_tok &= 0xfffff;
  679|   331k|            dc_dq = ((dc_dq * dc_tok) & 0xffffff) >> dq_shift;
  680|   331k|            dc_dq = umin(dc_dq, cf_max + dc_sign);
  681|  7.58M|        } else {
  682|  7.58M|            dc_dq = ((dc_dq * dc_tok) >> dq_shift);
  683|  7.58M|            assert(dc_dq <= cf_max);
  ------------------
  |  Branch (683:13): [True: 7.64M, False: 18.4E]
  ------------------
  684|  7.58M|        }
  685|  7.97M|        cul_level = dc_tok;
  686|  7.97M|        cf[0] = (coef) (dc_sign ? -dc_dq : dc_dq);
  ------------------
  |  Branch (686:25): [True: 3.59M, False: 4.38M]
  ------------------
  687|       |
  688|  12.6M|        if (rc) ac_noqm: {
  ------------------
  |  Branch (688:13): [True: 5.38M, False: 2.58M]
  ------------------
  689|  12.6M|            const unsigned ac_dq = dq_tbl[1];
  690|  57.4M|            do {
  691|  57.4M|                const int sign = dav1d_msac_decode_bool_equi(&ts->msac);
  ------------------
  |  |   53|  57.4M|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
  692|  57.4M|                if (dbg)
  ------------------
  |  Branch (692:21): [Folded, False: 57.4M]
  ------------------
  693|      0|                    printf("Post-sign[%d=%d]: r=%d\n", rc, sign, ts->msac.rng);
  694|  57.4M|                const unsigned rc_tok = cf[rc];
  695|  57.4M|                unsigned tok;
  696|  57.4M|                int dq;
  697|       |
  698|       |                // residual
  699|  57.4M|                if (rc_tok >= (15 << 11)) {
  ------------------
  |  Branch (699:21): [True: 934k, False: 56.5M]
  ------------------
  700|   934k|                    tok = read_golomb(&ts->msac) + 15;
  701|   934k|                    if (dbg)
  ------------------
  |  Branch (701:25): [Folded, False: 934k]
  ------------------
  702|      0|                        printf("Post-residual[%d=%d->%d]: r=%d\n",
  703|      0|                               rc, tok - 15, tok, ts->msac.rng);
  704|       |
  705|       |                    // coefficient parsing, see 5.11.39
  706|   934k|                    tok &= 0xfffff;
  707|       |
  708|       |                    // dequant, see 7.12.3
  709|   934k|                    dq = ((ac_dq * tok) & 0xffffff) >> dq_shift;
  710|   934k|                    dq = umin(dq, cf_max + sign);
  711|  56.5M|                } else {
  712|       |                    // cannot exceed cf_max, so we can avoid the clipping
  713|  56.5M|                    tok = rc_tok >> 11;
  714|  56.5M|                    dq = ((ac_dq * tok) >> dq_shift);
  715|  56.5M|                    assert(dq <= cf_max);
  ------------------
  |  Branch (715:21): [True: 56.4M, False: 47.9k]
  ------------------
  716|  56.5M|                }
  717|  57.3M|                cul_level += tok;
  718|  57.3M|                cf[rc] = (coef) (sign ? -dq : dq);
  ------------------
  |  Branch (718:34): [True: 28.9M, False: 28.4M]
  ------------------
  719|       |
  720|  57.3M|                rc = rc_tok & 0x3ff; // next non-zero rc, zero if eob
  721|  57.3M|            } while (rc);
  ------------------
  |  Branch (721:22): [True: 50.1M, False: 7.23M]
  ------------------
  722|  12.6M|        }
  723|  7.97M|    }
  724|       |
  725|       |    // context
  726|  10.8M|    *res_ctx = umin(cul_level, 63) | dc_sign_level;
  727|       |
  728|  10.8M|    return eob;
  729|  8.79M|}
recon_tmpl.c:get_skip_ctx:
   65|  25.5M|{
   66|  25.5M|    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
   67|       |
   68|  25.5M|    if (chroma) {
  ------------------
  |  Branch (68:9): [True: 15.5M, False: 10.0M]
  ------------------
   69|  15.5M|        const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420;
   70|  15.5M|        const int ss_hor = layout != DAV1D_PIXEL_LAYOUT_I444;
   71|  15.5M|        const int not_one_blk = b_dim[2] - (!!b_dim[2] && ss_hor) > t_dim->lw ||
  ------------------
  |  Branch (71:33): [True: 8.00M, False: 7.57M]
  |  Branch (71:45): [True: 14.7M, False: 849k]
  |  Branch (71:59): [True: 3.69M, False: 11.0M]
  ------------------
   72|  7.57M|                                b_dim[3] - (!!b_dim[3] && ss_ver) > t_dim->lh;
  ------------------
  |  Branch (72:33): [True: 529k, False: 7.04M]
  |  Branch (72:45): [True: 6.35M, False: 1.22M]
  |  Branch (72:59): [True: 3.16M, False: 3.19M]
  ------------------
   73|  15.5M|        unsigned ca, cl;
   74|       |
   75|  15.5M|#define MERGE_CTX(dir, type, no_val) \
   76|  15.5M|        c##dir = *(const type *) dir != no_val; \
   77|  15.5M|        break
   78|       |
   79|  15.5M|        switch (t_dim->lw) {
   80|       |        /* For some reason the MSVC CRT _wassert() function is not flagged as
   81|       |         * __declspec(noreturn), so when using those headers the compiler will
   82|       |         * expect execution to continue after an assertion has been triggered
   83|       |         * and will therefore complain about the use of uninitialized variables
   84|       |         * when compiled in debug mode if we put the default case at the end. */
   85|      0|        default: assert(0); /* fall-through */
  ------------------
  |  Branch (85:9): [True: 0, False: 15.5M]
  |  Branch (85:18): [Folded, False: 0]
  ------------------
   86|  8.40M|        case TX_4X4:   MERGE_CTX(a, uint8_t,  0x40);
  ------------------
  |  |   76|  8.40M|        c##dir = *(const type *) dir != no_val; \
  |  |   77|  8.40M|        break
  ------------------
  |  Branch (86:9): [True: 8.40M, False: 7.17M]
  ------------------
   87|  2.43M|        case TX_8X8:   MERGE_CTX(a, uint16_t, 0x4040);
  ------------------
  |  |   76|  2.43M|        c##dir = *(const type *) dir != no_val; \
  |  |   77|  2.43M|        break
  ------------------
  |  Branch (87:9): [True: 2.43M, False: 13.1M]
  ------------------
   88|  1.85M|        case TX_16X16: MERGE_CTX(a, uint32_t, 0x40404040U);
  ------------------
  |  |   76|  1.85M|        c##dir = *(const type *) dir != no_val; \
  |  |   77|  1.85M|        break
  ------------------
  |  Branch (88:9): [True: 1.85M, False: 13.7M]
  ------------------
   89|  2.95M|        case TX_32X32: MERGE_CTX(a, uint64_t, 0x4040404040404040ULL);
  ------------------
  |  |   76|  2.95M|        c##dir = *(const type *) dir != no_val; \
  |  |   77|  2.95M|        break
  ------------------
  |  Branch (89:9): [True: 2.95M, False: 12.6M]
  ------------------
   90|  15.5M|        }
   91|  15.5M|        switch (t_dim->lh) {
   92|      0|        default: assert(0); /* fall-through */
  ------------------
  |  Branch (92:9): [True: 0, False: 15.5M]
  |  Branch (92:18): [Folded, False: 0]
  ------------------
   93|  9.07M|        case TX_4X4:   MERGE_CTX(l, uint8_t,  0x40);
  ------------------
  |  |   76|  9.07M|        c##dir = *(const type *) dir != no_val; \
  |  |   77|  9.07M|        break
  ------------------
  |  Branch (93:9): [True: 9.07M, False: 6.51M]
  ------------------
   94|  2.32M|        case TX_8X8:   MERGE_CTX(l, uint16_t, 0x4040);
  ------------------
  |  |   76|  2.32M|        c##dir = *(const type *) dir != no_val; \
  |  |   77|  2.32M|        break
  ------------------
  |  Branch (94:9): [True: 2.32M, False: 13.2M]
  ------------------
   95|  1.57M|        case TX_16X16: MERGE_CTX(l, uint32_t, 0x40404040U);
  ------------------
  |  |   76|  1.57M|        c##dir = *(const type *) dir != no_val; \
  |  |   77|  1.57M|        break
  ------------------
  |  Branch (95:9): [True: 1.57M, False: 14.0M]
  ------------------
   96|  2.68M|        case TX_32X32: MERGE_CTX(l, uint64_t, 0x4040404040404040ULL);
  ------------------
  |  |   76|  2.68M|        c##dir = *(const type *) dir != no_val; \
  |  |   77|  2.68M|        break
  ------------------
  |  Branch (96:9): [True: 2.68M, False: 12.9M]
  ------------------
   97|  15.5M|        }
   98|  15.5M|#undef MERGE_CTX
   99|       |
  100|  15.5M|        return 7 + not_one_blk * 3 + ca + cl;
  101|  15.5M|    } else if (b_dim[2] == t_dim->lw && b_dim[3] == t_dim->lh) {
  ------------------
  |  Branch (101:16): [True: 4.60M, False: 5.40M]
  |  Branch (101:41): [True: 4.33M, False: 272k]
  ------------------
  102|  4.33M|        return 0;
  103|  5.67M|    } else {
  104|  5.67M|        unsigned la, ll;
  105|       |
  106|  5.67M|#define MERGE_CTX(dir, type, tx) \
  107|  5.67M|        if (tx == TX_64X64) { \
  108|  5.67M|            uint64_t tmp = *(const uint64_t *) dir; \
  109|  5.67M|            tmp |= *(const uint64_t *) &dir[8]; \
  110|  5.67M|            l##dir = (unsigned) (tmp >> 32) | (unsigned) tmp; \
  111|  5.67M|        } else \
  112|  5.67M|            l##dir = *(const type *) dir; \
  113|  5.67M|        if (tx == TX_32X32) l##dir |= *(const type *) &dir[sizeof(type)]; \
  114|  5.67M|        if (tx >= TX_16X16) l##dir |= l##dir >> 16; \
  115|  5.67M|        if (tx >= TX_8X8)   l##dir |= l##dir >> 8; \
  116|  5.67M|        break
  117|       |
  118|  5.67M|        switch (t_dim->lw) {
  119|      0|        default: assert(0); /* fall-through */
  ------------------
  |  Branch (119:9): [True: 0, False: 5.67M]
  |  Branch (119:18): [Folded, False: 0]
  ------------------
  120|  4.28M|        case TX_4X4:   MERGE_CTX(a, uint8_t,  TX_4X4);
  ------------------
  |  |  107|  4.28M|        if (tx == TX_64X64) { \
  |  |  ------------------
  |  |  |  Branch (107:13): [Folded, False: 4.28M]
  |  |  ------------------
  |  |  108|      0|            uint64_t tmp = *(const uint64_t *) dir; \
  |  |  109|      0|            tmp |= *(const uint64_t *) &dir[8]; \
  |  |  110|      0|            l##dir = (unsigned) (tmp >> 32) | (unsigned) tmp; \
  |  |  111|      0|        } else \
  |  |  112|  4.28M|            l##dir = *(const type *) dir; \
  |  |  113|  4.28M|        if (tx == TX_32X32) l##dir |= *(const type *) &dir[sizeof(type)]; \
  |  |  ------------------
  |  |  |  Branch (113:13): [Folded, False: 4.28M]
  |  |  ------------------
  |  |  114|  4.28M|        if (tx >= TX_16X16) l##dir |= l##dir >> 16; \
  |  |  ------------------
  |  |  |  Branch (114:13): [Folded, False: 4.28M]
  |  |  ------------------
  |  |  115|  4.28M|        if (tx >= TX_8X8)   l##dir |= l##dir >> 8; \
  |  |  ------------------
  |  |  |  Branch (115:13): [Folded, False: 4.28M]
  |  |  ------------------
  |  |  116|  4.28M|        break
  ------------------
  |  Branch (120:9): [True: 4.28M, False: 1.39M]
  ------------------
  121|   936k|        case TX_8X8:   MERGE_CTX(a, uint16_t, TX_8X8);
  ------------------
  |  |  107|   936k|        if (tx == TX_64X64) { \
  |  |  ------------------
  |  |  |  Branch (107:13): [Folded, False: 936k]
  |  |  ------------------
  |  |  108|      0|            uint64_t tmp = *(const uint64_t *) dir; \
  |  |  109|      0|            tmp |= *(const uint64_t *) &dir[8]; \
  |  |  110|      0|            l##dir = (unsigned) (tmp >> 32) | (unsigned) tmp; \
  |  |  111|      0|        } else \
  |  |  112|   936k|            l##dir = *(const type *) dir; \
  |  |  113|   936k|        if (tx == TX_32X32) l##dir |= *(const type *) &dir[sizeof(type)]; \
  |  |  ------------------
  |  |  |  Branch (113:13): [Folded, False: 936k]
  |  |  ------------------
  |  |  114|   936k|        if (tx >= TX_16X16) l##dir |= l##dir >> 16; \
  |  |  ------------------
  |  |  |  Branch (114:13): [Folded, False: 936k]
  |  |  ------------------
  |  |  115|   936k|        if (tx >= TX_8X8)   l##dir |= l##dir >> 8; \
  |  |  ------------------
  |  |  |  Branch (115:13): [True: 936k, Folded]
  |  |  ------------------
  |  |  116|   936k|        break
  ------------------
  |  Branch (121:9): [True: 936k, False: 4.74M]
  ------------------
  122|   375k|        case TX_16X16: MERGE_CTX(a, uint32_t, TX_16X16);
  ------------------
  |  |  107|   375k|        if (tx == TX_64X64) { \
  |  |  ------------------
  |  |  |  Branch (107:13): [Folded, False: 375k]
  |  |  ------------------
  |  |  108|      0|            uint64_t tmp = *(const uint64_t *) dir; \
  |  |  109|      0|            tmp |= *(const uint64_t *) &dir[8]; \
  |  |  110|      0|            l##dir = (unsigned) (tmp >> 32) | (unsigned) tmp; \
  |  |  111|      0|        } else \
  |  |  112|   375k|            l##dir = *(const type *) dir; \
  |  |  113|   375k|        if (tx == TX_32X32) l##dir |= *(const type *) &dir[sizeof(type)]; \
  |  |  ------------------
  |  |  |  Branch (113:13): [Folded, False: 375k]
  |  |  ------------------
  |  |  114|   375k|        if (tx >= TX_16X16) l##dir |= l##dir >> 16; \
  |  |  ------------------
  |  |  |  Branch (114:13): [True: 375k, Folded]
  |  |  ------------------
  |  |  115|   375k|        if (tx >= TX_8X8)   l##dir |= l##dir >> 8; \
  |  |  ------------------
  |  |  |  Branch (115:13): [True: 375k, Folded]
  |  |  ------------------
  |  |  116|   375k|        break
  ------------------
  |  Branch (122:9): [True: 375k, False: 5.30M]
  ------------------
  123|  39.4k|        case TX_32X32: MERGE_CTX(a, uint32_t, TX_32X32);
  ------------------
  |  |  107|  39.4k|        if (tx == TX_64X64) { \
  |  |  ------------------
  |  |  |  Branch (107:13): [Folded, False: 39.4k]
  |  |  ------------------
  |  |  108|      0|            uint64_t tmp = *(const uint64_t *) dir; \
  |  |  109|      0|            tmp |= *(const uint64_t *) &dir[8]; \
  |  |  110|      0|            l##dir = (unsigned) (tmp >> 32) | (unsigned) tmp; \
  |  |  111|      0|        } else \
  |  |  112|  39.4k|            l##dir = *(const type *) dir; \
  |  |  113|  39.4k|        if (tx == TX_32X32) l##dir |= *(const type *) &dir[sizeof(type)]; \
  |  |  ------------------
  |  |  |  Branch (113:13): [True: 39.4k, Folded]
  |  |  ------------------
  |  |  114|  39.4k|        if (tx >= TX_16X16) l##dir |= l##dir >> 16; \
  |  |  ------------------
  |  |  |  Branch (114:13): [True: 39.4k, Folded]
  |  |  ------------------
  |  |  115|  39.4k|        if (tx >= TX_8X8)   l##dir |= l##dir >> 8; \
  |  |  ------------------
  |  |  |  Branch (115:13): [True: 39.4k, Folded]
  |  |  ------------------
  |  |  116|  39.4k|        break
  ------------------
  |  Branch (123:9): [True: 39.4k, False: 5.63M]
  ------------------
  124|   333k|        case TX_64X64: MERGE_CTX(a, uint32_t, TX_64X64);
  ------------------
  |  |  107|   333k|        if (tx == TX_64X64) { \
  |  |  ------------------
  |  |  |  Branch (107:13): [True: 333k, Folded]
  |  |  ------------------
  |  |  108|   333k|            uint64_t tmp = *(const uint64_t *) dir; \
  |  |  109|   333k|            tmp |= *(const uint64_t *) &dir[8]; \
  |  |  110|   333k|            l##dir = (unsigned) (tmp >> 32) | (unsigned) tmp; \
  |  |  111|   333k|        } else \
  |  |  112|  18.4E|            l##dir = *(const type *) dir; \
  |  |  113|   333k|        if (tx == TX_32X32) l##dir |= *(const type *) &dir[sizeof(type)]; \
  |  |  ------------------
  |  |  |  Branch (113:13): [Folded, False: 333k]
  |  |  ------------------
  |  |  114|   333k|        if (tx >= TX_16X16) l##dir |= l##dir >> 16; \
  |  |  ------------------
  |  |  |  Branch (114:13): [True: 333k, Folded]
  |  |  ------------------
  |  |  115|   333k|        if (tx >= TX_8X8)   l##dir |= l##dir >> 8; \
  |  |  ------------------
  |  |  |  Branch (115:13): [True: 333k, Folded]
  |  |  ------------------
  |  |  116|   333k|        break
  ------------------
  |  Branch (124:9): [True: 333k, False: 5.34M]
  ------------------
  125|  5.67M|        }
  126|  5.96M|        switch (t_dim->lh) {
  127|      0|        default: assert(0); /* fall-through */
  ------------------
  |  Branch (127:9): [True: 0, False: 5.96M]
  |  Branch (127:18): [Folded, False: 0]
  ------------------
  128|  4.31M|        case TX_4X4:   MERGE_CTX(l, uint8_t,  TX_4X4);
  ------------------
  |  |  107|  4.31M|        if (tx == TX_64X64) { \
  |  |  ------------------
  |  |  |  Branch (107:13): [Folded, False: 4.31M]
  |  |  ------------------
  |  |  108|      0|            uint64_t tmp = *(const uint64_t *) dir; \
  |  |  109|      0|            tmp |= *(const uint64_t *) &dir[8]; \
  |  |  110|      0|            l##dir = (unsigned) (tmp >> 32) | (unsigned) tmp; \
  |  |  111|      0|        } else \
  |  |  112|  4.31M|            l##dir = *(const type *) dir; \
  |  |  113|  4.31M|        if (tx == TX_32X32) l##dir |= *(const type *) &dir[sizeof(type)]; \
  |  |  ------------------
  |  |  |  Branch (113:13): [Folded, False: 4.31M]
  |  |  ------------------
  |  |  114|  4.31M|        if (tx >= TX_16X16) l##dir |= l##dir >> 16; \
  |  |  ------------------
  |  |  |  Branch (114:13): [Folded, False: 4.31M]
  |  |  ------------------
  |  |  115|  4.31M|        if (tx >= TX_8X8)   l##dir |= l##dir >> 8; \
  |  |  ------------------
  |  |  |  Branch (115:13): [Folded, False: 4.31M]
  |  |  ------------------
  |  |  116|  4.31M|        break
  ------------------
  |  Branch (128:9): [True: 4.31M, False: 1.64M]
  ------------------
  129|   928k|        case TX_8X8:   MERGE_CTX(l, uint16_t, TX_8X8);
  ------------------
  |  |  107|   928k|        if (tx == TX_64X64) { \
  |  |  ------------------
  |  |  |  Branch (107:13): [Folded, False: 928k]
  |  |  ------------------
  |  |  108|      0|            uint64_t tmp = *(const uint64_t *) dir; \
  |  |  109|      0|            tmp |= *(const uint64_t *) &dir[8]; \
  |  |  110|      0|            l##dir = (unsigned) (tmp >> 32) | (unsigned) tmp; \
  |  |  111|      0|        } else \
  |  |  112|   928k|            l##dir = *(const type *) dir; \
  |  |  113|   928k|        if (tx == TX_32X32) l##dir |= *(const type *) &dir[sizeof(type)]; \
  |  |  ------------------
  |  |  |  Branch (113:13): [Folded, False: 928k]
  |  |  ------------------
  |  |  114|   928k|        if (tx >= TX_16X16) l##dir |= l##dir >> 16; \
  |  |  ------------------
  |  |  |  Branch (114:13): [Folded, False: 928k]
  |  |  ------------------
  |  |  115|   928k|        if (tx >= TX_8X8)   l##dir |= l##dir >> 8; \
  |  |  ------------------
  |  |  |  Branch (115:13): [True: 928k, Folded]
  |  |  ------------------
  |  |  116|   928k|        break
  ------------------
  |  Branch (129:9): [True: 928k, False: 5.03M]
  ------------------
  130|   353k|        case TX_16X16: MERGE_CTX(l, uint32_t, TX_16X16);
  ------------------
  |  |  107|   353k|        if (tx == TX_64X64) { \
  |  |  ------------------
  |  |  |  Branch (107:13): [Folded, False: 353k]
  |  |  ------------------
  |  |  108|      0|            uint64_t tmp = *(const uint64_t *) dir; \
  |  |  109|      0|            tmp |= *(const uint64_t *) &dir[8]; \
  |  |  110|      0|            l##dir = (unsigned) (tmp >> 32) | (unsigned) tmp; \
  |  |  111|      0|        } else \
  |  |  112|   353k|            l##dir = *(const type *) dir; \
  |  |  113|   353k|        if (tx == TX_32X32) l##dir |= *(const type *) &dir[sizeof(type)]; \
  |  |  ------------------
  |  |  |  Branch (113:13): [Folded, False: 353k]
  |  |  ------------------
  |  |  114|   353k|        if (tx >= TX_16X16) l##dir |= l##dir >> 16; \
  |  |  ------------------
  |  |  |  Branch (114:13): [True: 353k, Folded]
  |  |  ------------------
  |  |  115|   353k|        if (tx >= TX_8X8)   l##dir |= l##dir >> 8; \
  |  |  ------------------
  |  |  |  Branch (115:13): [True: 353k, Folded]
  |  |  ------------------
  |  |  116|   353k|        break
  ------------------
  |  Branch (130:9): [True: 353k, False: 5.61M]
  ------------------
  131|  38.5k|        case TX_32X32: MERGE_CTX(l, uint32_t, TX_32X32);
  ------------------
  |  |  107|  38.5k|        if (tx == TX_64X64) { \
  |  |  ------------------
  |  |  |  Branch (107:13): [Folded, False: 38.5k]
  |  |  ------------------
  |  |  108|      0|            uint64_t tmp = *(const uint64_t *) dir; \
  |  |  109|      0|            tmp |= *(const uint64_t *) &dir[8]; \
  |  |  110|      0|            l##dir = (unsigned) (tmp >> 32) | (unsigned) tmp; \
  |  |  111|      0|        } else \
  |  |  112|  38.5k|            l##dir = *(const type *) dir; \
  |  |  113|  38.5k|        if (tx == TX_32X32) l##dir |= *(const type *) &dir[sizeof(type)]; \
  |  |  ------------------
  |  |  |  Branch (113:13): [True: 38.5k, Folded]
  |  |  ------------------
  |  |  114|  38.5k|        if (tx >= TX_16X16) l##dir |= l##dir >> 16; \
  |  |  ------------------
  |  |  |  Branch (114:13): [True: 38.5k, Folded]
  |  |  ------------------
  |  |  115|  38.5k|        if (tx >= TX_8X8)   l##dir |= l##dir >> 8; \
  |  |  ------------------
  |  |  |  Branch (115:13): [True: 38.5k, Folded]
  |  |  ------------------
  |  |  116|  38.5k|        break
  ------------------
  |  Branch (131:9): [True: 38.5k, False: 5.92M]
  ------------------
  132|   333k|        case TX_64X64: MERGE_CTX(l, uint32_t, TX_64X64);
  ------------------
  |  |  107|   333k|        if (tx == TX_64X64) { \
  |  |  ------------------
  |  |  |  Branch (107:13): [True: 333k, Folded]
  |  |  ------------------
  |  |  108|   333k|            uint64_t tmp = *(const uint64_t *) dir; \
  |  |  109|   333k|            tmp |= *(const uint64_t *) &dir[8]; \
  |  |  110|   333k|            l##dir = (unsigned) (tmp >> 32) | (unsigned) tmp; \
  |  |  111|   333k|        } else \
  |  |  112|   333k|            l##dir = *(const type *) dir; \
  |  |  113|   333k|        if (tx == TX_32X32) l##dir |= *(const type *) &dir[sizeof(type)]; \
  |  |  ------------------
  |  |  |  Branch (113:13): [Folded, False: 333k]
  |  |  ------------------
  |  |  114|   333k|        if (tx >= TX_16X16) l##dir |= l##dir >> 16; \
  |  |  ------------------
  |  |  |  Branch (114:13): [True: 333k, Folded]
  |  |  ------------------
  |  |  115|   333k|        if (tx >= TX_8X8)   l##dir |= l##dir >> 8; \
  |  |  ------------------
  |  |  |  Branch (115:13): [True: 333k, Folded]
  |  |  ------------------
  |  |  116|   333k|        break
  ------------------
  |  Branch (132:9): [True: 333k, False: 5.63M]
  ------------------
  133|  5.96M|        }
  134|  5.96M|#undef MERGE_CTX
  135|       |
  136|  5.96M|        return dav1d_skip_ctx[umin(la & 0x3F, 4)][umin(ll & 0x3F, 4)];
  137|  5.96M|    }
  138|  25.5M|}
recon_tmpl.c:get_lo_ctx:
  304|   152M|{
  305|   152M|    unsigned mag = levels[0 * stride + 1] + levels[1 * stride + 0];
  306|   152M|    unsigned offset;
  307|   152M|    if (tx_class == TX_CLASS_2D) {
  ------------------
  |  Branch (307:9): [True: 140M, False: 11.7M]
  ------------------
  308|   140M|        mag += levels[1 * stride + 1];
  309|   140M|        *hi_mag = mag;
  310|   140M|        mag += levels[0 * stride + 2] + levels[2 * stride + 0];
  311|   140M|        offset = ctx_offsets[umin(y, 4)][umin(x, 4)];
  312|   140M|    } else {
  313|  11.7M|        mag += levels[0 * stride + 2];
  314|  11.7M|        *hi_mag = mag;
  315|  11.7M|        mag += levels[0 * stride + 3] + levels[0 * stride + 4];
  316|  11.7M|        offset = 26 + (y > 1 ? 10 : y * 5);
  ------------------
  |  Branch (316:24): [True: 6.55M, False: 5.14M]
  ------------------
  317|  11.7M|    }
  318|   152M|    return offset + (mag > 512 ? 4 : (mag + 64) >> 7);
  ------------------
  |  Branch (318:22): [True: 8.90M, False: 143M]
  ------------------
  319|   152M|}
recon_tmpl.c:get_dc_sign_ctx:
  143|  8.84M|{
  144|  8.84M|    uint64_t mask = 0xC0C0C0C0C0C0C0C0ULL, mul = 0x0101010101010101ULL;
  145|  8.84M|    int s;
  146|       |
  147|  8.84M|#if ARCH_X86_64 && defined(__GNUC__)
  148|       |    /* Coerce compilers into producing better code. For some reason
  149|       |     * every x86-64 compiler is awful at handling 64-bit constants. */
  150|  8.84M|    __asm__("" : "+r"(mask), "+r"(mul));
  151|  8.84M|#endif
  152|       |
  153|  8.84M|    switch(tx) {
  154|      0|    default: assert(0); /* fall-through */
  ------------------
  |  Branch (154:5): [True: 0, False: 8.84M]
  |  Branch (154:14): [Folded, False: 0]
  ------------------
  155|  3.38M|    case TX_4X4: {
  ------------------
  |  Branch (155:5): [True: 3.38M, False: 5.46M]
  ------------------
  156|  3.38M|        int t = *(const uint8_t *) a >> 6;
  157|  3.38M|        t    += *(const uint8_t *) l >> 6;
  158|  3.38M|        s = t - 1 - 1;
  159|  3.38M|        break;
  160|      0|    }
  161|  1.10M|    case TX_8X8: {
  ------------------
  |  Branch (161:5): [True: 1.10M, False: 7.74M]
  ------------------
  162|  1.10M|        uint32_t t = *(const uint16_t *) a & (uint32_t) mask;
  163|  1.10M|        t         += *(const uint16_t *) l & (uint32_t) mask;
  164|  1.10M|        t *= 0x04040404U;
  165|  1.10M|        s = (int) (t >> 24) - 2 - 2;
  166|  1.10M|        break;
  167|      0|    }
  168|   659k|    case TX_16X16: {
  ------------------
  |  Branch (168:5): [True: 659k, False: 8.18M]
  ------------------
  169|   659k|        uint32_t t = (*(const uint32_t *) a & (uint32_t) mask) >> 6;
  170|   659k|        t         += (*(const uint32_t *) l & (uint32_t) mask) >> 6;
  171|   659k|        t *= (uint32_t) mul;
  172|   659k|        s = (int) (t >> 24) - 4 - 4;
  173|   659k|        break;
  174|      0|    }
  175|   626k|    case TX_32X32: {
  ------------------
  |  Branch (175:5): [True: 626k, False: 8.22M]
  ------------------
  176|   626k|        uint64_t t = (*(const uint64_t *) a & mask) >> 6;
  177|   626k|        t         += (*(const uint64_t *) l & mask) >> 6;
  178|   626k|        t *= mul;
  179|   626k|        s = (int) (t >> 56) - 8 - 8;
  180|   626k|        break;
  181|      0|    }
  182|   291k|    case TX_64X64: {
  ------------------
  |  Branch (182:5): [True: 291k, False: 8.55M]
  ------------------
  183|   291k|        uint64_t t = (*(const uint64_t *) &a[0] & mask) >> 6;
  184|   291k|        t         += (*(const uint64_t *) &a[8] & mask) >> 6;
  185|   291k|        t         += (*(const uint64_t *) &l[0] & mask) >> 6;
  186|   291k|        t         += (*(const uint64_t *) &l[8] & mask) >> 6;
  187|   291k|        t *= mul;
  188|   291k|        s = (int) (t >> 56) - 16 - 16;
  189|   291k|        break;
  190|      0|    }
  191|   334k|    case RTX_4X8: {
  ------------------
  |  Branch (191:5): [True: 334k, False: 8.51M]
  ------------------
  192|   334k|        uint32_t t = *(const uint8_t  *) a & (uint32_t) mask;
  193|   334k|        t         += *(const uint16_t *) l & (uint32_t) mask;
  194|   334k|        t *= 0x04040404U;
  195|   334k|        s = (int) (t >> 24) - 1 - 2;
  196|   334k|        break;
  197|      0|    }
  198|   539k|    case RTX_8X4: {
  ------------------
  |  Branch (198:5): [True: 539k, False: 8.30M]
  ------------------
  199|   539k|        uint32_t t = *(const uint16_t *) a & (uint32_t) mask;
  200|   539k|        t         += *(const uint8_t  *) l & (uint32_t) mask;
  201|   539k|        t *= 0x04040404U;
  202|   539k|        s = (int) (t >> 24) - 2 - 1;
  203|   539k|        break;
  204|      0|    }
  205|   241k|    case RTX_8X16: {
  ------------------
  |  Branch (205:5): [True: 241k, False: 8.60M]
  ------------------
  206|   241k|        uint32_t t = *(const uint16_t *) a & (uint32_t) mask;
  207|   241k|        t         += *(const uint32_t *) l & (uint32_t) mask;
  208|   241k|        t = (t >> 6) * (uint32_t) mul;
  209|   241k|        s = (int) (t >> 24) - 2 - 4;
  210|   241k|        break;
  211|      0|    }
  212|   462k|    case RTX_16X8: {
  ------------------
  |  Branch (212:5): [True: 462k, False: 8.38M]
  ------------------
  213|   462k|        uint32_t t = *(const uint32_t *) a & (uint32_t) mask;
  214|   462k|        t         += *(const uint16_t *) l & (uint32_t) mask;
  215|   462k|        t = (t >> 6) * (uint32_t) mul;
  216|   462k|        s = (int) (t >> 24) - 4 - 2;
  217|   462k|        break;
  218|      0|    }
  219|   121k|    case RTX_16X32: {
  ------------------
  |  Branch (219:5): [True: 121k, False: 8.72M]
  ------------------
  220|   121k|        uint64_t t = *(const uint32_t *) a & (uint32_t) mask;
  221|   121k|        t         += *(const uint64_t *) l & mask;
  222|   121k|        t = (t >> 6) * mul;
  223|   121k|        s = (int) (t >> 56) - 4 - 8;
  224|   121k|        break;
  225|      0|    }
  226|   174k|    case RTX_32X16: {
  ------------------
  |  Branch (226:5): [True: 174k, False: 8.67M]
  ------------------
  227|   174k|        uint64_t t = *(const uint64_t *) a & mask;
  228|   174k|        t         += *(const uint32_t *) l & (uint32_t) mask;
  229|   174k|        t = (t >> 6) * mul;
  230|   174k|        s = (int) (t >> 56) - 8 - 4;
  231|   174k|        break;
  232|      0|    }
  233|  22.5k|    case RTX_32X64: {
  ------------------
  |  Branch (233:5): [True: 22.5k, False: 8.82M]
  ------------------
  234|  22.5k|        uint64_t t = (*(const uint64_t *) &a[0] & mask) >> 6;
  235|  22.5k|        t         += (*(const uint64_t *) &l[0] & mask) >> 6;
  236|  22.5k|        t         += (*(const uint64_t *) &l[8] & mask) >> 6;
  237|  22.5k|        t *= mul;
  238|  22.5k|        s = (int) (t >> 56) - 8 - 16;
  239|  22.5k|        break;
  240|      0|    }
  241|  34.0k|    case RTX_64X32: {
  ------------------
  |  Branch (241:5): [True: 34.0k, False: 8.81M]
  ------------------
  242|  34.0k|        uint64_t t = (*(const uint64_t *) &a[0] & mask) >> 6;
  243|  34.0k|        t         += (*(const uint64_t *) &a[8] & mask) >> 6;
  244|  34.0k|        t         += (*(const uint64_t *) &l[0] & mask) >> 6;
  245|  34.0k|        t *= mul;
  246|  34.0k|        s = (int) (t >> 56) - 16 - 8;
  247|  34.0k|        break;
  248|      0|    }
  249|   176k|    case RTX_4X16: {
  ------------------
  |  Branch (249:5): [True: 176k, False: 8.67M]
  ------------------
  250|   176k|        uint32_t t = *(const uint8_t  *) a & (uint32_t) mask;
  251|   176k|        t         += *(const uint32_t *) l & (uint32_t) mask;
  252|   176k|        t = (t >> 6) * (uint32_t) mul;
  253|   176k|        s = (int) (t >> 24) - 1 - 4;
  254|   176k|        break;
  255|      0|    }
  256|   363k|    case RTX_16X4: {
  ------------------
  |  Branch (256:5): [True: 363k, False: 8.48M]
  ------------------
  257|   363k|        uint32_t t = *(const uint32_t *) a & (uint32_t) mask;
  258|   363k|        t         += *(const uint8_t  *) l & (uint32_t) mask;
  259|   363k|        t = (t >> 6) * (uint32_t) mul;
  260|   363k|        s = (int) (t >> 24) - 4 - 1;
  261|   363k|        break;
  262|      0|    }
  263|  92.9k|    case RTX_8X32: {
  ------------------
  |  Branch (263:5): [True: 92.9k, False: 8.75M]
  ------------------
  264|  92.9k|        uint64_t t = *(const uint16_t *) a & (uint32_t) mask;
  265|  92.9k|        t         += *(const uint64_t *) l & mask;
  266|  92.9k|        t = (t >> 6) * mul;
  267|  92.9k|        s = (int) (t >> 56) - 2 - 8;
  268|  92.9k|        break;
  269|      0|    }
  270|   191k|    case RTX_32X8: {
  ------------------
  |  Branch (270:5): [True: 191k, False: 8.65M]
  ------------------
  271|   191k|        uint64_t t = *(const uint64_t *) a & mask;
  272|   191k|        t         += *(const uint16_t *) l & (uint32_t) mask;
  273|   191k|        t = (t >> 6) * mul;
  274|   191k|        s = (int) (t >> 56) - 8 - 2;
  275|   191k|        break;
  276|      0|    }
  277|  27.1k|    case RTX_16X64: {
  ------------------
  |  Branch (277:5): [True: 27.1k, False: 8.82M]
  ------------------
  278|  27.1k|        uint64_t t = *(const uint32_t *) a & (uint32_t) mask;
  279|  27.1k|        t         += *(const uint64_t *) &l[0] & mask;
  280|  27.1k|        t = (t >> 6) + ((*(const uint64_t *) &l[8] & mask) >> 6);
  281|  27.1k|        t *= mul;
  282|  27.1k|        s = (int) (t >> 56) - 4 - 16;
  283|  27.1k|        break;
  284|      0|    }
  285|  39.0k|    case RTX_64X16: {
  ------------------
  |  Branch (285:5): [True: 39.0k, False: 8.80M]
  ------------------
  286|  39.0k|        uint64_t t = *(const uint64_t *) &a[0] & mask;
  287|  39.0k|        t         += *(const uint32_t *) l & (uint32_t) mask;
  288|  39.0k|        t = (t >> 6) + ((*(const uint64_t *) &a[8] & mask) >> 6);
  289|  39.0k|        t *= mul;
  290|  39.0k|        s = (int) (t >> 56) - 16 - 4;
  291|  39.0k|        break;
  292|      0|    }
  293|  8.84M|    }
  294|       |
  295|  8.84M|    return (s != 0) + (s > 0);
  296|  8.84M|}
recon_tmpl.c:read_golomb:
   49|  1.78M|static inline unsigned read_golomb(MsacContext *const msac) {
   50|  1.78M|    int len = 0;
   51|  1.78M|    unsigned val = 1;
   52|       |
   53|  3.83M|    while (!dav1d_msac_decode_bool_equi(msac) && len < 32) len++;
  ------------------
  |  |   53|  3.83M|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
  |  Branch (53:12): [True: 2.04M, False: 1.78M]
  |  Branch (53:50): [True: 2.04M, False: 203]
  ------------------
   54|  3.83M|    while (len--) val = (val << 1) + dav1d_msac_decode_bool_equi(msac);
  ------------------
  |  |   53|  2.04M|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
  |  Branch (54:12): [True: 2.04M, False: 1.78M]
  ------------------
   55|       |
   56|  1.78M|    return val - 1;
   57|  1.78M|}
recon_tmpl.c:mc:
  944|  5.81M|{
  945|  5.81M|    assert((dst8 != NULL) ^ (dst16 != NULL));
  ------------------
  |  Branch (945:5): [True: 5.81M, False: 18.4E]
  ------------------
  946|  5.81M|    const Dav1dFrameContext *const f = t->f;
  947|  5.81M|    const int ss_ver = !!pl && f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
  ------------------
  |  Branch (947:24): [True: 3.32M, False: 2.49M]
  |  Branch (947:32): [True: 2.88M, False: 444k]
  ------------------
  948|  5.81M|    const int ss_hor = !!pl && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
  ------------------
  |  Branch (948:24): [True: 3.32M, False: 2.49M]
  |  Branch (948:32): [True: 2.90M, False: 419k]
  ------------------
  949|  5.81M|    const int h_mul = 4 >> ss_hor, v_mul = 4 >> ss_ver;
  950|  5.81M|    const int mvx = mv.x, mvy = mv.y;
  951|  5.81M|    const int mx = mvx & (15 >> !ss_hor), my = mvy & (15 >> !ss_ver);
  952|  5.81M|    ptrdiff_t ref_stride = refp->p.stride[!!pl];
  953|  5.81M|    const pixel *ref;
  954|       |
  955|  5.81M|    if (refp->p.p.w == f->cur.p.w && refp->p.p.h == f->cur.p.h) {
  ------------------
  |  Branch (955:9): [True: 5.36M, False: 452k]
  |  Branch (955:38): [True: 5.24M, False: 120k]
  ------------------
  956|  5.24M|        const int dx = bx * h_mul + (mvx >> (3 + ss_hor));
  957|  5.24M|        const int dy = by * v_mul + (mvy >> (3 + ss_ver));
  958|  5.24M|        int w, h;
  959|       |
  960|  5.24M|        if (refp->p.data[0] != f->cur.data[0]) { // i.e. not for intrabc
  ------------------
  |  Branch (960:13): [True: 5.17M, False: 73.3k]
  ------------------
  961|  5.17M|            w = (f->cur.p.w + ss_hor) >> ss_hor;
  962|  5.17M|            h = (f->cur.p.h + ss_ver) >> ss_ver;
  963|  5.17M|        } else {
  964|  73.3k|            w = f->bw * 4 >> ss_hor;
  965|  73.3k|            h = f->bh * 4 >> ss_ver;
  966|  73.3k|        }
  967|  5.24M|        if (dx < !!mx * 3 || dy < !!my * 3 ||
  ------------------
  |  Branch (967:13): [True: 108k, False: 5.13M]
  |  Branch (967:30): [True: 67.8k, False: 5.07M]
  ------------------
  968|  5.06M|            dx + bw4 * h_mul + !!mx * 4 > w ||
  ------------------
  |  Branch (968:13): [True: 508k, False: 4.56M]
  ------------------
  969|  4.56M|            dy + bh4 * v_mul + !!my * 4 > h)
  ------------------
  |  Branch (969:13): [True: 220k, False: 4.34M]
  ------------------
  970|   916k|        {
  971|   916k|            pixel *const emu_edge_buf = bitfn(t->scratch.emu_edge);
  ------------------
  |  |   51|   916k|#define bitfn(x) x##_8bpc
  ------------------
  972|   916k|            f->dsp->mc.emu_edge(bw4 * h_mul + !!mx * 7, bh4 * v_mul + !!my * 7,
  973|   916k|                                w, h, dx - !!mx * 3, dy - !!my * 3,
  974|   916k|                                emu_edge_buf, 192 * sizeof(pixel),
  975|   916k|                                refp->p.data[pl], ref_stride);
  976|   916k|            ref = &emu_edge_buf[192 * !!my * 3 + !!mx * 3];
  977|   916k|            ref_stride = 192 * sizeof(pixel);
  978|  4.32M|        } else {
  979|  4.32M|            ref = ((pixel *) refp->p.data[pl]) + PXSTRIDE(ref_stride) * dy + dx;
  ------------------
  |  |   53|  4.32M|#define PXSTRIDE(x) (x)
  ------------------
  980|  4.32M|        }
  981|       |
  982|  5.24M|        if (dst8 != NULL) {
  ------------------
  |  Branch (982:13): [True: 4.52M, False: 717k]
  ------------------
  983|  4.52M|            f->dsp->mc.mc[filter_2d](dst8, dst_stride, ref, ref_stride, bw4 * h_mul,
  984|  4.52M|                                     bh4 * v_mul, mx << !ss_hor, my << !ss_ver
  985|  4.52M|                                     HIGHBD_CALL_SUFFIX);
  986|  4.52M|        } else {
  987|   717k|            f->dsp->mc.mct[filter_2d](dst16, ref, ref_stride, bw4 * h_mul,
  988|   717k|                                      bh4 * v_mul, mx << !ss_hor, my << !ss_ver
  989|   717k|                                      HIGHBD_CALL_SUFFIX);
  990|   717k|        }
  991|  5.24M|    } else {
  992|   573k|        assert(refp != &f->sr_cur);
  ------------------
  |  Branch (992:9): [True: 577k, False: 18.4E]
  ------------------
  993|       |
  994|   577k|        const int orig_pos_y = (by * v_mul << 4) + mvy * (1 << !ss_ver);
  995|   577k|        const int orig_pos_x = (bx * h_mul << 4) + mvx * (1 << !ss_hor);
  996|   577k|#define scale_mv(res, val, scale) do { \
  997|   577k|            const int64_t tmp = (int64_t)(val) * scale + (scale - 0x4000) * 8; \
  998|   577k|            res = apply_sign64((int) ((llabs(tmp) + 128) >> 8), tmp) + 32;     \
  999|   577k|        } while (0)
 1000|   577k|        int pos_y, pos_x;
 1001|   577k|        scale_mv(pos_x, orig_pos_x, f->svc[refidx][0].scale);
  ------------------
  |  |  996|   577k|#define scale_mv(res, val, scale) do { \
  |  |  997|   577k|            const int64_t tmp = (int64_t)(val) * scale + (scale - 0x4000) * 8; \
  |  |  998|   577k|            res = apply_sign64((int) ((llabs(tmp) + 128) >> 8), tmp) + 32;     \
  |  |  999|   577k|        } while (0)
  |  |  ------------------
  |  |  |  Branch (999:18): [Folded, False: 577k]
  |  |  ------------------
  ------------------
 1002|   577k|        scale_mv(pos_y, orig_pos_y, f->svc[refidx][1].scale);
  ------------------
  |  |  996|   577k|#define scale_mv(res, val, scale) do { \
  |  |  997|   577k|            const int64_t tmp = (int64_t)(val) * scale + (scale - 0x4000) * 8; \
  |  |  998|   577k|            res = apply_sign64((int) ((llabs(tmp) + 128) >> 8), tmp) + 32;     \
  |  |  999|   577k|        } while (0)
  |  |  ------------------
  |  |  |  Branch (999:18): [Folded, False: 577k]
  |  |  ------------------
  ------------------
 1003|   577k|#undef scale_mv
 1004|   577k|        const int left = pos_x >> 10;
 1005|   577k|        const int top = pos_y >> 10;
 1006|   577k|        const int right =
 1007|   577k|            ((pos_x + (bw4 * h_mul - 1) * f->svc[refidx][0].step) >> 10) + 1;
 1008|   577k|        const int bottom =
 1009|   577k|            ((pos_y + (bh4 * v_mul - 1) * f->svc[refidx][1].step) >> 10) + 1;
 1010|       |
 1011|   577k|        if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|   577k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 577k]
  |  |  ------------------
  |  |   35|   577k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   577k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1012|      0|            printf("Off %dx%d [%d,%d,%d], size %dx%d [%d,%d]\n",
 1013|      0|                   left, top, orig_pos_x, f->svc[refidx][0].scale, refidx,
 1014|      0|                   right-left, bottom-top,
 1015|      0|                   f->svc[refidx][0].step, f->svc[refidx][1].step);
 1016|       |
 1017|   577k|        const int w = (refp->p.p.w + ss_hor) >> ss_hor;
 1018|   577k|        const int h = (refp->p.p.h + ss_ver) >> ss_ver;
 1019|   577k|        if (left < 3 || top < 3 || right + 4 > w || bottom + 4 > h) {
  ------------------
  |  Branch (1019:13): [True: 116k, False: 461k]
  |  Branch (1019:25): [True: 20.6k, False: 440k]
  |  Branch (1019:36): [True: 115k, False: 324k]
  |  Branch (1019:53): [True: 40.4k, False: 284k]
  ------------------
 1020|   291k|            pixel *const emu_edge_buf = bitfn(t->scratch.emu_edge);
  ------------------
  |  |   51|   291k|#define bitfn(x) x##_8bpc
  ------------------
 1021|   291k|            f->dsp->mc.emu_edge(right - left + 7, bottom - top + 7,
 1022|   291k|                                w, h, left - 3, top - 3,
 1023|   291k|                                emu_edge_buf, 320 * sizeof(pixel),
 1024|   291k|                                refp->p.data[pl], ref_stride);
 1025|   291k|            ref = &emu_edge_buf[320 * 3 + 3];
 1026|   291k|            ref_stride = 320 * sizeof(pixel);
 1027|   291k|            if (DEBUG_BLOCK_INFO) printf("Emu\n");
  ------------------
  |  |   34|   291k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 291k]
  |  |  ------------------
  |  |   35|   291k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   291k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1028|   291k|        } else {
 1029|   285k|            ref = ((pixel *) refp->p.data[pl]) + PXSTRIDE(ref_stride) * top + left;
  ------------------
  |  |   53|   285k|#define PXSTRIDE(x) (x)
  ------------------
 1030|   285k|        }
 1031|       |
 1032|   577k|        if (dst8 != NULL) {
  ------------------
  |  Branch (1032:13): [True: 505k, False: 72.1k]
  ------------------
 1033|   505k|            f->dsp->mc.mc_scaled[filter_2d](dst8, dst_stride, ref, ref_stride,
 1034|   505k|                                            bw4 * h_mul, bh4 * v_mul,
 1035|   505k|                                            pos_x & 0x3ff, pos_y & 0x3ff,
 1036|   505k|                                            f->svc[refidx][0].step,
 1037|   505k|                                            f->svc[refidx][1].step
 1038|   505k|                                            HIGHBD_CALL_SUFFIX);
 1039|   505k|        } else {
 1040|  72.1k|            f->dsp->mc.mct_scaled[filter_2d](dst16, ref, ref_stride,
 1041|  72.1k|                                             bw4 * h_mul, bh4 * v_mul,
 1042|  72.1k|                                             pos_x & 0x3ff, pos_y & 0x3ff,
 1043|  72.1k|                                             f->svc[refidx][0].step,
 1044|  72.1k|                                             f->svc[refidx][1].step
 1045|  72.1k|                                             HIGHBD_CALL_SUFFIX);
 1046|  72.1k|        }
 1047|   577k|    }
 1048|       |
 1049|  5.82M|    return 0;
 1050|  5.81M|}
recon_tmpl.c:warp_affine:
 1120|   804k|{
 1121|   804k|    assert((dst8 != NULL) ^ (dst16 != NULL));
  ------------------
  |  Branch (1121:5): [True: 804k, False: 18.4E]
  ------------------
 1122|   804k|    const Dav1dFrameContext *const f = t->f;
 1123|   804k|    const Dav1dDSPContext *const dsp = f->dsp;
 1124|   804k|    const int ss_ver = !!pl && f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
  ------------------
  |  Branch (1124:24): [True: 163k, False: 641k]
  |  Branch (1124:32): [True: 139k, False: 23.5k]
  ------------------
 1125|   804k|    const int ss_hor = !!pl && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
  ------------------
  |  Branch (1125:24): [True: 163k, False: 641k]
  |  Branch (1125:32): [True: 140k, False: 23.0k]
  ------------------
 1126|   804k|    const int h_mul = 4 >> ss_hor, v_mul = 4 >> ss_ver;
 1127|   804k|    assert(!((b_dim[0] * h_mul) & 7) && !((b_dim[1] * v_mul) & 7));
  ------------------
  |  Branch (1127:5): [True: 805k, False: 18.4E]
  |  Branch (1127:5): [True: 805k, False: 5]
  ------------------
 1128|   805k|    const int32_t *const mat = wmp->matrix;
 1129|   805k|    const int width = (refp->p.p.w + ss_hor) >> ss_hor;
 1130|   805k|    const int height = (refp->p.p.h + ss_ver) >> ss_ver;
 1131|       |
 1132|  6.37M|    for (int y = 0; y < b_dim[1] * v_mul; y += 8) {
  ------------------
  |  Branch (1132:21): [True: 5.56M, False: 805k]
  ------------------
 1133|  5.56M|        const int src_y = t->by * 4 + ((y + 4) << ss_ver);
 1134|  5.56M|        const int64_t mat3_y = (int64_t) mat[3] * src_y + mat[0];
 1135|  5.56M|        const int64_t mat5_y = (int64_t) mat[5] * src_y + mat[1];
 1136|  54.0M|        for (int x = 0; x < b_dim[0] * h_mul; x += 8) {
  ------------------
  |  Branch (1136:25): [True: 48.4M, False: 5.56M]
  ------------------
 1137|       |            // calculate transformation relative to center of 8x8 block in
 1138|       |            // luma pixel units
 1139|  48.4M|            const int src_x = t->bx * 4 + ((x + 4) << ss_hor);
 1140|  48.4M|            const int64_t mvx = ((int64_t) mat[2] * src_x + mat3_y) >> ss_hor;
 1141|  48.4M|            const int64_t mvy = ((int64_t) mat[4] * src_x + mat5_y) >> ss_ver;
 1142|       |
 1143|  48.4M|            const int dx = (int) (mvx >> 16) - 4;
 1144|  48.4M|            const int mx = (((int) mvx & 0xffff) - wmp->u.p.alpha * 4 -
 1145|  48.4M|                                                   wmp->u.p.beta  * 7) & ~0x3f;
 1146|  48.4M|            const int dy = (int) (mvy >> 16) - 4;
 1147|  48.4M|            const int my = (((int) mvy & 0xffff) - wmp->u.p.gamma * 4 -
 1148|  48.4M|                                                   wmp->u.p.delta * 4) & ~0x3f;
 1149|       |
 1150|  48.4M|            const pixel *ref_ptr;
 1151|  48.4M|            ptrdiff_t ref_stride = refp->p.stride[!!pl];
 1152|       |
 1153|  48.4M|            if (dx < 3 || dx + 8 + 4 > width || dy < 3 || dy + 8 + 4 > height) {
  ------------------
  |  Branch (1153:17): [True: 4.11M, False: 44.3M]
  |  Branch (1153:27): [True: 11.3M, False: 32.9M]
  |  Branch (1153:49): [True: 96.0k, False: 32.8M]
  |  Branch (1153:59): [True: 134k, False: 32.7M]
  ------------------
 1154|  16.5M|                pixel *const emu_edge_buf = bitfn(t->scratch.emu_edge);
  ------------------
  |  |   51|  16.5M|#define bitfn(x) x##_8bpc
  ------------------
 1155|  16.5M|                f->dsp->mc.emu_edge(15, 15, width, height, dx - 3, dy - 3,
 1156|  16.5M|                                    emu_edge_buf, 32 * sizeof(pixel),
 1157|  16.5M|                                    refp->p.data[pl], ref_stride);
 1158|  16.5M|                ref_ptr = &emu_edge_buf[32 * 3 + 3];
 1159|  16.5M|                ref_stride = 32 * sizeof(pixel);
 1160|  31.9M|            } else {
 1161|  31.9M|                ref_ptr = ((pixel *) refp->p.data[pl]) + PXSTRIDE(ref_stride) * dy + dx;
  ------------------
  |  |   53|  31.9M|#define PXSTRIDE(x) (x)
  ------------------
 1162|  31.9M|            }
 1163|  48.4M|            if (dst16 != NULL)
  ------------------
  |  Branch (1163:17): [True: 128k, False: 48.3M]
  ------------------
 1164|   128k|                dsp->mc.warp8x8t(&dst16[x], dstride, ref_ptr, ref_stride,
 1165|   128k|                                 wmp->u.abcd, mx, my HIGHBD_CALL_SUFFIX);
 1166|  48.3M|            else
 1167|  48.3M|                dsp->mc.warp8x8(&dst8[x], dstride, ref_ptr, ref_stride,
 1168|  48.3M|                                wmp->u.abcd, mx, my HIGHBD_CALL_SUFFIX);
 1169|  48.4M|        }
 1170|  5.56M|        if (dst8) dst8  += 8 * PXSTRIDE(dstride);
  ------------------
  |  |   53|  5.51M|#define PXSTRIDE(x) (x)
  ------------------
  |  Branch (1170:13): [True: 5.51M, False: 50.9k]
  ------------------
 1171|  50.9k|        else      dst16 += 8 * dstride;
 1172|  5.56M|    }
 1173|   805k|    return 0;
 1174|   804k|}
recon_tmpl.c:obmc:
 1056|   760k|{
 1057|   760k|    assert(!(t->bx & 1) && !(t->by & 1));
  ------------------
  |  Branch (1057:5): [True: 760k, False: 18.4E]
  |  Branch (1057:5): [True: 760k, False: 18.4E]
  ------------------
 1058|   760k|    const Dav1dFrameContext *const f = t->f;
 1059|   760k|    /*const*/ refmvs_block **r = &t->rt.r[(t->by & 31) + 5];
 1060|   760k|    pixel *const lap = bitfn(t->scratch.lap);
  ------------------
  |  |   51|   760k|#define bitfn(x) x##_8bpc
  ------------------
 1061|   760k|    const int ss_ver = !!pl && f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
  ------------------
  |  Branch (1061:24): [True: 491k, False: 269k]
  |  Branch (1061:32): [True: 417k, False: 73.5k]
  ------------------
 1062|   760k|    const int ss_hor = !!pl && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
  ------------------
  |  Branch (1062:24): [True: 491k, False: 269k]
  |  Branch (1062:32): [True: 422k, False: 68.6k]
  ------------------
 1063|   760k|    const int h_mul = 4 >> ss_hor, v_mul = 4 >> ss_ver;
 1064|   760k|    int res;
 1065|       |
 1066|   760k|    if (t->by > t->ts->tiling.row_start &&
  ------------------
  |  Branch (1066:9): [True: 743k, False: 17.4k]
  ------------------
 1067|   743k|        (!pl || b_dim[0] * h_mul + b_dim[1] * v_mul >= 16))
  ------------------
  |  Branch (1067:10): [True: 263k, False: 479k]
  |  Branch (1067:17): [True: 181k, False: 298k]
  ------------------
 1068|   445k|    {
 1069|   954k|        for (int i = 0, x = 0; x < w4 && i < imin(b_dim[2], 4); ) {
  ------------------
  |  Branch (1069:32): [True: 510k, False: 443k]
  |  Branch (1069:42): [True: 508k, False: 1.74k]
  ------------------
 1070|       |            // only odd blocks are considered for overlap handling, hence +1
 1071|   508k|            const refmvs_block *const a_r = &r[-1][t->bx + x + 1];
 1072|   508k|            const uint8_t *const a_b_dim = dav1d_block_dimensions[a_r->bs];
 1073|   508k|            const int step4 = iclip(a_b_dim[0], 2, 16);
 1074|       |
 1075|   508k|            if (a_r->ref.ref[0] > 0) {
  ------------------
  |  Branch (1075:17): [True: 476k, False: 32.5k]
  ------------------
 1076|   476k|                const int ow4 = imin(step4, b_dim[0]);
 1077|   476k|                const int oh4 = imin(b_dim[1], 16) >> 1;
 1078|   476k|                res = mc(t, lap, NULL, ow4 * h_mul * sizeof(pixel), ow4, (oh4 * 3 + 3) >> 2,
 1079|   476k|                         t->bx + x, t->by, pl, a_r->mv.mv[0],
 1080|   476k|                         &f->refp[a_r->ref.ref[0] - 1], a_r->ref.ref[0] - 1,
 1081|   476k|                         dav1d_filter_2d[t->a->filter[1][bx4 + x + 1]][t->a->filter[0][bx4 + x + 1]]);
 1082|   476k|                if (res) return res;
  ------------------
  |  Branch (1082:21): [True: 0, False: 476k]
  ------------------
 1083|   476k|                f->dsp->mc.blend_h(&dst[x * h_mul], dst_stride, lap,
 1084|   476k|                                   h_mul * ow4, v_mul * oh4);
 1085|   476k|                i++;
 1086|   476k|            }
 1087|   508k|            x += step4;
 1088|   508k|        }
 1089|   445k|    }
 1090|       |
 1091|   760k|    if (t->bx > t->ts->tiling.col_start)
  ------------------
  |  Branch (1091:9): [True: 734k, False: 26.6k]
  ------------------
 1092|  1.53M|        for (int i = 0, y = 0; y < h4 && i < imin(b_dim[3], 4); ) {
  ------------------
  |  Branch (1092:32): [True: 807k, False: 732k]
  |  Branch (1092:42): [True: 805k, False: 2.15k]
  ------------------
 1093|       |            // only odd blocks are considered for overlap handling, hence +1
 1094|   805k|            const refmvs_block *const l_r = &r[y + 1][t->bx - 1];
 1095|   805k|            const uint8_t *const l_b_dim = dav1d_block_dimensions[l_r->bs];
 1096|   805k|            const int step4 = iclip(l_b_dim[1], 2, 16);
 1097|       |
 1098|   805k|            if (l_r->ref.ref[0] > 0) {
  ------------------
  |  Branch (1098:17): [True: 747k, False: 57.3k]
  ------------------
 1099|   747k|                const int ow4 = imin(b_dim[0], 16) >> 1;
 1100|   747k|                const int oh4 = imin(step4, b_dim[1]);
 1101|   747k|                res = mc(t, lap, NULL, h_mul * ow4 * sizeof(pixel), ow4, oh4,
 1102|   747k|                         t->bx, t->by + y, pl, l_r->mv.mv[0],
 1103|   747k|                         &f->refp[l_r->ref.ref[0] - 1], l_r->ref.ref[0] - 1,
 1104|   747k|                         dav1d_filter_2d[t->l.filter[1][by4 + y + 1]][t->l.filter[0][by4 + y + 1]]);
 1105|   747k|                if (res) return res;
  ------------------
  |  Branch (1105:21): [True: 0, False: 747k]
  ------------------
 1106|   747k|                f->dsp->mc.blend_v(&dst[y * v_mul * PXSTRIDE(dst_stride)],
  ------------------
  |  |   53|   747k|#define PXSTRIDE(x) (x)
  ------------------
 1107|   747k|                                   dst_stride, lap, h_mul * ow4, v_mul * oh4);
 1108|   747k|                i++;
 1109|   747k|            }
 1110|   805k|            y += step4;
 1111|   805k|        }
 1112|   760k|    return 0;
 1113|   760k|}
dav1d_read_coef_blocks_16bpc:
  826|  3.97M|{
  827|  3.97M|    const Dav1dFrameContext *const f = t->f;
  828|  3.97M|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
  829|  3.97M|    const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
  830|  3.97M|    const int bx4 = t->bx & 31, by4 = t->by & 31;
  831|  3.97M|    const int cbx4 = bx4 >> ss_hor, cby4 = by4 >> ss_ver;
  832|  3.97M|    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
  833|  3.97M|    const int bw4 = b_dim[0], bh4 = b_dim[1];
  834|  3.97M|    const int cbw4 = (bw4 + ss_hor) >> ss_hor, cbh4 = (bh4 + ss_ver) >> ss_ver;
  835|  3.97M|    const int has_chroma = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400 &&
  ------------------
  |  Branch (835:28): [True: 2.83M, False: 1.14M]
  ------------------
  836|  2.83M|                           (bw4 > ss_hor || t->bx & 1) &&
  ------------------
  |  Branch (836:29): [True: 2.60M, False: 228k]
  |  Branch (836:45): [True: 114k, False: 114k]
  ------------------
  837|  2.72M|                           (bh4 > ss_ver || t->by & 1);
  ------------------
  |  Branch (837:29): [True: 2.48M, False: 236k]
  |  Branch (837:45): [True: 118k, False: 118k]
  ------------------
  838|       |
  839|  3.97M|    if (b->skip) {
  ------------------
  |  Branch (839:9): [True: 1.54M, False: 2.43M]
  ------------------
  840|  1.54M|        BlockContext *const a = t->a;
  841|  1.54M|        dav1d_memset_pow2[b_dim[2]](&a->lcoef[bx4], 0x40);
  842|  1.54M|        dav1d_memset_pow2[b_dim[3]](&t->l.lcoef[by4], 0x40);
  843|  1.54M|        if (has_chroma) {
  ------------------
  |  Branch (843:13): [True: 865k, False: 678k]
  ------------------
  844|   865k|            dav1d_memset_pow2_fn memset_cw = dav1d_memset_pow2[ulog2(cbw4)];
  845|   865k|            dav1d_memset_pow2_fn memset_ch = dav1d_memset_pow2[ulog2(cbh4)];
  846|   865k|            memset_cw(&a->ccoef[0][cbx4], 0x40);
  847|   865k|            memset_cw(&a->ccoef[1][cbx4], 0x40);
  848|   865k|            memset_ch(&t->l.ccoef[0][cby4], 0x40);
  849|   865k|            memset_ch(&t->l.ccoef[1][cby4], 0x40);
  850|   865k|        }
  851|  1.54M|        return;
  852|  1.54M|    }
  853|       |
  854|  2.43M|    Dav1dTileState *const ts = t->ts;
  855|  2.43M|    const int w4 = imin(bw4, f->bw - t->bx), h4 = imin(bh4, f->bh - t->by);
  856|  2.43M|    const int cw4 = (w4 + ss_hor) >> ss_hor, ch4 = (h4 + ss_ver) >> ss_ver;
  857|  2.43M|    assert(t->frame_thread.pass == 1);
  ------------------
  |  Branch (857:5): [True: 2.41M, False: 18.9k]
  ------------------
  858|  2.43M|    assert(!b->skip);
  ------------------
  |  Branch (858:5): [True: 2.41M, False: 18.4E]
  ------------------
  859|  2.41M|    const TxfmInfo *const uv_t_dim = &dav1d_txfm_dimensions[b->uvtx];
  860|  2.41M|    const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[b->intra ? b->tx : b->max_ytx];
  ------------------
  |  Branch (860:58): [True: 1.68M, False: 723k]
  ------------------
  861|  2.41M|    const uint16_t tx_split[2] = { b->tx_split0, b->tx_split1 };
  862|       |
  863|  4.88M|    for (int init_y = 0; init_y < h4; init_y += 16) {
  ------------------
  |  Branch (863:26): [True: 2.46M, False: 2.41M]
  ------------------
  864|  2.46M|        const int sub_h4 = imin(h4, 16 + init_y);
  865|  5.04M|        for (int init_x = 0; init_x < w4; init_x += 16) {
  ------------------
  |  Branch (865:30): [True: 2.57M, False: 2.46M]
  ------------------
  866|  2.57M|            const int sub_w4 = imin(w4, init_x + 16);
  867|  2.57M|            int y_off = !!init_y, y, x;
  868|  5.50M|            for (y = init_y, t->by += init_y; y < sub_h4;
  ------------------
  |  Branch (868:47): [True: 2.93M, False: 2.57M]
  ------------------
  869|  2.93M|                 y += t_dim->h, t->by += t_dim->h, y_off++)
  870|  2.93M|            {
  871|  2.93M|                int x_off = !!init_x;
  872|  9.22M|                for (x = init_x, t->bx += init_x; x < sub_w4;
  ------------------
  |  Branch (872:51): [True: 6.29M, False: 2.93M]
  ------------------
  873|  6.29M|                     x += t_dim->w, t->bx += t_dim->w, x_off++)
  874|  6.29M|                {
  875|  6.29M|                    if (!b->intra) {
  ------------------
  |  Branch (875:25): [True: 2.10M, False: 4.18M]
  ------------------
  876|  2.10M|                        read_coef_tree(t, bs, b, b->max_ytx, 0, tx_split,
  877|  2.10M|                                       x_off, y_off, NULL);
  878|  4.18M|                    } else {
  879|  4.18M|                        uint8_t cf_ctx = 0x40;
  880|  4.18M|                        enum TxfmType txtp;
  881|  4.18M|                        const int eob =
  882|  4.18M|                            decode_coefs(t, &t->a->lcoef[bx4 + x],
  883|  4.18M|                                         &t->l.lcoef[by4 + y], b->tx, bs, b, 1,
  884|  4.18M|                                         0, ts->frame_thread[1].cf, &txtp, &cf_ctx);
  885|  4.18M|                        if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  4.18M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 4.18M]
  |  |  ------------------
  |  |   35|  4.18M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  4.18M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  886|      0|                            printf("Post-y-cf-blk[tx=%d,txtp=%d,eob=%d]: r=%d\n",
  887|      0|                                   b->tx, txtp, eob, ts->msac.rng);
  888|  4.18M|                        *ts->frame_thread[1].cbi++ = eob * (1 << 5) + txtp;
  889|  4.18M|                        ts->frame_thread[1].cf += imin(t_dim->w, 8) * imin(t_dim->h, 8) * 16;
  890|  4.18M|                        dav1d_memset_likely_pow2(&t->a->lcoef[bx4 + x], cf_ctx, imin(t_dim->w, f->bw - t->bx));
  891|  4.18M|                        dav1d_memset_likely_pow2(&t->l.lcoef[by4 + y], cf_ctx, imin(t_dim->h, f->bh - t->by));
  892|  4.18M|                    }
  893|  6.29M|                }
  894|  2.93M|                t->bx -= x;
  895|  2.93M|            }
  896|  2.57M|            t->by -= y;
  897|       |
  898|  2.57M|            if (!has_chroma) continue;
  ------------------
  |  Branch (898:17): [True: 722k, False: 1.85M]
  ------------------
  899|       |
  900|  1.85M|            const int sub_ch4 = imin(ch4, (init_y + 16) >> ss_ver);
  901|  1.85M|            const int sub_cw4 = imin(cw4, (init_x + 16) >> ss_hor);
  902|  5.52M|            for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (902:30): [True: 3.67M, False: 1.85M]
  ------------------
  903|  8.07M|                for (y = init_y >> ss_ver, t->by += init_y; y < sub_ch4;
  ------------------
  |  Branch (903:61): [True: 4.39M, False: 3.67M]
  ------------------
  904|  4.39M|                     y += uv_t_dim->h, t->by += uv_t_dim->h << ss_ver)
  905|  4.39M|                {
  906|  14.5M|                    for (x = init_x >> ss_hor, t->bx += init_x; x < sub_cw4;
  ------------------
  |  Branch (906:65): [True: 10.1M, False: 4.39M]
  ------------------
  907|  10.1M|                         x += uv_t_dim->w, t->bx += uv_t_dim->w << ss_hor)
  908|  10.1M|                    {
  909|  10.1M|                        uint8_t cf_ctx = 0x40;
  910|  10.1M|                        enum TxfmType txtp;
  911|  10.1M|                        if (!b->intra)
  ------------------
  |  Branch (911:29): [True: 3.70M, False: 6.45M]
  ------------------
  912|  3.70M|                            txtp = t->scratch.txtp_map[(by4 + (y << ss_ver)) * 32 +
  913|  3.70M|                                                        bx4 + (x << ss_hor)];
  914|  10.1M|                        const int eob =
  915|  10.1M|                            decode_coefs(t, &t->a->ccoef[pl][cbx4 + x],
  916|  10.1M|                                         &t->l.ccoef[pl][cby4 + y], b->uvtx, bs,
  917|  10.1M|                                         b, b->intra, 1 + pl, ts->frame_thread[1].cf,
  918|  10.1M|                                         &txtp, &cf_ctx);
  919|  10.1M|                        if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  10.1M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 10.1M]
  |  |  ------------------
  |  |   35|  10.1M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  10.1M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  920|      0|                            printf("Post-uv-cf-blk[pl=%d,tx=%d,"
  921|      0|                                   "txtp=%d,eob=%d]: r=%d\n",
  922|      0|                                   pl, b->uvtx, txtp, eob, ts->msac.rng);
  923|  10.1M|                        *ts->frame_thread[1].cbi++ = eob * (1 << 5) + txtp;
  924|  10.1M|                        ts->frame_thread[1].cf += uv_t_dim->w * uv_t_dim->h * 16;
  925|  10.1M|                        int ctw = imin(uv_t_dim->w, (f->bw - t->bx + ss_hor) >> ss_hor);
  926|  10.1M|                        int cth = imin(uv_t_dim->h, (f->bh - t->by + ss_ver) >> ss_ver);
  927|  10.1M|                        dav1d_memset_likely_pow2(&t->a->ccoef[pl][cbx4 + x], cf_ctx, ctw);
  928|  10.1M|                        dav1d_memset_likely_pow2(&t->l.ccoef[pl][cby4 + y], cf_ctx, cth);
  929|  10.1M|                    }
  930|  4.39M|                    t->bx -= x << ss_hor;
  931|  4.39M|                }
  932|  3.67M|                t->by -= y << ss_ver;
  933|  3.67M|            }
  934|  1.85M|        }
  935|  2.46M|    }
  936|  2.41M|}
dav1d_recon_b_intra_16bpc:
 1179|   946k|{
 1180|   946k|    Dav1dTileState *const ts = t->ts;
 1181|   946k|    const Dav1dFrameContext *const f = t->f;
 1182|   946k|    const Dav1dDSPContext *const dsp = f->dsp;
 1183|   946k|    const int bx4 = t->bx & 31, by4 = t->by & 31;
 1184|   946k|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
 1185|   946k|    const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
 1186|   946k|    const int cbx4 = bx4 >> ss_hor, cby4 = by4 >> ss_ver;
 1187|   946k|    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
 1188|   946k|    const int bw4 = b_dim[0], bh4 = b_dim[1];
 1189|   946k|    const int w4 = imin(bw4, f->bw - t->bx), h4 = imin(bh4, f->bh - t->by);
 1190|   946k|    const int cw4 = (w4 + ss_hor) >> ss_hor, ch4 = (h4 + ss_ver) >> ss_ver;
 1191|   946k|    const int has_chroma = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400 &&
  ------------------
  |  Branch (1191:28): [True: 581k, False: 365k]
  ------------------
 1192|   581k|                           (bw4 > ss_hor || t->bx & 1) &&
  ------------------
  |  Branch (1192:29): [True: 512k, False: 69.5k]
  |  Branch (1192:45): [True: 34.9k, False: 34.5k]
  ------------------
 1193|   547k|                           (bh4 > ss_ver || t->by & 1);
  ------------------
  |  Branch (1193:29): [True: 470k, False: 77.1k]
  |  Branch (1193:45): [True: 38.7k, False: 38.4k]
  ------------------
 1194|   946k|    const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[b->tx];
 1195|   946k|    const TxfmInfo *const uv_t_dim = &dav1d_txfm_dimensions[b->uvtx];
 1196|       |
 1197|       |    // coefficient coding
 1198|   946k|    pixel *const edge = bitfn(t->scratch.edge) + 128;
  ------------------
  |  |   77|   946k|#define bitfn(x) x##_16bpc
  ------------------
 1199|   946k|    const int cbw4 = (bw4 + ss_hor) >> ss_hor, cbh4 = (bh4 + ss_ver) >> ss_ver;
 1200|       |
 1201|   946k|    const int intra_edge_filter_flag = f->seq_hdr->intra_edge_filter << 10;
 1202|       |
 1203|  1.97M|    for (int init_y = 0; init_y < h4; init_y += 16) {
  ------------------
  |  Branch (1203:26): [True: 1.02M, False: 947k]
  ------------------
 1204|  1.02M|        const int sub_h4 = imin(h4, 16 + init_y);
 1205|  1.02M|        const int sub_ch4 = imin(ch4, (init_y + 16) >> ss_ver);
 1206|  2.19M|        for (int init_x = 0; init_x < w4; init_x += 16) {
  ------------------
  |  Branch (1206:30): [True: 1.16M, False: 1.02M]
  ------------------
 1207|  1.16M|            if (b->pal_sz[0]) {
  ------------------
  |  Branch (1207:17): [True: 9.94k, False: 1.15M]
  ------------------
 1208|  9.94k|                pixel *dst = ((pixel *) f->cur.data[0]) +
 1209|  9.94k|                             4 * (t->by * PXSTRIDE(f->cur.stride[0]) + t->bx);
 1210|  9.94k|                const uint8_t *pal_idx;
 1211|  9.94k|                if (t->frame_thread.pass) {
  ------------------
  |  Branch (1211:21): [True: 9.94k, False: 1]
  ------------------
 1212|  9.94k|                    const int p = t->frame_thread.pass & 1;
 1213|  9.94k|                    assert(ts->frame_thread[p].pal_idx);
  ------------------
  |  Branch (1213:21): [True: 9.94k, False: 0]
  ------------------
 1214|  9.94k|                    pal_idx = ts->frame_thread[p].pal_idx;
 1215|  9.94k|                    ts->frame_thread[p].pal_idx += bw4 * bh4 * 8;
 1216|  9.94k|                } else {
 1217|      1|                    pal_idx = t->scratch.pal_idx_y;
 1218|      1|                }
 1219|  9.94k|                const pixel *const pal = t->frame_thread.pass ?
  ------------------
  |  Branch (1219:42): [True: 9.94k, False: 1]
  ------------------
 1220|  9.94k|                    f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
 1221|  9.94k|                                        ((t->bx >> 1) + (t->by & 1))][0] :
 1222|  9.94k|                    bytefn(t->scratch.pal)[0];
  ------------------
  |  |   87|      1|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  9.94k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 1223|  9.94k|                f->dsp->ipred.pal_pred(dst, f->cur.stride[0], pal,
 1224|  9.94k|                                       pal_idx, bw4 * 4, bh4 * 4);
 1225|  9.94k|                if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   34|  9.94k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 9.94k]
  |  |  ------------------
  |  |   35|  9.94k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  9.94k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                              if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1226|      0|                    hex_dump(dst, PXSTRIDE(f->cur.stride[0]),
 1227|      0|                             bw4 * 4, bh4 * 4, "y-pal-pred");
 1228|  9.94k|            }
 1229|       |
 1230|  1.16M|            const int intra_flags = (sm_flag(t->a, bx4) |
 1231|  1.16M|                                     sm_flag(&t->l, by4) |
 1232|  1.16M|                                     intra_edge_filter_flag);
 1233|  1.16M|            const int sb_has_tr = init_x + 16 < w4 ? 1 : init_y ? 0 :
  ------------------
  |  Branch (1233:35): [True: 146k, False: 1.02M]
  |  Branch (1233:58): [True: 78.2k, False: 945k]
  ------------------
 1234|  1.02M|                              intra_edge_flags & EDGE_I444_TOP_HAS_RIGHT;
 1235|  1.16M|            const int sb_has_bl = init_x ? 0 : init_y + 16 < h4 ? 1 :
  ------------------
  |  Branch (1235:35): [True: 146k, False: 1.02M]
  |  Branch (1235:48): [True: 78.2k, False: 945k]
  ------------------
 1236|  1.02M|                              intra_edge_flags & EDGE_I444_LEFT_HAS_BOTTOM;
 1237|  1.16M|            int y, x;
 1238|  1.16M|            const int sub_w4 = imin(w4, init_x + 16);
 1239|  2.64M|            for (y = init_y, t->by += init_y; y < sub_h4;
  ------------------
  |  Branch (1239:47): [True: 1.47M, False: 1.17M]
  ------------------
 1240|  1.47M|                 y += t_dim->h, t->by += t_dim->h)
 1241|  1.47M|            {
 1242|  1.47M|                pixel *dst = ((pixel *) f->cur.data[0]) +
 1243|  1.47M|                               4 * (t->by * PXSTRIDE(f->cur.stride[0]) +
 1244|  1.47M|                                    t->bx + init_x);
 1245|  6.02M|                for (x = init_x, t->bx += init_x; x < sub_w4;
  ------------------
  |  Branch (1245:51): [True: 4.54M, False: 1.47M]
  ------------------
 1246|  4.54M|                     x += t_dim->w, t->bx += t_dim->w)
 1247|  4.54M|                {
 1248|  4.54M|                    if (b->pal_sz[0]) goto skip_y_pred;
  ------------------
  |  Branch (1248:25): [True: 65.5k, False: 4.47M]
  ------------------
 1249|       |
 1250|  4.47M|                    int angle = b->y_angle;
 1251|  4.47M|                    const enum EdgeFlags edge_flags =
 1252|  4.47M|                        (((y > init_y || !sb_has_tr) && (x + t_dim->w >= sub_w4)) ?
  ------------------
  |  Branch (1252:28): [True: 3.00M, False: 1.47M]
  |  Branch (1252:42): [True: 471k, False: 1.00M]
  |  Branch (1252:57): [True: 655k, False: 2.82M]
  ------------------
 1253|  3.82M|                             0 : EDGE_I444_TOP_HAS_RIGHT) |
 1254|  4.47M|                        ((x > init_x || (!sb_has_bl && y + t_dim->h >= sub_h4)) ?
  ------------------
  |  Branch (1254:27): [True: 3.02M, False: 1.45M]
  |  Branch (1254:42): [True: 881k, False: 571k]
  |  Branch (1254:56): [True: 670k, False: 210k]
  ------------------
 1255|  3.69M|                             0 : EDGE_I444_LEFT_HAS_BOTTOM);
 1256|  4.47M|                    const pixel *top_sb_edge = NULL;
 1257|  4.47M|                    if (!(t->by & (f->sb_step - 1))) {
  ------------------
  |  Branch (1257:25): [True: 542k, False: 3.93M]
  ------------------
 1258|   542k|                        top_sb_edge = f->ipred_edge[0];
 1259|   542k|                        const int sby = t->by >> f->sb_shift;
 1260|   542k|                        top_sb_edge += f->sb128w * 128 * (sby - 1);
 1261|   542k|                    }
 1262|  4.47M|                    const enum IntraPredMode m =
 1263|  4.47M|                        bytefn(dav1d_prepare_intra_edges)(t->bx,
  ------------------
  |  |   87|  4.47M|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  4.47M|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 1264|  4.47M|                                                          t->bx > ts->tiling.col_start,
 1265|  4.47M|                                                          t->by,
 1266|  4.47M|                                                          t->by > ts->tiling.row_start,
 1267|  4.47M|                                                          ts->tiling.col_end,
 1268|  4.47M|                                                          ts->tiling.row_end,
 1269|  4.47M|                                                          edge_flags, dst,
 1270|  4.47M|                                                          f->cur.stride[0], top_sb_edge,
 1271|  4.47M|                                                          b->y_mode, &angle,
 1272|  4.47M|                                                          t_dim->w, t_dim->h,
 1273|  4.47M|                                                          f->seq_hdr->intra_edge_filter,
 1274|  4.47M|                                                          edge HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  4.47M|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1275|  4.47M|                    dsp->ipred.intra_pred[m](dst, f->cur.stride[0], edge,
 1276|  4.47M|                                             t_dim->w * 4, t_dim->h * 4,
 1277|  4.47M|                                             angle | intra_flags,
 1278|  4.47M|                                             4 * f->bw - 4 * t->bx,
 1279|  4.47M|                                             4 * f->bh - 4 * t->by
 1280|  4.47M|                                             HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  4.47M|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1281|       |
 1282|  4.47M|                    if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   34|  4.47M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 4.47M]
  |  |  ------------------
  |  |   35|  4.47M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  4.47M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                                  if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1283|      0|                        hex_dump(edge - t_dim->h * 4, t_dim->h * 4,
 1284|      0|                                 t_dim->h * 4, 2, "l");
 1285|      0|                        hex_dump(edge, 0, 1, 1, "tl");
 1286|      0|                        hex_dump(edge + 1, t_dim->w * 4,
 1287|      0|                                 t_dim->w * 4, 2, "t");
 1288|      0|                        hex_dump(dst, f->cur.stride[0],
 1289|      0|                                 t_dim->w * 4, t_dim->h * 4, "y-intra-pred");
 1290|      0|                    }
 1291|       |
 1292|  4.54M|                skip_y_pred: {}
 1293|  4.54M|                    if (!b->skip) {
  ------------------
  |  Branch (1293:25): [True: 999k, False: 3.54M]
  ------------------
 1294|   999k|                        coef *cf;
 1295|   999k|                        int eob;
 1296|   999k|                        enum TxfmType txtp;
 1297|   999k|                        if (t->frame_thread.pass) {
  ------------------
  |  Branch (1297:29): [True: 999k, False: 18.4E]
  ------------------
 1298|   999k|                            const int p = t->frame_thread.pass & 1;
 1299|   999k|                            const int cbi = *ts->frame_thread[p].cbi++;
 1300|   999k|                            cf = ts->frame_thread[p].cf;
 1301|   999k|                            ts->frame_thread[p].cf += imin(t_dim->w, 8) * imin(t_dim->h, 8) * 16;
 1302|   999k|                            eob  = cbi >> 5;
 1303|   999k|                            txtp = cbi & 0x1f;
 1304|  18.4E|                        } else {
 1305|  18.4E|                            uint8_t cf_ctx;
 1306|  18.4E|                            cf = bitfn(t->cf);
  ------------------
  |  |   77|  18.4E|#define bitfn(x) x##_16bpc
  ------------------
 1307|  18.4E|                            eob = decode_coefs(t, &t->a->lcoef[bx4 + x],
 1308|  18.4E|                                               &t->l.lcoef[by4 + y], b->tx, bs,
 1309|  18.4E|                                               b, 1, 0, cf, &txtp, &cf_ctx);
 1310|  18.4E|                            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  18.4E|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 18.4E]
  |  |  ------------------
  |  |   35|  18.4E|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  18.4E|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1311|      0|                                printf("Post-y-cf-blk[tx=%d,txtp=%d,eob=%d]: r=%d\n",
 1312|      0|                                       b->tx, txtp, eob, ts->msac.rng);
 1313|  18.4E|                            dav1d_memset_likely_pow2(&t->a->lcoef[bx4 + x], cf_ctx, imin(t_dim->w, f->bw - t->bx));
 1314|  18.4E|                            dav1d_memset_likely_pow2(&t->l.lcoef[by4 + y], cf_ctx, imin(t_dim->h, f->bh - t->by));
 1315|  18.4E|                        }
 1316|   999k|                        if (eob >= 0) {
  ------------------
  |  Branch (1316:29): [True: 760k, False: 239k]
  ------------------
 1317|   760k|                            if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   34|   760k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 760k]
  |  |  ------------------
  |  |   35|   760k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   760k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                                          if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1318|      0|                                coef_dump(cf, imin(t_dim->h, 8) * 4,
 1319|      0|                                          imin(t_dim->w, 8) * 4, 3, "dq");
 1320|   760k|                            dsp->itx.itxfm_add[b->tx]
 1321|   760k|                                              [txtp](dst,
 1322|   760k|                                                     f->cur.stride[0],
 1323|   760k|                                                     cf, eob HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|   760k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1324|   760k|                            if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   34|   760k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 760k]
  |  |  ------------------
  |  |   35|   760k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   760k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                                          if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1325|      0|                                hex_dump(dst, f->cur.stride[0],
 1326|      0|                                         t_dim->w * 4, t_dim->h * 4, "recon");
 1327|   760k|                        }
 1328|  3.54M|                    } else if (!t->frame_thread.pass) {
  ------------------
  |  Branch (1328:32): [True: 0, False: 3.54M]
  ------------------
 1329|      0|                        dav1d_memset_pow2[t_dim->lw](&t->a->lcoef[bx4 + x], 0x40);
 1330|      0|                        dav1d_memset_pow2[t_dim->lh](&t->l.lcoef[by4 + y], 0x40);
 1331|      0|                    }
 1332|  4.54M|                    dst += 4 * t_dim->w;
 1333|  4.54M|                }
 1334|  1.47M|                t->bx -= x;
 1335|  1.47M|            }
 1336|  1.17M|            t->by -= y;
 1337|       |
 1338|  1.17M|            if (!has_chroma) continue;
  ------------------
  |  Branch (1338:17): [True: 498k, False: 673k]
  ------------------
 1339|       |
 1340|   673k|            const ptrdiff_t stride = f->cur.stride[1];
 1341|       |
 1342|   673k|            if (b->uv_mode == CFL_PRED) {
  ------------------
  |  Branch (1342:17): [True: 53.6k, False: 620k]
  ------------------
 1343|  53.6k|                assert(!init_x && !init_y);
  ------------------
  |  Branch (1343:17): [True: 53.6k, False: 18.4E]
  |  Branch (1343:17): [True: 53.6k, False: 0]
  ------------------
 1344|       |
 1345|  53.6k|                int16_t *const ac = t->scratch.ac;
 1346|  53.6k|                pixel *y_src = ((pixel *) f->cur.data[0]) + 4 * (t->bx & ~ss_hor) +
 1347|  53.6k|                                 4 * (t->by & ~ss_ver) * PXSTRIDE(f->cur.stride[0]);
 1348|  53.6k|                const ptrdiff_t uv_off = 4 * ((t->bx >> ss_hor) +
 1349|  53.6k|                                              (t->by >> ss_ver) * PXSTRIDE(stride));
 1350|  53.6k|                pixel *const uv_dst[2] = { ((pixel *) f->cur.data[1]) + uv_off,
 1351|  53.6k|                                           ((pixel *) f->cur.data[2]) + uv_off };
 1352|       |
 1353|  53.6k|                const int furthest_r =
 1354|  53.6k|                    ((cw4 << ss_hor) + t_dim->w - 1) & ~(t_dim->w - 1);
 1355|  53.6k|                const int furthest_b =
 1356|  53.6k|                    ((ch4 << ss_ver) + t_dim->h - 1) & ~(t_dim->h - 1);
 1357|  53.6k|                dsp->ipred.cfl_ac[f->cur.p.layout - 1](ac, y_src, f->cur.stride[0],
 1358|  53.6k|                                                         cbw4 - (furthest_r >> ss_hor),
 1359|  53.6k|                                                         cbh4 - (furthest_b >> ss_ver),
 1360|  53.6k|                                                         cbw4 * 4, cbh4 * 4);
 1361|   160k|                for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1361:34): [True: 107k, False: 53.6k]
  ------------------
 1362|   107k|                    if (!b->cfl_alpha[pl]) continue;
  ------------------
  |  Branch (1362:25): [True: 21.3k, False: 85.9k]
  ------------------
 1363|  85.9k|                    int angle = 0;
 1364|  85.9k|                    const pixel *top_sb_edge = NULL;
 1365|  85.9k|                    if (!((t->by & ~ss_ver) & (f->sb_step - 1))) {
  ------------------
  |  Branch (1365:25): [True: 9.91k, False: 76.0k]
  ------------------
 1366|  9.91k|                        top_sb_edge = f->ipred_edge[pl + 1];
 1367|  9.91k|                        const int sby = t->by >> f->sb_shift;
 1368|  9.91k|                        top_sb_edge += f->sb128w * 128 * (sby - 1);
 1369|  9.91k|                    }
 1370|  85.9k|                    const int xpos = t->bx >> ss_hor, ypos = t->by >> ss_ver;
 1371|  85.9k|                    const int xstart = ts->tiling.col_start >> ss_hor;
 1372|  85.9k|                    const int ystart = ts->tiling.row_start >> ss_ver;
 1373|  85.9k|                    const enum IntraPredMode m =
 1374|  85.9k|                        bytefn(dav1d_prepare_intra_edges)(xpos, xpos > xstart,
  ------------------
  |  |   87|  85.9k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  85.9k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 1375|  85.9k|                                                          ypos, ypos > ystart,
 1376|  85.9k|                                                          ts->tiling.col_end >> ss_hor,
 1377|  85.9k|                                                          ts->tiling.row_end >> ss_ver,
 1378|  85.9k|                                                          0, uv_dst[pl], stride,
 1379|  85.9k|                                                          top_sb_edge, DC_PRED, &angle,
 1380|  85.9k|                                                          uv_t_dim->w, uv_t_dim->h, 0,
 1381|  85.9k|                                                          edge HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  85.9k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1382|  85.9k|                    dsp->ipred.cfl_pred[m](uv_dst[pl], stride, edge,
 1383|  85.9k|                                           uv_t_dim->w * 4,
 1384|  85.9k|                                           uv_t_dim->h * 4,
 1385|  85.9k|                                           ac, b->cfl_alpha[pl]
 1386|  85.9k|                                           HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  85.9k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1387|  85.9k|                }
 1388|  53.6k|                if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   34|  53.6k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 53.6k]
  |  |  ------------------
  |  |   35|  53.6k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  53.6k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                              if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1389|      0|                    ac_dump(ac, 4*cbw4, 4*cbh4, "ac");
 1390|      0|                    hex_dump(uv_dst[0], stride, cbw4 * 4, cbh4 * 4, "u-cfl-pred");
 1391|      0|                    hex_dump(uv_dst[1], stride, cbw4 * 4, cbh4 * 4, "v-cfl-pred");
 1392|      0|                }
 1393|   620k|            } else if (b->pal_sz[1]) {
  ------------------
  |  Branch (1393:24): [True: 1.83k, False: 618k]
  ------------------
 1394|  1.83k|                const ptrdiff_t uv_dstoff = 4 * ((t->bx >> ss_hor) +
 1395|  1.83k|                                              (t->by >> ss_ver) * PXSTRIDE(f->cur.stride[1]));
 1396|  1.83k|                const pixel (*pal)[8];
 1397|  1.83k|                const uint8_t *pal_idx;
 1398|  1.83k|                if (t->frame_thread.pass) {
  ------------------
  |  Branch (1398:21): [True: 1.83k, False: 0]
  ------------------
 1399|  1.83k|                    const int p = t->frame_thread.pass & 1;
 1400|  1.83k|                    assert(ts->frame_thread[p].pal_idx);
  ------------------
  |  Branch (1400:21): [True: 1.83k, False: 0]
  ------------------
 1401|  1.83k|                    pal = f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
 1402|  1.83k|                                              ((t->bx >> 1) + (t->by & 1))];
 1403|  1.83k|                    pal_idx = ts->frame_thread[p].pal_idx;
 1404|  1.83k|                    ts->frame_thread[p].pal_idx += cbw4 * cbh4 * 8;
 1405|  1.83k|                } else {
 1406|      0|                    pal = bytefn(t->scratch.pal);
  ------------------
  |  |   87|      0|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|      0|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 1407|      0|                    pal_idx = t->scratch.pal_idx_uv;
 1408|      0|                }
 1409|       |
 1410|  1.83k|                f->dsp->ipred.pal_pred(((pixel *) f->cur.data[1]) + uv_dstoff,
 1411|  1.83k|                                       f->cur.stride[1], pal[1],
 1412|  1.83k|                                       pal_idx, cbw4 * 4, cbh4 * 4);
 1413|  1.83k|                f->dsp->ipred.pal_pred(((pixel *) f->cur.data[2]) + uv_dstoff,
 1414|  1.83k|                                       f->cur.stride[1], pal[2],
 1415|  1.83k|                                       pal_idx, cbw4 * 4, cbh4 * 4);
 1416|  1.83k|                if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   34|  1.83k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 1.83k]
  |  |  ------------------
  |  |   35|  1.83k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  1.83k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                              if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1417|      0|                    hex_dump(((pixel *) f->cur.data[1]) + uv_dstoff,
 1418|      0|                             PXSTRIDE(f->cur.stride[1]),
 1419|      0|                             cbw4 * 4, cbh4 * 4, "u-pal-pred");
 1420|      0|                    hex_dump(((pixel *) f->cur.data[2]) + uv_dstoff,
 1421|      0|                             PXSTRIDE(f->cur.stride[1]),
 1422|      0|                             cbw4 * 4, cbh4 * 4, "v-pal-pred");
 1423|      0|                }
 1424|  1.83k|            }
 1425|       |
 1426|   673k|            const int sm_uv_fl = sm_uv_flag(t->a, cbx4) |
 1427|   673k|                                 sm_uv_flag(&t->l, cby4);
 1428|   673k|            const int uv_sb_has_tr =
 1429|   673k|                ((init_x + 16) >> ss_hor) < cw4 ? 1 : init_y ? 0 :
  ------------------
  |  Branch (1429:17): [True: 106k, False: 567k]
  |  Branch (1429:55): [True: 56.9k, False: 510k]
  ------------------
 1430|   567k|                intra_edge_flags & (EDGE_I420_TOP_HAS_RIGHT >> (f->cur.p.layout - 1));
 1431|   673k|            const int uv_sb_has_bl =
 1432|   673k|                init_x ? 0 : ((init_y + 16) >> ss_ver) < ch4 ? 1 :
  ------------------
  |  Branch (1432:17): [True: 106k, False: 567k]
  |  Branch (1432:30): [True: 56.9k, False: 510k]
  ------------------
 1433|   567k|                intra_edge_flags & (EDGE_I420_LEFT_HAS_BOTTOM >> (f->cur.p.layout - 1));
 1434|   673k|            const int sub_cw4 = imin(cw4, (init_x + 16) >> ss_hor);
 1435|  2.01M|            for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1435:30): [True: 1.34M, False: 672k]
  ------------------
 1436|  3.30M|                for (y = init_y >> ss_ver, t->by += init_y; y < sub_ch4;
  ------------------
  |  Branch (1436:61): [True: 1.95M, False: 1.34M]
  ------------------
 1437|  1.95M|                     y += uv_t_dim->h, t->by += uv_t_dim->h << ss_ver)
 1438|  1.95M|                {
 1439|  1.95M|                    pixel *dst = ((pixel *) f->cur.data[1 + pl]) +
 1440|  1.95M|                                   4 * ((t->by >> ss_ver) * PXSTRIDE(stride) +
 1441|  1.95M|                                        ((t->bx + init_x) >> ss_hor));
 1442|  9.13M|                    for (x = init_x >> ss_hor, t->bx += init_x; x < sub_cw4;
  ------------------
  |  Branch (1442:65): [True: 7.18M, False: 1.95M]
  ------------------
 1443|  7.18M|                         x += uv_t_dim->w, t->bx += uv_t_dim->w << ss_hor)
 1444|  7.18M|                    {
 1445|  7.18M|                        if ((b->uv_mode == CFL_PRED && b->cfl_alpha[pl]) ||
  ------------------
  |  Branch (1445:30): [True: 107k, False: 7.07M]
  |  Branch (1445:56): [True: 85.9k, False: 21.3k]
  ------------------
 1446|  7.09M|                            b->pal_sz[1])
  ------------------
  |  Branch (1446:29): [True: 10.4k, False: 7.08M]
  ------------------
 1447|  96.7k|                        {
 1448|  96.7k|                            goto skip_uv_pred;
 1449|  96.7k|                        }
 1450|       |
 1451|  7.08M|                        int angle = b->uv_angle;
 1452|       |                        // this probably looks weird because we're using
 1453|       |                        // luma flags in a chroma loop, but that's because
 1454|       |                        // prepare_intra_edges() expects luma flags as input
 1455|  7.08M|                        const enum EdgeFlags edge_flags =
 1456|  7.08M|                            (((y > (init_y >> ss_ver) || !uv_sb_has_tr) &&
  ------------------
  |  Branch (1456:32): [True: 5.44M, False: 1.64M]
  |  Branch (1456:58): [True: 461k, False: 1.18M]
  ------------------
 1457|  5.90M|                              (x + uv_t_dim->w >= sub_cw4)) ?
  ------------------
  |  Branch (1457:31): [True: 974k, False: 4.93M]
  ------------------
 1458|  6.11M|                                 0 : EDGE_I444_TOP_HAS_RIGHT) |
 1459|  7.08M|                            ((x > (init_x >> ss_hor) ||
  ------------------
  |  Branch (1459:31): [True: 5.21M, False: 1.86M]
  ------------------
 1460|  1.86M|                              (!uv_sb_has_bl && y + uv_t_dim->h >= sub_ch4)) ?
  ------------------
  |  Branch (1460:32): [True: 961k, False: 905k]
  |  Branch (1460:49): [True: 667k, False: 293k]
  ------------------
 1461|  5.89M|                                 0 : EDGE_I444_LEFT_HAS_BOTTOM);
 1462|  7.08M|                        const pixel *top_sb_edge = NULL;
 1463|  7.08M|                        if (!((t->by & ~ss_ver) & (f->sb_step - 1))) {
  ------------------
  |  Branch (1463:29): [True: 722k, False: 6.36M]
  ------------------
 1464|   722k|                            top_sb_edge = f->ipred_edge[1 + pl];
 1465|   722k|                            const int sby = t->by >> f->sb_shift;
 1466|   722k|                            top_sb_edge += f->sb128w * 128 * (sby - 1);
 1467|   722k|                        }
 1468|  7.08M|                        const enum IntraPredMode uv_mode =
 1469|  7.08M|                             b->uv_mode == CFL_PRED ? DC_PRED : b->uv_mode;
  ------------------
  |  Branch (1469:30): [True: 21.3k, False: 7.06M]
  ------------------
 1470|  7.08M|                        const int xpos = t->bx >> ss_hor, ypos = t->by >> ss_ver;
 1471|  7.08M|                        const int xstart = ts->tiling.col_start >> ss_hor;
 1472|  7.08M|                        const int ystart = ts->tiling.row_start >> ss_ver;
 1473|  7.08M|                        const enum IntraPredMode m =
 1474|  7.08M|                            bytefn(dav1d_prepare_intra_edges)(xpos, xpos > xstart,
  ------------------
  |  |   87|  7.08M|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  7.08M|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 1475|  7.08M|                                                              ypos, ypos > ystart,
 1476|  7.08M|                                                              ts->tiling.col_end >> ss_hor,
 1477|  7.08M|                                                              ts->tiling.row_end >> ss_ver,
 1478|  7.08M|                                                              edge_flags, dst, stride,
 1479|  7.08M|                                                              top_sb_edge, uv_mode,
 1480|  7.08M|                                                              &angle, uv_t_dim->w,
 1481|  7.08M|                                                              uv_t_dim->h,
 1482|  7.08M|                                                              f->seq_hdr->intra_edge_filter,
 1483|  7.08M|                                                              edge HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  7.08M|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1484|  7.08M|                        angle |= intra_edge_filter_flag;
 1485|  7.08M|                        dsp->ipred.intra_pred[m](dst, stride, edge,
 1486|  7.08M|                                                 uv_t_dim->w * 4,
 1487|  7.08M|                                                 uv_t_dim->h * 4,
 1488|  7.08M|                                                 angle | sm_uv_fl,
 1489|  7.08M|                                                 (4 * f->bw + ss_hor -
 1490|  7.08M|                                                  4 * (t->bx & ~ss_hor)) >> ss_hor,
 1491|  7.08M|                                                 (4 * f->bh + ss_ver -
 1492|  7.08M|                                                  4 * (t->by & ~ss_ver)) >> ss_ver
 1493|  7.08M|                                                 HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  7.08M|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1494|  7.08M|                        if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   34|  7.08M|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 7.08M]
  |  |  ------------------
  |  |   35|  7.08M|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  7.08M|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                                      if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1495|      0|                            hex_dump(edge - uv_t_dim->h * 4, uv_t_dim->h * 4,
 1496|      0|                                     uv_t_dim->h * 4, 2, "l");
 1497|      0|                            hex_dump(edge, 0, 1, 1, "tl");
 1498|      0|                            hex_dump(edge + 1, uv_t_dim->w * 4,
 1499|      0|                                     uv_t_dim->w * 4, 2, "t");
 1500|      0|                            hex_dump(dst, stride, uv_t_dim->w * 4,
 1501|      0|                                     uv_t_dim->h * 4, pl ? "v-intra-pred" : "u-intra-pred");
  ------------------
  |  Branch (1501:55): [True: 0, False: 0]
  ------------------
 1502|      0|                        }
 1503|       |
 1504|  7.18M|                    skip_uv_pred: {}
 1505|  7.18M|                        if (!b->skip) {
  ------------------
  |  Branch (1505:29): [True: 1.03M, False: 6.15M]
  ------------------
 1506|  1.03M|                            enum TxfmType txtp;
 1507|  1.03M|                            int eob;
 1508|  1.03M|                            coef *cf;
 1509|  1.03M|                            if (t->frame_thread.pass) {
  ------------------
  |  Branch (1509:33): [True: 1.03M, False: 18.4E]
  ------------------
 1510|  1.03M|                                const int p = t->frame_thread.pass & 1;
 1511|  1.03M|                                const int cbi = *ts->frame_thread[p].cbi++;
 1512|  1.03M|                                cf = ts->frame_thread[p].cf;
 1513|  1.03M|                                ts->frame_thread[p].cf += uv_t_dim->w * uv_t_dim->h * 16;
 1514|  1.03M|                                eob  = cbi >> 5;
 1515|  1.03M|                                txtp = cbi & 0x1f;
 1516|  18.4E|                            } else {
 1517|  18.4E|                                uint8_t cf_ctx;
 1518|  18.4E|                                cf = bitfn(t->cf);
  ------------------
  |  |   77|  18.4E|#define bitfn(x) x##_16bpc
  ------------------
 1519|  18.4E|                                eob = decode_coefs(t, &t->a->ccoef[pl][cbx4 + x],
 1520|  18.4E|                                                   &t->l.ccoef[pl][cby4 + y],
 1521|  18.4E|                                                   b->uvtx, bs, b, 1, 1 + pl, cf,
 1522|  18.4E|                                                   &txtp, &cf_ctx);
 1523|  18.4E|                                if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  18.4E|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 18.4E]
  |  |  ------------------
  |  |   35|  18.4E|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  18.4E|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1524|      0|                                    printf("Post-uv-cf-blk[pl=%d,tx=%d,"
 1525|      0|                                           "txtp=%d,eob=%d]: r=%d [x=%d,cbx4=%d]\n",
 1526|      0|                                           pl, b->uvtx, txtp, eob, ts->msac.rng, x, cbx4);
 1527|  18.4E|                                int ctw = imin(uv_t_dim->w, (f->bw - t->bx + ss_hor) >> ss_hor);
 1528|  18.4E|                                int cth = imin(uv_t_dim->h, (f->bh - t->by + ss_ver) >> ss_ver);
 1529|  18.4E|                                dav1d_memset_likely_pow2(&t->a->ccoef[pl][cbx4 + x], cf_ctx, ctw);
 1530|  18.4E|                                dav1d_memset_likely_pow2(&t->l.ccoef[pl][cby4 + y], cf_ctx, cth);
 1531|  18.4E|                            }
 1532|  1.03M|                            if (eob >= 0) {
  ------------------
  |  Branch (1532:33): [True: 263k, False: 766k]
  ------------------
 1533|   263k|                                if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   34|   263k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 263k]
  |  |  ------------------
  |  |   35|   263k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   263k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                                              if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1534|      0|                                    coef_dump(cf, uv_t_dim->h * 4,
 1535|      0|                                              uv_t_dim->w * 4, 3, "dq");
 1536|   263k|                                dsp->itx.itxfm_add[b->uvtx]
 1537|   263k|                                                  [txtp](dst, stride,
 1538|   263k|                                                         cf, eob HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|   263k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1539|   263k|                                if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   34|   263k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 263k]
  |  |  ------------------
  |  |   35|   263k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   263k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                                              if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1540|      0|                                    hex_dump(dst, stride, uv_t_dim->w * 4,
 1541|      0|                                             uv_t_dim->h * 4, "recon");
 1542|   263k|                            }
 1543|  6.15M|                        } else if (!t->frame_thread.pass) {
  ------------------
  |  Branch (1543:36): [True: 0, False: 6.15M]
  ------------------
 1544|      0|                            dav1d_memset_pow2[uv_t_dim->lw](&t->a->ccoef[pl][cbx4 + x], 0x40);
 1545|      0|                            dav1d_memset_pow2[uv_t_dim->lh](&t->l.ccoef[pl][cby4 + y], 0x40);
 1546|      0|                        }
 1547|  7.18M|                        dst += uv_t_dim->w * 4;
 1548|  7.18M|                    }
 1549|  1.95M|                    t->bx -= x << ss_hor;
 1550|  1.95M|                }
 1551|  1.34M|                t->by -= y << ss_ver;
 1552|  1.34M|            }
 1553|   673k|        }
 1554|  1.02M|    }
 1555|   946k|}
dav1d_recon_b_inter_16bpc:
 1559|   802k|{
 1560|   802k|    Dav1dTileState *const ts = t->ts;
 1561|   802k|    const Dav1dFrameContext *const f = t->f;
 1562|   802k|    const Dav1dDSPContext *const dsp = f->dsp;
 1563|   802k|    const int bx4 = t->bx & 31, by4 = t->by & 31;
 1564|   802k|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
 1565|   802k|    const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
 1566|   802k|    const int cbx4 = bx4 >> ss_hor, cby4 = by4 >> ss_ver;
 1567|   802k|    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
 1568|   802k|    const int bw4 = b_dim[0], bh4 = b_dim[1];
 1569|   802k|    const int w4 = imin(bw4, f->bw - t->bx), h4 = imin(bh4, f->bh - t->by);
 1570|   802k|    const int has_chroma = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400 &&
  ------------------
  |  Branch (1570:28): [True: 390k, False: 411k]
  ------------------
 1571|   390k|                           (bw4 > ss_hor || t->bx & 1) &&
  ------------------
  |  Branch (1571:29): [True: 336k, False: 54.5k]
  |  Branch (1571:45): [True: 27.0k, False: 27.4k]
  ------------------
 1572|   363k|                           (bh4 > ss_ver || t->by & 1);
  ------------------
  |  Branch (1572:29): [True: 310k, False: 53.0k]
  |  Branch (1572:45): [True: 26.3k, False: 26.6k]
  ------------------
 1573|   802k|    const int chr_layout_idx = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I400 ? 0 :
  ------------------
  |  Branch (1573:32): [True: 410k, False: 392k]
  ------------------
 1574|   802k|                               DAV1D_PIXEL_LAYOUT_I444 - f->cur.p.layout;
 1575|   802k|    int res;
 1576|       |
 1577|       |    // prediction
 1578|   802k|    const int cbh4 = (bh4 + ss_ver) >> ss_ver, cbw4 = (bw4 + ss_hor) >> ss_hor;
 1579|   802k|    pixel *dst = ((pixel *) f->cur.data[0]) +
 1580|   802k|        4 * (t->by * PXSTRIDE(f->cur.stride[0]) + t->bx);
 1581|   802k|    const ptrdiff_t uvdstoff =
 1582|   802k|        4 * ((t->bx >> ss_hor) + (t->by >> ss_ver) * PXSTRIDE(f->cur.stride[1]));
 1583|   802k|    if (IS_KEY_OR_INTRA(f->frame_hdr)) {
  ------------------
  |  |   43|   802k|    (!IS_INTER_OR_SWITCH(frame_header))
  |  |  ------------------
  |  |  |  |   36|   802k|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  |  |  |  Branch (43:5): [True: 12.3k, False: 789k]
  |  |  ------------------
  ------------------
 1584|       |        // intrabc
 1585|  12.3k|        assert(!f->frame_hdr->super_res.enabled);
  ------------------
  |  Branch (1585:9): [True: 12.3k, False: 0]
  ------------------
 1586|  12.3k|        res = mc(t, dst, NULL, f->cur.stride[0], bw4, bh4, t->bx, t->by, 0,
 1587|  12.3k|                 b->mv[0], &f->sr_cur, 0 /* unused */, FILTER_2D_BILINEAR);
 1588|  12.3k|        if (res) return res;
  ------------------
  |  Branch (1588:13): [True: 0, False: 12.3k]
  ------------------
 1589|  25.7k|        if (has_chroma) for (int pl = 1; pl < 3; pl++) {
  ------------------
  |  Branch (1589:13): [True: 8.59k, False: 3.74k]
  |  Branch (1589:42): [True: 17.1k, False: 8.59k]
  ------------------
 1590|  17.1k|            res = mc(t, ((pixel *)f->cur.data[pl]) + uvdstoff, NULL, f->cur.stride[1],
 1591|  17.1k|                     bw4 << (bw4 == ss_hor), bh4 << (bh4 == ss_ver),
 1592|  17.1k|                     t->bx & ~ss_hor, t->by & ~ss_ver, pl, b->mv[0],
 1593|  17.1k|                     &f->sr_cur, 0 /* unused */, FILTER_2D_BILINEAR);
 1594|  17.1k|            if (res) return res;
  ------------------
  |  Branch (1594:17): [True: 0, False: 17.1k]
  ------------------
 1595|  17.1k|        }
 1596|   789k|    } else if (b->comp_type == COMP_INTER_NONE) {
  ------------------
  |  Branch (1596:16): [True: 719k, False: 70.2k]
  ------------------
 1597|   719k|        const Dav1dThreadPicture *const refp = &f->refp[b->ref[0]];
 1598|   719k|        const enum Filter2d filter_2d = b->filter2d;
 1599|       |
 1600|   719k|        if (imin(bw4, bh4) > 1 &&
  ------------------
  |  Branch (1600:13): [True: 573k, False: 145k]
  ------------------
 1601|   573k|            ((b->inter_mode == GLOBALMV && f->gmv_warp_allowed[b->ref[0]]) ||
  ------------------
  |  Branch (1601:15): [True: 374k, False: 199k]
  |  Branch (1601:44): [True: 81.5k, False: 292k]
  ------------------
 1602|   492k|             (b->motion_mode == MM_WARP && t->warpmv.type > DAV1D_WM_TYPE_TRANSLATION)))
  ------------------
  |  Branch (1602:15): [True: 46.2k, False: 446k]
  |  Branch (1602:44): [True: 40.5k, False: 5.68k]
  ------------------
 1603|   122k|        {
 1604|   122k|            res = warp_affine(t, dst, NULL, f->cur.stride[0], b_dim, 0, refp,
 1605|   122k|                              b->motion_mode == MM_WARP ? &t->warpmv :
  ------------------
  |  Branch (1605:31): [True: 40.5k, False: 81.5k]
  ------------------
 1606|   122k|                                  &f->frame_hdr->gmv[b->ref[0]]);
 1607|   122k|            if (res) return res;
  ------------------
  |  Branch (1607:17): [True: 0, False: 122k]
  ------------------
 1608|   597k|        } else {
 1609|   597k|            res = mc(t, dst, NULL, f->cur.stride[0],
 1610|   597k|                     bw4, bh4, t->bx, t->by, 0, b->mv[0], refp, b->ref[0], filter_2d);
 1611|   597k|            if (res) return res;
  ------------------
  |  Branch (1611:17): [True: 0, False: 597k]
  ------------------
 1612|   597k|            if (b->motion_mode == MM_OBMC) {
  ------------------
  |  Branch (1612:17): [True: 83.4k, False: 514k]
  ------------------
 1613|  83.4k|                res = obmc(t, dst, f->cur.stride[0], b_dim, 0, bx4, by4, w4, h4);
 1614|  83.4k|                if (res) return res;
  ------------------
  |  Branch (1614:21): [True: 0, False: 83.4k]
  ------------------
 1615|  83.4k|            }
 1616|   597k|        }
 1617|   719k|        if (b->interintra_type) {
  ------------------
  |  Branch (1617:13): [True: 16.3k, False: 703k]
  ------------------
 1618|  16.3k|            pixel *const tl_edge = bitfn(t->scratch.edge) + 32;
  ------------------
  |  |   77|  16.3k|#define bitfn(x) x##_16bpc
  ------------------
 1619|  16.3k|            enum IntraPredMode m = b->interintra_mode == II_SMOOTH_PRED ?
  ------------------
  |  Branch (1619:36): [True: 2.65k, False: 13.7k]
  ------------------
 1620|  13.7k|                                   SMOOTH_PRED : b->interintra_mode;
 1621|  16.3k|            pixel *const tmp = bitfn(t->scratch.interintra);
  ------------------
  |  |   77|  16.3k|#define bitfn(x) x##_16bpc
  ------------------
 1622|  16.3k|            int angle = 0;
 1623|  16.3k|            const pixel *top_sb_edge = NULL;
 1624|  16.3k|            if (!(t->by & (f->sb_step - 1))) {
  ------------------
  |  Branch (1624:17): [True: 2.08k, False: 14.2k]
  ------------------
 1625|  2.08k|                top_sb_edge = f->ipred_edge[0];
 1626|  2.08k|                const int sby = t->by >> f->sb_shift;
 1627|  2.08k|                top_sb_edge += f->sb128w * 128 * (sby - 1);
 1628|  2.08k|            }
 1629|  16.3k|            m = bytefn(dav1d_prepare_intra_edges)(t->bx, t->bx > ts->tiling.col_start,
  ------------------
  |  |   87|  16.3k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  16.3k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 1630|  16.3k|                                                  t->by, t->by > ts->tiling.row_start,
 1631|  16.3k|                                                  ts->tiling.col_end, ts->tiling.row_end,
 1632|  16.3k|                                                  0, dst, f->cur.stride[0], top_sb_edge,
 1633|  16.3k|                                                  m, &angle, bw4, bh4, 0, tl_edge
 1634|  16.3k|                                                  HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  16.3k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1635|  16.3k|            dsp->ipred.intra_pred[m](tmp, 4 * bw4 * sizeof(pixel),
 1636|  16.3k|                                     tl_edge, bw4 * 4, bh4 * 4, 0, 0, 0
 1637|  16.3k|                                     HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  16.3k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1638|  16.3k|            dsp->mc.blend(dst, f->cur.stride[0], tmp,
 1639|  16.3k|                          bw4 * 4, bh4 * 4, II_MASK(0, bs, b));
  ------------------
  |  |   83|  16.3k|    ((const uint8_t*)((uintptr_t)&dav1d_masks + \
  |  |   84|  16.3k|    (size_t)((b)->interintra_type == INTER_INTRA_BLEND ? \
  |  |  ------------------
  |  |  |  Branch (84:14): [True: 12.5k, False: 3.85k]
  |  |  ------------------
  |  |   85|  16.3k|    dav1d_masks.offsets[c][(bs)-BS_32x32].ii[(b)->interintra_mode] : \
  |  |   86|  16.3k|    dav1d_masks.offsets[c][(bs)-BS_32x32].wedge[0][(b)->wedge_idx]) * 8))
  ------------------
 1640|  16.3k|        }
 1641|       |
 1642|   719k|        if (!has_chroma) goto skip_inter_chroma_pred;
  ------------------
  |  Branch (1642:13): [True: 417k, False: 302k]
  ------------------
 1643|       |
 1644|       |        // sub8x8 derivation
 1645|   302k|        int is_sub8x8 = bw4 == ss_hor || bh4 == ss_ver;
  ------------------
  |  Branch (1645:25): [True: 19.2k, False: 283k]
  |  Branch (1645:42): [True: 20.9k, False: 262k]
  ------------------
 1646|   302k|        refmvs_block *const *r;
 1647|   302k|        if (is_sub8x8) {
  ------------------
  |  Branch (1647:13): [True: 41.8k, False: 260k]
  ------------------
 1648|  41.8k|            assert(ss_hor == 1);
  ------------------
  |  Branch (1648:13): [True: 41.8k, False: 0]
  ------------------
 1649|  41.8k|            r = &t->rt.r[(t->by & 31) + 5];
 1650|  41.8k|            if (bw4 == 1) is_sub8x8 &= r[0][t->bx - 1].ref.ref[0] > 0;
  ------------------
  |  Branch (1650:17): [True: 20.8k, False: 21.0k]
  ------------------
 1651|  41.8k|            if (bh4 == ss_ver) is_sub8x8 &= r[-1][t->bx].ref.ref[0] > 0;
  ------------------
  |  Branch (1651:17): [True: 25.0k, False: 16.8k]
  ------------------
 1652|  41.8k|            if (bw4 == 1 && bh4 == ss_ver)
  ------------------
  |  Branch (1652:17): [True: 20.8k, False: 21.0k]
  |  Branch (1652:29): [True: 4.00k, False: 16.8k]
  ------------------
 1653|  4.00k|                is_sub8x8 &= r[-1][t->bx - 1].ref.ref[0] > 0;
 1654|  41.8k|        }
 1655|       |
 1656|       |        // chroma prediction
 1657|   302k|        if (is_sub8x8) {
  ------------------
  |  Branch (1657:13): [True: 35.7k, False: 266k]
  ------------------
 1658|  35.7k|            assert(ss_hor == 1);
  ------------------
  |  Branch (1658:13): [True: 35.7k, False: 0]
  ------------------
 1659|  35.7k|            ptrdiff_t h_off = 0, v_off = 0;
 1660|  35.7k|            if (bw4 == 1 && bh4 == ss_ver) {
  ------------------
  |  Branch (1660:17): [True: 17.9k, False: 17.8k]
  |  Branch (1660:29): [True: 2.99k, False: 14.9k]
  ------------------
 1661|  8.97k|                for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1661:34): [True: 5.98k, False: 2.99k]
  ------------------
 1662|  5.98k|                    res = mc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff,
 1663|  5.98k|                             NULL, f->cur.stride[1],
 1664|  5.98k|                             bw4, bh4, t->bx - 1, t->by - 1, 1 + pl,
 1665|  5.98k|                             r[-1][t->bx - 1].mv.mv[0],
 1666|  5.98k|                             &f->refp[r[-1][t->bx - 1].ref.ref[0] - 1],
 1667|  5.98k|                             r[-1][t->bx - 1].ref.ref[0] - 1,
 1668|  5.98k|                             t->frame_thread.pass != 2 ? t->tl_4x4_filter :
  ------------------
  |  Branch (1668:30): [True: 0, False: 5.98k]
  ------------------
 1669|  5.98k|                                 f->frame_thread.b[((t->by - 1) * f->b4_stride) + t->bx - 1].filter2d);
 1670|  5.98k|                    if (res) return res;
  ------------------
  |  Branch (1670:25): [True: 0, False: 5.98k]
  ------------------
 1671|  5.98k|                }
 1672|  2.99k|                v_off = 2 * PXSTRIDE(f->cur.stride[1]);
 1673|  2.99k|                h_off = 2;
 1674|  2.99k|            }
 1675|  35.7k|            if (bw4 == 1) {
  ------------------
  |  Branch (1675:17): [True: 17.9k, False: 17.8k]
  ------------------
 1676|  17.9k|                const enum Filter2d left_filter_2d =
 1677|  17.9k|                    dav1d_filter_2d[t->l.filter[1][by4]][t->l.filter[0][by4]];
 1678|  53.7k|                for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1678:34): [True: 35.8k, False: 17.9k]
  ------------------
 1679|  35.8k|                    res = mc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff + v_off, NULL,
 1680|  35.8k|                             f->cur.stride[1], bw4, bh4, t->bx - 1,
 1681|  35.8k|                             t->by, 1 + pl, r[0][t->bx - 1].mv.mv[0],
 1682|  35.8k|                             &f->refp[r[0][t->bx - 1].ref.ref[0] - 1],
 1683|  35.8k|                             r[0][t->bx - 1].ref.ref[0] - 1,
 1684|  35.8k|                             t->frame_thread.pass != 2 ? left_filter_2d :
  ------------------
  |  Branch (1684:30): [True: 0, False: 35.8k]
  ------------------
 1685|  35.8k|                                 f->frame_thread.b[(t->by * f->b4_stride) + t->bx - 1].filter2d);
 1686|  35.8k|                    if (res) return res;
  ------------------
  |  Branch (1686:25): [True: 0, False: 35.8k]
  ------------------
 1687|  35.8k|                }
 1688|  17.9k|                h_off = 2;
 1689|  17.9k|            }
 1690|  35.7k|            if (bh4 == ss_ver) {
  ------------------
  |  Branch (1690:17): [True: 20.8k, False: 14.9k]
  ------------------
 1691|  20.8k|                const enum Filter2d top_filter_2d =
 1692|  20.8k|                    dav1d_filter_2d[t->a->filter[1][bx4]][t->a->filter[0][bx4]];
 1693|  62.5k|                for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1693:34): [True: 41.6k, False: 20.8k]
  ------------------
 1694|  41.6k|                    res = mc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff + h_off, NULL,
 1695|  41.6k|                             f->cur.stride[1], bw4, bh4, t->bx, t->by - 1,
 1696|  41.6k|                             1 + pl, r[-1][t->bx].mv.mv[0],
 1697|  41.6k|                             &f->refp[r[-1][t->bx].ref.ref[0] - 1],
 1698|  41.6k|                             r[-1][t->bx].ref.ref[0] - 1,
 1699|  41.6k|                             t->frame_thread.pass != 2 ? top_filter_2d :
  ------------------
  |  Branch (1699:30): [True: 0, False: 41.6k]
  ------------------
 1700|  41.6k|                                 f->frame_thread.b[((t->by - 1) * f->b4_stride) + t->bx].filter2d);
 1701|  41.6k|                    if (res) return res;
  ------------------
  |  Branch (1701:25): [True: 0, False: 41.6k]
  ------------------
 1702|  41.6k|                }
 1703|  20.8k|                v_off = 2 * PXSTRIDE(f->cur.stride[1]);
 1704|  20.8k|            }
 1705|   107k|            for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1705:30): [True: 71.5k, False: 35.7k]
  ------------------
 1706|  71.5k|                res = mc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff + h_off + v_off, NULL, f->cur.stride[1],
 1707|  71.5k|                         bw4, bh4, t->bx, t->by, 1 + pl, b->mv[0],
 1708|  71.5k|                         refp, b->ref[0], filter_2d);
 1709|  71.5k|                if (res) return res;
  ------------------
  |  Branch (1709:21): [True: 0, False: 71.5k]
  ------------------
 1710|  71.5k|            }
 1711|   266k|        } else {
 1712|   266k|            if (imin(cbw4, cbh4) > 1 &&
  ------------------
  |  Branch (1712:17): [True: 126k, False: 140k]
  ------------------
 1713|   126k|                ((b->inter_mode == GLOBALMV && f->gmv_warp_allowed[b->ref[0]]) ||
  ------------------
  |  Branch (1713:19): [True: 57.8k, False: 68.7k]
  |  Branch (1713:48): [True: 11.1k, False: 46.7k]
  ------------------
 1714|   115k|                 (b->motion_mode == MM_WARP && t->warpmv.type > DAV1D_WM_TYPE_TRANSLATION)))
  ------------------
  |  Branch (1714:19): [True: 15.5k, False: 99.9k]
  |  Branch (1714:48): [True: 14.9k, False: 586]
  ------------------
 1715|  26.0k|            {
 1716|  78.1k|                for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1716:34): [True: 52.0k, False: 26.0k]
  ------------------
 1717|  52.0k|                    res = warp_affine(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff, NULL,
 1718|  52.0k|                                      f->cur.stride[1], b_dim, 1 + pl, refp,
 1719|  52.0k|                                      b->motion_mode == MM_WARP ? &t->warpmv :
  ------------------
  |  Branch (1719:39): [True: 29.8k, False: 22.2k]
  ------------------
 1720|  52.0k|                                          &f->frame_hdr->gmv[b->ref[0]]);
 1721|  52.0k|                    if (res) return res;
  ------------------
  |  Branch (1721:25): [True: 0, False: 52.0k]
  ------------------
 1722|  52.0k|                }
 1723|   240k|            } else {
 1724|   725k|                for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1724:34): [True: 484k, False: 240k]
  ------------------
 1725|   484k|                    res = mc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff,
 1726|   484k|                             NULL, f->cur.stride[1],
 1727|   484k|                             bw4 << (bw4 == ss_hor), bh4 << (bh4 == ss_ver),
 1728|   484k|                             t->bx & ~ss_hor, t->by & ~ss_ver,
 1729|   484k|                             1 + pl, b->mv[0], refp, b->ref[0], filter_2d);
 1730|   484k|                    if (res) return res;
  ------------------
  |  Branch (1730:25): [True: 0, False: 484k]
  ------------------
 1731|   484k|                    if (b->motion_mode == MM_OBMC) {
  ------------------
  |  Branch (1731:25): [True: 133k, False: 351k]
  ------------------
 1732|   133k|                        res = obmc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff,
 1733|   133k|                                   f->cur.stride[1], b_dim, 1 + pl, bx4, by4, w4, h4);
 1734|   133k|                        if (res) return res;
  ------------------
  |  Branch (1734:29): [True: 0, False: 133k]
  ------------------
 1735|   133k|                    }
 1736|   484k|                }
 1737|   240k|            }
 1738|   266k|            if (b->interintra_type) {
  ------------------
  |  Branch (1738:17): [True: 15.1k, False: 251k]
  ------------------
 1739|       |                // FIXME for 8x32 with 4:2:2 subsampling, this probably does
 1740|       |                // the wrong thing since it will select 4x16, not 4x32, as a
 1741|       |                // transform size...
 1742|  15.1k|                const uint8_t *const ii_mask = II_MASK(chr_layout_idx, bs, b);
  ------------------
  |  |   83|  15.1k|    ((const uint8_t*)((uintptr_t)&dav1d_masks + \
  |  |   84|  15.1k|    (size_t)((b)->interintra_type == INTER_INTRA_BLEND ? \
  |  |  ------------------
  |  |  |  Branch (84:14): [True: 11.6k, False: 3.48k]
  |  |  ------------------
  |  |   85|  15.1k|    dav1d_masks.offsets[c][(bs)-BS_32x32].ii[(b)->interintra_mode] : \
  |  |   86|  15.1k|    dav1d_masks.offsets[c][(bs)-BS_32x32].wedge[0][(b)->wedge_idx]) * 8))
  ------------------
 1743|       |
 1744|  45.5k|                for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1744:34): [True: 30.3k, False: 15.1k]
  ------------------
 1745|  30.3k|                    pixel *const tmp = bitfn(t->scratch.interintra);
  ------------------
  |  |   77|  30.3k|#define bitfn(x) x##_16bpc
  ------------------
 1746|  30.3k|                    pixel *const tl_edge = bitfn(t->scratch.edge) + 32;
  ------------------
  |  |   77|  30.3k|#define bitfn(x) x##_16bpc
  ------------------
 1747|  30.3k|                    enum IntraPredMode m =
 1748|  30.3k|                        b->interintra_mode == II_SMOOTH_PRED ?
  ------------------
  |  Branch (1748:25): [True: 4.83k, False: 25.5k]
  ------------------
 1749|  25.5k|                        SMOOTH_PRED : b->interintra_mode;
 1750|  30.3k|                    int angle = 0;
 1751|  30.3k|                    pixel *const uvdst = ((pixel *) f->cur.data[1 + pl]) + uvdstoff;
 1752|  30.3k|                    const pixel *top_sb_edge = NULL;
 1753|  30.3k|                    if (!(t->by & (f->sb_step - 1))) {
  ------------------
  |  Branch (1753:25): [True: 3.71k, False: 26.6k]
  ------------------
 1754|  3.71k|                        top_sb_edge = f->ipred_edge[pl + 1];
 1755|  3.71k|                        const int sby = t->by >> f->sb_shift;
 1756|  3.71k|                        top_sb_edge += f->sb128w * 128 * (sby - 1);
 1757|  3.71k|                    }
 1758|  30.3k|                    m = bytefn(dav1d_prepare_intra_edges)(t->bx >> ss_hor,
  ------------------
  |  |   87|  30.3k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  30.3k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 1759|  30.3k|                                                          (t->bx >> ss_hor) >
 1760|  30.3k|                                                              (ts->tiling.col_start >> ss_hor),
 1761|  30.3k|                                                          t->by >> ss_ver,
 1762|  30.3k|                                                          (t->by >> ss_ver) >
 1763|  30.3k|                                                              (ts->tiling.row_start >> ss_ver),
 1764|  30.3k|                                                          ts->tiling.col_end >> ss_hor,
 1765|  30.3k|                                                          ts->tiling.row_end >> ss_ver,
 1766|  30.3k|                                                          0, uvdst, f->cur.stride[1],
 1767|  30.3k|                                                          top_sb_edge, m,
 1768|  30.3k|                                                          &angle, cbw4, cbh4, 0, tl_edge
 1769|  30.3k|                                                          HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  30.3k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1770|  30.3k|                    dsp->ipred.intra_pred[m](tmp, cbw4 * 4 * sizeof(pixel),
 1771|  30.3k|                                             tl_edge, cbw4 * 4, cbh4 * 4, 0, 0, 0
 1772|  30.3k|                                             HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  30.3k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1773|  30.3k|                    dsp->mc.blend(uvdst, f->cur.stride[1], tmp,
 1774|  30.3k|                                  cbw4 * 4, cbh4 * 4, ii_mask);
 1775|  30.3k|                }
 1776|  15.1k|            }
 1777|   266k|        }
 1778|       |
 1779|   721k|    skip_inter_chroma_pred: {}
 1780|   721k|        t->tl_4x4_filter = filter_2d;
 1781|   721k|    } else {
 1782|  70.2k|        const enum Filter2d filter_2d = b->filter2d;
 1783|       |        // Maximum super block size is 128x128
 1784|  70.2k|        int16_t (*tmp)[128 * 128] = t->scratch.compinter;
 1785|  70.2k|        int jnt_weight;
 1786|  70.2k|        uint8_t *const seg_mask = t->scratch.seg_mask;
 1787|  70.2k|        const uint8_t *mask;
 1788|       |
 1789|   207k|        for (int i = 0; i < 2; i++) {
  ------------------
  |  Branch (1789:25): [True: 137k, False: 70.2k]
  ------------------
 1790|   137k|            const Dav1dThreadPicture *const refp = &f->refp[b->ref[i]];
 1791|       |
 1792|   137k|            if (b->inter_mode == GLOBALMV_GLOBALMV && f->gmv_warp_allowed[b->ref[i]]) {
  ------------------
  |  Branch (1792:17): [True: 21.0k, False: 116k]
  |  Branch (1792:55): [True: 10.4k, False: 10.6k]
  ------------------
 1793|  10.4k|                res = warp_affine(t, NULL, tmp[i], bw4 * 4, b_dim, 0, refp,
 1794|  10.4k|                                  &f->frame_hdr->gmv[b->ref[i]]);
 1795|  10.4k|                if (res) return res;
  ------------------
  |  Branch (1795:21): [True: 0, False: 10.4k]
  ------------------
 1796|   127k|            } else {
 1797|   127k|                res = mc(t, NULL, tmp[i], 0, bw4, bh4, t->bx, t->by, 0,
 1798|   127k|                         b->mv[i], refp, b->ref[i], filter_2d);
 1799|   127k|                if (res) return res;
  ------------------
  |  Branch (1799:21): [True: 0, False: 127k]
  ------------------
 1800|   127k|            }
 1801|   137k|        }
 1802|  70.2k|        switch (b->comp_type) {
  ------------------
  |  Branch (1802:17): [True: 68.8k, False: 1.40k]
  ------------------
 1803|  39.7k|        case COMP_INTER_AVG:
  ------------------
  |  Branch (1803:9): [True: 39.7k, False: 30.4k]
  ------------------
 1804|  39.7k|            dsp->mc.avg(dst, f->cur.stride[0], tmp[0], tmp[1],
 1805|  39.7k|                        bw4 * 4, bh4 * 4 HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  39.7k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1806|  39.7k|            break;
 1807|  13.0k|        case COMP_INTER_WEIGHTED_AVG:
  ------------------
  |  Branch (1807:9): [True: 13.0k, False: 57.2k]
  ------------------
 1808|  13.0k|            jnt_weight = f->jnt_weights[b->ref[0]][b->ref[1]];
 1809|  13.0k|            dsp->mc.w_avg(dst, f->cur.stride[0], tmp[0], tmp[1],
 1810|  13.0k|                          bw4 * 4, bh4 * 4, jnt_weight HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  13.0k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1811|  13.0k|            break;
 1812|  10.9k|        case COMP_INTER_SEG:
  ------------------
  |  Branch (1812:9): [True: 10.9k, False: 59.3k]
  ------------------
 1813|  10.9k|            dsp->mc.w_mask[chr_layout_idx](dst, f->cur.stride[0],
 1814|  10.9k|                                           tmp[b->mask_sign], tmp[!b->mask_sign],
 1815|  10.9k|                                           bw4 * 4, bh4 * 4, seg_mask,
 1816|  10.9k|                                           b->mask_sign HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  10.9k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1817|  10.9k|            mask = seg_mask;
 1818|  10.9k|            break;
 1819|  5.13k|        case COMP_INTER_WEDGE:
  ------------------
  |  Branch (1819:9): [True: 5.13k, False: 65.1k]
  ------------------
 1820|  5.13k|            mask = WEDGE_MASK(0, bs, 0, b->wedge_idx);
  ------------------
  |  |   89|  5.13k|    ((const uint8_t*)((uintptr_t)&dav1d_masks + \
  |  |   90|  5.13k|    (size_t)dav1d_masks.offsets[c][(bs)-BS_32x32].wedge[sign][idx] * 8))
  ------------------
 1821|  5.13k|            dsp->mc.mask(dst, f->cur.stride[0],
 1822|  5.13k|                         tmp[b->mask_sign], tmp[!b->mask_sign],
 1823|  5.13k|                         bw4 * 4, bh4 * 4, mask HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  5.13k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1824|  5.13k|            if (has_chroma)
  ------------------
  |  Branch (1824:17): [True: 3.31k, False: 1.81k]
  ------------------
 1825|  3.31k|                mask = WEDGE_MASK(chr_layout_idx, bs, b->mask_sign, b->wedge_idx);
  ------------------
  |  |   89|  3.31k|    ((const uint8_t*)((uintptr_t)&dav1d_masks + \
  |  |   90|  3.31k|    (size_t)dav1d_masks.offsets[c][(bs)-BS_32x32].wedge[sign][idx] * 8))
  ------------------
 1826|  5.13k|            break;
 1827|  70.2k|        }
 1828|       |
 1829|       |        // chroma
 1830|  75.6k|        if (has_chroma) for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1830:13): [True: 25.2k, False: 43.5k]
  |  Branch (1830:42): [True: 50.3k, False: 25.2k]
  ------------------
 1831|   151k|            for (int i = 0; i < 2; i++) {
  ------------------
  |  Branch (1831:29): [True: 100k, False: 50.3k]
  ------------------
 1832|   100k|                const Dav1dThreadPicture *const refp = &f->refp[b->ref[i]];
 1833|   100k|                if (b->inter_mode == GLOBALMV_GLOBALMV &&
  ------------------
  |  Branch (1833:21): [True: 19.4k, False: 81.3k]
  ------------------
 1834|  19.4k|                    imin(cbw4, cbh4) > 1 && f->gmv_warp_allowed[b->ref[i]])
  ------------------
  |  Branch (1834:21): [True: 6.38k, False: 13.0k]
  |  Branch (1834:45): [True: 2.98k, False: 3.39k]
  ------------------
 1835|  2.98k|                {
 1836|  2.98k|                    res = warp_affine(t, NULL, tmp[i], bw4 * 4 >> ss_hor,
 1837|  2.98k|                                      b_dim, 1 + pl,
 1838|  2.98k|                                      refp, &f->frame_hdr->gmv[b->ref[i]]);
 1839|  2.98k|                    if (res) return res;
  ------------------
  |  Branch (1839:25): [True: 0, False: 2.98k]
  ------------------
 1840|  97.7k|                } else {
 1841|  97.7k|                    res = mc(t, NULL, tmp[i], 0, bw4, bh4, t->bx, t->by,
 1842|  97.7k|                             1 + pl, b->mv[i], refp, b->ref[i], filter_2d);
 1843|  97.7k|                    if (res) return res;
  ------------------
  |  Branch (1843:25): [True: 0, False: 97.7k]
  ------------------
 1844|  97.7k|                }
 1845|   100k|            }
 1846|  50.3k|            pixel *const uvdst = ((pixel *) f->cur.data[1 + pl]) + uvdstoff;
 1847|  50.3k|            switch (b->comp_type) {
  ------------------
  |  Branch (1847:21): [True: 50.5k, False: 18.4E]
  ------------------
 1848|  27.3k|            case COMP_INTER_AVG:
  ------------------
  |  Branch (1848:13): [True: 27.3k, False: 23.0k]
  ------------------
 1849|  27.3k|                dsp->mc.avg(uvdst, f->cur.stride[1], tmp[0], tmp[1],
 1850|  27.3k|                            bw4 * 4 >> ss_hor, bh4 * 4 >> ss_ver
 1851|  27.3k|                            HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  27.3k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1852|  27.3k|                break;
 1853|  3.85k|            case COMP_INTER_WEIGHTED_AVG:
  ------------------
  |  Branch (1853:13): [True: 3.85k, False: 46.5k]
  ------------------
 1854|  3.85k|                dsp->mc.w_avg(uvdst, f->cur.stride[1], tmp[0], tmp[1],
 1855|  3.85k|                              bw4 * 4 >> ss_hor, bh4 * 4 >> ss_ver, jnt_weight
 1856|  3.85k|                              HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  3.85k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1857|  3.85k|                break;
 1858|  6.63k|            case COMP_INTER_WEDGE:
  ------------------
  |  Branch (1858:13): [True: 6.63k, False: 43.7k]
  ------------------
 1859|  19.3k|            case COMP_INTER_SEG:
  ------------------
  |  Branch (1859:13): [True: 12.6k, False: 37.7k]
  ------------------
 1860|  19.3k|                dsp->mc.mask(uvdst, f->cur.stride[1],
 1861|  19.3k|                             tmp[b->mask_sign], tmp[!b->mask_sign],
 1862|  19.3k|                             bw4 * 4 >> ss_hor, bh4 * 4 >> ss_ver, mask
 1863|  19.3k|                             HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  19.3k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1864|  19.3k|                break;
 1865|  50.3k|            }
 1866|  50.3k|        }
 1867|  68.7k|    }
 1868|       |
 1869|   802k|    if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   34|   802k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 802k]
  |  |  ------------------
  |  |   35|   802k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   802k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                  if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1870|      0|        hex_dump(dst, f->cur.stride[0], b_dim[0] * 4, b_dim[1] * 4, "y-pred");
 1871|      0|        if (has_chroma) {
  ------------------
  |  Branch (1871:13): [True: 0, False: 0]
  ------------------
 1872|      0|            hex_dump(&((pixel *) f->cur.data[1])[uvdstoff], f->cur.stride[1],
 1873|      0|                     cbw4 * 4, cbh4 * 4, "u-pred");
 1874|      0|            hex_dump(&((pixel *) f->cur.data[2])[uvdstoff], f->cur.stride[1],
 1875|      0|                     cbw4 * 4, cbh4 * 4, "v-pred");
 1876|      0|        }
 1877|      0|    }
 1878|       |
 1879|   802k|    const int cw4 = (w4 + ss_hor) >> ss_hor, ch4 = (h4 + ss_ver) >> ss_ver;
 1880|       |
 1881|   802k|    if (b->skip) {
  ------------------
  |  Branch (1881:9): [True: 521k, False: 280k]
  ------------------
 1882|       |        // reset coef contexts
 1883|   521k|        BlockContext *const a = t->a;
 1884|   521k|        dav1d_memset_pow2[b_dim[2]](&a->lcoef[bx4], 0x40);
 1885|   521k|        dav1d_memset_pow2[b_dim[3]](&t->l.lcoef[by4], 0x40);
 1886|   521k|        if (has_chroma) {
  ------------------
  |  Branch (1886:13): [True: 149k, False: 372k]
  ------------------
 1887|   149k|            dav1d_memset_pow2_fn memset_cw = dav1d_memset_pow2[ulog2(cbw4)];
 1888|   149k|            dav1d_memset_pow2_fn memset_ch = dav1d_memset_pow2[ulog2(cbh4)];
 1889|   149k|            memset_cw(&a->ccoef[0][cbx4], 0x40);
 1890|   149k|            memset_cw(&a->ccoef[1][cbx4], 0x40);
 1891|   149k|            memset_ch(&t->l.ccoef[0][cby4], 0x40);
 1892|   149k|            memset_ch(&t->l.ccoef[1][cby4], 0x40);
 1893|   149k|        }
 1894|   521k|        return 0;
 1895|   521k|    }
 1896|       |
 1897|   280k|    const TxfmInfo *const uvtx = &dav1d_txfm_dimensions[b->uvtx];
 1898|   280k|    const TxfmInfo *const ytx = &dav1d_txfm_dimensions[b->max_ytx];
 1899|   280k|    const uint16_t tx_split[2] = { b->tx_split0, b->tx_split1 };
 1900|       |
 1901|   564k|    for (int init_y = 0; init_y < bh4; init_y += 16) {
  ------------------
  |  Branch (1901:26): [True: 283k, False: 280k]
  ------------------
 1902|   572k|        for (int init_x = 0; init_x < bw4; init_x += 16) {
  ------------------
  |  Branch (1902:30): [True: 289k, False: 283k]
  ------------------
 1903|       |            // coefficient coding & inverse transforms
 1904|   289k|            int y_off = !!init_y, y;
 1905|   289k|            dst += PXSTRIDE(f->cur.stride[0]) * 4 * init_y;
 1906|   582k|            for (y = init_y, t->by += init_y; y < imin(h4, init_y + 16);
  ------------------
  |  Branch (1906:47): [True: 293k, False: 289k]
  ------------------
 1907|   293k|                 y += ytx->h, y_off++)
 1908|   293k|            {
 1909|   293k|                int x, x_off = !!init_x;
 1910|   603k|                for (x = init_x, t->bx += init_x; x < imin(w4, init_x + 16);
  ------------------
  |  Branch (1910:51): [True: 310k, False: 293k]
  ------------------
 1911|   310k|                     x += ytx->w, x_off++)
 1912|   310k|                {
 1913|   310k|                    read_coef_tree(t, bs, b, b->max_ytx, 0, tx_split,
 1914|   310k|                                   x_off, y_off, &dst[x * 4]);
 1915|   310k|                    t->bx += ytx->w;
 1916|   310k|                }
 1917|   293k|                dst += PXSTRIDE(f->cur.stride[0]) * 4 * ytx->h;
 1918|   293k|                t->bx -= x;
 1919|   293k|                t->by += ytx->h;
 1920|   293k|            }
 1921|   289k|            dst -= PXSTRIDE(f->cur.stride[0]) * 4 * y;
 1922|   289k|            t->by -= y;
 1923|       |
 1924|       |            // chroma coefs and inverse transform
 1925|   579k|            if (has_chroma) for (int pl = 0; pl < 2; pl++) {
  ------------------
  |  Branch (1925:17): [True: 193k, False: 96.1k]
  |  Branch (1925:46): [True: 386k, False: 193k]
  ------------------
 1926|   386k|                pixel *uvdst = ((pixel *) f->cur.data[1 + pl]) + uvdstoff +
 1927|   386k|                    (PXSTRIDE(f->cur.stride[1]) * init_y * 4 >> ss_ver);
 1928|   386k|                for (y = init_y >> ss_ver, t->by += init_y;
 1929|   780k|                     y < imin(ch4, (init_y + 16) >> ss_ver); y += uvtx->h)
  ------------------
  |  Branch (1929:22): [True: 394k, False: 386k]
  ------------------
 1930|   394k|                {
 1931|   394k|                    int x;
 1932|   394k|                    for (x = init_x >> ss_hor, t->bx += init_x;
 1933|   802k|                         x < imin(cw4, (init_x + 16) >> ss_hor); x += uvtx->w)
  ------------------
  |  Branch (1933:26): [True: 408k, False: 394k]
  ------------------
 1934|   408k|                    {
 1935|   408k|                        coef *cf;
 1936|   408k|                        int eob;
 1937|   408k|                        enum TxfmType txtp;
 1938|   408k|                        if (t->frame_thread.pass) {
  ------------------
  |  Branch (1938:29): [True: 408k, False: 18.4E]
  ------------------
 1939|   408k|                            const int p = t->frame_thread.pass & 1;
 1940|   408k|                            const int cbi = *ts->frame_thread[p].cbi++;
 1941|   408k|                            cf = ts->frame_thread[p].cf;
 1942|   408k|                            ts->frame_thread[p].cf += uvtx->w * uvtx->h * 16;
 1943|   408k|                            eob  = cbi >> 5;
 1944|   408k|                            txtp = cbi & 0x1f;
 1945|  18.4E|                        } else {
 1946|  18.4E|                            uint8_t cf_ctx;
 1947|  18.4E|                            cf = bitfn(t->cf);
  ------------------
  |  |   77|  18.4E|#define bitfn(x) x##_16bpc
  ------------------
 1948|  18.4E|                            txtp = t->scratch.txtp_map[(by4 + (y << ss_ver)) * 32 +
 1949|  18.4E|                                                        bx4 + (x << ss_hor)];
 1950|  18.4E|                            eob = decode_coefs(t, &t->a->ccoef[pl][cbx4 + x],
 1951|  18.4E|                                               &t->l.ccoef[pl][cby4 + y],
 1952|  18.4E|                                               b->uvtx, bs, b, 0, 1 + pl,
 1953|  18.4E|                                               cf, &txtp, &cf_ctx);
 1954|  18.4E|                            if (DEBUG_BLOCK_INFO)
  ------------------
  |  |   34|  18.4E|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 18.4E]
  |  |  ------------------
  |  |   35|  18.4E|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  18.4E|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1955|      0|                                printf("Post-uv-cf-blk[pl=%d,tx=%d,"
 1956|      0|                                       "txtp=%d,eob=%d]: r=%d\n",
 1957|      0|                                       pl, b->uvtx, txtp, eob, ts->msac.rng);
 1958|  18.4E|                            int ctw = imin(uvtx->w, (f->bw - t->bx + ss_hor) >> ss_hor);
 1959|  18.4E|                            int cth = imin(uvtx->h, (f->bh - t->by + ss_ver) >> ss_ver);
 1960|  18.4E|                            dav1d_memset_likely_pow2(&t->a->ccoef[pl][cbx4 + x], cf_ctx, ctw);
 1961|  18.4E|                            dav1d_memset_likely_pow2(&t->l.ccoef[pl][cby4 + y], cf_ctx, cth);
 1962|  18.4E|                        }
 1963|   408k|                        if (eob >= 0) {
  ------------------
  |  Branch (1963:29): [True: 175k, False: 232k]
  ------------------
 1964|   175k|                            if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   34|   175k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 175k]
  |  |  ------------------
  |  |   35|   175k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   175k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                                          if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1965|      0|                                coef_dump(cf, uvtx->h * 4, uvtx->w * 4, 3, "dq");
 1966|   175k|                            dsp->itx.itxfm_add[b->uvtx]
 1967|   175k|                                              [txtp](&uvdst[4 * x],
 1968|   175k|                                                     f->cur.stride[1],
 1969|   175k|                                                     cf, eob HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|   175k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 1970|   175k|                            if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   34|   175k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 175k]
  |  |  ------------------
  |  |   35|   175k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|   175k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                                          if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
  ------------------
  |  |   37|      0|#define DEBUG_B_PIXELS 0
  |  |  ------------------
  |  |  |  Branch (37:24): [Folded, False: 0]
  |  |  ------------------
  ------------------
 1971|      0|                                hex_dump(&uvdst[4 * x], f->cur.stride[1],
 1972|      0|                                         uvtx->w * 4, uvtx->h * 4, "recon");
 1973|   175k|                        }
 1974|   408k|                        t->bx += uvtx->w << ss_hor;
 1975|   408k|                    }
 1976|   394k|                    uvdst += PXSTRIDE(f->cur.stride[1]) * 4 * uvtx->h;
 1977|   394k|                    t->bx -= x << ss_hor;
 1978|   394k|                    t->by += uvtx->h << ss_ver;
 1979|   394k|                }
 1980|   386k|                t->by -= y << ss_ver;
 1981|   386k|            }
 1982|   289k|        }
 1983|   283k|    }
 1984|   280k|    return 0;
 1985|   802k|}
dav1d_filter_sbrow_deblock_cols_16bpc:
 1987|   312k|void bytefn(dav1d_filter_sbrow_deblock_cols)(Dav1dFrameContext *const f, const int sby) {
 1988|   312k|    if (!(f->c->inloop_filters & DAV1D_INLOOPFILTER_DEBLOCK) ||
  ------------------
  |  Branch (1988:9): [True: 2, False: 312k]
  ------------------
 1989|   312k|        (!f->frame_hdr->loopfilter.level_y[0] && !f->frame_hdr->loopfilter.level_y[1]))
  ------------------
  |  Branch (1989:10): [True: 8.81k, False: 303k]
  |  Branch (1989:50): [True: 0, False: 8.81k]
  ------------------
 1990|      0|    {
 1991|      0|        return;
 1992|      0|    }
 1993|   312k|    const int y = sby * f->sb_step * 4;
 1994|   312k|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
 1995|   312k|    pixel *const p[3] = {
 1996|   312k|        f->lf.p[0] + y * PXSTRIDE(f->cur.stride[0]),
 1997|   312k|        f->lf.p[1] + (y * PXSTRIDE(f->cur.stride[1]) >> ss_ver),
 1998|   312k|        f->lf.p[2] + (y * PXSTRIDE(f->cur.stride[1]) >> ss_ver)
 1999|   312k|    };
 2000|   312k|    Av1Filter *mask = f->lf.mask + (sby >> !f->seq_hdr->sb128) * f->sb128w;
 2001|   312k|    bytefn(dav1d_loopfilter_sbrow_cols)(f, p, mask, sby,
  ------------------
  |  |   87|   312k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|   312k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2002|   312k|                                        f->lf.start_of_tile_row[sby]);
 2003|   312k|}
dav1d_filter_sbrow_deblock_rows_16bpc:
 2005|   464k|void bytefn(dav1d_filter_sbrow_deblock_rows)(Dav1dFrameContext *const f, const int sby) {
 2006|   464k|    const int y = sby * f->sb_step * 4;
 2007|   464k|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
 2008|   464k|    pixel *const p[3] = {
 2009|   464k|        f->lf.p[0] + y * PXSTRIDE(f->cur.stride[0]),
 2010|   464k|        f->lf.p[1] + (y * PXSTRIDE(f->cur.stride[1]) >> ss_ver),
 2011|   464k|        f->lf.p[2] + (y * PXSTRIDE(f->cur.stride[1]) >> ss_ver)
 2012|   464k|    };
 2013|   464k|    Av1Filter *mask = f->lf.mask + (sby >> !f->seq_hdr->sb128) * f->sb128w;
 2014|   464k|    if (f->c->inloop_filters & DAV1D_INLOOPFILTER_DEBLOCK &&
  ------------------
  |  Branch (2014:9): [True: 464k, False: 265]
  ------------------
 2015|   464k|        (f->frame_hdr->loopfilter.level_y[0] || f->frame_hdr->loopfilter.level_y[1]))
  ------------------
  |  Branch (2015:10): [True: 303k, False: 160k]
  |  Branch (2015:49): [True: 8.80k, False: 151k]
  ------------------
 2016|   312k|    {
 2017|   312k|        bytefn(dav1d_loopfilter_sbrow_rows)(f, p, mask, sby);
  ------------------
  |  |   87|   312k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|   312k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2018|   312k|    }
 2019|   464k|    if (f->seq_hdr->cdef || f->lf.restore_planes) {
  ------------------
  |  Branch (2019:9): [True: 417k, False: 46.3k]
  |  Branch (2019:29): [True: 13.9k, False: 32.3k]
  ------------------
 2020|       |        // Store loop filtered pixels required by CDEF / LR
 2021|   431k|        bytefn(dav1d_copy_lpf)(f, p, sby);
  ------------------
  |  |   87|   431k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|   431k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2022|   431k|    }
 2023|   464k|}
dav1d_filter_sbrow_cdef_16bpc:
 2025|   417k|void bytefn(dav1d_filter_sbrow_cdef)(Dav1dTaskContext *const tc, const int sby) {
 2026|   417k|    const Dav1dFrameContext *const f = tc->f;
 2027|   417k|    if (!(f->c->inloop_filters & DAV1D_INLOOPFILTER_CDEF)) return;
  ------------------
  |  Branch (2027:9): [True: 0, False: 417k]
  ------------------
 2028|   417k|    const int sbsz = f->sb_step;
 2029|   417k|    const int y = sby * sbsz * 4;
 2030|   417k|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
 2031|   417k|    pixel *const p[3] = {
 2032|   417k|        f->lf.p[0] + y * PXSTRIDE(f->cur.stride[0]),
 2033|   417k|        f->lf.p[1] + (y * PXSTRIDE(f->cur.stride[1]) >> ss_ver),
 2034|   417k|        f->lf.p[2] + (y * PXSTRIDE(f->cur.stride[1]) >> ss_ver)
 2035|   417k|    };
 2036|   417k|    Av1Filter *prev_mask = f->lf.mask + ((sby - 1) >> !f->seq_hdr->sb128) * f->sb128w;
 2037|   417k|    Av1Filter *mask = f->lf.mask + (sby >> !f->seq_hdr->sb128) * f->sb128w;
 2038|   417k|    const int start = sby * sbsz;
 2039|   417k|    if (sby) {
  ------------------
  |  Branch (2039:9): [True: 305k, False: 112k]
  ------------------
 2040|   305k|        const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
 2041|   305k|        pixel *p_up[3] = {
 2042|   305k|            p[0] - 8 * PXSTRIDE(f->cur.stride[0]),
 2043|   305k|            p[1] - (8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver),
 2044|   305k|            p[2] - (8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver),
 2045|   305k|        };
 2046|   305k|        bytefn(dav1d_cdef_brow)(tc, p_up, prev_mask, start - 2, start, 1, sby);
  ------------------
  |  |   87|   305k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|   305k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2047|   305k|    }
 2048|   417k|    const int n_blks = sbsz - 2 * (sby + 1 < f->sbh);
 2049|   417k|    const int end = imin(start + n_blks, f->bh);
 2050|   417k|    bytefn(dav1d_cdef_brow)(tc, p, mask, start, end, 0, sby);
  ------------------
  |  |   87|   417k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|   417k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2051|   417k|}
dav1d_filter_sbrow_resize_16bpc:
 2053|  25.4k|void bytefn(dav1d_filter_sbrow_resize)(Dav1dFrameContext *const f, const int sby) {
 2054|  25.4k|    const int sbsz = f->sb_step;
 2055|  25.4k|    const int y = sby * sbsz * 4;
 2056|  25.4k|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
 2057|  25.4k|    const pixel *const p[3] = {
 2058|  25.4k|        f->lf.p[0] + y * PXSTRIDE(f->cur.stride[0]),
 2059|  25.4k|        f->lf.p[1] + (y * PXSTRIDE(f->cur.stride[1]) >> ss_ver),
 2060|  25.4k|        f->lf.p[2] + (y * PXSTRIDE(f->cur.stride[1]) >> ss_ver)
 2061|  25.4k|    };
 2062|  25.4k|    pixel *const sr_p[3] = {
 2063|  25.4k|        f->lf.sr_p[0] + y * PXSTRIDE(f->sr_cur.p.stride[0]),
 2064|  25.4k|        f->lf.sr_p[1] + (y * PXSTRIDE(f->sr_cur.p.stride[1]) >> ss_ver),
 2065|  25.4k|        f->lf.sr_p[2] + (y * PXSTRIDE(f->sr_cur.p.stride[1]) >> ss_ver)
 2066|  25.4k|    };
 2067|  25.4k|    const int has_chroma = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400;
 2068|  70.3k|    for (int pl = 0; pl < 1 + 2 * has_chroma; pl++) {
  ------------------
  |  Branch (2068:22): [True: 44.9k, False: 25.4k]
  ------------------
 2069|  44.9k|        const int ss_ver = pl && f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
  ------------------
  |  Branch (2069:28): [True: 19.5k, False: 25.4k]
  |  Branch (2069:34): [True: 8.87k, False: 10.6k]
  ------------------
 2070|  44.9k|        const int h_start = 8 * !!sby >> ss_ver;
 2071|  44.9k|        const ptrdiff_t dst_stride = f->sr_cur.p.stride[!!pl];
 2072|  44.9k|        pixel *dst = sr_p[pl] - h_start * PXSTRIDE(dst_stride);
 2073|  44.9k|        const ptrdiff_t src_stride = f->cur.stride[!!pl];
 2074|  44.9k|        const pixel *src = p[pl] - h_start * PXSTRIDE(src_stride);
 2075|  44.9k|        const int h_end = 4 * (sbsz - 2 * (sby + 1 < f->sbh)) >> ss_ver;
 2076|  44.9k|        const int ss_hor = pl && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
  ------------------
  |  Branch (2076:28): [True: 19.5k, False: 25.4k]
  |  Branch (2076:34): [True: 9.71k, False: 9.78k]
  ------------------
 2077|  44.9k|        const int dst_w = (f->sr_cur.p.p.w + ss_hor) >> ss_hor;
 2078|  44.9k|        const int src_w = (4 * f->bw + ss_hor) >> ss_hor;
 2079|  44.9k|        const int img_h = (f->cur.p.h - sbsz * 4 * sby + ss_ver) >> ss_ver;
 2080|       |
 2081|  44.9k|        f->dsp->mc.resize(dst, dst_stride, src, src_stride, dst_w,
 2082|  44.9k|                          imin(img_h, h_end) + h_start, src_w,
 2083|  44.9k|                          f->resize_step[!!pl], f->resize_start[!!pl]
 2084|  44.9k|                          HIGHBD_CALL_SUFFIX);
  ------------------
  |  |   73|  44.9k|#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
  ------------------
 2085|  44.9k|    }
 2086|  25.4k|}
dav1d_filter_sbrow_lr_16bpc:
 2088|  54.5k|void bytefn(dav1d_filter_sbrow_lr)(Dav1dFrameContext *const f, const int sby) {
 2089|  54.5k|    if (!(f->c->inloop_filters & DAV1D_INLOOPFILTER_RESTORATION)) return;
  ------------------
  |  Branch (2089:9): [True: 0, False: 54.5k]
  ------------------
 2090|  54.5k|    const int y = sby * f->sb_step * 4;
 2091|  54.5k|    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
 2092|  54.5k|    pixel *const sr_p[3] = {
 2093|  54.5k|        f->lf.sr_p[0] + y * PXSTRIDE(f->sr_cur.p.stride[0]),
 2094|  54.5k|        f->lf.sr_p[1] + (y * PXSTRIDE(f->sr_cur.p.stride[1]) >> ss_ver),
 2095|  54.5k|        f->lf.sr_p[2] + (y * PXSTRIDE(f->sr_cur.p.stride[1]) >> ss_ver)
 2096|  54.5k|    };
 2097|  54.5k|    bytefn(dav1d_lr_sbrow)(f, sr_p, sby);
  ------------------
  |  |   87|  54.5k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  54.5k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2098|  54.5k|}
dav1d_backup_ipred_edge_16bpc:
 2111|   523k|void bytefn(dav1d_backup_ipred_edge)(Dav1dTaskContext *const t) {
 2112|   523k|    const Dav1dFrameContext *const f = t->f;
 2113|   523k|    Dav1dTileState *const ts = t->ts;
 2114|   523k|    const int sby = t->by >> f->sb_shift;
 2115|   523k|    const int sby_off = f->sb128w * 128 * sby;
 2116|   523k|    const int x_off = ts->tiling.col_start;
 2117|       |
 2118|   523k|    const pixel *const y =
 2119|   523k|        ((const pixel *) f->cur.data[0]) + x_off * 4 +
 2120|   523k|                    ((t->by + f->sb_step) * 4 - 1) * PXSTRIDE(f->cur.stride[0]);
 2121|   523k|    pixel_copy(&f->ipred_edge[0][sby_off + x_off * 4], y,
  ------------------
  |  |   65|   523k|#define pixel_copy(a, b, c) memcpy(a, b, (c) << 1)
  ------------------
 2122|   523k|               4 * (ts->tiling.col_end - x_off));
 2123|       |
 2124|   523k|    if (f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400) {
  ------------------
  |  Branch (2124:9): [True: 193k, False: 330k]
  ------------------
 2125|   193k|        const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
 2126|   193k|        const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
 2127|       |
 2128|   193k|        const ptrdiff_t uv_off = (x_off * 4 >> ss_hor) +
 2129|   193k|            (((t->by + f->sb_step) * 4 >> ss_ver) - 1) * PXSTRIDE(f->cur.stride[1]);
 2130|   579k|        for (int pl = 1; pl <= 2; pl++)
  ------------------
  |  Branch (2130:26): [True: 386k, False: 193k]
  ------------------
 2131|   386k|            pixel_copy(&f->ipred_edge[pl][sby_off + (x_off * 4 >> ss_hor)],
  ------------------
  |  |   65|   386k|#define pixel_copy(a, b, c) memcpy(a, b, (c) << 1)
  ------------------
 2132|   193k|                       &((const pixel *) f->cur.data[pl])[uv_off],
 2133|   193k|                       4 * (ts->tiling.col_end - x_off) >> ss_hor);
 2134|   193k|    }
 2135|   523k|}
dav1d_copy_pal_block_y_16bpc:
 2141|  36.9k|{
 2142|  36.9k|    const Dav1dFrameContext *const f = t->f;
 2143|  36.9k|    pixel *const pal = t->frame_thread.pass ?
  ------------------
  |  Branch (2143:24): [True: 36.9k, False: 0]
  ------------------
 2144|  36.9k|        f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
 2145|  36.9k|                            ((t->bx >> 1) + (t->by & 1))][0] :
 2146|  36.9k|        bytefn(t->scratch.pal)[0];
  ------------------
  |  |   87|      0|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  36.9k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2147|   186k|    for (int x = 0; x < bw4; x++)
  ------------------
  |  Branch (2147:21): [True: 149k, False: 36.9k]
  ------------------
 2148|   149k|        memcpy(bytefn(t->al_pal)[0][bx4 + x][0], pal, 8 * sizeof(pixel));
  ------------------
  |  |   87|   149k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|   149k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2149|   138k|    for (int y = 0; y < bh4; y++)
  ------------------
  |  Branch (2149:21): [True: 101k, False: 36.9k]
  ------------------
 2150|   101k|        memcpy(bytefn(t->al_pal)[1][by4 + y][0], pal, 8 * sizeof(pixel));
  ------------------
  |  |   87|   101k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|   101k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2151|  36.9k|}
dav1d_copy_pal_block_uv_16bpc:
 2157|  8.67k|{
 2158|  8.67k|    const Dav1dFrameContext *const f = t->f;
 2159|  8.67k|    const pixel (*const pal)[8] = t->frame_thread.pass ?
  ------------------
  |  Branch (2159:35): [True: 8.67k, False: 0]
  ------------------
 2160|  8.67k|        f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
 2161|  8.67k|                            ((t->bx >> 1) + (t->by & 1))] :
 2162|  8.67k|        bytefn(t->scratch.pal);
  ------------------
  |  |   87|      0|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|      0|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2163|       |    // see aomedia bug 2183 for why we use luma coordinates here
 2164|  26.0k|    for (int pl = 1; pl <= 2; pl++) {
  ------------------
  |  Branch (2164:22): [True: 17.3k, False: 8.67k]
  ------------------
 2165|  92.4k|        for (int x = 0; x < bw4; x++)
  ------------------
  |  Branch (2165:25): [True: 75.1k, False: 17.3k]
  ------------------
 2166|  75.1k|            memcpy(bytefn(t->al_pal)[0][bx4 + x][pl], pal[pl], 8 * sizeof(pixel));
  ------------------
  |  |   87|  75.1k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  75.1k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2167|  80.5k|        for (int y = 0; y < bh4; y++)
  ------------------
  |  Branch (2167:25): [True: 63.2k, False: 17.3k]
  ------------------
 2168|  63.2k|            memcpy(bytefn(t->al_pal)[1][by4 + y][pl], pal[pl], 8 * sizeof(pixel));
  ------------------
  |  |   87|  63.2k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  63.2k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2169|  17.3k|    }
 2170|  8.67k|}
dav1d_read_pal_plane_16bpc:
 2175|  45.6k|{
 2176|  45.6k|    Dav1dTileState *const ts = t->ts;
 2177|  45.6k|    const Dav1dFrameContext *const f = t->f;
 2178|  45.6k|    const int pal_sz = b->pal_sz[pl] = dav1d_msac_decode_symbol_adapt8(&ts->msac,
  ------------------
  |  |   48|  45.6k|#define dav1d_msac_decode_symbol_adapt8  dav1d_msac_decode_symbol_adapt8_sse2
  ------------------
 2179|  45.6k|                                           ts->cdf.m.pal_sz[pl][sz_ctx], 6) + 2;
 2180|  45.6k|    pixel cache[16], used_cache[8];
 2181|  45.6k|    int l_cache = pl ? t->pal_sz_uv[1][by4] : t->l.pal_sz[by4];
  ------------------
  |  Branch (2181:19): [True: 8.67k, False: 36.9k]
  ------------------
 2182|  45.6k|    int n_cache = 0;
 2183|       |    // don't reuse above palette outside SB64 boundaries
 2184|  45.6k|    int a_cache = by4 & 15 ? pl ? t->pal_sz_uv[0][bx4] : t->a->pal_sz[bx4] : 0;
  ------------------
  |  Branch (2184:19): [True: 39.4k, False: 6.16k]
  |  Branch (2184:30): [True: 7.22k, False: 32.2k]
  ------------------
 2185|  45.6k|    const pixel *l = bytefn(t->al_pal)[1][by4][pl];
  ------------------
  |  |   87|  45.6k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  45.6k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2186|  45.6k|    const pixel *a = bytefn(t->al_pal)[0][bx4][pl];
  ------------------
  |  |   87|  45.6k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  45.6k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2187|       |
 2188|       |    // fill/sort cache
 2189|   101k|    while (l_cache && a_cache) {
  ------------------
  |  Branch (2189:12): [True: 71.5k, False: 30.4k]
  |  Branch (2189:23): [True: 56.3k, False: 15.1k]
  ------------------
 2190|  56.3k|        if (*l < *a) {
  ------------------
  |  Branch (2190:13): [True: 25.8k, False: 30.5k]
  ------------------
 2191|  25.8k|            if (!n_cache || cache[n_cache - 1] != *l)
  ------------------
  |  Branch (2191:17): [True: 4.76k, False: 21.0k]
  |  Branch (2191:29): [True: 20.4k, False: 566]
  ------------------
 2192|  25.2k|                cache[n_cache++] = *l;
 2193|  25.8k|            l++;
 2194|  25.8k|            l_cache--;
 2195|  30.5k|        } else {
 2196|  30.5k|            if (*a == *l) {
  ------------------
  |  Branch (2196:17): [True: 10.0k, False: 20.4k]
  ------------------
 2197|  10.0k|                l++;
 2198|  10.0k|                l_cache--;
 2199|  10.0k|            }
 2200|  30.5k|            if (!n_cache || cache[n_cache - 1] != *a)
  ------------------
  |  Branch (2200:17): [True: 4.11k, False: 26.4k]
  |  Branch (2200:29): [True: 26.0k, False: 395]
  ------------------
 2201|  30.1k|                cache[n_cache++] = *a;
 2202|  30.5k|            a++;
 2203|  30.5k|            a_cache--;
 2204|  30.5k|        }
 2205|  56.3k|    }
 2206|  45.6k|    if (l_cache) {
  ------------------
  |  Branch (2206:9): [True: 15.2k, False: 30.4k]
  ------------------
 2207|  65.5k|        do {
 2208|  65.5k|            if (!n_cache || cache[n_cache - 1] != *l)
  ------------------
  |  Branch (2208:17): [True: 12.1k, False: 53.4k]
  |  Branch (2208:29): [True: 44.4k, False: 8.94k]
  ------------------
 2209|  56.6k|                cache[n_cache++] = *l;
 2210|  65.5k|            l++;
 2211|  65.5k|        } while (--l_cache > 0);
  ------------------
  |  Branch (2211:18): [True: 50.3k, False: 15.2k]
  ------------------
 2212|  30.4k|    } else if (a_cache) {
  ------------------
  |  Branch (2212:16): [True: 12.9k, False: 17.4k]
  ------------------
 2213|  56.4k|        do {
 2214|  56.4k|            if (!n_cache || cache[n_cache - 1] != *a)
  ------------------
  |  Branch (2214:17): [True: 8.42k, False: 48.0k]
  |  Branch (2214:29): [True: 40.1k, False: 7.84k]
  ------------------
 2215|  48.5k|                cache[n_cache++] = *a;
 2216|  56.4k|            a++;
 2217|  56.4k|        } while (--a_cache > 0);
  ------------------
  |  Branch (2217:18): [True: 43.5k, False: 12.9k]
  ------------------
 2218|  12.9k|    }
 2219|       |
 2220|       |    // find reused cache entries
 2221|  45.6k|    int i = 0;
 2222|   188k|    for (int n = 0; n < n_cache && i < pal_sz; n++)
  ------------------
  |  Branch (2222:21): [True: 148k, False: 39.5k]
  |  Branch (2222:36): [True: 142k, False: 6.10k]
  ------------------
 2223|   142k|        if (dav1d_msac_decode_bool_equi(&ts->msac))
  ------------------
  |  |   53|   142k|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
  |  Branch (2223:13): [True: 60.9k, False: 81.6k]
  ------------------
 2224|  60.9k|            used_cache[i++] = cache[n];
 2225|  45.6k|    const int n_used_cache = i;
 2226|       |
 2227|       |    // parse new entries
 2228|  45.6k|    pixel *const pal = t->frame_thread.pass ?
  ------------------
  |  Branch (2228:24): [True: 45.6k, False: 18.4E]
  ------------------
 2229|  45.6k|        f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
 2230|  45.6k|                            ((t->bx >> 1) + (t->by & 1))][pl] :
 2231|  18.4E|        bytefn(t->scratch.pal)[pl];
  ------------------
  |  |   87|  18.4E|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  45.6k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2232|  45.6k|    if (i < pal_sz) {
  ------------------
  |  Branch (2232:9): [True: 38.2k, False: 7.38k]
  ------------------
 2233|  38.2k|        const int bpc = BITDEPTH == 8 ? 8 : f->cur.p.bpc;
  ------------------
  |  Branch (2233:25): [Folded, False: 38.2k]
  ------------------
 2234|  38.2k|        int prev = pal[i++] = dav1d_msac_decode_bools(&ts->msac, bpc);
 2235|       |
 2236|  38.2k|        if (i < pal_sz) {
  ------------------
  |  Branch (2236:13): [True: 33.6k, False: 4.59k]
  ------------------
 2237|  33.6k|            int bits = bpc - 3 + dav1d_msac_decode_bools(&ts->msac, 2);
 2238|  33.6k|            const int max = (1 << bpc) - 1;
 2239|       |
 2240|  88.9k|            do {
 2241|  88.9k|                const int delta = dav1d_msac_decode_bools(&ts->msac, bits);
 2242|  88.9k|                prev = pal[i++] = imin(prev + delta + !pl, max);
 2243|  88.9k|                if (prev + !pl >= max) {
  ------------------
  |  Branch (2243:21): [True: 13.2k, False: 75.6k]
  ------------------
 2244|  36.6k|                    for (; i < pal_sz; i++)
  ------------------
  |  Branch (2244:28): [True: 23.3k, False: 13.2k]
  ------------------
 2245|  23.3k|                        pal[i] = max;
 2246|  13.2k|                    break;
 2247|  13.2k|                }
 2248|  75.6k|                bits = imin(bits, 1 + ulog2(max - prev - !pl));
 2249|  75.6k|            } while (i < pal_sz);
  ------------------
  |  Branch (2249:22): [True: 55.3k, False: 20.3k]
  ------------------
 2250|  33.6k|        }
 2251|       |
 2252|       |        // merge cache+new entries
 2253|  38.2k|        int n = 0, m = n_used_cache;
 2254|   228k|        for (i = 0; i < pal_sz; i++) {
  ------------------
  |  Branch (2254:21): [True: 190k, False: 38.2k]
  ------------------
 2255|   190k|            if (n < n_used_cache && (m >= pal_sz || used_cache[n] <= pal[m])) {
  ------------------
  |  Branch (2255:17): [True: 72.2k, False: 117k]
  |  Branch (2255:38): [True: 14.6k, False: 57.5k]
  |  Branch (2255:53): [True: 24.8k, False: 32.7k]
  ------------------
 2256|  39.5k|                pal[i] = used_cache[n++];
 2257|   150k|            } else {
 2258|   150k|                assert(m < pal_sz);
  ------------------
  |  Branch (2258:17): [True: 150k, False: 18.4E]
  ------------------
 2259|   150k|                pal[i] = pal[m++];
 2260|   150k|            }
 2261|   190k|        }
 2262|  38.2k|    } else {
 2263|  7.38k|        memcpy(pal, used_cache, n_used_cache * sizeof(*used_cache));
 2264|  7.38k|    }
 2265|       |
 2266|  45.6k|    if (DEBUG_BLOCK_INFO) {
  ------------------
  |  |   34|  45.6k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 45.6k]
  |  |  ------------------
  |  |   35|  45.6k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  45.6k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 2267|      0|        printf("Post-pal[pl=%d,sz=%d,cache_size=%d,used_cache=%d]: r=%d, cache=",
 2268|      0|               pl, pal_sz, n_cache, n_used_cache, ts->msac.rng);
 2269|      0|        for (int n = 0; n < n_cache; n++)
  ------------------
  |  Branch (2269:25): [True: 0, False: 0]
  ------------------
 2270|      0|            printf("%c%02x", n ? ' ' : '[', cache[n]);
  ------------------
  |  Branch (2270:30): [True: 0, False: 0]
  ------------------
 2271|      0|        printf("%s, pal=", n_cache ? "]" : "[]");
  ------------------
  |  Branch (2271:28): [True: 0, False: 0]
  ------------------
 2272|      0|        for (int n = 0; n < pal_sz; n++)
  ------------------
  |  Branch (2272:25): [True: 0, False: 0]
  ------------------
 2273|      0|            printf("%c%02x", n ? ' ' : '[', pal[n]);
  ------------------
  |  Branch (2273:30): [True: 0, False: 0]
  ------------------
 2274|      0|        printf("]\n");
 2275|      0|    }
 2276|  45.6k|}
dav1d_read_pal_uv_16bpc:
 2280|  8.67k|{
 2281|  8.67k|    bytefn(dav1d_read_pal_plane)(t, b, 1, sz_ctx, bx4, by4);
  ------------------
  |  |   87|  8.67k|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  8.67k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2282|       |
 2283|       |    // V pal coding
 2284|  8.67k|    Dav1dTileState *const ts = t->ts;
 2285|  8.67k|    const Dav1dFrameContext *const f = t->f;
 2286|  8.67k|    pixel *const pal = t->frame_thread.pass ?
  ------------------
  |  Branch (2286:24): [True: 8.67k, False: 18.4E]
  ------------------
 2287|  8.67k|        f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
 2288|  8.67k|                            ((t->bx >> 1) + (t->by & 1))][2] :
 2289|  18.4E|        bytefn(t->scratch.pal)[2];
  ------------------
  |  |   87|  18.4E|#define bytefn(x) bitfn(x)
  |  |  ------------------
  |  |  |  |   77|  8.67k|#define bitfn(x) x##_16bpc
  |  |  ------------------
  ------------------
 2290|  8.67k|    const int bpc = BITDEPTH == 8 ? 8 : f->cur.p.bpc;
  ------------------
  |  Branch (2290:21): [Folded, False: 8.67k]
  ------------------
 2291|  8.67k|    if (dav1d_msac_decode_bool_equi(&ts->msac)) {
  ------------------
  |  |   53|  8.67k|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
  |  Branch (2291:9): [True: 4.67k, False: 4.00k]
  ------------------
 2292|  4.67k|        const int bits = bpc - 4 + dav1d_msac_decode_bools(&ts->msac, 2);
 2293|  4.67k|        int prev = pal[0] = dav1d_msac_decode_bools(&ts->msac, bpc);
 2294|  4.67k|        const int max = (1 << bpc) - 1;
 2295|  18.7k|        for (int i = 1; i < b->pal_sz[1]; i++) {
  ------------------
  |  Branch (2295:25): [True: 14.0k, False: 4.67k]
  ------------------
 2296|  14.0k|            int delta = dav1d_msac_decode_bools(&ts->msac, bits);
 2297|  14.0k|            if (delta && dav1d_msac_decode_bool_equi(&ts->msac)) delta = -delta;
  ------------------
  |  |   53|  13.6k|#define dav1d_msac_decode_bool_equi      dav1d_msac_decode_bool_equi_sse2
  ------------------
  |  Branch (2297:17): [True: 13.6k, False: 389]
  |  Branch (2297:26): [True: 7.07k, False: 6.56k]
  ------------------
 2298|  14.0k|            prev = pal[i] = (prev + delta) & max;
 2299|  14.0k|        }
 2300|  4.67k|    } else {
 2301|  18.1k|        for (int i = 0; i < b->pal_sz[1]; i++)
  ------------------
  |  Branch (2301:25): [True: 14.1k, False: 4.00k]
  ------------------
 2302|  14.1k|            pal[i] = dav1d_msac_decode_bools(&ts->msac, bpc);
 2303|  4.00k|    }
 2304|  8.67k|    if (DEBUG_BLOCK_INFO) {
  ------------------
  |  |   34|  8.67k|#define DEBUG_BLOCK_INFO 0 && \
  |  |  ------------------
  |  |  |  Branch (34:26): [Folded, False: 8.67k]
  |  |  ------------------
  |  |   35|  8.67k|        f->frame_hdr->frame_offset == 2 && t->by >= 0 && t->by < 4 && \
  |  |  ------------------
  |  |  |  Branch (35:9): [True: 0, False: 0]
  |  |  |  Branch (35:44): [True: 0, False: 0]
  |  |  |  Branch (35:58): [True: 0, False: 0]
  |  |  ------------------
  |  |   36|  8.67k|        t->bx >= 8 && t->bx < 12
  |  |  ------------------
  |  |  |  Branch (36:9): [True: 0, False: 0]
  |  |  |  Branch (36:23): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 2305|      0|        printf("Post-pal[pl=2]: r=%d ", ts->msac.rng);
 2306|      0|        for (int n = 0; n < b->pal_sz[1]; n++)
  ------------------
  |  Branch (2306:25): [True: 0, False: 0]
  ------------------
 2307|      0|            printf("%c%02x", n ? ' ' : '[', pal[n]);
  ------------------
  |  Branch (2307:30): [True: 0, False: 0]
  ------------------
 2308|      0|        printf("]\n");
 2309|      0|    }
 2310|  8.67k|}

dav1d_ref_create:
   37|   335k|Dav1dRef *dav1d_ref_create(const enum AllocationType type, size_t size) {
   38|   335k|    size = (size + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
   39|       |
   40|   335k|    uint8_t *const data = dav1d_alloc_aligned(type, size + sizeof(Dav1dRef), 64);
  ------------------
  |  |  134|   335k|#define dav1d_alloc_aligned(type, sz, align) dav1d_alloc_aligned_internal(sz, align)
  ------------------
   41|   335k|    if (!data) return NULL;
  ------------------
  |  Branch (41:9): [True: 0, False: 335k]
  ------------------
   42|       |
   43|   335k|    Dav1dRef *const res = (Dav1dRef*)(data + size);
   44|   335k|    res->const_data = res->user_data = res->data = data;
   45|   335k|    atomic_init(&res->ref_cnt, 1);
   46|   335k|    res->free_ref = 0;
   47|   335k|    res->free_callback = default_free_callback;
   48|       |
   49|   335k|    return res;
   50|   335k|}
dav1d_ref_create_using_pool:
   56|   620k|Dav1dRef *dav1d_ref_create_using_pool(Dav1dMemPool *const pool, size_t size) {
   57|   620k|    void *const buf = dav1d_mem_pool_pop(pool, size);
   58|   620k|    if (!buf) return NULL;
  ------------------
  |  Branch (58:9): [True: 0, False: 620k]
  ------------------
   59|       |
   60|       |    /* Store Dav1dRef inside the Dav1dMemPoolBuffer alignment padding */
   61|   620k|    assert(sizeof(Dav1dMemPoolBuffer) + sizeof(Dav1dRef) <= 64);
  ------------------
  |  Branch (61:5): [True: 620k, Folded]
  ------------------
   62|   620k|    Dav1dRef *const res = &((Dav1dRef*)buf)[-1];
   63|   620k|    res->data = buf;
   64|   620k|    res->const_data = pool;
   65|   620k|    atomic_init(&res->ref_cnt, 1);
   66|   620k|    res->free_ref = 0;
   67|   620k|    res->free_callback = pool_free_callback;
   68|   620k|    res->user_data = buf;
   69|       |
   70|   620k|    return res;
   71|   620k|}
dav1d_ref_dec:
   73|  40.3M|void dav1d_ref_dec(Dav1dRef **const pref) {
   74|  40.3M|    assert(pref != NULL);
  ------------------
  |  Branch (74:5): [True: 40.3M, False: 5]
  ------------------
   75|       |
   76|  40.3M|    Dav1dRef *const ref = *pref;
   77|  40.3M|    if (!ref) return;
  ------------------
  |  Branch (77:9): [True: 26.7M, False: 13.6M]
  ------------------
   78|       |
   79|  13.6M|    *pref = NULL;
   80|  13.6M|    if (atomic_fetch_sub(&ref->ref_cnt, 1) == 1) {
  ------------------
  |  Branch (80:9): [True: 1.26M, False: 12.3M]
  ------------------
   81|  1.26M|        const int free_ref = ref->free_ref;
   82|  1.26M|        ref->free_callback(ref->const_data, ref->user_data);
   83|  1.26M|        if (free_ref) dav1d_free(ref);
  ------------------
  |  |  135|      0|#define dav1d_free(ptr) free(ptr)
  ------------------
  |  Branch (83:13): [True: 0, False: 1.26M]
  ------------------
   84|  1.26M|    }
   85|  13.6M|}
ref.c:default_free_callback:
   32|   335k|static void default_free_callback(const uint8_t *const data, void *const user_data) {
   33|   335k|    assert(data == user_data);
  ------------------
  |  Branch (33:5): [True: 335k, False: 18.4E]
  ------------------
   34|   335k|    dav1d_free_aligned(user_data);
  ------------------
  |  |  136|   335k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
   35|   335k|}
ref.c:pool_free_callback:
   52|   620k|static void pool_free_callback(const uint8_t *const data, void *const user_data) {
   53|   620k|    dav1d_mem_pool_push((Dav1dMemPool*)data, user_data);
   54|   620k|}

obu.c:dav1d_ref_is_writable:
   73|   328k|static inline int dav1d_ref_is_writable(Dav1dRef *const ref) {
   74|   328k|    return atomic_load(&ref->ref_cnt) == 1 && ref->data;
  ------------------
  |  Branch (74:12): [True: 328k, False: 0]
  |  Branch (74:47): [True: 328k, False: 0]
  ------------------
   75|   328k|}
obu.c:dav1d_ref_init:
   59|    977|{
   60|    977|    ref->data = NULL;
   61|    977|    ref->const_data = ptr;
   62|       |    atomic_init(&ref->ref_cnt, 1);
   63|    977|    ref->free_ref = free_ref;
   64|    977|    ref->free_callback = free_callback;
   65|    977|    ref->user_data = user_data;
   66|    977|    return ref;
   67|    977|}
obu.c:dav1d_ref_inc:
   69|  8.84k|static inline void dav1d_ref_inc(Dav1dRef *const ref) {
   70|       |    atomic_fetch_add_explicit(&ref->ref_cnt, 1, memory_order_relaxed);
   71|  8.84k|}
picture.c:dav1d_ref_inc:
   69|  10.3M|static inline void dav1d_ref_inc(Dav1dRef *const ref) {
   70|       |    atomic_fetch_add_explicit(&ref->ref_cnt, 1, memory_order_relaxed);
   71|  10.3M|}
picture.c:dav1d_ref_init:
   59|   313k|{
   60|   313k|    ref->data = NULL;
   61|   313k|    ref->const_data = ptr;
   62|       |    atomic_init(&ref->ref_cnt, 1);
   63|   313k|    ref->free_ref = free_ref;
   64|   313k|    ref->free_callback = free_callback;
   65|   313k|    ref->user_data = user_data;
   66|   313k|    return ref;
   67|   313k|}
cdf.c:dav1d_ref_inc:
   69|   344k|static inline void dav1d_ref_inc(Dav1dRef *const ref) {
   70|       |    atomic_fetch_add_explicit(&ref->ref_cnt, 1, memory_order_relaxed);
   71|   344k|}
data.c:dav1d_ref_inc:
   69|   621k|static inline void dav1d_ref_inc(Dav1dRef *const ref) {
   70|       |    atomic_fetch_add_explicit(&ref->ref_cnt, 1, memory_order_relaxed);
   71|   621k|}
decode.c:dav1d_ref_inc:
   69|  1.03M|static inline void dav1d_ref_inc(Dav1dRef *const ref) {
   70|       |    atomic_fetch_add_explicit(&ref->ref_cnt, 1, memory_order_relaxed);
   71|  1.03M|}

dav1d_refmvs_find:
  354|  4.02M|{
  355|  4.02M|    const refmvs_frame *const rf = rt->rf;
  356|  4.02M|    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
  357|  4.02M|    const int bw4 = b_dim[0], w4 = imin(imin(bw4, 16), rt->tile_col.end - bx4);
  358|  4.02M|    const int bh4 = b_dim[1], h4 = imin(imin(bh4, 16), rt->tile_row.end - by4);
  359|  4.02M|    mv gmv[2], tgmv[2];
  360|       |
  361|  4.02M|    *cnt = 0;
  362|  4.02M|    assert(ref.ref[0] >=  0 && ref.ref[0] <= 8 &&
  ------------------
  |  Branch (362:5): [True: 4.02M, False: 18.4E]
  |  Branch (362:5): [True: 4.02M, False: 328]
  |  Branch (362:5): [True: 4.02M, False: 18.4E]
  |  Branch (362:5): [True: 4.02M, False: 18.4E]
  ------------------
  363|  4.02M|           ref.ref[1] >= -1 && ref.ref[1] <= 8);
  364|  4.02M|    if (ref.ref[0] > 0) {
  ------------------
  |  Branch (364:9): [True: 3.28M, False: 745k]
  ------------------
  365|  3.28M|        tgmv[0] = get_gmv_2d(&rf->frm_hdr->gmv[ref.ref[0] - 1],
  366|  3.28M|                             bx4, by4, bw4, bh4, rf->frm_hdr);
  367|  3.28M|        gmv[0] = rf->frm_hdr->gmv[ref.ref[0] - 1].type > DAV1D_WM_TYPE_TRANSLATION ?
  ------------------
  |  Branch (367:18): [True: 928k, False: 2.35M]
  ------------------
  368|  2.35M|                 tgmv[0] : (mv) { .n = INVALID_MV };
  ------------------
  |  |   40|  2.35M|#define INVALID_MV 0x80008000
  ------------------
  369|  3.28M|    } else {
  370|   745k|        tgmv[0] = (mv) { .n = 0 };
  371|   745k|        gmv[0] = (mv) { .n = INVALID_MV };
  ------------------
  |  |   40|   745k|#define INVALID_MV 0x80008000
  ------------------
  372|   745k|    }
  373|  4.02M|    if (ref.ref[1] > 0) {
  ------------------
  |  Branch (373:9): [True: 342k, False: 3.68M]
  ------------------
  374|   342k|        tgmv[1] = get_gmv_2d(&rf->frm_hdr->gmv[ref.ref[1] - 1],
  375|   342k|                             bx4, by4, bw4, bh4, rf->frm_hdr);
  376|   342k|        gmv[1] = rf->frm_hdr->gmv[ref.ref[1] - 1].type > DAV1D_WM_TYPE_TRANSLATION ?
  ------------------
  |  Branch (376:18): [True: 82.5k, False: 260k]
  ------------------
  377|   260k|                 tgmv[1] : (mv) { .n = INVALID_MV };
  ------------------
  |  |   40|   260k|#define INVALID_MV 0x80008000
  ------------------
  378|   342k|    }
  379|       |
  380|       |    // top
  381|  4.02M|    int have_newmv = 0, have_col_mvs = 0, have_row_mvs = 0;
  382|  4.02M|    unsigned max_rows = 0, n_rows = ~0;
  383|  4.02M|    const refmvs_block *b_top;
  384|  4.02M|    if (by4 > rt->tile_row.start) {
  ------------------
  |  Branch (384:9): [True: 3.60M, False: 418k]
  ------------------
  385|  3.60M|        max_rows = imin((by4 - rt->tile_row.start + 1) >> 1, 2 + (bh4 > 1));
  386|  3.60M|        b_top = &rt->r[(by4 & 31) - 1 + 5][bx4];
  387|  3.60M|        n_rows = scan_row(mvstack, cnt, ref, gmv, b_top,
  388|  3.60M|                          bw4, w4, max_rows, bw4 >= 16 ? 4 : 1,
  ------------------
  |  Branch (388:46): [True: 1.16M, False: 2.44M]
  ------------------
  389|  3.60M|                          &have_newmv, &have_row_mvs);
  390|  3.60M|    }
  391|       |
  392|       |    // left
  393|  4.02M|    unsigned max_cols = 0, n_cols = ~0U;
  394|  4.02M|    refmvs_block *const *b_left;
  395|  4.02M|    if (bx4 > rt->tile_col.start) {
  ------------------
  |  Branch (395:9): [True: 3.05M, False: 968k]
  ------------------
  396|  3.05M|        max_cols = imin((bx4 - rt->tile_col.start + 1) >> 1, 2 + (bw4 > 1));
  397|  3.05M|        b_left = &rt->r[(by4 & 31) + 5];
  398|  3.05M|        n_cols = scan_col(mvstack, cnt, ref, gmv, b_left,
  399|  3.05M|                          bh4, h4, bx4 - 1, max_cols, bh4 >= 16 ? 4 : 1,
  ------------------
  |  Branch (399:55): [True: 589k, False: 2.46M]
  ------------------
  400|  3.05M|                          &have_newmv, &have_col_mvs);
  401|  3.05M|    }
  402|       |
  403|       |    // top/right
  404|  4.02M|    if (n_rows != ~0U && edge_flags & EDGE_I444_TOP_HAS_RIGHT &&
  ------------------
  |  Branch (404:9): [True: 3.59M, False: 433k]
  |  Branch (404:26): [True: 2.53M, False: 1.05M]
  ------------------
  405|  2.53M|        imax(bw4, bh4) <= 16 && bw4 + bx4 < rt->tile_col.end)
  ------------------
  |  Branch (405:9): [True: 2.23M, False: 302k]
  |  Branch (405:33): [True: 1.56M, False: 665k]
  ------------------
  406|  1.56M|    {
  407|  1.56M|        add_spatial_candidate(mvstack, cnt, 4, &b_top[bw4], ref, gmv,
  408|  1.56M|                              &have_newmv, &have_row_mvs);
  409|  1.56M|    }
  410|       |
  411|  4.02M|    const int nearest_match = have_col_mvs + have_row_mvs;
  412|  4.02M|    const int nearest_cnt = *cnt;
  413|  8.97M|    for (int n = 0; n < nearest_cnt; n++)
  ------------------
  |  Branch (413:21): [True: 4.95M, False: 4.02M]
  ------------------
  414|  4.95M|        mvstack[n].weight += 640;
  415|       |
  416|       |    // temporal
  417|  4.02M|    int globalmv_ctx = rf->frm_hdr->use_ref_frame_mvs;
  418|  4.02M|    if (rf->use_ref_frame_mvs) {
  ------------------
  |  Branch (418:9): [True: 716k, False: 3.31M]
  ------------------
  419|   716k|        const ptrdiff_t stride = rf->rp_stride;
  420|   716k|        const int by8 = by4 >> 1, bx8 = bx4 >> 1;
  421|   716k|        const refmvs_temporal_block *const rbi = &rt->rp_proj[(by8 & 15) * stride + bx8];
  422|   716k|        const refmvs_temporal_block *rb = rbi;
  423|   716k|        const int step_h = bw4 >= 16 ? 2 : 1, step_v = bh4 >= 16 ? 2 : 1;
  ------------------
  |  Branch (423:28): [True: 240k, False: 475k]
  |  Branch (423:56): [True: 241k, False: 474k]
  ------------------
  424|   716k|        const int w8 = imin((w4 + 1) >> 1, 8), h8 = imin((h4 + 1) >> 1, 8);
  425|  2.54M|        for (int y = 0; y < h8; y += step_v) {
  ------------------
  |  Branch (425:25): [True: 1.83M, False: 716k]
  ------------------
  426|  7.06M|            for (int x = 0; x < w8; x+= step_h) {
  ------------------
  |  Branch (426:29): [True: 5.22M, False: 1.83M]
  ------------------
  427|  5.22M|                add_temporal_candidate(rf, mvstack, cnt, &rb[x], ref,
  428|  5.22M|                                       !(x | y) ? &globalmv_ctx : NULL, tgmv);
  ------------------
  |  Branch (428:40): [True: 717k, False: 4.51M]
  ------------------
  429|  5.22M|            }
  430|  1.83M|            rb += stride * step_v;
  431|  1.83M|        }
  432|   716k|        if (imin(bw4, bh4) >= 2 && imax(bw4, bh4) < 16) {
  ------------------
  |  Branch (432:13): [True: 628k, False: 87.2k]
  |  Branch (432:36): [True: 379k, False: 249k]
  ------------------
  433|   379k|            const int bh8 = bh4 >> 1, bw8 = bw4 >> 1;
  434|   379k|            rb = &rbi[bh8 * stride];
  435|   379k|            const int has_bottom = by8 + bh8 < imin(rt->tile_row.end >> 1,
  436|   379k|                                                    (by8 & ~7) + 8);
  437|   379k|            if (has_bottom && bx8 - 1 >= imax(rt->tile_col.start >> 1, bx8 & ~7)) {
  ------------------
  |  Branch (437:17): [True: 264k, False: 115k]
  |  Branch (437:31): [True: 198k, False: 66.2k]
  ------------------
  438|   198k|                add_temporal_candidate(rf, mvstack, cnt, &rb[-1], ref,
  439|   198k|                                       NULL, NULL);
  440|   198k|            }
  441|   379k|            if (bx8 + bw8 < imin(rt->tile_col.end >> 1, (bx8 & ~7) + 8)) {
  ------------------
  |  Branch (441:17): [True: 281k, False: 98.2k]
  ------------------
  442|   281k|                if (has_bottom) {
  ------------------
  |  Branch (442:21): [True: 196k, False: 85.1k]
  ------------------
  443|   196k|                    add_temporal_candidate(rf, mvstack, cnt, &rb[bw8], ref,
  444|   196k|                                           NULL, NULL);
  445|   196k|                }
  446|   281k|                if (by8 + bh8 - 1 < imin(rt->tile_row.end >> 1, (by8 & ~7) + 8)) {
  ------------------
  |  Branch (446:21): [True: 281k, False: 261]
  ------------------
  447|   281k|                    add_temporal_candidate(rf, mvstack, cnt, &rb[bw8 - stride],
  448|   281k|                                           ref, NULL, NULL);
  449|   281k|                }
  450|   281k|            }
  451|   379k|        }
  452|   716k|    }
  453|  4.02M|    assert(*cnt <= 8);
  ------------------
  |  Branch (453:5): [True: 4.01M, False: 12.6k]
  ------------------
  454|       |
  455|       |    // top/left (which, confusingly, is part of "secondary" references)
  456|  4.01M|    int have_dummy_newmv_match;
  457|  4.01M|    if ((n_rows | n_cols) != ~0U) {
  ------------------
  |  Branch (457:9): [True: 2.66M, False: 1.35M]
  ------------------
  458|  2.66M|        add_spatial_candidate(mvstack, cnt, 4, &b_top[-1], ref, gmv,
  459|  2.66M|                              &have_dummy_newmv_match, &have_row_mvs);
  460|  2.66M|    }
  461|       |
  462|       |    // "secondary" (non-direct neighbour) top & left edges
  463|       |    // what is different about secondary is that everything is now in 8x8 resolution
  464|  12.0M|    for (int n = 2; n <= 3; n++) {
  ------------------
  |  Branch (464:21): [True: 8.00M, False: 4.01M]
  ------------------
  465|  8.00M|        if ((unsigned) n > n_rows && (unsigned) n <= max_rows) {
  ------------------
  |  Branch (465:13): [True: 3.63M, False: 4.36M]
  |  Branch (465:38): [True: 2.83M, False: 802k]
  ------------------
  466|  2.83M|            n_rows += scan_row(mvstack, cnt, ref, gmv,
  467|  2.83M|                               &rt->r[(((by4 & 31) - 2 * n + 1) | 1) + 5][bx4 | 1],
  468|  2.83M|                               bw4, w4, 1 + max_rows - n, bw4 >= 16 ? 4 : 2,
  ------------------
  |  Branch (468:59): [True: 46.2k, False: 2.78M]
  ------------------
  469|  2.83M|                               &have_dummy_newmv_match, &have_row_mvs);
  470|  2.83M|        }
  471|       |
  472|  8.00M|        if ((unsigned) n > n_cols && (unsigned) n <= max_cols) {
  ------------------
  |  Branch (472:13): [True: 3.95M, False: 4.04M]
  |  Branch (472:38): [True: 3.25M, False: 695k]
  ------------------
  473|  3.26M|            n_cols += scan_col(mvstack, cnt, ref, gmv, &rt->r[((by4 & 31) | 1) + 5],
  474|  3.26M|                               bh4, h4, (bx4 - n * 2 + 1) | 1,
  475|  3.26M|                               1 + max_cols - n, bh4 >= 16 ? 4 : 2,
  ------------------
  |  Branch (475:50): [True: 291k, False: 2.96M]
  ------------------
  476|  3.26M|                               &have_dummy_newmv_match, &have_col_mvs);
  477|  3.26M|        }
  478|  8.00M|    }
  479|  4.01M|    assert(*cnt <= 8);
  ------------------
  |  Branch (479:5): [True: 4.01M, False: 106]
  ------------------
  480|       |
  481|  4.01M|    const int ref_match_count = have_col_mvs + have_row_mvs;
  482|       |
  483|       |    // context build-up
  484|  4.01M|    int refmv_ctx, newmv_ctx;
  485|  4.01M|    switch (nearest_match) {
  ------------------
  |  Branch (485:13): [True: 4.02M, False: 18.4E]
  ------------------
  486|   472k|    case 0:
  ------------------
  |  Branch (486:5): [True: 472k, False: 3.54M]
  ------------------
  487|   472k|        refmv_ctx = imin(2, ref_match_count);
  488|   472k|        newmv_ctx = ref_match_count > 0;
  489|   472k|        break;
  490|  1.86M|    case 1:
  ------------------
  |  Branch (490:5): [True: 1.86M, False: 2.14M]
  ------------------
  491|  1.86M|        refmv_ctx = imin(ref_match_count * 3, 4);
  492|  1.86M|        newmv_ctx = 3 - have_newmv;
  493|  1.86M|        break;
  494|  1.68M|    case 2:
  ------------------
  |  Branch (494:5): [True: 1.68M, False: 2.32M]
  ------------------
  495|  1.68M|        refmv_ctx = 5;
  496|  1.68M|        newmv_ctx = 5 - have_newmv;
  497|  1.68M|        break;
  498|  4.01M|    }
  499|       |
  500|       |    // sorting (nearest, then "secondary")
  501|  4.01M|    int len = nearest_cnt;
  502|  8.09M|    while (len) {
  ------------------
  |  Branch (502:12): [True: 4.08M, False: 4.01M]
  ------------------
  503|  4.08M|        int last = 0;
  504|  5.71M|        for (int n = 1; n < len; n++) {
  ------------------
  |  Branch (504:25): [True: 1.63M, False: 4.08M]
  ------------------
  505|  1.63M|            if (mvstack[n - 1].weight < mvstack[n].weight) {
  ------------------
  |  Branch (505:17): [True: 675k, False: 958k]
  ------------------
  506|   675k|#define EXCHANGE(a, b) do { refmvs_candidate tmp = a; a = b; b = tmp; } while (0)
  507|   675k|                EXCHANGE(mvstack[n - 1], mvstack[n]);
  ------------------
  |  |  506|   675k|#define EXCHANGE(a, b) do { refmvs_candidate tmp = a; a = b; b = tmp; } while (0)
  |  |  ------------------
  |  |  |  Branch (506:80): [Folded, False: 675k]
  |  |  ------------------
  ------------------
  508|   675k|                last = n;
  509|   675k|            }
  510|  1.63M|        }
  511|  4.08M|        len = last;
  512|  4.08M|    }
  513|  4.01M|    len = *cnt;
  514|  5.97M|    while (len > nearest_cnt) {
  ------------------
  |  Branch (514:12): [True: 1.95M, False: 4.01M]
  ------------------
  515|  1.95M|        int last = nearest_cnt;
  516|  3.76M|        for (int n = nearest_cnt + 1; n < len; n++) {
  ------------------
  |  Branch (516:39): [True: 1.80M, False: 1.95M]
  ------------------
  517|  1.80M|            if (mvstack[n - 1].weight < mvstack[n].weight) {
  ------------------
  |  Branch (517:17): [True: 646k, False: 1.15M]
  ------------------
  518|   646k|                EXCHANGE(mvstack[n - 1], mvstack[n]);
  ------------------
  |  |  506|   646k|#define EXCHANGE(a, b) do { refmvs_candidate tmp = a; a = b; b = tmp; } while (0)
  |  |  ------------------
  |  |  |  Branch (506:80): [Folded, False: 646k]
  |  |  ------------------
  ------------------
  519|   646k|#undef EXCHANGE
  520|   646k|                last = n;
  521|   646k|            }
  522|  1.80M|        }
  523|  1.95M|        len = last;
  524|  1.95M|    }
  525|       |
  526|  4.01M|    if (ref.ref[1] > 0) {
  ------------------
  |  Branch (526:9): [True: 342k, False: 3.67M]
  ------------------
  527|   342k|        if (*cnt < 2) {
  ------------------
  |  Branch (527:13): [True: 194k, False: 148k]
  ------------------
  528|   194k|            const int sign0 = rf->sign_bias[ref.ref[0] - 1];
  529|   194k|            const int sign1 = rf->sign_bias[ref.ref[1] - 1];
  530|   194k|            const int sz4 = imin(w4, h4);
  531|   194k|            refmvs_candidate *const same = &mvstack[*cnt];
  532|   194k|            int same_count[4] = { 0 };
  533|       |
  534|       |            // non-self references in top
  535|   328k|            if (n_rows != ~0U) for (int x = 0; x < sz4;) {
  ------------------
  |  Branch (535:17): [True: 157k, False: 37.2k]
  |  Branch (535:48): [True: 170k, False: 157k]
  ------------------
  536|   170k|                const refmvs_block *const cand_b = &b_top[x];
  537|   170k|                add_compound_extended_candidate(same, same_count, cand_b,
  538|   170k|                                                sign0, sign1, ref, rf->sign_bias);
  539|   170k|                x += dav1d_block_dimensions[cand_b->bs][0];
  540|   170k|            }
  541|       |
  542|       |            // non-self references in left
  543|   390k|            if (n_cols != ~0U) for (int y = 0; y < sz4;) {
  ------------------
  |  Branch (543:17): [True: 180k, False: 13.6k]
  |  Branch (543:48): [True: 209k, False: 180k]
  ------------------
  544|   209k|                const refmvs_block *const cand_b = &b_left[y][bx4 - 1];
  545|   209k|                add_compound_extended_candidate(same, same_count, cand_b,
  546|   209k|                                                sign0, sign1, ref, rf->sign_bias);
  547|   209k|                y += dav1d_block_dimensions[cand_b->bs][1];
  548|   209k|            }
  549|       |
  550|   194k|            refmvs_candidate *const diff = &same[2];
  551|   194k|            const int *const diff_count = &same_count[2];
  552|       |
  553|       |            // merge together
  554|   583k|            for (int n = 0; n < 2; n++) {
  ------------------
  |  Branch (554:29): [True: 389k, False: 194k]
  ------------------
  555|   389k|                int m = same_count[n];
  556|       |
  557|   389k|                if (m >= 2) continue;
  ------------------
  |  Branch (557:21): [True: 119k, False: 269k]
  ------------------
  558|       |
  559|   269k|                const int l = diff_count[n];
  560|   269k|                if (l) {
  ------------------
  |  Branch (560:21): [True: 244k, False: 25.4k]
  ------------------
  561|   244k|                    same[m].mv.mv[n] = diff[0].mv.mv[n];
  562|   244k|                    if (++m == 2) continue;
  ------------------
  |  Branch (562:25): [True: 158k, False: 86.4k]
  ------------------
  563|  86.4k|                    if (l == 2) {
  ------------------
  |  Branch (563:25): [True: 67.9k, False: 18.4k]
  ------------------
  564|  67.9k|                        same[1].mv.mv[n] = diff[1].mv.mv[n];
  565|  67.9k|                        continue;
  566|  67.9k|                    }
  567|  86.4k|                }
  568|  57.2k|                do {
  569|  57.2k|                    same[m].mv.mv[n] = tgmv[n];
  570|  57.2k|                } while (++m < 2);
  ------------------
  |  Branch (570:26): [True: 13.2k, False: 43.9k]
  ------------------
  571|  43.9k|            }
  572|       |
  573|       |            // if the first extended was the same as the non-extended one,
  574|       |            // then replace it with the second extended one
  575|   194k|            int n = *cnt;
  576|   194k|            if (n == 1 && mvstack[0].mv.n == same[0].mv.n)
  ------------------
  |  Branch (576:17): [True: 112k, False: 81.4k]
  |  Branch (576:27): [True: 79.4k, False: 33.5k]
  ------------------
  577|  79.4k|                mvstack[1].mv = mvstack[2].mv;
  578|   276k|            do {
  579|   276k|                mvstack[n].weight = 2;
  580|   276k|            } while (++n < 2);
  ------------------
  |  Branch (580:22): [True: 81.8k, False: 194k]
  ------------------
  581|   194k|            *cnt = 2;
  582|   194k|        }
  583|       |
  584|       |        // clamping
  585|   342k|        const int left = -(bx4 + bw4 + 4) * 4 * 8;
  586|   342k|        const int right = (rf->iw4 - bx4 + 4) * 4 * 8;
  587|   342k|        const int top = -(by4 + bh4 + 4) * 4 * 8;
  588|   342k|        const int bottom = (rf->ih4 - by4 + 4) * 4 * 8;
  589|       |
  590|   342k|        const int n_refmvs = *cnt;
  591|   342k|        int n = 0;
  592|   868k|        do {
  593|   868k|            mvstack[n].mv.mv[0].x = iclip(mvstack[n].mv.mv[0].x, left, right);
  594|   868k|            mvstack[n].mv.mv[0].y = iclip(mvstack[n].mv.mv[0].y, top, bottom);
  595|   868k|            mvstack[n].mv.mv[1].x = iclip(mvstack[n].mv.mv[1].x, left, right);
  596|   868k|            mvstack[n].mv.mv[1].y = iclip(mvstack[n].mv.mv[1].y, top, bottom);
  597|   868k|        } while (++n < n_refmvs);
  ------------------
  |  Branch (597:18): [True: 526k, False: 342k]
  ------------------
  598|       |
  599|   342k|        switch (refmv_ctx >> 1) {
  ------------------
  |  Branch (599:17): [True: 343k, False: 18.4E]
  ------------------
  600|   118k|        case 0:
  ------------------
  |  Branch (600:9): [True: 118k, False: 224k]
  ------------------
  601|   118k|            *ctx = imin(newmv_ctx, 1);
  602|   118k|            break;
  603|   122k|        case 1:
  ------------------
  |  Branch (603:9): [True: 122k, False: 220k]
  ------------------
  604|   122k|            *ctx = 1 + imin(newmv_ctx, 3);
  605|   122k|            break;
  606|   103k|        case 2:
  ------------------
  |  Branch (606:9): [True: 103k, False: 239k]
  ------------------
  607|   103k|            *ctx = iclip(3 + newmv_ctx, 4, 7);
  608|   103k|            break;
  609|   342k|        }
  610|       |
  611|   343k|        return;
  612|  3.67M|    } else if (*cnt < 2 && ref.ref[0] > 0) {
  ------------------
  |  Branch (612:16): [True: 2.12M, False: 1.54M]
  |  Branch (612:28): [True: 1.82M, False: 301k]
  ------------------
  613|  1.82M|        const int sign = rf->sign_bias[ref.ref[0] - 1];
  614|  1.82M|        const int sz4 = imin(w4, h4);
  615|       |
  616|       |        // non-self references in top
  617|  3.45M|        if (n_rows != ~0U) for (int x = 0; x < sz4 && *cnt < 2;) {
  ------------------
  |  Branch (617:13): [True: 1.70M, False: 118k]
  |  Branch (617:44): [True: 1.75M, False: 1.69M]
  |  Branch (617:55): [True: 1.74M, False: 6.72k]
  ------------------
  618|  1.74M|            const refmvs_block *const cand_b = &b_top[x];
  619|  1.74M|            add_single_extended_candidate(mvstack, cnt, cand_b, sign, rf->sign_bias);
  620|  1.74M|            x += dav1d_block_dimensions[cand_b->bs][0];
  621|  1.74M|        }
  622|       |
  623|       |        // non-self references in left
  624|  1.82M|        if (n_cols != ~0U) for (int y = 0; y < sz4 && *cnt < 2;) {
  ------------------
  |  Branch (624:13): [True: 918k, False: 907k]
  |  Branch (624:44): [True: 975k, False: 788k]
  |  Branch (624:55): [True: 845k, False: 129k]
  ------------------
  625|   845k|            const refmvs_block *const cand_b = &b_left[y][bx4 - 1];
  626|   845k|            add_single_extended_candidate(mvstack, cnt, cand_b, sign, rf->sign_bias);
  627|   845k|            y += dav1d_block_dimensions[cand_b->bs][1];
  628|   845k|        }
  629|  1.82M|    }
  630|  4.01M|    assert(*cnt <= 8);
  ------------------
  |  Branch (630:5): [True: 3.68M, False: 18.4E]
  ------------------
  631|       |
  632|       |    // clamping
  633|  3.68M|    int n_refmvs = *cnt;
  634|  3.68M|    if (n_refmvs) {
  ------------------
  |  Branch (634:9): [True: 3.50M, False: 175k]
  ------------------
  635|  3.50M|        const int left = -(bx4 + bw4 + 4) * 4 * 8;
  636|  3.50M|        const int right = (rf->iw4 - bx4 + 4) * 4 * 8;
  637|  3.50M|        const int top = -(by4 + bh4 + 4) * 4 * 8;
  638|  3.50M|        const int bottom = (rf->ih4 - by4 + 4) * 4 * 8;
  639|       |
  640|  3.50M|        int n = 0;
  641|  7.53M|        do {
  642|  7.53M|            mvstack[n].mv.mv[0].x = iclip(mvstack[n].mv.mv[0].x, left, right);
  643|  7.53M|            mvstack[n].mv.mv[0].y = iclip(mvstack[n].mv.mv[0].y, top, bottom);
  644|  7.53M|        } while (++n < n_refmvs);
  ------------------
  |  Branch (644:18): [True: 4.02M, False: 3.50M]
  ------------------
  645|  3.50M|    }
  646|       |
  647|  5.72M|    for (int n = *cnt; n < 2; n++)
  ------------------
  |  Branch (647:24): [True: 2.04M, False: 3.68M]
  ------------------
  648|  2.04M|        mvstack[n].mv.mv[0] = tgmv[0];
  649|       |
  650|  3.68M|    *ctx = (refmv_ctx << 4) | (globalmv_ctx << 3) | newmv_ctx;
  651|  3.68M|}
dav1d_refmvs_tile_sbrow_init:
  657|  2.51M|{
  658|  2.51M|    if (rf->n_tile_threads == 1) tile_row_idx = 0;
  ------------------
  |  Branch (658:9): [True: 0, False: 2.51M]
  ------------------
  659|  2.51M|    rt->rp_proj = &rf->rp_proj[16 * rf->rp_stride * tile_row_idx];
  660|  2.51M|    const ptrdiff_t r_stride = rf->rp_stride * 2;
  661|  2.51M|    const ptrdiff_t pass_off = (rf->n_frame_threads > 1 && pass == 2) ?
  ------------------
  |  Branch (661:33): [True: 2.51M, False: 46]
  |  Branch (661:60): [True: 1.20M, False: 1.30M]
  ------------------
  662|  1.30M|        35 * 2 * rf->n_blocks : 0;
  663|  2.51M|    refmvs_block *r = &rf->r[35 * r_stride * tile_row_idx + pass_off];
  664|  2.51M|    const int sbsz = rf->sbsz;
  665|  2.51M|    const int off = (sbsz * sby) & 16;
  666|  59.0M|    for (int i = 0; i < sbsz; i++, r += r_stride)
  ------------------
  |  Branch (666:21): [True: 56.5M, False: 2.51M]
  ------------------
  667|  56.5M|        rt->r[off + 5 + i] = r;
  668|  2.51M|    rt->r[off + 0] = r;
  669|  2.51M|    r += r_stride;
  670|  2.51M|    rt->r[off + 1] = NULL;
  671|  2.51M|    rt->r[off + 2] = r;
  672|  2.51M|    r += r_stride;
  673|  2.51M|    rt->r[off + 3] = NULL;
  674|  2.51M|    rt->r[off + 4] = r;
  675|  2.51M|    if (sby & 1) {
  ------------------
  |  Branch (675:9): [True: 1.10M, False: 1.40M]
  ------------------
  676|  1.10M|#define EXCHANGE(a, b) do { void *const tmp = a; a = b; b = tmp; } while (0)
  677|  1.10M|        EXCHANGE(rt->r[off + 0], rt->r[off + sbsz + 0]);
  ------------------
  |  |  676|  1.10M|#define EXCHANGE(a, b) do { void *const tmp = a; a = b; b = tmp; } while (0)
  |  |  ------------------
  |  |  |  Branch (676:75): [Folded, False: 1.10M]
  |  |  ------------------
  ------------------
  678|  1.10M|        EXCHANGE(rt->r[off + 2], rt->r[off + sbsz + 2]);
  ------------------
  |  |  676|  1.10M|#define EXCHANGE(a, b) do { void *const tmp = a; a = b; b = tmp; } while (0)
  |  |  ------------------
  |  |  |  Branch (676:75): [Folded, False: 1.10M]
  |  |  ------------------
  ------------------
  679|  1.10M|        EXCHANGE(rt->r[off + 4], rt->r[off + sbsz + 4]);
  ------------------
  |  |  676|  1.10M|#define EXCHANGE(a, b) do { void *const tmp = a; a = b; b = tmp; } while (0)
  |  |  ------------------
  |  |  |  Branch (676:75): [Folded, False: 1.10M]
  |  |  ------------------
  ------------------
  680|  1.10M|#undef EXCHANGE
  681|  1.10M|    }
  682|       |
  683|  2.51M|    rt->rf = rf;
  684|  2.51M|    rt->tile_row.start = tile_row_start4;
  685|  2.51M|    rt->tile_row.end = imin(tile_row_end4, rf->ih4);
  686|  2.51M|    rt->tile_col.start = tile_col_start4;
  687|  2.51M|    rt->tile_col.end = imin(tile_col_end4, rf->iw4);
  688|  2.51M|}
dav1d_refmvs_init_frame:
  807|   255k|{
  808|   255k|    const int rp_stride = ((frm_hdr->width[0] + 127) & ~127) >> 3;
  809|   255k|    const int n_tile_rows = n_tile_threads > 1 ? frm_hdr->tiling.rows : 1;
  ------------------
  |  Branch (809:29): [True: 255k, False: 2]
  ------------------
  810|   255k|    const int n_blocks = rp_stride * n_tile_rows;
  811|       |
  812|   255k|    rf->sbsz = 16 << seq_hdr->sb128;
  813|   255k|    rf->frm_hdr = frm_hdr;
  814|   255k|    rf->iw8 = (frm_hdr->width[0] + 7) >> 3;
  815|   255k|    rf->ih8 = (frm_hdr->height + 7) >> 3;
  816|   255k|    rf->iw4 = rf->iw8 << 1;
  817|   255k|    rf->ih4 = rf->ih8 << 1;
  818|   255k|    rf->rp = rp;
  819|   255k|    rf->rp_stride = rp_stride;
  820|   255k|    rf->n_tile_threads = n_tile_threads;
  821|   255k|    rf->n_frame_threads = n_frame_threads;
  822|       |
  823|   255k|    if (n_blocks != rf->n_blocks) {
  ------------------
  |  Branch (823:9): [True: 17.2k, False: 237k]
  ------------------
  824|  17.2k|        const size_t r_sz = sizeof(*rf->r) * 35 * 2 * n_blocks * (1 + (n_frame_threads > 1));
  825|  17.2k|        const size_t rp_proj_sz = sizeof(*rf->rp_proj) * 16 * n_blocks;
  826|       |        /* Note that sizeof(*rf->r) == 12, but it's accessed using 16-byte unaligned
  827|       |         * loads in save_tmvs() asm which can overread 4 bytes into rp_proj. */
  828|  17.2k|        dav1d_free_aligned(rf->r);
  ------------------
  |  |  136|  17.2k|#define dav1d_free_aligned(ptr) dav1d_free_aligned_internal(ptr)
  ------------------
  829|  17.2k|        rf->r = dav1d_alloc_aligned(ALLOC_REFMVS, r_sz + rp_proj_sz, 64);
  ------------------
  |  |  134|  17.2k|#define dav1d_alloc_aligned(type, sz, align) dav1d_alloc_aligned_internal(sz, align)
  ------------------
  830|  17.2k|        if (!rf->r) {
  ------------------
  |  Branch (830:13): [True: 0, False: 17.2k]
  ------------------
  831|      0|            rf->n_blocks = 0;
  832|      0|            return DAV1D_ERR(ENOMEM);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  833|      0|        }
  834|       |
  835|  17.2k|        rf->rp_proj = (refmvs_temporal_block*)((uintptr_t)rf->r + r_sz);
  836|  17.2k|        rf->n_blocks = n_blocks;
  837|  17.2k|    }
  838|       |
  839|   255k|    const int poc = frm_hdr->frame_offset;
  840|  2.03M|    for (int i = 0; i < 7; i++) {
  ------------------
  |  Branch (840:21): [True: 1.78M, False: 255k]
  ------------------
  841|  1.78M|        const int poc_diff = get_poc_diff(seq_hdr->order_hint_n_bits,
  842|  1.78M|                                          ref_poc[i], poc);
  843|  1.78M|        rf->sign_bias[i] = poc_diff > 0;
  844|  1.78M|        rf->mfmv_sign[i] = poc_diff < 0;
  845|  1.78M|        rf->pocdiff[i] = iclip(get_poc_diff(seq_hdr->order_hint_n_bits,
  846|  1.78M|                                            poc, ref_poc[i]), -31, 31);
  847|  1.78M|    }
  848|       |
  849|       |    // temporal MV setup
  850|   255k|    rf->n_mfmvs = 0;
  851|   255k|    rf->rp_ref = rp_ref;
  852|   255k|    if (frm_hdr->use_ref_frame_mvs && seq_hdr->order_hint_n_bits) {
  ------------------
  |  Branch (852:9): [True: 61.5k, False: 193k]
  |  Branch (852:39): [True: 61.5k, False: 0]
  ------------------
  853|  61.5k|        int total = 2;
  854|  61.5k|        if (rp_ref[0] && ref_ref_poc[0][6] != ref_poc[3] /* alt-of-last != gold */) {
  ------------------
  |  Branch (854:13): [True: 52.1k, False: 9.42k]
  |  Branch (854:26): [True: 38.4k, False: 13.6k]
  ------------------
  855|  38.4k|            rf->mfmv_ref[rf->n_mfmvs++] = 0; // last
  856|  38.4k|            total = 3;
  857|  38.4k|        }
  858|  61.5k|        if (rp_ref[4] && get_poc_diff(seq_hdr->order_hint_n_bits, ref_poc[4],
  ------------------
  |  Branch (858:13): [True: 52.0k, False: 9.47k]
  |  Branch (858:26): [True: 26.2k, False: 25.8k]
  ------------------
  859|  52.0k|                                      frm_hdr->frame_offset) > 0)
  860|  26.2k|        {
  861|  26.2k|            rf->mfmv_ref[rf->n_mfmvs++] = 4; // bwd
  862|  26.2k|        }
  863|  61.5k|        if (rp_ref[5] && get_poc_diff(seq_hdr->order_hint_n_bits, ref_poc[5],
  ------------------
  |  Branch (863:13): [True: 48.9k, False: 12.6k]
  |  Branch (863:26): [True: 25.3k, False: 23.5k]
  ------------------
  864|  48.9k|                                      frm_hdr->frame_offset) > 0)
  865|  25.3k|        {
  866|  25.3k|            rf->mfmv_ref[rf->n_mfmvs++] = 5; // altref2
  867|  25.3k|        }
  868|  61.5k|        if (rf->n_mfmvs < total && rp_ref[6] &&
  ------------------
  |  Branch (868:13): [True: 41.3k, False: 20.2k]
  |  Branch (868:36): [True: 28.5k, False: 12.8k]
  ------------------
  869|  28.5k|            get_poc_diff(seq_hdr->order_hint_n_bits, ref_poc[6],
  ------------------
  |  Branch (869:13): [True: 20.3k, False: 8.13k]
  ------------------
  870|  28.5k|                         frm_hdr->frame_offset) > 0)
  871|  20.3k|        {
  872|  20.3k|            rf->mfmv_ref[rf->n_mfmvs++] = 6; // altref
  873|  20.3k|        }
  874|  61.5k|        if (rf->n_mfmvs < total && rp_ref[1])
  ------------------
  |  Branch (874:13): [True: 30.5k, False: 31.0k]
  |  Branch (874:36): [True: 22.0k, False: 8.44k]
  ------------------
  875|  22.0k|            rf->mfmv_ref[rf->n_mfmvs++] = 1; // last2
  876|       |
  877|   193k|        for (int n = 0; n < rf->n_mfmvs; n++) {
  ------------------
  |  Branch (877:25): [True: 132k, False: 61.5k]
  ------------------
  878|   132k|            const int rpoc = ref_poc[rf->mfmv_ref[n]];
  879|   132k|            const int diff1 = get_poc_diff(seq_hdr->order_hint_n_bits,
  880|   132k|                                           rpoc, frm_hdr->frame_offset);
  881|   132k|            if (abs(diff1) > 31) {
  ------------------
  |  Branch (881:17): [True: 433, False: 131k]
  ------------------
  882|    433|                rf->mfmv_ref2cur[n] = INVALID_REF2CUR;
  ------------------
  |  |   41|    433|#define INVALID_REF2CUR (-32)
  ------------------
  883|   131k|            } else {
  884|   131k|                rf->mfmv_ref2cur[n] = rf->mfmv_ref[n] < 4 ? -diff1 : diff1;
  ------------------
  |  Branch (884:39): [True: 60.2k, False: 71.7k]
  ------------------
  885|  1.05M|                for (int m = 0; m < 7; m++) {
  ------------------
  |  Branch (885:33): [True: 922k, False: 131k]
  ------------------
  886|   922k|                    const int rrpoc = ref_ref_poc[rf->mfmv_ref[n]][m];
  887|   922k|                    const int diff2 = get_poc_diff(seq_hdr->order_hint_n_bits,
  888|   922k|                                                   rpoc, rrpoc);
  889|       |                    // unsigned comparison also catches the < 0 case
  890|   922k|                    rf->mfmv_ref2ref[n][m] = (unsigned) diff2 > 31U ? 0 : diff2;
  ------------------
  |  Branch (890:46): [True: 277k, False: 645k]
  ------------------
  891|   922k|                }
  892|   131k|            }
  893|   132k|        }
  894|  61.5k|    }
  895|   255k|    rf->use_ref_frame_mvs = rf->n_mfmvs > 0;
  896|       |
  897|   255k|    return 0;
  898|   255k|}
dav1d_refmvs_dsp_init:
  921|  10.2k|{
  922|  10.2k|    c->load_tmvs = load_tmvs_c;
  923|  10.2k|    c->save_tmvs = save_tmvs_c;
  924|  10.2k|    c->splat_mv = splat_mv_c;
  925|       |
  926|  10.2k|#if HAVE_ASM
  927|       |#if ARCH_AARCH64 || ARCH_ARM
  928|       |    refmvs_dsp_init_arm(c);
  929|       |#elif ARCH_LOONGARCH64
  930|       |    refmvs_dsp_init_loongarch(c);
  931|       |#elif ARCH_X86
  932|       |    refmvs_dsp_init_x86(c);
  933|  10.2k|#endif
  934|  10.2k|#endif
  935|  10.2k|}
refmvs.c:scan_row:
  102|  6.42M|{
  103|  6.42M|    const refmvs_block *cand_b = b;
  104|  6.42M|    const enum BlockSize first_cand_bs = cand_b->bs;
  105|  6.42M|    const uint8_t *const first_cand_b_dim = dav1d_block_dimensions[first_cand_bs];
  106|  6.42M|    int cand_bw4 = first_cand_b_dim[0];
  107|  6.42M|    int len = imax(step, imin(bw4, cand_bw4));
  108|       |
  109|  6.42M|    if (bw4 <= cand_bw4) {
  ------------------
  |  Branch (109:9): [True: 5.63M, False: 788k]
  ------------------
  110|       |        // FIXME weight can be higher for odd blocks (bx4 & 1), but then the
  111|       |        // position of the first block has to be odd already, i.e. not just
  112|       |        // for row_offset=-3/-5
  113|       |        // FIXME why can this not be cand_bw4?
  114|  5.63M|        const int weight = bw4 == 1 ? 2 :
  ------------------
  |  Branch (114:28): [True: 1.37M, False: 4.26M]
  ------------------
  115|  5.63M|                           imax(2, imin(2 * max_rows, first_cand_b_dim[1]));
  116|  5.63M|        add_spatial_candidate(mvstack, cnt, len * weight, cand_b, ref, gmv,
  117|  5.63M|                              have_newmv_match, have_refmv_match);
  118|  5.63M|        return weight >> 1;
  119|  5.63M|    }
  120|       |
  121|  1.62M|    for (int x = 0;;) {
  122|       |        // FIXME if we overhang above, we could fill a bitmask so we don't have
  123|       |        // to repeat the add_spatial_candidate() for the next row, but just increase
  124|       |        // the weight here
  125|  1.62M|        add_spatial_candidate(mvstack, cnt, len * 2, cand_b, ref, gmv,
  126|  1.62M|                              have_newmv_match, have_refmv_match);
  127|  1.62M|        x += len;
  128|  1.62M|        if (x >= w4) return 1;
  ------------------
  |  Branch (128:13): [True: 786k, False: 834k]
  ------------------
  129|   834k|        cand_b = &b[x];
  130|   834k|        cand_bw4 = dav1d_block_dimensions[cand_b->bs][0];
  131|   834k|        assert(cand_bw4 < bw4);
  ------------------
  |  Branch (131:9): [True: 835k, False: 18.4E]
  ------------------
  132|   835k|        len = imax(step, cand_bw4);
  133|   835k|    }
  134|   788k|}
refmvs.c:scan_col:
  141|  6.29M|{
  142|  6.29M|    const refmvs_block *cand_b = &b[0][bx4];
  143|  6.29M|    const enum BlockSize first_cand_bs = cand_b->bs;
  144|  6.29M|    const uint8_t *const first_cand_b_dim = dav1d_block_dimensions[first_cand_bs];
  145|  6.29M|    int cand_bh4 = first_cand_b_dim[1];
  146|  6.29M|    int len = imax(step, imin(bh4, cand_bh4));
  147|       |
  148|  6.29M|    if (bh4 <= cand_bh4) {
  ------------------
  |  Branch (148:9): [True: 5.28M, False: 1.01M]
  ------------------
  149|       |        // FIXME weight can be higher for odd blocks (by4 & 1), but then the
  150|       |        // position of the first block has to be odd already, i.e. not just
  151|       |        // for col_offset=-3/-5
  152|       |        // FIXME why can this not be cand_bh4?
  153|  5.28M|        const int weight = bh4 == 1 ? 2 :
  ------------------
  |  Branch (153:28): [True: 1.60M, False: 3.67M]
  ------------------
  154|  5.28M|                           imax(2, imin(2 * max_cols, first_cand_b_dim[0]));
  155|  5.28M|        add_spatial_candidate(mvstack, cnt, len * weight, cand_b, ref, gmv,
  156|  5.28M|                            have_newmv_match, have_refmv_match);
  157|  5.28M|        return weight >> 1;
  158|  5.28M|    }
  159|       |
  160|  1.96M|    for (int y = 0;;) {
  161|       |        // FIXME if we overhang above, we could fill a bitmask so we don't have
  162|       |        // to repeat the add_spatial_candidate() for the next row, but just increase
  163|       |        // the weight here
  164|  1.96M|        add_spatial_candidate(mvstack, cnt, len * 2, cand_b, ref, gmv,
  165|  1.96M|                              have_newmv_match, have_refmv_match);
  166|  1.96M|        y += len;
  167|  1.96M|        if (y >= h4) return 1;
  ------------------
  |  Branch (167:13): [True: 1.02M, False: 940k]
  ------------------
  168|   940k|        cand_b = &b[y][bx4];
  169|   940k|        cand_bh4 = dav1d_block_dimensions[cand_b->bs][1];
  170|   940k|        assert(cand_bh4 < bh4);
  ------------------
  |  Branch (170:9): [True: 942k, False: 18.4E]
  ------------------
  171|   942k|        len = imax(step, cand_bh4);
  172|   942k|    }
  173|  1.01M|}
refmvs.c:add_spatial_candidate:
   46|  18.4M|{
   47|  18.4M|    if (b->mv.mv[0].n == INVALID_MV) return; // intra block, no intrabc
  ------------------
  |  |   40|  18.4M|#define INVALID_MV 0x80008000
  ------------------
  |  Branch (47:9): [True: 2.67M, False: 15.7M]
  ------------------
   48|       |
   49|  15.7M|    if (ref.ref[1] == -1) {
  ------------------
  |  Branch (49:9): [True: 14.0M, False: 1.70M]
  ------------------
   50|  18.0M|        for (int n = 0; n < 2; n++) {
  ------------------
  |  Branch (50:25): [True: 16.1M, False: 1.86M]
  ------------------
   51|  16.1M|            if (b->ref.ref[n] == ref.ref[0]) {
  ------------------
  |  Branch (51:17): [True: 12.2M, False: 3.94M]
  ------------------
   52|  12.2M|                const mv cand_mv = ((b->mf & 1) && gmv[0].n != INVALID_MV) ?
  ------------------
  |  |   40|  2.96M|#define INVALID_MV 0x80008000
  ------------------
  |  Branch (52:37): [True: 2.96M, False: 9.26M]
  |  Branch (52:52): [True: 1.04M, False: 1.91M]
  ------------------
   53|  11.1M|                                   gmv[0] : b->mv.mv[n];
   54|       |
   55|  12.2M|                *have_refmv_match = 1;
   56|  12.2M|                *have_newmv_match |= b->mf >> 1;
   57|       |
   58|  12.2M|                const int last = *cnt;
   59|  21.0M|                for (int m = 0; m < last; m++)
  ------------------
  |  Branch (59:33): [True: 14.4M, False: 6.57M]
  ------------------
   60|  14.4M|                    if (mvstack[m].mv.mv[0].n == cand_mv.n) {
  ------------------
  |  Branch (60:25): [True: 5.64M, False: 8.83M]
  ------------------
   61|  5.64M|                        mvstack[m].weight += weight;
   62|  5.64M|                        return;
   63|  5.64M|                    }
   64|       |
   65|  6.62M|                if (last < 8) {
  ------------------
  |  Branch (65:21): [True: 6.62M, False: 18.4E]
  ------------------
   66|  6.62M|                    mvstack[last].mv.mv[0] = cand_mv;
   67|  6.62M|                    mvstack[last].weight = weight;
   68|  6.62M|                    *cnt = last + 1;
   69|  6.62M|                }
   70|  6.57M|                return;
   71|  12.2M|            }
   72|  16.1M|        }
   73|  14.0M|    } else if (b->ref.pair == ref.pair) {
  ------------------
  |  Branch (73:16): [True: 696k, False: 1.01M]
  ------------------
   74|   696k|        const refmvs_mvpair cand_mv = { .mv = {
   75|   696k|            [0] = ((b->mf & 1) && gmv[0].n != INVALID_MV) ? gmv[0] : b->mv.mv[0],
  ------------------
  |  |   40|  52.1k|#define INVALID_MV 0x80008000
  ------------------
  |  Branch (75:20): [True: 52.1k, False: 644k]
  |  Branch (75:35): [True: 32.5k, False: 19.5k]
  ------------------
   76|   696k|            [1] = ((b->mf & 1) && gmv[1].n != INVALID_MV) ? gmv[1] : b->mv.mv[1],
  ------------------
  |  |   40|  52.1k|#define INVALID_MV 0x80008000
  ------------------
  |  Branch (76:20): [True: 52.1k, False: 644k]
  |  Branch (76:35): [True: 29.4k, False: 22.7k]
  ------------------
   77|   696k|        }};
   78|       |
   79|   696k|        *have_refmv_match = 1;
   80|   696k|        *have_newmv_match |= b->mf >> 1;
   81|       |
   82|   696k|        const int last = *cnt;
   83|  1.12M|        for (int n = 0; n < last; n++)
  ------------------
  |  Branch (83:25): [True: 724k, False: 397k]
  ------------------
   84|   724k|            if (mvstack[n].mv.n == cand_mv.n) {
  ------------------
  |  Branch (84:17): [True: 299k, False: 424k]
  ------------------
   85|   299k|                mvstack[n].weight += weight;
   86|   299k|                return;
   87|   299k|            }
   88|       |
   89|   397k|        if (last < 8) {
  ------------------
  |  Branch (89:13): [True: 395k, False: 1.93k]
  ------------------
   90|   395k|            mvstack[last].mv = cand_mv;
   91|   395k|            mvstack[last].weight = weight;
   92|   395k|            *cnt = last + 1;
   93|   395k|        }
   94|   397k|    }
   95|  15.7M|}
refmvs.c:add_temporal_candidate:
  198|  5.87M|{
  199|  5.87M|    if (rb->mv.n == INVALID_MV) return;
  ------------------
  |  |   40|  5.87M|#define INVALID_MV 0x80008000
  ------------------
  |  Branch (199:9): [True: 3.33M, False: 2.53M]
  ------------------
  200|       |
  201|  2.53M|    union mv mv = mv_projection(rb->mv, rf->pocdiff[ref.ref[0] - 1], rb->ref);
  202|  2.53M|    fix_mv_precision(rf->frm_hdr, &mv);
  203|       |
  204|  2.53M|    const int last = *cnt;
  205|  2.53M|    if (ref.ref[1] == -1) {
  ------------------
  |  Branch (205:9): [True: 1.75M, False: 778k]
  ------------------
  206|  1.75M|        if (globalmv_ctx)
  ------------------
  |  Branch (206:13): [True: 337k, False: 1.41M]
  ------------------
  207|   337k|            *globalmv_ctx = (abs(mv.x - gmv[0].x) | abs(mv.y - gmv[0].y)) >= 16;
  208|       |
  209|  4.54M|        for (int n = 0; n < last; n++)
  ------------------
  |  Branch (209:25): [True: 4.04M, False: 505k]
  ------------------
  210|  4.04M|            if (mvstack[n].mv.mv[0].n == mv.n) {
  ------------------
  |  Branch (210:17): [True: 1.25M, False: 2.79M]
  ------------------
  211|  1.25M|                mvstack[n].weight += 2;
  212|  1.25M|                return;
  213|  1.25M|            }
  214|   520k|        if (last < 8) {
  ------------------
  |  Branch (214:13): [True: 520k, False: 18.4E]
  ------------------
  215|   520k|            mvstack[last].mv.mv[0] = mv;
  216|   520k|            mvstack[last].weight = 2;
  217|   520k|            *cnt = last + 1;
  218|   520k|        }
  219|   778k|    } else {
  220|   778k|        refmvs_mvpair mvp = { .mv = {
  221|   778k|            [0] = mv,
  222|   778k|            [1] = mv_projection(rb->mv, rf->pocdiff[ref.ref[1] - 1], rb->ref),
  223|   778k|        }};
  224|   778k|        fix_mv_precision(rf->frm_hdr, &mvp.mv[1]);
  225|       |
  226|  1.71M|        for (int n = 0; n < last; n++)
  ------------------
  |  Branch (226:25): [True: 1.53M, False: 180k]
  ------------------
  227|  1.53M|            if (mvstack[n].mv.n == mvp.n) {
  ------------------
  |  Branch (227:17): [True: 598k, False: 938k]
  ------------------
  228|   598k|                mvstack[n].weight += 2;
  229|   598k|                return;
  230|   598k|            }
  231|   195k|        if (last < 8) {
  ------------------
  |  Branch (231:13): [True: 195k, False: 18.4E]
  ------------------
  232|   195k|            mvstack[last].mv = mvp;
  233|   195k|            mvstack[last].weight = 2;
  234|   195k|            *cnt = last + 1;
  235|   195k|        }
  236|   180k|    }
  237|  2.53M|}
refmvs.c:mv_projection:
  175|  3.33M|static inline union mv mv_projection(const union mv mv, const int num, const int den) {
  176|  3.33M|    static const uint16_t div_mult[32] = {
  177|  3.33M|           0, 16384, 8192, 5461, 4096, 3276, 2730, 2340,
  178|  3.33M|        2048,  1820, 1638, 1489, 1365, 1260, 1170, 1092,
  179|  3.33M|        1024,   963,  910,  862,  819,  780,  744,  712,
  180|  3.33M|         682,   655,  630,  606,  585,  564,  546,  528
  181|  3.33M|    };
  182|  3.33M|    assert(den > 0 && den < 32);
  ------------------
  |  Branch (182:5): [True: 3.33M, False: 18.4E]
  |  Branch (182:5): [True: 3.33M, False: 18.4E]
  ------------------
  183|  3.33M|    assert(num > -32 && num < 32);
  ------------------
  |  Branch (183:5): [True: 3.33M, False: 18.4E]
  |  Branch (183:5): [True: 3.33M, False: 18.4E]
  ------------------
  184|  3.33M|    const int frac = num * div_mult[den];
  185|  3.33M|    const int y = mv.y * frac, x = mv.x * frac;
  186|       |    // Round and clip according to AV1 spec section 7.9.3
  187|  3.33M|    return (union mv) { // 0x3fff == (1 << 14) - 1
  188|  3.33M|        .y = iclip((y + 8192 + (y >> 31)) >> 14, -0x3fff, 0x3fff),
  189|  3.33M|        .x = iclip((x + 8192 + (x >> 31)) >> 14, -0x3fff, 0x3fff)
  190|  3.33M|    };
  191|  3.33M|}
refmvs.c:add_compound_extended_candidate:
  245|   379k|{
  246|   379k|    refmvs_candidate *const diff = &same[2];
  247|   379k|    int *const diff_count = &same_count[2];
  248|       |
  249|   955k|    for (int n = 0; n < 2; n++) {
  ------------------
  |  Branch (249:21): [True: 736k, False: 218k]
  ------------------
  250|   736k|        const int cand_ref = cand_b->ref.ref[n];
  251|       |
  252|   736k|        if (cand_ref <= 0) break;
  ------------------
  |  Branch (252:13): [True: 161k, False: 575k]
  ------------------
  253|       |
  254|   575k|        mv cand_mv = cand_b->mv.mv[n];
  255|   575k|        if (cand_ref == ref.ref[0]) {
  ------------------
  |  Branch (255:13): [True: 221k, False: 354k]
  ------------------
  256|   221k|            if (same_count[0] < 2)
  ------------------
  |  Branch (256:17): [True: 211k, False: 9.22k]
  ------------------
  257|   211k|                same[same_count[0]++].mv.mv[0] = cand_mv;
  258|   221k|            if (diff_count[1] < 2) {
  ------------------
  |  Branch (258:17): [True: 187k, False: 33.4k]
  ------------------
  259|   187k|                if (sign1 ^ sign_bias[cand_ref - 1]) {
  ------------------
  |  Branch (259:21): [True: 22.5k, False: 165k]
  ------------------
  260|  22.5k|                    cand_mv.y = -cand_mv.y;
  261|  22.5k|                    cand_mv.x = -cand_mv.x;
  262|  22.5k|                }
  263|   187k|                diff[diff_count[1]++].mv.mv[1] = cand_mv;
  264|   187k|            }
  265|   354k|        } else if (cand_ref == ref.ref[1]) {
  ------------------
  |  Branch (265:20): [True: 202k, False: 151k]
  ------------------
  266|   202k|            if (same_count[1] < 2)
  ------------------
  |  Branch (266:17): [True: 196k, False: 5.75k]
  ------------------
  267|   196k|                same[same_count[1]++].mv.mv[1] = cand_mv;
  268|   202k|            if (diff_count[0] < 2) {
  ------------------
  |  Branch (268:17): [True: 167k, False: 34.4k]
  ------------------
  269|   167k|                if (sign0 ^ sign_bias[cand_ref - 1]) {
  ------------------
  |  Branch (269:21): [True: 23.2k, False: 144k]
  ------------------
  270|  23.2k|                    cand_mv.y = -cand_mv.y;
  271|  23.2k|                    cand_mv.x = -cand_mv.x;
  272|  23.2k|                }
  273|   167k|                diff[diff_count[0]++].mv.mv[0] = cand_mv;
  274|   167k|            }
  275|   202k|        } else {
  276|   151k|            mv i_cand_mv = (union mv) {
  277|   151k|                .x = -cand_mv.x,
  278|   151k|                .y = -cand_mv.y
  279|   151k|            };
  280|       |
  281|   151k|            if (diff_count[0] < 2) {
  ------------------
  |  Branch (281:17): [True: 119k, False: 32.0k]
  ------------------
  282|   119k|                diff[diff_count[0]++].mv.mv[0] =
  283|   119k|                    sign0 ^ sign_bias[cand_ref - 1] ?
  ------------------
  |  Branch (283:21): [True: 2.21k, False: 117k]
  ------------------
  284|   117k|                    i_cand_mv : cand_mv;
  285|   119k|            }
  286|       |
  287|   151k|            if (diff_count[1] < 2) {
  ------------------
  |  Branch (287:17): [True: 111k, False: 40.4k]
  ------------------
  288|   111k|                diff[diff_count[1]++].mv.mv[1] =
  289|   111k|                    sign1 ^ sign_bias[cand_ref - 1] ?
  ------------------
  |  Branch (289:21): [True: 5.29k, False: 105k]
  ------------------
  290|   105k|                    i_cand_mv : cand_mv;
  291|   111k|            }
  292|   151k|        }
  293|   575k|    }
  294|   379k|}
refmvs.c:add_single_extended_candidate:
  299|  2.59M|{
  300|  5.04M|    for (int n = 0; n < 2; n++) {
  ------------------
  |  Branch (300:21): [True: 4.98M, False: 61.6k]
  ------------------
  301|  4.98M|        const int cand_ref = cand_b->ref.ref[n];
  302|       |
  303|  4.98M|        if (cand_ref <= 0) break;
  ------------------
  |  Branch (303:13): [True: 2.52M, False: 2.45M]
  ------------------
  304|       |        // we need to continue even if cand_ref == ref.ref[0], since
  305|       |        // the candidate could have been added as a globalmv variant,
  306|       |        // which changes the value
  307|       |        // FIXME if scan_{row,col}() returned a mask for the nearest
  308|       |        // edge, we could skip the appropriate ones here
  309|       |
  310|  2.45M|        mv cand_mv = cand_b->mv.mv[n];
  311|  2.45M|        if (sign ^ sign_bias[cand_ref - 1]) {
  ------------------
  |  Branch (311:13): [True: 19.0k, False: 2.43M]
  ------------------
  312|  19.0k|            cand_mv.y = -cand_mv.y;
  313|  19.0k|            cand_mv.x = -cand_mv.x;
  314|  19.0k|        }
  315|       |
  316|  2.45M|        int m;
  317|  2.45M|        const int last = *cnt;
  318|  2.73M|        for (m = 0; m < last; m++)
  ------------------
  |  Branch (318:21): [True: 2.39M, False: 335k]
  ------------------
  319|  2.39M|            if (cand_mv.n == mvstack[m].mv.mv[0].n)
  ------------------
  |  Branch (319:17): [True: 2.12M, False: 277k]
  ------------------
  320|  2.12M|                break;
  321|  2.45M|        if (m == last) {
  ------------------
  |  Branch (321:13): [True: 339k, False: 2.11M]
  ------------------
  322|   339k|            mvstack[m].mv.mv[0] = cand_mv;
  323|   339k|            mvstack[m].weight = 2; // "minimal"
  324|   339k|            *cnt = last + 1;
  325|   339k|        }
  326|  2.45M|    }
  327|  2.59M|}

decode.c:dav1d_refmvs_save_tmvs:
  145|   589k|{
  146|   589k|    const refmvs_frame *const rf = rt->rf;
  147|       |
  148|   589k|    assert(row_start8 >= 0);
  ------------------
  |  Branch (148:5): [True: 589k, False: 18.4E]
  ------------------
  149|   589k|    assert((unsigned) (row_end8 - row_start8) <= 16U);
  ------------------
  |  Branch (149:5): [True: 589k, False: 18.4E]
  ------------------
  150|   589k|    row_end8 = imin(row_end8, rf->ih8);
  151|   589k|    col_end8 = imin(col_end8, rf->iw8);
  152|       |
  153|   589k|    const ptrdiff_t stride = rf->rp_stride;
  154|   589k|    const uint8_t *const ref_sign = rf->mfmv_sign;
  155|   589k|    refmvs_temporal_block *rp = &rf->rp[row_start8 * stride];
  156|       |
  157|   589k|    dsp->save_tmvs(rp, stride, rt->r + 6, ref_sign,
  158|   589k|                   col_end8, row_end8, col_start8, row_start8);
  159|   589k|}

dav1d_init_last_nonzero_col_from_eob_tables:
  350|  3.39k|COLD void dav1d_init_last_nonzero_col_from_eob_tables(void) {
  351|       |    static pthread_once_t initted = PTHREAD_ONCE_INIT;
  352|  3.39k|    pthread_once(&initted, init_internal);
  353|  3.39k|}
scan.c:init_internal:
  333|      1|static COLD void init_internal(void) {
  334|      1|    init_tbl(last_nonzero_col_from_eob_4x4,   scan_4x4,    4,  4);
  335|      1|    init_tbl(last_nonzero_col_from_eob_8x8,   scan_8x8,    8,  8);
  336|      1|    init_tbl(last_nonzero_col_from_eob_16x16, scan_16x16, 16, 16);
  337|      1|    init_tbl(last_nonzero_col_from_eob_32x32, scan_32x32, 32, 32);
  338|      1|    init_tbl(last_nonzero_col_from_eob_4x8,   scan_4x8,    4,  8);
  339|      1|    init_tbl(last_nonzero_col_from_eob_8x4,   scan_8x4,    8,  4);
  340|      1|    init_tbl(last_nonzero_col_from_eob_8x16,  scan_8x16,   8, 16);
  341|      1|    init_tbl(last_nonzero_col_from_eob_16x8,  scan_16x8,  16,  8);
  342|      1|    init_tbl(last_nonzero_col_from_eob_16x32, scan_16x32, 16, 32);
  343|      1|    init_tbl(last_nonzero_col_from_eob_32x16, scan_32x16, 32, 16);
  344|      1|    init_tbl(last_nonzero_col_from_eob_4x16,  scan_4x16,   4, 16);
  345|      1|    init_tbl(last_nonzero_col_from_eob_16x4,  scan_16x4,  16,  4);
  346|      1|    init_tbl(last_nonzero_col_from_eob_8x32,  scan_8x32,   8, 32);
  347|      1|    init_tbl(last_nonzero_col_from_eob_32x8,  scan_32x8,  32,  8);
  348|      1|}
scan.c:init_tbl:
  321|     14|{
  322|     14|    int max_col = 0;
  323|    218|    for (int y = 0, n = 0; y < h; y++) {
  ------------------
  |  Branch (323:28): [True: 204, False: 14]
  ------------------
  324|  3.54k|        for (int x = 0; x < w; x++, n++) {
  ------------------
  |  Branch (324:25): [True: 3.34k, False: 204]
  ------------------
  325|  3.34k|            const int rc = scan[n];
  326|  3.34k|            const int rcx = rc & (h - 1);
  327|  3.34k|            max_col = imax(max_col, rcx);
  328|  3.34k|            last_nonzero_col_from_eob[n] = max_col;
  329|  3.34k|        }
  330|    204|    }
  331|     14|}

thread_task.c:dav1d_set_thread_name:
  152|  40.8k|static inline void dav1d_set_thread_name(const char *const name) {
  153|       |    prctl(PR_SET_NAME, name);
  154|  40.8k|}

dav1d_task_create_tile_sbrow:
  270|   463k|{
  271|   463k|    Dav1dTask *tasks = f->task_thread.tile_tasks[0];
  272|   463k|    const int uses_2pass = f->c->n_fc > 1;
  273|   463k|    const int n_tasks_per_pass = f->frame_hdr->tiling.cols * f->frame_hdr->tiling.rows;
  274|   463k|    const int n_tasks = n_tasks_per_pass * (1 + uses_2pass);
  275|   463k|    if (pass < 2) {
  ------------------
  |  Branch (275:9): [True: 231k, False: 232k]
  ------------------
  276|   231k|        if (n_tasks > f->task_thread.num_tile_tasks) {
  ------------------
  |  Branch (276:13): [True: 15.8k, False: 215k]
  ------------------
  277|  15.8k|            const size_t size = sizeof(Dav1dTask) * n_tasks;
  278|  15.8k|            tasks = dav1d_realloc(ALLOC_COMMON_CTX, f->task_thread.tile_tasks[0], size);
  ------------------
  |  |  133|  15.8k|#define dav1d_realloc(type, ptr, sz) realloc(ptr, sz)
  ------------------
  279|  15.8k|            if (!tasks) return -1;
  ------------------
  |  Branch (279:17): [True: 0, False: 15.8k]
  ------------------
  280|  15.8k|            memset(tasks, 0, size);
  281|  15.8k|            f->task_thread.tile_tasks[0] = tasks;
  282|  15.8k|            f->task_thread.num_tile_tasks = n_tasks;
  283|  15.8k|        }
  284|   231k|        f->task_thread.tile_tasks[1] = tasks + n_tasks_per_pass;
  285|   231k|    }
  286|   463k|    assert(n_tasks <= f->task_thread.num_tile_tasks);
  ------------------
  |  Branch (286:5): [True: 463k, False: 47]
  ------------------
  287|       |
  288|   463k|    Dav1dTask *pf_t;
  289|   463k|    if (create_filter_sbrow(f, pass, &pf_t))
  ------------------
  |  Branch (289:9): [True: 0, False: 463k]
  ------------------
  290|      0|        return -1;
  291|       |
  292|   463k|    Dav1dTask *const p1_tasks = f->task_thread.tile_tasks[1];
  293|   463k|    Dav1dTask *prev_t = NULL;
  294|   463k|    if (pass == 2) {
  ------------------
  |  Branch (294:9): [True: 232k, False: 231k]
  ------------------
  295|   232k|        prev_t = &p1_tasks[n_tasks_per_pass - 1];
  296|       |        // PF task is scheduled after the last sby=0 TILE task
  297|   232k|        if (f->frame_hdr->tiling.rows == 1)
  ------------------
  |  Branch (297:13): [True: 229k, False: 3.01k]
  ------------------
  298|   229k|            prev_t = prev_t->next;
  299|   232k|    }
  300|   463k|    tasks += (pass & 1) * n_tasks_per_pass;
  301|   952k|    for (int tile_idx = 0; tile_idx < n_tasks_per_pass; tile_idx++) {
  ------------------
  |  Branch (301:28): [True: 488k, False: 463k]
  ------------------
  302|   488k|        Dav1dTileState *const ts = &f->ts[tile_idx];
  303|   488k|        Dav1dTask *t = &tasks[tile_idx];
  304|   488k|        t->sby = ts->tiling.row_start >> f->sb_shift;
  305|   488k|        if (pf_t && t->sby) {
  ------------------
  |  Branch (305:13): [True: 478k, False: 10.0k]
  |  Branch (305:21): [True: 6.03k, False: 472k]
  ------------------
  306|  6.03k|            prev_t->next = pf_t;
  307|  6.03k|            prev_t = pf_t;
  308|  6.03k|            pf_t = NULL;
  309|  6.03k|        }
  310|   488k|        t->recon_progress = 0;
  311|   488k|        t->deblock_progress = 0;
  312|   488k|        t->deps_skip = 0;
  313|   488k|        t->type = pass != 1 ? DAV1D_TASK_TYPE_TILE_RECONSTRUCTION :
  ------------------
  |  Branch (313:19): [True: 244k, False: 244k]
  ------------------
  314|   488k|                              DAV1D_TASK_TYPE_TILE_ENTROPY;
  315|   488k|        t->frame_idx = (int)(f - f->c->fc);
  316|   488k|        if (prev_t) prev_t->next = t;
  ------------------
  |  Branch (316:13): [True: 256k, False: 232k]
  ------------------
  317|   488k|        prev_t = t;
  318|   488k|    }
  319|   463k|    if (pf_t) {
  ------------------
  |  Branch (319:9): [True: 458k, False: 5.63k]
  ------------------
  320|   458k|        prev_t->next = pf_t;
  321|   458k|        prev_t = pf_t;
  322|   458k|    }
  323|   463k|    prev_t->next = NULL;
  324|       |
  325|   463k|    atomic_store(&f->task_thread.done[pass & 1], 0);
  326|       |
  327|       |    // XXX in theory this could be done locklessly, at this point they are no
  328|       |    // tasks in the frameQ, so no other runner should be using this lock, but
  329|       |    // we must add both passes at once
  330|   463k|    if (!(pass & 1)) {
  ------------------
  |  Branch (330:9): [True: 232k, False: 231k]
  ------------------
  331|   232k|        pthread_mutex_lock(&f->task_thread.pending_tasks.lock);
  332|   232k|        assert(f->task_thread.pending_tasks.head == NULL);
  ------------------
  |  Branch (332:9): [True: 232k, False: 7]
  ------------------
  333|   232k|        f->task_thread.pending_tasks.head = f->task_thread.tile_tasks[pass == 2];
  334|   232k|        f->task_thread.pending_tasks.tail = prev_t;
  335|   232k|        atomic_store(&f->task_thread.pending_tasks.merge, 1);
  336|   232k|        atomic_store(&f->task_thread.init_done, 1);
  337|   232k|        pthread_mutex_unlock(&f->task_thread.pending_tasks.lock);
  338|   232k|    }
  339|   463k|    return 0;
  340|   463k|}
dav1d_task_frame_init:
  342|   282k|void dav1d_task_frame_init(Dav1dFrameContext *const f) {
  343|   282k|    const Dav1dContext *const c = f->c;
  344|       |
  345|   282k|    atomic_store(&f->task_thread.init_done, 0);
  346|       |    // schedule init task, which will schedule the remaining tasks
  347|   282k|    Dav1dTask *const t = &f->task_thread.init_task;
  348|   282k|    t->type = DAV1D_TASK_TYPE_INIT;
  349|   282k|    t->frame_idx = (int)(f - c->fc);
  350|   282k|    t->sby = 0;
  351|   282k|    t->recon_progress = t->deblock_progress = 0;
  352|   282k|    insert_task(f, t, 1);
  353|   282k|}
dav1d_task_delayed_fg:
  357|  6.80k|{
  358|  6.80k|    struct TaskThreadData *const ttd = &c->task_thread;
  359|  6.80k|    ttd->delayed_fg.in = in;
  360|  6.80k|    ttd->delayed_fg.out = out;
  361|  6.80k|    ttd->delayed_fg.type = DAV1D_TASK_TYPE_FG_PREP;
  362|  6.80k|    atomic_init(&ttd->delayed_fg.progress[0], 0);
  363|  6.80k|    atomic_init(&ttd->delayed_fg.progress[1], 0);
  364|  6.80k|    pthread_mutex_lock(&ttd->lock);
  365|  6.80k|    ttd->delayed_fg.exec = 1;
  366|  6.80k|    ttd->delayed_fg.finished = 0;
  367|  6.80k|    pthread_cond_signal(&ttd->cond);
  368|  6.80k|    do {
  369|  6.80k|        pthread_cond_wait(&ttd->delayed_fg.cond, &ttd->lock);
  370|  6.80k|    } while (!ttd->delayed_fg.finished);
  ------------------
  |  Branch (370:14): [True: 0, False: 6.80k]
  ------------------
  371|  6.80k|    pthread_mutex_unlock(&ttd->lock);
  372|  6.80k|}
dav1d_worker_task:
  556|  40.8k|void *dav1d_worker_task(void *data) {
  557|  40.8k|    Dav1dTaskContext *const tc = data;
  558|  40.8k|    const Dav1dContext *const c = tc->c;
  559|  40.8k|    struct TaskThreadData *const ttd = tc->task_thread.ttd;
  560|       |
  561|  40.8k|    dav1d_set_thread_name("dav1d-worker");
  562|       |
  563|  40.8k|    pthread_mutex_lock(&ttd->lock);
  564|  16.4M|    for (;;) {
  565|  16.4M|        if (tc->task_thread.die) break;
  ------------------
  |  Branch (565:13): [True: 40.8k, False: 16.4M]
  ------------------
  566|  16.4M|        if (atomic_load(c->flush)) goto park;
  ------------------
  |  Branch (566:13): [True: 9.48k, False: 16.4M]
  ------------------
  567|       |
  568|  16.4M|        merge_pending(c);
  569|  16.4M|        if (ttd->delayed_fg.exec) { // run delayed film grain first
  ------------------
  |  Branch (569:13): [True: 13.1k, False: 16.4M]
  ------------------
  570|  13.1k|            delayed_fg_task(c, ttd);
  571|  13.1k|            continue;
  572|  13.1k|        }
  573|  16.4M|        Dav1dFrameContext *f;
  574|  16.4M|        Dav1dTask *t, *prev_t = NULL;
  575|  16.4M|        if (c->n_fc > 1) { // run init tasks second
  ------------------
  |  Branch (575:13): [True: 16.4M, False: 0]
  ------------------
  576|  81.6M|            for (unsigned i = 0; i < c->n_fc; i++) {
  ------------------
  |  Branch (576:34): [True: 65.4M, False: 16.1M]
  ------------------
  577|  65.4M|                const unsigned first = atomic_load(&ttd->first);
  578|  65.4M|                f = &c->fc[(first + i) % c->n_fc];
  579|  65.4M|                if (atomic_load(&f->task_thread.init_done)) continue;
  ------------------
  |  Branch (579:21): [True: 57.3M, False: 8.16M]
  ------------------
  580|  8.16M|                t = f->task_thread.task_head;
  581|  8.16M|                if (!t) continue;
  ------------------
  |  Branch (581:21): [True: 7.13M, False: 1.02M]
  ------------------
  582|  1.02M|                if (t->type == DAV1D_TASK_TYPE_INIT) goto found;
  ------------------
  |  Branch (582:21): [True: 281k, False: 739k]
  ------------------
  583|   739k|                if (t->type == DAV1D_TASK_TYPE_INIT_CDF) {
  ------------------
  |  Branch (583:21): [True: 739k, False: 0]
  ------------------
  584|       |                    // XXX This can be a simple else, if adding tasks of both
  585|       |                    // passes at once (in dav1d_task_create_tile_sbrow).
  586|       |                    // Adding the tasks to the pending Q can result in a
  587|       |                    // thread merging them before setting init_done.
  588|       |                    // We will need to set init_done before adding to the
  589|       |                    // pending Q, so maybe return the tasks, set init_done,
  590|       |                    // and add to pending Q only then.
  591|   739k|                    const int p1 = f->in_cdf.progress ?
  ------------------
  |  Branch (591:36): [True: 739k, False: 0]
  ------------------
  592|   739k|                        atomic_load(f->in_cdf.progress) : 1;
  593|   739k|                    if (p1) {
  ------------------
  |  Branch (593:25): [True: 8.43k, False: 731k]
  ------------------
  594|  8.43k|                        atomic_fetch_or(&f->task_thread.error, p1 == TILE_ERROR);
  595|  8.43k|                        goto found;
  596|  8.43k|                    }
  597|   739k|                }
  598|   739k|            }
  599|  16.4M|        }
  600|  18.1M|        while (ttd->cur < c->n_fc) { // run decoding tasks last
  ------------------
  |  Branch (600:16): [True: 17.3M, False: 776k]
  ------------------
  601|  17.3M|            const unsigned first = atomic_load(&ttd->first);
  602|  17.3M|            f = &c->fc[(first + ttd->cur) % c->n_fc];
  603|  17.3M|            merge_pending_frame(f);
  604|  17.3M|            prev_t = f->task_thread.task_cur_prev;
  605|  17.3M|            t = prev_t ? prev_t->next : f->task_thread.task_head;
  ------------------
  |  Branch (605:17): [True: 93.1k, False: 17.2M]
  ------------------
  606|  21.6M|            while (t) {
  ------------------
  |  Branch (606:20): [True: 19.6M, False: 2.01M]
  ------------------
  607|  19.6M|                if (t->type == DAV1D_TASK_TYPE_INIT_CDF) goto next;
  ------------------
  |  Branch (607:21): [True: 26.4k, False: 19.6M]
  ------------------
  608|  19.6M|                else if (t->type == DAV1D_TASK_TYPE_TILE_ENTROPY ||
  ------------------
  |  Branch (608:26): [True: 1.47M, False: 18.1M]
  ------------------
  609|  18.1M|                         t->type == DAV1D_TASK_TYPE_TILE_RECONSTRUCTION)
  ------------------
  |  Branch (609:26): [True: 1.71M, False: 16.4M]
  ------------------
  610|  3.19M|                {
  611|       |                    // if not bottom sbrow of tile, this task will be re-added
  612|       |                    // after it's finished
  613|  3.19M|                    if (!check_tile(t, f, c->n_fc > 1))
  ------------------
  |  Branch (613:25): [True: 2.86M, False: 328k]
  ------------------
  614|  2.86M|                        goto found;
  615|  16.4M|                } else if (t->recon_progress) {
  ------------------
  |  Branch (615:28): [True: 14.8M, False: 1.61M]
  ------------------
  616|  14.8M|                    const int p = t->type == DAV1D_TASK_TYPE_ENTROPY_PROGRESS;
  617|  14.8M|                    int error = atomic_load(&f->task_thread.error);
  618|  14.8M|                    assert(!atomic_load(&f->task_thread.done[p]) || error);
  ------------------
  |  Branch (618:21): [True: 9.86M, False: 4.96M]
  |  Branch (618:21): [True: 4.96M, False: 0]
  ------------------
  619|  14.8M|                    const int tile_row_base = f->frame_hdr->tiling.cols *
  620|  14.8M|                                              f->frame_thread.next_tile_row[p];
  621|  14.8M|                    if (p) {
  ------------------
  |  Branch (621:25): [True: 6.51M, False: 8.30M]
  ------------------
  622|  6.51M|                        atomic_int *const prog = &f->frame_thread.entropy_progress;
  623|  6.51M|                        const int p1 = atomic_load(prog);
  624|  6.51M|                        if (p1 < t->sby) goto next;
  ------------------
  |  Branch (624:29): [True: 10.1k, False: 6.50M]
  ------------------
  625|  6.51M|                        atomic_fetch_or(&f->task_thread.error, p1 == TILE_ERROR);
  626|  6.50M|                    }
  627|  28.1M|                    for (int tc = 0; tc < f->frame_hdr->tiling.cols; tc++) {
  ------------------
  |  Branch (627:38): [True: 15.7M, False: 12.3M]
  ------------------
  628|  15.7M|                        Dav1dTileState *const ts = &f->ts[tile_row_base + tc];
  629|  15.7M|                        const int p2 = atomic_load(&ts->progress[p]);
  630|  15.7M|                        if (p2 < t->recon_progress) goto next;
  ------------------
  |  Branch (630:29): [True: 2.48M, False: 13.2M]
  ------------------
  631|  15.7M|                        atomic_fetch_or(&f->task_thread.error, p2 == TILE_ERROR);
  632|  13.2M|                    }
  633|  12.3M|                    if (t->sby + 1 < f->sbh) {
  ------------------
  |  Branch (633:25): [True: 11.8M, False: 463k]
  ------------------
  634|       |                        // add sby+1 to list to replace this one
  635|  11.8M|                        Dav1dTask *next_t = &t[1];
  636|  11.8M|                        *next_t = *t;
  637|  11.8M|                        next_t->sby++;
  638|  11.8M|                        const int ntr = f->frame_thread.next_tile_row[p] + 1;
  639|  11.8M|                        const int start = f->frame_hdr->tiling.row_start_sb[ntr];
  640|  11.8M|                        if (next_t->sby == start)
  ------------------
  |  Branch (640:29): [True: 11.1k, False: 11.8M]
  ------------------
  641|  11.1k|                            f->frame_thread.next_tile_row[p] = ntr;
  642|  11.8M|                        next_t->recon_progress = next_t->sby + 1;
  643|  11.8M|                        insert_task(f, next_t, 0);
  644|  11.8M|                    }
  645|  12.3M|                    goto found;
  646|  14.8M|                } else if (t->type == DAV1D_TASK_TYPE_CDEF) {
  ------------------
  |  Branch (646:28): [True: 1.46M, False: 141k]
  ------------------
  647|  1.46M|                    atomic_uint *prog = f->frame_thread.copy_lpf_progress;
  648|  1.46M|                    const int p1 = atomic_load(&prog[(t->sby - 1) >> 5]);
  649|  1.46M|                    if (p1 & (1U << ((t->sby - 1) & 31)))
  ------------------
  |  Branch (649:25): [True: 152k, False: 1.31M]
  ------------------
  650|   152k|                        goto found;
  651|  1.46M|                } else {
  652|   141k|                    assert(t->deblock_progress);
  ------------------
  |  Branch (652:21): [True: 141k, False: 0]
  ------------------
  653|   141k|                    const int p1 = atomic_load(&f->frame_thread.deblock_progress);
  654|   141k|                    if (p1 >= t->deblock_progress) {
  ------------------
  |  Branch (654:25): [True: 10.1k, False: 131k]
  ------------------
  655|  10.1k|                        atomic_fetch_or(&f->task_thread.error, p1 == TILE_ERROR);
  656|  10.1k|                        goto found;
  657|  10.1k|                    }
  658|   141k|                }
  659|  4.30M|            next:
  660|  4.30M|                prev_t = t;
  661|  4.30M|                t = t->next;
  662|  4.30M|                f->task_thread.task_cur_prev = prev_t;
  663|  4.30M|            }
  664|  2.01M|            ttd->cur++;
  665|  2.01M|        }
  666|   776k|        if (reset_task_cur(c, ttd, UINT_MAX)) continue;
  ------------------
  |  Branch (666:13): [True: 9.91k, False: 766k]
  ------------------
  667|   766k|        if (merge_pending(c)) continue;
  ------------------
  |  Branch (667:13): [True: 4.61k, False: 761k]
  ------------------
  668|   771k|    park:
  669|   771k|        tc->task_thread.flushed = 1;
  670|   771k|        pthread_cond_signal(&tc->task_thread.td.cond);
  671|       |        // we want to be woken up next time progress is signaled
  672|   771k|        atomic_store(&ttd->cond_signaled, 0);
  673|   771k|        pthread_cond_wait(&ttd->cond, &ttd->lock);
  674|   771k|        tc->task_thread.flushed = 0;
  675|   771k|        reset_task_cur(c, ttd, UINT_MAX);
  676|   771k|        continue;
  677|       |
  678|  15.6M|    found:
  679|       |        // remove t from list
  680|  15.6M|        if (prev_t) prev_t->next = t->next;
  ------------------
  |  Branch (680:13): [True: 2.42M, False: 13.2M]
  ------------------
  681|  13.2M|        else f->task_thread.task_head = t->next;
  682|  15.6M|        if (!t->next) f->task_thread.task_tail = prev_t;
  ------------------
  |  Branch (682:13): [True: 707k, False: 14.9M]
  ------------------
  683|  15.6M|        if (t->type > DAV1D_TASK_TYPE_INIT_CDF && !f->task_thread.task_head)
  ------------------
  |  Branch (683:13): [True: 15.3M, False: 289k]
  |  Branch (683:51): [True: 232k, False: 15.1M]
  ------------------
  684|   232k|            ttd->cur++;
  685|  15.6M|        t->next = NULL;
  686|       |        // we don't need to check cond_signaled here, since we found a task
  687|       |        // after the last signal so we want to re-signal the next waiting thread
  688|       |        // and again won't need to signal after that
  689|  15.6M|        atomic_store(&ttd->cond_signaled, 1);
  690|  15.6M|        pthread_cond_signal(&ttd->cond);
  691|  15.6M|        pthread_mutex_unlock(&ttd->lock);
  692|  25.9M|    found_unlocked:;
  693|  25.9M|        const int flush = atomic_load(c->flush);
  694|  25.9M|        int error = atomic_fetch_or(&f->task_thread.error, flush) | flush;
  695|       |
  696|       |        // run it
  697|  25.9M|        tc->f = f;
  698|  25.9M|        int sby = t->sby;
  699|  25.9M|        switch (t->type) {
  700|   281k|        case DAV1D_TASK_TYPE_INIT: {
  ------------------
  |  Branch (700:9): [True: 281k, False: 25.6M]
  ------------------
  701|   281k|            assert(c->n_fc > 1);
  ------------------
  |  Branch (701:13): [True: 281k, False: 5]
  ------------------
  702|   281k|            int res = dav1d_decode_frame_init(f);
  703|   281k|            int p1 = f->in_cdf.progress ? atomic_load(f->in_cdf.progress) : 1;
  ------------------
  |  Branch (703:22): [True: 54.2k, False: 227k]
  ------------------
  704|   281k|            if (res || p1 == TILE_ERROR) {
  ------------------
  |  |   36|   280k|#define TILE_ERROR (INT_MAX - 1)
  ------------------
  |  Branch (704:17): [True: 386, False: 280k]
  |  Branch (704:24): [True: 36.9k, False: 244k]
  ------------------
  705|  36.9k|                pthread_mutex_lock(&ttd->lock);
  706|  36.9k|                abort_frame(f, res ? res : DAV1D_ERR(EINVAL));
  ------------------
  |  |   58|  36.9k|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  |  Branch (706:32): [True: 0, False: 36.9k]
  ------------------
  707|  36.9k|                reset_task_cur(c, ttd, t->frame_idx);
  708|   244k|            } else {
  709|   244k|                t->type = DAV1D_TASK_TYPE_INIT_CDF;
  710|   244k|                if (p1) goto found_unlocked;
  ------------------
  |  Branch (710:21): [True: 235k, False: 8.80k]
  ------------------
  711|  8.80k|                add_pending(f, t);
  712|  8.80k|                pthread_mutex_lock(&ttd->lock);
  713|  8.80k|            }
  714|  45.7k|            continue;
  715|   281k|        }
  716|   244k|        case DAV1D_TASK_TYPE_INIT_CDF: {
  ------------------
  |  Branch (716:9): [True: 244k, False: 25.7M]
  ------------------
  717|   244k|            assert(c->n_fc > 1);
  ------------------
  |  Branch (717:13): [True: 244k, False: 1]
  ------------------
  718|   244k|            int res = DAV1D_ERR(EINVAL);
  ------------------
  |  |   58|   244k|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  719|   244k|            if (!atomic_load(&f->task_thread.error))
  ------------------
  |  Branch (719:17): [True: 239k, False: 4.43k]
  ------------------
  720|   239k|                res = dav1d_decode_frame_init_cdf(f);
  721|   244k|            if (f->frame_hdr->refresh_context && !f->task_thread.update_set)
  ------------------
  |  Branch (721:17): [True: 20.8k, False: 223k]
  |  Branch (721:50): [True: 1.57k, False: 19.2k]
  ------------------
  722|   244k|                atomic_store(f->out_cdf.progress, res < 0 ? TILE_ERROR : 1);
  ------------------
  |  Branch (722:17): [True: 1.57k, False: 0]
  ------------------
  723|   708k|            for (int p = 1; p <= 2 && !res; p++)
  ------------------
  |  Branch (723:29): [True: 475k, False: 232k]
  |  Branch (723:39): [True: 463k, False: 12.1k]
  ------------------
  724|   463k|                res = dav1d_task_create_tile_sbrow(f, p, 0);
  725|   244k|            pthread_mutex_lock(&ttd->lock);
  726|   244k|            if (res) {
  ------------------
  |  Branch (726:17): [True: 12.1k, False: 232k]
  ------------------
  727|  12.1k|                abort_frame(f, DAV1D_ERR(ENOMEM));
  ------------------
  |  |   58|  12.1k|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  728|  12.1k|                reset_task_cur(c, ttd, t->frame_idx);
  729|  12.1k|                atomic_store(&f->task_thread.init_done, 1);
  730|  12.1k|            }
  731|   244k|            continue;
  732|   244k|        }
  733|  6.48M|        case DAV1D_TASK_TYPE_TILE_ENTROPY:
  ------------------
  |  Branch (733:9): [True: 6.48M, False: 19.4M]
  ------------------
  734|  12.9M|        case DAV1D_TASK_TYPE_TILE_RECONSTRUCTION: {
  ------------------
  |  Branch (734:9): [True: 6.48M, False: 19.4M]
  ------------------
  735|  12.9M|            const int p = t->type == DAV1D_TASK_TYPE_TILE_ENTROPY;
  736|  12.9M|            const int tile_idx = (int)(t - f->task_thread.tile_tasks[p]);
  737|  12.9M|            Dav1dTileState *const ts = &f->ts[tile_idx];
  738|       |
  739|  12.9M|            tc->ts = ts;
  740|  12.9M|            tc->by = sby << f->sb_shift;
  741|  12.9M|            const int uses_2pass = c->n_fc > 1;
  742|  12.9M|            tc->frame_thread.pass = !uses_2pass ? 0 :
  ------------------
  |  Branch (742:37): [True: 0, False: 12.9M]
  ------------------
  743|  12.9M|                1 + (t->type == DAV1D_TASK_TYPE_TILE_RECONSTRUCTION);
  744|  12.9M|            if (!error) error = dav1d_decode_tile_sbrow(tc);
  ------------------
  |  Branch (744:17): [True: 2.61M, False: 10.3M]
  ------------------
  745|  12.9M|            const int progress = error ? TILE_ERROR : 1 + sby;
  ------------------
  |  |   36|  10.4M|#define TILE_ERROR (INT_MAX - 1)
  ------------------
  |  Branch (745:34): [True: 10.4M, False: 2.54M]
  ------------------
  746|       |
  747|       |            // signal progress
  748|  12.9M|            atomic_fetch_or(&f->task_thread.error, error);
  749|  12.9M|            if (((sby + 1) << f->sb_shift) < ts->tiling.row_end) {
  ------------------
  |  Branch (749:17): [True: 12.4M, False: 496k]
  ------------------
  750|  12.4M|                t->sby++;
  751|  12.4M|                t->deps_skip = 0;
  752|  12.4M|                if (!check_tile(t, f, uses_2pass)) {
  ------------------
  |  Branch (752:21): [True: 10.0M, False: 2.38M]
  ------------------
  753|  10.0M|                    atomic_store(&ts->progress[p], progress);
  754|  10.0M|                    reset_task_cur_async(ttd, t->frame_idx, c->n_fc);
  755|  10.0M|                    if (!atomic_fetch_or(&ttd->cond_signaled, 1))
  ------------------
  |  Branch (755:25): [True: 73, False: 10.0M]
  ------------------
  756|     73|                        pthread_cond_signal(&ttd->cond);
  757|  10.0M|                    goto found_unlocked;
  758|  10.0M|                }
  759|  12.4M|                atomic_store(&ts->progress[p], progress);
  760|  2.38M|                add_pending(f, t);
  761|  2.38M|                pthread_mutex_lock(&ttd->lock);
  762|  2.38M|            } else {
  763|   496k|                pthread_mutex_lock(&ttd->lock);
  764|   496k|                atomic_store(&ts->progress[p], progress);
  765|   496k|                reset_task_cur(c, ttd, t->frame_idx);
  766|   496k|                error = atomic_load(&f->task_thread.error);
  767|   496k|                if (f->frame_hdr->refresh_context &&
  ------------------
  |  Branch (767:21): [True: 40.8k, False: 456k]
  ------------------
  768|  40.8k|                    tc->frame_thread.pass <= 1 && f->task_thread.update_set &&
  ------------------
  |  Branch (768:21): [True: 20.4k, False: 20.4k]
  |  Branch (768:51): [True: 20.4k, False: 0]
  ------------------
  769|  20.4k|                    f->frame_hdr->tiling.update == tile_idx)
  ------------------
  |  Branch (769:21): [True: 16.4k, False: 4.03k]
  ------------------
  770|  16.4k|                {
  771|  16.4k|                    if (!error)
  ------------------
  |  Branch (771:25): [True: 8.53k, False: 7.89k]
  ------------------
  772|  8.53k|                        dav1d_cdf_thread_update(f->frame_hdr, f->out_cdf.data.cdf,
  773|  8.53k|                                                &f->ts[f->frame_hdr->tiling.update].cdf);
  774|  16.4k|                    if (c->n_fc > 1)
  ------------------
  |  Branch (774:25): [True: 16.4k, False: 0]
  ------------------
  775|  16.4k|                        atomic_store(f->out_cdf.progress, error ? TILE_ERROR : 1);
  ------------------
  |  Branch (775:25): [True: 7.89k, False: 8.53k]
  ------------------
  776|  16.4k|                }
  777|   496k|                if (atomic_fetch_sub(&f->task_thread.task_counter, 1) - 1 == 0 &&
  ------------------
  |  Branch (777:21): [True: 6.51k, False: 490k]
  ------------------
  778|   496k|                    atomic_load(&f->task_thread.done[0]) &&
  ------------------
  |  Branch (778:21): [True: 6.51k, False: 0]
  ------------------
  779|  6.51k|                    (!uses_2pass || atomic_load(&f->task_thread.done[1])))
  ------------------
  |  Branch (779:22): [True: 0, False: 6.51k]
  |  Branch (779:37): [True: 6.51k, False: 0]
  ------------------
  780|  6.51k|                {
  781|  6.51k|                    error = atomic_load(&f->task_thread.error);
  782|  6.51k|                    dav1d_decode_frame_exit(f, error == 1 ? DAV1D_ERR(EINVAL) :
  ------------------
  |  |   58|  6.51k|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  |  Branch (782:48): [True: 6.51k, False: 0]
  ------------------
  783|  6.51k|                                            error ? DAV1D_ERR(ENOMEM) : 0);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  |  Branch (783:45): [True: 0, False: 0]
  ------------------
  784|  6.51k|                    f->n_tile_data = 0;
  785|  6.51k|                    pthread_cond_signal(&f->task_thread.cond);
  786|  6.51k|                }
  787|   496k|                assert(atomic_load(&f->task_thread.task_counter) >= 0);
  ------------------
  |  Branch (787:17): [True: 487k, False: 9.32k]
  ------------------
  788|   487k|                if (!atomic_fetch_or(&ttd->cond_signaled, 1))
  ------------------
  |  Branch (788:21): [True: 90.3k, False: 397k]
  ------------------
  789|  90.3k|                    pthread_cond_signal(&ttd->cond);
  790|   487k|            }
  791|  2.86M|            continue;
  792|  12.9M|        }
  793|  2.86M|        case DAV1D_TASK_TYPE_DEBLOCK_COLS:
  ------------------
  |  Branch (793:9): [True: 1.57M, False: 24.3M]
  ------------------
  794|  1.57M|            if (!atomic_load(&f->task_thread.error))
  ------------------
  |  Branch (794:17): [True: 693k, False: 886k]
  ------------------
  795|   693k|                f->bd_fn.filter_sbrow_deblock_cols(f, sby);
  796|  1.57M|            if (ensure_progress(ttd, f, t, DAV1D_TASK_TYPE_DEBLOCK_ROWS,
  ------------------
  |  Branch (796:17): [True: 10.1k, False: 1.56M]
  ------------------
  797|  1.57M|                                &f->frame_thread.deblock_progress,
  798|  1.57M|                                &t->deblock_progress)) continue;
  799|       |            // fall-through
  800|  5.81M|        case DAV1D_TASK_TYPE_DEBLOCK_ROWS:
  ------------------
  |  Branch (800:9): [True: 4.24M, False: 21.7M]
  ------------------
  801|  5.81M|            if (!atomic_load(&f->task_thread.error))
  ------------------
  |  Branch (801:17): [True: 907k, False: 4.90M]
  ------------------
  802|   907k|                f->bd_fn.filter_sbrow_deblock_rows(f, sby);
  803|       |            // signal deblock progress
  804|  5.81M|            if (f->frame_hdr->loopfilter.level_y[0] ||
  ------------------
  |  Branch (804:17): [True: 1.20M, False: 4.60M]
  ------------------
  805|  4.60M|                f->frame_hdr->loopfilter.level_y[1])
  ------------------
  |  Branch (805:17): [True: 373k, False: 4.23M]
  ------------------
  806|  1.57M|            {
  807|  1.57M|                error = atomic_load(&f->task_thread.error);
  808|  1.57M|                atomic_store(&f->frame_thread.deblock_progress,
  ------------------
  |  Branch (808:17): [True: 886k, False: 692k]
  ------------------
  809|  1.57M|                             error ? TILE_ERROR : sby + 1);
  810|  1.57M|                reset_task_cur_async(ttd, t->frame_idx, c->n_fc);
  811|  1.57M|                if (!atomic_fetch_or(&ttd->cond_signaled, 1))
  ------------------
  |  Branch (811:21): [True: 11.3k, False: 1.56M]
  ------------------
  812|  11.3k|                    pthread_cond_signal(&ttd->cond);
  813|  4.23M|            } else if (f->seq_hdr->cdef || f->lf.restore_planes) {
  ------------------
  |  Branch (813:24): [True: 4.16M, False: 63.2k]
  |  Branch (813:44): [True: 63.2k, False: 0]
  ------------------
  814|  4.23M|                atomic_fetch_or(&f->frame_thread.copy_lpf_progress[sby >> 5],
  815|  4.23M|                                1U << (sby & 31));
  816|       |                // CDEF needs the top buffer to be saved by lr_copy_lpf of the
  817|       |                // previous sbrow
  818|  4.23M|                if (sby) {
  ------------------
  |  Branch (818:21): [True: 4.08M, False: 142k]
  ------------------
  819|  4.08M|                    int prog = atomic_load(&f->frame_thread.copy_lpf_progress[(sby - 1) >> 5]);
  820|  4.08M|                    if (~prog & (1U << ((sby - 1) & 31))) {
  ------------------
  |  Branch (820:25): [True: 152k, False: 3.93M]
  ------------------
  821|   152k|                        t->type = DAV1D_TASK_TYPE_CDEF;
  822|   152k|                        t->recon_progress = t->deblock_progress = 0;
  823|   152k|                        add_pending(f, t);
  824|   152k|                        pthread_mutex_lock(&ttd->lock);
  825|   152k|                        continue;
  826|   152k|                    }
  827|  4.08M|                }
  828|  4.23M|            }
  829|       |            // fall-through
  830|  5.81M|        case DAV1D_TASK_TYPE_CDEF:
  ------------------
  |  Branch (830:9): [True: 152k, False: 25.8M]
  ------------------
  831|  5.81M|            if (f->seq_hdr->cdef) {
  ------------------
  |  Branch (831:17): [True: 4.55M, False: 1.25M]
  ------------------
  832|  4.55M|                if (!atomic_load(&f->task_thread.error))
  ------------------
  |  Branch (832:21): [True: 508k, False: 4.04M]
  ------------------
  833|   508k|                    f->bd_fn.filter_sbrow_cdef(tc, sby);
  834|  4.55M|                reset_task_cur_async(ttd, t->frame_idx, c->n_fc);
  835|  4.55M|                if (!atomic_fetch_or(&ttd->cond_signaled, 1))
  ------------------
  |  Branch (835:21): [True: 30.2k, False: 4.52M]
  ------------------
  836|  30.2k|                    pthread_cond_signal(&ttd->cond);
  837|  4.55M|            }
  838|       |            // fall-through
  839|  5.81M|        case DAV1D_TASK_TYPE_SUPER_RESOLUTION:
  ------------------
  |  Branch (839:9): [True: 2.48k, False: 25.9M]
  ------------------
  840|  5.81M|            if (f->frame_hdr->width[0] != f->frame_hdr->width[1])
  ------------------
  |  Branch (840:17): [True: 716k, False: 5.09M]
  ------------------
  841|   716k|                if (!atomic_load(&f->task_thread.error))
  ------------------
  |  Branch (841:21): [True: 32.9k, False: 683k]
  ------------------
  842|  32.9k|                    f->bd_fn.filter_sbrow_resize(f, sby);
  843|       |            // fall-through
  844|  5.81M|        case DAV1D_TASK_TYPE_LOOP_RESTORATION:
  ------------------
  |  Branch (844:9): [True: 0, False: 25.9M]
  ------------------
  845|  5.81M|            if (!atomic_load(&f->task_thread.error) && f->lf.restore_planes)
  ------------------
  |  Branch (845:17): [True: 908k, False: 4.90M]
  |  Branch (845:56): [True: 184k, False: 723k]
  ------------------
  846|   184k|                f->bd_fn.filter_sbrow_lr(f, sby);
  847|       |            // fall-through
  848|  6.16M|        case DAV1D_TASK_TYPE_RECONSTRUCTION_PROGRESS:
  ------------------
  |  Branch (848:9): [True: 350k, False: 25.6M]
  ------------------
  849|       |            // dummy to cover for no post-filters
  850|  12.3M|        case DAV1D_TASK_TYPE_ENTROPY_PROGRESS:
  ------------------
  |  Branch (850:9): [True: 6.16M, False: 19.8M]
  ------------------
  851|       |            // dummy to convert tile progress to frame
  852|  12.3M|            break;
  853|      0|        default: abort();
  ------------------
  |  Branch (853:9): [True: 0, False: 25.9M]
  ------------------
  854|  25.9M|        }
  855|       |        // if task completed [typically LR], signal picture progress as per below
  856|  12.3M|        const int uses_2pass = c->n_fc > 1;
  857|  12.3M|        const int sbh = f->sbh;
  858|  12.3M|        const int sbsz = f->sb_step * 4;
  859|  12.3M|        if (t->type == DAV1D_TASK_TYPE_ENTROPY_PROGRESS) {
  ------------------
  |  Branch (859:13): [True: 6.16M, False: 6.16M]
  ------------------
  860|  6.16M|            error = atomic_load(&f->task_thread.error);
  861|  6.16M|            const unsigned y = sby + 1 == sbh ? UINT_MAX : (unsigned)(sby + 1) * sbsz;
  ------------------
  |  Branch (861:32): [True: 231k, False: 5.93M]
  ------------------
  862|  6.16M|            assert(c->n_fc > 1);
  ------------------
  |  Branch (862:13): [True: 6.16M, False: 18.4E]
  ------------------
  863|  6.16M|            if (f->sr_cur.p.data[0] /* upon flush, this can be free'ed already */)
  ------------------
  |  Branch (863:17): [True: 6.16M, False: 57]
  ------------------
  864|  6.16M|                atomic_store(&f->sr_cur.progress[0], error ? FRAME_ERROR : y);
  ------------------
  |  Branch (864:17): [True: 5.09M, False: 1.06M]
  ------------------
  865|  6.16M|            atomic_store(&f->frame_thread.entropy_progress,
  ------------------
  |  Branch (865:13): [True: 5.10M, False: 1.06M]
  ------------------
  866|  6.16M|                         error ? TILE_ERROR : sby + 1);
  867|  6.16M|            if (sby + 1 == sbh)
  ------------------
  |  Branch (867:17): [True: 231k, False: 5.93M]
  ------------------
  868|  6.16M|                atomic_store(&f->task_thread.done[1], 1);
  869|  6.16M|            pthread_mutex_lock(&ttd->lock);
  870|  6.16M|            const int num_tasks = atomic_fetch_sub(&f->task_thread.task_counter, 1) - 1;
  871|  6.16M|            if (sby + 1 < sbh && num_tasks) {
  ------------------
  |  Branch (871:17): [True: 5.93M, False: 231k]
  |  Branch (871:34): [True: 5.92M, False: 5.96k]
  ------------------
  872|  5.92M|                reset_task_cur(c, ttd, t->frame_idx);
  873|  5.92M|                continue;
  874|  5.92M|            }
  875|   237k|            if (!num_tasks && atomic_load(&f->task_thread.done[0]) &&
  ------------------
  |  Branch (875:17): [True: 29.5k, False: 207k]
  |  Branch (875:31): [True: 29.5k, False: 0]
  ------------------
  876|   237k|                atomic_load(&f->task_thread.done[1]))
  ------------------
  |  Branch (876:17): [True: 29.5k, False: 0]
  ------------------
  877|  29.5k|            {
  878|  29.5k|                error = atomic_load(&f->task_thread.error);
  879|  29.5k|                dav1d_decode_frame_exit(f, error == 1 ? DAV1D_ERR(EINVAL) :
  ------------------
  |  |   58|  21.4k|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  |  Branch (879:44): [True: 21.4k, False: 8.10k]
  ------------------
  880|  29.5k|                                        error ? DAV1D_ERR(ENOMEM) : 0);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  |  Branch (880:41): [True: 0, False: 8.10k]
  ------------------
  881|  29.5k|                f->n_tile_data = 0;
  882|  29.5k|                pthread_cond_signal(&f->task_thread.cond);
  883|  29.5k|            }
  884|   237k|            reset_task_cur(c, ttd, t->frame_idx);
  885|   237k|            continue;
  886|  6.16M|        }
  887|       |    // t->type != DAV1D_TASK_TYPE_ENTROPY_PROGRESS
  888|  12.3M|        atomic_fetch_or(&f->frame_thread.frame_progress[sby >> 5],
  889|  6.16M|                        1U << (sby & 31));
  890|  6.16M|        pthread_mutex_lock(&f->task_thread.lock);
  891|  6.16M|        sby = get_frame_progress(c, f);
  892|  6.16M|        error = atomic_load(&f->task_thread.error);
  893|  6.16M|        const unsigned y = sby + 1 == sbh ? UINT_MAX : (unsigned)(sby + 1) * sbsz;
  ------------------
  |  Branch (893:28): [True: 5.19M, False: 967k]
  ------------------
  894|  6.16M|        if (c->n_fc > 1 && f->sr_cur.p.data[0] /* upon flush, this can be free'ed already */)
  ------------------
  |  Branch (894:13): [True: 6.16M, False: 18.4E]
  |  Branch (894:28): [True: 6.16M, False: 18.4E]
  ------------------
  895|  6.16M|            atomic_store(&f->sr_cur.progress[1], error ? FRAME_ERROR : y);
  ------------------
  |  Branch (895:13): [True: 5.11M, False: 1.04M]
  ------------------
  896|  6.16M|        pthread_mutex_unlock(&f->task_thread.lock);
  897|  6.16M|        if (sby + 1 == sbh)
  ------------------
  |  Branch (897:13): [True: 5.19M, False: 967k]
  ------------------
  898|  6.16M|            atomic_store(&f->task_thread.done[0], 1);
  899|  6.16M|        pthread_mutex_lock(&ttd->lock);
  900|  6.16M|        const int num_tasks = atomic_fetch_sub(&f->task_thread.task_counter, 1) - 1;
  901|  6.16M|        if (sby + 1 < sbh && num_tasks) {
  ------------------
  |  Branch (901:13): [True: 969k, False: 5.19M]
  |  Branch (901:30): [True: 966k, False: 3.11k]
  ------------------
  902|   966k|            reset_task_cur(c, ttd, t->frame_idx);
  903|   966k|            continue;
  904|   966k|        }
  905|  5.19M|        if (!num_tasks && atomic_load(&f->task_thread.done[0]) &&
  ------------------
  |  Branch (905:13): [True: 195k, False: 5.00M]
  |  Branch (905:27): [True: 195k, False: 0]
  ------------------
  906|   195k|            (!uses_2pass || atomic_load(&f->task_thread.done[1])))
  ------------------
  |  Branch (906:14): [True: 0, False: 195k]
  |  Branch (906:29): [True: 195k, False: 0]
  ------------------
  907|   195k|        {
  908|   195k|            error = atomic_load(&f->task_thread.error);
  909|   195k|            dav1d_decode_frame_exit(f, error == 1 ? DAV1D_ERR(EINVAL) :
  ------------------
  |  |   58|  69.1k|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  |  Branch (909:40): [True: 69.1k, False: 126k]
  ------------------
  910|   195k|                                    error ? DAV1D_ERR(ENOMEM) : 0);
  ------------------
  |  |   58|      0|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  |  Branch (910:37): [True: 0, False: 126k]
  ------------------
  911|   195k|            f->n_tile_data = 0;
  912|   195k|            pthread_cond_signal(&f->task_thread.cond);
  913|   195k|        }
  914|  5.19M|        reset_task_cur(c, ttd, t->frame_idx);
  915|  5.19M|    }
  916|  42.0k|    pthread_mutex_unlock(&ttd->lock);
  917|       |
  918|       |    return NULL;
  919|  40.8k|}
thread_task.c:create_filter_sbrow:
  215|   463k|{
  216|   463k|    const int has_deblock = f->frame_hdr->loopfilter.level_y[0] ||
  ------------------
  |  Branch (216:29): [True: 86.5k, False: 377k]
  ------------------
  217|   377k|                            f->frame_hdr->loopfilter.level_y[1];
  ------------------
  |  Branch (217:29): [True: 3.93k, False: 373k]
  ------------------
  218|   463k|    const int has_cdef = f->seq_hdr->cdef;
  219|   463k|    const int has_resize = f->frame_hdr->width[0] != f->frame_hdr->width[1];
  220|   463k|    const int has_lr = f->lf.restore_planes;
  221|       |
  222|   463k|    Dav1dTask *tasks = f->task_thread.tasks;
  223|   463k|    const int uses_2pass = f->c->n_fc > 1;
  224|   463k|    int num_tasks = f->sbh * (1 + uses_2pass);
  225|   463k|    if (num_tasks > f->task_thread.num_tasks) {
  ------------------
  |  Branch (225:9): [True: 15.8k, False: 447k]
  ------------------
  226|  15.8k|        const size_t size = sizeof(Dav1dTask) * num_tasks;
  227|  15.8k|        tasks = dav1d_realloc(ALLOC_COMMON_CTX, f->task_thread.tasks, size);
  ------------------
  |  |  133|  15.8k|#define dav1d_realloc(type, ptr, sz) realloc(ptr, sz)
  ------------------
  228|  15.8k|        if (!tasks) return -1;
  ------------------
  |  Branch (228:13): [True: 0, False: 15.8k]
  ------------------
  229|  15.8k|        memset(tasks, 0, size);
  230|  15.8k|        f->task_thread.tasks = tasks;
  231|  15.8k|        f->task_thread.num_tasks = num_tasks;
  232|  15.8k|    }
  233|   463k|    tasks += f->sbh * (pass & 1);
  234|       |
  235|   463k|    if (pass & 1) {
  ------------------
  |  Branch (235:9): [True: 231k, False: 231k]
  ------------------
  236|   231k|        f->frame_thread.entropy_progress = 0;
  237|   231k|    } else {
  238|   231k|        const int prog_sz = ((f->sbh + 31) & ~31) >> 5;
  239|   231k|        if (prog_sz > f->frame_thread.prog_sz) {
  ------------------
  |  Branch (239:13): [True: 16.1k, False: 215k]
  ------------------
  240|  16.1k|            atomic_uint *const prog = dav1d_realloc(ALLOC_COMMON_CTX, f->frame_thread.frame_progress,
  ------------------
  |  |  133|  16.1k|#define dav1d_realloc(type, ptr, sz) realloc(ptr, sz)
  ------------------
  241|  16.1k|                                                    2 * prog_sz * sizeof(*prog));
  242|  16.1k|            if (!prog) return -1;
  ------------------
  |  Branch (242:17): [True: 0, False: 16.1k]
  ------------------
  243|  16.1k|            f->frame_thread.frame_progress = prog;
  244|  16.1k|            f->frame_thread.copy_lpf_progress = prog + prog_sz;
  245|  16.1k|        }
  246|   231k|        f->frame_thread.prog_sz = prog_sz;
  247|   231k|        memset(f->frame_thread.frame_progress, 0, prog_sz * sizeof(atomic_uint));
  248|   231k|        memset(f->frame_thread.copy_lpf_progress, 0, prog_sz * sizeof(atomic_uint));
  249|   231k|        atomic_store(&f->frame_thread.deblock_progress, 0);
  250|   231k|    }
  251|   463k|    f->frame_thread.next_tile_row[pass & 1] = 0;
  252|       |
  253|   463k|    Dav1dTask *t = &tasks[0];
  254|   463k|    t->sby = 0;
  255|   463k|    t->recon_progress = 1;
  256|   463k|    t->deblock_progress = 0;
  257|   463k|    t->type = pass == 1 ? DAV1D_TASK_TYPE_ENTROPY_PROGRESS :
  ------------------
  |  Branch (257:15): [True: 232k, False: 231k]
  ------------------
  258|   463k|              has_deblock ? DAV1D_TASK_TYPE_DEBLOCK_COLS :
  ------------------
  |  Branch (258:15): [True: 45.2k, False: 186k]
  ------------------
  259|   231k|              has_cdef || has_lr /* i.e. LR backup */ ? DAV1D_TASK_TYPE_DEBLOCK_ROWS :
  ------------------
  |  Branch (259:15): [True: 141k, False: 44.7k]
  |  Branch (259:27): [True: 715, False: 44.0k]
  ------------------
  260|   186k|              has_resize ? DAV1D_TASK_TYPE_SUPER_RESOLUTION :
  ------------------
  |  Branch (260:15): [True: 949, False: 42.5k]
  ------------------
  261|  43.4k|              DAV1D_TASK_TYPE_RECONSTRUCTION_PROGRESS;
  262|   463k|    t->frame_idx = (int)(f - f->c->fc);
  263|       |
  264|   463k|    *res_t = t;
  265|   463k|    return 0;
  266|   463k|}
thread_task.c:insert_task:
  172|  15.6M|{
  173|  15.6M|    insert_tasks(f, t, t, cond_signal);
  174|  15.6M|}
thread_task.c:insert_tasks:
  118|  15.6M|{
  119|       |    // insert task back into task queue
  120|  15.6M|    Dav1dTask *t_ptr, *prev_t = NULL;
  121|  15.6M|    for (t_ptr = f->task_thread.task_head;
  122|  44.7M|         t_ptr; prev_t = t_ptr, t_ptr = t_ptr->next)
  ------------------
  |  Branch (122:10): [True: 31.1M, False: 13.6M]
  ------------------
  123|  31.1M|    {
  124|       |        // entropy coding precedes other steps
  125|  31.1M|        if (t_ptr->type == DAV1D_TASK_TYPE_TILE_ENTROPY) {
  ------------------
  |  Branch (125:13): [True: 1.33M, False: 29.7M]
  ------------------
  126|  1.33M|            if (first->type > DAV1D_TASK_TYPE_TILE_ENTROPY) continue;
  ------------------
  |  Branch (126:17): [True: 1.05M, False: 278k]
  ------------------
  127|       |            // both are entropy
  128|   278k|            if (first->sby > t_ptr->sby) continue;
  ------------------
  |  Branch (128:17): [True: 172k, False: 106k]
  ------------------
  129|   106k|            if (first->sby < t_ptr->sby) {
  ------------------
  |  Branch (129:17): [True: 22.0k, False: 84.5k]
  ------------------
  130|  22.0k|                insert_tasks_between(f, first, last, prev_t, t_ptr, cond_signal);
  131|  22.0k|                return;
  132|  22.0k|            }
  133|       |            // same sby
  134|  29.7M|        } else {
  135|  29.7M|            if (first->type == DAV1D_TASK_TYPE_TILE_ENTROPY) {
  ------------------
  |  Branch (135:17): [True: 1.17M, False: 28.5M]
  ------------------
  136|  1.17M|                insert_tasks_between(f, first, last, prev_t, t_ptr, cond_signal);
  137|  1.17M|                return;
  138|  1.17M|            }
  139|  28.5M|            if (first->sby > t_ptr->sby) continue;
  ------------------
  |  Branch (139:17): [True: 20.8M, False: 7.71M]
  ------------------
  140|  7.71M|            if (first->sby < t_ptr->sby) {
  ------------------
  |  Branch (140:17): [True: 742k, False: 6.96M]
  ------------------
  141|   742k|                insert_tasks_between(f, first, last, prev_t, t_ptr, cond_signal);
  142|   742k|                return;
  143|   742k|            }
  144|       |            // same sby
  145|  6.96M|            if (first->type > t_ptr->type) continue;
  ------------------
  |  Branch (145:17): [True: 6.89M, False: 71.3k]
  ------------------
  146|  71.3k|            if (first->type < t_ptr->type) {
  ------------------
  |  Branch (146:17): [True: 26.0k, False: 45.2k]
  ------------------
  147|  26.0k|                insert_tasks_between(f, first, last, prev_t, t_ptr, cond_signal);
  148|  26.0k|                return;
  149|  26.0k|            }
  150|       |            // same task type
  151|  71.3k|        }
  152|       |
  153|       |        // sort by tile-id
  154|  31.1M|        assert(first->type == DAV1D_TASK_TYPE_TILE_RECONSTRUCTION ||
  ------------------
  |  Branch (154:9): [True: 45.2k, False: 84.5k]
  |  Branch (154:9): [True: 84.5k, False: 0]
  ------------------
  155|   129k|               first->type == DAV1D_TASK_TYPE_TILE_ENTROPY);
  156|   129k|        assert(first->type == t_ptr->type);
  ------------------
  |  Branch (156:9): [True: 129k, False: 0]
  ------------------
  157|   129k|        assert(t_ptr->sby == first->sby);
  ------------------
  |  Branch (157:9): [True: 129k, False: 0]
  ------------------
  158|   129k|        const int p = first->type == DAV1D_TASK_TYPE_TILE_ENTROPY;
  159|   129k|        const int t_tile_idx = (int) (first - f->task_thread.tile_tasks[p]);
  160|   129k|        const int p_tile_idx = (int) (t_ptr - f->task_thread.tile_tasks[p]);
  161|   129k|        assert(t_tile_idx != p_tile_idx);
  ------------------
  |  Branch (161:9): [True: 129k, False: 0]
  ------------------
  162|   129k|        if (t_tile_idx > p_tile_idx) continue;
  ------------------
  |  Branch (162:13): [True: 117k, False: 12.6k]
  ------------------
  163|  12.6k|        insert_tasks_between(f, first, last, prev_t, t_ptr, cond_signal);
  164|  12.6k|        return;
  165|   129k|    }
  166|       |    // append at the end
  167|  13.6M|    insert_tasks_between(f, first, last, prev_t, NULL, cond_signal);
  168|  13.6M|}
thread_task.c:insert_tasks_between:
  102|  15.6M|{
  103|  15.6M|    struct TaskThreadData *const ttd = f->task_thread.ttd;
  104|  15.6M|    if (atomic_load(f->c->flush)) return;
  ------------------
  |  Branch (104:9): [True: 25, False: 15.6M]
  ------------------
  105|  15.6M|    assert(!a || a->next == b);
  ------------------
  |  Branch (105:5): [True: 1.79M, False: 13.8M]
  |  Branch (105:5): [True: 13.8M, False: 0]
  ------------------
  106|  15.6M|    if (!a) f->task_thread.task_head = first;
  ------------------
  |  Branch (106:9): [True: 1.79M, False: 13.8M]
  ------------------
  107|  13.8M|    else a->next = first;
  108|  15.6M|    if (!b) f->task_thread.task_tail = last;
  ------------------
  |  Branch (108:9): [True: 13.6M, False: 1.97M]
  ------------------
  109|  15.6M|    last->next = b;
  110|  15.6M|    reset_task_cur(f->c, ttd, first->frame_idx);
  111|  15.6M|    if (cond_signal && !atomic_fetch_or(&ttd->cond_signaled, 1))
  ------------------
  |  Branch (111:9): [True: 282k, False: 15.3M]
  |  Branch (111:24): [True: 97.4k, False: 184k]
  ------------------
  112|  97.4k|        pthread_cond_signal(&ttd->cond);
  113|  15.6M|}
thread_task.c:merge_pending:
  206|  17.2M|static inline int merge_pending(const Dav1dContext *const c) {
  207|  17.2M|    int res = 0;
  208|  86.0M|    for (unsigned i = 0; i < c->n_fc; i++)
  ------------------
  |  Branch (208:26): [True: 68.8M, False: 17.2M]
  ------------------
  209|  68.8M|        res |= merge_pending_frame(&c->fc[i]);
  210|  17.2M|    return res;
  211|  17.2M|}
thread_task.c:delayed_fg_task:
  473|  13.1k|{
  474|  13.1k|    const Dav1dPicture *const in = ttd->delayed_fg.in;
  475|  13.1k|    Dav1dPicture *const out = ttd->delayed_fg.out;
  476|  13.1k|#if CONFIG_16BPC
  477|  13.1k|    int off;
  478|  13.1k|    if (out->p.bpc != 8)
  ------------------
  |  Branch (478:9): [True: 3.20k, False: 9.98k]
  ------------------
  479|  3.20k|        off = (out->p.bpc >> 1) - 4;
  480|  13.1k|#endif
  481|  13.1k|    switch (ttd->delayed_fg.type) {
  482|  6.80k|    case DAV1D_TASK_TYPE_FG_PREP:
  ------------------
  |  Branch (482:5): [True: 6.80k, False: 6.38k]
  ------------------
  483|  6.80k|        ttd->delayed_fg.exec = 0;
  484|  6.80k|        if (atomic_load(&ttd->cond_signaled))
  ------------------
  |  Branch (484:13): [True: 2.58k, False: 4.22k]
  ------------------
  485|  2.58k|            pthread_cond_signal(&ttd->cond);
  486|  6.80k|        pthread_mutex_unlock(&ttd->lock);
  487|  6.80k|        switch (out->p.bpc) {
  488|      0|#if CONFIG_8BPC
  489|  4.97k|        case 8:
  ------------------
  |  Branch (489:9): [True: 4.97k, False: 1.83k]
  ------------------
  490|  4.97k|            dav1d_prep_grain_8bpc(&c->dsp[0].fg, out, in,
  491|  4.97k|                                  ttd->delayed_fg.scaling_8bpc,
  492|  4.97k|                                  ttd->delayed_fg.grain_lut_8bpc);
  493|  4.97k|            break;
  494|      0|#endif
  495|      0|#if CONFIG_16BPC
  496|  1.54k|        case 10:
  ------------------
  |  Branch (496:9): [True: 1.54k, False: 5.26k]
  ------------------
  497|  1.83k|        case 12:
  ------------------
  |  Branch (497:9): [True: 291, False: 6.51k]
  ------------------
  498|  1.83k|            dav1d_prep_grain_16bpc(&c->dsp[off].fg, out, in,
  499|  1.83k|                                   ttd->delayed_fg.scaling_16bpc,
  500|  1.83k|                                   ttd->delayed_fg.grain_lut_16bpc);
  501|  1.83k|            break;
  502|      0|#endif
  503|      0|        default: abort();
  ------------------
  |  Branch (503:9): [True: 0, False: 6.80k]
  ------------------
  504|  6.80k|        }
  505|  6.80k|        ttd->delayed_fg.type = DAV1D_TASK_TYPE_FG_APPLY;
  506|  6.80k|        pthread_mutex_lock(&ttd->lock);
  507|  6.80k|        ttd->delayed_fg.exec = 1;
  508|       |        // fall-through
  509|  13.1k|    case DAV1D_TASK_TYPE_FG_APPLY:;
  ------------------
  |  Branch (509:5): [True: 6.38k, False: 6.80k]
  ------------------
  510|  13.1k|        int row = atomic_fetch_add(&ttd->delayed_fg.progress[0], 1);
  511|  13.1k|        pthread_mutex_unlock(&ttd->lock);
  512|  13.1k|        int progmax = (out->p.h + FG_BLOCK_SIZE - 1) / FG_BLOCK_SIZE;
  ------------------
  |  |   37|  13.1k|#define FG_BLOCK_SIZE 32
  ------------------
                      int progmax = (out->p.h + FG_BLOCK_SIZE - 1) / FG_BLOCK_SIZE;
  ------------------
  |  |   37|  13.1k|#define FG_BLOCK_SIZE 32
  ------------------
  513|  44.2k|        while (row < progmax) {
  ------------------
  |  Branch (513:16): [True: 31.1k, False: 13.1k]
  ------------------
  514|  31.1k|            if (row + 1 < progmax)
  ------------------
  |  Branch (514:17): [True: 24.3k, False: 6.79k]
  ------------------
  515|  24.3k|                pthread_cond_signal(&ttd->cond);
  516|  6.79k|            else {
  517|  6.79k|                pthread_mutex_lock(&ttd->lock);
  518|  6.79k|                ttd->delayed_fg.exec = 0;
  519|  6.79k|                pthread_mutex_unlock(&ttd->lock);
  520|  6.79k|            }
  521|  31.1k|            switch (out->p.bpc) {
  522|      0|#if CONFIG_8BPC
  523|  23.7k|            case 8:
  ------------------
  |  Branch (523:13): [True: 23.7k, False: 7.43k]
  ------------------
  524|  23.7k|                dav1d_apply_grain_row_8bpc(&c->dsp[0].fg, out, in,
  525|  23.7k|                                           ttd->delayed_fg.scaling_8bpc,
  526|  23.7k|                                           ttd->delayed_fg.grain_lut_8bpc, row);
  527|  23.7k|                break;
  528|      0|#endif
  529|      0|#if CONFIG_16BPC
  530|  6.18k|            case 10:
  ------------------
  |  Branch (530:13): [True: 6.18k, False: 24.9k]
  ------------------
  531|  7.44k|            case 12:
  ------------------
  |  Branch (531:13): [True: 1.26k, False: 29.8k]
  ------------------
  532|  7.44k|                dav1d_apply_grain_row_16bpc(&c->dsp[off].fg, out, in,
  533|  7.44k|                                            ttd->delayed_fg.scaling_16bpc,
  534|  7.44k|                                            ttd->delayed_fg.grain_lut_16bpc, row);
  535|  7.44k|                break;
  536|      0|#endif
  537|      0|            default: abort();
  ------------------
  |  Branch (537:13): [True: 0, False: 31.1k]
  ------------------
  538|  31.1k|            }
  539|  31.0k|            row = atomic_fetch_add(&ttd->delayed_fg.progress[0], 1);
  540|  31.0k|            atomic_fetch_add(&ttd->delayed_fg.progress[1], 1);
  541|  31.0k|        }
  542|  13.1k|        pthread_mutex_lock(&ttd->lock);
  543|  13.1k|        ttd->delayed_fg.exec = 0;
  544|  13.1k|        int done = atomic_fetch_add(&ttd->delayed_fg.progress[1], 1) + 1;
  545|  13.1k|        progmax = atomic_load(&ttd->delayed_fg.progress[0]);
  546|       |        // signal for completion only once the last runner reaches this
  547|  13.1k|        if (done >= progmax) {
  ------------------
  |  Branch (547:13): [True: 6.80k, False: 6.31k]
  ------------------
  548|  6.80k|            ttd->delayed_fg.finished = 1;
  549|  6.80k|            pthread_cond_signal(&ttd->delayed_fg.cond);
  550|  6.80k|        }
  551|  13.1k|        break;
  552|      0|    default: abort();
  ------------------
  |  Branch (552:5): [True: 0, False: 13.1k]
  ------------------
  553|  13.1k|    }
  554|  13.1k|}
thread_task.c:merge_pending_frame:
  188|  86.1M|static inline int merge_pending_frame(Dav1dFrameContext *const f) {
  189|  86.1M|    int const merge = atomic_load(&f->task_thread.pending_tasks.merge);
  190|  86.1M|    if (merge) {
  ------------------
  |  Branch (190:9): [True: 2.69M, False: 83.4M]
  ------------------
  191|  2.69M|        pthread_mutex_lock(&f->task_thread.pending_tasks.lock);
  192|  2.69M|        Dav1dTask *t = f->task_thread.pending_tasks.head;
  193|  2.69M|        f->task_thread.pending_tasks.head = NULL;
  194|  2.69M|        f->task_thread.pending_tasks.tail = NULL;
  195|  2.69M|        atomic_store(&f->task_thread.pending_tasks.merge, 0);
  196|  2.69M|        pthread_mutex_unlock(&f->task_thread.pending_tasks.lock);
  197|  6.20M|        while (t) {
  ------------------
  |  Branch (197:16): [True: 3.50M, False: 2.69M]
  ------------------
  198|  3.50M|            Dav1dTask *const tmp = t->next;
  199|  3.50M|            insert_task(f, t, 0);
  200|  3.50M|            t = tmp;
  201|  3.50M|        }
  202|  2.69M|    }
  203|  86.1M|    return merge;
  204|  86.1M|}
thread_task.c:check_tile:
  395|  15.6M|{
  396|  15.6M|    const int tp = t->type == DAV1D_TASK_TYPE_TILE_ENTROPY;
  397|  15.6M|    const int tile_idx = (int)(t - f->task_thread.tile_tasks[tp]);
  398|  15.6M|    Dav1dTileState *const ts = &f->ts[tile_idx];
  399|  15.6M|    const int p1 = atomic_load(&ts->progress[tp]);
  400|  15.6M|    if (p1 < t->sby) return 1;
  ------------------
  |  Branch (400:9): [True: 2.37M, False: 13.2M]
  ------------------
  401|  13.2M|    int error = p1 == TILE_ERROR;
  ------------------
  |  |   36|  13.2M|#define TILE_ERROR (INT_MAX - 1)
  ------------------
  402|  13.2M|    error |= atomic_fetch_or(&f->task_thread.error, error);
  403|  13.2M|    if (!error && frame_mt && !tp) {
  ------------------
  |  Branch (403:9): [True: 2.96M, False: 10.2M]
  |  Branch (403:19): [True: 2.96M, False: 0]
  |  Branch (403:31): [True: 1.55M, False: 1.41M]
  ------------------
  404|  1.55M|        const int p2 = atomic_load(&ts->progress[1]);
  405|  1.55M|        if (p2 <= t->sby) return 1;
  ------------------
  |  Branch (405:13): [True: 208k, False: 1.34M]
  ------------------
  406|  1.34M|        error = p2 == TILE_ERROR;
  ------------------
  |  |   36|  1.34M|#define TILE_ERROR (INT_MAX - 1)
  ------------------
  407|  1.34M|        error |= atomic_fetch_or(&f->task_thread.error, error);
  408|  1.34M|    }
  409|  13.0M|    if (!error && frame_mt && !IS_KEY_OR_INTRA(f->frame_hdr)) {
  ------------------
  |  |   43|  2.75M|    (!IS_INTER_OR_SWITCH(frame_header))
  |  |  ------------------
  |  |  |  |   36|  2.75M|    ((frame_header)->frame_type & 1)
  |  |  ------------------
  ------------------
  |  Branch (409:9): [True: 2.75M, False: 10.2M]
  |  Branch (409:19): [True: 2.75M, False: 0]
  |  Branch (409:31): [True: 1.93M, False: 820k]
  ------------------
  410|       |        // check reference state
  411|  1.93M|        const Dav1dThreadPicture *p = &f->sr_cur;
  412|  1.93M|        const int ss_ver = p->p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
  413|  1.93M|        const unsigned p_b = (t->sby + 1) << (f->sb_shift + 2);
  414|  1.93M|        const int tile_sby = t->sby - (ts->tiling.row_start >> f->sb_shift);
  415|  1.93M|        const int (*const lowest_px)[2] = ts->lowest_pixel[tile_sby];
  416|  14.6M|        for (int n = t->deps_skip; n < 7; n++, t->deps_skip++) {
  ------------------
  |  Branch (416:36): [True: 12.8M, False: 1.81M]
  ------------------
  417|  12.8M|            unsigned lowest;
  418|  12.8M|            if (tp) {
  ------------------
  |  Branch (418:17): [True: 6.56M, False: 6.27M]
  ------------------
  419|       |                // if temporal mv refs are disabled, we only need this
  420|       |                // for the primary ref; if segmentation is disabled, we
  421|       |                // don't even need that
  422|  6.56M|                lowest = p_b;
  423|  6.56M|            } else {
  424|       |                // +8 is postfilter-induced delay
  425|  6.27M|                const int y = lowest_px[n][0] == INT_MIN ? INT_MIN :
  ------------------
  |  Branch (425:31): [True: 5.26M, False: 1.01M]
  ------------------
  426|  6.27M|                              lowest_px[n][0] + 8;
  427|  6.27M|                const int uv = lowest_px[n][1] == INT_MIN ? INT_MIN :
  ------------------
  |  Branch (427:32): [True: 6.13M, False: 144k]
  ------------------
  428|  6.27M|                               lowest_px[n][1] * (1 << ss_ver) + 8;
  429|  6.27M|                const int max = imax(y, uv);
  430|  6.27M|                if (max == INT_MIN) continue;
  ------------------
  |  Branch (430:21): [True: 5.26M, False: 1.01M]
  ------------------
  431|  1.01M|                lowest = iclip(max, 1, f->refp[n].p.p.h);
  432|  1.01M|            }
  433|  7.57M|            const unsigned p3 = atomic_load(&f->refp[n].progress[!tp]);
  434|  7.57M|            if (p3 < lowest) return 1;
  ------------------
  |  Branch (434:17): [True: 120k, False: 7.45M]
  ------------------
  435|  7.57M|            atomic_fetch_or(&f->task_thread.error, p3 == FRAME_ERROR);
  436|  7.45M|        }
  437|  1.93M|    }
  438|  12.9M|    return 0;
  439|  13.0M|}
thread_task.c:reset_task_cur:
   50|  30.0M|{
   51|  30.0M|    const unsigned first = atomic_load(&ttd->first);
   52|  30.0M|    unsigned reset_frame_idx = atomic_exchange(&ttd->reset_task_cur, UINT_MAX);
   53|  30.0M|    if (reset_frame_idx < first) {
  ------------------
  |  Branch (53:9): [True: 0, False: 30.0M]
  ------------------
   54|      0|        if (frame_idx == UINT_MAX) return 0;
  ------------------
  |  Branch (54:13): [True: 0, False: 0]
  ------------------
   55|      0|        reset_frame_idx = UINT_MAX;
   56|      0|    }
   57|  30.0M|    if (!ttd->cur && c->fc[first].task_thread.task_cur_prev == NULL)
  ------------------
  |  Branch (57:9): [True: 21.5M, False: 8.52M]
  |  Branch (57:22): [True: 19.5M, False: 1.96M]
  ------------------
   58|  19.5M|        return 0;
   59|  10.4M|    if (reset_frame_idx != UINT_MAX) {
  ------------------
  |  Branch (59:9): [True: 1.32M, False: 9.15M]
  ------------------
   60|  1.32M|        if (frame_idx == UINT_MAX) {
  ------------------
  |  Branch (60:13): [True: 37.6k, False: 1.29M]
  ------------------
   61|  37.6k|            if (reset_frame_idx > first + ttd->cur)
  ------------------
  |  Branch (61:17): [True: 281, False: 37.3k]
  ------------------
   62|    281|                return 0;
   63|  37.3k|            ttd->cur = reset_frame_idx - first;
   64|  37.3k|            goto cur_found;
   65|  37.6k|        }
   66|  9.15M|    } else if (frame_idx == UINT_MAX)
  ------------------
  |  Branch (66:16): [True: 1.24M, False: 7.91M]
  ------------------
   67|  1.24M|        return 0;
   68|  9.20M|    if (frame_idx < first) frame_idx += c->n_fc;
  ------------------
  |  Branch (68:9): [True: 2.41M, False: 6.78M]
  ------------------
   69|  9.20M|    const unsigned min_frame_idx = umin(reset_frame_idx, frame_idx);
   70|  9.20M|    const unsigned cur_frame_idx = first + ttd->cur;
   71|  9.20M|    if (ttd->cur < c->n_fc && cur_frame_idx < min_frame_idx)
  ------------------
  |  Branch (71:9): [True: 8.66M, False: 544k]
  |  Branch (71:31): [True: 607k, False: 8.05M]
  ------------------
   72|   607k|        return 0;
   73|  9.15M|    for (ttd->cur = min_frame_idx - first; ttd->cur < c->n_fc; ttd->cur++)
  ------------------
  |  Branch (73:44): [True: 9.04M, False: 107k]
  ------------------
   74|  9.04M|        if (c->fc[(first + ttd->cur) % c->n_fc].task_thread.task_head)
  ------------------
  |  Branch (74:13): [True: 8.49M, False: 555k]
  ------------------
   75|  8.49M|            break;
   76|  8.63M|cur_found:
   77|  33.7M|    for (unsigned i = ttd->cur; i < c->n_fc; i++)
  ------------------
  |  Branch (77:33): [True: 25.1M, False: 8.63M]
  ------------------
   78|  25.1M|        c->fc[(first + i) % c->n_fc].task_thread.task_cur_prev = NULL;
   79|  8.63M|    return 1;
   80|  8.59M|}
thread_task.c:abort_frame:
  459|  49.1k|static inline void abort_frame(Dav1dFrameContext *const f, const int error) {
  460|  49.1k|    atomic_store(&f->task_thread.error, error == DAV1D_ERR(EINVAL) ? 1 : -1);
  ------------------
  |  Branch (460:5): [True: 36.9k, False: 12.1k]
  ------------------
  461|  49.1k|    atomic_store(&f->task_thread.task_counter, 0);
  462|  49.1k|    atomic_store(&f->task_thread.done[0], 1);
  463|  49.1k|    atomic_store(&f->task_thread.done[1], 1);
  464|  49.1k|    atomic_store(&f->sr_cur.progress[0], FRAME_ERROR);
  465|       |    atomic_store(&f->sr_cur.progress[1], FRAME_ERROR);
  466|  49.1k|    dav1d_decode_frame_exit(f, error);
  467|  49.1k|    f->n_tile_data = 0;
  468|  49.1k|    pthread_cond_signal(&f->task_thread.cond);
  469|  49.1k|}
thread_task.c:add_pending:
  176|  2.54M|static inline void add_pending(Dav1dFrameContext *const f, Dav1dTask *const t) {
  177|  2.54M|    pthread_mutex_lock(&f->task_thread.pending_tasks.lock);
  178|  2.54M|    t->next = NULL;
  179|  2.54M|    if (!f->task_thread.pending_tasks.head)
  ------------------
  |  Branch (179:9): [True: 2.46M, False: 81.5k]
  ------------------
  180|  2.46M|        f->task_thread.pending_tasks.head = t;
  181|  81.5k|    else
  182|  81.5k|        f->task_thread.pending_tasks.tail->next = t;
  183|  2.54M|    f->task_thread.pending_tasks.tail = t;
  184|       |    atomic_store(&f->task_thread.pending_tasks.merge, 1);
  185|  2.54M|    pthread_mutex_unlock(&f->task_thread.pending_tasks.lock);
  186|  2.54M|}
thread_task.c:reset_task_cur_async:
   84|  16.2M|{
   85|  16.2M|    const unsigned first = atomic_load(&ttd->first);
   86|  16.2M|    if (frame_idx < first) frame_idx += n_frames;
  ------------------
  |  Branch (86:9): [True: 2.87M, False: 13.3M]
  ------------------
   87|  16.2M|    unsigned last_idx = frame_idx;
   88|  16.3M|    do {
   89|  16.3M|        frame_idx = last_idx;
   90|  16.3M|        last_idx = atomic_exchange(&ttd->reset_task_cur, frame_idx);
   91|  16.3M|    } while (last_idx < frame_idx);
  ------------------
  |  Branch (91:14): [True: 84.9k, False: 16.2M]
  ------------------
   92|  16.2M|    if (frame_idx == first && atomic_load(&ttd->first) != first) {
  ------------------
  |  Branch (92:9): [True: 6.31M, False: 9.91M]
  |  Branch (92:31): [True: 0, False: 6.31M]
  ------------------
   93|      0|        unsigned expected = frame_idx;
   94|       |        atomic_compare_exchange_strong(&ttd->reset_task_cur, &expected, UINT_MAX);
   95|      0|    }
   96|  16.2M|}
thread_task.c:ensure_progress:
  378|  1.57M|{
  379|       |    // deblock_rows (non-LR portion) depends on deblock of previous sbrow,
  380|       |    // so ensure that completed. if not, re-add to task-queue; else, fall-through
  381|  1.57M|    int p1 = atomic_load(state);
  382|  1.57M|    if (p1 < t->sby) {
  ------------------
  |  Branch (382:9): [True: 10.1k, False: 1.56M]
  ------------------
  383|  10.1k|        t->type = type;
  384|  10.1k|        t->recon_progress = t->deblock_progress = 0;
  385|  10.1k|        *target = t->sby;
  386|  10.1k|        add_pending(f, t);
  387|  10.1k|        pthread_mutex_lock(&ttd->lock);
  388|  10.1k|        return 1;
  389|  10.1k|    }
  390|  1.56M|    return 0;
  391|  1.57M|}
thread_task.c:get_frame_progress:
  443|  6.16M|{
  444|  18.4E|    unsigned frame_prog = c->n_fc > 1 ? atomic_load(&f->sr_cur.progress[1]) : 0;
  ------------------
  |  Branch (444:27): [True: 6.16M, False: 18.4E]
  ------------------
  445|  6.16M|    if (frame_prog >= FRAME_ERROR)
  ------------------
  |  |   35|  6.16M|#define FRAME_ERROR (UINT_MAX - 1)
  ------------------
  |  Branch (445:9): [True: 5.02M, False: 1.14M]
  ------------------
  446|  5.02M|        return f->sbh - 1;
  447|  1.14M|    int idx = frame_prog >> (f->sb_shift + 7);
  448|  1.14M|    int prog;
  449|  1.16M|    do {
  450|  1.16M|        atomic_uint *state = &f->frame_thread.frame_progress[idx];
  451|  1.16M|        const unsigned val = ~atomic_load(state);
  452|  1.16M|        prog = val ? ctz(val) : 32;
  ------------------
  |  Branch (452:16): [True: 1.14M, False: 27.7k]
  ------------------
  453|  1.16M|        if (prog != 32) break;
  ------------------
  |  Branch (453:13): [True: 1.14M, False: 27.7k]
  ------------------
  454|  27.7k|        prog = 0;
  455|  27.7k|    } while (++idx < f->frame_thread.prog_sz);
  ------------------
  |  Branch (455:14): [True: 26.5k, False: 1.25k]
  ------------------
  456|  1.14M|    return ((idx << 5) | prog) - 1;
  457|  6.16M|}

dav1d_get_shear_params:
   80|   309k|int dav1d_get_shear_params(Dav1dWarpedMotionParams *const wm) {
   81|   309k|    const int32_t *const mat = wm->matrix;
   82|       |
   83|   309k|    if (mat[2] <= 0) return 1;
  ------------------
  |  Branch (83:9): [True: 0, False: 309k]
  ------------------
   84|       |
   85|   309k|    wm->u.p.alpha = iclip_wmp(mat[2] - 0x10000);
   86|   309k|    wm->u.p.beta = iclip_wmp(mat[3]);
   87|       |
   88|   309k|    int shift;
   89|   309k|    const int y = apply_sign(resolve_divisor_32(abs(mat[2]), &shift), mat[2]);
   90|   309k|    const int64_t v1 = ((int64_t) mat[4] * 0x10000) * y;
   91|   309k|    const int rnd = (1 << shift) >> 1;
   92|   309k|    wm->u.p.gamma = iclip_wmp(apply_sign64((int) ((llabs(v1) + rnd) >> shift), v1));
   93|   309k|    const int64_t v2 = ((int64_t) mat[3] * mat[4]) * y;
   94|   309k|    wm->u.p.delta = iclip_wmp(mat[5] -
   95|   309k|                          apply_sign64((int) ((llabs(v2) + rnd) >> shift), v2) -
   96|   309k|                          0x10000);
   97|       |
   98|   309k|    return (4 * abs(wm->u.p.alpha) + 7 * abs(wm->u.p.beta) >= 0x10000) ||
  ------------------
  |  Branch (98:12): [True: 8.07k, False: 301k]
  ------------------
   99|   301k|           (4 * abs(wm->u.p.gamma) + 4 * abs(wm->u.p.delta) >= 0x10000);
  ------------------
  |  Branch (99:12): [True: 1.21k, False: 300k]
  ------------------
  100|   309k|}
dav1d_set_affine_mv2d:
  136|   129k|{
  137|   129k|    int32_t *const mat = wm->matrix;
  138|   129k|    const int rsuy = 2 * bh4 - 1;
  139|   129k|    const int rsux = 2 * bw4 - 1;
  140|   129k|    const int isuy = by4 * 4 + rsuy;
  141|   129k|    const int isux = bx4 * 4 + rsux;
  142|       |
  143|   129k|    mat[0] = iclip(mv.x * 0x2000 - (isux * (mat[2] - 0x10000) + isuy * mat[3]),
  144|   129k|                   -0x800000, 0x7fffff);
  145|   129k|    mat[1] = iclip(mv.y * 0x2000 - (isux * mat[4] + isuy * (mat[5] - 0x10000)),
  146|   129k|                   -0x800000, 0x7fffff);
  147|   129k|}
dav1d_find_affine_int:
  153|   169k|{
  154|   169k|    int32_t *const mat = wm->matrix;
  155|   169k|    int a[2][2] = { { 0, 0 }, { 0, 0 } };
  156|   169k|    int bx[2] = { 0, 0 };
  157|   169k|    int by[2] = { 0, 0 };
  158|   169k|    const int rsuy = 2 * bh4 - 1;
  159|   169k|    const int rsux = 2 * bw4 - 1;
  160|   169k|    const int suy = rsuy * 8;
  161|   169k|    const int sux = rsux * 8;
  162|   169k|    const int duy = suy + mv.y;
  163|   169k|    const int dux = sux + mv.x;
  164|   169k|    const int isuy = by4 * 4 + rsuy;
  165|   169k|    const int isux = bx4 * 4 + rsux;
  166|       |
  167|   510k|    for (int i = 0; i < np; i++) {
  ------------------
  |  Branch (167:21): [True: 341k, False: 169k]
  ------------------
  168|   341k|        const int dx = pts[i][1][0] - dux;
  169|   341k|        const int dy = pts[i][1][1] - duy;
  170|   341k|        const int sx = pts[i][0][0] - sux;
  171|   341k|        const int sy = pts[i][0][1] - suy;
  172|   341k|        if (abs(sx - dx) < 256 && abs(sy - dy) < 256) {
  ------------------
  |  Branch (172:13): [True: 337k, False: 3.55k]
  |  Branch (172:35): [True: 334k, False: 3.44k]
  ------------------
  173|   334k|            a[0][0] += ((sx * sx) >> 2) + sx * 2 + 8;
  174|   334k|            a[0][1] += ((sx * sy) >> 2) + sx + sy + 4;
  175|   334k|            a[1][1] += ((sy * sy) >> 2) + sy * 2 + 8;
  176|   334k|            bx[0] += ((sx * dx) >> 2) + sx + dx + 8;
  177|   334k|            bx[1] += ((sy * dx) >> 2) + sy + dx + 4;
  178|   334k|            by[0] += ((sx * dy) >> 2) + sx + dy + 4;
  179|   334k|            by[1] += ((sy * dy) >> 2) + sy + dy + 8;
  180|   334k|        }
  181|   341k|    }
  182|       |
  183|       |    // compute determinant of a
  184|   169k|    const int64_t det = (int64_t) a[0][0] * a[1][1] - (int64_t) a[0][1] * a[0][1];
  185|   169k|    if (det == 0) return 1;
  ------------------
  |  Branch (185:9): [True: 6.95k, False: 162k]
  ------------------
  186|   162k|    int shift, idet = apply_sign64(resolve_divisor_64(llabs(det), &shift), det);
  187|   162k|    shift -= 16;
  188|   162k|    if (shift < 0) {
  ------------------
  |  Branch (188:9): [True: 0, False: 162k]
  ------------------
  189|      0|        idet <<= -shift;
  190|      0|        shift = 0;
  191|      0|    }
  192|       |
  193|       |    // solve the least-squares
  194|   162k|    mat[2] = get_mult_shift_diag((int64_t) a[1][1] * bx[0] -
  195|   162k|                                 (int64_t) a[0][1] * bx[1], idet, shift);
  196|   162k|    mat[3] = get_mult_shift_ndiag((int64_t) a[0][0] * bx[1] -
  197|   162k|                                  (int64_t) a[0][1] * bx[0], idet, shift);
  198|   162k|    mat[4] = get_mult_shift_ndiag((int64_t) a[1][1] * by[0] -
  199|   162k|                                  (int64_t) a[0][1] * by[1], idet, shift);
  200|   162k|    mat[5] = get_mult_shift_diag((int64_t) a[0][0] * by[1] -
  201|   162k|                                 (int64_t) a[0][1] * by[0], idet, shift);
  202|       |
  203|   162k|    mat[0] = iclip(mv.x * 0x2000 - (isux * (mat[2] - 0x10000) + isuy * mat[3]),
  204|   162k|                   -0x800000, 0x7fffff);
  205|   162k|    mat[1] = iclip(mv.y * 0x2000 - (isux * mat[4] + isuy * (mat[5] - 0x10000)),
  206|   162k|                   -0x800000, 0x7fffff);
  207|       |
  208|   162k|    return 0;
  209|   169k|}
warpmv.c:iclip_wmp:
   63|  1.23M|static inline int iclip_wmp(const int v) {
   64|  1.23M|    const int cv = iclip(v, INT16_MIN, INT16_MAX);
   65|       |
   66|  1.23M|    return apply_sign((abs(cv) + 32) >> 6, cv) * (1 << 6);
   67|  1.23M|}
warpmv.c:resolve_divisor_32:
   69|   309k|static inline int resolve_divisor_32(const unsigned d, int *const shift) {
   70|   309k|    *shift = ulog2(d);
   71|   309k|    const int e = d - (1 << *shift);
   72|   309k|    const int f = *shift > 8 ? (e + (1 << (*shift - 9))) >> (*shift - 8) :
  ------------------
  |  Branch (72:19): [True: 309k, False: 25]
  ------------------
   73|   309k|                               e << (8 - *shift);
   74|   309k|    assert(f <= 256);
  ------------------
  |  Branch (74:5): [True: 309k, False: 16]
  ------------------
   75|   309k|    *shift += 14;
   76|       |    // Use f as lookup into the precomputed table of multipliers
   77|   309k|    return div_lut[f];
   78|   309k|}
warpmv.c:resolve_divisor_64:
  102|   162k|static int resolve_divisor_64(const uint64_t d, int *const shift) {
  103|   162k|    *shift = u64log2(d);
  104|   162k|    const int64_t e = d - (1LL << *shift);
  105|   162k|    const int64_t f = *shift > 8 ? (e + (1LL << (*shift - 9))) >> (*shift - 8) :
  ------------------
  |  Branch (105:23): [True: 162k, False: 18.4E]
  ------------------
  106|  18.4E|                                   e << (8 - *shift);
  107|   162k|    assert(f <= 256);
  ------------------
  |  Branch (107:5): [True: 162k, False: 18.4E]
  ------------------
  108|   162k|    *shift += 14;
  109|       |    // Use f as lookup into the precomputed table of multipliers
  110|   162k|    return div_lut[f];
  111|   162k|}
warpmv.c:get_mult_shift_diag:
  125|   324k|{
  126|   324k|    const int64_t v1 = px * idet;
  127|   324k|    const int v2 = apply_sign64((int) ((llabs(v1) +
  128|   324k|                                        ((1LL << shift) >> 1)) >> shift),
  129|   324k|                                v1);
  130|   324k|    return iclip(v2, 0xe001, 0x11fff);
  131|   324k|}
warpmv.c:get_mult_shift_ndiag:
  115|   324k|{
  116|   324k|    const int64_t v1 = px * idet;
  117|   324k|    const int v2 = apply_sign64((int) ((llabs(v1) +
  118|   324k|                                        ((1LL << shift) >> 1)) >> shift),
  119|   324k|                                v1);
  120|   324k|    return iclip(v2, -0x1fff, 0x1fff);
  121|   324k|}

dav1d_init_ii_wedge_masks:
  207|      1|COLD void dav1d_init_ii_wedge_masks(void) {
  208|       |    // This function is guaranteed to be called only once
  209|       |
  210|      1|    enum WedgeMasterLineType {
  211|      1|        WEDGE_MASTER_LINE_ODD,
  212|      1|        WEDGE_MASTER_LINE_EVEN,
  213|      1|        WEDGE_MASTER_LINE_VERT,
  214|      1|        N_WEDGE_MASTER_LINES,
  215|      1|    };
  216|      1|    static const uint8_t wedge_master_border[N_WEDGE_MASTER_LINES][8] = {
  217|      1|        [WEDGE_MASTER_LINE_ODD]  = {  1,  2,  6, 18, 37, 53, 60, 63 },
  218|      1|        [WEDGE_MASTER_LINE_EVEN] = {  1,  4, 11, 27, 46, 58, 62, 63 },
  219|      1|        [WEDGE_MASTER_LINE_VERT] = {  0,  2,  7, 21, 43, 57, 62, 64 },
  220|      1|    };
  221|      1|    uint8_t master[6][64 * 64];
  222|       |
  223|       |    // create master templates
  224|     65|    for (int y = 0, off = 0; y < 64; y++, off += 64)
  ------------------
  |  Branch (224:30): [True: 64, False: 1]
  ------------------
  225|     64|        insert_border(&master[WEDGE_VERTICAL][off],
  226|     64|                      wedge_master_border[WEDGE_MASTER_LINE_VERT], 32);
  227|     33|    for (int y = 0, off = 0, ctr = 48; y < 64; y += 2, off += 128, ctr--)
  ------------------
  |  Branch (227:40): [True: 32, False: 1]
  ------------------
  228|     32|    {
  229|     32|        insert_border(&master[WEDGE_OBLIQUE63][off],
  230|     32|                      wedge_master_border[WEDGE_MASTER_LINE_EVEN], ctr);
  231|     32|        insert_border(&master[WEDGE_OBLIQUE63][off + 64],
  232|     32|                      wedge_master_border[WEDGE_MASTER_LINE_ODD], ctr - 1);
  233|     32|    }
  234|       |
  235|      1|    transpose(master[WEDGE_OBLIQUE27], master[WEDGE_OBLIQUE63]);
  236|      1|    transpose(master[WEDGE_HORIZONTAL], master[WEDGE_VERTICAL]);
  237|      1|    hflip(master[WEDGE_OBLIQUE117], master[WEDGE_OBLIQUE63]);
  238|      1|    hflip(master[WEDGE_OBLIQUE153], master[WEDGE_OBLIQUE27]);
  239|       |
  240|      1|#define fill(w, h, sz_422, sz_420, hvsw, signs) \
  241|      1|    fill2d_16x2(w, h, BS_##w##x##h - BS_32x32, \
  242|      1|                master, wedge_codebook_16_##hvsw, \
  243|      1|                dav1d_masks.wedge_444_##w##x##h, \
  244|      1|                dav1d_masks.wedge_422_##sz_422, \
  245|      1|                dav1d_masks.wedge_420_##sz_420, signs)
  246|       |
  247|      1|    fill(32, 32, 16x32, 16x16, heqw, 0x7bfb);
  ------------------
  |  |  241|      1|    fill2d_16x2(w, h, BS_##w##x##h - BS_32x32, \
  |  |  242|      1|                master, wedge_codebook_16_##hvsw, \
  |  |  243|      1|                dav1d_masks.wedge_444_##w##x##h, \
  |  |  244|      1|                dav1d_masks.wedge_422_##sz_422, \
  |  |  245|      1|                dav1d_masks.wedge_420_##sz_420, signs)
  ------------------
  248|      1|    fill(32, 16, 16x16, 16x8,  hltw, 0x7beb);
  ------------------
  |  |  241|      1|    fill2d_16x2(w, h, BS_##w##x##h - BS_32x32, \
  |  |  242|      1|                master, wedge_codebook_16_##hvsw, \
  |  |  243|      1|                dav1d_masks.wedge_444_##w##x##h, \
  |  |  244|      1|                dav1d_masks.wedge_422_##sz_422, \
  |  |  245|      1|                dav1d_masks.wedge_420_##sz_420, signs)
  ------------------
  249|      1|    fill(32,  8, 16x8,  16x4,  hltw, 0x6beb);
  ------------------
  |  |  241|      1|    fill2d_16x2(w, h, BS_##w##x##h - BS_32x32, \
  |  |  242|      1|                master, wedge_codebook_16_##hvsw, \
  |  |  243|      1|                dav1d_masks.wedge_444_##w##x##h, \
  |  |  244|      1|                dav1d_masks.wedge_422_##sz_422, \
  |  |  245|      1|                dav1d_masks.wedge_420_##sz_420, signs)
  ------------------
  250|      1|    fill(16, 32,  8x32,  8x16, hgtw, 0x7beb);
  ------------------
  |  |  241|      1|    fill2d_16x2(w, h, BS_##w##x##h - BS_32x32, \
  |  |  242|      1|                master, wedge_codebook_16_##hvsw, \
  |  |  243|      1|                dav1d_masks.wedge_444_##w##x##h, \
  |  |  244|      1|                dav1d_masks.wedge_422_##sz_422, \
  |  |  245|      1|                dav1d_masks.wedge_420_##sz_420, signs)
  ------------------
  251|      1|    fill(16, 16,  8x16,  8x8,  heqw, 0x7bfb);
  ------------------
  |  |  241|      1|    fill2d_16x2(w, h, BS_##w##x##h - BS_32x32, \
  |  |  242|      1|                master, wedge_codebook_16_##hvsw, \
  |  |  243|      1|                dav1d_masks.wedge_444_##w##x##h, \
  |  |  244|      1|                dav1d_masks.wedge_422_##sz_422, \
  |  |  245|      1|                dav1d_masks.wedge_420_##sz_420, signs)
  ------------------
  252|      1|    fill(16,  8,  8x8,   8x4,  hltw, 0x7beb);
  ------------------
  |  |  241|      1|    fill2d_16x2(w, h, BS_##w##x##h - BS_32x32, \
  |  |  242|      1|                master, wedge_codebook_16_##hvsw, \
  |  |  243|      1|                dav1d_masks.wedge_444_##w##x##h, \
  |  |  244|      1|                dav1d_masks.wedge_422_##sz_422, \
  |  |  245|      1|                dav1d_masks.wedge_420_##sz_420, signs)
  ------------------
  253|      1|    fill( 8, 32,  4x32,  4x16, hgtw, 0x7aeb);
  ------------------
  |  |  241|      1|    fill2d_16x2(w, h, BS_##w##x##h - BS_32x32, \
  |  |  242|      1|                master, wedge_codebook_16_##hvsw, \
  |  |  243|      1|                dav1d_masks.wedge_444_##w##x##h, \
  |  |  244|      1|                dav1d_masks.wedge_422_##sz_422, \
  |  |  245|      1|                dav1d_masks.wedge_420_##sz_420, signs)
  ------------------
  254|      1|    fill( 8, 16,  4x16,  4x8,  hgtw, 0x7beb);
  ------------------
  |  |  241|      1|    fill2d_16x2(w, h, BS_##w##x##h - BS_32x32, \
  |  |  242|      1|                master, wedge_codebook_16_##hvsw, \
  |  |  243|      1|                dav1d_masks.wedge_444_##w##x##h, \
  |  |  244|      1|                dav1d_masks.wedge_422_##sz_422, \
  |  |  245|      1|                dav1d_masks.wedge_420_##sz_420, signs)
  ------------------
  255|      1|    fill( 8,  8,  4x8,   4x4,  heqw, 0x7bfb);
  ------------------
  |  |  241|      1|    fill2d_16x2(w, h, BS_##w##x##h - BS_32x32, \
  |  |  242|      1|                master, wedge_codebook_16_##hvsw, \
  |  |  243|      1|                dav1d_masks.wedge_444_##w##x##h, \
  |  |  244|      1|                dav1d_masks.wedge_422_##sz_422, \
  |  |  245|      1|                dav1d_masks.wedge_420_##sz_420, signs)
  ------------------
  256|      1|#undef fill
  257|       |
  258|      1|    memset(dav1d_masks.ii_dc, 32, 32 * 32);
  259|      4|    for (int c = 0; c < 3; c++) {
  ------------------
  |  Branch (259:21): [True: 3, False: 1]
  ------------------
  260|      3|        dav1d_masks.offsets[c][BS_32x32-BS_32x32].ii[II_DC_PRED] =
  261|      3|        dav1d_masks.offsets[c][BS_32x16-BS_32x32].ii[II_DC_PRED] =
  262|      3|        dav1d_masks.offsets[c][BS_16x32-BS_32x32].ii[II_DC_PRED] =
  263|      3|        dav1d_masks.offsets[c][BS_16x16-BS_32x32].ii[II_DC_PRED] =
  264|      3|        dav1d_masks.offsets[c][BS_16x8 -BS_32x32].ii[II_DC_PRED] =
  265|      3|        dav1d_masks.offsets[c][BS_8x16 -BS_32x32].ii[II_DC_PRED] =
  266|      3|        dav1d_masks.offsets[c][BS_8x8  -BS_32x32].ii[II_DC_PRED] =
  267|      3|            MASK_OFFSET(dav1d_masks.ii_dc);
  ------------------
  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  ------------------
  268|      3|    }
  269|       |
  270|      1|#define BUILD_NONDC_II_MASKS(w, h, step) \
  271|      1|    build_nondc_ii_masks(dav1d_masks.ii_nondc_##w##x##h, w, h, step)
  272|       |
  273|      1|#define ASSIGN_NONDC_II_OFFSET(bs, w444, h444, w422, h422, w420, h420) \
  274|      1|    dav1d_masks.offsets[0][bs-BS_32x32].ii[p + 1] = \
  275|      1|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w444##x##h444[p*w444*h444]); \
  276|      1|    dav1d_masks.offsets[1][bs-BS_32x32].ii[p + 1] = \
  277|      1|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w422##x##h422[p*w422*h422]); \
  278|      1|    dav1d_masks.offsets[2][bs-BS_32x32].ii[p + 1] = \
  279|      1|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w420##x##h420[p*w420*h420])
  280|       |
  281|      1|    BUILD_NONDC_II_MASKS(32, 32, 1);
  ------------------
  |  |  271|      1|    build_nondc_ii_masks(dav1d_masks.ii_nondc_##w##x##h, w, h, step)
  ------------------
  282|      1|    BUILD_NONDC_II_MASKS(16, 32, 1);
  ------------------
  |  |  271|      1|    build_nondc_ii_masks(dav1d_masks.ii_nondc_##w##x##h, w, h, step)
  ------------------
  283|      1|    BUILD_NONDC_II_MASKS(16, 16, 2);
  ------------------
  |  |  271|      1|    build_nondc_ii_masks(dav1d_masks.ii_nondc_##w##x##h, w, h, step)
  ------------------
  284|      1|    BUILD_NONDC_II_MASKS( 8, 32, 1);
  ------------------
  |  |  271|      1|    build_nondc_ii_masks(dav1d_masks.ii_nondc_##w##x##h, w, h, step)
  ------------------
  285|      1|    BUILD_NONDC_II_MASKS( 8, 16, 2);
  ------------------
  |  |  271|      1|    build_nondc_ii_masks(dav1d_masks.ii_nondc_##w##x##h, w, h, step)
  ------------------
  286|      1|    BUILD_NONDC_II_MASKS( 8,  8, 4);
  ------------------
  |  |  271|      1|    build_nondc_ii_masks(dav1d_masks.ii_nondc_##w##x##h, w, h, step)
  ------------------
  287|      1|    BUILD_NONDC_II_MASKS( 4, 16, 2);
  ------------------
  |  |  271|      1|    build_nondc_ii_masks(dav1d_masks.ii_nondc_##w##x##h, w, h, step)
  ------------------
  288|      1|    BUILD_NONDC_II_MASKS( 4,  8, 4);
  ------------------
  |  |  271|      1|    build_nondc_ii_masks(dav1d_masks.ii_nondc_##w##x##h, w, h, step)
  ------------------
  289|      1|    BUILD_NONDC_II_MASKS( 4,  4, 8);
  ------------------
  |  |  271|      1|    build_nondc_ii_masks(dav1d_masks.ii_nondc_##w##x##h, w, h, step)
  ------------------
  290|      4|    for (int p = 0; p < 3; p++) {
  ------------------
  |  Branch (290:21): [True: 3, False: 1]
  ------------------
  291|      3|        ASSIGN_NONDC_II_OFFSET(BS_32x32, 32, 32, 16, 32, 16, 16);
  ------------------
  |  |  274|      3|    dav1d_masks.offsets[0][bs-BS_32x32].ii[p + 1] = \
  |  |  275|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w444##x##h444[p*w444*h444]); \
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  |  |  276|      3|    dav1d_masks.offsets[1][bs-BS_32x32].ii[p + 1] = \
  |  |  277|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w422##x##h422[p*w422*h422]); \
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  |  |  278|      3|    dav1d_masks.offsets[2][bs-BS_32x32].ii[p + 1] = \
  |  |  279|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w420##x##h420[p*w420*h420])
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  ------------------
  292|      3|        ASSIGN_NONDC_II_OFFSET(BS_32x16, 32, 32, 16, 16, 16, 16);
  ------------------
  |  |  274|      3|    dav1d_masks.offsets[0][bs-BS_32x32].ii[p + 1] = \
  |  |  275|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w444##x##h444[p*w444*h444]); \
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  |  |  276|      3|    dav1d_masks.offsets[1][bs-BS_32x32].ii[p + 1] = \
  |  |  277|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w422##x##h422[p*w422*h422]); \
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  |  |  278|      3|    dav1d_masks.offsets[2][bs-BS_32x32].ii[p + 1] = \
  |  |  279|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w420##x##h420[p*w420*h420])
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  ------------------
  293|      3|        ASSIGN_NONDC_II_OFFSET(BS_16x32, 16, 32,  8, 32,  8, 16);
  ------------------
  |  |  274|      3|    dav1d_masks.offsets[0][bs-BS_32x32].ii[p + 1] = \
  |  |  275|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w444##x##h444[p*w444*h444]); \
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  |  |  276|      3|    dav1d_masks.offsets[1][bs-BS_32x32].ii[p + 1] = \
  |  |  277|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w422##x##h422[p*w422*h422]); \
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  |  |  278|      3|    dav1d_masks.offsets[2][bs-BS_32x32].ii[p + 1] = \
  |  |  279|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w420##x##h420[p*w420*h420])
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  ------------------
  294|      3|        ASSIGN_NONDC_II_OFFSET(BS_16x16, 16, 16,  8, 16,  8,  8);
  ------------------
  |  |  274|      3|    dav1d_masks.offsets[0][bs-BS_32x32].ii[p + 1] = \
  |  |  275|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w444##x##h444[p*w444*h444]); \
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  |  |  276|      3|    dav1d_masks.offsets[1][bs-BS_32x32].ii[p + 1] = \
  |  |  277|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w422##x##h422[p*w422*h422]); \
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  |  |  278|      3|    dav1d_masks.offsets[2][bs-BS_32x32].ii[p + 1] = \
  |  |  279|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w420##x##h420[p*w420*h420])
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  ------------------
  295|      3|        ASSIGN_NONDC_II_OFFSET(BS_16x8,  16, 16,  8,  8,  8,  8);
  ------------------
  |  |  274|      3|    dav1d_masks.offsets[0][bs-BS_32x32].ii[p + 1] = \
  |  |  275|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w444##x##h444[p*w444*h444]); \
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  |  |  276|      3|    dav1d_masks.offsets[1][bs-BS_32x32].ii[p + 1] = \
  |  |  277|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w422##x##h422[p*w422*h422]); \
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  |  |  278|      3|    dav1d_masks.offsets[2][bs-BS_32x32].ii[p + 1] = \
  |  |  279|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w420##x##h420[p*w420*h420])
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  ------------------
  296|      3|        ASSIGN_NONDC_II_OFFSET(BS_8x16,   8, 16,  4, 16,  4,  8);
  ------------------
  |  |  274|      3|    dav1d_masks.offsets[0][bs-BS_32x32].ii[p + 1] = \
  |  |  275|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w444##x##h444[p*w444*h444]); \
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  |  |  276|      3|    dav1d_masks.offsets[1][bs-BS_32x32].ii[p + 1] = \
  |  |  277|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w422##x##h422[p*w422*h422]); \
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  |  |  278|      3|    dav1d_masks.offsets[2][bs-BS_32x32].ii[p + 1] = \
  |  |  279|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w420##x##h420[p*w420*h420])
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  ------------------
  297|      3|        ASSIGN_NONDC_II_OFFSET(BS_8x8,    8,  8,  4,  8,  4,  4);
  ------------------
  |  |  274|      3|    dav1d_masks.offsets[0][bs-BS_32x32].ii[p + 1] = \
  |  |  275|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w444##x##h444[p*w444*h444]); \
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  |  |  276|      3|    dav1d_masks.offsets[1][bs-BS_32x32].ii[p + 1] = \
  |  |  277|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w422##x##h422[p*w422*h422]); \
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  |  |  278|      3|    dav1d_masks.offsets[2][bs-BS_32x32].ii[p + 1] = \
  |  |  279|      3|        MASK_OFFSET(&dav1d_masks.ii_nondc_##w420##x##h420[p*w420*h420])
  |  |  ------------------
  |  |  |  |  129|      3|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  |  |  ------------------
  ------------------
  298|      3|    }
  299|      1|}
wedge.c:insert_border:
   90|    128|{
   91|    128|    if (ctr > 4) memset(dst, 0, ctr - 4);
  ------------------
  |  Branch (91:9): [True: 128, False: 0]
  ------------------
   92|    128|    memcpy(dst + imax(ctr, 4) - 4, src + imax(4 - ctr, 0), imin(64 - ctr, 8));
   93|    128|    if (ctr < 64 - 4)
  ------------------
  |  Branch (93:9): [True: 128, False: 0]
  ------------------
   94|    128|        memset(dst + ctr + 4, 64, 64 - 4 - ctr);
   95|    128|}
wedge.c:transpose:
   97|      2|static void transpose(uint8_t *const dst, const uint8_t *const src) {
   98|    130|    for (int y = 0, y_off = 0; y < 64; y++, y_off += 64)
  ------------------
  |  Branch (98:32): [True: 128, False: 2]
  ------------------
   99|  8.32k|        for (int x = 0, x_off = 0; x < 64; x++, x_off += 64)
  ------------------
  |  Branch (99:36): [True: 8.19k, False: 128]
  ------------------
  100|  8.19k|            dst[x_off + y] = src[y_off + x];
  101|      2|}
wedge.c:hflip:
  103|      2|static void hflip(uint8_t *const dst, const uint8_t *const src) {
  104|    130|    for (int y = 0, y_off = 0; y < 64; y++, y_off += 64)
  ------------------
  |  Branch (104:32): [True: 128, False: 2]
  ------------------
  105|  8.32k|        for (int x = 0; x < 64; x++)
  ------------------
  |  Branch (105:25): [True: 8.19k, False: 128]
  ------------------
  106|  8.19k|            dst[y_off + 64 - 1 - x] = src[y_off + x];
  107|      2|}
wedge.c:fill2d_16x2:
  153|      9|{
  154|      9|    const int n_stride_444 = (w * h);
  155|      9|    const int n_stride_422 = n_stride_444 >> 1;
  156|      9|    const int n_stride_420 = n_stride_444 >> 2;
  157|      9|    const int sign_stride_422 = 16 * n_stride_422;
  158|      9|    const int sign_stride_420 = 16 * n_stride_420;
  159|       |
  160|       |    // assign pointer offsets in lookup table
  161|    153|    for (int n = 0; n < 16; n++) {
  ------------------
  |  Branch (161:21): [True: 144, False: 9]
  ------------------
  162|    144|        const int sign = signs & 1;
  163|       |
  164|    144|        copy2d(masks_444, master[cb[n].direction], sign, w, h,
  165|    144|               32 - (w * cb[n].x_offset >> 3), 32 - (h * cb[n].y_offset >> 3));
  166|       |
  167|       |        // not using !sign is intentional here, since 444 does not require
  168|       |        // any rounding since no chroma subsampling is applied.
  169|    144|        dav1d_masks.offsets[0][bs].wedge[0][n] =
  170|    144|        dav1d_masks.offsets[0][bs].wedge[1][n] = MASK_OFFSET(masks_444);
  ------------------
  |  |  129|    144|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  ------------------
  171|       |
  172|    144|        dav1d_masks.offsets[1][bs].wedge[0][n] =
  173|    144|            init_chroma(&masks_422[ sign * sign_stride_422], masks_444, 0, w, h, 0);
  174|    144|        dav1d_masks.offsets[1][bs].wedge[1][n] =
  175|    144|            init_chroma(&masks_422[!sign * sign_stride_422], masks_444, 1, w, h, 0);
  176|    144|        dav1d_masks.offsets[2][bs].wedge[0][n] =
  177|    144|            init_chroma(&masks_420[ sign * sign_stride_420], masks_444, 0, w, h, 1);
  178|    144|        dav1d_masks.offsets[2][bs].wedge[1][n] =
  179|    144|            init_chroma(&masks_420[!sign * sign_stride_420], masks_444, 1, w, h, 1);
  180|       |
  181|    144|        signs >>= 1;
  182|    144|        masks_444 += n_stride_444;
  183|    144|        masks_422 += n_stride_422;
  184|    144|        masks_420 += n_stride_420;
  185|    144|    }
  186|      9|}
wedge.c:copy2d:
  111|    144|{
  112|    144|    src += y_off * 64 + x_off;
  113|    144|    if (sign) {
  ------------------
  |  Branch (113:9): [True: 109, False: 35]
  ------------------
  114|  2.14k|        for (int y = 0; y < h; y++) {
  ------------------
  |  Branch (114:25): [True: 2.03k, False: 109]
  ------------------
  115|  40.4k|            for (int x = 0; x < w; x++)
  ------------------
  |  Branch (115:29): [True: 38.4k, False: 2.03k]
  ------------------
  116|  38.4k|                dst[x] = 64 - src[x];
  117|  2.03k|            src += 64;
  118|  2.03k|            dst += w;
  119|  2.03k|        }
  120|    109|    } else {
  121|    691|        for (int y = 0; y < h; y++) {
  ------------------
  |  Branch (121:25): [True: 656, False: 35]
  ------------------
  122|    656|            memcpy(dst, src, w);
  123|    656|            src += 64;
  124|    656|            dst += w;
  125|    656|        }
  126|     35|    }
  127|    144|}
wedge.c:init_chroma:
  134|    576|{
  135|    576|    const uint16_t offset = MASK_OFFSET(chroma);
  ------------------
  |  |  129|    576|#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
  ------------------
  136|  8.64k|    for (int y = 0; y < h; y += 1 + ss_ver) {
  ------------------
  |  Branch (136:21): [True: 8.06k, False: 576]
  ------------------
  137|  83.3k|        for (int x = 0; x < w; x += 2) {
  ------------------
  |  Branch (137:25): [True: 75.2k, False: 8.06k]
  ------------------
  138|  75.2k|            int sum = luma[x] + luma[x + 1] + 1;
  139|  75.2k|            if (ss_ver) sum += luma[w + x] + luma[w + x + 1] + 1;
  ------------------
  |  Branch (139:17): [True: 25.0k, False: 50.1k]
  ------------------
  140|  75.2k|            chroma[x >> 1] = (sum - sign) >> (1 + ss_ver);
  141|  75.2k|        }
  142|  8.06k|        luma += w << ss_ver;
  143|  8.06k|        chroma += w >> 1;
  144|  8.06k|    }
  145|    576|    return offset;
  146|    576|}
wedge.c:build_nondc_ii_masks:
  190|      9|{
  191|      9|    static const uint8_t ii_weights_1d[32] = {
  192|      9|        60, 52, 45, 39, 34, 30, 26, 22, 19, 17, 15, 13, 11, 10,  8,  7,
  193|      9|         6,  6,  5,  4,  4,  3,  3,  2,  2,  2,  2,  1,  1,  1,  1,  1,
  194|      9|    };
  195|       |
  196|      9|    uint8_t *const mask_h  = &mask_v[w * h];
  197|      9|    uint8_t *const mask_sm = &mask_h[w * h];
  198|    173|    for (int y = 0, off = 0; y < h; y++, off += w) {
  ------------------
  |  Branch (198:30): [True: 164, False: 9]
  ------------------
  199|    164|        memset(&mask_v[off], ii_weights_1d[y * step], w);
  200|  2.51k|        for (int x = 0; x < w; x++) {
  ------------------
  |  Branch (200:25): [True: 2.35k, False: 164]
  ------------------
  201|  2.35k|            mask_sm[off + x] = ii_weights_1d[imin(x, y) * step];
  202|  2.35k|            mask_h[off + x] = ii_weights_1d[x * step];
  203|  2.35k|        }
  204|    164|    }
  205|      9|}

cdef_tmpl.c:cdef_dsp_init_x86:
   46|  3.49k|static ALWAYS_INLINE void cdef_dsp_init_x86(Dav1dCdefDSPContext *const c) {
   47|  3.49k|    const unsigned flags = dav1d_get_cpu_flags();
   48|       |
   49|  3.49k|#if BITDEPTH == 8
   50|  3.49k|    if (!(flags & DAV1D_X86_CPU_FLAG_SSE2)) return;
  ------------------
  |  Branch (50:9): [True: 0, False: 3.49k]
  ------------------
   51|       |
   52|  3.49k|    c->fb[0] = BF(dav1d_cdef_filter_8x8, sse2);
  ------------------
  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   53|  3.49k|    c->fb[1] = BF(dav1d_cdef_filter_4x8, sse2);
  ------------------
  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   54|  3.49k|    c->fb[2] = BF(dav1d_cdef_filter_4x4, sse2);
  ------------------
  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   55|  3.49k|#endif
   56|       |
   57|  3.49k|    if (!(flags & DAV1D_X86_CPU_FLAG_SSSE3)) return;
  ------------------
  |  Branch (57:9): [True: 0, False: 3.49k]
  ------------------
   58|       |
   59|  3.49k|    c->dir = BF(dav1d_cdef_dir, ssse3);
  ------------------
  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   60|  3.49k|    c->fb[0] = BF(dav1d_cdef_filter_8x8, ssse3);
  ------------------
  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   61|  3.49k|    c->fb[1] = BF(dav1d_cdef_filter_4x8, ssse3);
  ------------------
  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   62|  3.49k|    c->fb[2] = BF(dav1d_cdef_filter_4x4, ssse3);
  ------------------
  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   63|       |
   64|  3.49k|    if (!(flags & DAV1D_X86_CPU_FLAG_SSE41)) return;
  ------------------
  |  Branch (64:9): [True: 0, False: 3.49k]
  ------------------
   65|       |
   66|  3.49k|    c->dir = BF(dav1d_cdef_dir, sse4);
  ------------------
  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   67|  3.49k|#if BITDEPTH == 8
   68|  3.49k|    c->fb[0] = BF(dav1d_cdef_filter_8x8, sse4);
  ------------------
  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   69|  3.49k|    c->fb[1] = BF(dav1d_cdef_filter_4x8, sse4);
  ------------------
  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   70|  3.49k|    c->fb[2] = BF(dav1d_cdef_filter_4x4, sse4);
  ------------------
  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   71|  3.49k|#endif
   72|       |
   73|  3.49k|#if ARCH_X86_64
   74|  3.49k|    if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return;
  ------------------
  |  Branch (74:9): [True: 0, False: 3.49k]
  ------------------
   75|       |
   76|  3.49k|    c->dir = BF(dav1d_cdef_dir, avx2);
  ------------------
  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   77|  3.49k|    c->fb[0] = BF(dav1d_cdef_filter_8x8, avx2);
  ------------------
  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   78|  3.49k|    c->fb[1] = BF(dav1d_cdef_filter_4x8, avx2);
  ------------------
  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   79|  3.49k|    c->fb[2] = BF(dav1d_cdef_filter_4x4, avx2);
  ------------------
  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   80|       |
   81|  3.49k|    if (!(flags & DAV1D_X86_CPU_FLAG_AVX512ICL)) return;
  ------------------
  |  Branch (81:9): [True: 3.49k, False: 0]
  ------------------
   82|       |
   83|      0|    c->fb[0] = BF(dav1d_cdef_filter_8x8, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   84|      0|    c->fb[1] = BF(dav1d_cdef_filter_4x8, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   85|      0|    c->fb[2] = BF(dav1d_cdef_filter_4x4, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   86|      0|#endif
   87|      0|}

dav1d_get_cpu_flags_x86:
   47|      1|COLD unsigned dav1d_get_cpu_flags_x86(void) {
   48|      1|    union {
   49|      1|        CpuidRegisters r;
   50|      1|        struct {
   51|      1|            uint32_t max_leaf;
   52|      1|            char vendor[12];
   53|      1|        };
   54|      1|    } cpu;
   55|      1|    dav1d_cpu_cpuid(&cpu.r, 0, 0);
   56|      1|    unsigned flags = dav1d_get_default_cpu_flags();
   57|       |
   58|      1|    if (cpu.max_leaf >= 1) {
  ------------------
  |  Branch (58:9): [True: 1, False: 0]
  ------------------
   59|      1|        CpuidRegisters r;
   60|      1|        dav1d_cpu_cpuid(&r, 1, 0);
   61|      1|        const unsigned family = ((r.eax >> 8) & 0x0f) + ((r.eax >> 20) & 0xff);
   62|       |
   63|      1|        if (X(r.edx, 0x06008000)) /* CMOV/SSE/SSE2 */ {
  ------------------
  |  |   45|      1|#define X(reg, mask) (((reg) & (mask)) == (mask))
  |  |  ------------------
  |  |  |  Branch (45:22): [True: 1, False: 0]
  |  |  ------------------
  ------------------
   64|      1|            flags |= DAV1D_X86_CPU_FLAG_SSE2;
   65|      1|            if (X(r.ecx, 0x00000201)) /* SSE3/SSSE3 */ {
  ------------------
  |  |   45|      1|#define X(reg, mask) (((reg) & (mask)) == (mask))
  |  |  ------------------
  |  |  |  Branch (45:22): [True: 1, False: 0]
  |  |  ------------------
  ------------------
   66|      1|                flags |= DAV1D_X86_CPU_FLAG_SSSE3;
   67|      1|                if (X(r.ecx, 0x00080000)) /* SSE4.1 */
  ------------------
  |  |   45|      1|#define X(reg, mask) (((reg) & (mask)) == (mask))
  |  |  ------------------
  |  |  |  Branch (45:22): [True: 1, False: 0]
  |  |  ------------------
  ------------------
   68|      1|                    flags |= DAV1D_X86_CPU_FLAG_SSE41;
   69|      1|            }
   70|      1|        }
   71|      1|#if ARCH_X86_64
   72|       |        /* We only support >128-bit SIMD on x86-64. */
   73|      1|        if (X(r.ecx, 0x18000000)) /* OSXSAVE/AVX */ {
  ------------------
  |  |   45|      1|#define X(reg, mask) (((reg) & (mask)) == (mask))
  |  |  ------------------
  |  |  |  Branch (45:22): [True: 1, False: 0]
  |  |  ------------------
  ------------------
   74|      1|            const uint64_t xcr0 = dav1d_cpu_xgetbv(0);
   75|      1|            if (X(xcr0, 0x00000006)) /* XMM/YMM */ {
  ------------------
  |  |   45|      1|#define X(reg, mask) (((reg) & (mask)) == (mask))
  |  |  ------------------
  |  |  |  Branch (45:22): [True: 1, False: 0]
  |  |  ------------------
  ------------------
   76|      1|                if (cpu.max_leaf >= 7) {
  ------------------
  |  Branch (76:21): [True: 1, False: 0]
  ------------------
   77|      1|                    dav1d_cpu_cpuid(&r, 7, 0);
   78|      1|                    if (X(r.ebx, 0x00000128)) /* BMI1/BMI2/AVX2 */ {
  ------------------
  |  |   45|      1|#define X(reg, mask) (((reg) & (mask)) == (mask))
  |  |  ------------------
  |  |  |  Branch (45:22): [True: 1, False: 0]
  |  |  ------------------
  ------------------
   79|      1|                        flags |= DAV1D_X86_CPU_FLAG_AVX2;
   80|      1|                        if (X(xcr0, 0x000000e0)) /* ZMM/OPMASK */ {
  ------------------
  |  |   45|      1|#define X(reg, mask) (((reg) & (mask)) == (mask))
  |  |  ------------------
  |  |  |  Branch (45:22): [True: 0, False: 1]
  |  |  ------------------
  ------------------
   81|      0|                            if (X(r.ebx, 0xd0230000) && X(r.ecx, 0x00005f42))
  ------------------
  |  |   45|      0|#define X(reg, mask) (((reg) & (mask)) == (mask))
  |  |  ------------------
  |  |  |  Branch (45:22): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                                          if (X(r.ebx, 0xd0230000) && X(r.ecx, 0x00005f42))
  ------------------
  |  |   45|      0|#define X(reg, mask) (((reg) & (mask)) == (mask))
  |  |  ------------------
  |  |  |  Branch (45:22): [True: 0, False: 0]
  |  |  ------------------
  ------------------
   82|      0|                                flags |= DAV1D_X86_CPU_FLAG_AVX512ICL;
   83|      0|                        }
   84|      1|                    }
   85|      1|                }
   86|      1|            }
   87|      1|        }
   88|      1|#endif
   89|      1|        if (!memcmp(cpu.vendor, "AuthenticAMD", sizeof(cpu.vendor))) {
  ------------------
  |  Branch (89:13): [True: 1, False: 0]
  ------------------
   90|      1|            if ((flags & DAV1D_X86_CPU_FLAG_AVX2) && family <= 0x19) {
  ------------------
  |  Branch (90:17): [True: 1, False: 0]
  |  Branch (90:54): [True: 1, False: 0]
  ------------------
   91|       |                /* Excavator, Zen, Zen+, Zen 2, Zen 3, Zen 3+, Zen 4 */
   92|      1|                flags |= DAV1D_X86_CPU_FLAG_SLOW_GATHER;
   93|      1|            }
   94|      1|        }
   95|      1|    }
   96|       |
   97|      1|    return flags;
   98|      1|}

filmgrain_tmpl.c:film_grain_dsp_init_x86:
   45|  9.21k|static ALWAYS_INLINE void film_grain_dsp_init_x86(Dav1dFilmGrainDSPContext *const c) {
   46|  9.21k|    const unsigned flags = dav1d_get_cpu_flags();
   47|       |
   48|  9.21k|    if (!(flags & DAV1D_X86_CPU_FLAG_SSSE3)) return;
  ------------------
  |  Branch (48:9): [True: 0, False: 9.21k]
  ------------------
   49|       |
   50|  9.21k|    c->generate_grain_y = BF(dav1d_generate_grain_y, ssse3);
  ------------------
  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   51|  9.21k|    c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I420 - 1] = BF(dav1d_generate_grain_uv_420, ssse3);
  ------------------
  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   52|  9.21k|    c->fgy_32x32xn = BF(dav1d_fgy_32x32xn, ssse3);
  ------------------
  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   53|  9.21k|    c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I420 - 1] = BF(dav1d_fguv_32x32xn_i420, ssse3);
  ------------------
  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   54|  9.21k|    c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I422 - 1] = BF(dav1d_generate_grain_uv_422, ssse3);
  ------------------
  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   55|  9.21k|    c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I444 - 1] = BF(dav1d_generate_grain_uv_444, ssse3);
  ------------------
  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   56|  9.21k|    c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I422 - 1] = BF(dav1d_fguv_32x32xn_i422, ssse3);
  ------------------
  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   57|  9.21k|    c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I444 - 1] = BF(dav1d_fguv_32x32xn_i444, ssse3);
  ------------------
  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   58|       |
   59|  9.21k|#if ARCH_X86_64
   60|  9.21k|    if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return;
  ------------------
  |  Branch (60:9): [True: 0, False: 9.21k]
  ------------------
   61|       |
   62|  9.21k|    c->generate_grain_y = BF(dav1d_generate_grain_y, avx2);
  ------------------
  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   63|  9.21k|    c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I420 - 1] = BF(dav1d_generate_grain_uv_420, avx2);
  ------------------
  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   64|  9.21k|    c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I422 - 1] = BF(dav1d_generate_grain_uv_422, avx2);
  ------------------
  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   65|  9.21k|    c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I444 - 1] = BF(dav1d_generate_grain_uv_444, avx2);
  ------------------
  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   66|       |
   67|  9.21k|    if (!(flags & DAV1D_X86_CPU_FLAG_SLOW_GATHER)) {
  ------------------
  |  Branch (67:9): [True: 0, False: 9.21k]
  ------------------
   68|      0|        c->fgy_32x32xn = BF(dav1d_fgy_32x32xn, avx2);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   69|      0|        c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I420 - 1] = BF(dav1d_fguv_32x32xn_i420, avx2);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   70|      0|        c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I422 - 1] = BF(dav1d_fguv_32x32xn_i422, avx2);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   71|      0|        c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I444 - 1] = BF(dav1d_fguv_32x32xn_i444, avx2);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   72|      0|    }
   73|       |
   74|  9.21k|    if (!(flags & DAV1D_X86_CPU_FLAG_AVX512ICL)) return;
  ------------------
  |  Branch (74:9): [True: 9.21k, False: 0]
  ------------------
   75|       |
   76|      0|    if (BITDEPTH == 8 || !(flags & DAV1D_X86_CPU_FLAG_SLOW_GATHER)) {
  ------------------
  |  Branch (76:9): [True: 0, Folded]
  |  Branch (76:26): [True: 0, False: 0]
  ------------------
   77|      0|        c->fgy_32x32xn = BF(dav1d_fgy_32x32xn, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   78|      0|        c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I420 - 1] = BF(dav1d_fguv_32x32xn_i420, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   79|      0|        c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I422 - 1] = BF(dav1d_fguv_32x32xn_i422, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   80|      0|        c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I444 - 1] = BF(dav1d_fguv_32x32xn_i444, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   81|      0|    }
   82|      0|#endif
   83|      0|}

ipred_tmpl.c:intra_pred_dsp_init_x86:
   71|  9.21k|static ALWAYS_INLINE void intra_pred_dsp_init_x86(Dav1dIntraPredDSPContext *const c) {
   72|  9.21k|    const unsigned flags = dav1d_get_cpu_flags();
   73|       |
   74|  9.21k|    if (!(flags & DAV1D_X86_CPU_FLAG_SSSE3)) return;
  ------------------
  |  Branch (74:9): [True: 0, False: 9.21k]
  ------------------
   75|       |
   76|  9.21k|    init_angular_ipred_fn(DC_PRED,       ipred_dc,       ssse3);
  ------------------
  |  |   39|  9.21k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  9.21k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   77|  9.21k|    init_angular_ipred_fn(DC_128_PRED,   ipred_dc_128,   ssse3);
  ------------------
  |  |   39|  9.21k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  9.21k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   78|  9.21k|    init_angular_ipred_fn(TOP_DC_PRED,   ipred_dc_top,   ssse3);
  ------------------
  |  |   39|  9.21k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  9.21k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   79|  9.21k|    init_angular_ipred_fn(LEFT_DC_PRED,  ipred_dc_left,  ssse3);
  ------------------
  |  |   39|  9.21k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  9.21k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   80|  9.21k|    init_angular_ipred_fn(HOR_PRED,      ipred_h,        ssse3);
  ------------------
  |  |   39|  9.21k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  9.21k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   81|  9.21k|    init_angular_ipred_fn(VERT_PRED,     ipred_v,        ssse3);
  ------------------
  |  |   39|  9.21k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  9.21k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   82|  9.21k|    init_angular_ipred_fn(PAETH_PRED,    ipred_paeth,    ssse3);
  ------------------
  |  |   39|  9.21k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  9.21k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   83|  9.21k|    init_angular_ipred_fn(SMOOTH_PRED,   ipred_smooth,   ssse3);
  ------------------
  |  |   39|  9.21k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  9.21k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   84|  9.21k|    init_angular_ipred_fn(SMOOTH_H_PRED, ipred_smooth_h, ssse3);
  ------------------
  |  |   39|  9.21k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  9.21k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   85|  9.21k|    init_angular_ipred_fn(SMOOTH_V_PRED, ipred_smooth_v, ssse3);
  ------------------
  |  |   39|  9.21k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  9.21k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   86|  9.21k|    init_angular_ipred_fn(Z1_PRED,       ipred_z1,       ssse3);
  ------------------
  |  |   39|  9.21k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  9.21k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   87|  9.21k|    init_angular_ipred_fn(Z2_PRED,       ipred_z2,       ssse3);
  ------------------
  |  |   39|  9.21k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  9.21k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   88|  9.21k|    init_angular_ipred_fn(Z3_PRED,       ipred_z3,       ssse3);
  ------------------
  |  |   39|  9.21k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  9.21k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   89|  9.21k|    init_angular_ipred_fn(FILTER_PRED,   ipred_filter,   ssse3);
  ------------------
  |  |   39|  9.21k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  9.21k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   90|       |
   91|  9.21k|    init_cfl_pred_fn(DC_PRED,      ipred_cfl,      ssse3);
  ------------------
  |  |   41|  9.21k|    init_fn(cfl_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  9.21k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   92|  9.21k|    init_cfl_pred_fn(DC_128_PRED,  ipred_cfl_128,  ssse3);
  ------------------
  |  |   41|  9.21k|    init_fn(cfl_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  9.21k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   93|  9.21k|    init_cfl_pred_fn(TOP_DC_PRED,  ipred_cfl_top,  ssse3);
  ------------------
  |  |   41|  9.21k|    init_fn(cfl_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  9.21k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   94|  9.21k|    init_cfl_pred_fn(LEFT_DC_PRED, ipred_cfl_left, ssse3);
  ------------------
  |  |   41|  9.21k|    init_fn(cfl_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  9.21k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   95|       |
   96|  9.21k|    init_cfl_ac_fn(DAV1D_PIXEL_LAYOUT_I420 - 1, ipred_cfl_ac_420, ssse3);
  ------------------
  |  |   43|  9.21k|    init_fn(cfl_ac, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  9.21k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   97|  9.21k|    init_cfl_ac_fn(DAV1D_PIXEL_LAYOUT_I422 - 1, ipred_cfl_ac_422, ssse3);
  ------------------
  |  |   43|  9.21k|    init_fn(cfl_ac, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  9.21k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   98|  9.21k|    init_cfl_ac_fn(DAV1D_PIXEL_LAYOUT_I444 - 1, ipred_cfl_ac_444, ssse3);
  ------------------
  |  |   43|  9.21k|    init_fn(cfl_ac, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  9.21k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   99|       |
  100|  9.21k|    c->pal_pred = BF(dav1d_pal_pred, ssse3);
  ------------------
  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  101|       |
  102|  9.21k|#if ARCH_X86_64
  103|  9.21k|    if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return;
  ------------------
  |  Branch (103:9): [True: 0, False: 9.21k]
  ------------------
  104|       |
  105|  9.21k|    init_angular_ipred_fn(DC_PRED,       ipred_dc,       avx2);
  ------------------
  |  |   39|  9.21k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  9.21k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  106|  9.21k|    init_angular_ipred_fn(DC_128_PRED,   ipred_dc_128,   avx2);
  ------------------
  |  |   39|  9.21k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  9.21k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  107|  9.21k|    init_angular_ipred_fn(TOP_DC_PRED,   ipred_dc_top,   avx2);
  ------------------
  |  |   39|  9.21k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  9.21k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  108|  9.21k|    init_angular_ipred_fn(LEFT_DC_PRED,  ipred_dc_left,  avx2);
  ------------------
  |  |   39|  9.21k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  9.21k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  109|  9.21k|    init_angular_ipred_fn(HOR_PRED,      ipred_h,        avx2);
  ------------------
  |  |   39|  9.21k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  9.21k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  110|  9.21k|    init_angular_ipred_fn(VERT_PRED,     ipred_v,        avx2);
  ------------------
  |  |   39|  9.21k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  9.21k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  111|  9.21k|    init_angular_ipred_fn(PAETH_PRED,    ipred_paeth,    avx2);
  ------------------
  |  |   39|  9.21k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  9.21k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  112|  9.21k|    init_angular_ipred_fn(SMOOTH_PRED,   ipred_smooth,   avx2);
  ------------------
  |  |   39|  9.21k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  9.21k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  113|  9.21k|    init_angular_ipred_fn(SMOOTH_H_PRED, ipred_smooth_h, avx2);
  ------------------
  |  |   39|  9.21k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  9.21k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  114|  9.21k|    init_angular_ipred_fn(SMOOTH_V_PRED, ipred_smooth_v, avx2);
  ------------------
  |  |   39|  9.21k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  9.21k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  115|  9.21k|    init_angular_ipred_fn(Z1_PRED,       ipred_z1,       avx2);
  ------------------
  |  |   39|  9.21k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  9.21k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  116|  9.21k|    init_angular_ipred_fn(Z2_PRED,       ipred_z2,       avx2);
  ------------------
  |  |   39|  9.21k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  9.21k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  117|  9.21k|    init_angular_ipred_fn(Z3_PRED,       ipred_z3,       avx2);
  ------------------
  |  |   39|  9.21k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  9.21k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  118|  9.21k|    init_angular_ipred_fn(FILTER_PRED,   ipred_filter,   avx2);
  ------------------
  |  |   39|  9.21k|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  9.21k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  119|       |
  120|  9.21k|    init_cfl_pred_fn(DC_PRED,      ipred_cfl,      avx2);
  ------------------
  |  |   41|  9.21k|    init_fn(cfl_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  9.21k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  121|  9.21k|    init_cfl_pred_fn(DC_128_PRED,  ipred_cfl_128,  avx2);
  ------------------
  |  |   41|  9.21k|    init_fn(cfl_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  9.21k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  122|  9.21k|    init_cfl_pred_fn(TOP_DC_PRED,  ipred_cfl_top,  avx2);
  ------------------
  |  |   41|  9.21k|    init_fn(cfl_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  9.21k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  123|  9.21k|    init_cfl_pred_fn(LEFT_DC_PRED, ipred_cfl_left, avx2);
  ------------------
  |  |   41|  9.21k|    init_fn(cfl_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  9.21k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  124|       |
  125|  9.21k|    init_cfl_ac_fn(DAV1D_PIXEL_LAYOUT_I420 - 1, ipred_cfl_ac_420, avx2);
  ------------------
  |  |   43|  9.21k|    init_fn(cfl_ac, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  9.21k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  126|  9.21k|    init_cfl_ac_fn(DAV1D_PIXEL_LAYOUT_I422 - 1, ipred_cfl_ac_422, avx2);
  ------------------
  |  |   43|  9.21k|    init_fn(cfl_ac, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  9.21k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  127|  9.21k|    init_cfl_ac_fn(DAV1D_PIXEL_LAYOUT_I444 - 1, ipred_cfl_ac_444, avx2);
  ------------------
  |  |   43|  9.21k|    init_fn(cfl_ac, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  9.21k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  128|       |
  129|  9.21k|    c->pal_pred = BF(dav1d_pal_pred, avx2);
  ------------------
  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  130|       |
  131|  9.21k|    if (!(flags & DAV1D_X86_CPU_FLAG_AVX512ICL)) return;
  ------------------
  |  Branch (131:9): [True: 9.21k, False: 0]
  ------------------
  132|       |
  133|      0|#if BITDEPTH == 8
  134|      0|    init_angular_ipred_fn(DC_PRED,       ipred_dc,       avx512icl);
  ------------------
  |  |   39|      0|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|  9.21k|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  135|      0|    init_angular_ipred_fn(DC_128_PRED,   ipred_dc_128,   avx512icl);
  ------------------
  |  |   39|      0|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|      0|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  136|      0|    init_angular_ipred_fn(TOP_DC_PRED,   ipred_dc_top,   avx512icl);
  ------------------
  |  |   39|      0|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|      0|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  137|      0|    init_angular_ipred_fn(LEFT_DC_PRED,  ipred_dc_left,  avx512icl);
  ------------------
  |  |   39|      0|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|      0|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  138|      0|    init_angular_ipred_fn(HOR_PRED,      ipred_h,        avx512icl);
  ------------------
  |  |   39|      0|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|      0|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  139|      0|    init_angular_ipred_fn(VERT_PRED,     ipred_v,        avx512icl);
  ------------------
  |  |   39|      0|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|      0|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  140|      0|    init_angular_ipred_fn(Z2_PRED,       ipred_z2,       avx512icl);
  ------------------
  |  |   39|      0|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|      0|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  141|      0|#endif
  142|      0|    init_angular_ipred_fn(PAETH_PRED,    ipred_paeth,    avx512icl);
  ------------------
  |  |   39|      0|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|      0|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  143|      0|    init_angular_ipred_fn(SMOOTH_PRED,   ipred_smooth,   avx512icl);
  ------------------
  |  |   39|      0|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|      0|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  144|      0|    init_angular_ipred_fn(SMOOTH_H_PRED, ipred_smooth_h, avx512icl);
  ------------------
  |  |   39|      0|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|      0|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  145|      0|    init_angular_ipred_fn(SMOOTH_V_PRED, ipred_smooth_v, avx512icl);
  ------------------
  |  |   39|      0|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|      0|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  146|      0|    init_angular_ipred_fn(Z1_PRED,       ipred_z1,       avx512icl);
  ------------------
  |  |   39|      0|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|      0|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  147|      0|    init_angular_ipred_fn(Z2_PRED,       ipred_z2,       avx512icl);
  ------------------
  |  |   39|      0|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|      0|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  148|      0|    init_angular_ipred_fn(Z3_PRED,       ipred_z3,       avx512icl);
  ------------------
  |  |   39|      0|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|      0|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  149|      0|    init_angular_ipred_fn(FILTER_PRED,   ipred_filter,   avx512icl);
  ------------------
  |  |   39|      0|    init_fn(intra_pred, type, name, suffix)
  |  |  ------------------
  |  |  |  |   36|      0|    c->type0[type1] = BF(dav1d_##name, suffix)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  150|       |
  151|      0|    c->pal_pred = BF(dav1d_pal_pred, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  152|      0|#endif
  153|      0|}

itx_tmpl.c:itx_dsp_init_x86:
  112|  3.49k|{
  113|  3.49k|#define assign_itx_bpc_fn(pfx, w, h, type, type_enum, bpc, ext) \
  114|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  115|  3.49k|        BF_BPC(dav1d_inv_txfm_add_##type##_##w##x##h, bpc, ext)
  116|       |
  117|  3.49k|#define assign_itx1_bpc_fn(pfx, w, h, bpc, ext) \
  118|  3.49k|    assign_itx_bpc_fn(pfx, w, h, dct_dct,           DCT_DCT,           bpc, ext)
  119|       |
  120|  3.49k|#define assign_itx2_bpc_fn(pfx, w, h, bpc, ext) \
  121|  3.49k|    assign_itx1_bpc_fn(pfx, w, h, bpc, ext); \
  122|  3.49k|    assign_itx_bpc_fn(pfx, w, h, identity_identity, IDTX,              bpc, ext)
  123|       |
  124|  3.49k|#define assign_itx12_bpc_fn(pfx, w, h, bpc, ext) \
  125|  3.49k|    assign_itx2_bpc_fn(pfx, w, h, bpc, ext); \
  126|  3.49k|    assign_itx_bpc_fn(pfx, w, h, dct_adst,          ADST_DCT,          bpc, ext); \
  127|  3.49k|    assign_itx_bpc_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      bpc, ext); \
  128|  3.49k|    assign_itx_bpc_fn(pfx, w, h, dct_identity,      H_DCT,             bpc, ext); \
  129|  3.49k|    assign_itx_bpc_fn(pfx, w, h, adst_dct,          DCT_ADST,          bpc, ext); \
  130|  3.49k|    assign_itx_bpc_fn(pfx, w, h, adst_adst,         ADST_ADST,         bpc, ext); \
  131|  3.49k|    assign_itx_bpc_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     bpc, ext); \
  132|  3.49k|    assign_itx_bpc_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      bpc, ext); \
  133|  3.49k|    assign_itx_bpc_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     bpc, ext); \
  134|  3.49k|    assign_itx_bpc_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, bpc, ext); \
  135|  3.49k|    assign_itx_bpc_fn(pfx, w, h, identity_dct,      V_DCT,             bpc, ext)
  136|       |
  137|  3.49k|#define assign_itx16_bpc_fn(pfx, w, h, bpc, ext) \
  138|  3.49k|    assign_itx12_bpc_fn(pfx, w, h, bpc, ext); \
  139|  3.49k|    assign_itx_bpc_fn(pfx, w, h, adst_identity,     H_ADST,            bpc, ext); \
  140|  3.49k|    assign_itx_bpc_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        bpc, ext); \
  141|  3.49k|    assign_itx_bpc_fn(pfx, w, h, identity_adst,     V_ADST,            bpc, ext); \
  142|  3.49k|    assign_itx_bpc_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        bpc, ext)
  143|       |
  144|  3.49k|    const unsigned flags = dav1d_get_cpu_flags();
  145|       |
  146|  3.49k|    if (!(flags & DAV1D_X86_CPU_FLAG_SSE2)) return;
  ------------------
  |  Branch (146:9): [True: 0, False: 3.49k]
  ------------------
  147|       |
  148|  3.49k|    assign_itx_fn(, 4, 4, wht_wht, WHT_WHT, sse2);
  ------------------
  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  ------------------
  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  149|       |
  150|  3.49k|    if (!(flags & DAV1D_X86_CPU_FLAG_SSSE3)) return;
  ------------------
  |  Branch (150:9): [True: 0, False: 3.49k]
  ------------------
  151|       |
  152|  3.49k|#if BITDEPTH == 8
  153|  3.49k|    assign_itx16_fn(,   4,  4, ssse3);
  ------------------
  |  |  101|  3.49k|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|  3.49k|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|  3.49k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|  3.49k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  3.49k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|  3.49k|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|  3.49k|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|  3.49k|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|  3.49k|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|  3.49k|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|  3.49k|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|  3.49k|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|  3.49k|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|  3.49k|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|  3.49k|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  154|  3.49k|    assign_itx16_fn(R,  4,  8, ssse3);
  ------------------
  |  |  101|  3.49k|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|  3.49k|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|  3.49k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|  3.49k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  3.49k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|  3.49k|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|  3.49k|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|  3.49k|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|  3.49k|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|  3.49k|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|  3.49k|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|  3.49k|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|  3.49k|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|  3.49k|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|  3.49k|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  155|  3.49k|    assign_itx16_fn(R,  8,  4, ssse3);
  ------------------
  |  |  101|  3.49k|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|  3.49k|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|  3.49k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|  3.49k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  3.49k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|  3.49k|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|  3.49k|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|  3.49k|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|  3.49k|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|  3.49k|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|  3.49k|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|  3.49k|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|  3.49k|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|  3.49k|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|  3.49k|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  156|  3.49k|    assign_itx16_fn(,   8,  8, ssse3);
  ------------------
  |  |  101|  3.49k|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|  3.49k|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|  3.49k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|  3.49k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  3.49k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|  3.49k|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|  3.49k|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|  3.49k|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|  3.49k|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|  3.49k|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|  3.49k|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|  3.49k|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|  3.49k|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|  3.49k|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|  3.49k|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  157|  3.49k|    assign_itx16_fn(R,  4, 16, ssse3);
  ------------------
  |  |  101|  3.49k|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|  3.49k|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|  3.49k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|  3.49k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  3.49k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|  3.49k|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|  3.49k|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|  3.49k|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|  3.49k|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|  3.49k|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|  3.49k|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|  3.49k|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|  3.49k|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|  3.49k|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|  3.49k|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  158|  3.49k|    assign_itx16_fn(R, 16,  4, ssse3);
  ------------------
  |  |  101|  3.49k|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|  3.49k|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|  3.49k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|  3.49k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  3.49k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|  3.49k|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|  3.49k|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|  3.49k|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|  3.49k|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|  3.49k|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|  3.49k|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|  3.49k|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|  3.49k|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|  3.49k|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|  3.49k|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  159|  3.49k|    assign_itx16_fn(R,  8, 16, ssse3);
  ------------------
  |  |  101|  3.49k|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|  3.49k|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|  3.49k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|  3.49k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  3.49k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|  3.49k|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|  3.49k|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|  3.49k|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|  3.49k|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|  3.49k|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|  3.49k|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|  3.49k|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|  3.49k|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|  3.49k|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|  3.49k|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  160|  3.49k|    assign_itx16_fn(R, 16,  8, ssse3);
  ------------------
  |  |  101|  3.49k|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|  3.49k|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|  3.49k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|  3.49k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  3.49k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|  3.49k|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|  3.49k|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|  3.49k|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|  3.49k|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|  3.49k|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|  3.49k|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|  3.49k|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|  3.49k|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|  3.49k|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|  3.49k|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  161|  3.49k|    assign_itx12_fn(,  16, 16, ssse3);
  ------------------
  |  |   88|  3.49k|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   84|  3.49k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   81|  3.49k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   85|  3.49k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   89|  3.49k|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   90|  3.49k|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   91|  3.49k|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   92|  3.49k|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   93|  3.49k|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   94|  3.49k|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   95|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   96|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   97|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   98|  3.49k|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  162|  3.49k|    assign_itx2_fn (R,  8, 32, ssse3);
  ------------------
  |  |   84|  3.49k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   81|  3.49k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   85|  3.49k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  163|  3.49k|    assign_itx2_fn (R, 32,  8, ssse3);
  ------------------
  |  |   84|  3.49k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   81|  3.49k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   85|  3.49k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  164|  3.49k|    assign_itx2_fn (R, 16, 32, ssse3);
  ------------------
  |  |   84|  3.49k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   81|  3.49k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   85|  3.49k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  165|  3.49k|    assign_itx2_fn (R, 32, 16, ssse3);
  ------------------
  |  |   84|  3.49k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   81|  3.49k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   85|  3.49k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  166|  3.49k|    assign_itx2_fn (,  32, 32, ssse3);
  ------------------
  |  |   84|  3.49k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   81|  3.49k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   85|  3.49k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  167|  3.49k|    assign_itx1_fn (R, 16, 64, ssse3);
  ------------------
  |  |   81|  3.49k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  168|  3.49k|    assign_itx1_fn (R, 32, 64, ssse3);
  ------------------
  |  |   81|  3.49k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  169|  3.49k|    assign_itx1_fn (R, 64, 16, ssse3);
  ------------------
  |  |   81|  3.49k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  170|  3.49k|    assign_itx1_fn (R, 64, 32, ssse3);
  ------------------
  |  |   81|  3.49k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  171|  3.49k|    assign_itx1_fn ( , 64, 64, ssse3);
  ------------------
  |  |   81|  3.49k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  172|  3.49k|    *all_simd = 1;
  173|  3.49k|#endif
  174|       |
  175|  3.49k|    if (!(flags & DAV1D_X86_CPU_FLAG_SSE41)) return;
  ------------------
  |  Branch (175:9): [True: 0, False: 3.49k]
  ------------------
  176|       |
  177|       |#if BITDEPTH == 16
  178|       |    if (bpc == 10) {
  179|       |        assign_itx16_fn(,   4,  4, sse4);
  180|       |        assign_itx16_fn(R,  4,  8, sse4);
  181|       |        assign_itx16_fn(R,  4, 16, sse4);
  182|       |        assign_itx16_fn(R,  8,  4, sse4);
  183|       |        assign_itx16_fn(,   8,  8, sse4);
  184|       |        assign_itx16_fn(R,  8, 16, sse4);
  185|       |        assign_itx16_fn(R, 16,  4, sse4);
  186|       |        assign_itx16_fn(R, 16,  8, sse4);
  187|       |        assign_itx12_fn(,  16, 16, sse4);
  188|       |        assign_itx2_fn (R,  8, 32, sse4);
  189|       |        assign_itx2_fn (R, 32,  8, sse4);
  190|       |        assign_itx2_fn (R, 16, 32, sse4);
  191|       |        assign_itx2_fn (R, 32, 16, sse4);
  192|       |        assign_itx2_fn (,  32, 32, sse4);
  193|       |        assign_itx1_fn (R, 16, 64, sse4);
  194|       |        assign_itx1_fn (R, 32, 64, sse4);
  195|       |        assign_itx1_fn (R, 64, 16, sse4);
  196|       |        assign_itx1_fn (R, 64, 32, sse4);
  197|       |        assign_itx1_fn (,  64, 64, sse4);
  198|       |        *all_simd = 1;
  199|       |    }
  200|       |#endif
  201|       |
  202|  3.49k|#if ARCH_X86_64
  203|  3.49k|    if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return;
  ------------------
  |  Branch (203:9): [True: 0, False: 3.49k]
  ------------------
  204|       |
  205|  3.49k|    assign_itx_fn(, 4, 4, wht_wht, WHT_WHT, avx2);
  ------------------
  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  ------------------
  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  206|       |
  207|  3.49k|#if BITDEPTH == 8
  208|  3.49k|    assign_itx16_fn( ,  4,  4, avx2);
  ------------------
  |  |  101|  3.49k|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|  3.49k|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|  3.49k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|  3.49k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  3.49k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|  3.49k|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|  3.49k|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|  3.49k|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|  3.49k|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|  3.49k|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|  3.49k|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|  3.49k|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|  3.49k|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|  3.49k|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|  3.49k|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  209|  3.49k|    assign_itx16_fn(R,  4,  8, avx2);
  ------------------
  |  |  101|  3.49k|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|  3.49k|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|  3.49k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|  3.49k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  3.49k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|  3.49k|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|  3.49k|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|  3.49k|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|  3.49k|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|  3.49k|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|  3.49k|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|  3.49k|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|  3.49k|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|  3.49k|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|  3.49k|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  210|  3.49k|    assign_itx16_fn(R,  4, 16, avx2);
  ------------------
  |  |  101|  3.49k|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|  3.49k|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|  3.49k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|  3.49k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  3.49k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|  3.49k|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|  3.49k|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|  3.49k|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|  3.49k|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|  3.49k|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|  3.49k|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|  3.49k|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|  3.49k|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|  3.49k|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|  3.49k|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  211|  3.49k|    assign_itx16_fn(R,  8,  4, avx2);
  ------------------
  |  |  101|  3.49k|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|  3.49k|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|  3.49k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|  3.49k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  3.49k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|  3.49k|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|  3.49k|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|  3.49k|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|  3.49k|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|  3.49k|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|  3.49k|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|  3.49k|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|  3.49k|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|  3.49k|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|  3.49k|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  212|  3.49k|    assign_itx16_fn( ,  8,  8, avx2);
  ------------------
  |  |  101|  3.49k|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|  3.49k|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|  3.49k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|  3.49k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  3.49k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|  3.49k|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|  3.49k|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|  3.49k|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|  3.49k|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|  3.49k|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|  3.49k|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|  3.49k|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|  3.49k|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|  3.49k|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|  3.49k|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  213|  3.49k|    assign_itx16_fn(R,  8, 16, avx2);
  ------------------
  |  |  101|  3.49k|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|  3.49k|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|  3.49k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|  3.49k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  3.49k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|  3.49k|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|  3.49k|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|  3.49k|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|  3.49k|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|  3.49k|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|  3.49k|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|  3.49k|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|  3.49k|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|  3.49k|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|  3.49k|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  214|  3.49k|    assign_itx2_fn (R,  8, 32, avx2);
  ------------------
  |  |   84|  3.49k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   81|  3.49k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   85|  3.49k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  215|  3.49k|    assign_itx16_fn(R, 16,  4, avx2);
  ------------------
  |  |  101|  3.49k|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|  3.49k|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|  3.49k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|  3.49k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  3.49k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|  3.49k|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|  3.49k|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|  3.49k|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|  3.49k|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|  3.49k|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|  3.49k|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|  3.49k|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|  3.49k|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|  3.49k|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|  3.49k|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  216|  3.49k|    assign_itx16_fn(R, 16,  8, avx2);
  ------------------
  |  |  101|  3.49k|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|  3.49k|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|  3.49k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|  3.49k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  3.49k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|  3.49k|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|  3.49k|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|  3.49k|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|  3.49k|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|  3.49k|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|  3.49k|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|  3.49k|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|  3.49k|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|  3.49k|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|  3.49k|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  217|  3.49k|    assign_itx12_fn( , 16, 16, avx2);
  ------------------
  |  |   88|  3.49k|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   84|  3.49k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   81|  3.49k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   85|  3.49k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   89|  3.49k|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   90|  3.49k|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   91|  3.49k|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   92|  3.49k|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   93|  3.49k|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   94|  3.49k|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   95|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   96|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   97|  3.49k|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   98|  3.49k|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  218|  3.49k|    assign_itx2_fn (R, 16, 32, avx2);
  ------------------
  |  |   84|  3.49k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   81|  3.49k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   85|  3.49k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  219|  3.49k|    assign_itx1_fn (R, 16, 64, avx2);
  ------------------
  |  |   81|  3.49k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  220|  3.49k|    assign_itx2_fn (R, 32,  8, avx2);
  ------------------
  |  |   84|  3.49k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   81|  3.49k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   85|  3.49k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  221|  3.49k|    assign_itx2_fn (R, 32, 16, avx2);
  ------------------
  |  |   84|  3.49k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   81|  3.49k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   85|  3.49k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  222|  3.49k|    assign_itx2_fn ( , 32, 32, avx2);
  ------------------
  |  |   84|  3.49k|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   81|  3.49k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   85|  3.49k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  223|  3.49k|    assign_itx1_fn (R, 32, 64, avx2);
  ------------------
  |  |   81|  3.49k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  224|  3.49k|    assign_itx1_fn (R, 64, 16, avx2);
  ------------------
  |  |   81|  3.49k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  225|  3.49k|    assign_itx1_fn (R, 64, 32, avx2);
  ------------------
  |  |   81|  3.49k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  226|  3.49k|    assign_itx1_fn ( , 64, 64, avx2);
  ------------------
  |  |   81|  3.49k|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  227|       |#else
  228|       |    if (bpc == 10) {
  229|       |        assign_itx16_bpc_fn( ,  4,  4, 10, avx2);
  230|       |        assign_itx16_bpc_fn(R,  4,  8, 10, avx2);
  231|       |        assign_itx16_bpc_fn(R,  4, 16, 10, avx2);
  232|       |        assign_itx16_bpc_fn(R,  8,  4, 10, avx2);
  233|       |        assign_itx16_bpc_fn( ,  8,  8, 10, avx2);
  234|       |        assign_itx16_bpc_fn(R,  8, 16, 10, avx2);
  235|       |        assign_itx2_bpc_fn (R,  8, 32, 10, avx2);
  236|       |        assign_itx16_bpc_fn(R, 16,  4, 10, avx2);
  237|       |        assign_itx16_bpc_fn(R, 16,  8, 10, avx2);
  238|       |        assign_itx12_bpc_fn( , 16, 16, 10, avx2);
  239|       |        assign_itx2_bpc_fn (R, 16, 32, 10, avx2);
  240|       |        assign_itx1_bpc_fn (R, 16, 64, 10, avx2);
  241|       |        assign_itx2_bpc_fn (R, 32,  8, 10, avx2);
  242|       |        assign_itx2_bpc_fn (R, 32, 16, 10, avx2);
  243|       |        assign_itx2_bpc_fn ( , 32, 32, 10, avx2);
  244|       |        assign_itx1_bpc_fn (R, 32, 64, 10, avx2);
  245|       |        assign_itx1_bpc_fn (R, 64, 16, 10, avx2);
  246|       |        assign_itx1_bpc_fn (R, 64, 32, 10, avx2);
  247|       |        assign_itx1_bpc_fn ( , 64, 64, 10, avx2);
  248|       |    } else {
  249|       |        assign_itx16_bpc_fn( ,  4,  4, 12, avx2);
  250|       |        assign_itx16_bpc_fn(R,  4,  8, 12, avx2);
  251|       |        assign_itx16_bpc_fn(R,  4, 16, 12, avx2);
  252|       |        assign_itx16_bpc_fn(R,  8,  4, 12, avx2);
  253|       |        assign_itx16_bpc_fn( ,  8,  8, 12, avx2);
  254|       |        assign_itx16_bpc_fn(R,  8, 16, 12, avx2);
  255|       |        assign_itx2_bpc_fn (R,  8, 32, 12, avx2);
  256|       |        assign_itx16_bpc_fn(R, 16,  4, 12, avx2);
  257|       |        assign_itx16_bpc_fn(R, 16,  8, 12, avx2);
  258|       |        assign_itx12_bpc_fn( , 16, 16, 12, avx2);
  259|       |        assign_itx2_bpc_fn (R, 32,  8, 12, avx2);
  260|       |        assign_itx_bpc_fn(R, 16, 32, identity_identity, IDTX, 12, avx2);
  261|       |        assign_itx_bpc_fn(R, 32, 16, identity_identity, IDTX, 12, avx2);
  262|       |        assign_itx_bpc_fn( , 32, 32, identity_identity, IDTX, 12, avx2);
  263|       |    }
  264|       |#endif
  265|       |
  266|  3.49k|    if (!(flags & DAV1D_X86_CPU_FLAG_AVX512ICL)) return;
  ------------------
  |  Branch (266:9): [True: 3.49k, False: 0]
  ------------------
  267|       |
  268|      0|#if BITDEPTH == 8
  269|  3.49k|    assign_itx16_fn( ,  4,  4, avx512icl); // no wht
  ------------------
  |  |  101|  3.49k|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|  3.49k|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|      0|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|      0|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|  3.49k|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|      0|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|      0|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|      0|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|      0|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|      0|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|      0|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|      0|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|      0|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|      0|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|  3.49k|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|      0|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|      0|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|      0|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|  3.49k|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|  3.49k|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|  3.49k|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|  3.49k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  270|      0|    assign_itx16_fn(R,  4,  8, avx512icl);
  ------------------
  |  |  101|      0|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|      0|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|      0|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|      0|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|      0|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|      0|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|      0|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|      0|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|      0|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|      0|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|      0|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|      0|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|      0|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|      0|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|      0|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|      0|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|      0|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|      0|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|      0|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  271|      0|    assign_itx16_fn(R,  4, 16, avx512icl);
  ------------------
  |  |  101|      0|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|      0|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|      0|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|      0|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|      0|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|      0|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|      0|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|      0|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|      0|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|      0|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|      0|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|      0|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|      0|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|      0|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|      0|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|      0|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|      0|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|      0|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|      0|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  272|      0|    assign_itx16_fn(R,  8,  4, avx512icl);
  ------------------
  |  |  101|      0|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|      0|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|      0|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|      0|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|      0|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|      0|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|      0|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|      0|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|      0|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|      0|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|      0|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|      0|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|      0|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|      0|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|      0|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|      0|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|      0|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|      0|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|      0|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  273|      0|    assign_itx16_fn( ,  8,  8, avx512icl);
  ------------------
  |  |  101|      0|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|      0|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|      0|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|      0|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|      0|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|      0|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|      0|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|      0|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|      0|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|      0|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|      0|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|      0|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|      0|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|      0|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|      0|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|      0|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|      0|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|      0|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|      0|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  274|      0|    assign_itx16_fn(R,  8, 16, avx512icl);
  ------------------
  |  |  101|      0|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|      0|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|      0|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|      0|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|      0|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|      0|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|      0|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|      0|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|      0|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|      0|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|      0|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|      0|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|      0|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|      0|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|      0|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|      0|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|      0|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|      0|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|      0|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  275|      0|    assign_itx2_fn (R,  8, 32, avx512icl);
  ------------------
  |  |   84|      0|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   81|      0|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   85|      0|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  276|      0|    assign_itx16_fn(R, 16,  4, avx512icl);
  ------------------
  |  |  101|      0|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|      0|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|      0|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|      0|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|      0|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|      0|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|      0|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|      0|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|      0|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|      0|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|      0|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|      0|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|      0|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|      0|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|      0|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|      0|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|      0|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|      0|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|      0|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  277|      0|    assign_itx16_fn(R, 16,  8, avx512icl);
  ------------------
  |  |  101|      0|    assign_itx12_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   88|      0|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   84|      0|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   81|      0|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |   85|      0|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   89|      0|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   90|      0|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   91|      0|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   92|      0|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   93|      0|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   94|      0|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   95|      0|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   96|      0|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   97|      0|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   98|      0|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  102|      0|    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  103|      0|    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  104|      0|    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  105|      0|    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  278|      0|    assign_itx12_fn( , 16, 16, avx512icl);
  ------------------
  |  |   88|      0|    assign_itx2_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   84|      0|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   81|      0|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |   85|      0|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   89|      0|    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   90|      0|    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   91|      0|    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   92|      0|    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   93|      0|    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   94|      0|    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   95|      0|    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   96|      0|    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   97|      0|    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   98|      0|    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  279|      0|    assign_itx2_fn (R, 16, 32, avx512icl);
  ------------------
  |  |   84|      0|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   81|      0|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   85|      0|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  280|      0|    assign_itx1_fn (R, 16, 64, avx512icl);
  ------------------
  |  |   81|      0|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  281|      0|    assign_itx2_fn (R, 32,  8, avx512icl);
  ------------------
  |  |   84|      0|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   81|      0|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   85|      0|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  282|      0|    assign_itx2_fn (R, 32, 16, avx512icl);
  ------------------
  |  |   84|      0|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   81|      0|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   85|      0|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  283|      0|    assign_itx2_fn ( , 32, 32, avx512icl);
  ------------------
  |  |   84|      0|    assign_itx1_fn(pfx, w, h, ext); \
  |  |  ------------------
  |  |  |  |   81|      0|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |   85|      0|    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  284|      0|    assign_itx1_fn (R, 32, 64, avx512icl);
  ------------------
  |  |   81|      0|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  285|      0|    assign_itx1_fn (R, 64, 16, avx512icl);
  ------------------
  |  |   81|      0|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  286|      0|    assign_itx1_fn (R, 64, 32, avx512icl);
  ------------------
  |  |   81|      0|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  287|      0|    assign_itx1_fn ( , 64, 64, avx512icl);
  ------------------
  |  |   81|      0|    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
  |  |  ------------------
  |  |  |  |   77|      0|    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
  |  |  |  |   78|      0|        BF(dav1d_inv_txfm_add_##type##_##w##x##h, ext)
  |  |  |  |  ------------------
  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  288|       |#else
  289|       |    if (bpc == 10) {
  290|       |        assign_itx16_bpc_fn( ,  8,  8, 10, avx512icl);
  291|       |        assign_itx16_bpc_fn(R,  8, 16, 10, avx512icl);
  292|       |        assign_itx2_bpc_fn (R,  8, 32, 10, avx512icl);
  293|       |        assign_itx16_bpc_fn(R, 16,  8, 10, avx512icl);
  294|       |        assign_itx12_bpc_fn( , 16, 16, 10, avx512icl);
  295|       |        assign_itx2_bpc_fn (R, 16, 32, 10, avx512icl);
  296|       |        assign_itx2_bpc_fn (R, 32,  8, 10, avx512icl);
  297|       |        assign_itx2_bpc_fn (R, 32, 16, 10, avx512icl);
  298|       |        assign_itx2_bpc_fn ( , 32, 32, 10, avx512icl);
  299|       |        assign_itx1_bpc_fn (R, 16, 64, 10, avx512icl);
  300|       |        assign_itx1_bpc_fn (R, 32, 64, 10, avx512icl);
  301|       |        assign_itx1_bpc_fn (R, 64, 16, 10, avx512icl);
  302|       |        assign_itx1_bpc_fn (R, 64, 32, 10, avx512icl);
  303|       |        assign_itx1_bpc_fn ( , 64, 64, 10, avx512icl);
  304|       |    }
  305|       |#endif
  306|      0|#endif
  307|      0|}

loopfilter_tmpl.c:loop_filter_dsp_init_x86:
   41|  9.21k|static ALWAYS_INLINE void loop_filter_dsp_init_x86(Dav1dLoopFilterDSPContext *const c) {
   42|  9.21k|    const unsigned flags = dav1d_get_cpu_flags();
   43|       |
   44|  9.21k|    if (!(flags & DAV1D_X86_CPU_FLAG_SSSE3)) return;
  ------------------
  |  Branch (44:9): [True: 0, False: 9.21k]
  ------------------
   45|       |
   46|  9.21k|    c->loop_filter_sb[0][0] = BF(dav1d_lpf_h_sb_y, ssse3);
  ------------------
  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   47|  9.21k|    c->loop_filter_sb[0][1] = BF(dav1d_lpf_v_sb_y, ssse3);
  ------------------
  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   48|  9.21k|    c->loop_filter_sb[1][0] = BF(dav1d_lpf_h_sb_uv, ssse3);
  ------------------
  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   49|  9.21k|    c->loop_filter_sb[1][1] = BF(dav1d_lpf_v_sb_uv, ssse3);
  ------------------
  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   50|       |
   51|  9.21k|#if ARCH_X86_64
   52|  9.21k|    if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return;
  ------------------
  |  Branch (52:9): [True: 0, False: 9.21k]
  ------------------
   53|       |
   54|  9.21k|    c->loop_filter_sb[0][0] = BF(dav1d_lpf_h_sb_y, avx2);
  ------------------
  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   55|  9.21k|    c->loop_filter_sb[0][1] = BF(dav1d_lpf_v_sb_y, avx2);
  ------------------
  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   56|  9.21k|    c->loop_filter_sb[1][0] = BF(dav1d_lpf_h_sb_uv, avx2);
  ------------------
  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   57|  9.21k|    c->loop_filter_sb[1][1] = BF(dav1d_lpf_v_sb_uv, avx2);
  ------------------
  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   58|       |
   59|  9.21k|    if (!(flags & DAV1D_X86_CPU_FLAG_AVX512ICL)) return;
  ------------------
  |  Branch (59:9): [True: 9.21k, False: 0]
  ------------------
   60|       |
   61|      0|    c->loop_filter_sb[0][1] = BF(dav1d_lpf_v_sb_y, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   62|      0|    c->loop_filter_sb[1][1] = BF(dav1d_lpf_v_sb_uv, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   63|       |
   64|      0|    if (!(flags & DAV1D_X86_CPU_FLAG_SLOW_GATHER)) {
  ------------------
  |  Branch (64:9): [True: 0, False: 0]
  ------------------
   65|      0|        c->loop_filter_sb[0][0] = BF(dav1d_lpf_h_sb_y, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   66|      0|        c->loop_filter_sb[1][0] = BF(dav1d_lpf_h_sb_uv, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   67|      0|    }
   68|      0|#endif
   69|      0|}

looprestoration_tmpl.c:loop_restoration_dsp_init_x86:
   50|  9.21k|static ALWAYS_INLINE void loop_restoration_dsp_init_x86(Dav1dLoopRestorationDSPContext *const c, const int bpc) {
   51|  9.21k|    const unsigned flags = dav1d_get_cpu_flags();
   52|       |
   53|  9.21k|    if (!(flags & DAV1D_X86_CPU_FLAG_SSE2)) return;
  ------------------
  |  Branch (53:9): [True: 0, False: 9.21k]
  ------------------
   54|  9.21k|#if BITDEPTH == 8
   55|  9.21k|    c->wiener[0] = BF(dav1d_wiener_filter7, sse2);
  ------------------
  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   56|  9.21k|    c->wiener[1] = BF(dav1d_wiener_filter5, sse2);
  ------------------
  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   57|  9.21k|#endif
   58|       |
   59|  9.21k|    if (!(flags & DAV1D_X86_CPU_FLAG_SSSE3)) return;
  ------------------
  |  Branch (59:9): [True: 0, False: 9.21k]
  ------------------
   60|  9.21k|    c->wiener[0] = BF(dav1d_wiener_filter7, ssse3);
  ------------------
  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   61|  9.21k|    c->wiener[1] = BF(dav1d_wiener_filter5, ssse3);
  ------------------
  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   62|  9.21k|    if (BITDEPTH == 8 || bpc == 10) {
  ------------------
  |  Branch (62:9): [True: 3.49k, Folded]
  |  Branch (62:26): [True: 2.32k, False: 3.39k]
  ------------------
   63|  5.82k|        c->sgr[0] = BF(dav1d_sgr_filter_5x5, ssse3);
  ------------------
  |  |   52|  5.82k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   64|  5.82k|        c->sgr[1] = BF(dav1d_sgr_filter_3x3, ssse3);
  ------------------
  |  |   52|  5.82k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   65|  5.82k|        c->sgr[2] = BF(dav1d_sgr_filter_mix, ssse3);
  ------------------
  |  |   52|  5.82k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   66|  5.82k|    }
   67|       |
   68|  9.21k|#if ARCH_X86_64
   69|  9.21k|    if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return;
  ------------------
  |  Branch (69:9): [True: 0, False: 9.21k]
  ------------------
   70|       |
   71|  9.21k|    c->wiener[0] = BF(dav1d_wiener_filter7, avx2);
  ------------------
  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   72|  9.21k|    c->wiener[1] = BF(dav1d_wiener_filter5, avx2);
  ------------------
  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   73|  9.21k|    if (BITDEPTH == 8 || bpc == 10) {
  ------------------
  |  Branch (73:9): [True: 3.49k, Folded]
  |  Branch (73:26): [True: 2.32k, False: 3.39k]
  ------------------
   74|  5.82k|        c->sgr[0] = BF(dav1d_sgr_filter_5x5, avx2);
  ------------------
  |  |   52|  5.82k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   75|  5.82k|        c->sgr[1] = BF(dav1d_sgr_filter_3x3, avx2);
  ------------------
  |  |   52|  5.82k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   76|  5.82k|        c->sgr[2] = BF(dav1d_sgr_filter_mix, avx2);
  ------------------
  |  |   52|  5.82k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   77|  5.82k|    }
   78|       |
   79|  9.21k|    if (!(flags & DAV1D_X86_CPU_FLAG_AVX512ICL)) return;
  ------------------
  |  Branch (79:9): [True: 9.21k, False: 0]
  ------------------
   80|       |
   81|      0|    c->wiener[0] = BF(dav1d_wiener_filter7, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   82|      0|#if BITDEPTH == 8
   83|       |    /* With VNNI we don't need a 5-tap version. */
   84|      0|    c->wiener[1] = c->wiener[0];
   85|       |#else
   86|       |    c->wiener[1] = BF(dav1d_wiener_filter5, avx512icl);
   87|       |#endif
   88|      0|    if (BITDEPTH == 8 || bpc == 10) {
  ------------------
  |  Branch (88:9): [True: 0, Folded]
  |  Branch (88:26): [True: 0, False: 0]
  ------------------
   89|      0|        c->sgr[0] = BF(dav1d_sgr_filter_5x5, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   90|      0|        c->sgr[1] = BF(dav1d_sgr_filter_3x3, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   91|      0|        c->sgr[2] = BF(dav1d_sgr_filter_mix, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
   92|      0|    }
   93|      0|#endif
   94|      0|}

mc_tmpl.c:mc_dsp_init_x86:
   92|  9.21k|static ALWAYS_INLINE void mc_dsp_init_x86(Dav1dMCDSPContext *const c) {
   93|  9.21k|    const unsigned flags = dav1d_get_cpu_flags();
   94|       |
   95|  9.21k|    if(!(flags & DAV1D_X86_CPU_FLAG_SSSE3))
  ------------------
  |  Branch (95:8): [True: 0, False: 9.21k]
  ------------------
   96|      0|        return;
   97|       |
   98|  9.21k|    init_8tap_fns(ssse3);
  ------------------
  |  |  143|  9.21k|    init_8tap_gen(mc,  opt); \
  |  |  ------------------
  |  |  |  |  132|  9.21k|    init_##name##_fn(FILTER_2D_8TAP_REGULAR,        8tap_regular,        opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  9.21k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  133|  9.21k|    init_##name##_fn(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth, opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  9.21k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  134|  9.21k|    init_##name##_fn(FILTER_2D_8TAP_REGULAR_SHARP,  8tap_regular_sharp,  opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  9.21k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  135|  9.21k|    init_##name##_fn(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular, opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  9.21k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  136|  9.21k|    init_##name##_fn(FILTER_2D_8TAP_SMOOTH,         8tap_smooth,         opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  9.21k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  137|  9.21k|    init_##name##_fn(FILTER_2D_8TAP_SMOOTH_SHARP,   8tap_smooth_sharp,   opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  9.21k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  138|  9.21k|    init_##name##_fn(FILTER_2D_8TAP_SHARP_REGULAR,  8tap_sharp_regular,  opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  9.21k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  139|  9.21k|    init_##name##_fn(FILTER_2D_8TAP_SHARP_SMOOTH,   8tap_sharp_smooth,   opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  9.21k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  140|  9.21k|    init_##name##_fn(FILTER_2D_8TAP_SHARP,          8tap_sharp,          opt)
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  9.21k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  144|  9.21k|    init_8tap_gen(mct, opt)
  |  |  ------------------
  |  |  |  |  132|  9.21k|    init_##name##_fn(FILTER_2D_8TAP_REGULAR,        8tap_regular,        opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|  9.21k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  133|  9.21k|    init_##name##_fn(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth, opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|  9.21k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  134|  9.21k|    init_##name##_fn(FILTER_2D_8TAP_REGULAR_SHARP,  8tap_regular_sharp,  opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|  9.21k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  135|  9.21k|    init_##name##_fn(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular, opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|  9.21k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  136|  9.21k|    init_##name##_fn(FILTER_2D_8TAP_SMOOTH,         8tap_smooth,         opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|  9.21k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  137|  9.21k|    init_##name##_fn(FILTER_2D_8TAP_SMOOTH_SHARP,   8tap_smooth_sharp,   opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|  9.21k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  138|  9.21k|    init_##name##_fn(FILTER_2D_8TAP_SHARP_REGULAR,  8tap_sharp_regular,  opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|  9.21k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  139|  9.21k|    init_##name##_fn(FILTER_2D_8TAP_SHARP_SMOOTH,   8tap_sharp_smooth,   opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|  9.21k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  140|  9.21k|    init_##name##_fn(FILTER_2D_8TAP_SHARP,          8tap_sharp,          opt)
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|  9.21k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
   99|       |
  100|  9.21k|    init_mc_fn(FILTER_2D_BILINEAR,             bilin,               ssse3);
  ------------------
  |  |   36|  9.21k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  101|  9.21k|    init_mct_fn(FILTER_2D_BILINEAR,            bilin,               ssse3);
  ------------------
  |  |   38|  9.21k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  102|       |
  103|  9.21k|    init_mc_scaled_fn(FILTER_2D_8TAP_REGULAR,        8tap_scaled_regular,        ssse3);
  ------------------
  |  |   40|  9.21k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  104|  9.21k|    init_mc_scaled_fn(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_scaled_regular_smooth, ssse3);
  ------------------
  |  |   40|  9.21k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  105|  9.21k|    init_mc_scaled_fn(FILTER_2D_8TAP_REGULAR_SHARP,  8tap_scaled_regular_sharp,  ssse3);
  ------------------
  |  |   40|  9.21k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  106|  9.21k|    init_mc_scaled_fn(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_scaled_smooth_regular, ssse3);
  ------------------
  |  |   40|  9.21k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  107|  9.21k|    init_mc_scaled_fn(FILTER_2D_8TAP_SMOOTH,         8tap_scaled_smooth,         ssse3);
  ------------------
  |  |   40|  9.21k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  108|  9.21k|    init_mc_scaled_fn(FILTER_2D_8TAP_SMOOTH_SHARP,   8tap_scaled_smooth_sharp,   ssse3);
  ------------------
  |  |   40|  9.21k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  109|  9.21k|    init_mc_scaled_fn(FILTER_2D_8TAP_SHARP_REGULAR,  8tap_scaled_sharp_regular,  ssse3);
  ------------------
  |  |   40|  9.21k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  110|  9.21k|    init_mc_scaled_fn(FILTER_2D_8TAP_SHARP_SMOOTH,   8tap_scaled_sharp_smooth,   ssse3);
  ------------------
  |  |   40|  9.21k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  111|  9.21k|    init_mc_scaled_fn(FILTER_2D_8TAP_SHARP,          8tap_scaled_sharp,          ssse3);
  ------------------
  |  |   40|  9.21k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  112|  9.21k|    init_mc_scaled_fn(FILTER_2D_BILINEAR,            bilin_scaled,               ssse3);
  ------------------
  |  |   40|  9.21k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  113|       |
  114|  9.21k|    init_mct_scaled_fn(FILTER_2D_8TAP_REGULAR,        8tap_scaled_regular,        ssse3);
  ------------------
  |  |   42|  9.21k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  115|  9.21k|    init_mct_scaled_fn(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_scaled_regular_smooth, ssse3);
  ------------------
  |  |   42|  9.21k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  116|  9.21k|    init_mct_scaled_fn(FILTER_2D_8TAP_REGULAR_SHARP,  8tap_scaled_regular_sharp,  ssse3);
  ------------------
  |  |   42|  9.21k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  117|  9.21k|    init_mct_scaled_fn(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_scaled_smooth_regular, ssse3);
  ------------------
  |  |   42|  9.21k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  118|  9.21k|    init_mct_scaled_fn(FILTER_2D_8TAP_SMOOTH,         8tap_scaled_smooth,         ssse3);
  ------------------
  |  |   42|  9.21k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  119|  9.21k|    init_mct_scaled_fn(FILTER_2D_8TAP_SMOOTH_SHARP,   8tap_scaled_smooth_sharp,   ssse3);
  ------------------
  |  |   42|  9.21k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  120|  9.21k|    init_mct_scaled_fn(FILTER_2D_8TAP_SHARP_REGULAR,  8tap_scaled_sharp_regular,  ssse3);
  ------------------
  |  |   42|  9.21k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  121|  9.21k|    init_mct_scaled_fn(FILTER_2D_8TAP_SHARP_SMOOTH,   8tap_scaled_sharp_smooth,   ssse3);
  ------------------
  |  |   42|  9.21k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  122|  9.21k|    init_mct_scaled_fn(FILTER_2D_8TAP_SHARP,          8tap_scaled_sharp,          ssse3);
  ------------------
  |  |   42|  9.21k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  123|  9.21k|    init_mct_scaled_fn(FILTER_2D_BILINEAR,            bilin_scaled,               ssse3);
  ------------------
  |  |   42|  9.21k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  124|       |
  125|  9.21k|    c->avg = BF(dav1d_avg, ssse3);
  ------------------
  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  126|  9.21k|    c->w_avg = BF(dav1d_w_avg, ssse3);
  ------------------
  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  127|  9.21k|    c->mask = BF(dav1d_mask, ssse3);
  ------------------
  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  128|  9.21k|    c->w_mask[0] = BF(dav1d_w_mask_444, ssse3);
  ------------------
  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  129|  9.21k|    c->w_mask[1] = BF(dav1d_w_mask_422, ssse3);
  ------------------
  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  130|  9.21k|    c->w_mask[2] = BF(dav1d_w_mask_420, ssse3);
  ------------------
  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  131|  9.21k|    c->blend = BF(dav1d_blend, ssse3);
  ------------------
  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  132|  9.21k|    c->blend_v = BF(dav1d_blend_v, ssse3);
  ------------------
  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  133|  9.21k|    c->blend_h = BF(dav1d_blend_h, ssse3);
  ------------------
  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  134|  9.21k|    c->warp8x8  = BF(dav1d_warp_affine_8x8, ssse3);
  ------------------
  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  135|  9.21k|    c->warp8x8t = BF(dav1d_warp_affine_8x8t, ssse3);
  ------------------
  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  136|  9.21k|    c->emu_edge = BF(dav1d_emu_edge, ssse3);
  ------------------
  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  137|  9.21k|    c->resize = BF(dav1d_resize, ssse3);
  ------------------
  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  138|       |
  139|  9.21k|    if(!(flags & DAV1D_X86_CPU_FLAG_SSE41))
  ------------------
  |  Branch (139:8): [True: 0, False: 9.21k]
  ------------------
  140|      0|        return;
  141|       |
  142|  9.21k|#if BITDEPTH == 8
  143|  9.21k|    c->warp8x8  = BF(dav1d_warp_affine_8x8, sse4);
  ------------------
  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  144|  9.21k|    c->warp8x8t = BF(dav1d_warp_affine_8x8t, sse4);
  ------------------
  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  145|  9.21k|#endif
  146|       |
  147|  9.21k|#if ARCH_X86_64
  148|  9.21k|    if (!(flags & DAV1D_X86_CPU_FLAG_AVX2))
  ------------------
  |  Branch (148:9): [True: 0, False: 9.21k]
  ------------------
  149|      0|        return;
  150|       |
  151|  9.21k|    init_8tap_fns(avx2);
  ------------------
  |  |  143|  9.21k|    init_8tap_gen(mc,  opt); \
  |  |  ------------------
  |  |  |  |  132|  9.21k|    init_##name##_fn(FILTER_2D_8TAP_REGULAR,        8tap_regular,        opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  9.21k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  133|  9.21k|    init_##name##_fn(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth, opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  9.21k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  134|  9.21k|    init_##name##_fn(FILTER_2D_8TAP_REGULAR_SHARP,  8tap_regular_sharp,  opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  9.21k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  135|  9.21k|    init_##name##_fn(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular, opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  9.21k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  136|  9.21k|    init_##name##_fn(FILTER_2D_8TAP_SMOOTH,         8tap_smooth,         opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  9.21k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  137|  9.21k|    init_##name##_fn(FILTER_2D_8TAP_SMOOTH_SHARP,   8tap_smooth_sharp,   opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  9.21k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  138|  9.21k|    init_##name##_fn(FILTER_2D_8TAP_SHARP_REGULAR,  8tap_sharp_regular,  opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  9.21k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  139|  9.21k|    init_##name##_fn(FILTER_2D_8TAP_SHARP_SMOOTH,   8tap_sharp_smooth,   opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  9.21k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  140|  9.21k|    init_##name##_fn(FILTER_2D_8TAP_SHARP,          8tap_sharp,          opt)
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  9.21k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  144|  9.21k|    init_8tap_gen(mct, opt)
  |  |  ------------------
  |  |  |  |  132|  9.21k|    init_##name##_fn(FILTER_2D_8TAP_REGULAR,        8tap_regular,        opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|  9.21k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  133|  9.21k|    init_##name##_fn(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth, opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|  9.21k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  134|  9.21k|    init_##name##_fn(FILTER_2D_8TAP_REGULAR_SHARP,  8tap_regular_sharp,  opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|  9.21k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  135|  9.21k|    init_##name##_fn(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular, opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|  9.21k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  136|  9.21k|    init_##name##_fn(FILTER_2D_8TAP_SMOOTH,         8tap_smooth,         opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|  9.21k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  137|  9.21k|    init_##name##_fn(FILTER_2D_8TAP_SMOOTH_SHARP,   8tap_smooth_sharp,   opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|  9.21k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  138|  9.21k|    init_##name##_fn(FILTER_2D_8TAP_SHARP_REGULAR,  8tap_sharp_regular,  opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|  9.21k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  139|  9.21k|    init_##name##_fn(FILTER_2D_8TAP_SHARP_SMOOTH,   8tap_sharp_smooth,   opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|  9.21k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  140|  9.21k|    init_##name##_fn(FILTER_2D_8TAP_SHARP,          8tap_sharp,          opt)
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|  9.21k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  152|       |
  153|  9.21k|    init_mc_fn(FILTER_2D_BILINEAR,            bilin,               avx2);
  ------------------
  |  |   36|  9.21k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  154|  9.21k|    init_mct_fn(FILTER_2D_BILINEAR,           bilin,               avx2);
  ------------------
  |  |   38|  9.21k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  155|       |
  156|  9.21k|    init_mc_scaled_fn(FILTER_2D_8TAP_REGULAR,        8tap_scaled_regular,        avx2);
  ------------------
  |  |   40|  9.21k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  157|  9.21k|    init_mc_scaled_fn(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_scaled_regular_smooth, avx2);
  ------------------
  |  |   40|  9.21k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  158|  9.21k|    init_mc_scaled_fn(FILTER_2D_8TAP_REGULAR_SHARP,  8tap_scaled_regular_sharp,  avx2);
  ------------------
  |  |   40|  9.21k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  159|  9.21k|    init_mc_scaled_fn(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_scaled_smooth_regular, avx2);
  ------------------
  |  |   40|  9.21k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  160|  9.21k|    init_mc_scaled_fn(FILTER_2D_8TAP_SMOOTH,         8tap_scaled_smooth,         avx2);
  ------------------
  |  |   40|  9.21k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  161|  9.21k|    init_mc_scaled_fn(FILTER_2D_8TAP_SMOOTH_SHARP,   8tap_scaled_smooth_sharp,   avx2);
  ------------------
  |  |   40|  9.21k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  162|  9.21k|    init_mc_scaled_fn(FILTER_2D_8TAP_SHARP_REGULAR,  8tap_scaled_sharp_regular,  avx2);
  ------------------
  |  |   40|  9.21k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  163|  9.21k|    init_mc_scaled_fn(FILTER_2D_8TAP_SHARP_SMOOTH,   8tap_scaled_sharp_smooth,   avx2);
  ------------------
  |  |   40|  9.21k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  164|  9.21k|    init_mc_scaled_fn(FILTER_2D_8TAP_SHARP,          8tap_scaled_sharp,          avx2);
  ------------------
  |  |   40|  9.21k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  165|  9.21k|    init_mc_scaled_fn(FILTER_2D_BILINEAR,            bilin_scaled,               avx2);
  ------------------
  |  |   40|  9.21k|    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  166|       |
  167|  9.21k|    init_mct_scaled_fn(FILTER_2D_8TAP_REGULAR,        8tap_scaled_regular,        avx2);
  ------------------
  |  |   42|  9.21k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  168|  9.21k|    init_mct_scaled_fn(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_scaled_regular_smooth, avx2);
  ------------------
  |  |   42|  9.21k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  169|  9.21k|    init_mct_scaled_fn(FILTER_2D_8TAP_REGULAR_SHARP,  8tap_scaled_regular_sharp,  avx2);
  ------------------
  |  |   42|  9.21k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  170|  9.21k|    init_mct_scaled_fn(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_scaled_smooth_regular, avx2);
  ------------------
  |  |   42|  9.21k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  171|  9.21k|    init_mct_scaled_fn(FILTER_2D_8TAP_SMOOTH,         8tap_scaled_smooth,         avx2);
  ------------------
  |  |   42|  9.21k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  172|  9.21k|    init_mct_scaled_fn(FILTER_2D_8TAP_SMOOTH_SHARP,   8tap_scaled_smooth_sharp,   avx2);
  ------------------
  |  |   42|  9.21k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  173|  9.21k|    init_mct_scaled_fn(FILTER_2D_8TAP_SHARP_REGULAR,  8tap_scaled_sharp_regular,  avx2);
  ------------------
  |  |   42|  9.21k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  174|  9.21k|    init_mct_scaled_fn(FILTER_2D_8TAP_SHARP_SMOOTH,   8tap_scaled_sharp_smooth,   avx2);
  ------------------
  |  |   42|  9.21k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  175|  9.21k|    init_mct_scaled_fn(FILTER_2D_8TAP_SHARP,          8tap_scaled_sharp,          avx2);
  ------------------
  |  |   42|  9.21k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  176|  9.21k|    init_mct_scaled_fn(FILTER_2D_BILINEAR,            bilin_scaled,               avx2);
  ------------------
  |  |   42|  9.21k|    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  177|       |
  178|  9.21k|    c->avg = BF(dav1d_avg, avx2);
  ------------------
  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  179|  9.21k|    c->w_avg = BF(dav1d_w_avg, avx2);
  ------------------
  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  180|  9.21k|    c->mask = BF(dav1d_mask, avx2);
  ------------------
  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  181|  9.21k|    c->w_mask[0] = BF(dav1d_w_mask_444, avx2);
  ------------------
  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  182|  9.21k|    c->w_mask[1] = BF(dav1d_w_mask_422, avx2);
  ------------------
  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  183|  9.21k|    c->w_mask[2] = BF(dav1d_w_mask_420, avx2);
  ------------------
  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  184|  9.21k|    c->blend = BF(dav1d_blend, avx2);
  ------------------
  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  185|  9.21k|    c->blend_v = BF(dav1d_blend_v, avx2);
  ------------------
  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  186|  9.21k|    c->blend_h = BF(dav1d_blend_h, avx2);
  ------------------
  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  187|  9.21k|    c->warp8x8  = BF(dav1d_warp_affine_8x8, avx2);
  ------------------
  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  188|  9.21k|    c->warp8x8t = BF(dav1d_warp_affine_8x8t, avx2);
  ------------------
  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  189|  9.21k|    c->emu_edge = BF(dav1d_emu_edge, avx2);
  ------------------
  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  190|  9.21k|    c->resize = BF(dav1d_resize, avx2);
  ------------------
  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  191|       |
  192|  9.21k|    if (!(flags & DAV1D_X86_CPU_FLAG_AVX512ICL))
  ------------------
  |  Branch (192:9): [True: 9.21k, False: 0]
  ------------------
  193|  9.21k|        return;
  194|       |
  195|  9.21k|    init_8tap_fns(avx512icl);
  ------------------
  |  |  143|      0|    init_8tap_gen(mc,  opt); \
  |  |  ------------------
  |  |  |  |  132|      0|    init_##name##_fn(FILTER_2D_8TAP_REGULAR,        8tap_regular,        opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  9.21k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  133|      0|    init_##name##_fn(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth, opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|      0|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  134|      0|    init_##name##_fn(FILTER_2D_8TAP_REGULAR_SHARP,  8tap_regular_sharp,  opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|      0|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  135|      0|    init_##name##_fn(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular, opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|      0|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  136|      0|    init_##name##_fn(FILTER_2D_8TAP_SMOOTH,         8tap_smooth,         opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|      0|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  137|      0|    init_##name##_fn(FILTER_2D_8TAP_SMOOTH_SHARP,   8tap_smooth_sharp,   opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|      0|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  138|      0|    init_##name##_fn(FILTER_2D_8TAP_SHARP_REGULAR,  8tap_sharp_regular,  opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|      0|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  139|      0|    init_##name##_fn(FILTER_2D_8TAP_SHARP_SMOOTH,   8tap_sharp_smooth,   opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|      0|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  140|  9.21k|    init_##name##_fn(FILTER_2D_8TAP_SHARP,          8tap_sharp,          opt)
  |  |  |  |  ------------------
  |  |  |  |  |  |   36|  9.21k|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |  144|      0|    init_8tap_gen(mct, opt)
  |  |  ------------------
  |  |  |  |  132|      0|    init_##name##_fn(FILTER_2D_8TAP_REGULAR,        8tap_regular,        opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|      0|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  133|  9.21k|    init_##name##_fn(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth, opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|      0|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  134|  9.21k|    init_##name##_fn(FILTER_2D_8TAP_REGULAR_SHARP,  8tap_regular_sharp,  opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|      0|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  135|  9.21k|    init_##name##_fn(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular, opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|      0|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  136|  9.21k|    init_##name##_fn(FILTER_2D_8TAP_SMOOTH,         8tap_smooth,         opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|      0|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  137|  9.21k|    init_##name##_fn(FILTER_2D_8TAP_SMOOTH_SHARP,   8tap_smooth_sharp,   opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|      0|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  138|  9.21k|    init_##name##_fn(FILTER_2D_8TAP_SHARP_REGULAR,  8tap_sharp_regular,  opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|      0|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  139|  9.21k|    init_##name##_fn(FILTER_2D_8TAP_SHARP_SMOOTH,   8tap_sharp_smooth,   opt); \
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|      0|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  140|  9.21k|    init_##name##_fn(FILTER_2D_8TAP_SHARP,          8tap_sharp,          opt)
  |  |  |  |  ------------------
  |  |  |  |  |  |   38|  9.21k|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |   52|  9.21k|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  196|       |
  197|      0|    init_mc_fn (FILTER_2D_BILINEAR,            bilin,               avx512icl);
  ------------------
  |  |   36|      0|    c->mc[type] = BF(dav1d_put_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  198|      0|    init_mct_fn(FILTER_2D_BILINEAR,            bilin,               avx512icl);
  ------------------
  |  |   38|      0|    c->mct[type] = BF(dav1d_prep_##name, suffix)
  |  |  ------------------
  |  |  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  |  |  ------------------
  ------------------
  199|       |
  200|      0|    c->avg = BF(dav1d_avg, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  201|      0|    c->w_avg = BF(dav1d_w_avg, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  202|      0|    c->mask = BF(dav1d_mask, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  203|      0|    c->w_mask[0] = BF(dav1d_w_mask_444, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  204|      0|    c->w_mask[1] = BF(dav1d_w_mask_422, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  205|      0|    c->w_mask[2] = BF(dav1d_w_mask_420, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  206|      0|    c->blend = BF(dav1d_blend, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  207|      0|    c->blend_v = BF(dav1d_blend_v, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  208|      0|    c->blend_h = BF(dav1d_blend_h, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  209|       |
  210|      0|    if (!(flags & DAV1D_X86_CPU_FLAG_SLOW_GATHER)) {
  ------------------
  |  Branch (210:9): [True: 0, False: 0]
  ------------------
  211|      0|        c->resize = BF(dav1d_resize, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  212|      0|        c->warp8x8  = BF(dav1d_warp_affine_8x8, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  213|      0|        c->warp8x8t = BF(dav1d_warp_affine_8x8t, avx512icl);
  ------------------
  |  |   52|      0|#define BF(x, suffix) x##_8bpc_##suffix
  ------------------
  214|      0|    }
  215|      0|#endif
  216|      0|}

msac.c:msac_init_x86:
   59|   245k|static ALWAYS_INLINE void msac_init_x86(MsacContext *const s) {
   60|   245k|    const unsigned flags = dav1d_get_cpu_flags();
   61|       |
   62|   245k|    if (flags & DAV1D_X86_CPU_FLAG_SSE2) {
  ------------------
  |  Branch (62:9): [True: 245k, False: 18.4E]
  ------------------
   63|   245k|        s->symbol_adapt16 = dav1d_msac_decode_symbol_adapt16_sse2;
   64|   245k|    }
   65|       |
   66|   245k|    if (flags & DAV1D_X86_CPU_FLAG_AVX2) {
  ------------------
  |  Branch (66:9): [True: 245k, False: 18.4E]
  ------------------
   67|   245k|        s->symbol_adapt16 = dav1d_msac_decode_symbol_adapt16_avx2;
   68|   245k|    }
   69|   245k|}

pal.c:pal_dsp_init_x86:
   34|  10.2k|static ALWAYS_INLINE void pal_dsp_init_x86(Dav1dPalDSPContext *const c) {
   35|  10.2k|    const unsigned flags = dav1d_get_cpu_flags();
   36|       |
   37|  10.2k|    if (!(flags & DAV1D_X86_CPU_FLAG_SSSE3)) return;
  ------------------
  |  Branch (37:9): [True: 0, False: 10.2k]
  ------------------
   38|       |
   39|  10.2k|    c->pal_idx_finish = dav1d_pal_idx_finish_ssse3;
   40|       |
   41|  10.2k|#if ARCH_X86_64
   42|  10.2k|    if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return;
  ------------------
  |  Branch (42:9): [True: 0, False: 10.2k]
  ------------------
   43|       |
   44|  10.2k|    c->pal_idx_finish = dav1d_pal_idx_finish_avx2;
   45|       |
   46|  10.2k|    if (!(flags & DAV1D_X86_CPU_FLAG_AVX512ICL)) return;
  ------------------
  |  Branch (46:9): [True: 10.2k, False: 0]
  ------------------
   47|       |
   48|      0|    c->pal_idx_finish = dav1d_pal_idx_finish_avx512icl;
   49|      0|#endif
   50|      0|}

refmvs.c:refmvs_dsp_init_x86:
   41|  10.2k|static ALWAYS_INLINE void refmvs_dsp_init_x86(Dav1dRefmvsDSPContext *const c) {
   42|  10.2k|    const unsigned flags = dav1d_get_cpu_flags();
   43|       |
   44|  10.2k|    if (!(flags & DAV1D_X86_CPU_FLAG_SSE2)) return;
  ------------------
  |  Branch (44:9): [True: 0, False: 10.2k]
  ------------------
   45|       |
   46|  10.2k|    c->splat_mv = dav1d_splat_mv_sse2;
   47|       |
   48|  10.2k|    if (!(flags & DAV1D_X86_CPU_FLAG_SSSE3)) return;
  ------------------
  |  Branch (48:9): [True: 0, False: 10.2k]
  ------------------
   49|       |
   50|  10.2k|    c->save_tmvs = dav1d_save_tmvs_ssse3;
   51|       |
   52|  10.2k|    if (!(flags & DAV1D_X86_CPU_FLAG_SSE41)) return;
  ------------------
  |  Branch (52:9): [True: 0, False: 10.2k]
  ------------------
   53|  10.2k|#if ARCH_X86_64
   54|  10.2k|    c->load_tmvs = dav1d_load_tmvs_sse4;
   55|       |
   56|  10.2k|    if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return;
  ------------------
  |  Branch (56:9): [True: 0, False: 10.2k]
  ------------------
   57|       |
   58|  10.2k|    c->save_tmvs = dav1d_save_tmvs_avx2;
   59|  10.2k|    c->splat_mv = dav1d_splat_mv_avx2;
   60|       |
   61|  10.2k|    if (!(flags & DAV1D_X86_CPU_FLAG_AVX512ICL)) return;
  ------------------
  |  Branch (61:9): [True: 10.2k, False: 0]
  ------------------
   62|       |
   63|      0|    c->save_tmvs = dav1d_save_tmvs_avx512icl;
   64|      0|    c->splat_mv = dav1d_splat_mv_avx512icl;
   65|      0|#endif
   66|      0|}

LLVMFuzzerInitialize:
   59|      2|int LLVMFuzzerInitialize(int *argc, char ***argv) {
   60|      2|    int i = 1;
   61|     11|    for (; i < *argc; i++) {
  ------------------
  |  Branch (61:12): [True: 9, False: 2]
  ------------------
   62|      9|        if (!strcmp((*argv)[i], "--cpumask")) {
  ------------------
  |  Branch (62:13): [True: 0, False: 9]
  ------------------
   63|      0|            const char * cpumask = (*argv)[i+1];
   64|      0|            if (cpumask) {
  ------------------
  |  Branch (64:17): [True: 0, False: 0]
  ------------------
   65|      0|                char *end;
   66|      0|                unsigned res;
   67|      0|                if (!strncmp(cpumask, "0x", 2)) {
  ------------------
  |  Branch (67:21): [True: 0, False: 0]
  ------------------
   68|      0|                    cpumask += 2;
   69|      0|                    res = (unsigned) strtoul(cpumask, &end, 16);
   70|      0|                } else {
   71|      0|                    res = (unsigned) strtoul(cpumask, &end, 0);
   72|      0|                }
   73|      0|                if (end != cpumask && !end[0]) {
  ------------------
  |  Branch (73:21): [True: 0, False: 0]
  |  Branch (73:39): [True: 0, False: 0]
  ------------------
   74|      0|                    dav1d_set_cpu_flags_mask(res);
   75|      0|                }
   76|      0|            }
   77|      0|            break;
   78|      0|        }
   79|      9|    }
   80|       |
   81|      2|    for (; i < *argc - 2; i++) {
  ------------------
  |  Branch (81:12): [True: 0, False: 2]
  ------------------
   82|      0|        (*argv)[i] = (*argv)[i + 2];
   83|      0|    }
   84|       |
   85|      2|    *argc = i;
   86|       |
   87|      2|    return 0;
   88|      2|}
LLVMFuzzerTestOneInput:
   94|  10.2k|{
   95|  10.2k|    Dav1dSettings settings = { 0 };
   96|  10.2k|    Dav1dContext * ctx = NULL;
   97|  10.2k|    Dav1dPicture pic;
   98|  10.2k|    const uint8_t *ptr = data;
   99|  10.2k|    int have_seq_hdr = 0;
  100|  10.2k|    int err;
  101|       |
  102|  10.2k|    dav1d_version();
  103|       |
  104|  10.2k|    if (size < 32) goto end;
  ------------------
  |  Branch (104:9): [True: 5, False: 10.2k]
  ------------------
  105|       |#ifdef DAV1D_ALLOC_FAIL
  106|       |    unsigned h = djb_xor(ptr, 32);
  107|       |    unsigned seed = h;
  108|       |    unsigned probability = h > (RAND_MAX >> 5) ? RAND_MAX >> 5 : h;
  109|       |    int max_frame_delay = (h & 0xf) + 1;
  110|       |    int n_threads = ((h >> 4) & 0x7) + 1;
  111|       |    if (max_frame_delay > 5) max_frame_delay = 1;
  112|       |    if (n_threads > 3) n_threads = 1;
  113|       |#endif
  114|  10.2k|    ptr += 32; // skip ivf header
  115|       |
  116|  10.2k|    dav1d_default_settings(&settings);
  117|       |
  118|  10.2k|#ifdef DAV1D_MT_FUZZING
  119|  10.2k|    settings.max_frame_delay = settings.n_threads = 4;
  120|       |#elif defined(DAV1D_ALLOC_FAIL)
  121|       |    settings.max_frame_delay = max_frame_delay;
  122|       |    settings.n_threads = n_threads;
  123|       |    dav1d_setup_alloc_fail(seed, probability);
  124|       |#else
  125|       |    settings.max_frame_delay = settings.n_threads = 1;
  126|       |#endif
  127|  10.2k|#if defined(DAV1D_FUZZ_MAX_SIZE)
  128|  10.2k|    settings.frame_size_limit = DAV1D_FUZZ_MAX_SIZE;
  ------------------
  |  |   56|  10.2k|#define DAV1D_FUZZ_MAX_SIZE 4096 * 4096
  ------------------
  129|  10.2k|#endif
  130|       |
  131|  10.2k|    err = dav1d_open(&ctx, &settings);
  132|  10.2k|    if (err < 0) goto end;
  ------------------
  |  Branch (132:9): [True: 0, False: 10.2k]
  ------------------
  133|       |
  134|   349k|    while (ptr <= data + size - 12) {
  ------------------
  |  Branch (134:12): [True: 341k, False: 8.06k]
  ------------------
  135|   341k|        Dav1dData buf;
  136|   341k|        uint8_t *p;
  137|       |
  138|   341k|        size_t frame_size = r32le(ptr);
  139|   341k|        ptr += 12;
  140|       |
  141|   341k|        if (frame_size > size || ptr > data + size - frame_size)
  ------------------
  |  Branch (141:13): [True: 1.55k, False: 340k]
  |  Branch (141:34): [True: 581, False: 339k]
  ------------------
  142|  2.13k|            break;
  143|       |
  144|   339k|        if (!frame_size) continue;
  ------------------
  |  Branch (144:13): [True: 2.21k, False: 337k]
  ------------------
  145|       |
  146|   337k|        if (!have_seq_hdr) {
  ------------------
  |  Branch (146:13): [True: 12.7k, False: 324k]
  ------------------
  147|  12.7k|            Dav1dSequenceHeader seq;
  148|  12.7k|            int err = dav1d_parse_sequence_header(&seq, ptr, frame_size);
  149|       |            // skip frames until we see a sequence header
  150|  12.7k|            if  (err != 0) {
  ------------------
  |  Branch (150:18): [True: 2.80k, False: 9.91k]
  ------------------
  151|  2.80k|                ptr += frame_size;
  152|  2.80k|                continue;
  153|  2.80k|            }
  154|  9.91k|            have_seq_hdr = 1;
  155|  9.91k|        }
  156|       |
  157|       |        // copy frame data to a new buffer to catch reads past the end of input
  158|   334k|        p = dav1d_data_create(&buf, frame_size);
  159|   334k|        if (!p) goto cleanup;
  ------------------
  |  Branch (159:13): [True: 0, False: 334k]
  ------------------
  160|   334k|        memcpy(p, ptr, frame_size);
  161|   334k|        ptr += frame_size;
  162|       |
  163|   341k|        do {
  164|   341k|            if ((err = dav1d_send_data(ctx, &buf)) < 0) {
  ------------------
  |  Branch (164:17): [True: 47.7k, False: 293k]
  ------------------
  165|  47.7k|                if (err != DAV1D_ERR(EAGAIN))
  ------------------
  |  |   58|  47.7k|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  |  Branch (165:21): [True: 39.7k, False: 7.99k]
  ------------------
  166|  39.7k|                    break;
  167|  47.7k|            }
  168|   301k|            memset(&pic, 0, sizeof(pic));
  169|   301k|            err = dav1d_get_picture(ctx, &pic);
  170|   301k|            if (err == 0) {
  ------------------
  |  Branch (170:17): [True: 123k, False: 177k]
  ------------------
  171|   123k|                dav1d_picture_unref(&pic);
  172|   177k|            } else if (err != DAV1D_ERR(EAGAIN)) {
  ------------------
  |  |   58|   177k|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  |  Branch (172:24): [True: 139k, False: 37.8k]
  ------------------
  173|   139k|                break;
  174|   139k|            }
  175|   301k|        } while (buf.sz > 0);
  ------------------
  |  Branch (175:18): [True: 6.82k, False: 154k]
  ------------------
  176|       |
  177|   334k|        if (buf.sz > 0)
  ------------------
  |  Branch (177:13): [True: 40.9k, False: 293k]
  ------------------
  178|  40.9k|            dav1d_data_unref(&buf);
  179|   334k|    }
  180|       |
  181|  10.2k|    memset(&pic, 0, sizeof(pic));
  182|  10.2k|    if ((err = dav1d_get_picture(ctx, &pic)) == 0) {
  ------------------
  |  Branch (182:9): [True: 6.07k, False: 4.13k]
  ------------------
  183|       |        /* Test calling dav1d_picture_unref() after dav1d_close() */
  184|  13.0k|        do {
  185|  13.0k|            Dav1dPicture pic2 = { 0 };
  186|  13.0k|            if ((err = dav1d_get_picture(ctx, &pic2)) == 0)
  ------------------
  |  Branch (186:17): [True: 4.52k, False: 8.57k]
  ------------------
  187|  4.52k|                dav1d_picture_unref(&pic2);
  188|  13.0k|        } while (err != DAV1D_ERR(EAGAIN));
  ------------------
  |  |   58|  13.0k|#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
  ------------------
  |  Branch (188:18): [True: 7.02k, False: 6.07k]
  ------------------
  189|       |
  190|  6.07k|        dav1d_close(&ctx);
  191|  6.07k|        dav1d_picture_unref(&pic);
  192|  6.07k|        return 0;
  193|  6.07k|    }
  194|       |
  195|  4.13k|cleanup:
  196|  4.13k|    dav1d_close(&ctx);
  197|  4.13k|end:
  198|  4.13k|    return 0;
  199|  4.13k|}
dav1d_fuzzer.c:r32le:
   52|   341k|static unsigned r32le(const uint8_t *const p) {
   53|   341k|    return ((uint32_t)p[3] << 24U) | (p[2] << 16U) | (p[1] << 8U) | p[0];
   54|   341k|}

